feat: get image blob in backend (#495)

* feat: get image blob in backend

* chore: update
This commit is contained in:
boojack 2022-11-19 18:43:56 +08:00 committed by GitHub
parent 9036bd478b
commit 2d49e96a8a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 175 additions and 54 deletions

View file

@ -1,9 +1,10 @@
package crawler package getter
import ( import (
"fmt"
"io" "io"
"net/http" "net/http"
urlUtil "net/url" "net/url"
"golang.org/x/net/html" "golang.org/x/net/html"
"golang.org/x/net/html/atom" "golang.org/x/net/html/atom"
@ -15,19 +16,26 @@ type HTMLMeta struct {
Image string `json:"image"` Image string `json:"image"`
} }
func GetWebsiteMeta(url string) (*HTMLMeta, error) { func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
if _, err := urlUtil.Parse(url); err != nil { if _, err := url.Parse(urlStr); err != nil {
return nil, err return nil, err
} }
response, err := http.Get(url) response, err := http.Get(urlStr)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer response.Body.Close() defer response.Body.Close()
htmlMeta := extractHTMLMeta(response.Body) mediatype, err := getMediatype(response)
if err != nil {
return nil, err
}
if mediatype != "text/html" {
return nil, fmt.Errorf("Wrong website mediatype")
}
htmlMeta := extractHTMLMeta(response.Body)
return htmlMeta, nil return htmlMeta, nil
} }

View file

@ -1,4 +1,4 @@
package crawler package getter
import ( import (
"testing" "testing"
@ -6,19 +6,19 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestGetWebsiteMeta(t *testing.T) { func TestGetHTMLMeta(t *testing.T) {
tests := []struct { tests := []struct {
url string urlStr string
htmlMeta HTMLMeta htmlMeta HTMLMeta
}{ }{
{ {
url: "https://baidu.com", urlStr: "https://baidu.com",
htmlMeta: HTMLMeta{ htmlMeta: HTMLMeta{
Title: "百度一下,你就知道", Title: "百度一下,你就知道",
}, },
}, },
{ {
url: "https://www.bytebase.com/blog/sql-review-tool-for-devs", urlStr: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
htmlMeta: HTMLMeta{ htmlMeta: HTMLMeta{
Title: "The SQL Review Tool for Developers", Title: "The SQL Review Tool for Developers",
Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.", Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.",
@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) {
}, },
} }
for _, test := range tests { for _, test := range tests {
metadata, err := GetWebsiteMeta(test.url) metadata, err := GetHTMLMeta(test.urlStr)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, test.htmlMeta, *metadata) require.Equal(t, test.htmlMeta, *metadata)
} }

View file

@ -1,4 +1,4 @@
// crawler is used to get resources from a URL. // getter is used to get resources from a URL.
// * Get metadata for website; // * Get metadata for website;
// * Get image blob to avoid CORS; // * Get image blob to avoid CORS;
package crawler package getter

View file

@ -0,0 +1,45 @@
package getter
import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
)
// Image is the result of fetching a remote image: the raw response body and
// the media type the remote server declared for it.
type Image struct {
// Blob is the unmodified response body.
Blob []byte
// Mediatype is the media type taken from the response's Content-Type
// header, without parameters (for example "image/webp").
Mediatype string
}
// GetImage downloads the resource at urlStr and returns its raw bytes together
// with the media type declared by the server.
//
// An error is returned when urlStr cannot be parsed, the request fails, the
// server answers with a non-2xx status, the Content-Type header is missing or
// malformed, the media type does not start with "image/", or the body is
// larger than 32 MiB.
//
// NOTE(review): urlStr is requested as-is through http.Get, i.e. with the
// default client (no timeout) and without any restriction on the target host.
// Callers that pass untrusted input should consider adding both.
func GetImage(urlStr string) (*Image, error) {
	// Upper bound on how many body bytes are buffered in memory.
	const maxImageSize = 32 << 20

	if _, err := url.Parse(urlStr); err != nil {
		return nil, err
	}

	response, err := http.Get(urlStr)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	// An error response may still carry an image content type (placeholder
	// pictures, CDN error images); do not hand those back as the real image.
	if response.StatusCode < http.StatusOK || response.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("unexpected response status %q", response.Status)
	}

	mediatype, err := getMediatype(response)
	if err != nil {
		return nil, err
	}
	if !strings.HasPrefix(mediatype, "image/") {
		return nil, fmt.Errorf("Wrong image mediatype")
	}

	// Read one byte past the limit so a body that is exactly maxImageSize long
	// can be told apart from one that was cut off by the limit.
	bodyBytes, err := io.ReadAll(io.LimitReader(response.Body, maxImageSize+1))
	if err != nil {
		return nil, err
	}
	if len(bodyBytes) > maxImageSize {
		return nil, fmt.Errorf("image is larger than %d bytes", maxImageSize)
	}

	image := &Image{
		Blob:      bodyBytes,
		Mediatype: mediatype,
	}
	return image, nil
}

View file

@ -0,0 +1,21 @@
package getter
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestGetImage checks that GetImage returns no error for each URL listed
// below. It issues real HTTP requests, so it needs network access.
func TestGetImage(t *testing.T) {
	urls := []string{
		"https://star-history.com/bytebase.webp",
	}

	for _, urlStr := range urls {
		_, fetchErr := GetImage(urlStr)
		require.NoError(t, fetchErr)
	}
}

View file

@ -0,0 +1,15 @@
package getter
import (
"mime"
"net/http"
)
// getMediatype reports the media type named in the response's Content-Type
// header, with parameters such as charset dropped. It returns an empty string
// and the parse error when the header is absent or cannot be parsed.
func getMediatype(response *http.Response) (string, error) {
	parsed, _, parseErr := mime.ParseMediaType(response.Header.Get("content-type"))
	if parseErr != nil {
		return "", parseErr
	}
	return parsed, nil
}

View file

@ -1,38 +0,0 @@
package server
import (
"encoding/json"
"fmt"
"net/http"
"github.com/labstack/echo/v4"
"github.com/usememos/memos/plugin/crawler"
metric "github.com/usememos/memos/plugin/metrics"
)
// registerCrawlerPublicRoutes mounts GET /crawler/website on g.
//
// The handler reads the "url" query parameter, asks crawler.GetWebsiteMeta for
// that page's metadata, records a "crawler used" metric and writes
// composeResponse(metadata) to the client as JSON. It answers 400 when the
// parameter is empty, 406 when the metadata cannot be fetched and 500 when the
// JSON encoding fails.
func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
	g.GET("/crawler/website", func(c echo.Context) error {
		target := c.QueryParam("url")
		if target == "" {
			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
		}

		meta, fetchErr := crawler.GetWebsiteMeta(target)
		if fetchErr != nil {
			message := fmt.Sprintf("Failed to get website meta with url: %s", target)
			return echo.NewHTTPError(http.StatusNotAcceptable, message).SetInternal(fetchErr)
		}

		usage := &metric.Metric{
			Name:   "crawler used",
			Labels: map[string]string{"type": "website"},
		}
		s.Collector.Collect(c.Request().Context(), usage)

		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
		encoder := json.NewEncoder(c.Response().Writer)
		if encodeErr := encoder.Encode(composeResponse(meta)); encodeErr != nil {
			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(encodeErr)
		}
		return nil
	})
}

70
server/http_getter.go Normal file
View file

@ -0,0 +1,70 @@
package server
import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"github.com/labstack/echo/v4"
getter "github.com/usememos/memos/plugin/http_getter"
metric "github.com/usememos/memos/plugin/metrics"
)
// registerCrawlerPublicRoutes mounts the public getter endpoints on g:
//
//   - GET /get/httpmeta?url=<url> fetches the page via getter.GetHTMLMeta and
//     writes composeResponse(metadata) as JSON.
//   - GET /get/image?url=<url> fetches the image via getter.GetImage and
//     writes its bytes back with the media type reported for it.
//
// Both handlers answer 400 when the url parameter is empty or unparsable and
// 406 when the fetch fails. Each successful fetch records a "getter used"
// metric labelled with the endpoint type.
func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
	g.GET("/get/httpmeta", func(c echo.Context) error {
		ctx := c.Request().Context()
		urlStr := c.QueryParam("url")
		if urlStr == "" {
			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
		}
		if _, err := url.Parse(urlStr); err != nil {
			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
		}

		htmlMeta, err := getter.GetHTMLMeta(urlStr)
		if err != nil {
			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", urlStr)).SetInternal(err)
		}
		s.Collector.Collect(ctx, &metric.Metric{
			Name: "getter used",
			Labels: map[string]string{
				"type": "httpmeta",
			},
		})

		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
		if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
		}
		return nil
	})

	g.GET("/get/image", func(c echo.Context) error {
		ctx := c.Request().Context()
		urlStr := c.QueryParam("url")
		if urlStr == "" {
			return echo.NewHTTPError(http.StatusBadRequest, "Missing image url")
		}
		if _, err := url.Parse(urlStr); err != nil {
			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
		}

		image, err := getter.GetImage(urlStr)
		if err != nil {
			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get image url: %s", urlStr)).SetInternal(err)
		}
		s.Collector.Collect(ctx, &metric.Metric{
			Name: "getter used",
			Labels: map[string]string{
				"type": "image",
			},
		})

		// The Content-Type must be set before WriteHeader: net/http ignores
		// changes to the header map once the status has been written, which
		// would drop the media type obtained from the upstream response.
		c.Response().Writer.Header().Set(echo.HeaderContentType, image.Mediatype)
		c.Response().Writer.WriteHeader(http.StatusOK)
		if _, err := c.Response().Writer.Write(image.Blob); err != nil {
			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write image blob").SetInternal(err)
		}
		return nil
	})
}

View file

@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) {
if _, err := c.Response().Writer.Write(resource.Blob); err != nil { if _, err := c.Response().Writer.Write(resource.Blob); err != nil {
return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err) return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err)
} }
return nil return nil
}) })

View file

@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => {
return rawStr; return rawStr;
} }
return `<img class='img' src='${escape(matchResult[1])}' />`; // NOTE: Get image blob from backend to avoid CORS.
return `<img class='img' src='/o/get/image?url=${escape(matchResult[1])}' />`;
}; };
export default { export default {