diff --git a/plugin/crawler/website.go b/plugin/http_getter/html_meta.go
similarity index 83%
rename from plugin/crawler/website.go
rename to plugin/http_getter/html_meta.go
index e48b277b..ca2d8a61 100644
--- a/plugin/crawler/website.go
+++ b/plugin/http_getter/html_meta.go
@@ -1,9 +1,10 @@
-package crawler
+package getter
 
 import (
+	"fmt"
 	"io"
 	"net/http"
-	urlUtil "net/url"
+	"net/url"
 
 	"golang.org/x/net/html"
 	"golang.org/x/net/html/atom"
@@ -15,19 +16,26 @@ type HTMLMeta struct {
 	Image       string `json:"image"`
 }
 
-func GetWebsiteMeta(url string) (*HTMLMeta, error) {
-	if _, err := urlUtil.Parse(url); err != nil {
+func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
+	if _, err := url.Parse(urlStr); err != nil {
 		return nil, err
 	}
 
-	response, err := http.Get(url)
+	response, err := http.Get(urlStr)
 	if err != nil {
 		return nil, err
 	}
 	defer response.Body.Close()
 
-	htmlMeta := extractHTMLMeta(response.Body)
+	mediatype, err := getMediatype(response)
+	if err != nil {
+		return nil, err
+	}
+	if mediatype != "text/html" {
+		return nil, fmt.Errorf("Wrong website mediatype")
+	}
+	htmlMeta := extractHTMLMeta(response.Body)
 
 	return htmlMeta, nil
 }
 
diff --git a/plugin/crawler/website_test.go b/plugin/http_getter/html_meta_test.go
similarity index 76%
rename from plugin/crawler/website_test.go
rename to plugin/http_getter/html_meta_test.go
index 655e7773..0c0e6013 100644
--- a/plugin/crawler/website_test.go
+++ b/plugin/http_getter/html_meta_test.go
@@ -1,4 +1,4 @@
-package crawler
+package getter
 
 import (
 	"testing"
@@ -6,19 +6,19 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func TestGetWebsiteMeta(t *testing.T) {
+func TestGetHTMLMeta(t *testing.T) {
 	tests := []struct {
-		url      string
+		urlStr   string
 		htmlMeta HTMLMeta
 	}{
 		{
-			url: "https://baidu.com",
+			urlStr: "https://baidu.com",
 			htmlMeta: HTMLMeta{
 				Title: "百度一下,你就知道",
 			},
 		},
 		{
-			url: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
+			urlStr: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
 			htmlMeta: HTMLMeta{
 				Title:       "The SQL Review Tool for Developers",
 				Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.",
@@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		metadata, err := GetWebsiteMeta(test.url)
+		metadata, err := GetHTMLMeta(test.urlStr)
 		require.NoError(t, err)
 		require.Equal(t, test.htmlMeta, *metadata)
 	}
diff --git a/plugin/crawler/crawler.go b/plugin/http_getter/http_getter.go
similarity index 51%
rename from plugin/crawler/crawler.go
rename to plugin/http_getter/http_getter.go
index 12752e56..db171171 100644
--- a/plugin/crawler/crawler.go
+++ b/plugin/http_getter/http_getter.go
@@ -1,4 +1,4 @@
-// crawler is using to get resources from url.
+// getter is using to get resources from url.
 // * Get metadata for website;
 // * Get image blob to avoid CORS;
-package crawler
+package getter
diff --git a/plugin/http_getter/image.go b/plugin/http_getter/image.go
new file mode 100644
index 00000000..1dbd3220
--- /dev/null
+++ b/plugin/http_getter/image.go
@@ -0,0 +1,45 @@
+package getter
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+type Image struct {
+	Blob      []byte
+	Mediatype string
+}
+
+func GetImage(urlStr string) (*Image, error) {
+	if _, err := url.Parse(urlStr); err != nil {
+		return nil, err
+	}
+
+	response, err := http.Get(urlStr)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	mediatype, err := getMediatype(response)
+	if err != nil {
+		return nil, err
+	}
+	if !strings.HasPrefix(mediatype, "image/") {
+		return nil, fmt.Errorf("Wrong image mediatype")
+	}
+
+	bodyBytes, err := io.ReadAll(response.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	image := &Image{
+		Blob:      bodyBytes,
+		Mediatype: mediatype,
+	}
+	return image, nil
+}
diff --git a/plugin/http_getter/image_test.go b/plugin/http_getter/image_test.go
new file mode 100644
index 00000000..a81d3031
--- /dev/null
+++ b/plugin/http_getter/image_test.go
@@ -0,0 +1,21 @@
+package getter
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestGetImage(t *testing.T) {
+	tests := []struct {
+		urlStr string
+	}{
+		{
+			urlStr: "https://star-history.com/bytebase.webp",
+		},
+	}
+	for _, test := range tests {
+		_, err := GetImage(test.urlStr)
+		require.NoError(t, err)
+	}
+}
diff --git a/plugin/http_getter/util.go b/plugin/http_getter/util.go
new file mode 100644
index 00000000..c9125ee8
--- /dev/null
+++ b/plugin/http_getter/util.go
@@ -0,0 +1,15 @@
+package getter
+
+import (
+	"mime"
+	"net/http"
+)
+
+func getMediatype(response *http.Response) (string, error) {
+	contentType := response.Header.Get("content-type")
+	mediatype, _, err := mime.ParseMediaType(contentType)
+	if err != nil {
+		return "", err
+	}
+	return mediatype, nil
+}
diff --git a/server/crawler.go b/server/crawler.go
deleted file mode 100644
index b6845b15..00000000
--- a/server/crawler.go
+++ /dev/null
@@ -1,38 +0,0 @@
-package server
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-
-	"github.com/labstack/echo/v4"
-	"github.com/usememos/memos/plugin/crawler"
-	metric "github.com/usememos/memos/plugin/metrics"
-)
-
-func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
-	g.GET("/crawler/website", func(c echo.Context) error {
-		ctx := c.Request().Context()
-		url := c.QueryParam("url")
-		if url == "" {
-			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
-		}
-
-		htmlMeta, err := crawler.GetWebsiteMeta(url)
-		if err != nil {
-			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", url)).SetInternal(err)
-		}
-		s.Collector.Collect(ctx, &metric.Metric{
-			Name: "crawler used",
-			Labels: map[string]string{
-				"type": "website",
-			},
-		})
-
-		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
-		if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
-			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
-		}
-		return nil
-	})
-}
diff --git a/server/http_getter.go b/server/http_getter.go
new file mode 100644
index 00000000..65da8053
--- /dev/null
+++ b/server/http_getter.go
@@ -0,0 +1,70 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+
"github.com/labstack/echo/v4" + getter "github.com/usememos/memos/plugin/http_getter" + metric "github.com/usememos/memos/plugin/metrics" +) + +func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) { + g.GET("/get/httpmeta", func(c echo.Context) error { + ctx := c.Request().Context() + urlStr := c.QueryParam("url") + if urlStr == "" { + return echo.NewHTTPError(http.StatusBadRequest, "Missing website url") + } + if _, err := url.Parse(urlStr); err != nil { + return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err) + } + + htmlMeta, err := getter.GetHTMLMeta(urlStr) + if err != nil { + return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", urlStr)).SetInternal(err) + } + s.Collector.Collect(ctx, &metric.Metric{ + Name: "getter used", + Labels: map[string]string{ + "type": "httpmeta", + }, + }) + + c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8) + if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil { + return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err) + } + return nil + }) + g.GET("/get/image", func(c echo.Context) error { + ctx := c.Request().Context() + urlStr := c.QueryParam("url") + if urlStr == "" { + return echo.NewHTTPError(http.StatusBadRequest, "Missing image url") + } + if _, err := url.Parse(urlStr); err != nil { + return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err) + } + + image, err := getter.GetImage(urlStr) + if err != nil { + return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get image url: %s", urlStr)).SetInternal(err) + } + s.Collector.Collect(ctx, &metric.Metric{ + Name: "getter used", + Labels: map[string]string{ + "type": "image", + }, + }) + + c.Response().Writer.WriteHeader(http.StatusOK) + c.Response().Writer.Header().Set("Content-Type", image.Mediatype) + if _, err := c.Response().Writer.Write(image.Blob); err != nil { + return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write image blob").SetInternal(err) + } + return nil + }) +} diff --git a/server/resource.go b/server/resource.go index 14dbab82..b21462fa 100644 --- a/server/resource.go +++ b/server/resource.go @@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) { if _, err := c.Response().Writer.Write(resource.Blob); err != nil { return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err) } - return nil }) diff --git a/web/src/labs/marked/parser/Image.ts b/web/src/labs/marked/parser/Image.ts index b366e0d9..3432e5e2 100644 --- a/web/src/labs/marked/parser/Image.ts +++ b/web/src/labs/marked/parser/Image.ts @@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => { return rawStr; } - return ``; + // NOTE: Get image blob from backend to avoid CORS. + return ``; }; export default {