diff --git a/plugin/crawler/website.go b/plugin/http_getter/html_meta.go
similarity index 83%
rename from plugin/crawler/website.go
rename to plugin/http_getter/html_meta.go
index e48b277b..ca2d8a61 100644
--- a/plugin/crawler/website.go
+++ b/plugin/http_getter/html_meta.go
@@ -1,9 +1,10 @@
-package crawler
+package getter
import (
+ "fmt"
"io"
"net/http"
- urlUtil "net/url"
+ "net/url"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
@@ -15,19 +16,26 @@ type HTMLMeta struct {
Image string `json:"image"`
}
-func GetWebsiteMeta(url string) (*HTMLMeta, error) {
- if _, err := urlUtil.Parse(url); err != nil {
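+// GetHTMLMeta fetches the page at urlStr and extracts its HTML metadata (title, description, image).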
+func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
+ if _, err := url.Parse(urlStr); err != nil {
return nil, err
}
- response, err := http.Get(url)
+ response, err := http.Get(urlStr)
if err != nil {
return nil, err
}
defer response.Body.Close()
- htmlMeta := extractHTMLMeta(response.Body)
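+ // Only parse the body when the response declares an HTML media type.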
+ mediatype, err := getMediatype(response)
+ if err != nil {
+ return nil, err
+ }
+ if mediatype != "text/html" {
+ return nil, fmt.Errorf("Wrong website mediatype")
+ }
+ htmlMeta := extractHTMLMeta(response.Body)
return htmlMeta, nil
}
diff --git a/plugin/crawler/website_test.go b/plugin/http_getter/html_meta_test.go
similarity index 76%
rename from plugin/crawler/website_test.go
rename to plugin/http_getter/html_meta_test.go
index 655e7773..0c0e6013 100644
--- a/plugin/crawler/website_test.go
+++ b/plugin/http_getter/html_meta_test.go
@@ -1,4 +1,4 @@
-package crawler
+package getter
import (
"testing"
@@ -6,19 +6,19 @@ import (
"github.com/stretchr/testify/require"
)
-func TestGetWebsiteMeta(t *testing.T) {
+func TestGetHTMLMeta(t *testing.T) {
tests := []struct {
- url string
+ urlStr string
htmlMeta HTMLMeta
}{
{
- url: "https://baidu.com",
+ urlStr: "https://baidu.com",
htmlMeta: HTMLMeta{
Title: "百度一下,你就知道",
},
},
{
- url: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
+ urlStr: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
htmlMeta: HTMLMeta{
Title: "The SQL Review Tool for Developers",
Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.",
@@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) {
},
}
for _, test := range tests {
- metadata, err := GetWebsiteMeta(test.url)
+ metadata, err := GetHTMLMeta(test.urlStr)
require.NoError(t, err)
require.Equal(t, test.htmlMeta, *metadata)
}
diff --git a/plugin/crawler/crawler.go b/plugin/http_getter/http_getter.go
similarity index 51%
rename from plugin/crawler/crawler.go
rename to plugin/http_getter/http_getter.go
index 12752e56..db171171 100644
--- a/plugin/crawler/crawler.go
+++ b/plugin/http_getter/http_getter.go
@@ -1,4 +1,4 @@
-// crawler is using to get resources from url.
+// getter is used to fetch resources from a URL.
// * Get metadata for website;
// * Get image blob to avoid CORS;
-package crawler
+package getter
diff --git a/plugin/http_getter/image.go b/plugin/http_getter/image.go
new file mode 100644
index 00000000..1dbd3220
--- /dev/null
+++ b/plugin/http_getter/image.go
@@ -0,0 +1,45 @@
+package getter
+
+import (
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "strings"
+)
+
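+// Image holds an image blob fetched over HTTP together with its media type.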
+type Image struct {
+ Blob []byte
+ Mediatype string
+}
+
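+// GetImage fetches the image at urlStr and returns its raw bytes and detected media type.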
+func GetImage(urlStr string) (*Image, error) {
+ if _, err := url.Parse(urlStr); err != nil {
+ return nil, err
+ }
+
+ response, err := http.Get(urlStr)
+ if err != nil {
+ return nil, err
+ }
+ defer response.Body.Close()
+
+ mediatype, err := getMediatype(response)
+ if err != nil {
+ return nil, err
+ }
+ if !strings.HasPrefix(mediatype, "image/") {
+ return nil, fmt.Errorf("Wrong image mediatype")
+ }
+
+ bodyBytes, err := io.ReadAll(response.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ image := &Image{
+ Blob: bodyBytes,
+ Mediatype: mediatype,
+ }
+ return image, nil
+}
diff --git a/plugin/http_getter/image_test.go b/plugin/http_getter/image_test.go
new file mode 100644
index 00000000..a81d3031
--- /dev/null
+++ b/plugin/http_getter/image_test.go
@@ -0,0 +1,21 @@
+package getter
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestGetImage(t *testing.T) {
+ tests := []struct {
+ urlStr string
+ }{
+ {
+ urlStr: "https://star-history.com/bytebase.webp",
+ },
+ }
+ for _, test := range tests {
+ _, err := GetImage(test.urlStr)
+ require.NoError(t, err)
+ }
+}
diff --git a/plugin/http_getter/util.go b/plugin/http_getter/util.go
new file mode 100644
index 00000000..c9125ee8
--- /dev/null
+++ b/plugin/http_getter/util.go
@@ -0,0 +1,15 @@
+package getter
+
+import (
+ "mime"
+ "net/http"
+)
+
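+// getMediatype parses the media type (e.g. "text/html", "image/png") from the response's Content-Type header.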
+func getMediatype(response *http.Response) (string, error) {
+ contentType := response.Header.Get("Content-Type")
+ mediatype, _, err := mime.ParseMediaType(contentType)
+ if err != nil {
+ return "", err
+ }
+ return mediatype, nil
+}
diff --git a/server/crawler.go b/server/crawler.go
deleted file mode 100644
index b6845b15..00000000
--- a/server/crawler.go
+++ /dev/null
@@ -1,38 +0,0 @@
-package server
-
-import (
- "encoding/json"
- "fmt"
- "net/http"
-
- "github.com/labstack/echo/v4"
- "github.com/usememos/memos/plugin/crawler"
- metric "github.com/usememos/memos/plugin/metrics"
-)
-
-func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
- g.GET("/crawler/website", func(c echo.Context) error {
- ctx := c.Request().Context()
- url := c.QueryParam("url")
- if url == "" {
- return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
- }
-
- htmlMeta, err := crawler.GetWebsiteMeta(url)
- if err != nil {
- return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", url)).SetInternal(err)
- }
- s.Collector.Collect(ctx, &metric.Metric{
- Name: "crawler used",
- Labels: map[string]string{
- "type": "website",
- },
- })
-
- c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
- if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
- return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
- }
- return nil
- })
-}
diff --git a/server/http_getter.go b/server/http_getter.go
new file mode 100644
index 00000000..65da8053
--- /dev/null
+++ b/server/http_getter.go
@@ -0,0 +1,70 @@
+package server
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "net/url"
+
+ "github.com/labstack/echo/v4"
+ getter "github.com/usememos/memos/plugin/http_getter"
+ metric "github.com/usememos/memos/plugin/metrics"
+)
+
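+// registerCrawlerPublicRoutes registers the public HTTP getter endpoints: /get/httpmeta and /get/image.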
+func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
+ g.GET("/get/httpmeta", func(c echo.Context) error {
+ ctx := c.Request().Context()
+ urlStr := c.QueryParam("url")
+ if urlStr == "" {
+ return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
+ }
+ if _, err := url.Parse(urlStr); err != nil {
+ return echo.NewHTTPError(http.StatusBadRequest, "Invalid url").SetInternal(err)
+ }
+
+ htmlMeta, err := getter.GetHTMLMeta(urlStr)
+ if err != nil {
+ return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", urlStr)).SetInternal(err)
+ }
+ s.Collector.Collect(ctx, &metric.Metric{
+ Name: "getter used",
+ Labels: map[string]string{
+ "type": "httpmeta",
+ },
+ })
+
+ c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
+ if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
+ return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
+ }
+ return nil
+ })
+ g.GET("/get/image", func(c echo.Context) error {
+ ctx := c.Request().Context()
+ urlStr := c.QueryParam("url")
+ if urlStr == "" {
+ return echo.NewHTTPError(http.StatusBadRequest, "Missing image url")
+ }
+ if _, err := url.Parse(urlStr); err != nil {
+ return echo.NewHTTPError(http.StatusBadRequest, "Invalid url").SetInternal(err)
+ }
+
+ image, err := getter.GetImage(urlStr)
+ if err != nil {
+ return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get image with url: %s", urlStr)).SetInternal(err)
+ }
+ s.Collector.Collect(ctx, &metric.Metric{
+ Name: "getter used",
+ Labels: map[string]string{
+ "type": "image",
+ },
+ })
+
+ // Set the Content-Type header before writing the status code; headers set after WriteHeader are ignored.
+ c.Response().Writer.Header().Set("Content-Type", image.Mediatype)
+ c.Response().Writer.WriteHeader(http.StatusOK)
+ if _, err := c.Response().Writer.Write(image.Blob); err != nil {
+ return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write image blob").SetInternal(err)
+ }
+ return nil
+ })
+}
diff --git a/server/resource.go b/server/resource.go
index 14dbab82..b21462fa 100644
--- a/server/resource.go
+++ b/server/resource.go
@@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) {
if _, err := c.Response().Writer.Write(resource.Blob); err != nil {
return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err)
}
-
return nil
})
diff --git a/web/src/labs/marked/parser/Image.ts b/web/src/labs/marked/parser/Image.ts
index b366e0d9..3432e5e2 100644
--- a/web/src/labs/marked/parser/Image.ts
+++ b/web/src/labs/marked/parser/Image.ts
@@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => {
return rawStr;
}
- return `<img class="img" src="${matchResult[1]}" />`;
+ // NOTE: Get image blob from backend to avoid CORS.
+ return `<img class="img" src="/o/get/image?url=${matchResult[1]}" />`;
};
export default {