Mirror of https://github.com/usememos/memos.git
feat: get image blob in backend (#495)

* feat: get image blob in backend
* chore: update

parent 9036bd478b
commit 2d49e96a8a

10 changed files with 175 additions and 54 deletions
@@ -1,9 +1,10 @@
-package crawler
+package getter
 
 import (
+	"fmt"
 	"io"
 	"net/http"
-	urlUtil "net/url"
+	"net/url"
 
 	"golang.org/x/net/html"
 	"golang.org/x/net/html/atom"
@@ -15,19 +16,26 @@ type HTMLMeta struct {
 	Image       string `json:"image"`
 }
 
-func GetWebsiteMeta(url string) (*HTMLMeta, error) {
-	if _, err := urlUtil.Parse(url); err != nil {
+func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
+	if _, err := url.Parse(urlStr); err != nil {
 		return nil, err
 	}
 
-	response, err := http.Get(url)
+	response, err := http.Get(urlStr)
 	if err != nil {
 		return nil, err
 	}
 	defer response.Body.Close()
 
-	htmlMeta := extractHTMLMeta(response.Body)
+	mediatype, err := getMediatype(response)
+	if err != nil {
+		return nil, err
+	}
+	if mediatype != "text/html" {
+		return nil, fmt.Errorf("Wrong website mediatype")
+	}
+
+	htmlMeta := extractHTMLMeta(response.Body)
 	return htmlMeta, nil
 }
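For orientation, here is a minimal, hypothetical caller for the renamed helper. It assumes the plugin/http_getter import path that the rest of this commit introduces; the URL is just an example:

```go
package main

import (
	"fmt"
	"log"

	getter "github.com/usememos/memos/plugin/http_getter"
)

func main() {
	// GetHTMLMeta rejects any response whose mediatype is not text/html
	// before extracting the page metadata.
	meta, err := getter.GetHTMLMeta("https://www.bytebase.com/blog/sql-review-tool-for-devs")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(meta.Title, meta.Description, meta.Image)
}
```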
@@ -1,4 +1,4 @@
-package crawler
+package getter
 
 import (
 	"testing"
@@ -6,19 +6,19 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func TestGetWebsiteMeta(t *testing.T) {
+func TestGetHTMLMeta(t *testing.T) {
 	tests := []struct {
-		url      string
+		urlStr   string
 		htmlMeta HTMLMeta
 	}{
 		{
-			url: "https://baidu.com",
+			urlStr: "https://baidu.com",
 			htmlMeta: HTMLMeta{
 				Title: "百度一下,你就知道",
 			},
 		},
 		{
-			url: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
+			urlStr: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
 			htmlMeta: HTMLMeta{
 				Title:       "The SQL Review Tool for Developers",
 				Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.",
@@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		metadata, err := GetWebsiteMeta(test.url)
+		metadata, err := GetHTMLMeta(test.urlStr)
 		require.NoError(t, err)
 		require.Equal(t, test.htmlMeta, *metadata)
 	}
@@ -1,4 +1,4 @@
-// crawler is using to get resources from url.
+// getter is using to get resources from url.
 // * Get metadata for website;
 // * Get image blob to avoid CORS;
-package crawler
+package getter
plugin/http_getter/image.go (new file, 45 lines)
@@ -0,0 +1,45 @@
+package getter
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+type Image struct {
+	Blob      []byte
+	Mediatype string
+}
+
+func GetImage(urlStr string) (*Image, error) {
+	if _, err := url.Parse(urlStr); err != nil {
+		return nil, err
+	}
+
+	response, err := http.Get(urlStr)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	mediatype, err := getMediatype(response)
+	if err != nil {
+		return nil, err
+	}
+	if !strings.HasPrefix(mediatype, "image/") {
+		return nil, fmt.Errorf("Wrong image mediatype")
+	}
+
+	bodyBytes, err := io.ReadAll(response.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	image := &Image{
+		Blob:      bodyBytes,
+		Mediatype: mediatype,
+	}
+	return image, nil
+}
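A minimal usage sketch for the new helper; the output file name is illustrative only and not part of the commit:

```go
package main

import (
	"log"
	"os"

	getter "github.com/usememos/memos/plugin/http_getter"
)

func main() {
	// GetImage returns both the raw bytes and the parsed mediatype
	// (e.g. image/webp) of the remote file.
	img, err := getter.GetImage("https://star-history.com/bytebase.webp")
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("fetched %d bytes of %s", len(img.Blob), img.Mediatype)

	// Illustrative only: persist the blob locally.
	if err := os.WriteFile("bytebase.webp", img.Blob, 0o644); err != nil {
		log.Fatal(err)
	}
}
```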
plugin/http_getter/image_test.go (new file, 21 lines)
@@ -0,0 +1,21 @@
+package getter
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestGetImage(t *testing.T) {
+	tests := []struct {
+		urlStr string
+	}{
+		{
+			urlStr: "https://star-history.com/bytebase.webp",
+		},
+	}
+	for _, test := range tests {
+		_, err := GetImage(test.urlStr)
+		require.NoError(t, err)
+	}
+}
plugin/http_getter/util.go (new file, 15 lines)
@@ -0,0 +1,15 @@
+package getter
+
+import (
+	"mime"
+	"net/http"
+)
+
+func getMediatype(response *http.Response) (string, error) {
+	contentType := response.Header.Get("content-type")
+	mediatype, _, err := mime.ParseMediaType(contentType)
+	if err != nil {
+		return "", err
+	}
+	return mediatype, nil
+}
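getMediatype leans on mime.ParseMediaType, which splits off parameters and lower-cases the type, so a Content-Type such as "text/HTML; charset=utf-8" still compares cleanly against "text/html" or the "image/" prefix. A small sketch of that behavior:

```go
package main

import (
	"fmt"
	"mime"
)

func main() {
	// ParseMediaType strips parameters (charset, boundary, ...)
	// and returns the mediatype in lower case.
	mediatype, params, err := mime.ParseMediaType("text/HTML; charset=utf-8")
	if err != nil {
		panic(err)
	}
	fmt.Println(mediatype)         // text/html
	fmt.Println(params["charset"]) // utf-8
}
```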
@@ -1,38 +0,0 @@
-package server
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-
-	"github.com/labstack/echo/v4"
-	"github.com/usememos/memos/plugin/crawler"
-	metric "github.com/usememos/memos/plugin/metrics"
-)
-
-func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
-	g.GET("/crawler/website", func(c echo.Context) error {
-		ctx := c.Request().Context()
-		url := c.QueryParam("url")
-		if url == "" {
-			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
-		}
-
-		htmlMeta, err := crawler.GetWebsiteMeta(url)
-		if err != nil {
-			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", url)).SetInternal(err)
-		}
-		s.Collector.Collect(ctx, &metric.Metric{
-			Name: "crawler used",
-			Labels: map[string]string{
-				"type": "website",
-			},
-		})
-
-		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
-		if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
-			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
-		}
-		return nil
-	})
-}
server/http_getter.go (new file, 70 lines)
@@ -0,0 +1,70 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+
+	"github.com/labstack/echo/v4"
+	getter "github.com/usememos/memos/plugin/http_getter"
+	metric "github.com/usememos/memos/plugin/metrics"
+)
+
+func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
+	g.GET("/get/httpmeta", func(c echo.Context) error {
+		ctx := c.Request().Context()
+		urlStr := c.QueryParam("url")
+		if urlStr == "" {
+			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
+		}
+		if _, err := url.Parse(urlStr); err != nil {
+			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
+		}
+
+		htmlMeta, err := getter.GetHTMLMeta(urlStr)
+		if err != nil {
+			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", urlStr)).SetInternal(err)
+		}
+		s.Collector.Collect(ctx, &metric.Metric{
+			Name: "getter used",
+			Labels: map[string]string{
+				"type": "httpmeta",
+			},
+		})
+
+		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
+		if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
+			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
+		}
+		return nil
+	})
+	g.GET("/get/image", func(c echo.Context) error {
+		ctx := c.Request().Context()
+		urlStr := c.QueryParam("url")
+		if urlStr == "" {
+			return echo.NewHTTPError(http.StatusBadRequest, "Missing image url")
+		}
+		if _, err := url.Parse(urlStr); err != nil {
+			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
+		}
+
+		image, err := getter.GetImage(urlStr)
+		if err != nil {
+			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get image url: %s", urlStr)).SetInternal(err)
+		}
+		s.Collector.Collect(ctx, &metric.Metric{
+			Name: "getter used",
+			Labels: map[string]string{
+				"type": "image",
+			},
+		})
+
+		c.Response().Writer.WriteHeader(http.StatusOK)
+		c.Response().Writer.Header().Set("Content-Type", image.Mediatype)
+		if _, err := c.Response().Writer.Write(image.Blob); err != nil {
+			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write image blob").SetInternal(err)
+		}
+		return nil
+	})
+}
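A rough client-side sketch of the new image endpoint. It assumes a memos server reachable on localhost:5230 and the route group mounted under /o (the frontend change below points at /o/get/image); neither the address nor the group prefix is shown in this diff:

```go
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
)

func main() {
	// Assumed address and prefix: http://localhost:5230 and /o.
	target := "https://star-history.com/bytebase.webp"
	resp, err := http.Get("http://localhost:5230/o/get/image?url=" + url.QueryEscape(target))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	blob, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s: %d bytes (%s)\n", resp.Status, len(blob), resp.Header.Get("Content-Type"))
}
```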
@@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) {
 		if _, err := c.Response().Writer.Write(resource.Blob); err != nil {
 			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err)
 		}
-
 		return nil
 	})
 
@@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => {
     return rawStr;
   }
 
-  return `<img class='img' src='${escape(matchResult[1])}' />`;
+  // NOTE: Get image blob from backend to avoid CORS.
+  return `<img class='img' src='/o/get/image?url=${escape(matchResult[1])}' />`;
 };
 
 export default {