Mirror of https://github.com/usememos/memos.git, synced 2025-01-31 09:37:51 +08:00
feat: get image blob in backend (#495)

* feat: get image blob in backend
* chore: update

parent: 9036bd478b
commit: 2d49e96a8a

10 changed files with 175 additions and 54 deletions
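For orientation, a minimal sketch of the proxy pattern this commit introduces: instead of the browser loading third-party images directly (and running into CORS restrictions), the frontend points the img tag at a backend route, and the backend fetches the bytes and streams them back. The sketch uses plain net/http rather than the project's echo server; the route path and port are placeholders, not part of the commit.

package main

import (
	"io"
	"net/http"
)

func main() {
	// Illustrative stand-alone handler; the real implementation added by this
	// commit lives in plugin/http_getter and server/http_getter.go below.
	http.HandleFunc("/get/image", func(w http.ResponseWriter, r *http.Request) {
		src := r.URL.Query().Get("url")
		if src == "" {
			http.Error(w, "missing url", http.StatusBadRequest)
			return
		}
		resp, err := http.Get(src)
		if err != nil {
			http.Error(w, err.Error(), http.StatusNotAcceptable)
			return
		}
		defer resp.Body.Close()
		// Forward the upstream media type and body to the browser.
		w.Header().Set("Content-Type", resp.Header.Get("Content-Type"))
		_, _ = io.Copy(w, resp.Body)
	})
	_ = http.ListenAndServe(":8080", nil) // placeholder port
}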
@@ -1,9 +1,10 @@
-package crawler
+package getter
 
 import (
+	"fmt"
 	"io"
 	"net/http"
-	urlUtil "net/url"
+	"net/url"
 
 	"golang.org/x/net/html"
 	"golang.org/x/net/html/atom"
@@ -15,19 +16,26 @@ type HTMLMeta struct {
 	Image       string `json:"image"`
 }
 
-func GetWebsiteMeta(url string) (*HTMLMeta, error) {
-	if _, err := urlUtil.Parse(url); err != nil {
+func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
+	if _, err := url.Parse(urlStr); err != nil {
 		return nil, err
 	}
 
-	response, err := http.Get(url)
+	response, err := http.Get(urlStr)
 	if err != nil {
 		return nil, err
 	}
 	defer response.Body.Close()
 
-	htmlMeta := extractHTMLMeta(response.Body)
+	mediatype, err := getMediatype(response)
+	if err != nil {
+		return nil, err
+	}
+	if mediatype != "text/html" {
+		return nil, fmt.Errorf("Wrong website mediatype")
+	}
+
+	htmlMeta := extractHTMLMeta(response.Body)
 	return htmlMeta, nil
 }
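A hedged usage sketch for the renamed helper (the import alias matches the one used in server/http_getter.go below; the URL is just an example): with the new getMediatype check, GetHTMLMeta now returns an error for URLs that do not serve text/html instead of trying to parse them.

package main

import (
	"log"

	getter "github.com/usememos/memos/plugin/http_getter"
)

func main() {
	// Example URL; any page served as text/html works.
	meta, err := getter.GetHTMLMeta("https://example.com")
	if err != nil {
		// A direct image or JSON URL now fails here because of the
		// "text/html" mediatype check added above.
		log.Fatalf("get HTML meta: %v", err)
	}
	log.Printf("title=%q description=%q", meta.Title, meta.Description)
}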
@@ -1,4 +1,4 @@
-package crawler
+package getter
 
 import (
 	"testing"
@@ -6,19 +6,19 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func TestGetWebsiteMeta(t *testing.T) {
+func TestGetHTMLMeta(t *testing.T) {
 	tests := []struct {
-		url      string
+		urlStr   string
 		htmlMeta HTMLMeta
 	}{
 		{
-			url: "https://baidu.com",
+			urlStr: "https://baidu.com",
 			htmlMeta: HTMLMeta{
 				Title: "百度一下,你就知道",
 			},
 		},
 		{
-			url: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
+			urlStr: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
 			htmlMeta: HTMLMeta{
 				Title:       "The SQL Review Tool for Developers",
 				Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.",
@@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		metadata, err := GetWebsiteMeta(test.url)
+		metadata, err := GetHTMLMeta(test.urlStr)
 		require.NoError(t, err)
 		require.Equal(t, test.htmlMeta, *metadata)
 	}
@@ -1,4 +1,4 @@
-// crawler is using to get resources from url.
+// getter is using to get resources from url.
 // * Get metadata for website;
 // * Get image blob to avoid CORS;
-package crawler
+package getter
plugin/http_getter/image.go (new file, 45 lines)

@@ -0,0 +1,45 @@
+package getter
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+type Image struct {
+	Blob      []byte
+	Mediatype string
+}
+
+func GetImage(urlStr string) (*Image, error) {
+	if _, err := url.Parse(urlStr); err != nil {
+		return nil, err
+	}
+
+	response, err := http.Get(urlStr)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	mediatype, err := getMediatype(response)
+	if err != nil {
+		return nil, err
+	}
+	if !strings.HasPrefix(mediatype, "image/") {
+		return nil, fmt.Errorf("Wrong image mediatype")
+	}
+
+	bodyBytes, err := io.ReadAll(response.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	image := &Image{
+		Blob:      bodyBytes,
+		Mediatype: mediatype,
+	}
+	return image, nil
+}
plugin/http_getter/image_test.go (new file, 21 lines)

@@ -0,0 +1,21 @@
+package getter
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestGetImage(t *testing.T) {
+	tests := []struct {
+		urlStr string
+	}{
+		{
+			urlStr: "https://star-history.com/bytebase.webp",
+		},
+	}
+	for _, test := range tests {
+		_, err := GetImage(test.urlStr)
+		require.NoError(t, err)
+	}
+}
plugin/http_getter/util.go (new file, 15 lines)

@@ -0,0 +1,15 @@
+package getter
+
+import (
+	"mime"
+	"net/http"
+)
+
+func getMediatype(response *http.Response) (string, error) {
+	contentType := response.Header.Get("content-type")
+	mediatype, _, err := mime.ParseMediaType(contentType)
+	if err != nil {
+		return "", err
+	}
+	return mediatype, nil
+}
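getMediatype leans on mime.ParseMediaType from the standard library, which lowercases the media type and splits off any parameters; a small illustration of the values the "text/html" and "image/" checks above actually compare against:

package main

import (
	"fmt"
	"mime"
)

func main() {
	// A typical Content-Type header value with a parameter.
	mediatype, params, err := mime.ParseMediaType("text/HTML; charset=UTF-8")
	if err != nil {
		panic(err)
	}
	fmt.Println(mediatype)         // "text/html"
	fmt.Println(params["charset"]) // "UTF-8"
}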
(deleted file, 38 lines)

@@ -1,38 +0,0 @@
-package server
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-
-	"github.com/labstack/echo/v4"
-	"github.com/usememos/memos/plugin/crawler"
-	metric "github.com/usememos/memos/plugin/metrics"
-)
-
-func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
-	g.GET("/crawler/website", func(c echo.Context) error {
-		ctx := c.Request().Context()
-		url := c.QueryParam("url")
-		if url == "" {
-			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
-		}
-
-		htmlMeta, err := crawler.GetWebsiteMeta(url)
-		if err != nil {
-			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", url)).SetInternal(err)
-		}
-		s.Collector.Collect(ctx, &metric.Metric{
-			Name: "crawler used",
-			Labels: map[string]string{
-				"type": "website",
-			},
-		})
-
-		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
-		if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
-			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
-		}
-		return nil
-	})
-}
server/http_getter.go (new file, 70 lines)

@@ -0,0 +1,70 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+
+	"github.com/labstack/echo/v4"
+	getter "github.com/usememos/memos/plugin/http_getter"
+	metric "github.com/usememos/memos/plugin/metrics"
+)
+
+func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
+	g.GET("/get/httpmeta", func(c echo.Context) error {
+		ctx := c.Request().Context()
+		urlStr := c.QueryParam("url")
+		if urlStr == "" {
+			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
+		}
+		if _, err := url.Parse(urlStr); err != nil {
+			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
+		}
+
+		htmlMeta, err := getter.GetHTMLMeta(urlStr)
+		if err != nil {
+			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", urlStr)).SetInternal(err)
+		}
+		s.Collector.Collect(ctx, &metric.Metric{
+			Name: "getter used",
+			Labels: map[string]string{
+				"type": "httpmeta",
+			},
+		})
+
+		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
+		if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
+			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
+		}
+		return nil
+	})
+	g.GET("/get/image", func(c echo.Context) error {
+		ctx := c.Request().Context()
+		urlStr := c.QueryParam("url")
+		if urlStr == "" {
+			return echo.NewHTTPError(http.StatusBadRequest, "Missing image url")
+		}
+		if _, err := url.Parse(urlStr); err != nil {
+			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
+		}
+
+		image, err := getter.GetImage(urlStr)
+		if err != nil {
+			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get image url: %s", urlStr)).SetInternal(err)
+		}
+		s.Collector.Collect(ctx, &metric.Metric{
+			Name: "getter used",
+			Labels: map[string]string{
+				"type": "image",
+			},
+		})
+
+		c.Response().Writer.WriteHeader(http.StatusOK)
+		c.Response().Writer.Header().Set("Content-Type", image.Mediatype)
+		if _, err := c.Response().Writer.Write(image.Blob); err != nil {
+			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write image blob").SetInternal(err)
+		}
+		return nil
+	})
+}
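For completeness, a hedged client-side sketch of calling the new image route. The /o prefix is taken from the frontend change below; the base URL is a placeholder for wherever a memos instance is listening, not something defined in this commit.

package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	base := "http://localhost:8080"                     // placeholder address for a running memos server
	target := "https://star-history.com/bytebase.webp" // same sample image as the test above

	resp, err := http.Get(base + "/o/get/image?url=" + url.QueryEscape(target))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	blob, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%d bytes, Content-Type: %s\n", len(blob), resp.Header.Get("Content-Type"))
}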
@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) {
|
||||||
if _, err := c.Response().Writer.Write(resource.Blob); err != nil {
|
if _, err := c.Response().Writer.Write(resource.Blob); err != nil {
|
||||||
return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err)
|
return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => {
     return rawStr;
   }
 
-  return `<img class='img' src='${escape(matchResult[1])}' />`;
+  // NOTE: Get image blob from backend to avoid CORS.
+  return `<img class='img' src='/o/get/image?url=${escape(matchResult[1])}' />`;
 };
 
 export default {
|
|
Loading…
Reference in a new issue