mirror of https://github.com/go-shiori/shiori.git
synced 2025-03-10 14:59:39 +08:00

Remove repeated code for archiving bookmarks

This commit is contained in:
parent 2da0c7e297
commit 64c62d6b12

11 changed files with 425 additions and 766 deletions
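Taken together, the change collapses five copies of the download, readability, thumbnail and WARC pipeline into the new internal/core package. The sketch below pieces the consolidated flow together from the call sites in this diff; the saveBookmark helper and its arguments are invented for illustration, and note that internal packages are only importable from inside the shiori module itself:

package main

import (
    "log"

    "github.com/go-shiori/shiori/internal/core"
    "github.com/go-shiori/shiori/internal/model"
)

// saveBookmark is a hypothetical helper mirroring what the refactored
// handlers now do. The bookmark must already have a valid ID, since
// core.ProcessBookmark treats ID == 0 as a fatal error.
func saveBookmark(book model.Bookmark, dataDir string) (model.Bookmark, error) {
    // Strip the fragment and every utm_* query before anything else.
    cleanURL, err := core.RemoveUTMParams(book.URL)
    if err != nil {
        return book, err
    }
    book.URL = cleanURL

    // Fetch the page; the returned body must be closed by the caller.
    content, contentType, err := core.DownloadBookmark(book.URL)
    if err != nil {
        return book, err
    }
    defer content.Close()

    // Readability parsing, thumbnailing and WARC archiving all live here now.
    var isFatal bool
    book, isFatal, err = core.ProcessBookmark(core.ProcessRequest{
        DataDir:     dataDir,
        Bookmark:    book,
        Content:     content,
        ContentType: contentType,
    })
    if err != nil && isFatal {
        return book, err
    }
    return book, nil
}

func main() {
    book := model.Bookmark{ID: 1, URL: "https://example.com/?utm_source=feed", CreateArchive: true}
    book, err := saveBookmark(book, "/tmp/shiori-data")
    if err != nil {
        log.Fatal(err)
    }
    log.Println("saved:", book.Title)
}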
@@ -1,18 +1,10 @@
 package cmd
 
 import (
-    "bytes"
     "fmt"
-    "io"
-    "net/http"
-    nurl "net/url"
-    fp "path/filepath"
     "strings"
-    "time"
 
-    "github.com/go-shiori/shiori/pkg/warc"
-
-    "github.com/go-shiori/go-readability"
+    "github.com/go-shiori/shiori/internal/core"
     "github.com/go-shiori/shiori/internal/model"
     "github.com/spf13/cobra"
 )
@@ -45,28 +37,16 @@ func addHandler(cmd *cobra.Command, args []string) {
     noArchival, _ := cmd.Flags().GetBool("no-archival")
     logArchival, _ := cmd.Flags().GetBool("log-archival")
 
-    // Clean up URL by removing its fragment and UTM parameters
-    tmp, err := nurl.Parse(url)
-    if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-        cError.Println("URL is not valid")
-        return
-    }
-
-    tmp.Fragment = ""
-    clearUTMParams(tmp)
-
     // Create bookmark item
     book := model.Bookmark{
-        URL:     tmp.String(),
-        Title:   normalizeSpace(title),
-        Excerpt: normalizeSpace(excerpt),
+        URL:           url,
+        Title:         normalizeSpace(title),
+        Excerpt:       normalizeSpace(excerpt),
+        CreateArchive: !noArchival,
     }
 
-    // Create bookmark ID
-    book.ID, err = db.CreateNewID("bookmark")
-    if err != nil {
-        cError.Printf("Failed to create ID: %v\n", err)
-        return
+    if book.Title == "" {
+        book.Title = book.URL
     }
 
     // Set bookmark tags
@@ -75,101 +55,51 @@ func addHandler(cmd *cobra.Command, args []string) {
         book.Tags[i].Name = strings.TrimSpace(tag)
     }
 
-    // If it's not offline mode, fetch data from internet
-    var imageURLs []string
-
-    if !offline {
-        func() {
-            cInfo.Println("Downloading article...")
-
-            // Prepare download request
-            req, err := http.NewRequest("GET", url, nil)
-            if err != nil {
-                cError.Printf("Failed to download article: %v\n", err)
-                return
-            }
-
-            // Send download request
-            req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-            resp, err := httpClient.Do(req)
-            if err != nil {
-                cError.Printf("Failed to download article: %v\n", err)
-                return
-            }
-            defer resp.Body.Close()
-
-            // Split response body so it can be processed twice
-            archivalInput := bytes.NewBuffer(nil)
-            readabilityInput := bytes.NewBuffer(nil)
-            readabilityCheckInput := bytes.NewBuffer(nil)
-            multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-            _, err = io.Copy(multiWriter, resp.Body)
-            if err != nil {
-                cError.Printf("Failed to process article: %v\n", err)
-                return
-            }
-
-            // If this is HTML, parse for readable content
-            contentType := resp.Header.Get("Content-Type")
-            if strings.Contains(contentType, "text/html") {
-                isReadable := readability.IsReadable(readabilityCheckInput)
-
-                article, err := readability.FromReader(readabilityInput, url)
-                if err != nil {
-                    cError.Printf("Failed to parse article: %v\n", err)
-                    return
-                }
-
-                book.Author = article.Byline
-                book.Content = article.TextContent
-                book.HTML = article.Content
-
-                // If title and excerpt doesnt have submitted value, use from article
-                if book.Title == "" {
-                    book.Title = article.Title
-                }
-
-                if book.Excerpt == "" {
-                    book.Excerpt = article.Excerpt
-                }
-
-                if !isReadable {
-                    book.Content = ""
-                }
-
-                // Get image URL
-                if article.Image != "" {
-                    imageURLs = append(imageURLs, article.Image)
-                }
-
-                if article.Favicon != "" {
-                    imageURLs = append(imageURLs, article.Favicon)
-                }
-            }
-
-            // If needed, create offline archive as well
-            if !noArchival {
-                archivePath := fp.Join(dataDir, "archive", fmt.Sprintf("%d", book.ID))
-                archivalRequest := warc.ArchivalRequest{
-                    URL:         url,
-                    Reader:      archivalInput,
-                    ContentType: contentType,
-                    LogEnabled:  logArchival,
-                }
-
-                err = warc.NewArchive(archivalRequest, archivePath)
-                if err != nil {
-                    cError.Printf("Failed to create archive: %v\n", err)
-                    return
-                }
-            }
-        }()
-    }
+    // Create bookmark ID
+    var err error
+    book.ID, err = db.CreateNewID("bookmark")
+    if err != nil {
+        cError.Printf("Failed to create ID: %v\n", err)
+        return
+    }
 
-    // Make sure title is not empty
-    if book.Title == "" {
-        book.Title = book.URL
+    // Clean up bookmark URL
+    book.URL, err = core.RemoveUTMParams(book.URL)
+    if err != nil {
+        cError.Printf("Failed to clean URL: %v\n", err)
+        return
     }
 
+    // If it's not offline mode, fetch data from internet.
+    if !offline {
+        cInfo.Println("Downloading article...")
+
+        var isFatalErr bool
+        content, contentType, err := core.DownloadBookmark(book.URL)
+        if err != nil {
+            cError.Printf("Failed to download: %v\n", err)
+        }
+
+        if err == nil && content != nil {
+            request := core.ProcessRequest{
+                DataDir:     dataDir,
+                Bookmark:    book,
+                Content:     content,
+                ContentType: contentType,
+                LogArchival: logArchival,
+            }
+
+            book, isFatalErr, err = core.ProcessBookmark(request)
+            content.Close()
+
+            if err != nil {
+                cError.Printf("Failed: %v\n", err)
+            }
+
+            if isFatalErr {
+                return
+            }
+        }
+    }
 
     // Save bookmark to database
@@ -179,18 +109,6 @@ func addHandler(cmd *cobra.Command, args []string) {
         return
     }
 
-    // Save article image to local disk
-    imgPath := fp.Join(dataDir, "thumb", fmt.Sprintf("%d", book.ID))
-    for _, imageURL := range imageURLs {
-        err = downloadBookImage(imageURL, imgPath, time.Minute)
-        if err == nil {
-            break
-        } else {
-            cError.Printf("Failed to download image: %v\n", err)
-            continue
-        }
-    }
-
     // Print added bookmark
     fmt.Println()
     printBookmarks(book)
@@ -2,11 +2,11 @@ package cmd
 
 import (
     "fmt"
-    nurl "net/url"
     "os"
     "strings"
 
     "github.com/PuerkitoBio/goquery"
+    "github.com/go-shiori/shiori/internal/core"
     "github.com/go-shiori/shiori/internal/model"
     "github.com/spf13/cobra"
 )
@@ -73,17 +73,14 @@ func importHandler(cmd *cobra.Command, args []string) {
         url, _ := a.Attr("href")
         strTags, _ := a.Attr("tags")
 
-        // Clean up URL by removing its fragment and UTM parameters
-        tmp, err := nurl.Parse(url)
-        if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
+        // Clean up URL
+        var err error
+        url, err = core.RemoveUTMParams(url)
+        if err != nil {
             cError.Printf("Skip %s: URL is not valid\n", url)
             return
         }
 
-        tmp.Fragment = ""
-        clearUTMParams(tmp)
-        url = tmp.String()
-
         // Make sure title is valid Utf-8
         title = toValidUtf8(title, url)
 
@@ -2,13 +2,13 @@ package cmd
 
 import (
     "fmt"
-    nurl "net/url"
     "os"
    "strconv"
     "strings"
     "time"
 
     "github.com/PuerkitoBio/goquery"
+    "github.com/go-shiori/shiori/internal/core"
     "github.com/go-shiori/shiori/internal/model"
     "github.com/spf13/cobra"
 )
@@ -59,17 +59,14 @@ func pocketHandler(cmd *cobra.Command, args []string) {
         intModified, _ := strconv.ParseInt(strModified, 10, 64)
         modified := time.Unix(intModified, 0)
 
-        // Clean up URL by removing its fragment and UTM parameters
-        tmp, err := nurl.Parse(url)
-        if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
+        // Clean up URL
+        var err error
+        url, err = core.RemoveUTMParams(url)
+        if err != nil {
             cError.Printf("Skip %s: URL is not valid\n", url)
             return
         }
 
-        tmp.Fragment = ""
-        clearUTMParams(tmp)
-        url = tmp.String()
-
         // Make sure title is valid Utf-8
         title = toValidUtf8(title, url)
 
@@ -1,22 +1,14 @@
 package cmd
 
 import (
-    "bytes"
     "fmt"
-    "io"
-    "net/http"
-    nurl "net/url"
-    "os"
-    fp "path/filepath"
     "sort"
     "strings"
     "sync"
-    "time"
 
-    "github.com/go-shiori/go-readability"
+    "github.com/go-shiori/shiori/internal/core"
     "github.com/go-shiori/shiori/internal/database"
     "github.com/go-shiori/shiori/internal/model"
-    "github.com/go-shiori/shiori/pkg/warc"
     "github.com/spf13/cobra"
 )
@@ -83,17 +75,12 @@ func updateHandler(cmd *cobra.Command, args []string) {
     excerpt = normalizeSpace(excerpt)
 
     if cmd.Flags().Changed("url") {
-        // Clean up URL by removing its fragment and UTM parameters
-        tmp, err := nurl.Parse(url)
-        if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-            cError.Println("URL is not valid")
-            return
+        // Clean up bookmark URL
+        url, err = core.RemoveUTMParams(url)
+        if err != nil {
+            panic(fmt.Errorf("failed to clean URL: %v", err))
         }
 
-        tmp.Fragment = ""
-        clearUTMParams(tmp)
-        url = tmp.String()
-
         // Since user uses custom URL, make sure there is only one ID to update
         if len(ids) != 1 {
             cError.Println("Update only accepts one index while using --url flag")
@@ -149,6 +136,9 @@ func updateHandler(cmd *cobra.Command, args []string) {
     for i, book := range bookmarks {
         wg.Add(1)
 
+        // Mark whether book will be archived
+        book.CreateArchive = !noArchival
+
         // If used, use submitted URL
         if url != "" {
             book.URL = url
@@ -164,102 +154,32 @@ func updateHandler(cmd *cobra.Command, args []string) {
                 <-semaphore
             }()
 
-            // Prepare download request
-            req, err := http.NewRequest("GET", book.URL, nil)
+            // Download data from internet
+            content, contentType, err := core.DownloadBookmark(book.URL)
             if err != nil {
                 chProblem <- book.ID
                 chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
                 return
             }
 
-            // Send download request
-            req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-            resp, err := httpClient.Do(req)
-            if err != nil {
-                chProblem <- book.ID
-                chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
-                return
+            request := core.ProcessRequest{
+                DataDir:      dataDir,
+                Bookmark:     book,
+                Content:      content,
+                ContentType:  contentType,
+                KeepMetadata: keepMetadata,
+                LogArchival:  logArchival,
             }
-            defer resp.Body.Close()
 
-            // Split response body so it can be processed twice
-            archivalInput := bytes.NewBuffer(nil)
-            readabilityInput := bytes.NewBuffer(nil)
-            readabilityCheckInput := bytes.NewBuffer(nil)
-            multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
+            book, _, err = core.ProcessBookmark(request)
+            content.Close()
 
-            _, err = io.Copy(multiWriter, resp.Body)
             if err != nil {
                 chProblem <- book.ID
                 chMessage <- fmt.Errorf("Failed to process %s: %v", book.URL, err)
                 return
             }
 
-            // If this is HTML, parse for readable content
-            contentType := resp.Header.Get("Content-Type")
-            if strings.Contains(contentType, "text/html") {
-                isReadable := readability.IsReadable(readabilityCheckInput)
-
-                article, err := readability.FromReader(readabilityInput, book.URL)
-                if err != nil {
-                    chProblem <- book.ID
-                    chMessage <- fmt.Errorf("Failed to parse %s: %v", book.URL, err)
-                    return
-                }
-
-                book.Author = article.Byline
-                book.Content = article.TextContent
-                book.HTML = article.Content
-
-                if !isReadable {
-                    book.Content = ""
-                }
-
-                if !keepMetadata {
-                    book.Title = article.Title
-                    book.Excerpt = article.Excerpt
-                }
-
-                // Get image for thumbnail and save it to local disk
-                var imageURLs []string
-                if article.Image != "" {
-                    imageURLs = append(imageURLs, article.Image)
-                }
-
-                if article.Favicon != "" {
-                    imageURLs = append(imageURLs, article.Favicon)
-                }
-
-                imgPath := fp.Join(dataDir, "thumb", fmt.Sprintf("%d", book.ID))
-                for _, imageURL := range imageURLs {
-                    err = downloadBookImage(imageURL, imgPath, time.Minute)
-                    if err == nil {
-                        break
-                    }
-                }
-            }
-
-            // If needed, update offline archive as well.
-            // Make sure to delete the old one first.
-            if !noArchival {
-                archivePath := fp.Join(dataDir, "archive", fmt.Sprintf("%d", book.ID))
-                os.Remove(archivePath)
-
-                archivalRequest := warc.ArchivalRequest{
-                    URL:         book.URL,
-                    Reader:      archivalInput,
-                    ContentType: contentType,
-                    LogEnabled:  logArchival,
-                }
-
-                err = warc.NewArchive(archivalRequest, archivePath)
-                if err != nil {
-                    chProblem <- book.ID
-                    chMessage <- fmt.Errorf("Failed to create archive %s: %v", book.URL, err)
-                    return
-                }
-            }
-
             // Send success message
             chMessage <- fmt.Sprintf("Downloaded %s", book.URL)
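Both this command and the webserver's cache handler drive the download-and-process calls from goroutines throttled by a semaphore; only the release side (`<-semaphore`) is visible in the hunk above. A self-contained sketch of that throttling pattern, assuming the semaphore is a buffered channel acquired at the top of each goroutine:

package main

import (
    "fmt"
    "sync"
)

func main() {
    urls := []string{"https://a.example", "https://b.example", "https://c.example"}

    var wg sync.WaitGroup
    semaphore := make(chan struct{}, 10) // at most 10 downloads in flight

    for _, url := range urls {
        wg.Add(1)
        go func(url string) {
            // Mirror the cleanup order used in updateHandler: finish the
            // WaitGroup and free the semaphore slot when the goroutine exits.
            defer wg.Done()
            semaphore <- struct{}{}        // acquire a slot
            defer func() { <-semaphore }() // release it

            fmt.Println("processing", url) // DownloadBookmark + ProcessBookmark would go here
        }(url)
    }

    wg.Wait()
}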
@@ -3,29 +3,17 @@ package cmd
 import (
     "errors"
     "fmt"
-    "image"
-    clr "image/color"
-    "image/draw"
-    "image/jpeg"
-    "math"
-    "net/http"
     nurl "net/url"
     "os"
     "os/exec"
     fp "path/filepath"
     "runtime"
     "strconv"
     "strings"
-    "time"
     "unicode/utf8"
 
-    "github.com/disintegration/imaging"
     "github.com/fatih/color"
     "github.com/go-shiori/shiori/internal/model"
     "golang.org/x/crypto/ssh/terminal"
-
-    // Add supports for PNG image
-    _ "image/png"
 )
 
 var (
@@ -54,95 +42,6 @@ func isURLValid(s string) bool {
     return err == nil && tmp.Scheme != "" && tmp.Hostname() != ""
 }
 
-func clearUTMParams(url *nurl.URL) {
-    queries := url.Query()
-
-    for key := range queries {
-        if strings.HasPrefix(key, "utm_") {
-            queries.Del(key)
-        }
-    }
-
-    url.RawQuery = queries.Encode()
-}
-
-func downloadBookImage(url, dstPath string, timeout time.Duration) error {
-    // Fetch data from URL
-    client := &http.Client{Timeout: timeout}
-    resp, err := client.Get(url)
-    if err != nil {
-        return err
-    }
-    defer resp.Body.Close()
-
-    // Make sure it's JPG or PNG image
-    cp := resp.Header.Get("Content-Type")
-    if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
-        return fmt.Errorf("%s is not a supported image", url)
-    }
-
-    // At this point, the download has finished successfully.
-    // Prepare destination file.
-    err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
-    if err != nil {
-        return fmt.Errorf("failed to create image dir: %v", err)
-    }
-
-    dstFile, err := os.Create(dstPath)
-    if err != nil {
-        return fmt.Errorf("failed to create image file: %v", err)
-    }
-    defer dstFile.Close()
-
-    // Parse image and process it.
-    // If image is smaller than 600x400 or its ratio is less than 4:3, resize.
-    // Else, save it as it is.
-    img, _, err := image.Decode(resp.Body)
-    if err != nil {
-        return fmt.Errorf("failed to parse image %s: %v", url, err)
-    }
-
-    imgRect := img.Bounds()
-    imgWidth := imgRect.Dx()
-    imgHeight := imgRect.Dy()
-    imgRatio := float64(imgWidth) / float64(imgHeight)
-
-    if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
-        err = jpeg.Encode(dstFile, img, nil)
-    } else {
-        // Create background
-        bg := image.NewNRGBA(imgRect)
-        draw.Draw(bg, imgRect, image.NewUniform(clr.White), image.Point{}, draw.Src)
-        draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
-
-        bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
-        bg = imaging.Blur(bg, 150)
-        bg = imaging.AdjustBrightness(bg, 30)
-
-        // Create foreground
-        fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
-
-        // Merge foreground and background
-        bgRect := bg.Bounds()
-        fgRect := fg.Bounds()
-        fgPosition := image.Point{
-            X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
-            Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
-        }
-
-        draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
-
-        // Save to file
-        err = jpeg.Encode(dstFile, bg, nil)
-    }
-
-    if err != nil {
-        return fmt.Errorf("failed to save image %s: %v", url, err)
-    }
-
-    return nil
-}
-
 func printBookmarks(bookmarks ...model.Bookmark) {
     for _, bookmark := range bookmarks {
         // Create bookmark index
31  internal/core/download.go  Normal file

@@ -0,0 +1,31 @@
+package core
+
+import (
+    "io"
+    "net/http"
+    "time"
+)
+
+var httpClient = &http.Client{Timeout: time.Minute}
+
+// DownloadBookmark downloads bookmarked page from specified URL.
+// Return response body, make sure to close it later.
+func DownloadBookmark(url string) (io.ReadCloser, string, error) {
+    // Prepare download request
+    req, err := http.NewRequest("GET", url, nil)
+    if err != nil {
+        return nil, "", err
+    }
+
+    // Send download request
+    req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
+    resp, err := httpClient.Do(req)
+    if err != nil {
+        return nil, "", err
+    }
+
+    // Get content type
+    contentType := resp.Header.Get("Content-Type")
+
+    return resp.Body, contentType, nil
+}
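The doc comment carries the whole contract: the raw response body is returned unread, so the caller owns closing it, and the content type comes back alongside because the body is not necessarily HTML. A minimal caller, written as a hypothetical example test inside the module (internal packages cannot be imported from outside it):

package core_test // hypothetical placement, module-internal

import (
    "fmt"
    "log"

    "github.com/go-shiori/shiori/internal/core"
)

func ExampleDownloadBookmark() {
    content, contentType, err := core.DownloadBookmark("https://example.com/article")
    if err != nil {
        log.Fatalf("download failed: %v", err)
    }
    defer content.Close() // the caller owns the response body

    fmt.Println("content type:", contentType)
}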
218  internal/core/processing.go  Normal file

@@ -0,0 +1,218 @@
+package core
+
+import (
+    "bytes"
+    "fmt"
+    "image"
+    "image/color"
+    "image/draw"
+    "image/jpeg"
+    "io"
+    "math"
+    "os"
+    "path"
+    fp "path/filepath"
+    "strconv"
+    "strings"
+
+    "github.com/disintegration/imaging"
+    "github.com/go-shiori/go-readability"
+    "github.com/go-shiori/shiori/internal/model"
+    "github.com/go-shiori/shiori/pkg/warc"
+
+    // Add support for png
+    _ "image/png"
+)
+
+// ProcessRequest is the request for processing bookmark.
+type ProcessRequest struct {
+    DataDir      string
+    Bookmark     model.Bookmark
+    Content      io.Reader
+    ContentType  string
+    KeepMetadata bool
+    LogArchival  bool
+}
+
+// ProcessBookmark process the bookmark and archive it if needed.
+// Return three values, the bookmark itself, is error fatal, and error value.
+func ProcessBookmark(req ProcessRequest) (model.Bookmark, bool, error) {
+    book := req.Bookmark
+    contentType := req.ContentType
+
+    // Make sure bookmark ID is defined
+    if book.ID == 0 {
+        return book, true, fmt.Errorf("bookmark ID is not valid")
+    }
+
+    // Split bookmark content so it can be processed several times
+    archivalInput := bytes.NewBuffer(nil)
+    readabilityInput := bytes.NewBuffer(nil)
+    readabilityCheckInput := bytes.NewBuffer(nil)
+
+    var multiWriter io.Writer
+    if !strings.Contains(contentType, "text/html") {
+        multiWriter = io.MultiWriter(archivalInput)
+    } else {
+        multiWriter = io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
+    }
+
+    _, err := io.Copy(multiWriter, req.Content)
+    if err != nil {
+        return book, false, fmt.Errorf("failed to process article: %v", err)
+    }
+
+    // If this is HTML, parse for readable content
+    var imageURLs []string
+    if strings.Contains(contentType, "text/html") {
+        isReadable := readability.IsReadable(readabilityCheckInput)
+
+        article, err := readability.FromReader(readabilityInput, book.URL)
+        if err != nil {
+            return book, false, fmt.Errorf("failed to parse article: %v", err)
+        }
+
+        book.Author = article.Byline
+        book.Content = article.TextContent
+        book.HTML = article.Content
+
+        // If title and excerpt doesnt have submitted value, use from article
+        if !req.KeepMetadata || book.Title == "" {
+            book.Title = article.Title
+        }
+
+        if !req.KeepMetadata || book.Excerpt == "" {
+            book.Excerpt = article.Excerpt
+        }
+
+        // Sometimes article doesn't have any title, so make sure it is not empty
+        if book.Title == "" {
+            book.Title = book.URL
+        }
+
+        // Get image URL
+        if article.Image != "" {
+            imageURLs = append(imageURLs, article.Image)
+        }
+
+        if article.Favicon != "" {
+            imageURLs = append(imageURLs, article.Favicon)
+        }
+
+        if !isReadable {
+            book.Content = ""
+        }
+
+        book.HasContent = book.Content != ""
+    }
+
+    // Save article image to local disk
+    strID := strconv.Itoa(book.ID)
+    imgPath := fp.Join(req.DataDir, "thumb", strID)
+
+    for _, imageURL := range imageURLs {
+        err = downloadBookImage(imageURL, imgPath)
+        if err == nil {
+            book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
+            break
+        }
+    }
+
+    // If needed, create offline archive as well
+    if book.CreateArchive {
+        archivePath := fp.Join(req.DataDir, "archive", fmt.Sprintf("%d", book.ID))
+        os.Remove(archivePath)
+
+        archivalRequest := warc.ArchivalRequest{
+            URL:         book.URL,
+            Reader:      archivalInput,
+            ContentType: contentType,
+            LogEnabled:  req.LogArchival,
+        }
+
+        err = warc.NewArchive(archivalRequest, archivePath)
+        if err != nil {
+            return book, false, fmt.Errorf("failed to create archive: %v", err)
+        }
+
+        book.HasArchive = true
+    }
+
+    return book, false, nil
+}
+
+func downloadBookImage(url, dstPath string) error {
+    // Fetch data from URL
+    resp, err := httpClient.Get(url)
+    if err != nil {
+        return err
+    }
+    defer resp.Body.Close()
+
+    // Make sure it's JPG or PNG image
+    cp := resp.Header.Get("Content-Type")
+    if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
+        return fmt.Errorf("%s is not a supported image", url)
+    }
+
+    // At this point, the download has finished successfully.
+    // Prepare destination file.
+    err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
+    if err != nil {
+        return fmt.Errorf("failed to create image dir: %v", err)
+    }
+
+    dstFile, err := os.Create(dstPath)
+    if err != nil {
+        return fmt.Errorf("failed to create image file: %v", err)
+    }
+    defer dstFile.Close()
+
+    // Parse image and process it.
+    // If image is smaller than 600x400 or its ratio is less than 4:3, resize.
+    // Else, save it as it is.
+    img, _, err := image.Decode(resp.Body)
+    if err != nil {
+        return fmt.Errorf("failed to parse image %s: %v", url, err)
+    }
+
+    imgRect := img.Bounds()
+    imgWidth := imgRect.Dx()
+    imgHeight := imgRect.Dy()
+    imgRatio := float64(imgWidth) / float64(imgHeight)
+
+    if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
+        err = jpeg.Encode(dstFile, img, nil)
+    } else {
+        // Create background
+        bg := image.NewNRGBA(imgRect)
+        draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src)
+        draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
+
+        bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
+        bg = imaging.Blur(bg, 150)
+        bg = imaging.AdjustBrightness(bg, 30)
+
+        // Create foreground
+        fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
+
+        // Merge foreground and background
+        bgRect := bg.Bounds()
+        fgRect := fg.Bounds()
+        fgPosition := image.Point{
+            X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
+            Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
+        }
+
+        draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
+
+        // Save to file
+        err = jpeg.Encode(dstFile, bg, nil)
+    }
+
+    if err != nil {
+        return fmt.Errorf("failed to save image %s: %v", url, err)
+    }
+
+    return nil
+}
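The notable technique in ProcessBookmark is the fan-out at the top: the input stream can only be read once, but up to three consumers need it (the archiver, the readability parser and the readability check), so the stream is copied once into separate buffers through io.MultiWriter, and the readability buffers are skipped for non-HTML content. The same one-pass fan-out in isolation, as a runnable stand-alone sketch:

package main

import (
    "bytes"
    "fmt"
    "io"
    "strings"
)

func main() {
    src := strings.NewReader("<html><body>hello</body></html>")

    // Fan the single stream out into three independent buffers.
    archival := bytes.NewBuffer(nil)
    readability := bytes.NewBuffer(nil)
    readabilityCheck := bytes.NewBuffer(nil)

    if _, err := io.Copy(io.MultiWriter(archival, readability, readabilityCheck), src); err != nil {
        panic(err)
    }

    // Each buffer can now be consumed independently.
    fmt.Println(archival.Len(), readability.Len(), readabilityCheck.Len())
}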
28  internal/core/url.go  Normal file

@@ -0,0 +1,28 @@
+package core
+
+import (
+    "fmt"
+    nurl "net/url"
+    "strings"
+)
+
+// RemoveUTMParams removes the UTM parameters from URL.
+func RemoveUTMParams(url string) (string, error) {
+    // Parse string URL
+    tmp, err := nurl.Parse(url)
+    if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
+        return url, fmt.Errorf("URL is not valid")
+    }
+
+    // Remove UTM queries
+    queries := tmp.Query()
+    for key := range queries {
+        if strings.HasPrefix(key, "utm_") {
+            queries.Del(key)
+        }
+    }
+
+    tmp.Fragment = ""
+    tmp.RawQuery = queries.Encode()
+    return tmp.String(), nil
+}
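For a concrete picture of the behaviour, here is what RemoveUTMParams does to a hypothetical tracking URL; since queries.Encode() rebuilds the query string, surviving parameters come back sorted alphabetically:

package core_test // hypothetical placement, module-internal

import (
    "fmt"

    "github.com/go-shiori/shiori/internal/core"
)

func ExampleRemoveUTMParams() {
    // Both the #comments fragment and the utm_source parameter are stripped.
    clean, err := core.RemoveUTMParams("https://example.com/post?utm_source=rss&id=42#comments")
    fmt.Println(clean, err)
    // Output: https://example.com/post?id=42 <nil>
}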
@@ -6,17 +6,12 @@ import (
     "fmt"
     "io"
     "net/http"
-    nurl "net/url"
     "os"
-    "path"
     fp "path/filepath"
-    "strconv"
     "strings"
-    "time"
 
-    "github.com/go-shiori/go-readability"
+    "github.com/go-shiori/shiori/internal/core"
     "github.com/go-shiori/shiori/internal/model"
-    "github.com/go-shiori/shiori/pkg/warc"
     "github.com/julienschmidt/httprouter"
 )
@@ -31,18 +26,15 @@ func (h *handler) apiInsertViaExtension(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
     err = json.NewDecoder(r.Body).Decode(&request)
     checkError(err)
 
-    // Clean up URL by removing its fragment and UTM parameters
-    tmp, err := nurl.Parse(request.URL)
-    if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-        panic(fmt.Errorf("URL is not valid"))
+    // Clean up bookmark URL
+    request.URL, err = core.RemoveUTMParams(request.URL)
+    if err != nil {
+        panic(fmt.Errorf("failed to clean URL: %v", err))
     }
 
-    tmp.Fragment = ""
-    clearUTMParams(tmp)
-    request.URL = tmp.String()
-
     // Check if bookmark already exists.
     book, exist := h.DB.GetBookmark(0, request.URL)
+    book.CreateArchive = true
 
     // If it already exists, we need to set ID and tags.
     if exist {
@@ -69,119 +61,37 @@ func (h *handler) apiInsertViaExtension(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
     // Since we are using extension, the extension might send the HTML content
     // so no need to download it again here. However, if it's empty, it might be not HTML file
     // so we download it here.
-    contentType := "text/html; charset=UTF-8"
-    contentBuffer := bytes.NewBufferString(book.HTML)
+    var contentType string
+    var contentBuffer io.Reader
 
     if book.HTML == "" {
-        func() {
-            // Prepare download request
-            req, err := http.NewRequest("GET", book.URL, nil)
-            if err != nil {
-                return
-            }
-
-            // Send download request
-            req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-            resp, err := httpClient.Do(req)
-            if err != nil {
-                return
-            }
-            defer resp.Body.Close()
-
-            // Save response for later use
-            contentType = resp.Header.Get("Content-Type")
-
-            contentBuffer.Reset()
-            _, err = io.Copy(contentBuffer, resp.Body)
-            if err != nil {
-                return
-            }
-        }()
+        contentBuffer, contentType, _ = core.DownloadBookmark(book.URL)
+    } else {
+        contentType = "text/html; charset=UTF-8"
+        contentBuffer = bytes.NewBufferString(book.HTML)
     }
 
     // At this point the web page already downloaded.
     // Time to process it.
-    func() {
-        // Split response so it can be processed several times
-        archivalInput := bytes.NewBuffer(nil)
-        readabilityInput := bytes.NewBuffer(nil)
-        readabilityCheckInput := bytes.NewBuffer(nil)
-        multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-        _, err = io.Copy(multiWriter, contentBuffer)
-        if err != nil {
-            return
-        }
-
-        // If it's HTML, parse the readable content.
-        if strings.Contains(contentType, "text/html") {
-            isReadable := readability.IsReadable(readabilityCheckInput)
-
-            article, err := readability.FromReader(readabilityInput, book.URL)
-            if err != nil {
-                return
-            }
-
-            book.Author = article.Byline
-            book.Content = article.TextContent
-            book.HTML = article.Content
-
-            if book.Title == "" {
-                if article.Title == "" {
-                    book.Title = book.URL
-                } else {
-                    book.Title = article.Title
-                }
-            }
-
-            if book.Excerpt == "" {
-                book.Excerpt = article.Excerpt
-            }
-
-            if !isReadable {
-                book.Content = ""
-            }
-
-            book.HasContent = book.Content != ""
-
-            // Get image for thumbnail and save it to local disk
-            var imageURLs []string
-            if article.Image != "" {
-                imageURLs = append(imageURLs, article.Image)
-            }
-
-            if article.Favicon != "" {
-                imageURLs = append(imageURLs, article.Favicon)
-            }
-
-            // Save article image to local disk
-            strID := strconv.Itoa(book.ID)
-            imgPath := fp.Join(h.DataDir, "thumb", strID)
-            for _, imageURL := range imageURLs {
-                err = downloadBookImage(imageURL, imgPath, time.Minute)
-                if err == nil {
-                    book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
-                    break
-                }
-            }
-        }
-
-        // Create offline archive as well
-        archivePath := fp.Join(h.DataDir, "archive", fmt.Sprintf("%d", book.ID))
-        os.Remove(archivePath)
-
-        archivalRequest := warc.ArchivalRequest{
-            URL:    book.URL,
-            Reader: archivalInput,
+    if contentBuffer != nil {
+        request := core.ProcessRequest{
+            DataDir:     h.DataDir,
+            Bookmark:    book,
+            Content:     contentBuffer,
             ContentType: contentType,
         }
 
-        err = warc.NewArchive(archivalRequest, archivePath)
-        if err != nil {
-            return
+        var isFatalErr bool
+        book, isFatalErr, err = core.ProcessBookmark(request)
+
+        if tmp, ok := contentBuffer.(io.ReadCloser); ok {
+            tmp.Close()
         }
 
-        book.HasArchive = true
-    }()
+        if err != nil && isFatalErr {
+            panic(fmt.Errorf("failed to process bookmark: %v", err))
+        }
+    }
 
     // Save bookmark to database
     results, err := h.DB.SaveBookmarks(book)
@@ -1,13 +1,10 @@
 package webserver
 
 import (
-    "bytes"
     "encoding/json"
     "fmt"
-    "io"
     "math"
     "net/http"
-    nurl "net/url"
     "os"
     "path"
     fp "path/filepath"
@@ -16,10 +13,9 @@ import (
     "sync"
     "time"
 
-    "github.com/go-shiori/go-readability"
+    "github.com/go-shiori/shiori/internal/core"
     "github.com/go-shiori/shiori/internal/database"
     "github.com/go-shiori/shiori/internal/model"
-    "github.com/go-shiori/shiori/pkg/warc"
     "github.com/gofrs/uuid"
     "github.com/julienschmidt/httprouter"
     "golang.org/x/crypto/bcrypt"
@@ -251,112 +247,35 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
     err = json.NewDecoder(r.Body).Decode(&book)
     checkError(err)
 
-    // Clean up URL by removing its fragment and UTM parameters
-    tmp, err := nurl.Parse(book.URL)
-    if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-        panic(fmt.Errorf("URL is not valid"))
-    }
-
-    tmp.Fragment = ""
-    clearUTMParams(tmp)
-    book.URL = tmp.String()
-
     // Create bookmark ID
     book.ID, err = h.DB.CreateNewID("bookmark")
     if err != nil {
         panic(fmt.Errorf("failed to create ID: %v", err))
     }
 
+    // Clean up bookmark URL
+    book.URL, err = core.RemoveUTMParams(book.URL)
+    if err != nil {
+        panic(fmt.Errorf("failed to clean URL: %v", err))
+    }
+
     // Fetch data from internet
-    var imageURLs []string
-    func() {
-        // Prepare download request
-        req, err := http.NewRequest("GET", book.URL, nil)
-        if err != nil {
-            return
+    var isFatalErr bool
+    content, contentType, err := core.DownloadBookmark(book.URL)
+    if err == nil && content != nil {
+        request := core.ProcessRequest{
+            DataDir:     h.DataDir,
+            Bookmark:    book,
+            Content:     content,
+            ContentType: contentType,
         }
 
-        // Send download request
-        req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-        resp, err := httpClient.Do(req)
-        if err != nil {
-            return
+        book, isFatalErr, err = core.ProcessBookmark(request)
+        content.Close()
+
+        if err != nil && isFatalErr {
+            panic(fmt.Errorf("failed to process bookmark: %v", err))
         }
-        defer resp.Body.Close()
-
-        // Split response body so it can be processed twice
-        archivalInput := bytes.NewBuffer(nil)
-        readabilityInput := bytes.NewBuffer(nil)
-        readabilityCheckInput := bytes.NewBuffer(nil)
-        multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-        _, err = io.Copy(multiWriter, resp.Body)
-        if err != nil {
-            return
-        }
-
-        // If this is HTML, parse for readable content
-        contentType := resp.Header.Get("Content-Type")
-        if strings.Contains(contentType, "text/html") {
-            isReadable := readability.IsReadable(readabilityCheckInput)
-
-            article, err := readability.FromReader(readabilityInput, book.URL)
-            if err != nil {
-                return
-            }
-
-            book.Author = article.Byline
-            book.Content = article.TextContent
-            book.HTML = article.Content
-
-            // If title and excerpt doesnt have submitted value, use from article
-            if book.Title == "" {
-                book.Title = article.Title
-            }
-
-            if book.Excerpt == "" {
-                book.Excerpt = article.Excerpt
-            }
-
-            // Get image URL
-            if article.Image != "" {
-                imageURLs = append(imageURLs, article.Image)
-            }
-
-            if article.Favicon != "" {
-                imageURLs = append(imageURLs, article.Favicon)
-            }
-
-            if !isReadable {
-                book.Content = ""
-            }
-
-            book.HasContent = book.Content != ""
-        }
-
-        // If needed, create offline archive as well
-        if book.CreateArchive {
-            archivePath := fp.Join(h.DataDir, "archive", fmt.Sprintf("%d", book.ID))
-            os.Remove(archivePath)
-
-            archivalRequest := warc.ArchivalRequest{
-                URL:         book.URL,
-                Reader:      archivalInput,
-                ContentType: contentType,
-            }
-
-            err = warc.NewArchive(archivalRequest, archivePath)
-            if err != nil {
-                return
-            }
-
-            book.HasArchive = true
-        }
-    }()
+    }
 
     // Make sure title is not empty
     if book.Title == "" {
         book.Title = book.URL
     }
 
     // Save bookmark to database
@@ -366,17 +285,6 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
     }
     book = results[0]
 
-    // Save article image to local disk
-    strID := strconv.Itoa(book.ID)
-    imgPath := fp.Join(h.DataDir, "thumb", strID)
-    for _, imageURL := range imageURLs {
-        err = downloadBookImage(imageURL, imgPath, time.Minute)
-        if err == nil {
-            book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
-            break
-        }
-    }
-
     // Return the new bookmark
     w.Header().Set("Content-Type", "application/json")
     err = json.NewEncoder(w).Encode(&book)
@@ -446,6 +354,12 @@ func (h *handler) apiUpdateBookmark(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
     book.Excerpt = request.Excerpt
     book.Public = request.Public
 
+    // Clean up bookmark URL
+    book.URL, err = core.RemoveUTMParams(book.URL)
+    if err != nil {
+        panic(fmt.Errorf("failed to clean URL: %v", err))
+    }
+
     // Set new tags
     for i := range book.Tags {
         book.Tags[i].Deleted = true
@@ -525,6 +439,9 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
     for i, book := range bookmarks {
         wg.Add(1)
 
+        // Mark whether book will be archived
+        book.CreateArchive = request.CreateArchive
+
         go func(i int, book model.Bookmark, keepMetadata bool) {
             // Make sure to finish the WG
             defer wg.Done()
@@ -535,107 +452,28 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
                 <-semaphore
             }()
 
-            // Prepare download request
-            req, err := http.NewRequest("GET", book.URL, nil)
+            // Download data from internet
+            content, contentType, err := core.DownloadBookmark(book.URL)
             if err != nil {
                 chProblem <- book.ID
                 return
             }
 
-            // Send download request
-            req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-            resp, err := httpClient.Do(req)
+            request := core.ProcessRequest{
+                DataDir:      h.DataDir,
+                Bookmark:     book,
+                Content:      content,
+                ContentType:  contentType,
+                KeepMetadata: keepMetadata,
+            }
+
+            book, _, err = core.ProcessBookmark(request)
+            content.Close()
+
             if err != nil {
                 chProblem <- book.ID
                 return
             }
-            defer resp.Body.Close()
-
-            // Split response body so it can be processed twice
-            archivalInput := bytes.NewBuffer(nil)
-            readabilityInput := bytes.NewBuffer(nil)
-            readabilityCheckInput := bytes.NewBuffer(nil)
-            multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-            _, err = io.Copy(multiWriter, resp.Body)
-            if err != nil {
-                chProblem <- book.ID
-                return
-            }
-
-            // If this is HTML, parse for readable content
-            strID := strconv.Itoa(book.ID)
-            contentType := resp.Header.Get("Content-Type")
-
-            if strings.Contains(contentType, "text/html") {
-                isReadable := readability.IsReadable(readabilityCheckInput)
-
-                article, err := readability.FromReader(readabilityInput, book.URL)
-                if err != nil {
-                    chProblem <- book.ID
-                    return
-                }
-
-                book.Author = article.Byline
-                book.Content = article.TextContent
-                book.HTML = article.Content
-
-                if !isReadable {
-                    book.Content = ""
-                }
-
-                if !keepMetadata {
-                    book.Title = article.Title
-                    book.Excerpt = article.Excerpt
-                }
-
-                if book.Title == "" {
-                    book.Title = book.URL
-                }
-
-                book.HasContent = book.Content != ""
-
-                // Get image for thumbnail and save it to local disk
-                var imageURLs []string
-                if article.Image != "" {
-                    imageURLs = append(imageURLs, article.Image)
-                }
-
-                if article.Favicon != "" {
-                    imageURLs = append(imageURLs, article.Favicon)
-                }
-
-                // Save article image to local disk
-                imgPath := fp.Join(h.DataDir, "thumb", strID)
-                for _, imageURL := range imageURLs {
-                    err = downloadBookImage(imageURL, imgPath, time.Minute)
-                    if err == nil {
-                        book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
-                        break
-                    }
-                }
-            }
-
-            // If needed, update offline archive as well.
-            // Make sure to delete the old one first.
-            if request.CreateArchive {
-                archivePath := fp.Join(h.DataDir, "archive", strID)
-                os.Remove(archivePath)
-
-                archivalRequest := warc.ArchivalRequest{
-                    URL:         book.URL,
-                    Reader:      archivalInput,
-                    ContentType: contentType,
-                }
-
-                err = warc.NewArchive(archivalRequest, archivePath)
-                if err != nil {
-                    chProblem <- book.ID
-                    return
-                }
-
-                book.HasArchive = true
-            }
-
             // Update list of bookmarks
             mx.Lock()
@@ -3,13 +3,8 @@ package webserver
 import (
     "fmt"
     "html/template"
-    "image"
-    "image/color"
-    "image/draw"
-    "image/jpeg"
     "io"
     "io/ioutil"
-    "math"
     "mime"
     "net"
     "net/http"
@@ -19,9 +14,6 @@ import (
     "regexp"
     "strings"
     "syscall"
-    "time"
-
-    "github.com/disintegration/imaging"
 )
 
 var rxRepeatedStrip = regexp.MustCompile(`(?i)-+`)
@@ -89,95 +81,6 @@ func fileExists(filePath string) bool {
     return !os.IsNotExist(err) && !info.IsDir()
 }
 
-func clearUTMParams(url *nurl.URL) {
-    queries := url.Query()
-
-    for key := range queries {
-        if strings.HasPrefix(key, "utm_") {
-            queries.Del(key)
-        }
-    }
-
-    url.RawQuery = queries.Encode()
-}
-
-func downloadBookImage(url, dstPath string, timeout time.Duration) error {
-    // Fetch data from URL
-    client := &http.Client{Timeout: timeout}
-    resp, err := client.Get(url)
-    if err != nil {
-        return err
-    }
-    defer resp.Body.Close()
-
-    // Make sure it's JPG or PNG image
-    cp := resp.Header.Get("Content-Type")
-    if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
-        return fmt.Errorf("%s is not a supported image", url)
-    }
-
-    // At this point, the download has finished successfully.
-    // Prepare destination file.
-    err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
-    if err != nil {
-        return fmt.Errorf("failed to create image dir: %v", err)
-    }
-
-    dstFile, err := os.Create(dstPath)
-    if err != nil {
-        return fmt.Errorf("failed to create image file: %v", err)
-    }
-    defer dstFile.Close()
-
-    // Parse image and process it.
-    // If image is smaller than 600x400 or its ratio is less than 4:3, resize.
-    // Else, save it as it is.
-    img, _, err := image.Decode(resp.Body)
-    if err != nil {
-        return fmt.Errorf("failed to parse image %s: %v", url, err)
-    }
-
-    imgRect := img.Bounds()
-    imgWidth := imgRect.Dx()
-    imgHeight := imgRect.Dy()
-    imgRatio := float64(imgWidth) / float64(imgHeight)
-
-    if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
-        err = jpeg.Encode(dstFile, img, nil)
-    } else {
-        // Create background
-        bg := image.NewNRGBA(imgRect)
-        draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src)
-        draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
-
-        bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
-        bg = imaging.Blur(bg, 150)
-        bg = imaging.AdjustBrightness(bg, 30)
-
-        // Create foreground
-        fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
-
-        // Merge foreground and background
-        bgRect := bg.Bounds()
-        fgRect := fg.Bounds()
-        fgPosition := image.Point{
-            X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
-            Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
-        }
-
-        draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
-
-        // Save to file
-        err = jpeg.Encode(dstFile, bg, nil)
-    }
-
-    if err != nil {
-        return fmt.Errorf("failed to save image %s: %v", url, err)
-    }
-
-    return nil
-}
-
 func createTemplate(filename string, funcMap template.FuncMap) (*template.Template, error) {
     // Open file
     src, err := assets.Open(filename)