mirror of https://github.com/go-shiori/shiori.git (synced 2025-09-06 04:54:59 +08:00)

commit 64c62d6b12 (parent 2da0c7e297)
Remove repeated code for archiving bookmarks

11 changed files with 425 additions and 766 deletions
@@ -1,18 +1,10 @@
 package cmd

 import (
-	"bytes"
 	"fmt"
-	"io"
-	"net/http"
-	nurl "net/url"
-	fp "path/filepath"
 	"strings"
-	"time"

-	"github.com/go-shiori/shiori/pkg/warc"
-
-	"github.com/go-shiori/go-readability"
+	"github.com/go-shiori/shiori/internal/core"
 	"github.com/go-shiori/shiori/internal/model"
 	"github.com/spf13/cobra"
 )
@@ -45,28 +37,16 @@ func addHandler(cmd *cobra.Command, args []string) {
 	noArchival, _ := cmd.Flags().GetBool("no-archival")
 	logArchival, _ := cmd.Flags().GetBool("log-archival")

-	// Clean up URL by removing its fragment and UTM parameters
-	tmp, err := nurl.Parse(url)
-	if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-		cError.Println("URL is not valid")
-		return
-	}
-
-	tmp.Fragment = ""
-	clearUTMParams(tmp)
-
 	// Create bookmark item
 	book := model.Bookmark{
-		URL:     tmp.String(),
+		URL:     url,
 		Title:   normalizeSpace(title),
 		Excerpt: normalizeSpace(excerpt),
+		CreateArchive: !noArchival,
 	}

-	// Create bookmark ID
-	book.ID, err = db.CreateNewID("bookmark")
-	if err != nil {
-		cError.Printf("Failed to create ID: %v\n", err)
-		return
+	if book.Title == "" {
+		book.Title = book.URL
 	}

 	// Set bookmark tags
@@ -75,101 +55,51 @@ func addHandler(cmd *cobra.Command, args []string) {
 		book.Tags[i].Name = strings.TrimSpace(tag)
 	}

-	// If it's not offline mode, fetch data from internet
-	var imageURLs []string
-
-	if !offline {
-		func() {
-			cInfo.Println("Downloading article...")
-
-			// Prepare download request
-			req, err := http.NewRequest("GET", url, nil)
-			if err != nil {
-				cError.Printf("Failed to download article: %v\n", err)
-				return
-			}
-
-			// Send download request
-			req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-			resp, err := httpClient.Do(req)
-			if err != nil {
-				cError.Printf("Failed to download article: %v\n", err)
-				return
-			}
-			defer resp.Body.Close()
-
-			// Split response body so it can be processed twice
-			archivalInput := bytes.NewBuffer(nil)
-			readabilityInput := bytes.NewBuffer(nil)
-			readabilityCheckInput := bytes.NewBuffer(nil)
-			multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-			_, err = io.Copy(multiWriter, resp.Body)
-			if err != nil {
-				cError.Printf("Failed to process article: %v\n", err)
-				return
-			}
-
-			// If this is HTML, parse for readable content
-			contentType := resp.Header.Get("Content-Type")
-			if strings.Contains(contentType, "text/html") {
-				isReadable := readability.IsReadable(readabilityCheckInput)
-
-				article, err := readability.FromReader(readabilityInput, url)
-				if err != nil {
-					cError.Printf("Failed to parse article: %v\n", err)
-					return
-				}
-
-				book.Author = article.Byline
-				book.Content = article.TextContent
-				book.HTML = article.Content
-
-				// If title and excerpt doesnt have submitted value, use from article
-				if book.Title == "" {
-					book.Title = article.Title
-				}
-
-				if book.Excerpt == "" {
-					book.Excerpt = article.Excerpt
-				}
-
-				if !isReadable {
-					book.Content = ""
-				}
-
-				// Get image URL
-				if article.Image != "" {
-					imageURLs = append(imageURLs, article.Image)
-				}
-
-				if article.Favicon != "" {
-					imageURLs = append(imageURLs, article.Favicon)
-				}
-			}
-
-			// If needed, create offline archive as well
-			if !noArchival {
-				archivePath := fp.Join(dataDir, "archive", fmt.Sprintf("%d", book.ID))
-				archivalRequest := warc.ArchivalRequest{
-					URL:         url,
-					Reader:      archivalInput,
-					ContentType: contentType,
-					LogEnabled:  logArchival,
-				}
-
-				err = warc.NewArchive(archivalRequest, archivePath)
-				if err != nil {
-					cError.Printf("Failed to create archive: %v\n", err)
-					return
-				}
-			}
-		}()
+	// Create bookmark ID
+	var err error
+	book.ID, err = db.CreateNewID("bookmark")
+	if err != nil {
+		cError.Printf("Failed to create ID: %v\n", err)
+		return
 	}

-	// Make sure title is not empty
-	if book.Title == "" {
-		book.Title = book.URL
+	// Clean up bookmark URL
+	book.URL, err = core.RemoveUTMParams(book.URL)
+	if err != nil {
+		cError.Printf("Failed to clean URL: %v\n", err)
+		return
+	}
+
+	// If it's not offline mode, fetch data from internet.
+	if !offline {
+		cInfo.Println("Downloading article...")
+
+		var isFatalErr bool
+		content, contentType, err := core.DownloadBookmark(book.URL)
+		if err != nil {
+			cError.Printf("Failed to download: %v\n", err)
+		}
+
+		if err == nil && content != nil {
+			request := core.ProcessRequest{
+				DataDir:     dataDir,
+				Bookmark:    book,
+				Content:     content,
+				ContentType: contentType,
+				LogArchival: logArchival,
+			}
+
+			book, isFatalErr, err = core.ProcessBookmark(request)
+			content.Close()
+
+			if err != nil {
+				cError.Printf("Failed: %v\n", err)
+			}
+
+			if isFatalErr {
+				return
+			}
+		}
 	}

 	// Save bookmark to database

@@ -179,18 +109,6 @@ func addHandler(cmd *cobra.Command, args []string) {
 		return
 	}

-	// Save article image to local disk
-	imgPath := fp.Join(dataDir, "thumb", fmt.Sprintf("%d", book.ID))
-	for _, imageURL := range imageURLs {
-		err = downloadBookImage(imageURL, imgPath, time.Minute)
-		if err == nil {
-			break
-		} else {
-			cError.Printf("Failed to download image: %v\n", err)
-			continue
-		}
-	}
-
 	// Print added bookmark
 	fmt.Println()
 	printBookmarks(book)
@@ -2,11 +2,11 @@ package cmd

 import (
 	"fmt"
-	nurl "net/url"
 	"os"
 	"strings"

 	"github.com/PuerkitoBio/goquery"
+	"github.com/go-shiori/shiori/internal/core"
 	"github.com/go-shiori/shiori/internal/model"
 	"github.com/spf13/cobra"
 )

@@ -73,17 +73,14 @@ func importHandler(cmd *cobra.Command, args []string) {
 		url, _ := a.Attr("href")
 		strTags, _ := a.Attr("tags")

-		// Clean up URL by removing its fragment and UTM parameters
-		tmp, err := nurl.Parse(url)
-		if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
+		// Clean up URL
+		var err error
+		url, err = core.RemoveUTMParams(url)
+		if err != nil {
 			cError.Printf("Skip %s: URL is not valid\n", url)
 			return
 		}

-		tmp.Fragment = ""
-		clearUTMParams(tmp)
-		url = tmp.String()
-
 		// Make sure title is valid Utf-8
 		title = toValidUtf8(title, url)

@@ -2,13 +2,13 @@ package cmd

 import (
 	"fmt"
-	nurl "net/url"
 	"os"
 	"strconv"
 	"strings"
 	"time"

 	"github.com/PuerkitoBio/goquery"
+	"github.com/go-shiori/shiori/internal/core"
 	"github.com/go-shiori/shiori/internal/model"
 	"github.com/spf13/cobra"
 )

@@ -59,17 +59,14 @@ func pocketHandler(cmd *cobra.Command, args []string) {
 		intModified, _ := strconv.ParseInt(strModified, 10, 64)
 		modified := time.Unix(intModified, 0)

-		// Clean up URL by removing its fragment and UTM parameters
-		tmp, err := nurl.Parse(url)
-		if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
+		// Clean up URL
+		var err error
+		url, err = core.RemoveUTMParams(url)
+		if err != nil {
 			cError.Printf("Skip %s: URL is not valid\n", url)
 			return
 		}

-		tmp.Fragment = ""
-		clearUTMParams(tmp)
-		url = tmp.String()
-
 		// Make sure title is valid Utf-8
 		title = toValidUtf8(title, url)

@@ -1,22 +1,14 @@
 package cmd

 import (
-	"bytes"
 	"fmt"
-	"io"
-	"net/http"
-	nurl "net/url"
-	"os"
-	fp "path/filepath"
 	"sort"
 	"strings"
 	"sync"
-	"time"

-	"github.com/go-shiori/go-readability"
+	"github.com/go-shiori/shiori/internal/core"
 	"github.com/go-shiori/shiori/internal/database"
 	"github.com/go-shiori/shiori/internal/model"
-	"github.com/go-shiori/shiori/pkg/warc"
 	"github.com/spf13/cobra"
 )

@@ -83,17 +75,12 @@ func updateHandler(cmd *cobra.Command, args []string) {
 	excerpt = normalizeSpace(excerpt)

 	if cmd.Flags().Changed("url") {
-		// Clean up URL by removing its fragment and UTM parameters
-		tmp, err := nurl.Parse(url)
-		if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-			cError.Println("URL is not valid")
-			return
+		// Clean up bookmark URL
+		url, err = core.RemoveUTMParams(url)
+		if err != nil {
+			panic(fmt.Errorf("failed to clean URL: %v", err))
 		}

-		tmp.Fragment = ""
-		clearUTMParams(tmp)
-		url = tmp.String()
-
 		// Since user uses custom URL, make sure there is only one ID to update
 		if len(ids) != 1 {
 			cError.Println("Update only accepts one index while using --url flag")

@@ -149,6 +136,9 @@ func updateHandler(cmd *cobra.Command, args []string) {
 	for i, book := range bookmarks {
 		wg.Add(1)

+		// Mark whether book will be archived
+		book.CreateArchive = !noArchival
+
 		// If used, use submitted URL
 		if url != "" {
 			book.URL = url

@@ -164,102 +154,32 @@ func updateHandler(cmd *cobra.Command, args []string) {
 				<-semaphore
 			}()

-			// Prepare download request
-			req, err := http.NewRequest("GET", book.URL, nil)
+			// Download data from internet
+			content, contentType, err := core.DownloadBookmark(book.URL)
 			if err != nil {
 				chProblem <- book.ID
 				chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
 				return
 			}

-			// Send download request
-			req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-			resp, err := httpClient.Do(req)
-			if err != nil {
-				chProblem <- book.ID
-				chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
-				return
+			request := core.ProcessRequest{
+				DataDir:      dataDir,
+				Bookmark:     book,
+				Content:      content,
+				ContentType:  contentType,
+				KeepMetadata: keepMetadata,
+				LogArchival:  logArchival,
 			}
-			defer resp.Body.Close()

-			// Split response body so it can be processed twice
-			archivalInput := bytes.NewBuffer(nil)
-			readabilityInput := bytes.NewBuffer(nil)
-			readabilityCheckInput := bytes.NewBuffer(nil)
-			multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-			_, err = io.Copy(multiWriter, resp.Body)
+			book, _, err = core.ProcessBookmark(request)
+			content.Close()
 			if err != nil {
 				chProblem <- book.ID
 				chMessage <- fmt.Errorf("Failed to process %s: %v", book.URL, err)
 				return
 			}

-			// If this is HTML, parse for readable content
-			contentType := resp.Header.Get("Content-Type")
-			if strings.Contains(contentType, "text/html") {
-				isReadable := readability.IsReadable(readabilityCheckInput)
-
-				article, err := readability.FromReader(readabilityInput, book.URL)
-				if err != nil {
-					chProblem <- book.ID
-					chMessage <- fmt.Errorf("Failed to parse %s: %v", book.URL, err)
-					return
-				}
-
-				book.Author = article.Byline
-				book.Content = article.TextContent
-				book.HTML = article.Content
-
-				if !isReadable {
-					book.Content = ""
-				}
-
-				if !keepMetadata {
-					book.Title = article.Title
-					book.Excerpt = article.Excerpt
-				}
-
-				// Get image for thumbnail and save it to local disk
-				var imageURLs []string
-				if article.Image != "" {
-					imageURLs = append(imageURLs, article.Image)
-				}
-
-				if article.Favicon != "" {
-					imageURLs = append(imageURLs, article.Favicon)
-				}
-
-				imgPath := fp.Join(dataDir, "thumb", fmt.Sprintf("%d", book.ID))
-				for _, imageURL := range imageURLs {
-					err = downloadBookImage(imageURL, imgPath, time.Minute)
-					if err == nil {
-						break
-					}
-				}
-			}
-
-			// If needed, update offline archive as well.
-			// Make sure to delete the old one first.
-			if !noArchival {
-				archivePath := fp.Join(dataDir, "archive", fmt.Sprintf("%d", book.ID))
-				os.Remove(archivePath)
-
-				archivalRequest := warc.ArchivalRequest{
-					URL:         book.URL,
-					Reader:      archivalInput,
-					ContentType: contentType,
-					LogEnabled:  logArchival,
-				}
-
-				err = warc.NewArchive(archivalRequest, archivePath)
-				if err != nil {
-					chProblem <- book.ID
-					chMessage <- fmt.Errorf("Failed to create archive %s: %v", book.URL, err)
-					return
-				}
-			}
-
 			// Send success message
 			chMessage <- fmt.Sprintf("Downloaded %s", book.URL)
@@ -3,29 +3,17 @@ package cmd
 import (
 	"errors"
 	"fmt"
-	"image"
-	clr "image/color"
-	"image/draw"
-	"image/jpeg"
-	"math"
-	"net/http"
 	nurl "net/url"
 	"os"
 	"os/exec"
-	fp "path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
-	"time"
 	"unicode/utf8"

-	"github.com/disintegration/imaging"
 	"github.com/fatih/color"
 	"github.com/go-shiori/shiori/internal/model"
 	"golang.org/x/crypto/ssh/terminal"
-
-	// Add supports for PNG image
-	_ "image/png"
 )

 var (

@@ -54,95 +42,6 @@ func isURLValid(s string) bool {
 	return err == nil && tmp.Scheme != "" && tmp.Hostname() != ""
 }

-func clearUTMParams(url *nurl.URL) {
-	queries := url.Query()
-
-	for key := range queries {
-		if strings.HasPrefix(key, "utm_") {
-			queries.Del(key)
-		}
-	}
-
-	url.RawQuery = queries.Encode()
-}
-
-func downloadBookImage(url, dstPath string, timeout time.Duration) error {
-	// Fetch data from URL
-	client := &http.Client{Timeout: timeout}
-	resp, err := client.Get(url)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	// Make sure it's JPG or PNG image
-	cp := resp.Header.Get("Content-Type")
-	if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
-		return fmt.Errorf("%s is not a supported image", url)
-	}
-
-	// At this point, the download has finished successfully.
-	// Prepare destination file.
-	err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
-	if err != nil {
-		return fmt.Errorf("failed to create image dir: %v", err)
-	}
-
-	dstFile, err := os.Create(dstPath)
-	if err != nil {
-		return fmt.Errorf("failed to create image file: %v", err)
-	}
-	defer dstFile.Close()
-
-	// Parse image and process it.
-	// If image is smaller than 600x400 or its ratio is less than 4:3, resize.
-	// Else, save it as it is.
-	img, _, err := image.Decode(resp.Body)
-	if err != nil {
-		return fmt.Errorf("failed to parse image %s: %v", url, err)
-	}
-
-	imgRect := img.Bounds()
-	imgWidth := imgRect.Dx()
-	imgHeight := imgRect.Dy()
-	imgRatio := float64(imgWidth) / float64(imgHeight)
-
-	if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
-		err = jpeg.Encode(dstFile, img, nil)
-	} else {
-		// Create background
-		bg := image.NewNRGBA(imgRect)
-		draw.Draw(bg, imgRect, image.NewUniform(clr.White), image.Point{}, draw.Src)
-		draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
-
-		bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
-		bg = imaging.Blur(bg, 150)
-		bg = imaging.AdjustBrightness(bg, 30)
-
-		// Create foreground
-		fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
-
-		// Merge foreground and background
-		bgRect := bg.Bounds()
-		fgRect := fg.Bounds()
-		fgPosition := image.Point{
-			X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
-			Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
-		}
-
-		draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
-
-		// Save to file
-		err = jpeg.Encode(dstFile, bg, nil)
-	}
-
-	if err != nil {
-		return fmt.Errorf("failed to save image %s: %v", url, err)
-	}
-
-	return nil
-}
-
 func printBookmarks(bookmarks ...model.Bookmark) {
 	for _, bookmark := range bookmarks {
 		// Create bookmark index
internal/core/download.go (new file, 31 lines)

@@ -0,0 +1,31 @@
+package core
+
+import (
+	"io"
+	"net/http"
+	"time"
+)
+
+var httpClient = &http.Client{Timeout: time.Minute}
+
+// DownloadBookmark downloads bookmarked page from specified URL.
+// Return response body, make sure to close it later.
+func DownloadBookmark(url string) (io.ReadCloser, string, error) {
+	// Prepare download request
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, "", err
+	}
+
+	// Send download request
+	req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		return nil, "", err
+	}
+
+	// Get content type
+	contentType := resp.Header.Get("Content-Type")
+
+	return resp.Body, contentType, nil
+}
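The helper returns the raw, unread response body, so every caller is responsible for closing it. A minimal sketch of the intended call pattern (hypothetical standalone snippet; note that `internal/core` is only importable from inside the shiori module, and the URL is purely illustrative):

```go
package main

import (
	"fmt"
	"log"

	"github.com/go-shiori/shiori/internal/core"
)

func main() {
	// DownloadBookmark returns the unread response body plus its
	// Content-Type header; closing the body is the caller's job.
	content, contentType, err := core.DownloadBookmark("https://example.com/article")
	if err != nil {
		log.Fatalf("download failed: %v", err)
	}
	defer content.Close()

	fmt.Println("content type:", contentType)
}
```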
internal/core/processing.go (new file, 218 lines)

@@ -0,0 +1,218 @@
+package core
+
+import (
+	"bytes"
+	"fmt"
+	"image"
+	"image/color"
+	"image/draw"
+	"image/jpeg"
+	"io"
+	"math"
+	"os"
+	"path"
+	fp "path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/disintegration/imaging"
+	"github.com/go-shiori/go-readability"
+	"github.com/go-shiori/shiori/internal/model"
+	"github.com/go-shiori/shiori/pkg/warc"
+
+	// Add support for png
+	_ "image/png"
+)
+
+// ProcessRequest is the request for processing bookmark.
+type ProcessRequest struct {
+	DataDir      string
+	Bookmark     model.Bookmark
+	Content      io.Reader
+	ContentType  string
+	KeepMetadata bool
+	LogArchival  bool
+}
+
+// ProcessBookmark process the bookmark and archive it if needed.
+// Return three values, the bookmark itself, is error fatal, and error value.
+func ProcessBookmark(req ProcessRequest) (model.Bookmark, bool, error) {
+	book := req.Bookmark
+	contentType := req.ContentType
+
+	// Make sure bookmark ID is defined
+	if book.ID == 0 {
+		return book, true, fmt.Errorf("bookmark ID is not valid")
+	}
+
+	// Split bookmark content so it can be processed several times
+	archivalInput := bytes.NewBuffer(nil)
+	readabilityInput := bytes.NewBuffer(nil)
+	readabilityCheckInput := bytes.NewBuffer(nil)
+
+	var multiWriter io.Writer
+	if !strings.Contains(contentType, "text/html") {
+		multiWriter = io.MultiWriter(archivalInput)
+	} else {
+		multiWriter = io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
+	}
+
+	_, err := io.Copy(multiWriter, req.Content)
+	if err != nil {
+		return book, false, fmt.Errorf("failed to process article: %v", err)
+	}
+
+	// If this is HTML, parse for readable content
+	var imageURLs []string
+	if strings.Contains(contentType, "text/html") {
+		isReadable := readability.IsReadable(readabilityCheckInput)
+
+		article, err := readability.FromReader(readabilityInput, book.URL)
+		if err != nil {
+			return book, false, fmt.Errorf("failed to parse article: %v", err)
+		}
+
+		book.Author = article.Byline
+		book.Content = article.TextContent
+		book.HTML = article.Content
+
+		// If title and excerpt doesnt have submitted value, use from article
+		if !req.KeepMetadata || book.Title == "" {
+			book.Title = article.Title
+		}
+
+		if !req.KeepMetadata || book.Excerpt == "" {
+			book.Excerpt = article.Excerpt
+		}
+
+		// Sometimes article doesn't have any title, so make sure it is not empty
+		if book.Title == "" {
+			book.Title = book.URL
+		}
+
+		// Get image URL
+		if article.Image != "" {
+			imageURLs = append(imageURLs, article.Image)
+		}
+
+		if article.Favicon != "" {
+			imageURLs = append(imageURLs, article.Favicon)
+		}
+
+		if !isReadable {
+			book.Content = ""
+		}
+
+		book.HasContent = book.Content != ""
+	}
+
+	// Save article image to local disk
+	strID := strconv.Itoa(book.ID)
+	imgPath := fp.Join(req.DataDir, "thumb", strID)
+
+	for _, imageURL := range imageURLs {
+		err = downloadBookImage(imageURL, imgPath)
+		if err == nil {
+			book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
+			break
+		}
+	}
+
+	// If needed, create offline archive as well
+	if book.CreateArchive {
+		archivePath := fp.Join(req.DataDir, "archive", fmt.Sprintf("%d", book.ID))
+		os.Remove(archivePath)
+
+		archivalRequest := warc.ArchivalRequest{
+			URL:         book.URL,
+			Reader:      archivalInput,
+			ContentType: contentType,
+			LogEnabled:  req.LogArchival,
+		}
+
+		err = warc.NewArchive(archivalRequest, archivePath)
+		if err != nil {
+			return book, false, fmt.Errorf("failed to create archive: %v", err)
+		}
+
+		book.HasArchive = true
+	}
+
+	return book, false, nil
+}
+
+func downloadBookImage(url, dstPath string) error {
+	// Fetch data from URL
+	resp, err := httpClient.Get(url)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	// Make sure it's JPG or PNG image
+	cp := resp.Header.Get("Content-Type")
+	if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
+		return fmt.Errorf("%s is not a supported image", url)
+	}
+
+	// At this point, the download has finished successfully.
+	// Prepare destination file.
+	err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
+	if err != nil {
+		return fmt.Errorf("failed to create image dir: %v", err)
+	}
+
+	dstFile, err := os.Create(dstPath)
+	if err != nil {
+		return fmt.Errorf("failed to create image file: %v", err)
+	}
+	defer dstFile.Close()
+
+	// Parse image and process it.
+	// If image is smaller than 600x400 or its ratio is less than 4:3, resize.
+	// Else, save it as it is.
+	img, _, err := image.Decode(resp.Body)
+	if err != nil {
+		return fmt.Errorf("failed to parse image %s: %v", url, err)
+	}
+
+	imgRect := img.Bounds()
+	imgWidth := imgRect.Dx()
+	imgHeight := imgRect.Dy()
+	imgRatio := float64(imgWidth) / float64(imgHeight)
+
+	if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
+		err = jpeg.Encode(dstFile, img, nil)
+	} else {
+		// Create background
+		bg := image.NewNRGBA(imgRect)
+		draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src)
+		draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
+
+		bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
+		bg = imaging.Blur(bg, 150)
+		bg = imaging.AdjustBrightness(bg, 30)
+
+		// Create foreground
+		fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
+
+		// Merge foreground and background
+		bgRect := bg.Bounds()
+		fgRect := fg.Bounds()
+		fgPosition := image.Point{
+			X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
+			Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
+		}
+
+		draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
+
+		// Save to file
+		err = jpeg.Encode(dstFile, bg, nil)
+	}
+
+	if err != nil {
+		return fmt.Errorf("failed to save image %s: %v", url, err)
+	}
+
+	return nil
+}
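Taken together with `DownloadBookmark`, this gives every caller the same flow: clean the URL, download, then hand everything to `ProcessBookmark`. A minimal sketch under stated assumptions (`fetchAndProcess` is a hypothetical helper, not part of this commit; `book.ID` must already be set, e.g. via `db.CreateNewID("bookmark")`, because `ProcessBookmark` rejects a zero ID):

```go
package cmd // hypothetical caller inside the shiori module

import (
	"github.com/go-shiori/shiori/internal/core"
	"github.com/go-shiori/shiori/internal/model"
)

// fetchAndProcess mirrors the call pattern shared by the updated handlers.
// dataDir and logArchival stand in for values the real callers have in scope.
func fetchAndProcess(book model.Bookmark, dataDir string, logArchival bool) (model.Bookmark, error) {
	var err error

	// Strip UTM parameters and the fragment; fails on invalid URLs.
	book.URL, err = core.RemoveUTMParams(book.URL)
	if err != nil {
		return book, err
	}

	// Download the page; the returned body must be closed by the caller.
	content, contentType, err := core.DownloadBookmark(book.URL)
	if err != nil {
		return book, err
	}
	defer content.Close()

	// Readability parsing, thumbnail download, and WARC archival
	// (when book.CreateArchive is set) now all happen in one place.
	book, _, err = core.ProcessBookmark(core.ProcessRequest{
		DataDir:     dataDir,
		Bookmark:    book,
		Content:     content,
		ContentType: contentType,
		LogArchival: logArchival,
	})
	return book, err
}
```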
internal/core/url.go (new file, 28 lines)

@@ -0,0 +1,28 @@
+package core
+
+import (
+	"fmt"
+	nurl "net/url"
+	"strings"
+)
+
+// RemoveUTMParams removes the UTM parameters from URL.
+func RemoveUTMParams(url string) (string, error) {
+	// Parse string URL
+	tmp, err := nurl.Parse(url)
+	if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
+		return url, fmt.Errorf("URL is not valid")
+	}
+
+	// Remove UTM queries
+	queries := tmp.Query()
+	for key := range queries {
+		if strings.HasPrefix(key, "utm_") {
+			queries.Del(key)
+		}
+	}
+
+	tmp.Fragment = ""
+	tmp.RawQuery = queries.Encode()
+	return tmp.String(), nil
+}
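A quick illustration of the cleanup behavior (hypothetical snippet; the expected output is inferred from the implementation above — `utm_*` parameters and the fragment are dropped, other query parameters are kept and re-encoded):

```go
package main

import (
	"fmt"

	"github.com/go-shiori/shiori/internal/core" // importable only within the shiori module
)

func main() {
	cleaned, err := core.RemoveUTMParams(
		"https://example.com/post?id=42&utm_source=newsletter&utm_medium=email#comments")
	if err != nil {
		panic(err)
	}

	fmt.Println(cleaned) // https://example.com/post?id=42
}
```

Note that on an invalid URL the function returns the original string together with an error, so callers can treat the error as "skip this bookmark" while still printing the offending URL.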
@@ -6,17 +6,12 @@ import (
 	"fmt"
 	"io"
 	"net/http"
-	nurl "net/url"
 	"os"
-	"path"
 	fp "path/filepath"
 	"strconv"
-	"strings"
-	"time"

-	"github.com/go-shiori/go-readability"
+	"github.com/go-shiori/shiori/internal/core"
 	"github.com/go-shiori/shiori/internal/model"
-	"github.com/go-shiori/shiori/pkg/warc"
 	"github.com/julienschmidt/httprouter"
 )

@@ -31,18 +26,15 @@ func (h *handler) apiInsertViaExtension(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	err = json.NewDecoder(r.Body).Decode(&request)
 	checkError(err)

-	// Clean up URL by removing its fragment and UTM parameters
-	tmp, err := nurl.Parse(request.URL)
-	if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-		panic(fmt.Errorf("URL is not valid"))
+	// Clean up bookmark URL
+	request.URL, err = core.RemoveUTMParams(request.URL)
+	if err != nil {
+		panic(fmt.Errorf("failed to clean URL: %v", err))
 	}

-	tmp.Fragment = ""
-	clearUTMParams(tmp)
-	request.URL = tmp.String()
-
 	// Check if bookmark already exists.
 	book, exist := h.DB.GetBookmark(0, request.URL)
+	book.CreateArchive = true

 	// If it already exists, we need to set ID and tags.
 	if exist {

@@ -69,119 +61,37 @@ func (h *handler) apiInsertViaExtension(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	// Since we are using extension, the extension might send the HTML content
 	// so no need to download it again here. However, if it's empty, it might be not HTML file
 	// so we download it here.
-	contentType := "text/html; charset=UTF-8"
-	contentBuffer := bytes.NewBufferString(book.HTML)
+	var contentType string
+	var contentBuffer io.Reader

 	if book.HTML == "" {
-		func() {
-			// Prepare download request
-			req, err := http.NewRequest("GET", book.URL, nil)
-			if err != nil {
-				return
-			}
-
-			// Send download request
-			req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-			resp, err := httpClient.Do(req)
-			if err != nil {
-				return
-			}
-			defer resp.Body.Close()
-
-			// Save response for later use
-			contentType = resp.Header.Get("Content-Type")
-
-			contentBuffer.Reset()
-			_, err = io.Copy(contentBuffer, resp.Body)
-			if err != nil {
-				return
-			}
-		}()
+		contentBuffer, contentType, _ = core.DownloadBookmark(book.URL)
+	} else {
+		contentType = "text/html; charset=UTF-8"
+		contentBuffer = bytes.NewBufferString(book.HTML)
 	}

 	// At this point the web page already downloaded.
 	// Time to process it.
-	func() {
-		// Split response so it can be processed several times
-		archivalInput := bytes.NewBuffer(nil)
-		readabilityInput := bytes.NewBuffer(nil)
-		readabilityCheckInput := bytes.NewBuffer(nil)
-		multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-		_, err = io.Copy(multiWriter, contentBuffer)
-		if err != nil {
-			return
-		}
-
-		// If it's HTML, parse the readable content.
-		if strings.Contains(contentType, "text/html") {
-			isReadable := readability.IsReadable(readabilityCheckInput)
-
-			article, err := readability.FromReader(readabilityInput, book.URL)
-			if err != nil {
-				return
-			}
-
-			book.Author = article.Byline
-			book.Content = article.TextContent
-			book.HTML = article.Content
-
-			if book.Title == "" {
-				if article.Title == "" {
-					book.Title = book.URL
-				} else {
-					book.Title = article.Title
-				}
-			}
-
-			if book.Excerpt == "" {
-				book.Excerpt = article.Excerpt
-			}
-
-			if !isReadable {
-				book.Content = ""
-			}
-
-			book.HasContent = book.Content != ""
-
-			// Get image for thumbnail and save it to local disk
-			var imageURLs []string
-			if article.Image != "" {
-				imageURLs = append(imageURLs, article.Image)
-			}
-
-			if article.Favicon != "" {
-				imageURLs = append(imageURLs, article.Favicon)
-			}
-
-			// Save article image to local disk
-			strID := strconv.Itoa(book.ID)
-			imgPath := fp.Join(h.DataDir, "thumb", strID)
-			for _, imageURL := range imageURLs {
-				err = downloadBookImage(imageURL, imgPath, time.Minute)
-				if err == nil {
-					book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
-					break
-				}
-			}
-		}
-
-		// Create offline archive as well
-		archivePath := fp.Join(h.DataDir, "archive", fmt.Sprintf("%d", book.ID))
-		os.Remove(archivePath)
-
-		archivalRequest := warc.ArchivalRequest{
-			URL:         book.URL,
-			Reader:      archivalInput,
+	if contentBuffer != nil {
+		request := core.ProcessRequest{
+			DataDir:     h.DataDir,
+			Bookmark:    book,
+			Content:     contentBuffer,
 			ContentType: contentType,
 		}

-		err = warc.NewArchive(archivalRequest, archivePath)
-		if err != nil {
-			return
+		var isFatalErr bool
+		book, isFatalErr, err = core.ProcessBookmark(request)
+
+		if tmp, ok := contentBuffer.(io.ReadCloser); ok {
+			tmp.Close()
 		}

-		book.HasArchive = true
-	}()
+		if err != nil && isFatalErr {
+			panic(fmt.Errorf("failed to process bookmark: %v", err))
+		}
+	}

 	// Save bookmark to database
 	results, err := h.DB.SaveBookmarks(book)
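Because `contentBuffer` is now declared as a plain `io.Reader`, it may hold either a `*bytes.Buffer` built from the extension's HTML payload (nothing to close) or the `io.ReadCloser` returned by `core.DownloadBookmark` (which must be closed). The type assertion above handles both cases; a standalone sketch of that pattern:

```go
package main

import (
	"bytes"
	"io"
)

// closeIfCloser closes r only when its concrete type actually needs closing,
// mirroring the contentBuffer handling in the extension handler above.
func closeIfCloser(r io.Reader) {
	if rc, ok := r.(io.ReadCloser); ok {
		rc.Close()
	}
}

func main() {
	closeIfCloser(bytes.NewBufferString("<html></html>")) // no-op: not a ReadCloser
}
```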
@@ -1,13 +1,10 @@
 package webserver

 import (
-	"bytes"
 	"encoding/json"
 	"fmt"
-	"io"
 	"math"
 	"net/http"
-	nurl "net/url"
 	"os"
 	"path"
 	fp "path/filepath"

@@ -16,10 +13,9 @@ import (
 	"sync"
 	"time"

-	"github.com/go-shiori/go-readability"
+	"github.com/go-shiori/shiori/internal/core"
 	"github.com/go-shiori/shiori/internal/database"
 	"github.com/go-shiori/shiori/internal/model"
-	"github.com/go-shiori/shiori/pkg/warc"
 	"github.com/gofrs/uuid"
 	"github.com/julienschmidt/httprouter"
 	"golang.org/x/crypto/bcrypt"

@@ -251,112 +247,35 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	err = json.NewDecoder(r.Body).Decode(&book)
 	checkError(err)

-	// Clean up URL by removing its fragment and UTM parameters
-	tmp, err := nurl.Parse(book.URL)
-	if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
-		panic(fmt.Errorf("URL is not valid"))
-	}
-
-	tmp.Fragment = ""
-	clearUTMParams(tmp)
-	book.URL = tmp.String()
-
 	// Create bookmark ID
 	book.ID, err = h.DB.CreateNewID("bookmark")
 	if err != nil {
 		panic(fmt.Errorf("failed to create ID: %v", err))
 	}

+	// Clean up bookmark URL
+	book.URL, err = core.RemoveUTMParams(book.URL)
+	if err != nil {
+		panic(fmt.Errorf("failed to clean URL: %v", err))
+	}
+
 	// Fetch data from internet
-	var imageURLs []string
-	func() {
-		// Prepare download request
-		req, err := http.NewRequest("GET", book.URL, nil)
-		if err != nil {
-			return
+	var isFatalErr bool
+	content, contentType, err := core.DownloadBookmark(book.URL)
+	if err == nil && content != nil {
+		request := core.ProcessRequest{
+			DataDir:     h.DataDir,
+			Bookmark:    book,
+			Content:     content,
+			ContentType: contentType,
 		}

-		// Send download request
-		req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-		resp, err := httpClient.Do(req)
-		if err != nil {
-			return
+		book, isFatalErr, err = core.ProcessBookmark(request)
+		content.Close()
+
+		if err != nil && isFatalErr {
+			panic(fmt.Errorf("failed to process bookmark: %v", err))
 		}
-		defer resp.Body.Close()
-
-		// Split response body so it can be processed twice
-		archivalInput := bytes.NewBuffer(nil)
-		readabilityInput := bytes.NewBuffer(nil)
-		readabilityCheckInput := bytes.NewBuffer(nil)
-		multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-		_, err = io.Copy(multiWriter, resp.Body)
-		if err != nil {
-			return
-		}
-
-		// If this is HTML, parse for readable content
-		contentType := resp.Header.Get("Content-Type")
-		if strings.Contains(contentType, "text/html") {
-			isReadable := readability.IsReadable(readabilityCheckInput)
-
-			article, err := readability.FromReader(readabilityInput, book.URL)
-			if err != nil {
-				return
-			}
-
-			book.Author = article.Byline
-			book.Content = article.TextContent
-			book.HTML = article.Content
-
-			// If title and excerpt doesnt have submitted value, use from article
-			if book.Title == "" {
-				book.Title = article.Title
-			}
-
-			if book.Excerpt == "" {
-				book.Excerpt = article.Excerpt
-			}
-
-			// Get image URL
-			if article.Image != "" {
-				imageURLs = append(imageURLs, article.Image)
-			}
-
-			if article.Favicon != "" {
-				imageURLs = append(imageURLs, article.Favicon)
-			}
-
-			if !isReadable {
-				book.Content = ""
-			}
-
-			book.HasContent = book.Content != ""
-		}
-
-		// If needed, create offline archive as well
-		if book.CreateArchive {
-			archivePath := fp.Join(h.DataDir, "archive", fmt.Sprintf("%d", book.ID))
-			os.Remove(archivePath)
-
-			archivalRequest := warc.ArchivalRequest{
-				URL:         book.URL,
-				Reader:      archivalInput,
-				ContentType: contentType,
-			}
-
-			err = warc.NewArchive(archivalRequest, archivePath)
-			if err != nil {
-				return
-			}
-
-			book.HasArchive = true
-		}
-	}()
-
-	// Make sure title is not empty
-	if book.Title == "" {
-		book.Title = book.URL
 	}

 	// Save bookmark to database

@@ -366,17 +285,6 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	}
 	book = results[0]

-	// Save article image to local disk
-	strID := strconv.Itoa(book.ID)
-	imgPath := fp.Join(h.DataDir, "thumb", strID)
-	for _, imageURL := range imageURLs {
-		err = downloadBookImage(imageURL, imgPath, time.Minute)
-		if err == nil {
-			book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
-			break
-		}
-	}
-
 	// Return the new bookmark
 	w.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w).Encode(&book)

@@ -446,6 +354,12 @@ func (h *handler) apiUpdateBookmark(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	book.Excerpt = request.Excerpt
 	book.Public = request.Public

+	// Clean up bookmark URL
+	book.URL, err = core.RemoveUTMParams(book.URL)
+	if err != nil {
+		panic(fmt.Errorf("failed to clean URL: %v", err))
+	}
+
 	// Set new tags
 	for i := range book.Tags {
 		book.Tags[i].Deleted = true

@@ -525,6 +439,9 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	for i, book := range bookmarks {
 		wg.Add(1)

+		// Mark whether book will be archived
+		book.CreateArchive = request.CreateArchive
+
 		go func(i int, book model.Bookmark, keepMetadata bool) {
 			// Make sure to finish the WG
 			defer wg.Done()

@@ -535,107 +452,28 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 				<-semaphore
 			}()

-			// Prepare download request
-			req, err := http.NewRequest("GET", book.URL, nil)
+			// Download data from internet
+			content, contentType, err := core.DownloadBookmark(book.URL)
 			if err != nil {
 				chProblem <- book.ID
 				return
 			}

-			// Send download request
-			req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
-			resp, err := httpClient.Do(req)
+			request := core.ProcessRequest{
+				DataDir:      h.DataDir,
+				Bookmark:     book,
+				Content:      content,
+				ContentType:  contentType,
+				KeepMetadata: keepMetadata,
+			}
+
+			book, _, err = core.ProcessBookmark(request)
+			content.Close()
+
 			if err != nil {
 				chProblem <- book.ID
 				return
 			}
-			defer resp.Body.Close()
-
-			// Split response body so it can be processed twice
-			archivalInput := bytes.NewBuffer(nil)
-			readabilityInput := bytes.NewBuffer(nil)
-			readabilityCheckInput := bytes.NewBuffer(nil)
-			multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
-
-			_, err = io.Copy(multiWriter, resp.Body)
-			if err != nil {
-				chProblem <- book.ID
-				return
-			}
-
-			// If this is HTML, parse for readable content
-			strID := strconv.Itoa(book.ID)
-			contentType := resp.Header.Get("Content-Type")
-
-			if strings.Contains(contentType, "text/html") {
-				isReadable := readability.IsReadable(readabilityCheckInput)
-
-				article, err := readability.FromReader(readabilityInput, book.URL)
-				if err != nil {
-					chProblem <- book.ID
-					return
-				}
-
-				book.Author = article.Byline
-				book.Content = article.TextContent
-				book.HTML = article.Content
-
-				if !isReadable {
-					book.Content = ""
-				}
-
-				if !keepMetadata {
-					book.Title = article.Title
-					book.Excerpt = article.Excerpt
-				}
-
-				if book.Title == "" {
-					book.Title = book.URL
-				}
-
-				book.HasContent = book.Content != ""
-
-				// Get image for thumbnail and save it to local disk
-				var imageURLs []string
-				if article.Image != "" {
-					imageURLs = append(imageURLs, article.Image)
-				}
-
-				if article.Favicon != "" {
-					imageURLs = append(imageURLs, article.Favicon)
-				}
-
-				// Save article image to local disk
-				imgPath := fp.Join(h.DataDir, "thumb", strID)
-				for _, imageURL := range imageURLs {
-					err = downloadBookImage(imageURL, imgPath, time.Minute)
-					if err == nil {
-						book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
-						break
-					}
-				}
-			}
-
-			// If needed, update offline archive as well.
-			// Make sure to delete the old one first.
-			if request.CreateArchive {
-				archivePath := fp.Join(h.DataDir, "archive", strID)
-				os.Remove(archivePath)
-
-				archivalRequest := warc.ArchivalRequest{
-					URL:         book.URL,
-					Reader:      archivalInput,
-					ContentType: contentType,
-				}
-
-				err = warc.NewArchive(archivalRequest, archivePath)
-				if err != nil {
-					chProblem <- book.ID
-					return
-				}
-
-				book.HasArchive = true
-			}
-
 			// Update list of bookmarks
 			mx.Lock()
@@ -3,13 +3,8 @@ package webserver
 import (
 	"fmt"
 	"html/template"
-	"image"
-	"image/color"
-	"image/draw"
-	"image/jpeg"
 	"io"
 	"io/ioutil"
-	"math"
 	"mime"
 	"net"
 	"net/http"

@@ -19,9 +14,6 @@ import (
 	"regexp"
 	"strings"
 	"syscall"
-	"time"
-
-	"github.com/disintegration/imaging"
 )

 var rxRepeatedStrip = regexp.MustCompile(`(?i)-+`)

@@ -89,95 +81,6 @@ func fileExists(filePath string) bool {
 	return !os.IsNotExist(err) && !info.IsDir()
 }

-func clearUTMParams(url *nurl.URL) {
-	queries := url.Query()
-
-	for key := range queries {
-		if strings.HasPrefix(key, "utm_") {
-			queries.Del(key)
-		}
-	}
-
-	url.RawQuery = queries.Encode()
-}
-
-func downloadBookImage(url, dstPath string, timeout time.Duration) error {
-	// Fetch data from URL
-	client := &http.Client{Timeout: timeout}
-	resp, err := client.Get(url)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	// Make sure it's JPG or PNG image
-	cp := resp.Header.Get("Content-Type")
-	if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
-		return fmt.Errorf("%s is not a supported image", url)
-	}
-
-	// At this point, the download has finished successfully.
-	// Prepare destination file.
-	err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
-	if err != nil {
-		return fmt.Errorf("failed to create image dir: %v", err)
-	}
-
-	dstFile, err := os.Create(dstPath)
-	if err != nil {
-		return fmt.Errorf("failed to create image file: %v", err)
-	}
-	defer dstFile.Close()
-
-	// Parse image and process it.
-	// If image is smaller than 600x400 or its ratio is less than 4:3, resize.
-	// Else, save it as it is.
-	img, _, err := image.Decode(resp.Body)
-	if err != nil {
-		return fmt.Errorf("failed to parse image %s: %v", url, err)
-	}
-
-	imgRect := img.Bounds()
-	imgWidth := imgRect.Dx()
-	imgHeight := imgRect.Dy()
-	imgRatio := float64(imgWidth) / float64(imgHeight)
-
-	if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
-		err = jpeg.Encode(dstFile, img, nil)
-	} else {
-		// Create background
-		bg := image.NewNRGBA(imgRect)
-		draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src)
-		draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
-
-		bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
-		bg = imaging.Blur(bg, 150)
-		bg = imaging.AdjustBrightness(bg, 30)
-
-		// Create foreground
-		fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
-
-		// Merge foreground and background
-		bgRect := bg.Bounds()
-		fgRect := fg.Bounds()
-		fgPosition := image.Point{
-			X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
-			Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
-		}
-
-		draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
-
-		// Save to file
-		err = jpeg.Encode(dstFile, bg, nil)
-	}
-
-	if err != nil {
-		return fmt.Errorf("failed to save image %s: %v", url, err)
-	}
-
-	return nil
-}
-
 func createTemplate(filename string, funcMap template.FuncMap) (*template.Template, error) {
 	// Open file
 	src, err := assets.Open(filename)