shiori/internal/cmd/update.go

356 lines
9.3 KiB
Go
Raw Normal View History

2019-05-21 11:31:40 +08:00
package cmd
2019-05-22 17:13:52 +08:00
import (
2019-06-09 15:54:07 +08:00
"bytes"
2019-05-22 17:13:52 +08:00
"fmt"
2019-06-09 15:54:07 +08:00
"io"
"net/http"
2019-05-22 17:13:52 +08:00
nurl "net/url"
"os"
2019-05-22 17:13:52 +08:00
fp "path/filepath"
"sort"
2019-05-22 17:13:52 +08:00
"strings"
"sync"
"time"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/database"
"github.com/go-shiori/shiori/internal/model"
2019-06-09 15:54:07 +08:00
"github.com/go-shiori/shiori/pkg/warc"
2019-05-22 17:13:52 +08:00
"github.com/spf13/cobra"
)
2019-05-21 11:31:40 +08:00
func updateCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "update [indices]",
Short: "Update the saved bookmarks",
Long: "Update fields of an existing bookmark. " +
"Accepts space-separated list of indices (e.g. 5 6 23 4 110 45), " +
"hyphenated range (e.g. 100-200) or both (e.g. 1-3 7 9). " +
"If no arguments, ALL bookmarks will be updated. Update works differently depending on the flags:\n" +
"- If indices are passed without any flags (--url, --title, --tag and --excerpt), read the URLs from DB and update titles from web.\n" +
"- If --url is passed (and --title is omitted), update the title from web using the URL. While using this flag, update only accept EXACTLY one index.\n" +
"While updating bookmark's tags, you can use - to remove tag (e.g. -nature to remove nature tag from this bookmark).",
2019-05-22 17:13:52 +08:00
Run: updateHandler,
2019-05-21 11:31:40 +08:00
}
2019-06-09 18:02:16 +08:00
cmd.Flags().StringP("url", "u", "", "New URL for this bookmark")
cmd.Flags().StringP("title", "i", "", "New title for this bookmark")
cmd.Flags().StringP("excerpt", "e", "", "New excerpt for this bookmark")
cmd.Flags().StringSliceP("tags", "t", []string{}, "Comma-separated tags for this bookmark")
cmd.Flags().BoolP("offline", "o", false, "Update bookmark without fetching data from internet")
2019-05-21 11:31:40 +08:00
cmd.Flags().BoolP("yes", "y", false, "Skip confirmation prompt and update ALL bookmarks")
2019-06-09 18:02:16 +08:00
cmd.Flags().Bool("dont-overwrite", false, "Don't overwrite existing metadata. Useful when only want to update bookmark's content")
cmd.Flags().BoolP("no-archive", "a", false, "Update bookmark without creating offline archive")
2019-05-21 11:31:40 +08:00
return cmd
}
2019-05-22 17:13:52 +08:00
func updateHandler(cmd *cobra.Command, args []string) {
// Parse flags
url, _ := cmd.Flags().GetString("url")
title, _ := cmd.Flags().GetString("title")
excerpt, _ := cmd.Flags().GetString("excerpt")
tags, _ := cmd.Flags().GetStringSlice("tags")
offline, _ := cmd.Flags().GetBool("offline")
skipConfirm, _ := cmd.Flags().GetBool("yes")
noArchival, _ := cmd.Flags().GetBool("no-archive")
2019-05-22 17:13:52 +08:00
dontOverwrite := cmd.Flags().Changed("dont-overwrite")
// If no arguments (i.e all bookmarks going to be updated), confirm to user
if len(args) == 0 && !skipConfirm {
confirmUpdate := ""
fmt.Print("Update ALL bookmarks? (y/N): ")
fmt.Scanln(&confirmUpdate)
if confirmUpdate != "y" {
fmt.Println("No bookmarks updated")
return
}
}
// Convert args to ids
ids, err := parseStrIndices(args)
if err != nil {
cError.Printf("Failed to parse args: %v\n", err)
return
}
// Check if user really want to batch update archive
if len(ids) > 3 && !noArchival && !skipConfirm {
fmt.Println("This update process will also generate the offline archive for the selected bookmarks.")
fmt.Println("This might take a long time and a lot of your network bandwith.")
confirmUpdate := ""
fmt.Printf("Continue update and archival process for %d bookmark(s)? (y/N): ", len(ids))
fmt.Scanln(&confirmUpdate)
if confirmUpdate != "y" {
fmt.Println("No bookmarks updated")
return
}
}
2019-05-22 17:13:52 +08:00
// Clean up new parameter from flags
title = normalizeSpace(title)
excerpt = normalizeSpace(excerpt)
if cmd.Flags().Changed("url") {
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(url)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
cError.Println("URL is not valid")
return
}
tmp.Fragment = ""
clearUTMParams(tmp)
url = tmp.String()
// Since user uses custom URL, make sure there is only one ID to update
if len(ids) != 1 {
cError.Println("Update only accepts one index while using --url flag")
return
}
}
// Fetch bookmarks from database
filterOptions := database.GetBookmarksOptions{
IDs: ids,
}
bookmarks, err := DB.GetBookmarks(filterOptions)
if err != nil {
cError.Printf("Failed to get bookmarks: %v\n", err)
return
}
if len(bookmarks) == 0 {
cError.Println("No matching index found")
return
}
// If it's not offline mode, fetch data from internet
idWithProblems := []int{}
2019-05-22 17:13:52 +08:00
if !offline {
mx := sync.RWMutex{}
wg := sync.WaitGroup{}
2019-05-24 00:56:38 +08:00
chDone := make(chan struct{})
chProblem := make(chan int, 10)
2019-05-24 00:56:38 +08:00
chMessage := make(chan interface{}, 10)
2019-05-22 17:13:52 +08:00
semaphore := make(chan struct{}, 10)
2019-05-24 00:56:38 +08:00
cInfo.Println("Downloading article(s)...")
2019-05-22 17:13:52 +08:00
for i, book := range bookmarks {
wg.Add(1)
// If used, use submitted URL
if url != "" {
book.URL = url
}
2019-05-24 00:56:38 +08:00
go func(i int, book model.Bookmark) {
2019-05-22 17:13:52 +08:00
// Make sure to finish the WG
defer wg.Done()
// Register goroutine to semaphore
semaphore <- struct{}{}
defer func() {
<-semaphore
}()
2019-06-09 15:54:07 +08:00
// Prepare request
req, err := http.NewRequest("GET", book.URL, nil)
if err != nil {
chProblem <- book.ID
chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
return
}
// Send request
req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
resp, err := httpClient.Do(req)
2019-05-24 00:56:38 +08:00
if err != nil {
chProblem <- book.ID
2019-05-24 00:56:38 +08:00
chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
return
}
defer resp.Body.Close()
2019-05-22 17:13:52 +08:00
// Save as archive, make sure to delete the old one first
2019-06-09 23:49:25 +08:00
buffer := bytes.NewBuffer(nil)
archivePath := fp.Join(DataDir, "archive", fmt.Sprintf("%d", book.ID))
os.Remove(archivePath)
2019-06-09 23:49:25 +08:00
archivalRequest := warc.ArchivalRequest{
URL: book.URL,
Reader: io.TeeReader(resp.Body, buffer),
ContentType: resp.Header.Get("Content-Type"),
}
2019-06-09 15:54:07 +08:00
2019-06-09 23:49:25 +08:00
err = warc.NewArchive(archivalRequest, archivePath)
2019-06-09 15:54:07 +08:00
if err != nil {
chProblem <- book.ID
chMessage <- fmt.Errorf("Failed to create archive %s: %v", book.URL, err)
return
}
// Parse article
article, err := readability.FromReader(buffer, book.URL)
2019-05-22 17:13:52 +08:00
if err != nil {
chProblem <- book.ID
2019-05-24 00:56:38 +08:00
chMessage <- fmt.Errorf("Failed to parse %s: %v", book.URL, err)
2019-05-22 17:13:52 +08:00
return
}
book.Author = article.Byline
book.Content = article.TextContent
book.HTML = article.Content
if !dontOverwrite {
book.Title = article.Title
book.Excerpt = article.Excerpt
}
2019-05-24 14:25:29 +08:00
// Get image for thumbnail and save it to local disk
var imageURLs []string
2019-05-22 17:13:52 +08:00
if article.Image != "" {
2019-05-24 14:25:29 +08:00
imageURLs = append(imageURLs, article.Image)
2019-05-22 17:13:52 +08:00
}
2019-05-24 14:25:29 +08:00
if article.Favicon != "" {
imageURLs = append(imageURLs, article.Favicon)
}
imgPath := fp.Join(DataDir, "thumb", fmt.Sprintf("%d", book.ID))
for _, imageURL := range imageURLs {
err = downloadBookImage(imageURL, imgPath, time.Minute)
if err == nil {
break
}
2019-05-22 17:13:52 +08:00
}
2019-05-24 00:56:38 +08:00
// Send success message
chMessage <- fmt.Sprintf("Downloaded %s", book.URL)
2019-05-22 17:13:52 +08:00
// Save parse result to bookmark
mx.Lock()
bookmarks[i] = book
mx.Unlock()
2019-05-24 00:56:38 +08:00
}(i, book)
2019-05-22 17:13:52 +08:00
}
2019-05-24 00:56:38 +08:00
// Print log message
go func(nBookmark int) {
logIndex := 0
for {
select {
case <-chDone:
cInfo.Println("Download finished")
return
case id := <-chProblem:
idWithProblems = append(idWithProblems, id)
2019-05-24 00:56:38 +08:00
case msg := <-chMessage:
logIndex++
switch msg.(type) {
case error:
cError.Printf("[%d/%d] %v\n", logIndex, nBookmark, msg)
case string:
cInfo.Printf("[%d/%d] %s\n", logIndex, nBookmark, msg)
}
}
}
}(len(bookmarks))
2019-05-22 17:13:52 +08:00
// Wait until all download finished
wg.Wait()
2019-05-24 00:56:38 +08:00
close(chDone)
2019-05-22 17:13:52 +08:00
}
// Map which tags is new or deleted from flag --tags
addedTags := make(map[string]struct{})
deletedTags := make(map[string]struct{})
for _, tag := range tags {
tagName := strings.ToLower(tag)
tagName = strings.TrimSpace(tagName)
if strings.HasPrefix(tagName, "-") {
tagName = strings.TrimPrefix(tagName, "-")
deletedTags[tagName] = struct{}{}
} else {
addedTags[tagName] = struct{}{}
}
}
// Attach user submitted value to the bookmarks
for i, book := range bookmarks {
// If user submit his own title or excerpt, use it
if title != "" {
book.Title = title
}
if excerpt != "" {
book.Excerpt = excerpt
}
// Make sure title is not empty
if book.Title == "" {
book.Title = book.URL
}
// Generate new tags
tmpAddedTags := make(map[string]struct{})
for key, value := range addedTags {
tmpAddedTags[key] = value
}
newTags := []model.Tag{}
for _, tag := range book.Tags {
if _, isDeleted := deletedTags[tag.Name]; isDeleted {
tag.Deleted = true
}
if _, alreadyExist := addedTags[tag.Name]; alreadyExist {
delete(tmpAddedTags, tag.Name)
}
newTags = append(newTags, tag)
}
for tag := range tmpAddedTags {
newTags = append(newTags, model.Tag{Name: tag})
}
book.Tags = newTags
// Set bookmark's new data
bookmarks[i] = book
}
// Save bookmarks to database
bookmarks, err = DB.SaveBookmarks(bookmarks...)
if err != nil {
cError.Printf("Failed to save bookmark: %v\n", err)
return
}
// Print updated bookmarks
fmt.Println()
printBookmarks(bookmarks...)
if len(idWithProblems) > 0 {
sort.Ints(idWithProblems)
cError.Println("Encountered error while downloading some bookmark(s):")
for _, id := range idWithProblems {
cError.Printf("%d ", id)
}
fmt.Println()
}
2019-05-22 17:13:52 +08:00
}