2019-05-21 11:31:40 +08:00
package cmd
2019-05-22 17:13:52 +08:00
import (
2019-06-09 15:54:07 +08:00
"bytes"
2019-05-22 17:13:52 +08:00
"fmt"
2019-06-09 15:54:07 +08:00
"io"
"net/http"
2019-05-22 17:13:52 +08:00
nurl "net/url"
2019-06-09 17:59:03 +08:00
"os"
2019-05-22 17:13:52 +08:00
fp "path/filepath"
2019-05-24 18:13:19 +08:00
"sort"
2019-05-22 17:13:52 +08:00
"strings"
"sync"
"time"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/database"
"github.com/go-shiori/shiori/internal/model"
2019-06-09 15:54:07 +08:00
"github.com/go-shiori/shiori/pkg/warc"
2019-05-22 17:13:52 +08:00
"github.com/spf13/cobra"
)
2019-05-21 11:31:40 +08:00
func updateCmd ( ) * cobra . Command {
cmd := & cobra . Command {
Use : "update [indices]" ,
Short : "Update the saved bookmarks" ,
Long : "Update fields of an existing bookmark. " +
"Accepts space-separated list of indices (e.g. 5 6 23 4 110 45), " +
"hyphenated range (e.g. 100-200) or both (e.g. 1-3 7 9). " +
"If no arguments, ALL bookmarks will be updated. Update works differently depending on the flags:\n" +
"- If indices are passed without any flags (--url, --title, --tag and --excerpt), read the URLs from DB and update titles from web.\n" +
"- If --url is passed (and --title is omitted), update the title from web using the URL. While using this flag, update only accept EXACTLY one index.\n" +
"While updating bookmark's tags, you can use - to remove tag (e.g. -nature to remove nature tag from this bookmark)." ,
2019-05-22 17:13:52 +08:00
Run : updateHandler ,
2019-05-21 11:31:40 +08:00
}
cmd . Flags ( ) . StringP ( "url" , "u" , "" , "New URL for this bookmark." )
cmd . Flags ( ) . StringP ( "title" , "i" , "" , "New title for this bookmark." )
cmd . Flags ( ) . StringP ( "excerpt" , "e" , "" , "New excerpt for this bookmark." )
cmd . Flags ( ) . StringSliceP ( "tags" , "t" , [ ] string { } , "Comma-separated tags for this bookmark." )
cmd . Flags ( ) . BoolP ( "offline" , "o" , false , "Update bookmark without fetching data from internet." )
cmd . Flags ( ) . BoolP ( "yes" , "y" , false , "Skip confirmation prompt and update ALL bookmarks" )
cmd . Flags ( ) . Bool ( "dont-overwrite" , false , "Don't overwrite existing metadata. Useful when only want to update bookmark's content." )
return cmd
}
2019-05-22 17:13:52 +08:00
func updateHandler ( cmd * cobra . Command , args [ ] string ) {
// Parse flags
url , _ := cmd . Flags ( ) . GetString ( "url" )
title , _ := cmd . Flags ( ) . GetString ( "title" )
excerpt , _ := cmd . Flags ( ) . GetString ( "excerpt" )
tags , _ := cmd . Flags ( ) . GetStringSlice ( "tags" )
offline , _ := cmd . Flags ( ) . GetBool ( "offline" )
skipConfirm , _ := cmd . Flags ( ) . GetBool ( "yes" )
dontOverwrite := cmd . Flags ( ) . Changed ( "dont-overwrite" )
// If no arguments (i.e all bookmarks going to be updated), confirm to user
if len ( args ) == 0 && ! skipConfirm {
confirmUpdate := ""
fmt . Print ( "Update ALL bookmarks? (y/N): " )
fmt . Scanln ( & confirmUpdate )
if confirmUpdate != "y" {
fmt . Println ( "No bookmarks updated" )
return
}
}
// Convert args to ids
ids , err := parseStrIndices ( args )
if err != nil {
cError . Printf ( "Failed to parse args: %v\n" , err )
return
}
// Clean up new parameter from flags
title = normalizeSpace ( title )
excerpt = normalizeSpace ( excerpt )
if cmd . Flags ( ) . Changed ( "url" ) {
// Clean up URL by removing its fragment and UTM parameters
tmp , err := nurl . Parse ( url )
if err != nil || tmp . Scheme == "" || tmp . Hostname ( ) == "" {
cError . Println ( "URL is not valid" )
return
}
tmp . Fragment = ""
clearUTMParams ( tmp )
url = tmp . String ( )
// Since user uses custom URL, make sure there is only one ID to update
if len ( ids ) != 1 {
cError . Println ( "Update only accepts one index while using --url flag" )
return
}
}
// Fetch bookmarks from database
filterOptions := database . GetBookmarksOptions {
IDs : ids ,
}
bookmarks , err := DB . GetBookmarks ( filterOptions )
if err != nil {
cError . Printf ( "Failed to get bookmarks: %v\n" , err )
return
}
if len ( bookmarks ) == 0 {
cError . Println ( "No matching index found" )
return
}
// If it's not offline mode, fetch data from internet
2019-05-24 18:13:19 +08:00
idWithProblems := [ ] int { }
2019-05-22 17:13:52 +08:00
if ! offline {
mx := sync . RWMutex { }
wg := sync . WaitGroup { }
2019-05-24 00:56:38 +08:00
chDone := make ( chan struct { } )
2019-05-24 18:13:19 +08:00
chProblem := make ( chan int , 10 )
2019-05-24 00:56:38 +08:00
chMessage := make ( chan interface { } , 10 )
2019-05-22 17:13:52 +08:00
semaphore := make ( chan struct { } , 10 )
2019-05-24 00:56:38 +08:00
cInfo . Println ( "Downloading article(s)..." )
2019-05-22 17:13:52 +08:00
for i , book := range bookmarks {
wg . Add ( 1 )
// If used, use submitted URL
if url != "" {
book . URL = url
}
2019-05-24 00:56:38 +08:00
go func ( i int , book model . Bookmark ) {
2019-05-22 17:13:52 +08:00
// Make sure to finish the WG
defer wg . Done ( )
// Register goroutine to semaphore
semaphore <- struct { } { }
defer func ( ) {
<- semaphore
} ( )
2019-06-09 15:54:07 +08:00
// Prepare request
req , err := http . NewRequest ( "GET" , book . URL , nil )
if err != nil {
chProblem <- book . ID
chMessage <- fmt . Errorf ( "Failed to download %s: %v" , book . URL , err )
return
}
// Send request
req . Header . Set ( "User-Agent" , "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)" )
resp , err := httpClient . Do ( req )
2019-05-24 00:56:38 +08:00
if err != nil {
2019-05-24 18:13:19 +08:00
chProblem <- book . ID
2019-05-24 00:56:38 +08:00
chMessage <- fmt . Errorf ( "Failed to download %s: %v" , book . URL , err )
return
}
defer resp . Body . Close ( )
2019-05-22 17:13:52 +08:00
2019-06-09 17:59:03 +08:00
// Save as archive, make sure to delete the old one first
archivePath := fp . Join ( DataDir , "archive" , fmt . Sprintf ( "%d" , book . ID ) )
os . Remove ( archivePath )
2019-06-09 15:54:07 +08:00
buffer := bytes . NewBuffer ( nil )
tee := io . TeeReader ( resp . Body , buffer )
contentType := resp . Header . Get ( "Content-Type" )
err = warc . FromReader ( tee , book . URL , contentType , archivePath )
if err != nil {
chProblem <- book . ID
chMessage <- fmt . Errorf ( "Failed to create archive %s: %v" , book . URL , err )
return
}
// Parse article
article , err := readability . FromReader ( buffer , book . URL )
2019-05-22 17:13:52 +08:00
if err != nil {
2019-05-24 18:13:19 +08:00
chProblem <- book . ID
2019-05-24 00:56:38 +08:00
chMessage <- fmt . Errorf ( "Failed to parse %s: %v" , book . URL , err )
2019-05-22 17:13:52 +08:00
return
}
book . Author = article . Byline
book . Content = article . TextContent
book . HTML = article . Content
if ! dontOverwrite {
book . Title = article . Title
book . Excerpt = article . Excerpt
}
2019-05-24 14:25:29 +08:00
// Get image for thumbnail and save it to local disk
var imageURLs [ ] string
2019-05-22 17:13:52 +08:00
if article . Image != "" {
2019-05-24 14:25:29 +08:00
imageURLs = append ( imageURLs , article . Image )
2019-05-22 17:13:52 +08:00
}
2019-05-24 14:25:29 +08:00
if article . Favicon != "" {
imageURLs = append ( imageURLs , article . Favicon )
}
imgPath := fp . Join ( DataDir , "thumb" , fmt . Sprintf ( "%d" , book . ID ) )
for _ , imageURL := range imageURLs {
err = downloadBookImage ( imageURL , imgPath , time . Minute )
if err == nil {
break
}
2019-05-22 17:13:52 +08:00
}
2019-05-24 00:56:38 +08:00
// Send success message
chMessage <- fmt . Sprintf ( "Downloaded %s" , book . URL )
2019-05-22 17:13:52 +08:00
// Save parse result to bookmark
mx . Lock ( )
bookmarks [ i ] = book
mx . Unlock ( )
2019-05-24 00:56:38 +08:00
} ( i , book )
2019-05-22 17:13:52 +08:00
}
2019-05-24 00:56:38 +08:00
// Print log message
go func ( nBookmark int ) {
logIndex := 0
for {
select {
case <- chDone :
cInfo . Println ( "Download finished" )
return
2019-05-24 18:13:19 +08:00
case id := <- chProblem :
idWithProblems = append ( idWithProblems , id )
2019-05-24 00:56:38 +08:00
case msg := <- chMessage :
logIndex ++
switch msg . ( type ) {
case error :
cError . Printf ( "[%d/%d] %v\n" , logIndex , nBookmark , msg )
case string :
cInfo . Printf ( "[%d/%d] %s\n" , logIndex , nBookmark , msg )
}
}
}
} ( len ( bookmarks ) )
2019-05-22 17:13:52 +08:00
// Wait until all download finished
wg . Wait ( )
2019-05-24 00:56:38 +08:00
close ( chDone )
2019-05-22 17:13:52 +08:00
}
// Map which tags is new or deleted from flag --tags
addedTags := make ( map [ string ] struct { } )
deletedTags := make ( map [ string ] struct { } )
for _ , tag := range tags {
tagName := strings . ToLower ( tag )
tagName = strings . TrimSpace ( tagName )
if strings . HasPrefix ( tagName , "-" ) {
tagName = strings . TrimPrefix ( tagName , "-" )
deletedTags [ tagName ] = struct { } { }
} else {
addedTags [ tagName ] = struct { } { }
}
}
// Attach user submitted value to the bookmarks
for i , book := range bookmarks {
// If user submit his own title or excerpt, use it
if title != "" {
book . Title = title
}
if excerpt != "" {
book . Excerpt = excerpt
}
// Make sure title is not empty
if book . Title == "" {
book . Title = book . URL
}
// Generate new tags
tmpAddedTags := make ( map [ string ] struct { } )
for key , value := range addedTags {
tmpAddedTags [ key ] = value
}
newTags := [ ] model . Tag { }
for _ , tag := range book . Tags {
if _ , isDeleted := deletedTags [ tag . Name ] ; isDeleted {
tag . Deleted = true
}
if _ , alreadyExist := addedTags [ tag . Name ] ; alreadyExist {
delete ( tmpAddedTags , tag . Name )
}
newTags = append ( newTags , tag )
}
for tag := range tmpAddedTags {
newTags = append ( newTags , model . Tag { Name : tag } )
}
book . Tags = newTags
// Set bookmark's new data
bookmarks [ i ] = book
}
// Save bookmarks to database
bookmarks , err = DB . SaveBookmarks ( bookmarks ... )
if err != nil {
cError . Printf ( "Failed to save bookmark: %v\n" , err )
return
}
// Print updated bookmarks
fmt . Println ( )
printBookmarks ( bookmarks ... )
2019-05-24 18:13:19 +08:00
if len ( idWithProblems ) > 0 {
sort . Ints ( idWithProblems )
cError . Println ( "Encountered error while downloading some bookmark(s):" )
for _ , id := range idWithProblems {
cError . Printf ( "%d " , id )
}
fmt . Println ( )
}
2019-05-22 17:13:52 +08:00
}