shiori/internal/cmd/pocket.go

122 lines
2.6 KiB
Go
Raw Normal View History

2019-05-21 11:31:40 +08:00
package cmd
import (
2019-05-23 12:30:56 +08:00
"fmt"
nurl "net/url"
"os"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/go-shiori/shiori/internal/model"
2019-05-21 11:31:40 +08:00
"github.com/spf13/cobra"
)
func pocketCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "pocket source-file",
Short: "Import bookmarks from Pocket's exported HTML file",
Args: cobra.ExactArgs(1),
2019-05-23 12:30:56 +08:00
Run: pocketHandler,
2019-05-21 11:31:40 +08:00
}
return cmd
}
2019-05-23 12:30:56 +08:00
func pocketHandler(cmd *cobra.Command, args []string) {
// Prepare bookmark's ID
2019-08-09 11:19:43 +08:00
bookID, err := db.CreateNewID("bookmark")
2019-05-23 12:30:56 +08:00
if err != nil {
cError.Printf("Failed to create ID: %v\n", err)
return
}
// Open pocket's file
srcFile, err := os.Open(args[0])
if err != nil {
cError.Println(err)
return
}
defer srcFile.Close()
// Parse pocket's file
bookmarks := []model.Bookmark{}
mapURL := make(map[string]struct{})
doc, err := goquery.NewDocumentFromReader(srcFile)
if err != nil {
cError.Println(err)
return
}
doc.Find("a").Each(func(_ int, a *goquery.Selection) {
// Get metadata
title := a.Text()
url, _ := a.Attr("href")
strTags, _ := a.Attr("tags")
strModified, _ := a.Attr("time_added")
intModified, _ := strconv.ParseInt(strModified, 10, 64)
modified := time.Unix(intModified, 0)
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(url)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
cError.Printf("Skip %s: URL is not valid\n", url)
return
}
tmp.Fragment = ""
clearUTMParams(tmp)
url = tmp.String()
2019-08-10 09:13:13 +08:00
// Make sure title is valid Utf-8
title = toValidUtf8(title, url)
2019-05-23 12:30:56 +08:00
// Check if the URL already exist before, both in bookmark
// file or in database
if _, exist := mapURL[url]; exist {
cError.Printf("Skip %s: URL already exists\n", url)
return
}
2019-08-09 11:19:43 +08:00
if _, exist := db.GetBookmark(0, url); exist {
2019-05-23 12:30:56 +08:00
cError.Printf("Skip %s: URL already exists\n", url)
mapURL[url] = struct{}{}
return
}
// Get bookmark tags
tags := []model.Tag{}
for _, strTag := range strings.Split(strTags, ",") {
if strTag != "" {
tags = append(tags, model.Tag{Name: strTag})
}
}
// Add item to list
bookmark := model.Bookmark{
ID: bookID,
URL: url,
Title: normalizeSpace(title),
Modified: modified.Format("2006-01-02 15:04:05"),
Tags: tags,
}
bookID++
mapURL[url] = struct{}{}
bookmarks = append(bookmarks, bookmark)
})
// Save bookmark to database
2019-08-09 11:19:43 +08:00
bookmarks, err = db.SaveBookmarks(bookmarks...)
2019-05-23 12:30:56 +08:00
if err != nil {
cError.Printf("Failed to save bookmarks: %v\n", err)
return
}
// Print imported bookmark
fmt.Println()
printBookmarks(bookmarks...)
}