Implement logic for import cmd

This commit is contained in:
Radhi Fadlillah 2019-05-23 09:22:47 +07:00
parent ed2b6b0740
commit 4ab1253f8c
6 changed files with 168 additions and 2 deletions

5
go.mod
View file

@ -3,6 +3,7 @@ module github.com/go-shiori/shiori
go 1.12
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/fatih/color v1.7.0
github.com/go-shiori/go-readability v0.0.0-20190522013032-128e0c654d14
github.com/go-sql-driver/mysql v1.4.1 // indirect
@ -13,6 +14,8 @@ require (
github.com/mattn/go-sqlite3 v1.10.0
github.com/sirupsen/logrus v1.4.2
github.com/spf13/cobra v0.0.4
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2
golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f
golang.org/x/net v0.0.0-20190522135303-fa69b94a3b58 // indirect
golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5 // indirect
google.golang.org/appengine v1.6.0 // indirect
)

14
go.sum
View file

@ -1,4 +1,8 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
@ -69,16 +73,26 @@ github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f h1:R423Cnkcp5JABoeemiGEPlt9tHXFfw5kvc0yqlxRPWo=
golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190520210107-018c4d40a106 h1:EZofHp/BzEf3j39/+7CX1JvH0WaPG+ikBrqAdAPf+GM=
golang.org/x/net v0.0.0-20190520210107-018c4d40a106/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190522135303-fa69b94a3b58 h1:AZ8FNE2w7DVDFDK6u/iC9/Mqh73UupjaqSd/2qMoECQ=
golang.org/x/net v0.0.0-20190522135303-fa69b94a3b58/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190520201301-c432e742b0af h1:NXfmMfXz6JqGfG3ikSxcz2N93j6DgScr19Oo2uwFu88=
golang.org/x/sys v0.0.0-20190520201301-c432e742b0af/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5 h1:f005F/Jl5JLP036x7QIvUVhNTqxvSYwFIiyOh2q12iU=
golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
google.golang.org/appengine v1.6.0 h1:Tfd7cKwKbFRsI8RMAD3oqqw7JPFRrvFlOsfbgVkjOOw=
google.golang.org/appengine v1.6.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=

View file

@ -1,15 +1,145 @@
package cmd
import "github.com/spf13/cobra"
import (
"fmt"
nurl "net/url"
"os"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/go-shiori/shiori/internal/model"
"github.com/spf13/cobra"
)
// importCmd builds the `import` sub-command. The command takes exactly one
// positional argument (the source file), is handled by importHandler, and
// exposes a `--generate-tag` / `-t` boolean flag that defaults to false.
func importCmd() *cobra.Command {
	importCommand := cobra.Command{
		Use:   "import source-file",
		Short: "Import bookmarks from HTML file in Netscape Bookmark format",
		Args:  cobra.ExactArgs(1),
		Run:   importHandler,
	}

	flagSet := importCommand.Flags()
	flagSet.BoolP("generate-tag", "t", false, "Auto generate tag from bookmark's category")

	return &importCommand
}
// importHandler runs the `import` command: it reads the HTML bookmark file
// named by args[0], turns every `dt>a` anchor into a model.Bookmark, saves the
// collected bookmarks through DB.SaveBookmarks and prints the saved result.
//
// Anchors are skipped (with a message on cError) when their URL is invalid,
// when the same cleaned-up URL was already seen earlier in the file, or when
// DB.GetBookmark reports that the URL already exists.
//
// Tagging with the parent folder name is controlled by the `generate-tag`
// flag; when the flag is not given at all the user is asked interactively.
// Failures are reported on cError and end the handler early.
func importHandler(cmd *cobra.Command, args []string) {
	// Parse flags. The flag's value is read instead of only checking whether
	// it was set, so an explicit `--generate-tag=false` is honoured.
	// GetBool only fails when the flag is not registered as a bool; in that
	// case the zero value (false) is the right default, so the error is dropped.
	flags := cmd.Flags()
	generateTag, _ := flags.GetBool("generate-tag")

	// If user doesn't specify the flag, ask if tag need to be generated
	if !flags.Changed("generate-tag") {
		var submit string
		fmt.Print("Add parents folder as tag? (y/N): ")
		// Best effort: a scan error (e.g. an empty line) leaves submit empty,
		// which is treated as the default answer "no".
		fmt.Scanln(&submit)
		generateTag = submit == "y"
	}

	// Prepare bookmark's ID
	bookID, err := DB.CreateNewID("bookmark")
	if err != nil {
		cError.Printf("Failed to create ID: %v\n", err)
		return
	}

	// Open bookmark's file
	srcFile, err := os.Open(args[0])
	if err != nil {
		cError.Printf("Failed to open %s: %v\n", args[0], err)
		return
	}
	defer srcFile.Close()

	// Parse bookmark's file
	bookmarks := []model.Bookmark{}
	mapURL := make(map[string]struct{})

	doc, err := goquery.NewDocumentFromReader(srcFile)
	if err != nil {
		cError.Printf("Failed to parse bookmark: %v\n", err)
		return
	}

	doc.Find("dt>a").Each(func(_ int, a *goquery.Selection) {
		// Get related elements. Only the direct <h3> child of the element
		// enclosing this bookmark's list is its folder heading; searching all
		// descendants would hand bookmarks that live outside any folder the
		// heading of the first folder found anywhere below that element.
		dt := a.Parent()
		dl := dt.Parent()
		h3 := dl.Parent().ChildrenFiltered("h3").First()

		// Get metadata
		title := a.Text()
		url, _ := a.Attr("href")
		strTags, _ := a.Attr("tags")

		// Use the recorded modification time when it is present and valid;
		// otherwise fall back to the import time instead of the Unix epoch.
		modified := time.Now()
		if strModified, ok := a.Attr("last_modified"); ok {
			if sec, err := strconv.ParseInt(strings.TrimSpace(strModified), 10, 64); err == nil {
				modified = time.Unix(sec, 0)
			}
		}

		// Clean up URL by removing its fragment and UTM parameters
		tmp, err := nurl.Parse(url)
		if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
			cError.Printf("Skip %s: URL is not valid\n", url)
			return
		}

		tmp.Fragment = ""
		clearUTMParams(tmp)
		url = tmp.String()

		// Check if the URL already exist before, both in bookmark
		// file or in database
		if _, exist := mapURL[url]; exist {
			cError.Printf("Skip %s: URL already exists\n", url)
			return
		}

		if _, exist := DB.GetBookmark(0, url); exist {
			cError.Printf("Skip %s: URL already exists\n", url)
			// Remember the URL so later duplicates skip the database lookup.
			mapURL[url] = struct{}{}
			return
		}

		// Get bookmark tags
		tags := []model.Tag{}
		for _, strTag := range strings.Split(strTags, ",") {
			strTag = normalizeSpace(strTag)
			if strTag != "" {
				tags = append(tags, model.Tag{Name: strTag})
			}
		}

		// Get category name for this bookmark
		// and add it as tags (if necessary)
		category := normalizeSpace(h3.Text())
		if category != "" && generateTag {
			tags = append(tags, model.Tag{Name: category})
		}

		// Add item to list
		bookmark := model.Bookmark{
			ID:       bookID,
			URL:      url,
			Title:    normalizeSpace(title),
			Modified: modified.Format("2006-01-02 15:04:05"),
			Tags:     tags,
		}

		// Each accepted bookmark consumes one ID from the sequence that
		// started at the value returned by DB.CreateNewID.
		bookID++
		mapURL[url] = struct{}{}
		bookmarks = append(bookmarks, bookmark)
	})

	// Save bookmark to database
	bookmarks, err = DB.SaveBookmarks(bookmarks...)
	if err != nil {
		cError.Printf("Failed to save bookmarks: %v\n", err)
		return
	}

	// Print imported bookmark
	fmt.Println()
	printBookmarks(bookmarks...)
}

View file

@ -37,6 +37,7 @@ var (
)
// normalizeSpace collapses every run of whitespace in str into a single
// space and drops leading and trailing whitespace.
func normalizeSpace(str string) string {
	// strings.Fields already ignores whitespace at both ends of the input,
	// so re-joining its result yields the trimmed, single-spaced string.
	words := strings.Fields(str)
	return strings.Join(words, " ")
}

View file

@ -26,6 +26,9 @@ type DB interface {
// DeleteBookmarks removes all record with matching ids from database.
DeleteBookmarks(ids ...int) error
// GetBookmark fetches a bookmark based on its ID or URL.
GetBookmark(id int, url string) (model.Bookmark, bool)
// CreateNewID creates new id for specified table.
CreateNewID(table string) (int, error)
}

View file

@ -338,6 +338,21 @@ func (db *SQLiteDatabase) DeleteBookmarks(ids ...int) (err error) {
return err
}
// GetBookmark fetches a bookmark whose ID or URL matches the given values.
// It returns the bookmark together with a boolean that reports whether a
// record was found, judged by the loaded bookmark having a non-zero ID.
// The result of the underlying query call is not inspected.
func (db *SQLiteDatabase) GetBookmark(id int, url string) (model.Bookmark, bool) {
	const query = `SELECT
b.id, b.url, b.title, b.excerpt, b.author, b.modified,
bc.content, bc.html, bc.content <> "" has_content
FROM bookmark b
LEFT JOIN bookmark_content bc ON bc.docid = b.id
WHERE b.id = ? OR b.url = ?`

	var bookmark model.Bookmark
	db.Get(&bookmark, query, id, url)

	found := bookmark.ID != 0
	return bookmark, found
}
// CreateNewID creates new ID for specified table
func (db *SQLiteDatabase) CreateNewID(table string) (int, error) {
var tableID int