diff --git a/go.mod b/go.mod index 2c817cc..e2eb630 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/go-shiori/shiori go 1.12 require ( + github.com/PuerkitoBio/goquery v1.5.0 github.com/fatih/color v1.7.0 github.com/go-shiori/go-readability v0.0.0-20190522013032-128e0c654d14 github.com/go-sql-driver/mysql v1.4.1 // indirect @@ -13,6 +14,8 @@ require ( github.com/mattn/go-sqlite3 v1.10.0 github.com/sirupsen/logrus v1.4.2 github.com/spf13/cobra v0.0.4 - golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 + golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f + golang.org/x/net v0.0.0-20190522135303-fa69b94a3b58 // indirect + golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5 // indirect google.golang.org/appengine v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index 03591ce..9b8fd81 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= +github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= @@ -69,16 +73,26 @@ github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1: golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f h1:R423Cnkcp5JABoeemiGEPlt9tHXFfw5kvc0yqlxRPWo= +golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190520210107-018c4d40a106 h1:EZofHp/BzEf3j39/+7CX1JvH0WaPG+ikBrqAdAPf+GM= golang.org/x/net v0.0.0-20190520210107-018c4d40a106/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522135303-fa69b94a3b58 h1:AZ8FNE2w7DVDFDK6u/iC9/Mqh73UupjaqSd/2qMoECQ= +golang.org/x/net v0.0.0-20190522135303-fa69b94a3b58/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190520201301-c432e742b0af h1:NXfmMfXz6JqGfG3ikSxcz2N93j6DgScr19Oo2uwFu88= golang.org/x/sys v0.0.0-20190520201301-c432e742b0af/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5 h1:f005F/Jl5JLP036x7QIvUVhNTqxvSYwFIiyOh2q12iU= +golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= google.golang.org/appengine v1.6.0 h1:Tfd7cKwKbFRsI8RMAD3oqqw7JPFRrvFlOsfbgVkjOOw= google.golang.org/appengine v1.6.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= diff --git a/internal/cmd/import.go b/internal/cmd/import.go index 8824a01..11d9675 100644 --- a/internal/cmd/import.go +++ b/internal/cmd/import.go @@ -1,15 +1,145 @@ package cmd -import "github.com/spf13/cobra" +import ( + "fmt" + nurl "net/url" + "os" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/go-shiori/shiori/internal/model" + "github.com/spf13/cobra" +) func importCmd() *cobra.Command { cmd := &cobra.Command{ Use: "import source-file", Short: "Import bookmarks from HTML file in Netscape Bookmark format", Args: cobra.ExactArgs(1), + Run: importHandler, } cmd.Flags().BoolP("generate-tag", "t", false, "Auto generate tag from bookmark's category") return cmd } + +func importHandler(cmd *cobra.Command, args []string) { + // Parse flags + generateTag := cmd.Flags().Changed("generate-tag") + + // If user doesn't specify, ask if tag need to be generated + if !generateTag { + var submit string + fmt.Print("Add parents folder as tag? (y/N): ") + fmt.Scanln(&submit) + + generateTag = submit == "y" + } + + // Prepare bookmark's ID + bookID, err := DB.CreateNewID("bookmark") + if err != nil { + cError.Printf("Failed to create ID: %v\n", err) + return + } + + // Open bookmark's file + srcFile, err := os.Open(args[0]) + if err != nil { + cError.Printf("Failed to open %s: %v\n", args[0], err) + return + } + defer srcFile.Close() + + // Parse bookmark's file + bookmarks := []model.Bookmark{} + mapURL := make(map[string]struct{}) + + doc, err := goquery.NewDocumentFromReader(srcFile) + if err != nil { + cError.Printf("Failed to parse bookmark: %v\n", err) + return + } + + doc.Find("dt>a").Each(func(_ int, a *goquery.Selection) { + // Get related elements + dt := a.Parent() + dl := dt.Parent() + h3 := dl.Parent().Find("h3").First() + + // Get metadata + title := a.Text() + url, _ := a.Attr("href") + strTags, _ := a.Attr("tags") + strModified, _ := a.Attr("last_modified") + intModified, _ := strconv.ParseInt(strModified, 10, 64) + modified := time.Unix(intModified, 0) + + // Clean up URL by removing its fragment and UTM parameters + tmp, err := nurl.Parse(url) + if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" { + cError.Printf("Skip %s: URL is not valid\n", url) + return + } + + tmp.Fragment = "" + clearUTMParams(tmp) + url = tmp.String() + + // Check if the URL already exist before, both in bookmark + // file or in database + if _, exist := mapURL[url]; exist { + cError.Printf("Skip %s: URL already exists\n", url) + return + } + + if _, exist := DB.GetBookmark(0, url); exist { + cError.Printf("Skip %s: URL already exists\n", url) + mapURL[url] = struct{}{} + return + } + + // Get bookmark tags + tags := []model.Tag{} + for _, strTag := range strings.Split(strTags, ",") { + strTag = normalizeSpace(strTag) + if strTag != "" { + tags = append(tags, model.Tag{Name: strTag}) + } + } + + // Get category name for this bookmark + // and add it as tags (if necessary) + category := normalizeSpace(h3.Text()) + if category != "" && generateTag { + tags = append(tags, model.Tag{Name: category}) + } + + // Add item to list + bookmark := model.Bookmark{ + ID: bookID, + URL: url, + Title: normalizeSpace(title), + Modified: modified.Format("2006-01-02 15:04:05"), + Tags: tags, + } + + bookID++ + mapURL[url] = struct{}{} + bookmarks = append(bookmarks, bookmark) + }) + + // Save bookmark to database + bookmarks, err = DB.SaveBookmarks(bookmarks...) + if err != nil { + cError.Printf("Failed to save bookmarks: %v\n", err) + return + } + + // Print imported bookmark + fmt.Println() + printBookmarks(bookmarks...) +} diff --git a/internal/cmd/utils.go b/internal/cmd/utils.go index cb40e9d..d7bba09 100644 --- a/internal/cmd/utils.go +++ b/internal/cmd/utils.go @@ -37,6 +37,7 @@ var ( ) func normalizeSpace(str string) string { + str = strings.TrimSpace(str) return strings.Join(strings.Fields(str), " ") } diff --git a/internal/database/database.go b/internal/database/database.go index 62ea301..ac79fc1 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -26,6 +26,9 @@ type DB interface { // DeleteBookmarks removes all record with matching ids from database. DeleteBookmarks(ids ...int) error + // GetBookmark fetchs bookmark based on its ID or URL. + GetBookmark(id int, url string) (model.Bookmark, bool) + // CreateNewID creates new id for specified table. CreateNewID(table string) (int, error) } diff --git a/internal/database/sqlite.go b/internal/database/sqlite.go index b8ef0d1..2791508 100644 --- a/internal/database/sqlite.go +++ b/internal/database/sqlite.go @@ -338,6 +338,21 @@ func (db *SQLiteDatabase) DeleteBookmarks(ids ...int) (err error) { return err } +// GetBookmark fetchs bookmark based on its ID or URL. +// Returns the bookmark and boolean whether it's exist or not. +func (db *SQLiteDatabase) GetBookmark(id int, url string) (model.Bookmark, bool) { + book := model.Bookmark{} + db.Get(&book, `SELECT + b.id, b.url, b.title, b.excerpt, b.author, b.modified, + bc.content, bc.html, bc.content <> "" has_content + FROM bookmark b + LEFT JOIN bookmark_content bc ON bc.docid = b.id + WHERE b.id = ? OR b.url = ?`, + id, url) + + return book, book.ID != 0 +} + // CreateNewID creates new ID for specified table func (db *SQLiteDatabase) CreateNewID(table string) (int, error) { var tableID int