shiori/internal/cmd/import.go
Felipe Martin Garcia 05fee53bd0
fix: saving bookmarks inconsistencies (#500)
* chore: updated go-migrate dependencies

* fix: specify if we're saving bookmarks expecting a creation

Up until now the SaveBookmarks method was doing some "magic" to do
"upserts" on the databases, but consistency between engines was scarce,
and not knowing whether we were saving a new bookmark or updating
an existing one was leading to errors and inconsistencies in logic all
over the place. Now we need to specify a creation boolean when
saving, and a different query will be made (INSERT vs UPDATE).

* fix(api): using incorrect bookmark for content download

* test(db): added test pipeline for databases

Added functions that will share logic among the engines and will be
called on fresh databases on each test run

* dev: added basic docker-compose for development

* chore: uncommented tests

* ci(test): added mysql service

* typo

* test(mysql): select database after reset

* fix(mysql): ignore empty row errors when parsing tags

* fix(mysql): handle insert errors

* chore: added mysql variables to compose

* ci: explicit mysql service port exposed
2022-10-11 23:47:38 +02:00

166 lines
3.8 KiB
Go

package cmd
import (
"database/sql"
"errors"
"fmt"
"os"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/model"
"github.com/spf13/cobra"
)
// importCmd builds the `import` sub-command. The command takes exactly one
// positional argument (the source file), is executed by importHandler, and
// exposes the boolean `--generate-tag` / `-t` flag, which defaults to false.
func importCmd() *cobra.Command {
	const (
		usage   = "import source-file"
		summary = "Import bookmarks from HTML file in Netscape Bookmark format"
	)

	importCommand := &cobra.Command{
		Use:   usage,
		Short: summary,
		Args:  cobra.ExactArgs(1),
		Run:   importHandler,
	}

	// Register the only flag this command understands.
	flagSet := importCommand.Flags()
	flagSet.BoolP("generate-tag", "t", false, "Auto generate tag from bookmark's category")

	return importCommand
}
// importHandler runs the `import` command: it reads the bookmark file named
// by args[0], converts every `dt>a` entry into a model.Bookmark and stores
// the new ones through db.SaveBookmarks, then prints what was saved.
//
// An entry is skipped (with a message on cError) when its date attribute is
// not an integer, when core.RemoveUTMParams rejects its URL, when the database
// lookup fails, or when the URL was already seen in the file or is already
// stored. Failures that affect the whole import (ID allocation, opening or
// parsing the file, saving) print an error and exit with status 1.
func importHandler(cmd *cobra.Command, args []string) {
	// Parse flags. Changed only reports whether the flag was present on the
	// command line; the value is read separately so that an explicit
	// `--generate-tag=false` is honoured instead of being treated as true.
	flags := cmd.Flags()
	generateTag := false
	if flags.Changed("generate-tag") {
		value, err := flags.GetBool("generate-tag")
		if err != nil {
			cError.Printf("Failed to read generate-tag flag: %v\n", err)
			os.Exit(1)
		}
		generateTag = value
	} else {
		// If user doesn't specify, ask if tag need to be generated.
		// The Scanln error is deliberately ignored: on failure submit stays
		// empty, which is treated as the default answer "no".
		var submit string
		fmt.Print("Add parents folder as tag? (y/N): ")
		_, _ = fmt.Scanln(&submit)
		generateTag = submit == "y"
	}

	// Prepare bookmark's ID
	bookID, err := db.CreateNewID(cmd.Context(), "bookmark")
	if err != nil {
		cError.Printf("Failed to create ID: %v\n", err)
		os.Exit(1)
	}

	// Open bookmark's file
	srcFile, err := os.Open(args[0])
	if err != nil {
		cError.Printf("Failed to open %s: %v\n", args[0], err)
		os.Exit(1)
	}

	// Parse bookmark's file. The file is closed right after parsing instead of
	// with defer, because the os.Exit calls below would skip a deferred Close.
	// The Close error is ignored: the file was only opened for reading.
	doc, err := goquery.NewDocumentFromReader(srcFile)
	_ = srcFile.Close()
	if err != nil {
		cError.Printf("Failed to parse bookmark: %v\n", err)
		os.Exit(1)
	}

	bookmarks := []model.Bookmark{}
	mapURL := make(map[string]struct{})

	doc.Find("dt>a").Each(func(_ int, a *goquery.Selection) {
		// Get related elements: the first <h3> found under the parent of the
		// enclosing list is used as the bookmark's category.
		dt := a.Parent()
		dl := dt.Parent()
		h3 := dl.Parent().Find("h3").First()

		// Get metadata
		title := a.Text()
		href, _ := a.Attr("href")
		strTags, _ := a.Attr("tags")

		dateStr, fieldExists := a.Attr("last_modified")
		if !fieldExists {
			dateStr, _ = a.Attr("add_date")
		}

		// Using now as default date in case no last_modified nor add_date are present
		modifiedDate := time.Now()
		if dateStr != "" {
			// Parse as 64-bit so the timestamp does not depend on the
			// platform's int size.
			modifiedTs, err := strconv.ParseInt(dateStr, 10, 64)
			if err != nil {
				cError.Printf("Skip %s: date field is not valid: %s\n", href, err)
				return
			}

			modifiedDate = time.Unix(modifiedTs, 0)
		}

		// Clean up URL. The raw href is kept separately so the skip message
		// shows what was in the file rather than whatever is returned
		// alongside the error.
		url, err := core.RemoveUTMParams(href)
		if err != nil {
			cError.Printf("Skip %s: URL is not valid\n", href)
			return
		}

		// Make sure title is valid Utf-8
		title = validateTitle(title, url)

		// Check if the URL already exist before, both in bookmark
		// file or in database
		if _, exist := mapURL[url]; exist {
			cError.Printf("Skip %s: URL already exists\n", url)
			return
		}

		// sql.ErrNoRows is not treated as a failure here; the `exist` result
		// decides whether the bookmark is already stored.
		_, exist, err := db.GetBookmark(cmd.Context(), 0, url)
		if err != nil && !errors.Is(err, sql.ErrNoRows) {
			cError.Printf("Skip %s: Get Bookmark fail, %v\n", url, err)
			return
		}

		if exist {
			cError.Printf("Skip %s: URL already exists\n", url)
			// Remember the URL so later duplicates in the file are rejected
			// without another database lookup.
			mapURL[url] = struct{}{}
			return
		}

		// Get bookmark tags
		tags := []model.Tag{}
		for _, strTag := range strings.Split(strTags, ",") {
			strTag = normalizeSpace(strTag)
			if strTag != "" {
				tags = append(tags, model.Tag{Name: strTag})
			}
		}

		// Get category name for this bookmark
		// and add it as tags (if necessary)
		category := normalizeSpace(h3.Text())
		if category != "" && generateTag {
			tags = append(tags, model.Tag{Name: category})
		}

		// Add item to list
		bookmark := model.Bookmark{
			ID:       bookID,
			URL:      url,
			Title:    title,
			Tags:     tags,
			Modified: modifiedDate.Format(model.DatabaseDateFormat),
		}

		bookID++
		mapURL[url] = struct{}{}
		bookmarks = append(bookmarks, bookmark)
	})

	// Save bookmark to database. NOTE(review): the `true` argument is
	// presumably the "creation" switch that makes SaveBookmarks insert rather
	// than update; confirm against its signature.
	bookmarks, err = db.SaveBookmarks(cmd.Context(), true, bookmarks...)
	if err != nil {
		cError.Printf("Failed to save bookmarks: %v\n", err)
		os.Exit(1)
	}

	// Print imported bookmark
	fmt.Println()
	printBookmarks(bookmarks...)
}