fix: parse pocket new CSV format (#1112)

* fix pocket parsing error

Signed-off-by: bakito <github@bakito.ch>

* add tests for pocket csv

Signed-off-by: bakito <github@bakito.ch>

* Use file name from test case

* fix lint and test issues

Signed-off-by: bakito <github@bakito.ch>

---------

Signed-off-by: bakito <github@bakito.ch>
Co-authored-by: Felipe Martin <812088+fmartingr@users.noreply.github.com>
This commit is contained in:
Marc Brugger 2025-06-27 13:35:15 +02:00 committed by GitHub
parent d86c9cc650
commit 3091d844c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 88 additions and 5 deletions

View file

@ -10,7 +10,6 @@ import (
"regexp"
"slices"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
@ -123,19 +122,23 @@ func parseCsvExport(ctx context.Context, db model.DB, srcFile *os.File) []model.
os.Exit(1)
}
var titleIdx, urlIdx, timeAddedIdx, tagsIdx int
for i, cols := range records {
// Check and skip header
if i == 0 {
expected := []string{"title", "url", "time_added", "cursor", "tags", "status"}
if slices.Compare(cols, expected) != 0 {
cError.Printf("Invalid CSV format. Header must be: %s\n", strings.Join(expected, ","))
titleIdx = slices.Index(cols, "title")
urlIdx = slices.Index(cols, "url")
timeAddedIdx = slices.Index(cols, "time_added")
tagsIdx = slices.Index(cols, "tags")
if titleIdx == -1 || urlIdx == -1 || timeAddedIdx == -1 || tagsIdx == -1 {
cError.Printf("Invalid CSV format. Header must contain: title, url, time_added, tags\n")
os.Exit(1)
}
continue
}
// Get metadata
title, url, timeAdded, tags, err := verifyMetadata(cols[0], cols[1], cols[2], cols[4])
title, url, timeAdded, tags, err := verifyMetadata(cols[titleIdx], cols[urlIdx], cols[timeAddedIdx], cols[tagsIdx])
if err != nil {
cError.Printf("Skip %s: %v\n", url, err)
continue

View file

@ -0,0 +1,76 @@
package cmd
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/go-shiori/shiori/internal/database"
)
// Test_parseCsvExport_old_format runs parseCsvExport against the Pocket CSV
// fixtures under testdata. Despite its name, the table covers both fixture
// files (pocket-old.csv and pocket-new.csv). Each fixture must yield exactly
// one bookmark with the expected title, URL and single tag, and non-empty
// CreatedAt / ModifiedAt values.
func Test_parseCsvExport_old_format(t *testing.T) {
	tests := []struct {
		name     string
		fileName string
	}{
		{
			name:     "Test old file format",
			fileName: "pocket-old.csv",
		},
		{
			name:     "Test new file format",
			fileName: "pocket-new.csv",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Without the fixture nothing below is meaningful, so stop here
			// instead of handing a nil *os.File to the parser.
			file, err := os.Open(filepath.Join("..", "..", "testdata", tt.fileName))
			if err != nil {
				t.Fatalf("failed to open fixture %s: %v", tt.fileName, err)
			}
			defer file.Close()

			ctx := context.TODO()

			// Each sub-test gets its own throwaway SQLite database.
			tmpDir, err := os.MkdirTemp("", "shiori-test-*")
			if err != nil {
				t.Fatalf("failed to create temp dir: %v", err)
			}
			defer os.RemoveAll(tmpDir)

			dbPath := filepath.Join(tmpDir, "shiori.db")
			db, err := database.OpenSQLiteDatabase(ctx, dbPath)
			if err != nil {
				t.Fatalf("failed to open sqlite database: %v", err)
			}

			if err := db.Migrate(ctx); err != nil {
				t.Fatalf("failed to migrate sqlite database: %v", err)
			}

			bookmarks := parseCsvExport(ctx, db, file)
			// Fatal, not Error: bookmarks[0] below would panic on an empty result.
			if len(bookmarks) != 1 {
				t.Fatalf("Expected 1 bookmark, got %d", len(bookmarks))
			}

			bm := bookmarks[0]
			if bm.Title != "Shiori" {
				t.Errorf("Expected Title Shiori, got %s", bm.Title)
			}
			if bm.URL != "https://github.com/go-shiori/shiori" {
				t.Errorf("Expected URL https://github.com/go-shiori/shiori, got %s", bm.URL)
			}

			// Fatal, not Error: bm.Tags[0] below would panic on an empty slice.
			if len(bm.Tags) != 1 {
				t.Fatalf("Expected 1 tag, got %d", len(bm.Tags))
			}
			if bm.Tags[0].Name != "shiori" {
				t.Errorf("Expected tag shiori, got %s", bm.Tags[0].Name)
			}

			if bm.CreatedAt == "" {
				t.Error("Expected CreatedAt to be not empty")
			}
			if bm.ModifiedAt == "" {
				t.Error("Expected ModifiedAt to be not empty")
			}
		})
	}
}

2
testdata/pocket-new.csv vendored Normal file
View file

@ -0,0 +1,2 @@
title,url,time_added,tags,status
Shiori,https://github.com/go-shiori/shiori,1541343937,shiori,unread
1 title url time_added tags status
2 Shiori https://github.com/go-shiori/shiori 1541343937 shiori unread

2
testdata/pocket-old.csv vendored Normal file
View file

@ -0,0 +1,2 @@
title,url,time_added,cursor,tags,status
Shiori,https://github.com/go-shiori/shiori,1541343937,,shiori,unread
1 title url time_added cursor tags status
2 Shiori https://github.com/go-shiori/shiori 1541343937 shiori unread