mirror of
https://github.com/go-shiori/shiori.git
synced 2025-10-09 13:17:28 +08:00
Refactor archiver
This commit is contained in:
parent
89cc8caa23
commit
94b59a29a7
3 changed files with 47 additions and 30 deletions
|
@ -96,11 +96,15 @@ func addHandler(cmd *cobra.Command, args []string) {
|
||||||
|
|
||||||
// Save as archive
|
// Save as archive
|
||||||
buffer := bytes.NewBuffer(nil)
|
buffer := bytes.NewBuffer(nil)
|
||||||
tee := io.TeeReader(resp.Body, buffer)
|
|
||||||
|
|
||||||
contentType := resp.Header.Get("Content-Type")
|
|
||||||
archivePath := fp.Join(DataDir, "archive", fmt.Sprintf("%d", book.ID))
|
archivePath := fp.Join(DataDir, "archive", fmt.Sprintf("%d", book.ID))
|
||||||
err = warc.FromReader(tee, url, contentType, archivePath)
|
archivalRequest := warc.ArchivalRequest{
|
||||||
|
URL: url,
|
||||||
|
Reader: io.TeeReader(resp.Body, buffer),
|
||||||
|
ContentType: resp.Header.Get("Content-Type"),
|
||||||
|
}
|
||||||
|
|
||||||
|
err = warc.NewArchive(archivalRequest, archivePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cError.Printf("Failed to create archive: %v\n", err)
|
cError.Printf("Failed to create archive: %v\n", err)
|
||||||
return
|
return
|
||||||
|
|
|
@ -180,14 +180,18 @@ func updateHandler(cmd *cobra.Command, args []string) {
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
// Save as archive, make sure to delete the old one first
|
// Save as archive, make sure to delete the old one first
|
||||||
|
buffer := bytes.NewBuffer(nil)
|
||||||
|
|
||||||
archivePath := fp.Join(DataDir, "archive", fmt.Sprintf("%d", book.ID))
|
archivePath := fp.Join(DataDir, "archive", fmt.Sprintf("%d", book.ID))
|
||||||
os.Remove(archivePath)
|
os.Remove(archivePath)
|
||||||
|
|
||||||
buffer := bytes.NewBuffer(nil)
|
archivalRequest := warc.ArchivalRequest{
|
||||||
tee := io.TeeReader(resp.Body, buffer)
|
URL: book.URL,
|
||||||
|
Reader: io.TeeReader(resp.Body, buffer),
|
||||||
|
ContentType: resp.Header.Get("Content-Type"),
|
||||||
|
}
|
||||||
|
|
||||||
contentType := resp.Header.Get("Content-Type")
|
err = warc.NewArchive(archivalRequest, archivePath)
|
||||||
err = warc.FromReader(tee, book.URL, contentType, archivePath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
chProblem <- book.ID
|
chProblem <- book.ID
|
||||||
chMessage <- fmt.Errorf("Failed to create archive %s: %v", book.URL, err)
|
chMessage <- fmt.Errorf("Failed to create archive %s: %v", book.URL, err)
|
||||||
|
|
|
@ -13,18 +13,40 @@ import (
|
||||||
"go.etcd.io/bbolt"
|
"go.etcd.io/bbolt"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FromReader creates an archive from the specified io.Reader.
|
// ArchivalRequest is a request for archiving a web page,
|
||||||
func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
// either from a URL or from an io.Reader.
|
||||||
|
type ArchivalRequest struct {
|
||||||
|
URL string
|
||||||
|
Reader io.Reader
|
||||||
|
ContentType string
|
||||||
|
LogEnabled bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewArchive creates a new archive based on the submitted request,
|
||||||
|
// then saves it to the specified path.
|
||||||
|
func NewArchive(req ArchivalRequest, dstPath string) error {
|
||||||
// Make sure URL is valid
|
// Make sure URL is valid
|
||||||
parsedURL, err := nurl.ParseRequestURI(url)
|
parsedURL, err := nurl.ParseRequestURI(req.URL)
|
||||||
if err != nil || parsedURL.Scheme == "" || parsedURL.Hostname() == "" {
|
if err != nil || parsedURL.Scheme == "" || parsedURL.Hostname() == "" {
|
||||||
return fmt.Errorf("url %s is not valid", url)
|
return fmt.Errorf("url %s is not valid", req.URL)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate resource URL
|
// Generate resource URL
|
||||||
res := archiver.ToResourceURL(url, parsedURL)
|
res := archiver.ToResourceURL(req.URL, parsedURL)
|
||||||
res.ArchivalURL = "archive-root"
|
res.ArchivalURL = "archive-root"
|
||||||
|
|
||||||
|
// Download URL if needed
|
||||||
|
if req.Reader == nil || req.ContentType == "" {
|
||||||
|
resp, err := archiver.DownloadData(res.DownloadURL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to download %s: %v", req.URL, err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
req.Reader = resp.Body
|
||||||
|
req.ContentType = resp.Header.Get("Content-Type")
|
||||||
|
}
|
||||||
|
|
||||||
// Create database for archive
|
// Create database for archive
|
||||||
os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
|
os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
|
||||||
|
|
||||||
|
@ -41,7 +63,7 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
||||||
ChWarnings: make(chan error),
|
ChWarnings: make(chan error),
|
||||||
ChRequest: make(chan archiver.ResourceURL, 10),
|
ChRequest: make(chan archiver.ResourceURL, 10),
|
||||||
ResourceMap: make(map[string]struct{}),
|
ResourceMap: make(map[string]struct{}),
|
||||||
LogEnabled: true,
|
LogEnabled: req.LogEnabled,
|
||||||
}
|
}
|
||||||
defer arc.Close()
|
defer arc.Close()
|
||||||
|
|
||||||
|
@ -51,10 +73,10 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
||||||
var result archiver.ProcessResult
|
var result archiver.ProcessResult
|
||||||
var subResources []archiver.ResourceURL
|
var subResources []archiver.ResourceURL
|
||||||
|
|
||||||
if strings.Contains(contentType, "text/html") {
|
if strings.Contains(req.ContentType, "text/html") {
|
||||||
result, subResources, err = arc.ProcessHTMLFile(res, input)
|
result, subResources, err = arc.ProcessHTMLFile(res, req.Reader)
|
||||||
} else {
|
} else {
|
||||||
result, err = arc.ProcessOtherFile(res, input)
|
result, err = arc.ProcessOtherFile(res, req.Reader)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -67,7 +89,7 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
||||||
// Save content to storage
|
// Save content to storage
|
||||||
arc.Logf(0, "Downloaded %s", res.DownloadURL)
|
arc.Logf(0, "Downloaded %s", res.DownloadURL)
|
||||||
|
|
||||||
result.ContentType = contentType
|
result.ContentType = req.ContentType
|
||||||
err = arc.SaveToStorage(result)
|
err = arc.SaveToStorage(result)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to save %s: %v", res.DownloadURL, err)
|
return fmt.Errorf("failed to save %s: %v", res.DownloadURL, err)
|
||||||
|
@ -90,16 +112,3 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
||||||
arc.StartArchiver()
|
arc.StartArchiver()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// FromURL creates an archive from the specified URL.
|
|
||||||
func FromURL(url, dstPath string) error {
|
|
||||||
// Download URL
|
|
||||||
resp, err := archiver.DownloadData(url)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to download %s: %v", url, err)
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
contentType := resp.Header.Get("Content-Type")
|
|
||||||
return FromReader(resp.Body, url, contentType, dstPath)
|
|
||||||
}
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue