mirror of
https://github.com/go-shiori/shiori.git
synced 2025-01-16 04:48:30 +08:00
Refactor archiver
This commit is contained in:
parent
89cc8caa23
commit
94b59a29a7
3 changed files with 47 additions and 30 deletions
|
@ -96,11 +96,15 @@ func addHandler(cmd *cobra.Command, args []string) {
|
|||
|
||||
// Save as archive
|
||||
buffer := bytes.NewBuffer(nil)
|
||||
tee := io.TeeReader(resp.Body, buffer)
|
||||
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
archivePath := fp.Join(DataDir, "archive", fmt.Sprintf("%d", book.ID))
|
||||
err = warc.FromReader(tee, url, contentType, archivePath)
|
||||
archivalRequest := warc.ArchivalRequest{
|
||||
URL: url,
|
||||
Reader: io.TeeReader(resp.Body, buffer),
|
||||
ContentType: resp.Header.Get("Content-Type"),
|
||||
}
|
||||
|
||||
err = warc.NewArchive(archivalRequest, archivePath)
|
||||
if err != nil {
|
||||
cError.Printf("Failed to create archive: %v\n", err)
|
||||
return
|
||||
|
|
|
@ -180,14 +180,18 @@ func updateHandler(cmd *cobra.Command, args []string) {
|
|||
defer resp.Body.Close()
|
||||
|
||||
// Save as archive, make sure to delete the old one first
|
||||
buffer := bytes.NewBuffer(nil)
|
||||
|
||||
archivePath := fp.Join(DataDir, "archive", fmt.Sprintf("%d", book.ID))
|
||||
os.Remove(archivePath)
|
||||
|
||||
buffer := bytes.NewBuffer(nil)
|
||||
tee := io.TeeReader(resp.Body, buffer)
|
||||
archivalRequest := warc.ArchivalRequest{
|
||||
URL: book.URL,
|
||||
Reader: io.TeeReader(resp.Body, buffer),
|
||||
ContentType: resp.Header.Get("Content-Type"),
|
||||
}
|
||||
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
err = warc.FromReader(tee, book.URL, contentType, archivePath)
|
||||
err = warc.NewArchive(archivalRequest, archivePath)
|
||||
if err != nil {
|
||||
chProblem <- book.ID
|
||||
chMessage <- fmt.Errorf("Failed to create archive %s: %v", book.URL, err)
|
||||
|
|
|
@ -13,18 +13,40 @@ import (
|
|||
"go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
// FromReader creates an archive from the specified io.Reader.
|
||||
func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
||||
// ArchivalRequest is a request for archiving a web page,
|
||||
// either from a URL or from an io.Reader.
|
||||
type ArchivalRequest struct {
|
||||
// URL is the address of the page to archive. NewArchive rejects it
// when it cannot be parsed or has no scheme or hostname.
URL string
|
||||
// Reader optionally supplies already-fetched content. When it is nil,
// NewArchive downloads the page itself.
Reader io.Reader
|
||||
// ContentType describes the content behind Reader. When it is empty,
// NewArchive downloads the page itself and takes the value from the
// response's Content-Type header.
ContentType string
|
||||
// LogEnabled is passed through to the archiver's LogEnabled setting.
LogEnabled bool
|
||||
}
|
||||
|
||||
// NewArchive creates a new archive based on the submitted request,
|
||||
// then saves it to the specified path.
|
||||
func NewArchive(req ArchivalRequest, dstPath string) error {
|
||||
// Make sure URL is valid
|
||||
parsedURL, err := nurl.ParseRequestURI(url)
|
||||
parsedURL, err := nurl.ParseRequestURI(req.URL)
|
||||
if err != nil || parsedURL.Scheme == "" || parsedURL.Hostname() == "" {
|
||||
return fmt.Errorf("url %s is not valid", url)
|
||||
return fmt.Errorf("url %s is not valid", req.URL)
|
||||
}
|
||||
|
||||
// Generate resource URL
|
||||
res := archiver.ToResourceURL(url, parsedURL)
|
||||
res := archiver.ToResourceURL(req.URL, parsedURL)
|
||||
res.ArchivalURL = "archive-root"
|
||||
|
||||
// Download URL if needed
|
||||
if req.Reader == nil || req.ContentType == "" {
|
||||
resp, err := archiver.DownloadData(res.DownloadURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download %s: %v", req.URL, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
req.Reader = resp.Body
|
||||
req.ContentType = resp.Header.Get("Content-Type")
|
||||
}
|
||||
|
||||
// Create database for archive
|
||||
os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
|
||||
|
||||
|
@ -41,7 +63,7 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
|||
ChWarnings: make(chan error),
|
||||
ChRequest: make(chan archiver.ResourceURL, 10),
|
||||
ResourceMap: make(map[string]struct{}),
|
||||
LogEnabled: true,
|
||||
LogEnabled: req.LogEnabled,
|
||||
}
|
||||
defer arc.Close()
|
||||
|
||||
|
@ -51,10 +73,10 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
|||
var result archiver.ProcessResult
|
||||
var subResources []archiver.ResourceURL
|
||||
|
||||
if strings.Contains(contentType, "text/html") {
|
||||
result, subResources, err = arc.ProcessHTMLFile(res, input)
|
||||
if strings.Contains(req.ContentType, "text/html") {
|
||||
result, subResources, err = arc.ProcessHTMLFile(res, req.Reader)
|
||||
} else {
|
||||
result, err = arc.ProcessOtherFile(res, input)
|
||||
result, err = arc.ProcessOtherFile(res, req.Reader)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
@ -67,7 +89,7 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
|||
// Save content to storage
|
||||
arc.Logf(0, "Downloaded %s", res.DownloadURL)
|
||||
|
||||
result.ContentType = contentType
|
||||
result.ContentType = req.ContentType
|
||||
err = arc.SaveToStorage(result)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to save %s: %v", res.DownloadURL, err)
|
||||
|
@ -90,16 +112,3 @@ func FromReader(input io.Reader, url, contentType, dstPath string) error {
|
|||
arc.StartArchiver()
|
||||
return nil
|
||||
}
|
||||
|
||||
// FromURL creates an archive from the specified URL.
// It downloads url and hands the response body, together with the
// response's Content-Type header, to FromReader along with dstPath.
// It returns an error when the download fails; otherwise it returns
// whatever FromReader returns.
|
||||
func FromURL(url, dstPath string) error {
|
||||
// Download URL
|
||||
resp, err := archiver.DownloadData(url)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download %s: %v", url, err)
|
||||
}
|
||||
// Release the response body once FromReader has finished with it.
defer resp.Body.Close()
|
||||
|
||||
// The Content-Type header is forwarded so FromReader can tell HTML from other content.
contentType := resp.Header.Get("Content-Type")
|
||||
return FromReader(resp.Body, url, contentType, dstPath)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue