Now cached images will use the archive

This commit is contained in:
Radhi Fadlillah 2019-08-12 09:02:59 +07:00
parent 4e76288e09
commit 2cb95c1238
5 changed files with 119 additions and 5 deletions

View file

@ -152,6 +152,7 @@ export default {
var value = field.value;
if (field.type === 'number') value = parseInt(value, 10) || 0;
else if (field.type === 'float') value = parseFloat(value) || 0.0;
else if (field.type === 'check') value = value !== '';
data[field.name] = value;
})

File diff suppressed because one or more lines are too long

View file

@ -98,10 +98,72 @@ func (h *handler) serveBookmarkContent(w http.ResponseWriter, r *http.Request, p
}
}
// Check if it has archive
// Check if it has archive.
archivePath := fp.Join(h.DataDir, "archive", strID)
if fileExists(archivePath) {
bookmark.HasArchive = true
// Open archive, look in cache first
var archive *warc.Archive
cacheData, found := h.ArchiveCache.Get(strID)
if found {
archive = cacheData.(*warc.Archive)
} else {
archivePath := fp.Join(h.DataDir, "archive", strID)
archive, err = warc.Open(archivePath)
checkError(err)
h.ArchiveCache.Set(strID, archive, 0)
}
// Find all image and convert its source to use the archive URL.
createArchivalURL := func(archivalName string) string {
archivalURL := *r.URL
archivalURL.Path = path.Join("/", "bookmark", strID, "archive", archivalName)
return archivalURL.String()
}
buffer := strings.NewReader(bookmark.HTML)
doc, err := goquery.NewDocumentFromReader(buffer)
checkError(err)
doc.Find("img, picture, figure, source").Each(func(_ int, node *goquery.Selection) {
// Get the needed attributes
src, _ := node.Attr("src")
strSrcSets, _ := node.Attr("srcset")
// Convert `src` attributes
if src != "" {
archivalName := getArchivalName(src)
if archivalName != "" && archive.HasResource(archivalName) {
node.SetAttr("src", createArchivalURL(archivalName))
}
}
// Split srcset by comma, then process it like any URLs
srcSets := strings.Split(strSrcSets, ",")
for i, srcSet := range srcSets {
srcSet = strings.TrimSpace(srcSet)
parts := strings.SplitN(srcSet, " ", 2)
if parts[0] == "" {
continue
}
archivalName := getArchivalName(parts[0])
if archivalName != "" && archive.HasResource(archivalName) {
archivalURL := createArchivalURL(archivalName)
srcSets[i] = strings.Replace(srcSets[i], parts[0], archivalURL, 1)
}
}
if len(srcSets) > 0 {
node.SetAttr("srcset", strings.Join(srcSets, ","))
}
})
bookmark.HTML, err = goquery.OuterHtml(doc.Selection)
checkError(err)
}
// Create template

View file

@ -16,6 +16,7 @@ import (
nurl "net/url"
"os"
fp "path/filepath"
"regexp"
"strings"
"syscall"
"time"
@ -23,6 +24,8 @@ import (
"github.com/disintegration/imaging"
)
// rxRepeatedStrip matches a run of one or more consecutive "-" characters.
// getArchivalName uses it to squeeze such runs into a single "-". The `(?i)`
// flag has no effect here because the pattern contains no letters.
var rxRepeatedStrip = regexp.MustCompile(`(?i)-+`)
func serveFile(w http.ResponseWriter, filePath string, cache bool) error {
// Open file
src, err := assets.Open(filePath)
@ -184,6 +187,37 @@ func createTemplate(filename string, funcMap template.FuncMap) (*template.Templa
return template.New(filename).Delims("$$", "$$").Funcs(funcMap).Parse(string(srcContent))
}
// getArchivalName derives the archival name for a resource URL. When src
// parses as a URL, its query and path are unescaped first; afterwards "://",
// "?", "#", "/" and spaces are turned into "-" and every run of "-" is
// collapsed into a single one. An empty src yields an empty name.
func getArchivalName(src string) string {
	name := src

	// Sites such as Wikipedia and Dev.to reference resources with an escaped
	// query or path, for instance:
	//   load.php?lang=en&modules=ext.3d.styles%7Cext.cite.styles%7Cext.uls.interlanguage
	// whereas the browser registers the download under the unescaped form:
	//   load.php?lang=en&modules=ext.3d.styles|ext.cite.styles|ext.uls.interlanguage
	// so the unescaped query and path are the ones used for naming.
	if parsed, err := nurl.Parse(src); err == nil {
		// A query that cannot be unescaped comes back empty and is left as is.
		if query, _ := nurl.QueryUnescape(parsed.RawQuery); query != "" {
			parsed.RawQuery = query
		}
		name = strings.Replace(parsed.String(), parsed.EscapedPath(), parsed.Path, 1)
	}

	// Applied strictly in this order: "://" shrinks to "/" before every "/"
	// is turned into "-".
	substitutions := [...][2]string{
		{"://", "/"},
		{"?", "-"},
		{"#", "-"},
		{"/", "-"},
		{" ", "-"},
	}
	for _, pair := range substitutions {
		name = strings.ReplaceAll(name, pair[0], pair[1])
	}

	return rxRepeatedStrip.ReplaceAllString(name, "-")
}
func checkError(err error) {
if err == nil {
return

View file

@ -74,3 +74,20 @@ func (arc *Archive) Read(name string) ([]byte, string, error) {
return content, strContentType, nil
}
// HasResource reports whether the archive holds a resource stored under name,
// i.e. whether a bucket with that name exists in the archive's database.
// An empty name is looked up as "archive-root".
func (arc *Archive) HasResource(name string) bool {
	key := name
	if key == "" {
		key = "archive-root"
	}

	found := false
	// The callback itself never fails. The error returned by View is not
	// inspected, so found simply stays false if the callback is never run.
	_ = arc.db.View(func(tx *bbolt.Tx) error {
		found = tx.Bucket([]byte(key)) != nil
		return nil
	})

	return found
}