mirror of
https://github.com/go-shiori/shiori.git
synced 2025-01-16 12:57:58 +08:00
Fix: unescaped archive URL can't be opened
This commit is contained in:
parent
7137c0693a
commit
598ea9476d
3 changed files with 26 additions and 13 deletions
|
@ -133,9 +133,11 @@ func (arc *Archiver) archive(res ResourceURL) {
|
|||
|
||||
// Save content to storage
|
||||
arc.Logf(infoLog, "Downloaded %s\n"+
|
||||
"\tArchive name %s\n"+
|
||||
"\tParent %s\n"+
|
||||
"\tSize %d Bytes\n",
|
||||
res.DownloadURL,
|
||||
res.ArchivalURL,
|
||||
res.Parent,
|
||||
resp.ContentLength)
|
||||
|
||||
|
|
|
@ -27,8 +27,6 @@ var (
|
|||
rxLazyImageSrcset = regexp.MustCompile(`(?i)\.(jpg|jpeg|png|webp)\s+\d`)
|
||||
rxLazyImageSrc = regexp.MustCompile(`(?i)^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$`)
|
||||
rxStyleURL = regexp.MustCompile(`(?i)^url\((.+)\)$`)
|
||||
rxSingleQuote = regexp.MustCompile(`(?i)^'([^']*)'$`)
|
||||
rxDoubleQuote = regexp.MustCompile(`(?i)^"([^"]*)"$`)
|
||||
rxJSContentType = regexp.MustCompile(`(?i)(text|application)/(java|ecma)script`)
|
||||
)
|
||||
|
||||
|
@ -398,8 +396,9 @@ func processCSS(input io.Reader, baseURL *nurl.URL) (string, []ResourceURL) {
|
|||
// Sanitize the URL by removing `url()`, quotation mark and trailing slash
|
||||
cssURL := string(bt)
|
||||
cssURL = rxStyleURL.ReplaceAllString(cssURL, "$1")
|
||||
cssURL = rxSingleQuote.ReplaceAllString(cssURL, "$1")
|
||||
cssURL = rxDoubleQuote.ReplaceAllString(cssURL, "$1")
|
||||
cssURL = strings.TrimSpace(cssURL)
|
||||
cssURL = strings.Trim(cssURL, `'`)
|
||||
cssURL = strings.Trim(cssURL, `"`)
|
||||
|
||||
// Save the CSS URL and replace it with archival URL
|
||||
res := ToResourceURL(cssURL, baseURL)
|
||||
|
@ -453,13 +452,15 @@ func processJS(input io.Reader, baseURL *nurl.URL) (string, []ResourceURL) {
|
|||
var newURL string
|
||||
|
||||
text := string(bt)
|
||||
text = rxSingleQuote.ReplaceAllString(text, "$1")
|
||||
text = rxDoubleQuote.ReplaceAllString(text, "$1")
|
||||
text = strings.TrimSpace(text)
|
||||
text = strings.Trim(text, `'`)
|
||||
text = strings.Trim(text, `"`)
|
||||
|
||||
if strings.HasPrefix(text, "url(") {
|
||||
cssURL := rxStyleURL.ReplaceAllString(text, "$1")
|
||||
cssURL = rxSingleQuote.ReplaceAllString(cssURL, "$1")
|
||||
cssURL = rxDoubleQuote.ReplaceAllString(cssURL, "$1")
|
||||
cssURL = strings.TrimSpace(cssURL)
|
||||
cssURL = strings.Trim(cssURL, `'`)
|
||||
cssURL = strings.Trim(cssURL, `"`)
|
||||
|
||||
res = ToResourceURL(cssURL, baseURL)
|
||||
newURL = fmt.Sprintf("\"url('%s')\"", res.ArchivalURL)
|
||||
|
@ -474,7 +475,14 @@ func processJS(input io.Reader, baseURL *nurl.URL) (string, []ResourceURL) {
|
|||
|
||||
ext := path.Ext(tmp.Path)
|
||||
cType := mime.TypeByExtension(ext)
|
||||
if !strings.Contains(cType, "text/css") && !rxJSContentType.MatchString(cType) {
|
||||
|
||||
switch {
|
||||
case rxJSContentType.MatchString(cType),
|
||||
strings.Contains(cType, "text/css"),
|
||||
strings.Contains(cType, "image/"),
|
||||
strings.Contains(cType, "audio/"),
|
||||
strings.Contains(cType, "video/"):
|
||||
default:
|
||||
buffer.Write(bt)
|
||||
continue
|
||||
}
|
||||
|
|
|
@ -39,22 +39,25 @@ func ToResourceURL(uri string, base *nurl.URL) ResourceURL {
|
|||
// Create archival URL
|
||||
archivalURL := downloadURL
|
||||
|
||||
// Some URL have its query escaped.
|
||||
// Some URL have its query or path escaped, e.g. Wikipedia and Dev.to.
|
||||
// For example, Wikipedia's stylesheet looks like this :
|
||||
// load.php?lang=en&modules=ext.3d.styles%7Cext.cite.styles%7Cext.uls.interlanguage
|
||||
// However, when browser download it, it will be registered as unescaped query :
|
||||
// load.php?lang=en&modules=ext.3d.styles|ext.cite.styles|ext.uls.interlanguage
|
||||
// So, for archival URL, we need to unescape the query first.
|
||||
// So, for archival URL, we need to unescape the query and path first.
|
||||
tmp, err := nurl.Parse(downloadURL)
|
||||
if err == nil {
|
||||
tmp.RawPath = tmp.Path
|
||||
|
||||
newQuery, _ := nurl.QueryUnescape(tmp.RawQuery)
|
||||
if newQuery != "" {
|
||||
tmp.RawQuery = newQuery
|
||||
archivalURL = tmp.String()
|
||||
}
|
||||
|
||||
archivalURL = tmp.String()
|
||||
}
|
||||
|
||||
archivalURL = strings.Replace(archivalURL, "://", "/", 1)
|
||||
archivalURL = strings.ReplaceAll(archivalURL, "://", "/")
|
||||
archivalURL = strings.ReplaceAll(archivalURL, "?", "-")
|
||||
archivalURL = strings.ReplaceAll(archivalURL, "#", "-")
|
||||
archivalURL = strings.ReplaceAll(archivalURL, "/", "-")
|
||||
|
|
Loading…
Reference in a new issue