From 0324784c7c7f21cf5e8feb8ea752a4c592e68367 Mon Sep 17 00:00:00 2001 From: Radhi Fadlillah Date: Thu, 8 Aug 2019 14:21:25 +0700 Subject: [PATCH] Fix URL not unescaped on archival process --- pkg/warc/internal/archiver/resource-url.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/warc/internal/archiver/resource-url.go b/pkg/warc/internal/archiver/resource-url.go index 239abf3..6567ea9 100644 --- a/pkg/warc/internal/archiver/resource-url.go +++ b/pkg/warc/internal/archiver/resource-url.go @@ -47,14 +47,13 @@ func ToResourceURL(uri string, base *nurl.URL) ResourceURL { // So, for archival URL, we need to unescape the query and path first. tmp, err := nurl.Parse(downloadURL) if err == nil { - tmp.RawPath = tmp.Path - - newQuery, _ := nurl.QueryUnescape(tmp.RawQuery) - if newQuery != "" { - tmp.RawQuery = newQuery + unescapedQuery, _ := nurl.QueryUnescape(tmp.RawQuery) + if unescapedQuery != "" { + tmp.RawQuery = unescapedQuery } archivalURL = tmp.String() + archivalURL = strings.Replace(archivalURL, tmp.EscapedPath(), tmp.Path, 1) } archivalURL = strings.ReplaceAll(archivalURL, "://", "/")