diff --git a/pkg/warc/internal/archiver/processor.go b/pkg/warc/internal/archiver/processor.go index 10f90e97..b0604483 100644 --- a/pkg/warc/internal/archiver/processor.go +++ b/pkg/warc/internal/archiver/processor.go @@ -44,6 +44,9 @@ func (arc *Archiver) ProcessHTMLFile(res ResourceURL, input io.Reader) (result P return ProcessResult{}, nil, fmt.Errorf("url %s is not valid", res.DownloadURL) } + // TODO: I'm still not really sure, but IMHO it's safer to disable Javascript + removeNodes(getElementsByTagName(doc, "script"), nil) + // Convert lazy loaded image to normal fixLazyImages(doc)