diff --git a/readability/read.go b/readability/read.go index 28dab62..d1cf743 100644 --- a/readability/read.go +++ b/readability/read.go @@ -1128,14 +1128,14 @@ func FromURL(url *nurl.URL, timeout time.Duration) (Article, error) { } defer resp.Body.Close() - // If response is not HTML, stop process - mimeType, err := getMimeType(resp.Body) - if err != nil { - return Article{}, err + // Check content type. If not HTML, stop process + contentType := resp.Header.Get("Content-type") + if contentType == "" { + contentType = "application/octet-stream" } - if !strings.HasPrefix(mimeType, "text/html") { - return Article{}, fmt.Errorf("URL must be a text/html, found %s", mimeType) + if !strings.HasPrefix(contentType, "text/html") { + return Article{}, fmt.Errorf("URL must be a text/html, found %s", contentType) } // Parse response body diff --git a/readability/utils.go b/readability/utils.go index cde610d..2b1d9a3 100644 --- a/readability/utils.go +++ b/readability/utils.go @@ -3,8 +3,6 @@ package readability import ( "crypto/md5" "fmt" - "io" - "net/http" "os" "strings" "unicode/utf8" @@ -70,17 +68,6 @@ func removeSeparator(str string, separators ...string) string { return strings.Join(finalWords, " ") } -func getMimeType(resp io.Reader) (string, error) { - buffer := make([]byte, 512) - _, err := resp.Read(buffer) - if err != nil { - return "", err - } - - mimeType := http.DetectContentType(buffer) - return mimeType, nil -} - func normalizeText(str string) string { return strings.Join(strings.Fields(str), " ") }