mirror of
https://github.com/go-shiori/shiori.git
synced 2024-11-10 17:36:02 +08:00
Now only fetch text content if page is readable
This commit is contained in:
parent
43040a9bc4
commit
af1a32ac4f
3 changed files with 28 additions and 4 deletions
|
@@ -101,7 +101,8 @@ func addHandler(cmd *cobra.Command, args []string) {
|
|||
// Split response body so it can be processed twice
|
||||
archivalInput := bytes.NewBuffer(nil)
|
||||
readabilityInput := bytes.NewBuffer(nil)
|
||||
multiWriter := io.MultiWriter(archivalInput, readabilityInput)
|
||||
readabilityCheckInput := bytes.NewBuffer(nil)
|
||||
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
|
||||
|
||||
_, err = io.Copy(multiWriter, resp.Body)
|
||||
if err != nil {
|
||||
|
@@ -112,6 +113,8 @@ func addHandler(cmd *cobra.Command, args []string) {
|
|||
// If this is HTML, parse for readable content
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
if strings.Contains(contentType, "text/html") {
|
||||
isReadable := readability.IsReadable(readabilityCheckInput)
|
||||
|
||||
article, err := readability.FromReader(readabilityInput, url)
|
||||
if err != nil {
|
||||
cError.Printf("Failed to parse article: %v\n", err)
|
||||
|
@@ -131,6 +134,10 @@ func addHandler(cmd *cobra.Command, args []string) {
|
|||
book.Excerpt = article.Excerpt
|
||||
}
|
||||
|
||||
if !isReadable {
|
||||
book.Content = ""
|
||||
}
|
||||
|
||||
// Get image URL
|
||||
if article.Image != "" {
|
||||
imageURLs = append(imageURLs, article.Image)
|
||||
|
|
|
@@ -185,7 +185,8 @@ func updateHandler(cmd *cobra.Command, args []string) {
|
|||
// Split response body so it can be processed twice
|
||||
archivalInput := bytes.NewBuffer(nil)
|
||||
readabilityInput := bytes.NewBuffer(nil)
|
||||
multiWriter := io.MultiWriter(archivalInput, readabilityInput)
|
||||
readabilityCheckInput := bytes.NewBuffer(nil)
|
||||
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
|
||||
|
||||
_, err = io.Copy(multiWriter, resp.Body)
|
||||
if err != nil {
|
||||
|
@@ -197,6 +198,8 @@ func updateHandler(cmd *cobra.Command, args []string) {
|
|||
// If this is HTML, parse for readable content
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
if strings.Contains(contentType, "text/html") {
|
||||
isReadable := readability.IsReadable(readabilityCheckInput)
|
||||
|
||||
article, err := readability.FromReader(readabilityInput, book.URL)
|
||||
if err != nil {
|
||||
chProblem <- book.ID
|
||||
|
@@ -208,6 +211,10 @@ func updateHandler(cmd *cobra.Command, args []string) {
|
|||
book.Content = article.TextContent
|
||||
book.HTML = article.Content
|
||||
|
||||
if !isReadable {
|
||||
book.Content = ""
|
||||
}
|
||||
|
||||
if !dontOverwrite {
|
||||
book.Title = article.Title
|
||||
book.Excerpt = article.Excerpt
|
||||
|
|
|
@@ -237,7 +237,8 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h
|
|||
// Split response body so it can be processed twice
|
||||
archivalInput := bytes.NewBuffer(nil)
|
||||
readabilityInput := bytes.NewBuffer(nil)
|
||||
multiWriter := io.MultiWriter(archivalInput, readabilityInput)
|
||||
readabilityCheckInput := bytes.NewBuffer(nil)
|
||||
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
|
||||
|
||||
_, err = io.Copy(multiWriter, resp.Body)
|
||||
if err != nil {
|
||||
|
@@ -247,6 +248,8 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h
|
|||
// If this is HTML, parse for readable content
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
if strings.Contains(contentType, "text/html") {
|
||||
isReadable := readability.IsReadable(readabilityCheckInput)
|
||||
|
||||
article, err := readability.FromReader(readabilityInput, book.URL)
|
||||
if err != nil {
|
||||
return
|
||||
|
@@ -273,6 +276,10 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h
|
|||
if article.Favicon != "" {
|
||||
imageURLs = append(imageURLs, article.Favicon)
|
||||
}
|
||||
|
||||
if !isReadable {
|
||||
book.Content = ""
|
||||
}
|
||||
}
|
||||
|
||||
// If needed, create offline archive as well
|
||||
|
@@ -513,7 +520,6 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps http
|
|||
book.Author = article.Byline
|
||||
book.Content = article.TextContent
|
||||
book.HTML = article.Content
|
||||
book.HasContent = book.Content != "" && isReadable
|
||||
|
||||
if article.Title != "" {
|
||||
book.Title = article.Title
|
||||
|
@@ -523,6 +529,10 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps http
|
|||
book.Excerpt = article.Excerpt
|
||||
}
|
||||
|
||||
if !isReadable {
|
||||
book.Content = ""
|
||||
}
|
||||
|
||||
// Get image for thumbnail and save it to local disk
|
||||
var imageURLs []string
|
||||
if article.Image != "" {
|
||||
|
|
Loading…
Reference in a new issue