mirror of
https://github.com/Foundry376/Mailspring.git
synced 2024-12-27 10:33:56 +08:00
feat(quote): add new quoted text detector for quote strings
This commit is contained in:
parent
4e05fc45c8
commit
e7ebf0ba2c
5 changed files with 151 additions and 12 deletions
|
@ -297,6 +297,65 @@ describe "QuotedHTMLTransformer", ->
|
|||
<br></body>
|
||||
"""
|
||||
|
||||
# Test 13: If there's an "On date…" string immediately before a blockquote,
|
||||
# then remove it.
|
||||
tests.push
|
||||
before: """
|
||||
Hey
|
||||
<div>
|
||||
On FOOBAR
|
||||
<br>
|
||||
On Thu, Mar 3, 2016
|
||||
at 3:19 AM,
|
||||
First Middle Last-Last
|
||||
<span dir="ltr">
|
||||
<
|
||||
<a href="mailto:test@nylas.com" target="_blank">
|
||||
test@nylas.com
|
||||
</a>
|
||||
>
|
||||
</span>
|
||||
wrote:
|
||||
<br>
|
||||
<blockquote>
|
||||
QUOTED TEXT
|
||||
</blockquote>
|
||||
</div>
|
||||
<br>
|
||||
"""
|
||||
after: """<head></head><body>
|
||||
Hey
|
||||
<div>
|
||||
On FOOBAR
|
||||
<br><br>
|
||||
</div><br></body>
|
||||
"""
|
||||
|
||||
# Test 14: Don't pick up false positives on the string precursors to block
|
||||
# quotes.
|
||||
tests.push
|
||||
before: """
|
||||
Hey
|
||||
<div>
|
||||
On FOOBAR
|
||||
<br>
|
||||
On Thu, Mar 3, 2016 I went to my writing club and wrote:
|
||||
<strong>A little song</strong>
|
||||
<blockquote>
|
||||
QUOTED TEXT
|
||||
</blockquote>
|
||||
</div>
|
||||
"""
|
||||
after: """<head></head><body>
|
||||
Hey
|
||||
<div>
|
||||
On FOOBAR
|
||||
<br>
|
||||
On Thu, Mar 3, 2016 I went to my writing club and wrote:
|
||||
<strong>A little song</strong>
|
||||
</div></body>
|
||||
"""
|
||||
|
||||
it 'works with these manual test cases', ->
|
||||
for {before, after} in tests
|
||||
opts = keepIfWholeBodyIsQuote: true
|
||||
|
|
|
@ -312,6 +312,12 @@ DOMUtils =
|
|||
else continue
|
||||
return lastNode
|
||||
|
||||
lastDescendent: (node) ->
  # Returns whatever DOMUtils.lastNode reports for the final child of `node`,
  # or null when `node` is missing or has no children.
  return null unless node
  children = node.childNodes
  return null unless children.length > 0
  finalChild = children[children.length - 1]
  return DOMUtils.lastNode(finalChild)
|
||||
|
||||
findLastTextNode: (node) ->
|
||||
return null unless node
|
||||
return node if node.nodeType is Node.TEXT_NODE
|
||||
|
|
23
src/dom-walkers.es6
Normal file
23
src/dom-walkers.es6
Normal file
|
@ -0,0 +1,23 @@
|
|||
/**
 * Generator-based helpers for iterating over DOM trees.
 */
const DOMWalkers = {
  /**
   * Lazily yields each node returned by successive `nextNode()` calls on a
   * TreeWalker.
   *
   * @param {...*} treeWalkerArgs - Forwarded unchanged to
   *   `document.createTreeWalker`.
   * @yields {Node} The walker's nodes, in the order the walker produces them.
   */
  *walk(...treeWalkerArgs) {
    const walker = document.createTreeWalker(...treeWalkerArgs);
    for (let node = walker.nextNode(); node; node = walker.nextNode()) {
      yield node;
    }
  },

  /**
   * Walks the subtree rooted at `node` from its end towards its start: the
   * children are visited last-to-first (recursively), and `node` itself is
   * yielded only after all of its descendants.
   *
   * @param {?Node} node - Root of the subtree; a falsy value yields nothing.
   * @yields {Node} Every node of the subtree, deepest trailing nodes first.
   */
  *walkBackwards(node) {
    if (!node) { return; }
    for (let i = node.childNodes.length - 1; i >= 0; i--) {
      // Recurse through the module binding rather than `this` so the
      // generator keeps working when it is detached from the object
      // (destructured or passed around as a bare function).
      yield* DOMWalkers.walkBackwards(node.childNodes[i]);
    }
    yield node;
  },
};

export default DOMWalkers;
|
47
src/services/quote-string-detector.es6
Normal file
47
src/services/quote-string-detector.es6
Normal file
|
@ -0,0 +1,47 @@
|
|||
import DOMWalkers from '../dom-walkers'
|
||||
|
||||
/**
 * There are semi-common cases where, immediately before a blockquote, we
 * encounter a string like: "On Thu … so and so … wrote:". This should be part
 * of the blockquote but was usually left as a collection of nodes. To help
 * with false positives, we only look for strings like that which immediately
 * preceded the blockquoted section. By the time the function gets here, the
 * last blockquote has been removed and the text we want will be at the end of
 * the document.
 *
 * This is in its own file to make use of ES6 generators.
 *
 * @param {Node} doc - Root node to scan; it is walked from its end backwards.
 * @returns {Node[]} The nodes that make up the attribution string, in the
 *   order they were encountered (last node first). Empty when the trailing
 *   text is not an attribution, or when a trailing "wrote:" has no matching
 *   "On …" opener anywhere before it.
 */
export default function quoteStringDetector(doc) {
  // Neither pattern is global, so `.test` keeps no `lastIndex` state and the
  // same instances can safely be reused for every node.
  const quoteEndPattern = /wrote:$/im;
  const quoteStartPattern = /On \S/im;

  const quoteNodesToRemove = [];
  let seenInitialQuoteEnd = false;

  for (const node of DOMWalkers.walkBackwards(doc)) {
    const isVisibleText =
      node.nodeType === Node.TEXT_NODE && node.nodeValue.trim().length > 0;

    if (!isVisibleText) {
      // Elements and whitespace-only text between the two ends of the
      // attribution belong to it; anything trailing the "wrote:" is left alone.
      if (seenInitialQuoteEnd) {
        quoteNodesToRemove.push(node);
      }
      continue;
    }

    if (!seenInitialQuoteEnd) {
      if (!quoteEndPattern.test(node.nodeValue)) {
        // This means there's some text in between the end of the content
        // (adjacent to the blockquote) and the quote string. We shouldn't be
        // killing any text in this case.
        return [];
      }
      seenInitialQuoteEnd = true;
    }

    quoteNodesToRemove.push(node);
    if (quoteStartPattern.test(node.nodeValue)) {
      // We've reached the beginning of the quoted string (possibly in the very
      // same node that ends it).
      return quoteNodesToRemove;
    }
  }

  // The walk ran out without finding the "On …" opener. Everything collected
  // so far (up to and including the root itself) is ordinary content, so
  // nothing may be removed.
  return [];
}
|
|
@ -1,6 +1,7 @@
|
|||
_ = require 'underscore'
|
||||
crypto = require 'crypto'
|
||||
DOMUtils = require '../dom-utils'
|
||||
quoteStringDetector = require './quote-string-detector'
|
||||
|
||||
class QuotedHTMLTransformer
|
||||
|
||||
|
@ -49,19 +50,23 @@ class QuotedHTMLTransformer
|
|||
# It's possible that the entire body was quoted text and we've removed everything.
|
||||
return "<head></head><body></body>" unless doc.body
|
||||
|
||||
childNodes = doc.body.childNodes
|
||||
extraTailBrTags = []
|
||||
for i in [(childNodes.length - 1)..0] by -1
|
||||
curr = childNodes[i]
|
||||
next = childNodes[i - 1]
|
||||
if curr and curr.nodeName == 'BR' and next and next.nodeName == 'BR'
|
||||
extraTailBrTags.push(curr)
|
||||
else
|
||||
break
|
||||
|
||||
DOMUtils.Mutating.removeElements(extraTailBrTags)
|
||||
@removeTrailingBr(doc)
|
||||
DOMUtils.Mutating.removeElements(quoteStringDetector(doc))
|
||||
return doc.children[0].innerHTML
|
||||
|
||||
# Finds any trailing BR tags and removes them in place
|
||||
removeTrailingBr: (doc) ->
  # Scan the body's children from the end. A BR is collected only while the
  # child just before it is also a BR, so the first BR of a trailing run is
  # never collected.
  nodes = doc.body.childNodes
  redundantBrs = []
  idx = nodes.length - 1
  while idx >= 0
    node = nodes[idx]
    precedingNode = nodes[idx - 1]
    isDoubledBr = node and node.nodeName == 'BR' and precedingNode and precedingNode.nodeName == 'BR'
    break unless isDoubledBr
    redundantBrs.push(node)
    idx -= 1
  DOMUtils.Mutating.removeElements(redundantBrs)
|
||||
|
||||
appendQuotedHTML: (htmlWithoutQuotes, originalHTML) ->
|
||||
doc = @_parseHTML(originalHTML)
|
||||
quoteElements = @_findQuoteLikeElements(doc)
|
||||
|
@ -195,5 +200,4 @@ class QuotedHTMLTransformer
|
|||
_findBlockquoteQuotes: (doc) ->
|
||||
return Array::slice.call(doc.querySelectorAll('blockquote'))
|
||||
|
||||
|
||||
module.exports = new QuotedHTMLTransformer
|
||||
|
|
Loading…
Reference in a new issue