mirror of
https://github.com/Foundry376/Mailspring.git
synced 2025-09-12 15:44:40 +08:00
feat(quote): add new quoted text detector for quote strings
This commit is contained in:
parent
4e05fc45c8
commit
e7ebf0ba2c
5 changed files with 151 additions and 12 deletions
|
@ -297,6 +297,65 @@ describe "QuotedHTMLTransformer", ->
|
||||||
<br></body>
|
<br></body>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Test 13: If there's an "On date…" string immediately before a blockquote,
|
||||||
|
# then remove it.
|
||||||
|
tests.push
|
||||||
|
before: """
|
||||||
|
Hey
|
||||||
|
<div>
|
||||||
|
On FOOBAR
|
||||||
|
<br>
|
||||||
|
On Thu, Mar 3, 2016
|
||||||
|
at 3:19 AM,
|
||||||
|
First Middle Last-Last
|
||||||
|
<span dir="ltr">
|
||||||
|
<
|
||||||
|
<a href="mailto:test@nylas.com" target="_blank">
|
||||||
|
test@nylas.com
|
||||||
|
</a>
|
||||||
|
>
|
||||||
|
</span>
|
||||||
|
wrote:
|
||||||
|
<br>
|
||||||
|
<blockquote>
|
||||||
|
QUOTED TEXT
|
||||||
|
</blockquote>
|
||||||
|
</div>
|
||||||
|
<br>
|
||||||
|
"""
|
||||||
|
after: """<head></head><body>
|
||||||
|
Hey
|
||||||
|
<div>
|
||||||
|
On FOOBAR
|
||||||
|
<br><br>
|
||||||
|
</div><br></body>
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Test 14: Don't pick up false positives on the string precursors to block
|
||||||
|
# quotes.
|
||||||
|
tests.push
|
||||||
|
before: """
|
||||||
|
Hey
|
||||||
|
<div>
|
||||||
|
On FOOBAR
|
||||||
|
<br>
|
||||||
|
On Thu, Mar 3, 2016 I went to my writing club and wrote:
|
||||||
|
<strong>A little song</strong>
|
||||||
|
<blockquote>
|
||||||
|
QUOTED TEXT
|
||||||
|
</blockquote>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
after: """<head></head><body>
|
||||||
|
Hey
|
||||||
|
<div>
|
||||||
|
On FOOBAR
|
||||||
|
<br>
|
||||||
|
On Thu, Mar 3, 2016 I went to my writing club and wrote:
|
||||||
|
<strong>A little song</strong>
|
||||||
|
</div></body>
|
||||||
|
"""
|
||||||
|
|
||||||
it 'works with these manual test cases', ->
|
it 'works with these manual test cases', ->
|
||||||
for {before, after} in tests
|
for {before, after} in tests
|
||||||
opts = keepIfWholeBodyIsQuote: true
|
opts = keepIfWholeBodyIsQuote: true
|
||||||
|
|
|
@ -312,6 +312,12 @@ DOMUtils =
|
||||||
else continue
|
else continue
|
||||||
return lastNode
|
return lastNode
|
||||||
|
|
||||||
|
# Returns the result of `DOMUtils.lastNode` applied to the last child of
# `node`, or null when `node` is falsy or has no children.
# NOTE(review): `DOMUtils.lastNode` is defined outside this view; presumably
# it drills down to the deepest trailing node — confirm against its definition.
lastDescendent: (node) ->
  return null unless node
  kids = node.childNodes
  return null unless kids.length > 0
  return DOMUtils.lastNode(kids[kids.length - 1])
|
||||||
|
|
||||||
findLastTextNode: (node) ->
|
findLastTextNode: (node) ->
|
||||||
return null unless node
|
return null unless node
|
||||||
return node if node.nodeType is Node.TEXT_NODE
|
return node if node.nodeType is Node.TEXT_NODE
|
||||||
|
|
23
src/dom-walkers.es6
Normal file
23
src/dom-walkers.es6
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
/**
 * Generator-based helpers for iterating over DOM trees with `for...of`.
 */
const DOMWalkers = {
  /**
   * Lazily yields every node produced by a TreeWalker.
   *
   * All arguments are forwarded untouched to `document.createTreeWalker`,
   * and nodes are yielded in the order `walker.nextNode()` returns them
   * until it returns a falsy value.
   *
   * @param {...*} treeWalkerArgs - Arguments for `document.createTreeWalker`.
   * @yields {Node} Each node returned by `walker.nextNode()`.
   */
  *walk(...treeWalkerArgs) {
    const walker = document.createTreeWalker(...treeWalkerArgs);
    for (let node = walker.nextNode(); node; node = walker.nextNode()) {
      yield node;
    }
  },

  /**
   * Yields `node` and all of its descendants starting from the end of the
   * tree: children are visited last-to-first, every child's subtree is
   * yielded before the child itself, and `node` is yielded last.
   *
   * @param {Node} [node] - Root of the subtree; a falsy value yields nothing.
   * @yields {Node} Descendants in reverse order, then `node` itself.
   */
  *walkBackwards(node) {
    if (!node) { return; }
    for (let i = node.childNodes.length - 1; i >= 0; i--) {
      // Recurse through the DOMWalkers binding rather than `this`, so the
      // generator keeps working when it is destructured or passed around
      // detached from the object.
      yield* DOMWalkers.walkBackwards(node.childNodes[i]);
    }
    yield node;
  },
};
|
||||||
|
export default DOMWalkers
|
47
src/services/quote-string-detector.es6
Normal file
47
src/services/quote-string-detector.es6
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
import DOMWalkers from '../dom-walkers'
|
||||||
|
|
||||||
|
/**
 * There are semi-common cases where immediately before a blockquote, we
 * encounter a string like: "On Thu … so and so … wrote:". This should be part
 * of the blockquote but was usually left as a collection of nodes. To help
 * with false-positives, we only look for strings like that which immediately
 * precede the blockquoted section. By the time the function gets here, the
 * last blockquote has been removed and the text we want will be at the end of
 * the document.
 *
 * The document is walked backwards. The first non-blank text node must end a
 * line with "wrote:"; from there every node (text or not) is collected until
 * a text node containing "On <something>" is reached.
 *
 * This is in its own file to make use of ES6 generators.
 *
 * @param {Node} doc - Root node to scan (walked via DOMWalkers.walkBackwards).
 * @returns {Node[]} The nodes making up the quote string, in the order they
 *   were visited (end of document first), or an empty array when no complete
 *   "On … wrote:" string sits at the end of the document.
 */
export default function quoteStringDetector(doc) {
  // Fresh, non-global patterns: `.test` on a /g regex is stateful, and the
  // flag adds nothing for a simple presence check.
  const quoteEnd = /wrote:$/im;
  const quoteStart = /On \S/im;

  const quoteNodesToRemove = [];
  let seenInitialQuoteEnd = false;

  for (const node of DOMWalkers.walkBackwards(doc)) {
    const isVisibleText =
      node.nodeType === Node.TEXT_NODE && node.nodeValue.trim().length > 0;

    if (!isVisibleText) {
      // Elements and blank text only count once we are inside the quote
      // string; anything trailing after "wrote:" is left alone.
      if (seenInitialQuoteEnd) {
        quoteNodesToRemove.push(node);
      }
      continue;
    }

    if (!seenInitialQuoteEnd) {
      if (!quoteEnd.test(node.nodeValue)) {
        // There is some other text between the end of the content (adjacent
        // to the blockquote) and any quote string. We shouldn't be killing
        // any text in this case.
        return [];
      }
      seenInitialQuoteEnd = true;
    }

    quoteNodesToRemove.push(node);
    if (quoteStart.test(node.nodeValue)) {
      // Reached the beginning of the quoted string (possibly in the same
      // node that ended it).
      return quoteNodesToRemove;
    }
  }

  // The walk ended without ever finding the "On …" start. By now the
  // collected list includes ancestor nodes up to the root, so returning it
  // would make the caller delete the entire document. Treat it as no match.
  return [];
}
|
|
@ -1,6 +1,7 @@
|
||||||
_ = require 'underscore'
|
_ = require 'underscore'
|
||||||
crypto = require 'crypto'
|
crypto = require 'crypto'
|
||||||
DOMUtils = require '../dom-utils'
|
DOMUtils = require '../dom-utils'
|
||||||
|
quoteStringDetector = require './quote-string-detector'
|
||||||
|
|
||||||
class QuotedHTMLTransformer
|
class QuotedHTMLTransformer
|
||||||
|
|
||||||
|
@ -49,19 +50,23 @@ class QuotedHTMLTransformer
|
||||||
# It's possible that the entire body was quoted text and we've removed everything.
|
# It's possible that the entire body was quoted text and we've removed everything.
|
||||||
return "<head></head><body></body>" unless doc.body
|
return "<head></head><body></body>" unless doc.body
|
||||||
|
|
||||||
childNodes = doc.body.childNodes
|
@removeTrailingBr(doc)
|
||||||
extraTailBrTags = []
|
DOMUtils.Mutating.removeElements(quoteStringDetector(doc))
|
||||||
for i in [(childNodes.length - 1)..0] by -1
|
|
||||||
curr = childNodes[i]
|
|
||||||
next = childNodes[i - 1]
|
|
||||||
if curr and curr.nodeName == 'BR' and next and next.nodeName == 'BR'
|
|
||||||
extraTailBrTags.push(curr)
|
|
||||||
else
|
|
||||||
break
|
|
||||||
|
|
||||||
DOMUtils.Mutating.removeElements(extraTailBrTags)
|
|
||||||
return doc.children[0].innerHTML
|
return doc.children[0].innerHTML
|
||||||
|
|
||||||
|
# Collapses a run of consecutive BR elements at the very end of `doc.body`,
# in place. Walking the body's direct children from last to first, a BR is
# collected for removal only while the child before it is also a BR, so the
# first BR of the trailing run is kept. Any other node (including a text node
# between two BRs) stops the scan.
# Returns whatever `DOMUtils.Mutating.removeElements` returns.
removeTrailingBr: (doc) ->
  kids = doc.body.childNodes
  redundantBrs = []
  idx = kids.length - 1
  while idx >= 0
    node = kids[idx]
    # The sibling that precedes `node` in document order.
    prev = kids[idx - 1]
    break unless node and node.nodeName == 'BR' and prev and prev.nodeName == 'BR'
    redundantBrs.push(node)
    idx -= 1
  DOMUtils.Mutating.removeElements(redundantBrs)
|
||||||
|
|
||||||
appendQuotedHTML: (htmlWithoutQuotes, originalHTML) ->
|
appendQuotedHTML: (htmlWithoutQuotes, originalHTML) ->
|
||||||
doc = @_parseHTML(originalHTML)
|
doc = @_parseHTML(originalHTML)
|
||||||
quoteElements = @_findQuoteLikeElements(doc)
|
quoteElements = @_findQuoteLikeElements(doc)
|
||||||
|
@ -195,5 +200,4 @@ class QuotedHTMLTransformer
|
||||||
_findBlockquoteQuotes: (doc) ->
|
_findBlockquoteQuotes: (doc) ->
|
||||||
return Array::slice.call(doc.querySelectorAll('blockquote'))
|
return Array::slice.call(doc.querySelectorAll('blockquote'))
|
||||||
|
|
||||||
|
|
||||||
module.exports = new QuotedHTMLTransformer
|
module.exports = new QuotedHTMLTransformer
|
||||||
|
|
Loading…
Add table
Reference in a new issue