fix(quote): properly detect "wrote: " strings with trailing space

This commit is contained in:
Evan Morikawa 2016-11-04 18:37:26 -07:00
parent 4a40074cd1
commit c0b28456a9
4 changed files with 97 additions and 3 deletions

80
spec/fixtures/emails/email_19.html vendored Normal file
View file

@@ -0,0 +1,80 @@
<html>
<head>
<meta http-equiv=3D"Content-Type" content=3D"text/html; charset=3Diso-8859-=
1">
</head>
<body>
and again<br>
<br>
<signature>Sent from <a href=3D"https://link.nylas.com/link/aoi4q8bqixxd59z=
vuog74b15d/local-1b7450ec-1444/0?redirect=3Dhttps%3A%2F%2Fnylas.com%2Fn1%3F=
ref%3Dn1">
Nylas N1</a>, the extensible, open source mail client.</signature><img clas=
s=3D"n1-open" width=3D"0" height=3D"0" style=3D"border:0; width:0; height:0=
;" src=3D"https://link.nylas.com/open/aoi4q8bqixxd59zvuog74b15d/local-1b745=
0ec-1444">
<div class=3D"gmail_quote nylas-quote nylas-quote-id-12zz9ff2coj10pe2e6g23i=
ib4"><br>
On Nov 4 2016, at 2:28 pm, Juan Tejada &lt;juan@nylas.com&gt; wrote: <br>
<blockquote class=3D"gmail_quote" style=3D"margin:0 0 0 .8ex;border-left:1p=
x #ccc solid;padding-left:1ex;">
hey evan sorry to spam you so much<br>
<br>
Sent from <a href=3D"https://link.nylas.com/link/aoi4q8bqixxd59zvuog74b15d/=
local-038a4c1f-8bd7/0?redirect=3Dhttps%3A%2F%2Fnylas.com%2Fn1%3Fref%3Dn1">
Nylas N1</a>, the extensible, open source mail client. <img width=3D"0" hei=
ght=3D"0" style=3D"border:0; width:0; height:0;" src=3D"https://link.nylas.=
com/open/aoi4q8bqixxd59zvuog74b15d/local-038a4c1f-8bd7">
<div><br>
On Nov 4 2016, at 2:20 pm, Juan Tejada &lt;juan@nylas.com&gt; wrote: <br>
<blockquote style=3D"margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-l=
eft:1ex;">
wat<br>
<br>
Sent from <a href=3D"https://link.nylas.com/link/aoi4q8bqixxd59zvuog74b15d/=
local-fa431492-4362/0?redirect=3Dhttps%3A%2F%2Fnylas.com%2Fn1%3Fref%3Dn1">
Nylas N1</a>, the extensible, open source mail client.<img width=3D"0" heig=
ht=3D"0" style=3D"border:0; width:0; height:0;" src=3D"https://link.nylas.c=
om/open/aoi4q8bqixxd59zvuog74b15d/local-fa431492-4362">
<div><br>
On Nov 4 2016, at 1:19 pm, Juan Tejada &lt;juan@nylas.com&gt; wrote: <br>
<blockquote style=3D"margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-l=
eft:1ex;">
this should only happen once<br>
<br>
Sent from <a href=3D"https://link.nylas.com/link/aoi4q8bqixxd59zvuog74b15d/=
local-7e7f9d5f-73ac/0?redirect=3Dhttps%3A%2F%2Fnylas.com%2Fn1%3Fref%3Dn1">
Nylas N1</a>, the extensible, open source mail client.<img width=3D"0" heig=
ht=3D"0" style=3D"border:0; width:0; height:0;" src=3D"https://link.nylas.c=
om/open/aoi4q8bqixxd59zvuog74b15d/local-7e7f9d5f-73ac">
<div><br>
On Nov 4 2016, at 1:13 pm, Juan Tejada &lt;juan@nylas.com&gt; wrote: <br>
<blockquote style=3D"margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-l=
eft:1ex;">
and again<br>
<br>
Sent from <a href=3D"https://link.nylas.com/link/aoi4q8bqixxd59zvuog74b15d/=
local-da59d244-e3f8/0?redirect=3Dhttps%3A%2F%2Fnylas.com%2Fn1%3Fref%3Dn1">
Nylas N1</a>, the extensible, open source mail client.<img width=3D"0" heig=
ht=3D"0" style=3D"border:0; width:0; height:0;" src=3D"https://link.nylas.c=
om/open/aoi4q8bqixxd59zvuog74b15d/local-da59d244-e3f8">
<div><br>
On Nov 4 2016, at 1:13 pm, Juan Tejada &lt;juan@nylas.com&gt; wrote: <br>
<blockquote style=3D"margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-l=
eft:1ex;">
and some other stuff<br>
<br>
Sent from <a href=3D"https://nylas.com/n1?ref=3Dn1">Nylas N1</a>, the exten=
sible, open source mail client.
</blockquote>
</div>
</blockquote>
</div>
</blockquote>
</div>
</blockquote>
</div>
</blockquote>
</div>
</body>
</html>

View file

@@ -0,0 +1,14 @@
<head>
<meta http-equiv="3D&quot;Content-Type&quot;" content="3D&quot;text/html;" charset="3Diso-8859-=" 1"="">
</head>
<body>
and again<br>
<br>
<signature>Sent from <a href="3D&quot;https://link.nylas.com/link/aoi4q8bqixxd59z=" vuog74b15d="" local-1b7450ec-1444="" 0?redirect="3Dhttps%3A%2F%2Fnylas.com%2Fn1%3F=" ref%3dn1"="">
Nylas N1</a>, the extensible, open source mail client.</signature><img clas="s=3D&quot;n1-open&quot;" width="3D&quot;0&quot;" height="3D&quot;0&quot;" style="3D&quot;border:0;" width:0;="" height:0=";&quot;" src="3D&quot;https://link.nylas.com/open/aoi4q8bqixxd59zvuog74b15d/local-1b745=" 0ec-1444"="">
<div class="3D&quot;gmail_quote" nylas-quote="" nylas-quote-id-12zz9ff2coj10pe2e6g23i="ib4&quot;"><br><br>
</div>
</body>

View file

@@ -19,7 +19,7 @@ describe "QuotedHTMLTransformer", ->
re = new RegExp(QuotedHTMLTransformer.annotationClass, 'g')
html.match(re)?.length ? 0
-[1..18].forEach (n) ->
+[1..19].forEach (n) ->
it "properly parses email_#{n}", ->
opts = keepIfWholeBodyIsQuote: true
expect(removeQuotedHTML("email_#{n}.html", opts).trim()).toEqual(readFile("email_#{n}_stripped.html").trim())
@@ -405,7 +405,7 @@ describe "QuotedHTMLTransformer", ->
# `QuotedHTMLTransformer` needs Electron booted up in order to work because
# of the DOMParser.
xit "Run this simple function to generate output files", ->
-[18].forEach (n) ->
+[19..20].forEach (n) ->
newHTML = QuotedHTMLTransformer.removeQuotedHTML(readFile("email_#{n}.html"))
outPath = path.resolve(__dirname, '..', 'fixtures', 'emails', "email_#{n}_raw_stripped.html")
fs.writeFileSync(outPath, newHTML)

View file

@@ -17,7 +17,7 @@ export default function quoteStringDetector(doc) {
for (const node of DOMWalkers.walkBackwards(doc)) {
if (node.nodeType === Node.TEXT_NODE && node.nodeValue.trim().length > 0) {
if (!seenInitialQuoteEnd) {
-if (/wrote:$/gim.test(node.nodeValue)) {
+if (/wrote:\s*$/gim.test(node.nodeValue)) {
seenInitialQuoteEnd = true;
quoteNodesToRemove.push(node);
if (/On \S/gim.test(node.nodeValue)) {