mirror of
https://github.com/Foundry376/Mailspring.git
synced 2024-09-22 00:06:06 +08:00
feat(quote): improved quoted text detection for trailing signatures
This commit is contained in:
parent
c0b28456a9
commit
dbc81a87a4
14
spec/fixtures/emails/email_20.html
vendored
Normal file
14
spec/fixtures/emails/email_20.html
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
<div dir="ltr">Yaaay! So excited :) And no worries, see you in PR, if not before</div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Nov 4, 2016 at 2:07 PM, Evan Morikawa <span dir="ltr"><<a href="mailto:evan@evanmorikawa.com" target="_blank">evan@evanmorikawa.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Y,<div><div><br></div><div>YES! We'd love to go to Puerto Rico. I just signed up on the site for Nora and I. I'm so sorry for the LONG delay on getting back to you. We had a lot of other commitments up in the air around then. So excited to see you guys in Puerto Rico!</div></div><div><br></div><div>Also, I'm unfortunately in NYC the week of Nov 7th and back in SF week of the 14th, otherwise I'd love to see you here too.</div><div><br></div><div>Evan</div><img class="m_2465269450974321714n1-open" width="0" height="0" style="border:0;width:0;height:0" src="https://link.nylas.com/open/1ocrhlu1fap8935xrnic0cmnm/local-139b0028-d812?r=eWlmYW56aGFuZzJAZ21haWwuY29t"><div class="HOEnZb"><div class="h5">
|
||||
<div class="gmail_quote m_2465269450974321714nylas-quote m_2465269450974321714nylas-quote-id-92my6rmekrk94aws2clzwhwgy">
|
||||
<br>
|
||||
On Nov 3 2016, at 6:56 pm, Y J <<a href="mailto:YJ2@gmail.com" target="_blank">YJ2@gmail.com</a>> wrote:
|
||||
<br>
|
||||
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
|
||||
<div dir="ltr">Hi Evan & Nora,<div>We're getting down to the wire and need to send final counts to our vendors <b>tomorrow</b>. If you could please let us know your RSVP for the Dec 10 engagement party and/or the Jan 15 wedding via our website, that would be amazing! Hope to see you soon :)</div><div><br></div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Ftheknot.com%2Fus%2FY-and-geoff&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">theknot.com/us/Y-and-geoff</a><br></div><div><br></div><div>Y & Geoff</div>
|
||||
<div><div><br></div>-- <br><div><div dir="ltr"><div>Ms. Y J<br><br><div> College 2010<br>AB in Economics</div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Fwww.twitter.com%2FYz&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">@Yz</a></div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Fwww.app.com&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">www.app.com</a></div><div><br></div></div></div></div>
|
||||
</div>
|
||||
</div>
|
||||
<img src="https://YJ2-dot-yamm-track.appspot.com/FireBase?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177" width="1" height="1" alt="beacon" style="display:none;display:none!important">
|
||||
</blockquote>
|
||||
</div></div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div>Ms. Y J<br><br><div> College 2010<br>AB in Economics</div><div><a href="http://www.twitter.com/Yz" target="_blank">@Yz</a></div><div><a href="http://www.app.com" target="_blank">www.app.com</a></div><div><br></div></div></div></div>
|
||||
</div>
|
2
spec/fixtures/emails/email_20_stripped.html
vendored
Normal file
2
spec/fixtures/emails/email_20_stripped.html
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
<div dir="ltr">Yaaay! So excited :) And no worries, see you in PR, if not before</div><div class="gmail_extra"><br><br><br clear="all"><div><br></div><br>
|
||||
</div>
|
16
spec/fixtures/emails/email_21.html
vendored
Normal file
16
spec/fixtures/emails/email_21.html
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
<div dir="ltr">Yaaay! So excited :) And no worries, see you in PR, if not before</div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Nov 4, 2016 at 2:07 PM, Evan Morikawa <span dir="ltr"><<a href="mailto:evan@evanmorikawa.com" target="_blank">evan@evanmorikawa.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Y,<div><div><br></div><div>YES! We'd love to go to Puerto Rico. I just signed up on the site for Nora and I. I'm so sorry for the LONG delay on getting back to you. We had a lot of other commitments up in the air around then. So excited to see you guys in Puerto Rico!</div></div><div><br></div><div>Also, I'm unfortunately in NYC the week of Nov 7th and back in SF week of the 14th, otherwise I'd love to see you here too.</div><div><br></div><div>Evan</div><img class="m_2465269450974321714n1-open" width="0" height="0" style="border:0;width:0;height:0" src="https://link.nylas.com/open/1ocrhlu1fap8935xrnic0cmnm/local-139b0028-d812?r=eWlmYW56aGFuZzJAZ21haWwuY29t"><div class="HOEnZb"><div class="h5">
|
||||
<div class="gmail_quote m_2465269450974321714nylas-quote m_2465269450974321714nylas-quote-id-92my6rmekrk94aws2clzwhwgy">
|
||||
<br>
|
||||
On Nov 3 2016, at 6:56 pm, Y J <<a href="mailto:YJ2@gmail.com" target="_blank">YJ2@gmail.com</a>> wrote:
|
||||
<br>
|
||||
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
|
||||
<div dir="ltr">Hi Evan & Nora,<div>We're getting down to the wire and need to send final counts to our vendors <b>tomorrow</b>. If you could please let us know your RSVP for the Dec 10 engagement party and/or the Jan 15 wedding via our website, that would be amazing! Hope to see you soon :)</div><div><br></div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Ftheknot.com%2Fus%2FY-and-geoff&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">theknot.com/us/Y-and-geoff</a><br></div><div><br></div><div>Y & Geoff</div>
|
||||
<div><div><br></div>-- <br><div><div dir="ltr"><div>Ms. Y J<br><br><div> College 2010<br>AB in Economics</div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Fwww.twitter.com%2FYz&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">@Yz</a></div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Fwww.app.com&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">www.app.com</a></div><div><br></div></div></div></div>
|
||||
</div>
|
||||
</div>
|
||||
<img src="https://YJ2-dot-yamm-track.appspot.com/FireBase?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177" width="1" height="1" alt="beacon" style="display:none;display:none!important">
|
||||
</blockquote>
|
||||
</div></div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div>Ms. Y J<br><br><div> College 2010<br>AB in Economics</div><div><a href="http://www.twitter.com/Yz" target="_blank">@Yz</a></div><div><a href="http://www.app.com" target="_blank">www.app.com</a></div><div><br></div></div></div></div>
|
||||
<div>This is some unique text after the signature. It's as if I'm
|
||||
typing inline. We should NOT collapse this area</div>
|
||||
</div>
|
16
spec/fixtures/emails/email_21_stripped.html
vendored
Normal file
16
spec/fixtures/emails/email_21_stripped.html
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
<div dir="ltr">Yaaay! So excited :) And no worries, see you in PR, if not before</div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Nov 4, 2016 at 2:07 PM, Evan Morikawa <span dir="ltr"><<a href="mailto:evan@evanmorikawa.com" target="_blank">evan@evanmorikawa.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Y,<div><div><br></div><div>YES! We'd love to go to Puerto Rico. I just signed up on the site for Nora and I. I'm so sorry for the LONG delay on getting back to you. We had a lot of other commitments up in the air around then. So excited to see you guys in Puerto Rico!</div></div><div><br></div><div>Also, I'm unfortunately in NYC the week of Nov 7th and back in SF week of the 14th, otherwise I'd love to see you here too.</div><div><br></div><div>Evan</div><img class="m_2465269450974321714n1-open" width="0" height="0" style="border:0;width:0;height:0" src="https://link.nylas.com/open/1ocrhlu1fap8935xrnic0cmnm/local-139b0028-d812?r=eWlmYW56aGFuZzJAZ21haWwuY29t"><div class="HOEnZb"><div class="h5">
|
||||
<div class="gmail_quote m_2465269450974321714nylas-quote m_2465269450974321714nylas-quote-id-92my6rmekrk94aws2clzwhwgy">
|
||||
<br>
|
||||
On Nov 3 2016, at 6:56 pm, Y J <<a href="mailto:YJ2@gmail.com" target="_blank">YJ2@gmail.com</a>> wrote:
|
||||
<br>
|
||||
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
|
||||
<div dir="ltr">Hi Evan & Nora,<div>We're getting down to the wire and need to send final counts to our vendors <b>tomorrow</b>. If you could please let us know your RSVP for the Dec 10 engagement party and/or the Jan 15 wedding via our website, that would be amazing! Hope to see you soon :)</div><div><br></div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Ftheknot.com%2Fus%2FY-and-geoff&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">theknot.com/us/Y-and-geoff</a><br></div><div><br></div><div>Y & Geoff</div>
|
||||
<div><div><br></div>-- <br><div><div dir="ltr"><div>Ms. Y J<br><br><div> College 2010<br>AB in Economics</div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Fwww.twitter.com%2FYz&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">@Yz</a></div><div><a href="https://YJ2-dot-yamm-track.appspot.com/Redirect?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177&link=http%3A%2F%2Fwww.app.com&r=eWlmYW56aGFuZzJAZ21haWwuY29t" target="_blank">www.app.com</a></div><div><br></div></div></div></div>
|
||||
</div>
|
||||
</div>
|
||||
<img src="https://YJ2-dot-yamm-track.appspot.com/FireBase?ukey=1KbnLIl4_8tooUTgmQ_uMogd5HthhVhfP6x6UFR8wq28-0&key=YAMMID-24566177" width="1" height="1" alt="beacon" style="display:none;display:none!important">
|
||||
</blockquote>
|
||||
</div></div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div>Ms. Y J<br><br><div> College 2010<br>AB in Economics</div><div><a href="http://www.twitter.com/Yz" target="_blank">@Yz</a></div><div><a href="http://www.app.com" target="_blank">www.app.com</a></div><div><br></div></div></div></div>
|
||||
<div>This is some unique text after the signature. It's as if I'm
|
||||
typing inline. We should NOT collapse this area</div>
|
||||
</div>
|
|
@ -19,7 +19,7 @@ describe "QuotedHTMLTransformer", ->
|
|||
re = new RegExp(QuotedHTMLTransformer.annotationClass, 'g')
|
||||
html.match(re)?.length ? 0
|
||||
|
||||
[1..19].forEach (n) ->
|
||||
[1..21].forEach (n) ->
|
||||
it "properly parses email_#{n}", ->
|
||||
opts = keepIfWholeBodyIsQuote: true
|
||||
expect(removeQuotedHTML("email_#{n}.html", opts).trim()).toEqual(readFile("email_#{n}_stripped.html").trim())
|
||||
|
@ -405,7 +405,7 @@ describe "QuotedHTMLTransformer", ->
|
|||
# `QuotedHTMLTransformer` needs Electron booted up in order to work because
|
||||
# of the DOMParser.
|
||||
xit "Run this simple function to generate output files", ->
|
||||
[19..20].forEach (n) ->
|
||||
[21].forEach (n) ->
|
||||
newHTML = QuotedHTMLTransformer.removeQuotedHTML(readFile("email_#{n}.html"))
|
||||
outPath = path.resolve(__dirname, '..', 'fixtures', 'emails', "email_#{n}_raw_stripped.html")
|
||||
fs.writeFileSync(outPath, newHTML)
|
||||
|
|
|
@ -10,6 +10,8 @@ import DOMWalkers from '../dom-walkers'
|
|||
* the document.
|
||||
*
|
||||
* This is in its own file to make use of ES6 generators
|
||||
*
|
||||
* See email_19 as a test case for this.
|
||||
*/
|
||||
export default function quoteStringDetector(doc) {
|
||||
const quoteNodesToRemove = [];
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import _ from 'underscore';
|
||||
import DOMUtils from '../dom-utils';
|
||||
import quoteStringDetector from './quote-string-detector';
|
||||
import unwrappedSignatureDetector from './unwrapped-signature-detector';
|
||||
|
||||
class QuotedHTMLTransformer {
|
||||
|
||||
|
@ -151,6 +152,7 @@ class QuotedHTMLTransformer {
|
|||
for (const parser of parsers) {
|
||||
quoteElements = quoteElements.concat(parser(doc) || []);
|
||||
}
|
||||
quoteElements = quoteElements.concat(unwrappedSignatureDetector(doc, quoteElements))
|
||||
|
||||
if (!includeInline && quoteElements.length > 0) {
|
||||
// This means we only want to remove quoted text that shows up at the
|
||||
|
|
43
src/services/unwrapped-signature-detector.es6
Normal file
43
src/services/unwrapped-signature-detector.es6
Normal file
|
@ -0,0 +1,43 @@
|
|||
import DOMWalkers from '../dom-walkers'
|
||||
import Utils from '../flux/models/utils'
|
||||
|
||||
/**
 * Collects everything that follows `node` without descending into `node`
 * itself: first the following siblings of `node`, then the following
 * siblings of its parent, and so on upwards until an ancestor has no
 * `parentNode`.
 *
 * @param {Node} node - The node to start from. It is not part of the result.
 * @returns {{text: string, nodes: Node[]}} `text` is the concatenated
 *   `textContent` of every collected node and `nodes` holds those nodes in
 *   the order they were visited (closest level first).
 */
function textAndNodesAfterNode(node) {
  let text = "";
  const nodes = [];
  for (let ancestor = node; ancestor; ancestor = ancestor.parentNode) {
    for (let sibling = ancestor.nextSibling; sibling; sibling = sibling.nextSibling) {
      // `textContent` may be null for some node types; without the fallback
      // the literal string "null" would be appended to the collected text.
      text += sibling.textContent || "";
      nodes.push(sibling);
    }
  }
  return {text, nodes};
}
|
||||
|
||||
/**
 * Sometimes the last signature of an email will not be placed in a quote
 * block. This will cause our quote detector to not strip anything since
 * it looks very similar to someone writing inline regular text after some
 * quoted text (which is allowed).
 *
 * To catch that case, the first node yielded by `DOMWalkers.walkBackwards`
 * that is one of `quoteElements` is taken as the last quote block. If the
 * trimmed text that follows it also appears verbatim inside that quote
 * block, the trailing nodes are reported as quoted content too.
 *
 * See email_20 and email_21 as a test case for this.
 *
 * @param {Document} doc - The parsed email document to inspect.
 * @param {Node[]} quoteElements - Quote nodes that were already detected.
 * @returns {Node[]} The nodes that follow the last quote element when their
 *   combined text is repeated inside it; otherwise an empty array.
 */
export default function unwrappedSignatureDetector(doc, quoteElements) {
  // Without any detected quote there is no "last quote block" to compare
  // against, so skip walking the document entirely.
  if (!quoteElements || quoteElements.length === 0) {
    return [];
  }

  // Built once so the membership test inside the walk is not a linear scan.
  const quoteSet = new Set(quoteElements);

  // Find the last quoteBlock
  // NOTE(review): presumably walkBackwards yields nodes from the end of the
  // document towards the start; confirm against dom-walkers.
  for (const node of DOMWalkers.walkBackwards(doc)) {
    if (!quoteSet.has(node)) {
      continue;
    }

    const {text, nodes} = textAndNodesAfterNode(node);
    const maybeSig = text.trim();

    // Plain substring match: trailing text that does not occur inside the
    // quote (e.g. an inline reply typed after it) must not be collapsed.
    if (maybeSig.length > 0 && (node.textContent || "").includes(maybeSig)) {
      return nodes;
    }

    // Only the last quote element is ever considered.
    break;
  }

  return [];
}
|
Loading…
Reference in a new issue