diff --git a/app/spec/fixtures/emails/email_24.html b/app/spec/fixtures/emails/email_24.html new file mode 100644 index 000000000..2a03cdaab --- /dev/null +++ b/app/spec/fixtures/emails/email_24.html @@ -0,0 +1,50 @@ +
+ + + + + + + + + +
+

Pretty much, I + think we’ll have to turn to this later as it’s low + priority.

+

 

+
+

+ + XXXXX
X, + XXXXX
X + Avenue of the Americas
+ New York, NY 10104-3800
+

+

 

+
+
+

+
+
+

 

+ +

 

+ +

 

+ + +

 

+ +
+

 

+
+ +

 

+ +

 

+

+  

+
+
+
\ No newline at end of file diff --git a/app/spec/fixtures/emails/email_24_stripped.html b/app/spec/fixtures/emails/email_24_stripped.html new file mode 100644 index 000000000..d5a7b882d --- /dev/null +++ b/app/spec/fixtures/emails/email_24_stripped.html @@ -0,0 +1,22 @@ +
+ + + + + + + + + +
+

Pretty much, I + think we’ll have to turn to this later as it’s low + priority.

+

 

+
+

+ + XXXXX
X, + XXXXX
X + Avenue of the Americas
+ New York, NY 10104-3800

\ No newline at end of file diff --git a/app/spec/services/quoted-html-transformer-spec.coffee b/app/spec/services/quoted-html-transformer-spec.coffee index 9d4c0e39d..f3d375409 100644 --- a/app/spec/services/quoted-html-transformer-spec.coffee +++ b/app/spec/services/quoted-html-transformer-spec.coffee @@ -19,7 +19,7 @@ describe "QuotedHTMLTransformer", -> re = new RegExp(QuotedHTMLTransformer.annotationClass, 'g') html.match(re)?.length ? 0 - [1..23].forEach (n) -> + [1..24].forEach (n) -> it "properly parses email_#{n}", -> opts = keepIfWholeBodyIsQuote: true expect(removeQuotedHTML("email_#{n}.html", opts).trim()).toEqual(readFile("email_#{n}_stripped.html").trim()) diff --git a/app/src/services/quoted-html-transformer.es6 b/app/src/services/quoted-html-transformer.es6 index 1ccf56913..7bb81d3f7 100644 --- a/app/src/services/quoted-html-transformer.es6 +++ b/app/src/services/quoted-html-transformer.es6 @@ -116,7 +116,7 @@ class QuotedHTMLTransformer { continue; } } - if (['BR', 'P', 'DIV', 'SPAN'].includes(child.nodeName)) { + if (['BR', 'P', 'DIV', 'SPAN', 'HR'].includes(child.nodeName)) { removeTrailingWhitespaceChildren(child); if ((child.childElementCount === 0) && (child.textContent.trim() === '')) { child.remove(); @@ -377,14 +377,18 @@ class QuotedHTMLTransformer { _findQuotesAfterMessageHeaderBlock(doc) { // This detector looks for a element in the DOM tree containing - // three children: Sent: and To: and Subject:. - // It then returns every node after that as quoted text. + // three children: Sent: or Date: and To: and + // Subject:. It then returns every node after that as quoted text. // Find a DOM node exactly matching Sent: - const to = doc.evaluate("//b[. = 'Sent:']", doc.body, null, XPathResult.ANY_TYPE, null).iterateNext(); - if (to) { + const dateMarker = ( + doc.evaluate("//b[. = 'Sent:']", doc.body, null, XPathResult.ANY_TYPE, null).iterateNext() || + doc.evaluate("//b[. = 'Date:']", doc.body, null, XPathResult.ANY_TYPE, null).iterateNext() + ); + + if (dateMarker) { // check to see if the parent container also contains the other two - const headerContainer = to.parentElement; + const headerContainer = dateMarker.parentElement; let matches = 0; for (const node of Array.from(headerContainer.children)) { if ((node.textContent === "To:") || (node.textContent === "Subject:")) {