_ = require('underscore')
fs = require('fs')
path = require 'path'
QuotedHTMLTransformer = require('../src/services/quoted-html-transformer')

# Specs for QuotedHTMLTransformer.removeQuotedHTML. Two kinds of cases are
# covered: fixture files read from `fixtures/emails`, and hand-written
# before/after string pairs.
#
# Multi-line string bodies are kept flush-left on purpose so that their
# content is exactly what is stored in this file.
#
# NOTE(review): the inline fixture strings carry no HTML markup in this copy
# of the file; presumably the tags were lost at some point. Confirm against
# the upstream spec before relying on these cases.
describe "QuotedHTMLTransformer", ->

  # Reads a fixture from `fixtures/emails` (next to this spec) as UTF-8 text.
  readFile = (fname) ->
    fixturePath = path.resolve(__dirname, 'fixtures', 'emails', fname)
    fs.readFileSync(fixturePath, 'utf8')

  # Passes the named fixture through `hideQuotedHTML`.
  hideQuotedHTML = (fname) ->
    QuotedHTMLTransformer.hideQuotedHTML(readFile(fname))

  # Passes the named fixture through `removeQuotedHTML`, forwarding `opts`.
  removeQuotedHTML = (fname, opts={}) ->
    QuotedHTMLTransformer.removeQuotedHTML(readFile(fname), opts)

  # Counts how many times the transformer's annotation class occurs in `html`;
  # yields 0 when there is no match at all.
  numQuotes = (html) ->
    annotationPattern = new RegExp(QuotedHTMLTransformer.annotationClass, 'g')
    matches = html.match(annotationPattern)
    if matches? then matches.length else 0

  # One spec per numbered fixture: `email_N.html` with quotes removed must
  # equal `email_N_stripped.html` (both sides trimmed).
  [1..18].forEach (n) ->
    it "properly parses email_#{n}", ->
      opts = keepIfWholeBodyIsQuote: true
      actual = removeQuotedHTML("email_#{n}.html", opts).trim()
      expected = readFile("email_#{n}_stripped.html").trim()
      expect(actual).toEqual(expected)

  describe 'manual quote detection tests', ->

    # Drops line breaks, then collapses runs of 2+ whitespace characters into
    # a single space, so comparisons ignore layout differences.
    clean = (str) ->
      withoutLineBreaks = str.replace(/[\n\r]/g, "")
      withoutLineBreaks.replace(/\s{2,}/g, " ")

    # Each entry pairs the input HTML (`before`) with the expected output
    # (`after`).
    tests = []

    # Test 1
    tests.push
      before: """

Some text

More text

Parent
Sub
Sub Sub
Sub

Text at end

The last quote!

"""
      after: """

Some text

More text

Parent
Sub
Sub Sub
Sub

Text at end

"""

    # Test 2: Basic quote removal
    tests.push
      before: """
Yo

Nothing but quotes

"""
      after: """
Yo

"""

    # Test 3: It found the blockquote in another div
    tests.push
      before: """

Hello World

Nothing but quotes

"""
      after: """

Hello World

"""

    # Test 4: It works inside of a wrapped div
    tests.push
      before: """

Nothing but quotes

"""
      after: """

"""

    # Test 5: Inline quotes and text
    tests.push
      before: """ Hello

Inline quote

World """
      after: """ Hello

Inline quote

World """

    # Test 6: No quoted elements at all
    tests.push
      before: """ Hello World """
      after: """ Hello World """

    # Test 7: Common ancestor is a quoted node
    tests.push
      before: """

Content

Some content
More content
Other content

"""
      after: """

Content

"""

    # Test 8: All of the quote blocks we want to remove are at the end…
    # sort of… but nested in a bunch of stuff.
    #
    # Note that "content" is buried deep in the middle of a div.
    tests.push
      before: """

Content

Some content
More content
Other content

Some text quote

Some text

More text

A quote

Another quote

More quotes!

"""
      after: """

Content

Some content
More content
Other content

Some text quote

Some text

More text

"""

    # Test 9: The last several tags are blockquotes. Note the 3 blockquotes
    # at the end, the interstitial div, and the blockquote inside of the
    # first div.
    tests.push
      before: """

I'm inline

Content

Remove me

Foo

Bar

Baz

"""
      after: """

I'm inline

Content

"""

    # Test 10: If it's only a quote and no other text, then just show the
    # quote.
    tests.push
      before: """

Nothing but quotes

"""
      after: """

Nothing but quotes

"""

    # Test 11: The tag itself is just a quoted text block.
    # I believe this is https://sentry.nylas.com/sentry/edgehill/group/8323/
    tests.push
      before: """ This entire thing is quoted text! """
      after: ""

    # Test 12: Make sure that a single quote inside of a bunch of other
    # content is detected. We used to have a bug where we were only
    # looking at the common ancestor of blockquotes (and if there's 1 then
    # the ancestor is itself). We now look at the root document for
    # trailing text.
    tests.push
      before: """
Yo

A	B
C	SAVE ME
E	F

Yo
"""
      after: """
Yo

A	B
C	SAVE ME
E	F

Yo
"""

    # Test 13: If there's an "On date…" string immediately before a
    # blockquote, then remove it.
    tests.push
      before: """ Hey

On FOOBAR
On Thu, Mar 3, 2016 at 3:19 AM, First Middle Last-Last < test@nylas.com > wrote:

QUOTED TEXT

"""
      after: """ Hey

On FOOBAR

"""

    # Test 14: Don't pick up false positives on the string precursors to
    # block quotes.
    tests.push
      before: """ Hey

On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song

QUOTED TEXT

"""
      after: """ Hey

On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song

"""

    # Runs every before/after pair through removeQuotedHTML and compares the
    # whitespace-normalised results.
    it 'works with these manual test cases', ->
      tests.forEach ({before, after}) ->
        # A fresh options object per case, exactly one per transformer call.
        opts = keepIfWholeBodyIsQuote: true
        actual = clean(QuotedHTMLTransformer.removeQuotedHTML(before, opts))
        expect(actual).toEqual clean(after)

    it 'removes all trailing
tags except one', ->
      input = "hello world

foolololol

"
      expected = "hello world
"
      expect(QuotedHTMLTransformer.removeQuotedHTML(input)).toEqual expected

    it 'preserves
tags in the middle and only chops off tail', ->
      input = "hello

world

foolololol

"
      expected = "hello

world
"
      expect(QuotedHTMLTransformer.removeQuotedHTML(input)).toEqual expected

    it 'works as expected when body tag inside the html', ->
      input = """

On Dec 16 2015, at 7:08 pm, Juan Tejada <juan@nylas.com> wrote:

h2

he he hehehehehehe

dufjcasc

"""
      expected = "
"
      expect(QuotedHTMLTransformer.removeQuotedHTML(input)).toEqual expected

  # A small utility that can be manually enabled (change `xit` to `it`) to
  # generate what the current iteration of the QuotedHTMLTransformer thinks
  # the `removeQuotedHTML` output should look like. The generated files can be
  # inspected in a browser before being renamed to `email_#{n}_stripped.html`.
  # The actual tests run the current iteration of `removeQuotedHTML` against
  # those files to catch any change in the parser.
  #
  # It lives inside the specs instead of in its own script because
  # `QuotedHTMLTransformer` needs Electron booted up in order to work, because
  # of the DOMParser.
  xit "Run this simple function to generate output files", ->
    [18].forEach (n) ->
      strippedHTML = QuotedHTMLTransformer.removeQuotedHTML(readFile("email_#{n}.html"))
      targetPath = path.resolve(__dirname, 'fixtures', 'emails', "email_#{n}_raw_stripped.html")
      fs.writeFileSync(targetPath, strippedHTML)