# Spec for QuotedHTMLParser: checks removeQuotedHTML against the email_1..email_15 fixture files (each compared with its email_N_stripped.html counterpart) and against the hand-written before/after cases below.
# NOTE(review): line breaks and indentation appear to have been collapsed in this copy, and the HTML tags inside the fixture strings look stripped - confirm against version control before relying on these cases.
_ = require('underscore') fs = require('fs') path = require 'path' QuotedHTMLParser = require('../src/services/quoted-html-parser') describe "QuotedHTMLParser", -> readFile = (fname) -> emailPath = path.resolve(__dirname, 'fixtures', 'emails', fname) return fs.readFileSync(emailPath, 'utf8') hideQuotedHTML = (fname) -> return QuotedHTMLParser.hideQuotedHTML(readFile(fname)) removeQuotedHTML = (fname) -> return QuotedHTMLParser.removeQuotedHTML(readFile(fname)) numQuotes = (html) -> re = new RegExp(QuotedHTMLParser.annotationClass, 'g') html.match(re)?.length ? 0 [1..15].forEach (n) -> it "properly parses email_#{n}", -> expect(removeQuotedHTML("email_#{n}.html")).toEqual readFile("email_#{n}_stripped.html") describe 'manual quote detection tests', -> clean = (str) -> str.replace(/[\n\r]/g, "").replace(/\s{2,}/g, " ") # Each entry's `before` is the input HTML; its `after` is the expected output HTML tests = [] # Test 1 tests.push before: """

Some text

More text

Parent
Sub
Sub Sub
Sub

Text at end

The last quote!

""" after: """

Some text

More text

Parent
Sub
Sub Sub
Sub

Text at end

""" # Test 2 tests.push before: """

Nothing but quotes

""" after: """

""" # Test 3: It found the blockquote in another div tests.push before: """

Hello World

Nothing but quotes

""" after: """

Hello World

""" # Test 4: It works inside of a wrapped div tests.push before: """

Nothing but quotes

""" after: """

""" # Test 5: Inline quotes and text tests.push before: """ Hello

Inline quote

World """ after: """ Hello

Inline quote

World """ # Test 6: No quoted elements at all tests.push before: """ Hello World """ after: """ Hello World """ # Test 7: Common ancestor is a quoted node tests.push before: """

Content

Some content
More content
Other content

""" after: """

Content

""" # Test 8: All of our quote blocks we want to remove are at the end… # sort of… but nested in a bunch of stuff # # Note that "content" is buried deep in the middle of a div tests.push before: """

Content

Some content
More content
Other content

Some text quote

Some text

More text

A quote

Another quote

More quotes!

""" after: """

Content

Some content
More content
Other content

Some text quote

Some text

More text

""" # Test 9: Last several tags are blockquotes. Note the 3 blockquotes # at the end, the interstitial div, and the blockquote inside of the # first div tests.push before: """

I'm inline

Content

Remove me

Foo

Bar

Baz

""" after: """

I'm inline

Content

""" it 'works with these manual test cases', -> for {before, after} in tests test = clean(QuotedHTMLParser.removeQuotedHTML(before)) expect(test).toEqual clean(after) it 'removes all trailing
tags except one', -> input0 = "hello world

foolololol

" expect0 = "hello world
" expect(QuotedHTMLParser.removeQuotedHTML(input0)).toEqual expect0 it 'preserves
tags in the middle and only chops off tail', -> input0 = "hello

world

foolololol

" expect0 = "hello

world
" expect(QuotedHTMLParser.removeQuotedHTML(input0)).toEqual expect0 # We have a little utility method that you can manually uncomment to # generate what the current iteration of the QuotedHTMLParser thinks the # `removeQuotedHTML` output should look like. These can be manually inspected in # a browser before getting their filename changed to # `email_#{n}_stripped.html`. The actual tests will run the current # iteration of the `removeQuotedHTML` against these files to catch if # anything has changed in the parser. # # It's inside of the specs here instead of its own script because the # `QuotedHTMLParser` needs Electron booted up in order to work because # of the DOMParser. xit "Run this simple funciton to generate output files", -> [1..15].forEach (n) -> newHTML = QuotedHTMLParser.removeQuotedHTML(readFile("email_#{n}.html")) outPath = path.resolve(__dirname, 'fixtures', 'emails', "email_#{n}_raw_stripped.html") fs.writeFileSync(outPath, newHTML)