_ = require('underscore')
fs = require('fs')
path = require 'path'
QuotedHTMLParser = require('../src/services/quoted-html-parser')
describe "QuotedHTMLParser", ->
# Reads `fname` from the `fixtures/emails` directory next to this spec and
# returns its contents decoded as UTF-8.
readFile = (fname) ->
  fixturePath = path.resolve __dirname, 'fixtures', 'emails', fname
  fs.readFileSync fixturePath, 'utf8'
# Loads the fixture `fname` and returns what `QuotedHTMLParser.hideQuotedHTML`
# produces for its contents.
hideQuotedHTML = (fname) ->
  fixtureHTML = readFile(fname)
  QuotedHTMLParser.hideQuotedHTML(fixtureHTML)
# Loads the fixture `fname` and returns what `QuotedHTMLParser.removeQuotedHTML`
# produces for its contents.
removeQuotedHTML = (fname) ->
  fixtureHTML = readFile(fname)
  QuotedHTMLParser.removeQuotedHTML(fixtureHTML)
# Counts how many times `QuotedHTMLParser.annotationClass` occurs in `html`.
# The class name is used as a global regular expression; returns 0 when
# nothing matches.
numQuotes = (html) ->
  pattern = new RegExp(QuotedHTMLParser.annotationClass, 'g')
  matches = html.match(pattern)
  # `String::match` yields null (not an empty array) when there are no hits.
  if matches? then matches.length else 0
# Registers one spec per fixture pair: the result of `removeQuotedHTML` on
# `email_<n>.html` must equal the contents of `email_<n>_stripped.html`.
[1..15].forEach (index) ->
  sourceName = "email_#{index}.html"
  expectedName = "email_#{index}_stripped.html"
  it "properly parses email_#{index}", ->
    stripped = removeQuotedHTML(sourceName)
    expect(stripped).toEqual readFile(expectedName)
describe 'manual quote detection tests', ->
# Normalizes whitespace so HTML strings can be compared loosely: every line
# break is deleted first, then each run of two or more whitespace characters
# is collapsed into a single space.
clean = (str) ->
  withoutBreaks = str.replace(/[\n\r]/g, "")
  withoutBreaks.replace(/\s{2,}/g, " ")
# Each entry pairs `before` (the HTML handed to `removeQuotedHTML`) with
# `after` (the output expected for it). Both sides are passed through
# `clean` before being compared.
tests = []
# Test 1
tests.push
before: """
Some text
More text
Parent
Sub
Sub Sub
Sub
Text at end
The last quote!
"""
after: """
Some text
More text
Parent
Sub
Sub Sub
Sub
Text at end
"""
# Test 2
tests.push
before: """
Nothing but quotes
"""
after: """
"""
# Test 3: It found the blockquote in another div
tests.push
before: """
Hello World
"""
after: """
Hello World
"""
# Test 4: It works inside of a wrapped div
tests.push
before: """
"""
after: """
"""
# Test 5: Inline quotes and text
tests.push
before: """
Hello
Inline quote
World
"""
after: """
Hello
Inline quote
World
"""
# Test 6: No quoted elements at all
tests.push
before: """
Hello World
"""
after: """
Hello World
"""
# Test 7: Common ancestor is a quoted node
tests.push
before: """
Content
Some content
More content
Other content
"""
after: """
Content
"""
# Test 8: All of the quote blocks we want to remove are at the end…
# sort of… but nested in a bunch of stuff
#
# Note that "content" is buried deep in the middle of a div
tests.push
before: """
Content
Some content
More content
Other content
Some text quote
Some text
Another quote
More quotes!
"""
after: """
Content
Some content
More content
Other content
Some text quote
Some text
More text
"""
# Test 9: Last several tags are blockquotes. Note the 3 blockquotes
# at the end, the interstitial div, and the blockquote inside of the
# first div
tests.push
before: """
I'm inline
Content
Remove me
Foo
Bar
Baz
"""
after: """
"""
# Runs every entry of `tests` through `QuotedHTMLParser.removeQuotedHTML` and
# compares the whitespace-normalized result with the normalized expectation.
it 'works with these manual test cases', ->
  for testCase in tests
    actual = QuotedHTMLParser.removeQuotedHTML(testCase.before)
    expect(clean(actual)).toEqual clean(testCase.after)
# We have a little utility method that you can manually enable (change
# `xit` to `it`) to generate what the current iteration of the
# QuotedHTMLParser thinks the `removeQuotedHTML` output should look like.
# These can be manually inspected in a browser before getting their
# filename changed to `email_#{n}_stripped.html`. The actual tests will run
# the current iteration of `removeQuotedHTML` against these files to catch
# if anything has changed in the parser.
#
# It's inside of the specs here instead of its own script because the
# `QuotedHTMLParser` needs Electron booted up in order to work because
# of the DOMParser.
# Disabled helper spec (`xit`): switch it to `it` to regenerate candidate
# output. For each fixture email_1..email_15 it writes the current
# `removeQuotedHTML` result to `email_<n>_raw_stripped.html` in the same
# `fixtures/emails` directory the inputs are read from.
xit "Run this simple function to generate output files", ->
  [1..15].forEach (n) ->
    # Go through the shared helper so generation reads and parses fixtures
    # exactly the way the real specs do.
    newHTML = removeQuotedHTML("email_#{n}.html")
    outPath = path.resolve(__dirname, 'fixtures', 'emails', "email_#{n}_raw_stripped.html")
    fs.writeFileSync(outPath, newHTML)