_ = require('underscore')
fs = require('fs')
path = require 'path'
QuotedHTMLTransformer = require('../../src/services/quoted-html-transformer').default

# Specs for QuotedHTMLTransformer: fixture-driven cases read from
# ../fixtures/emails plus a list of hand-written before/after pairs.
describe "QuotedHTMLTransformer", ->

  # Reads a fixture from ../fixtures/emails (relative to this spec) and
  # returns its contents as a UTF-8 string.
  readFile = (fname) ->
    fixturePath = path.resolve(__dirname, '..', 'fixtures', 'emails', fname)
    fs.readFileSync(fixturePath, 'utf8')

  # Runs QuotedHTMLTransformer.hideQuotedHTML over the named fixture.
  hideQuotedHTML = (fname) ->
    QuotedHTMLTransformer.hideQuotedHTML(readFile(fname))

  # Runs QuotedHTMLTransformer.removeQuotedHTML over the named fixture,
  # forwarding `opts` untouched.
  removeQuotedHTML = (fname, opts={}) ->
    QuotedHTMLTransformer.removeQuotedHTML(readFile(fname), opts)

  # Counts how many times the transformer's annotation class appears in
  # `html`; yields 0 when there is no match.
  numQuotes = (html) ->
    annotationPattern = new RegExp(QuotedHTMLTransformer.annotationClass, 'g')
    html.match(annotationPattern)?.length ? 0

  # One spec per numbered fixture: the stripped output of email_N.html must
  # equal email_N_stripped.html (both trimmed).
  for n in [1..24]
    do (n) ->
      it "properly parses email_#{n}", ->
        opts = keepIfWholeBodyIsQuote: true
        actual = removeQuotedHTML("email_#{n}.html", opts).trim()
        expected = readFile("email_#{n}_stripped.html").trim()
        expect(actual).toEqual(expected)

  describe 'manual quote detection tests', ->

    # Drops every line break, then squeezes runs of two or more whitespace
    # characters into a single space, so comparisons ignore layout.
    clean = (str) ->
      withoutBreaks = str.replace(/[\n\r]/g, "")
      withoutBreaks.replace(/\s{2,}/g, " ")

    # Each entry holds the input HTML (`before`), the expected output HTML
    # (`after`) and, optionally, the `options` passed to removeQuotedHTML.
    #
    # NOTE(review): the fixture strings below contain no HTML markup even
    # though the case descriptions talk about blockquotes, divs and tags.
    # Verify them against the intended fixtures.
    tests = []

    # Test 1
    tests.push
      before: """
Some text

More text

Parent
Sub
Sub Sub
Sub
Text at end
The last quote!
"""
      after: """
Some text

More text

Parent
Sub
Sub Sub
Sub
Text at end
"""

    # Test 2: Basic quote removal
    tests.push
      before: """
Yo
Nothing but quotes


"""
      after: """
Yo """

    # Test 3: It found the blockquote in another div
    tests.push
      before: """
Hello World

Nothing but quotes


"""
      after: """
Hello World
"""

    # Test 4: It works inside of a wrapped div
    tests.push
      before: """

Nothing but quotes


"""
      after: ""

    # Test 5: Inline quotes and text
    tests.push
      before: """ Hello
Inline quote
World """
      after: """ Hello
Inline quote
World """

    # Test 6: No quoted elements at all
    tests.push
      before: """ Hello World """
      after: """ Hello World """

    # Test 7: Common ancestor is a quoted node
    tests.push
      before: """
Content
Some content
More content
Other content
"""
      after: """
Content
"""

    # Test 8: All of the quote blocks we want to remove are at the end
    # (sort of), but nested in a bunch of stuff.
    #
    # Note that "content" is buried deep in the middle of a div.
    tests.push
      before: """
Content
Some content
More content
Other content
Some text quote
Some text
More text
A quote


Another quote


More quotes!
"""
      after: """
Content
Some content
More content
Other content
Some text quote
Some text
More text
"""

    # Test 9: The last several tags are blockquotes. Note the 3 blockquotes
    # at the end, the interstitial div, and the blockquote inside of the
    # first div.
    tests.push
      before: """
I'm inline
Content
Remove me
Foo
Bar
Baz
"""
      after: """
I'm inline
Content
"""

    # Test 10: If it's only a quote and no other text, then just show the
    # quote.
    tests.push
      before: """
Nothing but quotes


"""
      after: """
Nothing but quotes


"""

    # Test 11: The tag itself is just a quoted text block.
    # I believe this is https://sentry.nylas.com/sentry/edgehill/group/8323/
    tests.push
      before: """ This entire thing is quoted text! """
      after: ""
      options: { keepIfWholeBodyIsQuote: false }

    # Test 12: Make sure that a single quote inside of a bunch of other
    # content is detected. We used to have a bug where we were only
    # looking at the common ancestor of blockquotes (and if there's 1 then
    # the ancestor is itself). We now look at the root document for
    # trailing text.
    tests.push
      before: """
Yo
AB
C
SAVE ME
EF
Yo
"""
      after: """
Yo
AB
C
SAVE ME
EF
Yo """

    # Test 13: If there's an "On date…" string immediately before a
    # blockquote, then remove it.
    tests.push
      before: """ Hey
On FOOBAR
On Thu, Mar 3, 2016 at 3:19 AM, First Middle Last-Last < test@nylas.com > wrote:
QUOTED TEXT

"""
      after: """ Hey
On FOOBAR
"""

    # Test 14: Don't pick up false positives on the string precursors to
    # block quotes.
    tests.push
      before: """ Hey
On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song
QUOTED TEXT
"""
      after: """ Hey
On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song
"""

    # Test 15: Make sure an inline quote in plaintext that was converted to
    # HTML is not completely stripped.
    tests.push
      before: """
        
On Wed, Dec 14, 2016 at 02:05:44PM +0100, Bálint Réczey wrote:
        > I have uploaded a dpkg NMU with bindnow enabled to DELAYED/10
        > according to current NMU rules. If the Release Team increases the
        > severity of #835146 it can reach unstable earlier.
        Thanks!

        --
        WBR, wRAR
        
"""
      after: """
On Wed, Dec 14, 2016 at 02:05:44PM +0100, Bálint Réczey wrote:
        > I have uploaded a dpkg NMU with bindnow enabled to DELAYED/10
        > according to current NMU rules. If the Release Team increases the
        > severity of #835146 it can reach unstable earlier.
        Thanks!

        --
        WBR, wRAR
        
"""

    # Runs every manual case; cases without their own options fall back to
    # keepIfWholeBodyIsQuote: true.
    it 'works with these manual test cases', ->
      for testCase in tests
        opts = testCase.options or {keepIfWholeBodyIsQuote: true}
        actual = clean(QuotedHTMLTransformer.removeQuotedHTML(testCase.before, opts))
        expect(actual).toEqual(clean(testCase.after))

    it 'removes all trailing
tags', ->
      html = "hello world

foolololol
"
      expected = "hello world"
      expect(QuotedHTMLTransformer.removeQuotedHTML(html)).toEqual(expected)

    it 'preserves
tags in the middle and only chops off tail', ->
      html = "hello

world

foolololol
"
      expected = "hello
world"
      expect(QuotedHTMLTransformer.removeQuotedHTML(html)).toEqual(expected)

    it 'works as expected when body tag inside the html', ->
      html = """

On Dec 16 2015, at 7:08 pm, Juan Tejada <juan@nylas.com> wrote:

h2

he he hehehehehehe

dufjcasc

"""
      expected = ""
      stripped = QuotedHTMLTransformer.removeQuotedHTML(html, {keepIfWholeBodyIsQuote: false})
      expect(stripped).toEqual(expected)

  # We have a little utility method that you can manually enable to
  # generate what the current iteration of the QuotedHTMLTransformer thinks
  # the `removeQuotedHTML` output should look like. The results can be
  # manually inspected in a browser before their filenames are changed to
  # `email_#{n}_stripped.html`. The actual tests will run the current
  # iteration of `removeQuotedHTML` against these files to catch whether
  # anything has changed in the parser.
  #
  # It's inside of the specs here instead of its own script because the
  # `QuotedHTMLTransformer` needs Electron booted up in order to work because
  # of the DOMParser.
  xit "Run this simple function to generate output files", ->
    for n in [18, 20]
      stripped = QuotedHTMLTransformer.removeQuotedHTML(readFile("email_#{n}.html"))
      target = path.resolve(__dirname, '..', 'fixtures', 'emails', "email_#{n}_raw_stripped.html")
      fs.writeFileSync(target, stripped)
    return