# Specs for QuotedHTMLTransformer: fixture-driven checks over the numbered
# email files (each compared, trimmed, with its `email_N_stripped.html`
# counterpart) plus hand-written before/after cases collected in `tests`.
# NOTE(review): this file is CoffeeScript, and the line below looks like many
# statements joined together (the original line breaks appear to be lost) —
# confirm against version control before relying on its layout.
_ = require('underscore') fs = require('fs') path = require 'path' QuotedHTMLTransformer = require('../../src/services/quoted-html-transformer').default describe "QuotedHTMLTransformer", -> readFile = (fname) -> emailPath = path.resolve(__dirname, '..', 'fixtures', 'emails', fname) return fs.readFileSync(emailPath, 'utf8') hideQuotedHTML = (fname) -> return QuotedHTMLTransformer.hideQuotedHTML(readFile(fname)) removeQuotedHTML = (fname, opts={}) -> return QuotedHTMLTransformer.removeQuotedHTML(readFile(fname), opts) numQuotes = (html) -> re = new RegExp(QuotedHTMLTransformer.annotationClass, 'g') html.match(re)?.length ? 0 [1..24].forEach (n) -> it "properly parses email_#{n}", -> opts = keepIfWholeBodyIsQuote: true expect(removeQuotedHTML("email_#{n}.html", opts).trim()).toEqual(readFile("email_#{n}_stripped.html").trim()) describe 'manual quote detection tests', -> clean = (str) -> str.replace(/[\n\r]/g, "").replace(/\s{2,}/g, " ") # The key is the inHTML. The value is the outHTML tests = [] # Test 1 tests.push before: """

Some text

More text

Parent
Sub
Sub Sub
Sub

Text at end

The last quote!

""" after: """

Some text

More text

Parent
Sub
Sub Sub
Sub

Text at end

""" # Test 2: Basic quote removal tests.push before: """
Yo

Nothing but quotes

""" after: """
Yo """ # Test 3: It found the blockquote in another div tests.push before: """

Hello World

Nothing but quotes

""" after: """

Hello World

""" # Test 4: It works inside of a wrapped div tests.push before: """

Nothing but quotes

""" after: "" # Test 5: Inline quotes and text tests.push before: """ Hello

Inline quote

World """ after: """ Hello

Inline quote

World """ # Test 6: No quoted elements at all tests.push before: """ Hello World """ after: """ Hello World """ # Test 7: Common ancestor is a quoted node tests.push before: """

Content

Some content
More content
Other content

""" after: """

Content

""" # Test 8: All of our quote blocks we want to remove are at the end… # sortof… but nested in a bunch of stuff # # Note that "content" is burried deep in the middle of a div tests.push before: """

Content

Some content
More content
Other content

Some text quote

Some text

More text

A quote

Another quote

More quotes!

""" after: """

Content

Some content
More content
Other content

Some text quote

Some text

More text

""" # Test 9: Last several tags are blockquotes. Note the 3 blockquote # at the end, the interstital div, and the blockquote inside of the # first div tests.push before: """

I'm inline

Content

Remove me

Foo

Bar

Baz

""" after: """

I'm inline

Content

""" # Test 10: If it's only a quote and no other text, then just show the # quote tests.push before: """

Nothing but quotes

""" after: """

Nothing but quotes

""" # Test 11: The tag itself is just a quoted text block. # I believe this is https://sentry.nylas.com/sentry/edgehill/group/8323/ tests.push before: """ This entire thing is quoted text! """ after: "" options: { keepIfWholeBodyIsQuote: false } # Test 12: Make sure that a single quote inside of a bunch of other # content is detected. We used to have a bug where we were only # looking at the common ancestor of blockquotes (and if there's 1 then # the ancestor is itself). We now look at the root document for # trailing text. tests.push before: """
Yo

A	B
C	SAVE ME
E	F

Yo
""" after: """
Yo

A	B
C	SAVE ME
E	F

Yo """ # Test 13: If there's an "On date…" string immediately before a blockquote, # then remove it. tests.push before: """ Hey

On FOOBAR
On Thu, Mar 3, 2016 at 3:19 AM, First Middle Last-Last < test@nylas.com > wrote:

QUOTED TEXT

""" after: """ Hey

On FOOBAR

""" # Test 14: Don't pick up false positives on the string precursors to block # quotes. tests.push before: """ Hey

On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song

QUOTED TEXT

""" after: """ Hey

On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song

""" # Test 15: Make sure inline quote in plaintext converted to HTML with

    # <br> tags is not completely stripped.
    # `before` and `after` hold the same text here: the spec that walks
    # `tests` therefore expects this input to come back unchanged once both
    # sides have been passed through `clean`.
    # NOTE(review): the fixture may have lost HTML markup — confirm the
    # literals against version control.
    tests.push
      before: """
        On Wed, Dec 14, 2016 at 02:05:44PM +0100, Bálint Réczey wrote:
        > I have uploaded a dpkg NMU with bindnow enabled to DELAYED/10
        > according to current NMU rules. If the Release Team increases the
        > severity of #835146 it can reach unstable earlier.
        Thanks!

        --
        WBR, wRAR
        
      """
      after: """
        On Wed, Dec 14, 2016 at 02:05:44PM +0100, Bálint Réczey wrote:
        > I have uploaded a dpkg NMU with bindnow enabled to DELAYED/10
        > according to current NMU rules. If the Release Team increases the
        > severity of #835146 it can reach unstable earlier.
        Thanks!

        --
        WBR, wRAR
        
      """

    # Runs every hand-written fixture in `tests` through removeQuotedHTML and
    # compares the whitespace-normalised result with the normalised `after`.
    it 'works with these manual test cases', ->
      for fixture in tests
        # Fixtures without their own options default to keepIfWholeBodyIsQuote: true.
        opts = fixture.options or {keepIfWholeBodyIsQuote: true}
        actual = clean(QuotedHTMLTransformer.removeQuotedHTML(fixture.before, opts))
        expect(actual).toEqual clean(fixture.after)

    # Calls removeQuotedHTML with no options and expects only "hello world"
    # to remain of the input.
    # NOTE(review): the spec title and the input literal look like they have
    # lost their HTML tags (the title names no tag) — confirm against version
    # control.
    it 'removes all trailing 
 tags', ->
      input0 = "hello world

foolololol"
      expect0 = "hello world"
      expect(QuotedHTMLTransformer.removeQuotedHTML(input0)).toEqual expect0

    # Calls removeQuotedHTML with no options and expects the "hello"/"world"
    # part of the input to be kept while the trailing part is dropped.
    # NOTE(review): the spec title and both literals look like they have lost
    # their HTML tags — confirm against version control.
    it 'preserves 
 tags in the middle and only chops off tail', ->
      input0 = "hello

world

foolololol"
      expect0 = "hello
world"
      expect(QuotedHTMLTransformer.removeQuotedHTML(input0)).toEqual expect0

    # With keepIfWholeBodyIsQuote set to false, the whole input below is
    # expected to reduce to the empty string.
    # NOTE(review): the heredoc appears to have lost its HTML markup (no body
    # tag is visible despite the spec title) — confirm against version control.
    it 'works as expected when body tag inside the html', ->
      input0 = """
      


        On Dec 16 2015, at 7:08 pm, Juan Tejada <juan@nylas.com> wrote:
        



      

      
      h2
      he he hehehehehehe
      dufjcasc
      """
      expect0 = ""
      expect(QuotedHTMLTransformer.removeQuotedHTML(input0, {keepIfWholeBodyIsQuote: false})).toEqual expect0


  # We have a little utility spec that you can manually enable (switch `xit`
  # to `it`) to generate what the current iteration of the
  # QuotedHTMLTransformer thinks the `removeQuotedHTML` output should look
  # like. These files can be manually inspected in a browser before getting
  # their filename changed to `email_#{n}_stripped.html`. The actual tests
  # will run the current iteration of `removeQuotedHTML` against these files
  # to catch if anything has changed in the parser.
  #
  # It's inside of the specs here instead of its own script because the
  # `QuotedHTMLTransformer` needs Electron booted up in order to work because
  # of the DOMParser.
  # Disabled helper spec: writes an `email_N_raw_stripped.html` file for each
  # listed fixture email, using the current removeQuotedHTML implementation.
  xit "Run this simple function to generate output files", ->
    emailsDir = path.resolve(__dirname, '..', 'fixtures', 'emails')
    for n in [18, 20]
      stripped = QuotedHTMLTransformer.removeQuotedHTML(readFile("email_#{n}.html"))
      fs.writeFileSync(path.resolve(emailsDir, "email_#{n}_raw_stripped.html"), stripped)
    # Explicit return so the loop is not collected into a result array.
    return