import fs from 'fs';
import path from 'path';
import QuotedHTMLTransformer from '../../src/services/quoted-html-transformer';

/**
 * Specs for QuotedHTMLTransformer.removeQuotedHTML.
 *
 * Two groups of checks live here:
 *  - fixture-driven specs that compare `email_N.html` against
 *    `email_N_stripped.html`, and
 *  - hand-written before/after snippets in "manual quote detection tests".
 */
describe('QuotedHTMLTransformer', function() {
  // Directory holding the `email_N*.html` fixtures used by these specs.
  const FIXTURES_DIR = path.resolve(__dirname, '..', 'fixtures', 'emails');

  // Number of `email_N.html` / `email_N_stripped.html` fixture pairs to check.
  const EMAIL_FIXTURE_COUNT = 28;

  /**
   * Read a fixture file as UTF-8 text.
   * @param {string} fname - file name inside the fixtures directory
   * @returns {string} the file contents
   */
  const readFile = function(fname) {
    const emailPath = path.resolve(FIXTURES_DIR, fname);
    return fs.readFileSync(emailPath, 'utf8');
  };

  /**
   * Run `QuotedHTMLTransformer.removeQuotedHTML` on the contents of a fixture.
   * @param {string} fname - file name inside the fixtures directory
   * @param {object} [opts={}] - options forwarded to `removeQuotedHTML`
   * @returns {string} the transformer's output
   */
  const removeQuotedHTML = function(fname, opts = {}) {
    return QuotedHTMLTransformer.removeQuotedHTML(readFile(fname), opts);
  };

  for (let n = 1; n <= EMAIL_FIXTURE_COUNT; n++) {
    it(`properly parses email_${n}`, function() {
      const opts = { keepIfWholeBodyIsQuote: true };
      const actual = removeQuotedHTML(`email_${n}.html`, opts).trim();
      const expected = readFile(`email_${n}_stripped.html`).trim();

      // On a mismatch, write the produced output next to the fixture as
      // `email_N_actual.html` so it can be compared by hand.
      if (actual !== expected) {
        fs.writeFileSync(path.resolve(FIXTURES_DIR, `email_${n}_actual.html`), actual);
      }
      expect(actual).toEqual(expected);
    });
  }

  describe('manual quote detection tests', function() {
    // Normalise whitespace before comparing: drop every \n and \r, then
    // collapse each run of two or more whitespace characters to one space.
    const clean = str => str.replace(/[\n\r]/g, '').replace(/\s{2,}/g, ' ');

    // Each entry is `{ before, after, options }`: `before` is the input HTML,
    // `after` is the expected output, and the optional `options` object is
    // passed to `removeQuotedHTML` (see the spec below for the default).
    //
    // NOTE(review): the fixture strings below contain no HTML tags even though
    // the test descriptions talk about blockquotes and divs — the markup looks
    // like it has been lost. Confirm against version control before relying on
    // these cases.
    const tests = [];

    // Test 1
    tests.push({
      before: `\
Some text

More text

Parent
Sub
Sub Sub
Sub
Text at end
The last quote!
\ `,
      after: `\
Some text

More text

Parent
Sub
Sub Sub
Sub
Text at end
\ `,
    });

    // Test 2: Basic quote removal
    tests.push({
      before: `\
Yo
Nothing but quotes


\ `,
      after: `\
Yo\ `,
    });

    // Test 3: It found the blockquote in another div
    tests.push({
      before: `\
Hello World

Nothing but quotes


\ `,
      after: `\
Hello World
`,
    });

    // Test 4: It works inside of a wrapped div
    tests.push({
      before: `\ Reply here

Nothing but quotes


\ `,
      after: 'Reply here',
    });

    // Test 5: Inline quotes and text
    tests.push({
      before: `\ Hello
Inline quote
World\ `,
      after: `\ Hello
Inline quote
World\ `,
    });

    // Test 6: No quoted elements at all
    tests.push({
      before: `\ Hello World\ `,
      after: `\ Hello World\ `,
    });

    // Test 7: Common ancestor is a quoted node
    tests.push({
      before: `\
Content
Some content
More content
Other content
\ `,
      after: `\
Content
\ `,
    });

    // Test 8: All of our quote blocks we want to remove are at the end…
    // sort of… but nested in a bunch of stuff
    //
    // Note that "content" is buried deep in the middle of a div
    tests.push({
      before: `\
Content
Some content
More content
Other content
Some text quote
Some text
More text
A quote


Another quote


More quotes!
\ `,
      after: `\
Content
Some content
More content
Other content
Some text quote
Some text
More text
\ `,
    });

    // Test 9: Last several tags are blockquotes. Note the 3 blockquote
    // at the end, the interstitial div, and the blockquote inside of the
    // first div
    tests.push({
      before: `\
I'm inline
Content
Remove me
Foo
Bar
Baz
\ `,
      after: `\
I'm inline
Content
\ `,
    });

    // Test 10: If it's only a quote and no other text, then just show the
    // quote
    tests.push({
      before: `\
Nothing but quotes


\ `,
      after: `\
Nothing but quotes


\ `,
    });

    // Test 11: The tag itself is just a quoted text block.
    // I believe this is https://sentry.mailspring.com/sentry/edgehill/group/8323/
    tests.push({
      before: `\ This entire thing is quoted text! \ `,
      after: '',
      options: { keepIfWholeBodyIsQuote: false },
    });

    // Test 12: Make sure that a single quote inside of a bunch of other
    // content is detected. We used to have a bug where we were only
    // looking at the common ancestor of blockquotes (and if there's 1 then
    // the ancestor is itself). We now look at the root document for
    // trailing text.
    tests.push({
      before: `\
Yo
AB
C
SAVE ME
EF
Yo
\ `,
      after: `\
Yo
AB
C
SAVE ME
EF
Yo\ `,
    });

    // Test 13: If there's an "On date…" string immediately before a blockquote,
    // then remove it.
    tests.push({
      before: `\ Hey
On FOOBAR
On Thu, Mar 3, 2016 at 3:19 AM, First Middle Last-Last < test@mailspring.com > wrote:
QUOTED TEXT

\ `,
      after: `\ Hey
On FOOBAR
\ `,
    });

    // Test 14: Don't pick up false positives on the string precursors to block
    // quotes.
    tests.push({
      before: `\ Hey
On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song
QUOTED TEXT
\ `,
      after: `\ Hey
On FOOBAR
On Thu, Mar 3, 2016 I went to my writing club and wrote: A little song
\ `,
    });

    it('works with these manual test cases', () => {
      // Entries that do not specify `options` run with
      // `keepIfWholeBodyIsQuote: true`.
      for (const { before, after, options = { keepIfWholeBodyIsQuote: true } } of tests) {
        const actual = clean(QuotedHTMLTransformer.removeQuotedHTML(before, options));
        expect(actual).toEqual(clean(after));
      }
    });

    // NOTE(review): in the next two specs the titles and string values contain
    // bare line breaks where markup (presumably <br>/<blockquote> tags) seems
    // to be missing. They are written as template literals so the line breaks
    // are legal inside the strings; the content itself is unchanged. Restore
    // the intended markup from version control.
    it(`removes all trailing
tags`, function() {
      const input0 = `hello world

foolololol
`;
      const expect0 = 'hello world';
      expect(QuotedHTMLTransformer.removeQuotedHTML(input0)).toEqual(expect0);
    });

    it(`preserves
tags in the middle and only chops off tail`, function() {
      const input0 = `hello

world

foolololol
`;
      const expect0 = `hello
world`;
      expect(QuotedHTMLTransformer.removeQuotedHTML(input0)).toEqual(expect0);
    });

    it('works as expected when body tag inside the html', function() {
      const input0 = `\

On Dec 16 2015, at 7:08 pm, Juan Tejada <juan@mailspring.com> wrote:

h2

he he hehehehehehe

dufjcasc

\ `;
      const expect0 = '';
      expect(
        QuotedHTMLTransformer.removeQuotedHTML(input0, { keepIfWholeBodyIsQuote: false })
      ).toEqual(expect0);
    });
  });

  // We have a little utility method that you can manually enable (change
  // `xit` to `it`) to generate what the current iteration of the
  // QuotedHTMLTransformer thinks the `removeQuotedHTML` output should look
  // like. These can be manually inspected in a browser before getting their
  // filename changed to `email_${n}_stripped.html`. The actual tests will run
  // the current iteration of `removeQuotedHTML` against these files to catch
  // if anything has changed in the parser.
  //
  // It's inside of the specs here instead of its own script because the
  // `QuotedHTMLTransformer` needs Electron booted up in order to work because
  // of the DOMParser.
  xit('Run this simple function to generate output files', () => {
    for (const n of [18, 20]) {
      const newHTML = QuotedHTMLTransformer.removeQuotedHTML(readFile(`email_${n}.html`));
      const outPath = path.resolve(FIXTURES_DIR, `email_${n}_raw_stripped.html`);
      fs.writeFileSync(outPath, newHTML);
    }
  });
});