Mailspring/spec/quoted-html-transformer-spec.coffee

413 lines
11 KiB
CoffeeScript
Raw Normal View History

_ = require('underscore')
fs = require('fs')
path = require 'path'
QuotedHTMLTransformer = require('../src/services/quoted-html-transformer')
describe "QuotedHTMLTransformer", ->
# Reads an email fixture from `fixtures/emails` (relative to this spec's
# directory) and returns its contents as a UTF-8 string.
readFile = (fname) ->
  fs.readFileSync(path.resolve(__dirname, 'fixtures', 'emails', fname), 'utf8')
# Loads the named fixture and passes its HTML through
# `QuotedHTMLTransformer.hideQuotedHTML`, returning the result.
hideQuotedHTML = (fname) ->
  fixtureHTML = readFile(fname)
  QuotedHTMLTransformer.hideQuotedHTML(fixtureHTML)
# Loads the named fixture and passes its HTML, together with `opts`, through
# `QuotedHTMLTransformer.removeQuotedHTML`, returning the result.
removeQuotedHTML = (fname, opts={}) ->
  fixtureHTML = readFile(fname)
  QuotedHTMLTransformer.removeQuotedHTML(fixtureHTML, opts)
# Counts how many times `QuotedHTMLTransformer.annotationClass` occurs in
# `html`. Returns 0 when there is no occurrence.
numQuotes = (html) ->
  pattern = new RegExp(QuotedHTMLTransformer.annotationClass, 'g')
  found = html.match(pattern)
  if found? then found.length else 0
# One spec per numbered fixture: stripping `email_N.html` must produce
# exactly the contents of `email_N_stripped.html`.
for n in [1..18]
  # `do` captures the current `n` for the spec body, which runs later.
  do (n) ->
    it "properly parses email_#{n}", ->
      actual = removeQuotedHTML("email_#{n}.html", keepIfWholeBodyIsQuote: true)
      expect(actual).toEqual readFile("email_#{n}_stripped.html")
describe 'manual quote detection tests', ->
# Normalizes whitespace so HTML strings can be compared loosely: drops every
# newline / carriage return, then collapses each run of two or more
# whitespace characters into a single space.
clean = (str) ->
  withoutLineBreaks = str.replace(/[\n\r]/g, "")
  withoutLineBreaks.replace(/\s{2,}/g, " ")
# Each entry is a {before, after} pair: `before` is the input HTML and
# `after` is the HTML expected back from `removeQuotedHTML`.
tests = []
# Test 1: Nested quotes that are followed by more content are kept; only
# the trailing quote (#last-quote) is expected to be removed.
tests.push
before: """
<div>
Some text
<p>More text</p>
<blockquote id="inline-parent-quote">
Parent
<blockquote id="inline-sub-quote">
Sub
<blockquote id="inline-sub-sub-quote">Sub Sub</blockquote>
Sub
</blockquote>
</blockquote>
<div>Text at end</div>
<blockquote id="last-quote">
<blockquote>
The last quote!
</blockquote>
</blockquote>
</div>
"""
after: """<head></head><body>
<div>
Some text
<p>More text</p>
<blockquote id="inline-parent-quote">
Parent
<blockquote id="inline-sub-quote">
Sub
<blockquote id="inline-sub-sub-quote">Sub Sub</blockquote>
Sub
</blockquote>
</blockquote>
<div>Text at end</div>
</div></body>
"""
# Test 2: Basic quote removal. A quote followed only by <br> tags is
# expected to be stripped; the surrounding <br> tags stay.
tests.push
before: """
<br>
Yo
<blockquote>Nothing but quotes</blockquote>
<br>
<br>
"""
after: """<head></head><body>
<br>
Yo
<br>
<br></body>
"""
# Test 3: The blockquote is found and removed even though it is nested
# inside another div; the now-empty div is left in place.
tests.push
before: """
<div>Hello World</div>
<br>
<div>
<blockquote>Nothing but quotes</blockquote>
</div>
<br>
<br>
"""
after: """<head></head><body>
<div>Hello World</div>
<br>
<div>
</div>
<br>
<br></body>
"""
# Test 4: Removal also works when all of the content is wrapped in a
# single div.
tests.push
before: """
<div>
<br>
<blockquote>Nothing but quotes</blockquote>
<br>
<br>
</div>
"""
after: """<head></head><body>
<div>
<br>
<br>
<br>
</div></body>
"""
# Test 5: Inline quotes and text. A quote with text after it is expected
# to be kept.
tests.push
before: """
Hello
<blockquote>Inline quote</blockquote>
World
"""
after: """<head></head><body>
Hello
<blockquote>Inline quote</blockquote>
World</body>
"""
# Test 6: No quoted elements at all. The text is expected back unchanged,
# apart from the <head>/<body> wrapper.
tests.push
before: """
Hello World
"""
after: """<head></head><body>
Hello World</body>
"""
# Test 7: Common ancestor is a quoted node. The outer trailing quote is
# expected to be removed together with the quote nested inside it.
tests.push
before: """
<div>Content</div>
<blockquote>
Some content
<blockquote>More content</blockquote>
Other content
</blockquote>
"""
after: """<head></head><body>
<div>Content</div></body>
"""
# Test 8: All of the quote blocks we want to remove are at the end...
# sort of... but nested in a bunch of stuff.
#
# Note that "content" is buried deep in the middle of a div. The quotes
# that are followed by text ("Some content…", "Some text quote") are
# expected to stay; the three quotes followed only by <br> tags are removed.
tests.push
before: """
<div>Content</div>
<blockquote>
Some content
<blockquote>More content</blockquote>
Other content
</blockquote>
<div>
<blockquote>Some text quote</blockquote>
Some text
<div>
More text
<blockquote>A quote</blockquote>
<br>
</div>
<br>
<blockquote>Another quote</blockquote>
<br>
</div>
<br>
<blockquote>More quotes!</blockquote>
"""
after: """<head></head><body>
<div>Content</div>
<blockquote>
Some content
<blockquote>More content</blockquote>
Other content
</blockquote>
<div>
<blockquote>Some text quote</blockquote>
Some text
<div>
More text
<br>
</div>
<br>
<br>
</div>
<br>
</body>
"""
# Test 9: The last several tags are blockquotes. Note the 3 blockquotes
# at the end, the interstitial empty div, and the blockquotes inside of the
# first div. Only the quote that has text after it ("I'm inline") is
# expected to remain.
tests.push
before: """
<div>
<blockquote>I'm inline</blockquote>
Content
<blockquote>Remove me</blockquote>
</div>
<blockquote>Foo</blockquote>
<div></div>
<blockquote>Bar</blockquote>
<blockquote>Baz</blockquote>
"""
after: """<head></head><body>
<div>
<blockquote>I'm inline</blockquote>
Content
</div>
<div></div></body>
"""
# Test 10: If it's only a quote and no other text, then just show the
# quote (the output is expected to still contain the blockquote).
# NOTE(review): presumably this is what the `keepIfWholeBodyIsQuote`
# option passed by the spec controls — confirm against the transformer.
tests.push
before: """
<br>
<blockquote>Nothing but quotes</blockquote>
<br>
<br>
"""
after: """<head></head><body>
<br>
<blockquote>Nothing but quotes</blockquote>
<br>
<br></body>
"""
# Test 11: The <body> tag itself is just a quoted text block
# (id="OLK_SRC_BODY_SECTION"). The expected output is an empty body.
# I believe this is https://sentry.nylas.com/sentry/edgehill/group/8323/
tests.push
before: """
<body id="OLK_SRC_BODY_SECTION">
This entire thing is quoted text!
</body>
"""
after: """<head></head><body></body>
"""
# Test 12: Make sure that a single quote inside of a bunch of other
# content is detected. We used to have a bug where we were only
# looking at the common ancestor of blockquotes (and if there's only one,
# then the ancestor is the blockquote itself). We now look at the root
# document for trailing text. Here the quote sits in a table cell with
# more content after it, so it is expected to be kept.
tests.push
before: """
<br>
Yo
<table><tbody>
<tr><td>A</td><td>B</td></tr>
<tr><td>C</td><td><blockquote>SAVE ME</blockquote></td></tr>
<tr><td>E</td><td>F</td></tr>
</tbody></table>
Yo
<br>
"""
after: """<head></head><body>
<br>
Yo
<table><tbody>
<tr><td>A</td><td>B</td></tr>
<tr><td>C</td><td><blockquote>SAVE ME</blockquote></td></tr>
<tr><td>E</td><td>F</td></tr>
</tbody></table>
Yo
<br></body>
"""
# Test 13: If there's an "On <date>, <name> wrote:" string immediately
# before a blockquote, then remove it along with the quote. The unrelated
# "On FOOBAR" line is expected to stay.
tests.push
before: """
Hey
<div>
On FOOBAR
<br>
On Thu, Mar 3, 2016
at 3:19 AM,
First Middle Last-Last
<span dir="ltr">
&lt;
<a href="mailto:test@nylas.com" target="_blank">
test@nylas.com
</a>
&gt;
</span>
wrote:
<br>
<blockquote>
QUOTED TEXT
</blockquote>
</div>
<br>
"""
after: """<head></head><body>
Hey
<div>
On FOOBAR
<br><br>
</div><br></body>
"""
# Test 14: Don't pick up false positives on the string precursors to block
# quotes. The "On Thu, ... wrote:" sentence here is ordinary prose and must
# be kept; only the blockquote is expected to be removed.
tests.push
before: """
Hey
<div>
On FOOBAR
<br>
On Thu, Mar 3, 2016 I went to my writing club and wrote:
<strong>A little song</strong>
<blockquote>
QUOTED TEXT
</blockquote>
</div>
"""
after: """<head></head><body>
Hey
<div>
On FOOBAR
<br>
On Thu, Mar 3, 2016 I went to my writing club and wrote:
<strong>A little song</strong>
</div></body>
"""
# Runs every {before, after} pair through `removeQuotedHTML` and compares
# the whitespace-normalized result with the whitespace-normalized
# expectation.
it 'works with these manual test cases', ->
  for testCase in tests
    options = keepIfWholeBodyIsQuote: true
    stripped = QuotedHTMLTransformer.removeQuotedHTML(testCase.before, options)
    expect(clean(stripped)).toEqual clean(testCase.after)
# Two <br> tags precede the trailing quote; only one is expected to survive.
it 'removes all trailing <br> tags except one', ->
  stripped = QuotedHTMLTransformer.removeQuotedHTML(
    "hello world<br><br><blockquote>foolololol</blockquote>"
  )
  expect(stripped).toEqual "<head></head><body>hello world<br></body>"
# The <br><br> between "hello" and "world" must be left alone; only the
# run of <br> tags directly before the removed quote is trimmed to one.
it 'preserves <br> tags in the middle and only chops off tail', ->
  stripped = QuotedHTMLTransformer.removeQuotedHTML(
    "hello<br><br>world<br><br><blockquote>foolololol</blockquote>"
  )
  expect(stripped).toEqual "<head></head><body>hello<br><br>world<br></body>"
feat(editor-region): Add support to register components as editors Summary: - The main purpose of this is to be able to properly register the editor for the markdown plugin (and any other plugins to come) - Refactors ComposerView and Contenteditable -> - Replaces Contenteditable with an InjectedComponent for a new region role: "Composer:Editor" - Creates a new component called ComposerEditor, which is the one that is being registered by default as "Composer:Editor" - I used this class to try to standardize the props that should be passed to any would be editor Component: - Renamed a bunch of the props which (I think) had a bit of confusing names - Added a bunch of docs for these in the source file, although I feel like those docs should live elsewhere, like in the ComponentRegion docs. - In the process, I ended up pulling some stuff out of ComposerView and some stuff out of the Contenteditable, namely: - The scrolling logic to ensure that the composer is visible while typing was moved outside of the Contenteditable -- this feels more like the ComposerEditor's responsibility, especially since the Contenteditable is meant to be used in other contexts as well. - The ComposerExtensions state; it feels less awkward for me if this is inside the ComposerEditor because 1) ComposerView does less things, 2) these are actually just being passed to the Contenteditable, 3) I feel like other plugins shouldn't need to mess around with ComposerExtensions, so we shouldn't pass them to the editor. If you register an editor different from our default one, any other ComposerExtension callbacks will be disabled, which I feel is expected behavior. - I think there is still some more refactoring to be done, and I left some TODOS here and there, but I think this diff is already big enough and its a minimal set of changes to get the markdown editor working in a not so duck tapish way. 
- New props for InjectedComponent: - `requiredMethods`: allows you to define a collection of methods that should be implemented by any Component that registers for your desired region. - It will throw an error if these are not implemented - It will automatically pass calls made on the InjectedComponent to these methods down to the instance of the actual registered component - Would love some comments on this approach and impl - `fallback`: allows you to define a default component to use if none were registered through the ComponentRegistry - Misc: - Added a new test case for the QuotedHTMLTransformer - Tests: - They were minimally updated so that they don't break, but a big TODO is to properly refactor them. I plan to do that in an upcoming diff. Test Plan: - Unit tests Reviewers: bengotow, evan Reviewed By: evan Differential Revision: https://phab.nylas.com/D2372
2015-12-19 03:03:58 +08:00
# The quoted block in this input itself contains <meta> and <body> tags.
# The entire quote is expected to be stripped, leaving a single <br>
# inside the body.
it 'works as expected when body tag inside the html', ->
input0 = """
<br><br><blockquote class="gmail_quote"
style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex;">
On Dec 16 2015, at 7:08 pm, Juan Tejada &lt;juan@nylas.com&gt; wrote:
<br>
<meta content="text/html; charset=us-ascii" />
<body>
<h1 id="h2">h2</h1>
<p>he he hehehehehehe</p>
<p>dufjcasc</p>
</body>
</blockquote>
"""
expect0 = "<head></head><body><br></body>"
expect(QuotedHTMLTransformer.removeQuotedHTML(input0)).toEqual expect0
# We have a little utility spec that you can manually enable (change `xit`
# to `it`) to generate what the current iteration of the
# QuotedHTMLTransformer thinks the `removeQuotedHTML` output should look
# like. These files can be manually inspected in a browser before getting
# their filename changed to `email_#{n}_stripped.html`. The actual tests
# will run the current iteration of `removeQuotedHTML` against these files
# to catch if anything has changed in the parser.
#
# It's inside of the specs here instead of its own script because the
# `QuotedHTMLTransformer` needs Electron booted up in order to work because
# of the DOMParser.
# Disabled helper spec: for each listed fixture number, strips the quoted
# HTML from `email_N.html` and writes the result next to it as
# `email_N_raw_stripped.html`.
xit "Run this simple funciton to generate output files", ->
  fixturesDir = path.resolve(__dirname, 'fixtures', 'emails')
  for n in [18]
    strippedHTML = QuotedHTMLTransformer.removeQuotedHTML(readFile("email_#{n}.html"))
    fs.writeFileSync(path.resolve(fixturesDir, "email_#{n}_raw_stripped.html"), strippedHTML)
  return