convert(es6): quoted-html-transformer to es6

2024-09-22 00:06:06 +08:00 · 2016-11-04 18:25:52 -07:00 · 2016-11-04 18:25:52 -07:00 · 4a40074cd1
parent 5a20dfce76
commit 4a40074cd1
3 changed files with 262 additions and 218 deletions
--- a/spec/services/quoted-html-transformer-spec.coffee
+++ b/spec/services/quoted-html-transformer-spec.coffee
@ -1,7 +1,7 @@
 _ = require('underscore')
 fs = require('fs')
 path = require 'path'
-QuotedHTMLTransformer = require('../../src/services/quoted-html-transformer')
+QuotedHTMLTransformer = require('../../src/services/quoted-html-transformer').default

 describe "QuotedHTMLTransformer", ->

@ -407,5 +407,5 @@ describe "QuotedHTMLTransformer", ->
  xit "Run this simple function to generate output files", ->
    [18].forEach (n) ->
      newHTML = QuotedHTMLTransformer.removeQuotedHTML(readFile("email_#{n}.html"))
-      outPath = path.resolve(__dirname, 'fixtures', 'emails', "email_#{n}_raw_stripped.html")
+      outPath = path.resolve(__dirname, '..', 'fixtures', 'emails', "email_#{n}_raw_stripped.html")
      fs.writeFileSync(outPath, newHTML)
--- a/src/services/quoted-html-transformer.coffee
+++ b/src/services/quoted-html-transformer.coffee
@ -1,216 +0,0 @@
-_ = require 'underscore'
-crypto = require 'crypto'
-DOMUtils = require '../dom-utils'
-quoteStringDetector = require('./quote-string-detector').default
-
-class QuotedHTMLTransformer
-
-  annotationClass: "nylas-quoted-text-segment"
-
-  # Given an html string, it will add the `annotationClass` to the DOM
-  # element
-  hideQuotedHTML: (html, {keepIfWholeBodyIsQuote}={}) ->
-    doc = @_parseHTML(html)
-    quoteElements = @_findQuoteLikeElements(doc)
-    unless keepIfWholeBodyIsQuote and @_wholeBodyIsQuote(doc, quoteElements)
-      @_annotateElements(quoteElements)
-    return @_outputHTMLFor(doc, {initialHTML: html})
-
-  hasQuotedHTML: (html) ->
-    doc = @_parseHTML(html)
-    quoteElements = @_findQuoteLikeElements(doc)
-    return quoteElements.length > 0
-
-  # Public: Removes quoted text from an HTML string
-  #
-  # If we find a quoted text region that is "inline" with the root level
-  # message, meaning it has non quoted text before and after it, then we
-  # leave it in the message. If you set the `includeInline` option to true,
-  # then all inline blocks will also be removed.
-  #
-  # - `html` The string full of quoted text areas
-  # - `options`
-  #   - `includeInline` Defaults false. If true, inline quotes are removed
-  #   too
-  #   - `keepIfWholeBodyIsQuote` Defaults false. If true, then it will
-  #   check to see if the whole html body is a giant quote. If so, it will
-  #   preserve it.
-  #
-  # Returns HTML without quoted text
-  removeQuotedHTML: (html, options={}) ->
-    doc = @_parseHTML(html)
-    quoteElements = @_findQuoteLikeElements(doc, options)
-    unless options.keepIfWholeBodyIsQuote and @_wholeBodyIsQuote(doc, quoteElements)
-      DOMUtils.Mutating.removeElements(quoteElements, options)
-
-      # It's possible that the entire body was quoted text and we've removed everything.
-      if not doc.body
-        return @_outputHTMLFor(@_parseHTML(""), {initialHTML: html})
-
-      @removeTrailingBr(doc)
-      DOMUtils.Mutating.removeElements(quoteStringDetector(doc))
-      if not doc.children[0]
-        return @_outputHTMLFor(@_parseHTML(""), {initialHTML: html})
-
-    if options.returnAsDOM
-      return doc
-    return @_outputHTMLFor(doc, {initialHTML: html})
-
-  # Finds any trailing BR tags and removes them in place
-  removeTrailingBr: (doc) ->
-    childNodes = doc.body.childNodes
-    extraTailBrTags = []
-    for i in [(childNodes.length - 1)..0] by -1
-      curr = childNodes[i]
-      next = childNodes[i - 1]
-      if curr and curr.nodeName == 'BR' and next and next.nodeName == 'BR'
-        extraTailBrTags.push(curr)
-      else
-        break
-    DOMUtils.Mutating.removeElements(extraTailBrTags)
-
-  appendQuotedHTML: (htmlWithoutQuotes, originalHTML) ->
-    doc = @_parseHTML(originalHTML)
-    quoteElements = @_findQuoteLikeElements(doc)
-    doc = @_parseHTML(htmlWithoutQuotes)
-    doc.body.appendChild(node) for node in quoteElements
-    return @_outputHTMLFor(doc, {initialHTML: originalHTML})
-
-  restoreAnnotatedHTML: (html) ->
-    doc = @_parseHTML(html)
-    quoteElements = @_findAnnotatedElements(doc)
-    @_removeAnnotation(quoteElements)
-    return @_outputHTMLFor(doc, {initialHTML: html})
-
-  _parseHTML: (text) ->
-    domParser = new DOMParser()
-    try
-      doc = domParser.parseFromString(text, "text/html")
-    catch error
-      text = "HTML Parser Error: #{error.toString()}"
-      doc = domParser.parseFromString(text, "text/html")
-      NylasEnv.reportError(error)
-
-    # As far as we can tell, when this succeeds, doc /always/ has at least
-    # one child: an <html> node.
-    return doc
-
-  _outputHTMLFor: (doc, {initialHTML}) ->
-    if /<\s?head\s?>/i.test(initialHTML) || /<\s?body[\s>]/i.test(initialHTML)
-      return doc.children[0].innerHTML
-    else
-      return doc.body.innerHTML
-
-  _wholeBodyIsQuote: (doc, quoteElements) ->
-    nonBlankChildElements = []
-    for child in doc.body.childNodes
-      if child.textContent.trim() is ""
-        continue
-      else nonBlankChildElements.push(child)
-
-    if nonBlankChildElements.length is 1
-      return nonBlankChildElements[0] in quoteElements
-    else return false
-
-    # We used to have a scheme where we cached the `doc` object, keyed by
-    # the md5 of the text. Unfortunately we can't do this because the
-    # `doc` is mutated in place. Returning clones of the DOM is just as
-    # bad as re-parsing from string, which is very fast anyway.
-
-  _findQuoteLikeElements: (doc, {includeInline}={}) ->
-    parsers = [
-      @_findGmailQuotes
-      @_findOffice365Quotes
-      @_findBlockquoteQuotes
-    ]
-
-    quoteElements = []
-    for parser in parsers
-      quoteElements = quoteElements.concat(parser(doc) ? [])
-
-    if not includeInline and quoteElements.length > 0
-      # This means we only want to remove quoted text that shows up at the
-      # end of a message. If there were non quoted content after, it'd be
-      # inline.
-
-      trailingQuotes = @_findTrailingQuotes(doc, quoteElements)
-
-      # Only keep the trailing quotes so we can delete them.
-      quoteElements = _.intersection(quoteElements, trailingQuotes)
-
-    return _.compact(_.uniq(quoteElements))
-
-  # This will recursievly move through the DOM, bottom to top, and pick
-  # out quoted text blocks. It will stop when it reaches a visible
-  # non-quote text region.
-  _findTrailingQuotes: (scopeElement, quoteElements=[]) ->
-    trailingQuotes = []
-
-    # We need to find only the child nodes that have content in them. We
-    # determine if it's an inline quote based on if there's VISIBLE
-    # content after a piece of quoted text
-    nodesWithContent = DOMUtils.nodesWithContent(scopeElement)
-
-    # There may be multiple quote blocks that are sibilings of each
-    # other at the end of the message. We want to include all of these
-    # trailing quote elements.
-    for nodeWithContent in nodesWithContent by -1
-      if nodeWithContent in quoteElements
-        # This is a valid quote. Let's keep it!
-        #
-        # This quote block may have many more quote blocks inside of it.
-        # Luckily we don't need to explicitly find all of those because
-        # one this block gets removed from the DOM, we'll delete all
-        # sub-quotes as well.
-        trailingQuotes.push(nodeWithContent)
-        continue
-      else
-        moreTrailing = @_findTrailingQuotes(nodeWithContent, quoteElements)
-        trailingQuotes = trailingQuotes.concat(moreTrailing)
-        break
-
-    return trailingQuotes
-
-  _contains: (node, quoteElement) ->
-    node is quoteElement or node.contains(quoteElement)
-
-  _findAnnotatedElements: (doc) ->
-    Array::slice.call(doc.getElementsByClassName(@annotationClass))
-
-  _annotateElements: (elements=[]) ->
-    for el in elements
-      el.classList.add(@annotationClass)
-      originalDisplay = el.style.display
-      el.style.display = "none"
-      el.setAttribute("data-nylas-quoted-text-original-display", originalDisplay)
-
-  _removeAnnotation: (elements=[]) ->
-    for el in elements
-      el.classList.remove(@annotationClass)
-      originalDisplay = el.getAttribute("data-nylas-quoted-text-original-display")
-      el.style.display = originalDisplay
-      el.removeAttribute("data-nylas-quoted-text-original-display")
-
-  _findGmailQuotes: (doc) ->
-    # Gmail creates both div.gmail_quote and blockquote.gmail_quote. The div
-    # version marks text but does not cause indentation, but both should be
-    # considered quoted text.
-    return Array::slice.call(doc.querySelectorAll('.gmail_quote'))
-
-  _findOffice365Quotes: (doc) ->
-    elements = doc.querySelectorAll('#divRplyFwdMsg, #OLK_SRC_BODY_SECTION')
-    elements = Array::slice.call(elements)
-
-    weirdEl = doc.getElementById('3D"divRplyFwdMsg"')
-    if weirdEl then elements.push(weirdEl)
-
-    elements = _.map elements, (el) ->
-      if el.previousElementSibling and el.previousElementSibling.nodeName is "HR"
-        return el.parentElement
-      else return el
-    return elements
-
-  _findBlockquoteQuotes: (doc) ->
-    return Array::slice.call(doc.querySelectorAll('blockquote'))
-
-module.exports = new QuotedHTMLTransformer
--- a/src/services/quoted-html-transformer.es6
+++ b/src/services/quoted-html-transformer.es6
@ -0,0 +1,260 @@
+import _ from 'underscore';
+import DOMUtils from '../dom-utils';
+import quoteStringDetector from './quote-string-detector';
+
+class QuotedHTMLTransformer {
+
+  annotationClass = "nylas-quoted-text-segment";
+
+  // Given an HTML string, adds the `annotationClass` to each quote-like
+  // element it finds and hides that element (`display: none`).
+  hideQuotedHTML(html, {keepIfWholeBodyIsQuote} = {}) {
+    const doc = this._parseHTML(html);
+    const quoteElements = this._findQuoteLikeElements(doc);
+    if (!keepIfWholeBodyIsQuote || !this._wholeBodyIsQuote(doc, quoteElements)) {
+      this._annotateElements(quoteElements);
+    }
+    return this._outputHTMLFor(doc, {initialHTML: html});
+  }
+
+  hasQuotedHTML(html) {
+    const doc = this._parseHTML(html);
+    const quoteElements = this._findQuoteLikeElements(doc);
+    return quoteElements.length > 0;
+  }
+
+  // Public: Removes quoted text from an HTML string
+  //
+  // If we find a quoted text region that is "inline" with the root level
+  // message, meaning it has non quoted text before and after it, then we
+  // leave it in the message. If you set the `includeInline` option to true,
+  // then all inline blocks will also be removed.
+  //
+  // - `html` The string full of quoted text areas
+  // - `options`
+  //   - `includeInline` Defaults false. If true, inline quotes are removed
+  //   too
+  //   - `keepIfWholeBodyIsQuote` Defaults false. If true, then it will
+  //   check to see if the whole html body is a giant quote. If so, it will
+  //   preserve it.
+  //
+  // Returns HTML without quoted text (or the DOM document if `options.returnAsDOM` is set)
+  removeQuotedHTML(html, options = {}) {
+    const doc = this._parseHTML(html);
+    const quoteElements = this._findQuoteLikeElements(doc, options);
+    if (!options.keepIfWholeBodyIsQuote || !this._wholeBodyIsQuote(doc, quoteElements)) {
+      DOMUtils.Mutating.removeElements(quoteElements, options);
+
+      // It's possible that the entire body was quoted text and we've removed everything.
+      if (!doc.body) {
+        return this._outputHTMLFor(this._parseHTML(""), {initialHTML: html});
+      }
+
+      this.removeTrailingBr(doc);
+      DOMUtils.Mutating.removeElements(quoteStringDetector(doc));
+      if (!doc.children[0]) {
+        return this._outputHTMLFor(this._parseHTML(""), {initialHTML: html});
+      }
+    }
+
+    if (options.returnAsDOM) {
+      return doc;
+    }
+    return this._outputHTMLFor(doc, {initialHTML: html});
+  }
+
+  // Removes extra trailing BR tags in place, keeping one BR of a trailing run
+  removeTrailingBr(doc) {
+    const { childNodes } = doc.body;
+    const extraTailBrTags = [];
+    for (let i = childNodes.length - 1; i >= 0; i--) {
+      const curr = childNodes[i];
+      const next = childNodes[i - 1];
+      if (curr && curr.nodeName === 'BR' && next && next.nodeName === 'BR') {
+        extraTailBrTags.push(curr);
+      } else {
+        break;
+      }
+    }
+    return DOMUtils.Mutating.removeElements(extraTailBrTags);
+  }
+
+  appendQuotedHTML(htmlWithoutQuotes, originalHTML) {
+    let doc = this._parseHTML(originalHTML);
+    const quoteElements = this._findQuoteLikeElements(doc);
+    doc = this._parseHTML(htmlWithoutQuotes);
+    for (let i = 0; i < quoteElements.length; i++) {
+      const node = quoteElements[i];
+      doc.body.appendChild(node);
+    }
+    return this._outputHTMLFor(doc, {initialHTML: originalHTML});
+  }
+
+  restoreAnnotatedHTML(html) {
+    const doc = this._parseHTML(html);
+    const quoteElements = this._findAnnotatedElements(doc);
+    this._removeAnnotation(quoteElements);
+    return this._outputHTMLFor(doc, {initialHTML: html});
+  }
+
+  _parseHTML(text) {
+    const domParser = new DOMParser();
+    let doc;
+    try {
+      doc = domParser.parseFromString(text, "text/html");
+    } catch (error) {
+      const errText = `HTML Parser Error: ${error.toString()}`;
+      doc = domParser.parseFromString(errText, "text/html");
+      NylasEnv.reportError(error);
+    }
+
+    // As far as we can tell, when this succeeds, doc /always/ has at least
+    // one child: an <html> node.
+    return doc;
+  }
+
+  _outputHTMLFor(doc, {initialHTML}) {
+    if (/<\s?head\s?>/i.test(initialHTML) || /<\s?body[\s>]/i.test(initialHTML)) {
+      return doc.children[0].innerHTML;
+    }
+    return doc.body.innerHTML;
+  }
+
+  _wholeBodyIsQuote(doc, quoteElements) {
+    const nonBlankChildElements = [];
+    for (let i = 0; i < doc.body.childNodes.length; i++) {
+      const child = doc.body.childNodes[i];
+      if (child.textContent.trim() === "") {
+        continue;
+      } else { nonBlankChildElements.push(child); }
+    }
+
+    if (nonBlankChildElements.length === 1) {
+      return Array.from(quoteElements).includes(nonBlankChildElements[0])
+    }
+    return false;
+  }
+
+  // We used to have a scheme where we cached the parsed `doc` object, keyed
+  // by the md5 of the text. Unfortunately we can't do this because the
+  // `doc` is mutated in place. Returning clones of the DOM is just as
+  // bad as re-parsing from string, which is very fast anyway.
+
+  _findQuoteLikeElements(doc, {includeInline} = {}) {
+    const parsers = [
+      this._findGmailQuotes,
+      this._findOffice365Quotes,
+      this._findBlockquoteQuotes,
+    ];
+
+    let quoteElements = [];
+    for (const parser of parsers) {
+      quoteElements = quoteElements.concat(parser(doc) || []);
+    }
+
+    if (!includeInline && quoteElements.length > 0) {
+      // This means we only want to remove quoted text that shows up at the
+      // end of a message. If there were non quoted content after, it'd be
+      // inline.
+
+      const trailingQuotes = this._findTrailingQuotes(doc, quoteElements);
+
+      // Only keep the trailing quotes so we can delete them.
+      quoteElements = _.intersection(quoteElements, trailingQuotes);
+    }
+
+    return _.compact(_.uniq(quoteElements));
+  }
+
+  // This will recursively move through the DOM, bottom to top, and pick
+  // out quoted text blocks. It will stop when it reaches a visible
+  // non-quote text region.
+  _findTrailingQuotes(scopeElement, quoteElements = []) {
+    let trailingQuotes = [];
+
+    // We need to find only the child nodes that have content in them. We
+    // determine if it's an inline quote based on if there's VISIBLE
+    // content after a piece of quoted text
+    const nodesWithContent = DOMUtils.nodesWithContent(scopeElement);
+
+    // There may be multiple quote blocks that are siblings of each
+    // other at the end of the message. We want to include all of these
+    // trailing quote elements.
+    for (let i = nodesWithContent.length - 1; i >= 0; i--) {
+      const nodeWithContent = nodesWithContent[i];
+      if (Array.from(quoteElements).includes(nodeWithContent)) {
+        // This is a valid quote. Let's keep it!
+        //
+        // This quote block may have many more quote blocks inside of it.
+        // Luckily we don't need to explicitly find all of those because
+        // once this block gets removed from the DOM, we'll delete all
+        // sub-quotes as well.
+        trailingQuotes.push(nodeWithContent);
+        continue;
+      } else {
+        const moreTrailing = this._findTrailingQuotes(nodeWithContent, quoteElements);
+        trailingQuotes = trailingQuotes.concat(moreTrailing);
+        break;
+      }
+    }
+
+    return trailingQuotes;
+  }
+
+  _contains(node, quoteElement) {
+    return node === quoteElement || node.contains(quoteElement);
+  }
+
+  _findAnnotatedElements(doc) {
+    return Array.prototype.slice.call(doc.getElementsByClassName(this.annotationClass));
+  }
+
+  _annotateElements(elements = []) {
+    let originalDisplay;
+    return elements.forEach((el) => {
+      el.classList.add(this.annotationClass)
+      originalDisplay = el.style.display
+      el.style.display = "none"
+      el.setAttribute("data-nylas-quoted-text-original-display", originalDisplay);
+    });
+  }
+
+  _removeAnnotation(elements = []) {
+    let originalDisplay;
+    return elements.forEach((el) => {
+      el.classList.remove(this.annotationClass)
+      originalDisplay = el.getAttribute("data-nylas-quoted-text-original-display")
+      el.style.display = originalDisplay
+      el.removeAttribute("data-nylas-quoted-text-original-display");
+    })
+  }
+
+  _findGmailQuotes(doc) {
+    // Gmail creates both div.gmail_quote and blockquote.gmail_quote. The div
+    // version marks text but does not cause indentation, but both should be
+    // considered quoted text.
+    return Array.prototype.slice.call(doc.querySelectorAll('.gmail_quote'));
+  }
+
+  _findOffice365Quotes(doc) {
+    let elements = doc.querySelectorAll('#divRplyFwdMsg, #OLK_SRC_BODY_SECTION');
+    elements = Array.prototype.slice.call(elements);
+
+    const weirdEl = doc.getElementById('3D"divRplyFwdMsg"');
+    if (weirdEl) { elements.push(weirdEl); }
+
+    elements = elements.map((el) => {
+      if (el.previousElementSibling && el.previousElementSibling.nodeName === "HR") {
+        return el.parentElement;
+      }
+      return el
+    });
+    return elements;
+  }
+
+  _findBlockquoteQuotes(doc) {
+    return Array.prototype.slice.call(doc.querySelectorAll('blockquote'));
+  }
+}
+
+export default new QuotedHTMLTransformer();