fix(composer): pasting into composer sanitizes and preserves whitespace

Summary: Fixes T1132. Added tests and an HTML sanitizer. Test Plan: See contenteditable-component-spec.cjsx (`edgehill --test`). Reviewers: bengotow. Reviewed By: bengotow. Subscribers: mg. Maniphest Tasks: T1132. Differential Revision: https://review.inboxapp.com/D1492
2024-09-21 15:56:10 +08:00 · 2015-05-11 18:07:06 -07:00 · 2015-05-11 18:07:06 -07:00 · c65425ab91
parent 3af8bc92c6
commit c65425ab91
3 changed files with 138 additions and 38 deletions
--- a/internal_packages/composer/lib/contenteditable-component.cjsx
+++ b/internal_packages/composer/lib/contenteditable-component.cjsx
@ -670,53 +670,57 @@ class ContenteditableComponent extends React.Component



-
  ####### CLEAN PASTE #########

  _onPaste: (evt) =>
-    html = evt.clipboardData.getData("text/html") ? ""
-    if html.length is 0
-      text = evt.clipboardData.getData("text/plain") ? ""
-      if text.length > 0
-        evt.preventDefault()
-        cleanHtml = text
-      else
-    else
-      evt.preventDefault()
-      cleanHtml = @_sanitizeHtml(html)
+    inputText = evt.clipboardData.getData("text/html") ? ""
+    type = "text/html"
+    if inputText.length is 0
+      inputText = evt.clipboardData.getData("text/plain") ? ""
+      type = "text/plain"

-    document.execCommand("insertHTML", false, cleanHtml)
+    if inputText.length > 0
+      cleanHtml = @_sanitizeInput(inputText, type)
+      document.execCommand("insertHTML", false, cleanHtml)
+
+    evt.preventDefault()
    return false

  # This is used primarily when pasting text in
-  _sanitizeHtml: (html) =>
-    cleanHTML = sanitizeHtml html.replace(/[\n\r]/g, "<br/>"),
-      allowedTags: ['p', 'b', 'i', 'em', 'strong', 'a', 'br', 'img', 'ul', 'ol', 'li', 'strike']
-      allowedAttributes:
-        a: ['href', 'name']
-        img: ['src', 'alt']
-      transformTags:
-        h1: "p"
-        h2: "p"
-        h3: "p"
-        h4: "p"
-        h5: "p"
-        h6: "p"
-        div: "p"
-        pre: "p"
-        blockquote: "p"
-        table: "p"
+  _sanitizeInput: (inputText="", type="text/html") =>
+    if type is "text/plain"
+      inputText = Utils.encodeHTMLEntities(inputText)
+      inputText = inputText.replace(/[\r\n]|&#1[03];/g, "<br/>").
+                            replace(/\s\s/g, " &nbsp;")
+    else
+      inputText = sanitizeHtml inputText.replace(/[\n\r]/g, "<br/>"),
+        allowedTags: ['p', 'b', 'i', 'em', 'strong', 'a', 'br', 'img', 'ul', 'ol', 'li', 'strike']
+        allowedAttributes:
+          a: ['href', 'name']
+          img: ['src', 'alt']
+        transformTags:
+          h1: "p"
+          h2: "p"
+          h3: "p"
+          h4: "p"
+          h5: "p"
+          h6: "p"
+          div: "p"
+          pre: "p"
+          blockquote: "p"
+          table: "p"

-    # We sanitized everything and convert all whitespace-inducing elements
-    # into <p> tags. We want to de-wrap <p> tags and replace with two line
-    # breaks instead.
-    cleanHTML = cleanHTML.replace(/<p[\s\S]*?>/gim, "").replace(/<\/p>/gi, "<br/>")
+      # We sanitized everything and converted all whitespace-inducing
+      # elements into <p> tags. Now de-wrap the <p> tags: drop each
+      # opening tag and replace each closing tag with a <br/>.
+      inputText = inputText.replace(/<p[\s\S]*?>/gim, "").
+                            replace(/<\/p>/gi, "<br/>")

-    # We never want more then 2 line breaks in a row.
-    # https://regex101.com/r/gF6bF4/4
-    cleanHTML = cleanHTML.replace(/(<br\s*\/?>\s*){3,}/g, "<br/><br/>")
+      # We never want more than 2 line breaks in a row.
+      # https://regex101.com/r/gF6bF4/4
+      inputText = inputText.replace(/(<br\s*\/?>\s*){3,}/g, "<br/><br/>")

-    return cleanHTML
+    return inputText



@ -744,4 +748,4 @@ class ContenteditableComponent extends React.Component
    else return (innerHTML + @props.html.substr(quoteStart))


-module.exports = ContenteditableComponent
+module.exports = ContenteditableComponent
--- a/internal_packages/composer/spec/contenteditable-component-spec.cjsx
+++ b/internal_packages/composer/spec/contenteditable-component-spec.cjsx
@ -43,3 +43,77 @@ describe "ContenteditableComponent", ->
      expect(@onChange.callCount).toBe(1)
      @performEdit(@changedHtmlWithoutQuote)
      expect(@onChange.callCount).toBe(2)
+
+  describe "pasting behavior", ->
+    tests = [
+      {
+        in: ""
+        sanitizedAsHTML: ""
+        sanitizedAsPlain: ""
+      },
+      {
+        in: "Hello World"
+        sanitizedAsHTML: "Hello World"
+        sanitizedAsPlain: "Hello World"
+      },
+      {
+        in: "  Hello  World"
+        # Should collapse to 1 space when rendered
+        sanitizedAsHTML: "  Hello  World"
+        # Preserving 2 spaces
+        sanitizedAsPlain: " &nbsp;Hello &nbsp;World"
+      },
+      {
+        in: "   Hello   World"
+        sanitizedAsHTML: "   Hello   World"
+        # Preserving 3 spaces
+        sanitizedAsPlain: " &nbsp; Hello &nbsp; World"
+      },
+      {
+        in: "    Hello    World"
+        sanitizedAsHTML: "    Hello    World"
+        # Preserving 4 spaces
+        sanitizedAsPlain: " &nbsp; &nbsp;Hello &nbsp; &nbsp;World"
+      },
+      {
+        in: "Hello\nWorld"
+        sanitizedAsHTML: "Hello<br />World"
+        # Convert newline to br
+        sanitizedAsPlain: "Hello<br/>World"
+      },
+      {
+        in: "Hello\rWorld"
+        sanitizedAsHTML: "Hello<br />World"
+        # Convert carriage return to br
+        sanitizedAsPlain: "Hello<br/>World"
+      },
+      {
+        in: "Hello\n\n\nWorld"
+        # Never have more than 2 br's in a row
+        sanitizedAsHTML: "Hello<br/><br/>World"
+        # Convert multiple newlines to same number of brs
+        sanitizedAsPlain: "Hello<br/><br/><br/>World"
+      },
+      {
+        in: "<style>Yo</style> Foo Bar <div>Baz</div>"
+        # Strip bad tags
+        sanitizedAsHTML: " Foo Bar Baz<br/>"
+        # HTML encode tags for literal display
+        sanitizedAsPlain: "&lt;style&gt;Yo&lt;/style&gt; Foo Bar &lt;div&gt;Baz&lt;/div&gt;"
+      }
+      {
+        in: "<script>Bah</script> Yo < script>Boo! < / script >"
+        # Strip non white-list tags and encode malformed ones.
+        sanitizedAsHTML: " Yo &lt; script&gt;Boo! &lt; / script &gt;"
+        # HTML encode tags for literal display
+        sanitizedAsPlain: "&lt;script&gt;Bah&lt;/script&gt; Yo &lt; script&gt;Boo! &lt; / script &gt;"
+      }
+    ]
+
+    it "sanitizes plain text properly", ->
+      for test in tests
+        expect(@component._sanitizeInput(test.in, "text/plain")).toBe test.sanitizedAsPlain
+
+    it "sanitizes html text properly", ->
+      for test in tests
+        expect(@component._sanitizeInput(test.in, "text/html")).toBe test.sanitizedAsHTML
--- a/src/flux/models/utils.coffee
+++ b/src/flux/models/utils.coffee
@ -44,6 +44,28 @@ Utils =
    set[item] = true for item in arr
    return set

+  # Escapes potentially dangerous html characters
+  # This code is lifted from Angular.js
+  # See their specs here:
+  # https://github.com/angular/angular.js/blob/master/test/ngSanitize/sanitizeSpec.js
+  # And the original source here: https://github.com/angular/angular.js/blob/master/src/ngSanitize/sanitize.js#L451
+  encodeHTMLEntities: (value) ->
+    SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
+    pairFix = (value) ->
+      hi = value.charCodeAt(0)
+      low = value.charCodeAt(1)
+      return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';'
+
+    # Match everything outside '#'..'~', space and '!' (this includes the " quote character)
+    NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g
+    alphaFix = (value) -> '&#' + value.charCodeAt(0) + ';'
+
+    value.replace(/&/g, '&amp;').
+          replace(SURROGATE_PAIR_REGEXP, pairFix).
+          replace(NON_ALPHANUMERIC_REGEXP, alphaFix).
+          replace(/</g, '&lt;').
+          replace(/>/g, '&gt;')
+
  modelClassMap: ->
    return Utils._modelClassMap if Utils._modelClassMap