From c65425ab9173b7c2ef5298d5ee6e438878ee38da Mon Sep 17 00:00:00 2001 From: Evan Morikawa Date: Mon, 11 May 2015 18:07:06 -0700 Subject: [PATCH] fix(composer): pasting into composer sanitizes and preserves whitespace Summary: Fixes T1132 Added tests, and an html sanitizer. Test Plan: See contenteditable-component-spec.cjsx edgehill --test Reviewers: bengotow Reviewed By: bengotow Subscribers: mg Maniphest Tasks: T1132 Differential Revision: https://review.inboxapp.com/D1492 --- .../lib/contenteditable-component.cjsx | 80 ++++++++++--------- .../spec/contenteditable-component-spec.cjsx | 74 +++++++++++++++++ src/flux/models/utils.coffee | 22 +++++ 3 files changed, 138 insertions(+), 38 deletions(-) diff --git a/internal_packages/composer/lib/contenteditable-component.cjsx b/internal_packages/composer/lib/contenteditable-component.cjsx index b99930398..a169ac9fe 100644 --- a/internal_packages/composer/lib/contenteditable-component.cjsx +++ b/internal_packages/composer/lib/contenteditable-component.cjsx @@ -670,53 +670,57 @@ class ContenteditableComponent extends React.Component - ####### CLEAN PASTE ######### _onPaste: (evt) => - html = evt.clipboardData.getData("text/html") ? "" - if html.length is 0 - text = evt.clipboardData.getData("text/plain") ? "" - if text.length > 0 - evt.preventDefault() - cleanHtml = text - else - else - evt.preventDefault() - cleanHtml = @_sanitizeHtml(html) + inputText = evt.clipboardData.getData("text/html") ? "" + type = "text/html" + if inputText.length is 0 + inputText = evt.clipboardData.getData("text/plain") ? "" + type = "text/plain" - document.execCommand("insertHTML", false, cleanHtml) + if inputText.length > 0 + cleanHtml = @_sanitizeInput(inputText, type) + document.execCommand("insertHTML", false, cleanHtml) + + evt.preventDefault() return false # This is used primarily when pasting text in - _sanitizeHtml: (html) => - cleanHTML = sanitizeHtml html.replace(/[\n\r]/g, "
"), - allowedTags: ['p', 'b', 'i', 'em', 'strong', 'a', 'br', 'img', 'ul', 'ol', 'li', 'strike'] - allowedAttributes: - a: ['href', 'name'] - img: ['src', 'alt'] - transformTags: - h1: "p" - h2: "p" - h3: "p" - h4: "p" - h5: "p" - h6: "p" - div: "p" - pre: "p" - blockquote: "p" - table: "p" + _sanitizeInput: (inputText="", type="text/html") => + if type is "text/plain" + inputText = Utils.encodeHTMLEntities(inputText) + inputText = inputText.replace(/[\r\n]|[03];/g, "
"). + replace(/\s\s/g, "  ") + else + inputText = sanitizeHtml inputText.replace(/[\n\r]/g, "
"), + allowedTags: ['p', 'b', 'i', 'em', 'strong', 'a', 'br', 'img', 'ul', 'ol', 'li', 'strike'] + allowedAttributes: + a: ['href', 'name'] + img: ['src', 'alt'] + transformTags: + h1: "p" + h2: "p" + h3: "p" + h4: "p" + h5: "p" + h6: "p" + div: "p" + pre: "p" + blockquote: "p" + table: "p" - # We sanitized everything and convert all whitespace-inducing elements - # into

tags. We want to de-wrap

tags and replace with two line - # breaks instead. - cleanHTML = cleanHTML.replace(//gim, "").replace(/<\/p>/gi, "
") + # We sanitized everything and convert all whitespace-inducing + # elements into

tags. We want to de-wrap

tags and replace + # with two line breaks instead. + inputText = inputText.replace(//gim, ""). + replace(/<\/p>/gi, "
") - # We never want more then 2 line breaks in a row. - # https://regex101.com/r/gF6bF4/4 - cleanHTML = cleanHTML.replace(/(\s*){3,}/g, "

") + # We never want more than 2 line breaks in a row. + # https://regex101.com/r/gF6bF4/4 + inputText = inputText.replace(/(\s*){3,}/g, "

") - return cleanHTML + return inputText @@ -744,4 +748,4 @@ class ContenteditableComponent extends React.Component else return (innerHTML + @props.html.substr(quoteStart)) -module.exports = ContenteditableComponent \ No newline at end of file +module.exports = ContenteditableComponent diff --git a/internal_packages/composer/spec/contenteditable-component-spec.cjsx b/internal_packages/composer/spec/contenteditable-component-spec.cjsx index f9663535e..3bc4af042 100644 --- a/internal_packages/composer/spec/contenteditable-component-spec.cjsx +++ b/internal_packages/composer/spec/contenteditable-component-spec.cjsx @@ -43,3 +43,77 @@ describe "ContenteditableComponent", -> expect(@onChange.callCount).toBe(1) @performEdit(@changedHtmlWithoutQuote) expect(@onChange.callCount).toBe(2) + + describe "pasting behavior", -> + tests = [ + { + in: "" + sanitizedAsHTML: "" + sanitizedAsPlain: "" + }, + { + in: "Hello World" + sanitizedAsHTML: "Hello World" + sanitizedAsPlain: "Hello World" + }, + { + in: " Hello World" + # Should collapse to 1 space when rendered + sanitizedAsHTML: " Hello World" + # Preserving 2 spaces + sanitizedAsPlain: "  Hello  World" + }, + { + in: " Hello World" + sanitizedAsHTML: " Hello World" + # Preserving 3 spaces + sanitizedAsPlain: "   Hello   World" + }, + { + in: " Hello World" + sanitizedAsHTML: " Hello World" + # Preserving 4 spaces + sanitizedAsPlain: "    Hello    World" + }, + { + in: "Hello\nWorld" + sanitizedAsHTML: "Hello
World" + # Convert newline to br + sanitizedAsPlain: "Hello
World" + }, + { + in: "Hello\rWorld" + sanitizedAsHTML: "Hello
World" + # Convert carriage return to br + sanitizedAsPlain: "Hello
World" + }, + { + in: "Hello\n\n\nWorld" + # Never have more than 2 br's in a row + sanitizedAsHTML: "Hello

World" + # Convert multiple newlines to same number of brs + sanitizedAsPlain: "Hello


World" + }, + { + in: " Foo Bar

Baz
" + # Strip bad tags + sanitizedAsHTML: " Foo Bar Baz
" + # HTML encode tags for literal display + sanitizedAsPlain: "<style>Yo</style> Foo Bar <div>Baz</div>" + } + { + in: " Yo < script>Boo! < / script >" + # Strip non white-list tags and encode malformed ones. + sanitizedAsHTML: " Yo < script>Boo! < / script >" + # HTML encode tags for literal display + sanitizedAsPlain: "<script>Bah</script> Yo < script>Boo! < / script >" + } + ] + + it "sanitizes plain text properly", -> + for test in tests + expect(@component._sanitizeInput(test.in, "text/plain")).toBe test.sanitizedAsPlain + + it "sanitizes html text properly", -> + for test in tests + expect(@component._sanitizeInput(test.in, "text/html")).toBe test.sanitizedAsHTML diff --git a/src/flux/models/utils.coffee b/src/flux/models/utils.coffee index 63907536b..0fc609b55 100644 --- a/src/flux/models/utils.coffee +++ b/src/flux/models/utils.coffee @@ -44,6 +44,28 @@ Utils = set[item] = true for item in arr return set + # Escapes potentially dangerous html characters + # This code is lifted from Angular.js + # See their specs here: + # https://github.com/angular/angular.js/blob/master/test/ngSanitize/sanitizeSpec.js + # And the original source here: https://github.com/angular/angular.js/blob/master/src/ngSanitize/sanitize.js#L451 + encodeHTMLEntities: (value) -> + SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g + pairFix = (value) -> + hi = value.charCodeAt(0) + low = value.charCodeAt(1) + return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';' + + # Match everything outside of normal chars and " (quote character) + NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g + alphaFix = (value) -> '&#' + value.charCodeAt(0) + ';' + + value.replace(/&/g, '&'). + replace(SURROGATE_PAIR_REGEXP, pairFix). + replace(NON_ALPHANUMERIC_REGEXP, alphaFix). + replace(//g, '>') + modelClassMap: -> return Utils._modelClassMap if Utils._modelClassMap