fix(composer): pasting into composer sanitizes and preserves whitespace

Summary:
Fixes T1132
Added tests and an HTML sanitizer.

Test Plan:
See contenteditable-component-spec.cjsx
edgehill --test

Reviewers: bengotow

Reviewed By: bengotow

Subscribers: mg

Maniphest Tasks: T1132

Differential Revision: https://review.inboxapp.com/D1492
This commit is contained in:
Evan Morikawa 2015-05-11 18:07:06 -07:00
parent 3af8bc92c6
commit c65425ab91
3 changed files with 138 additions and 38 deletions

View file

@ -670,53 +670,57 @@ class ContenteditableComponent extends React.Component
####### CLEAN PASTE #########
# Paste handler for the contenteditable: reads the clipboard payload,
# cleans it, and inserts the result with
# document.execCommand("insertHTML") instead of letting the browser
# perform its default paste.
#
# evt - the paste event; `evt.clipboardData.getData` is queried for
#       "text/html" first and "text/plain" when the HTML payload is empty.
#
# Returns false.
#
# NOTE(review): this span appears to interleave the removed and the added
# lines of the diff (two consecutive `else`, `html`/`text` next to
# `inputText`/`type`) -- confirm against the real file before editing.
_onPaste: (evt) =>
html = evt.clipboardData.getData("text/html") ? ""
if html.length is 0
text = evt.clipboardData.getData("text/plain") ? ""
if text.length > 0
evt.preventDefault()
cleanHtml = text
else
else
evt.preventDefault()
cleanHtml = @_sanitizeHtml(html)
# Prefer the HTML flavour of the clipboard; `type` records which flavour
# was actually used so the sanitizer can pick the matching code path.
inputText = evt.clipboardData.getData("text/html") ? ""
type = "text/html"
if inputText.length is 0
inputText = evt.clipboardData.getData("text/plain") ? ""
type = "text/plain"
document.execCommand("insertHTML", false, cleanHtml)
# Only sanitize and insert when the clipboard actually held some text.
if inputText.length > 0
cleanHtml = @_sanitizeInput(inputText, type)
document.execCommand("insertHTML", false, cleanHtml)
evt.preventDefault()
return false
# This is used primarily when pasting text in
#
# NOTE(review): this span appears to interleave the removed
# `_sanitizeHtml` with the added `_sanitizeInput`; the `cleanHTML` lines
# seem to belong to the former and the `inputText` lines to the latter --
# confirm against the real file.
_sanitizeHtml: (html) =>
cleanHTML = sanitizeHtml html.replace(/[\n\r]/g, "<br/>"),
allowedTags: ['p', 'b', 'i', 'em', 'strong', 'a', 'br', 'img', 'ul', 'ol', 'li', 'strike']
allowedAttributes:
a: ['href', 'name']
img: ['src', 'alt']
transformTags:
h1: "p"
h2: "p"
h3: "p"
h4: "p"
h5: "p"
h6: "p"
div: "p"
pre: "p"
blockquote: "p"
table: "p"
# Cleans pasted clipboard content before it is inserted as HTML.
#
# inputText - the raw clipboard string (defaults to "").
# type      - "text/plain" or anything else (defaults to "text/html");
#             any value other than "text/plain" takes the HTML path.
#
# Returns the cleaned string.
_sanitizeInput: (inputText="", type="text/html") =>
if type is "text/plain"
# Plain text: entity-encode so markup is displayed literally, turn CR/LF
# (raw or already encoded as &#10; / &#13;) into <br/>, and turn each
# pair of whitespace characters into " &nbsp;".
inputText = Utils.encodeHTMLEntities(inputText)
inputText = inputText.replace(/[\r\n]|&#1[03];/g, "<br/>").
replace(/\s\s/g, " &nbsp;")
else
# HTML: newlines become <br/>, then sanitizeHtml keeps only the listed
# tags and attributes and rewrites the listed block-level tags to <p>.
inputText = sanitizeHtml inputText.replace(/[\n\r]/g, "<br/>"),
allowedTags: ['p', 'b', 'i', 'em', 'strong', 'a', 'br', 'img', 'ul', 'ol', 'li', 'strike']
allowedAttributes:
a: ['href', 'name']
img: ['src', 'alt']
transformTags:
h1: "p"
h2: "p"
h3: "p"
h4: "p"
h5: "p"
h6: "p"
div: "p"
pre: "p"
blockquote: "p"
table: "p"
# We sanitized everything and convert all whitespace-inducing elements
# into <p> tags. We want to de-wrap <p> tags and replace with two line
# breaks instead.
cleanHTML = cleanHTML.replace(/<p[\s\S]*?>/gim, "").replace(/<\/p>/gi, "<br/>")
# We sanitized everything and convert all whitespace-inducing
# elements into <p> tags. We want to de-wrap <p> tags and replace
# with two line breaks instead.
# NOTE(review): the code below substitutes a single "<br/>" for each
# closing </p>, not two -- confirm which of the two is intended.
inputText = inputText.replace(/<p[\s\S]*?>/gim, "").
replace(/<\/p>/gi, "<br/>")
# We never want more than 2 line breaks in a row.
# https://regex101.com/r/gF6bF4/4
cleanHTML = cleanHTML.replace(/(<br\s*\/?>\s*){3,}/g, "<br/><br/>")
# We never want more than 2 line breaks in a row.
# https://regex101.com/r/gF6bF4/4
inputText = inputText.replace(/(<br\s*\/?>\s*){3,}/g, "<br/><br/>")
return cleanHTML
return inputText
@ -744,4 +748,4 @@ class ContenteditableComponent extends React.Component
else return (innerHTML + @props.html.substr(quoteStart))
module.exports = ContenteditableComponent
module.exports = ContenteditableComponent

View file

@ -43,3 +43,77 @@ describe "ContenteditableComponent", ->
expect(@onChange.callCount).toBe(1)
@performEdit(@changedHtmlWithoutQuote)
expect(@onChange.callCount).toBe(2)
# Specs for `_sanitizeInput`. Every fixture is pushed through both the
# "text/plain" and the "text/html" code paths and compared with the
# matching expectation.
describe "pasting behavior", ->
  # Each fixture holds:
  #   in               - the raw clipboard string
  #   sanitizedAsHTML  - expected result when pasted as "text/html"
  #   sanitizedAsPlain - expected result when pasted as "text/plain"
  #
  # The runs of 2, 3 and 4 spaces in the "Hello World" fixtures are
  # significant: the plain-text path rewrites every pair of whitespace
  # characters to " &nbsp;", so do not let an editor collapse them.
  tests = [
    {
      in: ""
      sanitizedAsHTML: ""
      sanitizedAsPlain: ""
    },
    {
      in: "Hello World"
      sanitizedAsHTML: "Hello World"
      sanitizedAsPlain: "Hello World"
    },
    {
      in: "  Hello  World"
      # Should collapse to 1 space when rendered
      sanitizedAsHTML: "  Hello  World"
      # Preserving 2 spaces
      sanitizedAsPlain: " &nbsp;Hello &nbsp;World"
    },
    {
      in: "   Hello   World"
      sanitizedAsHTML: "   Hello   World"
      # Preserving 3 spaces
      sanitizedAsPlain: " &nbsp; Hello &nbsp; World"
    },
    {
      in: "    Hello    World"
      sanitizedAsHTML: "    Hello    World"
      # Preserving 4 spaces
      sanitizedAsPlain: " &nbsp; &nbsp;Hello &nbsp; &nbsp;World"
    },
    {
      in: "Hello\nWorld"
      sanitizedAsHTML: "Hello<br />World"
      # Convert newline to br
      sanitizedAsPlain: "Hello<br/>World"
    },
    {
      in: "Hello\rWorld"
      sanitizedAsHTML: "Hello<br />World"
      # Convert carriage return to br
      sanitizedAsPlain: "Hello<br/>World"
    },
    {
      in: "Hello\n\n\nWorld"
      # Never have more than 2 br's in a row
      sanitizedAsHTML: "Hello<br/><br/>World"
      # Convert multiple newlines to same number of brs
      sanitizedAsPlain: "Hello<br/><br/><br/>World"
    },
    {
      in: "<style>Yo</style> Foo Bar <div>Baz</div>"
      # Strip bad tags
      sanitizedAsHTML: " Foo Bar Baz<br/>"
      # HTML encode tags for literal display
      sanitizedAsPlain: "&lt;style&gt;Yo&lt;/style&gt; Foo Bar &lt;div&gt;Baz&lt;/div&gt;"
    },
    {
      in: "<script>Bah</script> Yo < script>Boo! < / script >"
      # Strip non white-list tags and encode malformed ones.
      sanitizedAsHTML: " Yo &lt; script&gt;Boo! &lt; / script &gt;"
      # HTML encode tags for literal display
      sanitizedAsPlain: "&lt;script&gt;Bah&lt;/script&gt; Yo &lt; script&gt;Boo! &lt; / script &gt;"
    }
  ]

  it "sanitizes plain text properly", ->
    for test in tests
      expect(@component._sanitizeInput(test.in, "text/plain")).toBe test.sanitizedAsPlain

  it "sanitizes html text properly", ->
    for test in tests
      expect(@component._sanitizeInput(test.in, "text/html")).toBe test.sanitizedAsHTML

View file

@ -44,6 +44,28 @@ Utils =
set[item] = true for item in arr
return set
# Escapes potentially dangerous html characters so that `value` can be
# displayed literally inside markup.
#
# This code is lifted from Angular.js
# See their specs here:
# https://github.com/angular/angular.js/blob/master/test/ngSanitize/sanitizeSpec.js
# And the original source here: https://github.com/angular/angular.js/blob/master/src/ngSanitize/sanitize.js#L451
#
# value - the text to encode. `null` and `undefined` yield ""; any other
#         non-string value is converted with String() first.
#
# Returns the encoded String: `&`, `<` and `>` become named entities,
# while surrogate pairs and every character rejected by
# NON_ALPHANUMERIC_REGEXP (e.g. `"`, CR, LF, non-ASCII) become decimal
# numeric entities such as `&#10;`.
encodeHTMLEntities: (value) ->
  # Guard: calling `.replace` on null/undefined would throw.
  return "" unless value?
  text = String(value)

  SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
  # Combine a high/low surrogate pair into its single code point.
  pairFix = (pair) ->
    hi = pair.charCodeAt(0)
    low = pair.charCodeAt(1)
    return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';'

  # Match everything outside of normal chars and " (quote character)
  NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g
  alphaFix = (ch) -> '&#' + ch.charCodeAt(0) + ';'

  # Order matters: `&` is escaped first so the `&` of the entities
  # produced by the later steps is not encoded a second time.
  text.replace(/&/g, '&amp;').
    replace(SURROGATE_PAIR_REGEXP, pairFix).
    replace(NON_ALPHANUMERIC_REGEXP, alphaFix).
    replace(/</g, '&lt;').
    replace(/>/g, '&gt;')
modelClassMap: ->
return Utils._modelClassMap if Utils._modelClassMap