From 6b26ddec7a0f0e1639b2d516b632f9f7581a5c68 Mon Sep 17 00:00:00 2001 From: the-djmaze <> Date: Tue, 1 Feb 2022 11:46:12 +0100 Subject: [PATCH] #89 Redesign HTML parsing for securing PGP decrypted HTML --- dev/Common/Html.js | 227 +++++++++++++++++++++++++++++- dev/Common/UtilsUser.js | 164 +-------------------- dev/External/SquireUI.js | 6 +- dev/Model/Attachment.js | 2 - dev/Model/AttachmentCollection.js | 4 +- dev/Model/Message.js | 19 +-- 6 files changed, 227 insertions(+), 195 deletions(-) diff --git a/dev/Common/Html.js b/dev/Common/Html.js index 31c8948ef..f035c2699 100644 --- a/dev/Common/Html.js +++ b/dev/Common/Html.js @@ -1,4 +1,23 @@ +import { createElement } from 'Common/Globals'; + const +/* + strip_tags = (m => { + return (str) => str.replace(m, ''); + })(/<\s*\/?\s*(\w+|!)[^>]*>/gi), + + htmlspecialchars = ((de,se,gt,lt,sq,dq) => { + return (str, quote_style, double_encode) => { + str = (''+str) + .replace((!defined(double_encode)||double_encode)?de:se,'&') + .replace(gt,'<') + .replace(lt,'>'); + if (!is_number(quote_style)) { quote_style = 2; } + if (quote_style & 1) { str = str.replace(sq,'''); } + return (quote_style & 2) ? str.replace(dq,'"') : str; + }; + })(/&/g,/&(?![\w#]+;)/gi,//g,/'/g,/"/g), +*/ htmlre = /[&<>"']/g, htmlmap = { '&': '&', @@ -8,13 +27,202 @@ const "'": ''' }; -/** - * @param {string} text - * @returns {string} - */ -export function encodeHtml(text) { - return (text && text.toString ? text.toString() : ''+text).replace(htmlre, m => htmlmap[m]); -} +export const + + /** + * @param {string} text + * @returns {string} + */ + encodeHtml = text => (text && text.toString ? text.toString() : '' + text).replace(htmlre, m => htmlmap[m]), + + /** + * @param {string} text + * @returns {string} + */ + clearHtml = html => { + html = html.replace(/(]*>)([\s\S]*?)(<\/pre>)/gi, aMatches => { + return (aMatches[1] + aMatches[2].trim() + aMatches[3].trim()).replace(/\r?\n/g, '
'); + }); +/* + \MailSo\Base\HtmlUtils::ClearHtml( + $sHtml, $bHasExternals, $aFoundCIDs, $aContentLocationUrls, $aFoundContentLocationUrls, + $fAdditionalExternalFilter, !!$this->Config()->Get('labs', 'try_to_detect_hidden_images', false) + ); +*/ + return html; + }, + + // Removes background and color + // Many e-mails incorrectly only define one, not both + // And in dark theme mode this kills the readability + removeColors = html => { + let l; + do { + l = html.length; + html = html + .replace(/(<[^>]+[;"'])\s*background(-[a-z]+)?\s*:[^;"']+/gi, '$1') + .replace(/(<[^>]+[;"'])\s*color\s*:[^;"']+/gi, '$1') + .replace(/(<[^>]+)\s(bg)?color=("[^"]+"|'[^']+')/gi, '$1'); + } while (l != html.length) + return html; + }, + + /** + * @param {string} html + * @returns {string} + */ + htmlToPlain = html => { + let pos = 0, + limit = 800, + iP1 = 0, + iP2 = 0, + iP3 = 0, + text = ''; + + const + tpl = createElement('template'), + + convertBlockquote = (blockquoteText) => { + blockquoteText = '> ' + blockquoteText.trim().replace(/\n/gm, '\n> '); + return blockquoteText.replace(/(^|\n)([> ]+)/gm, (...args) => + args && 2 < args.length ? args[1] + args[2].replace(/[\s]/g, '').trim() + ' ' : '' + ); + }, + + convertDivs = (...args) => { + let divText = 1 < args.length ? args[1].trim() : ''; + if (divText.length) { + divText = '\n' + divText.replace(/]*>([\s\S\r\n]*)<\/div>/gim, convertDivs).trim() + '\n'; + } + + return divText; + }, + + convertPre = (...args) => + 1 < args.length + ? args[1] + .toString() + .replace(/[\n]/gm, '
') + .replace(/[\r]/gm, '') + : '', + fixAttibuteValue = (...args) => (1 < args.length ? args[1] + encodeHtml(args[2]) : ''), + + convertLinks = (...args) => (1 < args.length ? args[1].trim() : ''); + + tpl.innerHTML = html + .replace(/]*><\/p>/gi, '') + .replace(/]*>([\s\S\r\n\t]*)<\/pre>/gim, convertPre) + .replace(/[\s]+/gm, ' ') + .replace(/((?:href|data)\s?=\s?)("[^"]+?"|'[^']+?')/gim, fixAttibuteValue) + .replace(/]*>/gim, '\n') + .replace(/<\/h[\d]>/gi, '\n') + .replace(/<\/p>/gi, '\n\n') + .replace(/]*>/gim, '\n') + .replace(/<\/ul>/gi, '\n') + .replace(/]*>/gim, ' * ') + .replace(/<\/li>/gi, '\n') + .replace(/<\/td>/gi, '\n') + .replace(/<\/tr>/gi, '\n') + .replace(/]*>/gim, '\n_______________________________\n\n') + .replace(/]*>([\s\S\r\n]*)<\/div>/gim, convertDivs) + .replace(/]*>/gim, '\n__bq__start__\n') + .replace(/<\/blockquote>/gim, '\n__bq__end__\n') + .replace(/]*>([\s\S\r\n]*?)<\/a>/gim, convertLinks) + .replace(/<\/div>/gi, '\n') + .replace(/ /gi, ' ') + .replace(/"/gi, '"') + .replace(/<[^>]*>/gm, ''); + + text = tpl.content.textContent; + if (text) { + text = text + .replace(/\n[ \t]+/gm, '\n') + .replace(/[\n]{3,}/gm, '\n\n') + .replace(/>/gi, '>') + .replace(/</gi, '<') + .replace(/&/gi, '&') + // wordwrap max line length 100 + .match(/.{1,100}(\s|$)|\S+?(\s|$)/g).join('\n'); + } + + while (0 < --limit) { + iP1 = text.indexOf('__bq__start__', pos); + if (0 > iP1) { + break; + } + iP2 = text.indexOf('__bq__start__', iP1 + 5); + iP3 = text.indexOf('__bq__end__', iP1 + 5); + + if ((-1 === iP2 || iP3 < iP2) && iP1 < iP3) { + text = text.slice(0, iP1) + convertBlockquote(text.slice(iP1 + 13, iP3)) + text.slice(iP3 + 11); + pos = 0; + } else if (-1 < iP2 && iP2 < iP3) { + pos = iP2 - 1; + } else { + pos = 0; + } + } + + return text.replace(/__bq__start__|__bq__end__/gm, '').trim(); + }, + + /** + * @param {string} plain + * @param {boolean} findEmailAndLinksInText = false + * @returns {string} + */ + plainToHtml = plain => { + plain = plain.toString().replace(/\r/g, ''); + plain = plain.replace(/^>[> ]>+/gm, ([match]) => (match ? match.replace(/[ ]+/g, '') : match)); + + let bIn = false, + bDo = true, + bStart = true, + aNextText = [], + aText = plain.split('\n'); + + do { + bDo = false; + aNextText = []; + aText.forEach(sLine => { + bStart = '>' === sLine.slice(0, 1); + if (bStart && !bIn) { + bDo = true; + bIn = true; + aNextText.push('~~~blockquote~~~'); + aNextText.push(sLine.slice(1)); + } else if (!bStart && bIn) { + if (sLine) { + bIn = false; + aNextText.push('~~~/blockquote~~~'); + aNextText.push(sLine); + } else { + aNextText.push(sLine); + } + } else if (bStart && bIn) { + aNextText.push(sLine.slice(1)); + } else { + aNextText.push(sLine); + } + }); + + if (bIn) { + bIn = false; + aNextText.push('~~~/blockquote~~~'); + } + + aText = aNextText; + } while (bDo); + + return aText.join('\n') + // .replace(/~~~\/blockquote~~~\n~~~blockquote~~~/g, '\n') + .replace(/&/g, '&') + .replace(/>/g, '>') + .replace(/') + .replace(/[\s]*~~~\/blockquote~~~/g, '') + .replace(/\n/g, '
'); + }; export class HtmlEditor { /** @@ -190,3 +398,8 @@ export class HtmlEditor { this.onReady(() => this.isPlain() ? this.setPlain('') : this.setHtml('')); } } + +rl.Utils = { + htmlToPlain: htmlToPlain, + plainToHtml: plainToHtml +}; diff --git a/dev/Common/UtilsUser.js b/dev/Common/UtilsUser.js index b4ae1182f..bbc4b28a3 100644 --- a/dev/Common/UtilsUser.js +++ b/dev/Common/UtilsUser.js @@ -1,11 +1,11 @@ import { ComposeType/*, FolderType*/ } from 'Common/EnumsUser'; import { EmailModel } from 'Model/Email'; -import { encodeHtml } from 'Common/Html'; import { isArray } from 'Common/Utils'; import { createElement } from 'Common/Globals'; import { FolderUserStore } from 'Stores/User/Folder'; import { SettingsUserStore } from 'Stores/User/Settings'; import * as Local from 'Storage/Client'; +import { plainToHtml } from 'Common/Html'; export const @@ -20,163 +20,6 @@ sortFolders = folders => { } }, -/** - * @param {string} html - * @returns {string} - */ -htmlToPlain = (html) => { - let pos = 0, - limit = 800, - iP1 = 0, - iP2 = 0, - iP3 = 0, - text = ''; - - const - tpl = createElement('template'), - - convertBlockquote = (blockquoteText) => { - blockquoteText = '> ' + blockquoteText.trim().replace(/\n/gm, '\n> '); - return blockquoteText.replace(/(^|\n)([> ]+)/gm, (...args) => - args && 2 < args.length ? args[1] + args[2].replace(/[\s]/g, '').trim() + ' ' : '' - ); - }, - - convertDivs = (...args) => { - let divText = 1 < args.length ? args[1].trim() : ''; - if (divText.length) { - divText = '\n' + divText.replace(/]*>([\s\S\r\n]*)<\/div>/gim, convertDivs).trim() + '\n'; - } - - return divText; - }, - - convertPre = (...args) => - 1 < args.length - ? args[1] - .toString() - .replace(/[\n]/gm, '
') - .replace(/[\r]/gm, '') - : '', - fixAttibuteValue = (...args) => (1 < args.length ? args[1] + encodeHtml(args[2]) : ''), - - convertLinks = (...args) => (1 < args.length ? args[1].trim() : ''); - - tpl.innerHTML = html - .replace(/]*><\/p>/gi, '') - .replace(/]*>([\s\S\r\n\t]*)<\/pre>/gim, convertPre) - .replace(/[\s]+/gm, ' ') - .replace(/((?:href|data)\s?=\s?)("[^"]+?"|'[^']+?')/gim, fixAttibuteValue) - .replace(/]*>/gim, '\n') - .replace(/<\/h[\d]>/gi, '\n') - .replace(/<\/p>/gi, '\n\n') - .replace(/]*>/gim, '\n') - .replace(/<\/ul>/gi, '\n') - .replace(/]*>/gim, ' * ') - .replace(/<\/li>/gi, '\n') - .replace(/<\/td>/gi, '\n') - .replace(/<\/tr>/gi, '\n') - .replace(/]*>/gim, '\n_______________________________\n\n') - .replace(/]*>([\s\S\r\n]*)<\/div>/gim, convertDivs) - .replace(/]*>/gim, '\n__bq__start__\n') - .replace(/<\/blockquote>/gim, '\n__bq__end__\n') - .replace(/
]*>([\s\S\r\n]*?)<\/a>/gim, convertLinks) - .replace(/<\/div>/gi, '\n') - .replace(/ /gi, ' ') - .replace(/"/gi, '"') - .replace(/<[^>]*>/gm, ''); - - text = tpl.content.textContent; - if (text) { - text = text - .replace(/\n[ \t]+/gm, '\n') - .replace(/[\n]{3,}/gm, '\n\n') - .replace(/>/gi, '>') - .replace(/</gi, '<') - .replace(/&/gi, '&') - // wordwrap max line length 100 - .match(/.{1,100}(\s|$)|\S+?(\s|$)/g).join('\n'); - } - - while (0 < --limit) { - iP1 = text.indexOf('__bq__start__', pos); - if (0 > iP1) { - break; - } - iP2 = text.indexOf('__bq__start__', iP1 + 5); - iP3 = text.indexOf('__bq__end__', iP1 + 5); - - if ((-1 === iP2 || iP3 < iP2) && iP1 < iP3) { - text = text.slice(0, iP1) + convertBlockquote(text.slice(iP1 + 13, iP3)) + text.slice(iP3 + 11); - pos = 0; - } else if (-1 < iP2 && iP2 < iP3) { - pos = iP2 - 1; - } else { - pos = 0; - } - } - - return text.replace(/__bq__start__|__bq__end__/gm, '').trim(); -}, - -/** - * @param {string} plain - * @param {boolean} findEmailAndLinksInText = false - * @returns {string} - */ -plainToHtml = (plain) => { - plain = plain.toString().replace(/\r/g, ''); - plain = plain.replace(/^>[> ]>+/gm, ([match]) => (match ? match.replace(/[ ]+/g, '') : match)); - - let bIn = false, - bDo = true, - bStart = true, - aNextText = [], - aText = plain.split('\n'); - - do { - bDo = false; - aNextText = []; - aText.forEach(sLine => { - bStart = '>' === sLine.slice(0, 1); - if (bStart && !bIn) { - bDo = true; - bIn = true; - aNextText.push('~~~blockquote~~~'); - aNextText.push(sLine.slice(1)); - } else if (!bStart && bIn) { - if (sLine) { - bIn = false; - aNextText.push('~~~/blockquote~~~'); - aNextText.push(sLine); - } else { - aNextText.push(sLine); - } - } else if (bStart && bIn) { - aNextText.push(sLine.slice(1)); - } else { - aNextText.push(sLine); - } - }); - - if (bIn) { - bIn = false; - aNextText.push('~~~/blockquote~~~'); - } - - aText = aNextText; - } while (bDo); - - return aText.join('\n') - // .replace(/~~~\/blockquote~~~\n~~~blockquote~~~/g, '\n') - .replace(/&/g, '&') - .replace(/>/g, '>') - .replace(/') - .replace(/[\s]*~~~\/blockquote~~~/g, '') - .replace(/\n/g, '
'); -}, - /** * @param {Array=} aDisabled * @param {Array=} aHeaderLines @@ -484,8 +327,3 @@ setLayoutResizer = (source, target, sClientSideKeyName, mode) => source.observer && source.observer.disconnect(); } }; - -rl.Utils = { - htmlToPlain: htmlToPlain, - plainToHtml: plainToHtml -}; diff --git a/dev/External/SquireUI.js b/dev/External/SquireUI.js index 5187de1dd..2c474f670 100644 --- a/dev/External/SquireUI.js +++ b/dev/External/SquireUI.js @@ -80,7 +80,7 @@ const }; } else if (!isHtml && prevSignature.isHtml) { prevSignature = { - body: rl.Utils.htmlToPlain(prevSignature.body), + body: clearHtmlLine(prevSignature.body), isHtml: true }; } @@ -479,7 +479,7 @@ class SquireUI let cl = this.container.classList; cl.remove('squire-mode-'+this.mode); if ('plain' == mode) { - this.plain.value = rl.Utils.htmlToPlain(this.squire.getHTML(), true).trim(); + this.plain.value = clearHtmlLine(this.squire.getHTML(), true); } else { this.setData(rl.Utils.plainToHtml(this.plain.value, true)); mode = 'wysiwyg'; @@ -515,7 +515,7 @@ class SquireUI } else try { if ('plain' === this.mode) { if (cfg.isHtml) { - cfg.signature = rl.Utils.htmlToPlain(cfg.signature); + cfg.signature = clearHtmlLine(cfg.signature); } this.plain.value = rl_signature_replacer(this, this.plain.value, cfg.signature, false, cfg.insertBefore); } else { diff --git a/dev/Model/Attachment.js b/dev/Model/Attachment.js index df1da1527..661e9a99c 100644 --- a/dev/Model/Attachment.js +++ b/dev/Model/Attachment.js @@ -25,7 +25,6 @@ export class AttachmentModel extends AbstractModel { this.isLinked = false; this.isThumbnail = false; this.cid = ''; - this.cidWithoutTags = ''; this.contentLocation = ''; this.download = ''; this.folder = ''; @@ -43,7 +42,6 @@ export class AttachmentModel extends AbstractModel { const attachment = super.reviveFromJson(json); if (attachment) { attachment.friendlySize = FileInfo.friendlySize(json.EstimatedSize); - attachment.cidWithoutTags = attachment.cid.replace(/^<+/, '').replace(/>+$/, ''); attachment.fileNameExt = FileInfo.getExtension(attachment.fileName); attachment.fileType = FileInfo.getType(attachment.fileNameExt, attachment.mimeType); diff --git a/dev/Model/AttachmentCollection.js b/dev/Model/AttachmentCollection.js index c22784bb7..8fd67abae 100644 --- a/dev/Model/AttachmentCollection.js +++ b/dev/Model/AttachmentCollection.js @@ -32,7 +32,7 @@ export class AttachmentCollectionModel extends AbstractCollectionModel * @returns {*} */ findByCid(cid) { - cid = cid.replace(/^<+|>+$/, ''); - return this.find(item => cid === item.cidWithoutTags); + let regex = /^<+|>+$/g, cidc = cid.replace(regex, ''); + return this.find(item => cid === item.cid || cidc === item.cid || cidc === item.cid.replace(regex, '')); } } diff --git a/dev/Model/Message.js b/dev/Model/Message.js index 10cdda500..1920217a4 100644 --- a/dev/Model/Message.js +++ b/dev/Model/Message.js @@ -4,10 +4,8 @@ import { MessagePriority } from 'Common/EnumsUser'; import { i18n } from 'Common/Translator'; import { doc } from 'Common/Globals'; -import { encodeHtml } from 'Common/Html'; +import { encodeHtml, removeColors, plainToHtml } from 'Common/Html'; import { isArray, arrayLength, forEachObjectEntry } from 'Common/Utils'; -import { plainToHtml } from 'Common/UtilsUser'; - import { serverRequestRaw } from 'Common/Links'; import { FolderUserStore } from 'Stores/User/Folder'; @@ -26,21 +24,6 @@ const // eslint-disable-next-line max-len email = /(^|[\s\n]|\/?>)((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x21\x23-\x5b\x5d-\x7f]|\\[\x21\x23-\x5b\x5d-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x21-\x5a\x53-\x7f]|\\[\x21\x23-\x5b\x5d-\x7f])+)\]))/gi, - // Removes background and color - // Many e-mails incorrectly only define one, not both - // And in dark theme mode this kills the readability - removeColors = html => { - let l; - do { - l = html.length; - html = html - .replace(/(<[^>]+[;"'])\s*background(-[a-z]+)?\s*:[^;"']+/gi, '$1') - .replace(/(<[^>]+[;"'])\s*color\s*:[^;"']+/gi, '$1') - .replace(/(<[^>]+)\s(bg)?color=("[^"]+"|'[^']+')/gi, '$1'); - } while (l != html.length) - return html; - }, - hcont = Element.fromHTML('
'), getRealHeight = el => { hcont.innerHTML = el.outerHTML;