#89 Redesign HTML parsing for securing PGP decrypted HTML

This commit is contained in:
the-djmaze 2022-02-01 11:46:12 +01:00
parent d9e81d6123
commit 6b26ddec7a
6 changed files with 227 additions and 195 deletions

View file

@ -1,4 +1,23 @@
import { createElement } from 'Common/Globals';
const const
/*
strip_tags = (m => {
return (str) => str.replace(m, '');
})(/<\s*\/?\s*(\w+|!)[^>]*>/gi),
htmlspecialchars = ((de,se,gt,lt,sq,dq) => {
return (str, quote_style, double_encode) => {
str = (''+str)
.replace((!defined(double_encode)||double_encode)?de:se,'&amp;')
.replace(gt,'&lt;')
.replace(lt,'&gt;');
if (!is_number(quote_style)) { quote_style = 2; }
if (quote_style & 1) { str = str.replace(sq,'&#039;'); }
return (quote_style & 2) ? str.replace(dq,'&quot;') : str;
};
})(/&/g,/&(?![\w#]+;)/gi,/</g,/>/g,/'/g,/"/g),
*/
htmlre = /[&<>"']/g, htmlre = /[&<>"']/g,
htmlmap = { htmlmap = {
'&': '&amp;', '&': '&amp;',
@ -8,13 +27,202 @@ const
"'": '&#x27;' "'": '&#x27;'
}; };
/** export const
* @param {string} text
* @returns {string} /**
*/ * @param {string} text
export function encodeHtml(text) { * @returns {string}
return (text && text.toString ? text.toString() : ''+text).replace(htmlre, m => htmlmap[m]); */
} encodeHtml = text => (text && text.toString ? text.toString() : '' + text).replace(htmlre, m => htmlmap[m]),
/**
* @param {string} text
* @returns {string}
*/
clearHtml = html => {
html = html.replace(/(<pre[^>]*>)([\s\S]*?)(<\/pre>)/gi, aMatches => {
return (aMatches[1] + aMatches[2].trim() + aMatches[3].trim()).replace(/\r?\n/g, '<br>');
});
/*
\MailSo\Base\HtmlUtils::ClearHtml(
$sHtml, $bHasExternals, $aFoundCIDs, $aContentLocationUrls, $aFoundContentLocationUrls,
$fAdditionalExternalFilter, !!$this->Config()->Get('labs', 'try_to_detect_hidden_images', false)
);
*/
return html;
},
// Removes background and color
// Many e-mails incorrectly only define one, not both
// And in dark theme mode this kills the readability
removeColors = html => {
let l;
do {
l = html.length;
html = html
.replace(/(<[^>]+[;"'])\s*background(-[a-z]+)?\s*:[^;"']+/gi, '$1')
.replace(/(<[^>]+[;"'])\s*color\s*:[^;"']+/gi, '$1')
.replace(/(<[^>]+)\s(bg)?color=("[^"]+"|'[^']+')/gi, '$1');
} while (l != html.length)
return html;
},
/**
* @param {string} html
* @returns {string}
*/
htmlToPlain = html => {
let pos = 0,
limit = 800,
iP1 = 0,
iP2 = 0,
iP3 = 0,
text = '';
const
tpl = createElement('template'),
convertBlockquote = (blockquoteText) => {
blockquoteText = '> ' + blockquoteText.trim().replace(/\n/gm, '\n> ');
return blockquoteText.replace(/(^|\n)([> ]+)/gm, (...args) =>
args && 2 < args.length ? args[1] + args[2].replace(/[\s]/g, '').trim() + ' ' : ''
);
},
convertDivs = (...args) => {
let divText = 1 < args.length ? args[1].trim() : '';
if (divText.length) {
divText = '\n' + divText.replace(/<div[^>]*>([\s\S\r\n]*)<\/div>/gim, convertDivs).trim() + '\n';
}
return divText;
},
convertPre = (...args) =>
1 < args.length
? args[1]
.toString()
.replace(/[\n]/gm, '<br/>')
.replace(/[\r]/gm, '')
: '',
fixAttibuteValue = (...args) => (1 < args.length ? args[1] + encodeHtml(args[2]) : ''),
convertLinks = (...args) => (1 < args.length ? args[1].trim() : '');
tpl.innerHTML = html
.replace(/<p[^>]*><\/p>/gi, '')
.replace(/<pre[^>]*>([\s\S\r\n\t]*)<\/pre>/gim, convertPre)
.replace(/[\s]+/gm, ' ')
.replace(/((?:href|data)\s?=\s?)("[^"]+?"|'[^']+?')/gim, fixAttibuteValue)
.replace(/<br[^>]*>/gim, '\n')
.replace(/<\/h[\d]>/gi, '\n')
.replace(/<\/p>/gi, '\n\n')
.replace(/<ul[^>]*>/gim, '\n')
.replace(/<\/ul>/gi, '\n')
.replace(/<li[^>]*>/gim, ' * ')
.replace(/<\/li>/gi, '\n')
.replace(/<\/td>/gi, '\n')
.replace(/<\/tr>/gi, '\n')
.replace(/<hr[^>]*>/gim, '\n_______________________________\n\n')
.replace(/<div[^>]*>([\s\S\r\n]*)<\/div>/gim, convertDivs)
.replace(/<blockquote[^>]*>/gim, '\n__bq__start__\n')
.replace(/<\/blockquote>/gim, '\n__bq__end__\n')
.replace(/<a [^>]*>([\s\S\r\n]*?)<\/a>/gim, convertLinks)
.replace(/<\/div>/gi, '\n')
.replace(/&nbsp;/gi, ' ')
.replace(/&quot;/gi, '"')
.replace(/<[^>]*>/gm, '');
text = tpl.content.textContent;
if (text) {
text = text
.replace(/\n[ \t]+/gm, '\n')
.replace(/[\n]{3,}/gm, '\n\n')
.replace(/&gt;/gi, '>')
.replace(/&lt;/gi, '<')
.replace(/&amp;/gi, '&')
// wordwrap max line length 100
.match(/.{1,100}(\s|$)|\S+?(\s|$)/g).join('\n');
}
while (0 < --limit) {
iP1 = text.indexOf('__bq__start__', pos);
if (0 > iP1) {
break;
}
iP2 = text.indexOf('__bq__start__', iP1 + 5);
iP3 = text.indexOf('__bq__end__', iP1 + 5);
if ((-1 === iP2 || iP3 < iP2) && iP1 < iP3) {
text = text.slice(0, iP1) + convertBlockquote(text.slice(iP1 + 13, iP3)) + text.slice(iP3 + 11);
pos = 0;
} else if (-1 < iP2 && iP2 < iP3) {
pos = iP2 - 1;
} else {
pos = 0;
}
}
return text.replace(/__bq__start__|__bq__end__/gm, '').trim();
},
/**
 * Converts plain text into HTML.
 * Lines starting with ">" become (nested) <blockquote> elements, the characters
 * & < > are HTML-encoded and line breaks become <br/>.
 * @param {string} plain
 * @returns {string}
 */
plainToHtml = plain => {
	plain = plain.toString().replace(/\r/g, '');
	// Remove the spaces inside a leading run of quote markers ("> >" becomes ">>").
	// The callback's first argument is the whole matched run.
	plain = plain.replace(/^>[> ]>+/gm, markers => markers.replace(/[ ]+/g, ''));

	let bIn = false,
		bDo = true,
		bStart = true,
		aNextText = [],
		aText = plain.split('\n');

	// Every pass peels one ">" level off the quoted lines and wraps them in
	// placeholder markers; repeat until a pass finds no quoted line
	do {
		bDo = false;
		aNextText = [];
		aText.forEach(sLine => {
			bStart = '>' === sLine.slice(0, 1);
			if (bStart && !bIn) {
				bDo = true;
				bIn = true;
				aNextText.push('~~~blockquote~~~');
				aNextText.push(sLine.slice(1));
			} else if (!bStart && bIn) {
				if (sLine) {
					bIn = false;
					aNextText.push('~~~/blockquote~~~');
					aNextText.push(sLine);
				} else {
					// An empty line does not end the current quote
					aNextText.push(sLine);
				}
			} else if (bStart && bIn) {
				aNextText.push(sLine.slice(1));
			} else {
				aNextText.push(sLine);
			}
		});

		if (bIn) {
			bIn = false;
			aNextText.push('~~~/blockquote~~~');
		}

		aText = aNextText;
	} while (bDo);

	return aText.join('\n')
		// .replace(/~~~\/blockquote~~~\n~~~blockquote~~~/g, '\n')
		.replace(/&/g, '&amp;')
		.replace(/>/g, '&gt;')
		.replace(/</g, '&lt;')
		.replace(/~~~blockquote~~~[\s]*/g, '<blockquote>')
		.replace(/[\s]*~~~\/blockquote~~~/g, '</blockquote>')
		.replace(/\n/g, '<br/>');
};
export class HtmlEditor { export class HtmlEditor {
/** /**
@ -190,3 +398,8 @@ export class HtmlEditor {
this.onReady(() => this.isPlain() ? this.setPlain('') : this.setHtml('')); this.onReady(() => this.isPlain() ? this.setPlain('') : this.setHtml(''));
} }
} }
// Make the HTML <-> plain text converters reachable through `rl.Utils`
rl.Utils = { htmlToPlain, plainToHtml };

View file

@ -1,11 +1,11 @@
import { ComposeType/*, FolderType*/ } from 'Common/EnumsUser'; import { ComposeType/*, FolderType*/ } from 'Common/EnumsUser';
import { EmailModel } from 'Model/Email'; import { EmailModel } from 'Model/Email';
import { encodeHtml } from 'Common/Html';
import { isArray } from 'Common/Utils'; import { isArray } from 'Common/Utils';
import { createElement } from 'Common/Globals'; import { createElement } from 'Common/Globals';
import { FolderUserStore } from 'Stores/User/Folder'; import { FolderUserStore } from 'Stores/User/Folder';
import { SettingsUserStore } from 'Stores/User/Settings'; import { SettingsUserStore } from 'Stores/User/Settings';
import * as Local from 'Storage/Client'; import * as Local from 'Storage/Client';
import { plainToHtml } from 'Common/Html';
export const export const
@ -20,163 +20,6 @@ sortFolders = folders => {
} }
}, },
/**
* @param {string} html
* @returns {string}
*/
htmlToPlain = (html) => {
let pos = 0,
limit = 800,
iP1 = 0,
iP2 = 0,
iP3 = 0,
text = '';
const
tpl = createElement('template'),
convertBlockquote = (blockquoteText) => {
blockquoteText = '> ' + blockquoteText.trim().replace(/\n/gm, '\n> ');
return blockquoteText.replace(/(^|\n)([> ]+)/gm, (...args) =>
args && 2 < args.length ? args[1] + args[2].replace(/[\s]/g, '').trim() + ' ' : ''
);
},
convertDivs = (...args) => {
let divText = 1 < args.length ? args[1].trim() : '';
if (divText.length) {
divText = '\n' + divText.replace(/<div[^>]*>([\s\S\r\n]*)<\/div>/gim, convertDivs).trim() + '\n';
}
return divText;
},
convertPre = (...args) =>
1 < args.length
? args[1]
.toString()
.replace(/[\n]/gm, '<br/>')
.replace(/[\r]/gm, '')
: '',
fixAttibuteValue = (...args) => (1 < args.length ? args[1] + encodeHtml(args[2]) : ''),
convertLinks = (...args) => (1 < args.length ? args[1].trim() : '');
tpl.innerHTML = html
.replace(/<p[^>]*><\/p>/gi, '')
.replace(/<pre[^>]*>([\s\S\r\n\t]*)<\/pre>/gim, convertPre)
.replace(/[\s]+/gm, ' ')
.replace(/((?:href|data)\s?=\s?)("[^"]+?"|'[^']+?')/gim, fixAttibuteValue)
.replace(/<br[^>]*>/gim, '\n')
.replace(/<\/h[\d]>/gi, '\n')
.replace(/<\/p>/gi, '\n\n')
.replace(/<ul[^>]*>/gim, '\n')
.replace(/<\/ul>/gi, '\n')
.replace(/<li[^>]*>/gim, ' * ')
.replace(/<\/li>/gi, '\n')
.replace(/<\/td>/gi, '\n')
.replace(/<\/tr>/gi, '\n')
.replace(/<hr[^>]*>/gim, '\n_______________________________\n\n')
.replace(/<div[^>]*>([\s\S\r\n]*)<\/div>/gim, convertDivs)
.replace(/<blockquote[^>]*>/gim, '\n__bq__start__\n')
.replace(/<\/blockquote>/gim, '\n__bq__end__\n')
.replace(/<a [^>]*>([\s\S\r\n]*?)<\/a>/gim, convertLinks)
.replace(/<\/div>/gi, '\n')
.replace(/&nbsp;/gi, ' ')
.replace(/&quot;/gi, '"')
.replace(/<[^>]*>/gm, '');
text = tpl.content.textContent;
if (text) {
text = text
.replace(/\n[ \t]+/gm, '\n')
.replace(/[\n]{3,}/gm, '\n\n')
.replace(/&gt;/gi, '>')
.replace(/&lt;/gi, '<')
.replace(/&amp;/gi, '&')
// wordwrap max line length 100
.match(/.{1,100}(\s|$)|\S+?(\s|$)/g).join('\n');
}
while (0 < --limit) {
iP1 = text.indexOf('__bq__start__', pos);
if (0 > iP1) {
break;
}
iP2 = text.indexOf('__bq__start__', iP1 + 5);
iP3 = text.indexOf('__bq__end__', iP1 + 5);
if ((-1 === iP2 || iP3 < iP2) && iP1 < iP3) {
text = text.slice(0, iP1) + convertBlockquote(text.slice(iP1 + 13, iP3)) + text.slice(iP3 + 11);
pos = 0;
} else if (-1 < iP2 && iP2 < iP3) {
pos = iP2 - 1;
} else {
pos = 0;
}
}
return text.replace(/__bq__start__|__bq__end__/gm, '').trim();
},
/**
* @param {string} plain
* @param {boolean} findEmailAndLinksInText = false
* @returns {string}
*/
plainToHtml = (plain) => {
plain = plain.toString().replace(/\r/g, '');
plain = plain.replace(/^>[> ]>+/gm, ([match]) => (match ? match.replace(/[ ]+/g, '') : match));
let bIn = false,
bDo = true,
bStart = true,
aNextText = [],
aText = plain.split('\n');
do {
bDo = false;
aNextText = [];
aText.forEach(sLine => {
bStart = '>' === sLine.slice(0, 1);
if (bStart && !bIn) {
bDo = true;
bIn = true;
aNextText.push('~~~blockquote~~~');
aNextText.push(sLine.slice(1));
} else if (!bStart && bIn) {
if (sLine) {
bIn = false;
aNextText.push('~~~/blockquote~~~');
aNextText.push(sLine);
} else {
aNextText.push(sLine);
}
} else if (bStart && bIn) {
aNextText.push(sLine.slice(1));
} else {
aNextText.push(sLine);
}
});
if (bIn) {
bIn = false;
aNextText.push('~~~/blockquote~~~');
}
aText = aNextText;
} while (bDo);
return aText.join('\n')
// .replace(/~~~\/blockquote~~~\n~~~blockquote~~~/g, '\n')
.replace(/&/g, '&amp;')
.replace(/>/g, '&gt;')
.replace(/</g, '&lt;')
.replace(/~~~blockquote~~~[\s]*/g, '<blockquote>')
.replace(/[\s]*~~~\/blockquote~~~/g, '</blockquote>')
.replace(/\n/g, '<br/>');
},
/** /**
* @param {Array=} aDisabled * @param {Array=} aDisabled
* @param {Array=} aHeaderLines * @param {Array=} aHeaderLines
@ -484,8 +327,3 @@ setLayoutResizer = (source, target, sClientSideKeyName, mode) =>
source.observer && source.observer.disconnect(); source.observer && source.observer.disconnect();
} }
}; };
// Make the HTML <-> plain text converters reachable through `rl.Utils`
rl.Utils = { htmlToPlain, plainToHtml };

View file

@ -80,7 +80,7 @@ const
}; };
} else if (!isHtml && prevSignature.isHtml) { } else if (!isHtml && prevSignature.isHtml) {
prevSignature = { prevSignature = {
body: rl.Utils.htmlToPlain(prevSignature.body), body: clearHtmlLine(prevSignature.body),
isHtml: true isHtml: true
}; };
} }
@ -479,7 +479,7 @@ class SquireUI
let cl = this.container.classList; let cl = this.container.classList;
cl.remove('squire-mode-'+this.mode); cl.remove('squire-mode-'+this.mode);
if ('plain' == mode) { if ('plain' == mode) {
this.plain.value = rl.Utils.htmlToPlain(this.squire.getHTML(), true).trim(); this.plain.value = clearHtmlLine(this.squire.getHTML(), true);
} else { } else {
this.setData(rl.Utils.plainToHtml(this.plain.value, true)); this.setData(rl.Utils.plainToHtml(this.plain.value, true));
mode = 'wysiwyg'; mode = 'wysiwyg';
@ -515,7 +515,7 @@ class SquireUI
} else try { } else try {
if ('plain' === this.mode) { if ('plain' === this.mode) {
if (cfg.isHtml) { if (cfg.isHtml) {
cfg.signature = rl.Utils.htmlToPlain(cfg.signature); cfg.signature = clearHtmlLine(cfg.signature);
} }
this.plain.value = rl_signature_replacer(this, this.plain.value, cfg.signature, false, cfg.insertBefore); this.plain.value = rl_signature_replacer(this, this.plain.value, cfg.signature, false, cfg.insertBefore);
} else { } else {

View file

@ -25,7 +25,6 @@ export class AttachmentModel extends AbstractModel {
this.isLinked = false; this.isLinked = false;
this.isThumbnail = false; this.isThumbnail = false;
this.cid = ''; this.cid = '';
this.cidWithoutTags = '';
this.contentLocation = ''; this.contentLocation = '';
this.download = ''; this.download = '';
this.folder = ''; this.folder = '';
@ -43,7 +42,6 @@ export class AttachmentModel extends AbstractModel {
const attachment = super.reviveFromJson(json); const attachment = super.reviveFromJson(json);
if (attachment) { if (attachment) {
attachment.friendlySize = FileInfo.friendlySize(json.EstimatedSize); attachment.friendlySize = FileInfo.friendlySize(json.EstimatedSize);
attachment.cidWithoutTags = attachment.cid.replace(/^<+/, '').replace(/>+$/, '');
attachment.fileNameExt = FileInfo.getExtension(attachment.fileName); attachment.fileNameExt = FileInfo.getExtension(attachment.fileName);
attachment.fileType = FileInfo.getType(attachment.fileNameExt, attachment.mimeType); attachment.fileType = FileInfo.getType(attachment.fileNameExt, attachment.mimeType);

View file

@ -32,7 +32,7 @@ export class AttachmentCollectionModel extends AbstractCollectionModel
* @returns {*} * @returns {*}
*/ */
findByCid(cid) { findByCid(cid) {
cid = cid.replace(/^<+|>+$/, ''); let regex = /^<+|>+$/g, cidc = cid.replace(regex, '');
return this.find(item => cid === item.cidWithoutTags); return this.find(item => cid === item.cid || cidc === item.cid || cidc === item.cid.replace(regex, ''));
} }
} }

View file

@ -4,10 +4,8 @@ import { MessagePriority } from 'Common/EnumsUser';
import { i18n } from 'Common/Translator'; import { i18n } from 'Common/Translator';
import { doc } from 'Common/Globals'; import { doc } from 'Common/Globals';
import { encodeHtml } from 'Common/Html'; import { encodeHtml, removeColors, plainToHtml } from 'Common/Html';
import { isArray, arrayLength, forEachObjectEntry } from 'Common/Utils'; import { isArray, arrayLength, forEachObjectEntry } from 'Common/Utils';
import { plainToHtml } from 'Common/UtilsUser';
import { serverRequestRaw } from 'Common/Links'; import { serverRequestRaw } from 'Common/Links';
import { FolderUserStore } from 'Stores/User/Folder'; import { FolderUserStore } from 'Stores/User/Folder';
@ -26,21 +24,6 @@ const
// eslint-disable-next-line max-len // eslint-disable-next-line max-len
email = /(^|[\s\n]|\/?>)((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x21\x23-\x5b\x5d-\x7f]|\\[\x21\x23-\x5b\x5d-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x21-\x5a\x53-\x7f]|\\[\x21\x23-\x5b\x5d-\x7f])+)\]))/gi, email = /(^|[\s\n]|\/?>)((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x21\x23-\x5b\x5d-\x7f]|\\[\x21\x23-\x5b\x5d-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x21-\x5a\x53-\x7f]|\\[\x21\x23-\x5b\x5d-\x7f])+)\]))/gi,
// Removes background and color
// Many e-mails incorrectly only define one, not both
// And in dark theme mode this kills the readability
removeColors = html => {
let l;
do {
l = html.length;
html = html
.replace(/(<[^>]+[;"'])\s*background(-[a-z]+)?\s*:[^;"']+/gi, '$1')
.replace(/(<[^>]+[;"'])\s*color\s*:[^;"']+/gi, '$1')
.replace(/(<[^>]+)\s(bg)?color=("[^"]+"|'[^']+')/gi, '$1');
} while (l != html.length)
return html;
},
hcont = Element.fromHTML('<div area="hidden" style="position:absolute;left:-5000px"></div>'), hcont = Element.fromHTML('<div area="hidden" style="position:absolute;left:-5000px"></div>'),
getRealHeight = el => { getRealHeight = el => {
hcont.innerHTML = el.outerHTML; hcont.innerHTML = el.outerHTML;