From ec9197cb859c9627084dbb1d760c71d85cbdd9db Mon Sep 17 00:00:00 2001
From: the-djmaze <>
Date: Sun, 4 Feb 2024 23:03:53 +0100
Subject: [PATCH] Also fix MIME header parsing in JavaScript, read issue #1403

---
Review notes (text in this section is not part of the commit message):

 * Purpose: move addressparser() to Mime/Address, move the B/Q/text decoders
   to Mime/Encoding, and let ParseMime() return parsed address lists for
   from/reply-to/to/cc/bcc.
 * Line structure was restored from a whitespace-collapsed copy. Indentation
   is reconstructed with tabs and blank context lines are inferred from the
   hunk line counts; check against the tree (or apply whitespace-tolerant).
 * Changes relative to the original commit:
   1. Parser.js: 'keywords' used forEach(), which returns undefined; now map().
   2. Parser.js: 'comments' stored the return value of push() (a length);
      now concat() so the value stays an array.
   3. Parser.js: the commented-out headers sketch used '=' inside an object
      literal; now ':'.
   4. Encoding.js: encoded-word encoding token is matched case-insensitively,
      '_' is turned into a space for Q encoding (RFC 2047 section 4.2), and
      an undecodable word is kept verbatim instead of becoming "undefined".
   5. Utils.js: dropped the leftover console.dir() debug line (hunk removed,
      diffstat adjusted).
   6. Address.js: fixed the "occurence" typo and restored the doc example.
 * The blob ids on the "index" lines are carried over from the original commit.

 dev/Component/EmailAddresses.js |   3 +-
 dev/Mime/Address.js             | 197 ++++++++++++++++++++++++++++++++
 dev/Mime/Encoding.js            |  35 ++++++
 dev/Mime/Parser.js              |  86 ++++++++++----
 dev/Mime/Utils.js               |  13 ++-
 dev/Model/Email.js              | 196 -------------------------------
 dev/Model/EmailCollection.js    |   3 +-
 dev/View/Popup/Compose.js       |   3 +-
 8 files changed, 313 insertions(+), 223 deletions(-)
 create mode 100644 dev/Mime/Address.js
 create mode 100644 dev/Mime/Encoding.js

diff --git a/dev/Component/EmailAddresses.js b/dev/Component/EmailAddresses.js
index 1f7646c74..c4c6e364f 100644
--- a/dev/Component/EmailAddresses.js
+++ b/dev/Component/EmailAddresses.js
@@ -1,5 +1,6 @@
 import { doc, createElement, addEventsListeners } from 'Common/Globals';
-import { EmailModel, addressparser } from 'Model/Email';
+import { EmailModel } from 'Model/Email';
+import { addressparser } from 'Mime/Address';
 
 const contentType = 'snappymail/emailaddress',
 	getAddressKey = li => li?.emailaddress?.key,
diff --git a/dev/Mime/Address.js b/dev/Mime/Address.js
new file mode 100644
index 000000000..5e9ae887d
--- /dev/null
+++ b/dev/Mime/Address.js
@@ -0,0 +1,197 @@
+import { decodeEncodedWords } from 'Mime/Encoding';
+
+/**
+ * Parses structured e-mail addresses from an address/mailbox(-list) field
+ * https://datatracker.ietf.org/doc/html/rfc2822#section-3.4
+ *
+ * Example:
+ *
+ *    "Name <address@domain>"
+ *
+ * will be converted to
+ *
+ *     [{name: "Name", email: "address@domain"}]
+ *
+ * @param {String} str Address field
+ * @return {Array} An array of address objects
+ */
+export function addressparser(str) {
+	str = (str || '').toString();
+
+	let
+		endOperator = '',
+		node = {
+			type: 'text',
+			value: ''
+		},
+		escaped = false,
+		address = [],
+		addresses = [];
+
+	const
+		/*
+		 * Operator tokens and which tokens are expected to end the sequence
+		 */
+		OPERATORS = {
+			'"': '"',
+			'(': ')',
+			'<': '>',
+			',': '',
+			// Groups are ended by semicolons
+			':': ';',
+			// Semicolons are not a legal delimiter per the RFC2822 grammar other
+			// than for terminating a group, but they are also not valid for any
+			// other use in this context. Given that some mail clients have
+			// historically allowed the semicolon as a delimiter equivalent to the
+			// comma in their UI, it makes sense to treat them the same as a comma
+			// when used outside of a group.
+			';': ''
+		},
+		pushToken = token => {
+			token.value = (token.value || '').toString().trim();
+			token.value.length && address.push(token);
+			node = {
+				type: 'text',
+				value: ''
+			},
+			escaped = false;
+		},
+		pushAddress = () => {
+			if (address.length) {
+				address = _handleAddress(address);
+				if (address.length) {
+					addresses = addresses.concat(address);
+				}
+			}
+			address = [];
+		};
+
+	[...str].forEach(chr => {
+		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
+			pushToken(node);
+			if (',' === chr || ';' === chr) {
+				pushAddress();
+			} else {
+				endOperator = endOperator ? '' : OPERATORS[chr];
+				if ('<' === chr) {
+					node.type = 'email';
+				} else if ('(' === chr) {
+					node.type = 'comment';
+				} else if (':' === chr) {
+					node.type = 'group';
+				}
+			}
+		} else {
+			node.value += chr;
+			escaped = !escaped && '\\' === chr;
+		}
+	});
+	pushToken(node);
+
+	pushAddress();
+
+	return addresses;
+}
+
+/**
+ * Converts tokens for a single address into an address object
+ *
+ * @param {Array} tokens Tokens object
+ * @return {Object} Address object
+ */
+function _handleAddress(tokens) {
+	let
+		isGroup = false,
+		address = {},
+		addresses = [],
+		data = {
+			email: [],
+			comment: [],
+			group: [],
+			text: []
+		};
+
+	tokens.forEach(token => {
+		isGroup = isGroup || 'group' === token.type;
+		data[token.type].push(token.value);
+	});
+
+	// If there is no text but a comment, replace the two
+	if (!data.text.length && data.comment.length) {
+		data.text = data.comment;
+		data.comment = [];
+	}
+
+	if (isGroup) {
+		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
+/*
+		addresses.push({
+			email: '',
+			name: data.text.join(' ').trim(),
+			group: addressparser(data.group.join(','))
+//			,comment: data.comment.join(' ').trim()
+		});
+*/
+		addresses = addresses.concat(addressparser(data.group.join(',')));
+	} else {
+		// If no address was found, try to detect one from regular text
+		if (!data.email.length && data.text.length) {
+			var i = data.text.length;
+			while (i--) {
+				if (data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
+					data.email = data.text.splice(i, 1);
+					break;
+				}
+			}
+
+			// still no address
+			if (!data.email.length) {
+				i = data.text.length;
+				while (i--) {
+					data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, address => {
+						if (!data.email.length) {
+							data.email = [address.trim()];
+							return '';
+						}
+						return address.trim();
+					});
+					if (data.email.length) {
+						break;
+					}
+				}
+			}
+		}
+
+		// If there's still no text but a comment exists, replace the two
+		if (!data.text.length && data.comment.length) {
+			data.text = data.comment;
+			data.comment = [];
+		}
+
+		// Keep only the first address occurrence, push others to regular text
+		if (data.email.length > 1) {
+			data.text = data.text.concat(data.email.splice(1));
+		}
+
+		address = {
+			// Join values with spaces
+			email: decodeEncodedWords(data.email.join(' ').trim()),
+			name: decodeEncodedWords(data.text.join(' ').trim())
+//			,comment: data.comment.join(' ').trim()
+		};
+
+		if (address.email === address.name) {
+			if (address.email.includes('@')) {
+				address.name = '';
+			} else {
+				address.email = '';
+			}
+		}
+
+//		address.email = address.email.replace(/^[<]+(.*)[>]+$/g, '$1');
+
+		addresses.push(address);
+	}
+
+	return addresses;
+}
diff --git a/dev/Mime/Encoding.js b/dev/Mime/Encoding.js
new file mode 100644
index 000000000..6f9f8fcd0
--- /dev/null
+++ b/dev/Mime/Encoding.js
@@ -0,0 +1,35 @@
+const
+	QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))];
+
+export const
+	// https://datatracker.ietf.org/doc/html/rfc2045#section-6.8
+	BDecode = atob,
+
+	// unescape(encodeURIComponent()) makes the UTF-16 DOMString to an UTF-8 string
+	BEncode = data => btoa(unescape(encodeURIComponent(data))),
+/*	// Without deprecated 'unescape':
+	BEncode = data => btoa(encodeURIComponent(data).replace(
+		/%([0-9A-F]{2})/g, (match, p1) => String.fromCharCode('0x' + p1)
+	)),
+*/
+
+	// https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
+	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
+
+	// https://datatracker.ietf.org/doc/html/rfc2047#section-4.1
+	// https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
+	// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+	decodeEncodedWords = data =>
+		data.replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/gi, (m, charset, encoding, text) =>
+			decodeText(charset, 'B' == encoding.toUpperCase() ? BDecode(text) : QPDecode(text.replace(/_/g, ' '))) ?? m
+		)
+	,
+
+	decodeText = (charset, data) => {
+		try {
+			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
+			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
+		} catch (e) {
+			console.error({charset:charset,error:e});
+		}
+	};
diff --git a/dev/Mime/Parser.js b/dev/Mime/Parser.js
index 9ce19775b..ce9684351 100644
--- a/dev/Mime/Parser.js
+++ b/dev/Mime/Parser.js
@@ -1,17 +1,5 @@
-//import { b64Encode } from 'Common/Utils';
-
-const
-	// RFC2045
-	QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],
-	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
-	decodeText = (charset, data) => {
-		try {
-			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
-			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
-		} catch (e) {
-			console.error({charset:charset,error:e});
-		}
-	};
+import { decodeEncodedWords, BDecode, BEncode, QPDecode, decodeText } from 'Mime/Encoding';
+import { addressparser } from 'Mime/Address';
 
 
 export function ParseMime(text)
@@ -27,7 +15,49 @@ export function ParseMime(text)
 			this.bodyEnd = 0;
 			this.boundary = '';
 			this.bodyText = '';
-			this.headers = {};
+			// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6
+			this.headers = {
+				// Required
+				date: null,
+				from: [], // mailbox-list
+				// Optional
+				sender: [], // MUST occur with multi-address
+				'reply-to': [], // address-list
+				to: [], // address-list
+				cc: [], // address-list
+				bcc: [], // address-list
+				'message-id': '', // msg-id SHOULD be present
+				'in-reply-to': '', // 1*msg-id SHOULD occur in some replies
+				references: '', // 1*msg-id SHOULD occur in some replies
+				subject: '', // unstructured
+				// Optional unlimited
+				comments: [], // unstructured
+				keywords: [], // phrase *("," phrase)
+				// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6.6
+				trace: [],
+				'resent-date': [],
+				'resent-from': [],
+				'resent-sender': [],
+				'resent-to': [],
+				'resent-cc': [],
+				'resent-bcc': [],
+				'resent-msg-id': [],
+				// optional others outside RFC2822
+				'mime-version': '',
+				'content-transfer-encoding': '',
+				'content-type': '',
+				'delivered-to': '', // angle-addr
+				'return-path': '', // angle-addr
+				'received': [],
+				'authentication-results': '', // dkim, spf, dmarc
+				'dkim-signature': '',
+				'x-rspamd-queue-id': '',
+				'x-rspamd-action': '',
+				'x-spamd-bar': '',
+				'x-rspamd-server': '',
+				'x-spamd-result': '',
+				'x-remote-address': '',
+			};
 		}
 */
 
@@ -54,7 +84,7 @@ export function ParseMime(text)
 				if ('quoted-printable' == encoding) {
 					body = QPDecode(body);
 				} else if ('base64' == encoding) {
-					body = atob(body.replace(/\r?\n/g, ''));
+					body = BDecode(body.replace(/\r?\n/g, ''));
 				}
 				return decodeText(charset, body);
 			}
@@ -68,8 +98,7 @@ export function ParseMime(text)
 				if ('quoted-printable' == encoding) {
 					body = QPDecode(body);
 				}
-				body = btoa(body);
-//				body = b64Encode(body);
+				body = BEncode(body);
 			}
 			return 'data:' + this.headerValue('content-type') + ';base64,' + body;
 		}
@@ -92,6 +121,9 @@ export function ParseMime(text)
 		}
 	}
 
+	// mailbox-list or address-list
+	const lists = ['from','reply-to','to','cc','bcc'];
+
 	const ParsePart = (mimePart, start_pos = 0, id = '') => {
 		let
 			part = new MimePart,
@@ -113,11 +145,19 @@ export function ParseMime(text)
 				[...header.matchAll(/;\s*([^;=]+)=\s*"?([^;"]+)"?/g)].forEach(param =>
 					params[param[1].trim().toLowerCase()] = param[2].trim()
 				);
-				// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
-				match[2] = match[2].trim().replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/g, (m, charset, encoding, text) =>
-					decodeText(charset, 'B' == encoding ? atob(text) : QPDecode(text))
-				);
-				headers[match[1].trim().toLowerCase()] = {
+				let field = match[1].trim().toLowerCase();
+				if (lists.includes(field)) {
+					match[2] = addressparser(match[2]);
+				} else if ('keywords' === field) {
+					match[2] = match[2].split(',').map(entry => decodeEncodedWords(entry.trim()));
+					match[2] = (headers[field]?.value || []).concat(match[2]);
+				} else {
+					match[2] = decodeEncodedWords(match[2].trim());
+					if ('comments' === field) {
+						match[2] = (headers[field]?.value || []).concat(match[2]);
+					}
+				}
+				headers[field] = {
 					value: match[2],
 					params: params
 				};
diff --git a/dev/Mime/Utils.js b/dev/Mime/Utils.js
index c399e7c8c..0da0dc7ad 100644
--- a/dev/Mime/Utils.js
+++ b/dev/Mime/Utils.js
@@ -4,6 +4,8 @@ import { AttachmentModel } from 'Model/Attachment';
 import { FileInfo } from 'Common/File';
 import { BEGIN_PGP_MESSAGE } from 'Stores/User/Pgp';
 
+import { EmailModel } from 'Model/Email';
+
 /**
  * @param string data
  * @param MessageModel message
@@ -20,7 +22,16 @@ export function MimeToMessage(data, message)
 		subject && message.subject(subject);
 
 		// EmailCollectionModel
-		['from','to'].forEach(name => message[name].fromString(struct.headerValue(name)));
+		['from','to'].forEach(name => {
+			const items = message[name];
+			struct.headerValue(name)?.forEach(item => {
+				item = new EmailModel(item.email, item.name);
+				// Make them unique
+				if (item.email && item.name || !items.find(address => address.email == item.email)) {
+					items.push(item);
+				}
+			});
+		});
 
 		struct.forEach(part => {
 			let cd = part.header('content-disposition'),
diff --git a/dev/Model/Email.js b/dev/Model/Email.js
index 4a4a15c97..077aab37b 100644
--- a/dev/Model/Email.js
+++ b/dev/Model/Email.js
@@ -4,202 +4,6 @@ import { AbstractModel } from 'Knoin/AbstractModel';
 
 'use strict';
 
-/**
- * Parses structured e-mail addresses from an address field
- *
- * Example:
- *
- *    "Name <address@domain>"
- *
- * will be converted to
- *
- *     [{name: "Name", address: "address@domain"}]
- *
- * @param {String} str Address field
- * @return {Array} An array of address objects
- */
-export function addressparser(str) {
-	str = (str || '').toString();
-
-	let
-		endOperator = '',
-		node = {
-			type: 'text',
-			value: ''
-		},
-		escaped = false,
-		address = [],
-		addresses = [];
-
-	const
-		/*
-		 * Operator tokens and which tokens are expected to end the sequence
-		 */
-		OPERATORS = {
-			'"': '"',
-			'(': ')',
-			'<': '>',
-			',': '',
-			// Groups are ended by semicolons
-			':': ';',
-			// Semicolons are not a legal delimiter per the RFC2822 grammar other
-			// than for terminating a group, but they are also not valid for any
-			// other use in this context. Given that some mail clients have
-			// historically allowed the semicolon as a delimiter equivalent to the
-			// comma in their UI, it makes sense to treat them the same as a comma
-			// when used outside of a group.
-			';': ''
-		},
-		pushToken = token => {
-			token.value = (token.value || '').toString().trim();
-			token.value.length && address.push(token);
-			node = {
-				type: 'text',
-				value: ''
-			},
-			escaped = false;
-		},
-		pushAddress = () => {
-			if (address.length) {
-				address = _handleAddress(address);
-				if (address.length) {
-					addresses = addresses.concat(address);
-				}
-			}
-			address = [];
-		};
-
-	[...str].forEach(chr => {
-		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
-			pushToken(node);
-			if (',' === chr || ';' === chr) {
-				pushAddress();
-			} else {
-				endOperator = endOperator ? '' : OPERATORS[chr];
-				if ('<' === chr) {
-					node.type = 'email';
-				} else if ('(' === chr) {
-					node.type = 'comment';
-				} else if (':' === chr) {
-					node.type = 'group';
-				}
-			}
-		} else {
-			node.value += chr;
-			escaped = !escaped && '\\' === chr;
-		}
-	});
-	pushToken(node);
-
-	pushAddress();
-
-	return addresses;
-//	return addresses.map(item => (item.name || item.email) ? new EmailModel(item.email, item.name) : null).filter(v => v);
-}
-
-/**
- * Converts tokens for a single address into an address object
- *
- * @param {Array} tokens Tokens object
- * @return {Object} Address object
- */
-function _handleAddress(tokens) {
-	let
-		isGroup = false,
-		address = {},
-		addresses = [],
-		data = {
-			email: [],
-			comment: [],
-			group: [],
-			text: []
-		};
-
-	tokens.forEach(token => {
-		isGroup = isGroup || 'group' === token.type;
-		data[token.type].push(token.value);
-	});
-
-	// If there is no text but a comment, replace the two
-	if (!data.text.length && data.comment.length) {
-		data.text = data.comment;
-		data.comment = [];
-	}
-
-	if (isGroup) {
-		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
-/*
-		addresses.push({
-			email: '',
-			name: data.text.join(' ').trim(),
-			group: addressparser(data.group.join(','))
-//			,comment: data.comment.join(' ').trim()
-		});
-*/
-		addresses = addresses.concat(addressparser(data.group.join(',')));
-	} else {
-		// If no address was found, try to detect one from regular text
-		if (!data.email.length && data.text.length) {
-			var i = data.text.length;
-			while (i--) {
-				if (data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
-					data.email = data.text.splice(i, 1);
-					break;
-				}
-			}
-
-			// still no address
-			if (!data.email.length) {
-				i = data.text.length;
-				while (i--) {
-					data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, address => {
-						if (!data.email.length) {
-							data.email = [address.trim()];
-							return '';
-						}
-						return address.trim();
-					});
-					if (data.email.length) {
-						break;
-					}
-				}
-			}
-		}
-
-		// If there's still no text but a comment exists, replace the two
-		if (!data.text.length && data.comment.length) {
-			data.text = data.comment;
-			data.comment = [];
-		}
-
-		// Keep only the first address occurence, push others to regular text
-		if (data.email.length > 1) {
-			data.text = data.text.concat(data.email.splice(1));
-		}
-
-		address = {
-			// Join values with spaces
-			email: data.email.join(' ').trim(),
-			name: data.text.join(' ').trim()
-//			,comment: data.comment.join(' ').trim()
-		};
-
-		if (address.email === address.name) {
-			if (address.email.includes('@')) {
-				address.name = '';
-			} else {
-				address.email = '';
-			}
-		}
-
-//		address.email = address.email.replace(/^[<]+(.*)[>]+$/g, '$1');
-
-		addresses.push(address);
-	}
-
-	return addresses;
-}
-
 export class EmailModel extends AbstractModel {
 	/**
 	 * @param {string=} email = ''
diff --git a/dev/Model/EmailCollection.js b/dev/Model/EmailCollection.js
index ab2d24aa8..8945fbe97 100644
--- a/dev/Model/EmailCollection.js
+++ b/dev/Model/EmailCollection.js
@@ -1,6 +1,7 @@
 import { AbstractCollectionModel } from 'Model/AbstractCollection';
-import { EmailModel, addressparser } from 'Model/Email';
+import { EmailModel } from 'Model/Email';
 import { forEachObjectValue } from 'Common/Utils';
+import { addressparser } from 'Mime/Address';
 
 'use strict';
 
diff --git a/dev/View/Popup/Compose.js b/dev/View/Popup/Compose.js
index ec4b3381d..2e57a5ae8 100644
--- a/dev/View/Popup/Compose.js
+++ b/dev/View/Popup/Compose.js
@@ -36,7 +36,8 @@ import { MessagelistUserStore } from 'Stores/User/Messagelist';
 import Remote from 'Remote/User/Fetch';
 
 import { ComposeAttachmentModel } from 'Model/ComposeAttachment';
-import { EmailModel, addressparser } from 'Model/Email';
+import { EmailModel } from 'Model/Email';
+import { addressparser } from 'Mime/Address';
 
 import { decorateKoCommands, showScreenPopup } from 'Knoin/Knoin';
 import { AbstractViewPopup } from 'Knoin/AbstractViews';