From ec9197cb859c9627084dbb1d760c71d85cbdd9db Mon Sep 17 00:00:00 2001
From: the-djmaze <>
Date: Sun, 4 Feb 2024 23:03:53 +0100
Subject: [PATCH] Also fix MIME header parsing in JavaScript, read issue #1403

---
 dev/Component/EmailAddresses.js |   3 +-
 dev/Mime/Address.js             | 197 ++++++++++++++++++++++++++++++++
 dev/Mime/Encoding.js            |  35 ++++++
 dev/Mime/Parser.js              |  86 ++++++++++----
 dev/Mime/Utils.js               |  14 ++-
 dev/Model/Email.js              | 196 -------------------------------
 dev/Model/EmailCollection.js    |   3 +-
 dev/View/Popup/Compose.js       |   3 +-
 8 files changed, 314 insertions(+), 223 deletions(-)
 create mode 100644 dev/Mime/Address.js
 create mode 100644 dev/Mime/Encoding.js

diff --git a/dev/Component/EmailAddresses.js b/dev/Component/EmailAddresses.js
index 1f7646c74..c4c6e364f 100644
--- a/dev/Component/EmailAddresses.js
+++ b/dev/Component/EmailAddresses.js
@@ -1,5 +1,6 @@
 import { doc, createElement, addEventsListeners } from 'Common/Globals';
-import { EmailModel, addressparser } from 'Model/Email';
+import { EmailModel } from 'Model/Email';
+import { addressparser } from 'Mime/Address';
 
 const contentType = 'snappymail/emailaddress',
 	getAddressKey = li => li?.emailaddress?.key,
diff --git a/dev/Mime/Address.js b/dev/Mime/Address.js
new file mode 100644
index 000000000..5e9ae887d
--- /dev/null
+++ b/dev/Mime/Address.js
@@ -0,0 +1,197 @@
+import { decodeEncodedWords } from 'Mime/Encoding';
+
+/**
+ * Parses structured e-mail addresses from an address/mailbox(-list) field
+ * https://datatracker.ietf.org/doc/html/rfc2822#section-3.4
+ *
+ * Example:
+ *
+ *    "Name <address@domain>"
+ *
+ * will be converted to
+ *
+ *     [{name: "Name", email: "address@domain"}]
+ *
+ * @param {String} str Address field
+ * @return {Array} An array of address objects
+ */
+export function addressparser(str) {
+	str = (str || '').toString();
+
+	let
+		endOperator = '', // closing character awaited by the currently open operator, '' when none is open
+		node = {
+			type: 'text',
+			value: ''
+		},
+		escaped = false, // true when the previous character was an unescaped backslash
+		address = [], // tokens collected for the address that is currently being read
+		addresses = []; // resulting address objects
+
+	const
+		/*
+		 * Operator tokens and which tokens are expected to end the sequence
+		 */
+		OPERATORS = {
+		  '"': '"',
+		  '(': ')',
+		  '<': '>',
+		  ',': '',
+		  // Groups are ended by semicolons
+		  ':': ';',
+		  // Semicolons are not a legal delimiter per the RFC2822 grammar other
+		  // than for terminating a group, but they are also not valid for any
+		  // other use in this context.  Given that some mail clients have
+		  // historically allowed the semicolon as a delimiter equivalent to the
+		  // comma in their UI, it makes sense to treat them the same as a comma
+		  // when used outside of a group.
+		  ';': ''
+		},
+		pushToken = token => { // stores the trimmed token (when not empty) and starts a fresh text node
+			token.value = (token.value || '').toString().trim();
+			token.value.length && address.push(token);
+			node = {
+				type: 'text',
+				value: ''
+			},
+			escaped = false;
+		},
+		pushAddress = () => { // converts the collected tokens into address objects and appends them
+			if (address.length) {
+				address = _handleAddress(address);
+				if (address.length) {
+					addresses = addresses.concat(address);
+				}
+			}
+			address = [];
+		};
+
+	[...str].forEach(chr => {
+		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
+			pushToken(node);
+			// Every operator toggles the open sequence. This also clears the ';' awaited by a group,
+			// otherwise no operator would be recognised in the text that follows the group.
+			endOperator = endOperator ? '' : OPERATORS[chr];
+			if (',' === chr || ';' === chr) {
+				pushAddress();
+			} else if ('<' === chr) {
+				node.type = 'email';
+			} else if ('(' === chr) {
+				node.type = 'comment';
+			} else if (':' === chr) {
+				node.type = 'group';
+			}
+		} else {
+			node.value += chr; // the backslash of an escape sequence is kept in the value
+			escaped = !escaped && '\\' === chr;
+		}
+	});
+	pushToken(node); // flush the token that was still open at the end of the string
+
+	pushAddress();
+
+	return addresses;
+}
+
+/**
+ * Converts tokens for a single address into an address object
+ *
+ * @param {Array} tokens Tokens object
+ * @return {Array} Address objects (a single one, unless the tokens contain a group)
+ */
+function _handleAddress(tokens) {
+	let
+		isGroup = false,
+		address = {},
+		addresses = [],
+		data = { // token values sorted by token type
+			email: [],
+			comment: [],
+			group: [],
+			text: []
+		};
+
+	tokens.forEach(token => {
+		isGroup = isGroup || 'group' === token.type;
+		data[token.type].push(token.value);
+	});
+
+	// If there is no text but a comment, replace the two
+	if (!data.text.length && data.comment.length) {
+		data.text = data.comment;
+		data.comment = [];
+	}
+
+	if (isGroup) {
+		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
+/*
+		addresses.push({
+			email: '',
+			name: data.text.join(' ').trim(),
+			group: addressparser(data.group.join(','))
+//			,comment: data.comment.join(' ').trim()
+		});
+*/
+		// Only the group members are returned, the group name itself is dropped
+		addresses = addresses.concat(addressparser(data.group.join(',')));
+	} else {
+		// If no address was found, try to detect one from regular text
+		if (!data.email.length && data.text.length) {
+			var i = data.text.length;
+			while (i--) { // last text token first: use a token that is an address as a whole
+				if (data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
+					data.email = data.text.splice(i, 1);
+					break;
+				}
+			}
+
+			// still no address
+			if (!data.email.length) {
+				i = data.text.length;
+				while (i--) { // last text token first: cut the first address-like part out of a token
+					data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, address => {
+						if (!data.email.length) {
+							data.email = [address.trim()];
+							return '';
+						}
+						return address.trim();
+					});
+					if (data.email.length) {
+						break;
+					}
+				}
+			}
+		}
+
+		// If there's still no text but a comment exists, replace the two
+		if (!data.text.length && data.comment.length) {
+			data.text = data.comment;
+			data.comment = [];
+		}
+
+		// Keep only the first address occurrence, push others to regular text
+		if (data.email.length > 1) {
+			data.text = data.text.concat(data.email.splice(1));
+		}
+
+		address = {
+			// Join values with spaces
+			email: decodeEncodedWords(data.email.join(' ').trim()),
+			name: decodeEncodedWords(data.text.join(' ').trim())
+//			,comment: data.comment.join(' ').trim()
+		};
+
+		if (address.email === address.name) { // same value twice: keep it only in the field it fits
+			if (address.email.includes('@')) {
+				address.name = '';
+			} else {
+				address.email = '';
+			}
+		}
+
+//		address.email = address.email.replace(/^[<]+(.*)[>]+$/g, '$1');
+
+		addresses.push(address);
+	}
+
+	return addresses;
+}
diff --git a/dev/Mime/Encoding.js b/dev/Mime/Encoding.js
new file mode 100644
index 000000000..6f9f8fcd0
--- /dev/null
+++ b/dev/Mime/Encoding.js
@@ -0,0 +1,35 @@
+const
+	QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))];
+
+export const
+	// https://datatracker.ietf.org/doc/html/rfc2045#section-6.8
+	BDecode = atob,
+
+	// unescape(encodeURIComponent()) makes the UTF-16 DOMString to an UTF-8 string
+	BEncode = data => btoa(unescape(encodeURIComponent(data))),
+/* 	// Without deprecated 'unescape':
+	BEncode = data => btoa(encodeURIComponent(data).replace(
+		/%([0-9A-F]{2})/g, (match, p1) => String.fromCharCode('0x' + p1)
+	)),
+*/
+
+	// https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
+	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
+
+	// https://datatracker.ietf.org/doc/html/rfc2047#section-4.1 ("B") and #section-4.2 ("Q", where "_" is a space)
+	// https://datatracker.ietf.org/doc/html/rfc2047#section-6.2 (whitespace between adjacent encoded-words is dropped)
+	// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="  (the encoding letter is case-independent)
+	decodeEncodedWords = data =>
+		data.replace(/\?=\s+(?==\?)/g, '?=').replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/gi, (m, charset, encoding, text) => {
+			try { text = 'B' == encoding.toUpperCase() ? BDecode(text) : QPDecode(text.replace(/_/g, ' ')); } catch (e) { return m; }
+			return decodeText(charset, text) ?? m; // keep the raw encoded-word when it cannot be decoded
+		}),
+
+	decodeText = (charset, data) => { // data: string in which every character stands for one byte
+		try {
+			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
+			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
+		} catch (e) {
+			console.error({charset:charset,error:e}); // nothing is returned, so the caller receives undefined
+		}
+	};
diff --git a/dev/Mime/Parser.js b/dev/Mime/Parser.js
index 9ce19775b..ce9684351 100644
--- a/dev/Mime/Parser.js
+++ b/dev/Mime/Parser.js
@@ -1,17 +1,5 @@
-//import { b64Encode } from 'Common/Utils';
-
-const
-	// RFC2045
-	QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],
-	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
-	decodeText = (charset, data) => {
-		try {
-			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
-			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
-		} catch (e) {
-			console.error({charset:charset,error:e});
-		}
-	};
+import { decodeEncodedWords, BDecode, BEncode, QPDecode, decodeText } from 'Mime/Encoding';
+import { addressparser } from 'Mime/Address';
 
 export function ParseMime(text)
 {
@@ -27,7 +15,49 @@ export function ParseMime(text)
 			this.bodyEnd = 0;
 			this.boundary = '';
 			this.bodyText = '';
-			this.headers = {};
+			// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6
+			this.headers = {
+				// Required
+				date: null,
+				from: [], // mailbox-list
+				// Optional
+				sender          : [], // MUST occur with multi-address
+				'reply-to'      : [], // address-list
+				to              : [], // address-list
+				cc              : [], // address-list
+				bcc             : [], // address-list
+				'message-id'    : '', // msg-id SHOULD be present
+				'in-reply-to'   : '', // 1*msg-id SHOULD occur in some replies
+				references      : '', // 1*msg-id SHOULD occur in some replies
+				subject         : '', // unstructured
+				// Optional unlimited
+				comments        : [], // unstructured
+				keywords        : [], // phrase *("," phrase)
+				// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6.6
+				trace           : [],
+				'resent-date'   : [],
+				'resent-from'   : [],
+				'resent-sender' : [],
+				'resent-to'     : [],
+				'resent-cc'     : [],
+				'resent-bcc'    : [],
+				'resent-msg-id' : [],
+				// optional others outside RFC2822
+				'mime-version'              : '',
+				'content-transfer-encoding' : '',
+				'content-type'              : '',
+				'delivered-to'              : '', // angle-addr
+				'return-path'               : '', // angle-addr
+				'received'                  : [],
+				'authentication-results'    : '', // dkim, spf, dmarc
+				'dkim-signature'            : '',
+				'x-rspamd-queue-id'         : '',
+				'x-rspamd-action'           : '',
+				'x-spamd-bar'               : '',
+				'x-rspamd-server'           : '',
+				'x-spamd-result'            : '',
+				'x-remote-address'          : '',
+			};
 		}
 */
 
@@ -54,7 +84,7 @@ export function ParseMime(text)
 			if ('quoted-printable' == encoding) {
 				body = QPDecode(body);
 			} else if ('base64' == encoding) {
-				body = atob(body.replace(/\r?\n/g, ''));
+				body = BDecode(body.replace(/\r?\n/g, ''));
 			}
 			return decodeText(charset, body);
 		}
@@ -68,8 +98,7 @@ export function ParseMime(text)
 				if ('quoted-printable' == encoding) {
 					body = QPDecode(body);
 				}
-				body = btoa(body);
-//				body = b64Encode(body);
+				body = BEncode(body);
 			}
 			return 'data:' + this.headerValue('content-type') + ';base64,' + body;
 		}
@@ -92,6 +121,9 @@ export function ParseMime(text)
 		}
 	}
 
+	// mailbox-list or address-list
+	const lists = ['from','reply-to','to','cc','bcc'];
+
 	const ParsePart = (mimePart, start_pos = 0, id = '') =>
 	{
 		let part = new MimePart,
@@ -113,11 +145,19 @@ export function ParseMime(text)
 					[...header.matchAll(/;\s*([^;=]+)=\s*"?([^;"]+)"?/g)].forEach(param =>
 						params[param[1].trim().toLowerCase()] = param[2].trim()
 					);
-					// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
-					match[2] = match[2].trim().replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/g, (m, charset, encoding, text) =>
-						decodeText(charset, 'B' == encoding ? atob(text) : QPDecode(text))
-					);
-					headers[match[1].trim().toLowerCase()] = {
+					let field = match[1].trim().toLowerCase();
+					if (lists.includes(field)) {
+						match[2] = addressparser(match[2]); // array of {email, name} objects
+					} else if ('keywords' === field) {
+						match[2] = match[2].split(',').map(entry => decodeEncodedWords(entry.trim())); // map() keeps the decoded phrases, forEach() returns undefined
+						match[2] = (headers[field]?.value || []).concat(match[2]); // append to the keywords of earlier header lines
+					} else {
+						match[2] = decodeEncodedWords(match[2].trim());
+						if ('comments' === field) {
+							match[2] = (headers[field]?.value || []).concat(match[2]); // concat() returns the list, push() would return its length
+						}
+					}
+					headers[field] = {
 						value: match[2],
 						params: params
 					};
diff --git a/dev/Mime/Utils.js b/dev/Mime/Utils.js
index c399e7c8c..0da0dc7ad 100644
--- a/dev/Mime/Utils.js
+++ b/dev/Mime/Utils.js
@@ -4,6 +4,8 @@ import { AttachmentModel } from 'Model/Attachment';
 import { FileInfo } from 'Common/File';
 import { BEGIN_PGP_MESSAGE } from 'Stores/User/Pgp';
 
+import { EmailModel } from 'Model/Email';
+
 /**
  * @param string data
  * @param MessageModel message
@@ -20,7 +22,16 @@ export function MimeToMessage(data, message)
 		subject && message.subject(subject);
 
 		// EmailCollectionModel
-		['from','to'].forEach(name => message[name].fromString(struct.headerValue(name)));
+		['from','to'].forEach(name => {
+			const items = message[name];
+			struct.headerValue(name)?.forEach(item => { // skipped when the header has no value
+				item = new EmailModel(item.email, item.name);
+				// Entries with both email and name are always added, others only when their email is not listed yet
+				if (item.email && item.name || !items.find(address => address.email == item.email)) { // NOTE(review): named duplicates pass, confirm this is intended
+					items.push(item);
+				}
+			});
+		});
 
 		struct.forEach(part => {
 			let cd = part.header('content-disposition'),
@@ -66,6 +77,7 @@ export function MimeToMessage(data, message)
 		const text = struct.getByContentType('text/plain');
 		message.plain(text ? text.body : '');
 		message.html(html);
+
 	} else {
 		message.plain(data);
 	}
diff --git a/dev/Model/Email.js b/dev/Model/Email.js
index 4a4a15c97..077aab37b 100644
--- a/dev/Model/Email.js
+++ b/dev/Model/Email.js
@@ -4,202 +4,6 @@ import { AbstractModel } from 'Knoin/AbstractModel';
 
 'use strict';
 
-/**
- * Parses structured e-mail addresses from an address field
- *
- * Example:
- *
- *    "Name <address@domain>"
- *
- * will be converted to
- *
- *     [{name: "Name", address: "address@domain"}]
- *
- * @param {String} str Address field
- * @return {Array} An array of address objects
- */
-export function addressparser(str) {
-	str = (str || '').toString();
-
-	let
-		endOperator = '',
-		node = {
-			type: 'text',
-			value: ''
-		},
-		escaped = false,
-		address = [],
-		addresses = [];
-
-	const
-		/*
-		 * Operator tokens and which tokens are expected to end the sequence
-		 */
-		OPERATORS = {
-		  '"': '"',
-		  '(': ')',
-		  '<': '>',
-		  ',': '',
-		  // Groups are ended by semicolons
-		  ':': ';',
-		  // Semicolons are not a legal delimiter per the RFC2822 grammar other
-		  // than for terminating a group, but they are also not valid for any
-		  // other use in this context.  Given that some mail clients have
-		  // historically allowed the semicolon as a delimiter equivalent to the
-		  // comma in their UI, it makes sense to treat them the same as a comma
-		  // when used outside of a group.
-		  ';': ''
-		},
-		pushToken = token => {
-			token.value = (token.value || '').toString().trim();
-			token.value.length && address.push(token);
-			node = {
-				type: 'text',
-				value: ''
-			},
-			escaped = false;
-		},
-		pushAddress = () => {
-			if (address.length) {
-				address = _handleAddress(address);
-				if (address.length) {
-					addresses = addresses.concat(address);
-				}
-			}
-			address = [];
-		};
-
-	[...str].forEach(chr => {
-		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
-			pushToken(node);
-			if (',' === chr || ';' === chr) {
-				pushAddress();
-			} else {
-				endOperator = endOperator ? '' : OPERATORS[chr];
-				if ('<' === chr) {
-					node.type = 'email';
-				} else if ('(' === chr) {
-					node.type = 'comment';
-				} else if (':' === chr) {
-					node.type = 'group';
-				}
-			}
-		} else {
-			node.value += chr;
-			escaped = !escaped && '\\' === chr;
-		}
-	});
-	pushToken(node);
-
-	pushAddress();
-
-	return addresses;
-//	return addresses.map(item => (item.name || item.email) ? new EmailModel(item.email, item.name) : null).filter(v => v);
-}
-
-/**
- * Converts tokens for a single address into an address object
- *
- * @param {Array} tokens Tokens object
- * @return {Object} Address object
- */
-function _handleAddress(tokens) {
-	let
-		isGroup = false,
-		address = {},
-		addresses = [],
-		data = {
-			email: [],
-			comment: [],
-			group: [],
-			text: []
-		};
-
-	tokens.forEach(token => {
-		isGroup = isGroup || 'group' === token.type;
-		data[token.type].push(token.value);
-	});
-
-	// If there is no text but a comment, replace the two
-	if (!data.text.length && data.comment.length) {
-		data.text = data.comment;
-		data.comment = [];
-	}
-
-	if (isGroup) {
-		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
-/*
-		addresses.push({
-			email: '',
-			name: data.text.join(' ').trim(),
-			group: addressparser(data.group.join(','))
-//			,comment: data.comment.join(' ').trim()
-		});
-*/
-		addresses = addresses.concat(addressparser(data.group.join(',')));
-	} else {
-		// If no address was found, try to detect one from regular text
-		if (!data.email.length && data.text.length) {
-			var i = data.text.length;
-			while (i--) {
-				if (data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
-					data.email = data.text.splice(i, 1);
-					break;
-				}
-			}
-
-			// still no address
-			if (!data.email.length) {
-				i = data.text.length;
-				while (i--) {
-					data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, address => {
-						if (!data.email.length) {
-							data.email = [address.trim()];
-							return '';
-						}
-						return address.trim();
-					});
-					if (data.email.length) {
-						break;
-					}
-				}
-			}
-		}
-
-		// If there's still no text but a comment exists, replace the two
-		if (!data.text.length && data.comment.length) {
-			data.text = data.comment;
-			data.comment = [];
-		}
-
-		// Keep only the first address occurence, push others to regular text
-		if (data.email.length > 1) {
-			data.text = data.text.concat(data.email.splice(1));
-		}
-
-		address = {
-			// Join values with spaces
-			email: data.email.join(' ').trim(),
-			name: data.text.join(' ').trim()
-//			,comment: data.comment.join(' ').trim()
-		};
-
-		if (address.email === address.name) {
-			if (address.email.includes('@')) {
-				address.name = '';
-			} else {
-				address.email = '';
-			}
-		}
-
-//		address.email = address.email.replace(/^[<]+(.*)[>]+$/g, '$1');
-
-		addresses.push(address);
-	}
-
-	return addresses;
-}
-
 export class EmailModel extends AbstractModel {
 	/**
 	 * @param {string=} email = ''
diff --git a/dev/Model/EmailCollection.js b/dev/Model/EmailCollection.js
index ab2d24aa8..8945fbe97 100644
--- a/dev/Model/EmailCollection.js
+++ b/dev/Model/EmailCollection.js
@@ -1,6 +1,7 @@
 import { AbstractCollectionModel } from 'Model/AbstractCollection';
-import { EmailModel, addressparser } from 'Model/Email';
+import { EmailModel } from 'Model/Email';
 import { forEachObjectValue } from 'Common/Utils';
+import { addressparser } from 'Mime/Address';
 
 'use strict';
 
diff --git a/dev/View/Popup/Compose.js b/dev/View/Popup/Compose.js
index ec4b3381d..2e57a5ae8 100644
--- a/dev/View/Popup/Compose.js
+++ b/dev/View/Popup/Compose.js
@@ -36,7 +36,8 @@ import { MessagelistUserStore } from 'Stores/User/Messagelist';
 import Remote from 'Remote/User/Fetch';
 
 import { ComposeAttachmentModel } from 'Model/ComposeAttachment';
-import { EmailModel, addressparser } from 'Model/Email';
+import { EmailModel } from 'Model/Email';
+import { addressparser } from 'Mime/Address';
 
 import { decorateKoCommands, showScreenPopup } from 'Knoin/Knoin';
 import { AbstractViewPopup } from 'Knoin/AbstractViews';