Also fix MIME header parsing in JavaScript, read issue #1403

This commit is contained in:
the-djmaze 2024-02-04 23:03:53 +01:00
parent 0914ede3a3
commit ec9197cb85
8 changed files with 314 additions and 223 deletions

View file

@ -1,5 +1,6 @@
import { doc, createElement, addEventsListeners } from 'Common/Globals';
import { EmailModel, addressparser } from 'Model/Email';
import { EmailModel } from 'Model/Email';
import { addressparser } from 'Mime/Address';
const contentType = 'snappymail/emailaddress',
getAddressKey = li => li?.emailaddress?.key,

197
dev/Mime/Address.js Normal file
View file

@ -0,0 +1,197 @@
import { decodeEncodedWords } from 'Mime/Encoding';
/**
 * Parses structured e-mail addresses from an address/mailbox(-list) field
 * https://datatracker.ietf.org/doc/html/rfc2822#section-3.4
 *
 * Example:
 *
 *    "Name <address@domain>"
 *
 * will be converted to
 *
 *     [{name: "Name", email: "address@domain"}]
 *
 * The field is tokenized character by character. Quoted strings, comments
 * "(...)", angle addresses "<...>" and groups "name: ...;" are collected as
 * separate tokens. A "," or ";" outside of such a sequence finishes the
 * current address, whose tokens are then converted by _handleAddress().
 *
 * @param {String} str Address field
 * @return {Array} An array of address objects
 */
export function addressparser(str) {
	str = (str || '').toString();

	let
		// Character that closes the sequence being read, '' when not inside one
		endOperator = '',
		// Token that is currently being collected
		node = {
			type: 'text',
			value: ''
		},
		// True when the previous character was an unescaped backslash
		escaped = false,
		// Tokens of the address that is currently being collected
		address = [],
		// Result: all address objects found so far
		addresses = [];

	const
		/*
		 * Operator tokens and which tokens are expected to end the sequence
		 */
		OPERATORS = {
			'"': '"',
			'(': ')',
			'<': '>',
			',': '',
			// Groups are ended by semicolons
			':': ';',
			// Semicolons are not a legal delimiter per the RFC2822 grammar other
			// than for terminating a group, but they are also not valid for any
			// other use in this context. Given that some mail clients have
			// historically allowed the semicolon as a delimiter equivalent to the
			// comma in their UI, it makes sense to treat them the same as a comma
			// when used outside of a group.
			';': ''
		},

		// Stores the token when it has content and starts a new text token
		pushToken = token => {
			token.value = (token.value || '').toString().trim();
			token.value.length && address.push(token);
			node = {
				type: 'text',
				value: ''
			};
			escaped = false;
		},

		// Converts the collected tokens into address objects and starts a new address
		pushAddress = () => {
			if (address.length) {
				address = _handleAddress(address);
				if (address.length) {
					addresses = addresses.concat(address);
				}
			}
			address = [];
		};

	[...str].forEach(chr => {
		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
			pushToken(node);
			if (',' === chr || ';' === chr) {
				pushAddress();
				// A ";" that terminates a group must also end the group sequence.
				// Without this reset every "," behind the group is read as text
				// and the addresses that follow the group are not separated.
				endOperator = '';
			} else {
				// Either the closing operator was reached or a new sequence starts
				endOperator = endOperator ? '' : OPERATORS[chr];
				if ('<' === chr) {
					node.type = 'email';
				} else if ('(' === chr) {
					node.type = 'comment';
				} else if (':' === chr) {
					node.type = 'group';
				}
			}
		} else {
			node.value += chr;
			// A backslash escapes the next character, unless it is escaped itself
			escaped = !escaped && '\\' === chr;
		}
	});

	// Flush what is left after the last character
	pushToken(node);
	pushAddress();

	return addresses;
}
/**
 * Converts the tokens collected for a single address into address objects.
 *
 * The token values are sorted by type ("email", "comment", "group", "text").
 * When a group token is present, its member list is parsed with
 * addressparser() and only the members are returned. Otherwise exactly one
 * object is returned: the first "email" value is the address and everything
 * else forms the name. Without an "email" token the address is searched in
 * the text tokens.
 *
 * @param {Array} tokens Tokens object
 * @return {Array} Address objects in the form {email, name}
 */
function _handleAddress(tokens) {
	const
		parts = {
			email: [],
			comment: [],
			group: [],
			text: []
		},
		// Comments take the role of the text when there is no text at all
		commentsToText = () => {
			if (!parts.text.length && parts.comment.length) {
				parts.text = parts.comment;
				parts.comment = [];
			}
		};

	let hasGroup = false;
	for (const token of tokens) {
		hasGroup = hasGroup || 'group' === token.type;
		parts[token.type].push(token.value);
	}

	commentsToText();

	if (hasGroup) {
		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
		// The group name is dropped, only the parsed members are returned
		return [].concat(addressparser(parts.group.join(',')));
	}

	// If no address was found, try to detect one from regular text
	if (!parts.email.length && parts.text.length) {
		// Searching backwards, take the text entry that is exactly one address
		for (let pos = parts.text.length - 1; pos >= 0; --pos) {
			if (parts.text[pos].match(/^[^@\s]+@[^@\s]+$/)) {
				parts.email = parts.text.splice(pos, 1);
				break;
			}
		}
		// Still no address: cut the first address-like part out of a text entry
		for (let pos = parts.text.length - 1; pos >= 0 && !parts.email.length; --pos) {
			parts.text[pos] = parts.text[pos].replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, found => {
				parts.email = [found.trim()];
				return '';
			});
		}
	}

	// The text may have been emptied above, so check the comments again
	commentsToText();

	// Only the first address is kept, the others become part of the name
	if (parts.email.length > 1) {
		parts.text = parts.text.concat(parts.email.splice(1));
	}

	const
		// Values are joined with spaces
		email = decodeEncodedWords(parts.email.join(' ').trim()),
		name = decodeEncodedWords(parts.text.join(' ').trim()),
		result = { email, name };

	// Identical values: it is either only an address or only a name
	if (email === name) {
		if (email.includes('@')) {
			result.name = '';
		} else {
			result.email = '';
		}
	}

	return [result];
}

35
dev/Mime/Encoding.js Normal file
View file

@ -0,0 +1,35 @@
const
	// One quoted-printable escape: "=" followed by two uppercase hexadecimal digits.
	// Stored as [pattern, replacer] so it can be spread into String.prototype.replace()
	QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],

	// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
	// The encoding letter is case-insensitive (RFC 2047 section 2)
	encodedWord = /=\?([^?]+)\?(B|Q)\?(.+?)\?=/gi,

	// White space that only separates two encoded-words
	encodedWordGap = /(=\?[^?]+\?[BQ]\?[^?]+\?=)\s+(?==\?[^?]+\?[BQ]\?[^?]+\?=)/gi;

export const
	/**
	 * Decodes base64 into a binary string (one character per byte)
	 * https://datatracker.ietf.org/doc/html/rfc2045#section-6.8
	 */
	BDecode = atob,

	/**
	 * Encodes a string as base64 of its UTF-8 bytes.
	 * unescape(encodeURIComponent()) makes the UTF-16 DOMString to an UTF-8 string
	 *
	 * @param {String} data
	 * @return {String}
	 */
	BEncode = data => btoa(unescape(encodeURIComponent(data))),
/*	// Without deprecated 'unescape':
	BEncode = data => btoa(encodeURIComponent(data).replace(
		/%([0-9A-F]{2})/g, (match, p1) => String.fromCharCode('0x' + p1)
	)),
*/

	/**
	 * Decodes quoted-printable into a binary string (one character per byte).
	 * Soft line breaks ("=" at the end of a line) are removed first.
	 * https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
	 *
	 * @param {String} data
	 * @return {String}
	 */
	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),

	/**
	 * Replaces all RFC 2047 encoded-words in a header value by their decoded text.
	 * https://datatracker.ietf.org/doc/html/rfc2047#section-4.1
	 * https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
	 *
	 * An encoded-word that can not be decoded (invalid base64, unknown charset)
	 * is left untouched instead of ending up as "undefined" or throwing.
	 *
	 * @param {String} data Header value
	 * @return {String}
	 */
	decodeEncodedWords = data =>
		data
			// https://datatracker.ietf.org/doc/html/rfc2047#section-6.2
			// White space between adjacent encoded-words is not displayed
			.replace(encodedWordGap, '$1')
			.replace(encodedWord, (m, charset, encoding, text) => {
				try {
					const bytes = 'B' === encoding.toUpperCase()
						? BDecode(text)
						// In the "Q" encoding "_" represents a space. This must be done
						// before decoding, so that "=5F" remains an underscore.
						: QPDecode(text.replace(/_/g, ' '));
					// A charset may carry a language suffix like "US-ASCII*EN" (RFC 2231 section 5)
					return decodeText(charset.replace(/\*.*$/, ''), bytes) ?? m;
				} catch (e) {
					console.error({charset:charset,error:e});
					return m;
				}
			})
	,

	/**
	 * Converts a binary string (one character per byte) from the given charset
	 * into a normal string.
	 *
	 * @param {String} charset Encoding label
	 * @param {String} data Binary string
	 * @return {String|undefined} undefined when decoding failed, the error is logged
	 */
	decodeText = (charset, data) => {
		try {
			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
		} catch (e) {
			console.error({charset:charset,error:e});
		}
	};

View file

@ -1,17 +1,5 @@
//import { b64Encode } from 'Common/Utils';
const
// RFC2045
QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],
QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
decodeText = (charset, data) => {
try {
// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
} catch (e) {
console.error({charset:charset,error:e});
}
};
import { decodeEncodedWords, BDecode, BEncode, QPDecode, decodeText } from 'Mime/Encoding';
import { addressparser } from 'Mime/Address';
export function ParseMime(text)
{
@ -27,7 +15,49 @@ export function ParseMime(text)
this.bodyEnd = 0;
this.boundary = '';
this.bodyText = '';
this.headers = {};
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6
this.headers = {
// Required
date = null,
from = [], // mailbox-list
// Optional
sender = [], // MUST occur with multi-address
'reply-to' = [], // address-list
to = [], // address-list
cc = [], // address-list
bcc = [], // address-list
'message-id' = '', // msg-id SHOULD be present
'in-reply-to' = '', // 1*msg-id SHOULD occur in some replies
references = '', // 1*msg-id SHOULD occur in some replies
subject = '', // unstructured
// Optional unlimited
comments = [], // unstructured
keywords = [], // phrase *("," phrase)
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6.6
trace = [],
'resent-date' = [],
'resent-from' = [],
'resent-sender' = [],
'resent-to' = [],
'resent-cc' = [],
'resent-bcc' = [],
'resent-msg-id' = [],
// optional others outside RFC2822
'mime-version' = '',
'content-transfer-encoding' = '',
'content-type' = '',
'delivered-to' = '', // angle-addr
'return-path' = '', // angle-addr
'received' = [],
'authentication-results' = '', // dkim, spf, dmarc
'dkim-signature' = '',
'x-rspamd-queue-id' = '',
'x-rspamd-action' = '',
'x-spamd-bar' = '',
'x-rspamd-server' = '',
'x-spamd-result' = '',
'x-remote-address' = '',
};
}
*/
@ -54,7 +84,7 @@ export function ParseMime(text)
if ('quoted-printable' == encoding) {
body = QPDecode(body);
} else if ('base64' == encoding) {
body = atob(body.replace(/\r?\n/g, ''));
body = BDecode(body.replace(/\r?\n/g, ''));
}
return decodeText(charset, body);
}
@ -68,8 +98,7 @@ export function ParseMime(text)
if ('quoted-printable' == encoding) {
body = QPDecode(body);
}
body = btoa(body);
// body = b64Encode(body);
body = BEncode(body);
}
return 'data:' + this.headerValue('content-type') + ';base64,' + body;
}
@ -92,6 +121,9 @@ export function ParseMime(text)
}
}
// mailbox-list or address-list
const lists = ['from','reply-to','to','cc','bcc'];
const ParsePart = (mimePart, start_pos = 0, id = '') =>
{
let part = new MimePart,
@ -113,11 +145,19 @@ export function ParseMime(text)
[...header.matchAll(/;\s*([^;=]+)=\s*"?([^;"]+)"?/g)].forEach(param =>
params[param[1].trim().toLowerCase()] = param[2].trim()
);
// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
match[2] = match[2].trim().replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/g, (m, charset, encoding, text) =>
decodeText(charset, 'B' == encoding ? atob(text) : QPDecode(text))
);
headers[match[1].trim().toLowerCase()] = {
// Header field names are stored lowercased
let field = match[1].trim().toLowerCase();
if (lists.includes(field)) {
	// mailbox-list or address-list: store the parsed address objects
	match[2] = addressparser(match[2]);
} else if ('keywords' === field) {
	// phrase *("," phrase): decode each phrase.
	// map() is required here, forEach() returns undefined and would
	// store [undefined] instead of the keywords.
	match[2] = match[2].split(',').map(entry => decodeEncodedWords(entry.trim()));
	// The field may occur multiple times, append to the entries found before
	match[2] = (headers[field]?.value || []).concat(match[2]);
} else {
	match[2] = decodeEncodedWords(match[2].trim());
	if ('comments' === field) {
		// The field may occur multiple times, collect all values in an array.
		// concat() is required here, push() returns the new length and
		// would store a number instead of the comments.
		match[2] = (headers[field]?.value || []).concat(match[2]);
	}
}
headers[field] = {
value: match[2],
params: params
};

View file

@ -4,6 +4,8 @@ import { AttachmentModel } from 'Model/Attachment';
import { FileInfo } from 'Common/File';
import { BEGIN_PGP_MESSAGE } from 'Stores/User/Pgp';
import { EmailModel } from 'Model/Email';
/**
* @param string data
* @param MessageModel message
@ -20,7 +22,16 @@ export function MimeToMessage(data, message)
subject && message.subject(subject);
// EmailCollectionModel
['from','to'].forEach(name => message[name].fromString(struct.headerValue(name)));
['from','to'].forEach(name => {
const items = message[name];
struct.headerValue(name)?.forEach(item => {
item = new EmailModel(item.email, item.name);
// Make them unique
if (item.email && item.name || !items.find(address => address.email == item.email)) {
items.push(item);
}
});
});
struct.forEach(part => {
let cd = part.header('content-disposition'),
@ -66,6 +77,7 @@ export function MimeToMessage(data, message)
const text = struct.getByContentType('text/plain');
message.plain(text ? text.body : '');
message.html(html);
console.dir({message});
} else {
message.plain(data);
}

View file

@ -4,202 +4,6 @@ import { AbstractModel } from 'Knoin/AbstractModel';
'use strict';
/**
 * Parses structured e-mail addresses from an address field
 *
 * Example:
 *
 *    "Name <address@domain>"
 *
 * will be converted to
 *
 *     [{name: "Name", email: "address@domain"}]
 *
 * The field is tokenized character by character. Quoted strings, comments
 * "(...)", angle addresses "<...>" and groups "name: ...;" are collected as
 * separate tokens. A "," or ";" outside of such a sequence finishes the
 * current address, whose tokens are then converted by _handleAddress().
 *
 * @param {String} str Address field
 * @return {Array} An array of address objects
 */
export function addressparser(str) {
	str = (str || '').toString();

	let
		// Character that closes the sequence being read, '' when not inside one
		endOperator = '',
		// Token that is currently being collected
		node = {
			type: 'text',
			value: ''
		},
		// True when the previous character was an unescaped backslash
		escaped = false,
		// Tokens of the address that is currently being collected
		address = [],
		// Result: all address objects found so far
		addresses = [];

	const
		/*
		 * Operator tokens and which tokens are expected to end the sequence
		 */
		OPERATORS = {
			'"': '"',
			'(': ')',
			'<': '>',
			',': '',
			// Groups are ended by semicolons
			':': ';',
			// Semicolons are not a legal delimiter per the RFC2822 grammar other
			// than for terminating a group, but they are also not valid for any
			// other use in this context. Given that some mail clients have
			// historically allowed the semicolon as a delimiter equivalent to the
			// comma in their UI, it makes sense to treat them the same as a comma
			// when used outside of a group.
			';': ''
		},

		// Stores the token when it has content and starts a new text token
		pushToken = token => {
			token.value = (token.value || '').toString().trim();
			token.value.length && address.push(token);
			node = {
				type: 'text',
				value: ''
			};
			escaped = false;
		},

		// Converts the collected tokens into address objects and starts a new address
		pushAddress = () => {
			if (address.length) {
				address = _handleAddress(address);
				if (address.length) {
					addresses = addresses.concat(address);
				}
			}
			address = [];
		};

	[...str].forEach(chr => {
		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
			pushToken(node);
			if (',' === chr || ';' === chr) {
				pushAddress();
				// A ";" that terminates a group must also end the group sequence.
				// Without this reset every "," behind the group is read as text
				// and the addresses that follow the group are not separated.
				endOperator = '';
			} else {
				// Either the closing operator was reached or a new sequence starts
				endOperator = endOperator ? '' : OPERATORS[chr];
				if ('<' === chr) {
					node.type = 'email';
				} else if ('(' === chr) {
					node.type = 'comment';
				} else if (':' === chr) {
					node.type = 'group';
				}
			}
		} else {
			node.value += chr;
			// A backslash escapes the next character, unless it is escaped itself
			escaped = !escaped && '\\' === chr;
		}
	});

	// Flush what is left after the last character
	pushToken(node);
	pushAddress();

	return addresses;
}
/**
 * Converts the tokens collected for a single address into address objects.
 *
 * The token values are sorted by type ("email", "comment", "group", "text").
 * When a group token is present, its member list is parsed with
 * addressparser() and only the members are returned. Otherwise exactly one
 * object is returned: the first "email" value is the address and everything
 * else forms the name. Without an "email" token the address is searched in
 * the text tokens.
 *
 * @param {Array} tokens Tokens object
 * @return {Array} Address objects in the form {email, name}
 */
function _handleAddress(tokens) {
	const
		parts = {
			email: [],
			comment: [],
			group: [],
			text: []
		},
		// Comments take the role of the text when there is no text at all
		commentsToText = () => {
			if (!parts.text.length && parts.comment.length) {
				parts.text = parts.comment;
				parts.comment = [];
			}
		};

	let hasGroup = false;
	for (const token of tokens) {
		hasGroup = hasGroup || 'group' === token.type;
		parts[token.type].push(token.value);
	}

	commentsToText();

	if (hasGroup) {
		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
		// The group name is dropped, only the parsed members are returned
		return [].concat(addressparser(parts.group.join(',')));
	}

	// If no address was found, try to detect one from regular text
	if (!parts.email.length && parts.text.length) {
		// Searching backwards, take the text entry that is exactly one address
		for (let pos = parts.text.length - 1; pos >= 0; --pos) {
			if (parts.text[pos].match(/^[^@\s]+@[^@\s]+$/)) {
				parts.email = parts.text.splice(pos, 1);
				break;
			}
		}
		// Still no address: cut the first address-like part out of a text entry
		for (let pos = parts.text.length - 1; pos >= 0 && !parts.email.length; --pos) {
			parts.text[pos] = parts.text[pos].replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, found => {
				parts.email = [found.trim()];
				return '';
			});
		}
	}

	// The text may have been emptied above, so check the comments again
	commentsToText();

	// Only the first address is kept, the others become part of the name
	if (parts.email.length > 1) {
		parts.text = parts.text.concat(parts.email.splice(1));
	}

	const
		// Values are joined with spaces
		email = parts.email.join(' ').trim(),
		name = parts.text.join(' ').trim(),
		result = { email, name };

	// Identical values: it is either only an address or only a name
	if (email === name) {
		if (email.includes('@')) {
			result.name = '';
		} else {
			result.email = '';
		}
	}

	return [result];
}
export class EmailModel extends AbstractModel {
/**
* @param {string=} email = ''

View file

@ -1,6 +1,7 @@
import { AbstractCollectionModel } from 'Model/AbstractCollection';
import { EmailModel, addressparser } from 'Model/Email';
import { EmailModel } from 'Model/Email';
import { forEachObjectValue } from 'Common/Utils';
import { addressparser } from 'Mime/Address';
'use strict';

View file

@ -36,7 +36,8 @@ import { MessagelistUserStore } from 'Stores/User/Messagelist';
import Remote from 'Remote/User/Fetch';
import { ComposeAttachmentModel } from 'Model/ComposeAttachment';
import { EmailModel, addressparser } from 'Model/Email';
import { EmailModel } from 'Model/Email';
import { addressparser } from 'Mime/Address';
import { decorateKoCommands, showScreenPopup } from 'Knoin/Knoin';
import { AbstractViewPopup } from 'Knoin/AbstractViews';