Mailspring/packages/local-sync/src/shared/message-factory.js

357 lines
13 KiB
JavaScript
Raw Normal View History

/* eslint no-useless-escape: 0 */
const mimelib = require('mimelib');
const encoding = require('encoding');
const he = require('he');
const os = require('os');
const fs = require('fs');
const path = require('path');
const mkdirp = require('mkdirp');
2016-12-30 02:35:24 +08:00
const {Errors: {APIError}} = require('isomorphic-core');
const {N1CloudAPI, RegExpUtils, Utils} = require('nylas-exports');
// Snippet length limits (string length). We aim for roughly SNIPPET_SIZE,
// but SNIPPET_MAX_SIZE is the hard db cutoff that must never be exceeded.
const SNIPPET_SIZE = 100;
const SNIPPET_MAX_SIZE = 255;
// Converts the parsed value of a to/cc/bcc/from header into a list of
// {name, email} contacts.
//
// The imap library we use currently hands us the header in a weird format: an
// email sent to a@example.com and b@example.com yields
// ['a@example.com, b@example.com'] (both addresses in the same string), so
// only the first element is parsed here. When that is fixed, this function
// will need to be updated accordingly.
function extractContacts(input) {
  const hasHeaderValue = Boolean(input) && input.length !== 0 && Boolean(input[0]);
  if (!hasHeaderValue) {
    return [];
  }

  const parsedAddresses = mimelib.parseAddresses(input[0]);
  if (!parsedAddresses || parsedAddresses.length === 0) {
    return [];
  }

  // contacts without an email address are worthless, especially when
  // extracted from emails, so entries lacking one are dropped
  return parsedAddresses
    .filter((entry) => entry && entry.length !== 0 && entry.address)
    .map(({name, address}) => ({name, email: address}));
}
[local-sync] Correctly handle messages with non-alternative multipart text bodies Summary: It's possible to have multiple inline HTML parts in a message, or even a multipart/alternative part that contains text and HTML, followed by a plaintext signature. Previously, if there was more than one text part in an email, we would pick the _last_ text/html or text/plain part that we found, and treat that as the entire message body. This works most of the time, but fails to display the full message body in some edge cases. This patch fixes that by resolving multipart/alternative subparts to a single part in the mimepart fetch stage, and then treating each desired mime part separately when parsing the message, concatenating them if there are multiple. This makes K2's handling of multipart MIME message text better, bug-wise, than the Python sync engine's, which has been mangling some rare messages forever. (Example from my email: every email from the MIT EECS Jobs List has never displayed the mailing list signature in N1.) Note that this patch also removes our tentative support for PGP encrypted messages. I'd rather add that back in later when I've dug up some real example messages to test on, rather than leaving it in in its current not-really-tested and probably not-really-working state, since it makes it harder to make sure that the rest of the logic isn't broken. Test Plan: manual for now - added examples of this to my growing list of regression tests to add to the message parser unit tests once I fix them Reviewers: juan, evan Reviewed By: evan Differential Revision: https://phab.nylas.com/D3600
2017-01-06 09:18:18 +08:00
/**
 * Builds a short plain-text preview of an HTML message body.
 *
 * The body is parsed as HTML and its text nodes are collected in document
 * order. The result is whitespace-normalized, capped at SNIPPET_MAX_SIZE and,
 * when possible, cut at the first space at or after SNIPPET_SIZE so words are
 * not chopped in the middle.
 *
 * Relies on the browser DOM globals DOMParser, document, NodeFilter and Node.
 *
 * @param {string} body - HTML of the message body.
 * @returns {string} the snippet; empty when the body has no usable text.
 */
function extractSnippet(body) {
  const doc = new DOMParser().parseFromString(body, 'text/html');
  const skipTags = new Set(['TITLE', 'SCRIPT', 'STYLE', 'IMG']);
  const noSpaceTags = new Set(['B', 'I', 'STRONG', 'EM', 'SPAN']);

  const treeWalker = document.createTreeWalker(doc, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT, (node) => {
    if (skipTags.has(node.tagName)) {
      // skip this node and all its children
      return NodeFilter.FILTER_REJECT;
    }
    if (node.nodeType === Node.TEXT_NODE) {
      // only text nodes with non-whitespace content contribute to the snippet
      const nodeValue = node.nodeValue ? node.nodeValue.trim() : null;
      if (nodeValue) {
        return NodeFilter.FILTER_ACCEPT;
      }
      return NodeFilter.FILTER_SKIP;
    }
    // elements are accepted so the loop below can see which tag precedes
    // each text node
    return NodeFilter.FILTER_ACCEPT;
  });

  let extractedText = "";
  let lastNodeTag = "";
  while (treeWalker.nextNode()) {
    if (treeWalker.currentNode.nodeType === Node.ELEMENT_NODE) {
      lastNodeTag = treeWalker.currentNode.nodeName;
    } else {
      // separate text runs with a space unless the most recently visited
      // element is an inline formatting tag
      if (extractedText && !noSpaceTags.has(lastNodeTag)) {
        extractedText += " ";
      }
      extractedText += treeWalker.currentNode.nodeValue;
      // stop walking once we have more than we could ever store
      if (extractedText.length > SNIPPET_MAX_SIZE) {
        break;
      }
    }
  }

  const snippetText = extractedText.trim();
  // clean up and trim snippet
  let trimmed = snippetText.replace(/[\n\r]/g, ' ').replace(/\s\s+/g, ' ').slice(0, SNIPPET_MAX_SIZE);
  if (trimmed) {
    // TODO: strip quoted text from snippets also
    // trim down to approx. SNIPPET_SIZE w/out cutting off words right in the
    // middle (if possible)
    const wordBreak = trimmed.indexOf(' ', SNIPPET_SIZE);
    if (wordBreak !== -1) {
      trimmed = trimmed.slice(0, wordBreak);
    }
  }
  return trimmed;
}
// Wraps a plaintext body in a <pre> element after HTML-escaping it, so the
// original whitespace survives rendering. The side effect is monospacing, but
// that seems OK and perhaps sometimes even desired (e.g. for ascii art or
// alignment).
function htmlifyPlaintext(text) {
  return `<pre class="nylas-plaintext">${he.escape(text)}</pre>`;
}
/**
 * Substitutes the real message id for the MESSAGE_ID placeholder in every
 * tracking URL (a URL starting with N1CloudAPI.APIRoot) found in the body.
 *
 * @param {string} messageId - id to insert in place of the placeholder.
 * @param {string} originalBody - HTML body containing placeholder URLs.
 * @returns {string} the body with placeholders replaced.
 */
function replaceMessageIdInBodyTrackingLinks(messageId, originalBody) {
  // The API root is a URL, so it contains regex metacharacters (at least
  // '.'); escape them so only the literal root matches.
  const escapedRoot = String(N1CloudAPI.APIRoot).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`(${escapedRoot}.+?)MESSAGE_ID`, 'g');
  // A replacer function inserts the id verbatim; a replacement string would
  // expand sequences such as "$&" if they appeared in the id.
  return originalBody.replace(regex, (match, prefix) => `${prefix}${messageId}`);
}
/**
 * Removes tracking instrumentation from a message body: the open-tracking
 * pixel (<img class="n1-open" ...>) is deleted, and each link whose URL has a
 * query string is replaced by the decoded value of its first query parameter.
 *
 * NOTE(review): every link with a query string is treated as a tracking link;
 * confirm callers only pass bodies whose links were all rewritten for tracking.
 *
 * @param {string} originalBody - HTML body containing tracking markup.
 * @returns {string} the body with the pixel removed and links restored.
 */
function stripTrackingLinksFromBody(originalBody) {
  const withoutOpenPixel = originalBody.replace(/<img class="n1-open"[^<]+src="([a-zA-Z0-9-_:/.]*)">/g, '');

  return withoutOpenPixel.replace(RegExpUtils.urlLinkTagRegex(), (match, prefix, url, suffix, content, closingTag) => {
    const query = url.split('?')[1];
    if (!query) {
      return match;
    }
    // Only the first parameter carries the original link; cut at '&' so that
    // further parameters (e.g. a per-recipient "&r=...") do not leak into it.
    const encodedTarget = query.split('&')[0].split('=')[1];
    if (!encodedTarget) {
      // no "key=value" pair to recover a link from; leave the tag untouched
      return match;
    }
    let target;
    try {
      target = decodeURIComponent(encodedTarget);
    } catch (err) {
      // malformed percent-encoding: keeping the link as-is is safer than
      // failing the whole body
      return match;
    }
    return `${prefix}${target}${suffix}${content}${closingTag}`;
  });
}
/**
 * Produces the per-recipient variant of a tracked message body.
 *
 * The recipient's email is base64-encoded (with '+' and '/' swapped for '-'
 * and '_') and attached as an `r` parameter to the open-tracking pixel and/or
 * to every link, depending on the flags. Finally the MESSAGE_ID placeholder
 * in tracking URLs is replaced with the base message's id.
 *
 * @param {Object} options
 * @param {Object} options.baseMessage - provides `id` and `body`.
 * @param {Object} options.recipient - provides `email`.
 * @param {boolean} options.usesOpenTracking - tag the open-tracking pixel.
 * @param {boolean} options.usesLinkTracking - tag the links.
 * @returns {string} the customized body.
 */
function buildTrackingBodyForRecipient({baseMessage, recipient, usesOpenTracking, usesLinkTracking} = {}) {
  const {id: messageId, body: baseBody} = baseMessage;
  const recipientToken = btoa(recipient.email).replace(/\+/g, '-').replace(/\//g, '_');

  const tagOpenPixel = (html) => html.replace(
    /<img class="n1-open"[^<]+src="([a-zA-Z0-9-_:/.]*)">/g,
    (match, url) => `<img class="n1-open" width="0" height="0" style="border:0; width:0; height:0;" src="${url}?r=${recipientToken}">`
  );
  const tagLinks = (html) => html.replace(
    RegExpUtils.urlLinkTagRegex(),
    (match, prefix, url, suffix, content, closingTag) => `${prefix}${url}&r=${recipientToken}${suffix}${content}${closingTag}`
  );

  const withPixel = usesOpenTracking ? tagOpenPixel(baseBody) : baseBody;
  const withLinks = usesLinkTracking ? tagLinks(withPixel) : withPixel;
  return replaceMessageIdInBodyTrackingLinks(messageId, withLinks);
}
/**
 * Computes the In-Reply-To and References values for a reply.
 *
 * @param {Object} messageReplyingTo - the message being replied to; its
 *   `headerMessageId` and optional `references` are read.
 * @returns {{inReplyTo: (string|undefined), references: (Array|undefined)}}
 *   both fields are undefined when the original has no headerMessageId;
 *   otherwise `references` is the original's references (if any) followed by
 *   its headerMessageId.
 */
function getReplyHeaders(messageReplyingTo) {
  const {headerMessageId, references: priorReferences} = messageReplyingTo;
  if (!headerMessageId) {
    return {inReplyTo: undefined, references: undefined};
  }

  // concat returns a new array, leaving the original message's list untouched
  const references = priorReferences
    ? priorReferences.concat(headerMessageId)
    : [headerMessageId];
  return {inReplyTo: headerMessageId, references};
}
[local-sync] Correctly handle messages with non-alternative multipart text bodies Summary: It's possible to have multiple inline HTML parts in a message, or even a multipart/alternative part that contains text and HTML, followed by a plaintext signature. Previously, if there was more than one text part in an email, we would pick the _last_ text/html or text/plain part that we found, and treat that as the entire message body. This works most of the time, but fails to display the full message body in some edge cases. This patch fixes that by resolving multipart/alternative subparts to a single part in the mimepart fetch stage, and then treating each desired mime part separately when parsing the message, concatenating them if there are multiple. This makes K2's handling of multipart MIME message text better, bug-wise, than the Python sync engine's, which has been mangling some rare messages forever. (Example from my email: every email from the MIT EECS Jobs List has never displayed the mailing list signature in N1.) Note that this patch also removes our tentative support for PGP encrypted messages. I'd rather add that back in later when I've dug up some real example messages to test on, rather than leaving it in in its current not-really-tested and probably not-really-working state, since it makes it harder to make sure that the rest of the logic isn't broken. Test Plan: manual for now - added examples of this to my growing list of regression tests to add to the message parser unit tests once I fix them Reviewers: juan, evan Reviewed By: evan Differential Revision: https://phab.nylas.com/D3600
2017-01-06 09:18:18 +08:00
/**
 * Decodes the desired MIME parts of an IMAP message and concatenates them
 * into a single HTML body string.
 *
 * @param {Object} imapMessage - fetched message; `imapMessage.parts[id]` holds
 *   the raw content of the MIME part with that id.
 * @param {Array<Object>} desiredParts - part descriptors carrying `id`,
 *   `mimeType`, `transferEncoding` and `charset`; processed in array order.
 * @returns {string} the concatenated body with NUL characters removed.
 * @throws {Error} when a part uses a Content-Transfer-Encoding other than
 *   7bit/8bit/binary, quoted-printable or base64. This function is
 *   synchronous and its result is used directly as the message body, so the
 *   failure is thrown rather than returned as a rejected Promise.
 */
function bodyFromParts(imapMessage, desiredParts) {
  // see https://www.w3.org/Protocols/rfc1341/5_Content-Transfer-Encoding.html
  const identityEncodings = new Set(['7bit', '8bit', 'binary']);

  let body = '';
  for (const {id, mimeType, transferEncoding, charset} of desiredParts) {
    const normalizedEncoding = transferEncoding ? transferEncoding.toLowerCase() : null;
    // charset defaults to 'ascii' according to
    // https://tools.ietf.org/html/rfc2045#section-5.2
    const partCharset = charset || 'ascii';

    let decoded = '';
    if (!normalizedEncoding || identityEncodings.has(normalizedEncoding)) {
      // NO transfer encoding has been performed --- how to decode to a string
      // depends ONLY on the charset
      decoded = encoding.convert(imapMessage.parts[id], 'utf-8', partCharset).toString('utf-8');
    } else if (normalizedEncoding === 'quoted-printable') {
      decoded = mimelib.decodeQuotedPrintable(imapMessage.parts[id], partCharset);
    } else if (normalizedEncoding === 'base64') {
      decoded = mimelib.decodeBase64(imapMessage.parts[id], partCharset);
    } else {
      // custom x-token content-transfer-encodings
      throw new Error(`Unsupported Content-Transfer-Encoding ${transferEncoding}, mimetype ${mimeType}`);
    }

    // desiredParts are in order of the MIME tree walk, e.g. 1.1, 1.2, 2...,
    // and for multipart/alternative arrays, we have already pulled out the
    // highest fidelity part (generally HTML).
    //
    // Therefore, the correct way to display multiple parts is to simply
    // concatenate later ones with the body of the previous MIME parts.
    //
    // This may seem kind of weird, but some MUAs _do_ send out whack stuff
    // like an HTML body followed by a plaintext footer.
    if (mimeType === 'text/plain') {
      body += htmlifyPlaintext(decoded);
    } else {
      body += decoded;
    }
  }

  // sometimes decoding results in a NUL-terminated body string, which makes
  // SQLite blow up with an 'unrecognized token' error
  return body.replace(/\0/g, '');
}
// Since we only fetch the MIME structure and specific desired MIME parts from
// IMAP, we unfortunately can't use an existing library like mailparser to parse
// the message, and have to do fun stuff like deal with character sets and
// content-transfer-encodings ourselves.
async function parseFromImap(imapMessage, desiredParts, {db, accountId, folder}) {
const {Message, Label} = db;
const {attributes} = imapMessage;
const headers = imapMessage.headers.toString('ascii');
const parsedHeaders = mimelib.parseHeaders(headers);
for (const key of ['x-gm-thrid', 'x-gm-msgid', 'x-gm-labels']) {
parsedHeaders[key] = attributes[key];
}
const parsedMessage = {
to: extractContacts(parsedHeaders.to),
cc: extractContacts(parsedHeaders.cc),
bcc: extractContacts(parsedHeaders.bcc),
from: extractContacts(parsedHeaders.from),
replyTo: extractContacts(parsedHeaders['reply-to']),
accountId: accountId,
[local-sync] Correctly handle messages with non-alternative multipart text bodies Summary: It's possible to have multiple inline HTML parts in a message, or even a multipart/alternative part that contains text and HTML, followed by a plaintext signature. Previously, if there was more than one text part in an email, we would pick the _last_ text/html or text/plain part that we found, and treat that as the entire message body. This works most of the time, but fails to display the full message body in some edge cases. This patch fixes that by resolving multipart/alternative subparts to a single part in the mimepart fetch stage, and then treating each desired mime part separately when parsing the message, concatenating them if there are multiple. This makes K2's handling of multipart MIME message text better, bug-wise, than the Python sync engine's, which has been mangling some rare messages forever. (Example from my email: every email from the MIT EECS Jobs List has never displayed the mailing list signature in N1.) Note that this patch also removes our tentative support for PGP encrypted messages. I'd rather add that back in later when I've dug up some real example messages to test on, rather than leaving it in in its current not-really-tested and probably not-really-working state, since it makes it harder to make sure that the rest of the logic isn't broken. Test Plan: manual for now - added examples of this to my growing list of regression tests to add to the message parser unit tests once I fix them Reviewers: juan, evan Reviewed By: evan Differential Revision: https://phab.nylas.com/D3600
2017-01-06 09:18:18 +08:00
body: bodyFromParts(imapMessage, desiredParts),
snippet: null,
unread: !attributes.flags.includes('\\Seen'),
starred: attributes.flags.includes('\\Flagged'),
// Make sure we use the date from the headers because we use the header date
// for generating message ids.
// `attributes.date` is the server generated date and might differ from the
// header across accounts
// TODO: how to exclude the date header from the hash if there is no
// Date: header and we have to use the IMAP server date for message sort
// & display? seems like it should be OK within an account, but might
// generate different message IDs across different accounts (which I
// don't think is a problem we're intending to solve...)
date: parsedHeaders.date ? parsedHeaders.date[0] : imapMessage.attributes.date,
folderImapUID: attributes.uid,
folderId: folder.id,
folder: null,
labels: [],
headers: parsedHeaders,
headerMessageId: parsedHeaders['message-id'] ? parsedHeaders['message-id'][0] : '',
gMsgId: parsedHeaders['x-gm-msgid'],
subject: parsedHeaders.subject ? parsedHeaders.subject[0] : '(no subject)',
}
// Inversely to `buildForSend`, we leave the date header as it is so that the
// format is consistent for the generative IDs, then convert it to a Date object
parsedMessage.id = Message.hash(parsedMessage)
parsedMessage.date = new Date(Date.parse(parsedMessage.date))
[local-sync] Correctly handle messages with non-alternative multipart text bodies Summary: It's possible to have multiple inline HTML parts in a message, or even a multipart/alternative part that contains text and HTML, followed by a plaintext signature. Previously, if there was more than one text part in an email, we would pick the _last_ text/html or text/plain part that we found, and treat that as the entire message body. This works most of the time, but fails to display the full message body in some edge cases. This patch fixes that by resolving multipart/alternative subparts to a single part in the mimepart fetch stage, and then treating each desired mime part separately when parsing the message, concatenating them if there are multiple. This makes K2's handling of multipart MIME message text better, bug-wise, than the Python sync engine's, which has been mangling some rare messages forever. (Example from my email: every email from the MIT EECS Jobs List has never displayed the mailing list signature in N1.) Note that this patch also removes our tentative support for PGP encrypted messages. I'd rather add that back in later when I've dug up some real example messages to test on, rather than leaving it in in its current not-really-tested and probably not-really-working state, since it makes it harder to make sure that the rest of the logic isn't broken. Test Plan: manual for now - added examples of this to my growing list of regression tests to add to the message parser unit tests once I fix them Reviewers: juan, evan Reviewed By: evan Differential Revision: https://phab.nylas.com/D3600
2017-01-06 09:18:18 +08:00
parsedMessage.snippet = extractSnippet(parsedMessage.body);
parsedMessage.folder = folder;
// TODO: unclear if this is necessary given we already have parsed labels
const xGmLabels = attributes['x-gm-labels']
if (xGmLabels) {
parsedMessage.folderImapXGMLabels = JSON.stringify(xGmLabels)
parsedMessage.labels = await Label.findXGMLabels(xGmLabels)
}
if (process.env.NYLAS_DEBUG) {
const outJSON = JSON.stringify({imapMessage, desiredParts, result: parsedMessage});
const outDir = path.join(os.tmpdir(), "k2-parse-output", folder.name)
const outFile = path.join(outDir, imapMessage.attributes.uid.toString());
mkdirp.sync(outDir);
fs.writeFileSync(outFile, outJSON);
}
return parsedMessage;
}
/**
 * Assembles an outgoing message from an API payload and hands it to
 * `Message.build`.
 *
 * Reply context is taken from `json.reply_to_message_id` when that message
 * exists; otherwise from the last non-draft message of `json.thread_id`.
 * When neither resolves, the message carries no reply headers and a null
 * `threadId`.
 *
 * @param {object} db - Must expose the `Thread` and `Message` models.
 * @param {object} json - Request payload. Fields read here: `account_id`,
 *   `client_id`, `thread_id`, `reply_to_message_id`, `from`, `to`, `cc`,
 *   `bcc`, `reply_to`, `subject`, `body`, `draft`, `uploads`.
 * @returns {Promise<*>} Whatever `Message.build` returns for the message.
 * @throws {APIError} With code 400 when both the thread and the message are
 *   found but the message is not one of the thread's messages.
 */
async function buildForSend(db, json) {
  const {Thread, Message} = db;

  // NOTE(review): only `id` is requested for the included messages, yet
  // `isDraft` is read from them further down -- confirm it is still populated.
  const replyToThread = json.thread_id != null
    ? await Thread.find({
      where: {id: json.thread_id},
      include: [{model: Message, as: 'messages', attributes: ['id']}],
    })
    : undefined;

  const replyToMessage = json.reply_to_message_id != null
    ? await Message.findById(json.reply_to_message_id)
    : undefined;

  // If the caller named both a thread and a message, they have to agree.
  if (replyToThread && replyToMessage) {
    const belongsToThread = replyToThread.messages.some(({id}) => id === replyToMessage.id);
    if (!belongsToThread) {
      throw new APIError(`Message ${replyToMessage.id} is not in thread ${replyToThread.id}`, 400);
    }
  }

  // An explicit reply-to message wins over the thread's latest message.
  let thread;
  let replyHeaders = {};
  if (replyToMessage) {
    replyHeaders = getReplyHeaders(replyToMessage);
    thread = await replyToMessage.getThread();
  } else if (replyToThread) {
    thread = replyToThread;
    const nonDrafts = thread.messages.filter((candidate) => !candidate.isDraft);
    if (nonDrafts.length > 0) {
      replyHeaders = getReplyHeaders(nonDrafts[nonDrafts.length - 1]);
    }
  }

  const {inReplyTo, references} = replyHeaders;
  const sentAt = new Date();
  const message = {
    accountId: json.account_id,
    threadId: thread ? thread.id : null,
    headerMessageId: Message.buildHeaderMessageId(json.client_id),
    from: json.from,
    to: json.to,
    cc: json.cc,
    bcc: json.bcc,
    references,
    inReplyTo,
    replyTo: json.reply_to,
    subject: json.subject,
    body: json.body,
    unread: true,
    isDraft: json.draft,
    isSent: false,
    version: 0,
    date: sentAt,
    uploads: json.uploads,
  };

  // The id is hashed from a copy whose date is the UTC string with "GMT"
  // swapped for "+0000": the date later parsed back from IMAP comes in that
  // format, because nodemailer's buildmail rewrites the Date header that way.
  // https://github.com/nodemailer/buildmail/blob/master/lib/buildmail.js#L470
  const hashInput = Object.assign(Utils.deepClone(message), {
    date: sentAt.toUTCString().replace(/GMT/, '+0000'),
  });
  message.id = Message.hash(hashInput);
  message.body = replaceMessageIdInBodyTrackingLinks(message.id, message.body);

  return Message.build(message);
}
module.exports = {
buildForSend,
parseFromImap,
extractSnippet,
extractContacts,
stripTrackingLinksFromBody,
buildTrackingBodyForRecipient,
replaceMessageIdInBodyTrackingLinks,
}