/*
 * Mirror of https://github.com/Foundry376/Mailspring.git
 * Synced 2025-09-06 04:35:30 +08:00
 * Summary: see title. Test Plan: manual. Reviewers: spang. Reviewed By: spang.
 * Differential Revision: https://phab.nylas.com/D3781
 * 423 lines, 15 KiB, JavaScript
 */
/* eslint no-useless-escape: 0 */
const mimelib = require('mimelib');
const encoding = require('encoding');
const he = require('he');
const os = require('os');
const fs = require('fs');
const path = require('path');
const mkdirp = require('mkdirp');
const {Errors: {APIError}} = require('isomorphic-core');
const {N1CloudAPI, RegExpUtils, Utils} = require('nylas-exports');

// Snippet length limits, counted in string characters: snippets aim for
// roughly SNIPPET_SIZE, while SNIPPET_MAX_SIZE is the hard db cutoff.
const SNIPPET_SIZE = 100;
const SNIPPET_MAX_SIZE = 255;

/**
 * Turns the raw values of one address header (From/To/Cc/...) into a flat
 * list of contacts.
 *
 * Format of input: ['a@example.com, B <b@example.com>', 'c@example.com'],
 * where each element of the array is the unparsed contents of a single
 * element of the same header field. (It's totally valid to have multiple
 * From/To/etc. headers on the same email.)
 *
 * @param {string[]} [input] - unparsed header values; may be missing/empty.
 * @returns {Array<{name: *, email: *}>} one entry per address reported by
 *   `mimelib.parseAddresses` (`address` is exposed as `email`). Returns []
 *   when `input` is missing, empty, or its first element is falsy.
 */
function parseContacts(input) {
  if (!input || input.length === 0 || !input[0]) {
    return [];
  }
  const contacts = [];
  for (const headerLine of input) {
    const values = mimelib.parseAddresses(headerLine);
    if (!values || values.length === 0) {
      continue;
    }
    for (const value of values) {
      // Ignore empty entries in the parser output.
      if (!value || value.length === 0) {
        continue;
      }
      contacts.push({name: value.name, email: value.address});
    }
  }
  return contacts;
}

/**
 * Extracts a short plain-text preview from an HTML message body.
 *
 * Relies on the browser DOM globals `DOMParser`, `document`, `NodeFilter`
 * and `Node` being available in the calling environment.
 *
 * @param {string} body - HTML of the message body.
 * @returns {string} whitespace-normalized text, never longer than
 *   SNIPPET_MAX_SIZE and cut at the first space at or after SNIPPET_SIZE
 *   when there is one. Empty string when the body has no visible text.
 */
function parseSnippet(body) {
  const doc = new DOMParser().parseFromString(body, 'text/html');
  const skipTags = new Set(['TITLE', 'SCRIPT', 'STYLE', 'IMG']);
  const noSpaceTags = new Set(['B', 'I', 'STRONG', 'EM', 'SPAN']);

  const acceptNode = (node) => {
    if (skipTags.has(node.tagName)) {
      // skip this node and all its children
      return NodeFilter.FILTER_REJECT;
    }
    if (node.nodeType !== Node.TEXT_NODE) {
      return NodeFilter.FILTER_ACCEPT;
    }
    // Whitespace-only text nodes contribute nothing to the snippet.
    const nodeValue = node.nodeValue ? node.nodeValue.trim() : null;
    return nodeValue ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_SKIP;
  };
  const treeWalker = document.createTreeWalker(
    doc,
    NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT,
    acceptNode
  );

  let extractedText = '';
  let lastNodeTag = '';
  while (treeWalker.nextNode()) {
    const node = treeWalker.currentNode;
    if (node.nodeType === Node.ELEMENT_NODE) {
      lastNodeTag = node.nodeName;
      continue;
    }
    // Separate text runs with a space unless the most recently visited
    // element is an inline formatting tag.
    if (extractedText && !noSpaceTags.has(lastNodeTag)) {
      extractedText += ' ';
    }
    extractedText += node.nodeValue;
    if (extractedText.length > SNIPPET_MAX_SIZE) {
      // Already more text than can be stored; no need to walk further.
      break;
    }
  }

  // clean up and trim snippet
  let trimmed = extractedText
    .trim()
    .replace(/[\n\r]/g, ' ')
    .replace(/\s\s+/g, ' ')
    .slice(0, SNIPPET_MAX_SIZE);
  if (trimmed) {
    // TODO: strip quoted text from snippets also
    // trim down to approx. SNIPPET_SIZE w/out cutting off words right in the
    // middle (if possible)
    const wordBreak = trimmed.indexOf(' ', SNIPPET_SIZE);
    if (wordBreak !== -1) {
      trimmed = trimmed.slice(0, wordBreak);
    }
  }
  return trimmed;
}

/**
 * In goes arrays of text, out comes arrays of RFC2822 Message-Ids. Luckily,
 * these days most all text in In-Reply-To, Message-Id, and References headers
 * actually conforms to the spec.
 *
 * Each header value is split on whitespace and only tokens of the form
 * `<...>` are kept; duplicates are dropped, first occurrence wins.
 *
 * @param {string[]} [input] - raw header values.
 * @returns {string[]} unique Message-Ids in order of first appearance.
 *   Returns [] when `input` is missing, empty, or its first element is falsy.
 */
function parseReferences(input) {
  if (!input || !input.length || !input[0]) {
    return [];
  }
  const references = new Set();
  for (const headerLine of input) {
    for (const ref of headerLine.split(/\s+/)) {
      if (/^<.*>$/.test(ref)) {
        references.add(ref);
      }
    }
  }
  return Array.from(references);
}

/**
 * Wraps plain text in a `<pre class="nylas-plaintext">` element after
 * escaping it with `he.escape`, so it can be concatenated with HTML parts.
 *
 * @param {string} text - plain-text content.
 * @returns {string} HTML fragment.
 */
function htmlifyPlaintext(text) {
  const escapedText = he.escape(text);
  return `<pre class="nylas-plaintext">${escapedText}</pre>`;
}

/**
 * Replaces the `MESSAGE_ID` placeholder in every URL that starts with
 * `N1CloudAPI.APIRoot` with the real message id.
 *
 * @param {string} messageId - id to substitute for the placeholder.
 * @param {string} originalBody - message body HTML.
 * @returns {string} body with the placeholders filled in.
 */
function replaceMessageIdInBodyTrackingLinks(messageId, originalBody) {
  // The API root is a literal URL, not a pattern: escape regex metacharacters
  // so that e.g. "." only matches a dot and look-alike hosts are left alone.
  const escapedRoot = String(N1CloudAPI.APIRoot).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`(${escapedRoot}.+?)MESSAGE_ID`, 'g');
  // A replacer function inserts messageId verbatim; a replacement string
  // would interpret "$&", "$1", ... sequences inside the id.
  return originalBody.replace(regex, (match, prefix) => `${prefix}${messageId}`);
}

/**
 * Removes tracking instrumentation from a message body: drops
 * `<img class="n1-open" ...>` tags and rewrites every link matched by
 * `RegExpUtils.urlLinkTagRegex()` whose URL has a query string to the decoded
 * value of its first query parameter.
 *
 * Links whose first query parameter has no value, or whose value is not valid
 * percent-encoding, are left untouched.
 *
 * @param {string} originalBody - message body HTML.
 * @returns {string} body without tracking pixel and link redirects.
 */
function stripTrackingLinksFromBody(originalBody) {
  const withoutOpenPixel = originalBody.replace(/<img class="n1-open"[^<]+src="([a-zA-Z0-9-_:/.]*)">/g, '');
  return withoutOpenPixel.replace(RegExpUtils.urlLinkTagRegex(), (match, prefix, url, suffix, content, closingTag) => {
    const param = url.split('?')[1];
    if (!param) {
      return match;
    }
    // Without a value there is nothing to restore; decoding `undefined`
    // would turn the href into the literal string "undefined".
    const encodedLink = param.split('=')[1];
    if (!encodedLink) {
      return match;
    }
    let link;
    try {
      link = decodeURIComponent(encodedLink);
    } catch (err) {
      if (err instanceof URIError) {
        // Malformed escape sequence: keep the link as it is rather than
        // failing the whole body.
        return match;
      }
      throw err;
    }
    return `${prefix}${link}${suffix}${content}${closingTag}`;
  });
}

/**
 * Produces the per-recipient variant of a message body by tagging tracking
 * URLs with the recipient's (URL-safe base64) email address and filling in
 * the message id placeholder.
 *
 * @param {Object} options
 * @param {{id: string, body: string}} options.baseMessage - message whose
 *   body is customized.
 * @param {{email: string}} options.recipient - recipient to tag the URLs for.
 * @param {boolean} [options.usesOpenTracking] - when truthy, rewrite the
 *   `n1-open` image so its `src` carries `?r=<encoded email>`.
 * @param {boolean} [options.usesLinkTracking] - when truthy, append
 *   `&r=<encoded email>` to the URL of every link matched by
 *   `RegExpUtils.urlLinkTagRegex()`.
 * @returns {string} the customized body.
 */
function buildTrackingBodyForRecipient({baseMessage, recipient, usesOpenTracking, usesLinkTracking} = {}) {
  const {id: messageId, body} = baseMessage;
  // btoa emits the standard base64 alphabet; swap "+" and "/" for their
  // URL-safe counterparts since the value goes into a query string.
  const encodedEmail = btoa(recipient.email)
    .replace(/\+/g, '-')
    .replace(/\//g, '_');
  let customBody = body;
  if (usesOpenTracking) {
    customBody = customBody.replace(/<img class="n1-open"[^<]+src="([a-zA-Z0-9-_:/.]*)">/g, (match, url) => {
      return `<img class="n1-open" width="0" height="0" style="border:0; width:0; height:0;" src="${url}?r=${encodedEmail}">`;
    });
  }
  if (usesLinkTracking) {
    customBody = customBody.replace(RegExpUtils.urlLinkTagRegex(), (match, prefix, url, suffix, content, closingTag) => {
      return `${prefix}${url}&r=${encodedEmail}${suffix}${content}${closingTag}`;
    });
  }
  return replaceMessageIdInBodyTrackingLinks(messageId, customBody);
}

/**
 * Computes the In-Reply-To and References header values for a reply to the
 * given message.
 *
 * @param {Object} messageReplyingTo - message being replied to.
 * @param {string} [messageReplyingTo.headerMessageId] - its Message-Id; when
 *   missing, both returned values are undefined.
 * @param {Array<{id: *, rfc2822MessageId: string}>} [messageReplyingTo.references]
 *   - reference records of that message.
 * @param {Array} [messageReplyingTo.referencesOrder] - ids of `references` in
 *   header order. When absent, the order of `references` itself is used.
 * @returns {{inReplyTo: (string|undefined), references: (string[]|undefined)}}
 *   `references` always ends up containing the replied-to Message-Id.
 */
function getReplyHeaders(messageReplyingTo) {
  const {headerMessageId} = messageReplyingTo;
  if (!headerMessageId) {
    return {inReplyTo: undefined, references: undefined};
  }

  const storedReferences = messageReplyingTo.references;
  if (!storedReferences) {
    return {inReplyTo: headerMessageId, references: [headerMessageId]};
  }

  // Prototype-less lookup so ids like "constructor" cannot hit inherited keys.
  const refById = Object.create(null);
  for (const ref of storedReferences) {
    refById[ref.id] = ref;
  }

  // Replying must not fail just because the ordering column is empty.
  const order = messageReplyingTo.referencesOrder != null
    ? messageReplyingTo.referencesOrder
    : Array.from(storedReferences, ref => ref.id);

  const references = [];
  for (const referenceId of order) {
    const ref = refById[referenceId];
    // Skip ids that have no matching reference record instead of throwing.
    if (ref) {
      references.push(ref.rfc2822MessageId);
    }
  }
  if (!references.includes(headerMessageId)) {
    references.push(headerMessageId);
  }
  return {inReplyTo: headerMessageId, references};
}

/**
 * Decodes the desired MIME parts of a fetched message and concatenates them
 * into a single HTML body.
 *
 * @param {Object} imapMessage - fetched message; `imapMessage.parts[id]`
 *   holds the raw content of each part.
 * @param {Array<{id: *, mimeType: string, transferEncoding: string, charset: string}>} desiredParts
 *   - descriptors of the parts to include, in display order.
 * @returns {string} concatenated body with NUL characters removed.
 */
function bodyFromParts(imapMessage, desiredParts) {
  let body = '';
  for (const {id, mimeType, transferEncoding, charset} of desiredParts) {
    let decoded = '';
    // see https://www.w3.org/Protocols/rfc1341/5_Content-Transfer-Encoding.html
    if ((/quot(ed)?[-/]print(ed|able)?/i).test(transferEncoding)) {
      decoded = mimelib.decodeQuotedPrintable(imapMessage.parts[id], charset);
    } else if ((/base64/i).test(transferEncoding)) {
      decoded = mimelib.decodeBase64(imapMessage.parts[id], charset);
    } else {
      // Treat this as having no encoding and decode based only on the charset
      //
      // According to https://tools.ietf.org/html/rfc2045#section-5.2,
      // this should default to ascii; however, if we don't get a charset,
      // it's possible clients (like nodemailer) encoded the data as utf-8
      // anyway. Since ascii is a strict subset of utf-8, it's safer to
      // try and decode as utf-8 if we don't have the charset.
      //
      // (This applies to decoding quoted-printable and base64 as well. The
      // mimelib library, if charset is null, will default to utf-8)
      //
      decoded = encoding.convert(imapMessage.parts[id], 'utf-8', charset).toString('utf-8');
    }
    // desiredParts are in order of the MIME tree walk, e.g. 1.1, 1.2, 2...,
    // and for multipart/alternative arrays, we have already pulled out the
    // highest fidelity part (generally HTML).
    //
    // Therefore, the correct way to display multiple parts is to simply
    // concatenate later ones with the body of the previous MIME parts.
    //
    // This may seem kind of weird, but some MUAs _do_ send out whack stuff
    // like an HTML body followed by a plaintext footer.
    if (mimeType === 'text/plain') {
      body += htmlifyPlaintext(decoded);
    } else {
      body += decoded;
    }
  }
  // sometimes decoding results in a NUL-terminated body string, which makes
  // SQLite blow up with an 'unrecognized token' error
  return body.replace(/\0/g, '');
}

/**
 * Builds the plain-object representation of a message from what was fetched
 * over IMAP.
 *
 * Since we only fetch the MIME structure and specific desired MIME parts from
 * IMAP, we unfortunately can't use an existing library like mailparser to parse
 * the message, and have to do fun stuff like deal with character sets and
 * content-transfer-encodings ourselves.
 *
 * @param {Object} imapMessage - fetched message: `attributes` (flags, uid,
 *   date, x-gm-* values) and `parts`, one of whose keys starts with 'HEADER'
 *   and holds the raw headers.
 * @param {Array<Object>} desiredParts - part descriptors handed to
 *   `bodyFromParts`.
 * @param {Object} context
 * @param {Object} context.db - provides `Message.hash` and
 *   `Label.findXGMLabels`.
 * @param {*} context.accountId - id stored on the message.
 * @param {Object} context.folder - folder the message was fetched from; its
 *   `id`, `role` and (in debug mode) `name` are read.
 * @returns {Promise<Object>} the parsed message values.
 */
async function parseFromImap(imapMessage, desiredParts, {db, accountId, folder}) {
  const {Message, Label} = db;
  const {attributes} = imapMessage;

  // this key name can change depending on which subset of headers we're downloading,
  // so to prevent having to update this code every time we change the set,
  // dynamically look up the key instead
  const headerKey = Object.keys(imapMessage.parts).filter(k => k.startsWith('HEADER'))[0];
  const headers = imapMessage.parts[headerKey].toString('ascii');
  const parsedHeaders = mimelib.parseHeaders(headers);
  for (const key of ['x-gm-thrid', 'x-gm-msgid', 'x-gm-labels']) {
    parsedHeaders[key] = attributes[key];
  }

  const parsedMessage = {
    to: parseContacts(parsedHeaders.to),
    cc: parseContacts(parsedHeaders.cc),
    bcc: parseContacts(parsedHeaders.bcc),
    from: parseContacts(parsedHeaders.from),
    replyTo: parseContacts(parsedHeaders['reply-to']),
    accountId: accountId,
    body: bodyFromParts(imapMessage, desiredParts),
    snippet: null,
    unread: !attributes.flags.includes('\\Seen'),
    starred: attributes.flags.includes('\\Flagged'),
    // We limit drafts to the drafts and all mail folders because some clients
    // may send messages and improperly leave the draft flag set, and also
    // because we want to exclude drafts moved to the trash from the drafts view
    // see https://github.com/nylas/cloud-core/commit/1433921a166ddcba7c269158d65febb7928767d8
    // & associated phabricator bug https://phab.nylas.com/T5696
    isDraft: (
      ['drafts', 'all'].includes(folder.role) &&
      (
        attributes.flags.includes('\\Draft') ||
        (parsedHeaders['x-gm-labels'] || []).includes('\\Draft')
      )
    ),
    // We prefer the date from the message headers because the date is one of
    // the fields we use for generating unique message IDs, and the server
    // INTERNALDATE, `attributes.date`, may differ across accounts for the same
    // message. If the Date header is not included in the message, we fall
    // back to the INTERNALDATE and it's possible we'll generate different IDs
    // for the same message delivered to different accounts (which is better
    // than having message ID collisions for different messages, which could
    // happen if we did not include the date).
    date: parsedHeaders.date ? parsedHeaders.date[0] : attributes.date,
    folderImapUID: attributes.uid,
    folderId: folder.id,
    folder: null,
    labels: [],
    headerMessageId: parseReferences(parsedHeaders['message-id'])[0],
    // References are not saved on the message model itself, but are later
    // converted to associated Reference objects so we can index them. Since we
    // don't do tree threading, we don't need to care about In-Reply-To
    // separately, and can simply associate them all in the same way.
    // Generally, References already contains the Message-IDs in In-Reply-To,
    // but we concat and dedupe just in case.
    references: parseReferences(
      (parsedHeaders.references || []).concat(
        (parsedHeaders['in-reply-to'] || []), (parsedHeaders['message-id'] || [])
      )
    ),
    gMsgId: parsedHeaders['x-gm-msgid'],
    gThrId: parsedHeaders['x-gm-thrid'],
    subject: parsedHeaders.subject ? parsedHeaders.subject[0] : '(no subject)',
  };

  // Inversely to `buildForSend`, we leave the date header as it is so that the
  // format is consistent for the generative IDs, then convert it to a Date object
  parsedMessage.id = Message.hash(parsedMessage);
  const headerDate = new Date(Date.parse(parsedMessage.date));
  // A malformed Date header parses to an Invalid Date. The id above has
  // already been derived from the raw header, so for the stored date we can
  // safely fall back to the server's INTERNALDATE.
  parsedMessage.date = Number.isNaN(headerDate.getTime())
    ? new Date(Date.parse(attributes.date))
    : headerDate;

  parsedMessage.snippet = parseSnippet(parsedMessage.body);

  parsedMessage.folder = folder;

  const xGmLabels = parsedHeaders['x-gm-labels'];
  if (xGmLabels) {
    parsedMessage.folderImapXGMLabels = JSON.stringify(xGmLabels);
    parsedMessage.labels = await Label.findXGMLabels(xGmLabels);
  }

  if (process.env.NYLAS_DEBUG) {
    // Debug aid: dump the parser input and output to a per-folder directory
    // under the OS temp dir, one file per IMAP UID.
    const outJSON = JSON.stringify({imapMessage, desiredParts, result: parsedMessage});
    const outDir = path.join(os.tmpdir(), "k2-parse-output", folder.name);
    const outFile = path.join(outDir, imapMessage.attributes.uid.toString());
    mkdirp.sync(outDir);
    fs.writeFileSync(outFile, outJSON);
  }

  return parsedMessage;
}

/**
 * Builds an unsaved Message instance for an outgoing message described by
 * the given JSON, wiring up thread membership and reply headers.
 *
 * @param {Object} db - provides the `Thread`, `Message` and `Reference`
 *   models.
 * @param {Object} json - message description. Reads `thread_id`,
 *   `reply_to_message_id`, `account_id`, `client_id`, `from`, `to`, `cc`,
 *   `bcc`, `reply_to`, `subject`, `body`, `draft` and `uploads`.
 * @returns {Promise<Object>} result of `Message.build`, with the transient
 *   `inReplyTo` and `references` properties set on it.
 * @throws {APIError} (400) when both a thread and a message to reply to are
 *   found but the message is not part of that thread.
 */
async function buildForSend(db, json) {
  const {Thread, Message, Reference} = db;
  let replyToThread;
  let replyToMessage;

  if (json.thread_id != null) {
    // NOTE(review): only message ids are loaded here, so the `isDraft` filter
    // and `getReplyHeaders` call in the thread-only branch below see messages
    // without those fields — confirm this is intended.
    replyToThread = await Thread.find({
      where: {id: json.thread_id},
      include: [{
        model: Message,
        as: 'messages',
        attributes: ['id'],
      }],
    });
  }

  if (json.reply_to_message_id != null) {
    replyToMessage = await Message.findById(
      json.reply_to_message_id,
      { include: [{model: Reference, as: 'references', attributes: ['id', 'rfc2822MessageId']}] }
    );
  }

  if (replyToThread && replyToMessage) {
    const isInThread = replyToThread.messages.some((msg) => msg.id === replyToMessage.id);
    if (!isInThread) {
      throw new APIError(`Message ${replyToMessage.id} is not in thread ${replyToThread.id}`, 400);
    }
  }

  let thread;
  let replyHeaders = {};
  let inReplyToLocalMessageId;
  if (replyToMessage) {
    // An explicit message to reply to wins; the thread is taken from it.
    inReplyToLocalMessageId = replyToMessage.id;
    replyHeaders = getReplyHeaders(replyToMessage);
    thread = await replyToMessage.getThread();
  } else if (replyToThread) {
    // Only a thread was given: reply to its last non-draft message, if any.
    thread = replyToThread;
    const previousMessages = thread.messages.filter(msg => !msg.isDraft);
    if (previousMessages.length > 0) {
      const lastMessage = previousMessages[previousMessages.length - 1];
      inReplyToLocalMessageId = lastMessage.id;
      replyHeaders = getReplyHeaders(lastMessage);
    }
  }

  const {inReplyTo, references} = replyHeaders;
  const date = new Date();
  const message = {
    accountId: json.account_id,
    threadId: thread ? thread.id : null,
    headerMessageId: Message.buildHeaderMessageId(json.client_id),
    from: json.from,
    to: json.to,
    cc: json.cc,
    bcc: json.bcc,
    replyTo: json.reply_to,
    subject: json.subject,
    body: json.body,
    unread: false,
    isDraft: json.draft,
    isSent: false,
    version: 0,
    date: date,
    inReplyToLocalMessageId: inReplyToLocalMessageId,
    uploads: json.uploads,
  };
  // We have to clone the message and change the date for hashing because the
  // date we get later when we parse from IMAP is a different format, per the
  // nodemailer buildmail function that gives us the raw message and replaces
  // the date header with this modified UTC string
  // https://github.com/nodemailer/buildmail/blob/master/lib/buildmail.js#L470
  const messageForHashing = Utils.deepClone(message);
  messageForHashing.date = date.toUTCString().replace(/GMT/, '+0000');
  message.id = Message.hash(messageForHashing);
  message.body = replaceMessageIdInBodyTrackingLinks(message.id, message.body);
  const instance = Message.build(message);

  // TODO we set these temporary properties which aren't stored in the database
  // model because SendmailClient requires them to send the message with the
  // correct headers.
  // This should be cleaned up
  instance.inReplyTo = inReplyTo;
  instance.references = references;
  return instance;
}

module.exports = {
|
|
buildForSend,
|
|
getReplyHeaders,
|
|
parseFromImap,
|
|
parseSnippet,
|
|
parseContacts,
|
|
stripTrackingLinksFromBody,
|
|
buildTrackingBodyForRecipient,
|
|
replaceMessageIdInBodyTrackingLinks,
|
|
}
|