// wildduck/imap-core/lib/indexer/indexer.js
'use strict';

// Node.js core modules
const stream = require('stream');
const PassThrough = stream.PassThrough;

// Sibling helpers that build IMAP response structures from a parsed MIME tree
const BodyStructure = require('./body-structure');
const createEnvelope = require('./create-envelope');
const parseMimeTree = require('./parse-mime-tree');

// MongoDB driver: ObjectID for attachment ids, GridFSBucket for attachment storage
const ObjectID = require('mongodb').ObjectID;
const GridFSBucket = require('mongodb').GridFSBucket;

// MIME / transfer-encoding / charset decoding
const libmime = require('libmime');
const libqp = require('libqp');
const libbase64 = require('libbase64');
const iconv = require('iconv-lite');

// HTML <-> plaintext conversion helpers
const he = require('he');
const htmlToText = require('html-to-text');

const crypto = require('crypto');

class Indexer {
    /**
     * Creates an indexer instance
     *
     * @param {Object} [options] Indexer options
     * @param {Object} [options.fetchOptions] Stored on the instance as `fetchOptions`
     * @param {Object} [options.database] MongoDB database handle. If set, a GridFS bucket
     *        named "attachments" is opened on it and used for attachment bodies
     * @param {Object} [options.logger] Logger with info/debug/error methods, defaults to no-op functions
     */
    constructor(options) {
        this.options = options || {};
        this.fetchOptions = this.options.fetchOptions || {};

        this.database = this.options.database;
        if (this.database) {
            this.gridstore = new GridFSBucket(this.database, {
                bucketName: 'attachments'
            });
        }

        // create logger
        this.logger = this.options.logger || {
            info: () => false,
            debug: () => false,
            error: () => false
        };
    }

    /**
     * Returns expected size for a node. Mirrors the byte accounting of rebuild()
     * without producing any output.
     *
     * @param {Object} mimeTree Parsed mimeTree object (or sub node)
     * @param {Boolean} textOnly If true, do not include the message header in the response
     * @return {Number} Expected message size in bytes
     */
    getSize(mimeTree, textOnly) {
        let size = 0;
        let first = true;
        let root = true;

        // make sure that mixed body + mime gets rebuilt correctly
        let append = (data, force) => {
            if (Array.isArray(data)) {
                data = data.join('\r\n');
            }
            if (data || force) {
                // every chunk except the very first one is preceded by a line break
                size += Buffer.byteLength((first ? '' : '\r\n') + (data || ''), 'binary');
                first = false;
            }
        };

        let walk = (node, next) => {
            if (!textOnly || !root) {
                append(formatHeaders(node.header).join('\r\n') + '\r\n');
            }

            let finalize = () => {
                if (node.boundary) {
                    append('--' + node.boundary + '--\r\n');
                }
                append();
                next();
            };

            root = false;

            if (node.size || node.attachmentId) {
                append(false, true); // force newline
                // a node may carry an attachmentId without a size value,
                // do not let that turn the total into NaN
                size += Number(node.size) || 0;
            }

            if (node.boundary) {
                append('--' + node.boundary);
            }

            if (Array.isArray(node.childNodes)) {
                let pos = 0;
                let processChildNodes = () => {
                    if (pos >= node.childNodes.length) {
                        return finalize();
                    }
                    let childNode = node.childNodes[pos++];
                    walk(childNode, () => {
                        // boundary separator between sibling nodes
                        if (pos < node.childNodes.length) {
                            append('--' + node.boundary);
                        }
                        return processChildNodes();
                    });
                };
                processChildNodes();
            } else {
                finalize();
            }
        };

        walk(mimeTree, () => false);

        return size;
    }

    /**
     * Builds a parsed mime tree into a rfc822 message
     *
     * @param {Object} mimeTree Parsed mimeTree object
     * @param {Boolean} textOnly If true, do not include the message header in the response
     * @param {Boolean} skipExternal If true, do not include the external nodes
     * @return {Object} Object with `type` ('stream'), `value` (the message stream, extended with
     *         an `abort()` method) and `expectedLength` (size calculated by getSize())
     */
    rebuild(mimeTree, textOnly, skipExternal) {
        let res = new PassThrough();
        let first = true;
        let root = true;
        // body of the previously processed node that has not been written out yet
        let remainder = false;
        let aborted = false;

        // make sure that mixed body + mime gets rebuilt correctly
        let append = (data, force) => {
            if (Array.isArray(data)) {
                data = data.join('\r\n');
            }
            if (remainder || data || force) {
                if (!first) {
                    res.write('\r\n');
                } else {
                    first = false;
                }

                if (remainder && remainder.length) {
                    res.write(remainder);
                }

                if (data) {
                    res.write(Buffer.from(data, 'binary'));
                }
            }
            remainder = false;
        };

        let walk = (node, next) => {
            if (aborted) {
                return next();
            }

            if (!textOnly || !root) {
                append(formatHeaders(node.header).join('\r\n') + '\r\n');
            }

            root = false;

            if (Buffer.isBuffer(node.body)) {
                // node Buffer
                remainder = node.body;
            } else if (node.body && node.body.buffer) {
                // mongodb Binary
                remainder = node.body.buffer;
            } else if (typeof node.body === 'string') {
                // binary string
                remainder = Buffer.from(node.body, 'binary');
            } else {
                // whatever
                remainder = node.body;
            }

            let finalize = () => {
                if (node.boundary) {
                    append('--' + node.boundary + '--\r\n');
                }
                append();
                next();
            };

            if (node.boundary) {
                append('--' + node.boundary);
            } else if (node.attachmentId && !skipExternal) {
                append(false, true); // force newline between header and contents

                if (!this.gridstore) {
                    // no database was configured, report through the stream instead of
                    // throwing an uncatchable exception from inside setImmediate
                    res.emit('error', new Error('Attachment storage is not available'));
                    return;
                }

                let attachmentStream = this.gridstore.openDownloadStream(node.attachmentId);

                attachmentStream.once('error', err => {
                    res.emit('error', err);
                });

                attachmentStream.once('end', () => finalize());

                // keep the output open, more nodes might follow
                attachmentStream.pipe(res, {
                    end: false
                });

                return;
            }

            let pos = 0;
            let processChildNodes = () => {
                if (pos >= node.childNodes.length) {
                    return finalize();
                }
                let childNode = node.childNodes[pos++];
                walk(childNode, () => {
                    if (aborted) {
                        return next();
                    }

                    // boundary separator between sibling nodes
                    if (pos < node.childNodes.length) {
                        append('--' + node.boundary);
                    }

                    setImmediate(processChildNodes);
                });
            };

            if (Array.isArray(node.childNodes)) {
                processChildNodes();
            } else {
                finalize();
            }
        };

        // start on the next tick so that the caller has time to attach listeners
        setImmediate(
            walk.bind(null, mimeTree, () => {
                res.end();
            })
        );

        // if called then stops resolving rest of the message
        res.abort = () => {
            aborted = true;
        };

        return {
            type: 'stream',
            value: res,
            // NOTE(review): getSize() is not told about skipExternal, so it still counts
            // node.size for attachment nodes that are skipped here - confirm callers expect that
            expectedLength: this.getSize(mimeTree, textOnly)
        };
    }

    /**
     * Parses structured MIME tree from a rfc822 message source
     *
     * @param {String|Buffer} rfc822 E-mail message as 'binary'-string or Buffer
     * @return {Object} Parsed mime tree
     */
    parseMimeTree(rfc822) {
        return parseMimeTree(rfc822);
    }

    /**
     * Decode text/plain and text/html parts, separate node bodies from the tree.
     * Bodies of attachments and of very large text nodes are removed from the tree nodes
     * (node.body is set to false, node.attachmentId to a temporary "ATT00001" style id)
     * and queued in the returned `nodes` array.
     *
     * @param {String} messageId Message id used when rewriting cid: links to attachment: links
     * @param {Object} mimeTree Parsed mimeTree object, modified in place
     * @return {Object} Object with the keys `nodes` (bodies to be stored), `attachments`
     *         (attachment listing), `text` (plaintext content), `html` (array of html contents),
     *         `magic` (random number) and `map` (temporary attachment id => ObjectID)
     */
    getMaildata(messageId, mimeTree) {
        let magic = parseInt(crypto.randomBytes(2).toString('hex'), 16);
        let map = {};
        let maildata = {
            nodes: [],
            attachments: [],
            text: '',
            html: [],
            // magic number to append to increment stored attachment object counter
            magic,
            // match ids referenced in document to actual attachment ids
            map
        };

        let idcount = 0;
        let htmlContent = [];
        let textContent = [];
        let cidMap = new Map();

        let walk = (node, alternative, related) => {
            let flowed = false;
            let delSp = false;

            let parsedHeader = node.parsedHeader || {};
            let parsedContentType = parsedHeader['content-type'];
            let parsedDisposition = parsedHeader['content-disposition'];
            // a node might not have these headers at all, so never assume that params exist
            let contentTypeParams = (parsedContentType && parsedContentType.params) || {};
            let dispositionParams = (parsedDisposition && parsedDisposition.params) || {};

            let transferEncoding = (parsedHeader['content-transfer-encoding'] || '7bit')
                .toString()
                .toLowerCase()
                .trim();
            let contentType = ((parsedContentType && parsedContentType.value) || (node.rootNode ? 'text/plain' : 'application/octet-stream'))
                .toLowerCase()
                .trim();

            alternative = alternative || contentType === 'multipart/alternative';
            related = related || contentType === 'multipart/related';

            if (contentTypeParams.format && contentTypeParams.format.toLowerCase().trim() === 'flowed') {
                flowed = true;
                if (contentTypeParams.delsp && contentTypeParams.delsp.toLowerCase().trim() === 'yes') {
                    delSp = true;
                }
            }

            let disposition = ((parsedDisposition && parsedDisposition.value) || '').toLowerCase().trim() || false;
            let isInlineText = false;
            let isMultipart = contentType.split('/')[0] === 'multipart';

            // If the current node is HTML or Plaintext then allow larger content included in the mime tree
            // Also decode text/html value
            if (
                ['text/plain', 'text/html', 'text/rfc822-headers', 'message/delivery-status'].includes(contentType) &&
                (!disposition || disposition === 'inline')
            ) {
                isInlineText = true;
                if (node.body && node.body.length) {
                    let charset = contentTypeParams.charset || 'windows-1257';
                    let content = node.body;

                    if (transferEncoding === 'base64') {
                        content = libbase64.decode(content.toString());
                    } else if (transferEncoding === 'quoted-printable') {
                        content = libqp.decode(content.toString());
                    }

                    // lowercase first, otherwise uppercase names like "UTF-8" lose their letters
                    let normalizedCharset = charset
                        .toString()
                        .toLowerCase()
                        .replace(/[^a-z0-9]+/g, '');

                    if (!['ascii', 'usascii', 'utf8'].includes(normalizedCharset)) {
                        try {
                            content = iconv.decode(content, charset);
                        } catch (E) {
                            // do not decode charset
                        }
                    }

                    if (flowed) {
                        content = libmime.decodeFlowed(content.toString(), delSp);
                    } else {
                        content = content.toString();
                    }

                    if (contentType === 'text/html') {
                        htmlContent.push(content.trim());
                        if (!alternative) {
                            // no plaintext sibling to expect, derive one from the html
                            textContent.push(htmlToText.fromString(content).trim());
                        }
                    } else {
                        textContent.push(content.trim());
                        if (!alternative) {
                            // no html sibling to expect, derive one from the plaintext
                            htmlContent.push(textToHtml(content));
                        }
                    }
                }
            }

            // remove attachments and very large text nodes from the mime tree
            if (!isMultipart && node.body && node.body.length && (!isInlineText || node.size > 300 * 1024)) {
                let attachmentId = 'ATT' + leftPad(++idcount, '0', 5);
                map[attachmentId] = new ObjectID();

                let fileName = dispositionParams.filename || contentTypeParams.name || false;
                let contentId = (parsedHeader['content-id'] || '')
                    .toString()
                    .replace(/<|>/g, '')
                    .trim();

                if (fileName) {
                    try {
                        fileName = libmime.decodeWords(fileName).trim();
                    } catch (E) {
                        // failed to parse filename, keep as is (most probably an unknown charset is used)
                    }
                } else {
                    fileName = crypto.randomBytes(4).toString('hex') + '.' + libmime.detectExtension(contentType);
                }

                // nodes without a Content-ID can not be referenced by cid: links
                if (contentId) {
                    cidMap.set(contentId, {
                        id: attachmentId,
                        fileName
                    });
                }

                // push to queue
                maildata.nodes.push({
                    attachmentId,
                    options: {
                        fsync: true,
                        contentType,
                        // metadata should include only minimally required information, this would allow
                        // to share attachments between different messages if the content is exactly the same
                        // even though metadata (filename, content-disposition etc) might not
                        metadata: {
                            // values to detect if there are messages that reference to this attachment or not
                            m: maildata.magic,
                            c: 1,
                            // how to decode contents if a webclient or API asks for the attachment
                            transferEncoding
                        }
                    },
                    body: node.body
                });

                // do not include text content, multipart elements and embedded messages in the attachment list
                if (!isInlineText && !(contentType === 'message/rfc822' && (!disposition || disposition === 'inline'))) {
                    // list in the attachments array
                    maildata.attachments.push({
                        id: attachmentId,
                        fileName,
                        contentType,
                        disposition,
                        transferEncoding,
                        related,
                        // approximate size in kilobytes
                        sizeKb: Math.ceil((transferEncoding === 'base64' ? this.expectedB64Size(node.size) : node.size) / 1024)
                    });
                }

                node.body = false;
                node.attachmentId = attachmentId;
            }

            // message/rfc822
            if (node.message) {
                node = node.message;
            }

            if (Array.isArray(node.childNodes)) {
                node.childNodes.forEach(childNode => {
                    walk(childNode, alternative, related);
                });
            }
        };

        walk(mimeTree, false, false);

        // replace cid: references of known attachments with attachment:messageId/attachmentId
        let updateCidLinks = str =>
            str.replace(/\bcid:([^\s"']+)/g, (match, cid) => {
                if (cidMap.has(cid)) {
                    let attachment = cidMap.get(cid);
                    return 'attachment:' + messageId + '/' + attachment.id.toString();
                }
                return match;
            });

        maildata.html = htmlContent.filter(str => str.trim()).map(updateCidLinks);
        maildata.text = textContent
            .filter(str => str.trim())
            .map(updateCidLinks)
            .join('\n')
            .trim();

        return maildata;
    }

    /**
     * Stores attachments to GridStore. Node bodies are deduplicated by the SHA-256 hash
     * of the body: if a stored file with the same hash exists then only its counters are
     * incremented and the existing file id is used instead of uploading the body again.
     * Once all bodies are processed, temporary attachment ids in the mime tree are replaced
     * with the ids from maildata.map.
     *
     * @param {String} messageId Message id (not used by this method)
     * @param {Object} maildata Object returned by getMaildata(), maildata.map is updated in place
     * @param {Object} mimeTree Parsed mimeTree object, attachmentId values are updated in place
     * @param {Function} callback Called with (err) on failure or (null, true) on success
     */
    storeNodeBodies(messageId, maildata, mimeTree, callback) {
        let pos = 0;
        let nodes = maildata.nodes;

        // replace attachment IDs with ObjectIDs in the mimeTree
        let resolveAttachmentIds = node => {
            if (!node) {
                return;
            }
            if (node.attachmentId && maildata.map[node.attachmentId]) {
                node.attachmentId = maildata.map[node.attachmentId];
            }
            // getMaildata() descends into embedded message/rfc822 trees,
            // so temporary ids can be found from there as well
            if (node.message) {
                resolveAttachmentIds(node.message);
            }
            if (Array.isArray(node.childNodes)) {
                node.childNodes.forEach(childNode => resolveAttachmentIds(childNode));
            }
        };

        let storeNode = () => {
            if (pos >= nodes.length) {
                resolveAttachmentIds(mimeTree);
                return callback(null, true);
            }

            let node = nodes[pos++];

            let hash = crypto
                .createHash('sha256')
                .update(node.body)
                .digest('hex');

            this.database.collection('attachments.files').findOneAndUpdate(
                {
                    'metadata.h': hash
                },
                {
                    $inc: {
                        'metadata.c': 1,
                        'metadata.m': maildata.magic
                    }
                },
                {
                    returnOriginal: false
                },
                (err, result) => {
                    if (err) {
                        return callback(err);
                    }

                    if (result && result.value) {
                        // identical content is already stored, reference the existing file
                        maildata.map[node.attachmentId] = result.value._id;
                        return storeNode();
                    }

                    // make sure that the callback chain is continued only once
                    let returned = false;

                    node.options.metadata.h = hash;
                    let store = this.gridstore.openUploadStreamWithId(maildata.map[node.attachmentId], null, node.options);

                    store.once('error', err => {
                        if (returned) {
                            return;
                        }
                        returned = true;
                        callback(err);
                    });

                    store.once('finish', () => {
                        if (returned) {
                            return;
                        }
                        returned = true;
                        return storeNode();
                    });

                    store.end(node.body);
                }
            );
        };

        storeNode();
    }

    /**
     * Estimates the decoded size of base64 encoded content
     *
     * @param {Number} b64size Size of the base64 encoded content
     * @return {Number} Approximate decoded size, 0 for a missing, invalid or non-positive input
     */
    expectedB64Size(b64size) {
        b64size = Number(b64size) || 0;
        if (!b64size || b64size <= 0) {
            return 0;
        }

        // 2 bytes are subtracted for every 78 bytes of input to discount line breaks
        let newlines = Math.floor(b64size / 78);
        return Math.ceil((b64size - newlines * 2) / 4 * 3);
    }

    /**
     * Generates IMAP compatible BODY object from message tree
     *
     * @param {Object} mimeTree Parsed mimeTree object
     * @return {Array} BODY object as a structured Array
     */
    getBody(mimeTree) {
        // BODY BODYSTRUCTURE without extension data
        let body = new BodyStructure(mimeTree, {
            upperCaseKeys: true,
            body: true
        });

        return body.create();
    }

    /**
     * Generates IMAP compatible BODYSTRUCTURE object from message tree
     *
     * @param {Object} mimeTree Parsed mimeTree object
     * @return {Array} BODYSTRUCTURE object as a structured Array
     */
    getBodyStructure(mimeTree) {
        // full BODYSTRUCTURE
        let bodystructure = new BodyStructure(mimeTree, {
            upperCaseKeys: true,
            skipContentLocation: false
        });

        return bodystructure.create();
    }

    /**
     * Generates IMAP compatible ENVELOPE object from message headers
     *
     * @param {Object} mimeTree Parsed mimeTree object
     * @return {Array} ENVELOPE object as a structured Array
     */
    getEnvelope(mimeTree) {
        return createEnvelope(mimeTree.parsedHeader || {});
    }

    /**
     * Resolves numeric path to a node in the parsed MIME tree
     *
     * @param {Object} mimeTree Parsed mimeTree object
     * @param {String} path Dot-separated numeric path
     * @return {Object|Boolean} Mime node or false if the path does not resolve to a node
     */
    resolveContentNode(mimeTree, path) {
        // path "1" of a message without child nodes resolves to the message itself
        if (!mimeTree.childNodes && path === '1') {
            path = '';
        }

        let pathNumbers = (path || '').toString().split('.');
        let contentNode = mimeTree;
        let pathNumber;

        while ((pathNumber = pathNumbers.shift())) {
            // path numbers are 1-based
            pathNumber = Number(pathNumber) - 1;

            if (contentNode.message) {
                // redirect to message/rfc822
                contentNode = contentNode.message;
            }

            if (contentNode.childNodes && contentNode.childNodes[pathNumber]) {
                contentNode = contentNode.childNodes[pathNumber];
            } else {
                return false;
            }
        }

        return contentNode;
    }

    /**
     * Resolves selected contents into a single Buffer. Stream responses from
     * getContents() are collected into memory before the callback is called.
     *
     * @param {Object} mimeTree Parsed mimeTree object
     * @param {Object|String} selector What data to return, see getContents()
     * @param {Function} callback Called with (err) or (null, Buffer)
     */
    bodyQuery(mimeTree, selector, callback) {
        let data = this.getContents(mimeTree, selector);

        if (data && data.type === 'stream') {
            // make sure that the callback gets called only once
            let sent = false;
            let buffers = [];
            let buflen = 0;

            data.value.on('readable', () => {
                let buf;
                while ((buf = data.value.read())) {
                    buffers.push(buf);
                    buflen += buf.length;
                }
            });

            data.value.on('error', err => {
                if (sent) {
                    return;
                }
                sent = true;
                return callback(err);
            });

            data.value.on('end', () => {
                if (sent) {
                    return;
                }
                sent = true;
                return callback(null, Buffer.concat(buffers, buflen));
            });
        } else {
            return setImmediate(() => callback(null, Buffer.from((data || '').toString(), 'binary')));
        }
    }

    /**
     * Get node contents
     *
     * *selector* is an object with the following properties:
     *  * *path* - numeric path 1.2.3
     *  * *type* - one of content|header|header.fields|header.fields.not|text|mime
     *  * *headers* - an array of headers to include/exclude
     *
     * @param {Object} mimeTree Parsed mimeTree object
     * @param {Object|String} selector What data to return. A string value is used as the *type*
     * @param {Boolean} skipExternal If true, do not include the external nodes
     * @return {String|Object} Node contents as a string or, for rebuilt content, the
     *         stream descriptor returned by rebuild()
     */
    getContents(mimeTree, selector, skipExternal) {
        let node = mimeTree;

        if (typeof selector === 'string') {
            selector = {
                type: selector
            };
        }
        selector = selector || {
            type: ''
        };

        if (selector.path) {
            node = this.resolveContentNode(mimeTree, selector.path);
        }

        if (!node) {
            return '';
        }

        switch (selector.type) {
            case '':
            case 'content':
                if (!selector.path) {
                    // BODY[]
                    return this.rebuild(node, false, skipExternal);
                }
                // BODY[1.2.3]
                return this.rebuild(node, true, skipExternal);

            case 'header':
                if (!selector.path) {
                    // BODY[HEADER] mail header
                    return formatHeaders(node.header).join('\r\n') + '\r\n\r\n';
                } else if (node.message) {
                    // BODY[1.2.3.HEADER] embedded message/rfc822 header
                    return formatHeaders(node.message.header).join('\r\n') + '\r\n\r\n';
                }
                return '';

            case 'header.fields':
                // BODY[HEADER.FIELDS (Key1 Key2 KeyN)] only selected header keys
                if (!selector.headers || !selector.headers.length) {
                    return '\r\n\r\n';
                }
                return (
                    formatHeaders(node.header)
                        .filter(line => {
                            let key = line
                                .split(':')
                                .shift()
                                .toLowerCase()
                                .trim();
                            return selector.headers.indexOf(key) >= 0;
                        })
                        .join('\r\n') + '\r\n\r\n'
                );

            case 'header.fields.not':
                // BODY[HEADER.FIELDS.NOT (Key1 Key2 KeyN)] all but selected header keys
                if (!selector.headers || !selector.headers.length) {
                    return formatHeaders(node.header).join('\r\n') + '\r\n\r\n';
                }
                return (
                    formatHeaders(node.header)
                        .filter(line => {
                            let key = line
                                .split(':')
                                .shift()
                                .toLowerCase()
                                .trim();
                            return selector.headers.indexOf(key) < 0;
                        })
                        .join('\r\n') + '\r\n\r\n'
                );

            case 'mime':
                // BODY[1.2.3.MIME] mime node header
                return formatHeaders(node.header).join('\r\n') + '\r\n\r\n';

            case 'text':
                if (!selector.path) {
                    // BODY[TEXT] mail body without headers
                    return this.rebuild(node, true, skipExternal);
                } else if (node.message) {
                    // BODY[1.2.3.TEXT] embedded message/rfc822 body without headers
                    return this.rebuild(node.message, true, skipExternal);
                }
                return '';

            default:
                return '';
        }
    }
}
/**
 * Normalizes a header value into an array of header lines
 *
 * @param {Array|String} [headers] Header lines. A non-array value is wrapped into an array
 * @return {Array} The input array itself, a single-element array for another truthy
 *         value, or an empty array for a falsy value
 */
function formatHeaders(headers) {
    if (Array.isArray(headers)) {
        return headers;
    }
    return [].concat(headers || []);
}

/**
 * Converts plaintext into simple HTML. Special characters are encoded with he.encode
 * (named references), runs of two or more line breaks become paragraph breaks and the
 * remaining single line breaks become <br/> tags.
 *
 * @param {String} str Plaintext content
 * @return {String} HTML wrapped in <p>...</p>
 */
function textToHtml(str) {
    // encode special chars
    const encoded = he.encode(str, {
        useNamedReferences: true
    });

    // normalize line endings
    const normalized = encoded.replace(/\r?\n/g, '\n').trim();

    // trim empty line endings
    const withoutTrailingSpace = normalized.replace(/[ \t]+$/gm, '').trim();

    // insert <p> to multiple linebreaks
    const paragraphs = withoutTrailingSpace.replace(/\n\n+/g, '</p><p>').trim();

    // insert <br> to single linebreaks
    const body = paragraphs.replace(/\n/g, '<br/>');

    return '<p>' + body + '</p>';
}
/**
 * Pads a value from the left up to the requested length
 *
 * @param {Number|String} val Value to pad
 * @param {String} chr Padding string, repeated once for every missing character
 * @param {Number} len Minimal length of the result
 * @return {String} Padded value. Values that are already long enough are returned unpadded
 */
function leftPad(val, chr, len) {
    const str = val.toString();
    // String#repeat throws for negative counts, so clamp the padding size to zero
    const missing = Math.max(0, len - str.length);
    return chr.repeat(missing) + str;
}

// The Indexer class is the only export of this module
module.exports = Indexer;