[local-sync] decode quoted-printable encoded attachments

Summary:
Attachments with a quoted-printable transfer encoding need to be decoded as part of stream processing.
Fixes T7530

Test Plan: manual

Reviewers: juan, spang

Reviewed By: spang

Maniphest Tasks: T7530

Differential Revision: https://phab.nylas.com/D3690
This commit is contained in:
Evan Morikawa 2017-01-15 15:12:18 -08:00
parent e2b317d09d
commit a7bd1d66b7
4 changed files with 62 additions and 7 deletions

View file

@ -29,6 +29,7 @@ function collectFilesFromStruct({db, messageValues, struct, fileIds = new Set()}
id: fileId,
size: part.size,
partId: partId,
charset: part.params ? part.params.charset : null,
encoding: part.encoding,
filename: filename,
messageId: messageValues.id,

View file

@ -1,5 +1,6 @@
const base64 = require('base64-stream');
const {IMAPConnection} = require('isomorphic-core')
const {QuotedPrintableStreamDecoder} = require('../shared/stream-decoders')
module.exports = (sequelize, Sequelize) => {
return sequelize.define('file', {
@ -7,7 +8,8 @@ module.exports = (sequelize, Sequelize) => {
size: Sequelize.INTEGER,
partId: Sequelize.STRING,
version: Sequelize.INTEGER,
encoding: Sequelize.INTEGER,
charset: Sequelize.STRING,
encoding: Sequelize.STRING,
filename: Sequelize.STRING(500),
messageId: { type: Sequelize.STRING, allowNull: false },
accountId: { type: Sequelize.STRING, allowNull: false },
@ -37,7 +39,18 @@ module.exports = (sequelize, Sequelize) => {
if (!stream) {
throw new Error(`Unable to fetch binary data for File ${this.id}`)
}
return stream.pipe(base64.decode());
if (/quoted-printable/i.test(this.encoding)) {
return stream.pipe(new QuotedPrintableStreamDecoder({charset: this.charset}))
} else if (/base64/i.test(this.encoding)) {
return stream.pipe(base64.decode());
}
// If there is no encoding, or the encoding is something like
// '7bit', '8bit', or 'binary', just return the raw stream. This
// stream will be written directly to disk. It's then up to the
// user's computer to decide how to interpret the bytes we've
// dumped to disk.
return stream
} catch (err) {
connection.end();
throw err

View file

@ -189,13 +189,21 @@ function bodyFromParts(imapMessage, desiredParts) {
// see https://www.w3.org/Protocols/rfc1341/5_Content-Transfer-Encoding.html
if (!transferEncoding || new Set(['7bit', '8bit', 'binary']).has(transferEncoding.toLowerCase())) {
// NO transfer encoding has been performed --- how to decode to a string
// depends ONLY on the charset, which defaults to 'ascii' according to
// https://tools.ietf.org/html/rfc2045#section-5.2
decoded = encoding.convert(imapMessage.parts[id], 'utf-8', charset || 'ascii').toString('utf-8');
// depends ONLY on the charset.
//
// According to https://tools.ietf.org/html/rfc2045#section-5.2,
// this should default to ascii; however, if we don't get a charset,
// it's possible clients (like nodemailer) encoded the data as utf-8
// anyway. Since ascii is a strict subset of utf-8, it's safer to
// try and decode as utf-8 if we don't have the charset.
//
// This applies to decoding quoted-printable and base64 as well. The
// mimelib library, if charset is null, will default to utf-8
decoded = encoding.convert(imapMessage.parts[id], 'utf-8', charset).toString('utf-8');
} else if (transferEncoding.toLowerCase() === 'quoted-printable') {
decoded = mimelib.decodeQuotedPrintable(imapMessage.parts[id], charset || 'ascii');
decoded = mimelib.decodeQuotedPrintable(imapMessage.parts[id], charset);
} else if (transferEncoding.toLowerCase() === 'base64') {
decoded = mimelib.decodeBase64(imapMessage.parts[id], charset || 'ascii');
decoded = mimelib.decodeBase64(imapMessage.parts[id], charset);
} else {
// custom x-token content-transfer-encodings
return Promise.reject(new Error(`Unsupported Content-Transfer-Encoding ${transferEncoding}, mimetype ${mimeType}`))

View file

@ -0,0 +1,33 @@
import {Transform} from 'stream'
const mimelib = require('mimelib');
/**
 * Transform stream that decodes a quoted-printable encoded payload.
 *
 * The whole input is buffered and decoded in a single pass when the
 * stream ends, so nothing is emitted until the source has finished.
 */
export class QuotedPrintableStreamDecoder extends Transform {
  /**
   * @param {object} [opts] - Options forwarded to the Transform constructor.
   * @param {string} [opts.charset] - Charset handed to mimelib when decoding.
   */
  constructor(opts = {}) {
    super(opts);
    this.charset = opts.charset;
    // Raw chunks are kept as Buffers and only turned into text once all
    // of them have arrived. Converting each chunk to a string separately
    // would corrupt a multi-byte character that straddles two chunks.
    this._chunks = [];
  }

  /**
   * Overrides Transform::_transform
   *
   * We can't decode quoted-printable in chunks, so we buffer it.
   *
   * @param {Buffer|string} chunk - Next piece of encoded input.
   * @param {string} encoding - Unused; non-Buffer chunks are stringified.
   * @param {Function} cb - Invoked once the chunk has been stored.
   */
  _transform(chunk, encoding, cb) {
    this._chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
    cb();
  }

  /**
   * Overrides Transform::_flush
   *
   * At the end of the stream, decode the whole buffer at once and flush
   * it out the end. A decoding failure is reported through `cb` rather
   * than thrown.
   *
   * @param {Function} cb - Invoked with an error if decoding failed.
   */
  _flush(cb) {
    let decoded;
    try {
      const text = Buffer.concat(this._chunks).toString();
      // Release the buffered input; it is not needed after this point.
      this._chunks = [];
      // If this.charset is null (a very common case for attachments),
      // mimelib defaults to utf-8 as the charset.
      decoded = mimelib.decodeQuotedPrintable(text, this.charset);
    } catch (err) {
      cb(err);
      return;
    }
    this.push(decoded);
    cb();
  }
}