diff --git a/packages/local-sync/src/message-processor/extract-files.js b/packages/local-sync/src/message-processor/extract-files.js index acca3b8bc..728a136a9 100644 --- a/packages/local-sync/src/message-processor/extract-files.js +++ b/packages/local-sync/src/message-processor/extract-files.js @@ -29,6 +29,7 @@ function collectFilesFromStruct({db, messageValues, struct, fileIds = new Set()} id: fileId, size: part.size, partId: partId, + charset: part.params ? part.params.charset : null, encoding: part.encoding, filename: filename, messageId: messageValues.id, diff --git a/packages/local-sync/src/models/file.js b/packages/local-sync/src/models/file.js index 4cc63e38a..f146dc35c 100644 --- a/packages/local-sync/src/models/file.js +++ b/packages/local-sync/src/models/file.js @@ -1,5 +1,6 @@ const base64 = require('base64-stream'); const {IMAPConnection} = require('isomorphic-core') +const {QuotedPrintableStreamDecoder} = require('../shared/stream-decoders') module.exports = (sequelize, Sequelize) => { return sequelize.define('file', { @@ -7,7 +8,8 @@ module.exports = (sequelize, Sequelize) => { size: Sequelize.INTEGER, partId: Sequelize.STRING, version: Sequelize.INTEGER, - encoding: Sequelize.INTEGER, + charset: Sequelize.STRING, + encoding: Sequelize.STRING, filename: Sequelize.STRING(500), messageId: { type: Sequelize.STRING, allowNull: false }, accountId: { type: Sequelize.STRING, allowNull: false }, @@ -37,7 +39,18 @@ module.exports = (sequelize, Sequelize) => { if (!stream) { throw new Error(`Unable to fetch binary data for File ${this.id}`) } - return stream.pipe(base64.decode()); + if (/quoted-printable/i.test(this.encoding)) { + return stream.pipe(new QuotedPrintableStreamDecoder({charset: this.charset})) + } else if (/base64/i.test(this.encoding)) { + return stream.pipe(base64.decode()); + } + + // If there is no encoding, or the encoding is something like + // '7bit', '8bit', or 'binary', just return the raw stream. 
This + // stream will be written directly to disk. It's then up to the + // user's computer to decide how to interpret the bytes we've + // dumped to disk. + return stream } catch (err) { connection.end(); throw err diff --git a/packages/local-sync/src/shared/message-factory.js b/packages/local-sync/src/shared/message-factory.js index 07d8a7e03..175ce91de 100644 --- a/packages/local-sync/src/shared/message-factory.js +++ b/packages/local-sync/src/shared/message-factory.js @@ -189,13 +189,21 @@ function bodyFromParts(imapMessage, desiredParts) { // see https://www.w3.org/Protocols/rfc1341/5_Content-Transfer-Encoding.html if (!transferEncoding || new Set(['7bit', '8bit', 'binary']).has(transferEncoding.toLowerCase())) { // NO transfer encoding has been performed --- how to decode to a string - // depends ONLY on the charset, which defaults to 'ascii' according to - // https://tools.ietf.org/html/rfc2045#section-5.2 - decoded = encoding.convert(imapMessage.parts[id], 'utf-8', charset || 'ascii').toString('utf-8'); + // depends ONLY on the charset, + // + // According to https://tools.ietf.org/html/rfc2045#section-5.2, + // this should default to ascii; however, if we don't get a charset, + // it's possible clients (like nodemailer) encoded the data as utf-8 + // anyway. Since ascii is a strict subset of utf-8, it's safer to + // try and decode as utf-8 if we don't have the charset. + // + // This applies to decoding quoted-printable and base64 as well. 
The + // mimelib library, if charset is null, will default to utf-8 + decoded = encoding.convert(imapMessage.parts[id], 'utf-8', charset).toString('utf-8'); } else if (transferEncoding.toLowerCase() === 'quoted-printable') { - decoded = mimelib.decodeQuotedPrintable(imapMessage.parts[id], charset || 'ascii'); + decoded = mimelib.decodeQuotedPrintable(imapMessage.parts[id], charset); } else if (transferEncoding.toLowerCase() === 'base64') { - decoded = mimelib.decodeBase64(imapMessage.parts[id], charset || 'ascii'); + decoded = mimelib.decodeBase64(imapMessage.parts[id], charset); } else { // custom x-token content-transfer-encodings return Promise.reject(new Error(`Unsupported Content-Transfer-Encoding ${transferEncoding}, mimetype ${mimeType}`)) diff --git a/packages/local-sync/src/shared/stream-decoders.es6 b/packages/local-sync/src/shared/stream-decoders.es6 new file mode 100644 index 000000000..d1129e45d --- /dev/null +++ b/packages/local-sync/src/shared/stream-decoders.es6 @@ -0,0 +1,33 @@ +import {Transform} from 'stream' +const mimelib = require('mimelib'); + +export class QuotedPrintableStreamDecoder extends Transform { + constructor(opts = {}) { + super(opts); + this.charset = opts.charset + this._text = ""; + } + + /** + * Overrides Transform::_transform + * + * We can't decode quoted-printable in chunks, so we buffer it. + */ + _transform = (chunk, encoding, cb) => { + this._text += chunk.toString(); + cb(); + } + + /** + * Overrides Transform::_flush + * + * At the end of the stream, decode the whole buffer at once and flush + * it out the end. + */ + _flush = (cb) => { + // If this.charset is null (a very common case for attachments), + // mimelib defaults to utf-8 as the charset. + this.push(mimelib.decodeQuotedPrintable(this._text, this.charset)); + cb(); + } +}