From 1657673c025dbba527d0702441c44415c369ef8c Mon Sep 17 00:00:00 2001
From: Andris Reinman
Date: Wed, 9 Aug 2017 11:11:38 +0300
Subject: [PATCH] use sha256 hash as the _id for attachments

---
Review notes (text between the "---" separator and the first "diff --git"
header is not part of the commit message and is skipped when applying):

 * What the patch does: GridstoreStorage.create() converts the hex `hash`
   argument to a Buffer and uses it as the GridFS file _id instead of a fresh
   ObjectID plus a `metadata.h` field. An existing file with that _id gets
   `metadata.c` incremented by 1 and `metadata.m` incremented by
   attachment.magic; otherwise the body is uploaded under that _id. A
   duplicate-key error (code 11000) from the upload is retried exactly once
   after 10 ms by re-running the find-or-insert step.
 * Fix applied in this revision: the guard inside the
   Object.keys(attachment.metadata) loop tested `key in attachment.metadata`,
   which is always true for a key taken from that same object, so no
   caller-supplied metadata was ever copied. It now tests `key in metadata`,
   copying custom keys without overwriting m / c / transferEncoding. Only the
   added ("+") side is changed; the removed ("-") side must keep the old text
   so it still matches the pre-image.
 * The change replaces one added line in place, so hunk counts and the
   diffstat are unchanged. The post-image blob id on the "index" line was not
   recomputed.
 * This file had lost its line breaks; indentation of context lines was
   reconstructed (4-space JS, 2-space YAML) - TODO confirm against the target
   tree, or apply with --ignore-whitespace.

 README.md                            |   1 +
 indexes.yaml                         |   6 --
 lib/attachments/gridstore-storage.js | 127 +++++++++++++++------------
 3 files changed, 72 insertions(+), 62 deletions(-)

diff --git a/README.md b/README.md
index 1edaf4d4..ed2b3977 100644
--- a/README.md
+++ b/README.md
@@ -197,6 +197,7 @@ Shard the following collections by these keys:
 sh.enableSharding('wildduck');
 sh.shardCollection('wildduck.messages', { mailbox: 1, uid: 1 });
 sh.shardCollection('wildduck.threads', { user: 'hashed' });
+// attachment _id is a sha256 hash of attachment contents
 sh.shardCollection('wildduck.attachments.files', { _id: 'hashed' });
 sh.shardCollection('wildduck.attachments.chunks', { files_id: 'hashed' });
 ```
diff --git a/indexes.yaml b/indexes.yaml
index b6142ab7..621d3590 100644
--- a/indexes.yaml
+++ b/indexes.yaml
@@ -270,12 +270,6 @@ indexes:
     name: attachment_id_hashed
     key:
       _id: hashed
-- collection: attachments.files
-  type: gridfs # index applies to gridfs database
-  index:
-    name: attachment_hash
-    key:
-      metadata.h: hashed
 - collection: attachments.files
   type: gridfs # index applies to gridfs database
   index:
diff --git a/lib/attachments/gridstore-storage.js b/lib/attachments/gridstore-storage.js
index 03aeb778..fa8e1bcf 100644
--- a/lib/attachments/gridstore-storage.js
+++ b/lib/attachments/gridstore-storage.js
@@ -1,6 +1,5 @@
 'use strict';
 
-const ObjectID = require('mongodb').ObjectID;
 const GridFSBucket = require('mongodb').GridFSBucket;
 
 class GridstoreStorage {
@@ -28,69 +27,85 @@ class GridstoreStorage {
                 transferEncoding: attachmentData.metadata.transferEncoding,
                 length: attachmentData.length,
                 count: attachmentData.metadata.c,
-                hash: attachmentData.metadata.h,
+                hash: attachmentData._id,
                 metadata: attachmentData.metadata
             });
         });
     }
 
     create(attachment, hash, callback) {
-        this.gridfs.collection(this.bucketName + '.files').findOneAndUpdate({
-            'metadata.h': hash
-        }, {
-            $inc: {
-                'metadata.c': 1,
-                'metadata.m': attachment.magic
+        hash = Buffer.from(hash, 'hex');
+
+        let returned = false;
+        let retried = false;
+
+        let id = hash;
+        let metadata = {
+            m: attachment.magic,
+            c: 1,
+            transferEncoding: attachment.transferEncoding
+        };
+
+        Object.keys(attachment.metadata || {}).forEach(key => {
+            if (!(key in metadata)) {
+                metadata[key] = attachment.metadata[key];
             }
-        }, {
-            returnOriginal: false
-        }, (err, result) => {
-            if (err) {
-                return callback(err);
-            }
-
-            if (result && result.value) {
-                return callback(null, result.value._id);
-            }
-
-            let returned = false;
-
-            let id = new ObjectID();
-            let metadata = {
-                h: hash,
-                m: attachment.magic,
-                c: 1,
-                transferEncoding: attachment.transferEncoding
-            };
-            Object.keys(attachment.metadata || {}).forEach(key => {
-                if (!(key in attachment.metadata)) {
-                    metadata[key] = attachment.metadata[key];
-                }
-            });
-
-            let store = this.gridstore.openUploadStreamWithId(id, null, {
-                contentType: attachment.contentType,
-                metadata
-            });
-
-            store.once('error', err => {
-                if (returned) {
-                    return;
-                }
-                returned = true;
-                callback(err);
-            });
-
-            store.once('finish', () => {
-                if (returned) {
-                    return;
-                }
-                returned = true;
-                return callback(null, id);
-            });
-
-            store.end(attachment.body);
         });
+
+        let tryStore = () => {
+            this.gridfs.collection(this.bucketName + '.files').findOneAndUpdate({
+                _id: hash
+            }, {
+                $inc: {
+                    'metadata.c': 1,
+                    'metadata.m': attachment.magic
+                }
+            }, {
+                returnOriginal: false
+            }, (err, result) => {
+                if (err) {
+                    return callback(err);
+                }
+
+                if (result && result.value) {
+                    // already exists
+                    return callback(null, result.value._id);
+                }
+
+                // try to insert it
+                let store = this.gridstore.openUploadStreamWithId(id, null, {
+                    contentType: attachment.contentType,
+                    metadata
+                });
+
+                store.once('error', err => {
+                    if (returned) {
+                        return;
+                    }
+                    if (err.code === 11000) {
+                        // most probably a race condition, try again
+                        if (!retried) {
+                            retried = true;
+                            return setTimeout(tryStore, 10);
+                        }
+                    }
+                    returned = true;
+                    callback(err);
+                });
+
+                store.once('finish', () => {
+                    if (returned) {
+                        return;
+                    }
+                    returned = true;
+                    return callback(null, id);
+                });
+
+                store.end(attachment.body);
+            });
+        };
+
+        tryStore();
     }
 
     createReadStream(id) {