First working version of sharded messages

This commit is contained in:
Andris Reinman 2017-07-13 23:35:59 +03:00
parent 3f82ba0be6
commit 0752e712a7
4 changed files with 31 additions and 9 deletions

View file

@ -6,6 +6,8 @@ Wild Duck is a distributed IMAP/POP3 server built with Node.js, MongoDB and Redi
> **NB!** Wild Duck is currently in **beta**. Use it at your own risk.
*Distributed* means that Wild Duck uses a distributed database as a backend for storing emails. Wild Duck instances are also stateless: you can have multiple instances running, and a client can connect to any of them. Wild Duck uses a write-ahead log to keep different IMAP sessions in sync between different instances.
## Usage
Assuming you have MongoDB and Redis running somewhere.
@ -444,7 +446,14 @@ Shard the following collections by these keys:
* Collection: `messages`, key: `user` (by hash)
* Collection: `attachments.files`, key: `metadata.h` (by hash)
* Collection: `attachment.chunks`, key: `file_id` (by hash)
* Collection: `attachments.chunks`, key: `files_id` (by hash)
```javascript
sh.enableSharding('wildduck');
sh.shardCollection('wildduck.messages', { user: 'hashed' });
sh.shardCollection('wildduck.attachments.files', { 'metadata.h': 'hashed' });
sh.shardCollection('wildduck.attachments.chunks', { files_id: 'hashed' });
```
## IMAP Protocol Differences

View file

@ -23,7 +23,7 @@ const setupIndexes = yaml.safeLoad(fs.readFileSync(__dirname + '/indexes.yaml',
const BULK_BATCH_SIZE = 150;
// how often to clear expired messages
const GC_INTERVAL = 10 * 60 * 1000;
const GC_INTERVAL = 0.1 * 60 * 1000;
// artificial delay (in ms) before deleting the next expired message
const GC_DELAY_DELETE = 100;

View file

@ -55,6 +55,13 @@ indexes:
# key should be 'user' so keep this field as the first one
# in indexes
- collection: messages
index:
# hashed index needed for sharding
name: messages_shard
key:
user: hashed
- collection: messages
index:
name: mailbox_by_id
@ -215,13 +222,19 @@ indexes:
index:
name: attachment_hash
key:
metadata.h: 1
metadata.h: hashed
- collection: attachments.files
index:
name: related_attachments
key:
metadata.c: 1
metadata.m: 1
- collection: attachments.chunks
index:
# hashed index needed for sharding
name: chunks_shard
key:
files_id: hashed
# Indexes for the journal collection

View file

@ -762,9 +762,9 @@ class MessageHandler {
let envelope = this.indexer.getEnvelope(mimeTree);
let idate = (options.date && parseDate(options.date)) || new Date();
let hdate = (mimeTree.parsedHeader.date && parseDate(mimeTree.parsedHeader.date, idate)) || false;
let hdate = (mimeTree.parsedHeader.date && parseDate([].concat(mimeTree.parsedHeader.date || []).pop() || '', idate)) || false;
let subject = (mimeTree.parsedHeader.subject || '').trim();
let subject = ([].concat(mimeTree.parsedHeader.subject || []).pop() || '').trim();
try {
subject = libmime.decodeWords(subject);
} catch (E) {
@ -801,10 +801,10 @@ class MessageHandler {
getThreadId(userId, subject, mimeTree, callback) {
let referenceIds = new Set(
[
mimeTree.parsedHeader['message-id'] || '',
mimeTree.parsedHeader['in-reply-to'] || '',
(mimeTree.parsedHeader['thread-index'] || '').substr(0, 22),
mimeTree.parsedHeader.references || ''
[].concat(mimeTree.parsedHeader['message-id'] || []).pop() || '',
[].concat(mimeTree.parsedHeader['in-reply-to'] || []).pop() || '',
([].concat(mimeTree.parsedHeader['thread-index'] || []).pop() || '').substr(0, 22),
[].concat(mimeTree.parsedHeader.references || []).pop() || ''
]
.join(' ')
.split(/\s+/)