2016-06-23 15:49:22 +08:00
|
|
|
const os = require('os');
|
|
|
|
const SyncWorker = require('./sync-worker');
|
2016-06-24 06:52:45 +08:00
|
|
|
const {DatabaseConnector, PubsubConnector, SchedulerUtils} = require(`nylas-core`)
|
2016-06-23 15:49:22 +08:00
|
|
|
|
|
|
|
// Identifier for this sync process, built from the host name and the process id.
// It is passed to ACCOUNTS_FOR() and HEARTBEAT_FOR() to derive the names of this
// process's claimed-accounts list and heartbeat key.
const IDENTITY = `${os.hostname()}-${process.pid}`;
|
|
|
|
|
2016-06-24 06:52:45 +08:00
|
|
|
const {
|
|
|
|
ACCOUNTS_FOR,
|
|
|
|
ACCOUNTS_UNCLAIMED,
|
|
|
|
ACCOUNTS_CLAIMED_PREFIX,
|
|
|
|
HEARTBEAT_FOR,
|
|
|
|
HEARTBEAT_EXPIRES,
|
|
|
|
forEachAccountList,
|
|
|
|
} = SchedulerUtils;
|
2016-06-23 15:49:22 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
Accounts ALWAYS exist in either `accounts:unclaimed` or an `accounts:{id}` list.
|
|
|
|
They are atomically moved between these sets as they are claimed and returned.
|
|
|
|
|
|
|
|
Periodically, each worker in the pool looks at all the `accounts:{id}` lists.
|
|
|
|
For each list it finds, it checks for the existence of `heartbeat:{id}`, a key
|
|
|
|
that expires quickly if the sync process doesn't refresh it.
|
|
|
|
|
|
|
|
If it does not find the key, it moves all of the accounts in the list back to
|
|
|
|
the unclaimed key.
|
2016-06-24 03:02:57 +08:00
|
|
|
|
|
|
|
Sync processes only claim an account for a fixed period of time. This means that
|
|
|
|
an engineer can add new sync machines to the pool and the load across instances
|
|
|
|
will balance on its own. It also means one bad instance will not permanently
|
|
|
|
disrupt sync for any accounts. (Eg: instance has faulty network connection.)
|
|
|
|
|
|
|
|
Sync processes periodically claim accounts when they can find them, regardless
|
|
|
|
of how busy they are. A separate API (`/routes/monitoring`) allows CloudWatch
|
|
|
|
to decide whether to spin up instances or take them offline based on CPU/RAM
|
|
|
|
utilization across the pool.
|
2016-06-23 15:49:22 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * Coordinates the sync workers that run inside this process.
 *
 * The manager claims account ids from the shared unclaimed list, starts one
 * SyncWorker per claimed account, keeps this process's heartbeat key alive, and
 * returns accounts to the unclaimed list when a worker finishes, when this
 * process shuts down, or when another process's heartbeat key is missing.
 */
class SyncProcessManager {
  constructor() {
    // Map of account id -> SyncWorker. A slot is reset to null once its worker
    // has been removed.
    this._workers = {};
    this._listenForSyncsClient = null;
    // Dedicated client for the blocking "wait for an unclaimed account" call.
    // Created lazily in acceptUnclaimedAccount().
    this._waitForAccountClient = null;
    // Set once a shutdown signal has been received; stops the claim loop.
    this._exiting = false;
  }

  /**
   * Boots the manager: returns any accounts still listed under this identity,
   * starts the missing-heartbeat sweep and the claim loop, begins refreshing
   * the heartbeat, and installs SIGINT/SIGTERM handlers.
   */
  start() {
    global.Logger.info(`ProcessManager: Starting with ID ${IDENTITY}`)

    this.unassignAccountsAssignedTo(IDENTITY).then(() => {
      this.unassignAccountsMissingHeartbeats();
      this.update();
    });

    // Refresh five times per expiry window so a single slow or failed refresh
    // does not let the key lapse.
    setInterval(() => this.updateHeartbeat(), HEARTBEAT_EXPIRES / 5.0 * 1000);
    this.updateHeartbeat();

    process.on('SIGINT', () => this.onSigInt());
    process.on('SIGTERM', () => this.onSigInt());
  }

  /**
   * Writes this process's heartbeat key with a TTL of HEARTBEAT_EXPIRES seconds.
   *
   * The value and the expiry are sent in a single SET command. Issuing SET and
   * EXPIRE separately could leave a key with no TTL if the process died between
   * the two calls, and the missing-heartbeat sweep would then never release the
   * accounts of the dead process.
   *
   * @returns {Promise} settles once the refresh has been attempted; failures
   *   are logged rather than propagated.
   */
  updateHeartbeat() {
    const key = HEARTBEAT_FOR(IDENTITY);
    const client = PubsubConnector.broadcastClient();
    return client.setAsync(key, Date.now(), 'EX', HEARTBEAT_EXPIRES)
      .then(() => {
        global.Logger.info("ProcessManager: 💘")
      })
      .catch((err) => {
        global.Logger.error(`ProcessManager: Failed to update heartbeat: ${err}`)
      });
  }

  /**
   * Shutdown handler: stops the claim loop, returns every claimed account to
   * the unclaimed list, deletes the heartbeat key and exits the process.
   */
  onSigInt() {
    global.Logger.info(`ProcessManager: Exiting...`)
    this._exiting = true;

    // The claimed-accounts list is deliberately NOT deleted here. Once drained
    // it is already gone (Redis removes empty lists), and deleting it could
    // destroy an account id that the still-blocked BRPOPLPUSH in
    // acceptUnclaimedAccount() moved into it in the meantime. Leaving it lets
    // another process's missing-heartbeat sweep return such an account.
    this.unassignAccountsAssignedTo(IDENTITY).then(() =>
      PubsubConnector.broadcastClient().delAsync(HEARTBEAT_FOR(IDENTITY))
    ).finally(() => {
      process.exit(1);
    });
  }

  /**
   * Makes sure every given account id is present in some account list,
   * pushing ids that appear in none of them onto the unclaimed list.
   *
   * Ids are only pushed when the scan of existing lists succeeded. After a
   * failed scan the "unseen" set is unreliable, and pushing it could put an
   * already-claimed account onto the unclaimed list a second time.
   *
   * @param {Array|*} accountIds - account id or array of account ids to check.
   * @returns {Promise} resolves with the scan's result once all pushes have
   *   completed; rejects if the scan or a push fails.
   */
  ensureAccountIDsInRedis(accountIds) {
    const client = PubsubConnector.broadcastClient();

    let unseenIds = [].concat(accountIds);

    global.Logger.info("ProcessManager: Starting scan for accountIds in database that are not present in Redis.")

    return forEachAccountList((foundProcessIdentity, foundIds) => {
      // Ids are compared in string form (`${a}`) against the list contents.
      unseenIds = unseenIds.filter((a) => !foundIds.includes(`${a}`))
    })
    .then((scanResult) => {
      if (unseenIds.length === 0) {
        return scanResult;
      }
      global.Logger.info(`ProcessManager: Adding account IDs ${unseenIds.join(',')} to ${ACCOUNTS_UNCLAIMED}.`)
      const pushes = unseenIds.map((id) => client.lpushAsync(ACCOUNTS_UNCLAIMED, id));
      return Promise.all(pushes).then(() => scanResult);
    });
  }

  /**
   * Looks at every claimed-accounts list and returns its accounts to the
   * unclaimed list when the owning process has no heartbeat key. Reschedules
   * itself every HEARTBEAT_EXPIRES seconds, whether or not the sweep succeeded.
   */
  unassignAccountsMissingHeartbeats() {
    const client = PubsubConnector.broadcastClient();

    global.Logger.info("ProcessManager: Starting unassignment for processes missing heartbeats.")

    Promise.each(client.keysAsync(`${ACCOUNTS_CLAIMED_PREFIX}*`), (key) => {
      // Strip the prefix to recover the identity of the owning process.
      const id = key.replace(ACCOUNTS_CLAIMED_PREFIX, '');
      return client.existsAsync(HEARTBEAT_FOR(id)).then((exists) =>
        (exists ? Promise.resolve() : this.unassignAccountsAssignedTo(id))
      )
    }).catch((err) => {
      global.Logger.error(`ProcessManager: Failed to unassign accounts missing heartbeats: ${err}`)
    }).finally(() => {
      const delay = HEARTBEAT_EXPIRES * 1000;
      setTimeout(() => this.unassignAccountsMissingHeartbeats(), delay);
    });
  }

  /**
   * Moves every account id from the given identity's claimed list back to the
   * unclaimed list, one RPOPLPUSH at a time, until the source list is empty.
   *
   * @param {string} identity - identity of the process whose accounts to return.
   * @returns {Promise} resolves once the list is drained and the count logged.
   */
  unassignAccountsAssignedTo(identity) {
    const src = ACCOUNTS_FOR(identity);
    const dst = ACCOUNTS_UNCLAIMED;

    // Each step moves one id; a falsy reply means the source list is empty.
    const unassignOne = (count) =>
      PubsubConnector.broadcastClient().rpoplpushAsync(src, dst).then((val) =>
        (val ? unassignOne(count + 1) : Promise.resolve(count))
      )

    return unassignOne(0).then((returned) => {
      global.Logger.info(`ProcessManager: Returned ${returned} accounts assigned to ${identity}.`)
    });
  }

  /**
   * One iteration of the claim loop: tries to claim an account, then schedules
   * the next iteration unless the process is exiting.
   */
  update() {
    global.Logger.info(`ProcessManager: Searching for an unclaimed account to sync.`)

    this.acceptUnclaimedAccount()
      .catch((err) => {
        // Back off before the next attempt so a failing connection does not
        // turn this loop into a tight retry spin.
        global.Logger.error(`ProcessManager: Failed to claim an account: ${err}`)
        return Promise.delay(2000);
      })
      .finally(() => {
        if (this._exiting) {
          return;
        }
        this.update();
      });
  }

  /**
   * Blocks until an account id can be moved from the unclaimed list into this
   * process's claimed list (or the wait times out) and starts a worker for it.
   *
   * @returns {Promise} resolves when the attempt, plus the spacing delay after
   *   a successful claim, is finished.
   */
  acceptUnclaimedAccount() {
    if (!this._waitForAccountClient) {
      this._waitForAccountClient = PubsubConnector.buildClient();
    }

    const src = ACCOUNTS_UNCLAIMED;
    const dst = ACCOUNTS_FOR(IDENTITY);

    // NOTE(review): the BRPOPLPUSH timeout is expressed in seconds, so 10000 is
    // roughly 2.8 hours. If 10 seconds was intended this should be 10 — confirm.
    return this._waitForAccountClient.brpoplpushAsync(src, dst, 10000).then((accountId) => {
      if (!accountId) {
        return Promise.resolve();
      }
      this.addWorkerForAccountId(accountId).catch((err) => {
        global.Logger.error(`ProcessManager: Failed to start worker for Account ${accountId}: ${err}`)
      });

      // If we've added an account, wait two seconds before asking for another
      // one. Spacing them out is probably healthy.
      return Promise.delay(2000);
    });
  }

  /**
   * Loads the account and its database, then starts a SyncWorker for it unless
   * the account does not exist, a worker is already running, or the process is
   * exiting.
   *
   * @param {string|number} accountId - id of the account that was just claimed.
   * @returns {Promise} resolves once the worker has been started or skipped;
   *   rejects if a database lookup fails.
   */
  addWorkerForAccountId(accountId) {
    return DatabaseConnector.forShared()
      .then(({Account}) => Account.find({where: {id: accountId}}))
      .then((account) => {
        if (!account || this._workers[account.id]) {
          return null;
        }
        return DatabaseConnector.forAccount(account.id).then((db) => {
          // Re-check after the async gap: a shutdown may have begun, or another
          // call may have started a worker for this account in the meantime.
          if (this._exiting || this._workers[account.id]) {
            return;
          }
          global.Logger.info(`ProcessManager: Starting worker for Account ${accountId}`)
          this._workers[account.id] = new SyncWorker(account, db, () => {
            this.removeWorkerForAccountId(accountId).catch((err) => {
              global.Logger.error(`ProcessManager: Failed to remove worker for Account ${accountId}: ${err}`)
            });
          });
        });
      });
  }

  /**
   * Releases this process's claim on an account and forgets its worker.
   *
   * The worker slot is cleared even when the claim was already gone; otherwise
   * the stale entry would stop this process from ever starting a new worker
   * for that account.
   *
   * NOTE(review): LREM followed by RPUSH is not atomic. A crash between the two
   * commands leaves the id in neither list until something re-adds it.
   *
   * @param {string|number} accountId - id of the account to return to the pool.
   * @returns {Promise} resolves once the id is back on the unclaimed list.
   * @throws {Error} (as a rejection) when this process held no claim on the id.
   */
  removeWorkerForAccountId(accountId) {
    const src = ACCOUNTS_FOR(IDENTITY);
    const dst = ACCOUNTS_UNCLAIMED;
    const client = PubsubConnector.broadcastClient();

    return client.lremAsync(src, 1, accountId).then((didRemove) => {
      this._workers[accountId] = null;
      if (!didRemove) {
        throw new Error("Wanted to return item to pool, but didn't have claim on it.")
      }
      return client.rpushAsync(dst, accountId);
    });
  }
}
|
|
|
|
|
|
|
|
module.exports = SyncProcessManager;
|