[client-sync] Detect when sync workers are stuck

Summary:
Periodically check each account's latest sync activity time to determine
whether its sync worker has been inactive for too long. If it has, discard
the worker and create a new one for that account. Part of T7681.

Test Plan: manual, specs

Reviewers: evan, mark, juan

Reviewed By: juan

Differential Revision: https://phab.nylas.com/D4262
This commit is contained in:
Halla Moore 2017-03-27 15:33:37 -07:00
parent b6887e386e
commit 619c69a522
2 changed files with 58 additions and 5 deletions

View file

@ -4,7 +4,14 @@ const {Actions, OnlineStatusStore, IdentityStore} = require('nylas-exports')
const SyncWorker = require('./sync-worker'); const SyncWorker = require('./sync-worker');
const LocalSyncDeltaEmitter = require('./local-sync-delta-emitter').default const LocalSyncDeltaEmitter = require('./local-sync-delta-emitter').default
const LocalDatabaseConnector = require('../shared/local-database-connector') const LocalDatabaseConnector = require('../shared/local-database-connector')
const SyncActivity = require('../shared/sync-activity').default
// Longest stretch a sync worker may go without reporting activity before the
// health check treats it as stuck: the largest of the worker's AC loop
// interval, battery loop interval, and maximum sync backoff.
const MAX_WORKER_SILENCE_MS = Math.max(...[
  SyncWorker.AC_SYNC_LOOP_INTERVAL_MS,
  SyncWorker.BATTERY_SYNC_LOOP_INTERVAL_MS,
  SyncWorker.MAX_SYNC_BACKOFF_MS,
]);

// Period, in milliseconds, between health checks (one minute).
const CHECK_HEALTH_TIME_INTERVAL = 60 * 1000;
class SyncProcessManager { class SyncProcessManager {
constructor() { constructor() {
@ -24,6 +31,8 @@ class SyncProcessManager {
ipcRenderer.on('app-resumed-from-sleep', () => { ipcRenderer.on('app-resumed-from-sleep', () => {
this._wakeAllWorkers({reason: 'Computer resumed from sleep', interrupt: true}) this._wakeAllWorkers({reason: 'Computer resumed from sleep', interrupt: true})
}) })
this._checkHealthInterval = null;
} }
_onOnlineStatusChanged() { _onOnlineStatusChanged() {
@ -65,7 +74,7 @@ class SyncProcessManager {
) )
.timeout(500, 'Timed out while trying to stop sync') .timeout(500, 'Timed out while trying to stop sync')
} catch (err) { } catch (err) {
console.warn('SyncProcessManager._resetEmailCache: Error while stopping sync', err) global.Logger.warn('SyncProcessManager._resetEmailCache: Error while stopping sync', err)
} }
const accountIds = Object.keys(this._workersByAccountId) const accountIds = Object.keys(this._workersByAccountId)
for (const accountId of accountIds) { for (const accountId of accountIds) {
@ -82,6 +91,35 @@ class SyncProcessManager {
} }
} }
/**
 * Restarts the sync worker for `accountId` if it has been silent for too long.
 *
 * Reads the account's most recent sync activity from SyncActivity. When that
 * activity is older than `this.MAX_WORKER_SILENCE_MS`, the stuck process is
 * reported, the current worker is discarded, and a new worker is created for
 * the account.
 *
 * @param {string} accountId - id of the account whose worker is checked
 * @returns {Promise<void>} resolves once the check (and any restart) is done
 */
_checkHealthByAccountId = async (accountId) => {
  const {time, activity} = SyncActivity.getLastSyncActivityForAccount(accountId);

  // Sample the clock once so the threshold test and the reported duration agree.
  const now = Date.now();

  // Written as a negated `<` on purpose: if `time` is missing or NaN the
  // comparison is false and we do nothing, rather than restarting the worker.
  if (!(time < now - this.MAX_WORKER_SILENCE_MS)) {
    return;
  }

  const duration = now - time;
  NylasEnv.reportError(new Error("SyncProcessManager: Detected stuck sync process"), {
    rateLimit: {
      ratePerHour: 30,
      key: 'SyncProcessManager:StuckProcess',
    },
  });
  Actions.recordUserEvent('Stuck Sync Process', {
    accountId,
    lastActivityTime: time,
    lastActivity: activity,
    duration,
  });
  global.Logger.log(`SyncProcessManager: Detected stuck worker for account ${accountId}`, activity, time);

  await this.removeWorkerForAccountId(accountId);

  const {Account} = await LocalDatabaseConnector.forShared();
  const account = await Account.findById(accountId);
  if (!account) {
    // The account may have been deleted while its worker was stuck; passing a
    // null account to addWorkerForAccount would throw on `account.id`.
    global.Logger.warn(`SyncProcessManager: Account ${accountId} not found; not restarting its sync worker`);
    return;
  }
  await this.addWorkerForAccount(account);
}
/**
 * Runs the stuck-worker check for every account that currently has a worker.
 *
 * @returns {Promise<Array>} settles when every per-account check has finished;
 *   rejects if any of them rejects
 */
_checkHealth = async () => {
  // removeWorkerForAccountId leaves a null entry behind instead of deleting
  // the key, so only check accounts whose worker is still present. This also
  // avoids re-checking an account that is in the middle of being restarted.
  const accountIds = Object.keys(this._workersByAccountId)
    .filter((accountId) => this._workersByAccountId[accountId]);
  return Promise.all(accountIds.map((accountId) => this._checkHealthByAccountId(accountId)));
}
/** /**
* Useful for debugging. * Useful for debugging.
*/ */
@ -94,8 +132,10 @@ class SyncProcessManager {
const {Account} = await LocalDatabaseConnector.forShared(); const {Account} = await LocalDatabaseConnector.forShared();
const accounts = await Account.findAll(); const accounts = await Account.findAll();
for (const account of accounts) { await Promise.all(accounts.map(this.addWorkerForAccount));
this.addWorkerForAccount(account);
if (!this._checkHealthInterval) {
this._checkHealthInterval = setInterval(this._checkHealth, this.CHECK_HEALTH_TIME_INTERVAL)
} }
} }
@ -114,7 +154,7 @@ class SyncProcessManager {
} }
} }
async addWorkerForAccount(account) { addWorkerForAccount = async (account) => {
await LocalDatabaseConnector.ensureAccountDatabase(account.id); await LocalDatabaseConnector.ensureAccountDatabase(account.id);
const logger = global.Logger.forAccount(account) const logger = global.Logger.forAccount(account)
@ -137,7 +177,13 @@ class SyncProcessManager {
async removeWorkerForAccountId(accountId) { async removeWorkerForAccountId(accountId) {
if (this._workersByAccountId[accountId]) { if (this._workersByAccountId[accountId]) {
await this._workersByAccountId[accountId].cleanup(); try {
await this._workersByAccountId[accountId].cleanup().timeout(500)
} catch (err) {
err.message = `Error while cleaning up sync worker: ${err.message}`
NylasEnv.reportError(err)
// Continue with local cleanup
}
this._workersByAccountId[accountId] = null; this._workersByAccountId[accountId] = null;
} }
@ -149,4 +195,7 @@ class SyncProcessManager {
} }
window.$n.SyncProcessManager = new SyncProcessManager(); window.$n.SyncProcessManager = new SyncProcessManager();
// Attach the health-check tunables to the shared manager instance, which reads
// them as `this.MAX_WORKER_SILENCE_MS` and `this.CHECK_HEALTH_TIME_INTERVAL`.
Object.assign(window.$n.SyncProcessManager, {
  MAX_WORKER_SILENCE_MS,
  CHECK_HEALTH_TIME_INTERVAL,
});
module.exports = window.$n.SyncProcessManager module.exports = window.$n.SyncProcessManager

View file

@ -588,4 +588,8 @@ class SyncWorker {
} }
} }
// Expose the loop-interval and backoff constants as static properties of
// SyncWorker so that other modules can read them off the class.
Object.assign(SyncWorker, {
  AC_SYNC_LOOP_INTERVAL_MS,
  BATTERY_SYNC_LOOP_INTERVAL_MS,
  MAX_SYNC_BACKOFF_MS,
});
module.exports = SyncWorker; module.exports = SyncWorker;