Add jitter to BackoffTimer

Summary:
Adding random jitter to the retry delay should help us avoid the thundering
herd problem if some kind of API outage affects a large number of clients.

Test Plan: Tests

Reviewers: bengotow

Reviewed By: bengotow

Subscribers: juan

Differential Revision: https://phab.nylas.com/D3297
This commit is contained in:
Mark Hahnenberg 2016-09-22 16:17:02 -07:00
parent 80e1c5b457
commit f954b85ad6
2 changed files with 25 additions and 17 deletions

View file

@ -19,18 +19,21 @@ class BackoffTimer
backoff: (delay) =>
@_delay = delay ? Math.min(@_delay * 1.7, 5 * 1000 * 60) # Cap at 5 minutes
# Add "full" jitter (see: https://www.awsarchitectureblog.com/2015/03/backoff.html)
@_actualDelay = Math.random() * @_delay
if not NylasEnv.inSpecMode()
console.log("Backing off after sync failure. Will retry in #{Math.floor(@_delay / 1000)} seconds.")
console.log("Backing off after sync failure. Will retry in #{Math.floor(@_actualDelay / 1000)} seconds.")
start: =>
clearTimeout(@_timeout) if @_timeout
@_timeout = setTimeout =>
@_timeout = null
@fn()
, @_delay
, @_actualDelay
resetDelay: =>
@_delay = 2 * 1000
@_actualDelay = Math.random() * @_delay
getCurrentDelay: =>
@_delay

View file

@ -103,24 +103,29 @@ describe "NylasSyncWorker", ->
@apiRequests = []
spyOn(@worker, 'resume').andCallThrough()
spyOn(Math, 'random').andReturn(1.0)
@worker.start()
expectThings = (resumeCallCount) ->
expect(@worker.resume.callCount).toBe(resumeCallCount)
expect(Math.random.callCount).toBe(resumeCallCount)
expect(@worker.resume.callCount).toBe(1)
simulateNetworkFailure(); expect(@worker.resume.callCount).toBe(1)
advanceClock(4000); expect(@worker.resume.callCount).toBe(2)
simulateNetworkFailure(); expect(@worker.resume.callCount).toBe(2)
advanceClock(4000); expect(@worker.resume.callCount).toBe(2)
advanceClock(4000); expect(@worker.resume.callCount).toBe(3)
simulateNetworkFailure(); expect(@worker.resume.callCount).toBe(3)
advanceClock(4000); expect(@worker.resume.callCount).toBe(3)
advanceClock(4000); expect(@worker.resume.callCount).toBe(3)
advanceClock(4000); expect(@worker.resume.callCount).toBe(4)
simulateNetworkFailure(); expect(@worker.resume.callCount).toBe(4)
advanceClock(4000); expect(@worker.resume.callCount).toBe(4)
advanceClock(4000); expect(@worker.resume.callCount).toBe(4)
advanceClock(4000); expect(@worker.resume.callCount).toBe(4)
advanceClock(4000); expect(@worker.resume.callCount).toBe(4)
advanceClock(4000); expect(@worker.resume.callCount).toBe(5)
simulateNetworkFailure(); expectThings(1)
advanceClock(4000); expectThings(2)
simulateNetworkFailure(); expectThings(2)
advanceClock(4000); expectThings(2)
advanceClock(4000); expectThings(3)
simulateNetworkFailure(); expectThings(3)
advanceClock(4000); expectThings(3)
advanceClock(4000); expectThings(3)
advanceClock(4000); expectThings(4)
simulateNetworkFailure(); expectThings(4)
advanceClock(4000); expectThings(4)
advanceClock(4000); expectThings(4)
advanceClock(4000); expectThings(4)
advanceClock(4000); expectThings(4)
advanceClock(4000); expectThings(5)
it "handles the request as a failure if we try and grab labels or folders without an 'inbox'", ->
spyOn(@worker, 'resume').andCallThrough()