Skip to content

Commit

Permalink
[finagle-core] fix ExponentialJittered and Rng.nextLong(n)
Browse files Browse the repository at this point in the history
# Problems

- Rng.nextLong(n) can be < 0
- ExponentialJittered first duration is always `start` without any jitter, so all services make the first retry simultaneously, potentially overwhelming a target service even more
- ExponentialJittered.duration can be > ExponentialJittered.next.duration for unlimited number of times, because it chooses a random value starting from 1

# Solution

Change `ExponentialJittered` to return a random number between `start/2` and `start + start/2`.
Advance `start` as `start * 2` for each `next` `ExponentialJittered`.

JIRA Issues: STOR-8883

Differential Revision: https://phabricator.twitter.biz/D1182252
  • Loading branch information
Anton Ivanov authored and jenkins committed Nov 13, 2024
1 parent bc815d5 commit 171b881
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 60 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ Runtime Behavior Changes
stats cpu_time_ms and active_sockets per netty worker thread.
* finagle-netty4: `EventLoopGroupTracker` now collects the distribution of cpu utilization by each netty thread
and all_sockets instead of active_sockets. ``PHAB_ID=D1177719``
* finagle-core: `Backoff.exponentialJittered` now jitter the first duration. ``PHAB_ID=D1182252``
* finalge-core: `Backoff.exponentialJittered` now uses a new range for jitters: `[dur/2; dur + dur/2]`.
Previously it was `[0, dur)`, which could result in `next.duration < duration`
for arbitrary long invocation chains. ``PHAB_ID=D1182252``


New Features
Expand Down
61 changes: 38 additions & 23 deletions finagle-core/src/main/scala/com/twitter/finagle/Backoff.scala
Original file line number Diff line number Diff line change
Expand Up @@ -159,17 +159,22 @@ object Backoff {
}

/**
* The formula to calculate the next backoff is:
*{{{
* backoff = random_between(0, min({@code maximum}, {@code start} * 2 ** attempt))
*}}}
* Current duration is:
* {{{
* random_between(start/2, start + start/2)
* }}}
* but >= 1 and <= maximum.
*
* Next backoff is:
* {{{
* exponentialJitterred(start * 2, maximum)
* }}}
* and Const(maximum) after start >= maximum
*
* @param start must be greater than 0 and less than or equal to `maximum`.
* @param maximum must be greater than 0 and greater than or equal to
* `start`.
*
* @note This is "full jitter" via
* https://www.awsarchitectureblog.com/2015/03/backoff.html
* @see [[decorrelatedJittered]] and [[equalJittered]] for alternative
* jittered approaches.
*/
Expand All @@ -182,7 +187,7 @@ object Backoff {
// compare start and maximum here to avoid one
// iteration of creating a new `ExponentialJittered`.
if (start == maximum) new Const(start)
else new ExponentialJittered(start, maximum, 1, Rng.threadLocal)
else new ExponentialJittered(start.inNanoseconds, maximum.inNanoseconds, Rng.threadLocal)
}

/**
Expand Down Expand Up @@ -309,32 +314,42 @@ object Backoff {
/** @see [[Backoff.exponentialJittered]] as the api to create this strategy. */
// exposed for testing
private[finagle] final class ExponentialJittered(
start: Duration,
maximum: Duration,
attempt: Int,
meanNanos: Long,
maxNanos: Long,
rng: Rng)
extends Backoff {

// Don't shift left more than 62 bits to avoid
// Long overflow of the multiplier `shift`.
private[this] final val MaxBitShift = 62
private[this] val backoffNanos = {
// gen random around min
val curMinNanos = Math.max(meanNanos >> 1, 1L)
var curMaxNanos = meanNanos + curMinNanos
if (curMaxNanos <= meanNanos) { // overflow
curMaxNanos = Long.MaxValue
}
Math.min(
curMinNanos + rng.nextLong(curMaxNanos - curMinNanos + 1L),
maxNanos
)
}

def duration: Duration = start
def duration: Duration = Duration.fromNanoseconds(backoffNanos)

def next: Backoff = {
val shift = 1L << MaxBitShift.min(attempt)
// to avoid Long overflow
val maxBackoff = if (start >= maximum / shift) maximum else start * shift
if (maxBackoff == maximum) {
new Const(maximum)
if (meanNanos >= maxNanos) {
new Const(Duration.fromNanoseconds(maxNanos))
} else {
// to avoid the case of random being 0
val random = 1 + rng.nextLong(maxBackoff.inNanoseconds)
new ExponentialJittered(Duration.fromNanoseconds(random), maximum, attempt + 1, rng)
var nextMeanNanos = meanNanos << 1
if (nextMeanNanos <= meanNanos) { // overflow
nextMeanNanos = Long.MaxValue
}
new ExponentialJittered(nextMeanNanos, maxNanos, rng)
}
}

def isExhausted: Boolean = false

override def toString: String =
s"${this.getClass.getSimpleName}($meanNanos,$maxNanos)"
}

/** @see [[Backoff.take]] as the api to create this strategy. */
Expand Down Expand Up @@ -415,7 +430,7 @@ object Backoff {
* 1. EqualJittered
* - Create backoffs that jitter between 0 and half of the exponential growth. Can be created via `Backoff.equalJittered`.
* 1. ExponentialJittered
* - Create backoffs that jitter randomly between 0 and a value that grows exponentially by 2. Can be created via `Backoff.exponentialJittered`.
* - Create backoffs that jitter randomly between value/2 and value+value/2 that grows exponentially by 2. Can be created via `Backoff.exponentialJittered`.
*
* @note A new [[Backoff]] will be created only when `next` is called.
* @note None of the [[Backoff]]s are memoized, for strategies that involve
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ object Rng {
def nextLong(n: Long): Long = {
require(n > 0)

// This is the algorithm used by Java's random number generator
// internally.
// This algorithm is inspired by Java's random number generator
// https://docs.oracle.com/javase/6/docs/api/java/util/Random.html#nextInt(int)
if ((n & -n) == n)
return r.nextLong() % n
val m = n - 1
if ((n & m) == 0L) // power of 2
return r.nextLong() & m

var bits = 0L
var v = 0L
Expand Down
70 changes: 39 additions & 31 deletions finagle-core/src/test/scala/com/twitter/finagle/BackoffTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -139,37 +139,45 @@ class BackoffTest extends AnyFunSuite with ScalaCheckDrivenPropertyChecks {

test("exponentialJittered") {
val exponentialGen = for {
startMs <- Gen.choose(1L, 1000L)
maxMs <- Gen.choose(startMs, startMs * 2)
startNs <- Gen.choose(1L, Long.MaxValue)
maxNs <- Gen.choose(startNs, Long.MaxValue)
seed <- Gen.choose(Long.MinValue, Long.MaxValue)
} yield (startMs, maxMs, seed)
} yield (startNs, maxNs, seed)

forAll(exponentialGen) {
case (startMs: Long, maxMs: Long, seed: Long) =>
val rng = Rng(seed)
var start = startMs.millis
val max = maxMs.millis
val backoff: Backoff = new ExponentialJittered(start, max, 1, Rng(seed))
val result: ArrayBuffer[Duration] = new ArrayBuffer[Duration]()
for (attempt <- 1 to 7) {
result.append(start)
start = nextStart(start, max, rng, attempt)
case (startNs: Long, maxNs: Long, seed: Long) =>
// I don't know why this if is needed, but it is
if (startNs > 0 && maxNs >= startNs) {
Array(Rng(seed), ZeroRng).foreach { rng =>
var backoff: Backoff = new ExponentialJittered(startNs, maxNs, rng)
var meanNs = startNs
var stop = false
while (!stop) {
val minExpected = Math.max(meanNs / 2, 1)
val maxExpected = if (meanNs + minExpected > meanNs) {
Math.min(meanNs + minExpected, maxNs)
} else { // overflow
maxNs
}
val actualDurNs = backoff.duration.inNanoseconds
assert(actualDurNs >= minExpected, backoff)
assert(actualDurNs <= maxExpected, backoff)

backoff = backoff.next
if (meanNs >= maxNs) {
assert(backoff.duration == Duration.fromNanoseconds(maxNs), backoff)
assert(backoff.next == backoff, backoff)
stop = true
} else {
meanNs = if (meanNs * 2 < meanNs) { // overflow
Long.MaxValue
} else {
meanNs * 2
}
}
}
}
}
verifyBackoff(backoff, result.toSeq, exhausted = false)

// Verify case where Rng returns 0.
val zeroRng = getZeroRng(rng)
val zeroRngBackoff: Backoff = new ExponentialJittered(start, max, 1, zeroRng)
val expectedResults = Seq(start, nextStart(start, max, zeroRng, 1))
verifyBackoff(zeroRngBackoff, expectedResults, exhausted = false)
}

def nextStart(start: Duration, max: Duration, rng: Rng, attempt: Int): Duration = {
val shift = 1L << attempt
// to avoid Long overflow
val maxBackoff = if (start >= max / shift) max else start * shift
if (maxBackoff == max) max
else Duration.fromNanoseconds(1 + rng.nextLong(maxBackoff.inNanoseconds))
}
}

Expand Down Expand Up @@ -338,12 +346,12 @@ class BackoffTest extends AnyFunSuite with ScalaCheckDrivenPropertyChecks {
assert(actualBackoff.isExhausted == exhausted)
}

private[this] def getZeroRng(rng: Rng): Rng = new Rng {
override def nextDouble(): Double = rng.nextDouble()
private[this] object ZeroRng extends Rng {
override def nextDouble(): Double = 0.0

override def nextInt(n: Int): Int = rng.nextInt(n)
override def nextInt(n: Int): Int = 0

override def nextInt(): Int = rng.nextInt()
override def nextInt(): Int = 0

override def nextLong(n: Long): Long = 0L
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,17 @@ class FixedInetResolverTest extends AnyFunSuite {
// since `ExponentialJittered` generates values randomly, we use the same
// seed here in order to validate the values returned from `nBackoffs`.
val nBackoffs: Backoff =
new ExponentialJittered(1.milliseconds, 100.milliseconds, 1, Rng(777)).take(shouldFailTimes)
new ExponentialJittered(
1.milliseconds.inNanoseconds,
100.milliseconds.inNanoseconds,
Rng(777)
).take(shouldFailTimes)
var actualBackoff: Backoff =
new ExponentialJittered(1.milliseconds, 100.milliseconds, 1, Rng(777)).take(shouldFailTimes)
new ExponentialJittered(
1.milliseconds.inNanoseconds,
100.milliseconds.inNanoseconds,
Rng(777)
).take(shouldFailTimes)
val mockTimer = new MockTimer
val cache = FixedInetResolver.cache(resolve, maxCacheSize, nBackoffs, mockTimer)
val resolver2 = new FixedInetResolver(cache, statsReceiver)
Expand Down

0 comments on commit 171b881

Please sign in to comment.