Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: optimize jitter factor calculation #3629

Merged
merged 5 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 47 additions & 43 deletions tm2/pkg/p2p/switch.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package p2p

import (
"bytes"
"context"
"crypto/rand"
"encoding/binary"
"fmt"
"math"
"math/big"
"sync"
"time"

Expand Down Expand Up @@ -356,7 +357,7 @@

// backoffItem holds the dial backoff bookkeeping for a single entry:
// when it was last dialed and how many dial attempts have been made.
type backoffItem struct {
	lastDialTime time.Time // time of the most recent dial
	attempts     uint      // attempt counter; unsigned, matching calculateBackoff's parameter
}

var (
Expand Down Expand Up @@ -482,65 +483,68 @@
}
}

// calculateBackoff calculates the backoff interval by exponentiating the base interval
// by the number of attempts (baseInterval * 2^attempts). The interval is capped at
// maxInterval and then has a jitter factor applied to it (+/- 10% of interval, max 10 sec),
// so the returned value may land slightly above or below the capped interval.
//
// A non-positive baseInterval falls back to 1 second, and a non-positive maxInterval
// falls back to 60 seconds. If no random bytes can be read, the un-jittered interval
// is returned.
func calculateBackoff(
	attempts uint,
	baseInterval time.Duration,
	maxInterval time.Duration,
) time.Duration {
	const (
		defaultBaseInterval = time.Second * 1
		defaultMaxInterval  = time.Second * 60

		maxJitterDuration   = 10 * time.Second
		maxJitterPercentage = 10 // 10%
	)

	// Sanitize base interval parameter.
	if baseInterval <= 0 {
		baseInterval = defaultBaseInterval
	}

	// Sanitize max interval parameter.
	if maxInterval <= 0 {
		maxInterval = defaultMaxInterval
	}

	// Calculate the interval by exponentiating the base interval by the number of
	// attempts, capped at the maximum interval. The shift is only performed when the
	// result provably fits under the cap: a plain `baseInterval << attempts` silently
	// overflows for large attempt counts (e.g. 1s << 64 == 0) and would bypass the cap.
	// For attempts >= 63 the right shift yields 0, so the cap is used.
	interval := maxInterval
	if baseInterval <= maxInterval>>attempts {
		interval = baseInterval << attempts
	}

	// Below is the code to add a jitter factor to the interval.
	// Read random bytes into an 8 bytes buffer (size of an int64).
	var randBytes [8]byte
	if _, err := rand.Read(randBytes[:]); err != nil {
		return interval
	}

	// Convert the random bytes to an int64. The error is ignored on purpose:
	// decoding a fixed-size int64 from an 8 byte in-memory reader cannot fail.
	var randInt64 int64
	_ = binary.Read(bytes.NewReader(randBytes[:]), binary.NativeEndian, &randInt64)

	// Calculate the random jitter multiplier (float between -1 and 1).
	jitterMultiplier := float64(randInt64) / float64(math.MaxInt64)

	// Calculate the maximum jitter based on interval percentage, capped to the
	// maximum jitter duration. The percentage is only computed for intervals below
	// the point where it would reach the cap, which avoids overflowing the
	// multiplication for very large intervals.
	maxJitter := maxJitterDuration
	if interval < maxJitterDuration*100/maxJitterPercentage {
		maxJitter = interval * maxJitterPercentage / 100
	}

	// Calculate the jitter.
	jitter := time.Duration(float64(maxJitter) * jitterMultiplier)

	// Saturate instead of wrapping around when the interval sits at the very top
	// of the time.Duration range.
	if jitter > 0 && interval > math.MaxInt64-jitter {
		return math.MaxInt64
	}

	return interval + jitter
}

// DialPeers adds the peers to the dial queue for async dialing.
Expand Down
67 changes: 67 additions & 0 deletions tm2/pkg/p2p/switch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -823,3 +823,70 @@ func TestMultiplexSwitch_DialPeers(t *testing.T) {
}
})
}

func TestCalculateBackoff(t *testing.T) {
t.Parallel()

checkJitterRange := func(t *testing.T, expectedAbs, actual time.Duration) {
t.Helper()
require.LessOrEqual(t, actual, expectedAbs)
require.GreaterOrEqual(t, actual, expectedAbs*-1)
}

// Test that the default jitter factor is 10% of the backoff duration.
t.Run("percentage jitter", func(t *testing.T) {
t.Parallel()

for i := 0; i < 1000; i++ {
checkJitterRange(t, 100*time.Millisecond, calculateBackoff(0, time.Second, 10*time.Minute)-time.Second)
checkJitterRange(t, 200*time.Millisecond, calculateBackoff(1, time.Second, 10*time.Minute)-2*time.Second)
checkJitterRange(t, 400*time.Millisecond, calculateBackoff(2, time.Second, 10*time.Minute)-4*time.Second)
checkJitterRange(t, 800*time.Millisecond, calculateBackoff(3, time.Second, 10*time.Minute)-8*time.Second)
checkJitterRange(t, 1600*time.Millisecond, calculateBackoff(4, time.Second, 10*time.Minute)-16*time.Second)
}
})

// Test that the jitter factor is capped at 10 sec.
t.Run("capped jitter", func(t *testing.T) {
t.Parallel()

for i := 0; i < 1000; i++ {
checkJitterRange(t, 10*time.Second, calculateBackoff(7, time.Second, 10*time.Minute)-128*time.Second)
checkJitterRange(t, 10*time.Second, calculateBackoff(10, time.Second, 20*time.Minute)-1024*time.Second)
checkJitterRange(t, 10*time.Second, calculateBackoff(20, time.Second, 300*time.Hour)-1048576*time.Second)
}
})

// Test that the backoff interval is based on the baseInterval.
t.Run("base interval", func(t *testing.T) {
t.Parallel()

for i := 0; i < 1000; i++ {
checkJitterRange(t, 4800*time.Millisecond, calculateBackoff(4, 3*time.Second, 10*time.Minute)-48*time.Second)
checkJitterRange(t, 8*time.Second, calculateBackoff(3, 10*time.Second, 10*time.Minute)-80*time.Second)
checkJitterRange(t, 10*time.Second, calculateBackoff(5, 3*time.Hour, 100*time.Hour)-96*time.Hour)
}
})

// Test that the backoff interval is capped at maxInterval +/- jitter factor.
t.Run("max interval", func(t *testing.T) {
t.Parallel()

for i := 0; i < 1000; i++ {
checkJitterRange(t, 100*time.Millisecond, calculateBackoff(10, 10*time.Hour, time.Second)-time.Second)
checkJitterRange(t, 1600*time.Millisecond, calculateBackoff(10, 10*time.Hour, 16*time.Second)-16*time.Second)
checkJitterRange(t, 10*time.Second, calculateBackoff(10, 10*time.Hour, 128*time.Second)-128*time.Second)
}
})

// Test parameters sanitization for base and max intervals.
t.Run("parameters sanitization", func(t *testing.T) {
t.Parallel()

for i := 0; i < 1000; i++ {
checkJitterRange(t, 100*time.Millisecond, calculateBackoff(0, -10, -10)-time.Second)
checkJitterRange(t, 1600*time.Millisecond, calculateBackoff(4, -10, -10)-16*time.Second)
checkJitterRange(t, 10*time.Second, calculateBackoff(7, -10, 10*time.Minute)-128*time.Second)
}
})
}
Loading