Skip to content

Commit

Permalink
fix(enginenetx): stabilize happy eyeballs algorithm (ooni#1301)
Browse files Browse the repository at this point in the history
* Use 1s as the base delay (which leads to simpler computations)

* Acknowledge that with very large indexes we still need to produce
reasonable positive values, while the current algorithm breaks for
indexes large than `63`

* Acknowledge that, after incrementing exponentially for a while, it
makes sense to continue in a flat fashion, where we increment linearly,
while the current algorithm was aiming to always return 30s, which means
several attempts would actually run in parallel

* Acknowledge that we should use the same zero for all timings rather
than having a goroutine dependent zero

* Acknowledge that for now we don't need to mock `time.Now`

Part of ooni/probe#2531
  • Loading branch information
bassosimone authored and Murphy-OrangeMud committed Feb 13, 2024
1 parent 069705b commit a032484
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 58 deletions.
25 changes: 14 additions & 11 deletions internal/enginenetx/happyeyeballs.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,32 @@ package enginenetx

import "time"

// happyEyeballsDelay implements an happy-eyeballs like algorithm with the
// given base delay and with the given index. The index is the attempt number
// happyEyeballsDelay implements an happy-eyeballs like algorithm with a
// base delay of 1 second and the given index. The index is the attempt number
// and the first attempt should have zero as its index.
//
// The algorithm should emit 0 as the first delay, the baseDelay as the
// The standard Go library uses a 300ms delay for connecting. Because a TCP
// connect is one round trip and the TLS handshake is two round trips (roughly),
// we use 1 second as the base delay increment here.
//
// The algorithm should emit 0 as the first delay, the base delay as the
// second delay, and then it should double the base delay at each attempt,
// until we reach the 30 seconds, after which the delay is constant.
// until we reach the 8 seconds, after which the delay increments
// linearly spacing each subsequent attempts 8 seconds in the future.
//
// By doubling the base delay, we account for the case where there are
// actual issues inside the network. By using this algorithm, we are still
// able to overlap and pack more dialing attempts overall.
func happyEyeballsDelay(baseDelay time.Duration, idx int) time.Duration {
const cutoff = 30 * time.Second
func happyEyeballsDelay(idx int) time.Duration {
const baseDelay = time.Second
switch {
case idx <= 0:
return 0
case idx == 1:
return baseDelay
case idx <= 4:
return baseDelay << (idx - 1)
default:
delay := baseDelay << (idx - 1)
if delay > cutoff {
delay = cutoff
}
return delay
return baseDelay << 3 * (time.Duration(idx) - 3)
}
}
30 changes: 14 additions & 16 deletions internal/enginenetx/happyeyeballs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,28 @@ func TestHappyEyeballsDelay(t *testing.T) {
expect time.Duration
}

const delay = 900 * time.Millisecond

cases := []testcase{
{-1, 0}, // make sure we gracefully handle negative numbers (i.e., we don't crash)
{0, 0},
{1, delay},
{2, delay * 2},
{3, delay * 4},
{4, delay * 8},
{5, delay * 16},
{6, delay * 32},
{7, 30 * time.Second},
{8, 30 * time.Second},
{9, 30 * time.Second},
{10, 30 * time.Second},
{1, time.Second},
{2, 2 * time.Second},
{3, 4 * time.Second},
{4, 8 * time.Second},
{5, 2 * 8 * time.Second},
{6, 3 * 8 * time.Second},
{7, 4 * 8 * time.Second},
{8, 5 * 8 * time.Second},
{9, 6 * 8 * time.Second},
{10, 7 * 8 * time.Second},
}

for _, tc := range cases {
t.Run(fmt.Sprintf("delay=%v tc.idx=%v", delay, tc.idx), func(t *testing.T) {
got := happyEyeballsDelay(delay, tc.idx)
t.Run(fmt.Sprintf("tc.idx=%v", tc.idx), func(t *testing.T) {
got := happyEyeballsDelay(tc.idx)
if got != tc.expect {
t.Fatalf("with delay=%v tc.idx=%v we got %v but expected %v", delay, tc.idx, got, tc.expect)
t.Fatalf("with tc.idx=%v we got %v but expected %v", tc.idx, got, tc.expect)
}
t.Logf("with delay=%v tc.idx=%v: got %v", delay, tc.idx, got)
t.Logf("with tc.idx=%v: got %v", tc.idx, got)
})
}
}
35 changes: 25 additions & 10 deletions internal/enginenetx/httpsdialercore.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,9 @@ func (hd *HTTPSDialer) DialTLSContext(ctx context.Context, network string, endpo
collector := make(chan *httpsDialerErrorOrConn)
joiner := make(chan any)
const parallelism = 16
t0 := time.Now()
for idx := 0; idx < parallelism; idx++ {
go hd.worker(ctx, joiner, emitter, collector)
go hd.worker(ctx, joiner, emitter, t0, collector)
}

// wait until all goroutines have joined
Expand Down Expand Up @@ -242,8 +243,13 @@ func httpsDialerReduceResult(connv []model.TLSConn, errorv []error) (model.TLSCo

// worker attempts to establish a TLS connection using and emits a single
// [*httpsDialerErrorOrConn] for each tactic.
func (hd *HTTPSDialer) worker(ctx context.Context, joiner chan<- any,
reader <-chan *HTTPSDialerTactic, writer chan<- *httpsDialerErrorOrConn) {
func (hd *HTTPSDialer) worker(
ctx context.Context,
joiner chan<- any,
reader <-chan *HTTPSDialerTactic,
t0 time.Time,
writer chan<- *httpsDialerErrorOrConn,
) {
// let the parent know that we terminated
defer func() { joiner <- true }()

Expand All @@ -253,17 +259,21 @@ func (hd *HTTPSDialer) worker(ctx context.Context, joiner chan<- any,
Logger: hd.logger,
}

conn, err := hd.dialTLS(ctx, prefixLogger, tactic)
conn, err := hd.dialTLS(ctx, prefixLogger, t0, tactic)

writer <- &httpsDialerErrorOrConn{Conn: conn, Err: err}
}
}

// dialTLS performs the actual TLS dial.
func (hd *HTTPSDialer) dialTLS(
ctx context.Context, logger model.Logger, tactic *HTTPSDialerTactic) (model.TLSConn, error) {
ctx context.Context,
logger model.Logger,
t0 time.Time,
tactic *HTTPSDialerTactic,
) (model.TLSConn, error) {
// wait for the tactic to be ready to run
if err := httpsDialerTacticWaitReady(ctx, tactic); err != nil {
if err := httpsDialerTacticWaitReady(ctx, t0, tactic); err != nil {
return nil, err
}

Expand Down Expand Up @@ -330,13 +340,18 @@ func (hd *HTTPSDialer) dialTLS(
// httpsDialerWaitReady waits for the given delay to expire or the context to be canceled. If the
// delay is zero or negative, we immediately return nil. We also return nil when the delay expires. We
// return the context error if the context expires.
func httpsDialerTacticWaitReady(ctx context.Context, tactic *HTTPSDialerTactic) error {
delay := tactic.InitialDelay
if delay <= 0 {
func httpsDialerTacticWaitReady(
ctx context.Context,
t0 time.Time,
tactic *HTTPSDialerTactic,
) error {
deadline := t0.Add(tactic.InitialDelay)
delta := time.Until(deadline)
if delta <= 0 {
return nil
}

timer := time.NewTimer(delay)
timer := time.NewTimer(delta)
defer timer.Stop()

select {
Expand Down
9 changes: 1 addition & 8 deletions internal/enginenetx/httpsdialernull.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package enginenetx
import (
"context"
"net"
"time"

"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
Expand All @@ -29,12 +28,6 @@ type HTTPSDialerNullPolicy struct {

var _ HTTPSDialerPolicy = &HTTPSDialerNullPolicy{}

// httpsDialerHappyEyeballsDelay is the delay after which we should start a new TCP
// connect and TLS handshake using another tactic. The standard Go library uses a 300ms
// delay for connecting. Because a TCP connect is one round trip and the TLS handshake
// is two round trips (roughly), we multiply this value by three.
const httpsDialerHappyEyeballsDelay = 900 * time.Millisecond

// LookupTactics implements HTTPSDialerPolicy.
func (p *HTTPSDialerNullPolicy) LookupTactics(
ctx context.Context, domain, port string) <-chan *HTTPSDialerTactic {
Expand All @@ -57,7 +50,7 @@ func (p *HTTPSDialerNullPolicy) LookupTactics(
for idx, addr := range addrs {
tactic := &HTTPSDialerTactic{
Endpoint: net.JoinHostPort(addr, port),
InitialDelay: happyEyeballsDelay(httpsDialerHappyEyeballsDelay, idx),
InitialDelay: happyEyeballsDelay(idx),
SNI: domain,
VerifyHostname: domain,
}
Expand Down
15 changes: 5 additions & 10 deletions internal/enginenetx/httpsdialerstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,6 @@ func NewHTTPSDialerStatsRootContainer() *HTTPSDialerStatsRootContainer {
// The zero value of this structure is not ready to use; please, use the
// [NewHTTPSDialerStatsManager] factory to create a new instance.
type HTTPSDialerStatsManager struct {
// TimeNow is a field that allows you to override how we obtain the
// current time; modify this field BEFORE using this structure.
TimeNow func() time.Time

// kvStore is the key-value store we're using
kvStore model.KeyValueStore

Expand Down Expand Up @@ -178,7 +174,6 @@ func NewHTTPSDialerStatsManager(kvStore model.KeyValueStore, logger model.Logger
}

return &HTTPSDialerStatsManager{
TimeNow: time.Now,
root: root,
kvStore: kvStore,
logger: logger,
Expand Down Expand Up @@ -216,7 +211,7 @@ func (mt *HTTPSDialerStatsManager) OnStarting(tactic *HTTPSDialerTactic) {

// update stats
record.CountStarted++
record.LastUpdated = mt.TimeNow()
record.LastUpdated = time.Now()
}

// OnTCPConnectError implements HTTPSDialerStatsManager.
Expand All @@ -233,7 +228,7 @@ func (mt *HTTPSDialerStatsManager) OnTCPConnectError(ctx context.Context, tactic
}

// update stats
record.LastUpdated = mt.TimeNow()
record.LastUpdated = time.Now()
if ctx.Err() != nil {
record.CountTCPConnectInterrupt++
return
Expand All @@ -256,7 +251,7 @@ func (mt *HTTPSDialerStatsManager) OnTLSHandshakeError(ctx context.Context, tact
}

// update stats
record.LastUpdated = mt.TimeNow()
record.LastUpdated = time.Now()
if ctx.Err() != nil {
record.CountTLSHandshakeInterrupt++
return
Expand All @@ -281,7 +276,7 @@ func (mt *HTTPSDialerStatsManager) OnTLSVerifyError(tactic *HTTPSDialerTactic, e
// update stats
record.CountTLSVerificationError++
record.HistoTLSVerificationError[err.Error()]++
record.LastUpdated = mt.TimeNow()
record.LastUpdated = time.Now()
}

// OnSuccess implements HTTPSDialerStatsManager.
Expand All @@ -299,7 +294,7 @@ func (mt *HTTPSDialerStatsManager) OnSuccess(tactic *HTTPSDialerTactic) {

// update stats
record.CountSuccess++
record.LastUpdated = mt.TimeNow()
record.LastUpdated = time.Now()
}

// Close implements io.Closer
Expand Down
3 changes: 0 additions & 3 deletions internal/enginenetx/network_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package enginenetx
import (
"sync"
"testing"
"time"

"github.com/ooni/probe-cli/v3/internal/kvstore"
"github.com/ooni/probe-cli/v3/internal/mocks"
Expand Down Expand Up @@ -33,7 +32,6 @@ func TestNetworkUnit(t *testing.T) {
},
},
stats: &HTTPSDialerStatsManager{
TimeNow: time.Now,
kvStore: &kvstore.Memory{},
logger: model.DiscardLogger,
mu: sync.Mutex{},
Expand All @@ -59,7 +57,6 @@ func TestNetworkUnit(t *testing.T) {
netx := &Network{
reso: expected,
stats: &HTTPSDialerStatsManager{
TimeNow: time.Now,
kvStore: &kvstore.Memory{},
logger: model.DiscardLogger,
mu: sync.Mutex{},
Expand Down

0 comments on commit a032484

Please sign in to comment.