Skip to content

Commit

Permalink
fix(enginenetx): compute zero time when we start dialing
Browse files Browse the repository at this point in the history
This diff addresses a bug observed on the wild where a slow DNS
causes several tactics to be ready concurrently.

If we want several tactics to be ready concurrently, we should
arrange for that, and for now BTW that's not the case.

Part of ooni/probe#2704.
  • Loading branch information
bassosimone committed May 10, 2024
1 parent 25e2eb8 commit 83ec4da
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 11 deletions.
3 changes: 3 additions & 0 deletions internal/enginenetx/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ func filterAssignInitialDelays(input <-chan *httpsDialerTactic) <-chan *httpsDia

index := 0
for tx := range input {
// TODO(bassosimone): what do we do now about the user configured
// initial delays? Should we declare them as deprecated?

// rewrite the delays
tx.InitialDelay = happyEyeballsDelay(index)
index++
Expand Down
51 changes: 40 additions & 11 deletions internal/enginenetx/httpsdialer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"net"
"sync"
"sync/atomic"
"time"

Expand Down Expand Up @@ -206,24 +207,22 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo
// pattern used by `./internal/httpclientx` to perform attempts faster
// in case there is an initial early failure.

// TODO(bassosimone): the algorithm to filter and assign initial
// delays is broken because, if the DNS runs for more than one
// second, then several policies will immediately be due. We should
// probably use a better strategy that takes as the zero the time
// when the first dialing policy becomes available.

// We need a cancellable context to interrupt the tactics emitter early when we
// immediately get a valid response and we don't need to use other tactics.
ctx, cancel := context.WithCancel(ctx)
defer cancel()

// Create structure for computing the zero dialing time once during
// the first dial, so that subsequent attempts use happy eyeballs based
// on the moment in which we tried the first dial.
t0 := &httpsDialerWorkerZeroTime{}

// The emitter will emit tactics and then close the channel when done. We spawn 16 workers
// that handle tactics in parallel and post results on the collector channel.
emitter := httpsDialerFilterTactics(hd.policy.LookupTactics(ctx, hostname, port))
collector := make(chan *httpsDialerErrorOrConn)
joiner := make(chan any)
const parallelism = 16
t0 := time.Now()
for idx := 0; idx < parallelism; idx++ {
go hd.worker(ctx, joiner, emitter, t0, collector)
}
Expand Down Expand Up @@ -257,6 +256,36 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo
return httpsDialerReduceResult(connv, errorv)
}

// httpsDialerWorkerZeroTime contains the zero time used when dialing. We set this
// zero time when we start the first dialing attempt, such that subsequent attempts
// are correctly spaced starting from such a zero time.
//
// A previous approach was that we were taking the zero time when we started
// getting tactics, but this approach was wrong, because it caused several tactics
// to be ready, when the DNS lookup was slow.
//
// The zero value of this structure is ready to use.
type httpsDialerWorkerZeroTime struct {
// mu provides mutual exclusion.
mu sync.Mutex

// t is the zero time.
t time.Time
}

// Get returns the zero dialing time. The first invocation of this method
// saves the zero dialing time and subsquent invocations just return it.
//
// This method is safe to be called concurrently by goroutines.
func (t0 *httpsDialerWorkerZeroTime) Get() time.Time {
defer t0.mu.Unlock()
t0.mu.Lock()
if t0.t.IsZero() {
t0.t = time.Now()
}
return t0.t
}

// httpsDialerFilterTactics filters the tactics to:
//
// 1. be paranoid and filter out nil tactics if any;
Expand Down Expand Up @@ -297,7 +326,7 @@ func (hd *httpsDialer) worker(
ctx context.Context,
joiner chan<- any,
reader <-chan *httpsDialerTactic,
t0 time.Time,
t0 *httpsDialerWorkerZeroTime,
writer chan<- *httpsDialerErrorOrConn,
) {
// let the parent know that we terminated
Expand All @@ -321,7 +350,7 @@ func (hd *httpsDialer) worker(
func (hd *httpsDialer) dialTLS(
ctx context.Context,
logger model.Logger,
t0 time.Time,
t0 *httpsDialerWorkerZeroTime,
tactic *httpsDialerTactic,
) (model.TLSConn, error) {
// honor happy-eyeballs delays and wait for the tactic to be ready to run
Expand Down Expand Up @@ -398,10 +427,10 @@ func (hd *httpsDialer) dialTLS(
// return the context error if the context expires.
func httpsDialerTacticWaitReady(
ctx context.Context,
t0 time.Time,
t0 *httpsDialerWorkerZeroTime,
tactic *httpsDialerTactic,
) error {
deadline := t0.Add(tactic.InitialDelay)
deadline := t0.Get().Add(tactic.InitialDelay)
delta := time.Until(deadline)
if delta <= 0 {
return nil
Expand Down

0 comments on commit 83ec4da

Please sign in to comment.