From 80b5e1f175f61bca63ac51d0dcb38254bfc7eb20 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 10:37:10 +0200 Subject: [PATCH 001/127] doc(enginenetx): improve documentation Part of https://github.com/ooni/probe/issues/2704 --- internal/enginenetx/httpsdialer.go | 2 +- internal/enginenetx/network.go | 14 ++++---------- internal/enginenetx/statsmanager.go | 6 ++++-- internal/enginenetx/userpolicy.go | 2 +- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index 12c46e314f..d36def5618 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -97,7 +97,7 @@ type httpsDialerPolicy interface { // httpsDialerEventsHandler handles events occurring while we try dialing TLS. type httpsDialerEventsHandler interface { - // These callbacks are invoked during the TLS handshake to inform this + // These callbacks are invoked during the TLS dialing to inform this // interface about events that occurred. A policy SHOULD keep track of which // addresses, SNIs, etc. work and return them more frequently. // diff --git a/internal/enginenetx/network.go b/internal/enginenetx/network.go index fce1d53c7c..e681de1f1e 100644 --- a/internal/enginenetx/network.go +++ b/internal/enginenetx/network.go @@ -93,7 +93,8 @@ func NewNetwork( netx := &netxlite.Netx{} dialer := netx.NewDialerWithResolver(logger, resolver) - // Create manager for keeping track of statistics + // Create manager for keeping track of statistics. This implies creating a background + // goroutine that we'll need to close when we're done. const trimInterval = 30 * time.Second stats := newStatsManager(kvStore, logger, trimInterval) @@ -118,15 +119,8 @@ func NewNetwork( // the proxy, otherwise it means that we're using the ooni/oohttp library // to dial for proxies, which has some restrictions. 
// - // In particular, the returned transport uses dialer for dialing with - // cleartext proxies (e.g., socks5 and http) and httpsDialer for dialing - // with encrypted proxies (e.g., https). After this has happened, - // the code currently falls back to using the standard library's tls - // client code for establishing TLS connections over the proxy. The main - // implication here is that we're not using our custom mozilla CA for - // validating TLS certificates, rather we're using the system's cert store. - // - // Fixing this issue is TODO(https://github.com/ooni/probe/issues/2536). + // - this code does not work as intended when using netem and proxies + // as documented by TODO(https://github.com/ooni/probe/issues/2536). txp := netxlite.NewHTTPTransportWithOptions( logger, dialer, httpsDialer, netxlite.HTTPTransportOptionDisableCompression(false), diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index a95c9aa9e0..e82bcd0d0b 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -137,6 +137,8 @@ func statsDefensivelySortTacticsByDescendingSuccessRateWithAcceptPredicate( input []*statsTactic, acceptfunc func(*statsTactic) bool) []*statsTactic { // first let's create a working list such that we don't modify // the input in place thus avoiding any data race + // + // make sure we explicitly filter out malformed entries work := []*statsTactic{} for _, t := range input { if t != nil && t.Tactic != nil { @@ -193,8 +195,8 @@ func (st *statsTactic) Clone() *statsTactic { // a pointer to a location which is typically immutable, so it's perfectly // fine to copy the LastUpdate field by assignment. // - // here we're using a bunch of robustness aware mechanisms to clone - // considering that the struct may be edited by the user + // here we're using safe functions to clone the original struct considering + // that a user can edit the content on disk freely introducing nulls. 
return &statsTactic{ CountStarted: st.CountStarted, CountTCPConnectError: st.CountTCPConnectError, diff --git a/internal/enginenetx/userpolicy.go b/internal/enginenetx/userpolicy.go index 9409570b43..778c1393f2 100644 --- a/internal/enginenetx/userpolicy.go +++ b/internal/enginenetx/userpolicy.go @@ -104,7 +104,7 @@ func (ldp *userPolicy) LookupTactics( return ldp.Fallback.LookupTactics(ctx, domain, port) } - // emit the resuults, which may possibly be empty + // emit the results, which may possibly be empty out := make(chan *httpsDialerTactic) go func() { defer close(out) // let the caller know we're done From 62c39170693e4fdecab7d066d2e97154824c4bd0 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 12:12:53 +0200 Subject: [PATCH 002/127] fix(enginenetx): mix bridges and DNS tactics This diff refactors the code generating tactics to mix bridge and DNS tactics, such that we avoid trying all bridge tactics before falling back to DNS tactics. In the event in which the bridge is IP or endpoint blocked, this change makes sure we try using DNS tactics earlier, and, if the DNS is working, this means a faster bootstrap. Based on testing, where I replaced the bridge address with 10.0.0.1, we try DNS tactics after 8 seconds. After the first run, if the DNS tactics are working, we would immediately use them before bridge tactics, since we store information about tactics inside the $OONI_HOME/engine dir. Part of https://github.com/ooni/probe/issues/2704. 
--- internal/enginenetx/bridgespolicy.go | 33 ++++++++++++++++++++++------ internal/enginenetx/statspolicy.go | 4 ++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 0b282630dc..266ab5b1d1 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -25,6 +25,9 @@ type bridgesPolicy struct { var _ httpsDialerPolicy = &bridgesPolicy{} +// maxInitialBridgeTactics is the number of initial bridge tactics we return. +const maxInitialBridgeTactics = 4 + // LookupTactics implements httpsDialerPolicy. func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic { out := make(chan *httpsDialerTactic) @@ -33,24 +36,36 @@ func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) defer close(out) // tell the parent when we're done index := 0 - // emit bridges related tactics first which are empty if there are - // no bridges for the givend domain and port - for tx := range p.bridgesTacticsForDomain(domain, port) { + // Get channel for reading bridge tactics. + bridges := p.bridgesTacticsForDomain(domain, port) + + // Emit the first N bridge tactics. Note that tactics are empty if there + // is no bridge configured for the given domain and port. + for tx := range bridges { tx.InitialDelay = happyEyeballsDelay(index) index += 1 out <- tx + if index >= maxInitialBridgeTactics { + break + } } - // now fallback to get more tactics (typically here the fallback - // uses the DNS and obtains some extra tactics) + // Now fallback to get more tactics (typically via DNS). // - // we wrap whatever the underlying policy returns us with some - // extra logic for better communicating with test helpers + // We wrap whatever the underlying policy returns us with some + // extra logic for better communicating with test helpers. 
for tx := range p.maybeRewriteTestHelpersTactics(p.Fallback.LookupTactics(ctx, domain, port)) { tx.InitialDelay = happyEyeballsDelay(index) index += 1 out <- tx } + + // Now finish emitting bridge tactics. + for tx := range bridges { + tx.InitialDelay = happyEyeballsDelay(index) + index += 1 + out <- tx + } }() return out @@ -81,6 +96,10 @@ func (p *bridgesPolicy) maybeRewriteTestHelpersTactics(input <-chan *httpsDialer defer close(out) // tell the parent when we're done for tactic := range input { + // TODO(bassosimone): here we could potentially attempt using tactics + // changing the SNI also for api.ooni.io when we're getting its address + // using a DNS resolver that is working as intended. + // When we're not connecting to a TH, pass the policy down the chain unmodified if !bridgesPolicySlicesContains(bridgesPolicyTestHelpersDomains, tactic.VerifyHostname) { out <- tactic diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 8ff144f6a8..c300a33689 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -60,6 +60,10 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str out <- t } + // TODO(bassosimone): as an optimization, here we could mix cached tactics + // and fallback tactics to avoid slow bootstraps in the event in which + // known-to-work cached tactics have become obsolete. 
+ // give priority to what we know from stats for _, t := range statsPolicyPostProcessTactics(p.Stats.LookupTactics(domain, port)) { maybeEmitTactic(t) From 0d2b0f2dd64c714c0ca1d1318815e0ff3f792d94 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 12:43:46 +0200 Subject: [PATCH 003/127] fix: update test name and add comment --- internal/enginenetx/bridgespolicy_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/bridgespolicy_test.go b/internal/enginenetx/bridgespolicy_test.go index 21521a1387..67c136dd21 100644 --- a/internal/enginenetx/bridgespolicy_test.go +++ b/internal/enginenetx/bridgespolicy_test.go @@ -9,7 +9,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/model" ) -func TestBeaconsPolicy(t *testing.T) { +func TestBridgesPolicy(t *testing.T) { t.Run("for domains for which we don't have bridges and DNS failure", func(t *testing.T) { expected := errors.New("mocked error") p := &bridgesPolicy{ @@ -76,7 +76,10 @@ func TestBeaconsPolicy(t *testing.T) { } }) - t.Run("for the api.ooni.io domain", func(t *testing.T) { + // TODO(bassosimone): we need to write better test cases for what + // happens when we have a mixture of tactics here. + + t.Run("for the api.ooni.io domain with DNS failure", func(t *testing.T) { expected := errors.New("mocked error") p := &bridgesPolicy{ Fallback: &dnsPolicy{ From aee17cffae93986e55552048ea6fd8d05631993e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 12:53:54 +0200 Subject: [PATCH 004/127] feat: test for the mixed policies case Previously, we were only testing with DNS returning error, while now we should also have a test case for when it's working given that we're mixing tactics together now. 
--- internal/enginenetx/bridgespolicy_test.go | 74 ++++++++++++++++++++++- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/internal/enginenetx/bridgespolicy_test.go b/internal/enginenetx/bridgespolicy_test.go index 67c136dd21..1be87ce9da 100644 --- a/internal/enginenetx/bridgespolicy_test.go +++ b/internal/enginenetx/bridgespolicy_test.go @@ -76,9 +76,6 @@ func TestBridgesPolicy(t *testing.T) { } }) - // TODO(bassosimone): we need to write better test cases for what - // happens when we have a mixture of tactics here. - t.Run("for the api.ooni.io domain with DNS failure", func(t *testing.T) { expected := errors.New("mocked error") p := &bridgesPolicy{ @@ -95,6 +92,7 @@ func TestBridgesPolicy(t *testing.T) { ctx := context.Background() tactics := p.LookupTactics(ctx, "api.ooni.io", "443") + // since the DNS fails, we should only see tactics generated by bridges var count int for tactic := range tactics { count++ @@ -120,6 +118,76 @@ func TestBridgesPolicy(t *testing.T) { } }) + t.Run("for the api.ooni.io domain with DNS success", func(t *testing.T) { + p := &bridgesPolicy{ + Fallback: &dnsPolicy{ + Logger: model.DiscardLogger, + Resolver: &mocks.Resolver{ + MockLookupHost: func(ctx context.Context, domain string) ([]string, error) { + return []string{"130.192.91.211"}, nil + }, + }, + }, + } + + ctx := context.Background() + tactics := p.LookupTactics(ctx, "api.ooni.io", "443") + + // since the DNS succeeds we should see bridge tactics mixed with DNS tactics + var ( + bridgesCount int + dnsCount int + overallCount int + ) + for tactic := range tactics { + overallCount++ + + t.Log(overallCount, tactic) + + if tactic.Port != "443" { + t.Fatal("the port should always be 443") + } + + switch { + case overallCount == 5: + if tactic.Address != "130.192.91.211" { + t.Fatal("the host should be 130.192.91.211 for count == 5") + } + + if tactic.SNI != "api.ooni.io" { + t.Fatal("we should see the `api.ooni.io` SNI on the wire for count == 5") + } + + dnsCount++ + 
+ default: + if tactic.Address != "162.55.247.208" { + t.Fatal("the host should be 162.55.247.208 for count != 5") + } + + if tactic.SNI == "api.ooni.io" { + t.Fatal("we should not see the `api.ooni.io` SNI on the wire for count != 5") + } + + bridgesCount++ + } + + if tactic.VerifyHostname != "api.ooni.io" { + t.Fatal("the VerifyHostname field should always be like `api.ooni.io`") + } + } + + if overallCount <= 0 { + t.Fatal("expected to see at least one tactic") + } + if dnsCount != 1 { + t.Fatal("expected to see exactly one DNS based tactic") + } + if bridgesCount <= 0 { + t.Fatal("expected to see at least one bridge tactic") + } + }) + t.Run("for test helper domains", func(t *testing.T) { for _, domain := range bridgesPolicyTestHelpersDomains { t.Run(domain, func(t *testing.T) { From 7576fc72529d8509b698ad8fd39b76bb6b14208a Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 15:11:37 +0200 Subject: [PATCH 005/127] feat(enginenetx): add support for filtering tactics --- internal/enginenetx/httpsdialer.go | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index d36def5618..507884fa7f 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -209,7 +209,7 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo // The emitter will emit tactics and then close the channel when done. We spawn 16 workers // that handle tactics in parallel and post results on the collector channel. 
- emitter := hd.policy.LookupTactics(ctx, hostname, port) + emitter := httpsFilterTactics(hd.policy.LookupTactics(ctx, hostname, port)) collector := make(chan *httpsDialerErrorOrConn) joiner := make(chan any) const parallelism = 16 @@ -245,6 +245,31 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo return httpsDialerReduceResult(connv, errorv) } +// httpsFilterTactics filters the tactics and rewrites their InitialDelay. +func httpsFilterTactics(input <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { + output := make(chan *httpsDialerTactic) + go func() { + + // make sure we close output chan + defer close(output) + + index := 0 + for tx := range input { + // rewrite the delays + tx.InitialDelay = happyEyeballsDelay(index) + index++ + + // TODO(bassosimone): here we should also avoid connecting + // to TCP endpoints that are unreachable + + // emit the tactic + output <- tx + } + + }() + return output +} + // httpsDialerReduceResult returns either an established conn or an error, using [errDNSNoAnswer] in // case the list of connections and the list of errors are empty. func httpsDialerReduceResult(connv []model.TLSConn, errorv []error) (model.TLSConn, error) { From 8120c06edc704fe3f1cbf64f48eaaf65619eb3d1 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 15:21:43 +0200 Subject: [PATCH 006/127] refactor: rename function --- internal/enginenetx/statspolicy.go | 5 +++-- internal/enginenetx/statspolicy_test.go | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index c300a33689..e6728c5dbc 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -65,7 +65,7 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str // known-to-work cached tactics have become obsolete. 
// give priority to what we know from stats - for _, t := range statsPolicyPostProcessTactics(p.Stats.LookupTactics(domain, port)) { + for _, t := range statsPolicyFilterStatsTactics(p.Stats.LookupTactics(domain, port)) { maybeEmitTactic(t) } @@ -78,7 +78,8 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str return out } -func statsPolicyPostProcessTactics(tactics []*statsTactic, good bool) (out []*httpsDialerTactic) { +// statsPolicyFilterStatsTactics filters the tactics generated by consulting the stats. +func statsPolicyFilterStatsTactics(tactics []*statsTactic, good bool) (out []*httpsDialerTactic) { // when good is false, it means p.Stats.LookupTactics failed if !good { return diff --git a/internal/enginenetx/statspolicy_test.go b/internal/enginenetx/statspolicy_test.go index e7c66514e9..31880dccf6 100644 --- a/internal/enginenetx/statspolicy_test.go +++ b/internal/enginenetx/statspolicy_test.go @@ -319,9 +319,9 @@ func (p *mocksPolicy) LookupTactics(ctx context.Context, domain string, port str return p.MockLookupTactics(ctx, domain, port) } -func TestStatsPolicyPostProcessTactics(t *testing.T) { +func TestStatsPolicyFilterStatsTactics(t *testing.T) { t.Run("we do nothing when good is false", func(t *testing.T) { - tactics := statsPolicyPostProcessTactics(nil, false) + tactics := statsPolicyFilterStatsTactics(nil, false) if len(tactics) != 0 { t.Fatal("expected zero-lenght return value") } @@ -390,7 +390,7 @@ func TestStatsPolicyPostProcessTactics(t *testing.T) { }, } - got := statsPolicyPostProcessTactics(input, true) + got := statsPolicyFilterStatsTactics(input, true) if len(got) != 1 { t.Fatal("expected just one element") From 32072558d025d132c4b7c3200a7a95fd23a01fba Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 17:27:24 +0200 Subject: [PATCH 007/127] feat: improve TCP connect statistics --- internal/enginenetx/httpsdialer.go | 10 +++++++--- internal/enginenetx/statsmanager.go | 10 ++++++++++ 2 files 
changed, 17 insertions(+), 3 deletions(-) diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index 507884fa7f..f75c1b9fb1 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -108,6 +108,7 @@ type httpsDialerEventsHandler interface { // case, obviously, you MUST NOT consider the tactic failed. OnStarting(tactic *httpsDialerTactic) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) + OnTCPConnectSuccess(tactic *httpsDialerTactic) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) OnTLSVerifyError(tactic *httpsDialerTactic, err error) OnSuccess(tactic *httpsDialerTactic) @@ -259,9 +260,6 @@ func httpsFilterTactics(input <-chan *httpsDialerTactic) <-chan *httpsDialerTact tx.InitialDelay = happyEyeballsDelay(index) index++ - // TODO(bassosimone): here we should also avoid connecting - // to TCP endpoints that are unreachable - // emit the tactic output <- tx } @@ -344,6 +342,12 @@ func (hd *httpsDialer) dialTLS( return nil, err } + // track successful TCP connections such that we have stats + // regarding which endpoints work as intended: if we can't dial + // a specific TCP endpoint a couple of times, it doesn't make + // sense to continue trying with different SNIs. + hd.stats.OnTCPConnectSuccess(tactic) + // create TLS configuration tlsConfig := &tls.Config{ InsecureSkipVerify: true, // Note: we're going to verify at the end of the func! diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index e82bcd0d0b..cf0bee3482 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -39,6 +39,11 @@ func (*nullStatsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDia // nothing } +// OnTCPConnectSuccess implements httpsDialerEventsHandler. 
+func (*nullStatsManager) OnTCPConnectSuccess(tactic *httpsDialerTactic) { + // nothing +} + // OnTLSHandshakeError implements httpsDialerEventsHandler. func (*nullStatsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing @@ -524,6 +529,11 @@ func (mt *statsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDial statsSafeIncrementMapStringInt64(&record.HistoTCPConnectError, err.Error()) } +// OnTCPConnectSuccess implements httpsDialerEventsHandler. +func (mt *statsManager) OnTCPConnectSuccess(tactic *httpsDialerTactic) { + // TODO(bassosimone): implement this method +} + // OnTLSHandshakeError implements httpsDialerEventsHandler. func (mt *statsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // get exclusive access From 5618b72f28a072abc04a0e5da81c072fd9f40a8e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 15 Apr 2024 17:28:22 +0200 Subject: [PATCH 008/127] fix previous --- internal/enginenetx/httpsdialer_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/enginenetx/httpsdialer_test.go b/internal/enginenetx/httpsdialer_test.go index e44ec96a54..f5f1b785ea 100644 --- a/internal/enginenetx/httpsdialer_test.go +++ b/internal/enginenetx/httpsdialer_test.go @@ -50,6 +50,11 @@ func (*httpsDialerCancelingContextStatsTracker) OnTCPConnectError(ctx context.Co // nothing } +// OnTCPConnectSuccess implements httpsDialerEventsHandler. +func (*httpsDialerCancelingContextStatsTracker) OnTCPConnectSuccess(tactic *httpsDialerTactic) { + // nothing +} + // OnTLSHandshakeError implements httpsDialerEventsHandler. 
func (*httpsDialerCancelingContextStatsTracker) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing From 2153e3695ee699dea5ac3d71d393c641450060dc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 09:52:24 +0200 Subject: [PATCH 009/127] feat: start to prepare for filtering endpoints --- internal/enginenetx/statsmanager.go | 6 ++++ internal/enginenetx/statspolicy.go | 47 ++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index cf0bee3482..51b1c0def9 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -681,3 +681,9 @@ func (mt *statsManager) LookupTactics(domain string, port string) ([]*statsTacti } return out, len(out) > 0 } + +// IsTCPEndpointAccessible returns whether a given TCP endpoint has recently been accessible. +func (mt *statsManager) IsTCPEndpointAccessible(address, port string) bool { + // TODO(bassosimone): implement + return true +} diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index e6728c5dbc..6e9da17239 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -43,21 +43,19 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str maybeEmitTactic := func(t *httpsDialerTactic) { // as a safety mechanism let's gracefully handle the // case in which the tactic is nil - if t == nil { - return + if t != nil { + // handle the case in which we already emitted a policy + key := t.tacticSummaryKey() + if uniq[key] > 0 { + return + } + uniq[key]++ + + // 🚀!!! + t.InitialDelay = happyEyeballsDelay(index) + index += 1 + out <- t } - - // handle the case in which we already emitted a policy - key := t.tacticSummaryKey() - if uniq[key] > 0 { - return - } - uniq[key]++ - - // 🚀!!! 
- t.InitialDelay = happyEyeballsDelay(index) - index += 1 - out <- t } // TODO(bassosimone): as an optimization, here we could mix cached tactics @@ -70,7 +68,7 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str } // fallback to the secondary policy - for t := range p.Fallback.LookupTactics(ctx, domain, port) { + for t := range p.onlyAccessibleEndpoints(p.Fallback.LookupTactics(ctx, domain, port)) { maybeEmitTactic(t) } }() @@ -99,3 +97,22 @@ func statsPolicyFilterStatsTactics(tactics []*statsTactic, good bool) (out []*ht } return } + +// onlyAccessibleEndpoints uses stats-based knowledge to exclude using endpoints that +// have recently been observed as being failing during TCP connect. +func (p *statsPolicy) onlyAccessibleEndpoints(input <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { + output := make(chan *httpsDialerTactic) + go func() { + // make sure we close the output channel + defer close(output) + + // avoid including tactics using endpoints that are consistently failing + for tx := range input { + if tx == nil || !p.Stats.IsTCPEndpointAccessible(tx.Address, tx.Port) { + continue + } + output <- tx + } + }() + return output +} From e5cdbb0247e1850da493b0d610966c0fd6fea756 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:25:17 +0200 Subject: [PATCH 010/127] ongoing work while documenting and clarifying --- internal/enginenetx/DESIGN.md | 344 +++++++++++++++++++++++++++ internal/enginenetx/bridgespolicy.go | 68 +++--- internal/enginenetx/dnspolicy.go | 4 +- internal/enginenetx/httpsdialer.go | 30 ++- internal/enginenetx/remix.go | 84 +++++++ internal/enginenetx/statspolicy.go | 68 +++--- 6 files changed, 511 insertions(+), 87 deletions(-) create mode 100644 internal/enginenetx/DESIGN.md create mode 100644 internal/enginenetx/remix.go diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md new file mode 100644 index 0000000000..9db2ae3e65 --- /dev/null +++ 
b/internal/enginenetx/DESIGN.md @@ -0,0 +1,344 @@ +# Engine Network Extensions + +This file documents the [./internal/enginenetx](.) package design. The content is current +as of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). + +## Design Goals + +We define "bridge" an IP address with the following properties: + +1. the IP address is not expected to change; + +2. the IP address listens on port 443 and accepts _any_ incoming SNI; + +3. the webserver on port 443 proxies to the OONI APIs. + +The notion of bridge is central to the `bridgePolicy` as we will shortly see. Before +discussing each policy, though, we need to explain how we create TLS connections. + +We also assume that the Web Connectivity test helpers (TH), while they may +have changing-in-time IP addresses, could accept any SNIs. + +Considering the definition of bridges and the assumptions about the THs, we aim to: + +1. overcome DNS-based censorship for "api.ooni.io" by hardcoding known-good +bridges IP addresses inside the codebase; + +2. overcome SNI-based censorship for "api.ooni.io" and test helpers by choosing +from a pre-defined list of SNIs to use _instead_; + +3. introduce state by remembering which tactics for creating TLS connections +have worked in the past and trying to reuse them in the future; + +4. allow for relatively fast recovery in case of network-condition changes +by remixing known-good solutions and bridge strategies with more conventional +approaches relying on using the DNS and sending the true SNI; + +5. adopt a censored-users-first approach where the strategy we use by default +should allow for smooth operations _for them_ rather than prioritizing the +non-censored case and using additional tactics as the fallback; + +6. try to defer sending the true `SNI` on the wire, therefore trying to +avoid triggering potential residual censorship for the TCP endpoint; + +7. 
provide a configuration file (`$OONI_HOME/engine/bridges.conf`) such that +users can manually configure TLS dialing for any backend service and third party +service that may be required by OONI Probe, therefore allowing to bypass also +IP-based restrictions as long as a bridge exists. + +The rest of this document explains how we designed for achieving these goals. + +## High-Level API + +The purpose of the `enginenetx` package is to provide a `*Network` object from which +consumers can obtain a `model.HTTPTransport` or an `*http.Client`: + +```Go +func (n *Network) HTTPTransport() model.HTTPTransport +func (n *Network) NewHTTPClient() *http.Client +``` + +The returned `*http.Client` uses an internal transport, which is returned when the +package user invokes the `HTTPTransport` method. + +In turn, the internal transport is configured to significantly customize creating +TLS connections, so as to meet the objectives explained before. + +## Creating TLS Connections + +In [network.go](network.go), `newHTTPSDialerPolicy` determines the dialing policy +depending on the arguments passed to the `NewNetwork` constructor: + +1. if the `proxyURL` argument is not `nil`, we use the `dnsPolicy` alone; + +2. otherwise, we compose policies as illustrated by the following diagram: + +``` ++------------+ +-------------+ +--------------+ +-----------+ +| userPolicy | --> | statsPolicy | --> | bridgePolicy | --> | dnsPolicy | ++------------+ +-------------+ +--------------+ +-----------+ +``` + +As a first approximation, we can consider each arrow in the diagram to mean that +a policy tries to produce instructions for creating a connection and then falls back +to the subsequent policy in case it cannot generate instructions. In reality, some +policies implement a more complex strategy where they remix tactics they know and +tactics provided by the fallback, to more quickly recover from changes. 
+ +## Instructions For Dialing + +Each policy implements the following interface (defined in [httpsdialer.go](httpsdialer.go)): + +```Go +type httpsDialerPolicy interface { + LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic +} +``` + +The `LookupTactics` operation is _conceptually_ similar to +[net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost), because +both operations map a domain name to IP addresses to connect to. However, +there are also some key differences, namely: + +1. `LookupTactics` is domain _and_ port specific, while `LookupHost` +only takes in input the domain name to resolve; + +2. `LookupTactics` returns _a stream_ of viable "tactics" to establish +TLS connections, while `LookupHost` returns a list of IP addresses. + +The second point, in particular, is crucial. The design of `LookupTactics` is +such that we can start attempting to dial as soon as we have some tactics +to try, while more advanced tactics are generated. A composed `httpsDialerPolicy` can, +in fact, start multiple child `LookupTactics` operations and then return them to the +caller as soon as they are ready, thus avoiding to block dialing until all of the +child operations are ready. + +Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, +eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) +to get IP addresses using the DNS used by the `engine.Session`. Typically, such a +resolver, in turn, composes several DNS-over-HTTPS resolvers with the `getaddrinfo` resolver. 
+ +A "tactic" looks like this: + +```Go +type httpsDialerTactic struct { + Address string + + InitialDelay time.Duration + + Port string + + SNI string + + VerifyHostname string +} +``` + +Here's an explanation of why we have each field in the struct: + +- `Address` and `Port` qualify the TCP endpoint; + +- `InitialDelay` allows a policy to delay a connect operation to implement +something similar to [happy eyeballs](https://en.wikipedia.org/wiki/Happy_Eyeballs); + +- `SNI` is the `SNI` to send as part of the TLS ClientHello; + +- `VerifyHostname` is the hostname to use for TLS certificate verification. + +The separation of `SNI` and `VerifyHostname` is what allows us to send an innocuous +SNI over the network and then verify the certificate using the real SNI. + +## HTTPS Dialer + +Creating TLS connections is implemented by `(*httpsDialer).DialTLSContext`, also +part of [httpsdialer.go](httpsdialer.go). This method _morally_ implements the following +algorithm (where we omitted error handling and returning a conn for simplicity): + +```Go +index := 0 +for tx := range policy.LookupTactics() { + // avoid trying the same policy twice + if isDuplicate(tx) { + continue + } + + // create delay for this tactic + delay := happyEyeballsDelay(index) + index++ + + // dial in a background goroutine (simplified algorithm) + go func(tx, delay) { + time.Sleep(delay) + conn := tcpConnect(tx.Address, tx.Port) + tconn := tlsHandshake(conn, tx.SNI, false /* skip verification */) + verifyHostname(tconn, tx.VerifyHostname) + }(tx, delay) +} +``` + +When a connection attempt succeeds, we use a cancellable `context.Context` to cancel +all the other connect attempts that may be in progress (not shown in the above +algorithm for simplicity). If all connection attempts fail, instead, we return a +composed error (again, not shown above for simplicity). 
+ +By using a modified happy eyeballs with baseline values that take into account +the overall time to perform a TLS handshake, we attempt to strike a balance +between simplicity (i.e., running operations sequentially), performance (running +them in parallel) and network load (hence the usage of happy eyeballs). + +Additionally, the dialing algorithm keeps statistics about the operations it +performs using an `httpsDialerEventsHandler` type: + +```Go +type httpsDialerEventsHandler interface { + OnStarting(tactic *httpsDialerTactic) + OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) + OnTCPConnectSuccess(tactic *httpsDialerTactic) + OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) + OnTLSVerifyError(tactic *httpsDialerTactic, err error) + OnSuccess(tactic *httpsDialerTactic) +} +``` + +These statistics contribute to construct knowledge about the network +conditions and influence the choice of policies. + +## dnsPolicy + +The `dnsPolicy` is implemented by [dnspolicy.go](dnspolicy.go). + +Its `LookupTactics` algorithm is quite simple: + +1. we arrange for short circuiting cases in which the `domain` argument +contains an IP address to "resolve" exactly that IP address (thus emulating +what `getaddrinfo` would do and avoiding to call onto the more-complex +underlying composed DNS resolver); + +2. for each resolved address, we generate tactics in the most straightforward +way, e.g., where the `SNI` and `VerifyHostname` equal the `domain`. + +Using this policy alone is functionally equivalent to combining a DNS lookup +operation with TCP connect and TLS handshake operations. + +## userPolicy + +The `userPolicy` is implemented by [userpolicy.go](userpolicy.go). + +When constructing a `userPolicy` with `newUserPolicy` we indicate a fallback +`httpsDialerPolicy` to use if there is no `$OONI_HOME/engine/bridges.conf` file. 
+ +As of 2024-04-16, the structure of such a file is like in the following example: + +```JSON +{ + "DomainEndpoints": { + "api.ooni.io:443": [{ + "Address": "162.55.247.208", + "Port": "443", + "SNI": "www.example.com", + "VerifyHostname": "api.ooni.io" + }] + }, + "Version": 3 +} +``` + +The `newUserPolicy` constructor reads this file from disk on startup +and keeps its content in memory. + +`LookupTactics` will: + +1. check whether there's an entry for the given `domain` and `port` +inside the `DomainEndpoints` map; + +2. if there are no entries, fallback to the fallback `httpsDialerPolicy`; + +3. otherwise return all the tactic entries. + +Because `userPolicy` is user-configured, we _entirely bypass_ the +fallback policy when there's an user-configured entry. + +## statsPolicy + +The `statsPolicy` is implemented by [statspolicy.go](statspolicy.go). + +The general idea of this policy is that it depends on: + +1. a `*statsManager` that keeps persistent stats about tactics; + +2. a "fallback" policy. + +In principle, one would expect `LookupTactics` to first return all +the tactics we can see from the stats and then try tactics obtained +from the fallback policy. However, this simplified algorithm would +lead to suboptimal results in the following case: + +1. say there are 10 tactics for "api.ooni.io:443" that are bound +to a specific bridge address that has been discontinued; + +2. if we try all these 10 tactics before trying fallback tactics, we +would waste lots of time failing before falling back. + +Conversely, a better strategy is to remix tactics as implemented +by the [remix](remix.go) file: + +1. we take the first two tactics from the stats; + +2. then we take the first two tactics from the fallback; + +3. then we remix the rest, not caring much about whether we're +reading from the stats of from the fallback. 
+ +Because we sort tactics from the stats by our understanding of whether +they are working as intended, we'll prioritize what we know to be working, +but then we'll also throw some new tactics into the mix. + +As an additional optimization, when reading from the fallback, the +`statsPolicy` will automatically exclude TCP endpoints that have +failed recently during their TCP connect stage. If an IP address seems +IP blocked, it does not make sense to continue wasting time trying +to connect to it (a timeout is in the order of ~10s). + +## bridgePolicy + +The `bridgePolicy` is implemented by [bridgespolicy.go](bridgespolicy.go) and +rests on the assumptions made explicit in the design section. That is: + +1. that there is a _bridge_ for "api.ooni.io"; + +2. that the Web Connectivity Test Helpers accepts any SNI. + +Here we're also using the [remix.go](remix.go) algorithm to remix +two different sources of tactics: + +1. the `bridgesTacticsForDomain` only returns tactics for "api.ooni.io" +using existing knowledge of bridges and random SNIs; + +2. the `maybeRewriteTestHelpersTactics` method filters the results +coming from the fallback tactic such that, if we are connecting +to a known test-helper domain name, we're trying to hide its SNI. + +## Overall Algorithm + +**TODO(bassosimone)**: adapt the mixing algorithm to do exactly +this and make sure there are tests for this. + +Having discussed all the polices in isolation, it now seems useful +to describe what is the overall algorithm we want to achieve: + +1. if there is a `$OONI_HOME/engine/bridges.conf` with a valid entry +for the domain and port, use it without trying subsequent tactics; + +2. use the first two tactics coming from stats, if any; + +3. then use the first two tactics coming from bridges, if any; + +4. then use the first two tactics coming from the DNS; + +5. after that, randomly remix the remaining tactics. + +Now, it only remains to discuss managing stats. 
+ +## Managing Stats + +TODO diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 266ab5b1d1..0ed861d7c7 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -25,50 +25,39 @@ type bridgesPolicy struct { var _ httpsDialerPolicy = &bridgesPolicy{} -// maxInitialBridgeTactics is the number of initial bridge tactics we return. -const maxInitialBridgeTactics = 4 - // LookupTactics implements httpsDialerPolicy. +// +// The remix policy of this operation is such that the following happens: +// +// 1. we emit the first two bridge tactics, if any; +// +// 2. we emit the first two fallback (usually DNS) tactics, if any; +// +// 3. we randomly remix the rest. func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic { - out := make(chan *httpsDialerTactic) - - go func() { - defer close(out) // tell the parent when we're done - index := 0 - - // Get channel for reading bridge tactics. - bridges := p.bridgesTacticsForDomain(domain, port) + rx := &remix{ + // Prioritize emitting tactics for bridges. Currently we only have bridges + // for "api.ooni.io", therefore, for all other hosts this arm ends up + // returning a channel that will be immediately closed. + Left: p.bridgesTacticsForDomain(domain, port), - // Emit the first N bridge tactics. Note that tactics are empty if there - // is no bridge configured for the given domain and port. - for tx := range bridges { - tx.InitialDelay = happyEyeballsDelay(index) - index += 1 - out <- tx - if index >= maxInitialBridgeTactics { - break - } - } - - // Now fallback to get more tactics (typically via DNS). + // This ensures we read the first two bridge tactics. // - // We wrap whatever the underlying policy returns us with some - // extra logic for better communicating with test helpers. 
- for tx := range p.maybeRewriteTestHelpersTactics(p.Fallback.LookupTactics(ctx, domain, port)) { - tx.InitialDelay = happyEyeballsDelay(index) - index += 1 - out <- tx - } + // Note: modifying this field likely indicates you also need to modify the + // corresponding remix{} instantiation in statspolicy.go. + ReadFromLeft: 2, - // Now finish emitting bridge tactics. - for tx := range bridges { - tx.InitialDelay = happyEyeballsDelay(index) - index += 1 - out <- tx - } - }() + // Mix the above with using the fallback policy and rewriting the SNIs + // used by the test helpers to avoid exposing the real SNIs. + Right: p.maybeRewriteTestHelpersTactics(p.Fallback.LookupTactics(ctx, domain, port)), - return out + // This ensures we read the first two DNS tactics. + // + // Note: modifying this field likely indicates you also need to modify the + // corresponding remix{} instantiation in statspolicy.go. + ReadFromRight: 2, + } + return rx.Run() } var bridgesPolicyTestHelpersDomains = []string{ @@ -106,6 +95,9 @@ func (p *bridgesPolicy) maybeRewriteTestHelpersTactics(input <-chan *httpsDialer continue } + // TODO(bassosimone): potentially we should also throw the real SNI + // into the mix, but it should not be the first SNI we emit. + // This is the case where we're connecting to a test helper. Let's try // to produce policies hiding the SNI to censoring middleboxes. 
for _, sni := range p.bridgesDomainsInRandomOrder() { diff --git a/internal/enginenetx/dnspolicy.go b/internal/enginenetx/dnspolicy.go index 3812230739..39dc5fb14a 100644 --- a/internal/enginenetx/dnspolicy.go +++ b/internal/enginenetx/dnspolicy.go @@ -56,10 +56,10 @@ func (p *dnsPolicy) LookupTactics( } // The tactics we generate here have SNI == VerifyHostname == domain - for idx, addr := range addrs { + for _, addr := range addrs { tactic := &httpsDialerTactic{ Address: addr, - InitialDelay: happyEyeballsDelay(idx), + InitialDelay: 0, // set when dialing Port: port, SNI: domain, VerifyHostname: domain, diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index f75c1b9fb1..e39e9ba246 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -237,8 +237,10 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo continue } - // Save the conn and tell goroutines to stop ASAP + // Save the conn connv = append(connv, result.Conn) + + // Interrupt other concurrent dialing attempts cancel() } } @@ -254,14 +256,28 @@ func httpsFilterTactics(input <-chan *httpsDialerTactic) <-chan *httpsDialerTact // make sure we close output chan defer close(output) + // useful to make sure we don't emit two equal policy in a single run + uniq := make(map[string]int) + index := 0 for tx := range input { - // rewrite the delays - tx.InitialDelay = happyEyeballsDelay(index) - index++ - - // emit the tactic - output <- tx + // as a safety mechanism let's gracefully handle the + // case in which the tactic is nil + if tx != nil { + // handle the case in which we already emitted a policy + key := tx.tacticSummaryKey() + if uniq[key] > 0 { + return + } + uniq[key]++ + + // rewrite the delays + tx.InitialDelay = happyEyeballsDelay(index) + index++ + + // emit the tactic + output <- tx + } } }() diff --git a/internal/enginenetx/remix.go b/internal/enginenetx/remix.go new file mode 100644 index 
0000000000..311a071ebf --- /dev/null +++ b/internal/enginenetx/remix.go @@ -0,0 +1,84 @@ +package enginenetx + +import "sync" + +// remix remixes the tactics emitted on Left and Right. +type remix struct { + // Left is the left channel from which we read the first ReadFromLeft tactics. + Left <-chan *httpsDialerTactic + + // ReadFromLeft is the number of entries to read from Left at the beginning. + ReadFromLeft int + + // Right is the right channel from which we read the first ReadFromRight tactics + // once we've read ReadFromLeft tactics from the Left channel. + Right <-chan *httpsDialerTactic + + // ReadFromRight is the number of tactics to read from Right once we + // have read ReadFromLeft tactics from the Left channel. + ReadFromRight int +} + +// Run remixes the Left and Right channel according to its configuration. +// +// The returned channel is closed when both Left and Right are closed. +func (rx *remix) Run() <-chan *httpsDialerTactic { + output := make(chan *httpsDialerTactic) + go func() { + // close the output channel when done + defer close(output) + + // emit the first N tactics from the left channel + remixEmitN(rx.Left, rx.ReadFromLeft, output) + + // emit the first M tactics from the right channel + remixEmitN(rx.Right, rx.ReadFromRight, output) + + // remix all remaining entries + for tx := range remixDrainBoth(rx.Left, rx.Right) { + output <- tx + } + }() + return output +} + +func remixEmitN(input <-chan *httpsDialerTactic, numToRead int, output chan<- *httpsDialerTactic) { + for idx := 0; idx < numToRead; idx++ { + tactic, good := <-input + if !good { + return + } + output <- tactic + } +} + +func remixDrainBoth(left, right <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { + output := make(chan *httpsDialerTactic) + go func() { + // read from left + waitg := &sync.WaitGroup{} + waitg.Add(1) + go func() { + defer waitg.Done() + for tx := range left { + output <- tx + } + }() + + // read from right + waitg.Add(1) + go func() { + defer 
waitg.Done() + for tx := range right { + output <- tx + } + }() + + // close when done + go func() { + waitg.Wait() + close(output) + }() + }() + return output +} diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 6e9da17239..ff5e7f10a8 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -30,50 +30,38 @@ var _ httpsDialerPolicy = &statsPolicy{} // LookupTactics implements HTTPSDialerPolicy. func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { - out := make(chan *httpsDialerTactic) + rx := &remix{ + // Give priority to what we know from stats + Left: statsPolicyStream(statsPolicyFilterStatsTactics(p.Stats.LookupTactics(domain, port))), + + // We make sure we emit two stats-based tactics if possible + ReadFromLeft: 2, + + // And remix it with the fallback + Right: p.onlyAccessibleEndpoints(p.Fallback.LookupTactics(ctx, domain, port)), + + // Under the assumption that below us we have bridgePolicy composed with DNS policy + // and that the stage below emits two bridge tactics, if possible, followed by two + // additional DNS tactics, if possible, we need to allow for four tactics to pass through + // befofe we start remixing from the two channels. + // + // Note: modifying this field likely indicates you also need to modify the + // corresponding remix{} instantiation in bridgespolicy.go. + ReadFromRight: 4, + } + return rx.Run() +} +// statsPolicyStream streams a vector of tactics. 
+func statsPolicyStream(txs []*httpsDialerTactic) <-chan *httpsDialerTactic { + output := make(chan *httpsDialerTactic) go func() { - defer close(out) // make sure the parent knows when we're done - index := 0 - - // useful to make sure we don't emit two equal policy in a single run - uniq := make(map[string]int) - - // function that emits a given tactic unless we already emitted it - maybeEmitTactic := func(t *httpsDialerTactic) { - // as a safety mechanism let's gracefully handle the - // case in which the tactic is nil - if t != nil { - // handle the case in which we already emitted a policy - key := t.tacticSummaryKey() - if uniq[key] > 0 { - return - } - uniq[key]++ - - // 🚀!!! - t.InitialDelay = happyEyeballsDelay(index) - index += 1 - out <- t - } - } - - // TODO(bassosimone): as an optimization, here we could mix cached tactics - // and fallback tactics to avoid slow bootstraps in the event in which - // known-to-work cached tactics have become obsolete. - - // give priority to what we know from stats - for _, t := range statsPolicyFilterStatsTactics(p.Stats.LookupTactics(domain, port)) { - maybeEmitTactic(t) - } - - // fallback to the secondary policy - for t := range p.onlyAccessibleEndpoints(p.Fallback.LookupTactics(ctx, domain, port)) { - maybeEmitTactic(t) + defer close(output) + for _, tx := range txs { + output <- tx } }() - - return out + return output } // statsPolicyFilterStatsTactics filters the tactics generated by consulting the stats. From 089f70b65d103eb486f50d4582e8a971f8ce4c7e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:27:43 +0200 Subject: [PATCH 011/127] x --- internal/enginenetx/DESIGN.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 9db2ae3e65..f92f21efb4 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -13,11 +13,7 @@ We define "bridge" an IP address with the following properties: 3. 
the webserver on port 443 proxies to the OONI APIs. -The notion of bridge is central to the `bridgePolicy` as we will shortly see. Before -discussing each policy, though, we need to explain how we create TLS connections. - -We also assume that the Web Connectivity test helpers (TH), while they may -have changing-in-time IP addresses, could accept any SNIs. +We also assume that the Web Connectivity test helpers (TH) could accept any SNIs. Considering the definition of bridges and the assumptions about the THs, we aim to: From 20e71e837cb2043f69efe71ce643445d9051b56d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:30:40 +0200 Subject: [PATCH 012/127] [ci skip] --- internal/enginenetx/DESIGN.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index f92f21efb4..b7a5fe2e3d 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -15,7 +15,7 @@ We define "bridge" an IP address with the following properties: We also assume that the Web Connectivity test helpers (TH) could accept any SNIs. -Considering the definition of bridges and the assumptions about the THs, we aim to: +Considering all of this, this package aims to: 1. overcome DNS-based censorship for "api.ooni.io" by hardcoding known-good bridges IP addresses inside the codebase; @@ -24,7 +24,7 @@ bridges IP addresses inside the codebase; from a pre-defined list of SNIs to use _instead_; 3. introduce state by remembering which tactics for creating TLS connections -have worked in the past and trying to reuse them in the future; +have worked in the past and trying to reuse them; 4. 
allow for relatively fast recovery in case of network-condition changes by remixing known-good solutions and bridge strategies with more conventional @@ -35,19 +35,19 @@ should allow for smooth operations _for them_ rather than prioritizing the non-censored case and using additional tactics as the fallback; 6. try to defer sending the true `SNI` on the wire, therefore trying to -avoid triggering potential residual censorship for the TCP endpoint; +avoid triggering potential residual censorship; 7. provide a configuration file (`$OONI_HOME/engine/bridges.conf`) such that users can manually configure TLS dialing for any backend service and third party service that may be required by OONI Probe, therefore allowing to bypass also -IP-based restrictions as long as a bridge exists. +IP-based restrictions as long as a known-good bridge exists. The rest of this document explains how we designed for achieving these goals. ## High-Level API -The purpose of the `enginenetx` package is to provide a `*Network` object from which -consumers can obtain a `model.HTTPTransport` or an `*http.Client`: +The purpose of the `enginenetx` package is to provide a `*Network` object from which consumers +can obtain a `model.HTTPTransport` and `*http.Client` for HTTP operations: ```Go func (n *Network) HTTPTransport() model.HTTPTransport @@ -55,10 +55,8 @@ func (n *Network) NewHTTPClient() *http.Client ``` The returned `*http.Client` uses an internal transport, which is returned when the -package user invokes the `HTTPTransport` method. - -In turn, the internal transport is configured to significantly customize creating -TLS connections, so to meet the objectives explained before. +package user invokes the `HTTPTransport` method. In turn, the internal transport is configured +to significantly customize creating TLS connections, so to meet the objectives explained before. 
## Creating TLS Connections From b5b2e496b35bc5d49402a61d2997456e9ff23a3f Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:31:14 +0200 Subject: [PATCH 013/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index b7a5fe2e3d..acdd9e3099 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -55,8 +55,8 @@ func (n *Network) NewHTTPClient() *http.Client ``` The returned `*http.Client` uses an internal transport, which is returned when the -package user invokes the `HTTPTransport` method. In turn, the internal transport is configured -to significantly customize creating TLS connections, so to meet the objectives explained before. +package user invokes the `HTTPTransport` method. In turn, the internal transport customizes +creating TLS connections, to meet the objectives explained before. ## Creating TLS Connections From 94eb284cb6445b6cea8b2c46042e241264f72b90 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:32:02 +0200 Subject: [PATCH 014/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index acdd9e3099..6793aa715e 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -60,8 +60,8 @@ creating TLS connections, to meet the objectives explained before. ## Creating TLS Connections -In [network.go](network.go), `newHTTPSDialerPolicy` determines the dialing policy -depending on the arguments passed to the `NewNetwork` constructor: +In [network.go](network.go), `newHTTPSDialerPolicy` configures the dialing policy +depending on the arguments passed to `NewNetwork`: 1.
if the `proxyURL` argument is not `nil`, we use the `dnsPolicy` alone; From 90601c6aa05b9872188d2d0e7519154ce840e8aa Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:32:48 +0200 Subject: [PATCH 015/127] [ci skip] --- internal/enginenetx/DESIGN.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 6793aa715e..f67e55afff 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -73,11 +73,9 @@ depending on the arguments passed `NewNetwork`: +------------+ +-------------+ +--------------+ +-----------+ ``` -As a first approximation, we can consider each arrow in the diagram to mean that -a policy tries to produce instructions for creating a connection and then falls back -to the subsequent policy in case it cannot generate instructions. In reality, some -policies implement a more complex strategy where they remix tactics they know and -tactics provided by the fallback, to more quickly recover from changes. +As a first approximation, we can consider each arrow in the diagram to mean "fall +back to". In reality, some policies implement a more complex strategy where they remix +tactics they know and tactics provided by the downstream policy. ## Instructions For Dialing From ef8fdfe8ac571f55556e12298b05482c3345ebd7 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:33:37 +0200 Subject: [PATCH 016/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index f67e55afff..3d22f04e7b 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -95,8 +95,8 @@ there are also some key differences, namely: 1. `LookupTactics` is domain _and_ port specific, while `LookupHost` only takes in input the domain name to resolve; -2. 
`LookupTactics` returns _a stream_ of viable "tactics" to establish -TLS connections, while `LookupHost` returns a list of IP addresses. +2. `LookupTactics` returns _a stream_ of viable "tactics", while `LookupHost` +returns a list of IP addresses. The second point, in particular, is crucial. The design of `LookupTactics` is such that we can start attempting to dial as soon as we have some tactics From aa65cb75b7d3816948c3ce6b7958b66cbda9d597 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:34:25 +0200 Subject: [PATCH 017/127] [ci skip] --- internal/enginenetx/DESIGN.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 3d22f04e7b..aaf3d114db 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -100,10 +100,9 @@ returns a list of IP addresses. The second point, in particular, is crucial. The design of `LookupTactics` is such that we can start attempting to dial as soon as we have some tactics -to try, while more advanced tactics are generated. A composed `httpsDialerPolicy` can, -in fact, start multiple child `LookupTactics` operations and then return them to the -caller as soon as they are ready, thus avoiding to block dialing until all of the -child operations are ready. +to try. A composed `httpsDialerPolicy` can, in fact, start multiple child `LookupTactics` +operations and then return them to the caller as soon as they are ready, thus avoiding +to block dialing until _all_ the child operations are complete. 
Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) From 28a6265d8340891df60d3f57b5269d42ae11702a Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:34:56 +0200 Subject: [PATCH 018/127] [ci skip] --- internal/enginenetx/DESIGN.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index aaf3d114db..8ebd180426 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -106,8 +106,9 @@ to block dialing until _all_ the child operations are complete. Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) -to get IP addresses using the DNS used by the `engine.Session`. Typically, such a -resolver, in turn, composes several DNS-over-HTTPS resolvers with the `getaddrinfo` resolver. +to get IP addresses using the DNS used by the `*engine.Session` type. Typically, such a +resolver, in turn, composes several DNS-over-HTTPS resolvers with the fallback +`getaddrinfo` resolver. A "tactic" looks like this: From f4522084d7d5002a381edd204184bfcba85f2ba4 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:35:22 +0200 Subject: [PATCH 019/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 8ebd180426..160141ca79 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -108,7 +108,7 @@ Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) to get IP addresses using the DNS used by the `*engine.Session` type. 
Typically, such a resolver, in turn, composes several DNS-over-HTTPS resolvers with the fallback -`getaddrinfo` resolver. +`getaddrinfo` resolver, and remembers which resolvers work. A "tactic" looks like this: From 119e6102d9fbeba7394e790cece2b0357a338054 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:35:54 +0200 Subject: [PATCH 020/127] [ci skip] --- internal/enginenetx/DESIGN.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 160141ca79..a92ad5fc30 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -114,14 +114,10 @@ A "tactic" looks like this: ```Go type httpsDialerTactic struct { - Address string - - InitialDelay time.Duration - - Port string - - SNI string - + Address string + InitialDelay time.Duration + Port string + SNI string VerifyHostname string } ``` From f3fb1dd74ba1e5596f30676bacd02f13def81603 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:36:38 +0200 Subject: [PATCH 021/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index a92ad5fc30..bfd8edcd9f 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -134,7 +134,8 @@ something similar to [happy eyeballs](https://en.wikipedia.org/wiki/Happy_Eyebal - `VerifyHostname` is the hostname to use for TLS certificate verification. The separation of `SNI` and `VerifyHostname` is what allows us to send an innocuous -SNI over the network and then verify the certificate using the real SNI. +SNI over the network and then verify the certificate using the real SNI after a +`skipVerify=true` TLS handshake has completed.
## HTTPS Dialer From ce6ec84a7dec93552f9584dad3748cf9e9c04e5f Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:37:52 +0200 Subject: [PATCH 022/127] [ci skip] --- internal/enginenetx/DESIGN.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index bfd8edcd9f..9bb33b3a5b 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -165,10 +165,10 @@ for tx := range policy.LookupTactics() { } ``` -When a connection attempt succeds, we use cancellable `context.Context` to cancel -all the other connect attempts that may be in progress (not shown in the above +When a TLS connection attempt succeeds, we use a cancellable `context.Context` to cancel +all the other TLS connect attempts that may be in progress (not shown in the above algorithm for simplicity). If all connection attempts fail, instead, we return a -composed error (again, not showed above for simplicity). +composed error including all errors (again, not shown above for simplicity). By using a modified happy eyeballs with baseline values that take into account the overall time to perform a TLS handshake, we attempt to strike a balance From 4f63b6080807932bdf652b67b748c1f1ba3d1b53 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:39:19 +0200 Subject: [PATCH 023/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 9bb33b3a5b..9f8b6a2f4c 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -173,7 +173,9 @@ composed error including all errors (again, not shown above for simplicity).
By using a modified happy eyeballs with baseline values that take into account the overall time to perform a TLS handshake, we attempt to strike a balance between simplicity (i.e., running operations sequentially), performance (running -them in parallel) and network load (hence the usage of happy eyeballs). +them in parallel) and network load: there is some parallelism but operations +are reasonably spaced in time with increasing delays. This is implemented by the +[happyeyeballs.go](happyeyeballs.go) file. Additionally, the dialing algorithm keeps statistics about the operations it performs using an `httpsDialerEventsHandler` type: From 45e655c6fa34c7fd884e6391e3f402b52ab2be9e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:41:50 +0200 Subject: [PATCH 024/127] x --- internal/enginenetx/DESIGN.md | 14 +++++++++++++- internal/enginenetx/happyeyeballs.go | 2 +- internal/enginenetx/happyeyeballs_test.go | 12 ++++++------ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 9f8b6a2f4c..32bf7d66bc 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -175,7 +175,19 @@ the overall time to perform a TLS handshake, we attempt to strike a balance between simplicity (i.e., running operations sequentially), performance (running them in parallel) and network load: there is some parallelism but operations are reasonably spaced in time with increasing delays. This is implemented by the -[happyeyeballs.go](happyeyeballs.go) file. +[happyeyeballs.go](happyeyeballs.go) file and roughly works as follows: + +1. the first attempt has zero delay; + +2. the second attempt has a one-second delay; + +3. the third attempt has a two-second delay; + +4. the fourth attempt has a four-second delay; + +5. the fifth attempt has an eight-second delay; + +6. subsequent attempts are spaced eight-second in time. 
Additionally, the dialing algorithm keeps statistics about the operations it performs using an `httpsDialerEventsHandler` type: diff --git a/internal/enginenetx/happyeyeballs.go b/internal/enginenetx/happyeyeballs.go index 75259b9aa5..d6bb63ff6a 100644 --- a/internal/enginenetx/happyeyeballs.go +++ b/internal/enginenetx/happyeyeballs.go @@ -28,6 +28,6 @@ func happyEyeballsDelay(idx int) time.Duration { case idx <= 4: return baseDelay << (idx - 1) default: - return baseDelay << 3 * (time.Duration(idx) - 3) + return baseDelay << 3 } } diff --git a/internal/enginenetx/happyeyeballs_test.go b/internal/enginenetx/happyeyeballs_test.go index b1f7c4af56..3728aa12bf 100644 --- a/internal/enginenetx/happyeyeballs_test.go +++ b/internal/enginenetx/happyeyeballs_test.go @@ -19,12 +19,12 @@ func TestHappyEyeballsDelay(t *testing.T) { {2, 2 * time.Second}, {3, 4 * time.Second}, {4, 8 * time.Second}, - {5, 2 * 8 * time.Second}, - {6, 3 * 8 * time.Second}, - {7, 4 * 8 * time.Second}, - {8, 5 * 8 * time.Second}, - {9, 6 * 8 * time.Second}, - {10, 7 * 8 * time.Second}, + {5, 8 * time.Second}, + {6, 8 * time.Second}, + {7, 8 * time.Second}, + {8, 8 * time.Second}, + {9, 8 * time.Second}, + {10, 8 * time.Second}, } for _, tc := range cases { From 8e2a1f372d6d48021fc5d53e9262080f22920162 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:42:40 +0200 Subject: [PATCH 025/127] x --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 32bf7d66bc..bddc7a561b 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -175,7 +175,8 @@ the overall time to perform a TLS handshake, we attempt to strike a balance between simplicity (i.e., running operations sequentially), performance (running them in parallel) and network load: there is some parallelism but operations are reasonably spaced in time with increasing delays. 
This is implemented by the -[happyeyeballs.go](happyeyeballs.go) file and roughly works as follows: +[happyeyeballs.go](happyeyeballs.go) file and produces the following delays depending +on the index used by the current attempt: 1. the first attempt has zero delay; From e2aed073678575beec60a4c0bd4e1adebb7c8479 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:45:52 +0200 Subject: [PATCH 026/127] x --- internal/enginenetx/DESIGN.md | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index bddc7a561b..3da7de06a7 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -178,17 +178,14 @@ are reasonably spaced in time with increasing delays. This is implemented by the [happyeyeballs.go](happyeyeballs.go) file and produces the following delays depending on the index used by the current attempt: -1. the first attempt has zero delay; - -2. the second attempt has a one-second delay; - -3. the third attempt has a two-second delay; - -4. the fourth attempt has a four-second delay; - -5. the fifth attempt has an eight-second delay; - -6. subsequent attempts are spaced eight-second in time. +| Attempt number | Delay since the beginning of dialing (seconds) | +| -------------- | ---------------------------------------------- | +| 1 | 0 | +| 2 | 1 | +| 4 | 2 | +| 4 | 4 | +| 5 | 8 | +| 6 | XXX did I break it? 
| Additionally, the dialing algorithm keeps statistics about the operations it performs using an `httpsDialerEventsHandler` type: From 08fbf485fd280bbd87e5039c2d21ec4950efedaa Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:57:17 +0200 Subject: [PATCH 027/127] [ci skip] --- internal/enginenetx/DESIGN.md | 28 +++++++++++++++++----------- internal/enginenetx/happyeyeballs.go | 2 ++ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 3da7de06a7..c3b2d69d59 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -175,17 +175,23 @@ the overall time to perform a TLS handshake, we attempt to strike a balance between simplicity (i.e., running operations sequentially), performance (running them in parallel) and network load: there is some parallelism but operations are reasonably spaced in time with increasing delays. This is implemented by the -[happyeyeballs.go](happyeyeballs.go) file and produces the following delays depending -on the index used by the current attempt: - -| Attempt number | Delay since the beginning of dialing (seconds) | -| -------------- | ---------------------------------------------- | -| 1 | 0 | -| 2 | 1 | -| 4 | 2 | -| 4 | 4 | -| 5 | 8 | -| 6 | XXX did I break it? | +[happyeyeballs.go](happyeyeballs.go) file and, assuming `T0` is the time when +we start dialing, produces the following minimum dial times: + +| Attempt | MinDialTime | +| ------- | ------------- | +| 1 | `T0 + 0` | +| 2 | `T0 + 1s` | +| 4 | `T0 + 2s` | +| 4 | `T0 + 4s` | +| 5 | `T0 + 8s` | +| 6 | `T0 + 16s` | +| 7 | `T0 + 24s` | +| 8 | `T0 + 32s` | +| ... | ... | + +In other words, we exponentially increase the delay until we reach `8s` and +then we linearly space each attempt by `8s` from the previous one. 
Additionally, the dialing algorithm keeps statistics about the operations it performs using an `httpsDialerEventsHandler` type: diff --git a/internal/enginenetx/happyeyeballs.go b/internal/enginenetx/happyeyeballs.go index d6bb63ff6a..9186915cf9 100644 --- a/internal/enginenetx/happyeyeballs.go +++ b/internal/enginenetx/happyeyeballs.go @@ -19,6 +19,8 @@ import "time" // actual issues inside the network. By using this algorithm, we are still // able to overlap and pack more dialing attempts overall. func happyEyeballsDelay(idx int) time.Duration { + // FIXME: we need to adjust how we implement happy eyeballs + // to make sure the time is relative to a deadline. const baseDelay = time.Second switch { case idx <= 0: From 7c6ab4bd83a42365fb176f2c4a750b1956049dea Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:58:11 +0200 Subject: [PATCH 028/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index c3b2d69d59..84338a9411 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -190,8 +190,8 @@ we start dialing, produces the following minimum dial times: | 8 | `T0 + 32s` | | ... | ... | -In other words, we exponentially increase the delay until we reach `8s` and -then we linearly space each attempt by `8s` from the previous one. +That, we exponentially increase the delay until `8s`, then we linearly space +each attempt by `8s`. 
Additionally, the dialing algorithm keeps statistics about the operations it performs using an `httpsDialerEventsHandler` type: From c8432411a4eafe474aac8d8a7c38c9c18b00c123 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 12:58:59 +0200 Subject: [PATCH 029/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 84338a9411..e5dbb61f45 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -190,7 +190,7 @@ we start dialing, produces the following minimum dial times: | 8 | `T0 + 32s` | | ... | ... | -That, we exponentially increase the delay until `8s`, then we linearly space +That is, we exponentially increase the delay until `8s`, then we linearly space each attempt by `8s`. Additionally, the dialing algorithm keeps statistics about the operations it From 7f165778a13d7b8d0e3f8132ed19cf589e36f528 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:18:04 +0200 Subject: [PATCH 030/127] [ci skip] --- internal/enginenetx/DESIGN.md | 124 +++++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 38 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e5dbb61f45..b7e273d61c 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -140,37 +140,84 @@ SNI over the network and then verify the certificate using the real SNI after a ## HTTPS Dialer Creating TLS connections is implemented by `(*httpsDialer).DialTLSContext`, also -part of [httpsdialer.go](httpsdialer.go). This method _morally_ implements the following -algorithm (where we omitted error handling and returning a conn for simplicity): +part of [httpsdialer.go](httpsdialer.go). 
This method _morally_ does the following: ```Go -index := 0 -for tx := range policy.LookupTactics() { - // avoid trying the same policy twice - if isDuplicate(tx) { - continue +func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpoint string) (net.Conn, error) { + // map to ensure we don't have duplicate tactics + uniq := make(map[string]int) + + // time when we started dialing + t0 := time.Now() + + // index of each dialing attempt + idx := 0 + + // [...] omitting code to get hostname and port from endpoint [...] + + // fetch tactics asynchronously + for tx := range hd.policy.LookupTactics(ctx, hostname, port) { + + // avoid using the same tactic more than once + summary := tx.tacticSummaryKey() + if uniq[summary] > 0 { + continue + } + uniq[summary]++ + + // compute the happy eyeballs deadline + deadline := t0.Add(happyEyeballsDelay(idx)) + idx++ + + // dial in a background goroutine + go func(tx *httpsDialerTactic, deadline time.Duration) { + // wait for deadline + if d := time.Until(deadline); d > 0 { + time.Sleep(d) + } + + // dial TCP + conn, err := tcpConnect(tx.Address, tx.Port) + + // [...] omitting error handling [...] + + // handshake + tconn, err := tlsHandshake(conn, tx.SNI, false /* skip verification */) + + // [...] omitting error handling [...] + + // make sure the hostname's OK + err := verifyHostname(tconn, tx.VerifyHostname) + + // [...] omitting error handling and producing result [...] + + }(tx, deadline) } - // create delay for this tactic - delay := happyEyeballsDelay(index) - index++ - - // dial in a background gorountine (simplified algorithm) - go func(tx, delay) { - time.Sleep(delay) - conn := tcpConnect(tx.Address, tx.Port) - tconn := tlsHandshake(conn, tx.SNI, false /* skip verification */) - verifyHostname(tlsConn, tx.VerifyHostname) - }(tx, delay) + // [...] omitting code to decide what to return [...] 
} ``` -When a TLS connection attempt succeds, we use cancellable `context.Context` to cancel -all the other the TLS connect attempts that may be in progress (not shown in the above -algorithm for simplicity). If all connection attempts fail, instead, we return a -composed error including all errors (again, not showed above for simplicity). +This simplified algorithm differs for the real implementation in that we +have omitted the following (boring) implementation details: + +1. code to obtain `hostname` and `port` from `endpoint` (e.g., code to extract +`"api.ooni.io"` and `"443"` from `"api.ooni.io:443"`); + +2. code to pass back a connection or an error from a background +goroutine to the `DialTLSContext` method; + +3. code to decide whether to return a `net.Conn` or an `error`; + +4. the fact that `DialTLSContext` uses a goroutine pool rather than creating a +new goroutine for each tactic (which could create too many goroutines); + +5. the fact that, as soon as we successfully have a good TLS connection, we +immediately cancel any other parallel attempt at connecting. + +We `happyEyeballsDelay` function (in [happyeyeballs.go](happyeyeballs.go)) is +such that we generate the following delays: -By using a modified happy eyeballs with baseline values that take into account the overall time to perform a TLS handshake, we attempt to strike a balance between simplicity (i.e., running operations sequentially), performance (running them in parallel) and network load: there is some parallelism but operations @@ -178,23 +225,24 @@ are reasonably spaced in time with increasing delays. This is implemented by the [happyeyeballs.go](happyeyeballs.go) file and, assuming `T0` is the time when we start dialing, produces the following minimum dial times: -| Attempt | MinDialTime | -| ------- | ------------- | -| 1 | `T0 + 0` | -| 2 | `T0 + 1s` | -| 4 | `T0 + 2s` | -| 4 | `T0 + 4s` | -| 5 | `T0 + 8s` | -| 6 | `T0 + 16s` | -| 7 | `T0 + 24s` | -| 8 | `T0 + 32s` | -| ... | ... 
| +| idx | delay (s) | +| --- | --------- | +| 1 | 0 | +| 2 | 1 | +| 4 | 2 | +| 4 | 4 | +| 5 | 8 | +| 6 | 16 | +| 7 | 24 | +| 8 | 32 | +| ... | ... | That is, we exponentially increase the delay until `8s`, then we linearly space -each attempt by `8s`. +each attempt by `8s`. We aim to space attempts to accommodate for slow access networks +and/or access network experiencing temporary failures to deliver packets. -Additionally, the dialing algorithm keeps statistics about the operations it -performs using an `httpsDialerEventsHandler` type: +Additionally, the `*httpsDialer` algorithm keeps statistics about the operations +it performs using an `httpsDialerEventsHandler` type: ```Go type httpsDialerEventsHandler interface { @@ -208,7 +256,7 @@ type httpsDialerEventsHandler interface { ``` These statistics contribute to construct knowledge about the network -conditions and influence the choice of policies. +conditions and influence the generation of tactics. ## dnsPolicy From 4b0c7681227cc03f14bbf26baa87f97135ce2858 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:18:59 +0200 Subject: [PATCH 031/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index b7e273d61c..1a6bf15e53 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -21,7 +21,7 @@ Considering all of this, this package aims to: bridges IP addresses inside the codebase; 2. overcome SNI-based censorship for "api.ooni.io" and test helpers by choosing -from a pre-defined list of SNIs to use _instead_; +from a pre-defined list of SNIs; 3. 
introduce state by remembering which tactics for creating TLS connections have worked in the past and trying to reuse them; From 7ea130ded0e5061df4869933444efdeb61bdab2b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:21:08 +0200 Subject: [PATCH 032/127] [ci skip] --- internal/enginenetx/DESIGN.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 1a6bf15e53..2c85757180 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -23,12 +23,10 @@ bridges IP addresses inside the codebase; 2. overcome SNI-based censorship for "api.ooni.io" and test helpers by choosing from a pre-defined list of SNIs; -3. introduce state by remembering which tactics for creating TLS connections -have worked in the past and trying to reuse them; +3. use tactics for creating TLS connections that worked previously; -4. allow for relatively fast recovery in case of network-condition changes -by remixing known-good solutions and bridge strategies with more conventional -approaches relying on using the DNS and sending the true SNI; +4. recover ~quickly if the network condition change (e.g., if a bridge is +discontinued, the code should ~quickly attempt other strategies); 5. adopt a censored-users-first approach where the strategy we use by default should allow for smooth operations _for them_ rather than prioritizing the From 1cbc10919a619d95334dff5b9ddf363e06af9fa6 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:21:35 +0200 Subject: [PATCH 033/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 2c85757180..e130a046af 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -25,8 +25,7 @@ from a pre-defined list of SNIs; 3. 
use tactics for creating TLS connections that worked previously; -4. recover ~quickly if the network condition change (e.g., if a bridge is -discontinued, the code should ~quickly attempt other strategies); +4. recover ~quickly if the conditions change (e.g., if a bridge is discontinued); 5. adopt a censored-users-first approach where the strategy we use by default should allow for smooth operations _for them_ rather than prioritizing the From 7edfbb88ddfef1b463ee145228cb31167d5fab48 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:22:33 +0200 Subject: [PATCH 034/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e130a046af..c604aee9df 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -36,8 +36,8 @@ avoid triggering potential residual censorship; 7. provide a configuration file (`$OONI_HOME/engine/bridges.conf`) such that users can manually configure TLS dialing for any backend service and third party -service that may be required by OONI Probe, therefore allowing to bypass also -IP-based restrictions as long as a known-good bridge exists. +service required by OONI Probe, therefore allowing to bypass IP-based +restrictions as long as known-good bridges are available. The rest of this document explains how we designed for achieving these goals. 
From 21f9b900f0446002d1acd800c21ab1aa26d80e27 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:23:07 +0200 Subject: [PATCH 035/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index c604aee9df..53403ba113 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -35,9 +35,7 @@ non-censored case and using additional tactics as the fallback; avoid triggering potential residual censorship; 7. provide a configuration file (`$OONI_HOME/engine/bridges.conf`) such that -users can manually configure TLS dialing for any backend service and third party -service required by OONI Probe, therefore allowing to bypass IP-based -restrictions as long as known-good bridges are available. +users can manually force using specific bridges and SNIs. The rest of this document explains how we designed for achieving these goals. From 8e5fee9e6803f7a1dd38d664e6b182620a1a71c7 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:24:00 +0200 Subject: [PATCH 036/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 53403ba113..5c28bb277c 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -34,8 +34,7 @@ non-censored case and using additional tactics as the fallback; 6. try to defer sending the true `SNI` on the wire, therefore trying to avoid triggering potential residual censorship; -7. provide a configuration file (`$OONI_HOME/engine/bridges.conf`) such that -users can manually force using specific bridges and SNIs. +7. allow users to force specific bridges and SNIs by edigint `$OONI_HOME/engine/bridges.conf`. The rest of this document explains how we designed for achieving these goals. 
From 4f8cf91ba4466ed831ffce11e7152fb66adca614 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:25:08 +0200 Subject: [PATCH 037/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 5c28bb277c..39cc6b517a 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -23,7 +23,7 @@ bridges IP addresses inside the codebase; 2. overcome SNI-based censorship for "api.ooni.io" and test helpers by choosing from a pre-defined list of SNIs; -3. use tactics for creating TLS connections that worked previously; +3. remember and use tactics for creating TLS connections that worked previously; 4. recover ~quickly if the conditions change (e.g., if a bridge is discontinued); From 436fb50e4a8eb03cb9644bbd8199bcc4cc4677c0 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:26:03 +0200 Subject: [PATCH 038/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 39cc6b517a..2966c9d363 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -32,7 +32,8 @@ should allow for smooth operations _for them_ rather than prioritizing the non-censored case and using additional tactics as the fallback; 6. try to defer sending the true `SNI` on the wire, therefore trying to -avoid triggering potential residual censorship; +avoid triggering potential residual censorship blocking a given TCP endpoint +for some time regardless of what `SNI` is being used next; 7. allow users to force specific bridges and SNIs by edigint `$OONI_HOME/engine/bridges.conf`. 
From b6aebc2821b3d04e2e289b269726df01aecf3296 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:26:22 +0200 Subject: [PATCH 039/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 2966c9d363..e40dcb7778 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -35,7 +35,7 @@ non-censored case and using additional tactics as the fallback; avoid triggering potential residual censorship blocking a given TCP endpoint for some time regardless of what `SNI` is being used next; -7. allow users to force specific bridges and SNIs by edigint `$OONI_HOME/engine/bridges.conf`. +7. allow users to force specific bridges and SNIs by editing `$OONI_HOME/engine/bridges.conf`. The rest of this document explains how we designed for achieving these goals. From 77b03bd8c4a641cd2c50dcb3630ccf88d0911c2a Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:27:04 +0200 Subject: [PATCH 040/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e40dcb7778..2e116ba8c9 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -56,7 +56,7 @@ creating TLS connections, to meet the objectives explained before. ## Creating TLS Connections In [network.go](network.go), `newHTTPSDialerPolicy` configures the dialing policy -depending on the arguments passed `NewNetwork`: +depending on the arguments passed to `NewNetwork`: 1. 
if the `proxyURL` argument is not `nil`, we use the `dnsPolicy` alone; From f565e68f4909c5e21f2b4279e9f0daded6103482 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:27:54 +0200 Subject: [PATCH 041/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 2e116ba8c9..31d91f864e 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -72,6 +72,8 @@ As a first approximation, we can consider each arrow in the diagram to mean "fal back to". In reality, some policies implement a more complex strategy where they remix tactics they know and tactics provided by the downstream policy. +When using a proxy, we just use the `dnsPolicy` assuming the proxy knows how to do circumvention. + ## Instructions For Dialing Each policy implements the following interface (defined in [httpsdialer.go](httpsdialer.go)): From e9eee041869b3df4dba2a264f026ddd99f99e0c9 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:29:02 +0200 Subject: [PATCH 042/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 31d91f864e..fa023eec7c 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -93,7 +93,8 @@ there are also some key differences, namely: only takes in input the domain name to resolve; 2. `LookupTactics` returns _a stream_ of viable "tactics", while `LookupHost` -returns a list of IP addresses. +returns a list of IP addresses (we define "stream" a channel where a background +goroutine posts content and which is closed when done). The second point, in particular, is crucial. 
The design of `LookupTactics` is such that we can start attempting to dial as soon as we have some tactics From 393968de3498d4342f5648de6b766bd41c047cf9 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:29:53 +0200 Subject: [PATCH 043/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index fa023eec7c..4fe7396d61 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -99,8 +99,8 @@ goroutine posts content and which is closed when done). The second point, in particular, is crucial. The design of `LookupTactics` is such that we can start attempting to dial as soon as we have some tactics to try. A composed `httpsDialerPolicy` can, in fact, start multiple child `LookupTactics` -operations and then return them to the caller as soon as they are ready, thus avoiding -to block dialing until _all_ the child operations are complete. +operations and then return tactics to the caller as soon as they are ready, without +blocking dialing until _all_ the child operations are complete. Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) From 665b961b894d731e8225e7142bda7c13347d6c0c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:30:01 +0200 Subject: [PATCH 044/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 4fe7396d61..9e1e1efd1d 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -99,7 +99,7 @@ goroutine posts content and which is closed when done). The second point, in particular, is crucial. The design of `LookupTactics` is such that we can start attempting to dial as soon as we have some tactics to try. 
A composed `httpsDialerPolicy` can, in fact, start multiple child `LookupTactics` -operations and then return tactics to the caller as soon as they are ready, without +operations and then return tactics to the caller as soon as some are ready, without blocking dialing until _all_ the child operations are complete. Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, From fb651c77b19b798fc6b75d9a730e863d28452701 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:30:34 +0200 Subject: [PATCH 045/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 9e1e1efd1d..0d9fb50cc1 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -104,9 +104,9 @@ blocking dialing until _all_ the child operations are complete. Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) -to get IP addresses using the DNS used by the `*engine.Session` type. Typically, such a +to get IP addresses using the DNS used by the `*engine.Session` type. (Typically, such a resolver, in turn, composes several DNS-over-HTTPS resolvers with the fallback -`getaddrinfo` resolver, and remebers which resolvers work. +`getaddrinfo` resolver, and remebers which resolvers work.) 
A "tactic" looks like this: From cb0dbfc61c72232931e00d17ef511a7e2096e6cc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:31:27 +0200 Subject: [PATCH 046/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 0d9fb50cc1..d705ad2cfb 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -125,7 +125,8 @@ Here's an explanation of why we have each field in the struct: - `Address` and `Port` qualify the TCP endpoint; - `InitialDelay` allows a policy to delay a connect operation to implement -something similar to [happy eyeballs](https://en.wikipedia.org/wiki/Happy_Eyeballs); +something similar to [happy eyeballs](https://en.wikipedia.org/wiki/Happy_Eyeballs), +where dialing attempts run in parallel but are staggered in time; - `SNI` is the `SNI` to send as part of the TLS ClientHello; From bfc0a1dfabab279581ecf6cb31c8d9a606d16e83 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:32:17 +0200 Subject: [PATCH 047/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index d705ad2cfb..3aed408b41 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -134,7 +134,8 @@ where dialing attempts run in parallel but are staggered in time; The separation of `SNI` and `VerifyHostname` is what allows us to send an innocuous SNI over the network and then verify the certificate using the real SNI after a -`skipVerify=true` TLS handshake has completed. +`skipVerify=true` TLS handshake has completed. (Obviously, for this trick to work, +the HTTPS server we're using must be okay with receiving unrelated SNIs.) 
## HTTPS Dialer From 02660decad112372bc665426c9d54859228dbd82 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:32:37 +0200 Subject: [PATCH 048/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 3aed408b41..e201406800 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -140,7 +140,9 @@ the HTTPS server we're using must be okay with receiving unrelated SNIs.) ## HTTPS Dialer Creating TLS connections is implemented by `(*httpsDialer).DialTLSContext`, also -part of [httpsdialer.go](httpsdialer.go). This method _morally_ does the following: +part of [httpsdialer.go](httpsdialer.go). + +This method _morally_ does the following: ```Go func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpoint string) (net.Conn, error) { From f7076166b0429aa4a972a8e2fded4624bb2b880b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:32:54 +0200 Subject: [PATCH 049/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e201406800..6eca0de17e 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -145,7 +145,8 @@ part of [httpsdialer.go](httpsdialer.go). 
This method _morally_ does the following: ```Go -func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpoint string) (net.Conn, error) { +func (hd *httpsDialer) DialTLSContext( + ctx context.Context, network string, endpoint string) (net.Conn, error) { // map to ensure we don't have duplicate tactics uniq := make(map[string]int) From 492ab69c25cc9efaba247d59a7808fedea0cde12 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:33:56 +0200 Subject: [PATCH 050/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 6eca0de17e..3ed4ca6789 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -175,8 +175,8 @@ func (hd *httpsDialer) DialTLSContext( // dial in a background goroutine go func(tx *httpsDialerTactic, deadline time.Duration) { // wait for deadline - if d := time.Until(deadline); d > 0 { - time.Sleep(d) + if delta := time.Until(deadline); delta > 0 { + time.Sleep(delta) } // dial TCP From 3b63fbddf043d728c6555683318be7c805d4a617 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:34:47 +0200 Subject: [PATCH 051/127] [ci skip] --- internal/enginenetx/DESIGN.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 3ed4ca6789..8eed45b7c0 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -182,17 +182,17 @@ func (hd *httpsDialer) DialTLSContext( // dial TCP conn, err := tcpConnect(tx.Address, tx.Port) - // [...] omitting error handling [...] + // [...] omitting error handling and passing error to DialTLSContext [...] // handshake tconn, err := tlsHandshake(conn, tx.SNI, false /* skip verification */) - // [...] omitting error handling [...] + // [...] omitting error handling and passing error to DialTLSContext [...] 
// make sure the hostname's OK err := verifyHostname(tconn, tx.VerifyHostname) - // [...] omitting error handling and producing result [...] + // [...] omitting error handling and passing error or conn to DialTLSContext [...] }(tx, deadline) } From 0c06b533919b182841d3f8c03ad90d1847570b5b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:35:10 +0200 Subject: [PATCH 052/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 8eed45b7c0..315f9c0c6a 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -197,7 +197,7 @@ func (hd *httpsDialer) DialTLSContext( }(tx, deadline) } - // [...] omitting code to decide what to return [...] + // [...] omitting code to decide whether to return a conn or an error [...] } ``` From 86916069b6fa4b6b3d8fcf1a3940296e81fa8495 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:35:27 +0200 Subject: [PATCH 053/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 315f9c0c6a..3ba0e995e8 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -202,7 +202,7 @@ func (hd *httpsDialer) DialTLSContext( ``` This simplified algorithm differs for the real implementation in that we -have omitted the following (boring) implementation details: +have omitted the following (boring) details: 1. 
code to obtain `hostname` and `port` from `endpoint` (e.g., code to extract `"api.ooni.io"` and `"443"` from `"api.ooni.io:443"`); From 20f800a8d18d5d0312effaa35370748e5d821cbc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:36:03 +0200 Subject: [PATCH 054/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 3ba0e995e8..464985f7e5 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -205,7 +205,7 @@ This simplified algorithm differs for the real implementation in that we have omitted the following (boring) details: 1. code to obtain `hostname` and `port` from `endpoint` (e.g., code to extract -`"api.ooni.io"` and `"443"` from `"api.ooni.io:443"`); +`"x.org"` and `"443"` from `"x.org:443"`); 2. code to pass back a connection or an error from a background goroutine to the `DialTLSContext` method; From e02c5d46a0ba246f3cf7f42f386bdd2e991fd433 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:36:52 +0200 Subject: [PATCH 055/127] [ci skip] --- internal/enginenetx/DESIGN.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 464985f7e5..88e8976855 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -213,12 +213,12 @@ goroutine to the `DialTLSContext` method; 3. code to decide whether to return a `net.Conn` or an `error`; 4. the fact that `DialTLSContext` uses a goroutine pool rather than creating a -new goroutine for each tactic (which could create too many goroutines); +goroutine for each tactic; -5. the fact that, as soon as we successfully have a good TLS connection, we -immediately cancel any other parallel attempt at connecting. +5. the fact that, as soon as we successfully have a connection, we +immediately cancel any other parallel attempts. 
-We `happyEyeballsDelay` function (in [happyeyeballs.go](happyeyeballs.go)) is +The `happyEyeballsDelay` function (in [happyeyeballs.go](happyeyeballs.go)) is such that we generate the following delays: the overall time to perform a TLS handshake, we attempt to strike a balance From dd128d8e6ad46c92be00b9169220017de8dd9971 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:38:00 +0200 Subject: [PATCH 056/127] [ci skip] --- internal/enginenetx/DESIGN.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 88e8976855..f714a013f6 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -221,13 +221,6 @@ immediately cancel any other parallel attempts. The `happyEyeballsDelay` function (in [happyeyeballs.go](happyeyeballs.go)) is such that we generate the following delays: -the overall time to perform a TLS handshake, we attempt to strike a balance -between simplicity (i.e., running operations sequentially), performance (running -them in parallel) and network load: there is some parallelism but operations -are reasonably spaced in time with increasing delays. This is implemented by the -[happyeyeballs.go](happyeyeballs.go) file and, assuming `T0` is the time when -we start dialing, produces the following minimum dial times: - | idx | delay (s) | | --- | --------- | | 1 | 0 | @@ -242,7 +235,9 @@ we start dialing, produces the following minimum dial times: That is, we exponentially increase the delay until `8s`, then we linearly space each attempt by `8s`. We aim to space attempts to accommodate for slow access networks -and/or access network experiencing temporary failures to deliver packets. +and/or access network experiencing temporary failures to deliver packets. However, +we also aim to have dialing parallelism, to reduce the overall time to connect +when we're experiencing many timeouts when attempting to dial. 
Additionally, the `*httpsDialer` algorithm keeps statistics about the operations it performs using an `httpsDialerEventsHandler` type: From 8c5bc6016d0883e073d77e480e1cecff6ae95d4b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:39:02 +0200 Subject: [PATCH 057/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index f714a013f6..ae3fb28188 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -233,8 +233,8 @@ such that we generate the following delays: | 8 | 32 | | ... | ... | -That is, we exponentially increase the delay until `8s`, then we linearly space -each attempt by `8s`. We aim to space attempts to accommodate for slow access networks +That is, we exponentially increase the delay until `8s`, then we linearly increase by `8s`. We +aim to space attempts to accommodate for slow access networks and/or access network experiencing temporary failures to deliver packets. However, we also aim to have dialing parallelism, to reduce the overall time to connect when we're experiencing many timeouts when attempting to dial. From 6e472585012841fad8fe742cf0dc3e51a65b76e5 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:39:24 +0200 Subject: [PATCH 058/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index ae3fb28188..1ac03cc243 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -239,8 +239,8 @@ and/or access network experiencing temporary failures to deliver packets. Howeve we also aim to have dialing parallelism, to reduce the overall time to connect when we're experiencing many timeouts when attempting to dial. 
-Additionally, the `*httpsDialer` algorithm keeps statistics about the operations -it performs using an `httpsDialerEventsHandler` type: +Additionally, the `*httpsDialer` algorithm keeps statistics +using an `httpsDialerEventsHandler` type: ```Go type httpsDialerEventsHandler interface { From 018fec47dd37c9c853846d8d59a71e17fc08368d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:42:33 +0200 Subject: [PATCH 059/127] [ci skip] --- internal/enginenetx/DESIGN.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 1ac03cc243..9ab03fd9e8 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -256,6 +256,13 @@ type httpsDialerEventsHandler interface { These statistics contribute to construct knowledge about the network conditions and influence the generation of tactics. +You may notice that we record both TCP connects and failures, while we +only record TLS handshake and certificate verification failures. This +happens because the same TCP endpoint (e.g., `162.55.247.208:443`) may +be used with different SNIs, but there's no point to try with other +SNIs when we see we cannot connect to such an endpoint. Hence, this knowledge +allows avoiding to perform usless (possibly time-costly) operations. + ## dnsPolicy The `dnsPolicy` is implemented by [dnspolicy.go](dnspolicy.go). From c834599059c59b300b5040a3e52770bc76258c37 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:43:50 +0200 Subject: [PATCH 060/127] [ci skip] --- internal/enginenetx/DESIGN.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 9ab03fd9e8..b1eee0046a 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -258,10 +258,11 @@ conditions and influence the generation of tactics. 
You may notice that we record both TCP connects and failures, while we only record TLS handshake and certificate verification failures. This -happens because the same TCP endpoint (e.g., `162.55.247.208:443`) may -be used with different SNIs, but there's no point to try with other -SNIs when we see we cannot connect to such an endpoint. Hence, this knowledge -allows avoiding to perform usless (possibly time-costly) operations. +happens because the same TCP endpoint (e.g., `162.55.247.208:443`) will +be tried with different SNIs when using bridges. However, there's no +point to continue trying once we learn that we cannot connect to such +an endpoint. And, because TCP connect may fail with timeout, by not +attempting, we reduce the amount of operations parked waiting for timeouts. ## dnsPolicy From ccd26c4db7f146c0055ee04ed07fcebfc40da3f4 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:44:21 +0200 Subject: [PATCH 061/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index b1eee0046a..73d3902cf3 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -259,7 +259,7 @@ conditions and influence the generation of tactics. You may notice that we record both TCP connects and failures, while we only record TLS handshake and certificate verification failures. This happens because the same TCP endpoint (e.g., `162.55.247.208:443`) will -be tried with different SNIs when using bridges. However, there's no +be tried with different SNIs when using `bridgePolicy`. However, there's no point to continue trying once we learn that we cannot connect to such an endpoint. And, because TCP connect may fail with timeout, by not attempting, we reduce the amount of operations parked waiting for timeouts. 
From 0b32b47aa807996d2abf1f6bde9914bbd5502b8c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:45:05 +0200 Subject: [PATCH 062/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 73d3902cf3..d225fe7ed0 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -261,8 +261,8 @@ only record TLS handshake and certificate verification failures. This happens because the same TCP endpoint (e.g., `162.55.247.208:443`) will be tried with different SNIs when using `bridgePolicy`. However, there's no point to continue trying once we learn that we cannot connect to such -an endpoint. And, because TCP connect may fail with timeout, by not -attempting, we reduce the amount of operations parked waiting for timeouts. +an endpoint. And, because TCP connect may timeout, by not attempting, we +avoid wasting time waiting for timeouts. ## dnsPolicy From 6066a7fee04c585a3fde8cbf2f6bba4af69cd2fa Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:46:41 +0200 Subject: [PATCH 063/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index d225fe7ed0..78be94d449 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -272,8 +272,7 @@ Its `LookupTactics` algorithm is quite simple: 1. we arrange for short circuiting cases in which the `domain` argument contains an IP address to "resolve" exactly that IP address (thus emulating -what `getaddrinfo` would do and avoiding to call onto the more-complex -underlying composed DNS resolver); +what `getaddrinfo` would do when asked to "resolve" an IP address); 2. for each resolved address, we generate tactics in the most straightforward way, e.g., where the `SNI` and `VerifyHostname` equal the `domain`. 
From 6c83c257c68996d1f9f454058a2c8cd5316cd607 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:47:18 +0200 Subject: [PATCH 064/127] [ci skip] --- internal/enginenetx/DESIGN.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 78be94d449..8a65a4715a 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -270,12 +270,12 @@ The `dnsPolicy` is implemented by [dnspolicy.go](dnspolicy.go). Its `LookupTactics` algorithm is quite simple: -1. we arrange for short circuiting cases in which the `domain` argument +1. we short circuit the cases in which the `domain` argument contains an IP address to "resolve" exactly that IP address (thus emulating what `getaddrinfo` would do when asked to "resolve" an IP address); -2. for each resolved address, we generate tactics in the most straightforward -way, e.g., where the `SNI` and `VerifyHostname` equal the `domain`. +2. for each resolved address, we generate tactics where the `SNI` and +`VerifyHostname` equal the `domain`. Using this policy alone is functionally equivalent to combining a DNS lookup operation with TCP connect and TLS handshake operations. From 19fc4b53b2e61fbb72280ef39a8ec613b810f1a3 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:48:25 +0200 Subject: [PATCH 065/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 8a65a4715a..c117db1229 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -277,8 +277,8 @@ what `getaddrinfo` would do when asked to "resolve" an IP address); 2. for each resolved address, we generate tactics where the `SNI` and `VerifyHostname` equal the `domain`. 
-Using this policy alone is functionally equivalent to combining a DNS lookup -operation with TCP connect and TLS handshake operations. +If `httpsDialer` uses this policy as its only policy, the operations it +performs are morally equivalent to normally dialing for TLS. ## userPolicy From 856d261c9f548705d104b3ca9bca448c05d3a378 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:50:42 +0200 Subject: [PATCH 066/127] [ci skip] --- internal/enginenetx/DESIGN.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index c117db1229..97c0b8c1c2 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -285,7 +285,8 @@ performs are morally equivalent to normally dialing for TLS. The `userPolicy` is implemented by [userpolicy.go](userpolicy.go). When constructing a `userPolicy` with `newUserPolicy` we indicate a fallback -`httpsDialerPolicy` to use if there is no `$OONI_HOME/engine/bridges.conf` file. +`httpsDialerPolicy` to use as the fallback, when either `$OONI_HOME/engine/bridges.conf` +does not exist or it does not contain actionable dialing rules. As of 2024-04-16, the structure of such a file is like in the following example: @@ -303,6 +304,9 @@ As of 2024-04-16, the structure of such a file is like in the following example: } ``` +This example instructs to use the given `Address`, `Port`, `SNI`, and `VerifyHostname` +when trying to establish a TLS connection to `"api.ooni.io:443"`. + The `newUserPolicy` constructor reads this file from disk on startup and keeps its content in memory.
From 2b7a881789a44f7260341fed942da62f90db3e3d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:51:41 +0200 Subject: [PATCH 067/127] [ci skip] --- internal/enginenetx/DESIGN.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 97c0b8c1c2..694450c56b 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -304,8 +304,9 @@ As of 2024-04-16, the structure of such a file is like in the following example: } ``` -This example instructs to use the given `Address`, `Port`, `SNI`, and `VerifyHostname` -when trying to establish a TLS connection to `"api.ooni.io:443"`. +This example instructs to use the given tactic when establishing a TLS connection to +`"api.ooni.io:443"`. Any other destination hostname and port would instead use the +configured "fallback" dialing policy. The `newUserPolicy` constructor reads this file from disk on startup and keeps its content in memory. From b7327e2632197690e5e451c352c13e9fabf4c8c4 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:52:54 +0200 Subject: [PATCH 068/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 694450c56b..0eb519cdfa 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -345,7 +345,7 @@ to a specific bridge address that has been discontinued; would waste lots of time failing before falling back. Conversely, a better strategy is to remix tactics as implemented -by the [remix](remix.go) file: +by the [remix.go](remix.go) file: 1. 
we take the first two tactics from the stats; From 3399fec3cfb3775c2b35d36fc6ec54d53562e010 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:54:01 +0200 Subject: [PATCH 069/127] [ci skip] --- internal/enginenetx/DESIGN.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 0eb519cdfa..9a0139e9a3 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -349,7 +349,7 @@ by the [remix.go](remix.go) file: 1. we take the first two tactics from the stats; -2. then we take the first two tactics from the fallback; +2. then we take the first four tactics from the fallback; 3. then we remix the rest, not caring much about whether we're reading from the stats of from the fallback. @@ -358,6 +358,10 @@ Because we sort tactics from the stats by our understanding of whether they are working as intended, we'll prioritize what we know to be working, but then we'll also throw some new tactics into the mix. +(We read four tactics from the fallback because that allows us to +include two bridge tactics and two DNS tactics, as explained below +when we discuss the `bridgePolicy` policy.) + As an additional optimization, when reading from the fallback, the `statsPolicy` will automatically exclude TCP endpoints that have failed recently during their TCP connect stage. If an IP address seems From baef14ff739f614c911abddffff65d3ad8bf8d43 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:55:30 +0200 Subject: [PATCH 070/127] [ci skip] --- internal/enginenetx/DESIGN.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 9a0139e9a3..914495f8b6 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -356,17 +356,15 @@ reading from the stats of from the fallback. 
Because we sort tactics from the stats by our understanding of whether they are working as intended, we'll prioritize what we know to be working, -but then we'll also throw some new tactics into the mix. - -(We read four tactics from the fallback because that allows us to -include two bridge tactics and two DNS tactics, as explained below -when we discuss the `bridgePolicy` policy.) +but then we'll also throw some new tactics into the mix. (We read four +tactics from the fallback because that allows us to include two bridge tactics +and two DNS tactics, as explained below when we discuss the +`bridgePolicy` policy.) As an additional optimization, when reading from the fallback, the `statsPolicy` will automatically exclude TCP endpoints that have -failed recently during their TCP connect stage. If an IP address seems -IP blocked, it does not make sense to continue wasting time trying -to connect to it (a timeout is in the order of ~10s). +failed recently during their TCP connect stage. By doing this, we +avoid wasting time with known-to-be-broken endpoints. ## bridgePolicy From 15d28f2dd63eacd48f5e8178b8cf4ecbbefab20c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:56:44 +0200 Subject: [PATCH 071/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 914495f8b6..368aadf6e0 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -369,9 +369,9 @@ avoid wasting time with known-to-be-broken endpoints. ## bridgePolicy The `bridgePolicy` is implemented by [bridgespolicy.go](bridgespolicy.go) and -rests on the assumptions made explicit in the design section. That is: +rests on the assumptions made explicit above. That is: -1. that there is a _bridge_ for "api.ooni.io"; +1. that there is at least one _bridge_ for "api.ooni.io"; 2. that the Web Connectivity Test Helpers accepts any SNI. 
From 0baaf9b96d37ce0305aa0a5cec009bbd6c7331ac Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 14:59:37 +0200 Subject: [PATCH 072/127] [ci skip] --- internal/enginenetx/DESIGN.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 368aadf6e0..efa1f8bae7 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -385,6 +385,12 @@ using existing knowledge of bridges and random SNIs; coming from the fallback tactic such that, if we are connecting to a known test-helper domain name, we're trying to hide its SNI. +The first two returned tactics will be bridges tactics for "api.ooni.io", +if applicable, followed by two tactics generated using the DNS, +followed by a random remix of all the remaining tactics. This is the +reason why in `statsPolicy` we return the first four tactics from +the fallback after getting two tactics from the stats. + ## Overall Algorithm **TODO(bassosimone)**: adapt the mixing algorithm to do exactly From 49bbf25301ea1fbdd94e4ccb750e7944950d186b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 15:00:17 +0200 Subject: [PATCH 073/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index efa1f8bae7..77c4771ae0 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -387,7 +387,8 @@ to a known test-helper domain name, we're trying to hide its SNI. The first two returned tactics will be bridges tactics for "api.ooni.io", if applicable, followed by two tactics generated using the DNS, -followed by a random remix of all the remaining tactics. This is the +followed by a random remix of all the remaining tactics. 
This choice of +returning two and two tactics first, is the reason why in `statsPolicy` we return the first four tactics from the fallback after getting two tactics from the stats. From d739ddd64299cccd01ba08a99351046ad1bd5ff0 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 15:03:28 +0200 Subject: [PATCH 074/127] [ci skip] --- internal/enginenetx/DESIGN.md | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 77c4771ae0..5670ff4259 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -394,24 +394,29 @@ the fallback after getting two tactics from the stats. ## Overall Algorithm -**TODO(bassosimone)**: adapt the mixing algorithm to do exactly -this and make sure there are tests for this. +The composed policy is the following (as discussed above): + +``` ++------------+ +-------------+ +--------------+ +-----------+ +| userPolicy | --> | statsPolicy | --> | bridgePolicy | --> | dnsPolicy | ++------------+ +-------------+ +--------------+ +-----------+ +``` + +Therefore, the compose policy will return the following tactics: -Having discussed all the polices in isolation, it now seems useful -to describe what is the overall algorithm we want to achieve: 1. if there is a `$OONI_HOME/engine/bridges.conf` with a valid entry -for the domain and port, use it without trying subsequent tactics; +for the domain and port, use it without trying more tactics; 2. use the first two tactics coming from stats, if any; 3. then use the first two tactics coming from bridges, if any; -4. then use the first two tactics coming from the DNS; +4. then use the first two tactics coming from the DNS, if successful; -5. after that, randomly remix the remaining tactics. +5. finally, randomly remix the remaining tactics. -Now, it only remains to discuss managing stats. +Having discussed this, it only remains to discuss managing stats. 
## Managing Stats From 4896f68acc65c33cd59c45c8267b071de2903135 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 15:04:17 +0200 Subject: [PATCH 075/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 5670ff4259..e8881b0471 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -405,8 +405,8 @@ The composed policy is the following (as discussed above): Therefore, the compose policy will return the following tactics: -1. if there is a `$OONI_HOME/engine/bridges.conf` with a valid entry -for the domain and port, use it without trying more tactics; +1. if there is a `$OONI_HOME/engine/bridges.conf` with a valid entry, +use it without trying more tactics; otherwise, 2. use the first two tactics coming from stats, if any; From 8bdbbaf72021a61ac0fb1002932aab567b8e5c33 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 16:03:20 +0200 Subject: [PATCH 076/127] x --- internal/enginenetx/DESIGN.md | 82 ++++++++++++++++++++++- internal/enginenetx/happyeyeballs.go | 4 +- internal/enginenetx/happyeyeballs_test.go | 12 ++-- 3 files changed, 88 insertions(+), 10 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e8881b0471..439039cf3f 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -420,4 +420,84 @@ Having discussed this, it only remains to discuss managing stats. ## Managing Stats -TODO +The [statsmanager.go](statsmanager.go) file implements the `*statsManager`. + +We initialize the `*statsManager` by calling `newStatsManager` with a stats trim +interval of 30 seconds in `NewNetwork` in [network.go](network.go). + +The `*statsManager` keeps stats at `$OONI_HOME/engine/httpsdialerstats.state`. 
+ +In `newStatsManager`, we attempt to read this file using `loadStatsContainer` and, if +not present, we fall back to create empty stats with `newStatsContainer`. + +While creating the `*statsManager` we also spawn a goroutine that trims the stats +at every stats trimming interval by calling `(*statsManager).trim`. In turn, `trim` +calls `statsContainerPruneEntries`, which eventually: + +1. removes entries not modified for more than one week; + +2. sorts entries by descending success rate and only keeps the top 10 entries. + +More specifically we sort entries using this algorithm: + +1. by decreasing success rate; + +2. by decreasing number of successes; + +3. by decreasing last update time. + +Likewise, calling `(*statsManager).Close` invokes `statsContainerPruneEntries` +and ensures that we write `$OONI_HOME/engine/httpsdialerstats.state`. + +This way, subsequent OONI Probe runs could load the stats that are more likely +to work and `statsPolicy` can take advantage of this information. + +The overall structure of `httpsdialerstats.state` is roughly the following: + +```JavaScript +{ + "DomainEndpoints": { + "api.ooni.io:443": { + "Tactics": { + "162.55.247.208:443 sni=api.trademe.co.nz verify=api.ooni.io": { + "CountStarted": 58, + "CountTCPConnectError": 0, + "CountTCPConnectInterrupt": 0, + "CountTCPConnectSuccess": 58, + "CountTLSHandshakeError": 0, + "CountTLSHandshakeInterrupt": 0, + "CountTLSVerificationError": 0, + "CountSuccess": 58, + "HistoTCPConnectError": {}, + "HistoTLSHandshakeError": {}, + "HistoTLSVerificationError": {}, + "LastUpdated": "2024-04-15T10:38:53.575561+02:00", + "Tactic": { + "Address": "162.55.247.208", + "InitialDelay": 0, + "Port": "443", + "SNI": "api.trademe.co.nz", + "VerifyHostname": "api.ooni.io" + } + } + } + } + }, + "Version": 5 +} +``` + +That is, the `DomainEndpoints` map contains an entry for each +TLS endpoint and, in turn, such an entry contains tactics.
We index each +tactic by a summary string to speed up looking it up. + +For each tactic, we keep counters and histograms, the time when the +entry had been updated last, and the tactic itself. + +The `*statsManager` implements `httpsDialerEventsHandler`, which means +that it has callbacks invoked by the `*httpsDialer` for interesting +events regarding dialing (e.g., whether TCP connect failed). + +These callbacks basically create or update stats by locking a mutex +and updating the relevant counters and histograms. + diff --git a/internal/enginenetx/happyeyeballs.go b/internal/enginenetx/happyeyeballs.go index 9186915cf9..75259b9aa5 100644 --- a/internal/enginenetx/happyeyeballs.go +++ b/internal/enginenetx/happyeyeballs.go @@ -19,8 +19,6 @@ import "time" // actual issues inside the network. By using this algorithm, we are still // able to overlap and pack more dialing attempts overall. func happyEyeballsDelay(idx int) time.Duration { - // FIXME: we need to adjust how we implement happy eyeballs - // to make sure the time is relative to a deadline. 
const baseDelay = time.Second switch { case idx <= 0: @@ -30,6 +28,6 @@ func happyEyeballsDelay(idx int) time.Duration { case idx <= 4: return baseDelay << (idx - 1) default: - return baseDelay << 3 + return baseDelay << 3 * (time.Duration(idx) - 3) } } diff --git a/internal/enginenetx/happyeyeballs_test.go b/internal/enginenetx/happyeyeballs_test.go index 3728aa12bf..b1f7c4af56 100644 --- a/internal/enginenetx/happyeyeballs_test.go +++ b/internal/enginenetx/happyeyeballs_test.go @@ -19,12 +19,12 @@ func TestHappyEyeballsDelay(t *testing.T) { {2, 2 * time.Second}, {3, 4 * time.Second}, {4, 8 * time.Second}, - {5, 8 * time.Second}, - {6, 8 * time.Second}, - {7, 8 * time.Second}, - {8, 8 * time.Second}, - {9, 8 * time.Second}, - {10, 8 * time.Second}, + {5, 2 * 8 * time.Second}, + {6, 3 * 8 * time.Second}, + {7, 4 * 8 * time.Second}, + {8, 5 * 8 * time.Second}, + {9, 6 * 8 * time.Second}, + {10, 7 * 8 * time.Second}, } for _, tc := range cases { From 4e3a8afefb00e1eb7800c64c271480403e8289c2 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 17:00:53 +0200 Subject: [PATCH 077/127] the design document should now be good --- internal/enginenetx/DESIGN.md | 261 +++++++++++++++++++++++++++++++--- 1 file changed, 238 insertions(+), 23 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 439039cf3f..2c61bf5b91 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -142,7 +142,18 @@ the HTTPS server we're using must be okay with receiving unrelated SNIs.) Creating TLS connections is implemented by `(*httpsDialer).DialTLSContext`, also part of [httpsdialer.go](httpsdialer.go). 
-This method _morally_ does the following: +This method _morally_ does the following in ~parallel: + +```mermaid +stateDiagram-v2 + tacticsGenerator --> skipDuplicate + skipDuplicate --> computeHappyEyeballsDelay + computeHappyEyeballsDelay --> tcpConnect + tcpConnect --> tlsHandshake + tlsHandshake --> verifyCertificate +``` + +Such a diagram roughly corresponds to this Go ~pseudo-code: ```Go func (hd *httpsDialer) DialTLSContext( @@ -172,7 +183,7 @@ func (hd *httpsDialer) DialTLSContext( deadline := t0.Add(happyEyeballsDelay(idx)) idx++ - // dial in a background goroutine + // dial in a background goroutine so this code runs in parallel go func(tx *httpsDialerTactic, deadline time.Duration) { // wait for deadline if delta := time.Until(deadline); delta > 0 { @@ -246,7 +257,6 @@ using an `httpsDialerEventsHandler` type: type httpsDialerEventsHandler interface { OnStarting(tactic *httpsDialerTactic) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) - OnTCPConnectSuccess(tactic *httpsDialerTactic) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) OnTLSVerifyError(tactic *httpsDialerTactic, err error) OnSuccess(tactic *httpsDialerTactic) @@ -256,14 +266,6 @@ type httpsDialerEventsHandler interface { These statistics contribute to construct knowledge about the network conditions and influence the generation of tactics. -You may notice that we record both TCP connects and failures, while we -only record TLS handshake and certificate verification failures. This -happens because the same TCP endpoint (e.g., `162.55.247.208:443`) will -be tried with different SNIs when using `bridgePolicy`. However, there's no -point to continue trying once we learn that we cannot connect to such -an endpoint. And, because TCP connect may timeout, by not attempting, we -avoid wasting time waiting for timeouts. - ## dnsPolicy The `dnsPolicy` is implemented by [dnspolicy.go](dnspolicy.go). 
@@ -344,7 +346,7 @@ to a specific bridge address that has been discontinued; 2. if we try all these 10 tactics before trying fallback tactics, we would waste lots of time failing before falling back. -Conversely, a better strategy is to remix tactics as implemented +Conversely, a better strategy is to "remix" tactics as implemented by the [remix.go](remix.go) file: 1. we take the first two tactics from the stats; @@ -361,11 +363,6 @@ tactics from the fallback because that allows us to include two bridge tactics and two DNS tactics, as explained below when we discuss the `bridgePolicy` policy.) -As an additional optimization, when reading from the fallback, the -`statsPolicy` will automatically exclude TCP endpoints that have -failed recently during their TCP connect stage. By doing this, we -avoid wasting time with known-to-be-broken endpoints. - ## bridgePolicy The `bridgePolicy` is implemented by [bridgespolicy.go](bridgespolicy.go) and @@ -416,6 +413,40 @@ use it without trying more tactics; otherwise, 5. finally, randomly remix the remaining tactics. +Excluding the case where we have a valid entry in `bridges.conf`, the following +diagram illustrates how we're mixing tactics: + +```mermaid +stateDiagram-v2 + state statsTacticsChan <<join>> + statsTactics --> statsTacticsChan + + state bridgesTacticsChan <<join>> + bridgesTactics --> bridgesTacticsChan + + state dnsTacticsChan <<join>> + dnsTactics --> dnsTacticsChan + + state "mix(2, 2)" as mix22 + bridgesTacticsChan --> mix22 + dnsTacticsChan --> mix22 + + state mix22Chan <<join>> + mix22 --> mix22Chan + + state "mix(2, 4)" as mix24 + statsTacticsChan --> mix24 + mix22Chan --> mix24 + + state tacticsChan <<join>> + mix24 --> tacticsChan + tacticsChan --> tactics +``` + +Here `mix(X, Y)` means taking `X` from the left block, if possible, then `Y` from the +right block, if possible, and then mixing the remainder in random order. Also, the "join" +blocks in the diagram represent channels. 
+ Having discussed this, it only remains to discuss managing stats. ## Managing Stats @@ -431,23 +462,23 @@ In `newStatsManager`, we attempt to read this file using `loadStatsContainer` an not present, we fall back to create empty stats with `newStatsContainer`. While creating the `*statsManager` we also spawn a goroutine that trims the stats -at every stats trimming internal by calling `(*statsManager).trim`. In turn, `trim` +at every stats trimming interval by calling `(*statsManager).trim`. In turn, `trim` calls `statsContainerPruneEntries`, which eventually: 1. removes entries not modified for more than one week; -2. sort entries by descending success rate and only keep the top 10 entries. +2. sorts entries and only keeps the top 10 entries. More specifically we sort entries using this algorithm: -1. by decreasing success rate; +1. by decreasing success rate; then -2. by decreasing number of successes; +2. by decreasing number of successes; then 3. by decreasing last update time. -Likewise, calling `(*statsManager).Close` invokes `statsContainerPruneEntries` -and ensures that we write `$OONI_HOME/engine/httpsdialerstats.state`. +Likewise, calling `(*statsManager).Close` invokes `statsContainerPruneEntries`, and +then ensures that we write `$OONI_HOME/engine/httpsdialerstats.state`. This way, subsequent OONI Probe runs could load the stats thare are more likely to work and `statsPolicy` can take advantage of this information. @@ -501,3 +532,187 @@ events regarding dialing (e.g., whether TCP connect failed). These callbacks basically create or update stats by locking a mutex and updating the relevant counters and histograms. +## Real-World Scenarios + +This section illustrates the behavior of this package under specific +network failure conditions, with specific emphasis on what happens if +the bridge IP address becomes, for any reason, unavailable. 
(We are +doing this because all this work was prompted by addressing the +[ooni/probe#2704](https://github.com/ooni/probe/issues/2704) issue.) + +### Invalid bridge without cached data + +In this first scenario, we're showing what happens if the bridge IP address +becomes unavailable without any previous state saved on disk. (To emulate +this scenario, change the bridge IP address in [bridgespolicy.go](bridgespolicy.go) +to become `10.0.0.1`, recompile, and wipe `httpsdialerstats.state`). + +Here's an excerpt from the logs: + +``` +[ 0.001346] httpsDialer: [#1] TCPConnect 10.0.0.1:443... started +[ 0.002101] sessionresolver: lookup api.ooni.io using https://wikimedia-dns.org/dns-query... started +[ 0.264132] sessionresolver: lookup api.ooni.io using https://wikimedia-dns.org/dns-query... ok +[ 0.501774] httpsDialer: [#1] TCPConnect 10.0.0.1:443... in progress +[ 1.002330] httpsDialer: [#2] TCPConnect 10.0.0.1:443... started +[ 1.503687] httpsDialer: [#2] TCPConnect 10.0.0.1:443... in progress +[ 2.001488] httpsDialer: [#4] TCPConnect 162.55.247.208:443... started +[ 2.046917] httpsDialer: [#4] TCPConnect 162.55.247.208:443... ok +[ 2.047016] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... started +[ 2.093148] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... ok +[ 2.093181] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... started +[ 2.095923] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... ok +[ 2.096054] httpsDialer: [#1] TCPConnect 10.0.0.1:443... interrupted +[ 2.096077] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted +``` + +After 2s, we start dialing with the IP addresses obtained through the DNS. + +Subsequent runs will cache this information on disk and use it. 
+ +### Invalid bridge with cached data + +This scenario is like the previous one, however we also assume that we have +a cached `httpsdialerstats.state` containing now-invalid lines. 
To this +end, we replace the original file with this content: + +```JSON +{ + "DomainEndpoints": { + "api.ooni.io:443": { + "Tactics": { + "10.0.0.1:443 sni=static-tracking.klaviyo.com verify=api.ooni.io": { + "CountStarted": 1, + "CountTCPConnectError": 0, + "CountTCPConnectInterrupt": 0, + "CountTLSHandshakeError": 0, + "CountTLSHandshakeInterrupt": 0, + "CountTLSVerificationError": 0, + "CountSuccess": 1, + "HistoTCPConnectError": {}, + "HistoTLSHandshakeError": {}, + "HistoTLSVerificationError": {}, + "LastUpdated": "2024-04-16T16:04:34.398778+02:00", + "Tactic": { + "Address": "10.0.0.1", + "InitialDelay": 0, + "Port": "443", + "SNI": "static-tracking.klaviyo.com", + "VerifyHostname": "api.ooni.io" + } + }, + "10.0.0.1:443 sni=vidstat.taboola.com verify=api.ooni.io": { + "CountStarted": 1, + "CountTCPConnectError": 0, + "CountTCPConnectInterrupt": 0, + "CountTLSHandshakeError": 0, + "CountTLSHandshakeInterrupt": 0, + "CountTLSVerificationError": 0, + "CountSuccess": 1, + "HistoTCPConnectError": {}, + "HistoTLSHandshakeError": {}, + "HistoTLSVerificationError": {}, + "LastUpdated": "2024-04-16T16:04:34.398795+02:00", + "Tactic": { + "Address": "10.0.0.1", + "InitialDelay": 1000000000, + "Port": "443", + "SNI": "vidstat.taboola.com", + "VerifyHostname": "api.ooni.io" + } + }, + "10.0.0.1:443 sni=www.example.com verify=api.ooni.io": { + "CountStarted": 1, + "CountTCPConnectError": 0, + "CountTCPConnectInterrupt": 0, + "CountTLSHandshakeError": 0, + "CountTLSHandshakeInterrupt": 0, + "CountTLSVerificationError": 0, + "CountSuccess": 1, + "HistoTCPConnectError": {}, + "HistoTLSHandshakeError": {}, + "HistoTLSVerificationError": {}, + "LastUpdated": "2024-04-16T16:04:34.398641+02:00", + "Tactic": { + "Address": "10.0.0.1", + "InitialDelay": 2000000000, + "Port": "443", + "SNI": "www.example.com", + "VerifyHostname": "api.ooni.io" + } + } + } + } + }, + "Version": 5 +} +``` + +Here's an excerpt from the logs: + +``` +[ 0.004017] sessionresolver: lookup api.ooni.io 
using https://wikimedia-dns.org/dns-query... started +[ 0.003854] httpsDialer: [#2] TCPConnect 10.0.0.1:443... started +[ 0.108089] sessionresolver: lookup api.ooni.io using https://wikimedia-dns.org/dns-query... ok +[ 0.505472] httpsDialer: [#2] TCPConnect 10.0.0.1:443... in progress +[ 1.004614] httpsDialer: [#1] TCPConnect 10.0.0.1:443... started +[ 1.506069] httpsDialer: [#1] TCPConnect 10.0.0.1:443... in progress +[ 2.003650] httpsDialer: [#3] TCPConnect 10.0.0.1:443... started +[ 2.505130] httpsDialer: [#3] TCPConnect 10.0.0.1:443... in progress +[ 4.004683] httpsDialer: [#4] TCPConnect 10.0.0.1:443... started +[ 4.506176] httpsDialer: [#4] TCPConnect 10.0.0.1:443... in progress +[ 8.004547] httpsDialer: [#5] TCPConnect 162.55.247.208:443... started +[ 8.042946] httpsDialer: [#5] TCPConnect 162.55.247.208:443... ok +[ 8.043015] httpsDialer: [#5] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... started +[ 8.088383] httpsDialer: [#5] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... ok +[ 8.088417] httpsDialer: [#5] TLSVerifyCertificateChain api.ooni.io... started +[ 8.091007] httpsDialer: [#5] TLSVerifyCertificateChain api.ooni.io... ok +[ 8.091174] httpsDialer: [#1] TCPConnect 10.0.0.1:443... interrupted +[ 8.091234] httpsDialer: [#3] TCPConnect 10.0.0.1:443... interrupted +[ 8.091258] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted +[ 8.091324] httpsDialer: [#4] TCPConnect 10.0.0.1:443... interrupted +``` + +So, here the fifth attempt is using the DNS. This is in line with the mixing algorithm, where +the first four attempts come from the stats or from the bridge policies. + +Let's also show what happens if we repeat the bootstrap: + +``` +[ 0.000938] httpsDialer: [#2] TCPConnect 162.55.247.208:443... started +[ 0.001014] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... started +[ 0.053325] httpsDialer: [#2] TCPConnect 162.55.247.208:443... 
ok +[ 0.053355] httpsDialer: [#2] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... started +[ 0.080695] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... ok +[ 0.094648] httpsDialer: [#2] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... ok +[ 0.094662] httpsDialer: [#2] TLSVerifyCertificateChain api.ooni.io... started +[ 0.096677] httpsDialer: [#2] TLSVerifyCertificateChain api.ooni.io... ok +``` + +You see that now we immediately use the correct address thanks to the stats. + +### Valid bridge with invalid cached data + +In this scenario, the bridge inside [bridgespolicy.go](bridgespolicy.go) is valid +but we have a cache listing an invalid bridge (I modified my cache to use `10.0.0.1`). + +Here's an excerpt from the logs: + +``` +[ 0.002641] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... started +[ 0.081401] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... ok +[ 0.503518] httpsDialer: [#1] TCPConnect 10.0.0.1:443... in progress +[ 1.005322] httpsDialer: [#2] TCPConnect 10.0.0.1:443... started +[ 1.506304] httpsDialer: [#2] TCPConnect 10.0.0.1:443... in progress +[ 2.002837] httpsDialer: [#4] TCPConnect 162.55.247.208:443... started +[ 2.048721] httpsDialer: [#4] TCPConnect 162.55.247.208:443... ok +[ 2.048760] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=player.ex.co ALPN=[h2 http/1.1]... started +[ 2.091016] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=player.ex.co ALPN=[h2 http/1.1]... ok +[ 2.091033] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... started +[ 2.093542] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... ok +[ 2.093708] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted +[ 2.093718] httpsDialer: [#1] TCPConnect 10.0.0.1:443... 
interrupted +``` + +In this case, we pick up the right bridge configuration and successfully +use it after two seconds. This configuration is provided by the `bridgesPolicy`. From fd81cf714cd7ec8981281d322b441b91c673ff02 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 17:07:26 +0200 Subject: [PATCH 078/127] x --- internal/enginenetx/httpsdialer.go | 7 ------- internal/enginenetx/httpsdialer_test.go | 5 ----- internal/enginenetx/statsmanager.go | 10 ---------- 3 files changed, 22 deletions(-) diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index e39e9ba246..bc2438cefa 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -108,7 +108,6 @@ type httpsDialerEventsHandler interface { // case, obviously, you MUST NOT consider the tactic failed. OnStarting(tactic *httpsDialerTactic) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) - OnTCPConnectSuccess(tactic *httpsDialerTactic) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) OnTLSVerifyError(tactic *httpsDialerTactic, err error) OnSuccess(tactic *httpsDialerTactic) @@ -358,12 +357,6 @@ func (hd *httpsDialer) dialTLS( return nil, err } - // track successful TCP connections such that we have stats - // regarding which endpoints work as intended: if we can't dial - // a specific TCP endpoint a couple of times, it doesn't make - // sense to continue trying with different SNIs. - hd.stats.OnTCPConnectSuccess(tactic) - // create TLS configuration tlsConfig := &tls.Config{ InsecureSkipVerify: true, // Note: we're going to verify at the end of the func! 
diff --git a/internal/enginenetx/httpsdialer_test.go b/internal/enginenetx/httpsdialer_test.go index f5f1b785ea..e44ec96a54 100644 --- a/internal/enginenetx/httpsdialer_test.go +++ b/internal/enginenetx/httpsdialer_test.go @@ -50,11 +50,6 @@ func (*httpsDialerCancelingContextStatsTracker) OnTCPConnectError(ctx context.Co // nothing } -// OnTCPConnectSuccess implements httpsDialerEventsHandler. -func (*httpsDialerCancelingContextStatsTracker) OnTCPConnectSuccess(tactic *httpsDialerTactic) { - // nothing -} - // OnTLSHandshakeError implements httpsDialerEventsHandler. func (*httpsDialerCancelingContextStatsTracker) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index 51b1c0def9..a07faad8fc 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -39,11 +39,6 @@ func (*nullStatsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDia // nothing } -// OnTCPConnectSuccess implements httpsDialerEventsHandler. -func (*nullStatsManager) OnTCPConnectSuccess(tactic *httpsDialerTactic) { - // nothing -} - // OnTLSHandshakeError implements httpsDialerEventsHandler. func (*nullStatsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing @@ -529,11 +524,6 @@ func (mt *statsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDial statsSafeIncrementMapStringInt64(&record.HistoTCPConnectError, err.Error()) } -// OnTCPConnectSuccess implements httpsDialerEventsHandler. -func (mt *statsManager) OnTCPConnectSuccess(tactic *httpsDialerTactic) { - // TODO(bassosimone): implement this method -} - // OnTLSHandshakeError implements httpsDialerEventsHandler. 
func (mt *statsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // get exclusive access From 8a9353355ea9b96f1580676c235e3ddf9b55f23a Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 17:12:46 +0200 Subject: [PATCH 079/127] x --- internal/enginenetx/httpsdialer.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index bc2438cefa..db8db4c8a6 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -247,7 +247,16 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo return httpsDialerReduceResult(connv, errorv) } -// httpsFilterTactics filters the tactics and rewrites their InitialDelay. +// httpsFilterTactics filters the tactics to: +// +// 1. be paranoid and filter out nil tactics if any; +// +// 2. avoid emitting duplicate tactics as part of the same run; +// +// 3. rewrite the happy eyeball delays. +// +// This function returns a channel where we emit the edited +// tactics, and which we close when we're done. 
func httpsFilterTactics(input <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { output := make(chan *httpsDialerTactic) go func() { From 2949035e8965000940f26df4dc72270eeaab56c5 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 16 Apr 2024 17:33:40 +0200 Subject: [PATCH 080/127] remove more code that we probably don't need --- internal/enginenetx/statsmanager.go | 6 ------ internal/enginenetx/statspolicy.go | 21 +-------------------- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index a07faad8fc..e82bcd0d0b 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -671,9 +671,3 @@ func (mt *statsManager) LookupTactics(domain string, port string) ([]*statsTacti } return out, len(out) > 0 } - -// IsTCPEndpointAccessible returns whether a given TCP endpoint has recently been accessible. -func (mt *statsManager) IsTCPEndpointAccessible(address, port string) bool { - // TODO(bassosimone): implement - return true -} diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index ff5e7f10a8..2a184622ff 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -38,7 +38,7 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str ReadFromLeft: 2, // And remix it with the fallback - Right: p.onlyAccessibleEndpoints(p.Fallback.LookupTactics(ctx, domain, port)), + Right: p.Fallback.LookupTactics(ctx, domain, port), // Under the assumption that below us we have bridgePolicy composed with DNS policy // and that the stage below emits two bridge tactics, if possible, followed by two @@ -85,22 +85,3 @@ func statsPolicyFilterStatsTactics(tactics []*statsTactic, good bool) (out []*ht } return } - -// onlyAccessibleEndpoints uses stats-based knowledge to exclude using endpoints that -// have recently been observed as being failing during TCP connect. 
-func (p *statsPolicy) onlyAccessibleEndpoints(input <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { - output := make(chan *httpsDialerTactic) - go func() { - // make sure we close the output channel - defer close(output) - - // avoid including tactics using endpoints that are consistently failing - for tx := range input { - if tx == nil || !p.Stats.IsTCPEndpointAccessible(tx.Address, tx.Port) { - continue - } - output <- tx - } - }() - return output -} From c075625b06355dd00a49443c12cf80a1cd1f04cc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:27:06 +0200 Subject: [PATCH 081/127] [ci skip] --- internal/enginenetx/DESIGN.md | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 2c61bf5b91..0e6fb32e1a 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -15,6 +15,10 @@ We define "bridge" an IP address with the following properties: We also assume that the Web Connectivity test helpers (TH) could accept any SNIs. +We also define "tactic" a tactic to perform a TLS handshake either with a +bridge or with a TH. We also define "policy" the collection of algorithms for +producing tactics for performing TLS handshakes. + Considering all of this, this package aims to: 1. overcome DNS-based censorship for "api.ooni.io" by hardcoding known-good @@ -68,11 +72,30 @@ depending on the arguments passed to `NewNetwork`: +------------+ +-------------+ +--------------+ +-----------+ ``` -As a first approximation, we can consider each arrow in the diagram to mean "fall -back to". In reality, some policies implement a more complex strategy where they remix -tactics they know and tactics provided by the downstream policy. +Policies are described in detail below. On a high-level, here's what each policy does: + +1. 
`userPolicy`: honours the `bridges.conf` configuration file and, if no entry is found +inside it, then it falls back to the subsequent policy; + +2. `statsPolicy`: uses statistics collected from previous runs to select tactics that +worked recently, otherwise it falls back to the subsequent policy; + +3. `bridgePolicy`: adopts a bridge strategy for `api.ooni.io`, hides the SNI for +THs, and otherwise falls back to the subsequent policy; + +4. `dnsPolicy`: uses the `*engine.Session` DNS resolver to lookup domain names +and produces trivial tactics equivalent to connecting normally. + +While the previous description says "falls back to," the actual semantics of falling +back is more complex than just falling back. For `statsPolicy` and `bridgePolicy`, +we remix the current policy strategy and subsequent policies strategies to strike a +balance between what a policy suggests and what subsequent policies would suggest. In +turn, this reduces the overall bootstrap time in light of issues with policies. + +We added remix as part of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). Before +this pull requests, OONI Probe implemented strict falling back. -When using a proxy, we just use the `dnsPolicy` assuming the proxy knows how to do circumvention. +Also, when using a proxy, we just use `dnsPolicy` assuming the proxy knows how to do circumvention. 
## Instructions For Dialing From 8d1458721d7eada71ffabab8e452b89a9b89b24b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:28:39 +0200 Subject: [PATCH 082/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 0e6fb32e1a..79355211cd 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -72,7 +72,7 @@ depending on the arguments passed to `NewNetwork`: +------------+ +-------------+ +--------------+ +-----------+ ``` -Policies are described in detail below. On a high-level, here's what each policy does: +Policies are described in detail in subsequent sections. On a high-level, here's what each does: 1. `userPolicy`: honours the `bridges.conf` configuration file and, if no entry is found inside it, then it falls back to the subsequent policy; From 7a2a360d1d9bc64d84225da2f712ca78e9171c6d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:29:52 +0200 Subject: [PATCH 083/127] [ci skip] --- internal/enginenetx/DESIGN.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 79355211cd..ff4c9e4f1e 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -78,13 +78,13 @@ Policies are described in detail in subsequent sections. On a high-level, here's inside it, then it falls back to the subsequent policy; 2. `statsPolicy`: uses statistics collected from previous runs to select tactics that -worked recently, otherwise it falls back to the subsequent policy; +worked recently for specific dialing targets, otherwise it falls back to the subsequent policy; -3. `bridgePolicy`: adopts a bridge strategy for `api.ooni.io`, hides the SNI for -THs, and otherwise falls back to the subsequent policy; +3. 
`bridgePolicy`: adopts a bridge strategy for `api.ooni.io` (i.e., uses known-in-advance +IP addresses), hides the SNI for THs, and otherwise falls back to the subsequent policy; 4. `dnsPolicy`: uses the `*engine.Session` DNS resolver to lookup domain names -and produces trivial tactics equivalent to connecting normally. +and produces trivial tactics equivalent to connecting normally using the Go standard library. While the previous description says "falls back to," the actual semantics of falling back is more complex than just falling back. For `statsPolicy` and `bridgePolicy`, From dfed510575057010c860d9b3a85b0bf607466526 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:30:13 +0200 Subject: [PATCH 084/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index ff4c9e4f1e..7cb0dd8283 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -81,7 +81,8 @@ inside it, then it falls back to the subsequent policy; worked recently for specific dialing targets, otherwise it falls back to the subsequent policy; 3. `bridgePolicy`: adopts a bridge strategy for `api.ooni.io` (i.e., uses known-in-advance -IP addresses), hides the SNI for THs, and otherwise falls back to the subsequent policy; +IP addresses), and otherwise falls back to the subsequent policy, still taking care of +hiding the THs SNIs; 4. `dnsPolicy`: uses the `*engine.Session` DNS resolver to lookup domain names and produces trivial tactics equivalent to connecting normally using the Go standard library. 
From be41fa95f0847a26d8066a1e60b3b30342981896 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:30:53 +0200 Subject: [PATCH 085/127] [ci skip] --- internal/enginenetx/DESIGN.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 7cb0dd8283..8aa49b8d6f 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -91,10 +91,9 @@ While the previous description says "falls back to," the actual semantics of fal back is more complex than just falling back. For `statsPolicy` and `bridgePolicy`, we remix the current policy strategy and subsequent policies strategies to strike a balance between what a policy suggests and what subsequent policies would suggest. In -turn, this reduces the overall bootstrap time in light of issues with policies. - -We added remix as part of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). Before -this pull requests, OONI Probe implemented strict falling back. +turn, this reduces the overall bootstrap time in light of issues with policies. (We +added remix as part of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552); before +that, OONI Probe implemented strict falling back.) Also, when using a proxy, we just use `dnsPolicy` assuming the proxy knows how to do circumvention. From 5f7302a834331162b83a32c1153db51c89d4625e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:31:17 +0200 Subject: [PATCH 086/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 8aa49b8d6f..a5e6215410 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -92,8 +92,8 @@ back is more complex than just falling back. 
For `statsPolicy` and `bridgePolicy we remix the current policy strategy and subsequent policies strategies to strike a balance between what a policy suggests and what subsequent policies would suggest. In turn, this reduces the overall bootstrap time in light of issues with policies. (We -added remix as part of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552); before -that, OONI Probe implemented strict falling back.) +added remix as part of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552); before, +we implemented strict falling back.) Also, when using a proxy, we just use `dnsPolicy` assuming the proxy knows how to do circumvention. From 744371080d46967b6ec685a353593cf06ea8ea12 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:38:07 +0200 Subject: [PATCH 087/127] [ci skip] --- internal/enginenetx/DESIGN.md | 40 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index a5e6215410..b2d9ad8415 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -53,6 +53,8 @@ func (n *Network) HTTPTransport() model.HTTPTransport func (n *Network) NewHTTPClient() *http.Client ``` +**Listing 1** `*enginenetx.Network` HTTP APIs. + The returned `*http.Client` uses an internal transport, which is returned when the package user invokes the `HTTPTransport` method. In turn, the internal transport customizes creating TLS connections, to meet the objectives explained before. @@ -72,6 +74,8 @@ depending on the arguments passed to `NewNetwork`: +------------+ +-------------+ +--------------+ +-----------+ ``` +**Diagram 1** Sequence of policies constructed when not using a proxy. + Policies are described in detail in subsequent sections. On a high-level, here's what each does: 1. 
`userPolicy`: honours the `bridges.conf` configuration file and, if no entry is found @@ -107,6 +111,8 @@ type httpsDialerPolicy interface { } ``` +**Listing 2** Interface implemented by policies. + The `LookupTactics` operation is _conceptually_ similar to [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost), because both operations map a domain name to IP addresses to connect to. However, @@ -143,6 +149,8 @@ type httpsDialerTactic struct { } ``` +**Listing 3** Structure describing a tactic. + Here's an explanation of why we have each field in the struct: - `Address` and `Port` qualify the TCP endpoint; @@ -176,6 +184,8 @@ stateDiagram-v2 tlsHandshake --> verifyCertificate ``` +**Diagram 2** Sequence of operations when dialing TLS connections. + Such a diagram roughly corresponds to this Go ~pseudo-code: ```Go @@ -235,6 +245,8 @@ func (hd *httpsDialer) DialTLSContext( } ``` +**Listing 4** Algorithm implementing dialing TLS connections. + This simplified algorithm differs for the real implementation in that we have omitted the following (boring) details: @@ -267,6 +279,8 @@ such that we generate the following delays: | 8 | 32 | | ... | ... | +**Table 1** Happy eyeballs delays. + That is, we exponentially increase the delay until `8s`, then we linearly increase by `8s`. We aim to space attempts to accommodate for slow access networks and/or access network experiencing temporary failures to deliver packets. However, @@ -286,6 +300,8 @@ type httpsDialerEventsHandler interface { } ``` +**Listing 5** Interface for collecting statistics. + These statistics contribute to construct knowledge about the network conditions and influence the generation of tactics. @@ -329,6 +345,8 @@ As of 2024-04-16, the structure of such a file is like in the following example: } ``` +**Listing 6** Sample `bridges.conf` content. + This example instructs to use the given tactic when establishing a TLS connection to `"api.ooni.io:443"`. 
Any other destination hostname and port would instead use the configured "fallback" dialing policy. @@ -414,13 +432,7 @@ the fallback after getting two tactics from the stats. ## Overall Algorithm -The composed policy is the following (as discussed above): - -``` -+------------+ +-------------+ +--------------+ +-----------+ -| userPolicy | --> | statsPolicy | --> | bridgePolicy | --> | dnsPolicy | -+------------+ +-------------+ +--------------+ +-----------+ -``` +The composed policy is as described in Diagram 1. Therefore, the compose policy will return the following tactics: @@ -466,6 +478,8 @@ stateDiagram-v2 tacticsChan --> tactics ``` +**Diagram 3** Tactics generation priorities when not using a proxy. + Here `mix(X, Y)` means taking `X` from the left block, if possible, then `Y` from the right block, if possible, and then mixing the remainder in random order. Also, the "join" blocks in the diagram represent channels. @@ -541,6 +555,8 @@ The overall structure of `httpsdialerstats.state` is roughly the following: } ``` +**Listing 7** Content of the stats state as cached on disk. + That is, the `DomainEndpoints` map contains contains an entry for each TLS endpoint and, in turn, such an entry contains tactics. We index each tactic by a summary string to speed up looking it up. @@ -589,6 +605,8 @@ Here's an excerpt from the logs: [ 2.096077] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted ``` +**Listing 8** Run with no previous cached state and unreachable hardcoded bridge address. + After 2s, we start dialing with the IP addresses obtained through the DNS. Subsequent runs will cache this information on disk and use it. @@ -671,6 +689,8 @@ end, we replace the original file with this content: } ``` +**Listing 9** Cached state for run with invalid cached state and invalid bridge address. + Here's an excerpt from the logs: ``` @@ -696,6 +716,8 @@ Here's an excerpt from the logs: [ 8.091324] httpsDialer: [#4] TCPConnect 10.0.0.1:443... 
interrupted ``` +**Listing 10** Run with invalid cached state and invalid bridge address. + So, here the fifth attempt is using the DNS. This is in line with the mixing algorithm, where the first four attempt come from the stats or from the bridge policies. @@ -712,6 +734,8 @@ Let's also shows what happens if we repeat the bootstrap: [ 0.096677] httpsDialer: [#2] TLSVerifyCertificateChain api.ooni.io... ok ``` +**Listing 11** Re-run with invalid cached state and bridge address. + You see that now we immediately use the correct address thanks to the stats. ### Valid bridge with invalid cached data @@ -737,5 +761,7 @@ Here's an excerpt from the logs: [ 2.093718] httpsDialer: [#1] TCPConnect 10.0.0.1:443... interrupted ``` +**Listing 11** Re with invalid cached state and valid bridge address. + In this case, we pick up the right bridge configuration and successfully use it after two seconds. This configuration is provided by the `bridgesPolicy`. From 2944ea020b641bf8cc9b4ba76a8017b6b73dad8b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:39:28 +0200 Subject: [PATCH 088/127] [ci skip] --- internal/enginenetx/DESIGN.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index b2d9ad8415..285eb4a1b0 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -53,7 +53,7 @@ func (n *Network) HTTPTransport() model.HTTPTransport func (n *Network) NewHTTPClient() *http.Client ``` -**Listing 1** `*enginenetx.Network` HTTP APIs. +**Listing 1.** `*enginenetx.Network` HTTP APIs. The returned `*http.Client` uses an internal transport, which is returned when the package user invokes the `HTTPTransport` method. 
In turn, the internal transport customizes @@ -74,7 +74,7 @@ depending on the arguments passed to `NewNetwork`: +------------+ +-------------+ +--------------+ +-----------+ ``` -**Diagram 1** Sequence of policies constructed when not using a proxy. +**Diagram 1.** Sequence of policies constructed when not using a proxy. Policies are described in detail in subsequent sections. On a high-level, here's what each does: @@ -111,7 +111,7 @@ type httpsDialerPolicy interface { } ``` -**Listing 2** Interface implemented by policies. +**Listing 2.** Interface implemented by policies. The `LookupTactics` operation is _conceptually_ similar to [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost), because @@ -149,7 +149,7 @@ type httpsDialerTactic struct { } ``` -**Listing 3** Structure describing a tactic. +**Listing 3.** Structure describing a tactic. Here's an explanation of why we have each field in the struct: @@ -184,7 +184,7 @@ stateDiagram-v2 tlsHandshake --> verifyCertificate ``` -**Diagram 2** Sequence of operations when dialing TLS connections. +**Diagram 2.** Sequence of operations when dialing TLS connections. Such a diagram roughly corresponds to this Go ~pseudo-code: @@ -245,7 +245,7 @@ func (hd *httpsDialer) DialTLSContext( } ``` -**Listing 4** Algorithm implementing dialing TLS connections. +**Listing 4.** Algorithm implementing dialing TLS connections. This simplified algorithm differs for the real implementation in that we have omitted the following (boring) details: @@ -279,7 +279,7 @@ such that we generate the following delays: | 8 | 32 | | ... | ... | -**Table 1** Happy eyeballs delays. +**Table 1.** Happy-eyeballs-like delays. That is, we exponentially increase the delay until `8s`, then we linearly increase by `8s`. We aim to space attempts to accommodate for slow access networks @@ -300,7 +300,7 @@ type httpsDialerEventsHandler interface { } ``` -**Listing 5** Interface for collecting statistics. 
+**Listing 5.** Interface for collecting statistics. These statistics contribute to construct knowledge about the network conditions and influence the generation of tactics. @@ -345,7 +345,7 @@ As of 2024-04-16, the structure of such a file is like in the following example: } ``` -**Listing 6** Sample `bridges.conf` content. +**Listing 6.** Sample `bridges.conf` content. This example instructs to use the given tactic when establishing a TLS connection to `"api.ooni.io:443"`. Any other destination hostname and port would instead use the @@ -478,7 +478,7 @@ stateDiagram-v2 tacticsChan --> tactics ``` -**Diagram 3** Tactics generation priorities when not using a proxy. +**Diagram 3.** Tactics generation priorities when not using a proxy. Here `mix(X, Y)` means taking `X` from the left block, if possible, then `Y` from the right block, if possible, and then mixing the remainder in random order. Also, the "join" @@ -555,7 +555,7 @@ The overall structure of `httpsdialerstats.state` is roughly the following: } ``` -**Listing 7** Content of the stats state as cached on disk. +**Listing 7.** Content of the stats state as cached on disk. That is, the `DomainEndpoints` map contains contains an entry for each TLS endpoint and, in turn, such an entry contains tactics. We index each @@ -605,7 +605,7 @@ Here's an excerpt from the logs: [ 2.096077] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted ``` -**Listing 8** Run with no previous cached state and unreachable hardcoded bridge address. +**Listing 8.** Run with no previous cached state and unreachable hardcoded bridge address. After 2s, we start dialing with the IP addresses obtained through the DNS. @@ -689,7 +689,7 @@ end, we replace the original file with this content: } ``` -**Listing 9** Cached state for run with invalid cached state and invalid bridge address. +**Listing 9.** Cached state for run with invalid cached state and invalid bridge address. 
Here's an excerpt from the logs: @@ -716,7 +716,7 @@ Here's an excerpt from the logs: [ 8.091324] httpsDialer: [#4] TCPConnect 10.0.0.1:443... interrupted ``` -**Listing 10** Run with invalid cached state and invalid bridge address. +**Listing 10.** Run with invalid cached state and invalid bridge address. So, here the fifth attempt is using the DNS. This is in line with the mixing algorithm, where the first four attempt come from the stats or from the bridge policies. @@ -734,7 +734,7 @@ Let's also shows what happens if we repeat the bootstrap: [ 0.096677] httpsDialer: [#2] TLSVerifyCertificateChain api.ooni.io... ok ``` -**Listing 11** Re-run with invalid cached state and bridge address. +**Listing 11.** Re-run with invalid cached state and bridge address. You see that now we immediately use the correct address thanks to the stats. @@ -761,7 +761,7 @@ Here's an excerpt from the logs: [ 2.093718] httpsDialer: [#1] TCPConnect 10.0.0.1:443... interrupted ``` -**Listing 11** Re with invalid cached state and valid bridge address. +**Listing 12.** Run with invalid cached state and valid bridge address. In this case, we pick up the right bridge configuration and successfully use it after two seconds. This configuration is provided by the `bridgesPolicy`.
From 43c1e7cf26fe432e93e059062184b199b87a25a9 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:42:05 +0200 Subject: [PATCH 089/127] [ci skip] --- internal/enginenetx/DESIGN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 285eb4a1b0..0ce9aa4d90 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -157,7 +157,8 @@ Here's an explanation of why we have each field in the struct: - `InitialDelay` allows a policy to delay a connect operation to implement something similar to [happy eyeballs](https://en.wikipedia.org/wiki/Happy_Eyeballs), -where dialing attempts run in parallel but are staggered in time; +where dialing attempts run in parallel but are staggered in time (the classical +example being: dialing for IPv6 and then attempting dialing for IPv4 after 0.3s); - `SNI` is the `SNI` to send as part of the TLS ClientHello; From d6159d62adf9a2fdac3a411704686561b2ce80fc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:44:57 +0200 Subject: [PATCH 090/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 0ce9aa4d90..e69b8c079f 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -288,6 +288,10 @@ and/or access network experiencing temporary failures to deliver packets. Howeve we also aim to have dialing parallelism, to reduce the overall time to connect when we're experiencing many timeouts when attempting to dial. +(We chose 1s as the baseline delay because that would be three happy-eyeballs delays as +implemented by the Go standard library, and overall a TCP connect followed by a TLS +handshake should roughly amount to three round trips.)
+ Additionally, the `*httpsDialer` algorithm keeps statistics using an `httpsDialerEventsHandler` type: From dcf4a0300bfed0bff13dc33f6e8dd2307f3b5abb Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:48:59 +0200 Subject: [PATCH 091/127] [ci skip] --- internal/enginenetx/DESIGN.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e69b8c079f..d5ab0fdf63 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -334,9 +334,9 @@ When constructing a `userPolicy` with `newUserPolicy` we indicate a fallback `httpsDialerPolicy` to use as the fallback, when either `$OONI_HOME/engine/bridges.conf` does not exist or it does not contain actionable dialing rules. -As of 2024-04-16, the structure of such a file is like in the following example: +As of 2024-04-16, the structure of `bridges.conf` is like in the following example: -```JSON +```JavaScript { "DomainEndpoints": { "api.ooni.io:443": [{ @@ -344,6 +344,8 @@ As of 2024-04-16, the structure of such a file is like in the following example: "Port": "443", "SNI": "www.example.com", "VerifyHostname": "api.ooni.io" + }, { + /* omitted */ }] }, "Version": 3 @@ -352,7 +354,7 @@ As of 2024-04-16, the structure of such a file is like in the following example: **Listing 6.** Sample `bridges.conf` content. -This example instructs to use the given tactic when establishing a TLS connection to +This example instructs to use the given tactic(s) when establishing a TLS connection to `"api.ooni.io:443"`. Any other destination hostname and port would instead use the configured "fallback" dialing policy. @@ -480,7 +482,7 @@ stateDiagram-v2 state tacticsChan <> mix24 --> tacticsChan - tacticsChan --> tactics + tacticsChan --> DialTLSContext ``` **Diagram 3.** Tactics generation priorities when not using a proxy. 
From f037eb3e697ea725d328f15094479e595fc87b5c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:49:33 +0200 Subject: [PATCH 092/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index d5ab0fdf63..ba1b898baa 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -489,7 +489,7 @@ stateDiagram-v2 Here `mix(X, Y)` means taking `X` from the left block, if possible, then `Y` from the right block, if possible, and then mixing the remainder in random order. Also, the "join" -blocks in the diagram represent channels. +blocks in the diagram represent Go channels. Having discussed this, it only remains to discuss managing stats. From 586c4501cc936a422ed080143071209432786e4e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:51:16 +0200 Subject: [PATCH 093/127] [ci skip] --- internal/enginenetx/DESIGN.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index ba1b898baa..2ffdb54cd6 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -497,7 +497,7 @@ Having discussed this, it only remains to discuss managing stats. The [statsmanager.go](statsmanager.go) file implements the `*statsManager`. -We initialize the `*statsManager` by calling `newStatsManager` with a stats trim +We initialize the `*statsManager` by calling `newStatsManager` with a stats-trim interval of 30 seconds in `NewNetwork` in [network.go](network.go). The `*statsManager` keeps stats at `$OONI_HOME/engine/httpsdialerstats.state`. @@ -506,7 +506,7 @@ In `newStatsManager`, we attempt to read this file using `loadStatsContainer` an not present, we fall back to create empty stats with `newStatsContainer`. 
While creating the `*statsManager` we also spawn a goroutine that trims the stats -at every stats trimming interval by calling `(*statsManager).trim`. In turn, `trim` +at every stats-trimming interval by calling `(*statsManager).trim`. In turn, `trim` calls `statsContainerPruneEntries`, which eventually: 1. removes entries not modified for more than one week; @@ -524,7 +524,7 @@ More specifically we sort entries using this algorithm: Likewise, calling `(*statsManager).Close` invokes `statsContainerPruneEntries`, and then ensures that we write `$OONI_HOME/engine/httpsdialerstats.state`. -This way, subsequent OONI Probe runs could load the stats thare are more likely +This way, subsequent OONI Probe runs could load the stats that are more likely to work and `statsPolicy` can take advantage of this information. The overall structure of `httpsdialerstats.state` is roughly the following: @@ -554,7 +554,8 @@ The overall structure of `httpsdialerstats.state` is roughly the following: "SNI": "api.trademe.co.nz", "VerifyHostname": "api.ooni.io" } - } + }, + /* ... */ } } } From de31fd55d8538d27ca780464cc105da1aeedcea8 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:51:39 +0200 Subject: [PATCH 094/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 2ffdb54cd6..bb31a19bc2 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -555,7 +555,7 @@ The overall structure of `httpsdialerstats.state` is roughly the following: "VerifyHostname": "api.ooni.io" } }, - /* ... */ + /* ... 
*/ } } } From 8a3eef767848c3979bae39f73d1d1030835af70a Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:52:19 +0200 Subject: [PATCH 095/127] [ci skip] --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index bb31a19bc2..7a13441cba 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -566,8 +566,8 @@ The overall structure of `httpsdialerstats.state` is roughly the following: **Listing 7.** Content of the stats state as cached on disk. That is, the `DomainEndpoints` map contains contains an entry for each -TLS endpoint and, in turn, such an entry contains tactics. We index each -tactic by a summary string to speed up looking it up. +TLS endpoint and, in turn, such an entry contains tactics indexed by +a summary string to speed up looking them up. For each tactic, we keep counters and histograms, the time when the entry had been updated last, and the tactic itself. From 697f6b3d66bc117488b8a9b8373c1ac82765845c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:57:48 +0200 Subject: [PATCH 096/127] [ci skip] --- internal/enginenetx/DESIGN.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 7a13441cba..40b361adf2 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -773,3 +773,14 @@ Here's an excerpt from the logs: In this case, we pick up the right bridge configuration and successfully use it after two seconds. This configuration is provided by the `bridgesPolicy`. + +## Limitations and Future Work + +1. 
We should integrate the [engineresolver](../engineresolver/) package with this package +more tightly: doing that would allow users to configure the order in which we use DNS-over-HTTPS +resolvers (see [probe#2675](https://github.com/ooni/probe/issues/2675)) and would allow us +to improve our adaptive strategies when using these resolvers. + +2. We lack a mechanism to dynamically distribute new bridges IP addresses to probes using, +for example, the check-in API and possibly other mechanisms. Lacking this functionality, our +bridge strategy is incomplete since it rests on a single bridge being available. From e652147d45d79f823e56e0ddf03a63e34a6f333c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 09:59:27 +0200 Subject: [PATCH 097/127] [ci skip] --- internal/enginenetx/DESIGN.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 40b361adf2..55d7bb3638 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -783,4 +783,7 @@ to improve our adaptive strategies when using these resolvers. 2. We lack a mechanism to dynamically distribute new bridges IP addresses to probes using, for example, the check-in API and possibly other mechanisms. Lacking this functionality, our -bridge strategy is incomplete since it rests on a single bridge being available. +bridge strategy is incomplete since it rests on a single bridge being available. What's +more, if this bridge disappears or is IP blocked, all the probes will have one slow bootstrap +and probes where DNS is not working will stop working (see +[probe#2500](https://github.com/ooni/probe/issues/2500)). 
From 4baca70d543dcacec5c8b6025056bbe1b7b611c0 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 10:01:04 +0200 Subject: [PATCH 098/127] [ci skip] --- internal/enginenetx/DESIGN.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 55d7bb3638..bc77beb2dd 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -778,8 +778,7 @@ use it after two seconds. This configuration is provided by the `bridgesPolicy`. 1. We should integrate the [engineresolver](../engineresolver/) package with this package more tightly: doing that would allow users to configure the order in which we use DNS-over-HTTPS -resolvers (see [probe#2675](https://github.com/ooni/probe/issues/2675)) and would allow us -to improve our adaptive strategies when using these resolvers. +resolvers (see [probe#2675](https://github.com/ooni/probe/issues/2675)). 2. We lack a mechanism to dynamically distribute new bridges IP addresses to probes using, for example, the check-in API and possibly other mechanisms. Lacking this functionality, our @@ -787,3 +786,5 @@ bridge strategy is incomplete since it rests on a single bridge being available. more, if this bridge disappears or is IP blocked, all the probes will have one slow bootstrap and probes where DNS is not working will stop working (see [probe#2500](https://github.com/ooni/probe/issues/2500)). + +3. We should consider adding TLS ClientHello fragmentation as a tactic. 
From fa1c237d5b5d4b2d2c9d2ac92631a9e6e3679749 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 10:01:23 +0200 Subject: [PATCH 099/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index bc77beb2dd..c603a48c3a 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -788,3 +788,5 @@ and probes where DNS is not working will stop working (see [probe#2500](https://github.com/ooni/probe/issues/2500)). 3. We should consider adding TLS ClientHello fragmentation as a tactic. + +4. We should add support for HTTP/3 bridges. From d0f721a689cc6b72e4a971c80db918b9bc4c5c5c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 16:18:19 +0200 Subject: [PATCH 100/127] x --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index c603a48c3a..bc6e1dc984 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -395,7 +395,7 @@ to a specific bridge address that has been discontinued; would waste lots of time failing before falling back. Conversely, a better strategy is to "remix" tactics as implemented -by the [remix.go](remix.go) file: +by the [mix.go](mix.go) file: 1. we take the first two tactics from the stats; @@ -420,7 +420,7 @@ rests on the assumptions made explicit above. That is: 2. that the Web Connectivity Test Helpers accepts any SNI. -Here we're also using the [remix.go](remix.go) algorithm to remix +Here we're also using the [mix.go](mix.go) algorithm to remix two different sources of tactics: 1. 
the `bridgesTacticsForDomain` only returns tactics for "api.ooni.io" From 7058b7d3be6467b8191f469c148d25958a54233e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 16:19:41 +0200 Subject: [PATCH 101/127] x --- internal/enginenetx/bridgespolicy.go | 2 +- internal/enginenetx/statspolicy.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index bdbf9526cd..3cf4de4c54 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -43,7 +43,7 @@ func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) // This ensures we read the first two bridge tactics. // // Note: modifying this field likely indicates you also need to modify the - // corresponding remix{} instantiation in statspolicy.go. + // corresponding instantiation in statspolicy.go. &mixDeterministicThenRandomConfig{ C: p.bridgesTacticsForDomain(domain, port), N: 2, diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 67c38099f6..28a9714ed0 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -47,7 +47,7 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str // befofe we start remixing from the two channels. // // Note: modifying this field likely indicates you also need to modify the - // corresponding remix{} instantiation in bridgespolicy.go. + // corresponding instantiation in bridgespolicy.go. 
&mixDeterministicThenRandomConfig{ C: p.Fallback.LookupTactics(ctx, domain, port), N: 4, From 4e628dba1b47905068b226bd5526d240aa8fae33 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 16:21:13 +0200 Subject: [PATCH 102/127] x --- internal/enginenetx/bridgespolicy.go | 30 ++++++++++++++-------------- internal/enginenetx/statspolicy.go | 24 +++++++++++----------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 3cf4de4c54..852527f969 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -36,28 +36,28 @@ var _ httpsDialerPolicy = &bridgesPolicy{} // 3. we randomly remix the rest. func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic { return mixDeterministicThenRandom( - // Prioritize emitting tactics for bridges. Currently we only have bridges - // for "api.ooni.io", therefore, for all other hosts this arm ends up - // returning a channel that will be immediately closed. - // - // This ensures we read the first two bridge tactics. - // - // Note: modifying this field likely indicates you also need to modify the - // corresponding instantiation in statspolicy.go. &mixDeterministicThenRandomConfig{ + // Prioritize emitting tactics for bridges. Currently we only have bridges + // for "api.ooni.io", therefore, for all other hosts this arm ends up + // returning a channel that will be immediately closed. C: p.bridgesTacticsForDomain(domain, port), + + // This ensures we read the first two bridge tactics. + // + // Note: modifying this field likely indicates you also need to modify the + // corresponding instantiation in statspolicy.go. N: 2, }, - // Mix the above with using the fallback policy and rewriting the SNIs - // used by the test helpers to avoid exposing the real SNIs. - // - // This ensures we read the first two DNS tactics. 
- // - // Note: modifying this field likely indicates you also need to modify the - // corresponding remix{} instantiation in statspolicy.go. &mixDeterministicThenRandomConfig{ + // Mix the above with using the fallback policy and rewriting the SNIs + // used by the test helpers to avoid exposing the real SNIs. C: p.maybeRewriteTestHelpersTactics(p.Fallback.LookupTactics(ctx, domain, port)), + + // This ensures we read the first two DNS tactics. + // + // Note: modifying this field likely indicates you also need to modify the + // corresponding remix{} instantiation in statspolicy.go. N: 2, }, ) diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 28a9714ed0..7b2c94905c 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -31,25 +31,25 @@ var _ httpsDialerPolicy = &statsPolicy{} // LookupTactics implements HTTPSDialerPolicy. func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { return mixDeterministicThenRandom( - // Give priority to what we know from stats - // - // We make sure we emit two stats-based tactics if possible &mixDeterministicThenRandomConfig{ + // Give priority to what we know from stats. C: statsPolicyStream(statsPolicyFilterStatsTactics(p.Stats.LookupTactics(domain, port))), + + // We make sure we emit two stats-based tactics if possible. N: 2, }, - // And remix it with the fallback - // - // Under the assumption that below us we have bridgePolicy composed with DNS policy - // and that the stage below emits two bridge tactics, if possible, followed by two - // additional DNS tactics, if possible, we need to allow for four tactics to pass through - // befofe we start remixing from the two channels. - // - // Note: modifying this field likely indicates you also need to modify the - // corresponding instantiation in bridgespolicy.go. &mixDeterministicThenRandomConfig{ + // And remix it with the fallback. 
C: p.Fallback.LookupTactics(ctx, domain, port), + + // Under the assumption that below us we have bridgePolicy composed with DNS policy + // and that the stage below emits two bridge tactics, if possible, followed by two + // additional DNS tactics, if possible, we need to allow for four tactics to pass through + // before we start remixing from the two channels. + // + // Note: modifying this field likely indicates you also need to modify the + // corresponding instantiation in bridgespolicy.go. N: 4, }, ) From 3b0e11026b814717df82d8a9d8029681ef7a06c9 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 16:23:17 +0200 Subject: [PATCH 103/127] x --- internal/enginenetx/bridgespolicy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 852527f969..23a16a1860 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -57,7 +57,7 @@ func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) // This ensures we read the first two DNS tactics. // // Note: modifying this field likely indicates you also need to modify the - // corresponding remix{} instantiation in statspolicy.go. + // corresponding instantiation in statspolicy.go. N: 2, }, ) From 32178c6b602f72451c4bf2ec9639e2dd4ad898e5 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 17:13:29 +0200 Subject: [PATCH 104/127] x --- internal/enginenetx/statspolicy.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 7b2c94905c..86e18c2276 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -30,10 +30,10 @@ var _ httpsDialerPolicy = &statsPolicy{} // LookupTactics implements HTTPSDialerPolicy.
func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { - return mixDeterministicThenRandom( + return filterOnlyKeepUniqueTactics(filterOutNilTactics(mixDeterministicThenRandom( &mixDeterministicThenRandomConfig{ // Give priority to what we know from stats. - C: statsPolicyStream(statsPolicyFilterStatsTactics(p.Stats.LookupTactics(domain, port))), + C: streamTacticsFromSlice(statsPolicyFilterStatsTactics(p.Stats.LookupTactics(domain, port))), // We make sure we emit two stats-based tactics if possible. N: 2, @@ -52,22 +52,9 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str // corresponding instantiation in bridgespolicy.go. N: 4, }, - ) -} - -// statsPolicyStream streams a vector of tactics. -func statsPolicyStream(txs []*httpsDialerTactic) <-chan *httpsDialerTactic { - output := make(chan *httpsDialerTactic) - go func() { - defer close(output) - for _, tx := range txs { - output <- tx - } - }() - return output + ))) } -// statsPolicyFilterStatsTactics filters the tactics generated by consulting the stats. 
func statsPolicyFilterStatsTactics(tactics []*statsTactic, good bool) (out []*httpsDialerTactic) { // when good is false, it means p.Stats.LookupTactics failed if !good { From e7c75417df21999c4894ba9e61dff5c6c2084fbf Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 17:15:16 +0200 Subject: [PATCH 105/127] x --- internal/enginenetx/bridgespolicy.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 23a16a1860..fcafb4f7c7 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -88,19 +88,12 @@ func (p *bridgesPolicy) maybeRewriteTestHelpersTactics(input <-chan *httpsDialer defer close(out) // tell the parent when we're done for tactic := range input { - // TODO(bassosimone): here we could potentially attempt using tactics - // changing the SNI also for api.ooni.io when we're getting its address - // using a DNS resolver that is working as intended. - // When we're not connecting to a TH, pass the policy down the chain unmodified if !bridgesPolicySlicesContains(bridgesPolicyTestHelpersDomains, tactic.VerifyHostname) { out <- tactic continue } - // TODO(bassosimone): potentially we should also throw the real SNI - // into the mix, but it should not be the first SNI we emit. - // This is the case where we're connecting to a test helper. Let's try // to produce policies hiding the SNI to censoring middleboxes. 
for _, sni := range p.bridgesDomainsInRandomOrder() { From f16784cd28869202bd75ce846ed0a2137c3cc28c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 17:16:14 +0200 Subject: [PATCH 106/127] x --- internal/enginenetx/statspolicy.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 86e18c2276..49b13e2ce0 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -30,6 +30,7 @@ var _ httpsDialerPolicy = &statsPolicy{} // LookupTactics implements HTTPSDialerPolicy. func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + // avoid emitting nil tactics and duplicate tactics return filterOnlyKeepUniqueTactics(filterOutNilTactics(mixDeterministicThenRandom( &mixDeterministicThenRandomConfig{ // Give priority to what we know from stats. From 28bcd5fdff3194f87a2150d55f84f6ced7be2cda Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 17:25:41 +0200 Subject: [PATCH 107/127] [ci skip] --- internal/enginenetx/DESIGN.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index bc6e1dc984..fa21fd0743 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -55,9 +55,10 @@ func (n *Network) NewHTTPClient() *http.Client **Listing 1.** `*enginenetx.Network` HTTP APIs. -The returned `*http.Client` uses an internal transport, which is returned when the -package user invokes the `HTTPTransport` method. In turn, the internal transport customizes -creating TLS connections, to meet the objectives explained before. +The `HTTPTransport` method returns a `*Network` field containing an HTTP transport with +custom TLS connection establishment tactics depending on the configured policies. + +The `NewHTTPClient` method wraps such a transport into an `*http.Client`. 
## Creating TLS Connections From da06bbb2796bb42ff389eae4721c8ba9f79ca0e7 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 17:30:20 +0200 Subject: [PATCH 108/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index fa21fd0743..8607d71d04 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -136,7 +136,7 @@ Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) to get IP addresses using the DNS used by the `*engine.Session` type. (Typically, such a resolver, in turn, composes several DNS-over-HTTPS resolvers with the fallback -`getaddrinfo` resolver, and remebers which resolvers work.) +`getaddrinfo` resolver, and remembers which resolvers work.) A "tactic" looks like this: From 67ae4d96c4dd020a4c36151dd2c8010dfeb8e1ad Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 17:31:37 +0200 Subject: [PATCH 109/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 8607d71d04..9940494200 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -158,7 +158,7 @@ Here's an explanation of why we have each field in the struct: - `InitialDelay` allows a policy to delay a connect operation to implement something similar to [happy eyeballs](https://en.wikipedia.org/wiki/Happy_Eyeballs), -where dialing attempts run in parallel that are staggered in time (the classical +where dialing attempts run in parallel and are staggered in time (the classical example being: dialing for IPv6 and then attempting dialing for IPv4 after 0.3s); - `SNI` is the `SNI` to send as part of the TLS ClientHello; From 02ee95d68188beaea0187d4473e62e6952734c02 Mon 
Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 17 Apr 2024 17:35:40 +0200 Subject: [PATCH 110/127] [ci skip] --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 9940494200..10b2e903df 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -289,7 +289,7 @@ and/or access network experiencing temporary failures to deliver packets. Howeve we also aim to have dialing parallelism, to reduce the overall time to connect when we're experiencing many timeouts when attempting to dial. -(We chose 1s as the baseline delay because that would be three happy-eyeballs delays as +(We chose 1s as the baseline delay because that would be ~three happy-eyeballs delays as implemented by the Go standard library, and overall a TCP connect followed by a TLS handshake should roughly amount to three round trips.) From 3877575a697914eb22f8e94396a205f394412cab Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 18 Apr 2024 10:08:59 +0200 Subject: [PATCH 111/127] fix: adapt test after remix change --- internal/enginenetx/bridgespolicy_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/bridgespolicy_test.go b/internal/enginenetx/bridgespolicy_test.go index 4d5b2ad3c4..0984a1ee6f 100644 --- a/internal/enginenetx/bridgespolicy_test.go +++ b/internal/enginenetx/bridgespolicy_test.go @@ -147,7 +147,7 @@ func TestBridgesPolicy(t *testing.T) { dnsCount int overallCount int ) - const expectedDNSEntryCount = 153 // yikes! 
+ const expectedDNSEntryCount = 3 for tactic := range tactics { overallCount++ From af87c6e458525c66040aa9e3174ed82ce51079f7 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 18 Apr 2024 12:31:48 +0200 Subject: [PATCH 112/127] doc: add table of contents --- internal/enginenetx/DESIGN.md | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 10b2e903df..52c072d49e 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -3,6 +3,26 @@ This file documents the [./internal/enginenetx](.) package design. The content is current as of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). +## Table of Contents + +- [Design Goals](#design-goals) +- [High-Level API](#high-level-api) +- [Creating TLS Connections](#creating-tls-connections) +- [Dialing Tactics](#dialing-tactics) +- [Dialing Algorithm](#dialing-algorithm) +- [Dialing Policies](#dialing-policies) + - [dnsPolicy](#dnspolicy) + - [userPolicy](#userpolicy) + - [statsPolicy](#statspolicy) + - [bridgePolicy](#bridgepolicy) +- [Overall Algorithm](#overall-algorithm) +- [Managing Stats](#managing-stats) +- [Real-World Scenarios](#real-world-scenarios) + - [Invalid bridge without cached data](#invalid-bridge-without-cached-data) + - [Invalid bridge with cached data](#invalid-bridge-with-cached-data) + - [Valid bridge with invalid cached data](#valid-bridge-with-invalid-cached-data) +- [Limitations and Future Work](#limitations-and-future-work) + ## Design Goals We define "bridge" an IP address with the following properties: @@ -102,7 +122,7 @@ we implemented strict falling back.) Also, when using a proxy, we just use `dnsPolicy` assuming the proxy knows how to do circumvention. 
-## Instructions For Dialing +## Dialing Tactics Each policy implements the following interface (defined in [httpsdialer.go](httpsdialer.go)): @@ -170,7 +190,7 @@ SNI over the network and then verify the certificate using the real SNI after a `skipVerify=true` TLS handshake has completed. (Obviously, for this trick to work, the HTTPS server we're using must be okay with receiving unrelated SNIs.) -## HTTPS Dialer +## Dialing Algorithm Creating TLS connections is implemented by `(*httpsDialer).DialTLSContext`, also part of [httpsdialer.go](httpsdialer.go). @@ -311,7 +331,9 @@ type httpsDialerEventsHandler interface { These statistics contribute to construct knowledge about the network conditions and influence the generation of tactics. -## dnsPolicy +## Dialing Policies + +### dnsPolicy The `dnsPolicy` is implemented by [dnspolicy.go](dnspolicy.go). @@ -327,7 +349,7 @@ what `getaddrinfo` would do when asked to "resolve" an IP address); If `httpsDialer` uses this policy as its only policy, the operation it performs are morally equivalent to normally dialing for TLS. -## userPolicy +### userPolicy The `userPolicy` is implemented by [userpolicy.go](userpolicy.go). @@ -374,7 +396,7 @@ inside the `DomainEndpoints` map; Because `userPolicy` is user-configured, we _entirely bypass_ the fallback policy when there's an user-configured entry. -## statsPolicy +### statsPolicy The `statsPolicy` is implemented by [statspolicy.go](statspolicy.go). @@ -412,7 +434,7 @@ tactics from the fallback because that allows us to include two bridge tactics and two DNS tactics, as explained below when we discuss the `bridgePolicy` policy.) -## bridgePolicy +### bridgePolicy The `bridgePolicy` is implemented by [bridgespolicy.go](bridgespolicy.go) and rests on the assumptions made explicit above. 
That is: From 1a5026626d80fe7b8674ba4d62e21e9af5f66587 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 18 Apr 2024 12:36:04 +0200 Subject: [PATCH 113/127] doc: rename the goals section to be more clear --- internal/enginenetx/DESIGN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 52c072d49e..e802f80c4e 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -5,7 +5,7 @@ as of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). ## Table of Contents -- [Design Goals](#design-goals) +- [Goals & Assumptions](#goals--assumptions) - [High-Level API](#high-level-api) - [Creating TLS Connections](#creating-tls-connections) - [Dialing Tactics](#dialing-tactics) @@ -23,7 +23,7 @@ as of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). - [Valid bridge with invalid cached data](#valid-bridge-with-invalid-cached-data) - [Limitations and Future Work](#limitations-and-future-work) -## Design Goals +## Goals & Assumptions We define "bridge" an IP address with the following properties: From 139f10c11962b771cee8622b170d743f5c3ba975 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 26 Apr 2024 09:54:42 +0200 Subject: [PATCH 114/127] Update internal/enginenetx/DESIGN.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Arturo Filastò --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e802f80c4e..53cc104595 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -27,7 +27,7 @@ as of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). We define "bridge" an IP address with the following properties: -1. the IP address is not expected to change; +1. the IP address is not expected to change frequently; 2. 
the IP address listens on port 443 and accepts _any_ incoming SNI; From 1d8a786e6f5de7dd7b6b2a324383561385bab49f Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 26 Apr 2024 09:54:53 +0200 Subject: [PATCH 115/127] Update internal/enginenetx/DESIGN.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Arturo Filastò --- internal/enginenetx/DESIGN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index 53cc104595..e9b24b5d7e 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -31,7 +31,7 @@ We define "bridge" an IP address with the following properties: 2. the IP address listens on port 443 and accepts _any_ incoming SNI; -3. the webserver on port 443 proxies to the OONI APIs. +3. the webserver on port 443 provides unified access to [OONI API services](https://docs.ooni.org/backend/ooniapi/services/). We also assume that the Web Connectivity test helpers (TH) could accept any SNIs. From bffc8111594bbf18e5b5c72ae5b397ac192e7ae4 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 7 May 2024 15:21:54 +0200 Subject: [PATCH 116/127] Apply suggestions from @hellais MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Arturo Filastò --- internal/enginenetx/DESIGN.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index e9b24b5d7e..a496f8444f 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -49,11 +49,9 @@ from a pre-defined list of SNIs; 3. remember and use tactics for creating TLS connections that worked previously; -4. recover ~quickly if the conditions change (e.g., if a bridge is discontinued); +4. for the trivial case, an uncensored API backend, communication to the API should use the simplest way possible. 
This naturally leads to the fact that it should recover ~quickly if the conditions change (e.g., if a bridge is discontinued); -5. adopt a censored-users-first approach where the strategy we use by default -should allow for smooth operations _for them_ rather than prioritizing the -non-censored case and using additional tactics as the fallback; +5. for users in censored regions it should be possible to use tactics to overcome the restrictions; 6. try to defer sending the true `SNI` on the wire, therefore trying to avoid triggering potential residual censorship blocking a given TCP endpoint From 9d2c47f1d047f92d1a81124e979dcda2a10f1efd Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 8 May 2024 12:55:30 +0200 Subject: [PATCH 117/127] feat: allow chaining policies after the DNS --- internal/enginenetx/bridgespolicy_test.go | 5 +++ internal/enginenetx/dnspolicy.go | 15 +++++-- internal/enginenetx/dnspolicy_test.go | 49 +++++++++++++++++++++++ internal/enginenetx/httpsdialer_test.go | 2 + internal/enginenetx/network.go | 14 +++++-- internal/enginenetx/nullpolicy.go | 27 +++++++++++++ internal/enginenetx/nullpolicy_test.go | 17 ++++++++ internal/enginenetx/statspolicy_test.go | 2 + internal/enginenetx/userpolicy_test.go | 4 +- 9 files changed, 128 insertions(+), 7 deletions(-) create mode 100644 internal/enginenetx/nullpolicy.go create mode 100644 internal/enginenetx/nullpolicy_test.go diff --git a/internal/enginenetx/bridgespolicy_test.go b/internal/enginenetx/bridgespolicy_test.go index 0984a1ee6f..0b80af3dbb 100644 --- a/internal/enginenetx/bridgespolicy_test.go +++ b/internal/enginenetx/bridgespolicy_test.go @@ -20,6 +20,7 @@ func TestBridgesPolicy(t *testing.T) { return nil, expected }, }, + Fallback: &nullPolicy{}, }, } @@ -45,6 +46,7 @@ func TestBridgesPolicy(t *testing.T) { return []string{"93.184.216.34"}, nil }, }, + Fallback: &nullPolicy{}, }, } @@ -90,6 +92,7 @@ func TestBridgesPolicy(t *testing.T) { return nil, expected }, }, + Fallback: 
&nullPolicy{}, }, } @@ -135,6 +138,7 @@ func TestBridgesPolicy(t *testing.T) { return []string{"130.192.91.211"}, nil }, }, + Fallback: &nullPolicy{}, }, } @@ -214,6 +218,7 @@ func TestBridgesPolicy(t *testing.T) { return expectedAddrs, nil }, }, + Fallback: &nullPolicy{}, }, } diff --git a/internal/enginenetx/dnspolicy.go b/internal/enginenetx/dnspolicy.go index 39dc5fb14a..12ff2233eb 100644 --- a/internal/enginenetx/dnspolicy.go +++ b/internal/enginenetx/dnspolicy.go @@ -16,14 +16,16 @@ import ( // given resolver and the domain as the SNI. // // The zero value is invalid; please, init all MANDATORY fields. -// -// This policy uses an Happy-Eyeballs-like algorithm. type dnsPolicy struct { // Logger is the MANDATORY logger. Logger model.Logger // Resolver is the MANDATORY resolver. Resolver model.Resolver + + // Fallback is the MANDATORY fallback policy. Use the [*nullPolicy] if + // you don't want any other policy to runafter the DNS. + Fallback httpsDialerPolicy } var _ httpsDialerPolicy = &dnsPolicy{} @@ -52,7 +54,9 @@ func (p *dnsPolicy) LookupTactics( addrs, err := resoWithShortCircuit.LookupHost(ctx, domain) if err != nil { p.Logger.Warnf("resoWithShortCircuit.LookupHost: %s", err.Error()) - return + // fallthrough because we need to also read from tactics + // from the fallback policy. The returned address list will + // be zero-length when the lookup fails anyway. 
} // The tactics we generate here have SNI == VerifyHostname == domain @@ -66,6 +70,11 @@ func (p *dnsPolicy) LookupTactics( } out <- tactic } + + // Now forward tactics from the fallback policy + for tactic := range p.Fallback.LookupTactics(ctx, domain, port) { + out <- tactic + } }() return out diff --git a/internal/enginenetx/dnspolicy_test.go b/internal/enginenetx/dnspolicy_test.go index 1c85a85cc8..6a7354faeb 100644 --- a/internal/enginenetx/dnspolicy_test.go +++ b/internal/enginenetx/dnspolicy_test.go @@ -4,8 +4,10 @@ import ( "context" "testing" + "github.com/google/go-cmp/cmp" "github.com/ooni/probe-cli/v3/internal/mocks" "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" ) func TestDNSPolicy(t *testing.T) { @@ -19,6 +21,7 @@ func TestDNSPolicy(t *testing.T) { }, }, Resolver: &mocks.Resolver{}, // empty so we crash if we hit the resolver + Fallback: &nullPolicy{}, } ctx, cancel := context.WithCancel(context.Background()) @@ -43,6 +46,7 @@ func TestDNSPolicy(t *testing.T) { policy := &dnsPolicy{ Logger: model.DiscardLogger, Resolver: &mocks.Resolver{}, // empty so we crash if we hit the resolver + Fallback: &nullPolicy{}, } tactics := policy.LookupTactics(context.Background(), "130.192.91.211", "443") @@ -72,4 +76,49 @@ func TestDNSPolicy(t *testing.T) { t.Fatal("expected to see just one tactic") } }) + + t.Run("we fallback if the fallback is implemented", func(t *testing.T) { + // define what tactic we expect to see in output + expectTactic := &httpsDialerTactic{ + Address: "130.192.91.211", + InitialDelay: 0, + Port: "443", + SNI: "shelob.polito.it", + VerifyHostname: "api.ooni.io", + } + + // create a DNS policy where the DNS lookup fails and then add a fallback + // use policy where we return back the expected tactic + policy := &dnsPolicy{ + Logger: model.DiscardLogger, + Resolver: &mocks.Resolver{ + MockLookupHost: func(ctx context.Context, domain string) ([]string, error) { + return nil, 
netxlite.ErrOODNSNoSuchHost + }, + }, + Fallback: &userPolicy{ + Fallback: &nullPolicy{}, + Root: &userPolicyRoot{ + DomainEndpoints: map[string][]*httpsDialerTactic{ + "api.ooni.io:443": {expectTactic}, + }, + Version: userPolicyVersion, + }, + }, + } + + // lookup for api.ooni.io:443 + input := policy.LookupTactics(context.Background(), "api.ooni.io", "443") + + // collect all the returned tactics + var tactics []*httpsDialerTactic + for tx := range input { + tactics = append(tactics, tx) + } + + // make sure we exactly got the tactic we expected + if diff := cmp.Diff([]*httpsDialerTactic{expectTactic}, tactics); diff != "" { + t.Fatal(diff) + } + }) } diff --git a/internal/enginenetx/httpsdialer_test.go b/internal/enginenetx/httpsdialer_test.go index d5f55af5ad..0477849722 100644 --- a/internal/enginenetx/httpsdialer_test.go +++ b/internal/enginenetx/httpsdialer_test.go @@ -379,6 +379,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { policy := &dnsPolicy{ Logger: log.Log, Resolver: resolver, + Fallback: &nullPolicy{}, } // create the TLS dialer @@ -512,6 +513,7 @@ func TestHTTPSDialerHostNetworkQA(t *testing.T) { &dnsPolicy{ Logger: log.Log, Resolver: resolver, + Fallback: &nullPolicy{}, }, &nullStatsManager{}, ) diff --git a/internal/enginenetx/network.go b/internal/enginenetx/network.go index e681de1f1e..7a042e570b 100644 --- a/internal/enginenetx/network.go +++ b/internal/enginenetx/network.go @@ -149,13 +149,21 @@ func newHTTPSDialerPolicy( // in case there's a proxy URL, we're going to trust the proxy to do the right thing and // know what it's doing, hence we'll have a very simple DNS policy if proxyURL != nil { - return &dnsPolicy{logger, resolver} + return &dnsPolicy{ + Logger: logger, + Resolver: resolver, + Fallback: &nullPolicy{}, + } } // create a composed fallback TLS dialer policy fallback := &statsPolicy{ - Fallback: &bridgesPolicy{Fallback: &dnsPolicy{logger, resolver}}, - Stats: stats, + Fallback: &bridgesPolicy{Fallback: &dnsPolicy{ + Logger: 
logger, + Resolver: resolver, + Fallback: &nullPolicy{}, + }}, + Stats: stats, } // make sure we honor a user-provided policy diff --git a/internal/enginenetx/nullpolicy.go b/internal/enginenetx/nullpolicy.go new file mode 100644 index 0000000000..44424991bc --- /dev/null +++ b/internal/enginenetx/nullpolicy.go @@ -0,0 +1,27 @@ +package enginenetx + +// +// A policy that never returns any tactic. +// + +import "context" + +// nullPolicy is a policy that never returns any tactics. +// +// You can use this policy to terminate the policy chain and +// ensure ane existing policy has a "null" fallback. +// +// The zero value is ready to use. +type nullPolicy struct{} + +var _ httpsDialerPolicy = &nullPolicy{} + +// LookupTactics implements httpsDialerPolicy. +// +// This policy returns a closed channel such that it won't +// be possible to read policies from it. +func (n *nullPolicy) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + output := make(chan *httpsDialerTactic) + close(output) + return output +} diff --git a/internal/enginenetx/nullpolicy_test.go b/internal/enginenetx/nullpolicy_test.go new file mode 100644 index 0000000000..0bb40ec0dd --- /dev/null +++ b/internal/enginenetx/nullpolicy_test.go @@ -0,0 +1,17 @@ +package enginenetx + +import ( + "context" + "testing" +) + +func TestNullPolicy(t *testing.T) { + p := &nullPolicy{} + var count int + for range p.LookupTactics(context.Background(), "api.ooni.io", "443") { + count++ + } + if count != 0 { + t.Fatal("should have not returned any policy") + } +} diff --git a/internal/enginenetx/statspolicy_test.go b/internal/enginenetx/statspolicy_test.go index 6431ea3f3e..04524b3a31 100644 --- a/internal/enginenetx/statspolicy_test.go +++ b/internal/enginenetx/statspolicy_test.go @@ -157,6 +157,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { } }, }, + Fallback: &nullPolicy{}, }, Stats: stats, } @@ -220,6 +221,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) 
{ } }, }, + Fallback: &nullPolicy{}, }, Stats: stats, } diff --git a/internal/enginenetx/userpolicy_test.go b/internal/enginenetx/userpolicy_test.go index 9a7b8b3c95..7639f4d9d0 100644 --- a/internal/enginenetx/userpolicy_test.go +++ b/internal/enginenetx/userpolicy_test.go @@ -33,7 +33,7 @@ func TestUserPolicy(t *testing.T) { expectedPolicy *userPolicy } - fallback := &dnsPolicy{} + fallback := &nullPolicy{} cases := []testcase{{ name: "when there is no key in the kvstore", @@ -242,6 +242,7 @@ func TestUserPolicy(t *testing.T) { return []string{"93.184.216.34"}, nil }, }, + Fallback: &nullPolicy{}, } policy, err := newUserPolicy(kvStore, fallback) @@ -279,6 +280,7 @@ func TestUserPolicy(t *testing.T) { return []string{"93.184.216.34"}, nil }, }, + Fallback: &nullPolicy{}, } policy, err := newUserPolicy(kvStore, fallback) From f7509d95e58d498065fc5689608314270f273a15 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 8 May 2024 14:04:26 +0200 Subject: [PATCH 118/127] feat: implement the mix-with-interleaving policy --- internal/enginenetx/mixpolicy.go | 90 ++++++++++++ internal/enginenetx/mixpolicy_test.go | 189 ++++++++++++++++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 internal/enginenetx/mixpolicy.go create mode 100644 internal/enginenetx/mixpolicy_test.go diff --git a/internal/enginenetx/mixpolicy.go b/internal/enginenetx/mixpolicy.go new file mode 100644 index 0000000000..d42725701e --- /dev/null +++ b/internal/enginenetx/mixpolicy.go @@ -0,0 +1,90 @@ +package enginenetx + +// +// Mix policies - ability of mixing from a primary policy and a fallback policy +// in a more flexible way than strictly falling back +// + +import ( + "context" + + "github.com/ooni/probe-cli/v3/internal/optional" +) + +// mixPolicyInterleave interleaves policies by a given interleaving +// factor. 
Say the interleave factor is N, then we first read N tactics +// from the primary policy, then N from the fallback one, and we keep +// going on like this until we've read all the tactics from both. +type mixPolicyInterleave struct { + // Primary is the primary policy. We will read N from this + // policy first, then N from fallback, and so on. + Primary httpsDialerPolicy + + // Fallback is the fallback policy. + Fallback httpsDialerPolicy + + // Factor is the interleaving factor to use. + Factor uint8 +} + +var _ httpsDialerPolicy = &mixPolicyInterleave{} + +// LookupTactics implements httpsDialerPolicy. +func (p *mixPolicyInterleave) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + // create the output channel + output := make(chan *httpsDialerTactic) + + go func() { + // make sure we eventually close the output channel + defer close(output) + + // obtain the primary channel + primary := optional.Some(p.Primary.LookupTactics(ctx, domain, port)) + + // obtain the fallback channel + fallback := optional.Some(p.Fallback.LookupTactics(ctx, domain, port)) + + // loop until both channels are drained + for !primary.IsNone() || !fallback.IsNone() { + // take N from primary if possible + primary = p.maybeTakeN(primary, output) + + // take N from secondary if possible + fallback = p.maybeTakeN(fallback, output) + } + }() + + return output +} + +// maybeTakeN takes N entries from input if it's not none. When input is not +// none and reading from it indicates EOF, this function returns none. Otherwise, +// it returns the same value given as input. 
+func (p *mixPolicyInterleave) maybeTakeN( + input optional.Value[<-chan *httpsDialerTactic], + output chan<- *httpsDialerTactic, +) optional.Value[<-chan *httpsDialerTactic] { + // make sure we've not already drained this channel + if !input.IsNone() { + + // obtain the underlying channel + ch := input.Unwrap() + + // take N entries from the channel + for idx := uint8(0); idx < p.Factor; idx++ { + + // attempt to get the next tactic + tactic, good := <-ch + + // handle the case where the channel has been drained + if !good { + return optional.None[<-chan *httpsDialerTactic]() + } + + // emit the tactic + output <- tactic + } + } + + return input +} diff --git a/internal/enginenetx/mixpolicy_test.go b/internal/enginenetx/mixpolicy_test.go new file mode 100644 index 0000000000..e186c39490 --- /dev/null +++ b/internal/enginenetx/mixpolicy_test.go @@ -0,0 +1,189 @@ +package enginenetx + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestMixPolicyInterleave(t *testing.T) { + // testcase is a test case implemented by this function. 
+ type testcase struct { + // name is the name of the test case + name string + + // primary is the primary policy to use + primary httpsDialerPolicy + + // fallback is the fallback policy to use + fallback httpsDialerPolicy + + // factor is the interleave factor + factor uint8 + + // domain is the domain to pass to LookupTactics + domain string + + // port is the port to pass to LookupTactics + port string + + // expect is the expectations in terms of tactics + expect []*httpsDialerTactic + } + + // This is the list of tactics that we expect the primary + // policy to return when we're not using a null policy + expectedPrimaryTactics := []*httpsDialerTactic{{ + Address: "130.192.91.211", + InitialDelay: 0, + Port: "443", + SNI: "shelob.polito.it", + VerifyHostname: "api.ooni.io", + }, { + Address: "130.192.91.211", + InitialDelay: 0, + Port: "443", + SNI: "whitespider.polito.it", + VerifyHostname: "api.ooni.io", + }, { + Address: "130.192.91.211", + InitialDelay: 0, + Port: "443", + SNI: "mirkwood.polito.it", + VerifyHostname: "api.ooni.io", + }, { + Address: "130.192.91.211", + InitialDelay: 0, + Port: "443", + SNI: "highgarden.polito.it", + VerifyHostname: "api.ooni.io", + }} + + // Create the non-null primary policy + primary := &userPolicy{ + Fallback: &nullPolicy{}, + Root: &userPolicyRoot{ + DomainEndpoints: map[string][]*httpsDialerTactic{ + "api.ooni.io:443": expectedPrimaryTactics, + }, + Version: userPolicyVersion, + }, + } + + // This is the list of tactics that we expect the fallback + // policy to return when we're not using a null policy + expectedFallbackTactics := []*httpsDialerTactic{{ + Address: "130.192.91.231", + InitialDelay: 0, + Port: "443", + SNI: "kingslanding.polito.it", + VerifyHostname: "api.ooni.io", + }, { + Address: "130.192.91.231", + InitialDelay: 0, + Port: "443", + SNI: "pyke.polito.it", + VerifyHostname: "api.ooni.io", + }, { + Address: "130.192.91.231", + InitialDelay: 0, + Port: "443", + SNI: "winterfell.polito.it", + 
VerifyHostname: "api.ooni.io", + }} + + // Create the non-null fallback policy + fallback := &userPolicy{ + Fallback: &nullPolicy{}, + Root: &userPolicyRoot{ + DomainEndpoints: map[string][]*httpsDialerTactic{ + "api.ooni.io:443": expectedFallbackTactics, + }, + Version: userPolicyVersion, + }, + } + + cases := []testcase{ + + // This test ensures that the code is WAI with two null policies + { + name: "with two null policies", + primary: &nullPolicy{}, + fallback: &nullPolicy{}, + factor: 2, + domain: "api.ooni.io", + port: "443", + expect: nil, + }, + + // This test ensures that we get the content of the primary + // policy when the fallback policy is the null policy + { + name: "with the fallback policy being null", + primary: primary, + fallback: &nullPolicy{}, + factor: 2, + domain: "api.ooni.io", + port: "443", + expect: expectedPrimaryTactics, + }, + + // This test ensures that we get the content of the fallback + // policy when the primary policy is the null policy + { + name: "with the primary policy being null", + primary: &nullPolicy{}, + fallback: fallback, + factor: 2, + domain: "api.ooni.io", + port: "443", + expect: expectedFallbackTactics, + }, + + // This test ensures that we correctly interleave the tactics + { + name: "with both policies being nonnull", + primary: primary, + fallback: fallback, + factor: 2, + domain: "api.ooni.io", + port: "443", + expect: []*httpsDialerTactic{ + expectedPrimaryTactics[0], + expectedPrimaryTactics[1], + expectedFallbackTactics[0], + expectedFallbackTactics[1], + expectedPrimaryTactics[2], + expectedPrimaryTactics[3], + expectedFallbackTactics[2], + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + + // construct the mixPolicyInterleave instance + p := &mixPolicyInterleave{ + Primary: tc.primary, + Fallback: tc.fallback, + Factor: tc.factor, + } + + // start looking up for tactics + outch := p.LookupTactics(context.Background(), tc.domain, tc.port) + + // collect all the generated 
tactics + var got []*httpsDialerTactic + for entry := range outch { + got = append(got, entry) + } + + // compare to expectations + if diff := cmp.Diff(tc.expect, got); diff != "" { + t.Fatal(diff) + } + }) + } +} From ab3926e7be5d762d6c172b94d46ff76bbf0c61f2 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 8 May 2024 14:10:27 +0200 Subject: [PATCH 119/127] fix: we actually need to get rid of fallbacks --- internal/enginenetx/bridgespolicy_test.go | 5 --- internal/enginenetx/dnspolicy.go | 13 +----- internal/enginenetx/dnspolicy_test.go | 49 ----------------------- internal/enginenetx/httpsdialer_test.go | 2 - internal/enginenetx/network.go | 2 - internal/enginenetx/statspolicy_test.go | 2 - internal/enginenetx/userpolicy_test.go | 2 - 7 files changed, 1 insertion(+), 74 deletions(-) diff --git a/internal/enginenetx/bridgespolicy_test.go b/internal/enginenetx/bridgespolicy_test.go index 0b80af3dbb..0984a1ee6f 100644 --- a/internal/enginenetx/bridgespolicy_test.go +++ b/internal/enginenetx/bridgespolicy_test.go @@ -20,7 +20,6 @@ func TestBridgesPolicy(t *testing.T) { return nil, expected }, }, - Fallback: &nullPolicy{}, }, } @@ -46,7 +45,6 @@ func TestBridgesPolicy(t *testing.T) { return []string{"93.184.216.34"}, nil }, }, - Fallback: &nullPolicy{}, }, } @@ -92,7 +90,6 @@ func TestBridgesPolicy(t *testing.T) { return nil, expected }, }, - Fallback: &nullPolicy{}, }, } @@ -138,7 +135,6 @@ func TestBridgesPolicy(t *testing.T) { return []string{"130.192.91.211"}, nil }, }, - Fallback: &nullPolicy{}, }, } @@ -218,7 +214,6 @@ func TestBridgesPolicy(t *testing.T) { return expectedAddrs, nil }, }, - Fallback: &nullPolicy{}, }, } diff --git a/internal/enginenetx/dnspolicy.go b/internal/enginenetx/dnspolicy.go index 12ff2233eb..75e43148e8 100644 --- a/internal/enginenetx/dnspolicy.go +++ b/internal/enginenetx/dnspolicy.go @@ -22,10 +22,6 @@ type dnsPolicy struct { // Resolver is the MANDATORY resolver. 
Resolver model.Resolver - - // Fallback is the MANDATORY fallback policy. Use the [*nullPolicy] if - // you don't want any other policy to runafter the DNS. - Fallback httpsDialerPolicy } var _ httpsDialerPolicy = &dnsPolicy{} @@ -54,9 +50,7 @@ func (p *dnsPolicy) LookupTactics( addrs, err := resoWithShortCircuit.LookupHost(ctx, domain) if err != nil { p.Logger.Warnf("resoWithShortCircuit.LookupHost: %s", err.Error()) - // fallthrough because we need to also read from tactics - // from the fallback policy. The returned address list will - // be zero-length when the lookup fails anyway. + return } // The tactics we generate here have SNI == VerifyHostname == domain @@ -70,11 +64,6 @@ func (p *dnsPolicy) LookupTactics( } out <- tactic } - - // Now forward tactics from the fallback policy - for tactic := range p.Fallback.LookupTactics(ctx, domain, port) { - out <- tactic - } }() return out diff --git a/internal/enginenetx/dnspolicy_test.go b/internal/enginenetx/dnspolicy_test.go index 6a7354faeb..1c85a85cc8 100644 --- a/internal/enginenetx/dnspolicy_test.go +++ b/internal/enginenetx/dnspolicy_test.go @@ -4,10 +4,8 @@ import ( "context" "testing" - "github.com/google/go-cmp/cmp" "github.com/ooni/probe-cli/v3/internal/mocks" "github.com/ooni/probe-cli/v3/internal/model" - "github.com/ooni/probe-cli/v3/internal/netxlite" ) func TestDNSPolicy(t *testing.T) { @@ -21,7 +19,6 @@ func TestDNSPolicy(t *testing.T) { }, }, Resolver: &mocks.Resolver{}, // empty so we crash if we hit the resolver - Fallback: &nullPolicy{}, } ctx, cancel := context.WithCancel(context.Background()) @@ -46,7 +43,6 @@ func TestDNSPolicy(t *testing.T) { policy := &dnsPolicy{ Logger: model.DiscardLogger, Resolver: &mocks.Resolver{}, // empty so we crash if we hit the resolver - Fallback: &nullPolicy{}, } tactics := policy.LookupTactics(context.Background(), "130.192.91.211", "443") @@ -76,49 +72,4 @@ func TestDNSPolicy(t *testing.T) { t.Fatal("expected to see just one tactic") } }) - - t.Run("we 
fallback if the fallback is implemented", func(t *testing.T) { - // define what tactic we expect to see in output - expectTactic := &httpsDialerTactic{ - Address: "130.192.91.211", - InitialDelay: 0, - Port: "443", - SNI: "shelob.polito.it", - VerifyHostname: "api.ooni.io", - } - - // create a DNS policy where the DNS lookup fails and then add a fallback - // use policy where we return back the expected tactic - policy := &dnsPolicy{ - Logger: model.DiscardLogger, - Resolver: &mocks.Resolver{ - MockLookupHost: func(ctx context.Context, domain string) ([]string, error) { - return nil, netxlite.ErrOODNSNoSuchHost - }, - }, - Fallback: &userPolicy{ - Fallback: &nullPolicy{}, - Root: &userPolicyRoot{ - DomainEndpoints: map[string][]*httpsDialerTactic{ - "api.ooni.io:443": {expectTactic}, - }, - Version: userPolicyVersion, - }, - }, - } - - // lookup for api.ooni.io:443 - input := policy.LookupTactics(context.Background(), "api.ooni.io", "443") - - // collect all the returned tactics - var tactics []*httpsDialerTactic - for tx := range input { - tactics = append(tactics, tx) - } - - // make sure we exactly got the tactic we expected - if diff := cmp.Diff([]*httpsDialerTactic{expectTactic}, tactics); diff != "" { - t.Fatal(diff) - } - }) } diff --git a/internal/enginenetx/httpsdialer_test.go b/internal/enginenetx/httpsdialer_test.go index 0477849722..d5f55af5ad 100644 --- a/internal/enginenetx/httpsdialer_test.go +++ b/internal/enginenetx/httpsdialer_test.go @@ -379,7 +379,6 @@ func TestHTTPSDialerNetemQA(t *testing.T) { policy := &dnsPolicy{ Logger: log.Log, Resolver: resolver, - Fallback: &nullPolicy{}, } // create the TLS dialer @@ -513,7 +512,6 @@ func TestHTTPSDialerHostNetworkQA(t *testing.T) { &dnsPolicy{ Logger: log.Log, Resolver: resolver, - Fallback: &nullPolicy{}, }, &nullStatsManager{}, ) diff --git a/internal/enginenetx/network.go b/internal/enginenetx/network.go index 7a042e570b..197a20ee01 100644 --- a/internal/enginenetx/network.go +++ 
b/internal/enginenetx/network.go @@ -152,7 +152,6 @@ func newHTTPSDialerPolicy( return &dnsPolicy{ Logger: logger, Resolver: resolver, - Fallback: &nullPolicy{}, } } @@ -161,7 +160,6 @@ func newHTTPSDialerPolicy( Fallback: &bridgesPolicy{Fallback: &dnsPolicy{ Logger: logger, Resolver: resolver, - Fallback: &nullPolicy{}, }}, Stats: stats, } diff --git a/internal/enginenetx/statspolicy_test.go b/internal/enginenetx/statspolicy_test.go index 04524b3a31..6431ea3f3e 100644 --- a/internal/enginenetx/statspolicy_test.go +++ b/internal/enginenetx/statspolicy_test.go @@ -157,7 +157,6 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { } }, }, - Fallback: &nullPolicy{}, }, Stats: stats, } @@ -221,7 +220,6 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { } }, }, - Fallback: &nullPolicy{}, }, Stats: stats, } diff --git a/internal/enginenetx/userpolicy_test.go b/internal/enginenetx/userpolicy_test.go index 7639f4d9d0..9733a0deb4 100644 --- a/internal/enginenetx/userpolicy_test.go +++ b/internal/enginenetx/userpolicy_test.go @@ -242,7 +242,6 @@ func TestUserPolicy(t *testing.T) { return []string{"93.184.216.34"}, nil }, }, - Fallback: &nullPolicy{}, } policy, err := newUserPolicy(kvStore, fallback) @@ -280,7 +279,6 @@ func TestUserPolicy(t *testing.T) { return []string{"93.184.216.34"}, nil }, }, - Fallback: &nullPolicy{}, } policy, err := newUserPolicy(kvStore, fallback) From d4f60b115bae7c0ebea4d6952f81bb7c7c4d5bc7 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 8 May 2024 14:21:20 +0200 Subject: [PATCH 120/127] refactor(bridgespolicy.go): use free functions --- internal/enginenetx/bridgespolicy.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 7de74790a9..989eaba8bc 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -41,7 +41,7 @@ func (p *bridgesPolicy) LookupTactics(ctx 
context.Context, domain, port string) // Prioritize emitting tactics for bridges. Currently we only have bridges // for "api.ooni.io", therefore, for all other hosts this arm ends up // returning a channel that will be immediately closed. - C: p.bridgesTacticsForDomain(domain, port), + C: bridgesTacticsForDomain(domain, port), // This ensures we read the first two bridge tactics. // @@ -53,7 +53,7 @@ func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) &mixDeterministicThenRandomConfig{ // Mix the above with using the fallback policy and rewriting the SNIs // used by the test helpers to avoid exposing the real SNIs. - C: p.maybeRewriteTestHelpersTactics(p.Fallback.LookupTactics(ctx, domain, port)), + C: maybeRewriteTestHelpersTactics(p.Fallback.LookupTactics(ctx, domain, port)), // This ensures we read the first two DNS tactics. // @@ -72,7 +72,7 @@ var bridgesPolicyTestHelpersDomains = []string{ "d33d1gs9kpq1c5.cloudfront.net", } -func (p *bridgesPolicy) maybeRewriteTestHelpersTactics(input <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { +func maybeRewriteTestHelpersTactics(input <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { out := make(chan *httpsDialerTactic) go func() { @@ -87,7 +87,7 @@ func (p *bridgesPolicy) maybeRewriteTestHelpersTactics(input <-chan *httpsDialer // This is the case where we're connecting to a test helper. Let's try // to produce policies hiding the SNI to censoring middleboxes. 
- for _, sni := range p.bridgesDomainsInRandomOrder() { + for _, sni := range bridgesDomainsInRandomOrder() { out <- &httpsDialerTactic{ Address: tactic.Address, InitialDelay: 0, // set when dialing @@ -102,7 +102,7 @@ func (p *bridgesPolicy) maybeRewriteTestHelpersTactics(input <-chan *httpsDialer return out } -func (p *bridgesPolicy) bridgesTacticsForDomain(domain, port string) <-chan *httpsDialerTactic { +func bridgesTacticsForDomain(domain, port string) <-chan *httpsDialerTactic { out := make(chan *httpsDialerTactic) go func() { @@ -113,8 +113,8 @@ func (p *bridgesPolicy) bridgesTacticsForDomain(domain, port string) <-chan *htt return } - for _, ipAddr := range p.bridgesAddrs() { - for _, sni := range p.bridgesDomainsInRandomOrder() { + for _, ipAddr := range bridgesAddrs() { + for _, sni := range bridgesDomainsInRandomOrder() { out <- &httpsDialerTactic{ Address: ipAddr, InitialDelay: 0, // set when dialing @@ -129,8 +129,8 @@ func (p *bridgesPolicy) bridgesTacticsForDomain(domain, port string) <-chan *htt return out } -func (p *bridgesPolicy) bridgesDomainsInRandomOrder() (out []string) { - out = p.bridgesDomains() +func bridgesDomainsInRandomOrder() (out []string) { + out = bridgesDomains() r := rand.New(rand.NewSource(time.Now().UnixNano())) r.Shuffle(len(out), func(i, j int) { out[i], out[j] = out[j], out[i] @@ -138,14 +138,14 @@ func (p *bridgesPolicy) bridgesDomainsInRandomOrder() (out []string) { return } -func (p *bridgesPolicy) bridgesAddrs() (out []string) { +func bridgesAddrs() (out []string) { return append( out, "162.55.247.208", ) } -func (p *bridgesPolicy) bridgesDomains() (out []string) { +func bridgesDomains() (out []string) { // See https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/-/issues/40273 return append( out, From b0f88bdfdcf1ba399ae99155453dee8f1a7eb56f Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 8 May 2024 14:27:14 +0200 Subject: [PATCH 121/127] feat: introduce fallback-less v2 policies --- 
internal/enginenetx/bridgespolicy.go | 23 ++++++++ internal/enginenetx/statspolicy.go | 22 ++++++++ internal/enginenetx/userpolicy.go | 78 ++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 989eaba8bc..790cf6e77b 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -64,6 +64,26 @@ func (p *bridgesPolicy) LookupTactics(ctx context.Context, domain, port string) ) } +// bridgesPolicyV2 is a policy where we use bridges for communicating +// with the OONI backend, i.e., api.ooni.io. +// +// A bridge is an IP address that can route traffic from and to +// the OONI backend and accepts any SNI. +// +// The zero value is invalid; please, init MANDATORY fields. +// +// This is v2 of the bridgesPolicy because the previous implementation +// incorporated mixing logic, while now the mixing happens outside +// of this policy, this giving us much more flexibility. +type bridgesPolicyV2 struct{} + +var _ httpsDialerPolicy = &bridgesPolicyV2{} + +// LookupTactics implements httpsDialerPolicy. 
+func (p *bridgesPolicyV2) LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic { + return bridgesTacticsForDomain(domain, port) +} + var bridgesPolicyTestHelpersDomains = []string{ "0.th.ooni.org", "1.th.ooni.org", @@ -72,6 +92,9 @@ var bridgesPolicyTestHelpersDomains = []string{ "d33d1gs9kpq1c5.cloudfront.net", } +// TODO(bassosimone): the rewriting of test helper tactics should happen elsewhere +// once we stop using the bridgesPolicy (i.e., version 1) + func maybeRewriteTestHelpersTactics(input <-chan *httpsDialerTactic) <-chan *httpsDialerTactic { out := make(chan *httpsDialerTactic) diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 49b13e2ce0..37f2cd14ac 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -56,6 +56,28 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str ))) } +// statsPolicyV2 is a policy that schedules tactics already known +// to work based on the previously collected stats. +// +// The zero value of this struct is invalid; please, make sure +// you fill all the fields marked as MANDATORY. +// +// This is v2 of the statsPolicy because the previous implementation +// incorporated mixing logic, while now the mixing happens outside +// of this policy, this giving us much more flexibility. +type statsPolicyV2 struct { + // Stats is the MANDATORY stats manager. + Stats *statsManager +} + +var _ httpsDialerPolicy = &statsPolicyV2{} + +// LookupTactics implements httpsDialerPolicy. 
+func (p *statsPolicyV2) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + // avoid emitting nil tactics and duplicate tactics + return streamTacticsFromSlice(statsPolicyFilterStatsTactics(p.Stats.LookupTactics(domain, port))) +} + func statsPolicyFilterStatsTactics(tactics []*statsTactic, good bool) (out []*httpsDialerTactic) { // when good is false, it means p.Stats.LookupTactics failed if !good { diff --git a/internal/enginenetx/userpolicy.go b/internal/enginenetx/userpolicy.go index 778c1393f2..641fe0f890 100644 --- a/internal/enginenetx/userpolicy.go +++ b/internal/enginenetx/userpolicy.go @@ -123,3 +123,81 @@ func userPolicyRemoveNilEntries(input []*httpsDialerTactic) (output []*httpsDial } return } + +// userPolicyV2 is an [httpsDialerPolicy] incorporating verbatim +// a user policy loaded from the engine's key-value store. +// +// This policy is very useful for exploration and experimentation. +// +// This is v2 of the userPolicy because the previous implementation +// incorporated mixing logic, while now the mixing happens outside +// of this policy, this giving us much more flexibility. +type userPolicyV2 struct { + // Root is the root of the user policy loaded from disk. + Root *userPolicyRoot +} + +// newUserPolicyV2 attempts to constructs a user policy. The typical error case is the one +// in which there's no httpsDialerUserPolicyKey in the key-value store. 
+func newUserPolicyV2(kvStore model.KeyValueStore) (*userPolicyV2, error) { + // attempt to read the user policy bytes from the kvstore + data, err := kvStore.Get(userPolicyKey) + if err != nil { + return nil, err + } + + // attempt to parse the user policy using human-readable JSON + var root userPolicyRoot + if err := hujsonx.Unmarshal(data, &root); err != nil { + return nil, err + } + + // make sure the version is OK + if root.Version != userPolicyVersion { + err := fmt.Errorf( + "%s: %w: expected=%d got=%d", + userPolicyKey, + errUserPolicyWrongVersion, + userPolicyVersion, + root.Version, + ) + return nil, err + } + + out := &userPolicyV2{Root: &root} + return out, nil +} + +var _ httpsDialerPolicy = &userPolicyV2{} + +// LookupTactics implements httpsDialerPolicy. +func (ldp *userPolicyV2) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + // create the output channel + out := make(chan *httpsDialerTactic) + + go func() { + // make sure we close the output channel + defer close(out) + + // check whether an entry exists in the user-provided map, which MAY be nil + // if/when the user has chosen their policy to be as such + tactics, found := ldp.Root.DomainEndpoints[net.JoinHostPort(domain, port)] + if !found { + return + } + + // note that we also need to fallback when the tactics contains an empty list + // or a list that only contains nil entries + tactics = userPolicyRemoveNilEntries(tactics) + if len(tactics) <= 0 { + return + } + + // emit all the user-configured tactics + for _, tactic := range tactics { + out <- tactic + } + }() + + return out +} From 35d667fef74be504f507e376b04688c537c217e3 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 8 May 2024 15:17:26 +0200 Subject: [PATCH 122/127] Shove all the changes I have together It's time to run some tests. 
--- internal/enginenetx/DESIGN.md | 28 +++++++---- internal/enginenetx/bridgespolicy.go | 10 +--- internal/enginenetx/bridgespolicy_test.go | 2 +- internal/enginenetx/httpsdialer.go | 4 ++ internal/enginenetx/mixpolicy.go | 42 ++++++++++++++++ internal/enginenetx/network.go | 41 ++++++++++++---- internal/enginenetx/testhelperspolicy.go | 58 +++++++++++++++++++++++ 7 files changed, 158 insertions(+), 27 deletions(-) create mode 100644 internal/enginenetx/testhelperspolicy.go diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md index a496f8444f..4d8ed19b14 100644 --- a/internal/enginenetx/DESIGN.md +++ b/internal/enginenetx/DESIGN.md @@ -31,7 +31,8 @@ We define "bridge" an IP address with the following properties: 2. the IP address listens on port 443 and accepts _any_ incoming SNI; -3. the webserver on port 443 provides unified access to [OONI API services](https://docs.ooni.org/backend/ooniapi/services/). +3. the webserver on port 443 provides unified access to +[OONI API services](https://docs.ooni.org/backend/ooniapi/services/). We also assume that the Web Connectivity test helpers (TH) could accept any SNIs. @@ -49,15 +50,20 @@ from a pre-defined list of SNIs; 3. remember and use tactics for creating TLS connections that worked previously; -4. for the trivial case, an uncensored API backend, communication to the API should use the simplest way possible. This naturally leads to the fact that it should recover ~quickly if the conditions change (e.g., if a bridge is discontinued); +4. for the trivial case, an uncensored API backend, communication to the API +should use the simplest way possible. This naturally leads to the fact that +it should recover ~quickly if the conditions change (e.g., if a bridge +is discontinued); -5. for users in censored regions it should be possible to use tactics to overcome the restrictions; +5. for users in censored regions it should be possible to use +tactics to overcome the restrictions; -6. 
try to defer sending the true `SNI` on the wire, therefore trying to -avoid triggering potential residual censorship blocking a given TCP endpoint -for some time regardless of what `SNI` is being used next; +6. when using tactics, try to defer sending the true `SNI` on the wire, +therefore trying to avoid triggering potential residual censorship blocking +a given TCP endpoint for some time regardless of what `SNI` is being used next; -7. allow users to force specific bridges and SNIs by editing `$OONI_HOME/engine/bridges.conf`. +7. allow users to force specific bridges and SNIs by editing +`$OONI_HOME/engine/bridges.conf`. The rest of this document explains how we designed for achieving these goals. @@ -118,6 +124,10 @@ turn, this reduces the overall bootstrap time in light of issues with policies. added remix as part of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552); before, we implemented strict falling back.) +In particular, `statsPolicy` and `bridgePolicy` allow DNS resolved addresses to being +used first, because "for the trivial case, an uncensored API backend, communication to the API +should use the simplest way possible." + Also, when using a proxy, we just use `dnsPolicy` assuming the proxy knows how to do circumvention. ## Dialing Tactics @@ -418,9 +428,9 @@ would waste lots of time failing before falling back. Conversely, a better strategy is to "remix" tactics as implemented by the [mix.go](mix.go) file: -1. we take the first two tactics from the stats; +1. we take the first four tactics from the fallback; -2. then we take the first four tactics from the fallback; +2. then, we take the first two tactics from the stats; 3. then we remix the rest, not caring much about whether we're reading from the stats of from the fallback. 
diff --git a/internal/enginenetx/bridgespolicy.go b/internal/enginenetx/bridgespolicy.go index 790cf6e77b..e421489acf 100644 --- a/internal/enginenetx/bridgespolicy.go +++ b/internal/enginenetx/bridgespolicy.go @@ -84,14 +84,6 @@ func (p *bridgesPolicyV2) LookupTactics(ctx context.Context, domain, port string return bridgesTacticsForDomain(domain, port) } -var bridgesPolicyTestHelpersDomains = []string{ - "0.th.ooni.org", - "1.th.ooni.org", - "2.th.ooni.org", - "3.th.ooni.org", - "d33d1gs9kpq1c5.cloudfront.net", -} - // TODO(bassosimone): the rewriting of test helper tactics should happen elsewhere // once we stop using the bridgesPolicy (i.e., version 1) @@ -103,7 +95,7 @@ func maybeRewriteTestHelpersTactics(input <-chan *httpsDialerTactic) <-chan *htt for tactic := range input { // When we're not connecting to a TH, pass the policy down the chain unmodified - if !slices.Contains(bridgesPolicyTestHelpersDomains, tactic.VerifyHostname) { + if !slices.Contains(testHelpersDomains, tactic.VerifyHostname) { out <- tactic continue } diff --git a/internal/enginenetx/bridgespolicy_test.go b/internal/enginenetx/bridgespolicy_test.go index 0984a1ee6f..44999cab9e 100644 --- a/internal/enginenetx/bridgespolicy_test.go +++ b/internal/enginenetx/bridgespolicy_test.go @@ -202,7 +202,7 @@ func TestBridgesPolicy(t *testing.T) { }) t.Run("for test helper domains", func(t *testing.T) { - for _, domain := range bridgesPolicyTestHelpersDomains { + for _, domain := range testHelpersDomains { t.Run(domain, func(t *testing.T) { expectedAddrs := []string{"164.92.180.7"} diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index 2f0e28386d..20ea477856 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -202,6 +202,10 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo return nil, err } + // TODO(bassosimone): this code should be refactored using the same + // pattern used by 
`./internal/httpclientx` to perform attempts faster + // in case there is an initial early failure + // We need a cancellable context to interrupt the tactics emitter early when we // immediately get a valid response and we don't need to use other tactics. ctx, cancel := context.WithCancel(ctx) diff --git a/internal/enginenetx/mixpolicy.go b/internal/enginenetx/mixpolicy.go index d42725701e..685266221d 100644 --- a/internal/enginenetx/mixpolicy.go +++ b/internal/enginenetx/mixpolicy.go @@ -11,6 +11,48 @@ import ( "github.com/ooni/probe-cli/v3/internal/optional" ) +// mixPolicyEitherOr reads from primary and only if primary does +// not return any tactic, then it reads from fallback. +type mixPolicyEitherOr struct { + // Primary is the primary policy. + Primary httpsDialerPolicy + + // Fallback is the fallback policy. + Fallback httpsDialerPolicy +} + +var _ httpsDialerPolicy = &mixPolicyEitherOr{} + +// LookupTactics implements httpsDialerPolicy. +func (m *mixPolicyEitherOr) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + // create the output channel + output := make(chan *httpsDialerTactic) + + go func() { + // make sure we eventually close the output channel + defer close(output) + + // drain the primary policy + var count int + for tx := range m.Primary.LookupTactics(ctx, domain, port) { + output <- tx + count++ + } + + // if the primary worked, we're good + if count > 0 { + return + } + + // drain the fallback policy + for tx := range m.Fallback.LookupTactics(ctx, domain, port) { + output <- tx + } + }() + + return output +} + // mixPolicyInterleave interleaves policies by a given interleaving // factor. 
Say the interleave factor is N, then we first read N tactics // from the primary policy, then N from the fallback one, and we keep diff --git a/internal/enginenetx/network.go b/internal/enginenetx/network.go index 197a20ee01..39c1bdad1a 100644 --- a/internal/enginenetx/network.go +++ b/internal/enginenetx/network.go @@ -155,19 +155,44 @@ func newHTTPSDialerPolicy( } } - // create a composed fallback TLS dialer policy - fallback := &statsPolicy{ - Fallback: &bridgesPolicy{Fallback: &dnsPolicy{ + // create a policy interleaving stats policies and bridges policies + statsOrBridges := &mixPolicyInterleave{ + Primary: &statsPolicyV2{ + Stats: stats, + }, + Fallback: &bridgesPolicyV2{}, + Factor: 3, + } + + // wrap the DNS policy with a policy that extends tactics for test + // helpers so that we also try using different SNIs. + dnsExt := &testHelpersPolicy{ + Child: &dnsPolicy{ Logger: logger, Resolver: resolver, - }}, - Stats: stats, + }, + } + + // compose dnsExt and statsOrBridges such that dnsExt has + // priority in the selection of tactics + composed := &mixPolicyInterleave{ + Primary: dnsExt, + Fallback: statsOrBridges, + Factor: 3, } - // make sure we honor a user-provided policy - policy, err := newUserPolicy(kvStore, fallback) + // attempt to load a user-provided dialing policy + primary, err := newUserPolicyV2(kvStore) + + // on error, just use composed if err != nil { - return fallback + return composed + } + + // otherwise, finish creating the dialing policy + policy := &mixPolicyEitherOr{ + Primary: primary, + Fallback: composed, } return policy diff --git a/internal/enginenetx/testhelperspolicy.go b/internal/enginenetx/testhelperspolicy.go new file mode 100644 index 0000000000..ddf6dbee74 --- /dev/null +++ b/internal/enginenetx/testhelperspolicy.go @@ -0,0 +1,58 @@ +package enginenetx + +import ( + "context" + "slices" +) + +var testHelpersDomains = []string{ + "0.th.ooni.org", + "1.th.ooni.org", + "2.th.ooni.org", + "3.th.ooni.org", + 
"d33d1gs9kpq1c5.cloudfront.net", +} + +// testHelpersPolicy is a policy where we use attempt to +// hide the test helpers domains. +// +// The zero value is invalid; please, init MANDATORY fields. +type testHelpersPolicy struct { + // Child is the MANDATORY child policy. + Child httpsDialerPolicy +} + +var _ httpsDialerPolicy = &testHelpersPolicy{} + +// LookupTactics implements httpsDialerPolicy. +func (p *testHelpersPolicy) LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic { + out := make(chan *httpsDialerTactic) + + go func() { + defer close(out) // tell the parent when we're done + + for tactic := range p.Child.LookupTactics(ctx, domain, port) { + // always emit the original tactic first + out <- tactic + + // When we're not connecting to a TH, our job is done + if !slices.Contains(testHelpersDomains, tactic.VerifyHostname) { + continue + } + + // This is the case where we're connecting to a test helper. Let's try + // to produce policies using different SNIs for the domain. 
+ for _, sni := range bridgesDomainsInRandomOrder() { + out <- &httpsDialerTactic{ + Address: tactic.Address, + InitialDelay: 0, // set when dialing + Port: tactic.Port, + SNI: sni, + VerifyHostname: tactic.VerifyHostname, + } + } + } + }() + + return out +} From 2582614fe8c9f1977bb94fe2b0b93e78694068c0 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 9 May 2024 09:47:43 +0200 Subject: [PATCH 123/127] chore: add more debugging info --- internal/enginenetx/httpsdialer.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index 20ea477856..394f43b9e5 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -323,6 +323,9 @@ func (hd *httpsDialer) dialTLS( return nil, err } + // for debugging let the user know which tactic is ready + logger.Infof("tactic '%+v' is ready", tactic) + // tell the observer that we're starting hd.stats.OnStarting(tactic) From 1e818f4088d63e7ee28cd3df7b213d234e1868a5 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 9 May 2024 12:56:27 +0200 Subject: [PATCH 124/127] x --- internal/enginenetx/userpolicy.go | 78 ------------------------------- 1 file changed, 78 deletions(-) diff --git a/internal/enginenetx/userpolicy.go b/internal/enginenetx/userpolicy.go index e4c3f7d73a..b782b8313f 100644 --- a/internal/enginenetx/userpolicy.go +++ b/internal/enginenetx/userpolicy.go @@ -200,81 +200,3 @@ func userPolicyRemoveNilEntries(input []*httpsDialerTactic) (output []*httpsDial } return } - -// userPolicyV2 is an [httpsDialerPolicy] incorporating verbatim -// a user policy loaded from the engine's key-value store. -// -// This policy is very useful for exploration and experimentation. -// -// This is v2 of the userPolicy because the previous implementation -// incorporated mixing logic, while now the mixing happens outside -// of this policy, this giving us much more flexibility. 
-type userPolicyV2 struct { - // Root is the root of the user policy loaded from disk. - Root *userPolicyRoot -} - -// newUserPolicyV2 attempts to constructs a user policy. The typical error case is the one -// in which there's no httpsDialerUserPolicyKey in the key-value store. -func newUserPolicyV2(kvStore model.KeyValueStore) (*userPolicyV2, error) { - // attempt to read the user policy bytes from the kvstore - data, err := kvStore.Get(userPolicyKey) - if err != nil { - return nil, err - } - - // attempt to parse the user policy using human-readable JSON - var root userPolicyRoot - if err := hujsonx.Unmarshal(data, &root); err != nil { - return nil, err - } - - // make sure the version is OK - if root.Version != userPolicyVersion { - err := fmt.Errorf( - "%s: %w: expected=%d got=%d", - userPolicyKey, - errUserPolicyWrongVersion, - userPolicyVersion, - root.Version, - ) - return nil, err - } - - out := &userPolicyV2{Root: &root} - return out, nil -} - -var _ httpsDialerPolicy = &userPolicyV2{} - -// LookupTactics implements httpsDialerPolicy. 
-func (ldp *userPolicyV2) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { - // create the output channel - out := make(chan *httpsDialerTactic) - - go func() { - // make sure we close the output channel - defer close(out) - - // check whether an entry exists in the user-provided map, which MAY be nil - // if/when the user has chosen their policy to be as such - tactics, found := ldp.Root.DomainEndpoints[net.JoinHostPort(domain, port)] - if !found { - return - } - - // note that we also need to fallback when the tactics contains an empty list - // or a list that only contains nil entries - tactics = userPolicyRemoveNilEntries(tactics) - if len(tactics) <= 0 { - return - } - - // emit all the user-configured tactics - for _, tactic := range tactics { - out <- tactic - } - }() - - return out -} From 25e2eb8620331476c194d0dc225051544942bcae Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 10 May 2024 13:58:46 +0200 Subject: [PATCH 125/127] x --- internal/enginenetx/network.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/internal/enginenetx/network.go b/internal/enginenetx/network.go index 3534a50896..f3bc819ed8 100644 --- a/internal/enginenetx/network.go +++ b/internal/enginenetx/network.go @@ -151,10 +151,7 @@ func newHTTPSDialerPolicy( // in case there's a proxy URL, we're going to trust the proxy to do the right thing and // know what it's doing, hence we'll have a very simple DNS policy if proxyURL != nil { - return &dnsPolicy{ - Logger: logger, - Resolver: resolver, - } + return &dnsPolicy{logger, resolver} } // create a policy interleaving stats policies and bridges policies From 83ec4da1fa04ab10271d40744f048b86f44bc752 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 10 May 2024 14:53:30 +0200 Subject: [PATCH 126/127] fix(enginenetx): compute zero time when we start dialing This diff addresses a bug observed in the wild where a slow DNS causes several tactics to be ready
concurrently. If we want several tactics to be ready concurrently, we should arrange for that, and for now BTW that's not the case. Part of https://github.com/ooni/probe/issues/2704. --- internal/enginenetx/filter.go | 3 ++ internal/enginenetx/httpsdialer.go | 51 +++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/internal/enginenetx/filter.go b/internal/enginenetx/filter.go index 19fb470867..998c1c98d3 100644 --- a/internal/enginenetx/filter.go +++ b/internal/enginenetx/filter.go @@ -60,6 +60,9 @@ func filterAssignInitialDelays(input <-chan *httpsDialerTactic) <-chan *httpsDia index := 0 for tx := range input { + // TODO(bassosimone): what do we do now about the user configured + // initial delays? Should we declare them as deprecated? + // rewrite the delays tx.InitialDelay = happyEyeballsDelay(index) index++ diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index 0375967d88..f59ceb938a 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "net" + "sync" "sync/atomic" "time" @@ -206,24 +207,22 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo // pattern used by `./internal/httpclientx` to perform attempts faster // in case there is an initial early failure. - // TODO(bassosimone): the algorithm to filter and assign initial - // delays is broken because, if the DNS runs for more than one - // second, then several policies will immediately be due. We should - // probably use a better strategy that takes as the zero the time - // when the first dialing policy becomes available. - // We need a cancellable context to interrupt the tactics emitter early when we // immediately get a valid response and we don't need to use other tactics. 
ctx, cancel := context.WithCancel(ctx) defer cancel() + // Create structure for computing the zero dialing time once during + // the first dial, so that subsequent attempts use happy eyeballs based + // on the moment in which we tried the first dial. + t0 := &httpsDialerWorkerZeroTime{} + // The emitter will emit tactics and then close the channel when done. We spawn 16 workers // that handle tactics in parallel and post results on the collector channel. emitter := httpsDialerFilterTactics(hd.policy.LookupTactics(ctx, hostname, port)) collector := make(chan *httpsDialerErrorOrConn) joiner := make(chan any) const parallelism = 16 - t0 := time.Now() for idx := 0; idx < parallelism; idx++ { go hd.worker(ctx, joiner, emitter, t0, collector) } @@ -257,6 +256,36 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo return httpsDialerReduceResult(connv, errorv) } +// httpsDialerWorkerZeroTime contains the zero time used when dialing. We set this +// zero time when we start the first dialing attempt, such that subsequent attempts +// are correctly spaced starting from such a zero time. +// +// A previous approach was that we were taking the zero time when we started +// getting tactics, but this approach was wrong, because it caused several tactics +// to be ready, when the DNS lookup was slow. +// +// The zero value of this structure is ready to use. +type httpsDialerWorkerZeroTime struct { + // mu provides mutual exclusion. + mu sync.Mutex + + // t is the zero time. + t time.Time +} + +// Get returns the zero dialing time. The first invocation of this method +// saves the zero dialing time and subsequent invocations just return it. +// +// This method is safe to be called concurrently by goroutines. +func (t0 *httpsDialerWorkerZeroTime) Get() time.Time { + defer t0.mu.Unlock() + t0.mu.Lock() + if t0.t.IsZero() { + t0.t = time.Now() + } + return t0.t +} + // httpsDialerFilterTactics filters the tactics to: // // 1.
be paranoid and filter out nil tactics if any; @@ -297,7 +326,7 @@ func (hd *httpsDialer) worker( ctx context.Context, joiner chan<- any, reader <-chan *httpsDialerTactic, - t0 time.Time, + t0 *httpsDialerWorkerZeroTime, writer chan<- *httpsDialerErrorOrConn, ) { // let the parent know that we terminated @@ -321,7 +350,7 @@ func (hd *httpsDialer) worker( func (hd *httpsDialer) dialTLS( ctx context.Context, logger model.Logger, - t0 time.Time, + t0 *httpsDialerWorkerZeroTime, tactic *httpsDialerTactic, ) (model.TLSConn, error) { // honor happy-eyeballs delays and wait for the tactic to be ready to run @@ -398,10 +427,10 @@ func (hd *httpsDialer) dialTLS( // return the context error if the context expires. func httpsDialerTacticWaitReady( ctx context.Context, - t0 time.Time, + t0 *httpsDialerWorkerZeroTime, tactic *httpsDialerTactic, ) error { - deadline := t0.Add(tactic.InitialDelay) + deadline := t0.Get().Add(tactic.InitialDelay) delta := time.Until(deadline) if delta <= 0 { return nil From 8a5d0b7bf8f1e1090589527b2409fe9638350fa1 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 10 May 2024 15:06:09 +0200 Subject: [PATCH 127/127] x --- internal/enginenetx/DESIGN.md | 823 ---------------------------------- 1 file changed, 823 deletions(-) delete mode 100644 internal/enginenetx/DESIGN.md diff --git a/internal/enginenetx/DESIGN.md b/internal/enginenetx/DESIGN.md deleted file mode 100644 index 4d8ed19b14..0000000000 --- a/internal/enginenetx/DESIGN.md +++ /dev/null @@ -1,823 +0,0 @@ -# Engine Network Extensions - -This file documents the [./internal/enginenetx](.) package design. The content is current -as of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552). 
- -## Table of Contents - -- [Goals & Assumptions](#goals--assumptions) -- [High-Level API](#high-level-api) -- [Creating TLS Connections](#creating-tls-connections) -- [Dialing Tactics](#dialing-tactics) -- [Dialing Algorithm](#dialing-algorithm) -- [Dialing Policies](#dialing-policies) - - [dnsPolicy](#dnspolicy) - - [userPolicy](#userpolicy) - - [statsPolicy](#statspolicy) - - [bridgePolicy](#bridgepolicy) -- [Overall Algorithm](#overall-algorithm) -- [Managing Stats](#managing-stats) -- [Real-World Scenarios](#real-world-scenarios) - - [Invalid bridge without cached data](#invalid-bridge-without-cached-data) - - [Invalid bridge with cached data](#invalid-bridge-with-cached-data) - - [Valid bridge with invalid cached data](#valid-bridge-with-invalid-cached-data) -- [Limitations and Future Work](#limitations-and-future-work) - -## Goals & Assumptions - -We define "bridge" an IP address with the following properties: - -1. the IP address is not expected to change frequently; - -2. the IP address listens on port 443 and accepts _any_ incoming SNI; - -3. the webserver on port 443 provides unified access to -[OONI API services](https://docs.ooni.org/backend/ooniapi/services/). - -We also assume that the Web Connectivity test helpers (TH) could accept any SNIs. - -We also define "tactic" a tactic to perform a TLS handshake either with a -bridge or with a TH. We also define "policy" the collection of algorithms for -producing tactics for performing TLS handshakes. - -Considering all of this, this package aims to: - -1. overcome DNS-based censorship for "api.ooni.io" by hardcoding known-good -bridges IP addresses inside the codebase; - -2. overcome SNI-based censorship for "api.ooni.io" and test helpers by choosing -from a pre-defined list of SNIs; - -3. remember and use tactics for creating TLS connections that worked previously; - -4. for the trivial case, an uncensored API backend, communication to the API -should use the simplest way possible. 
This naturally leads to the fact that -it should recover ~quickly if the conditions change (e.g., if a bridge -is discontinued); - -5. for users in censored regions it should be possible to use -tactics to overcome the restrictions; - -6. when using tactics, try to defer sending the true `SNI` on the wire, -therefore trying to avoid triggering potential residual censorship blocking -a given TCP endpoint for some time regardless of what `SNI` is being used next; - -7. allow users to force specific bridges and SNIs by editing -`$OONI_HOME/engine/bridges.conf`. - -The rest of this document explains how we designed for achieving these goals. - -## High-Level API - -The purpose of the `enginenetx` package is to provide a `*Network` object from which consumers -can obtain a `model.HTTPTransport` and `*http.Client` for HTTP operations: - -```Go -func (n *Network) HTTPTransport() model.HTTPTransport -func (n *Network) NewHTTPClient() *http.Client -``` - -**Listing 1.** `*enginenetx.Network` HTTP APIs. - -The `HTTPTransport` method returns a `*Network` field containing an HTTP transport with -custom TLS connection establishment tactics depending on the configured policies. - -The `NewHTTPClient` method wraps such a transport into an `*http.Client`. - -## Creating TLS Connections - -In [network.go](network.go), `newHTTPSDialerPolicy` configures the dialing policy -depending on the arguments passed to `NewNetwork`: - -1. if the `proxyURL` argument is not `nil`, we use the `dnsPolicy` alone; - -2. otherwise, we compose policies as illustrated by the following diagram: - -``` -+------------+ +-------------+ +--------------+ +-----------+ -| userPolicy | --> | statsPolicy | --> | bridgePolicy | --> | dnsPolicy | -+------------+ +-------------+ +--------------+ +-----------+ -``` - -**Diagram 1.** Sequence of policies constructed when not using a proxy. - -Policies are described in detail in subsequent sections. On a high-level, here's what each does: - -1.
`userPolicy`: honours the `bridges.conf` configuration file and, if no entry is found -inside it, then it falls back to the subsequent policy; - -2. `statsPolicy`: uses statistics collected from previous runs to select tactics that -worked recently for specific dialing targets, otherwise it falls back to the subsequent policy; - -3. `bridgePolicy`: adopts a bridge strategy for `api.ooni.io` (i.e., uses known-in-advance -IP addresses), and otherwise falls back to the subsequent policy, still taking care of -hiding the THs SNIs; - -4. `dnsPolicy`: uses the `*engine.Session` DNS resolver to lookup domain names -and produces trivial tactics equivalent to connecting normally using the Go standard library. - -While the previous description says "falls back to," the actual semantics of falling -back is more complex than just falling back. For `statsPolicy` and `bridgePolicy`, -we remix the current policy strategy and subsequent policies strategies to strike a -balance between what a policy suggests and what subsequent policies would suggest. In -turn, this reduces the overall bootstrap time in light of issues with policies. (We -added remix as part of [probe-cli#1552](https://github.com/ooni/probe-cli/pull/1552); before, -we implemented strict falling back.) - -In particular, `statsPolicy` and `bridgePolicy` allow DNS resolved addresses to being -used first, because "for the trivial case, an uncensored API backend, communication to the API -should use the simplest way possible." - -Also, when using a proxy, we just use `dnsPolicy` assuming the proxy knows how to do circumvention. - -## Dialing Tactics - -Each policy implements the following interface (defined in [httpsdialer.go](httpsdialer.go)): - -```Go -type httpsDialerPolicy interface { - LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic -} -``` - -**Listing 2.** Interface implemented by policies. 
- -The `LookupTactics` operation is _conceptually_ similar to -[net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost), because -both operations map a domain name to IP addresses to connect to. However, -there are also some key differences, namely: - -1. `LookupTactics` is domain _and_ port specific, while `LookupHost` -only takes in input the domain name to resolve; - -2. `LookupTactics` returns _a stream_ of viable "tactics", while `LookupHost` -returns a list of IP addresses (we define "stream" a channel where a background -goroutine posts content and which is closed when done). - -The second point, in particular, is crucial. The design of `LookupTactics` is -such that we can start attempting to dial as soon as we have some tactics -to try. A composed `httpsDialerPolicy` can, in fact, start multiple child `LookupTactics` -operations and then return tactics to the caller as soon as some are ready, without -blocking dialing until _all_ the child operations are complete. - -Also, as you may have guessed, the `dnsPolicy` is a policy that, under the hood, -eventually calls [net.Resolver.LookupHost](https://pkg.go.dev/net#Resolver.LookupHost) -to get IP addresses using the DNS used by the `*engine.Session` type. (Typically, such a -resolver, in turn, composes several DNS-over-HTTPS resolvers with the fallback -`getaddrinfo` resolver, and remembers which resolvers work.) - -A "tactic" looks like this: - -```Go -type httpsDialerTactic struct { - Address string - InitialDelay time.Duration - Port string - SNI string - VerifyHostname string -} -``` - -**Listing 3.** Structure describing a tactic. 
- -Here's an explanation of why we have each field in the struct: - -- `Address` and `Port` qualify the TCP endpoint; - -- `InitialDelay` allows a policy to delay a connect operation to implement -something similar to [happy eyeballs](https://en.wikipedia.org/wiki/Happy_Eyeballs), -where dialing attempts run in parallel and are staggered in time (the classical -example being: dialing for IPv6 and then attempting dialing for IPv4 after 0.3s); - -- `SNI` is the `SNI` to send as part of the TLS ClientHello; - -- `VerifyHostname` is the hostname to use for TLS certificate verification. - -The separation of `SNI` and `VerifyHostname` is what allows us to send an innocuous -SNI over the network and then verify the certificate using the real SNI after a -`skipVerify=true` TLS handshake has completed. (Obviously, for this trick to work, -the HTTPS server we're using must be okay with receiving unrelated SNIs.) - -## Dialing Algorithm - -Creating TLS connections is implemented by `(*httpsDialer).DialTLSContext`, also -part of [httpsdialer.go](httpsdialer.go). - -This method _morally_ does the following in ~parallel: - -```mermaid -stateDiagram-v2 - tacticsGenerator --> skipDuplicate - skipDuplicate --> computeHappyEyeballsDelay - computeHappyEyeballsDelay --> tcpConnect - tcpConnect --> tlsHandshake - tlsHandshake --> verifyCertificate -``` - -**Diagram 2.** Sequence of operations when dialing TLS connections. - -Such a diagram roughly corresponds to this Go ~pseudo-code: - -```Go -func (hd *httpsDialer) DialTLSContext( - ctx context.Context, network string, endpoint string) (net.Conn, error) { - // map to ensure we don't have duplicate tactics - uniq := make(map[string]int) - - // time when we started dialing - t0 := time.Now() - - // index of each dialing attempt - idx := 0 - - // [...] omitting code to get hostname and port from endpoint [...] 
- - // fetch tactics asynchronously - for tx := range hd.policy.LookupTactics(ctx, hostname, port) { - - // avoid using the same tactic more than once - summary := tx.tacticSummaryKey() - if uniq[summary] > 0 { - continue - } - uniq[summary]++ - - // compute the happy eyeballs deadline - deadline := t0.Add(happyEyeballsDelay(idx)) - idx++ - - // dial in a background goroutine so this code runs in parallel - go func(tx *httpsDialerTactic, deadline time.Duration) { - // wait for deadline - if delta := time.Until(deadline); delta > 0 { - time.Sleep(delta) - } - - // dial TCP - conn, err := tcpConnect(tx.Address, tx.Port) - - // [...] omitting error handling and passing error to DialTLSContext [...] - - // handshake - tconn, err := tlsHandshake(conn, tx.SNI, false /* skip verification */) - - // [...] omitting error handling and passing error to DialTLSContext [...] - - // make sure the hostname's OK - err := verifyHostname(tconn, tx.VerifyHostname) - - // [...] omitting error handling and passing error or conn to DialTLSContext [...] - - }(tx, deadline) - } - - // [...] omitting code to decide whether to return a conn or an error [...] -} -``` - -**Listing 4.** Algorithm implementing dialing TLS connections. - -This simplified algorithm differs for the real implementation in that we -have omitted the following (boring) details: - -1. code to obtain `hostname` and `port` from `endpoint` (e.g., code to extract -`"x.org"` and `"443"` from `"x.org:443"`); - -2. code to pass back a connection or an error from a background -goroutine to the `DialTLSContext` method; - -3. code to decide whether to return a `net.Conn` or an `error`; - -4. the fact that `DialTLSContext` uses a goroutine pool rather than creating a -goroutine for each tactic; - -5. the fact that, as soon as we successfully have a connection, we -immediately cancel any other parallel attempts. 
- -The `happyEyeballsDelay` function (in [happyeyeballs.go](happyeyeballs.go)) is -such that we generate the following delays: - -| idx | delay (s) | -| --- | --------- | -| 1 | 0 | -| 2 | 1 | -| 3 | 2 | -| 4 | 4 | -| 5 | 8 | -| 6 | 16 | -| 7 | 24 | -| 8 | 32 | -| ... | ... | - -**Table 1.** Happy-eyeballs-like delays. - -That is, we exponentially increase the delay until `8s`, then we linearly increase by `8s`. We -aim to space attempts to accommodate for slow access networks -and/or access network experiencing temporary failures to deliver packets. However, -we also aim to have dialing parallelism, to reduce the overall time to connect -when we're experiencing many timeouts when attempting to dial. - -(We chose 1s as the baseline delay because that would be ~three happy-eyeballs delays as -implemented by the Go standard library, and overall a TCP connect followed by a TLS -handshake should roughly amount to three round trips.) - -Additionally, the `*httpsDialer` algorithm keeps statistics -using an `httpsDialerEventsHandler` type: - -```Go -type httpsDialerEventsHandler interface { - OnStarting(tactic *httpsDialerTactic) - OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) - OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) - OnTLSVerifyError(tactic *httpsDialerTactic, err error) - OnSuccess(tactic *httpsDialerTactic) -} -``` - -**Listing 5.** Interface for collecting statistics. - -These statistics contribute to construct knowledge about the network -conditions and influence the generation of tactics. - -## Dialing Policies - -### dnsPolicy - -The `dnsPolicy` is implemented by [dnspolicy.go](dnspolicy.go). - -Its `LookupTactics` algorithm is quite simple: - -1. we short circuit the cases in which the `domain` argument -contains an IP address to "resolve" exactly that IP address (thus emulating -what `getaddrinfo` would do when asked to "resolve" an IP address); - -2.
for each resolved address, we generate tactics where the `SNI` and -`VerifyHostname` equal the `domain`. - -If `httpsDialer` uses this policy as its only policy, the operations it -performs are morally equivalent to normally dialing for TLS. - -### userPolicy - -The `userPolicy` is implemented by [userpolicy.go](userpolicy.go). - -When constructing a `userPolicy` with `newUserPolicy` we indicate a fallback -`httpsDialerPolicy` to use as the fallback, when either `$OONI_HOME/engine/bridges.conf` -does not exist or it does not contain actionable dialing rules. - -As of 2024-04-16, the structure of `bridges.conf` is like in the following example: - -```JavaScript -{ - "DomainEndpoints": { - "api.ooni.io:443": [{ - "Address": "162.55.247.208", - "Port": "443", - "SNI": "www.example.com", - "VerifyHostname": "api.ooni.io" - }, { - /* omitted */ - }] - }, - "Version": 3 -} -``` - -**Listing 6.** Sample `bridges.conf` content. - -This example instructs to use the given tactic(s) when establishing a TLS connection to -`"api.ooni.io:443"`. Any other destination hostname and port would instead use the -configured "fallback" dialing policy. - -The `newUserPolicy` constructor reads this file from disk on startup -and keeps its content in memory. - -`LookupTactics` will: - -1. check whether there's an entry for the given `domain` and `port` -inside the `DomainEndpoints` map; - -2. if there are no entries, fall back to the fallback `httpsDialerPolicy`; - -3. otherwise return all the tactic entries. - -Because `userPolicy` is user-configured, we _entirely bypass_ the -fallback policy when there's a user-configured entry. - -### statsPolicy - -The `statsPolicy` is implemented by [statspolicy.go](statspolicy.go). - -The general idea of this policy is that it depends on: - -1. a `*statsManager` that keeps persistent stats about tactics; - -2. a "fallback" policy.
- -In principle, one would expect `LookupTactics` to first return all -the tactics we can see from the stats and then try tactics obtained -from the fallback policy. However, this simplified algorithm would -lead to suboptimal results in the following case: - -1. say there are 10 tactics for "api.ooni.io:443" that are bound -to a specific bridge address that has been discontinued; - -2. if we try all these 10 tactics before trying fallback tactics, we -would waste lots of time failing before falling back. - -Conversely, a better strategy is to "remix" tactics as implemented -by the [mix.go](mix.go) file: - -1. we take the first four tactics from the fallback; - -2. then, we take the first two tactics from the stats; - -3. then we remix the rest, not caring much about whether we're -reading from the stats or from the fallback. - -Because we sort tactics from the stats by our understanding of whether -they are working as intended, we'll prioritize what we know to be working, -but then we'll also throw some new tactics into the mix. (We read four -tactics from the fallback because that allows us to include two bridge tactics -and two DNS tactics, as explained below when we discuss the -`bridgePolicy` policy.) - -### bridgePolicy - -The `bridgePolicy` is implemented by [bridgespolicy.go](bridgespolicy.go) and -rests on the assumptions made explicit above. That is: - -1. that there is at least one _bridge_ for "api.ooni.io"; - -2. that the Web Connectivity Test Helpers accept any SNI. - -Here we're also using the [mix.go](mix.go) algorithm to remix -two different sources of tactics: - -1. the `bridgesTacticsForDomain` only returns tactics for "api.ooni.io" -using existing knowledge of bridges and random SNIs; - -2. the `maybeRewriteTestHelpersTactics` method filters the results -coming from the fallback tactic such that, if we are connecting -to a known test-helper domain name, we're trying to hide its SNI.
- -The first two returned tactics will be bridges tactics for "api.ooni.io", -if applicable, followed by two tactics generated using the DNS, -followed by a random remix of all the remaining tactics. This choice of -returning two and two tactics first, is the -reason why in `statsPolicy` we return the first four tactics from -the fallback after getting two tactics from the stats. - -## Overall Algorithm - -The composed policy is as described in Diagram 1. - -Therefore, the compose policy will return the following tactics: - - -1. if there is a `$OONI_HOME/engine/bridges.conf` with a valid entry, -use it without trying more tactics; otherwise, - -2. use the first two tactics coming from stats, if any; - -3. then use the first two tactics coming from bridges, if any; - -4. then use the first two tactics coming from the DNS, if successful; - -5. finally, randomly remix the remaining tactics. - -Excluding the case where we have a valid entry in `bridges.conf`, the following -diagram illustrates how we're mixing tactics: - -```mermaid -stateDiagram-v2 - state statsTacticsChan <> - statsTactics --> statsTacticsChan - - state bridgesTacticsChan <> - bridgesTactics --> bridgesTacticsChan - - state dnsTacticsChan <> - dnsTactics --> dnsTacticsChan - - state "mix(2, 2)" as mix22 - bridgesTacticsChan --> mix22 - dnsTacticsChan --> mix22 - - state mix22Chan <> - mix22 --> mix22Chan - - state "mix(2, 4)" as mix24 - statsTacticsChan --> mix24 - mix22Chan --> mix24 - - state tacticsChan <> - mix24 --> tacticsChan - tacticsChan --> DialTLSContext -``` - -**Diagram 3.** Tactics generation priorities when not using a proxy. - -Here `mix(X, Y)` means taking `X` from the left block, if possible, then `Y` from the -right block, if possible, and then mixing the remainder in random order. Also, the "join" -blocks in the diagram represent Go channels. - -Having discussed this, it only remains to discuss managing stats. 
- -## Managing Stats - -The [statsmanager.go](statsmanager.go) file implements the `*statsManager`. - -We initialize the `*statsManager` by calling `newStatsManager` with a stats-trim -interval of 30 seconds in `NewNetwork` in [network.go](network.go). - -The `*statsManager` keeps stats at `$OONI_HOME/engine/httpsdialerstats.state`. - -In `newStatsManager`, we attempt to read this file using `loadStatsContainer` and, if -not present, we fall back to create empty stats with `newStatsContainer`. - -While creating the `*statsManager` we also spawn a goroutine that trims the stats -at every stats-trimming interval by calling `(*statsManager).trim`. In turn, `trim` -calls `statsContainerPruneEntries`, which eventually: - -1. removes entries not modified for more than one week; - -2. sorts entries and only keeps the top 10 entries. - -More specifically we sort entries using this algorithm: - -1. by decreasing success rate; then - -2. by decreasing number of successes; then - -3. by decreasing last update time. - -Likewise, calling `(*statsManager).Close` invokes `statsContainerPruneEntries`, and -then ensures that we write `$OONI_HOME/engine/httpsdialerstats.state`. - -This way, subsequent OONI Probe runs could load the stats that are more likely -to work and `statsPolicy` can take advantage of this information. 
- -The overall structure of `httpsdialerstats.state` is roughly the following: - -```JavaScript -{ - "DomainEndpoints": { - "api.ooni.io:443": { - "Tactics": { - "162.55.247.208:443 sni=api.trademe.co.nz verify=api.ooni.io": { - "CountStarted": 58, - "CountTCPConnectError": 0, - "CountTCPConnectInterrupt": 0, - "CountTCPConnectSuccess": 58, - "CountTLSHandshakeError": 0, - "CountTLSHandshakeInterrupt": 0, - "CountTLSVerificationError": 0, - "CountSuccess": 58, - "HistoTCPConnectError": {}, - "HistoTLSHandshakeError": {}, - "HistoTLSVerificationError": {}, - "LastUpdated": "2024-04-15T10:38:53.575561+02:00", - "Tactic": { - "Address": "162.55.247.208", - "InitialDelay": 0, - "Port": "443", - "SNI": "api.trademe.co.nz", - "VerifyHostname": "api.ooni.io" - } - }, - /* ... */ - } - } - }, - "Version": 5 -} -``` - -**Listing 7.** Content of the stats state as cached on disk. - -That is, the `DomainEndpoints` map contains an entry for each -TLS endpoint and, in turn, such an entry contains tactics indexed by -a summary string to speed up looking them up. - -For each tactic, we keep counters and histograms, the time when the -entry had been updated last, and the tactic itself. - -The `*statsManager` implements `httpsDialerEventsHandler`, which means -that it has callbacks invoked by the `*httpsDialer` for interesting -events regarding dialing (e.g., whether TCP connect failed). - -These callbacks basically create or update stats by locking a mutex -and updating the relevant counters and histograms. - -## Real-World Scenarios - -This section illustrates the behavior of this package under specific -network failure conditions, with specific emphasis on what happens if -the bridge IP address becomes, for any reason, unavailable. (We are -doing this because all this work was prompted by addressing the -[ooni/probe#2704](https://github.com/ooni/probe/issues/2704) issue.)
- -### Invalid bridge without cached data - -In this first scenario, we're showing what happens if the bridge IP address -becomes unavailable without any previous state saved on disk. (To emulate -this scenario, change the bridge IP address in [bridgespolicy.go](bridgespolicy.go) -to become `10.0.0.1`, recompile, and wipe `httpsdialerstats.state`). - -Here's an excerpt from the logs: - -``` -[ 0.001346] httpsDialer: [#1] TCPConnect 10.0.0.1:443... started -[ 0.002101] sessionresolver: lookup api.ooni.io using https://wikimedia-dns.org/dns-query... started -[ 0.264132] sessionresolver: lookup api.ooni.io using https://wikimedia-dns.org/dns-query... ok -[ 0.501774] httpsDialer: [#1] TCPConnect 10.0.0.1:443... in progress -[ 1.002330] httpsDialer: [#2] TCPConnect 10.0.0.1:443... started -[ 1.503687] httpsDialer: [#2] TCPConnect 10.0.0.1:443... in progress -[ 2.001488] httpsDialer: [#4] TCPConnect 162.55.247.208:443... started -[ 2.046917] httpsDialer: [#4] TCPConnect 162.55.247.208:443... ok -[ 2.047016] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... started -[ 2.093148] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... ok -[ 2.093181] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... started -[ 2.095923] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... ok -[ 2.096054] httpsDialer: [#1] TCPConnect 10.0.0.1:443... interrupted -[ 2.096077] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted -``` - -**Listing 8.** Run with no previous cached state and unreachable hardcoded bridge address. - -After 2s, we start dialing with the IP addresses obtained through the DNS. - -Subsequent runs will cache this information on disk and use it. - -### Invalid bridge with cached data - -This scenario is like the previous one, however we also assume that we have -a cached `httpsdialerstats.state` containing now-invalid lines. 
To this -end, we replace the original file with this content: - -```JSON -{ - "DomainEndpoints": { - "api.ooni.io:443": { - "Tactics": { - "10.0.0.1:443 sni=static-tracking.klaviyo.com verify=api.ooni.io": { - "CountStarted": 1, - "CountTCPConnectError": 0, - "CountTCPConnectInterrupt": 0, - "CountTLSHandshakeError": 0, - "CountTLSHandshakeInterrupt": 0, - "CountTLSVerificationError": 0, - "CountSuccess": 1, - "HistoTCPConnectError": {}, - "HistoTLSHandshakeError": {}, - "HistoTLSVerificationError": {}, - "LastUpdated": "2024-04-16T16:04:34.398778+02:00", - "Tactic": { - "Address": "10.0.0.1", - "InitialDelay": 0, - "Port": "443", - "SNI": "static-tracking.klaviyo.com", - "VerifyHostname": "api.ooni.io" - } - }, - "10.0.0.1:443 sni=vidstat.taboola.com verify=api.ooni.io": { - "CountStarted": 1, - "CountTCPConnectError": 0, - "CountTCPConnectInterrupt": 0, - "CountTLSHandshakeError": 0, - "CountTLSHandshakeInterrupt": 0, - "CountTLSVerificationError": 0, - "CountSuccess": 1, - "HistoTCPConnectError": {}, - "HistoTLSHandshakeError": {}, - "HistoTLSVerificationError": {}, - "LastUpdated": "2024-04-16T16:04:34.398795+02:00", - "Tactic": { - "Address": "10.0.0.1", - "InitialDelay": 1000000000, - "Port": "443", - "SNI": "vidstat.taboola.com", - "VerifyHostname": "api.ooni.io" - } - }, - "10.0.0.1:443 sni=www.example.com verify=api.ooni.io": { - "CountStarted": 1, - "CountTCPConnectError": 0, - "CountTCPConnectInterrupt": 0, - "CountTLSHandshakeError": 0, - "CountTLSHandshakeInterrupt": 0, - "CountTLSVerificationError": 0, - "CountSuccess": 1, - "HistoTCPConnectError": {}, - "HistoTLSHandshakeError": {}, - "HistoTLSVerificationError": {}, - "LastUpdated": "2024-04-16T16:04:34.398641+02:00", - "Tactic": { - "Address": "10.0.0.1", - "InitialDelay": 2000000000, - "Port": "443", - "SNI": "www.example.com", - "VerifyHostname": "api.ooni.io" - } - } - } - } - }, - "Version": 5 -} -``` - -**Listing 9.** Cached state for run with invalid cached state and invalid bridge address. 
- -Here's an excerpt from the logs: - -``` -[ 0.004017] sessionresolver: lookup api.ooni.io using https://wikimedia-dns.org/dns-query... started -[ 0.003854] httpsDialer: [#2] TCPConnect 10.0.0.1:443... started -[ 0.108089] sessionresolver: lookup api.ooni.io using https://wikimedia-dns.org/dns-query... ok -[ 0.505472] httpsDialer: [#2] TCPConnect 10.0.0.1:443... in progress -[ 1.004614] httpsDialer: [#1] TCPConnect 10.0.0.1:443... started -[ 1.506069] httpsDialer: [#1] TCPConnect 10.0.0.1:443... in progress -[ 2.003650] httpsDialer: [#3] TCPConnect 10.0.0.1:443... started -[ 2.505130] httpsDialer: [#3] TCPConnect 10.0.0.1:443... in progress -[ 4.004683] httpsDialer: [#4] TCPConnect 10.0.0.1:443... started -[ 4.506176] httpsDialer: [#4] TCPConnect 10.0.0.1:443... in progress -[ 8.004547] httpsDialer: [#5] TCPConnect 162.55.247.208:443... started -[ 8.042946] httpsDialer: [#5] TCPConnect 162.55.247.208:443... ok -[ 8.043015] httpsDialer: [#5] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... started -[ 8.088383] httpsDialer: [#5] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... ok -[ 8.088417] httpsDialer: [#5] TLSVerifyCertificateChain api.ooni.io... started -[ 8.091007] httpsDialer: [#5] TLSVerifyCertificateChain api.ooni.io... ok -[ 8.091174] httpsDialer: [#1] TCPConnect 10.0.0.1:443... interrupted -[ 8.091234] httpsDialer: [#3] TCPConnect 10.0.0.1:443... interrupted -[ 8.091258] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted -[ 8.091324] httpsDialer: [#4] TCPConnect 10.0.0.1:443... interrupted -``` - -**Listing 10.** Run with invalid cached state and invalid bridge address. - -So, here the fifth attempt is using the DNS. This is in line with the mixing algorithm, where -the first four attempts come from the stats or from the bridge policies. - -Let's also show what happens if we repeat the bootstrap: - -``` -[ 0.000938] httpsDialer: [#2] TCPConnect 162.55.247.208:443...
started -[ 0.001014] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... started -[ 0.053325] httpsDialer: [#2] TCPConnect 162.55.247.208:443... ok -[ 0.053355] httpsDialer: [#2] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... started -[ 0.080695] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... ok -[ 0.094648] httpsDialer: [#2] TLSHandshake with 162.55.247.208:443 SNI=api.ooni.io ALPN=[h2 http/1.1]... ok -[ 0.094662] httpsDialer: [#2] TLSVerifyCertificateChain api.ooni.io... started -[ 0.096677] httpsDialer: [#2] TLSVerifyCertificateChain api.ooni.io... ok -``` - -**Listing 11.** Re-run with invalid cached state and bridge address. - -You see that now we immediately use the correct address thanks to the stats. - -### Valid bridge with invalid cached data - -In this scenario, the bridge inside [bridgespolicy.go](bridgespolicy.go) is valid -but we have a cache listing an invalid bridge (I modified my cache to use `10.0.0.1`). - -Here's an excerpt from the logs: - -``` -[ 0.002641] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... started -[ 0.081401] sessionresolver: lookup api.ooni.io using https://mozilla.cloudflare-dns.com/dns-query... ok -[ 0.503518] httpsDialer: [#1] TCPConnect 10.0.0.1:443... in progress -[ 1.005322] httpsDialer: [#2] TCPConnect 10.0.0.1:443... started -[ 1.506304] httpsDialer: [#2] TCPConnect 10.0.0.1:443... in progress -[ 2.002837] httpsDialer: [#4] TCPConnect 162.55.247.208:443... started -[ 2.048721] httpsDialer: [#4] TCPConnect 162.55.247.208:443... ok -[ 2.048760] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=player.ex.co ALPN=[h2 http/1.1]... started -[ 2.091016] httpsDialer: [#4] TLSHandshake with 162.55.247.208:443 SNI=player.ex.co ALPN=[h2 http/1.1]... ok -[ 2.091033] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... 
started -[ 2.093542] httpsDialer: [#4] TLSVerifyCertificateChain api.ooni.io... ok -[ 2.093708] httpsDialer: [#2] TCPConnect 10.0.0.1:443... interrupted -[ 2.093718] httpsDialer: [#1] TCPConnect 10.0.0.1:443... interrupted -``` - -**Listing 12.** Run with invalid cached state and valid bridge address. - -In this case, we pick up the right bridge configuration and successfully -use it after two seconds. This configuration is provided by the `bridgesPolicy`. - -## Limitations and Future Work - -1. We should integrate the [engineresolver](../engineresolver/) package with this package -more tightly: doing that would allow users to configure the order in which we use DNS-over-HTTPS -resolvers (see [probe#2675](https://github.com/ooni/probe/issues/2675)). - -2. We lack a mechanism to dynamically distribute new bridge IP addresses to probes using, -for example, the check-in API and possibly other mechanisms. Lacking this functionality, our -bridge strategy is incomplete since it rests on a single bridge being available. What's -more, if this bridge disappears or is IP-blocked, all the probes will have one slow bootstrap -and probes where DNS is not working will stop working (see -[probe#2500](https://github.com/ooni/probe/issues/2500)). - -3. We should consider adding TLS ClientHello fragmentation as a tactic. - -4. We should add support for HTTP/3 bridges.