From a3d214cb8b41c60a160617d068a9dc01114c71ad Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Sun, 19 Sep 2021 16:07:58 +0200 Subject: [PATCH 01/53] netx: carry over changes from plan B Plan B's branch name is "db" --- internal/netxlite/dialer.go | 23 ++++ internal/netxlite/dnsx/decoder.go | 88 ++++++++++-- internal/netxlite/dnsx/decoder_test.go | 16 +-- internal/netxlite/dnsx/dnsoverhttps.go | 4 +- internal/netxlite/dnsx/model/model.go | 14 ++ internal/netxlite/dnsx/serial.go | 29 +++- internal/netxlite/errorsx/classify.go | 28 +++- internal/netxlite/errorsx/errno.go | 8 +- internal/netxlite/errorsx/errno_test.go | 2 +- internal/netxlite/errorsx/errno_unix.go | 2 +- internal/netxlite/errorsx/errno_windows.go | 2 +- .../errorsx/internal/generrno/main.go | 3 + internal/netxlite/http.go | 20 ++- internal/netxlite/legacy.go | 10 ++ internal/netxlite/mocks/resolver.go | 19 ++- internal/netxlite/resolver.go | 126 +++++++++++++++++- internal/netxlite/tls.go | 2 +- 17 files changed, 352 insertions(+), 44 deletions(-) create mode 100644 internal/netxlite/dnsx/model/model.go diff --git a/internal/netxlite/dialer.go b/internal/netxlite/dialer.go index df61e6de03..5cb173c0f0 100644 --- a/internal/netxlite/dialer.go +++ b/internal/netxlite/dialer.go @@ -67,6 +67,29 @@ func NewDialerWithoutResolver(logger Logger) Dialer { return NewDialerWithResolver(logger, &nullResolver{}) } +type Connector = Dialer + +func NewDialerWithConnector( + logger Logger, resolver Resolver, connector Connector) Dialer { + return &dialerLogger{ + Dialer: &dialerResolver{ + Dialer: connector, + Resolver: resolver, + }, + Logger: logger, + } +} + +func NewConnector(logger Logger) Connector { + return &dialerLogger{ + Dialer: &dialerErrWrapper{ + Dialer: &dialerSystem{}, + }, + Logger: logger, + operationSuffix: "_address", + } +} + // dialerSystem uses system facilities to perform domain name // resolution and guarantees we have a dialer timeout. 
type dialerSystem struct { diff --git a/internal/netxlite/dnsx/decoder.go b/internal/netxlite/dnsx/decoder.go index 47404a35a7..74182ad250 100644 --- a/internal/netxlite/dnsx/decoder.go +++ b/internal/netxlite/dnsx/decoder.go @@ -1,34 +1,100 @@ package dnsx import ( - "errors" - "github.com/miekg/dns" + "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx/model" + "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) -// The Decoder decodes a DNS reply into A or AAAA entries. It will use the -// provided qtype and only look for mathing entries. It will return error if -// there are no entries for the requested qtype inside the reply. +// HTTPS is an HTTPS reply. +type HTTPS = model.HTTPS + +type https struct { + alpn []string + ipv4hint []string + ipv6hint []string +} + +var _ HTTPS = &https{} + +func (h *https) ALPN() []string { + return h.alpn +} + +func (h *https) IPv4Hint() []string { + return h.ipv4hint +} + +func (h *https) IPv6Hint() []string { + return h.ipv6hint +} + +// The Decoder decodes a DNS replies. type Decoder interface { - Decode(qtype uint16, data []byte) ([]string, error) + // DecodeLookupHost decodes an A or AAAA reply. + DecodeLookupHost(qtype uint16, data []byte) ([]string, error) + + // DecodeHTTPS decodes an HTTPS reply. + DecodeHTTPS(data []byte) (HTTPS, error) } // MiekgDecoder uses github.com/miekg/dns to implement the Decoder. type MiekgDecoder struct{} -// Decode implements Decoder.Decode. -func (d *MiekgDecoder) Decode(qtype uint16, data []byte) ([]string, error) { +func (d *MiekgDecoder) parseReply(data []byte) (*dns.Msg, error) { reply := new(dns.Msg) if err := reply.Unpack(data); err != nil { return nil, err } // TODO(bassosimone): map more errors to net.DNSError names + // TODO(bassosimone): add support for lame referral. 
switch reply.Rcode { case dns.RcodeSuccess: + return reply, nil case dns.RcodeNameError: - return nil, errors.New("ooniresolver: no such host") + return nil, errorsx.ErrOODNSNoSuchHost + case dns.RcodeRefused: + return nil, errorsx.ErrOODNSRefused default: - return nil, errors.New("ooniresolver: query failed") + return nil, errorsx.ErrOODNSMisbehaving + } +} + +func (d *MiekgDecoder) DecodeHTTPS(data []byte) (HTTPS, error) { + reply, err := d.parseReply(data) + if err != nil { + return nil, err + } + out := &https{} + for _, answer := range reply.Answer { + switch avalue := answer.(type) { + case *dns.HTTPS: + for _, v := range avalue.Value { + switch extv := v.(type) { + case *dns.SVCBAlpn: + out.alpn = extv.Alpn + case *dns.SVCBIPv4Hint: + for _, ip := range extv.Hint { + out.ipv4hint = append(out.ipv4hint, ip.String()) + } + case *dns.SVCBIPv6Hint: + for _, ip := range extv.Hint { + out.ipv6hint = append(out.ipv6hint, ip.String()) + } + } + } + } + } + if len(out.alpn) <= 0 { + return nil, errorsx.ErrOODNSNoAnswer + } + return out, nil +} + +func (d *MiekgDecoder) DecodeLookupHost(qtype uint16, data []byte) ([]string, error) { + reply, err := d.parseReply(data) + if err != nil { + return nil, err } var addrs []string for _, answer := range reply.Answer { @@ -46,7 +112,7 @@ func (d *MiekgDecoder) Decode(qtype uint16, data []byte) ([]string, error) { } } if len(addrs) <= 0 { - return nil, errors.New("ooniresolver: no response returned") + return nil, errorsx.ErrOODNSNoAnswer } return addrs, nil } diff --git a/internal/netxlite/dnsx/decoder_test.go b/internal/netxlite/dnsx/decoder_test.go index 0de8485af9..c5202e4709 100644 --- a/internal/netxlite/dnsx/decoder_test.go +++ b/internal/netxlite/dnsx/decoder_test.go @@ -10,7 +10,7 @@ import ( func TestDecoderUnpackError(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode(dns.TypeA, nil) + data, err := d.DecodeLookupHost(dns.TypeA, nil) if err == nil { t.Fatal("expected an error here") } @@ -21,7 +21,7 @@ func 
TestDecoderUnpackError(t *testing.T) { func TestDecoderNXDOMAIN(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode(dns.TypeA, genReplyError(t, dns.RcodeNameError)) + data, err := d.DecodeLookupHost(dns.TypeA, genReplyError(t, dns.RcodeNameError)) if err == nil || !strings.HasSuffix(err.Error(), "no such host") { t.Fatal("not the error we expected") } @@ -32,7 +32,7 @@ func TestDecoderNXDOMAIN(t *testing.T) { func TestDecoderOtherError(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode(dns.TypeA, genReplyError(t, dns.RcodeRefused)) + data, err := d.DecodeLookupHost(dns.TypeA, genReplyError(t, dns.RcodeRefused)) if err == nil || !strings.HasSuffix(err.Error(), "query failed") { t.Fatal("not the error we expected") } @@ -43,7 +43,7 @@ func TestDecoderOtherError(t *testing.T) { func TestDecoderNoAddress(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode(dns.TypeA, genReplySuccess(t, dns.TypeA)) + data, err := d.DecodeLookupHost(dns.TypeA, genReplySuccess(t, dns.TypeA)) if err == nil || !strings.HasSuffix(err.Error(), "no response returned") { t.Fatal("not the error we expected") } @@ -54,7 +54,7 @@ func TestDecoderNoAddress(t *testing.T) { func TestDecoderDecodeA(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode( + data, err := d.DecodeLookupHost( dns.TypeA, genReplySuccess(t, dns.TypeA, "1.1.1.1", "8.8.8.8")) if err != nil { t.Fatal(err) @@ -72,7 +72,7 @@ func TestDecoderDecodeA(t *testing.T) { func TestDecoderDecodeAAAA(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode( + data, err := d.DecodeLookupHost( dns.TypeAAAA, genReplySuccess(t, dns.TypeAAAA, "::1", "fe80::1")) if err != nil { t.Fatal(err) @@ -90,7 +90,7 @@ func TestDecoderDecodeAAAA(t *testing.T) { func TestDecoderUnexpectedAReply(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode( + data, err := d.DecodeLookupHost( dns.TypeA, genReplySuccess(t, dns.TypeAAAA, "::1", "fe80::1")) if err == nil || !strings.HasSuffix(err.Error(), "no response 
returned") { t.Fatal("not the error we expected") @@ -102,7 +102,7 @@ func TestDecoderUnexpectedAReply(t *testing.T) { func TestDecoderUnexpectedAAAAReply(t *testing.T) { d := &MiekgDecoder{} - data, err := d.Decode( + data, err := d.DecodeLookupHost( dns.TypeAAAA, genReplySuccess(t, dns.TypeA, "1.1.1.1", "8.8.4.4.")) if err == nil || !strings.HasSuffix(err.Error(), "no response returned") { t.Fatal("not the error we expected") diff --git a/internal/netxlite/dnsx/dnsoverhttps.go b/internal/netxlite/dnsx/dnsoverhttps.go index 749fda42e2..b5b176ea70 100644 --- a/internal/netxlite/dnsx/dnsoverhttps.go +++ b/internal/netxlite/dnsx/dnsoverhttps.go @@ -27,14 +27,14 @@ type DNSOverHTTPS struct { // NewDNSOverHTTPS creates a new DNSOverHTTP instance from the // specified http.Client and URL, as a convenience. -func NewDNSOverHTTPS(client *http.Client, URL string) *DNSOverHTTPS { +func NewDNSOverHTTPS(client HTTPClient, URL string) *DNSOverHTTPS { return NewDNSOverHTTPSWithHostOverride(client, URL, "") } // NewDNSOverHTTPSWithHostOverride is like NewDNSOverHTTPS except that // it's creating a resolver where we use the specified host. func NewDNSOverHTTPSWithHostOverride( - client *http.Client, URL, hostOverride string) *DNSOverHTTPS { + client HTTPClient, URL, hostOverride string) *DNSOverHTTPS { return &DNSOverHTTPS{Client: client, URL: URL, HostOverride: hostOverride} } diff --git a/internal/netxlite/dnsx/model/model.go b/internal/netxlite/dnsx/model/model.go new file mode 100644 index 0000000000..f2e6d43dce --- /dev/null +++ b/internal/netxlite/dnsx/model/model.go @@ -0,0 +1,14 @@ +// Package model contains the dnsx model. +package model + +// HTTPS is an HTTPS reply. +type HTTPS interface { + // ALPN returns the ALPNs inside the SVCBAlpn structure + ALPN() []string + + // IPv4Hint returns the IPv4 hints. + IPv4Hint() []string + + // IPv6Hint returns the IPv6 hints. 
+ IPv6Hint() []string +} diff --git a/internal/netxlite/dnsx/serial.go b/internal/netxlite/dnsx/serial.go index 6389c88cd7..ddddf49280 100644 --- a/internal/netxlite/dnsx/serial.go +++ b/internal/netxlite/dnsx/serial.go @@ -51,8 +51,8 @@ func (r *SerialResolver) CloseIdleConnections() { // LookupHost implements Resolver.LookupHost. func (r *SerialResolver) LookupHost(ctx context.Context, hostname string) ([]string, error) { var addrs []string - addrsA, errA := r.roundTripWithRetry(ctx, hostname, dns.TypeA) - addrsAAAA, errAAAA := r.roundTripWithRetry(ctx, hostname, dns.TypeAAAA) + addrsA, errA := r.lookupHostWithRetry(ctx, hostname, dns.TypeA) + addrsAAAA, errAAAA := r.lookupHostWithRetry(ctx, hostname, dns.TypeAAAA) if errA != nil && errAAAA != nil { return nil, errA } @@ -61,11 +61,26 @@ func (r *SerialResolver) LookupHost(ctx context.Context, hostname string) ([]str return addrs, nil } -func (r *SerialResolver) roundTripWithRetry( +// LookupHTTPSWithoutRetry issues an HTTPS query without retrying on failure. +func (r *SerialResolver) LookupHTTPSWithoutRetry( + ctx context.Context, hostname string) (HTTPS, error) { + querydata, err := r.Encoder.Encode( + hostname, dns.TypeHTTPS, r.Txp.RequiresPadding()) + if err != nil { + return nil, err + } + replydata, err := r.Txp.RoundTrip(ctx, querydata) + if err != nil { + return nil, err + } + return r.Decoder.DecodeHTTPS(replydata) +} + +func (r *SerialResolver) lookupHostWithRetry( ctx context.Context, hostname string, qtype uint16) ([]string, error) { var errorslist []error for i := 0; i < 3; i++ { - replies, err := r.roundTrip(ctx, hostname, qtype) + replies, err := r.LookupHostWithoutRetry(ctx, hostname, qtype) if err == nil { return replies, nil } @@ -87,7 +102,9 @@ func (r *SerialResolver) roundTripWithRetry( return nil, errorslist[0] } -func (r *SerialResolver) roundTrip( +// LookupHostWithoutRetry issues a lookup host query for the specified +// qtype (dns.A or dns.AAAA) without retrying on failure. 
+func (r *SerialResolver) LookupHostWithoutRetry( ctx context.Context, hostname string, qtype uint16) ([]string, error) { querydata, err := r.Encoder.Encode(hostname, qtype, r.Txp.RequiresPadding()) if err != nil { @@ -97,5 +114,5 @@ func (r *SerialResolver) roundTrip( if err != nil { return nil, err } - return r.Decoder.Decode(qtype, replydata) + return r.Decoder.DecodeLookupHost(qtype, replydata) } diff --git a/internal/netxlite/errorsx/classify.go b/internal/netxlite/errorsx/classify.go index 7af75da07f..c9bd177258 100644 --- a/internal/netxlite/errorsx/classify.go +++ b/internal/netxlite/errorsx/classify.go @@ -84,12 +84,18 @@ func classifyWithStringSuffix(err error) string { if strings.HasSuffix(s, "TLS handshake timeout") { return FailureGenericTimeoutError } - if strings.HasSuffix(s, "no such host") { + if strings.HasSuffix(s, DNSNoSuchHostSuffix) { // This is dns_lookup_error in MK but such error is used as a // generic "hey, the lookup failed" error. Instead, this error // that we return here is significantly more specific. return FailureDNSNXDOMAINError } + if strings.HasSuffix(s, DNSServerMisbehavingSuffix) { + return FailureDNSServerMisbehaving + } + if strings.HasSuffix(s, DNSNoAnswerSuffix) { + return FailureDNSNoAnswer + } return "" // not found } @@ -220,6 +226,21 @@ func quicIsCertificateError(alert uint8) bool { // filters for DNS bogons MUST use this error. var ErrDNSBogon = errors.New("dns: detected bogon address") +// These strings are same as the standard library. +const ( + DNSNoSuchHostSuffix = "no such host" + DNSServerMisbehavingSuffix = "server misbehaving" + DNSNoAnswerSuffix = "no answer from DNS server" +) + +// These errors are returned by the decoder and/or the serial resolver. 
+var ( + ErrOODNSNoSuchHost = fmt.Errorf("ooniresolver: %s", DNSNoSuchHostSuffix) + ErrOODNSRefused = errors.New("ooniresolver: refused") + ErrOODNSMisbehaving = fmt.Errorf("ooniresolver: %s", DNSServerMisbehavingSuffix) + ErrOODNSNoAnswer = fmt.Errorf("ooniresolver: %s", DNSNoAnswerSuffix) +) + // ClassifyResolverError maps an error occurred during a domain name // resolution to the corresponding OONI failure string. // @@ -236,6 +257,11 @@ func ClassifyResolverError(err error) string { if errors.Is(err, ErrDNSBogon) { return FailureDNSBogonError // not in MK } + // Implementation note: we match errors that share the same + // string of the stdlib in the generic classifier. + if errors.Is(err, ErrOODNSRefused) { + return FailureDNSRefusedError // not in MK + } return ClassifyGenericError(err) } diff --git a/internal/netxlite/errorsx/errno.go b/internal/netxlite/errorsx/errno.go index aa0bc1ece9..ddc0dd654e 100644 --- a/internal/netxlite/errorsx/errno.go +++ b/internal/netxlite/errorsx/errno.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. 
-// Generated: 2021-09-08 23:09:33.336763 +0200 CEST m=+0.192836793 +// Generated: 2021-09-15 01:19:41.870222 +0200 CEST m=+0.133778293 package errorsx @@ -50,6 +50,9 @@ const ( // FailureDNSBogonError = "dns_bogon_error" FailureDNSNXDOMAINError = "dns_nxdomain_error" + FailureDNSRefusedError = "dns_refused_error" + FailureDNSServerMisbehaving = "dns_server_misbehaving" + FailureDNSNoAnswer = "dns_no_answer" FailureEOFError = "eof_error" FailureGenericTimeoutError = "generic_timeout_error" FailureQUICIncompatibleVersion = "quic_incompatible_version" @@ -93,6 +96,9 @@ var failuresMap = map[string]string{ "wrong_protocol_type": "wrong_protocol_type", "dns_bogon_error": "dns_bogon_error", "dns_nxdomain_error": "dns_nxdomain_error", + "dns_refused_error": "dns_refused_error", + "dns_server_misbehaving": "dns_server_misbehaving", + "dns_no_answer": "dns_no_answer", "eof_error": "eof_error", "generic_timeout_error": "generic_timeout_error", "quic_incompatible_version": "quic_incompatible_version", diff --git a/internal/netxlite/errorsx/errno_test.go b/internal/netxlite/errorsx/errno_test.go index b5f5709926..f33de7e5e6 100644 --- a/internal/netxlite/errorsx/errno_test.go +++ b/internal/netxlite/errorsx/errno_test.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2021-09-08 23:09:33.382965 +0200 CEST m=+0.239039834 +// Generated: 2021-09-15 01:19:41.914231 +0200 CEST m=+0.177788626 package errorsx diff --git a/internal/netxlite/errorsx/errno_unix.go b/internal/netxlite/errorsx/errno_unix.go index 8a6ad8cbc4..b9c76f7794 100644 --- a/internal/netxlite/errorsx/errno_unix.go +++ b/internal/netxlite/errorsx/errno_unix.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. 
-// Generated: 2021-09-08 23:09:33.144513 +0200 CEST m=+0.000582543 +// Generated: 2021-09-15 01:19:41.737047 +0200 CEST m=+0.000598959 package errorsx diff --git a/internal/netxlite/errorsx/errno_windows.go b/internal/netxlite/errorsx/errno_windows.go index 78fde75cee..d789bbf1cc 100644 --- a/internal/netxlite/errorsx/errno_windows.go +++ b/internal/netxlite/errorsx/errno_windows.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2021-09-08 23:09:33.310337 +0200 CEST m=+0.166410043 +// Generated: 2021-09-15 01:19:41.845953 +0200 CEST m=+0.109508834 package errorsx diff --git a/internal/netxlite/errorsx/internal/generrno/main.go b/internal/netxlite/errorsx/internal/generrno/main.go index dd0ab973b4..0cf23ecf44 100644 --- a/internal/netxlite/errorsx/internal/generrno/main.go +++ b/internal/netxlite/errorsx/internal/generrno/main.go @@ -93,6 +93,9 @@ var Specs = []*ErrorSpec{ // we must write "DNS" rather than writing "dns". NewLibraryError("DNS_bogon_error"), NewLibraryError("DNS_NXDOMAIN_error"), + NewLibraryError("DNS_refused_error"), + NewLibraryError("DNS_server_misbehaving"), + NewLibraryError("DNS_no_answer"), NewLibraryError("EOF_error"), NewLibraryError("generic_timeout_error"), NewLibraryError("QUIC_incompatible_version"), diff --git a/internal/netxlite/http.go b/internal/netxlite/http.go index 7ac528e417..c408471a36 100644 --- a/internal/netxlite/http.go +++ b/internal/netxlite/http.go @@ -110,6 +110,10 @@ func (txp *httpTransportConnectionsCloser) CloseIdleConnections() { // The returned transport will set a default user agent if the // request has not already set a user agent. func NewHTTPTransport(logger Logger, dialer Dialer, tlsDialer TLSDialer) HTTPTransport { + return WrapHTTPTransport(logger, NewOOHTTPBaseTransport(dialer, tlsDialer)) +} + +func NewOOHTTPBaseTransport(dialer Dialer, tlsDialer TLSDialer) HTTPTransport { // Using oohttp to support any TLS library. 
txp := oohttp.DefaultTransport.(*oohttp.Transport).Clone() @@ -138,16 +142,20 @@ func NewHTTPTransport(logger Logger, dialer Dialer, tlsDialer TLSDialer) HTTPTra // upon us when we are using TLS parroting). txp.ForceAttemptHTTP2 = true + return &httpTransportConnectionsCloser{ + HTTPTransport: &oohttp.StdlibTransport{Transport: txp}, + Dialer: dialer, + TLSDialer: tlsDialer, + } +} + +func WrapHTTPTransport(logger Logger, txp HTTPTransport) HTTPTransport { // Ensure we correctly forward CloseIdleConnections and compose // with a logging transport thus enabling logging. return &httpUserAgentTransport{ HTTPTransport: &httpTransportLogger{ - HTTPTransport: &httpTransportConnectionsCloser{ - HTTPTransport: &oohttp.StdlibTransport{Transport: txp}, - Dialer: dialer, - TLSDialer: tlsDialer, - }, - Logger: logger, + HTTPTransport: txp, + Logger: logger, }, } } diff --git a/internal/netxlite/legacy.go b/internal/netxlite/legacy.go index dbf2116c1f..e450aada94 100644 --- a/internal/netxlite/legacy.go +++ b/internal/netxlite/legacy.go @@ -98,6 +98,16 @@ func (r *ResolverLegacyAdapter) CloseIdleConnections() { } } +func (r *ResolverLegacyAdapter) LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) { + return nil, ErrNoDNSTransport +} + +func (r *ResolverLegacyAdapter) LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) { + return nil, ErrNoDNSTransport +} + // DialerLegacy establishes network connections. // // This definition is DEPRECATED. Please, use Dialer. diff --git a/internal/netxlite/mocks/resolver.go b/internal/netxlite/mocks/resolver.go index 3abf749c02..2876357e9a 100644 --- a/internal/netxlite/mocks/resolver.go +++ b/internal/netxlite/mocks/resolver.go @@ -1,6 +1,10 @@ package mocks -import "context" +import ( + "context" + + "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx/model" +) // Resolver is a mockable Resolver. 
type Resolver struct { @@ -29,3 +33,16 @@ func (r *Resolver) Network() string { func (r *Resolver) CloseIdleConnections() { r.MockCloseIdleConnections() } + +func (r *Resolver) LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) { + panic("not yet implemented") +} + +// HTTPS is an HTTPS reply. +type HTTPS = model.HTTPS + +func (r *Resolver) LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) { + panic("not yet implemented") +} diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index a85e9667a8..0183eaddec 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -3,13 +3,19 @@ package netxlite import ( "context" "errors" + "fmt" "net" "time" + "github.com/miekg/dns" + "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" "golang.org/x/net/idna" ) +// HTTPS is the type returned for HTTPS queries. +type HTTPS = dnsx.HTTPS + // Resolver performs domain name resolutions. type Resolver interface { // LookupHost behaves like net.Resolver.LookupHost. @@ -23,8 +29,23 @@ type Resolver interface { // CloseIdleConnections closes idle connections, if any. CloseIdleConnections() + + // LookupHostWithoutRetry issues a single lookup host query + // for the given qtype (dns.TypeA or dns.TypeAAAA) without any + // retry mechanism whatsoever. + LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) + + // LookupHTTPSWithoutRetry issues a single HTTPS query for + // a domain without any retry mechanism whatsoever. + LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) } +// ErrNoDNSTransport indicates that the requested Resolver operation +// cannot be performed because we're using the "system" resolver. 
+var ErrNoDNSTransport = errors.New("operation requires a DNS transport") + // NewResolverStdlib creates a new resolver using system // facilities for resolving domain names (e.g., getaddrinfo). // @@ -42,11 +63,15 @@ type Resolver interface { // 5. enforces reasonable timeouts ( // see https://github.com/ooni/probe/issues/1726). func NewResolverStdlib(logger Logger) Resolver { + return WrapResolver(logger, &resolverSystem{}) +} + +func WrapResolver(logger Logger, resolver Resolver) Resolver { return &resolverIDNA{ Resolver: &resolverLogger{ Resolver: &resolverShortCircuitIPAddr{ Resolver: &resolverErrWrapper{ - Resolver: &resolverSystem{}, + Resolver: resolver, }, }, Logger: logger, @@ -114,6 +139,16 @@ func (r *resolverSystem) CloseIdleConnections() { // nothing to do } +func (r *resolverSystem) LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) { + return nil, ErrNoDNSTransport +} + +func (r *resolverSystem) LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) { + return nil, ErrNoDNSTransport +} + // resolverLogger is a resolver that emits events type resolverLogger struct { Resolver @@ -123,18 +158,53 @@ type resolverLogger struct { var _ Resolver = &resolverLogger{} func (r *resolverLogger) LookupHost(ctx context.Context, hostname string) ([]string, error) { - r.Logger.Debugf("resolve %s...", hostname) + prefix := fmt.Sprintf("resolve[A,AAAA] %s with %s (%s)", hostname, r.Network(), r.Address()) + r.Logger.Debugf("%s...", prefix) start := time.Now() addrs, err := r.Resolver.LookupHost(ctx, hostname) elapsed := time.Since(start) if err != nil { - r.Logger.Debugf("resolve %s... %s in %s", hostname, err, elapsed) + r.Logger.Debugf("%s... %s in %s", prefix, err, elapsed) + return nil, err + } + r.Logger.Debugf("%s... 
%+v in %s", prefix, addrs, elapsed) + return addrs, nil +} + +func (r *resolverLogger) LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) { + qtypename := dns.TypeToString[qtype] + prefix := fmt.Sprintf("resolve[%s] %s with %s (%s)", qtypename, domain, r.Network(), r.Address()) + r.Logger.Debugf("%s...", prefix) + start := time.Now() + addrs, err := r.Resolver.LookupHostWithoutRetry(ctx, domain, qtype) + elapsed := time.Since(start) + if err != nil { + r.Logger.Debugf("%s... %s in %s", prefix, err, elapsed) return nil, err } - r.Logger.Debugf("resolve %s... %+v in %s", hostname, addrs, elapsed) + r.Logger.Debugf("%s... %+v in %s", prefix, addrs, elapsed) return addrs, nil } +func (r *resolverLogger) LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) { + prefix := fmt.Sprintf("resolve[HTTPS] %s with %s (%s)", domain, r.Network(), r.Address()) + r.Logger.Debugf("%s...", prefix) + start := time.Now() + https, err := r.Resolver.LookupHTTPSWithoutRetry(ctx, domain) + elapsed := time.Since(start) + if err != nil { + r.Logger.Debugf("%s... %s in %s", prefix, err, elapsed) + return nil, err + } + alpn := https.ALPN() + a := https.IPv4Hint() + aaaa := https.IPv6Hint() + r.Logger.Debugf("%s... %+v %+v %+v in %s", prefix, alpn, a, aaaa, elapsed) + return https, nil +} + // resolverIDNA supports resolving Internationalized Domain Names. // // See RFC3492 for more information. 
@@ -150,6 +220,24 @@ func (r *resolverIDNA) LookupHost(ctx context.Context, hostname string) ([]strin return r.Resolver.LookupHost(ctx, host) } +func (r *resolverIDNA) LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) { + host, err := idna.ToASCII(domain) + if err != nil { + return nil, err + } + return r.Resolver.LookupHostWithoutRetry(ctx, host, qtype) +} + +func (r *resolverIDNA) LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) { + host, err := idna.ToASCII(domain) + if err != nil { + return nil, err + } + return r.Resolver.LookupHTTPSWithoutRetry(ctx, host) +} + // resolverShortCircuitIPAddr recognizes when the input hostname is an // IP address and returns it immediately to the caller. type resolverShortCircuitIPAddr struct { @@ -186,6 +274,16 @@ func (r *nullResolver) CloseIdleConnections() { // nothing to do } +func (r *nullResolver) LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) { + return nil, ErrNoDNSTransport +} + +func (r *nullResolver) LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) { + return nil, ErrNoDNSTransport +} + // resolverErrWrapper is a Resolver that knows about wrapping errors. 
type resolverErrWrapper struct { Resolver @@ -201,3 +299,23 @@ func (r *resolverErrWrapper) LookupHost(ctx context.Context, hostname string) ([ } return addrs, nil } + +func (r *resolverErrWrapper) LookupHostWithoutRetry( + ctx context.Context, domain string, qtype uint16) ([]string, error) { + addrs, err := r.Resolver.LookupHostWithoutRetry(ctx, domain, qtype) + if err != nil { + return nil, errorsx.NewErrWrapper( + errorsx.ClassifyResolverError, errorsx.ResolveOperation, err) + } + return addrs, nil +} + +func (r *resolverErrWrapper) LookupHTTPSWithoutRetry( + ctx context.Context, domain string) (HTTPS, error) { + out, err := r.Resolver.LookupHTTPSWithoutRetry(ctx, domain) + if err != nil { + return nil, errorsx.NewErrWrapper( + errorsx.ClassifyResolverError, errorsx.ResolveOperation, err) + } + return out, nil +} diff --git a/internal/netxlite/tls.go b/internal/netxlite/tls.go index ac0b042b97..c002909c2f 100644 --- a/internal/netxlite/tls.go +++ b/internal/netxlite/tls.go @@ -121,7 +121,7 @@ type TLSHandshaker interface { // // QUIRK: The returned connection will always implement the TLSConn interface // exposed by this package. A future version of this interface will instead - // return directly a TLSConn and remove the ConnectionState param. + // return directly a TLSConn to avoid unconditional castings. 
Handshake(ctx context.Context, conn net.Conn, config *tls.Config) ( net.Conn, tls.ConnectionState, error) } From a4ac1a25211064d580b4ecc34c412b94b386e970 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 20 Sep 2021 02:08:09 +0200 Subject: [PATCH 02/53] plan C --- internal/measurex/db.go | 49 ++ internal/measurex/dialer.go | 175 +++++++ internal/measurex/dnsx.go | 50 ++ internal/measurex/doc.go | 2 + internal/measurex/http.go | 297 +++++++++++ internal/measurex/logger.go | 11 + internal/measurex/measurer.go | 728 ++++++++++++++++++++++++++ internal/measurex/origin.go | 10 + internal/measurex/resolver.go | 94 ++++ internal/measurex/saver.go | 159 ++++++ internal/measurex/tls.go | 125 +++++ internal/measurex/wcth.go | 186 +++++++ internal/measurex/websteps.go | 247 +++++++++ internal/netxlite/dnsx/decoder.go | 10 +- internal/netxlite/dnsx/model/model.go | 4 +- internal/netxlite/dnsx/serial.go | 6 +- internal/netxlite/legacy.go | 4 +- internal/netxlite/mocks/resolver.go | 8 +- internal/netxlite/resolver.go | 32 +- 19 files changed, 2165 insertions(+), 32 deletions(-) create mode 100644 internal/measurex/db.go create mode 100644 internal/measurex/dialer.go create mode 100644 internal/measurex/dnsx.go create mode 100644 internal/measurex/doc.go create mode 100644 internal/measurex/http.go create mode 100644 internal/measurex/logger.go create mode 100644 internal/measurex/measurer.go create mode 100644 internal/measurex/origin.go create mode 100644 internal/measurex/resolver.go create mode 100644 internal/measurex/saver.go create mode 100644 internal/measurex/tls.go create mode 100644 internal/measurex/wcth.go create mode 100644 internal/measurex/websteps.go diff --git a/internal/measurex/db.go b/internal/measurex/db.go new file mode 100644 index 0000000000..da1c5ba788 --- /dev/null +++ b/internal/measurex/db.go @@ -0,0 +1,49 @@ +package measurex + +// DB is the database holding measurements. 
+type DB interface { + // Dial table + InsertIntoDial(ev *NetworkEvent) + SelectAllFromDial() []*NetworkEvent + + // ReadWrite table + InsertIntoReadWrite(ev *NetworkEvent) + SelectAllFromReadWrite() []*NetworkEvent + + // Close table + InsertIntoClose(ev *NetworkEvent) + SelectAllFromClose() []*NetworkEvent + + // TLSHandshake table + InsertIntoTLSHandshake(ev *TLSHandshakeEvent) + SelectAllFromTLSHandshake() []*TLSHandshakeEvent + + // LookupHost table + InsertIntoLookupHost(ev *LookupHostEvent) + SelectAllFromLookupHost() []*LookupHostEvent + + // LookupHTTPSSvc table + InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) + SelectAllFromLookupHTTPSSvc() []*LookupHTTPSSvcEvent + + // DNSRoundTrip table + InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) + SelectAllFromDNSRoundTrip() []*DNSRoundTripEvent + + // HTTPRoundTrip table + InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) + SelectAllFromHTTPRoundTrip() []*HTTPRoundTripEvent + + // HTTPRedirect table + InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) + SelectAllFromHTTPRedirect() []*HTTPRedirectEvent + + // NextConnID increments and returns the connection ID. + NextConnID() int64 + + // MeasurementID returns the measurement ID. + MeasurementID() int64 + + // NextMeasurement increments and returns the measurement ID. + NextMeasurement() int64 +} diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go new file mode 100644 index 0000000000..2b82200447 --- /dev/null +++ b/internal/measurex/dialer.go @@ -0,0 +1,175 @@ +package measurex + +import ( + "context" + "net" + "time" + + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// Conn is the connection type we use. +type Conn interface { + net.Conn + + // ConnID returns the connection ID. + ConnID() int64 +} + +// Dialer is the dialer type we use. +type Dialer interface { + DialContext(ctx context.Context, network, address string) (Conn, error) + CloseIdleConnections() +} + +// WrapDialer wraps a Dialer to add measurex capabilities. 
+//
+// DialContext algorithm
+//
+// 1. perform TCP/UDP connect as usual;
+//
+// 2. insert a NetworkEvent describing the connect into the DB's Dial table;
+//
+// 3. on success, wrap the returned net.Conn so that it
+// inserts Read, Write, and Close events into the DB;
+//
+// 4. return the wrapped Conn or the error.
+func WrapDialer(origin Origin, db DB, d netxlite.Dialer) Dialer {
+	return &dialerx{Dialer: d, db: db, origin: origin}
+}
+
+// dialerx is the Dialer returned by WrapDialer. It embeds the wrapped
+// netxlite.Dialer (which provides CloseIdleConnections) and overrides
+// DialContext to save events into db.
+type dialerx struct {
+	netxlite.Dialer
+	db     DB
+	origin Origin
+}
+
+// NetworkEvent contains a network event.
+//
+// The same structure describes connect, read, write, and close
+// operations; the Operation field tells them apart. Count is the
+// number of bytes read or written and is zero for connect and close.
+type NetworkEvent struct {
+	Origin        Origin
+	MeasurementID int64
+	ConnID        int64
+	Operation     string
+	Network       string
+	RemoteAddr    string
+	LocalAddr     string
+	Started       time.Time
+	Finished      time.Time
+	Error         error
+	Count         int
+}
+
+// DialContext dials using the wrapped dialer, saves a "connect" event
+// into the Dial table (on both success and failure) and, on success,
+// returns a Conn that saves its own I/O events.
+func (d *dialerx) DialContext(
+	ctx context.Context, network, address string) (Conn, error) {
+	connID := d.db.NextConnID()
+	started := time.Now()
+	conn, err := d.Dialer.DialContext(ctx, network, address)
+	finished := time.Now()
+	// Compute the local address once and reuse it for the event and the conn.
+	localAddr := d.localAddrIfNotNil(conn)
+	d.db.InsertIntoDial(&NetworkEvent{
+		Origin:        d.origin,
+		MeasurementID: d.db.MeasurementID(),
+		ConnID:        connID,
+		Operation:     "connect",
+		Network:       network,
+		RemoteAddr:    address,
+		LocalAddr:     localAddr,
+		Started:       started,
+		Finished:      finished,
+		Error:         err,
+		Count:         0,
+	})
+	if err != nil {
+		return nil, err
+	}
+	return &connx{
+		Conn:       conn,
+		db:         d.db,
+		connID:     connID,
+		remoteAddr: address,
+		localAddr:  localAddr,
+		network:    network,
+		origin:     d.origin,
+	}, nil
+}
+
+// localAddrIfNotNil returns the string representation of conn's local
+// address, or the empty string when conn (or its local address) is nil,
+// which is the case when dialing failed.
+func (d *dialerx) localAddrIfNotNil(conn net.Conn) (addr string) {
+	if conn == nil {
+		return
+	}
+	if la := conn.LocalAddr(); la != nil {
+		addr = la.String()
+	}
+	return
+}
+
+// connx is the Conn returned by dialerx.DialContext. It overrides Read,
+// Write, and Close to save a NetworkEvent for each call.
+type connx struct {
+	net.Conn
+	db         DB
+	connID     int64
+	remoteAddr string
+	localAddr  string
+	network    string
+	origin     Origin
+}
+
+// ConnID implements Conn.ConnID.
+func (c *connx) ConnID() int64 {
+	return c.connID
+}
+
+// Read reads from the underlying conn and saves a "read" event.
+func (c *connx) Read(b []byte) (int, error) {
+	started := time.Now()
+	count, err := c.Conn.Read(b)
+	finished := time.Now()
+	c.db.InsertIntoReadWrite(&NetworkEvent{
+		Origin:        c.origin,
+		MeasurementID: c.db.MeasurementID(),
+		ConnID:        c.connID,
+		Operation:     "read",
+		Network:       c.network,
+		RemoteAddr:    c.remoteAddr,
+		LocalAddr:     c.localAddr,
+		Started:       started,
+		Finished:      finished,
+		Error:         err,
+		Count:         count,
+	})
+	return count, err
+}
+
+// Write writes to the underlying conn and saves a "write" event.
+func (c *connx) Write(b []byte) (int, error) {
+	started := time.Now()
+	count, err := c.Conn.Write(b)
+	finished := time.Now()
+	c.db.InsertIntoReadWrite(&NetworkEvent{
+		Origin:        c.origin,
+		MeasurementID: c.db.MeasurementID(),
+		ConnID:        c.connID,
+		Operation:     "write",
+		Network:       c.network,
+		RemoteAddr:    c.remoteAddr,
+		LocalAddr:     c.localAddr,
+		Started:       started,
+		Finished:      finished,
+		Error:         err,
+		Count:         count,
+	})
+	return count, err
+}
+
+// Close closes the underlying conn and saves a "close" event.
+func (c *connx) Close() error {
+	started := time.Now()
+	err := c.Conn.Close()
+	finished := time.Now()
+	c.db.InsertIntoClose(&NetworkEvent{
+		Origin:        c.origin,
+		MeasurementID: c.db.MeasurementID(),
+		ConnID:        c.connID,
+		Operation:     "close",
+		Network:       c.network,
+		RemoteAddr:    c.remoteAddr,
+		LocalAddr:     c.localAddr,
+		Started:       started,
+		Finished:      finished,
+		Error:         err,
+		Count:         0,
+	})
+	return err
+}
diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go
new file mode 100644
index 0000000000..d83371452c
--- /dev/null
+++ b/internal/measurex/dnsx.go
@@ -0,0 +1,50 @@
+package measurex
+
+import (
+	"context"
+	"time"
+
+	"github.com/ooni/probe-cli/v3/internal/netxlite/dnsx"
+)
+
+// DNSTransport is the DNS transport type we use.
+type DNSTransport = dnsx.RoundTripper
+
+// WrapDNSXRoundTripper wraps a dnsx.RoundTripper to add measurex capabilities.
+func WrapDNSXRoundTripper(db DB, rt dnsx.RoundTripper) DNSTransport {
+	return &dnsxTransportx{db: db, RoundTripper: rt}
+}
+
+// dnsxTransportx is the DNSTransport returned by WrapDNSXRoundTripper.
+type dnsxTransportx struct {
+	dnsx.RoundTripper
+	db DB
+}
+
+// DNSRoundTripEvent contains the result of a DNS round trip.
+type DNSRoundTripEvent struct {
+	MeasurementID int64
+	Network       string
+	Address       string
+	Query         []byte
+	Started       time.Time
+	Finished      time.Time
+	Error         error
+	Reply         []byte
+}
+
+// RoundTrip performs the round trip using the wrapped transport and
+// saves a DNSRoundTripEvent into the DB on both success and failure.
+func (txp *dnsxTransportx) RoundTrip(ctx context.Context, query []byte) ([]byte, error) {
+	started := time.Now()
+	reply, err := txp.RoundTripper.RoundTrip(ctx, query)
+	finished := time.Now()
+	txp.db.InsertIntoDNSRoundTrip(&DNSRoundTripEvent{
+		MeasurementID: txp.db.MeasurementID(),
+		Network:       txp.RoundTripper.Network(),
+		Address:       txp.RoundTripper.Address(),
+		Query:         query,
+		Started:       started,
+		Finished:      finished,
+		Error:         err,
+		Reply:         reply,
+	})
+	return reply, err
+}
diff --git a/internal/measurex/doc.go b/internal/measurex/doc.go
new file mode 100644
index 0000000000..f902e5ea7c
--- /dev/null
+++ b/internal/measurex/doc.go
@@ -0,0 +1,2 @@
+// Package measurex contains measurement extensions.
+package measurex
diff --git a/internal/measurex/http.go b/internal/measurex/http.go
new file mode 100644
index 0000000000..d9ad2e45a8
--- /dev/null
+++ b/internal/measurex/http.go
@@ -0,0 +1,297 @@
+package measurex
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+	"net/http/cookiejar"
+	"net/url"
+	"time"
+
+	"github.com/ooni/probe-cli/v3/internal/engine/httpheader"
+	"github.com/ooni/probe-cli/v3/internal/netxlite"
+	"github.com/ooni/probe-cli/v3/internal/netxlite/iox"
+	"github.com/ooni/probe-cli/v3/internal/runtimex"
+	"golang.org/x/net/publicsuffix"
+)
+
+// HTTPTransport is the HTTP transport type we use.
+type HTTPTransport interface {
+	netxlite.HTTPTransport
+
+	// ConnID returns the connection ID.
+	ConnID() int64
+}
+
+// WrapHTTPTransport wraps a netxlite.HTTPTransport to add measurex
+// capabilities. With this constructor the conn ID is undefined.
+func WrapHTTPTransport(
+	origin Origin, db DB, txp netxlite.HTTPTransport) HTTPTransport {
+	// Zero is the conn ID we use to mean "undefined".
+	return WrapHTTPTransportWithConnID(origin, db, txp, 0)
+}
+
+// WrapHTTPTransportWithConnID is like WrapHTTPTransport but also
+// sets the conn ID, which is otherwise undefined.
+func WrapHTTPTransportWithConnID(origin Origin,
+	db DB, txp netxlite.HTTPTransport, connID int64) HTTPTransport {
+	return &httpTransportx{
+		HTTPTransport: txp, db: db, connID: connID, origin: origin}
+}
+
+// NewHTTPTransportWithConn creates and wraps an HTTPTransport that
+// does not dial and only uses the given conn. The conn ID of the
+// returned transport is the one of the given conn.
+func NewHTTPTransportWithConn(
+	origin Origin, logger Logger, db DB, conn Conn) HTTPTransport {
+	return WrapHTTPTransportWithConnID(origin, db, netxlite.NewHTTPTransport(
+		logger, netxlite.NewSingleUseDialer(conn),
+		netxlite.NewNullTLSDialer(),
+	), conn.ConnID())
+}
+
+// NewHTTPTransportWithTLSConn creates and wraps an HTTPTransport that
+// does not dial and only uses the given TLS conn. The conn ID of the
+// returned transport is the one of the given conn.
+func NewHTTPTransportWithTLSConn(
+	origin Origin, logger Logger, db DB, conn TLSConn) HTTPTransport {
+	return WrapHTTPTransportWithConnID(origin, db, netxlite.NewHTTPTransport(
+		logger, netxlite.NewNullDialer(),
+		netxlite.NewSingleUseTLSDialer(conn),
+	), conn.ConnID())
+}
+
+// httpTransportx is the HTTPTransport returned by the above
+// constructors. It overrides RoundTrip to save events into db.
+type httpTransportx struct {
+	netxlite.HTTPTransport
+	connID int64
+	db     DB
+	origin Origin
+}
+
+// HTTPRoundTripEvent contains information about an HTTP round trip.
+//
+// If ConnID is zero or negative, it means undefined. This happens
+// when we create a transport without knowing the ConnID.
+type HTTPRoundTripEvent struct {
+	Origin               Origin
+	MeasurementID        int64
+	ConnID               int64
+	RequestMethod        string
+	RequestURL           *url.URL
+	RequestHeader        http.Header
+	Started              time.Time
+	Finished             time.Time
+	Error                error
+	ResponseStatus       int
+	ResponseHeader       http.Header
+	ResponseBodySnapshot []byte
+}
+
+// We only read a small snapshot of the body to keep measurements
+// lean, since we're mostly interested in TLS interference nowadays
+// but we'll also allow for reading more bytes from the conn.
+const maxBodySnapshot = 1 << 11
+
+// RoundTrip performs the round trip using the wrapped transport, reads
+// a snapshot of at most maxBodySnapshot bytes of the response body, and
+// saves an HTTPRoundTripEvent into the DB on both success and failure.
+//
+// On success, the returned response body yields the snapshot followed
+// by the rest of the original body, so that callers can still read the
+// whole body. On failure, the response (if any) is closed and discarded.
+func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) {
+	started := time.Now()
+	resp, err := txp.HTTPTransport.RoundTrip(req)
+	rt := &HTTPRoundTripEvent{
+		Origin:        txp.origin,
+		MeasurementID: txp.db.MeasurementID(),
+		ConnID:        txp.connID,
+		RequestMethod: req.Method,
+		RequestURL:    req.URL,
+		RequestHeader: req.Header,
+		Started:       started,
+	}
+	if err != nil {
+		rt.Finished = time.Now()
+		rt.Error = err
+		txp.db.InsertIntoHTTPRoundTrip(rt)
+		return nil, err
+	}
+	rt.ResponseStatus = resp.StatusCode
+	rt.ResponseHeader = resp.Header
+	r := io.LimitReader(resp.Body, maxBodySnapshot)
+	body, err := iox.ReadAllContext(req.Context(), r)
+	if errors.Is(err, io.EOF) && resp.Close {
+		err = nil // we expected to see an EOF here
+	}
+	if err != nil {
+		// We do not return resp to the caller, hence nobody else
+		// can close its body: do that here to avoid leaking it.
+		resp.Body.Close()
+		rt.Finished = time.Now()
+		rt.Error = err
+		txp.db.InsertIntoHTTPRoundTrip(rt)
+		return nil, err
+	}
+	resp.Body = &httpTransportBody{ // allow for reading more if needed
+		Reader: io.MultiReader(bytes.NewReader(body), resp.Body),
+		Closer: resp.Body,
+	}
+	rt.ResponseBodySnapshot = body
+	rt.Finished = time.Now()
+	txp.db.InsertIntoHTTPRoundTrip(rt)
+	return resp, nil
+}
+
+// httpTransportBody is the response body returned by RoundTrip: it
+// reads from Reader and closes using Closer.
+type httpTransportBody struct {
+	io.Reader
+	io.Closer
+}
+
+// ConnID implements HTTPTransport.ConnID.
+func (txp *httpTransportx) ConnID() int64 {
+	return txp.connID
+}
+
+// HTTPClient is the HTTP client type we use.
+type HTTPClient interface {
+	Do(req *http.Request) (*http.Response, error)
+	CloseIdleConnections()
+}
+
+// NewHTTPClientWithoutRedirects creates a new HTTPClient instance that
+// does not automatically perform redirects.
+func NewHTTPClientWithoutRedirects(origin Origin, db DB, txp HTTPTransport) HTTPClient {
+	return newHTTPClient(origin, db, txp, http.ErrUseLastResponse)
+}
+
+// NewHTTPClientWithRedirects creates a new HTTPClient
+// instance that automatically performs redirects.
+func NewHTTPClientWithRedirects(origin Origin, db DB, txp HTTPTransport) HTTPClient {
+	return newHTTPClient(origin, db, txp, nil)
+}
+
+// HTTPRedirectEvent records an HTTP redirect.
+//
+// If ConnID is zero or negative, it means undefined. This happens
+// when we create a transport without knowing the ConnID.
+//
+// The Request field contains the next request to issue. When
+// redirects are disabled, this field contains the request you
+// should issue to continue the redirect chain.
+//
+// The Via field contains the requests issued so far. The first
+// request inside Via is the last one that has been issued.
+//
+// The Error field can have three values:
+//
+// - nil if the redirect occurred;
+//
+// - ErrHTTPTooManyRedirects when we see too many redirections;
+//
+// - http.ErrUseLastResponse if redirections are disabled.
+type HTTPRedirectEvent struct {
+	Origin        Origin
+	MeasurementID int64
+	ConnID        int64
+	Request       *http.Request
+	Via           []*http.Request
+	Error         error
+}
+
+// MarshalJSON marshals an HTTPRedirectEvent to JSON.
+//
+// Requests are serialized in simplified form (see simplifyRequest)
+// and the Error field is serialized as its message, or as null
+// when there is no error.
+func (ev *HTTPRedirectEvent) MarshalJSON() ([]byte, error) {
+	// Passing ev.Error directly to encoding/json would serialize a non-nil
+	// error created with errors.New as "{}" because it has no exported
+	// fields, thus losing the message. So, serialize the message instead.
+	var failure interface{}
+	if ev.Error != nil {
+		failure = ev.Error.Error()
+	}
+	m := map[string]interface{}{
+		"Origin":        ev.Origin,
+		"MeasurementID": ev.MeasurementID,
+		"ConnID":        ev.ConnID,
+		"Request":       ev.simplifyRequest(ev.Request),
+		"Via":           ev.simplifyRequests(ev.Via),
+		"Error":         failure,
+	}
+	return json.Marshal(m)
+}
+
+// simplifyRequest simplifies a single http.Request so
+// that it could be serialized as a JSON.
+func (ev *HTTPRedirectEvent) simplifyRequest(req *http.Request) (out map[string]interface{}) {
+	// We only keep the URL and the headers of the request.
+	out = map[string]interface{}{
+		"URL":    req.URL,
+		"Header": req.Header,
+	}
+	return
+}
+
+// simplifyRequests is simplifyRequest applied to a list
+// of http.Request rather than just one of them.
+func (ev *HTTPRedirectEvent) simplifyRequests(req []*http.Request) (out []map[string]interface{}) {
+	for _, r := range req {
+		out = append(out, ev.simplifyRequest(r))
+	}
+	return
+}
+
+// ErrHTTPTooManyRedirects is the unexported error that the standard library
+// would return when hitting too many redirects.
+var ErrHTTPTooManyRedirects = errors.New("stopped after 10 redirects")
+
+// newHTTPClient creates an HTTPClient using the given transport and a
+// new cookie jar. The client saves an HTTPRedirectEvent into db every
+// time its CheckRedirect hook runs. The defaultErr argument is what
+// CheckRedirect returns when we have not seen too many redirects yet:
+// nil to follow redirects, http.ErrUseLastResponse to not follow them.
+func newHTTPClient(origin Origin, db DB, txp HTTPTransport, defaultErr error) HTTPClient {
+	return &http.Client{
+		Transport: txp,
+		Jar:       NewCookieJar(),
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			err := defaultErr
+			if len(via) >= 10 {
+				err = ErrHTTPTooManyRedirects
+			}
+			db.InsertIntoHTTPRedirect(&HTTPRedirectEvent{
+				Origin:        origin,
+				MeasurementID: db.MeasurementID(),
+				ConnID:        txp.ConnID(),
+				Request:       req,
+				Via:           via,
+				Error:         err,
+			})
+			return err
+		},
+	}
+}
+
+// NewCookieJar is a convenience factory for creating an http.CookieJar
+// that is aware of the effective TLD / public suffix list. This
+// means that the jar won't allow a domain to set cookies for another
+// unrelated domain (in the public-suffix-list sense).
+func NewCookieJar() http.CookieJar {
+	jar, err := cookiejar.New(&cookiejar.Options{
+		PublicSuffixList: publicsuffix.List,
+	})
+	// Safe to PanicOnError here: cookiejar.New _always_ returns a nil error.
+	runtimex.PanicOnError(err, "cookiejar.New failed")
+	return jar
+}
+
+// NewHTTPRequestHeaderForMeasuring returns an http.Header where
+// the headers are the ones we use for measuring.
+func NewHTTPRequestHeaderForMeasuring() http.Header {
+	h := http.Header{}
+	h.Set("Accept", httpheader.Accept())
+	h.Set("Accept-Language", httpheader.AcceptLanguage())
+	h.Set("User-Agent", httpheader.UserAgent())
+	return h
+}
+
+// NewHTTPRequestWithContext is a convenience factory for creating
+// a new HTTP request with the typical headers we use when performing
+// measurements already set inside of req.Header.
+func NewHTTPRequestWithContext(ctx context.Context,
+	method, URL string, body io.Reader) (*http.Request, error) {
+	req, err := http.NewRequestWithContext(ctx, method, URL, body)
+	if err != nil {
+		return nil, err
+	}
+	// Replace the headers with the ones we use for measuring.
+	req.Header = NewHTTPRequestHeaderForMeasuring()
+	return req, nil
+}
+
+// NewHTTPGetRequest is a convenience factory for creating a new
+// http.Request using the GET method and the given URL.
+func NewHTTPGetRequest(ctx context.Context, URL string) (*http.Request, error) {
+	return NewHTTPRequestWithContext(ctx, "GET", URL, nil)
+}
+
+// MustNewHTTPGetRequest is a convenience factory for creating
+// a new http.Request using GET that panics on error.
+func MustNewHTTPGetRequest(ctx context.Context, URL string) *http.Request {
+	req, err := NewHTTPGetRequest(ctx, URL)
+	runtimex.PanicOnError(err, "NewHTTPGetRequest failed")
+	return req
+}
diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go
new file mode 100644
index 0000000000..9c10995ca5
--- /dev/null
+++ b/internal/measurex/logger.go
@@ -0,0 +1,11 @@
+package measurex
+
+import "github.com/ooni/probe-cli/v3/internal/netxlite"
+
+// Logger is the logger type we use.
+type Logger interface {
+	netxlite.Logger
+
+	// Info emits an informational message.
+	Info(msg string)
+	// Infof formats and emits an informational message.
+	Infof(format string, v ...interface{})
+}
diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go
new file mode 100644
index 0000000000..fada77b166
--- /dev/null
+++ b/internal/measurex/measurer.go
@@ -0,0 +1,728 @@
+package measurex
+
+import (
+	"context"
+	"crypto/tls"
+	"errors"
+	"fmt"
+	"net"
+	"net/http"
+	"net/url"
+	"time"
+
+	"github.com/ooni/probe-cli/v3/internal/netxlite"
+	"github.com/ooni/probe-cli/v3/internal/netxlite/dnsx"
+)
+
+// Measurer performs measurements.
+//
+// You call measurer methods to perform measurements. All methods
+// will save measurements into the DB field as a side effect.
+//
+// Some methods will also return (a subset of) their measurement
+// results when doing that is convenient.
+//
+// This implementation currently uses the Web Connectivity Test
+// Helper (WCTH) to help with measuring HTTP endpoints. We'll use
+// an ad-hoc, more effective test helper in the near future.
+//
+// Remarks
+//
+// Make sure to initialize all the fields marked as MANDATORY.
+type Measurer struct {
+	// DB is the MANDATORY database to use.
+	DB DB
+
+	// HTTPClient is the MANDATORY HTTP client for the WCTH.
+	HTTPClient HTTPClient
+
+	// Logger is the MANDATORY logger to use.
+	Logger Logger
+
+	// Origin is the MANDATORY measurements origin to use.
+	Origin Origin
+
+	// TLSHandshaker is the MANDATORY TLS handshaker.
+	TLSHandshaker TLSHandshaker
+
+	// WCTHURL is the MANDATORY URL of the WCTH.
+	WCTHURL string
+}
+
+// NewMeasurement increments the DB's MeasurementID
+// and returns such an ID for later usage.
+//
+// Every operation we perform (e.g., a TCP connect) saves
+// measurements into mx.DB using separate tables.
+//
+// We save the MeasurementID for each operation.
+//
+// By calling NewMeasurement you increment such an ID
+// which later allows you to separate measurements.
+func (mx *Measurer) NewMeasurement() int64 {
+	return mx.DB.NextMeasurement()
+}
+
+// LookupHostSystem performs a LookupHost using the system resolver.
+//
+// The system resolver is equivalent to calling getaddrinfo on Unix systems.
+//
+// Arguments
+//
+// - ctx is the context allowing to timeout the operation;
+//
+// - domain is the domain to lookup.
+//
+// Return value
+//
+// Either a list of resolved IP addresses or an error.
+func (mx *Measurer) LookupHostSystem(
+	ctx context.Context, domain string) (addrs []string, err error) {
+	// The timeout below applies on top of any deadline already set in ctx.
+	const timeout = 4 * time.Second
+	mx.infof("LookupHost[getaddrinfo] %s (timeout %s)...", domain, timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	r := mx.newResolverSystem()
+	defer r.CloseIdleConnections()
+	return r.LookupHost(ctx, domain)
+}
+
+// newResolverSystem is a convenience factory for creating a
+// system resolver that saves measurements into mx.DB.
+func (mx *Measurer) newResolverSystem() Resolver {
+	return WrapResolver(mx.Origin, mx.DB, netxlite.NewResolverStdlib(mx.Logger))
+}
+
+// newDialerWithSystemResolver is a convenience factory for creating
+// a dialer that uses the system resolver and saves measurements
+// into mx.DB.
+func (mx *Measurer) newDialerWithSystemResolver() Dialer {
+	r := mx.newResolverSystem()
+	return WrapDialer(mx.Origin, mx.DB, netxlite.NewDialerWithResolver(
+		mx.Logger, r,
+	))
+}
+
+// netxliteDialerAdapter adapts measurex.Dialer to netxlite.Dialer.
+type netxliteDialerAdapter struct {
+	Dialer
+}
+
+// DialContext implements netxlite.Dialer.DialContext.
+func (d *netxliteDialerAdapter) DialContext(
+	ctx context.Context, network, address string) (net.Conn, error) {
+	return d.Dialer.DialContext(ctx, network, address)
+}
+
+// newResolverUDP is a convenience factory for creating a resolver
+// using UDP that saves measurements into mx.DB.
+//
+// Arguments
+//
+// - address is the resolver address (e.g., "1.1.1.1:53").
+//
+// Return value
+//
+// A Resolver.
+func (mx *Measurer) newResolverUDP(address string) Resolver {
+	// TODO(bassosimone): the resolver we compose here is missing
+	// some capabilities like IDNA. We should instead have the proper
+	// factory inside netxlite for creating this resolver.
+	//
+	// Both the resolver and the DNS round tripper are wrapped, so that
+	// we save both the lookup results and the raw DNS messages.
+	return WrapResolver(mx.Origin, mx.DB, &netxlite.ResolverLogger{
+		Resolver: dnsx.NewSerialResolver(
+			WrapDNSXRoundTripper(mx.DB, dnsx.NewDNSOverUDP(
+				&netxliteDialerAdapter{mx.newDialerWithSystemResolver()},
+				address,
+			))),
+		Logger: mx.Logger,
+	})
+}
+
+// LookupHostUDP is like LookupHostSystem but uses an UDP resolver.
+//
+// Arguments
+//
+// - ctx is the context allowing to timeout the operation;
+//
+// - domain is the domain to resolve (e.g., "x.org");
+//
+// - address is the UDP resolver address (e.g., "dns.google:53").
+//
+// Return value
+//
+// Either the resolved addresses or an error.
+func (mx *Measurer) LookupHostUDP(
+	ctx context.Context, domain, address string) ([]string, error) {
+	// The timeout below applies on top of any deadline already set in ctx.
+	const timeout = 4 * time.Second
+	mx.infof("LookupHost[udp://%s] %s (timeout %s)...", address, domain, timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	r := mx.newResolverUDP(address)
+	defer r.CloseIdleConnections()
+	return r.LookupHost(ctx, domain)
+}
+
+// LookupHTTPSSvcUDP issues an HTTPSSvc query for the given domain.
+//
+// Arguments
+//
+// - ctx is the context allowing to timeout the operation;
+//
+// - domain is the domain to resolve (e.g., "x.org");
+//
+// - address is the UDP resolver address (e.g., "dns.google:53").
+//
+// Return value
+//
+// Either the query result, on success, or an error.
+func (mx *Measurer) LookupHTTPSSvcUDP(
+	ctx context.Context, domain, address string) (HTTPSSvc, error) {
+	// The timeout below applies on top of any deadline already set in ctx.
+	const timeout = 4 * time.Second
+	mx.infof("LookupHTTPSSvc[udp://%s] %s (timeout %s)...", address, domain, timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	r := mx.newResolverUDP(address)
+	defer r.CloseIdleConnections()
+	return r.LookupHTTPSSvcWithoutRetry(ctx, domain)
+}
+
+// newDialerWithoutResolver is a convenience factory for creating
+// a dialer that has no resolver and saves measurements into mx.DB.
+func (mx *Measurer) newDialerWithoutResolver() Dialer {
+	return WrapDialer(mx.Origin, mx.DB, netxlite.NewDialerWithoutResolver(
+		mx.Logger,
+	))
+}
+
+// TCPConnect establishes a connection with a TCP endpoint.
+//
+// Arguments
+//
+// - ctx is the context allowing to timeout the connect;
+//
+// - address is the TCP endpoint address (e.g., "8.8.4.4:443").
+//
+// Return value
+//
+// Either an established Conn or an error.
+func (mx *Measurer) TCPConnect(ctx context.Context, address string) (Conn, error) {
+	// The timeout below applies on top of any deadline already set in ctx.
+	const timeout = 10 * time.Second
+	mx.infof("TCPConnect %s (timeout %s)...", address, timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	d := mx.newDialerWithoutResolver()
+	defer d.CloseIdleConnections()
+	return d.DialContext(ctx, "tcp", address)
+}
+
+// TLSConnect connects and TLS handshakes with a TCP endpoint.
+//
+// Arguments
+//
+// - ctx is the context allowing to timeout the whole operation;
+//
+// - address is the endpoint address (e.g., "1.1.1.1:443");
+//
+// - config contains the TLS config (see below).
+//
+// TLS config
+//
+// You MUST set the following config fields:
+//
+// - ServerName to the desired SNI or InsecureSkipVerify to
+// skip the certificate name verification;
+//
+// - RootCAs to netxlite.NewDefaultCertPool() output;
+//
+// - NextProtos to the desired ALPN ([]string{"h2", "http/1.1"} for
+// HTTPS and []string{"dot"} for DNS-over-TLS).
+//
+// Caveats
+//
+// The mx.TLSHandshaker field could point to a TLS handshaker using
+// the Go stdlib or one using gitlab.com/yawning/utls.git.
+//
+// In the latter case, the content of the ClientHello message
+// will not only depend on the config field but also on the
+// utls.ClientHelloID that you're using.
+//
+// Return value
+//
+// Either an established TLSConn or an error.
+func (mx *Measurer) TLSConnect(ctx context.Context,
+	address string, config *tls.Config) (TLSConn, error) {
+	conn, err := mx.TCPConnect(ctx, address)
+	if err != nil {
+		return nil, err
+	}
+	// The connect above and the handshake below each have their own timeout.
+	const timeout = 10 * time.Second
+	mx.infof("TLSHandshake[SNI=%s,ALPN=%+v] %s (timeout %s)...",
+		config.ServerName, config.NextProtos, address, timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	// NOTE(review): when the handshake fails we do not close conn
+	// here; confirm that mx.TLSHandshaker closes it, otherwise it leaks.
+	return mx.TLSHandshaker.Handshake(ctx, conn, config)
+}
+
+// ErrUnknownHTTPEndpointNetwork indicates that we don't know
+// how to handle the value of an HTTPEndpoint.Network.
+var ErrUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network")
+
+// HTTPEndpointGet performs a GET request for an HTTP endpoint.
+//
+// This function WILL NOT follow redirects. If there is a redirect
+// you will see it inside the specific mx.DB table.
+//
+// Arguments
+//
+// - ctx is the context allowing to timeout the operation;
+//
+// - epnt is the HTTP endpoint.
+//
+// Return value
+//
+// Either an HTTP response, on success, or an error.
+func (mx *Measurer) HTTPEndpointGet(
+	ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) {
+	switch epnt.Network {
+	case NetworkQUIC:
+		// QUIC endpoints are not supported by this implementation.
+		return nil, ErrUnknownHTTPEndpointNetwork
+	case NetworkTCP:
+		return mx.httpEndpointGetTCP(ctx, epnt)
+	default:
+		return nil, ErrUnknownHTTPEndpointNetwork
+	}
+}
+
+// ErrUnknownHTTPEndpointURLScheme indicates that we don't know how to
+// handle the value of an HTTPEndpoint.URLScheme.
+var ErrUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Scheme")
+
+// httpEndpointGetTCP specializes HTTPEndpointGet for HTTP and HTTPS.
+func (mx *Measurer) httpEndpointGetTCP(
+	ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) {
+	switch epnt.URL.Scheme {
+	case "http":
+		return mx.httpEndpointGetHTTP(ctx, epnt)
+	case "https":
+		return mx.httpEndpointGetHTTPS(ctx, epnt)
+	default:
+		return nil, ErrUnknownHTTPEndpointURLScheme
+	}
+}
+
+// httpEndpointGetHTTP specializes httpEndpointGetTCP for HTTP.
+//
+// It connects to epnt.Address and performs the GET over that conn.
+func (mx *Measurer) httpEndpointGetHTTP(
+	ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) {
+	req, err := NewHTTPGetRequest(ctx, epnt.URL.String())
+	if err != nil {
+		return nil, err
+	}
+	// Use the endpoint's headers in place of the default ones.
+	req.Header = epnt.Header
+	conn, err := mx.TCPConnect(ctx, epnt.Address)
+	if err != nil {
+		return nil, err
+	}
+	// NOTE(review): conn is closed when we return, so presumably the
+	// caller cannot read more body than has already been read; confirm.
+	defer conn.Close() // we own it
+	clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB,
+		NewHTTPTransportWithConn(mx.Origin, mx.Logger, mx.DB, conn))
+	defer clnt.CloseIdleConnections()
+	return mx.httpClientDo(ctx, clnt, epnt, req)
+}
+
+// httpEndpointGetHTTPS specializes httpEndpointGetTCP for HTTPS.
+func (mx *Measurer) httpEndpointGetHTTPS(
+	ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) {
+	req, err := NewHTTPGetRequest(ctx, epnt.URL.String())
+	if err != nil {
+		return nil, err
+	}
+	// Use the endpoint's headers in place of the default ones.
+	req.Header = epnt.Header
+	conn, err := mx.TLSConnect(ctx, epnt.Address, &tls.Config{
+		ServerName: epnt.SNI,
+		NextProtos: epnt.ALPN,
+		RootCAs:    netxlite.NewDefaultCertPool(),
+	})
+	if err != nil {
+		return nil, err
+	}
+	// NOTE(review): conn is closed when we return, so presumably the
+	// caller cannot read more body than has already been read; confirm.
+	defer conn.Close() // we own it
+	clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB,
+		NewHTTPTransportWithTLSConn(mx.Origin, mx.Logger, mx.DB, conn))
+	defer clnt.CloseIdleConnections()
+	return mx.httpClientDo(ctx, clnt, epnt, req)
+}
+
+// httpClientDo logs the request we are about to issue and performs it
+// using the given client and a context with its own timeout.
+func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient,
+	epnt *HTTPEndpoint, req *http.Request) (*http.Response, error) {
+	const timeout = 15 * time.Second
+	mx.infof("HTTPGet[epnt=%s] %s (timeout %s)...",
+		epnt.Address, epnt.URL.String(), timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	return clnt.Do(req.WithContext(ctx))
+}
+
+// EndpointNetwork is the network of an endpoint.
+type EndpointNetwork string
+
+const (
+	// NetworkTCP identifies endpoints using TCP.
+	NetworkTCP = EndpointNetwork("tcp")
+
+	// NetworkQUIC identifies endpoints using QUIC.
+	NetworkQUIC = EndpointNetwork("quic")
+)
+
+// Endpoint is an endpoint for a domain.
+type Endpoint struct {
+	// Network is the network (e.g., "tcp", "quic")
+	Network EndpointNetwork
+
+	// Address is the endpoint address (e.g., "8.8.8.8:443")
+	Address string
+}
+
+// String converts an endpoint to a string (e.g., "8.8.8.8:443/tcp")
+func (e *Endpoint) String() string {
+	return fmt.Sprintf("%s/%s", e.Address, e.Network)
+}
+
+// ErrLookupEndpoints indicates that we could not
+// successfully lookup the endpoints for a domain.
+var ErrLookupEndpoints = errors.New("endpoints lookup failed")
+
+// LookupEndpoints discovers the endpoints for a domain.
+//
+// This function performs two lookups:
+//
+// - with the system resolver;
+//
+// - with a DNS over UDP resolver.
+//
+// Arguments
+//
+// - ctx is the context carrying timeouts;
+//
+// - domain is the domain to lookup endpoints for;
+//
+// - port is the port we want to use;
+//
+// - address is the address of a DNS over UDP resolver.
+//
+// Return value
+//
+// Returns either a list of endpoints or an error. The error will just
+// indicate that we could not resolve _any_ endpoint. Precise results
+// regarding each performed operation are into the mx.DB field.
+func (mx *Measurer) LookupEndpoints(
+	ctx context.Context, domain, port, address string) ([]*Endpoint, error) {
+	// We deliberately ignore the errors returned by each lookup: they
+	// are saved into mx.DB and we only fail if we have no endpoint at all.
+	udpAddrs, _ := mx.LookupHostUDP(ctx, domain, address)
+	systemAddrs, _ := mx.LookupHostSystem(ctx, domain)
+	var out []*Endpoint
+	out = append(out, mx.parseLookupHostReply(port, systemAddrs)...)
+	out = append(out, mx.parseLookupHostReply(port, udpAddrs)...)
+	out = mx.mergeEndpoints(out)
+	if len(out) < 1 {
+		return nil, ErrLookupEndpoints
+	}
+	return out, nil
+}
+
+// mergeEndpoints merges duplicate endpoints in the input list.
+//
+// Arguments
+//
+// - input is the input list of endpoints to merge.
+//
+// Return value
+//
+// A list where duplicates have been removed. The list contains the TCP
+// endpoints first and then the QUIC endpoints. Within each group the
+// endpoints appear in the same order in which they first appear in the
+// input, so that the output is deterministic. Endpoints whose network
+// is neither TCP nor QUIC are dropped.
+func (mx *Measurer) mergeEndpoints(input []*Endpoint) (out []*Endpoint) {
+	seen := make(map[Endpoint]bool, len(input))
+	for _, network := range []EndpointNetwork{NetworkTCP, NetworkQUIC} {
+		for _, epnt := range input {
+			if epnt.Network != network {
+				continue
+			}
+			// Endpoint only contains strings, so we can use it as map key.
+			key := Endpoint{Network: network, Address: epnt.Address}
+			if seen[key] {
+				continue
+			}
+			seen[key] = true
+			out = append(out, &key)
+		}
+	}
+	return
+}
+
+// ErrCannotDeterminePortFromURL indicates that we could not determine
+// the correct port from the URL authority and scheme.
+var ErrCannotDeterminePortFromURL = errors.New("cannot determine port from URL")
+
+// urlPort returns the port implied by an URL.
+//
+// If the URL contains an explicit port, we return it. Otherwise we
+// attempt to guess the port based on the URL scheme.
+//
+// We currently recognize only these schemes:
+//
+// - "https";
+//
+// - "http".
+//
+// Arguments
+//
+// - URL is the URL for which to guess the port.
+//
+// Return value
+//
+// Either a string containing the port or an error.
+func (mx *Measurer) urlPort(URL *url.URL) (string, error) {
+	switch {
+	case URL.Port() != "":
+		// An explicit port takes precedence over the scheme.
+		return URL.Port(), nil
+	case URL.Scheme == "https":
+		return "443", nil
+	case URL.Scheme == "http":
+		return "80", nil
+	default:
+		return "", ErrCannotDeterminePortFromURL
+	}
+}
+
+// HTTPEndpoint is an HTTP/HTTPS/HTTP3 endpoint.
+type HTTPEndpoint struct {
+	// Domain is the endpoint domain (e.g., "dns.google").
+	Domain string
+
+	// Network is the network (e.g., "tcp" or "quic").
+	Network EndpointNetwork
+
+	// Address is the endpoint address (e.g., "8.8.8.8:443").
+	Address string
+
+	// SNI is the SNI to use (only used with URL.scheme == "https").
+	SNI string
+
+	// ALPN is the ALPN to use (only used with URL.scheme == "https").
+	ALPN []string
+
+	// URL is the endpoint URL.
+	URL *url.URL
+
+	// Header contains request headers.
+	Header http.Header
+}
+
+// String converts an HTTP endpoint to a string (e.g., "8.8.8.8:443/tcp")
+func (e *HTTPEndpoint) String() string {
+	return fmt.Sprintf("%s/%s", e.Address, e.Network)
+}
+
+// LookupHTTPEndpoints is like LookupEndpoints but performs a
+// specialized lookup for an HTTP/HTTPS URL. Such a lookup also
+// includes querying the WCTH to discover extra endpoints.
+//
+// Arguments
+//
+// - ctx is the context carrying timeouts;
+//
+// - URL is the URL to perform the lookup for;
+//
+// - address is the address of the DNS over
+// UDP server to use.
+//
+// Return value
+//
+// Returns either a list of endpoints or an error. The returned error
+// only indicates we could not fetch _any_ endpoint. Check into the
+// database (i.e., mx.DB) for precise results of each operation.
+func (mx *Measurer) LookupHTTPEndpoints(
+	ctx context.Context, URL *url.URL, address string) ([]*HTTPEndpoint, error) {
+	port, err := mx.urlPort(URL)
+	if err != nil {
+		return nil, err
+	}
+	// We deliberately ignore the errors returned by each lookup: they
+	// are saved into mx.DB and we only fail if we have no endpoint at all.
+	httpsSvcInfo, _ := mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), address)
+	endpoints, _ := mx.LookupEndpoints(ctx, URL.Hostname(), port, address)
+	endpoints = append(endpoints, mx.parseHTTPSSvcReply(port, httpsSvcInfo)...)
+	// On failure lookupWCTH returns a nil list. So, only use its result
+	// on success, otherwise a failure of the WCTH would discard all the
+	// endpoints we have discovered so far using the DNS.
+	if all, err := mx.lookupWCTH(ctx, URL, endpoints, port); err == nil {
+		endpoints = all
+	}
+	endpoints = mx.mergeEndpoints(endpoints)
+	if len(endpoints) < 1 {
+		return nil, ErrLookupEndpoints
+	}
+	return mx.newHTTPEndpoints(URL, endpoints), nil
+}
+
+// newHTTPEndpoints takes in input a list of Endpoint and
+// returns in output a list of HTTPEndpoint.
+//
+// Arguments
+//
+// - URL is the URL for which we're discovering HTTPEndpoint;
+//
+// - endpoints is the list of discovered Endpoint.
+//
+// Return value
+//
+// The list of HTTPEndpoint. Each HTTPEndpoint uses the URL's hostname
+// as domain and SNI, has its own copy of the headers we use for
+// measuring, and shares the same URL pointer.
+func (mx *Measurer) newHTTPEndpoints(
+	URL *url.URL, endpoints []*Endpoint) (out []*HTTPEndpoint) {
+	for _, epnt := range endpoints {
+		out = append(out, &HTTPEndpoint{
+			Domain:  URL.Hostname(),
+			Network: epnt.Network,
+			Address: epnt.Address,
+			SNI:     URL.Hostname(),
+			ALPN:    mx.alpnForHTTPEndpoint(epnt.Network),
+			URL:     URL,
+			Header:  NewHTTPRequestHeaderForMeasuring(),
+		})
+	}
+	return
+}
+
+// alpnForHTTPEndpoint takes in input the network of an endpoint
+// (i.e., "tcp" or "quic") and returns the corresponding ALPN.
+//
+// Arguments
+//
+// - network is the network of the endpoint.
+//
+// Return value
+//
+// The corresponding ALPN. If we do not recognize the input
+// network we return a nil string array.
+func (mx *Measurer) alpnForHTTPEndpoint(network EndpointNetwork) []string {
+	switch network {
+	case NetworkQUIC:
+		return []string{"h3"}
+	case NetworkTCP:
+		return []string{"h2", "http/1.1"}
+	default:
+		return nil
+	}
+}
+
+// lookupWCTH performs an Endpoint lookup using the WCTH (i.e.,
+// the Web Connectivity Test Helper) web service.
+//
+// Arguments
+//
+// - ctx is the context carrying timeouts;
+//
+// - URL is the URL for which we're looking up endpoints;
+//
+// - endpoints is the list of endpoints discovered so far using
+// the means available to the probe (e.g., DNS);
+//
+// - port is the port for the endpoints.
+//
+// Return value
+//
+// Either a list of endpoints (which may possibly be empty) in case
+// of success or an error in case of failure. On success, the list
+// contains the input endpoints followed by the TCP endpoints
+// built from the addresses returned by the WCTH.
+func (mx *Measurer) lookupWCTH(ctx context.Context,
+	URL *url.URL, endpoints []*Endpoint, port string) ([]*Endpoint, error) {
+	const timeout = 30 * time.Second
+	mx.infof("WCTH[backend=%s] %s %+v %s (timeout %s)...",
+		mx.WCTHURL, URL.String(), endpoints, port, timeout)
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	w := NewWCTHWorker(mx.Logger, mx.DB, mx.HTTPClient, mx.WCTHURL)
+	// We only pass the TCP endpoints to the WCTH.
+	resp, err := w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints))
+	if err != nil {
+		return nil, err
+	}
+	for _, addr := range resp.DNS.Addrs {
+		addrport := net.JoinHostPort(addr, port)
+		endpoints = append(endpoints, &Endpoint{
+			Network: NetworkTCP,
+			Address: addrport,
+		})
+	}
+	return endpoints, nil
+}
+
+// onlyTCPEndpoints takes in input a list of endpoints and returns
+// in output the list of addresses of the TCP endpoints only.
+func (mx *Measurer) onlyTCPEndpoints(endpoints []*Endpoint) (out []string) {
+	for _, epnt := range endpoints {
+		switch epnt.Network {
+		case NetworkTCP:
+			out = append(out, epnt.Address)
+		}
+	}
+	return
+}
+
+// parseLookupHostReply builds a list of endpoints from a LookupHost reply.
+// +// Arguments: +// +// - port is the port to use for the endpoints; +// +// - addrs is the possibly empty list of addresses from LookupHost. +// +// Return value +// +// A possibly empty list of endpoints. +func (mx *Measurer) parseLookupHostReply(port string, addrs []string) (out []*Endpoint) { + for _, addr := range addrs { + out = append(out, &Endpoint{ + Network: "tcp", + Address: net.JoinHostPort(addr, port), + }) + } + return +} + +// ParseHTTPSSvcReply builds a list of endpoints from the LookupHTTPSSvc result. +// +// Arguments +// +// - port is the port for the endpoints; +// +// - info is either nil or contains the result of the LookupHostHTTPSSvc call. +// +// Return value +// +// A possibly-empty list of endpoints. +func (mx *Measurer) parseHTTPSSvcReply(port string, info HTTPSSvc) (out []*Endpoint) { + if info == nil { + return + } + for _, proto := range info.ALPN() { + switch proto { + case "h3": // we do not support experimental protocols like h3-29 anymore + for _, addr := range info.IPv4Hint() { + out = append(out, &Endpoint{ + Network: "quic", + Address: net.JoinHostPort(addr, port), + }) + } + for _, addr := range info.IPv6Hint() { + out = append(out, &Endpoint{ + Network: "quic", + Address: net.JoinHostPort(addr, port), + }) + } + return // we found what we were looking for + } + } + return +} + +// infof formats and logs an informational message using mx.Logger. +func (mx *Measurer) infof(format string, v ...interface{}) { + mx.Logger.Infof(format, v...) +} diff --git a/internal/measurex/origin.go b/internal/measurex/origin.go new file mode 100644 index 0000000000..2e32ecbdd4 --- /dev/null +++ b/internal/measurex/origin.go @@ -0,0 +1,10 @@ +package measurex + +// Origin is the origin of a measurement. +type Origin string + +// These are the possible origins. 
+var ( + OriginProbe = Origin("probe") + OriginTH = Origin("th") +) diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go new file mode 100644 index 0000000000..15245885c9 --- /dev/null +++ b/internal/measurex/resolver.go @@ -0,0 +1,94 @@ +package measurex + +import ( + "context" + "time" + + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" +) + +// HTTPSSvc is the result returned by HTTPSSvc queries. +type HTTPSSvc = dnsx.HTTPSSvc + +// Resolver is the resolver type we use. +type Resolver interface { + netxlite.Resolver +} + +// WrapResolver wraps a netxlite.Resolver to add measurex capabilities. +func WrapResolver(origin Origin, db DB, r netxlite.Resolver) Resolver { + return &resolverx{Resolver: r, db: db, origin: origin} +} + +type resolverx struct { + netxlite.Resolver + db DB + origin Origin +} + +// LookupHostEvent contains the result of a host lookup. +type LookupHostEvent struct { + Origin Origin + MeasurementID int64 + Network string + Address string + Domain string + Started time.Time + Finished time.Time + Error error + Addrs []string +} + +func (r *resolverx) LookupHost(ctx context.Context, domain string) ([]string, error) { + started := time.Now() + addrs, err := r.Resolver.LookupHost(ctx, domain) + finished := time.Now() + r.db.InsertIntoLookupHost(&LookupHostEvent{ + Origin: r.origin, + MeasurementID: r.db.MeasurementID(), + Network: r.Resolver.Network(), + Address: r.Resolver.Address(), + Domain: domain, + Started: started, + Finished: finished, + Error: err, + Addrs: addrs, + }) + return addrs, err +} + +// LookupHTTPSSvcEvent is the event emitted when we perform +// an HTTPSSvc DNS query for a domain. 
+type LookupHTTPSSvcEvent struct { + Origin Origin + MeasurementID int64 + Domain string + Started time.Time + Finished time.Time + Error error + IPv4 []string + IPv6 []string + ALPN []string +} + +func (r *resolverx) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain string) (HTTPSSvc, error) { + started := time.Now() + https, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) + finished := time.Now() + ev := &LookupHTTPSSvcEvent{ + Origin: r.origin, + MeasurementID: r.db.MeasurementID(), + Domain: domain, + Started: started, + Finished: finished, + Error: err, + } + if err == nil { + ev.IPv4 = https.IPv4Hint() + ev.IPv6 = https.IPv6Hint() + ev.ALPN = https.ALPN() + } + r.db.InsertIntoLookupHTTPSSvc(ev) + return https, err +} diff --git a/internal/measurex/saver.go b/internal/measurex/saver.go new file mode 100644 index 0000000000..8f28c427ce --- /dev/null +++ b/internal/measurex/saver.go @@ -0,0 +1,159 @@ +package measurex + +import "sync" + +// Saver is a DB that saves measurements. +type Saver struct { + dialTable []*NetworkEvent + readWriteTable []*NetworkEvent + closeTable []*NetworkEvent + tlsHandshakeTable []*TLSHandshakeEvent + lookupHostTable []*LookupHostEvent + lookupHTTPSvcTable []*LookupHTTPSSvcEvent + dnsRoundTripTable []*DNSRoundTripEvent + httpRoundTripTable []*HTTPRoundTripEvent + httpRedirectTable []*HTTPRedirectEvent + connID int64 + measurementID int64 + mu sync.Mutex +} + +func (s *Saver) InsertIntoDial(ev *NetworkEvent) { + s.mu.Lock() + s.dialTable = append(s.dialTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromDial() (out []*NetworkEvent) { + s.mu.Lock() + out = append(out, s.dialTable...) + s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoReadWrite(ev *NetworkEvent) { + s.mu.Lock() + s.readWriteTable = append(s.readWriteTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromReadWrite() (out []*NetworkEvent) { + s.mu.Lock() + out = append(out, s.readWriteTable...) 
+ s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoClose(ev *NetworkEvent) { + s.mu.Lock() + s.closeTable = append(s.closeTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromClose() (out []*NetworkEvent) { + s.mu.Lock() + out = append(out, s.closeTable...) + s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { + s.mu.Lock() + s.tlsHandshakeTable = append(s.tlsHandshakeTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { + s.mu.Lock() + out = append(out, s.tlsHandshakeTable...) + s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoLookupHost(ev *LookupHostEvent) { + s.mu.Lock() + s.lookupHostTable = append(s.lookupHostTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromLookupHost() (out []*LookupHostEvent) { + s.mu.Lock() + out = append(out, s.lookupHostTable...) + s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { + s.mu.Lock() + s.lookupHTTPSvcTable = append(s.lookupHTTPSvcTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { + s.mu.Lock() + out = append(out, s.lookupHTTPSvcTable...) + s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { + s.mu.Lock() + s.dnsRoundTripTable = append(s.dnsRoundTripTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { + s.mu.Lock() + out = append(out, s.dnsRoundTripTable...) + s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { + s.mu.Lock() + s.httpRoundTripTable = append(s.httpRoundTripTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { + s.mu.Lock() + out = append(out, s.httpRoundTripTable...) 
+ s.mu.Unlock() + return +} + +func (s *Saver) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { + s.mu.Lock() + s.httpRedirectTable = append(s.httpRedirectTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { + s.mu.Lock() + out = append(out, s.httpRedirectTable...) + s.mu.Unlock() + return +} + +func (s *Saver) NextConnID() (out int64) { + s.mu.Lock() + s.connID++ // start from 1 + out = s.connID + s.mu.Unlock() + return +} + +func (s *Saver) MeasurementID() (out int64) { + s.mu.Lock() + out = s.measurementID + s.mu.Unlock() + return +} + +func (s *Saver) NextMeasurement() (out int64) { + s.mu.Lock() + s.measurementID++ // start from 1 + out = s.measurementID + s.mu.Unlock() + return +} diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go new file mode 100644 index 0000000000..67e3e6b967 --- /dev/null +++ b/internal/measurex/tls.go @@ -0,0 +1,125 @@ +package measurex + +import ( + "context" + "crypto/tls" + "crypto/x509" + "errors" + "time" + + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// TLSConn is the TLS conn type we use. +type TLSConn interface { + netxlite.TLSConn + + // ConnID returns the connection ID. + ConnID() int64 +} + +// TLSHandshaker is the TLS handshaker type we use. +type TLSHandshaker interface { + Handshake(ctx context.Context, conn Conn, config *tls.Config) (TLSConn, error) +} + +// WrapTLSHandshaker wraps a netxlite.TLSHandshaker to add measurex capabilities. +func WrapTLSHandshaker(origin Origin, db DB, thx netxlite.TLSHandshaker) TLSHandshaker { + return &tlsHandshakerx{TLSHandshaker: thx, db: db, origin: origin} +} + +type tlsHandshakerx struct { + netxlite.TLSHandshaker + db DB + origin Origin +} + +// TLSHandshakeEvent contains a TLS handshake event. +// +// Note that EndpointID and HTTPRoundTripID only make sense when +// the DB we're using enforces precise HTTP round trips. 
+type TLSHandshakeEvent struct { + Origin Origin + MeasurementID int64 + ConnID int64 + Engine string + Network string + RemoteAddr string + LocalAddr string + SNI string + ALPN []string + SkipVerify bool + Started time.Time + Finished time.Time + Error error + TLSVersion string + CipherSuite string + NegotiatedProto string + PeerCerts [][]byte +} + +func (thx *tlsHandshakerx) Handshake(ctx context.Context, + conn Conn, config *tls.Config) (TLSConn, error) { + network := conn.RemoteAddr().Network() + remoteAddr := conn.RemoteAddr().String() + localAddr := conn.LocalAddr().String() + started := time.Now() + tconn, state, err := thx.TLSHandshaker.Handshake(ctx, conn, config) + finished := time.Now() + thx.db.InsertIntoTLSHandshake(&TLSHandshakeEvent{ + Origin: thx.origin, + MeasurementID: thx.db.MeasurementID(), + ConnID: conn.ConnID(), + Engine: "", // TODO(bassosimone): add support + Network: network, + RemoteAddr: remoteAddr, + LocalAddr: localAddr, + SNI: config.ServerName, + ALPN: config.NextProtos, + SkipVerify: config.InsecureSkipVerify, + Started: started, + Finished: finished, + Error: err, + TLSVersion: netxlite.TLSVersionString(state.Version), + CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), + NegotiatedProto: state.NegotiatedProtocol, + PeerCerts: peerCerts(err, &state), + }) + if err != nil { + return nil, err + } + return &tlsConnx{ + TLSConn: tconn.(netxlite.TLSConn), connID: conn.ConnID()}, nil +} + +type tlsConnx struct { + netxlite.TLSConn + connID int64 +} + +func (c *tlsConnx) ConnID() int64 { + return c.connID +} + +func peerCerts(err error, state *tls.ConnectionState) (out [][]byte) { + var x509HostnameError x509.HostnameError + if errors.As(err, &x509HostnameError) { + // Test case: https://wrong.host.badssl.com/ + return [][]byte{x509HostnameError.Certificate.Raw} + } + var x509UnknownAuthorityError x509.UnknownAuthorityError + if errors.As(err, &x509UnknownAuthorityError) { + // Test case: https://self-signed.badssl.com/. 
This error has + // never been among the ones returned by MK. + return [][]byte{x509UnknownAuthorityError.Cert.Raw} + } + var x509CertificateInvalidError x509.CertificateInvalidError + if errors.As(err, &x509CertificateInvalidError) { + // Test case: https://expired.badssl.com/ + return [][]byte{x509CertificateInvalidError.Cert.Raw} + } + for _, cert := range state.PeerCertificates { + out = append(out, cert.Raw) + } + return +} diff --git a/internal/measurex/wcth.go b/internal/measurex/wcth.go new file mode 100644 index 0000000000..53b1cb3365 --- /dev/null +++ b/internal/measurex/wcth.go @@ -0,0 +1,186 @@ +package measurex + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/netxlite/iox" + "github.com/ooni/probe-cli/v3/internal/runtimex" + "github.com/ooni/probe-cli/v3/internal/version" +) + +// WCTHWorker is the Web Connectivity test helper worker. +type WCTHWorker struct { + db DB + logger Logger + clnt HTTPClient + URL string +} + +// NewWCTHWorker creates a new TestHelper instance using the +// web connectivity test helper protocol. +// +// Arguments +// +// - logger is the logger to use; +// +// - db is the database to use; +// +// - clnt is the HTTP client to use; +// +// - URL is the WCTH service URL. +// +// All arguments are mandatory. +func NewWCTHWorker( + logger Logger, db DB, clnt HTTPClient, URL string) *WCTHWorker { + return &WCTHWorker{db: db, logger: logger, clnt: clnt, URL: URL} +} + +var errWCTHRequestFailed = errors.New("wcth: request failed") + +// Run runs the WCTH for the given URL and endpoints and creates +// measurements into the DB that derive on the WCTH response. +// +// CAVEAT: this implementation is very inefficient because the +// WCTH will fetch the whole redirection chain for every request +// but the WCTH is already there and it can bootstrap us. 
+func (w *WCTHWorker) Run( + ctx context.Context, URL *url.URL, endpoints []string) (*WCTHResponse, error) { + req, err := w.newHTTPRequest(ctx, URL, endpoints) + if err != nil { + return nil, err + } + resp, err := w.do(req) + if err != nil { + return nil, err + } + w.parseResp(URL, resp) + return resp, nil +} + +func (w *WCTHWorker) parseResp(URL *url.URL, resp *WCTHResponse) { + w.db.InsertIntoLookupHost(&LookupHostEvent{ + Origin: OriginTH, + MeasurementID: w.db.MeasurementID(), + Network: "system", + Address: "", + Domain: URL.Hostname(), + Started: time.Time{}, + Finished: time.Time{}, + Error: w.newError(resp.DNS.Failure), + Addrs: w.filterDNSAddrs(resp.DNS.Addrs), + }) + for addr, status := range resp.TCPConnect { + w.db.InsertIntoDial(&NetworkEvent{ + Origin: OriginTH, + MeasurementID: w.db.MeasurementID(), + ConnID: 0, + Operation: "connect", + Network: "tcp", + RemoteAddr: addr, + LocalAddr: "", + Started: time.Time{}, + Finished: time.Time{}, + Error: w.newError(status.Failure), + Count: 0, + }) + } +} + +func (w *WCTHWorker) newHTTPRequest(ctx context.Context, + URL *url.URL, endpoints []string) (*http.Request, error) { + wtchReq := &wcthRequest{ + HTTPRequest: URL.String(), + HTTPRequestHeaders: NewHTTPRequestHeaderForMeasuring(), + TCPConnect: endpoints, + } + reqBody, err := json.Marshal(wtchReq) + runtimex.PanicOnError(err, "json.Marshal failed") + req, err := http.NewRequestWithContext(ctx, "POST", w.URL, bytes.NewReader(reqBody)) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", fmt.Sprintf("miniooni/%s", version.Version)) + return req, nil +} + +func (w *WCTHWorker) do(req *http.Request) (*WCTHResponse, error) { + resp, err := w.clnt.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return nil, errWCTHRequestFailed + } + const maxResponseBodySize = 1 << 20 // limit test helper response size + r := io.LimitReader(resp.Body, maxResponseBodySize) + respBody, err := 
iox.ReadAllContext(req.Context(), r) + if err != nil { + return nil, err + } + var wcthResp WCTHResponse + if err := json.Unmarshal(respBody, &wcthResp); err != nil { + return nil, err + } + return &wcthResp, nil +} + +func (w *WCTHWorker) filterDNSAddrs(addrs []string) (out []string) { + for _, addr := range addrs { + if net.ParseIP(addr) == nil { + continue // WCTH also returns the CNAME + } + out = append(out, addr) + } + return +} + +func (w *WCTHWorker) newError(failure *string) error { + if failure != nil { + return errors.New(*failure) + } + return nil +} + +type wcthRequest struct { + HTTPRequest string `json:"http_request"` + HTTPRequestHeaders map[string][]string `json:"http_request_headers"` + TCPConnect []string `json:"tcp_connect"` +} + +// WCTHTCPConnectResult contains the TCP connect result. +type WCTHTCPConnectResult struct { + Status bool `json:"status"` + Failure *string `json:"failure"` +} + +// WCTHHTTPRequestResult contains the HTTP result. +type WCTHHTTPRequestResult struct { + BodyLength int64 `json:"body_length"` + Failure *string `json:"failure"` + Title string `json:"title"` + Headers map[string]string `json:"headers"` + StatusCode int64 `json:"status_code"` +} + +// WCTHDNSResult contains the DNS result. +type WCTHDNSResult struct { + Failure *string `json:"failure"` + Addrs []string `json:"addrs"` +} + +// WCTHResponse is the response from the WCTH service. 
+type WCTHResponse struct { + TCPConnect map[string]WCTHTCPConnectResult `json:"tcp_connect"` + HTTPRequest WCTHHTTPRequestResult `json:"http_request"` + DNS WCTHDNSResult `json:"dns"` +} diff --git a/internal/measurex/websteps.go b/internal/measurex/websteps.go new file mode 100644 index 0000000000..13766ab313 --- /dev/null +++ b/internal/measurex/websteps.go @@ -0,0 +1,247 @@ +package measurex + +import ( + "context" + "net/url" +) + +// TODO(bassosimone): we need a table for cookies since we cannot +// read them from redirects and we want an easy way to get them + +// WebStepResult contains the results of Measurer.WebStep. +type WebStepResult struct { + // URL is the URL this measurement refers to. + URL string + + // LookupEndpoints contains the LookupEndpoints measurement. + LookupEndpoints *WebStepLookupEndpoints + + // Endpoints contains all the endpoints measurements. + Endpoints []*WebStepEndpoint +} + +// WebStepBaseMeasurement is a measurement part of WebStepResult. +type WebStepBaseMeasurement struct { + // Connect contains all the connect operations. + Connect []*NetworkEvent + + // ReadWrite contains all the read and write operations. + ReadWrite []*NetworkEvent + + // Close contains all the close operations. + Close []*NetworkEvent + + // TLSHandshake contains all the TLS handshakes. + TLSHandshake []*TLSHandshakeEvent + + // LookupHost contains all the host lookups. + LookupHost []*LookupHostEvent + + // LookupHTTPSSvc contains all the HTTPSSvc lookups. + LookupHTTPSSvc []*LookupHTTPSSvcEvent + + // DNSRoundTrip contains all the DNS round trips. + DNSRoundTrip []*DNSRoundTripEvent + + // HTTPRoundTrip contains all the HTTP round trips. + HTTPRoundTrip []*HTTPRoundTripEvent + + // HTTPRedirect contains all the redirections. + HTTPRedirect []*HTTPRedirectEvent +} + +// WebStepLookupEndpoints describes the measurement of endpoints lookup. +type WebStepLookupEndpoints struct { + // Domain is the domain this measurement refers to. 
+ Domain string + + *WebStepBaseMeasurement +} + +// WebStepEndpoint describes the measurement of a given endpoint. +type WebStepEndpoint struct { + // Endpoint is the endpoint this measurement refers to. + Endpoint string + + *WebStepBaseMeasurement +} + +// WebStep performs a simplified WebStep measurement. +// +// We define WebStep as the process by which we have an input URL +// and we perform the following operations: +// +// +// 1. lookup of all the possible endpoints for the URL; +// +// 2. measurement of each available endpoint. +// +// This function DOES NOT automatically follow redirections. Though +// we have enough information to know how to follow them. +// +// Arguments +// +// - ctx is the context to implement timeouts; +// +// - URL is the URL to measure; +// +// - dnsResolverUDP is the address of the DNS resolver endpoint +// using UDP we wish to use (e.g., "8.8.8.8:53"). +// +// Return value +// +// A WebStepResult structure where the Endpoints array may be +// empty if we have no been able to discover endpoints. +func (mx *Measurer) WebStep( + ctx context.Context, URL *url.URL, dnsResolverUDP string) (m *WebStepResult) { + m = &WebStepResult{ + URL: URL.String(), + } + mid := mx.NewMeasurement() + epnts, _ := mx.LookupHTTPEndpoints(ctx, URL, dnsResolverUDP) + m.LookupEndpoints = &WebStepLookupEndpoints{ + Domain: URL.Hostname(), + WebStepBaseMeasurement: mx.newWebStepBaseMeasurement(mid), + } + for _, epnt := range epnts { + mid = mx.NewMeasurement() + mx.HTTPEndpointGet(ctx, epnt) + m.Endpoints = append(m.Endpoints, &WebStepEndpoint{ + Endpoint: epnt.String(), + WebStepBaseMeasurement: mx.newWebStepBaseMeasurement(mid), + }) + } + return +} + +// newWebStepMeasurements creates a new WebStepMeasurement. +// +// To this end, it filters all possible events by MeasurementID. +// +// Arguments +// +// - id is the MeasurementID. +// +// Return value +// +// A valid WebStepMeasurement containing possibly empty lists of events. 
+func (mx *Measurer) newWebStepBaseMeasurement(id int64) *WebStepBaseMeasurement { + return &WebStepBaseMeasurement{ + Connect: mx.selectAllFromConnect(id), + ReadWrite: mx.selectAllFromReadWrite(id), + Close: mx.selectAllFromClose(id), + TLSHandshake: mx.selectAllFromTLSHandshake(id), + LookupHost: mx.selectAllFromLookupHost(id), + LookupHTTPSSvc: mx.selectAllFromLookupHTTPSSvc(id), + DNSRoundTrip: mx.selectAllFromDNSRoundTrip(id), + HTTPRoundTrip: mx.selectAllFromHTTPRoundTrip(id), + HTTPRedirect: mx.selectAllFromHTTPRedirect(id), + } +} + +// selectAllFromConnect selects all the entries inside of the +// Connect table that have the given MeasurementID. +// +// Arguments +// +// - id is the MeasurementID to filter for. +// +// Return value +// +// A possibly-empty list of events. +func (mx *Measurer) selectAllFromConnect(id int64) (out []*NetworkEvent) { + for _, ev := range mx.DB.SelectAllFromDial() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromReadWrite is like selectAllFromConnect except +// that it works on the table named ReadWrite. +func (mx *Measurer) selectAllFromReadWrite(id int64) (out []*NetworkEvent) { + for _, ev := range mx.DB.SelectAllFromReadWrite() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromClose is like selectAllFromConnect except +// that it works on the table named Close. +func (mx *Measurer) selectAllFromClose(id int64) (out []*NetworkEvent) { + for _, ev := range mx.DB.SelectAllFromClose() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromTLSHandshake is like selectAllFromConnect except +// that it works on the table named TLSHandshake. 
+func (mx *Measurer) selectAllFromTLSHandshake(id int64) (out []*TLSHandshakeEvent) { + for _, ev := range mx.DB.SelectAllFromTLSHandshake() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromLookupHost is like selectAllFromConnect except +// that it works on the table named LookupHost. +func (mx *Measurer) selectAllFromLookupHost(id int64) (out []*LookupHostEvent) { + for _, ev := range mx.DB.SelectAllFromLookupHost() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromLookupHTTPSSvc is like selectAllFromConnect except +// that it works on the table named LookupHTTPSSvc. +func (mx *Measurer) selectAllFromLookupHTTPSSvc(id int64) (out []*LookupHTTPSSvcEvent) { + for _, ev := range mx.DB.SelectAllFromLookupHTTPSSvc() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromDNSRoundTrip is like selectAllFromConnect except +// that it works on the table named DNSRoundTrip. +func (mx *Measurer) selectAllFromDNSRoundTrip(id int64) (out []*DNSRoundTripEvent) { + for _, ev := range mx.DB.SelectAllFromDNSRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromHTTPRoundTrip is like selectAllFromConnect except +// that it works on the table named HTTPRoundTrip. +func (mx *Measurer) selectAllFromHTTPRoundTrip(id int64) (out []*HTTPRoundTripEvent) { + for _, ev := range mx.DB.SelectAllFromHTTPRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromHTTPRedirect is like selectAllFromConnect except +// that it works on the table named HTTPRedirect. 
+func (mx *Measurer) selectAllFromHTTPRedirect(id int64) (out []*HTTPRedirectEvent) { + for _, ev := range mx.DB.SelectAllFromHTTPRedirect() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} diff --git a/internal/netxlite/dnsx/decoder.go b/internal/netxlite/dnsx/decoder.go index 74182ad250..d8e19d62fd 100644 --- a/internal/netxlite/dnsx/decoder.go +++ b/internal/netxlite/dnsx/decoder.go @@ -6,8 +6,8 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) -// HTTPS is an HTTPS reply. -type HTTPS = model.HTTPS +// HTTPSSvc is an HTTPSSvc reply. +type HTTPSSvc = model.HTTPSSvc type https struct { alpn []string @@ -15,7 +15,7 @@ type https struct { ipv6hint []string } -var _ HTTPS = &https{} +var _ HTTPSSvc = &https{} func (h *https) ALPN() []string { return h.alpn @@ -35,7 +35,7 @@ type Decoder interface { DecodeLookupHost(qtype uint16, data []byte) ([]string, error) // DecodeHTTPS decodes an HTTPS reply. - DecodeHTTPS(data []byte) (HTTPS, error) + DecodeHTTPS(data []byte) (HTTPSSvc, error) } // MiekgDecoder uses github.com/miekg/dns to implement the Decoder. @@ -60,7 +60,7 @@ func (d *MiekgDecoder) parseReply(data []byte) (*dns.Msg, error) { } } -func (d *MiekgDecoder) DecodeHTTPS(data []byte) (HTTPS, error) { +func (d *MiekgDecoder) DecodeHTTPS(data []byte) (HTTPSSvc, error) { reply, err := d.parseReply(data) if err != nil { return nil, err diff --git a/internal/netxlite/dnsx/model/model.go b/internal/netxlite/dnsx/model/model.go index f2e6d43dce..3360faebf9 100644 --- a/internal/netxlite/dnsx/model/model.go +++ b/internal/netxlite/dnsx/model/model.go @@ -1,8 +1,8 @@ // Package model contains the dnsx model. package model -// HTTPS is an HTTPS reply. -type HTTPS interface { +// HTTPSSvc is an HTTPSSvc reply. 
+type HTTPSSvc interface { // ALPN returns the ALPNs inside the SVCBAlpn structure ALPN() []string diff --git a/internal/netxlite/dnsx/serial.go b/internal/netxlite/dnsx/serial.go index ddddf49280..0d2317a7f6 100644 --- a/internal/netxlite/dnsx/serial.go +++ b/internal/netxlite/dnsx/serial.go @@ -61,9 +61,9 @@ func (r *SerialResolver) LookupHost(ctx context.Context, hostname string) ([]str return addrs, nil } -// LookupHTTPSWithoutRetry issues an HTTPS query without retrying on failure. -func (r *SerialResolver) LookupHTTPSWithoutRetry( - ctx context.Context, hostname string) (HTTPS, error) { +// LookupHTTPSSvcWithoutRetry issues an HTTPS query without retrying on failure. +func (r *SerialResolver) LookupHTTPSSvcWithoutRetry( + ctx context.Context, hostname string) (HTTPSSvc, error) { querydata, err := r.Encoder.Encode( hostname, dns.TypeHTTPS, r.Txp.RequiresPadding()) if err != nil { diff --git a/internal/netxlite/legacy.go b/internal/netxlite/legacy.go index e450aada94..92bcc862a7 100644 --- a/internal/netxlite/legacy.go +++ b/internal/netxlite/legacy.go @@ -103,8 +103,8 @@ func (r *ResolverLegacyAdapter) LookupHostWithoutRetry( return nil, ErrNoDNSTransport } -func (r *ResolverLegacyAdapter) LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) { +func (r *ResolverLegacyAdapter) LookupHTTPSSvcWithoutRetry( + ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport } diff --git a/internal/netxlite/mocks/resolver.go b/internal/netxlite/mocks/resolver.go index 2876357e9a..9aa61fcb0d 100644 --- a/internal/netxlite/mocks/resolver.go +++ b/internal/netxlite/mocks/resolver.go @@ -39,10 +39,10 @@ func (r *Resolver) LookupHostWithoutRetry( panic("not yet implemented") } -// HTTPS is an HTTPS reply. -type HTTPS = model.HTTPS +// HTTPSSvc is an HTTPSSvc reply. 
+type HTTPSSvc = model.HTTPSSvc -func (r *Resolver) LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) { +func (r *Resolver) LookupHTTPSSvcWithoutRetry( + ctx context.Context, domain string) (HTTPSSvc, error) { panic("not yet implemented") } diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index 0183eaddec..6386545fe5 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -13,8 +13,8 @@ import ( "golang.org/x/net/idna" ) -// HTTPS is the type returned for HTTPS queries. -type HTTPS = dnsx.HTTPS +// HTTPSSvc is the type returned for HTTPSSvc queries. +type HTTPSSvc = dnsx.HTTPSSvc // Resolver performs domain name resolutions. type Resolver interface { @@ -36,10 +36,10 @@ type Resolver interface { LookupHostWithoutRetry( ctx context.Context, domain string, qtype uint16) ([]string, error) - // LookupHTTPSWithoutRetry issues a single HTTPS query for + // LookupHTTPSSvcWithoutRetry issues a single HTTPS query for // a domain without any retry mechanism whatsoever. 
- LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) + LookupHTTPSSvcWithoutRetry( + ctx context.Context, domain string) (HTTPSSvc, error) } // ErrNoDNSTransport indicates that the requested Resolver operation @@ -144,8 +144,8 @@ func (r *resolverSystem) LookupHostWithoutRetry( return nil, ErrNoDNSTransport } -func (r *resolverSystem) LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) { +func (r *resolverSystem) LookupHTTPSSvcWithoutRetry( + ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport } @@ -187,12 +187,12 @@ func (r *resolverLogger) LookupHostWithoutRetry( return addrs, nil } -func (r *resolverLogger) LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) { +func (r *resolverLogger) LookupHTTPSSvcWithoutRetry( + ctx context.Context, domain string) (HTTPSSvc, error) { prefix := fmt.Sprintf("resolve[HTTPS] %s with %s (%s)", domain, r.Network(), r.Address()) r.Logger.Debugf("%s...", prefix) start := time.Now() - https, err := r.Resolver.LookupHTTPSWithoutRetry(ctx, domain) + https, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) elapsed := time.Since(start) if err != nil { r.Logger.Debugf("%s... 
%s in %s", prefix, err, elapsed) @@ -230,12 +230,12 @@ func (r *resolverIDNA) LookupHostWithoutRetry( } func (r *resolverIDNA) LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) { + ctx context.Context, domain string) (HTTPSSvc, error) { host, err := idna.ToASCII(domain) if err != nil { return nil, err } - return r.Resolver.LookupHTTPSWithoutRetry(ctx, host) + return r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, host) } // resolverShortCircuitIPAddr recognizes when the input hostname is an @@ -279,8 +279,8 @@ func (r *nullResolver) LookupHostWithoutRetry( return nil, ErrNoDNSTransport } -func (r *nullResolver) LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) { +func (r *nullResolver) LookupHTTPSSvcWithoutRetry( + ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport } @@ -311,8 +311,8 @@ func (r *resolverErrWrapper) LookupHostWithoutRetry( } func (r *resolverErrWrapper) LookupHTTPSWithoutRetry( - ctx context.Context, domain string) (HTTPS, error) { - out, err := r.Resolver.LookupHTTPSWithoutRetry(ctx, domain) + ctx context.Context, domain string) (HTTPSSvc, error) { + out, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) if err != nil { return nil, errorsx.NewErrWrapper( errorsx.ClassifyResolverError, errorsx.ResolveOperation, err) From e1de317a4db73cd1e18d653a3f85e9da2750365b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 20 Sep 2021 12:52:47 +0200 Subject: [PATCH 03/53] improve websteps prototype --- internal/measurex/db.go | 4 + internal/measurex/http.go | 80 ++++++----- internal/measurex/measurer.go | 224 +++++++++++++++++++++++++++-- internal/measurex/quic.go | 257 ++++++++++++++++++++++++++++++++++ internal/measurex/saver.go | 14 ++ internal/measurex/websteps.go | 117 ++-------------- 6 files changed, 536 insertions(+), 160 deletions(-) create mode 100644 internal/measurex/quic.go diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 
da1c5ba788..58a399494f 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -38,6 +38,10 @@ type DB interface { InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) SelectAllFromHTTPRedirect() []*HTTPRedirectEvent + // QUICHandshake table + InsertIntoQUICHandshake(ev *QUICHandshakeEvent) + SelectAllFromQUICHandshake() []*QUICHandshakeEvent + // NextConnID increments and returns the connection ID. NextConnID() int64 diff --git a/internal/measurex/http.go b/internal/measurex/http.go index d9ad2e45a8..c993c877db 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -3,7 +3,7 @@ package measurex import ( "bytes" "context" - "encoding/json" + "crypto/tls" "errors" "io" "net/http" @@ -61,6 +61,15 @@ func NewHTTPTransportWithTLSConn( ), conn.ConnID()) } +// NewHTTPTransportWithQUICSess creates and wraps an HTTPTransport that +// does not dial and only uses the given QUIC session. +func NewHTTPTransportWithQUICSess( + origin Origin, logger Logger, db DB, sess QUICEarlySession) HTTPTransport { + return WrapHTTPTransportWithConnID(origin, db, netxlite.NewHTTP3Transport( + logger, netxlite.NewSingleUseQUICDialer(sess), &tls.Config{}, + ), sess.ConnID()) +} + type httpTransportx struct { netxlite.HTTPTransport connID int64 @@ -172,6 +181,9 @@ func NewHTTPClientWithRedirects(origin Origin, db DB, txp HTTPTransport) HTTPCli // The Via field contains the requests issued so far. The first // request inside Via is the last one that has been issued. // +// The Cookies field contains all the cookies that the +// implementation would set for the Request.URL. +// // The Error field can have three values: // // - nil if the redirect occurred; @@ -180,44 +192,34 @@ func NewHTTPClientWithRedirects(origin Origin, db DB, txp HTTPTransport) HTTPCli // // - http.ErrUseLastResponse if redirections are disabled. 
type HTTPRedirectEvent struct { - Origin Origin + // Origin is the event origin ("probe" or "th") + Origin Origin + + // MeasurementID is the measurement inside which + // this event occurred. MeasurementID int64 - ConnID int64 - Request *http.Request - Via []*http.Request - Error error -} -// MarshalJSON marshals an HTTPRedirectEvent to JSON. -func (ev *HTTPRedirectEvent) MarshalJSON() ([]byte, error) { - m := map[string]interface{}{ - "Origin": ev.Origin, - "MeasurementID": ev.MeasurementID, - "ConnID": ev.ConnID, - "Request": ev.simplifyRequest(ev.Request), - "Via": ev.simplifyRequests(ev.Via), - "Error": ev.Error, - } - return json.Marshal(m) -} + // ConnID is the ID of the connection we are using, + // which may be zero if undefined. + ConnID int64 -// simplifyRequest simplifies a single http.Request so -// that it could be serialized as a JSON. -func (ev *HTTPRedirectEvent) simplifyRequest(req *http.Request) (out map[string]interface{}) { - out = map[string]interface{}{ - "URL": req.URL, - "Header": req.Header, - } - return -} + // URL is the URL triggering the redirect. + URL string -// simplifyRequests is simplifyRequest applied to a list -// of http.Request rather than just one of them. -func (ev *HTTPRedirectEvent) simplifyRequests(req []*http.Request) (out []map[string]interface{}) { - for _, r := range req { - out = append(out, ev.simplifyRequest(r)) - } - return + // Location is the URL to which we're redirected. + Location string + + // Cookies contains the cookies for Location. + Cookies []*http.Cookie + + // The Error field can have three values: + // + // - nil if the redirect occurred; + // + // - ErrHTTPTooManyRedirects when we see too many redirections; + // + // - http.ErrUseLastResponse if redirections are disabled. 
+ Error error } // ErrHTTPTooManyRedirects is the unexported error that the standard library @@ -225,9 +227,10 @@ func (ev *HTTPRedirectEvent) simplifyRequests(req []*http.Request) (out []map[st var ErrHTTPTooManyRedirects = errors.New("stopped after 10 redirects") func newHTTPClient(origin Origin, db DB, txp HTTPTransport, defaultErr error) HTTPClient { + cookiejar := NewCookieJar() return &http.Client{ Transport: txp, - Jar: NewCookieJar(), + Jar: cookiejar, CheckRedirect: func(req *http.Request, via []*http.Request) error { err := defaultErr if len(via) >= 10 { @@ -237,8 +240,9 @@ func newHTTPClient(origin Origin, db DB, txp HTTPTransport, defaultErr error) HT Origin: origin, MeasurementID: db.MeasurementID(), ConnID: txp.ConnID(), - Request: req, - Via: via, + URL: via[0].URL.String(), + Location: req.URL.String(), + Cookies: cookiejar.Cookies(req.URL), Error: err, }) return err diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index fada77b166..782112e1ad 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -79,7 +79,7 @@ func (mx *Measurer) NewMeasurement() int64 { func (mx *Measurer) LookupHostSystem( ctx context.Context, domain string) (addrs []string, err error) { const timeout = 4 * time.Second - mx.infof("LookupHost[getaddrinfo] %s (timeout %s)...", domain, timeout) + mx.infof("LookupHostSystem domain=%s timeout=%s...", domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() r := mx.newResolverSystem() @@ -153,7 +153,8 @@ func (mx *Measurer) newResolverUDP(address string) Resolver { func (mx *Measurer) LookupHostUDP( ctx context.Context, domain, address string) ([]string, error) { const timeout = 4 * time.Second - mx.infof("LookupHost[udp://%s] %s (timeout %s)...", address, domain, timeout) + mx.infof("LookupHostUDP serverEndpoint=%s/udp domain=%s timeout=%s...", + address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() r := 
mx.newResolverUDP(address) @@ -177,7 +178,8 @@ func (mx *Measurer) LookupHostUDP( func (mx *Measurer) LookupHTTPSSvcUDP( ctx context.Context, domain, address string) (HTTPSSvc, error) { const timeout = 4 * time.Second - mx.infof("LookupHTTPSSvc[udp://%s] %s (timeout %s)...", address, domain, timeout) + mx.infof("LookupHTTPSSvcUDP engine=udp://%s domain=%s timeout=%s...", + address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() r := mx.newResolverUDP(address) @@ -206,7 +208,7 @@ func (mx *Measurer) newDialerWithoutResolver() Dialer { // Either an established Conn or an error. func (mx *Measurer) TCPConnect(ctx context.Context, address string) (Conn, error) { const timeout = 10 * time.Second - mx.infof("TCPConnect %s (timeout %s)...", address, timeout) + mx.infof("TCPConnect endpoint=%s timeout=%s...", address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() d := mx.newDialerWithoutResolver() @@ -255,13 +257,53 @@ func (mx *Measurer) TLSConnect(ctx context.Context, return nil, err } const timeout = 10 * time.Second - mx.infof("TLSHandshake[SNI=%s,ALPN=%+v] %s (timeout %s)...", + mx.infof("TLSHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", config.ServerName, config.NextProtos, address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() return mx.TLSHandshaker.Handshake(ctx, conn, config) } +// QUICConnect connects and TLS handshakes with a QUIC endpoint. +// +// Arguments +// +// - ctx is the context allowing to timeout the whole operation; +// +// - address is the endpoint address (e.g., "1.1.1.1:443"); +// +// - config contains the TLS config (see below). 
+// +// TLS config +// +// You MUST set the following config fields: +// +// - ServerName to the desired SNI or InsecureSkipVerify to +// skip the certificate name verification; +// +// - RootCAs to netxlite.NewDefaultCertPool() output; +// +// - NextProtos to the desired ALPN ([]string{"h3"} for +// HTTP/3 and []string{"doq"} for DNS-over-QUIC). +// +// Return value +// +// Either an established quic.EarlySession or an error. +func (mx *Measurer) QUICConnect(ctx context.Context, + address string, config *tls.Config) (QUICEarlySession, error) { + const timeout = 10 * time.Second + mx.infof("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", + config.ServerName, config.NextProtos, address, timeout) + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + qd := WrapQUICDialer(mx.Origin, mx.DB, netxlite.NewQUICDialerWithoutResolver( + WrapQUICListener(mx.Origin, mx.DB, netxlite.NewQUICListener()), + mx.Logger, + )) + defer qd.CloseIdleConnections() + return qd.DialContext(ctx, address, config) +} + // ErrUnknownHTTPEndpointNetwork indicates that we don't know // how to handle the value of an HTTPEndpoint.Network. var ErrUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") @@ -284,7 +326,7 @@ func (mx *Measurer) HTTPEndpointGet( ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) { switch epnt.Network { case NetworkQUIC: - return nil, ErrUnknownHTTPEndpointNetwork + return mx.httpEndpointGetQUIC(ctx, epnt) case NetworkTCP: return mx.httpEndpointGetTCP(ctx, epnt) default: @@ -351,11 +393,35 @@ func (mx *Measurer) httpEndpointGetHTTPS( return mx.httpClientDo(ctx, clnt, epnt, req) } +// httpEndpointGetQUIC specializes httpEndpointGetTCP for QUIC. 
+func (mx *Measurer) httpEndpointGetQUIC( + ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) { + req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) + if err != nil { + return nil, err + } + req.Header = epnt.Header + sess, err := mx.QUICConnect(ctx, epnt.Address, &tls.Config{ + ServerName: epnt.SNI, + NextProtos: epnt.ALPN, + RootCAs: netxlite.NewDefaultCertPool(), + }) + if err != nil { + return nil, err + } + // TODO(bassosimone): close session with correct message + defer sess.CloseWithError(0, "") // we own it + clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, + NewHTTPTransportWithQUICSess(mx.Origin, mx.Logger, mx.DB, sess)) + defer clnt.CloseIdleConnections() + return mx.httpClientDo(ctx, clnt, epnt, req) +} + func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, epnt *HTTPEndpoint, req *http.Request) (*http.Response, error) { const timeout = 15 * time.Second - mx.infof("HTTPGet[epnt=%s] %s (timeout %s)...", - epnt.Address, epnt.URL.String(), timeout) + mx.infof("httpClientDo endpoint=%s method=%s url=%s headers=%+v timeout=%s...", + epnt.String(), req.Method, req.URL.String(), req.Header, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() return clnt.Do(req.WithContext(ctx)) @@ -416,7 +482,9 @@ var ErrLookupEndpoints = errors.New("endpoints lookup failed") func (mx *Measurer) LookupEndpoints( ctx context.Context, domain, port, address string) ([]*Endpoint, error) { udpAddrs, _ := mx.LookupHostUDP(ctx, domain, address) + mx.infof("LookupHostUDP addrs=%+v", udpAddrs) systemAddrs, _ := mx.LookupHostSystem(ctx, domain) + mx.infof("LookupHostSystem addrs=%+v", systemAddrs) var out []*Endpoint out = append(out, mx.parseLookupHostReply(port, systemAddrs)...) out = append(out, mx.parseLookupHostReply(port, udpAddrs)...) 
@@ -553,10 +621,15 @@ func (mx *Measurer) LookupHTTPEndpoints( return nil, err } httpsSvcInfo, _ := mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), address) + httpsSvcEndpoints := mx.parseHTTPSSvcReply(port, httpsSvcInfo) + mx.infof("LookupHTTPSSvcUDP endpoints=%+v", httpsSvcEndpoints) endpoints, _ := mx.LookupEndpoints(ctx, URL.Hostname(), port, address) - endpoints = append(endpoints, mx.parseHTTPSSvcReply(port, httpsSvcInfo)...) - endpoints, _ = mx.lookupWCTH(ctx, URL, endpoints, port) + endpoints = append(endpoints, httpsSvcEndpoints...) + wcthEndpoints, _ := mx.lookupWCTH(ctx, URL, endpoints, port) + mx.infof("lookupWCTH endpoints=%+v", wcthEndpoints) + endpoints = append(endpoints, wcthEndpoints...) endpoints = mx.mergeEndpoints(endpoints) + mx.infof("mergeEndpoints endpoints=%+v", endpoints) if len(endpoints) < 1 { return nil, ErrLookupEndpoints } @@ -630,11 +703,12 @@ func (mx *Measurer) alpnForHTTPEndpoint(network EndpointNetwork) []string { // Return value // // Either a list of endpoints (which may possibly be empty) in case -// of success or an error in case of failure. +// of success or an error in case of failure. Note that the returned +// list of endpoints ONLY includes the ones discovered via WCTH. 
func (mx *Measurer) lookupWCTH(ctx context.Context, URL *url.URL, endpoints []*Endpoint, port string) ([]*Endpoint, error) { const timeout = 30 * time.Second - mx.infof("WCTH[backend=%s] %s %+v %s (timeout %s)...", + mx.infof("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", mx.WCTHURL, URL.String(), endpoints, port, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -643,14 +717,18 @@ func (mx *Measurer) lookupWCTH(ctx context.Context, if err != nil { return nil, err } + var out []*Endpoint for _, addr := range resp.DNS.Addrs { + if net.ParseIP(addr) == nil { + continue // the WCTH may also return the CNAME + } addrport := net.JoinHostPort(addr, port) - endpoints = append(endpoints, &Endpoint{ + out = append(out, &Endpoint{ Network: NetworkTCP, Address: addrport, }) } - return endpoints, nil + return out, nil } // onlyTCPEndpoints takes in input a list of endpoints and returns @@ -726,3 +804,121 @@ func (mx *Measurer) parseHTTPSSvcReply(port string, info HTTPSSvc) (out []*Endpo func (mx *Measurer) infof(format string, v ...interface{}) { mx.Logger.Infof(format, v...) } + +// selectAllFromConnect selects all the entries inside of the +// Connect table that have the given MeasurementID. +// +// Arguments +// +// - id is the MeasurementID to filter for. +// +// Return value +// +// A possibly-empty list of events. +func (mx *Measurer) selectAllFromConnect(id int64) (out []*NetworkEvent) { + for _, ev := range mx.DB.SelectAllFromDial() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromReadWrite is like selectAllFromConnect except +// that it works on the table named ReadWrite. +func (mx *Measurer) selectAllFromReadWrite(id int64) (out []*NetworkEvent) { + for _, ev := range mx.DB.SelectAllFromReadWrite() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromClose is like selectAllFromConnect except +// that it works on the table named Close. 
+func (mx *Measurer) selectAllFromClose(id int64) (out []*NetworkEvent) { + for _, ev := range mx.DB.SelectAllFromClose() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromTLSHandshake is like selectAllFromConnect except +// that it works on the table named TLSHandshake. +func (mx *Measurer) selectAllFromTLSHandshake(id int64) (out []*TLSHandshakeEvent) { + for _, ev := range mx.DB.SelectAllFromTLSHandshake() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromQUICHandshake is like selectAllFromConnect except +// that it works on the table named QUICHandshake. +func (mx *Measurer) selectAllFromQUICHandshake(id int64) (out []*QUICHandshakeEvent) { + for _, ev := range mx.DB.SelectAllFromQUICHandshake() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromLookupHost is like selectAllFromConnect except +// that it works on the table named LookupHost. +func (mx *Measurer) selectAllFromLookupHost(id int64) (out []*LookupHostEvent) { + for _, ev := range mx.DB.SelectAllFromLookupHost() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromLookupHTTPSSvc is like selectAllFromConnect except +// that it works on the table named LookupHTTPSSvc. +func (mx *Measurer) selectAllFromLookupHTTPSSvc(id int64) (out []*LookupHTTPSSvcEvent) { + for _, ev := range mx.DB.SelectAllFromLookupHTTPSSvc() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromDNSRoundTrip is like selectAllFromConnect except +// that it works on the table named DNSRoundTrip. 
+func (mx *Measurer) selectAllFromDNSRoundTrip(id int64) (out []*DNSRoundTripEvent) { + for _, ev := range mx.DB.SelectAllFromDNSRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromHTTPRoundTrip is like selectAllFromConnect except +// that it works on the table named HTTPRoundTrip. +func (mx *Measurer) selectAllFromHTTPRoundTrip(id int64) (out []*HTTPRoundTripEvent) { + for _, ev := range mx.DB.SelectAllFromHTTPRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// selectAllFromHTTPRedirect is like selectAllFromConnect except +// that it works on the table named HTTPRedirect. +func (mx *Measurer) selectAllFromHTTPRedirect(id int64) (out []*HTTPRedirectEvent) { + for _, ev := range mx.DB.SelectAllFromHTTPRedirect() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go new file mode 100644 index 0000000000..95f76bffd8 --- /dev/null +++ b/internal/measurex/quic.go @@ -0,0 +1,257 @@ +package measurex + +import ( + "context" + "crypto/tls" + "net" + "time" + + "github.com/lucas-clemente/quic-go" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/quicx" +) + +// QUICListener creates listening connections for QUIC. +type QUICListener = netxlite.QUICListener + +// WrapQUICListener wraps a netxlite.QUICListener to add measurex capabilities. 
+func WrapQUICListener(origin Origin, db DB, ql netxlite.QUICListener) QUICListener { + return &quicListenerx{ + QUICListener: ql, + db: db, + origin: origin, + } +} + +type quicListenerx struct { + netxlite.QUICListener + db DB + origin Origin +} + +func (ql *quicListenerx) Listen(addr *net.UDPAddr) (quicx.UDPLikeConn, error) { + pconn, err := ql.QUICListener.Listen(addr) + if err != nil { + return nil, err + } + return &quicUDPLikeConnx{ + UDPLikeConn: pconn, + connID: ql.db.NextConnID(), + db: ql.db, + localAddr: pconn.LocalAddr().String(), + origin: ql.origin, + }, nil +} + +type quicUDPLikeConnx struct { + quicx.UDPLikeConn + connID int64 + db DB + localAddr string + origin Origin +} + +func (c *quicUDPLikeConnx) WriteTo(p []byte, addr net.Addr) (int, error) { + started := time.Now() + count, err := c.UDPLikeConn.WriteTo(p, addr) + finished := time.Now() + c.db.InsertIntoReadWrite(&NetworkEvent{ + Origin: c.origin, + MeasurementID: c.db.MeasurementID(), + ConnID: c.connID, + Operation: "write_to", + Network: string(NetworkQUIC), + RemoteAddr: addr.String(), + LocalAddr: c.localAddr, + Started: started, + Finished: finished, + Error: err, + Count: count, + }) + return count, err +} + +func (c *quicUDPLikeConnx) ReadFrom(b []byte) (int, net.Addr, error) { + started := time.Now() + count, addr, err := c.UDPLikeConn.ReadFrom(b) + finished := time.Now() + c.db.InsertIntoReadWrite(&NetworkEvent{ + Origin: c.origin, + MeasurementID: c.db.MeasurementID(), + ConnID: c.connID, + Operation: "read_from", + Network: string(NetworkQUIC), + RemoteAddr: c.addrStringIfNotNil(addr), + LocalAddr: c.localAddr, + Started: started, + Finished: finished, + Error: err, + Count: count, + }) + return count, addr, err +} + +func (c *quicUDPLikeConnx) addrStringIfNotNil(addr net.Addr) (out string) { + if addr != nil { + out = addr.String() + } + return +} + +func (c *quicUDPLikeConnx) Close() error { + started := time.Now() + err := c.UDPLikeConn.Close() + finished := time.Now() + 
c.db.InsertIntoReadWrite(&NetworkEvent{ + Origin: c.origin, + MeasurementID: c.db.MeasurementID(), + ConnID: c.connID, + Operation: "close", + Network: string(NetworkQUIC), + RemoteAddr: "", + LocalAddr: c.localAddr, + Started: started, + Finished: finished, + Error: err, + Count: 0, + }) + return err +} + +func (c *quicUDPLikeConnx) LocalAddr() net.Addr { + localAddr := c.UDPLikeConn.LocalAddr() + if localAddr == nil { + return nil + } + return &quicLocalAddrx{Addr: localAddr, connID: c.connID} +} + +type quicLocalAddrx struct { + net.Addr + connID int64 +} + +// QUICEarlySession is the type we use to wrap quic.EarlySession +type QUICEarlySession interface { + quic.EarlySession + + ConnID() int64 +} + +// QUICDialer creates QUIC sessions. +type QUICDialer interface { + DialContext(ctx context.Context, address string, + tlsConfig *tls.Config) (QUICEarlySession, error) + + CloseIdleConnections() +} + +// QUICHandshakeEvent is the result of QUICHandshake. +type QUICHandshakeEvent struct { + Origin Origin + MeasurementID int64 + ConnID int64 + Network string + RemoteAddr string + LocalAddr string + SNI string + ALPN []string + SkipVerify bool + Started time.Time + Finished time.Time + Error error + TLSVersion string + CipherSuite string + NegotiatedProto string + PeerCerts [][]byte +} + +// WrapQUICDialer wraps a netxlite.QUICDialer to add measurex capabilities. 
+func WrapQUICDialer(origin Origin, db DB, dialer netxlite.QUICDialer) QUICDialer { + return &quicDialerx{ + QUICDialer: dialer, + origin: origin, + db: db, + } +} + +type quicDialerx struct { + netxlite.QUICDialer + db DB + origin Origin +} + +func (qh *quicDialerx) DialContext(ctx context.Context, + address string, tlsConfig *tls.Config) (QUICEarlySession, error) { + started := time.Now() + var ( + localAddr *quicLocalAddrx + state tls.ConnectionState + ) + sess, err := qh.QUICDialer.DialContext( + ctx, "udp", address, tlsConfig, &quic.Config{}) + if err == nil { + select { + case <-sess.HandshakeComplete().Done(): + state = sess.ConnectionState().TLS.ConnectionState + if addr := sess.LocalAddr(); addr != nil { + if laddr, ok := addr.(*quicLocalAddrx); ok { + localAddr = laddr + } + } + case <-ctx.Done(): + sess, err = nil, ctx.Err() + } + } + finished := time.Now() + qh.db.InsertIntoQUICHandshake(&QUICHandshakeEvent{ + Origin: qh.origin, + MeasurementID: qh.db.MeasurementID(), + ConnID: qh.connIDIfNotNil(localAddr), + Network: string(NetworkQUIC), + RemoteAddr: address, + LocalAddr: qh.localAddrIfNotNil(localAddr), + SNI: tlsConfig.ServerName, + ALPN: tlsConfig.NextProtos, + SkipVerify: tlsConfig.InsecureSkipVerify, + Started: started, + Finished: finished, + Error: err, + TLSVersion: netxlite.TLSVersionString(state.Version), + CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), + NegotiatedProto: state.NegotiatedProtocol, + PeerCerts: peerCerts(nil, &state), + }) + if err != nil { + return nil, err + } + return &quicEarlySessionx{ + EarlySession: sess, connID: qh.connIDIfNotNil(localAddr)}, nil +} + +type quicEarlySessionx struct { + quic.EarlySession + connID int64 +} + +func (qes *quicEarlySessionx) ConnID() int64 { + return qes.connID +} + +func (qh *quicDialerx) connIDIfNotNil(addr *quicLocalAddrx) (out int64) { + if addr != nil { + out = addr.connID + } + return +} + +func (qh *quicDialerx) localAddrIfNotNil(addr *quicLocalAddrx) (out string) { 
+ if addr != nil { + out = addr.String() + } + return +} + +func (qh *quicDialerx) CloseIdleConnections() { + qh.QUICDialer.CloseIdleConnections() +} diff --git a/internal/measurex/saver.go b/internal/measurex/saver.go index 8f28c427ce..9577328d6b 100644 --- a/internal/measurex/saver.go +++ b/internal/measurex/saver.go @@ -13,6 +13,7 @@ type Saver struct { dnsRoundTripTable []*DNSRoundTripEvent httpRoundTripTable []*HTTPRoundTripEvent httpRedirectTable []*HTTPRedirectEvent + quicHandshakeTable []*QUICHandshakeEvent connID int64 measurementID int64 mu sync.Mutex @@ -135,6 +136,19 @@ func (s *Saver) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { return } +func (s *Saver) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { + s.mu.Lock() + s.quicHandshakeTable = append(s.quicHandshakeTable, ev) + s.mu.Unlock() +} + +func (s *Saver) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { + s.mu.Lock() + out = append(out, s.quicHandshakeTable...) + s.mu.Unlock() + return +} + func (s *Saver) NextConnID() (out int64) { s.mu.Lock() s.connID++ // start from 1 diff --git a/internal/measurex/websteps.go b/internal/measurex/websteps.go index 13766ab313..f93435fe68 100644 --- a/internal/measurex/websteps.go +++ b/internal/measurex/websteps.go @@ -34,6 +34,9 @@ type WebStepBaseMeasurement struct { // TLSHandshake contains all the TLS handshakes. TLSHandshake []*TLSHandshakeEvent + // QUICHandshake contains all the QUIC handshakes. + QUICHandshake []*QUICHandshakeEvent + // LookupHost contains all the host lookups. LookupHost []*LookupHostEvent @@ -71,7 +74,6 @@ type WebStepEndpoint struct { // We define WebStep as the process by which we have an input URL // and we perform the following operations: // -// // 1. lookup of all the possible endpoints for the URL; // // 2. measurement of each available endpoint. @@ -94,10 +96,13 @@ type WebStepEndpoint struct { // empty if we have no been able to discover endpoints. 
func (mx *Measurer) WebStep( ctx context.Context, URL *url.URL, dnsResolverUDP string) (m *WebStepResult) { + mx.infof("WebStep url=%s dnsResolverUDP=%s", URL.String(), dnsResolverUDP) m = &WebStepResult{ URL: URL.String(), } mid := mx.NewMeasurement() + mx.infof("LookupHTTPEndpoints measurementID=%d url=%s dnsResolverUDP=%s", + mid, URL.String(), dnsResolverUDP) epnts, _ := mx.LookupHTTPEndpoints(ctx, URL, dnsResolverUDP) m.LookupEndpoints = &WebStepLookupEndpoints{ Domain: URL.Hostname(), @@ -105,6 +110,8 @@ func (mx *Measurer) WebStep( } for _, epnt := range epnts { mid = mx.NewMeasurement() + mx.infof("HTTPEndpointGet measurementID=%d url=%s endpoint=%s dnsResolverUDP=%s", + mid, URL.String(), epnt.String(), dnsResolverUDP) mx.HTTPEndpointGet(ctx, epnt) m.Endpoints = append(m.Endpoints, &WebStepEndpoint{ Endpoint: epnt.String(), @@ -131,6 +138,7 @@ func (mx *Measurer) newWebStepBaseMeasurement(id int64) *WebStepBaseMeasurement ReadWrite: mx.selectAllFromReadWrite(id), Close: mx.selectAllFromClose(id), TLSHandshake: mx.selectAllFromTLSHandshake(id), + QUICHandshake: mx.selectAllFromQUICHandshake(id), LookupHost: mx.selectAllFromLookupHost(id), LookupHTTPSSvc: mx.selectAllFromLookupHTTPSSvc(id), DNSRoundTrip: mx.selectAllFromDNSRoundTrip(id), @@ -138,110 +146,3 @@ func (mx *Measurer) newWebStepBaseMeasurement(id int64) *WebStepBaseMeasurement HTTPRedirect: mx.selectAllFromHTTPRedirect(id), } } - -// selectAllFromConnect selects all the entries inside of the -// Connect table that have the given MeasurementID. -// -// Arguments -// -// - id is the MeasurementID to filter for. -// -// Return value -// -// A possibly-empty list of events. -func (mx *Measurer) selectAllFromConnect(id int64) (out []*NetworkEvent) { - for _, ev := range mx.DB.SelectAllFromDial() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromReadWrite is like selectAllFromConnect except -// that it works on the table named ReadWrite. 
-func (mx *Measurer) selectAllFromReadWrite(id int64) (out []*NetworkEvent) { - for _, ev := range mx.DB.SelectAllFromReadWrite() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromClose is like selectAllFromConnect except -// that it works on the table named Close. -func (mx *Measurer) selectAllFromClose(id int64) (out []*NetworkEvent) { - for _, ev := range mx.DB.SelectAllFromClose() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromTLSHandshake is like selectAllFromConnect except -// that it works on the table named TLSHandshake. -func (mx *Measurer) selectAllFromTLSHandshake(id int64) (out []*TLSHandshakeEvent) { - for _, ev := range mx.DB.SelectAllFromTLSHandshake() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromLookupHost is like selectAllFromConnect except -// that it works on the table named LookupHost. -func (mx *Measurer) selectAllFromLookupHost(id int64) (out []*LookupHostEvent) { - for _, ev := range mx.DB.SelectAllFromLookupHost() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromLookupHTTPSSvc is like selectAllFromConnect except -// that it works on the table named LookupHTTPSSvc. -func (mx *Measurer) selectAllFromLookupHTTPSSvc(id int64) (out []*LookupHTTPSSvcEvent) { - for _, ev := range mx.DB.SelectAllFromLookupHTTPSSvc() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromDNSRoundTrip is like selectAllFromConnect except -// that it works on the table named DNSRoundTrip. -func (mx *Measurer) selectAllFromDNSRoundTrip(id int64) (out []*DNSRoundTripEvent) { - for _, ev := range mx.DB.SelectAllFromDNSRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromHTTPRoundTrip is like selectAllFromConnect except -// that it works on the table named HTTPRoundTrip. 
-func (mx *Measurer) selectAllFromHTTPRoundTrip(id int64) (out []*HTTPRoundTripEvent) { - for _, ev := range mx.DB.SelectAllFromHTTPRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// selectAllFromHTTPRedirect is like selectAllFromConnect except -// that it works on the table named HTTPRedirect. -func (mx *Measurer) selectAllFromHTTPRedirect(id int64) (out []*HTTPRedirectEvent) { - for _, ev := range mx.DB.SelectAllFromHTTPRedirect() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} From ac0ee0bbb72625d86656d3ef5a18030490ecfca8 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 20 Sep 2021 13:20:13 +0200 Subject: [PATCH 04/53] refactor: move websteps specific code outisde of measurex --- .../engine/experiment/webstepsx/webstepsx.go | 151 ++++++++++++++++++ internal/measurex/http.go | 14 +- internal/measurex/measurer.go | 98 ++++++------ internal/measurex/websteps.go | 148 ----------------- 4 files changed, 209 insertions(+), 202 deletions(-) create mode 100644 internal/engine/experiment/webstepsx/webstepsx.go delete mode 100644 internal/measurex/websteps.go diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go new file mode 100644 index 0000000000..0a488c4236 --- /dev/null +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -0,0 +1,151 @@ +// Package webstepsx contains a websteps implementation +// based on the internal/measurex package. +package webstepsx + +import ( + "context" + "net/http" + "net/url" + + "github.com/ooni/probe-cli/v3/internal/measurex" +) + +// SingleStep contains the results of a single web step. +type SingleStep struct { + // URL is the URL this measurement refers to. + URL string + + // LookupEndpoints contains the LookupEndpoints measurement. + LookupEndpoints *LookupEndpoints + + // Endpoints contains all the endpoints measurements. 
+ Endpoints []*Endpoint +} + +// BaseMeasurement is a measurement part of Result. +type BaseMeasurement struct { + // Connect contains all the connect operations. + Connect []*measurex.NetworkEvent + + // ReadWrite contains all the read and write operations. + ReadWrite []*measurex.NetworkEvent + + // Close contains all the close operations. + Close []*measurex.NetworkEvent + + // TLSHandshake contains all the TLS handshakes. + TLSHandshake []*measurex.TLSHandshakeEvent + + // QUICHandshake contains all the QUIC handshakes. + QUICHandshake []*measurex.QUICHandshakeEvent + + // LookupHost contains all the host lookups. + LookupHost []*measurex.LookupHostEvent + + // LookupHTTPSSvc contains all the HTTPSSvc lookups. + LookupHTTPSSvc []*measurex.LookupHTTPSSvcEvent + + // DNSRoundTrip contains all the DNS round trips. + DNSRoundTrip []*measurex.DNSRoundTripEvent + + // HTTPRoundTrip contains all the HTTP round trips. + HTTPRoundTrip []*measurex.HTTPRoundTripEvent + + // HTTPRedirect contains all the redirections. + HTTPRedirect []*measurex.HTTPRedirectEvent +} + +// LookupEndpoints describes the measurement of endpoints lookup. +type LookupEndpoints struct { + // Domain is the domain this measurement refers to. + Domain string + + *BaseMeasurement +} + +// Endpoint describes the measurement of a given endpoint. +type Endpoint struct { + // Endpoint is the endpoint this measurement refers to. + Endpoint string + + *BaseMeasurement +} + +// RunSingleStep performs a single WebSteps step. +// +// We define "step" as the process by which we have an input URL +// and we perform the following operations: +// +// 1. lookup of all the possible endpoints for the URL; +// +// 2. measurement of each available endpoint. +// +// This function DOES NOT automatically follow redirections. Though +// we have enough information to know how to follow them. 
+// +// Arguments +// +// - ctx is the context to implement timeouts; +// +// - mx is the measurex.Measurer to use; +// +// - cookiejar is the http.CookieJar for cookies; +// +// - URL is the URL to measure; +// +// - dnsResolverUDP is the address of the DNS resolver endpoint +// using UDP we wish to use (e.g., "8.8.8.8:53"). +// +// Return value +// +// A SingleStep structure where the Endpoints array may be empty +// if we have not been able to discover endpoints. +func RunSingleStep(ctx context.Context, mx *measurex.Measurer, + cookiekar http.CookieJar, URL *url.URL, dnsResolverUDP string) (m *SingleStep) { + m = &SingleStep{URL: URL.String()} + mid := mx.NewMeasurement() + mx.Infof("LookupHTTPEndpoints measurementID=%d url=%s dnsResolverUDP=%s", + mid, URL.String(), dnsResolverUDP) + epnts, _ := mx.LookupHTTPEndpoints(ctx, URL, dnsResolverUDP) + m.LookupEndpoints = &LookupEndpoints{ + Domain: URL.Hostname(), + BaseMeasurement: newBaseMeasurement(mx, mid), + } + for _, epnt := range epnts { + mid = mx.NewMeasurement() + mx.Infof("HTTPEndpointGet measurementID=%d url=%s endpoint=%s dnsResolverUDP=%s", + mid, URL.String(), epnt.String(), dnsResolverUDP) + mx.HTTPEndpointGet(ctx, epnt, cookiekar) + m.Endpoints = append(m.Endpoints, &Endpoint{ + Endpoint: epnt.String(), + BaseMeasurement: newBaseMeasurement(mx, mid), + }) + } + return +} + +// newBaseMeasurement creates a new Base Measurement. +// +// To this end, it filters all possible events by MeasurementID. +// +// Arguments +// +// - id is the MeasurementID. +// +// Return value +// +// A valid BaseMeasurement containing possibly empty lists of events. 
+func newBaseMeasurement(mx *measurex.Measurer, id int64) *BaseMeasurement { + return &BaseMeasurement{ + Connect: mx.SelectAllFromConnect(id), + ReadWrite: mx.SelectAllFromReadWrite(id), + Close: mx.SelectAllFromClose(id), + TLSHandshake: mx.SelectAllFromTLSHandshake(id), + QUICHandshake: mx.SelectAllFromQUICHandshake(id), + LookupHost: mx.SelectAllFromLookupHost(id), + LookupHTTPSSvc: mx.SelectAllFromLookupHTTPSSvc(id), + DNSRoundTrip: mx.SelectAllFromDNSRoundTrip(id), + HTTPRoundTrip: mx.SelectAllFromHTTPRoundTrip(id), + HTTPRedirect: mx.SelectAllFromHTTPRedirect(id), + } +} diff --git a/internal/measurex/http.go b/internal/measurex/http.go index c993c877db..3051be699b 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -159,14 +159,16 @@ type HTTPClient interface { // NewHTTPClient creates a new HTTPClient instance that // does not automatically perform redirects. -func NewHTTPClientWithoutRedirects(origin Origin, db DB, txp HTTPTransport) HTTPClient { - return newHTTPClient(origin, db, txp, http.ErrUseLastResponse) +func NewHTTPClientWithoutRedirects( + origin Origin, db DB, jar http.CookieJar, txp HTTPTransport) HTTPClient { + return newHTTPClient(origin, db, jar, txp, http.ErrUseLastResponse) } // NewHTTPClientWithRedirects creates a new HTTPClient // instance that automatically perform redirects. -func NewHTTPClientWithRedirects(origin Origin, db DB, txp HTTPTransport) HTTPClient { - return newHTTPClient(origin, db, txp, nil) +func NewHTTPClientWithRedirects( + origin Origin, db DB, jar http.CookieJar, txp HTTPTransport) HTTPClient { + return newHTTPClient(origin, db, jar, txp, nil) } // HTTPRedirectEvent records an HTTP redirect. @@ -226,8 +228,8 @@ type HTTPRedirectEvent struct { // would return when hitting too many redirects. 
var ErrHTTPTooManyRedirects = errors.New("stopped after 10 redirects") -func newHTTPClient(origin Origin, db DB, txp HTTPTransport, defaultErr error) HTTPClient { - cookiejar := NewCookieJar() +func newHTTPClient(origin Origin, db DB, + cookiejar http.CookieJar, txp HTTPTransport, defaultErr error) HTTPClient { return &http.Client{ Transport: txp, Jar: cookiejar, diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 782112e1ad..f8054b746d 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -79,7 +79,7 @@ func (mx *Measurer) NewMeasurement() int64 { func (mx *Measurer) LookupHostSystem( ctx context.Context, domain string) (addrs []string, err error) { const timeout = 4 * time.Second - mx.infof("LookupHostSystem domain=%s timeout=%s...", domain, timeout) + mx.Infof("LookupHostSystem domain=%s timeout=%s...", domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() r := mx.newResolverSystem() @@ -153,7 +153,7 @@ func (mx *Measurer) newResolverUDP(address string) Resolver { func (mx *Measurer) LookupHostUDP( ctx context.Context, domain, address string) ([]string, error) { const timeout = 4 * time.Second - mx.infof("LookupHostUDP serverEndpoint=%s/udp domain=%s timeout=%s...", + mx.Infof("LookupHostUDP serverEndpoint=%s/udp domain=%s timeout=%s...", address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -178,7 +178,7 @@ func (mx *Measurer) LookupHostUDP( func (mx *Measurer) LookupHTTPSSvcUDP( ctx context.Context, domain, address string) (HTTPSSvc, error) { const timeout = 4 * time.Second - mx.infof("LookupHTTPSSvcUDP engine=udp://%s domain=%s timeout=%s...", + mx.Infof("LookupHTTPSSvcUDP engine=udp://%s domain=%s timeout=%s...", address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -208,7 +208,7 @@ func (mx *Measurer) newDialerWithoutResolver() Dialer { // Either an established Conn or an error. 
func (mx *Measurer) TCPConnect(ctx context.Context, address string) (Conn, error) { const timeout = 10 * time.Second - mx.infof("TCPConnect endpoint=%s timeout=%s...", address, timeout) + mx.Infof("TCPConnect endpoint=%s timeout=%s...", address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() d := mx.newDialerWithoutResolver() @@ -257,7 +257,7 @@ func (mx *Measurer) TLSConnect(ctx context.Context, return nil, err } const timeout = 10 * time.Second - mx.infof("TLSHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", + mx.Infof("TLSHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", config.ServerName, config.NextProtos, address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -292,7 +292,7 @@ func (mx *Measurer) TLSConnect(ctx context.Context, func (mx *Measurer) QUICConnect(ctx context.Context, address string, config *tls.Config) (QUICEarlySession, error) { const timeout = 10 * time.Second - mx.infof("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", + mx.Infof("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", config.ServerName, config.NextProtos, address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -317,18 +317,20 @@ var ErrUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") // // - ctx is the context allowing to timeout the operation; // -// - epnt is the HTTP endpoint. +// - epnt is the HTTP endpoint; +// +// - jar is the cookie jar to use. // // Return value // // Either an HTTP response, on success, or an error. 
func (mx *Measurer) HTTPEndpointGet( - ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { switch epnt.Network { case NetworkQUIC: - return mx.httpEndpointGetQUIC(ctx, epnt) + return mx.httpEndpointGetQUIC(ctx, epnt, jar) case NetworkTCP: - return mx.httpEndpointGetTCP(ctx, epnt) + return mx.httpEndpointGetTCP(ctx, epnt, jar) default: return nil, ErrUnknownHTTPEndpointNetwork } @@ -340,12 +342,12 @@ var ErrUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Schem // httpEndpointGetTCP specializes HTTPSEndpointGet for HTTP and HTTPS. func (mx *Measurer) httpEndpointGetTCP( - ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { switch epnt.URL.Scheme { case "http": - return mx.httpEndpointGetHTTP(ctx, epnt) + return mx.httpEndpointGetHTTP(ctx, epnt, jar) case "https": - return mx.httpEndpointGetHTTPS(ctx, epnt) + return mx.httpEndpointGetHTTPS(ctx, epnt, jar) default: return nil, ErrUnknownHTTPEndpointURLScheme } @@ -353,7 +355,7 @@ func (mx *Measurer) httpEndpointGetTCP( // httpEndpointGetHTTP specializes httpEndpointGetTCP for HTTP. 
func (mx *Measurer) httpEndpointGetHTTP( - ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err @@ -364,7 +366,7 @@ func (mx *Measurer) httpEndpointGetHTTP( return nil, err } defer conn.Close() // we own it - clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, + clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, jar, NewHTTPTransportWithConn(mx.Origin, mx.Logger, mx.DB, conn)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) @@ -372,7 +374,7 @@ func (mx *Measurer) httpEndpointGetHTTP( // httpEndpointGetHTTPS specializes httpEndpointGetTCP for HTTPS. func (mx *Measurer) httpEndpointGetHTTPS( - ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err @@ -387,7 +389,7 @@ func (mx *Measurer) httpEndpointGetHTTPS( return nil, err } defer conn.Close() // we own it - clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, + clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, jar, NewHTTPTransportWithTLSConn(mx.Origin, mx.Logger, mx.DB, conn)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) @@ -395,7 +397,7 @@ func (mx *Measurer) httpEndpointGetHTTPS( // httpEndpointGetQUIC specializes httpEndpointGetTCP for QUIC. 
func (mx *Measurer) httpEndpointGetQUIC( - ctx context.Context, epnt *HTTPEndpoint) (*http.Response, error) { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err @@ -411,7 +413,7 @@ func (mx *Measurer) httpEndpointGetQUIC( } // TODO(bassosimone): close session with correct message defer sess.CloseWithError(0, "") // we own it - clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, + clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, jar, NewHTTPTransportWithQUICSess(mx.Origin, mx.Logger, mx.DB, sess)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) @@ -420,7 +422,7 @@ func (mx *Measurer) httpEndpointGetQUIC( func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, epnt *HTTPEndpoint, req *http.Request) (*http.Response, error) { const timeout = 15 * time.Second - mx.infof("httpClientDo endpoint=%s method=%s url=%s headers=%+v timeout=%s...", + mx.Infof("httpClientDo endpoint=%s method=%s url=%s headers=%+v timeout=%s...", epnt.String(), req.Method, req.URL.String(), req.Header, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -482,9 +484,9 @@ var ErrLookupEndpoints = errors.New("endpoints lookup failed") func (mx *Measurer) LookupEndpoints( ctx context.Context, domain, port, address string) ([]*Endpoint, error) { udpAddrs, _ := mx.LookupHostUDP(ctx, domain, address) - mx.infof("LookupHostUDP addrs=%+v", udpAddrs) + mx.Infof("LookupHostUDP addrs=%+v", udpAddrs) systemAddrs, _ := mx.LookupHostSystem(ctx, domain) - mx.infof("LookupHostSystem addrs=%+v", systemAddrs) + mx.Infof("LookupHostSystem addrs=%+v", systemAddrs) var out []*Endpoint out = append(out, mx.parseLookupHostReply(port, systemAddrs)...) out = append(out, mx.parseLookupHostReply(port, udpAddrs)...) 
@@ -622,14 +624,14 @@ func (mx *Measurer) LookupHTTPEndpoints( } httpsSvcInfo, _ := mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), address) httpsSvcEndpoints := mx.parseHTTPSSvcReply(port, httpsSvcInfo) - mx.infof("LookupHTTPSSvcUDP endpoints=%+v", httpsSvcEndpoints) + mx.Infof("LookupHTTPSSvcUDP endpoints=%+v", httpsSvcEndpoints) endpoints, _ := mx.LookupEndpoints(ctx, URL.Hostname(), port, address) endpoints = append(endpoints, httpsSvcEndpoints...) wcthEndpoints, _ := mx.lookupWCTH(ctx, URL, endpoints, port) - mx.infof("lookupWCTH endpoints=%+v", wcthEndpoints) + mx.Infof("lookupWCTH endpoints=%+v", wcthEndpoints) endpoints = append(endpoints, wcthEndpoints...) endpoints = mx.mergeEndpoints(endpoints) - mx.infof("mergeEndpoints endpoints=%+v", endpoints) + mx.Infof("mergeEndpoints endpoints=%+v", endpoints) if len(endpoints) < 1 { return nil, ErrLookupEndpoints } @@ -708,7 +710,7 @@ func (mx *Measurer) alpnForHTTPEndpoint(network EndpointNetwork) []string { func (mx *Measurer) lookupWCTH(ctx context.Context, URL *url.URL, endpoints []*Endpoint, port string) ([]*Endpoint, error) { const timeout = 30 * time.Second - mx.infof("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", + mx.Infof("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", mx.WCTHURL, URL.String(), endpoints, port, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -800,12 +802,12 @@ func (mx *Measurer) parseHTTPSSvcReply(port string, info HTTPSSvc) (out []*Endpo return } -// infof formats and logs an informational message using mx.Logger. -func (mx *Measurer) infof(format string, v ...interface{}) { +// Infof formats and logs an informational message using mx.Logger. +func (mx *Measurer) Infof(format string, v ...interface{}) { mx.Logger.Infof(format, v...) } -// selectAllFromConnect selects all the entries inside of the +// SelectAllFromConnect selects all the entries inside of the // Connect table that have the given MeasurementID. 
// // Arguments @@ -815,7 +817,7 @@ func (mx *Measurer) infof(format string, v ...interface{}) { // Return value // // A possibly-empty list of events. -func (mx *Measurer) selectAllFromConnect(id int64) (out []*NetworkEvent) { +func (mx *Measurer) SelectAllFromConnect(id int64) (out []*NetworkEvent) { for _, ev := range mx.DB.SelectAllFromDial() { if id == ev.MeasurementID { out = append(out, ev) @@ -824,9 +826,9 @@ func (mx *Measurer) selectAllFromConnect(id int64) (out []*NetworkEvent) { return } -// selectAllFromReadWrite is like selectAllFromConnect except +// SelectAllFromReadWrite is like selectAllFromConnect except // that it works on the table named ReadWrite. -func (mx *Measurer) selectAllFromReadWrite(id int64) (out []*NetworkEvent) { +func (mx *Measurer) SelectAllFromReadWrite(id int64) (out []*NetworkEvent) { for _, ev := range mx.DB.SelectAllFromReadWrite() { if id == ev.MeasurementID { out = append(out, ev) @@ -835,9 +837,9 @@ func (mx *Measurer) selectAllFromReadWrite(id int64) (out []*NetworkEvent) { return } -// selectAllFromClose is like selectAllFromConnect except +// SelectAllFromClose is like selectAllFromConnect except // that it works on the table named Close. -func (mx *Measurer) selectAllFromClose(id int64) (out []*NetworkEvent) { +func (mx *Measurer) SelectAllFromClose(id int64) (out []*NetworkEvent) { for _, ev := range mx.DB.SelectAllFromClose() { if id == ev.MeasurementID { out = append(out, ev) @@ -846,9 +848,9 @@ func (mx *Measurer) selectAllFromClose(id int64) (out []*NetworkEvent) { return } -// selectAllFromTLSHandshake is like selectAllFromConnect except +// SelectAllFromTLSHandshake is like selectAllFromConnect except // that it works on the table named TLSHandshake. 
-func (mx *Measurer) selectAllFromTLSHandshake(id int64) (out []*TLSHandshakeEvent) { +func (mx *Measurer) SelectAllFromTLSHandshake(id int64) (out []*TLSHandshakeEvent) { for _, ev := range mx.DB.SelectAllFromTLSHandshake() { if id == ev.MeasurementID { out = append(out, ev) @@ -857,9 +859,9 @@ func (mx *Measurer) selectAllFromTLSHandshake(id int64) (out []*TLSHandshakeEven return } -// selectAllFromQUICHandshake is like selectAllFromConnect except +// SelectAllFromQUICHandshake is like selectAllFromConnect except // that it works on the table named QUICHandshake. -func (mx *Measurer) selectAllFromQUICHandshake(id int64) (out []*QUICHandshakeEvent) { +func (mx *Measurer) SelectAllFromQUICHandshake(id int64) (out []*QUICHandshakeEvent) { for _, ev := range mx.DB.SelectAllFromQUICHandshake() { if id == ev.MeasurementID { out = append(out, ev) @@ -868,9 +870,9 @@ func (mx *Measurer) selectAllFromQUICHandshake(id int64) (out []*QUICHandshakeEv return } -// selectAllFromLookupHost is like selectAllFromConnect except +// SelectAllFromLookupHost is like selectAllFromConnect except // that it works on the table named LookupHost. -func (mx *Measurer) selectAllFromLookupHost(id int64) (out []*LookupHostEvent) { +func (mx *Measurer) SelectAllFromLookupHost(id int64) (out []*LookupHostEvent) { for _, ev := range mx.DB.SelectAllFromLookupHost() { if id == ev.MeasurementID { out = append(out, ev) @@ -879,9 +881,9 @@ func (mx *Measurer) selectAllFromLookupHost(id int64) (out []*LookupHostEvent) { return } -// selectAllFromLookupHTTPSSvc is like selectAllFromConnect except +// SelectAllFromLookupHTTPSSvc is like selectAllFromConnect except // that it works on the table named LookupHTTPSSvc. 
-func (mx *Measurer) selectAllFromLookupHTTPSSvc(id int64) (out []*LookupHTTPSSvcEvent) { +func (mx *Measurer) SelectAllFromLookupHTTPSSvc(id int64) (out []*LookupHTTPSSvcEvent) { for _, ev := range mx.DB.SelectAllFromLookupHTTPSSvc() { if id == ev.MeasurementID { out = append(out, ev) @@ -890,9 +892,9 @@ func (mx *Measurer) selectAllFromLookupHTTPSSvc(id int64) (out []*LookupHTTPSSvc return } -// selectAllFromDNSRoundTrip is like selectAllFromConnect except +// SelectAllFromDNSRoundTrip is like selectAllFromConnect except // that it works on the table named DNSRoundTrip. -func (mx *Measurer) selectAllFromDNSRoundTrip(id int64) (out []*DNSRoundTripEvent) { +func (mx *Measurer) SelectAllFromDNSRoundTrip(id int64) (out []*DNSRoundTripEvent) { for _, ev := range mx.DB.SelectAllFromDNSRoundTrip() { if id == ev.MeasurementID { out = append(out, ev) @@ -901,9 +903,9 @@ func (mx *Measurer) selectAllFromDNSRoundTrip(id int64) (out []*DNSRoundTripEven return } -// selectAllFromHTTPRoundTrip is like selectAllFromConnect except +// SelectAllFromHTTPRoundTrip is like selectAllFromConnect except // that it works on the table named HTTPRoundTrip. -func (mx *Measurer) selectAllFromHTTPRoundTrip(id int64) (out []*HTTPRoundTripEvent) { +func (mx *Measurer) SelectAllFromHTTPRoundTrip(id int64) (out []*HTTPRoundTripEvent) { for _, ev := range mx.DB.SelectAllFromHTTPRoundTrip() { if id == ev.MeasurementID { out = append(out, ev) @@ -912,9 +914,9 @@ func (mx *Measurer) selectAllFromHTTPRoundTrip(id int64) (out []*HTTPRoundTripEv return } -// selectAllFromHTTPRedirect is like selectAllFromConnect except +// SelectAllFromHTTPRedirect is like selectAllFromConnect except // that it works on the table named HTTPRedirect. 
-func (mx *Measurer) selectAllFromHTTPRedirect(id int64) (out []*HTTPRedirectEvent) { +func (mx *Measurer) SelectAllFromHTTPRedirect(id int64) (out []*HTTPRedirectEvent) { for _, ev := range mx.DB.SelectAllFromHTTPRedirect() { if id == ev.MeasurementID { out = append(out, ev) diff --git a/internal/measurex/websteps.go b/internal/measurex/websteps.go deleted file mode 100644 index f93435fe68..0000000000 --- a/internal/measurex/websteps.go +++ /dev/null @@ -1,148 +0,0 @@ -package measurex - -import ( - "context" - "net/url" -) - -// TODO(bassosimone): we need a table for cookies since we cannot -// read them from redirects and we want an easy way to get them - -// WebStepResult contains the results of Measurer.WebStep. -type WebStepResult struct { - // URL is the URL this measurement refers to. - URL string - - // LookupEndpoints contains the LookupEndpoints measurement. - LookupEndpoints *WebStepLookupEndpoints - - // Endpoints contains all the endpoints measurements. - Endpoints []*WebStepEndpoint -} - -// WebStepBaseMeasurement is a measurement part of WebStepResult. -type WebStepBaseMeasurement struct { - // Connect contains all the connect operations. - Connect []*NetworkEvent - - // ReadWrite contains all the read and write operations. - ReadWrite []*NetworkEvent - - // Close contains all the close operations. - Close []*NetworkEvent - - // TLSHandshake contains all the TLS handshakes. - TLSHandshake []*TLSHandshakeEvent - - // QUICHandshake contains all the QUIC handshakes. - QUICHandshake []*QUICHandshakeEvent - - // LookupHost contains all the host lookups. - LookupHost []*LookupHostEvent - - // LookupHTTPSSvc contains all the HTTPSSvc lookups. - LookupHTTPSSvc []*LookupHTTPSSvcEvent - - // DNSRoundTrip contains all the DNS round trips. - DNSRoundTrip []*DNSRoundTripEvent - - // HTTPRoundTrip contains all the HTTP round trips. - HTTPRoundTrip []*HTTPRoundTripEvent - - // HTTPRedirect contains all the redirections. 
- HTTPRedirect []*HTTPRedirectEvent -} - -// WebStepLookupEndpoints describes the measurement of endpoints lookup. -type WebStepLookupEndpoints struct { - // Domain is the domain this measurement refers to. - Domain string - - *WebStepBaseMeasurement -} - -// WebStepEndpoint describes the measurement of a given endpoint. -type WebStepEndpoint struct { - // Endpoint is the endpoint this measurement refers to. - Endpoint string - - *WebStepBaseMeasurement -} - -// WebStep performs a simplified WebStep measurement. -// -// We define WebStep as the process by which we have an input URL -// and we perform the following operations: -// -// 1. lookup of all the possible endpoints for the URL; -// -// 2. measurement of each available endpoint. -// -// This function DOES NOT automatically follow redirections. Though -// we have enough information to know how to follow them. -// -// Arguments -// -// - ctx is the context to implement timeouts; -// -// - URL is the URL to measure; -// -// - dnsResolverUDP is the address of the DNS resolver endpoint -// using UDP we wish to use (e.g., "8.8.8.8:53"). -// -// Return value -// -// A WebStepResult structure where the Endpoints array may be -// empty if we have no been able to discover endpoints. 
-func (mx *Measurer) WebStep( - ctx context.Context, URL *url.URL, dnsResolverUDP string) (m *WebStepResult) { - mx.infof("WebStep url=%s dnsResolverUDP=%s", URL.String(), dnsResolverUDP) - m = &WebStepResult{ - URL: URL.String(), - } - mid := mx.NewMeasurement() - mx.infof("LookupHTTPEndpoints measurementID=%d url=%s dnsResolverUDP=%s", - mid, URL.String(), dnsResolverUDP) - epnts, _ := mx.LookupHTTPEndpoints(ctx, URL, dnsResolverUDP) - m.LookupEndpoints = &WebStepLookupEndpoints{ - Domain: URL.Hostname(), - WebStepBaseMeasurement: mx.newWebStepBaseMeasurement(mid), - } - for _, epnt := range epnts { - mid = mx.NewMeasurement() - mx.infof("HTTPEndpointGet measurementID=%d url=%s endpoint=%s dnsResolverUDP=%s", - mid, URL.String(), epnt.String(), dnsResolverUDP) - mx.HTTPEndpointGet(ctx, epnt) - m.Endpoints = append(m.Endpoints, &WebStepEndpoint{ - Endpoint: epnt.String(), - WebStepBaseMeasurement: mx.newWebStepBaseMeasurement(mid), - }) - } - return -} - -// newWebStepMeasurements creates a new WebStepMeasurement. -// -// To this end, it filters all possible events by MeasurementID. -// -// Arguments -// -// - id is the MeasurementID. -// -// Return value -// -// A valid WebStepMeasurement containing possibly empty lists of events. 
-func (mx *Measurer) newWebStepBaseMeasurement(id int64) *WebStepBaseMeasurement { - return &WebStepBaseMeasurement{ - Connect: mx.selectAllFromConnect(id), - ReadWrite: mx.selectAllFromReadWrite(id), - Close: mx.selectAllFromClose(id), - TLSHandshake: mx.selectAllFromTLSHandshake(id), - QUICHandshake: mx.selectAllFromQUICHandshake(id), - LookupHost: mx.selectAllFromLookupHost(id), - LookupHTTPSSvc: mx.selectAllFromLookupHTTPSSvc(id), - DNSRoundTrip: mx.selectAllFromDNSRoundTrip(id), - HTTPRoundTrip: mx.selectAllFromHTTPRoundTrip(id), - HTTPRedirect: mx.selectAllFromHTTPRedirect(id), - } -} From 7304342522db9289f18104ca89150c8563dd8310 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 20 Sep 2021 13:37:13 +0200 Subject: [PATCH 05/53] feat: from a single step to multiple steps --- .../engine/experiment/webstepsx/webstepsx.go | 59 +++++++++++++++++++ internal/measurex/http.go | 8 +-- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index 0a488c4236..f886741f40 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -71,6 +71,65 @@ type Endpoint struct { *BaseMeasurement } +// Run performs all the WebSteps step. +// +// We define "step" as the process by which we have an input URL +// and we perform the following operations: +// +// 1. lookup of all the possible endpoints for the URL; +// +// 2. measurement of each available endpoint. +// +// After a step has run, we search for all the redirection URLs +// and we run a new step with the new URLs. +// +// Arguments +// +// - ctx is the context to implement timeouts; +// +// - mx is the measurex.Measurer to use; +// +// - URL is the URL from which we start measuring; +// +// - dnsResolverUDP is the address of the DNS resolver endpoint +// using UDP we wish to use (e.g., "8.8.8.8:53"). 
+// +// Return value +// +// A list of SingleStep structures where the Endpoints array may be empty +// if we have no been able to discover endpoints. +func Run(ctx context.Context, mx *measurex.Measurer, + URL *url.URL, dnsResolverUDP string) (v []*SingleStep) { + jar := measurex.NewCookieJar() + inputs := []*url.URL{URL} +Loop: + for len(inputs) > 0 { + dups := make(map[string]*url.URL) + for _, input := range inputs { + select { + case <-ctx.Done(): + break Loop + default: + mx.Infof("RunSingleStep url=%s dnsResolverUDP=%s jar=%+v", + input, dnsResolverUDP, jar) + m := RunSingleStep(ctx, mx, jar, input, dnsResolverUDP) + v = append(v, m) + for _, epnt := range m.Endpoints { + for _, redir := range epnt.HTTPRedirect { + dups[redir.Location.String()] = redir.Location + } + } + } + } + inputs = nil + for _, input := range dups { + mx.Infof("newRedirection %s", input) + inputs = append(inputs, input) + } + } + return +} + // RunSingleStep performs a single WebSteps step. // // We define "step" as the process by which we have an input URL diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 3051be699b..0fc443a1c8 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -206,10 +206,10 @@ type HTTPRedirectEvent struct { ConnID int64 // URL is the URL triggering the redirect. - URL string + URL *url.URL // Location is the URL to which we're redirected. - Location string + Location *url.URL // Cookies contains the cookies for Location. 
Cookies []*http.Cookie @@ -242,8 +242,8 @@ func newHTTPClient(origin Origin, db DB, Origin: origin, MeasurementID: db.MeasurementID(), ConnID: txp.ConnID(), - URL: via[0].URL.String(), - Location: req.URL.String(), + URL: via[0].URL, // bug in Go stdlib if we crash here + Location: req.URL, Cookies: cookiejar.Cookies(req.URL), Error: err, }) From 2b33c258d2846c3a26ac3013e00080077d013a18 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 20 Sep 2021 14:49:51 +0200 Subject: [PATCH 06/53] feat: add oddity and holding logger --- internal/measurex/bogon.go | 48 +++++++++++++++ internal/measurex/dialer.go | 17 ++++++ internal/measurex/logger.go | 111 +++++++++++++++++++++++++++++++++- internal/measurex/quic.go | 15 +++++ internal/measurex/resolver.go | 36 +++++++++++ internal/measurex/tls.go | 17 ++++++ 6 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 internal/measurex/bogon.go diff --git a/internal/measurex/bogon.go b/internal/measurex/bogon.go new file mode 100644 index 0000000000..2c01d3a09a --- /dev/null +++ b/internal/measurex/bogon.go @@ -0,0 +1,48 @@ +package measurex + +import ( + "net" + + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +var privateIPBlocks []*net.IPNet + +func init() { + for _, cidr := range []string{ + "0.0.0.0/8", // "This" network (however, Linux...) 
+ "10.0.0.0/8", // RFC1918 + "100.64.0.0/10", // Carrier grade NAT + "127.0.0.0/8", // IPv4 loopback + "169.254.0.0/16", // RFC3927 link-local + "172.16.0.0/12", // RFC1918 + "192.168.0.0/16", // RFC1918 + "224.0.0.0/4", // Multicast + "::1/128", // IPv6 loopback + "fe80::/10", // IPv6 link-local + "fc00::/7", // IPv6 unique local addr + } { + _, block, err := net.ParseCIDR(cidr) + runtimex.PanicOnError(err, "net.ParseCIDR failed") + privateIPBlocks = append(privateIPBlocks, block) + } +} + +func isPrivate(ip net.IP) bool { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + for _, block := range privateIPBlocks { + if block.Contains(ip) { + return true + } + } + return false +} + +// IsBogon returns whether if an IP address is bogon. Passing to this +// function a non-IP address causes it to return bogon. +func IsBogon(address string) bool { + ip := net.ParseIP(address) + return ip == nil || isPrivate(ip) +} diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index 2b82200447..212b564ffd 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -6,6 +6,7 @@ import ( "time" "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) // Conn is the connection type we use. 
@@ -57,6 +58,7 @@ type NetworkEvent struct { Started time.Time Finished time.Time Error error + Oddity Oddity Count int } @@ -77,6 +79,7 @@ func (d *dialerx) DialContext( Started: started, Finished: finished, Error: err, + Oddity: d.computeOddity(err), Count: 0, }) if err != nil { @@ -100,6 +103,20 @@ func (c *dialerx) localAddrIfNotNil(conn net.Conn) (addr string) { return } +func (c *dialerx) computeOddity(err error) Oddity { + if err == nil { + return "" + } + switch err.Error() { + case errorsx.FailureGenericTimeoutError: + return OddityTCPConnectTimeout + case errorsx.FailureConnectionRefused: + return OddityTCPConnectRefused + default: + return OddityTCPConnectOher + } +} + type connx struct { net.Conn db DB diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go index 9c10995ca5..72dc81bdd3 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -1,6 +1,13 @@ package measurex -import "github.com/ooni/probe-cli/v3/internal/netxlite" +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/ooni/probe-cli/v3/internal/netxlite" +) // Logger is the logger type we use. type Logger interface { @@ -9,3 +16,105 @@ type Logger interface { Info(msg string) Infof(format string, v ...interface{}) } + +// HoldingLogger is a Logger that holds messages for a bunch +// of milliseconds and thene emits them in a batch. +// +// This kind of logger improves the UX in case there are many +// timeouts and doesn't overwhelm the screen otherwise. +// +// Make sure you call HoldingLogger.Stop when done with it. +type HoldingLogger struct { + begin time.Time + cancel context.CancelFunc + ch chan *holdingLoggerEntry + fin chan interface{} + logger Logger + once *sync.Once +} + +type holdingLoggerEntry struct { + f func(msg string) + msg string + t time.Duration +} + +// Debug implements Logger.Debug. 
+func (hl *HoldingLogger) Debug(message string) { + hl.ch <- &holdingLoggerEntry{ + f: hl.logger.Debug, + msg: message, + t: time.Since(hl.begin), + } +} + +// Debugf implements Logger.Debugf. +func (hl *HoldingLogger) Debugf(format string, v ...interface{}) { + hl.Debug(fmt.Sprintf(format, v...)) +} + +// Info implements Logger.Info. +func (hl *HoldingLogger) Info(message string) { + hl.ch <- &holdingLoggerEntry{ + f: hl.logger.Info, + msg: message, + t: time.Since(hl.begin), + } +} + +// Infof implements Logger.Infof. +func (hl *HoldingLogger) Infof(format string, v ...interface{}) { + hl.Info(fmt.Sprintf(format, v...)) +} + +// NewHoldingLogger is a factory that creates a new HoldingLogger +// using the given logger for emitting messages. +func NewHoldingLogger(logger Logger) *HoldingLogger { + ctx, cancel := context.WithCancel(context.Background()) + hl := &HoldingLogger{ + begin: time.Now(), + cancel: cancel, + ch: make(chan *holdingLoggerEntry), + fin: make(chan interface{}), + logger: logger, + once: &sync.Once{}, + } + go hl.loop(ctx) + return hl +} + +// Stop stops the HoldingLogger and waits for the +// background goroutine to terminate logging. 
+func (hl *HoldingLogger) Stop() { + hl.once.Do(func() { + hl.cancel() + <-hl.fin + }) +} + +// loop runs the HoldingLogger main loop +func (hl *HoldingLogger) loop(ctx context.Context) { + interval := 500 * time.Millisecond + var all []*holdingLoggerEntry + defer close(hl.fin) + ticker := time.NewTicker(interval) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + hl.emit(all) + return + case <-ticker.C: + hl.emit(all) + case entry := <-hl.ch: + all = append(all, entry) + } + } +} + +// emit emits all the messages inside `all` +func (hl *HoldingLogger) emit(all []*holdingLoggerEntry) { + for _, entry := range all { + entry.f(fmt.Sprintf("[%8.3f] %s", entry.t.Seconds(), entry.msg)) + } +} diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index 95f76bffd8..f6aa2d26f6 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -8,6 +8,7 @@ import ( "github.com/lucas-clemente/quic-go" "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" "github.com/ooni/probe-cli/v3/internal/netxlite/quicx" ) @@ -160,6 +161,7 @@ type QUICHandshakeEvent struct { Started time.Time Finished time.Time Error error + Oddity Oddity TLSVersion string CipherSuite string NegotiatedProto string @@ -217,6 +219,7 @@ func (qh *quicDialerx) DialContext(ctx context.Context, Started: started, Finished: finished, Error: err, + Oddity: qh.computeOddity(err), TLSVersion: netxlite.TLSVersionString(state.Version), CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), NegotiatedProto: state.NegotiatedProtocol, @@ -229,6 +232,18 @@ func (qh *quicDialerx) DialContext(ctx context.Context, EarlySession: sess, connID: qh.connIDIfNotNil(localAddr)}, nil } +func (qh *quicDialerx) computeOddity(err error) Oddity { + if err == nil { + return "" + } + switch err.Error() { + case errorsx.FailureGenericTimeoutError: + return OddityQUICHandshakeTimeout + default: + return OddityQUICHandshakeOther + } +} + type 
quicEarlySessionx struct { quic.EarlySession connID int64 diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 15245885c9..5f9a2f473f 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -6,6 +6,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" + "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) // HTTPSSvc is the result returned by HTTPSSvc queries. @@ -37,6 +38,7 @@ type LookupHostEvent struct { Started time.Time Finished time.Time Error error + Oddity Oddity Addrs []string } @@ -53,11 +55,33 @@ func (r *resolverx) LookupHost(ctx context.Context, domain string) ([]string, er Started: started, Finished: finished, Error: err, + Oddity: r.computeOddityLookupHost(addrs, err), Addrs: addrs, }) return addrs, err } +func (r *resolverx) computeOddityLookupHost(addrs []string, err error) Oddity { + if err == nil { + for _, addr := range addrs { + if IsBogon(addr) { + return OddityDNSLookupBogon + } + } + return "" + } + switch err.Error() { + case errorsx.FailureGenericTimeoutError: + return OddityDNSLookupTimeout + case errorsx.FailureDNSNXDOMAINError: + return OddityDNSLookupNXDOMAIN + case errorsx.FailureDNSRefusedError: + return OddityDNSLookupRefused + default: + return OddityDNSLookupOther + } +} + // LookupHTTPSSvcEvent is the event emitted when we perform // an HTTPSSvc DNS query for a domain. 
type LookupHTTPSSvcEvent struct { @@ -67,6 +91,7 @@ type LookupHTTPSSvcEvent struct { Started time.Time Finished time.Time Error error + Oddity Oddity IPv4 []string IPv6 []string ALPN []string @@ -83,6 +108,7 @@ func (r *resolverx) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain strin Started: started, Finished: finished, Error: err, + Oddity: Oddity(r.computeOddityHTTPSSvc(https, err)), } if err == nil { ev.IPv4 = https.IPv4Hint() @@ -92,3 +118,13 @@ func (r *resolverx) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain strin r.db.InsertIntoLookupHTTPSSvc(ev) return https, err } + +func (r *resolverx) computeOddityHTTPSSvc(https HTTPSSvc, err error) Oddity { + if err != nil { + return r.computeOddityLookupHost(nil, err) + } + var addrs []string + addrs = append(addrs, https.IPv4Hint()...) + addrs = append(addrs, https.IPv6Hint()...) + return r.computeOddityLookupHost(addrs, nil) +} diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index 67e3e6b967..af8c346725 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -8,6 +8,7 @@ import ( "time" "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) // TLSConn is the TLS conn type we use. 
@@ -52,6 +53,7 @@ type TLSHandshakeEvent struct { Started time.Time Finished time.Time Error error + Oddity Oddity TLSVersion string CipherSuite string NegotiatedProto string @@ -80,6 +82,7 @@ func (thx *tlsHandshakerx) Handshake(ctx context.Context, Started: started, Finished: finished, Error: err, + Oddity: thx.computeOddity(err), TLSVersion: netxlite.TLSVersionString(state.Version), CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), NegotiatedProto: state.NegotiatedProtocol, @@ -92,6 +95,20 @@ func (thx *tlsHandshakerx) Handshake(ctx context.Context, TLSConn: tconn.(netxlite.TLSConn), connID: conn.ConnID()}, nil } +func (thx *tlsHandshakerx) computeOddity(err error) Oddity { + if err == nil { + return "" + } + switch err.Error() { + case errorsx.FailureGenericTimeoutError: + return OddityTLSHandshakeTimeout + case errorsx.FailureConnectionReset: + return OddityTLSHandshakeReset + default: + return OddityTLSHandshakeOther + } +} + type tlsConnx struct { netxlite.TLSConn connID int64 From 7bda89e8478b03e8b4c68efc5f21e68bd248475a Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 20 Sep 2021 14:50:14 +0200 Subject: [PATCH 07/53] forgot to commit oddity.go --- internal/measurex/oddity.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 internal/measurex/oddity.go diff --git a/internal/measurex/oddity.go b/internal/measurex/oddity.go new file mode 100644 index 0000000000..7be8734849 --- /dev/null +++ b/internal/measurex/oddity.go @@ -0,0 +1,31 @@ +package measurex + +// Oddity is an unexpected result on the probe or +// or test helper side during a measurement. We will +// promote the oddity to anomaly if the probe and +// the test helper see different results. 
+type Oddity string + +// This enumeration lists all known oddities +var ( + // tcp.connect + OddityTCPConnectTimeout = Oddity("tcp.connect.timeout") + OddityTCPConnectRefused = Oddity("tcp.connect.refused") + OddityTCPConnectOher = Oddity("tcp.connect.other") + + // tls.handshake + OddityTLSHandshakeTimeout = Oddity("tls.handshake.timeout") + OddityTLSHandshakeReset = Oddity("tls.handshake.reset") + OddityTLSHandshakeOther = Oddity("tls.handshake.other") + + // quic.handshake + OddityQUICHandshakeTimeout = Oddity("quic.handshake.timeout") + OddityQUICHandshakeOther = Oddity("quic.handshake.other") + + // dns.lookup + OddityDNSLookupNXDOMAIN = Oddity("dns.lookup.nxdomain") + OddityDNSLookupTimeout = Oddity("dns.lookup.timeout") + OddityDNSLookupRefused = Oddity("dns.lookup.refused") + OddityDNSLookupBogon = Oddity("dns.lookup.bogon") + OddityDNSLookupOther = Oddity("dns.lookup.other") +) From 15da819af2128ae762214fbd7da8d81926cfef54 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 20 Sep 2021 15:43:09 +0200 Subject: [PATCH 08/53] start doing some QA and fixing bugs --- .../engine/experiment/webstepsx/webstepsx.go | 86 ++++-------- internal/measurex/dialer.go | 2 + internal/measurex/http.go | 11 ++ internal/measurex/logger.go | 107 --------------- internal/measurex/measurer.go | 123 ++++++++++++++++-- internal/measurex/oddity.go | 18 ++- internal/measurex/quic.go | 2 + 7 files changed, 167 insertions(+), 182 deletions(-) diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index f886741f40..b6efe0299c 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -15,6 +15,9 @@ type SingleStep struct { // URL is the URL this measurement refers to. URL string + // Oddities contains all the oddities of all endpoints. + Oddities []measurex.Oddity + // LookupEndpoints contains the LookupEndpoints measurement. 
LookupEndpoints *LookupEndpoints @@ -22,37 +25,23 @@ type SingleStep struct { Endpoints []*Endpoint } -// BaseMeasurement is a measurement part of Result. -type BaseMeasurement struct { - // Connect contains all the connect operations. - Connect []*measurex.NetworkEvent - - // ReadWrite contains all the read and write operations. - ReadWrite []*measurex.NetworkEvent - - // Close contains all the close operations. - Close []*measurex.NetworkEvent - - // TLSHandshake contains all the TLS handshakes. - TLSHandshake []*measurex.TLSHandshakeEvent - - // QUICHandshake contains all the QUIC handshakes. - QUICHandshake []*measurex.QUICHandshakeEvent - - // LookupHost contains all the host lookups. - LookupHost []*measurex.LookupHostEvent - - // LookupHTTPSSvc contains all the HTTPSSvc lookups. - LookupHTTPSSvc []*measurex.LookupHTTPSSvcEvent - - // DNSRoundTrip contains all the DNS round trips. - DNSRoundTrip []*measurex.DNSRoundTripEvent - - // HTTPRoundTrip contains all the HTTP round trips. - HTTPRoundTrip []*measurex.HTTPRoundTripEvent - - // HTTPRedirect contains all the redirections. - HTTPRedirect []*measurex.HTTPRedirectEvent +// computeOddities computes the Oddities field my merging all +// the oddities appearing in the Endpoints list. +func (ss *SingleStep) computeOddities() { + unique := make(map[measurex.Oddity]bool) + for _, oddity := range ss.LookupEndpoints.Oddities { + unique[oddity] = true + } + for _, epnt := range ss.Endpoints { + for _, oddity := range epnt.Oddities { + unique[oddity] = true + } + } + for oddity := range unique { + if oddity != "" { + ss.Oddities = append(ss.Oddities, oddity) + } + } } // LookupEndpoints describes the measurement of endpoints lookup. @@ -60,7 +49,7 @@ type LookupEndpoints struct { // Domain is the domain this measurement refers to. Domain string - *BaseMeasurement + *measurex.BaseMeasurement } // Endpoint describes the measurement of a given endpoint. 
@@ -68,7 +57,7 @@ type Endpoint struct { // Endpoint is the endpoint this measurement refers to. Endpoint string - *BaseMeasurement + *measurex.BaseMeasurement } // Run performs all the WebSteps step. @@ -162,13 +151,14 @@ Loop: func RunSingleStep(ctx context.Context, mx *measurex.Measurer, cookiekar http.CookieJar, URL *url.URL, dnsResolverUDP string) (m *SingleStep) { m = &SingleStep{URL: URL.String()} + defer m.computeOddities() mid := mx.NewMeasurement() mx.Infof("LookupHTTPEndpoints measurementID=%d url=%s dnsResolverUDP=%s", mid, URL.String(), dnsResolverUDP) epnts, _ := mx.LookupHTTPEndpoints(ctx, URL, dnsResolverUDP) m.LookupEndpoints = &LookupEndpoints{ Domain: URL.Hostname(), - BaseMeasurement: newBaseMeasurement(mx, mid), + BaseMeasurement: mx.NewBaseMeasurement(mid), } for _, epnt := range epnts { mid = mx.NewMeasurement() @@ -177,34 +167,8 @@ func RunSingleStep(ctx context.Context, mx *measurex.Measurer, mx.HTTPEndpointGet(ctx, epnt, cookiekar) m.Endpoints = append(m.Endpoints, &Endpoint{ Endpoint: epnt.String(), - BaseMeasurement: newBaseMeasurement(mx, mid), + BaseMeasurement: mx.NewBaseMeasurement(mid), }) } return } - -// newBaseMeasurement creates a new Base Measurement. -// -// To this end, it filters all possible events by MeasurementID. -// -// Arguments -// -// - id is the MeasurementID. -// -// Return value -// -// A valid BaseMeasurement containing possibly empty lists of events. 
-func newBaseMeasurement(mx *measurex.Measurer, id int64) *BaseMeasurement { - return &BaseMeasurement{ - Connect: mx.SelectAllFromConnect(id), - ReadWrite: mx.SelectAllFromReadWrite(id), - Close: mx.SelectAllFromClose(id), - TLSHandshake: mx.SelectAllFromTLSHandshake(id), - QUICHandshake: mx.SelectAllFromQUICHandshake(id), - LookupHost: mx.SelectAllFromLookupHost(id), - LookupHTTPSSvc: mx.SelectAllFromLookupHTTPSSvc(id), - DNSRoundTrip: mx.SelectAllFromDNSRoundTrip(id), - HTTPRoundTrip: mx.SelectAllFromHTTPRoundTrip(id), - HTTPRedirect: mx.SelectAllFromHTTPRedirect(id), - } -} diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index 212b564ffd..e440630ff8 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -112,6 +112,8 @@ func (c *dialerx) computeOddity(err error) Oddity { return OddityTCPConnectTimeout case errorsx.FailureConnectionRefused: return OddityTCPConnectRefused + case errorsx.FailureHostUnreachable: + return OddityTCPConnectHostUnreachable default: return OddityTCPConnectOher } diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 0fc443a1c8..b26af7f99d 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -91,6 +91,7 @@ type HTTPRoundTripEvent struct { Started time.Time Finished time.Time Error error + Oddity Oddity ResponseStatus int ResponseHeader http.Header ResponseBodySnapshot []byte @@ -119,6 +120,16 @@ func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) txp.db.InsertIntoHTTPRoundTrip(rt) return nil, err } + switch { + case resp.StatusCode == 403: + rt.Oddity = OddityStatus403 + case resp.StatusCode == 404: + rt.Oddity = OddityStatus404 + case resp.StatusCode == 503: + rt.Oddity = OddityStatus503 + case resp.StatusCode >= 400: + rt.Oddity = OddityStatusOther + } rt.ResponseStatus = resp.StatusCode rt.ResponseHeader = resp.Header r := io.LimitReader(resp.Body, maxBodySnapshot) diff --git a/internal/measurex/logger.go 
b/internal/measurex/logger.go index 72dc81bdd3..a5d529c8a8 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -1,11 +1,6 @@ package measurex import ( - "context" - "fmt" - "sync" - "time" - "github.com/ooni/probe-cli/v3/internal/netxlite" ) @@ -16,105 +11,3 @@ type Logger interface { Info(msg string) Infof(format string, v ...interface{}) } - -// HoldingLogger is a Logger that holds messages for a bunch -// of milliseconds and thene emits them in a batch. -// -// This kind of logger improves the UX in case there are many -// timeouts and doesn't overwhelm the screen otherwise. -// -// Make sure you call HoldingLogger.Stop when done with it. -type HoldingLogger struct { - begin time.Time - cancel context.CancelFunc - ch chan *holdingLoggerEntry - fin chan interface{} - logger Logger - once *sync.Once -} - -type holdingLoggerEntry struct { - f func(msg string) - msg string - t time.Duration -} - -// Debug implements Logger.Debug. -func (hl *HoldingLogger) Debug(message string) { - hl.ch <- &holdingLoggerEntry{ - f: hl.logger.Debug, - msg: message, - t: time.Since(hl.begin), - } -} - -// Debugf implements Logger.Debugf. -func (hl *HoldingLogger) Debugf(format string, v ...interface{}) { - hl.Debug(fmt.Sprintf(format, v...)) -} - -// Info implements Logger.Info. -func (hl *HoldingLogger) Info(message string) { - hl.ch <- &holdingLoggerEntry{ - f: hl.logger.Info, - msg: message, - t: time.Since(hl.begin), - } -} - -// Infof implements Logger.Infof. -func (hl *HoldingLogger) Infof(format string, v ...interface{}) { - hl.Info(fmt.Sprintf(format, v...)) -} - -// NewHoldingLogger is a factory that creates a new HoldingLogger -// using the given logger for emitting messages. 
-func NewHoldingLogger(logger Logger) *HoldingLogger { - ctx, cancel := context.WithCancel(context.Background()) - hl := &HoldingLogger{ - begin: time.Now(), - cancel: cancel, - ch: make(chan *holdingLoggerEntry), - fin: make(chan interface{}), - logger: logger, - once: &sync.Once{}, - } - go hl.loop(ctx) - return hl -} - -// Stop stops the HoldingLogger and waits for the -// background goroutine to terminate logging. -func (hl *HoldingLogger) Stop() { - hl.once.Do(func() { - hl.cancel() - <-hl.fin - }) -} - -// loop runs the HoldingLogger main loop -func (hl *HoldingLogger) loop(ctx context.Context) { - interval := 500 * time.Millisecond - var all []*holdingLoggerEntry - defer close(hl.fin) - ticker := time.NewTicker(interval) - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - hl.emit(all) - return - case <-ticker.C: - hl.emit(all) - case entry := <-hl.ch: - all = append(all, entry) - } - } -} - -// emit emits all the messages inside `all` -func (hl *HoldingLogger) emit(all []*holdingLoggerEntry) { - for _, entry := range all { - entry.f(fmt.Sprintf("[%8.3f] %s", entry.t.Seconds(), entry.msg)) - } -} diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index f8054b746d..bfe5c99269 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -124,15 +124,12 @@ func (d *netxliteDialerAdapter) DialContext( // // A Resolver. func (mx *Measurer) newResolverUDP(address string) Resolver { - // TODO(bassosimone): the resolver we compose here is missing - // some capabilities like IDNA. We should instead have the proper - // factory inside netxlite for creating this resolver. 
return WrapResolver(mx.Origin, mx.DB, &netxlite.ResolverLogger{ - Resolver: dnsx.NewSerialResolver( + Resolver: netxlite.WrapResolver(mx.Logger, dnsx.NewSerialResolver( WrapDNSXRoundTripper(mx.DB, dnsx.NewDNSOverUDP( &netxliteDialerAdapter{mx.newDialerWithSystemResolver()}, address, - ))), + )))), Logger: mx.Logger, }) } @@ -600,7 +597,8 @@ func (e *HTTPEndpoint) String() string { // LookupHTTPEndpoints is like LookupEndpoints but performs a // specialized lookup for an HTTP/HTTPS URL. Such a lookup also -// includes querying the WCTH to discover extra endpoints. +// includes querying the WCTH to discover extra endpoints. If +// the URL scheme is HTTPS we also query for HTTPSSvc. // // Arguments // @@ -622,9 +620,13 @@ func (mx *Measurer) LookupHTTPEndpoints( if err != nil { return nil, err } - httpsSvcInfo, _ := mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), address) - httpsSvcEndpoints := mx.parseHTTPSSvcReply(port, httpsSvcInfo) - mx.Infof("LookupHTTPSSvcUDP endpoints=%+v", httpsSvcEndpoints) + var httpsSvcEndpoints []*Endpoint + switch URL.Scheme { + case "https": // only lookup for HTTP3 endpoints when scheme is HTTPS + info, _ := mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), address) + httpsSvcEndpoints = mx.parseHTTPSSvcReply(port, info) + mx.Infof("LookupHTTPSSvcUDP endpoints=%+v", httpsSvcEndpoints) + } endpoints, _ := mx.LookupEndpoints(ctx, URL.Hostname(), port, address) endpoints = append(endpoints, httpsSvcEndpoints...) wcthEndpoints, _ := mx.lookupWCTH(ctx, URL, endpoints, port) @@ -924,3 +926,106 @@ func (mx *Measurer) SelectAllFromHTTPRedirect(id int64) (out []*HTTPRedirectEven } return } + +// BaseMeasurement groups all the events that have the same MeasurementID. +type BaseMeasurement struct { + // Oddities lists all the oddities inside this measurement. See + // NewBaseMeasurement's docs for more info. + Oddities []Oddity + + // Connect contains all the connect operations. 
+ Connect []*NetworkEvent + + // ReadWrite contains all the read and write operations. + ReadWrite []*NetworkEvent + + // Close contains all the close operations. + Close []*NetworkEvent + + // TLSHandshake contains all the TLS handshakes. + TLSHandshake []*TLSHandshakeEvent + + // QUICHandshake contains all the QUIC handshakes. + QUICHandshake []*QUICHandshakeEvent + + // LookupHost contains all the host lookups. + LookupHost []*LookupHostEvent + + // LookupHTTPSSvc contains all the HTTPSSvc lookups. + LookupHTTPSSvc []*LookupHTTPSSvcEvent + + // DNSRoundTrip contains all the DNS round trips. + DNSRoundTrip []*DNSRoundTripEvent + + // HTTPRoundTrip contains all the HTTP round trips. + HTTPRoundTrip []*HTTPRoundTripEvent + + // HTTPRedirect contains all the redirections. + HTTPRedirect []*HTTPRedirectEvent +} + +// NewBaseMeasurement creates a new Base Measurement by gathering all +// the events inside the database with a given MeasurementID. +// +// As part of the process, this function computes the Oddities field by +// gathering the oddities of the following operations: +// +// - connect; +// +// - tlsHandshake; +// +// - quicHandshake; +// +// - lookupHost; +// +// - httpRoundTrip. +// +// Arguments +// +// - id is the MeasurementID. +// +// Return value +// +// A valid BaseMeasurement containing possibly empty lists of events. 
+func (mx *Measurer) NewBaseMeasurement(id int64) *BaseMeasurement { + m := &BaseMeasurement{ + Connect: mx.SelectAllFromConnect(id), + ReadWrite: mx.SelectAllFromReadWrite(id), + Close: mx.SelectAllFromClose(id), + TLSHandshake: mx.SelectAllFromTLSHandshake(id), + QUICHandshake: mx.SelectAllFromQUICHandshake(id), + LookupHost: mx.SelectAllFromLookupHost(id), + LookupHTTPSSvc: mx.SelectAllFromLookupHTTPSSvc(id), + DNSRoundTrip: mx.SelectAllFromDNSRoundTrip(id), + HTTPRoundTrip: mx.SelectAllFromHTTPRoundTrip(id), + HTTPRedirect: mx.SelectAllFromHTTPRedirect(id), + } + m.computeOddities() + return m +} + +// computeOddities computes all the oddities inside m. See +// NewBaseMeasurement's docs for more details. +func (m *BaseMeasurement) computeOddities() { + unique := make(map[Oddity]bool) + for _, ev := range m.Connect { + unique[ev.Oddity] = true + } + for _, ev := range m.TLSHandshake { + unique[ev.Oddity] = true + } + for _, ev := range m.QUICHandshake { + unique[ev.Oddity] = true + } + for _, ev := range m.LookupHost { + unique[ev.Oddity] = true + } + for _, ev := range m.HTTPRoundTrip { + unique[ev.Oddity] = true + } + for key := range unique { + if key != "" { + m.Oddities = append(m.Oddities, key) + } + } +} diff --git a/internal/measurex/oddity.go b/internal/measurex/oddity.go index 7be8734849..b97f98d350 100644 --- a/internal/measurex/oddity.go +++ b/internal/measurex/oddity.go @@ -9,9 +9,10 @@ type Oddity string // This enumeration lists all known oddities var ( // tcp.connect - OddityTCPConnectTimeout = Oddity("tcp.connect.timeout") - OddityTCPConnectRefused = Oddity("tcp.connect.refused") - OddityTCPConnectOher = Oddity("tcp.connect.other") + OddityTCPConnectTimeout = Oddity("tcp.connect.timeout") + OddityTCPConnectRefused = Oddity("tcp.connect.refused") + OddityTCPConnectHostUnreachable = Oddity("tcp.connect.host_unreachable") + OddityTCPConnectOher = Oddity("tcp.connect.other") // tls.handshake OddityTLSHandshakeTimeout = 
Oddity("tls.handshake.timeout") @@ -19,8 +20,9 @@ var ( OddityTLSHandshakeOther = Oddity("tls.handshake.other") // quic.handshake - OddityQUICHandshakeTimeout = Oddity("quic.handshake.timeout") - OddityQUICHandshakeOther = Oddity("quic.handshake.other") + OddityQUICHandshakeTimeout = Oddity("quic.handshake.timeout") + OddityQUICHandshakeHostUnreachable = Oddity("quic.handshake.host_unreachable") + OddityQUICHandshakeOther = Oddity("quic.handshake.other") // dns.lookup OddityDNSLookupNXDOMAIN = Oddity("dns.lookup.nxdomain") @@ -28,4 +30,10 @@ var ( OddityDNSLookupRefused = Oddity("dns.lookup.refused") OddityDNSLookupBogon = Oddity("dns.lookup.bogon") OddityDNSLookupOther = Oddity("dns.lookup.other") + + // http.status + OddityStatus403 = Oddity("http.status.403") + OddityStatus404 = Oddity("http.status.404") + OddityStatus503 = Oddity("http.status.503") + OddityStatusOther = Oddity("http.status.other") ) diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index f6aa2d26f6..06b20f87b9 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -239,6 +239,8 @@ func (qh *quicDialerx) computeOddity(err error) Oddity { switch err.Error() { case errorsx.FailureGenericTimeoutError: return OddityQUICHandshakeTimeout + case errorsx.FailureHostUnreachable: + return OddityQUICHandshakeHostUnreachable default: return OddityQUICHandshakeOther } From f1db43adfb3d5ae34e0760a623b3d675a3dc98d3 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 00:40:52 +0200 Subject: [PATCH 09/53] heavy refactoring --- .../engine/experiment/webstepsx/webstepsx.go | 136 ++- internal/measurex/bogon.go | 14 +- internal/measurex/db.go | 46 +- internal/measurex/dialer.go | 114 ++- internal/measurex/dnsx.go | 38 +- internal/measurex/http.go | 145 ++-- internal/measurex/logger.go | 3 +- internal/measurex/measurement.go | 115 +++ internal/measurex/measurer.go | 820 +++--------------- internal/measurex/oddity.go | 2 +- internal/measurex/origin.go | 6 +- 
internal/measurex/quic.go | 67 +- internal/measurex/resolver.go | 62 +- internal/measurex/saver.go | 367 +++++++- internal/measurex/tls.go | 21 +- internal/measurex/wcth.go | 21 +- 16 files changed, 943 insertions(+), 1034 deletions(-) create mode 100644 internal/measurex/measurement.go diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index b6efe0299c..9f28fcaa01 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -13,27 +13,33 @@ import ( // SingleStep contains the results of a single web step. type SingleStep struct { // URL is the URL this measurement refers to. - URL string + URL string `json:"url"` // Oddities contains all the oddities of all endpoints. - Oddities []measurex.Oddity + Oddities []measurex.Oddity `json:"oddities"` - // LookupEndpoints contains the LookupEndpoints measurement. - LookupEndpoints *LookupEndpoints + // DNS contains all the DNS measurements. + DNS []*measurex.Measurement `json:"dns"` - // Endpoints contains all the endpoints measurements. - Endpoints []*Endpoint + // Control contains all the control measurements. + Control []*measurex.Measurement `json:"control"` + + // Endpoints contains a measurement for each endpoints (which + // may be empty if DNS lookup failed). + Endpoints []*measurex.Measurement `json:"endpoints"` } // computeOddities computes the Oddities field my merging all // the oddities appearing in the Endpoints list. 
func (ss *SingleStep) computeOddities() { unique := make(map[measurex.Oddity]bool) - for _, oddity := range ss.LookupEndpoints.Oddities { - unique[oddity] = true + for _, entry := range ss.DNS { + for _, oddity := range entry.Oddities { + unique[oddity] = true + } } - for _, epnt := range ss.Endpoints { - for _, oddity := range epnt.Oddities { + for _, entry := range ss.Endpoints { + for _, oddity := range entry.Oddities { unique[oddity] = true } } @@ -44,20 +50,19 @@ func (ss *SingleStep) computeOddities() { } } -// LookupEndpoints describes the measurement of endpoints lookup. -type LookupEndpoints struct { - // Domain is the domain this measurement refers to. - Domain string - - *measurex.BaseMeasurement -} +// URLMeasurer measures a single URL. +// +// Make sure you fill the fields marked as MANDATORY. +type URLMeasurer struct { + // DNSResolverUDP is the MANDATORY address of an DNS + // over UDP resolver (e.g., "8.8.4.4.:53"). + DNSResolverUDP string -// Endpoint describes the measurement of a given endpoint. -type Endpoint struct { - // Endpoint is the endpoint this measurement refers to. - Endpoint string + // Mx is the MANDATORY measurex.Measurer. + Mx *measurex.Measurer - *measurex.BaseMeasurement + // URL is the MANDATORY URL to measure. + URL *url.URL } // Run performs all the WebSteps step. @@ -72,25 +77,13 @@ type Endpoint struct { // After a step has run, we search for all the redirection URLs // and we run a new step with the new URLs. // -// Arguments -// -// - ctx is the context to implement timeouts; -// -// - mx is the measurex.Measurer to use; -// -// - URL is the URL from which we start measuring; -// -// - dnsResolverUDP is the address of the DNS resolver endpoint -// using UDP we wish to use (e.g., "8.8.8.8:53"). -// -// Return value +// Return value: // // A list of SingleStep structures where the Endpoints array may be empty // if we have no been able to discover endpoints. 
-func Run(ctx context.Context, mx *measurex.Measurer, - URL *url.URL, dnsResolverUDP string) (v []*SingleStep) { +func (um *URLMeasurer) Run(ctx context.Context) (v []*SingleStep) { jar := measurex.NewCookieJar() - inputs := []*url.URL{URL} + inputs := []*url.URL{um.URL} Loop: for len(inputs) > 0 { dups := make(map[string]*url.URL) @@ -99,9 +92,9 @@ Loop: case <-ctx.Done(): break Loop default: - mx.Infof("RunSingleStep url=%s dnsResolverUDP=%s jar=%+v", - input, dnsResolverUDP, jar) - m := RunSingleStep(ctx, mx, jar, input, dnsResolverUDP) + um.Mx.Infof("RunSingleStep url=%s dnsResolverUDP=%s jar=%+v", + input, um.DNSResolverUDP, jar) + m := um.RunSingleStep(ctx, jar, input) v = append(v, m) for _, epnt := range m.Endpoints { for _, redir := range epnt.HTTPRedirect { @@ -112,7 +105,7 @@ Loop: } inputs = nil for _, input := range dups { - mx.Infof("newRedirection %s", input) + um.Mx.Infof("newRedirection %s", input) inputs = append(inputs, input) } } @@ -121,54 +114,45 @@ Loop: // RunSingleStep performs a single WebSteps step. // -// We define "step" as the process by which we have an input URL -// and we perform the following operations: -// -// 1. lookup of all the possible endpoints for the URL; -// -// 2. measurement of each available endpoint. +// This function DOES NOT automatically follow redirections. // -// This function DOES NOT automatically follow redirections. Though -// we have enough information to know how to follow them. -// -// Arguments +// Arguments: // // - ctx is the context to implement timeouts; // -// - mx is the measurex.Measurer to use; -// // - cookiejar is the http.CookieJar for cookies; // -// - URL is the URL to measure; -// -// - dnsResolverUDP is the address of the DNS resolver endpoint -// using UDP we wish to use (e.g., "8.8.8.8:53"). +// - URL is the URL to measure. // -// Return value +// Return value: // // A SingleStep structure where the Endpoints array may be empty // if we have no been able to discover endpoints. 
-func RunSingleStep(ctx context.Context, mx *measurex.Measurer, - cookiekar http.CookieJar, URL *url.URL, dnsResolverUDP string) (m *SingleStep) { +func (um *URLMeasurer) RunSingleStep(ctx context.Context, + cookiekar http.CookieJar, URL *url.URL) (m *SingleStep) { m = &SingleStep{URL: URL.String()} defer m.computeOddities() - mid := mx.NewMeasurement() - mx.Infof("LookupHTTPEndpoints measurementID=%d url=%s dnsResolverUDP=%s", - mid, URL.String(), dnsResolverUDP) - epnts, _ := mx.LookupHTTPEndpoints(ctx, URL, dnsResolverUDP) - m.LookupEndpoints = &LookupEndpoints{ - Domain: URL.Hostname(), - BaseMeasurement: mx.NewBaseMeasurement(mid), + port, err := measurex.PortFromURL(URL) + if err != nil { + return + } + switch URL.Scheme { + case "https": + m.DNS = append(m.DNS, um.Mx.LookupHTTPSSvcUDP( + ctx, URL.Hostname(), um.DNSResolverUDP)) + default: + // nothing to do + } + m.DNS = append(m.DNS, um.Mx.LookupHostSystem(ctx, URL.Hostname())) + m.DNS = append(m.DNS, um.Mx.LookupHostUDP(ctx, URL.Hostname(), um.DNSResolverUDP)) + endpoints := um.Mx.DB.SelectAllEndpointsForDomain(URL.Hostname(), port) + m.Control = append(m.Control, um.Mx.LookupWCTH(ctx, URL, endpoints, port)) + httpEndpoints, err := um.Mx.DB.SelectAllHTTPEndpointsForDomain(URL) + if err != nil { + return } - for _, epnt := range epnts { - mid = mx.NewMeasurement() - mx.Infof("HTTPEndpointGet measurementID=%d url=%s endpoint=%s dnsResolverUDP=%s", - mid, URL.String(), epnt.String(), dnsResolverUDP) - mx.HTTPEndpointGet(ctx, epnt, cookiekar) - m.Endpoints = append(m.Endpoints, &Endpoint{ - Endpoint: epnt.String(), - BaseMeasurement: mx.NewBaseMeasurement(mid), - }) + for _, epnt := range httpEndpoints { + m.Endpoints = append(m.Endpoints, um.Mx.HTTPEndpointGet(ctx, epnt, cookiekar)) } return } diff --git a/internal/measurex/bogon.go b/internal/measurex/bogon.go index 2c01d3a09a..e2191b6fc0 100644 --- a/internal/measurex/bogon.go +++ b/internal/measurex/bogon.go @@ -6,6 +6,13 @@ import ( 
"github.com/ooni/probe-cli/v3/internal/runtimex" ) +// isBogon returns whether if an IP address is bogon. Passing to this +// function a non-IP address causes it to return true. +func isBogon(address string) bool { + ip := net.ParseIP(address) + return ip == nil || isPrivate(ip) +} + var privateIPBlocks []*net.IPNet func init() { @@ -39,10 +46,3 @@ func isPrivate(ip net.IP) bool { } return false } - -// IsBogon returns whether if an IP address is bogon. Passing to this -// function a non-IP address causes it to return bogon. -func IsBogon(address string) bool { - ip := net.ParseIP(address) - return ip == nil || isPrivate(ip) -} diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 58a399494f..95db672a37 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -1,53 +1,47 @@ package measurex -// DB is the database holding measurements. -type DB interface { - // Dial table +import "time" + +// EventDB is a "database" holding events records as seen by the +// networking code that needs to save events. +type EventDB interface { + // ElapsedTime returns the elapsed time since the beginning + // of time as configured into the database. + ElapsedTime() time.Duration + + // InsertIntoDial saves a Dial event. InsertIntoDial(ev *NetworkEvent) - SelectAllFromDial() []*NetworkEvent - // ReadWrite table + // InsertIntoReadWrite saves an I/O event. InsertIntoReadWrite(ev *NetworkEvent) - SelectAllFromReadWrite() []*NetworkEvent - // Close table + // InsertIntoClose saves a close event. InsertIntoClose(ev *NetworkEvent) - SelectAllFromClose() []*NetworkEvent - // TLSHandshake table + // InsertIntoTLSHandshake saves a TLS handshake event. InsertIntoTLSHandshake(ev *TLSHandshakeEvent) - SelectAllFromTLSHandshake() []*TLSHandshakeEvent - // LookupHost table + // InsertIntoLookupHost saves a lookup host event. 
InsertIntoLookupHost(ev *LookupHostEvent) - SelectAllFromLookupHost() []*LookupHostEvent - // LookupHTTPSSvc table + // InsertIntoLookupHTTPSvc saves an HTTPSvc lookup event. InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) - SelectAllFromLookupHTTPSSvc() []*LookupHTTPSSvcEvent - // DNSRoundTrip table + // InsertIntoDNSRoundTrip saves a DNS round trip event. InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) - SelectAllFromDNSRoundTrip() []*DNSRoundTripEvent - // HTTPRoundTrip table + // InsertIntoHTTPRoundTrip saves an HTTP round trip event. InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) - SelectAllFromHTTPRoundTrip() []*HTTPRoundTripEvent - // HTTPRedirect table + // InsertIntoHTTPRedirect saves an HTTP redirect event. InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) - SelectAllFromHTTPRedirect() []*HTTPRedirectEvent - // QUICHandshake table + // InsertIntoQUICHandshake saves a QUIC handshake event. InsertIntoQUICHandshake(ev *QUICHandshakeEvent) - SelectAllFromQUICHandshake() []*QUICHandshakeEvent // NextConnID increments and returns the connection ID. NextConnID() int64 - // MeasurementID returns the measurement ID. + // MeasurementID returns the current measurement ID. MeasurementID() int64 - - // NextMeasurement increments and returns the measurement ID. - NextMeasurement() int64 } diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index e440630ff8..f9d1cdacbb 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -9,65 +9,109 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) -// Conn is the connection type we use. +// Conn is like net.Conn but also knows its ConnID and has a +// reference to its Dialer's database. On Read, Write, and Close, this +// Conn will write a specific event into the database. type Conn interface { net.Conn - // ConnID returns the connection ID. + // ConnID returns the ConnID. This should be a positive + // integer. A zero or negative value means "unknown". 
ConnID() int64 } -// Dialer is the dialer type we use. +// Dialer is like netxlite.Dialer but dials connections of +// the Conn type defined inside this package. type Dialer interface { + // DialContext wraps a netxlite.Dialer.DialContext + // and implements this algorithm: + // + // 1. perform TCP/UDP dial using the underlying netxlite.Dialer; + // + // 2. insert a DialEvent into the DB; + // + // 3. on error, return error; + // + // 4. otherwise, wrap the net.Conn to be a Conn and return it. DialContext(ctx context.Context, network, address string) (Conn, error) + + // CloseIdleConnections calls the namesake method + // of the underlying netxlite.Dialer. CloseIdleConnections() } -// WrapDialer wraps a Dialer to add measurex capabilities. -// -// DialContext algorithm +// WrapDialer takes in input a netxlite.Dialer and returns +// in output a Dialer of the type used in this package. // -// 1. perform TCP/UDP connect as usual; +// Arguments: // -// 2. insert a DialEvent into the DB; +// - origin is either OriginProbe or OriginTH // -// 3. on success, wrap the returned net.Conn so that it -// inserts Read, Write, and Close events into the DB. +// - db is the database in which to store measurements // -// 4. return net.Conn or error. -func WrapDialer(origin Origin, db DB, d netxlite.Dialer) Dialer { +// - d is the underlying netxlite.Dialer to use +func WrapDialer(origin Origin, db EventDB, d netxlite.Dialer) Dialer { return &dialerx{Dialer: d, db: db, origin: origin} +} + +// NewDialerWithoutResolver is a convenience factory for creating +// a dialer that saves measurements into the DB and that is not attached +// to any resolver (hence only works when passed IP addresses). +func NewDialerWithoutResolver(origin Origin, db EventDB, logger Logger) Dialer { + return WrapDialer(origin, db, netxlite.NewDialerWithoutResolver( + logger, + )) +} + +// netxliteDialerAdapter adapts measurex.Dialer to netxlite.Dialer. 
+type netxliteDialerAdapter struct { + Dialer +} + +// DialContext implements netxlite.Dialer.DialContext. +func (d *netxliteDialerAdapter) DialContext( + ctx context.Context, network, address string) (net.Conn, error) { + return d.Dialer.DialContext(ctx, network, address) +} +// NewDialerWithSystemResolver is a convenience factory for creating +// a dialer that saves measurements into the DB and uses the system resolver. +func NewDialerWithSystemResolver(origin Origin, db EventDB, logger Logger) Dialer { + r := NewResolverSystem(origin, db, logger) + return WrapDialer(origin, db, netxlite.NewDialerWithResolver( + logger, r, + )) } type dialerx struct { netxlite.Dialer - db DB + db EventDB origin Origin } -// NetworkEvent contains a network event. +// NetworkEvent contains a network event. Events of this kind +// are generated by Dialer, QUICDialer, Conn, QUICConn. type NetworkEvent struct { - Origin Origin - MeasurementID int64 - ConnID int64 - Operation string - Network string - RemoteAddr string - LocalAddr string - Started time.Time - Finished time.Time - Error error - Oddity Oddity - Count int + Origin Origin // OriginProbe or OriginTH + MeasurementID int64 // ID of the measurement + ConnID int64 // ID of the conn + Operation string // "read", "write", ... 
+ Network string // "tcp", "udp" + RemoteAddr string // remote addr (e.g., "1.1.1.1:443") + LocalAddr string // local addr + Started time.Duration // when we called dial + Finished time.Duration // when dial returned + Error error // error or nil + Oddity Oddity // oddity classification + Count int // bytes sent or recv (where applicable) } func (d *dialerx) DialContext( ctx context.Context, network, address string) (Conn, error) { connID := d.db.NextConnID() - started := time.Now() + started := d.db.ElapsedTime() conn, err := d.Dialer.DialContext(ctx, network, address) - finished := time.Now() + finished := d.db.ElapsedTime() d.db.InsertIntoDial(&NetworkEvent{ Origin: d.origin, MeasurementID: d.db.MeasurementID(), @@ -121,7 +165,7 @@ func (c *dialerx) computeOddity(err error) Oddity { type connx struct { net.Conn - db DB + db EventDB connID int64 remoteAddr string localAddr string @@ -134,9 +178,9 @@ func (c *connx) ConnID() int64 { } func (c *connx) Read(b []byte) (int, error) { - started := time.Now() + started := c.db.ElapsedTime() count, err := c.Conn.Read(b) - finished := time.Now() + finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, MeasurementID: c.db.MeasurementID(), @@ -154,9 +198,9 @@ func (c *connx) Read(b []byte) (int, error) { } func (c *connx) Write(b []byte) (int, error) { - started := time.Now() + started := c.db.ElapsedTime() count, err := c.Conn.Write(b) - finished := time.Now() + finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, MeasurementID: c.db.MeasurementID(), @@ -174,9 +218,9 @@ func (c *connx) Write(b []byte) (int, error) { } func (c *connx) Close() error { - started := time.Now() + started := c.db.ElapsedTime() err := c.Conn.Close() - finished := time.Now() + finished := c.db.ElapsedTime() c.db.InsertIntoClose(&NetworkEvent{ Origin: c.origin, MeasurementID: c.db.MeasurementID(), diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index 
d83371452c..3f86496be7 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -7,35 +7,41 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" ) -// DNSTransport is the DNS transport type we use. +// DNSTransport is a transport for sending raw DNS queries +// and receiving raw DNS replies. The internal/netxlite/dnsx +// package implements a bunch of these transports. type DNSTransport = dnsx.RoundTripper -// WrapDNSXRoundTripper wraps a dnsx.RoundTripper to add measurex capabilities. -func WrapDNSXRoundTripper(db DB, rt dnsx.RoundTripper) DNSTransport { - return &dnsxTransportx{db: db, RoundTripper: rt} +// WrapDNSXRoundTripper wraps a dnsx.RoundTripper and returns a +// DNSTransport that saves DNSRoundTripEvents into the DB. +func WrapDNSXRoundTripper(origin Origin, db EventDB, rt dnsx.RoundTripper) DNSTransport { + return &dnsxTransportx{db: db, RoundTripper: rt, origin: origin} } type dnsxTransportx struct { dnsx.RoundTripper - db DB + db EventDB + origin Origin } -// DNSRoundTripEvent contains the result of a DNS round trip. +// DNSRoundTripEvent contains the result of a DNS round trip. These +// events are generated by DNSTransport types. 
type DNSRoundTripEvent struct { - MeasurementID int64 - Network string - Address string - Query []byte - Started time.Time - Finished time.Time - Error error - Reply []byte + Origin Origin // OriginProbe or OriginTH + MeasurementID int64 // ID of the measurement + Network string // DNS resolver's network (e.g., "dot", "doh") + Address string // DNS resolver's address or URL (for "doh") + Query []byte // Raw query + Started time.Duration // When we started the round trip + Finished time.Duration // When we were done + Error error // Error or nil + Reply []byte // Raw reply } func (txp *dnsxTransportx) RoundTrip(ctx context.Context, query []byte) ([]byte, error) { - started := time.Now() + started := txp.db.ElapsedTime() reply, err := txp.RoundTripper.RoundTrip(ctx, query) - finished := time.Now() + finished := txp.db.ElapsedTime() txp.db.InsertIntoDNSRoundTrip(&DNSRoundTripEvent{ MeasurementID: txp.db.MeasurementID(), Network: txp.RoundTripper.Network(), diff --git a/internal/measurex/http.go b/internal/measurex/http.go index b26af7f99d..0b164b3fef 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -18,25 +18,37 @@ import ( "golang.org/x/net/publicsuffix" ) -// HTTPTransport is the HTTP transport type we use. +// HTTPTransport is the HTTP transport type we use. This transport +// is a normal netxlite.HTTPTransport but also knows about the ConnID. +// +// The RoundTrip method of this transport MAY read a small snapshot +// of the response body to include it into the measurement. When this +// happens, the transport will nonetheless return a response body +// that is suitable for reading the whole body again. The only difference +// with reading the body normally is timing. The snapshot will be read +// immediately because it's already cached in RAM. The rest of the +// body instead will be read normally, using the network. type HTTPTransport interface { netxlite.HTTPTransport - // ConnID returns the connection ID. 
+ // ConnID returns the connection ID. When this value is zero + // or negative it means it has not been set. ConnID() int64 } -// WrapHTTPTransport wraps a netxlite.HTTPTransport to add measurex -// capabilities. With this constructor the conn ID is undefined. +// WrapHTTPTransport takes in input a netxlite.HTTPTransport and +// returns an HTTPTransport that uses the DB to save events occurring +// during HTTP round trips. With this constructor the ConnID is +// not set, hence ConnID will always return zero. func WrapHTTPTransport( - origin Origin, db DB, txp netxlite.HTTPTransport) HTTPTransport { + origin Origin, db EventDB, txp netxlite.HTTPTransport) HTTPTransport { return WrapHTTPTransportWithConnID(origin, db, txp, 0) } // WrapHTTPTransportWithConnID is like WrapHTTPTransport but also -// sets the conn ID, which is otherwise undefined. +// sets the conn ID, which is otherwise set to zero. func WrapHTTPTransportWithConnID(origin Origin, - db DB, txp netxlite.HTTPTransport, connID int64) HTTPTransport { + db EventDB, txp netxlite.HTTPTransport, connID int64) HTTPTransport { return &httpTransportx{ HTTPTransport: txp, db: db, connID: connID, origin: origin} } @@ -44,36 +56,34 @@ func WrapHTTPTransportWithConnID(origin Origin, // NewHTTPTransportWithConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. 
func NewHTTPTransportWithConn( - origin Origin, logger Logger, db DB, conn Conn) HTTPTransport { - return WrapHTTPTransportWithConnID(origin, db, netxlite.NewHTTPTransport( - logger, netxlite.NewSingleUseDialer(conn), - netxlite.NewNullTLSDialer(), - ), conn.ConnID()) + origin Origin, logger Logger, db EventDB, conn Conn) HTTPTransport { + txp := netxlite.NewHTTPTransport(logger, netxlite.NewSingleUseDialer(conn), + netxlite.NewNullTLSDialer()) + return WrapHTTPTransportWithConnID(origin, db, txp, conn.ConnID()) } // NewHTTPTransportWithTLSConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. func NewHTTPTransportWithTLSConn( - origin Origin, logger Logger, db DB, conn TLSConn) HTTPTransport { - return WrapHTTPTransportWithConnID(origin, db, netxlite.NewHTTPTransport( - logger, netxlite.NewNullDialer(), - netxlite.NewSingleUseTLSDialer(conn), - ), conn.ConnID()) + origin Origin, logger Logger, db EventDB, conn TLSConn) HTTPTransport { + txp := netxlite.NewHTTPTransport(logger, netxlite.NewNullDialer(), + netxlite.NewSingleUseTLSDialer(conn)) + return WrapHTTPTransportWithConnID(origin, db, txp, conn.ConnID()) } // NewHTTPTransportWithQUICSess creates and wraps an HTTPTransport that // does not dial and only uses the given QUIC session. 
func NewHTTPTransportWithQUICSess( - origin Origin, logger Logger, db DB, sess QUICEarlySession) HTTPTransport { - return WrapHTTPTransportWithConnID(origin, db, netxlite.NewHTTP3Transport( - logger, netxlite.NewSingleUseQUICDialer(sess), &tls.Config{}, - ), sess.ConnID()) + origin Origin, logger Logger, db EventDB, sess QUICEarlySession) HTTPTransport { + txp := netxlite.NewHTTP3Transport( + logger, netxlite.NewSingleUseQUICDialer(sess), &tls.Config{}) + return WrapHTTPTransportWithConnID(origin, db, txp, sess.ConnID()) } type httpTransportx struct { netxlite.HTTPTransport connID int64 - db DB + db EventDB origin Origin } @@ -82,19 +92,20 @@ type httpTransportx struct { // If ConnID is zero or negative, it means undefined. This happens // when we create a transport without knowing the ConnID. type HTTPRoundTripEvent struct { - Origin Origin - MeasurementID int64 - ConnID int64 - RequestMethod string - RequestURL *url.URL - RequestHeader http.Header - Started time.Time - Finished time.Time - Error error - Oddity Oddity - ResponseStatus int - ResponseHeader http.Header - ResponseBodySnapshot []byte + Origin Origin // OriginProbe or OriginTH + MeasurementID int64 // ID of the measurement + ConnID int64 // ID of the conn (<= zero means undefined) + RequestMethod string // Request method + RequestURL *url.URL // Request URL + RequestHeader http.Header // Request headers + Started time.Duration // Beginning of round trip + Finished time.Duration // End of round trip + Error error // Error or nil + Oddity Oddity // Oddity classification + ResponseStatus int // Status code + ResponseHeader http.Header // Response headers + ResponseBodySnapshot []byte // Body snapshot + MaxBodySnapshotSize int64 // Max size for snapshot } // We only read a small snapshot of the body to keep measurements @@ -103,19 +114,20 @@ type HTTPRoundTripEvent struct { const maxBodySnapshot = 1 << 11 func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) { - started := 
time.Now() + started := txp.db.ElapsedTime() resp, err := txp.HTTPTransport.RoundTrip(req) rt := &HTTPRoundTripEvent{ - Origin: txp.origin, - MeasurementID: txp.db.MeasurementID(), - ConnID: txp.connID, - RequestMethod: req.Method, - RequestURL: req.URL, - RequestHeader: req.Header, - Started: started, + Origin: txp.origin, + MeasurementID: txp.db.MeasurementID(), + ConnID: txp.connID, + RequestMethod: req.Method, + RequestURL: req.URL, + RequestHeader: req.Header, + Started: started, + MaxBodySnapshotSize: maxBodySnapshot, } if err != nil { - rt.Finished = time.Now() + rt.Finished = txp.db.ElapsedTime() rt.Error = err txp.db.InsertIntoHTTPRoundTrip(rt) return nil, err @@ -135,10 +147,10 @@ func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) r := io.LimitReader(resp.Body, maxBodySnapshot) body, err := iox.ReadAllContext(req.Context(), r) if errors.Is(err, io.EOF) && resp.Close { - err = nil // we expected to see an EOF here + err = nil // we expected to see an EOF here, so no real error } if err != nil { - rt.Finished = time.Now() + rt.Finished = txp.db.ElapsedTime() rt.Error = err txp.db.InsertIntoHTTPRoundTrip(rt) return nil, err @@ -148,7 +160,7 @@ func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) Closer: resp.Body, } rt.ResponseBodySnapshot = body - rt.Finished = time.Now() + rt.Finished = txp.db.ElapsedTime() txp.db.InsertIntoHTTPRoundTrip(rt) return resp, nil } @@ -162,7 +174,9 @@ func (txp *httpTransportx) ConnID() int64 { return txp.connID } -// HTTPClient is the HTTP client type we use. +// HTTPClient is the HTTP client type we use. This interface is +// compatible with http.Client. What changes in this kind of clients +// is that we'll insert redirection events into the DB. 
type HTTPClient interface { Do(req *http.Request) (*http.Response, error) CloseIdleConnections() @@ -171,39 +185,18 @@ type HTTPClient interface { // NewHTTPClient creates a new HTTPClient instance that // does not automatically perform redirects. func NewHTTPClientWithoutRedirects( - origin Origin, db DB, jar http.CookieJar, txp HTTPTransport) HTTPClient { + origin Origin, db EventDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { return newHTTPClient(origin, db, jar, txp, http.ErrUseLastResponse) } // NewHTTPClientWithRedirects creates a new HTTPClient // instance that automatically perform redirects. func NewHTTPClientWithRedirects( - origin Origin, db DB, jar http.CookieJar, txp HTTPTransport) HTTPClient { + origin Origin, db EventDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { return newHTTPClient(origin, db, jar, txp, nil) } // HTTPRedirectEvent records an HTTP redirect. -// -// If ConnID is zero or negative, it means undefined. This happens -// when we create a transport without knowing the ConnID. -// -// The Request field contains the next request to issue. When -// redirects are disabled, this field contains the request you -// should issue to continue the redirect chain. -// -// The Via field contains the requests issued so far. The first -// request inside Via is the last one that has been issued. -// -// The Cookies field contains all the cookies that the -// implementation would set for the Request.URL. -// -// The Error field can have three values: -// -// - nil if the redirect occurred; -// -// - ErrHTTPTooManyRedirects when we see too many redirections; -// -// - http.ErrUseLastResponse if redirections are disabled. type HTTPRedirectEvent struct { // Origin is the event origin ("probe" or "th") Origin Origin @@ -239,7 +232,7 @@ type HTTPRedirectEvent struct { // would return when hitting too many redirects. 
var ErrHTTPTooManyRedirects = errors.New("stopped after 10 redirects") -func newHTTPClient(origin Origin, db DB, +func newHTTPClient(origin Origin, db EventDB, cookiejar http.CookieJar, txp HTTPTransport, defaultErr error) HTTPClient { return &http.Client{ Transport: txp, @@ -304,11 +297,3 @@ func NewHTTPRequestWithContext(ctx context.Context, func NewHTTPGetRequest(ctx context.Context, URL string) (*http.Request, error) { return NewHTTPRequestWithContext(ctx, "GET", URL, nil) } - -// MustNewHTTPGetRequest is a convenience factory for creating -// a new http.Request using GET that panics on error. -func MustNewHTTPGetRequest(ctx context.Context, URL string) *http.Request { - req, err := NewHTTPGetRequest(ctx, URL) - runtimex.PanicOnError(err, "NewHTTPGetRequest failed") - return req -} diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go index a5d529c8a8..2443b7a42e 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -4,7 +4,8 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" ) -// Logger is the logger type we use. +// Logger is the logger type we use. This type is compatible +// with the logger type of github.com/apex/log. type Logger interface { netxlite.Logger diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go new file mode 100644 index 0000000000..bbb97f11d5 --- /dev/null +++ b/internal/measurex/measurement.go @@ -0,0 +1,115 @@ +package measurex + +// Measurement groups all the events that have the same MeasurementID. This +// data format is not compatible with the OONI data format. +type Measurement struct { + // ID is the measurement ID. + ID int64 + + // URL is the OPTIONAL URL this measurement refers to. + URL string + + // Endpoint is the OPTIONAL endpoint this measurement refers to. + Endpoint string + + // Oddities lists all the oddities inside this measurement. See + // newMeasurement's docs for more info. 
+ Oddities []Oddity + + // Connect contains all the connect operations. + Connect []*NetworkEvent + + // ReadWrite contains all the read and write operations. + ReadWrite []*NetworkEvent + + // Close contains all the close operations. + Close []*NetworkEvent + + // TLSHandshake contains all the TLS handshakes. + TLSHandshake []*TLSHandshakeEvent + + // QUICHandshake contains all the QUIC handshakes. + QUICHandshake []*QUICHandshakeEvent + + // LookupHost contains all the host lookups. + LookupHost []*LookupHostEvent + + // LookupHTTPSSvc contains all the HTTPSSvc lookups. + LookupHTTPSSvc []*LookupHTTPSSvcEvent + + // DNSRoundTrip contains all the DNS round trips. + DNSRoundTrip []*DNSRoundTripEvent + + // HTTPRoundTrip contains all the HTTP round trips. + HTTPRoundTrip []*HTTPRoundTripEvent + + // HTTPRedirect contains all the redirections. + HTTPRedirect []*HTTPRedirectEvent +} + +// NewMeasurement creates a new Measurement by gathering all the +// events inside the database with a given MeasurementID. +// +// As part of the process, this function computes the Oddities field by +// gathering the oddities of the following operations: +// +// - connect; +// +// - tlsHandshake; +// +// - quicHandshake; +// +// - lookupHost; +// +// - httpRoundTrip. +// +// Arguments: +// +// - db is the database from which to select the events; +// +// - id is the MeasurementID. +// +// Returns a Measurement possibly containing empty lists of events. 
+func NewMeasurement(db *Saver, id int64) *Measurement { + m := &Measurement{ + ID: id, + Connect: db.SelectAllFromDialWithMeasurementID(id), + ReadWrite: db.SelectAllFromReadWriteWithMeasurementID(id), + Close: db.SelectAllFromCloseWithMeasurementID(id), + TLSHandshake: db.SelectAllFromTLSHandshakeWithMeasurementID(id), + QUICHandshake: db.SelectAllFromQUICHandshakeWithMeasurementID(id), + LookupHost: db.SelectAllFromLookupHostWithMeasurementID(id), + LookupHTTPSSvc: db.SelectAllFromLookupHTTPSSvcWithMeasurementID(id), + DNSRoundTrip: db.SelectAllFromDNSRoundTripWithMeasurementID(id), + HTTPRoundTrip: db.SelectAllFromHTTPRoundTripWithMeasurementID(id), + HTTPRedirect: db.SelectAllFromHTTPRedirectWithMeasurementID(id), + } + m.computeOddities() + return m +} + +// computeOddities computes all the oddities inside m. See +// newMeasurement's docs for more information. +func (m *Measurement) computeOddities() { + unique := make(map[Oddity]bool) + for _, ev := range m.Connect { + unique[ev.Oddity] = true + } + for _, ev := range m.TLSHandshake { + unique[ev.Oddity] = true + } + for _, ev := range m.QUICHandshake { + unique[ev.Oddity] = true + } + for _, ev := range m.LookupHost { + unique[ev.Oddity] = true + } + for _, ev := range m.HTTPRoundTrip { + unique[ev.Oddity] = true + } + for key := range unique { + if key != "" { + m.Oddities = append(m.Oddities, key) + } + } +} diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index bfe5c99269..379c80e491 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -4,34 +4,17 @@ import ( "context" "crypto/tls" "errors" - "fmt" - "net" "net/http" "net/url" "time" "github.com/ooni/probe-cli/v3/internal/netxlite" - "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" ) // Measurer performs measurements. -// -// You call measurer methods to perform measurements. All methods -// will save measurements into the DB field as a side effect. 
-// -// Some methods will also return (a subset of) their measurement -// results when doing that is convenient. -// -// This implementation currently uses the Web Connectivity Test -// Helper (WCTH) to help with measuring HTTP endpoints. We'll use -// an ad-hoc, more effective test helper in the near future. -// -// Remarks -// -// Make sure to initialize all the fields marked as MANDATORY. type Measurer struct { // DB is the MANDATORY database to use. - DB DB + DB *Saver // HTTPClient is the MANDATORY HTTP client for the WCTH. HTTPClient HTTPClient @@ -49,94 +32,26 @@ type Measurer struct { WCTHURL string } -// NewMeasurement increments the DB's MeasurementID -// and returns such an ID for later usage. -// -// Every operation we perform (e.g., a TCP connect) saves -// measurements into mx.DB using separate tables. -// -// We save the MeasurementID for each operation. -// -// By calling NewMeasurement you increment such an ID -// which later allows you to separate measurements. -func (mx *Measurer) NewMeasurement() int64 { +func (mx *Measurer) nextMeasurement() int64 { return mx.DB.NextMeasurement() } // LookupHostSystem performs a LookupHost using the system resolver. -// -// The system resolver is equivalent to calling getaddrinfo on Unix systems. -// -// Arguments -// -// - ctx is the context allowing to timeout the operation; -// -// - domain is the domain to lookup. -// -// Return value -// -// Either a list of resolved IP addresses or an error. 
-func (mx *Measurer) LookupHostSystem( - ctx context.Context, domain string) (addrs []string, err error) { +func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measurement { const timeout = 4 * time.Second mx.Infof("LookupHostSystem domain=%s timeout=%s...", domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - r := mx.newResolverSystem() + r := NewResolverSystem(mx.Origin, mx.DB, mx.Logger) defer r.CloseIdleConnections() - return r.LookupHost(ctx, domain) -} - -// newResolverSystem is a convenience factory for creating a -// system resolver that saves measurements into mx.DB. -func (mx *Measurer) newResolverSystem() Resolver { - return WrapResolver(mx.Origin, mx.DB, netxlite.NewResolverStdlib(mx.Logger)) -} - -// newDialerWithSystemResolver is a convenience factory for creating -// a dialer that saves measurements into mx.DB. -func (mx *Measurer) newDialerWithSystemResolver() Dialer { - r := mx.newResolverSystem() - return WrapDialer(mx.Origin, mx.DB, netxlite.NewDialerWithResolver( - mx.Logger, r, - )) -} - -// netxliteDialerAdapter adapts measurex.Dialer to netxlite.Dialer. -type netxliteDialerAdapter struct { - Dialer -} - -// DialContext implements netxlite.Dialer.DialContext. -func (d *netxliteDialerAdapter) DialContext( - ctx context.Context, network, address string) (net.Conn, error) { - return d.Dialer.DialContext(ctx, network, address) -} - -// newResolverUDP is a convenience factory for creating a resolver -// using UDP that saves measurements into mx.DB. -// -// Arguments -// -// - address is the resolver address (e.g., "1.1.1.1:53"). -// -// Return value -// -// A Resolver. 
-func (mx *Measurer) newResolverUDP(address string) Resolver { - return WrapResolver(mx.Origin, mx.DB, &netxlite.ResolverLogger{ - Resolver: netxlite.WrapResolver(mx.Logger, dnsx.NewSerialResolver( - WrapDNSXRoundTripper(mx.DB, dnsx.NewDNSOverUDP( - &netxliteDialerAdapter{mx.newDialerWithSystemResolver()}, - address, - )))), - Logger: mx.Logger, - }) + id := mx.nextMeasurement() + _, _ = r.LookupHost(ctx, domain) + return NewMeasurement(mx.DB, id) } // LookupHostUDP is like LookupHostSystem but uses an UDP resolver. // -// Arguments +// Arguments: // // - ctx is the context allowing to timeout the operation; // @@ -144,24 +59,24 @@ func (mx *Measurer) newResolverUDP(address string) Resolver { // // - address is the UDP resolver address (e.g., "dns.google:53"). // -// Return value -// -// Either the resolved addresses or an error. +// Returns a Measurement. func (mx *Measurer) LookupHostUDP( - ctx context.Context, domain, address string) ([]string, error) { + ctx context.Context, domain, address string) *Measurement { const timeout = 4 * time.Second mx.Infof("LookupHostUDP serverEndpoint=%s/udp domain=%s timeout=%s...", address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - r := mx.newResolverUDP(address) + r := NewResolverUDP(mx.Origin, mx.DB, mx.Logger, address) defer r.CloseIdleConnections() - return r.LookupHost(ctx, domain) + id := mx.nextMeasurement() + _, _ = r.LookupHost(ctx, domain) + return NewMeasurement(mx.DB, id) } // LookupHTTPSSvcUDP issues an HTTPSSvc query for the given domain. // -// Arguments +// Arguments: // // - ctx is the context allowing to timeout the operation; // @@ -169,53 +84,54 @@ func (mx *Measurer) LookupHostUDP( // // - address is the UDP resolver address (e.g., "dns.google:53"). // -// Return value -// -// Either the query result, on success, or an error. +// Returns a Measurement. 
func (mx *Measurer) LookupHTTPSSvcUDP( - ctx context.Context, domain, address string) (HTTPSSvc, error) { + ctx context.Context, domain, address string) *Measurement { const timeout = 4 * time.Second mx.Infof("LookupHTTPSSvcUDP engine=udp://%s domain=%s timeout=%s...", address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - r := mx.newResolverUDP(address) + r := NewResolverUDP(mx.Origin, mx.DB, mx.Logger, address) defer r.CloseIdleConnections() - return r.LookupHTTPSSvcWithoutRetry(ctx, domain) -} - -// newDialerWithSystemResolver is a convenience factory for creating -// a dialer that saves measurements into mx.DB. -func (mx *Measurer) newDialerWithoutResolver() Dialer { - return WrapDialer(mx.Origin, mx.DB, netxlite.NewDialerWithoutResolver( - mx.Logger, - )) + id := mx.nextMeasurement() + _, _ = r.LookupHTTPSSvcWithoutRetry(ctx, domain) + return NewMeasurement(mx.DB, id) } // TCPConnect establishes a connection with a TCP endpoint. // -// Arguments +// Arguments: // // - ctx is the context allowing to timeout the connect; // // - address is the TCP endpoint address (e.g., "8.8.4.4:443"). // -// Return value -// -// Either an established Conn or an error. -func (mx *Measurer) TCPConnect(ctx context.Context, address string) (Conn, error) { +// Returns a Measurement. +func (mx *Measurer) TCPConnect(ctx context.Context, address string) *Measurement { + id := mx.nextMeasurement() + conn, _ := mx.tcpConnect(ctx, address) + measurement := NewMeasurement(mx.DB, id) + if conn != nil { + conn.Close() + } + return measurement +} + +// tcpConnect is like TCPConnect but does not create a new measurement. 
+func (mx *Measurer) tcpConnect(ctx context.Context, address string) (Conn, error) { const timeout = 10 * time.Second mx.Infof("TCPConnect endpoint=%s timeout=%s...", address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - d := mx.newDialerWithoutResolver() + d := NewDialerWithoutResolver(mx.Origin, mx.DB, mx.Logger) defer d.CloseIdleConnections() return d.DialContext(ctx, "tcp", address) } // TLSConnect connects and TLS handshakes with a TCP endpoint. // -// Arguments +// Arguments: // // - ctx is the context allowing to timeout the whole operation; // @@ -223,8 +139,6 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) (Conn, error // // - config contains the TLS config (see below). // -// TLS config -// // You MUST set the following config fields: // // - ServerName to the desired SNI or InsecureSkipVerify to @@ -235,7 +149,7 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) (Conn, error // - NextProtos to the desired ALPN ([]string{"h2", "http/1.1"} for // HTTPS and []string{"dot"} for DNS-over-TLS). // -// Caveats +// Caveats: // // The mx.TLSHandshaker field could point to a TLS handshaker using // the Go stdlib or one using gitlab.com/yawning/utls.git. @@ -244,12 +158,22 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) (Conn, error // will not only depend on the config field but also on the // utls.ClientHelloID thay you're using. // -// Return value -// -// Either an established TLSConn or an error. +// Returns a Measurement. 
func (mx *Measurer) TLSConnect(ctx context.Context, + address string, config *tls.Config) *Measurement { + id := mx.nextMeasurement() + conn, _ := mx.tlsConnect(ctx, address, config) + measurement := NewMeasurement(mx.DB, id) + if conn != nil { + conn.Close() + } + return measurement +} + +// tlsConnect is like TLSConnect but does not create a new measurement +func (mx *Measurer) tlsConnect(ctx context.Context, address string, config *tls.Config) (TLSConn, error) { - conn, err := mx.TCPConnect(ctx, address) + conn, err := mx.tcpConnect(ctx, address) if err != nil { return nil, err } @@ -261,9 +185,9 @@ func (mx *Measurer) TLSConnect(ctx context.Context, return mx.TLSHandshaker.Handshake(ctx, conn, config) } -// QUICConnect connects and TLS handshakes with a QUIC endpoint. +// QUICHandshake connects and TLS handshakes with a QUIC endpoint. // -// Arguments +// Arguments: // // - ctx is the context allowing to timeout the whole operation; // @@ -271,8 +195,6 @@ func (mx *Measurer) TLSConnect(ctx context.Context, // // - config contains the TLS config (see below). // -// TLS config -// // You MUST set the following config fields: // // - ServerName to the desired SNI or InsecureSkipVerify to @@ -283,10 +205,21 @@ func (mx *Measurer) TLSConnect(ctx context.Context, // - NextProtos to the desired ALPN ([]string{"h2", "http/1.1"} for // HTTPS and []string{"dot"} for DNS-over-TLS). // -// Return value -// -// Either an established quic.EarlySession or an error. -func (mx *Measurer) QUICConnect(ctx context.Context, +// Returns a Measurement. 
+func (mx *Measurer) QUICHandshake(ctx context.Context, address string, + config *tls.Config) *Measurement { + id := mx.nextMeasurement() + sess, _ := mx.quicHandshake(ctx, address, config) + measurement := NewMeasurement(mx.DB, id) + if sess != nil { + // TODO(bassosimone): close session with correct message + sess.CloseWithError(0, "") + } + return measurement +} + +// quicHandshake is like QUICHandshake but does not create a new measurement. +func (mx *Measurer) quicHandshake(ctx context.Context, address string, config *tls.Config) (QUICEarlySession, error) { const timeout = 10 * time.Second mx.Infof("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", @@ -301,16 +234,12 @@ func (mx *Measurer) QUICConnect(ctx context.Context, return qd.DialContext(ctx, address, config) } -// ErrUnknownHTTPEndpointNetwork indicates that we don't know -// how to handle the value of an HTTPEndpoint.Network. -var ErrUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") - // HTTPEndpointGet performs a GET request for an HTTP endpoint. // // This function WILL NOT follow redirects. If there is a redirect // you will see it inside the specific mx.DB table. // -// Arguments +// Arguments: // // - ctx is the context allowing to timeout the operation; // @@ -318,24 +247,29 @@ var ErrUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") // // - jar is the cookie jar to use. // -// Return value -// -// Either an HTTP response, on success, or an error. +// Returns a measurement. The returned measurement is empty if +// the endpoint is misconfigured or the URL has an unknown scheme. 
func (mx *Measurer) HTTPEndpointGet( - ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (m *Measurement) { + id := mx.nextMeasurement() + var resp *http.Response switch epnt.Network { case NetworkQUIC: - return mx.httpEndpointGetQUIC(ctx, epnt, jar) + resp, _ = mx.httpEndpointGetQUIC(ctx, epnt, jar) + m = NewMeasurement(mx.DB, id) case NetworkTCP: - return mx.httpEndpointGetTCP(ctx, epnt, jar) + resp, _ = mx.httpEndpointGetTCP(ctx, epnt, jar) + m = NewMeasurement(mx.DB, id) default: - return nil, ErrUnknownHTTPEndpointNetwork + m = &Measurement{} } + if resp != nil { + resp.Body.Close() + } + return } -// ErrUnknownHTTPEndpointURLScheme indicates that we don't know how to -// handle the value of an HTTPEndpoint.URLScheme. -var ErrUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Scheme") +var errUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Scheme") // httpEndpointGetTCP specializes HTTPSEndpointGet for HTTP and HTTPS. 
func (mx *Measurer) httpEndpointGetTCP( @@ -346,7 +280,7 @@ func (mx *Measurer) httpEndpointGetTCP( case "https": return mx.httpEndpointGetHTTPS(ctx, epnt, jar) default: - return nil, ErrUnknownHTTPEndpointURLScheme + return nil, errUnknownHTTPEndpointURLScheme } } @@ -358,7 +292,7 @@ func (mx *Measurer) httpEndpointGetHTTP( return nil, err } req.Header = epnt.Header - conn, err := mx.TCPConnect(ctx, epnt.Address) + conn, err := mx.tcpConnect(ctx, epnt.Address) if err != nil { return nil, err } @@ -377,7 +311,7 @@ func (mx *Measurer) httpEndpointGetHTTPS( return nil, err } req.Header = epnt.Header - conn, err := mx.TLSConnect(ctx, epnt.Address, &tls.Config{ + conn, err := mx.tlsConnect(ctx, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -400,7 +334,7 @@ func (mx *Measurer) httpEndpointGetQUIC( return nil, err } req.Header = epnt.Header - sess, err := mx.QUICConnect(ctx, epnt.Address, &tls.Config{ + sess, err := mx.quicHandshake(ctx, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -426,274 +360,10 @@ func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, return clnt.Do(req.WithContext(ctx)) } -// EndpointNetwork is the network of an endpoint. -type EndpointNetwork string - -const ( - // NetworkTCP identifies endpoints using TCP. - NetworkTCP = EndpointNetwork("tcp") - - // NetworkQUIC identifies endpoints using QUIC. - NetworkQUIC = EndpointNetwork("quic") -) - -// Endpoint is an endpoint for a domain. 
-type Endpoint struct { - // Network is the network (e.g., "tcp", "quic") - Network EndpointNetwork - - // Address is the endpoint address (e.g., "8.8.8.8:443") - Address string -} - -// String converts an endpoint to a string (e.g., "8.8.8.8:443/tcp") -func (e *Endpoint) String() string { - return fmt.Sprintf("%s/%s", e.Address, e.Network) -} - -// ErrLookupEndpoints failed indicates that we could not -// successfully lookup the endpoints for a domain. -var ErrLookupEndpoints = errors.New("endpoints lookup failed") - -// LookupEndpoints discovers the endpoints for a domain. -// -// This function performs two lookups: -// -// - with the system resolver; -// -// - with a DNS over UDP resolver. -// -// Arguments -// -// - ctx is the context carrying timeouts; -// -// - domain is the domain to lookup endpoints for; -// -// - port is the port we want to use; -// -// - address is the address of a DNS over UDP resolver. -// -// Return value -// -// Returns either a list of endpoints or an error. The error will just -// indicate that we could not resolve _any_ endpoint. Precise results -// regarding each performed operation are into the mx.DB field. -func (mx *Measurer) LookupEndpoints( - ctx context.Context, domain, port, address string) ([]*Endpoint, error) { - udpAddrs, _ := mx.LookupHostUDP(ctx, domain, address) - mx.Infof("LookupHostUDP addrs=%+v", udpAddrs) - systemAddrs, _ := mx.LookupHostSystem(ctx, domain) - mx.Infof("LookupHostSystem addrs=%+v", systemAddrs) - var out []*Endpoint - out = append(out, mx.parseLookupHostReply(port, systemAddrs)...) - out = append(out, mx.parseLookupHostReply(port, udpAddrs)...) - out = mx.mergeEndpoints(out) - if len(out) < 1 { - return nil, ErrLookupEndpoints - } - return out, nil -} - -// mergeEndpoints merges duplicate endpoints in the input list. -// -// Arguments -// -// - input is the input list of endpoints to merge. -// -// Return value -// -// A list where duplicates have been removed. 
-func (mx *Measurer) mergeEndpoints(input []*Endpoint) (out []*Endpoint) { - var ( - tcp = make(map[string]int) - quic = make(map[string]int) - ) - for _, epnt := range input { - switch epnt.Network { - case NetworkQUIC: - quic[epnt.Address]++ - case NetworkTCP: - tcp[epnt.Address]++ - } - } - for addr := range tcp { - out = append(out, &Endpoint{ - Network: NetworkTCP, - Address: addr, - }) - } - for addr := range quic { - out = append(out, &Endpoint{ - Network: NetworkQUIC, - Address: addr, - }) - } - return -} - -// ErrCannotDeterminePortFromURL indicates that we could not determine -// the correct port from the URL authority and scheme. -var ErrCannotDeterminePortFromURL = errors.New("cannot determine port from URL") - -// urlPort returns the port implied by an URL. -// -// If the URL contains an explicit port, we return it. Otherwise we -// attempt to guess the port based on the URL scheme. -// -// We currently recognize only these schemes: -// -// - "https"; -// -// - "http". -// -// Arguments -// -// - URL is the URL for which to guess the port. -// -// Return value -// -// Either a string containing the port or an error. -func (mx *Measurer) urlPort(URL *url.URL) (string, error) { - switch { - case URL.Port() != "": - return URL.Port(), nil - case URL.Scheme == "https": - return "443", nil - case URL.Scheme == "http": - return "80", nil - default: - return "", ErrCannotDeterminePortFromURL - } -} - -// HTTPEndpoint is an HTTP/HTTPS/HTTP3 endpoint. -type HTTPEndpoint struct { - // Domain is the endpoint domain (e.g., "dns.google"). - Domain string - - // Network is the network (e.g., "tcp" or "quic"). - Network EndpointNetwork - - // Address is the endpoint address (e.g., "8.8.8.8:443"). - Address string - - // SNI is the SNI to use (only used with URL.scheme == "https"). - SNI string - - // ALPN is the ALPN to use (only used with URL.scheme == "https"). - ALPN []string - - // URL is the endpoint URL. - URL *url.URL - - // Header contains request headers. 
- Header http.Header -} - -// String converts an HTTP endpoint to a string (e.g., "8.8.8.8:443/tcp") -func (e *HTTPEndpoint) String() string { - return fmt.Sprintf("%s/%s", e.Address, e.Network) -} - -// LookupHTTPEndpoints is like LookupEndpoints but performs a -// specialized lookup for an HTTP/HTTPS URL. Such a lookup also -// includes querying the WCTH to discover extra endpoints. If -// the URL scheme is HTTPS we also query for HTTPSSvc. -// -// Arguments -// -// - ctx is the context carrying timeouts; -// -// - URL is the URL to perform the lookup for; -// -// - address is the address of the DNS over -// UDP server to use. -// -// Return value -// -// Returns either a list of endpoints or an error. The returned error -// only indicates we could not fetch _any_ endpoint. Check into the -// database (i.e., mx.DB) for precise results of each operation. -func (mx *Measurer) LookupHTTPEndpoints( - ctx context.Context, URL *url.URL, address string) ([]*HTTPEndpoint, error) { - port, err := mx.urlPort(URL) - if err != nil { - return nil, err - } - var httpsSvcEndpoints []*Endpoint - switch URL.Scheme { - case "https": // only lookup for HTTP3 endpoints when scheme is HTTPS - info, _ := mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), address) - httpsSvcEndpoints = mx.parseHTTPSSvcReply(port, info) - mx.Infof("LookupHTTPSSvcUDP endpoints=%+v", httpsSvcEndpoints) - } - endpoints, _ := mx.LookupEndpoints(ctx, URL.Hostname(), port, address) - endpoints = append(endpoints, httpsSvcEndpoints...) - wcthEndpoints, _ := mx.lookupWCTH(ctx, URL, endpoints, port) - mx.Infof("lookupWCTH endpoints=%+v", wcthEndpoints) - endpoints = append(endpoints, wcthEndpoints...) - endpoints = mx.mergeEndpoints(endpoints) - mx.Infof("mergeEndpoints endpoints=%+v", endpoints) - if len(endpoints) < 1 { - return nil, ErrLookupEndpoints - } - return mx.newHTTPEndpoints(URL, endpoints), nil -} - -// newHTTPEndpoints takes in input a list of Endpoint and -// returns in output a list of HTTPEndpoint. 
-// -// Arguments -// -// - URL is the URL for which we're discovering HTTPEndpoint; -// -// - endpoints is the list of discovered Endpoint. -// -// Return value -// -// The list of HTTPEndpoint. -func (mx *Measurer) newHTTPEndpoints( - URL *url.URL, endpoints []*Endpoint) (out []*HTTPEndpoint) { - for _, epnt := range endpoints { - out = append(out, &HTTPEndpoint{ - Domain: URL.Hostname(), - Network: epnt.Network, - Address: epnt.Address, - SNI: URL.Hostname(), - ALPN: mx.alpnForHTTPEndpoint(epnt.Network), - URL: URL, - Header: NewHTTPRequestHeaderForMeasuring(), - }) - } - return -} - -// alpnForHTTPEndpoint takes in input the network of an endpoint -// (i.e., "tcp" or "quic") and returns the corresponding ALPN. -// -// Arguments -// -// - network is the network of the endpoint. -// -// Return value -// -// The corresponding ALPN. If we do not recognize the input -// network we return a nil string array. -func (mx *Measurer) alpnForHTTPEndpoint(network EndpointNetwork) []string { - switch network { - case NetworkQUIC: - return []string{"h3"} - case NetworkTCP: - return []string{"h2", "http/1.1"} - default: - return nil - } -} - -// lookupWCTH performs an Endpoint looking using the WCTH (i.e., +// LookupWCTH performs an Endpoint lookup using the WCTH (i.e., // the Web Connectivity Test Helper) web service. // -// Arguments +// Arguments: // // - ctx is the context carrying timeouts; // @@ -704,35 +374,21 @@ func (mx *Measurer) alpnForHTTPEndpoint(network EndpointNetwork) []string { // // - port is the port for the endpoints. // -// Return value +// This function will safely discard any non-TCP endpoints +// in the input list and will only use TCP endpoints. // -// Either a list of endpoints (which may possibly be empty) in case -// of success or an error in case of failure. Note that the returned -// list of endpoints ONLY includes the ones discovered via WCTH. 
-func (mx *Measurer) lookupWCTH(ctx context.Context, - URL *url.URL, endpoints []*Endpoint, port string) ([]*Endpoint, error) { +// Returns a measurement. +func (mx *Measurer) LookupWCTH(ctx context.Context, URL *url.URL, + endpoints []*Endpoint, port string) *Measurement { const timeout = 30 * time.Second mx.Infof("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", mx.WCTHURL, URL.String(), endpoints, port, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() w := NewWCTHWorker(mx.Logger, mx.DB, mx.HTTPClient, mx.WCTHURL) - resp, err := w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints)) - if err != nil { - return nil, err - } - var out []*Endpoint - for _, addr := range resp.DNS.Addrs { - if net.ParseIP(addr) == nil { - continue // the WCTH may also return the CNAME - } - addrport := net.JoinHostPort(addr, port) - out = append(out, &Endpoint{ - Network: NetworkTCP, - Address: addrport, - }) - } - return out, nil + id := mx.nextMeasurement() + _, _ = w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints)) + return NewMeasurement(mx.DB, id) } // onlyTCPEndpoints takes in input a list of endpoints and returns @@ -747,285 +403,7 @@ func (mx *Measurer) onlyTCPEndpoints(endpoints []*Endpoint) (out []string) { return } -// parseLookupHostReply builds a list of endpoints from a LookupHost reply. -// -// Arguments: -// -// - port is the port to use for the endpoints; -// -// - addrs is the possibly empty list of addresses from LookupHost. -// -// Return value -// -// A possibly empty list of endpoints. -func (mx *Measurer) parseLookupHostReply(port string, addrs []string) (out []*Endpoint) { - for _, addr := range addrs { - out = append(out, &Endpoint{ - Network: "tcp", - Address: net.JoinHostPort(addr, port), - }) - } - return -} - -// ParseHTTPSSvcReply builds a list of endpoints from the LookupHTTPSSvc result. 
-// -// Arguments -// -// - port is the port for the endpoints; -// -// - info is either nil or contains the result of the LookupHostHTTPSSvc call. -// -// Return value -// -// A possibly-empty list of endpoints. -func (mx *Measurer) parseHTTPSSvcReply(port string, info HTTPSSvc) (out []*Endpoint) { - if info == nil { - return - } - for _, proto := range info.ALPN() { - switch proto { - case "h3": // we do not support experimental protocols like h3-29 anymore - for _, addr := range info.IPv4Hint() { - out = append(out, &Endpoint{ - Network: "quic", - Address: net.JoinHostPort(addr, port), - }) - } - for _, addr := range info.IPv6Hint() { - out = append(out, &Endpoint{ - Network: "quic", - Address: net.JoinHostPort(addr, port), - }) - } - return // we found what we were looking for - } - } - return -} - // Infof formats and logs an informational message using mx.Logger. func (mx *Measurer) Infof(format string, v ...interface{}) { mx.Logger.Infof(format, v...) } - -// SelectAllFromConnect selects all the entries inside of the -// Connect table that have the given MeasurementID. -// -// Arguments -// -// - id is the MeasurementID to filter for. -// -// Return value -// -// A possibly-empty list of events. -func (mx *Measurer) SelectAllFromConnect(id int64) (out []*NetworkEvent) { - for _, ev := range mx.DB.SelectAllFromDial() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromReadWrite is like selectAllFromConnect except -// that it works on the table named ReadWrite. -func (mx *Measurer) SelectAllFromReadWrite(id int64) (out []*NetworkEvent) { - for _, ev := range mx.DB.SelectAllFromReadWrite() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromClose is like selectAllFromConnect except -// that it works on the table named Close. 
-func (mx *Measurer) SelectAllFromClose(id int64) (out []*NetworkEvent) { - for _, ev := range mx.DB.SelectAllFromClose() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromTLSHandshake is like selectAllFromConnect except -// that it works on the table named TLSHandshake. -func (mx *Measurer) SelectAllFromTLSHandshake(id int64) (out []*TLSHandshakeEvent) { - for _, ev := range mx.DB.SelectAllFromTLSHandshake() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromQUICHandshake is like selectAllFromConnect except -// that it works on the table named QUICHandshake. -func (mx *Measurer) SelectAllFromQUICHandshake(id int64) (out []*QUICHandshakeEvent) { - for _, ev := range mx.DB.SelectAllFromQUICHandshake() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromLookupHost is like selectAllFromConnect except -// that it works on the table named LookupHost. -func (mx *Measurer) SelectAllFromLookupHost(id int64) (out []*LookupHostEvent) { - for _, ev := range mx.DB.SelectAllFromLookupHost() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromLookupHTTPSSvc is like selectAllFromConnect except -// that it works on the table named LookupHTTPSSvc. -func (mx *Measurer) SelectAllFromLookupHTTPSSvc(id int64) (out []*LookupHTTPSSvcEvent) { - for _, ev := range mx.DB.SelectAllFromLookupHTTPSSvc() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromDNSRoundTrip is like selectAllFromConnect except -// that it works on the table named DNSRoundTrip. 
-func (mx *Measurer) SelectAllFromDNSRoundTrip(id int64) (out []*DNSRoundTripEvent) { - for _, ev := range mx.DB.SelectAllFromDNSRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromHTTPRoundTrip is like selectAllFromConnect except -// that it works on the table named HTTPRoundTrip. -func (mx *Measurer) SelectAllFromHTTPRoundTrip(id int64) (out []*HTTPRoundTripEvent) { - for _, ev := range mx.DB.SelectAllFromHTTPRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromHTTPRedirect is like selectAllFromConnect except -// that it works on the table named HTTPRedirect. -func (mx *Measurer) SelectAllFromHTTPRedirect(id int64) (out []*HTTPRedirectEvent) { - for _, ev := range mx.DB.SelectAllFromHTTPRedirect() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// BaseMeasurement groups all the events that have the same MeasurementID. -type BaseMeasurement struct { - // Oddities lists all the oddities inside this measurement. See - // NewBaseMeasurement's docs for more info. - Oddities []Oddity - - // Connect contains all the connect operations. - Connect []*NetworkEvent - - // ReadWrite contains all the read and write operations. - ReadWrite []*NetworkEvent - - // Close contains all the close operations. - Close []*NetworkEvent - - // TLSHandshake contains all the TLS handshakes. - TLSHandshake []*TLSHandshakeEvent - - // QUICHandshake contains all the QUIC handshakes. - QUICHandshake []*QUICHandshakeEvent - - // LookupHost contains all the host lookups. - LookupHost []*LookupHostEvent - - // LookupHTTPSSvc contains all the HTTPSSvc lookups. - LookupHTTPSSvc []*LookupHTTPSSvcEvent - - // DNSRoundTrip contains all the DNS round trips. - DNSRoundTrip []*DNSRoundTripEvent - - // HTTPRoundTrip contains all the HTTP round trips. - HTTPRoundTrip []*HTTPRoundTripEvent - - // HTTPRedirect contains all the redirections. 
- HTTPRedirect []*HTTPRedirectEvent -} - -// NewBaseMeasurement creates a new Base Measurement by gathering all -// the events inside the database with a given MeasurementID. -// -// As part of the process, this function computes the Oddities field by -// gathering the oddities of the following operations: -// -// - connect; -// -// - tlsHandshake; -// -// - quicHandshake; -// -// - lookupHost; -// -// - httpRoundTrip. -// -// Arguments -// -// - id is the MeasurementID. -// -// Return value -// -// A valid BaseMeasurement containing possibly empty lists of events. -func (mx *Measurer) NewBaseMeasurement(id int64) *BaseMeasurement { - m := &BaseMeasurement{ - Connect: mx.SelectAllFromConnect(id), - ReadWrite: mx.SelectAllFromReadWrite(id), - Close: mx.SelectAllFromClose(id), - TLSHandshake: mx.SelectAllFromTLSHandshake(id), - QUICHandshake: mx.SelectAllFromQUICHandshake(id), - LookupHost: mx.SelectAllFromLookupHost(id), - LookupHTTPSSvc: mx.SelectAllFromLookupHTTPSSvc(id), - DNSRoundTrip: mx.SelectAllFromDNSRoundTrip(id), - HTTPRoundTrip: mx.SelectAllFromHTTPRoundTrip(id), - HTTPRedirect: mx.SelectAllFromHTTPRedirect(id), - } - m.computeOddities() - return m -} - -// computeOddities computes all the oddities inside m. See -// NewBaseMeasurement's docs for more details. 
-func (m *BaseMeasurement) computeOddities() { - unique := make(map[Oddity]bool) - for _, ev := range m.Connect { - unique[ev.Oddity] = true - } - for _, ev := range m.TLSHandshake { - unique[ev.Oddity] = true - } - for _, ev := range m.QUICHandshake { - unique[ev.Oddity] = true - } - for _, ev := range m.LookupHost { - unique[ev.Oddity] = true - } - for _, ev := range m.HTTPRoundTrip { - unique[ev.Oddity] = true - } - for key := range unique { - if key != "" { - m.Oddities = append(m.Oddities, key) - } - } -} diff --git a/internal/measurex/oddity.go b/internal/measurex/oddity.go index b97f98d350..6cd6e19b76 100644 --- a/internal/measurex/oddity.go +++ b/internal/measurex/oddity.go @@ -6,7 +6,7 @@ package measurex // the test helper see different results. type Oddity string -// This enumeration lists all known oddities +// This enumeration lists all known oddities. var ( // tcp.connect OddityTCPConnectTimeout = Oddity("tcp.connect.timeout") diff --git a/internal/measurex/origin.go b/internal/measurex/origin.go index 2e32ecbdd4..c93811f56b 100644 --- a/internal/measurex/origin.go +++ b/internal/measurex/origin.go @@ -3,8 +3,10 @@ package measurex // Origin is the origin of a measurement. type Origin string -// These are the possible origins. var ( + // OriginProbe means that the probe performed this measurement. OriginProbe = Origin("probe") - OriginTH = Origin("th") + + // OriginTH means that the test helper performed this measurement. + OriginTH = Origin("th") ) diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index 06b20f87b9..483a2a1efb 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -15,8 +15,9 @@ import ( // QUICListener creates listening connections for QUIC. type QUICListener = netxlite.QUICListener -// WrapQUICListener wraps a netxlite.QUICListener to add measurex capabilities. 
-func WrapQUICListener(origin Origin, db DB, ql netxlite.QUICListener) QUICListener { +// WrapQUICListener takes in input a netxlite.QUICListener and returns +// a new listener that saves measurements into the DB. +func WrapQUICListener(origin Origin, db EventDB, ql netxlite.QUICListener) QUICListener { return &quicListenerx{ QUICListener: ql, db: db, @@ -26,11 +27,14 @@ func WrapQUICListener(origin Origin, db DB, ql netxlite.QUICListener) QUICListen type quicListenerx struct { netxlite.QUICListener - db DB + db EventDB origin Origin } -func (ql *quicListenerx) Listen(addr *net.UDPAddr) (quicx.UDPLikeConn, error) { +// QUICPacketConn is an UDP PacketConn used by QUIC. +type QUICPacketConn = quicx.UDPLikeConn + +func (ql *quicListenerx) Listen(addr *net.UDPAddr) (QUICPacketConn, error) { pconn, err := ql.QUICListener.Listen(addr) if err != nil { return nil, err @@ -47,15 +51,15 @@ func (ql *quicListenerx) Listen(addr *net.UDPAddr) (quicx.UDPLikeConn, error) { type quicUDPLikeConnx struct { quicx.UDPLikeConn connID int64 - db DB + db EventDB localAddr string origin Origin } func (c *quicUDPLikeConnx) WriteTo(p []byte, addr net.Addr) (int, error) { - started := time.Now() + started := c.db.ElapsedTime() count, err := c.UDPLikeConn.WriteTo(p, addr) - finished := time.Now() + finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, MeasurementID: c.db.MeasurementID(), @@ -73,9 +77,9 @@ func (c *quicUDPLikeConnx) WriteTo(p []byte, addr net.Addr) (int, error) { } func (c *quicUDPLikeConnx) ReadFrom(b []byte) (int, net.Addr, error) { - started := time.Now() + started := c.db.ElapsedTime() count, addr, err := c.UDPLikeConn.ReadFrom(b) - finished := time.Now() + finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, MeasurementID: c.db.MeasurementID(), @@ -100,9 +104,9 @@ func (c *quicUDPLikeConnx) addrStringIfNotNil(addr net.Addr) (out string) { } func (c *quicUDPLikeConnx) Close() error { - started := 
time.Now() + started := c.db.ElapsedTime() err := c.UDPLikeConn.Close() - finished := time.Now() + finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, MeasurementID: c.db.MeasurementID(), @@ -119,6 +123,8 @@ func (c *quicUDPLikeConnx) Close() error { return err } +// LocalAddr returns the local address and also implements a +// hack to pass to the session the ConnID. func (c *quicUDPLikeConnx) LocalAddr() net.Addr { localAddr := c.UDPLikeConn.LocalAddr() if localAddr == nil { @@ -132,14 +138,16 @@ type quicLocalAddrx struct { connID int64 } -// QUICEarlySession is the type we use to wrap quic.EarlySession +// QUICEarlySession is the type we use to wrap quic.EarlySession. This +// kind of session knows about the underlying ConnID. type QUICEarlySession interface { quic.EarlySession ConnID() int64 } -// QUICDialer creates QUIC sessions. +// QUICDialer creates QUIC sessions. This kind of dialer will +// save QUIC handshake measurements into the DB. type QUICDialer interface { DialContext(ctx context.Context, address string, tlsConfig *tls.Config) (QUICEarlySession, error) @@ -158,8 +166,8 @@ type QUICHandshakeEvent struct { SNI string ALPN []string SkipVerify bool - Started time.Time - Finished time.Time + Started time.Duration + Finished time.Duration Error error Oddity Oddity TLSVersion string @@ -168,8 +176,9 @@ type QUICHandshakeEvent struct { PeerCerts [][]byte } -// WrapQUICDialer wraps a netxlite.QUICDialer to add measurex capabilities. -func WrapQUICDialer(origin Origin, db DB, dialer netxlite.QUICDialer) QUICDialer { +// WrapQUICDialer creates a new QUICDialer that will save +// QUIC handshake events into the DB. 
+func WrapQUICDialer(origin Origin, db EventDB, dialer netxlite.QUICDialer) QUICDialer { return &quicDialerx{ QUICDialer: dialer, origin: origin, @@ -179,13 +188,13 @@ func WrapQUICDialer(origin Origin, db DB, dialer netxlite.QUICDialer) QUICDialer type quicDialerx struct { netxlite.QUICDialer - db DB + db EventDB origin Origin } func (qh *quicDialerx) DialContext(ctx context.Context, address string, tlsConfig *tls.Config) (QUICEarlySession, error) { - started := time.Now() + started := qh.db.ElapsedTime() var ( localAddr *quicLocalAddrx state tls.ConnectionState @@ -205,7 +214,7 @@ func (qh *quicDialerx) DialContext(ctx context.Context, sess, err = nil, ctx.Err() } } - finished := time.Now() + finished := qh.db.ElapsedTime() qh.db.InsertIntoQUICHandshake(&QUICHandshakeEvent{ Origin: qh.origin, MeasurementID: qh.db.MeasurementID(), @@ -246,15 +255,6 @@ func (qh *quicDialerx) computeOddity(err error) Oddity { } } -type quicEarlySessionx struct { - quic.EarlySession - connID int64 -} - -func (qes *quicEarlySessionx) ConnID() int64 { - return qes.connID -} - func (qh *quicDialerx) connIDIfNotNil(addr *quicLocalAddrx) (out int64) { if addr != nil { out = addr.connID @@ -272,3 +272,12 @@ func (qh *quicDialerx) localAddrIfNotNil(addr *quicLocalAddrx) (out string) { func (qh *quicDialerx) CloseIdleConnections() { qh.QUICDialer.CloseIdleConnections() } + +type quicEarlySessionx struct { + quic.EarlySession + connID int64 +} + +func (qes *quicEarlySessionx) ConnID() int64 { + return qes.connID +} diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 5f9a2f473f..7d1b2a92ac 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -12,19 +12,51 @@ import ( // HTTPSSvc is the result returned by HTTPSSvc queries. type HTTPSSvc = dnsx.HTTPSSvc -// Resolver is the resolver type we use. +// Resolver is the resolver type we use. This resolver will +// store resolve events into the DB. 
type Resolver interface { netxlite.Resolver } -// WrapResolver wraps a netxlite.Resolver to add measurex capabilities. -func WrapResolver(origin Origin, db DB, r netxlite.Resolver) Resolver { +// WrapResolver wraps a Resolver so that we save measurements into the DB. +func WrapResolver(origin Origin, db EventDB, r netxlite.Resolver) Resolver { return &resolverx{Resolver: r, db: db, origin: origin} } +// NewResolverSystem is a convenience factory for creating a +// system resolver that saves measurements into a DB. +func NewResolverSystem(origin Origin, db EventDB, logger Logger) Resolver { + return WrapResolver(origin, db, netxlite.NewResolverStdlib(logger)) +} + +// NewResolverUDP is a convenience factory for creating a Resolver +// using UDP that saves measurements into the DB. +// +// Arguments: +// +// - origin is OriginProbe or OriginTH; +// +// - db is where to save events; +// +// - logger is the logger; +// +// - address is the resolver address (e.g., "1.1.1.1:53"). +func NewResolverUDP(origin Origin, db EventDB, logger Logger, address string) Resolver { + return WrapResolver(origin, db, &netxlite.ResolverLogger{ + Resolver: netxlite.WrapResolver(logger, dnsx.NewSerialResolver( + WrapDNSXRoundTripper(origin, db, dnsx.NewDNSOverUDP( + &netxliteDialerAdapter{ + NewDialerWithSystemResolver(origin, db, logger), + }, + address, + )))), + Logger: logger, + }) +} + type resolverx struct { netxlite.Resolver - db DB + db EventDB origin Origin } @@ -35,17 +67,17 @@ type LookupHostEvent struct { Network string Address string Domain string - Started time.Time - Finished time.Time + Started time.Duration + Finished time.Duration Error error Oddity Oddity Addrs []string } func (r *resolverx) LookupHost(ctx context.Context, domain string) ([]string, error) { - started := time.Now() + started := r.db.ElapsedTime() addrs, err := r.Resolver.LookupHost(ctx, domain) - finished := time.Now() + finished := r.db.ElapsedTime() r.db.InsertIntoLookupHost(&LookupHostEvent{ Origin:
r.origin, MeasurementID: r.db.MeasurementID(), @@ -64,7 +96,7 @@ func (r *resolverx) LookupHost(ctx context.Context, domain string) ([]string, er func (r *resolverx) computeOddityLookupHost(addrs []string, err error) Oddity { if err == nil { for _, addr := range addrs { - if IsBogon(addr) { + if isBogon(addr) { return OddityDNSLookupBogon } } @@ -87,9 +119,11 @@ func (r *resolverx) computeOddityLookupHost(addrs []string, err error) Oddity { type LookupHTTPSSvcEvent struct { Origin Origin MeasurementID int64 + Network string + Address string Domain string - Started time.Time - Finished time.Time + Started time.Duration + Finished time.Duration Error error Oddity Oddity IPv4 []string @@ -98,12 +132,14 @@ type LookupHTTPSSvcEvent struct { } func (r *resolverx) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain string) (HTTPSSvc, error) { - started := time.Now() + started := r.db.ElapsedTime() https, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) - finished := time.Now() + finished := r.db.ElapsedTime() ev := &LookupHTTPSSvcEvent{ Origin: r.origin, MeasurementID: r.db.MeasurementID(), + Network: r.Resolver.Network(), + Address: r.Resolver.Address(), Domain: domain, Started: started, Finished: finished, diff --git a/internal/measurex/saver.go b/internal/measurex/saver.go index 9577328d6b..9db9c05a8b 100644 --- a/internal/measurex/saver.go +++ b/internal/measurex/saver.go @@ -1,8 +1,17 @@ package measurex -import "sync" +import ( + "errors" + "fmt" + "net" + "net/http" + "net/url" + "sync" + "time" +) -// Saver is a DB that saves measurements. +// Saver is an EventDB that saves events and allows to +// ask questions regarding the saved events. 
type Saver struct { dialTable []*NetworkEvent readWriteTable []*NetworkEvent @@ -14,17 +23,49 @@ type Saver struct { httpRoundTripTable []*HTTPRoundTripEvent httpRedirectTable []*HTTPRedirectEvent quicHandshakeTable []*QUICHandshakeEvent - connID int64 - measurementID int64 - mu sync.Mutex + + begin time.Time + connID int64 + measurementID int64 + mu sync.Mutex +} + +var _ EventDB = &Saver{} + +// NewSaver creates a new instance of Saver. +func NewSaver(begin time.Time) *Saver { + return &Saver{begin: begin} +} + +// ElapsedTime implements EventDB.ElapsedTime. +func (s *Saver) ElapsedTime() time.Duration { + return time.Since(s.begin) } +// DeleteAll deletes all the saved data. +func (s *Saver) DeleteAll() { + s.mu.Lock() + s.dialTable = nil + s.readWriteTable = nil + s.closeTable = nil + s.tlsHandshakeTable = nil + s.lookupHostTable = nil + s.lookupHTTPSvcTable = nil + s.dnsRoundTripTable = nil + s.httpRoundTripTable = nil + s.httpRedirectTable = nil + s.quicHandshakeTable = nil + s.mu.Unlock() +} + +// InsertIntoDial implements EventDB.InsertIntoDial. func (s *Saver) InsertIntoDial(ev *NetworkEvent) { s.mu.Lock() s.dialTable = append(s.dialTable, ev) s.mu.Unlock() } +// SelectAllFromDial returns all dial events. func (s *Saver) SelectAllFromDial() (out []*NetworkEvent) { s.mu.Lock() out = append(out, s.dialTable...) @@ -32,12 +73,14 @@ func (s *Saver) SelectAllFromDial() (out []*NetworkEvent) { return } +// InsertIntoReadWrite implements EventDB.InsertIntoReadWrite. func (s *Saver) InsertIntoReadWrite(ev *NetworkEvent) { s.mu.Lock() s.readWriteTable = append(s.readWriteTable, ev) s.mu.Unlock() } +// SelectAllFromReadWrite returns all I/O events. func (s *Saver) SelectAllFromReadWrite() (out []*NetworkEvent) { s.mu.Lock() out = append(out, s.readWriteTable...) @@ -45,12 +88,14 @@ func (s *Saver) SelectAllFromReadWrite() (out []*NetworkEvent) { return } +// InsertIntoClose implements EventDB.InsertIntoClose. 
func (s *Saver) InsertIntoClose(ev *NetworkEvent) { s.mu.Lock() s.closeTable = append(s.closeTable, ev) s.mu.Unlock() } +// SelectAllFromClose returns all close events. func (s *Saver) SelectAllFromClose() (out []*NetworkEvent) { s.mu.Lock() out = append(out, s.closeTable...) @@ -58,12 +103,14 @@ func (s *Saver) SelectAllFromClose() (out []*NetworkEvent) { return } +// InsertIntoTLSHandshake implements EventDB.InsertIntoTLSHandshake. func (s *Saver) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { s.mu.Lock() s.tlsHandshakeTable = append(s.tlsHandshakeTable, ev) s.mu.Unlock() } +// SelectAllFromTLSHandshake returns all TLS handshake events. func (s *Saver) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { s.mu.Lock() out = append(out, s.tlsHandshakeTable...) @@ -71,12 +118,14 @@ func (s *Saver) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { return } +// InsertIntoLookupHost implements EventDB.InsertIntoLookupHost. func (s *Saver) InsertIntoLookupHost(ev *LookupHostEvent) { s.mu.Lock() s.lookupHostTable = append(s.lookupHostTable, ev) s.mu.Unlock() } +// SelectAllFromLookupHost returns all the lookup host events. func (s *Saver) SelectAllFromLookupHost() (out []*LookupHostEvent) { s.mu.Lock() out = append(out, s.lookupHostTable...) @@ -84,12 +133,14 @@ func (s *Saver) SelectAllFromLookupHost() (out []*LookupHostEvent) { return } +// InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc func (s *Saver) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { s.mu.Lock() s.lookupHTTPSvcTable = append(s.lookupHTTPSvcTable, ev) s.mu.Unlock() } +// SelectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. func (s *Saver) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { s.mu.Lock() out = append(out, s.lookupHTTPSvcTable...) @@ -97,12 +148,14 @@ func (s *Saver) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { return } +// InsertIntoDNSRoundTrip implements EventDB.InsertIntoDNSRoundTrip. 
func (s *Saver) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { s.mu.Lock() s.dnsRoundTripTable = append(s.dnsRoundTripTable, ev) s.mu.Unlock() } +// SelectAllFromDNSRoundTrip returns all DNS round trip events. func (s *Saver) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { s.mu.Lock() out = append(out, s.dnsRoundTripTable...) @@ -110,12 +163,14 @@ func (s *Saver) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { return } +// InsertIntoHTTPRoundTrip implements EventDB.InsertIntoHTTPRoundTrip. func (s *Saver) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { s.mu.Lock() s.httpRoundTripTable = append(s.httpRoundTripTable, ev) s.mu.Unlock() } +// SelectAllFromHTTPRoundTrip returns all HTTP round trip events. func (s *Saver) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { s.mu.Lock() out = append(out, s.httpRoundTripTable...) @@ -123,12 +178,14 @@ func (s *Saver) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { return } +// InsertIntoHTTPRedirect implements EventDB.InsertIntoHTTPRedirect. func (s *Saver) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { s.mu.Lock() s.httpRedirectTable = append(s.httpRedirectTable, ev) s.mu.Unlock() } +// SelectAllFromHTTPRedirect returns all HTTP redirections. func (s *Saver) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { s.mu.Lock() out = append(out, s.httpRedirectTable...) @@ -136,12 +193,14 @@ func (s *Saver) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { return } +// InsertIntoQUICHandshake implements EventDB.InsertIntoQUICHandshake. func (s *Saver) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { s.mu.Lock() s.quicHandshakeTable = append(s.quicHandshakeTable, ev) s.mu.Unlock() } +// SelectAllFromQUICHandshake returns all QUIC handshake events. func (s *Saver) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { s.mu.Lock() out = append(out, s.quicHandshakeTable...) 
@@ -149,6 +208,7 @@ func (s *Saver) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { return } +// NextConnID implements EventDB.NextConnID. func (s *Saver) NextConnID() (out int64) { s.mu.Lock() s.connID++ // start from 1 @@ -157,6 +217,7 @@ func (s *Saver) NextConnID() (out int64) { return } +// MeasurementID implements EventDB.MeasurementID. func (s *Saver) MeasurementID() (out int64) { s.mu.Lock() out = s.measurementID @@ -164,6 +225,8 @@ func (s *Saver) MeasurementID() (out int64) { return } +// NextMeasurement increments the internal MeasurementID and +// returns it, so that later you can reference the current measurement. func (s *Saver) NextMeasurement() (out int64) { s.mu.Lock() s.measurementID++ // start from 1 @@ -171,3 +234,297 @@ func (s *Saver) NextMeasurement() (out int64) { s.mu.Unlock() return } + +// SelectAllFromDialWithMeasurementID calls SelectAllFromConnect +// and filters the result by MeasurementID. +func (s *Saver) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { + for _, ev := range s.SelectAllFromDial() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromReadWriteWithMeasurementID calls SelectAllFromReadWrite and +// filters the result by MeasurementID. +func (s *Saver) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { + for _, ev := range s.SelectAllFromReadWrite() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromCloseWithMeasurementID calls SelectAllFromClose +// and filters the result by MeasurementID. +func (s *Saver) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { + for _, ev := range s.SelectAllFromClose() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromTLSHandshakeWithMeasurementID calls SelectAllFromTLSHandshake +// and filters the result by MeasurementID. 
+func (s *Saver) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { + for _, ev := range s.SelectAllFromTLSHandshake() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromQUICHandshakeWithMeasurementID calls SelectAllFromQUICSHandshake +// and filters the result by MeasurementID. +func (s *Saver) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { + for _, ev := range s.SelectAllFromQUICHandshake() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromLookupHostWithMeasurementID calls SelectAllFromLookupHost +// and filters the result by MeasurementID. +func (s *Saver) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { + for _, ev := range s.SelectAllFromLookupHost() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromLookupHTTPSSvcWithMeasurementID calls SelectAllFromHTTPSSvc +// and filters the result by MeasurementID. +func (s *Saver) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { + for _, ev := range s.SelectAllFromLookupHTTPSSvc() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromDNSRoundTripWithMeasurementID calls SelectAllFromDNSRoundTrip +// and filters the result by MeasurementID. +func (s *Saver) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { + for _, ev := range s.SelectAllFromDNSRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromHTTPRoundTripWithMeasurementID calls SelectAllFromHTTPRoundTrip +// and filters the result by MeasurementID. 
+func (s *Saver) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { + for _, ev := range s.SelectAllFromHTTPRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromHTTPRedirectWithMeasurementID calls SelectAllFromHTTPRedirect +// and filters the result by MeasurementID. +func (s *Saver) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { + for _, ev := range s.SelectAllFromHTTPRedirect() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// EndpointNetwork is the network of an endpoint. +type EndpointNetwork string + +const ( + // NetworkTCP identifies endpoints using TCP. + NetworkTCP = EndpointNetwork("tcp") + + // NetworkQUIC identifies endpoints using QUIC. + NetworkQUIC = EndpointNetwork("quic") +) + +// Endpoint is an endpoint for a domain. +type Endpoint struct { + // Network is the network (e.g., "tcp", "quic") + Network EndpointNetwork + + // Address is the endpoint address (e.g., "8.8.8.8:443") + Address string +} + +// String converts an endpoint to a string (e.g., "8.8.8.8:443/tcp") +func (e *Endpoint) String() string { + return fmt.Sprintf("%s/%s", e.Address, e.Network) +} + +// SelectAllEndpointsForDomain returns all the +// endpoints for a specific domain. +// +// Arguments: +// +// - domain is the domain we want to connect to; +// +// - port is the port for the endpoint. +func (s *Saver) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { + out = append(out, s.selectAllTCPEndpoints(domain, port)...) + out = append(out, s.selectAllQUICEndpoints(domain, port)...) 
+ out = s.deduplicateEndpoints(out) + return +} + +func (s *Saver) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { + for _, entry := range s.SelectAllFromLookupHost() { + if domain != entry.Domain { + continue + } + for _, addr := range entry.Addrs { + if net.ParseIP(addr) == nil { + continue // skip CNAME entries courtesy the WCTH + } + out = append(out, s.newEndpoint(addr, port, NetworkTCP)) + } + } + return +} + +func (s *Saver) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { + for _, entry := range s.SelectAllFromLookupHTTPSSvc() { + if domain != entry.Domain { + continue + } + if !s.supportsHTTP3(entry) { + continue + } + addrs := append([]string{}, entry.IPv4...) + for _, addr := range append(addrs, entry.IPv6...) { + out = append(out, s.newEndpoint(addr, port, NetworkQUIC)) + } + } + return +} + +func (s *Saver) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { + duplicates := make(map[string]*Endpoint) + for _, epnt := range epnts { + duplicates[epnt.String()] = epnt + } + for _, epnt := range duplicates { + out = append(out, epnt) + } + return +} + +func (s *Saver) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { + return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} +} + +func (s *Saver) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { + for _, alpn := range entry.ALPN { + switch alpn { + case "h3": + return true + } + } + return false +} + +// HTTPEndpoint is an HTTP/HTTPS/HTTP3 endpoint. +type HTTPEndpoint struct { + // Domain is the endpoint domain (e.g., "dns.google"). + Domain string + + // Network is the network (e.g., "tcp" or "quic"). + Network EndpointNetwork + + // Address is the endpoint address (e.g., "8.8.8.8:443"). + Address string + + // SNI is the SNI to use (only used with URL.scheme == "https"). + SNI string + + // ALPN is the ALPN to use (only used with URL.scheme == "https"). + ALPN []string + + // URL is the endpoint URL. 
+ URL *url.URL + + // Header contains request headers. + Header http.Header +} + +// String converts an HTTP endpoint to a string (e.g., "8.8.8.8:443/tcp") +func (e *HTTPEndpoint) String() string { + return fmt.Sprintf("%s/%s", e.Address, e.Network) +} + +// SelectAllHTTPEndpointsForDomainAndMeasurementID returns all the +// HTTPEndpoints matching a specific domain and MeasurementID. +// +// Arguments: +// +// - URL is the URL for which we want endpoints; +// +// Returns a list of endpoints or an error. +func (s *Saver) SelectAllHTTPEndpointsForDomain(URL *url.URL) ([]*HTTPEndpoint, error) { + domain := URL.Hostname() + port, err := PortFromURL(URL) + if err != nil { + return nil, err + } + epnts := s.SelectAllEndpointsForDomain(domain, port) + var out []*HTTPEndpoint + for _, epnt := range epnts { + out = append(out, &HTTPEndpoint{ + Domain: domain, + Network: epnt.Network, + Address: epnt.Address, + SNI: domain, + ALPN: s.alpnForHTTPEndpoint(epnt.Network), + URL: URL, + Header: NewHTTPRequestHeaderForMeasuring(), + }) + } + return out, nil +} + +// ErrCannotDeterminePortFromURL indicates that we could not determine +// the correct port from the URL authority and scheme. +var ErrCannotDeterminePortFromURL = errors.New("cannot determine port from URL") + +// PortFromURL returns the port determined from the URL or an error. 
+func PortFromURL(URL *url.URL) (string, error) { + switch { + case URL.Port() != "": + return URL.Port(), nil + case URL.Scheme == "https": + return "443", nil + case URL.Scheme == "http": + return "80", nil + default: + return "", ErrCannotDeterminePortFromURL + } +} + +func (s *Saver) alpnForHTTPEndpoint(network EndpointNetwork) []string { + switch network { + case NetworkQUIC: + return []string{"h3"} + case NetworkTCP: + return []string{"h2", "http/1.1"} + default: + return nil + } +} diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index af8c346725..685c6ef9d5 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -19,26 +19,25 @@ type TLSConn interface { ConnID() int64 } -// TLSHandshaker is the TLS handshaker type we use. +// TLSHandshaker is the TLS handshaker type we use. This handshaker +// will save TLS handshake events into the DB. type TLSHandshaker interface { Handshake(ctx context.Context, conn Conn, config *tls.Config) (TLSConn, error) } -// WrapTLSHandshaker wraps a netxlite.TLSHandshaker to add measurex capabilities. -func WrapTLSHandshaker(origin Origin, db DB, thx netxlite.TLSHandshaker) TLSHandshaker { +// WrapTLSHandshaker wraps a netxlite.TLSHandshaker to return a new +// instance of TLSHandshaker that saves events into the DB. +func WrapTLSHandshaker(origin Origin, db EventDB, thx netxlite.TLSHandshaker) TLSHandshaker { return &tlsHandshakerx{TLSHandshaker: thx, db: db, origin: origin} } type tlsHandshakerx struct { netxlite.TLSHandshaker - db DB + db EventDB origin Origin } // TLSHandshakeEvent contains a TLS handshake event. -// -// Note that EndpointID and HTTPRoundTripID only make sense when -// the DB we're using enforces precise HTTP round trips. 
type TLSHandshakeEvent struct { Origin Origin MeasurementID int64 @@ -50,8 +49,8 @@ type TLSHandshakeEvent struct { SNI string ALPN []string SkipVerify bool - Started time.Time - Finished time.Time + Started time.Duration + Finished time.Duration Error error Oddity Oddity TLSVersion string @@ -65,9 +64,9 @@ func (thx *tlsHandshakerx) Handshake(ctx context.Context, network := conn.RemoteAddr().Network() remoteAddr := conn.RemoteAddr().String() localAddr := conn.LocalAddr().String() - started := time.Now() + started := thx.db.ElapsedTime() tconn, state, err := thx.TLSHandshaker.Handshake(ctx, conn, config) - finished := time.Now() + finished := thx.db.ElapsedTime() thx.db.InsertIntoTLSHandshake(&TLSHandshakeEvent{ Origin: thx.origin, MeasurementID: thx.db.MeasurementID(), diff --git a/internal/measurex/wcth.go b/internal/measurex/wcth.go index 53b1cb3365..271faab85e 100644 --- a/internal/measurex/wcth.go +++ b/internal/measurex/wcth.go @@ -10,7 +10,6 @@ import ( "net" "net/http" "net/url" - "time" "github.com/ooni/probe-cli/v3/internal/netxlite/iox" "github.com/ooni/probe-cli/v3/internal/runtimex" @@ -19,16 +18,16 @@ import ( // WCTHWorker is the Web Connectivity test helper worker. type WCTHWorker struct { - db DB + db EventDB logger Logger clnt HTTPClient - URL string + url string } // NewWCTHWorker creates a new TestHelper instance using the // web connectivity test helper protocol. // -// Arguments +// Arguments: // // - logger is the logger to use; // @@ -40,8 +39,8 @@ type WCTHWorker struct { // // All arguments are mandatory. 
func NewWCTHWorker( - logger Logger, db DB, clnt HTTPClient, URL string) *WCTHWorker { - return &WCTHWorker{db: db, logger: logger, clnt: clnt, URL: URL} + logger Logger, db EventDB, clnt HTTPClient, URL string) *WCTHWorker { + return &WCTHWorker{db: db, logger: logger, clnt: clnt, url: URL} } var errWCTHRequestFailed = errors.New("wcth: request failed") @@ -73,8 +72,8 @@ func (w *WCTHWorker) parseResp(URL *url.URL, resp *WCTHResponse) { Network: "system", Address: "", Domain: URL.Hostname(), - Started: time.Time{}, - Finished: time.Time{}, + Started: 0, + Finished: 0, Error: w.newError(resp.DNS.Failure), Addrs: w.filterDNSAddrs(resp.DNS.Addrs), }) @@ -87,8 +86,8 @@ func (w *WCTHWorker) parseResp(URL *url.URL, resp *WCTHResponse) { Network: "tcp", RemoteAddr: addr, LocalAddr: "", - Started: time.Time{}, - Finished: time.Time{}, + Started: 0, + Finished: 0, Error: w.newError(status.Failure), Count: 0, }) @@ -104,7 +103,7 @@ func (w *WCTHWorker) newHTTPRequest(ctx context.Context, } reqBody, err := json.Marshal(wtchReq) runtimex.PanicOnError(err, "json.Marshal failed") - req, err := http.NewRequestWithContext(ctx, "POST", w.URL, bytes.NewReader(reqBody)) + req, err := http.NewRequestWithContext(ctx, "POST", w.url, bytes.NewReader(reqBody)) if err != nil { return nil, err } From 82289ede2403b68edb5fe1ecda69d9e4abae6670 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 12:12:42 +0200 Subject: [PATCH 10/53] support parallel HTTPEndpoint measurements --- .../engine/experiment/webstepsx/webstepsx.go | 2 +- internal/measurex/db.go | 557 +++++++++++++++++- internal/measurex/dnsx.go | 2 + internal/measurex/measurement.go | 36 +- internal/measurex/measurer.go | 152 ++++- internal/measurex/oddity.go | 9 +- internal/measurex/resolver.go | 2 + internal/measurex/saver.go | 530 ----------------- internal/measurex/tls.go | 12 + internal/netxlite/dnsx/serial.go | 6 + 10 files changed, 736 insertions(+), 572 deletions(-) delete mode 100644 internal/measurex/saver.go 
diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index 9f28fcaa01..39b7b5eef4 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -147,7 +147,7 @@ func (um *URLMeasurer) RunSingleStep(ctx context.Context, m.DNS = append(m.DNS, um.Mx.LookupHostUDP(ctx, URL.Hostname(), um.DNSResolverUDP)) endpoints := um.Mx.DB.SelectAllEndpointsForDomain(URL.Hostname(), port) m.Control = append(m.Control, um.Mx.LookupWCTH(ctx, URL, endpoints, port)) - httpEndpoints, err := um.Mx.DB.SelectAllHTTPEndpointsForDomain(URL) + httpEndpoints, err := um.Mx.DB.SelectAllHTTPEndpointsForURL(URL) if err != nil { return } diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 95db672a37..eddf7035b6 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -1,6 +1,14 @@ package measurex -import "time" +import ( + "errors" + "fmt" + "net" + "net/http" + "net/url" + "sync" + "time" +) // EventDB is a "database" holding events records as seen by the // networking code that needs to save events. @@ -45,3 +53,550 @@ type EventDB interface { // MeasurementID returns the current measurement ID. MeasurementID() int64 } + +// DB is an EventDB that saves events and also allows to +// ask questions regarding the saved events. 
+type DB struct { + // database tables + dialTable []*NetworkEvent + readWriteTable []*NetworkEvent + closeTable []*NetworkEvent + tlsHandshakeTable []*TLSHandshakeEvent + lookupHostTable []*LookupHostEvent + lookupHTTPSvcTable []*LookupHTTPSSvcEvent + dnsRoundTripTable []*DNSRoundTripEvent + httpRoundTripTable []*HTTPRoundTripEvent + httpRedirectTable []*HTTPRedirectEvent + quicHandshakeTable []*QUICHandshakeEvent + + // mu protects all the above tables + mu sync.Mutex + + // internals is shared with child databases + internals *dbInternals +} + +func (db *DB) clone() *DB { + return &DB{internals: db.internals} +} + +type dbInternals struct { + begin time.Time + connID int64 + measurementID int64 + mu sync.Mutex +} + +func (dbi *dbInternals) NextConnID() (out int64) { + dbi.mu.Lock() + dbi.connID++ // start from 1 + out = dbi.connID + dbi.mu.Unlock() + return +} + +func (dbi *dbInternals) MeasurementID() (out int64) { + dbi.mu.Lock() + out = dbi.measurementID + dbi.mu.Unlock() + return +} + +func (dbi *dbInternals) NextMeasurement() (out int64) { + dbi.mu.Lock() + dbi.measurementID++ // start from 1 + out = dbi.measurementID + dbi.mu.Unlock() + return +} + +var _ EventDB = &DB{} + +// NewSaver creates a new instance of Saver. +func NewSaver(begin time.Time) *DB { + return &DB{internals: &dbInternals{begin: begin}} +} + +// ElapsedTime implements EventDB.ElapsedTime. +func (db *DB) ElapsedTime() time.Duration { + return time.Since(db.internals.begin) +} + +// DeleteAll deletes all the saved data. +func (db *DB) DeleteAll() { + db.mu.Lock() + db.dialTable = nil + db.readWriteTable = nil + db.closeTable = nil + db.tlsHandshakeTable = nil + db.lookupHostTable = nil + db.lookupHTTPSvcTable = nil + db.dnsRoundTripTable = nil + db.httpRoundTripTable = nil + db.httpRedirectTable = nil + db.quicHandshakeTable = nil + db.mu.Unlock() +} + +// InsertIntoDial implements EventDB.InsertIntoDial. 
+func (db *DB) InsertIntoDial(ev *NetworkEvent) { + db.mu.Lock() + db.dialTable = append(db.dialTable, ev) + db.mu.Unlock() +} + +// SelectAllFromDial returns all dial events. +func (db *DB) SelectAllFromDial() (out []*NetworkEvent) { + db.mu.Lock() + out = append(out, db.dialTable...) + db.mu.Unlock() + return +} + +// InsertIntoReadWrite implements EventDB.InsertIntoReadWrite. +func (db *DB) InsertIntoReadWrite(ev *NetworkEvent) { + db.mu.Lock() + db.readWriteTable = append(db.readWriteTable, ev) + db.mu.Unlock() +} + +// SelectAllFromReadWrite returns all I/O events. +func (db *DB) SelectAllFromReadWrite() (out []*NetworkEvent) { + db.mu.Lock() + out = append(out, db.readWriteTable...) + db.mu.Unlock() + return +} + +// InsertIntoClose implements EventDB.InsertIntoClose. +func (db *DB) InsertIntoClose(ev *NetworkEvent) { + db.mu.Lock() + db.closeTable = append(db.closeTable, ev) + db.mu.Unlock() +} + +// SelectAllFromClose returns all close events. +func (db *DB) SelectAllFromClose() (out []*NetworkEvent) { + db.mu.Lock() + out = append(out, db.closeTable...) + db.mu.Unlock() + return +} + +// InsertIntoTLSHandshake implements EventDB.InsertIntoTLSHandshake. +func (db *DB) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { + db.mu.Lock() + db.tlsHandshakeTable = append(db.tlsHandshakeTable, ev) + db.mu.Unlock() +} + +// SelectAllFromTLSHandshake returns all TLS handshake events. +func (db *DB) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { + db.mu.Lock() + out = append(out, db.tlsHandshakeTable...) + db.mu.Unlock() + return +} + +// InsertIntoLookupHost implements EventDB.InsertIntoLookupHost. +func (db *DB) InsertIntoLookupHost(ev *LookupHostEvent) { + db.mu.Lock() + db.lookupHostTable = append(db.lookupHostTable, ev) + db.mu.Unlock() +} + +// SelectAllFromLookupHost returns all the lookup host events. +func (db *DB) SelectAllFromLookupHost() (out []*LookupHostEvent) { + db.mu.Lock() + out = append(out, db.lookupHostTable...) 
+ db.mu.Unlock() + return +} + +// InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc +func (db *DB) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { + db.mu.Lock() + db.lookupHTTPSvcTable = append(db.lookupHTTPSvcTable, ev) + db.mu.Unlock() +} + +// SelectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. +func (db *DB) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { + db.mu.Lock() + out = append(out, db.lookupHTTPSvcTable...) + db.mu.Unlock() + return +} + +// InsertIntoDNSRoundTrip implements EventDB.InsertIntoDNSRoundTrip. +func (db *DB) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { + db.mu.Lock() + db.dnsRoundTripTable = append(db.dnsRoundTripTable, ev) + db.mu.Unlock() +} + +// SelectAllFromDNSRoundTrip returns all DNS round trip events. +func (db *DB) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { + db.mu.Lock() + out = append(out, db.dnsRoundTripTable...) + db.mu.Unlock() + return +} + +// InsertIntoHTTPRoundTrip implements EventDB.InsertIntoHTTPRoundTrip. +func (db *DB) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { + db.mu.Lock() + db.httpRoundTripTable = append(db.httpRoundTripTable, ev) + db.mu.Unlock() +} + +// SelectAllFromHTTPRoundTrip returns all HTTP round trip events. +func (db *DB) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { + db.mu.Lock() + out = append(out, db.httpRoundTripTable...) + db.mu.Unlock() + return +} + +// InsertIntoHTTPRedirect implements EventDB.InsertIntoHTTPRedirect. +func (db *DB) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { + db.mu.Lock() + db.httpRedirectTable = append(db.httpRedirectTable, ev) + db.mu.Unlock() +} + +// SelectAllFromHTTPRedirect returns all HTTP redirections. +func (db *DB) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { + db.mu.Lock() + out = append(out, db.httpRedirectTable...) + db.mu.Unlock() + return +} + +// InsertIntoQUICHandshake implements EventDB.InsertIntoQUICHandshake. 
+func (db *DB) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { + db.mu.Lock() + db.quicHandshakeTable = append(db.quicHandshakeTable, ev) + db.mu.Unlock() +} + +// SelectAllFromQUICHandshake returns all QUIC handshake events. +func (db *DB) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { + db.mu.Lock() + out = append(out, db.quicHandshakeTable...) + db.mu.Unlock() + return +} + +// NextConnID implements EventDB.NextConnID. +func (db *DB) NextConnID() (out int64) { + return db.internals.NextConnID() +} + +// MeasurementID implements EventDB.MeasurementID. +func (db *DB) MeasurementID() (out int64) { + return db.internals.MeasurementID() +} + +// NextMeasurement increments the internal MeasurementID and +// returns it, so that later you can reference the current measurement. +func (db *DB) NextMeasurement() (out int64) { + return db.internals.NextMeasurement() +} + +// SelectAllFromDialWithMeasurementID calls SelectAllFromConnect +// and filters the result by MeasurementID. +func (db *DB) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { + for _, ev := range db.SelectAllFromDial() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromReadWriteWithMeasurementID calls SelectAllFromReadWrite and +// filters the result by MeasurementID. +func (db *DB) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { + for _, ev := range db.SelectAllFromReadWrite() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromCloseWithMeasurementID calls SelectAllFromClose +// and filters the result by MeasurementID. +func (db *DB) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { + for _, ev := range db.SelectAllFromClose() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromTLSHandshakeWithMeasurementID calls SelectAllFromTLSHandshake +// and filters the result by MeasurementID. 
+func (db *DB) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { + for _, ev := range db.SelectAllFromTLSHandshake() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromQUICHandshakeWithMeasurementID calls SelectAllFromQUICSHandshake +// and filters the result by MeasurementID. +func (db *DB) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { + for _, ev := range db.SelectAllFromQUICHandshake() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromLookupHostWithMeasurementID calls SelectAllFromLookupHost +// and filters the result by MeasurementID. +func (db *DB) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { + for _, ev := range db.SelectAllFromLookupHost() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromLookupHTTPSSvcWithMeasurementID calls SelectAllFromHTTPSSvc +// and filters the result by MeasurementID. +func (db *DB) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { + for _, ev := range db.SelectAllFromLookupHTTPSSvc() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromDNSRoundTripWithMeasurementID calls SelectAllFromDNSRoundTrip +// and filters the result by MeasurementID. +func (db *DB) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { + for _, ev := range db.SelectAllFromDNSRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromHTTPRoundTripWithMeasurementID calls SelectAllFromHTTPRoundTrip +// and filters the result by MeasurementID. 
+func (db *DB) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { + for _, ev := range db.SelectAllFromHTTPRoundTrip() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// SelectAllFromHTTPRedirectWithMeasurementID calls SelectAllFromHTTPRedirect +// and filters the result by MeasurementID. +func (db *DB) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { + for _, ev := range db.SelectAllFromHTTPRedirect() { + if id == ev.MeasurementID { + out = append(out, ev) + } + } + return +} + +// EndpointNetwork is the network of an endpoint. +type EndpointNetwork string + +const ( + // NetworkTCP identifies endpoints using TCP. + NetworkTCP = EndpointNetwork("tcp") + + // NetworkQUIC identifies endpoints using QUIC. + NetworkQUIC = EndpointNetwork("quic") +) + +// Endpoint is an endpoint for a domain. +type Endpoint struct { + // Network is the network (e.g., "tcp", "quic") + Network EndpointNetwork + + // Address is the endpoint address (e.g., "8.8.8.8:443") + Address string +} + +// String converts an endpoint to a string (e.g., "8.8.8.8:443/tcp") +func (e *Endpoint) String() string { + return fmt.Sprintf("%s/%s", e.Address, e.Network) +} + +// SelectAllEndpointsForDomain returns all the +// endpoints for a specific domain. +// +// Arguments: +// +// - domain is the domain we want to connect to; +// +// - port is the port for the endpoint. +func (db *DB) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { + out = append(out, db.selectAllTCPEndpoints(domain, port)...) + out = append(out, db.selectAllQUICEndpoints(domain, port)...) 
+ out = db.deduplicateEndpoints(out) + return +} + +func (db *DB) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { + for _, entry := range db.SelectAllFromLookupHost() { + if domain != entry.Domain { + continue + } + for _, addr := range entry.Addrs { + if net.ParseIP(addr) == nil { + continue // skip CNAME entries courtesy the WCTH + } + out = append(out, db.newEndpoint(addr, port, NetworkTCP)) + } + } + return +} + +func (db *DB) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { + for _, entry := range db.SelectAllFromLookupHTTPSSvc() { + if domain != entry.Domain { + continue + } + if !db.supportsHTTP3(entry) { + continue + } + addrs := append([]string{}, entry.IPv4...) + for _, addr := range append(addrs, entry.IPv6...) { + out = append(out, db.newEndpoint(addr, port, NetworkQUIC)) + } + } + return +} + +func (db *DB) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { + duplicates := make(map[string]*Endpoint) + for _, epnt := range epnts { + duplicates[epnt.String()] = epnt + } + for _, epnt := range duplicates { + out = append(out, epnt) + } + return +} + +func (db *DB) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { + return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} +} + +func (db *DB) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { + for _, alpn := range entry.ALPN { + switch alpn { + case "h3": + return true + } + } + return false +} + +// HTTPEndpoint is an HTTP/HTTPS/HTTP3 endpoint. +type HTTPEndpoint struct { + // Domain is the endpoint domain (e.g., "dns.google"). + Domain string + + // Network is the network (e.g., "tcp" or "quic"). + Network EndpointNetwork + + // Address is the endpoint address (e.g., "8.8.8.8:443"). + Address string + + // SNI is the SNI to use (only used with URL.scheme == "https"). + SNI string + + // ALPN is the ALPN to use (only used with URL.scheme == "https"). + ALPN []string + + // URL is the endpoint URL. 
+ URL *url.URL + + // Header contains request headers. + Header http.Header +} + +// String converts an HTTP endpoint to a string (e.g., "8.8.8.8:443/tcp") +func (e *HTTPEndpoint) String() string { + return fmt.Sprintf("%s/%s", e.Address, e.Network) +} + +// SelectAllHTTPEndpointsForURL returns all the +// HTTPEndpoints matching a specific URL' domain. +// +// Arguments: +// +// - URL is the URL for which we want endpoints; +// +// Returns a list of endpoints or an error. +func (db *DB) SelectAllHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { + domain := URL.Hostname() + port, err := PortFromURL(URL) + if err != nil { + return nil, err + } + epnts := db.SelectAllEndpointsForDomain(domain, port) + var out []*HTTPEndpoint + for _, epnt := range epnts { + if URL.Scheme != "https" && epnt.Network == NetworkQUIC { + continue // we'll only use QUIC with HTTPS + } + out = append(out, &HTTPEndpoint{ + Domain: domain, + Network: epnt.Network, + Address: epnt.Address, + SNI: domain, + ALPN: db.alpnForHTTPEndpoint(epnt.Network), + URL: URL, + Header: NewHTTPRequestHeaderForMeasuring(), + }) + } + return out, nil +} + +// ErrCannotDeterminePortFromURL indicates that we could not determine +// the correct port from the URL authority and scheme. +var ErrCannotDeterminePortFromURL = errors.New("cannot determine port from URL") + +// PortFromURL returns the port determined from the URL or an error. 
+func PortFromURL(URL *url.URL) (string, error) { + switch { + case URL.Port() != "": + return URL.Port(), nil + case URL.Scheme == "https": + return "443", nil + case URL.Scheme == "http": + return "80", nil + default: + return "", ErrCannotDeterminePortFromURL + } +} + +func (db *DB) alpnForHTTPEndpoint(network EndpointNetwork) []string { + switch network { + case NetworkQUIC: + return []string{"h3"} + case NetworkTCP: + return []string{"h2", "http/1.1"} + default: + return nil + } +} diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index 3f86496be7..875ff0e0b2 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -29,6 +29,7 @@ type dnsxTransportx struct { type DNSRoundTripEvent struct { Origin Origin // OriginProbe or OriginTH MeasurementID int64 // ID of the measurement + ConnID int64 // connID (typically zero) Network string // DNS resolver's network (e.g., "dot", "doh") Address string // DNS resolver's address or URL (for "doh") Query []byte // Raw query @@ -43,6 +44,7 @@ func (txp *dnsxTransportx) RoundTrip(ctx context.Context, query []byte) ([]byte, reply, err := txp.RoundTripper.RoundTrip(ctx, query) finished := txp.db.ElapsedTime() txp.db.InsertIntoDNSRoundTrip(&DNSRoundTripEvent{ + Origin: txp.origin, MeasurementID: txp.db.MeasurementID(), Network: txp.RoundTripper.Network(), Address: txp.RoundTripper.Address(), diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index bbb97f11d5..36d80d0fbe 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -3,48 +3,48 @@ package measurex // Measurement groups all the events that have the same MeasurementID. This // data format is not compatible with the OONI data format. type Measurement struct { - // ID is the measurement ID. - ID int64 + // MeasurementID is the measurement MeasurementID. + MeasurementID int64 - // URL is the OPTIONAL URL this measurement refers to. 
- URL string + // URL is the URL this measurement refers to. + URL string `json:",omitempty"` - // Endpoint is the OPTIONAL endpoint this measurement refers to. - Endpoint string + // Endpoint is the endpoint this measurement refers to. + Endpoint string `json:",omitempty"` // Oddities lists all the oddities inside this measurement. See // newMeasurement's docs for more info. Oddities []Oddity // Connect contains all the connect operations. - Connect []*NetworkEvent + Connect []*NetworkEvent `json:",omitempty"` // ReadWrite contains all the read and write operations. - ReadWrite []*NetworkEvent + ReadWrite []*NetworkEvent `json:",omitempty"` // Close contains all the close operations. - Close []*NetworkEvent + Close []*NetworkEvent `json:",omitempty"` // TLSHandshake contains all the TLS handshakes. - TLSHandshake []*TLSHandshakeEvent + TLSHandshake []*TLSHandshakeEvent `json:",omitempty"` // QUICHandshake contains all the QUIC handshakes. - QUICHandshake []*QUICHandshakeEvent + QUICHandshake []*QUICHandshakeEvent `json:",omitempty"` // LookupHost contains all the host lookups. - LookupHost []*LookupHostEvent + LookupHost []*LookupHostEvent `json:",omitempty"` // LookupHTTPSSvc contains all the HTTPSSvc lookups. - LookupHTTPSSvc []*LookupHTTPSSvcEvent + LookupHTTPSSvc []*LookupHTTPSSvcEvent `json:",omitempty"` // DNSRoundTrip contains all the DNS round trips. - DNSRoundTrip []*DNSRoundTripEvent + DNSRoundTrip []*DNSRoundTripEvent `json:",omitempty"` // HTTPRoundTrip contains all the HTTP round trips. - HTTPRoundTrip []*HTTPRoundTripEvent + HTTPRoundTrip []*HTTPRoundTripEvent `json:",omitempty"` // HTTPRedirect contains all the redirections. - HTTPRedirect []*HTTPRedirectEvent + HTTPRedirect []*HTTPRedirectEvent `json:",omitempty"` } // NewMeasurement creates a new Measurement by gathering all the @@ -70,9 +70,9 @@ type Measurement struct { // - id is the MeasurementID. // // Returns a Measurement possibly containing empty lists of events. 
-func NewMeasurement(db *Saver, id int64) *Measurement { +func NewMeasurement(db *DB, id int64) *Measurement { m := &Measurement{ - ID: id, + MeasurementID: id, Connect: db.SelectAllFromDialWithMeasurementID(id), ReadWrite: db.SelectAllFromReadWriteWithMeasurementID(id), Close: db.SelectAllFromCloseWithMeasurementID(id), diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 379c80e491..bcaaa58725 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -8,13 +8,14 @@ import ( "net/url" "time" + "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/netxlite" ) // Measurer performs measurements. type Measurer struct { // DB is the MANDATORY database to use. - DB *Saver + DB *DB // HTTPClient is the MANDATORY HTTP client for the WCTH. HTTPClient HTTPClient @@ -32,6 +33,32 @@ type Measurer struct { WCTHURL string } +// NewMeasurerWithDefaultSettings creates a new Measurer +// instance using the most default settings. +func NewMeasurerWithDefaultSettings() *Measurer { + db := NewSaver(time.Now()) + return &Measurer{ + DB: db, + HTTPClient: &http.Client{}, + Logger: log.Log, + Origin: OriginProbe, + TLSHandshaker: NewTLSHandshakerStdlib(OriginProbe, db, log.Log), + WCTHURL: "https://wcth.ooni.io/", + } +} + +// clone returns a clone of the current measurer with a new DB. +func (mx *Measurer) clone(db *DB) *Measurer { + return &Measurer{ + DB: db, + HTTPClient: mx.HTTPClient, + Logger: mx.Logger, + Origin: mx.Origin, + TLSHandshaker: mx.TLSHandshaker, + WCTHURL: mx.WCTHURL, + } +} + func (mx *Measurer) nextMeasurement() int64 { return mx.DB.NextMeasurement() } @@ -129,7 +156,7 @@ func (mx *Measurer) tcpConnect(ctx context.Context, address string) (Conn, error return d.DialContext(ctx, "tcp", address) } -// TLSConnect connects and TLS handshakes with a TCP endpoint. +// TLSConnectAndHandshake connects and TLS handshakes with a TCP endpoint. 
// // Arguments: // @@ -159,10 +186,10 @@ func (mx *Measurer) tcpConnect(ctx context.Context, address string) (Conn, error // utls.ClientHelloID thay you're using. // // Returns a Measurement. -func (mx *Measurer) TLSConnect(ctx context.Context, +func (mx *Measurer) TLSConnectAndHandshake(ctx context.Context, address string, config *tls.Config) *Measurement { id := mx.nextMeasurement() - conn, _ := mx.tlsConnect(ctx, address, config) + conn, _ := mx.tlsConnectAndHandshake(ctx, address, config) measurement := NewMeasurement(mx.DB, id) if conn != nil { conn.Close() @@ -170,8 +197,9 @@ func (mx *Measurer) TLSConnect(ctx context.Context, return measurement } -// tlsConnect is like TLSConnect but does not create a new measurement -func (mx *Measurer) tlsConnect(ctx context.Context, +// tlsConnectAndHandshake is like TLSConnectAndHandshake +// but does not create a new measurement. +func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, address string, config *tls.Config) (TLSConn, error) { conn, err := mx.tcpConnect(ctx, address) if err != nil { @@ -237,7 +265,7 @@ func (mx *Measurer) quicHandshake(ctx context.Context, // HTTPEndpointGet performs a GET request for an HTTP endpoint. // // This function WILL NOT follow redirects. If there is a redirect -// you will see it inside the specific mx.DB table. +// you will see it inside the specific database table. // // Arguments: // @@ -248,29 +276,78 @@ func (mx *Measurer) quicHandshake(ctx context.Context, // - jar is the cookie jar to use. // // Returns a measurement. The returned measurement is empty if -// the endpoint is misconfigured or the URL has an unknow scheme. +// the endpoint is misconfigured or the URL has an unknown scheme. 
func (mx *Measurer) HTTPEndpointGet( - ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (m *Measurement) { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) *Measurement { + resp, m, _ := mx.httpEndpointGet(ctx, epnt, jar) + if resp != nil { + resp.Body.Close() + } + return m +} + +var ( + errUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Scheme") + errUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") +) + +// HTTPPreparedRequest is a suspended request that only awaits +// for you to Resume it to deliver a result. +type HTTPPreparedRequest struct { + resp *http.Response + m *Measurement + err error +} + +// Resume resumes the request and yields either a response or an error. You +// shall not call this function more than once. +func (r *HTTPPreparedRequest) Resume() (*http.Response, error) { + return r.resp, r.err +} + +// Measurement returns the associated measurement. +func (r *HTTPPreparedRequest) Measurement() *Measurement { + return r.m +} + +// HTTPEndpointPrepareGet prepares a GET request for an HTTP endpoint. +// +// This prepared request WILL NOT follow redirects. If there is a redirect +// you will see it inside the specific database table. +// +// Arguments: +// +// - ctx is the context allowing to timeout the operation; +// +// - epnt is the HTTP endpoint; +// +// - jar is the cookie jar to use. +// +// Returns either a prepared request or an error. +func (mx *Measurer) HTTPEndpointPrepareGet(ctx context.Context, + epnt *HTTPEndpoint, jar http.CookieJar) *HTTPPreparedRequest { + out := &HTTPPreparedRequest{} + out.resp, out.m, out.err = mx.httpEndpointGet(ctx, epnt, jar) + return out +} + +// httpEndpointGet implements HTTPEndpointGet. 
+func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, + jar http.CookieJar) (resp *http.Response, m *Measurement, err error) { id := mx.nextMeasurement() - var resp *http.Response switch epnt.Network { case NetworkQUIC: - resp, _ = mx.httpEndpointGetQUIC(ctx, epnt, jar) + resp, err = mx.httpEndpointGetQUIC(ctx, epnt, jar) m = NewMeasurement(mx.DB, id) case NetworkTCP: - resp, _ = mx.httpEndpointGetTCP(ctx, epnt, jar) + resp, err = mx.httpEndpointGetTCP(ctx, epnt, jar) m = NewMeasurement(mx.DB, id) default: - m = &Measurement{} - } - if resp != nil { - resp.Body.Close() + m, err = &Measurement{}, errUnknownHTTPEndpointNetwork } return } -var errUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Scheme") - // httpEndpointGetTCP specializes HTTPSEndpointGet for HTTP and HTTPS. func (mx *Measurer) httpEndpointGetTCP( ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { @@ -311,7 +388,7 @@ func (mx *Measurer) httpEndpointGetHTTPS( return nil, err } req.Header = epnt.Header - conn, err := mx.tlsConnect(ctx, epnt.Address, &tls.Config{ + conn, err := mx.tlsConnectAndHandshake(ctx, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -407,3 +484,40 @@ func (mx *Measurer) onlyTCPEndpoints(endpoints []*Endpoint) (out []string) { func (mx *Measurer) Infof(format string, v ...interface{}) { mx.Logger.Infof(format, v...) } + +// HTTPEndpointGetParallel performs an HTTPEndpointGet for each +// input endpoint using a pool of background goroutines. +// +// This function returns to the caller a channel where to run +// measurements from. The channel is closed when done. 
+func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, + jar http.CookieJar, epnts ...*HTTPEndpoint) <-chan *Measurement { + var ( + done = make(chan interface{}) + input = make(chan *HTTPEndpoint) + output = make(chan *Measurement) + ) + go func() { + defer close(input) + for _, epnt := range epnts { + input <- epnt + } + }() + const parallelism = 3 + for i := 0; i < parallelism; i++ { + go func() { + child := mx.clone(mx.DB.clone()) + for epnt := range input { + output <- child.HTTPEndpointGet(ctx, epnt, jar) + } + done <- true + }() + } + go func() { + for i := 0; i < parallelism; i++ { + <-done + } + close(output) + }() + return output +} diff --git a/internal/measurex/oddity.go b/internal/measurex/oddity.go index 6cd6e19b76..b5f486eacf 100644 --- a/internal/measurex/oddity.go +++ b/internal/measurex/oddity.go @@ -15,9 +15,12 @@ var ( OddityTCPConnectOher = Oddity("tcp.connect.other") // tls.handshake - OddityTLSHandshakeTimeout = Oddity("tls.handshake.timeout") - OddityTLSHandshakeReset = Oddity("tls.handshake.reset") - OddityTLSHandshakeOther = Oddity("tls.handshake.other") + OddityTLSHandshakeTimeout = Oddity("tls.handshake.timeout") + OddityTLSHandshakeReset = Oddity("tls.handshake.reset") + OddityTLSHandshakeOther = Oddity("tls.handshake.other") + OddityTLSHandshakeUnexpectedEOF = Oddity("tls.handshake.unexpected_eof") + OddityTLSHandshakeInvalidHostname = Oddity("tls.handshake.invalid_hostname") + OddityTLSHandshakeUnknownAuthority = Oddity("tls.handshake.unknown_authority") // quic.handshake OddityQUICHandshakeTimeout = Oddity("quic.handshake.timeout") diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 7d1b2a92ac..60024bac5d 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -64,6 +64,7 @@ type resolverx struct { type LookupHostEvent struct { Origin Origin MeasurementID int64 + ConnID int64 // connID (typically zero) Network string Address string Domain string @@ -119,6 +120,7 @@ 
func (r *resolverx) computeOddityLookupHost(addrs []string, err error) Oddity { type LookupHTTPSSvcEvent struct { Origin Origin MeasurementID int64 + ConnID int64 // connID (typically zero) Network string Address string Domain string diff --git a/internal/measurex/saver.go b/internal/measurex/saver.go deleted file mode 100644 index 9db9c05a8b..0000000000 --- a/internal/measurex/saver.go +++ /dev/null @@ -1,530 +0,0 @@ -package measurex - -import ( - "errors" - "fmt" - "net" - "net/http" - "net/url" - "sync" - "time" -) - -// Saver is an EventDB that saves events and allows to -// ask questions regarding the saved events. -type Saver struct { - dialTable []*NetworkEvent - readWriteTable []*NetworkEvent - closeTable []*NetworkEvent - tlsHandshakeTable []*TLSHandshakeEvent - lookupHostTable []*LookupHostEvent - lookupHTTPSvcTable []*LookupHTTPSSvcEvent - dnsRoundTripTable []*DNSRoundTripEvent - httpRoundTripTable []*HTTPRoundTripEvent - httpRedirectTable []*HTTPRedirectEvent - quicHandshakeTable []*QUICHandshakeEvent - - begin time.Time - connID int64 - measurementID int64 - mu sync.Mutex -} - -var _ EventDB = &Saver{} - -// NewSaver creates a new instance of Saver. -func NewSaver(begin time.Time) *Saver { - return &Saver{begin: begin} -} - -// ElapsedTime implements EventDB.ElapsedTime. -func (s *Saver) ElapsedTime() time.Duration { - return time.Since(s.begin) -} - -// DeleteAll deletes all the saved data. -func (s *Saver) DeleteAll() { - s.mu.Lock() - s.dialTable = nil - s.readWriteTable = nil - s.closeTable = nil - s.tlsHandshakeTable = nil - s.lookupHostTable = nil - s.lookupHTTPSvcTable = nil - s.dnsRoundTripTable = nil - s.httpRoundTripTable = nil - s.httpRedirectTable = nil - s.quicHandshakeTable = nil - s.mu.Unlock() -} - -// InsertIntoDial implements EventDB.InsertIntoDial. -func (s *Saver) InsertIntoDial(ev *NetworkEvent) { - s.mu.Lock() - s.dialTable = append(s.dialTable, ev) - s.mu.Unlock() -} - -// SelectAllFromDial returns all dial events. 
-func (s *Saver) SelectAllFromDial() (out []*NetworkEvent) { - s.mu.Lock() - out = append(out, s.dialTable...) - s.mu.Unlock() - return -} - -// InsertIntoReadWrite implements EventDB.InsertIntoReadWrite. -func (s *Saver) InsertIntoReadWrite(ev *NetworkEvent) { - s.mu.Lock() - s.readWriteTable = append(s.readWriteTable, ev) - s.mu.Unlock() -} - -// SelectAllFromReadWrite returns all I/O events. -func (s *Saver) SelectAllFromReadWrite() (out []*NetworkEvent) { - s.mu.Lock() - out = append(out, s.readWriteTable...) - s.mu.Unlock() - return -} - -// InsertIntoClose implements EventDB.InsertIntoClose. -func (s *Saver) InsertIntoClose(ev *NetworkEvent) { - s.mu.Lock() - s.closeTable = append(s.closeTable, ev) - s.mu.Unlock() -} - -// SelectAllFromClose returns all close events. -func (s *Saver) SelectAllFromClose() (out []*NetworkEvent) { - s.mu.Lock() - out = append(out, s.closeTable...) - s.mu.Unlock() - return -} - -// InsertIntoTLSHandshake implements EventDB.InsertIntoTLSHandshake. -func (s *Saver) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { - s.mu.Lock() - s.tlsHandshakeTable = append(s.tlsHandshakeTable, ev) - s.mu.Unlock() -} - -// SelectAllFromTLSHandshake returns all TLS handshake events. -func (s *Saver) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { - s.mu.Lock() - out = append(out, s.tlsHandshakeTable...) - s.mu.Unlock() - return -} - -// InsertIntoLookupHost implements EventDB.InsertIntoLookupHost. -func (s *Saver) InsertIntoLookupHost(ev *LookupHostEvent) { - s.mu.Lock() - s.lookupHostTable = append(s.lookupHostTable, ev) - s.mu.Unlock() -} - -// SelectAllFromLookupHost returns all the lookup host events. -func (s *Saver) SelectAllFromLookupHost() (out []*LookupHostEvent) { - s.mu.Lock() - out = append(out, s.lookupHostTable...) 
- s.mu.Unlock() - return -} - -// InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc -func (s *Saver) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { - s.mu.Lock() - s.lookupHTTPSvcTable = append(s.lookupHTTPSvcTable, ev) - s.mu.Unlock() -} - -// SelectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. -func (s *Saver) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { - s.mu.Lock() - out = append(out, s.lookupHTTPSvcTable...) - s.mu.Unlock() - return -} - -// InsertIntoDNSRoundTrip implements EventDB.InsertIntoDNSRoundTrip. -func (s *Saver) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { - s.mu.Lock() - s.dnsRoundTripTable = append(s.dnsRoundTripTable, ev) - s.mu.Unlock() -} - -// SelectAllFromDNSRoundTrip returns all DNS round trip events. -func (s *Saver) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { - s.mu.Lock() - out = append(out, s.dnsRoundTripTable...) - s.mu.Unlock() - return -} - -// InsertIntoHTTPRoundTrip implements EventDB.InsertIntoHTTPRoundTrip. -func (s *Saver) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { - s.mu.Lock() - s.httpRoundTripTable = append(s.httpRoundTripTable, ev) - s.mu.Unlock() -} - -// SelectAllFromHTTPRoundTrip returns all HTTP round trip events. -func (s *Saver) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { - s.mu.Lock() - out = append(out, s.httpRoundTripTable...) - s.mu.Unlock() - return -} - -// InsertIntoHTTPRedirect implements EventDB.InsertIntoHTTPRedirect. -func (s *Saver) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { - s.mu.Lock() - s.httpRedirectTable = append(s.httpRedirectTable, ev) - s.mu.Unlock() -} - -// SelectAllFromHTTPRedirect returns all HTTP redirections. -func (s *Saver) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { - s.mu.Lock() - out = append(out, s.httpRedirectTable...) - s.mu.Unlock() - return -} - -// InsertIntoQUICHandshake implements EventDB.InsertIntoQUICHandshake. 
-func (s *Saver) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { - s.mu.Lock() - s.quicHandshakeTable = append(s.quicHandshakeTable, ev) - s.mu.Unlock() -} - -// SelectAllFromQUICHandshake returns all QUIC handshake events. -func (s *Saver) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { - s.mu.Lock() - out = append(out, s.quicHandshakeTable...) - s.mu.Unlock() - return -} - -// NextConnID implements EventDB.NextConnID. -func (s *Saver) NextConnID() (out int64) { - s.mu.Lock() - s.connID++ // start from 1 - out = s.connID - s.mu.Unlock() - return -} - -// MeasurementID implements EventDB.MeasurementID. -func (s *Saver) MeasurementID() (out int64) { - s.mu.Lock() - out = s.measurementID - s.mu.Unlock() - return -} - -// NextMeasurement increments the internal MeasurementID and -// returns it, so that later you can reference the current measurement. -func (s *Saver) NextMeasurement() (out int64) { - s.mu.Lock() - s.measurementID++ // start from 1 - out = s.measurementID - s.mu.Unlock() - return -} - -// SelectAllFromDialWithMeasurementID calls SelectAllFromConnect -// and filters the result by MeasurementID. -func (s *Saver) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { - for _, ev := range s.SelectAllFromDial() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromReadWriteWithMeasurementID calls SelectAllFromReadWrite and -// filters the result by MeasurementID. -func (s *Saver) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { - for _, ev := range s.SelectAllFromReadWrite() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromCloseWithMeasurementID calls SelectAllFromClose -// and filters the result by MeasurementID. 
-func (s *Saver) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { - for _, ev := range s.SelectAllFromClose() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromTLSHandshakeWithMeasurementID calls SelectAllFromTLSHandshake -// and filters the result by MeasurementID. -func (s *Saver) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { - for _, ev := range s.SelectAllFromTLSHandshake() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromQUICHandshakeWithMeasurementID calls SelectAllFromQUICSHandshake -// and filters the result by MeasurementID. -func (s *Saver) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { - for _, ev := range s.SelectAllFromQUICHandshake() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromLookupHostWithMeasurementID calls SelectAllFromLookupHost -// and filters the result by MeasurementID. -func (s *Saver) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { - for _, ev := range s.SelectAllFromLookupHost() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromLookupHTTPSSvcWithMeasurementID calls SelectAllFromHTTPSSvc -// and filters the result by MeasurementID. -func (s *Saver) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { - for _, ev := range s.SelectAllFromLookupHTTPSSvc() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromDNSRoundTripWithMeasurementID calls SelectAllFromDNSRoundTrip -// and filters the result by MeasurementID. 
-func (s *Saver) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { - for _, ev := range s.SelectAllFromDNSRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromHTTPRoundTripWithMeasurementID calls SelectAllFromHTTPRoundTrip -// and filters the result by MeasurementID. -func (s *Saver) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { - for _, ev := range s.SelectAllFromHTTPRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromHTTPRedirectWithMeasurementID calls SelectAllFromHTTPRedirect -// and filters the result by MeasurementID. -func (s *Saver) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { - for _, ev := range s.SelectAllFromHTTPRedirect() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// EndpointNetwork is the network of an endpoint. -type EndpointNetwork string - -const ( - // NetworkTCP identifies endpoints using TCP. - NetworkTCP = EndpointNetwork("tcp") - - // NetworkQUIC identifies endpoints using QUIC. - NetworkQUIC = EndpointNetwork("quic") -) - -// Endpoint is an endpoint for a domain. -type Endpoint struct { - // Network is the network (e.g., "tcp", "quic") - Network EndpointNetwork - - // Address is the endpoint address (e.g., "8.8.8.8:443") - Address string -} - -// String converts an endpoint to a string (e.g., "8.8.8.8:443/tcp") -func (e *Endpoint) String() string { - return fmt.Sprintf("%s/%s", e.Address, e.Network) -} - -// SelectAllEndpointsForDomain returns all the -// endpoints for a specific domain. -// -// Arguments: -// -// - domain is the domain we want to connect to; -// -// - port is the port for the endpoint. -func (s *Saver) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { - out = append(out, s.selectAllTCPEndpoints(domain, port)...) 
- out = append(out, s.selectAllQUICEndpoints(domain, port)...) - out = s.deduplicateEndpoints(out) - return -} - -func (s *Saver) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { - for _, entry := range s.SelectAllFromLookupHost() { - if domain != entry.Domain { - continue - } - for _, addr := range entry.Addrs { - if net.ParseIP(addr) == nil { - continue // skip CNAME entries courtesy the WCTH - } - out = append(out, s.newEndpoint(addr, port, NetworkTCP)) - } - } - return -} - -func (s *Saver) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { - for _, entry := range s.SelectAllFromLookupHTTPSSvc() { - if domain != entry.Domain { - continue - } - if !s.supportsHTTP3(entry) { - continue - } - addrs := append([]string{}, entry.IPv4...) - for _, addr := range append(addrs, entry.IPv6...) { - out = append(out, s.newEndpoint(addr, port, NetworkQUIC)) - } - } - return -} - -func (s *Saver) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { - duplicates := make(map[string]*Endpoint) - for _, epnt := range epnts { - duplicates[epnt.String()] = epnt - } - for _, epnt := range duplicates { - out = append(out, epnt) - } - return -} - -func (s *Saver) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { - return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} -} - -func (s *Saver) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { - for _, alpn := range entry.ALPN { - switch alpn { - case "h3": - return true - } - } - return false -} - -// HTTPEndpoint is an HTTP/HTTPS/HTTP3 endpoint. -type HTTPEndpoint struct { - // Domain is the endpoint domain (e.g., "dns.google"). - Domain string - - // Network is the network (e.g., "tcp" or "quic"). - Network EndpointNetwork - - // Address is the endpoint address (e.g., "8.8.8.8:443"). - Address string - - // SNI is the SNI to use (only used with URL.scheme == "https"). - SNI string - - // ALPN is the ALPN to use (only used with URL.scheme == "https"). 
- ALPN []string - - // URL is the endpoint URL. - URL *url.URL - - // Header contains request headers. - Header http.Header -} - -// String converts an HTTP endpoint to a string (e.g., "8.8.8.8:443/tcp") -func (e *HTTPEndpoint) String() string { - return fmt.Sprintf("%s/%s", e.Address, e.Network) -} - -// SelectAllHTTPEndpointsForDomainAndMeasurementID returns all the -// HTTPEndpoints matching a specific domain and MeasurementID. -// -// Arguments: -// -// - URL is the URL for which we want endpoints; -// -// Returns a list of endpoints or an error. -func (s *Saver) SelectAllHTTPEndpointsForDomain(URL *url.URL) ([]*HTTPEndpoint, error) { - domain := URL.Hostname() - port, err := PortFromURL(URL) - if err != nil { - return nil, err - } - epnts := s.SelectAllEndpointsForDomain(domain, port) - var out []*HTTPEndpoint - for _, epnt := range epnts { - out = append(out, &HTTPEndpoint{ - Domain: domain, - Network: epnt.Network, - Address: epnt.Address, - SNI: domain, - ALPN: s.alpnForHTTPEndpoint(epnt.Network), - URL: URL, - Header: NewHTTPRequestHeaderForMeasuring(), - }) - } - return out, nil -} - -// ErrCannotDeterminePortFromURL indicates that we could not determine -// the correct port from the URL authority and scheme. -var ErrCannotDeterminePortFromURL = errors.New("cannot determine port from URL") - -// PortFromURL returns the port determined from the URL or an error. 
-func PortFromURL(URL *url.URL) (string, error) { - switch { - case URL.Port() != "": - return URL.Port(), nil - case URL.Scheme == "https": - return "443", nil - case URL.Scheme == "http": - return "80", nil - default: - return "", ErrCannotDeterminePortFromURL - } -} - -func (s *Saver) alpnForHTTPEndpoint(network EndpointNetwork) []string { - switch network { - case NetworkQUIC: - return []string{"h3"} - case NetworkTCP: - return []string{"h2", "http/1.1"} - default: - return nil - } -} diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index 685c6ef9d5..953456b24b 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -31,6 +31,12 @@ func WrapTLSHandshaker(origin Origin, db EventDB, thx netxlite.TLSHandshaker) TL return &tlsHandshakerx{TLSHandshaker: thx, db: db, origin: origin} } +// NewTLSHandshakerStdlib creates a new TLS handshaker that +// saves results into the DB and uses the stdlib for TLS. +func NewTLSHandshakerStdlib(origin Origin, db EventDB, logger Logger) TLSHandshaker { + return WrapTLSHandshaker(origin, db, netxlite.NewTLSHandshakerStdlib(logger)) +} + type tlsHandshakerx struct { netxlite.TLSHandshaker db EventDB @@ -103,6 +109,12 @@ func (thx *tlsHandshakerx) computeOddity(err error) Oddity { return OddityTLSHandshakeTimeout case errorsx.FailureConnectionReset: return OddityTLSHandshakeReset + case errorsx.FailureEOFError: + return OddityTLSHandshakeUnexpectedEOF + case errorsx.FailureSSLInvalidHostname: + return OddityTLSHandshakeInvalidHostname + case errorsx.FailureSSLUnknownAuthority: + return OddityTLSHandshakeUnknownAuthority default: return OddityTLSHandshakeOther } diff --git a/internal/netxlite/dnsx/serial.go b/internal/netxlite/dnsx/serial.go index 0d2317a7f6..05594f4467 100644 --- a/internal/netxlite/dnsx/serial.go +++ b/internal/netxlite/dnsx/serial.go @@ -80,6 +80,12 @@ func (r *SerialResolver) lookupHostWithRetry( ctx context.Context, hostname string, qtype uint16) ([]string, error) { var errorslist 
[]error for i := 0; i < 3; i++ { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + // fallthrough + } replies, err := r.LookupHostWithoutRetry(ctx, hostname, qtype) if err == nil { return replies, nil From be77fd7d92a1c2702f9791723d0a191fc8427eea Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 13:12:42 +0200 Subject: [PATCH 11/53] identify sharing constraints and fix --- internal/measurex/db.go | 195 ++++++++++++++++++++-------------- internal/measurex/measurer.go | 8 +- 2 files changed, 121 insertions(+), 82 deletions(-) diff --git a/internal/measurex/db.go b/internal/measurex/db.go index eddf7035b6..87692c7e07 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -56,7 +56,32 @@ type EventDB interface { // DB is an EventDB that saves events and also allows to // ask questions regarding the saved events. +// +// Caveats: the way in which we assign an ID to a measurement +// (i.e., a related set of operations) assumes that we don't +// run paralell measurements with the same DB. To run parallel +// measurements, you need to create a children DB using the +// NewChildrenDB factory function. type DB struct { + *dbSharedWithChildren + + // the measurementID must not be shared with children + // running in parallel because it must stay const during + // the whole measurements or we cannot measure. + // + // Use NewChildrenDB to create one or more children + // that can run in parallel with each other and their parent. + measurementID int64 + mu sync.Mutex +} + +// dbSharedWithChildren is the part of the database shared +// with children databases. These databases are created during +// parallel measurements but need to insert into the same +// tables of the parent database. The ConnID can also be part +// of this structure. The only element that needs to be constant +// during a measurement is the MeasurementID. 
+type dbSharedWithChildren struct { // database tables dialTable []*NetworkEvent readWriteTable []*NetworkEvent @@ -69,61 +94,60 @@ type DB struct { httpRedirectTable []*HTTPRedirectEvent quicHandshakeTable []*QUICHandshakeEvent - // mu protects all the above tables + // mu protects all the above tables and ConnID mu sync.Mutex - // internals is shared with child databases - internals *dbInternals -} - -func (db *DB) clone() *DB { - return &DB{internals: db.internals} + // other pieces of data we can share with children + // because they have more relaxed constraints than the + // measurement ID: begin is read-only and ConnID is + // read once and then propagated. + begin time.Time + connID int64 } -type dbInternals struct { - begin time.Time - connID int64 - measurementID int64 - mu sync.Mutex -} +var _ EventDB = &DB{} -func (dbi *dbInternals) NextConnID() (out int64) { - dbi.mu.Lock() - dbi.connID++ // start from 1 - out = dbi.connID - dbi.mu.Unlock() - return -} +var ( + baseMeasurementID int32 + measurementIDmu sync.Mutex +) -func (dbi *dbInternals) MeasurementID() (out int64) { - dbi.mu.Lock() - out = dbi.measurementID - dbi.mu.Unlock() +func nextBaseMeasurementID() (out int64) { + measurementIDmu.Lock() + baseMeasurementID++ + out = (int64(baseMeasurementID) << 32) + measurementIDmu.Unlock() return } -func (dbi *dbInternals) NextMeasurement() (out int64) { - dbi.mu.Lock() - dbi.measurementID++ // start from 1 - out = dbi.measurementID - dbi.mu.Unlock() - return +// NewDB creates a new instance of DB. This instance will have the base +// time configured to be begin. Also, its base measurement ID will depend +// on how many databases we have created so far. Each database gets its +// own 31 bit namespace for measurements. +func NewDB(begin time.Time) *DB { + return &DB{ + dbSharedWithChildren: &dbSharedWithChildren{begin: begin}, + measurementID: nextBaseMeasurementID(), + } } -var _ EventDB = &DB{} - -// NewSaver creates a new instance of Saver. 
-func NewSaver(begin time.Time) *DB { - return &DB{internals: &dbInternals{begin: begin}} +// NewChildDB returns a DB that has the same base measurement and +// tables of the original DB but gets a new measurement namespace. You +// should use this factory every time you run parallel measurements. +func (db *DB) NewChildDB() *DB { + return &DB{ + dbSharedWithChildren: db.dbSharedWithChildren, + measurementID: nextBaseMeasurementID(), + } } // ElapsedTime implements EventDB.ElapsedTime. -func (db *DB) ElapsedTime() time.Duration { - return time.Since(db.internals.begin) +func (db *dbSharedWithChildren) ElapsedTime() time.Duration { + return time.Since(db.begin) } // DeleteAll deletes all the saved data. -func (db *DB) DeleteAll() { +func (db *dbSharedWithChildren) DeleteAll() { db.mu.Lock() db.dialTable = nil db.readWriteTable = nil @@ -139,14 +163,14 @@ func (db *DB) DeleteAll() { } // InsertIntoDial implements EventDB.InsertIntoDial. -func (db *DB) InsertIntoDial(ev *NetworkEvent) { +func (db *dbSharedWithChildren) InsertIntoDial(ev *NetworkEvent) { db.mu.Lock() db.dialTable = append(db.dialTable, ev) db.mu.Unlock() } // SelectAllFromDial returns all dial events. -func (db *DB) SelectAllFromDial() (out []*NetworkEvent) { +func (db *dbSharedWithChildren) SelectAllFromDial() (out []*NetworkEvent) { db.mu.Lock() out = append(out, db.dialTable...) db.mu.Unlock() @@ -154,14 +178,14 @@ func (db *DB) SelectAllFromDial() (out []*NetworkEvent) { } // InsertIntoReadWrite implements EventDB.InsertIntoReadWrite. -func (db *DB) InsertIntoReadWrite(ev *NetworkEvent) { +func (db *dbSharedWithChildren) InsertIntoReadWrite(ev *NetworkEvent) { db.mu.Lock() db.readWriteTable = append(db.readWriteTable, ev) db.mu.Unlock() } // SelectAllFromReadWrite returns all I/O events. -func (db *DB) SelectAllFromReadWrite() (out []*NetworkEvent) { +func (db *dbSharedWithChildren) SelectAllFromReadWrite() (out []*NetworkEvent) { db.mu.Lock() out = append(out, db.readWriteTable...) 
db.mu.Unlock() @@ -169,14 +193,14 @@ func (db *DB) SelectAllFromReadWrite() (out []*NetworkEvent) { } // InsertIntoClose implements EventDB.InsertIntoClose. -func (db *DB) InsertIntoClose(ev *NetworkEvent) { +func (db *dbSharedWithChildren) InsertIntoClose(ev *NetworkEvent) { db.mu.Lock() db.closeTable = append(db.closeTable, ev) db.mu.Unlock() } // SelectAllFromClose returns all close events. -func (db *DB) SelectAllFromClose() (out []*NetworkEvent) { +func (db *dbSharedWithChildren) SelectAllFromClose() (out []*NetworkEvent) { db.mu.Lock() out = append(out, db.closeTable...) db.mu.Unlock() @@ -184,14 +208,14 @@ func (db *DB) SelectAllFromClose() (out []*NetworkEvent) { } // InsertIntoTLSHandshake implements EventDB.InsertIntoTLSHandshake. -func (db *DB) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { +func (db *dbSharedWithChildren) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { db.mu.Lock() db.tlsHandshakeTable = append(db.tlsHandshakeTable, ev) db.mu.Unlock() } // SelectAllFromTLSHandshake returns all TLS handshake events. -func (db *DB) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { +func (db *dbSharedWithChildren) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { db.mu.Lock() out = append(out, db.tlsHandshakeTable...) db.mu.Unlock() @@ -199,14 +223,14 @@ func (db *DB) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { } // InsertIntoLookupHost implements EventDB.InsertIntoLookupHost. -func (db *DB) InsertIntoLookupHost(ev *LookupHostEvent) { +func (db *dbSharedWithChildren) InsertIntoLookupHost(ev *LookupHostEvent) { db.mu.Lock() db.lookupHostTable = append(db.lookupHostTable, ev) db.mu.Unlock() } // SelectAllFromLookupHost returns all the lookup host events. -func (db *DB) SelectAllFromLookupHost() (out []*LookupHostEvent) { +func (db *dbSharedWithChildren) SelectAllFromLookupHost() (out []*LookupHostEvent) { db.mu.Lock() out = append(out, db.lookupHostTable...) 
db.mu.Unlock() @@ -214,14 +238,14 @@ func (db *DB) SelectAllFromLookupHost() (out []*LookupHostEvent) { } // InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc -func (db *DB) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { +func (db *dbSharedWithChildren) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { db.mu.Lock() db.lookupHTTPSvcTable = append(db.lookupHTTPSvcTable, ev) db.mu.Unlock() } // SelectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. -func (db *DB) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { +func (db *dbSharedWithChildren) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { db.mu.Lock() out = append(out, db.lookupHTTPSvcTable...) db.mu.Unlock() @@ -229,14 +253,14 @@ func (db *DB) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { } // InsertIntoDNSRoundTrip implements EventDB.InsertIntoDNSRoundTrip. -func (db *DB) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { +func (db *dbSharedWithChildren) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { db.mu.Lock() db.dnsRoundTripTable = append(db.dnsRoundTripTable, ev) db.mu.Unlock() } // SelectAllFromDNSRoundTrip returns all DNS round trip events. -func (db *DB) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { +func (db *dbSharedWithChildren) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { db.mu.Lock() out = append(out, db.dnsRoundTripTable...) db.mu.Unlock() @@ -244,14 +268,14 @@ func (db *DB) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { } // InsertIntoHTTPRoundTrip implements EventDB.InsertIntoHTTPRoundTrip. -func (db *DB) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { +func (db *dbSharedWithChildren) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { db.mu.Lock() db.httpRoundTripTable = append(db.httpRoundTripTable, ev) db.mu.Unlock() } // SelectAllFromHTTPRoundTrip returns all HTTP round trip events. 
-func (db *DB) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { +func (db *dbSharedWithChildren) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { db.mu.Lock() out = append(out, db.httpRoundTripTable...) db.mu.Unlock() @@ -259,14 +283,14 @@ func (db *DB) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { } // InsertIntoHTTPRedirect implements EventDB.InsertIntoHTTPRedirect. -func (db *DB) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { +func (db *dbSharedWithChildren) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { db.mu.Lock() db.httpRedirectTable = append(db.httpRedirectTable, ev) db.mu.Unlock() } // SelectAllFromHTTPRedirect returns all HTTP redirections. -func (db *DB) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { +func (db *dbSharedWithChildren) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { db.mu.Lock() out = append(out, db.httpRedirectTable...) db.mu.Unlock() @@ -274,14 +298,14 @@ func (db *DB) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { } // InsertIntoQUICHandshake implements EventDB.InsertIntoQUICHandshake. -func (db *DB) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { +func (db *dbSharedWithChildren) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { db.mu.Lock() db.quicHandshakeTable = append(db.quicHandshakeTable, ev) db.mu.Unlock() } // SelectAllFromQUICHandshake returns all QUIC handshake events. -func (db *DB) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { +func (db *dbSharedWithChildren) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { db.mu.Lock() out = append(out, db.quicHandshakeTable...) db.mu.Unlock() @@ -289,24 +313,35 @@ func (db *DB) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { } // NextConnID implements EventDB.NextConnID. 
-func (db *DB) NextConnID() (out int64) { - return db.internals.NextConnID() +func (db *dbSharedWithChildren) NextConnID() (out int64) { + db.mu.Lock() + db.connID++ // start from 1 + out = db.connID + db.mu.Unlock() + return } // MeasurementID implements EventDB.MeasurementID. func (db *DB) MeasurementID() (out int64) { - return db.internals.MeasurementID() + db.mu.Lock() + out = db.measurementID + db.mu.Unlock() + return } // NextMeasurement increments the internal MeasurementID and // returns it, so that later you can reference the current measurement. func (db *DB) NextMeasurement() (out int64) { - return db.internals.NextMeasurement() + db.mu.Lock() + db.measurementID++ // start from 1 + out = db.measurementID + db.mu.Unlock() + return } // SelectAllFromDialWithMeasurementID calls SelectAllFromConnect // and filters the result by MeasurementID. -func (db *DB) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { +func (db *dbSharedWithChildren) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { for _, ev := range db.SelectAllFromDial() { if id == ev.MeasurementID { out = append(out, ev) @@ -317,7 +352,7 @@ func (db *DB) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) // SelectAllFromReadWriteWithMeasurementID calls SelectAllFromReadWrite and // filters the result by MeasurementID. -func (db *DB) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { +func (db *dbSharedWithChildren) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { for _, ev := range db.SelectAllFromReadWrite() { if id == ev.MeasurementID { out = append(out, ev) @@ -328,7 +363,7 @@ func (db *DB) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkE // SelectAllFromCloseWithMeasurementID calls SelectAllFromClose // and filters the result by MeasurementID. 
-func (db *DB) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { +func (db *dbSharedWithChildren) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { for _, ev := range db.SelectAllFromClose() { if id == ev.MeasurementID { out = append(out, ev) @@ -339,7 +374,7 @@ func (db *DB) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent // SelectAllFromTLSHandshakeWithMeasurementID calls SelectAllFromTLSHandshake // and filters the result by MeasurementID. -func (db *DB) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { +func (db *dbSharedWithChildren) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { for _, ev := range db.SelectAllFromTLSHandshake() { if id == ev.MeasurementID { out = append(out, ev) @@ -350,7 +385,7 @@ func (db *DB) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHa // SelectAllFromQUICHandshakeWithMeasurementID calls SelectAllFromQUICSHandshake // and filters the result by MeasurementID. -func (db *DB) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { +func (db *dbSharedWithChildren) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { for _, ev := range db.SelectAllFromQUICHandshake() { if id == ev.MeasurementID { out = append(out, ev) @@ -361,7 +396,7 @@ func (db *DB) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUIC // SelectAllFromLookupHostWithMeasurementID calls SelectAllFromLookupHost // and filters the result by MeasurementID. 
-func (db *DB) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { +func (db *dbSharedWithChildren) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { for _, ev := range db.SelectAllFromLookupHost() { if id == ev.MeasurementID { out = append(out, ev) @@ -372,7 +407,7 @@ func (db *DB) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupH // SelectAllFromLookupHTTPSSvcWithMeasurementID calls SelectAllFromHTTPSSvc // and filters the result by MeasurementID. -func (db *DB) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { +func (db *dbSharedWithChildren) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { for _, ev := range db.SelectAllFromLookupHTTPSSvc() { if id == ev.MeasurementID { out = append(out, ev) @@ -383,7 +418,7 @@ func (db *DB) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*Loo // SelectAllFromDNSRoundTripWithMeasurementID calls SelectAllFromDNSRoundTrip // and filters the result by MeasurementID. -func (db *DB) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { +func (db *dbSharedWithChildren) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { for _, ev := range db.SelectAllFromDNSRoundTrip() { if id == ev.MeasurementID { out = append(out, ev) @@ -394,7 +429,7 @@ func (db *DB) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRo // SelectAllFromHTTPRoundTripWithMeasurementID calls SelectAllFromHTTPRoundTrip // and filters the result by MeasurementID. 
-func (db *DB) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { +func (db *dbSharedWithChildren) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { for _, ev := range db.SelectAllFromHTTPRoundTrip() { if id == ev.MeasurementID { out = append(out, ev) @@ -405,7 +440,7 @@ func (db *DB) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTP // SelectAllFromHTTPRedirectWithMeasurementID calls SelectAllFromHTTPRedirect // and filters the result by MeasurementID. -func (db *DB) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { +func (db *dbSharedWithChildren) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { for _, ev := range db.SelectAllFromHTTPRedirect() { if id == ev.MeasurementID { out = append(out, ev) @@ -447,14 +482,14 @@ func (e *Endpoint) String() string { // - domain is the domain we want to connect to; // // - port is the port for the endpoint. -func (db *DB) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { +func (db *dbSharedWithChildren) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { out = append(out, db.selectAllTCPEndpoints(domain, port)...) out = append(out, db.selectAllQUICEndpoints(domain, port)...) 
out = db.deduplicateEndpoints(out) return } -func (db *DB) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { +func (db *dbSharedWithChildren) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { for _, entry := range db.SelectAllFromLookupHost() { if domain != entry.Domain { continue @@ -469,7 +504,7 @@ func (db *DB) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { return } -func (db *DB) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { +func (db *dbSharedWithChildren) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { for _, entry := range db.SelectAllFromLookupHTTPSSvc() { if domain != entry.Domain { continue @@ -485,7 +520,7 @@ func (db *DB) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { return } -func (db *DB) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { +func (db *dbSharedWithChildren) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { duplicates := make(map[string]*Endpoint) for _, epnt := range epnts { duplicates[epnt.String()] = epnt @@ -496,11 +531,11 @@ func (db *DB) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { return } -func (db *DB) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { +func (db *dbSharedWithChildren) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} } -func (db *DB) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { +func (db *dbSharedWithChildren) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { for _, alpn := range entry.ALPN { switch alpn { case "h3": @@ -547,7 +582,7 @@ func (e *HTTPEndpoint) String() string { // - URL is the URL for which we want endpoints; // // Returns a list of endpoints or an error. 
-func (db *DB) SelectAllHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { +func (db *dbSharedWithChildren) SelectAllHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { domain := URL.Hostname() port, err := PortFromURL(URL) if err != nil { @@ -590,7 +625,7 @@ func PortFromURL(URL *url.URL) (string, error) { } } -func (db *DB) alpnForHTTPEndpoint(network EndpointNetwork) []string { +func (db *dbSharedWithChildren) alpnForHTTPEndpoint(network EndpointNetwork) []string { switch network { case NetworkQUIC: return []string{"h3"} diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index bcaaa58725..148a0bf567 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -36,7 +36,7 @@ type Measurer struct { // NewMeasurerWithDefaultSettings creates a new Measurer // instance using the most default settings. func NewMeasurerWithDefaultSettings() *Measurer { - db := NewSaver(time.Now()) + db := NewDB(time.Now()) return &Measurer{ DB: db, HTTPClient: &http.Client{}, @@ -506,7 +506,11 @@ func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, const parallelism = 3 for i := 0; i < parallelism; i++ { go func() { - child := mx.clone(mx.DB.clone()) + // Important: we need a children DB because we need a + // separate MeasurementID namespace. The whole package + // does not keep constant MeasurementID if you don't + // use this factory for creating a new child. 
+ child := mx.clone(mx.DB.NewChildDB()) for epnt := range input { output <- child.HTTPEndpointGet(ctx, epnt, jar) } From 1272aa273d43d714fe876528e083a6609e26c941 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 13:59:25 +0200 Subject: [PATCH 12/53] allow for parallel dns resolutions --- internal/measurex/db.go | 44 ++++++++++++++++ internal/measurex/measurer.go | 95 +++++++++++++++++++++++++++++++++-- 2 files changed, 135 insertions(+), 4 deletions(-) diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 87692c7e07..4b2d872c63 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -93,6 +93,7 @@ type dbSharedWithChildren struct { httpRoundTripTable []*HTTPRoundTripEvent httpRedirectTable []*HTTPRedirectEvent quicHandshakeTable []*QUICHandshakeEvent + resolversTable []*ResolverInfo // mu protects all the above tables and ConnID mu sync.Mutex @@ -312,6 +313,49 @@ func (db *dbSharedWithChildren) SelectAllFromQUICHandshake() (out []*QUICHandsha return } +// ResolverInfo contains info about a DNS resolver. +type ResolverInfo struct { + // Network is the resolver's network (e.g., "doh", "udp") + Network string + + // Address is the address (e.g., "1.1.1.1:53", "https://1.1.1.1/dns-query") + Address string +} + +// string returns a string representation of the resolver. +func (ri *ResolverInfo) string() string { + return fmt.Sprintf("%s@%s", ri.Network, ri.Address) +} + +// InsertIntoResolvers inserts a given resolver into the resolver's table. +func (db *dbSharedWithChildren) InsertIntoResolvers(network, address string) { + db.mu.Lock() + db.resolversTable = append(db.resolversTable, &ResolverInfo{ + Network: network, + Address: address, + }) + db.mu.Unlock() +} + +// SelectAllFromResolvers returns all the configured resolvers. This function +// ensures that the system resolver is in the list and also ensures that we +// return in output a list only containing unique resolvers. 
+func (db *dbSharedWithChildren) SelectAllFromResolvers() (out []*ResolverInfo) { + all := append([]*ResolverInfo{}, &ResolverInfo{Network: "system"}) + db.mu.Lock() + all = append(all, db.resolversTable...) + db.mu.Unlock() + unique := make(map[string]bool) + for _, reso := range all { + if _, found := unique[reso.string()]; found { + continue + } + unique[reso.string()] = true + out = append(out, reso) + } + return +} + // NextConnID implements EventDB.NextConnID. func (db *dbSharedWithChildren) NextConnID() (out int64) { db.mu.Lock() diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 148a0bf567..dae1069f75 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "errors" + "net" "net/http" "net/url" "time" @@ -47,8 +48,9 @@ func NewMeasurerWithDefaultSettings() *Measurer { } } -// clone returns a clone of the current measurer with a new DB. -func (mx *Measurer) clone(db *DB) *Measurer { +// Clone returns a clone of the current Measurer with the given +// DB instead of the DB used by the original Measurer. +func (mx *Measurer) Clone(db *DB) *Measurer { return &Measurer{ DB: db, HTTPClient: mx.HTTPClient, @@ -488,7 +490,7 @@ func (mx *Measurer) Infof(format string, v ...interface{}) { // HTTPEndpointGetParallel performs an HTTPEndpointGet for each // input endpoint using a pool of background goroutines. // -// This function returns to the caller a channel where to run +// This function returns to the caller a channel where to read // measurements from. The channel is closed when done. func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, jar http.CookieJar, epnts ...*HTTPEndpoint) <-chan *Measurement { @@ -510,7 +512,7 @@ func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, // separate MeasurementID namespace. The whole package // does not keep constant MeasurementID if you don't // use this factory for creating a new child. 
- child := mx.clone(mx.DB.NewChildDB()) + child := mx.Clone(mx.DB.NewChildDB()) for epnt := range input { output <- child.HTTPEndpointGet(ctx, epnt, jar) } @@ -525,3 +527,88 @@ func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, }() return output } + +// RegisterUDPResolvers registers UDP resolvers into the DB. +func (mx *Measurer) RegisterUDPResolvers(resolvers ...string) { + for _, resolver := range resolvers { + mx.DB.InsertIntoResolvers("udp", resolver) + } +} + +// LookupURLHostParallel performs an LookupHost-like operation for each +// DNS resolver registered into the database using a pool of background +// goroutines. +// +// This function returns to the caller a channel where to read +// measurements from. The channel is closed when done. +func (mx *Measurer) LookupURLHostParallel( + ctx context.Context, URL *url.URL) <-chan *Measurement { + var ( + done = make(chan interface{}) + resolvers = make(chan *ResolverInfo) + output = make(chan *Measurement) + ) + go func() { + defer close(resolvers) + for _, reso := range mx.DB.SelectAllFromResolvers() { + resolvers <- reso + } + }() + const parallelism = 3 + for i := 0; i < parallelism; i++ { + go func() { + // Important: we need a children DB because we need a + // separate MeasurementID namespace. The whole package + // does not keep constant MeasurementID if you don't + // use this factory for creating a new child. + child := mx.Clone(mx.DB.NewChildDB()) + for reso := range resolvers { + child.lookupHostWithResolverInfo(ctx, reso, URL, output) + } + done <- true + }() + } + go func() { + for i := 0; i < parallelism; i++ { + <-done + } + close(output) + }() + return output +} + +// lookupHostWithResolverInfo performs a LookupHost-like +// operation using the given ResolverInfo. 
+func (mx *Measurer) lookupHostWithResolverInfo( + ctx context.Context, reso *ResolverInfo, URL *url.URL, + output chan<- *Measurement) { + switch reso.Network { + case "system": + output <- mx.LookupHostSystem(ctx, URL.Hostname()) + case "udp": + output <- mx.LookupHostUDP(ctx, URL.Hostname(), reso.Address) + default: + return + } + switch URL.Scheme { + case "https": + default: + return + } + switch reso.Network { + case "udp": + output <- mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), reso.Address) + } +} + +// LookupHostParallel is like LookupURLHostParallel but we only +// have in input a hostname rather than a URL. As such, we cannot +// determine whether to perform HTTPSSvc lookups and so we aren't +// going to perform this kind of lookup in this case. +func (mx *Measurer) LookupHostParallel( + ctx context.Context, hostname, port string) <-chan *Measurement { + return mx.LookupURLHostParallel(ctx, &url.URL{ + Scheme: "", // so we don't see https and we don't try HTTPSSvc + Host: net.JoinHostPort(hostname, port), + }) +} From df68e4655a603a1cfa05660df3ab17bf8858f858 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 14:26:33 +0200 Subject: [PATCH 13/53] allow running automatic "parallel" TH queries --- internal/measurex/db.go | 43 +++++++++++++++++++++ internal/measurex/measurer.go | 72 +++++++++++++++++++++++++++++++---- 2 files changed, 107 insertions(+), 8 deletions(-) diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 4b2d872c63..9e7a4ab724 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -94,6 +94,7 @@ type dbSharedWithChildren struct { httpRedirectTable []*HTTPRedirectEvent quicHandshakeTable []*QUICHandshakeEvent resolversTable []*ResolverInfo + testHelpersTable []*TestHelperInfo // mu protects all the above tables and ConnID mu sync.Mutex @@ -356,6 +357,48 @@ func (db *dbSharedWithChildren) SelectAllFromResolvers() (out []*ResolverInfo) { return } +// TestHelperInfo contains info about a test helper. 
+type TestHelperInfo struct { + // Protocol is the test helper's protocol (e.g., "wcth") + Protocol string + + // URL is the URL (e.g., "https://wcth.ooni.io/") + URL string +} + +// string returns a string representation of the test helper. +func (ti *TestHelperInfo) string() string { + return fmt.Sprintf("%s@%s", ti.Protocol, ti.URL) +} + +// InsertIntoTestHelpers inserts a given TH into the test helpers table. +func (db *dbSharedWithChildren) InsertIntoTestHelpers(proto, URL string) { + db.mu.Lock() + db.testHelpersTable = append(db.testHelpersTable, &TestHelperInfo{ + Protocol: proto, + URL: URL, + }) + db.mu.Unlock() +} + +// SelectAllFromTestHelpers returns all the configured THs. This function +// ensures that we return in output a list only containing unique THs. +func (db *dbSharedWithChildren) SelectAllFromTestHelpers() (out []*TestHelperInfo) { + var all []*TestHelperInfo + db.mu.Lock() + all = append(all, db.testHelpersTable...) + db.mu.Unlock() + unique := make(map[string]bool) + for _, th := range all { + if _, found := unique[th.string()]; found { + continue + } + unique[th.string()] = true + out = append(out, th) + } + return +} + // NextConnID implements EventDB.NextConnID. func (db *dbSharedWithChildren) NextConnID() (out int64) { db.mu.Lock() diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index dae1069f75..2cd94b62e5 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -29,9 +29,6 @@ type Measurer struct { // TLSHandshaker is the MANDATORY TLS handshaker. TLSHandshaker TLSHandshaker - - // WCTHURL is the MANDATORY URL of the WCTH. 
- WCTHURL string } // NewMeasurerWithDefaultSettings creates a new Measurer @@ -44,7 +41,6 @@ func NewMeasurerWithDefaultSettings() *Measurer { Logger: log.Log, Origin: OriginProbe, TLSHandshaker: NewTLSHandshakerStdlib(OriginProbe, db, log.Log), - WCTHURL: "https://wcth.ooni.io/", } } @@ -57,7 +53,6 @@ func (mx *Measurer) Clone(db *DB) *Measurer { Logger: mx.Logger, Origin: mx.Origin, TLSHandshaker: mx.TLSHandshaker, - WCTHURL: mx.WCTHURL, } } @@ -458,13 +453,13 @@ func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, // // Returns a measurement. func (mx *Measurer) LookupWCTH(ctx context.Context, URL *url.URL, - endpoints []*Endpoint, port string) *Measurement { + endpoints []*Endpoint, port string, WCTHURL string) *Measurement { const timeout = 30 * time.Second mx.Infof("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", - mx.WCTHURL, URL.String(), endpoints, port, timeout) + WCTHURL, URL.String(), endpoints, port, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - w := NewWCTHWorker(mx.Logger, mx.DB, mx.HTTPClient, mx.WCTHURL) + w := NewWCTHWorker(mx.Logger, mx.DB, mx.HTTPClient, WCTHURL) id := mx.nextMeasurement() _, _ = w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints)) return NewMeasurement(mx.DB, id) @@ -612,3 +607,64 @@ func (mx *Measurer) LookupHostParallel( Host: net.JoinHostPort(hostname, port), }) } + +// RegisterWCTH registers URLs for the WCTH. +func (mx *Measurer) RegisterWCTH(URLs ...string) { + for _, URL := range URLs { + mx.DB.InsertIntoTestHelpers("wcth", URL) + } +} + +// QueryTestHelperParallel performs a parallel query for the +// given URL to all known test helpers. 
+func (mx *Measurer) QueryTestHelperParallel( + ctx context.Context, URL *url.URL) <-chan *Measurement { + var ( + done = make(chan interface{}) + ths = make(chan *TestHelperInfo) + output = make(chan *Measurement) + ) + go func() { + defer close(ths) + for _, th := range mx.DB.SelectAllFromTestHelpers() { + ths <- th + } + }() + const parallelism = 1 // maybe raise in the future? + for i := 0; i < parallelism; i++ { + go func() { + // Important: we need a children DB because we need a + // separate MeasurementID namespace. The whole package + // does not keep constant MeasurementID if you don't + // use this factory for creating a new child. + child := mx.Clone(mx.DB.NewChildDB()) + for th := range ths { + child.asyncTestHelperQuery(ctx, th, URL, output) + } + done <- true + }() + } + go func() { + for i := 0; i < parallelism; i++ { + <-done + } + close(output) + }() + return output +} + +func (mx *Measurer) asyncTestHelperQuery( + ctx context.Context, th *TestHelperInfo, URL *url.URL, + output chan<- *Measurement) { + switch th.Protocol { + case "wcth": + port, err := PortFromURL(URL) + if err != nil { + return // TODO(bassosimone): what to do about this error? + } + endpoints := mx.DB.SelectAllEndpointsForDomain(URL.Hostname(), port) + output <- mx.LookupWCTH(ctx, URL, endpoints, port, th.URL) + default: + // don't know what to do + } +} From 9a25631492cbd63516696258f43a855345e48257 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 15:01:34 +0200 Subject: [PATCH 14/53] at last add code for parsing a URL --- internal/measurex/measurer.go | 74 +++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 2cd94b62e5..70140dae28 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -668,3 +668,77 @@ func (mx *Measurer) asyncTestHelperQuery( // don't know what to do } } + +// URLMeasurement is the measurement of a whole URL. 
It contains +// a bunch of measurements detailing each measurement step. +type URLMeasurement struct { + // URL is the URL we're measuring. + URL string + + // CannotParseURL is true if the input URL could not be parsed. + CannotParseURL bool + + // DNS contains all the DNS related measurements. + DNS []*Measurement + + // TH contains all the measurements from the test helpers. + TH []*Measurement + + // CannotGenerateEndpoints for URL is true if the code tasked of + // generating a list of endpoints for the URL fails. + CannotGenerateEndpoints bool + + // Endpoints contains a measurement for each endpoint + // that we discovered via DNS or TH. + Endpoints []*Measurement +} + +// MeasureHTTPURL measures an HTTP or HTTPS URL. The DNS resolvers +// and the Test Helpers we use in this measurement are the ones +// configured into the database. The default is to use the system +// resolver and to use not Test Helper. Use RegisterWCTH and +// RegisterUDPResolvers (and other similar functions that have +// not been written at the moment of writing this note) to +// augment the set of resolvers and Test Helpers we use here. +// +// Arguments: +// +// - ctx is the context for timeout/cancellation +// +// - URL is the URL to measure +// +// - cookies contains the cookies we should use for measuring +// this URL and possibly future redirections. +// +// To create an empty set of cookies, use NewCookieJar. It's +// normal to have empty cookies at the beginning. If we follow +// extra redirections after this run then the cookie jar will +// contain the cookies for following the next redirection. +// +// We need cookies because a small amount of URLs does not +// redirect properly without cookies. This has been +// documented at https://github.com/ooni/probe/issues/1727. 
+func (mx *Measurer) MeasureHTTPURL( + ctx context.Context, URL string, cookies http.CookieJar) *URLMeasurement { + m := &URLMeasurement{URL: URL} + parsed, err := url.Parse(URL) + if err != nil { + m.CannotParseURL = true + return m + } + for dns := range mx.LookupURLHostParallel(ctx, parsed) { + m.DNS = append(m.DNS, dns) + } + for th := range mx.QueryTestHelperParallel(ctx, parsed) { + m.TH = append(m.TH, th) + } + epnts, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + if err != nil { + m.CannotGenerateEndpoints = true + return m + } + for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, epnts...) { + m.Endpoints = append(m.Endpoints, epnt) + } + return m +} From e77020f9fe47cf07e5cc3d5efcfcf17802f0b973 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 15:08:14 +0200 Subject: [PATCH 15/53] compute all the redirections --- internal/measurex/measurer.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 70140dae28..60953d2e26 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -691,6 +691,10 @@ type URLMeasurement struct { // Endpoints contains a measurement for each endpoint // that we discovered via DNS or TH. Endpoints []*Measurement + + // RedirectURLs contain the URLs to which we should fetch + // if we choose to follow redirections. + RedirectURLs []string } // MeasureHTTPURL measures an HTTP or HTTPS URL. The DNS resolvers @@ -740,5 +744,16 @@ func (mx *Measurer) MeasureHTTPURL( for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, epnts...) 
{ m.Endpoints = append(m.Endpoints, epnt) } + dups := make(map[string]bool) + for _, epnt := range m.Endpoints { + for _, redir := range epnt.HTTPRedirect { + loc := redir.Location.String() + if _, found := dups[loc]; found { + continue + } + dups[loc] = true + m.RedirectURLs = append(m.RedirectURLs, loc) + } + } return m } From f922ec5d9006a09adfebc83eea86eb4275357f09 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 15:29:50 +0200 Subject: [PATCH 16/53] implement following redirections --- internal/measurex/measurer.go | 80 +++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 60953d2e26..335c691c03 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -63,7 +63,7 @@ func (mx *Measurer) nextMeasurement() int64 { // LookupHostSystem performs a LookupHost using the system resolver. func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measurement { const timeout = 4 * time.Second - mx.Infof("LookupHostSystem domain=%s timeout=%s...", domain, timeout) + mx.Logf("LookupHostSystem domain=%s timeout=%s...", domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() r := NewResolverSystem(mx.Origin, mx.DB, mx.Logger) @@ -87,7 +87,7 @@ func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measur func (mx *Measurer) LookupHostUDP( ctx context.Context, domain, address string) *Measurement { const timeout = 4 * time.Second - mx.Infof("LookupHostUDP serverEndpoint=%s/udp domain=%s timeout=%s...", + mx.Logf("LookupHostUDP serverEndpoint=%s/udp domain=%s timeout=%s...", address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -112,7 +112,7 @@ func (mx *Measurer) LookupHostUDP( func (mx *Measurer) LookupHTTPSSvcUDP( ctx context.Context, domain, address string) *Measurement { const timeout = 4 * time.Second - 
mx.Infof("LookupHTTPSSvcUDP engine=udp://%s domain=%s timeout=%s...", + mx.Logf("LookupHTTPSSvcUDP engine=udp://%s domain=%s timeout=%s...", address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -145,7 +145,7 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) *Measurement // tcpConnect is like TCPConnect but does not create a new measurement. func (mx *Measurer) tcpConnect(ctx context.Context, address string) (Conn, error) { const timeout = 10 * time.Second - mx.Infof("TCPConnect endpoint=%s timeout=%s...", address, timeout) + mx.Logf("TCPConnect endpoint=%s timeout=%s...", address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() d := NewDialerWithoutResolver(mx.Origin, mx.DB, mx.Logger) @@ -203,7 +203,7 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, return nil, err } const timeout = 10 * time.Second - mx.Infof("TLSHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", + mx.Logf("TLSHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", config.ServerName, config.NextProtos, address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -247,7 +247,7 @@ func (mx *Measurer) QUICHandshake(ctx context.Context, address string, func (mx *Measurer) quicHandshake(ctx context.Context, address string, config *tls.Config) (QUICEarlySession, error) { const timeout = 10 * time.Second - mx.Infof("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", + mx.Logf("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", config.ServerName, config.NextProtos, address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -427,7 +427,7 @@ func (mx *Measurer) httpEndpointGetQUIC( func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, epnt *HTTPEndpoint, req *http.Request) (*http.Response, error) { const timeout = 15 * time.Second - mx.Infof("httpClientDo endpoint=%s method=%s url=%s headers=%+v timeout=%s...", + 
mx.Logf("httpClientDo endpoint=%s method=%s url=%s headers=%+v timeout=%s...", epnt.String(), req.Method, req.URL.String(), req.Header, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -455,7 +455,7 @@ func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, func (mx *Measurer) LookupWCTH(ctx context.Context, URL *url.URL, endpoints []*Endpoint, port string, WCTHURL string) *Measurement { const timeout = 30 * time.Second - mx.Infof("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", + mx.Logf("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", WCTHURL, URL.String(), endpoints, port, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -477,8 +477,9 @@ func (mx *Measurer) onlyTCPEndpoints(endpoints []*Endpoint) (out []string) { return } -// Infof formats and logs an informational message using mx.Logger. -func (mx *Measurer) Infof(format string, v ...interface{}) { +// Logf formats and logs a message using mx.Logger. All messages +// logged by Measurer should use this function to emit logs. +func (mx *Measurer) Logf(format string, v ...interface{}) { mx.Logger.Infof(format, v...) } @@ -697,7 +698,7 @@ type URLMeasurement struct { RedirectURLs []string } -// MeasureHTTPURL measures an HTTP or HTTPS URL. The DNS resolvers +// MeasureURL measures an HTTP or HTTPS URL. The DNS resolvers // and the Test Helpers we use in this measurement are the ones // configured into the database. The default is to use the system // resolver and to use not Test Helper. Use RegisterWCTH and @@ -722,8 +723,9 @@ type URLMeasurement struct { // We need cookies because a small amount of URLs does not // redirect properly without cookies. This has been // documented at https://github.com/ooni/probe/issues/1727. 
-func (mx *Measurer) MeasureHTTPURL( +func (mx *Measurer) MeasureURL( ctx context.Context, URL string, cookies http.CookieJar) *URLMeasurement { + mx.Logf("MeasureURL url=%s", URL) m := &URLMeasurement{URL: URL} parsed, err := url.Parse(URL) if err != nil { @@ -744,6 +746,13 @@ func (mx *Measurer) MeasureHTTPURL( for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, epnts...) { m.Endpoints = append(m.Endpoints, epnt) } + m.fillRedirects() + return m +} + +// fillRedirects takes in input a complete URLMeasurement and fills +// the field named Redirects with all redirections. +func (m *URLMeasurement) fillRedirects() { dups := make(map[string]bool) for _, epnt := range m.Endpoints { for _, redir := range epnt.HTTPRedirect { @@ -755,5 +764,50 @@ func (mx *Measurer) MeasureHTTPURL( m.RedirectURLs = append(m.RedirectURLs, loc) } } - return m +} + +// redirectionQueue is the type we use to manage the redirection +// queue and to follow a reasonable number of redirects. +type redirectionQueue struct { + q []string + cnt int +} + +func (r *redirectionQueue) append(URL ...string) { + r.q = append(r.q, URL...) +} + +func (r *redirectionQueue) popleft() (URL string) { + URL = r.q[0] + r.q = r.q[1:] + return +} + +func (r *redirectionQueue) empty() bool { + return len(r.q) <= 0 +} + +func (r *redirectionQueue) redirectionsCount() int { + return r.cnt +} + +// MeasureURLAndFollowRedirections is like MeasureURL except +// that it _also_ follows all the HTTP redirections. +func (mx *Measurer) MeasureHTTPURLAndFollowRedirections(ctx context.Context, + URL string, cookies http.CookieJar) <-chan *URLMeasurement { + out := make(chan *URLMeasurement) + go func() { + defer close(out) + m := mx.MeasureURL(ctx, URL, cookies) + out <- m + rq := &redirectionQueue{q: m.RedirectURLs} + const maxRedirects = 7 + for !rq.empty() && rq.redirectionsCount() < maxRedirects { + URL = rq.popleft() + m = mx.MeasureURL(ctx, URL, cookies) + out <- m + rq.append(m.RedirectURLs...) 
+ } + }() + return out } From 97519ae9f59d16454777998d91172a8e93cb6a22 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 18:07:14 +0200 Subject: [PATCH 17/53] fix: pass around the measurement ID Allows to cut the most significant bit of complexity which also happened to be buggy :^) --- internal/measurex/db.go | 175 ++++++++++--------------------- internal/measurex/dialer.go | 36 ++++--- internal/measurex/dnsx.go | 13 ++- internal/measurex/http.go | 45 +++++--- internal/measurex/measurement.go | 6 -- internal/measurex/measurer.go | 154 ++++++++++++--------------- internal/measurex/quic.go | 27 +++-- internal/measurex/resolver.go | 28 +++-- internal/measurex/tls.go | 19 +++- internal/measurex/wcth.go | 19 +++- 10 files changed, 245 insertions(+), 277 deletions(-) diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 9e7a4ab724..e5e3020e63 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -49,39 +49,11 @@ type EventDB interface { // NextConnID increments and returns the connection ID. NextConnID() int64 - - // MeasurementID returns the current measurement ID. - MeasurementID() int64 } // DB is an EventDB that saves events and also allows to // ask questions regarding the saved events. -// -// Caveats: the way in which we assign an ID to a measurement -// (i.e., a related set of operations) assumes that we don't -// run paralell measurements with the same DB. To run parallel -// measurements, you need to create a children DB using the -// NewChildrenDB factory function. type DB struct { - *dbSharedWithChildren - - // the measurementID must not be shared with children - // running in parallel because it must stay const during - // the whole measurements or we cannot measure. - // - // Use NewChildrenDB to create one or more children - // that can run in parallel with each other and their parent. 
- measurementID int64 - mu sync.Mutex -} - -// dbSharedWithChildren is the part of the database shared -// with children databases. These databases are created during -// parallel measurements but need to insert into the same -// tables of the parent database. The ConnID can also be part -// of this structure. The only element that needs to be constant -// during a measurement is the MeasurementID. -type dbSharedWithChildren struct { // database tables dialTable []*NetworkEvent readWriteTable []*NetworkEvent @@ -96,60 +68,29 @@ type dbSharedWithChildren struct { resolversTable []*ResolverInfo testHelpersTable []*TestHelperInfo - // mu protects all the above tables and ConnID + // mu protects all the fields mu sync.Mutex - // other pieces of data we can share with children - // because they have more relaxed constaints than the - // measurement ID: begin is read-only and ConnID is - // read once and then propagated. - begin time.Time - connID int64 + // non-table database fields + begin time.Time + connID int64 + measurementID int64 } var _ EventDB = &DB{} -var ( - baseMeasurementID int32 - measurementIDmu sync.Mutex -) - -func nextBaseMeasurementID() (out int64) { - measurementIDmu.Lock() - baseMeasurementID++ - out = (int64(baseMeasurementID) << 32) - measurementIDmu.Unlock() - return -} - -// NewDB creates a new instance of DB. This instance will have the base -// time configured to be begin. Also, its base measurement ID will depend -// on how many databases we have created so far. Each database gets its -// own 31 bit namespace for measurements. +// NewDB creates a new instance of DB. func NewDB(begin time.Time) *DB { - return &DB{ - dbSharedWithChildren: &dbSharedWithChildren{begin: begin}, - measurementID: nextBaseMeasurementID(), - } -} - -// NewChildDB returns a DB that has the same base measurement and -// tables of the original DB but gets a new measurement namespace. You -// should use this factory every time you run parallel measurements. 
-func (db *DB) NewChildDB() *DB { - return &DB{ - dbSharedWithChildren: db.dbSharedWithChildren, - measurementID: nextBaseMeasurementID(), - } + return &DB{begin: begin} } // ElapsedTime implements EventDB.ElapsedTime. -func (db *dbSharedWithChildren) ElapsedTime() time.Duration { +func (db *DB) ElapsedTime() time.Duration { return time.Since(db.begin) } // DeleteAll deletes all the saved data. -func (db *dbSharedWithChildren) DeleteAll() { +func (db *DB) DeleteAll() { db.mu.Lock() db.dialTable = nil db.readWriteTable = nil @@ -165,14 +106,14 @@ func (db *dbSharedWithChildren) DeleteAll() { } // InsertIntoDial implements EventDB.InsertIntoDial. -func (db *dbSharedWithChildren) InsertIntoDial(ev *NetworkEvent) { +func (db *DB) InsertIntoDial(ev *NetworkEvent) { db.mu.Lock() db.dialTable = append(db.dialTable, ev) db.mu.Unlock() } // SelectAllFromDial returns all dial events. -func (db *dbSharedWithChildren) SelectAllFromDial() (out []*NetworkEvent) { +func (db *DB) SelectAllFromDial() (out []*NetworkEvent) { db.mu.Lock() out = append(out, db.dialTable...) db.mu.Unlock() @@ -180,14 +121,14 @@ func (db *dbSharedWithChildren) SelectAllFromDial() (out []*NetworkEvent) { } // InsertIntoReadWrite implements EventDB.InsertIntoReadWrite. -func (db *dbSharedWithChildren) InsertIntoReadWrite(ev *NetworkEvent) { +func (db *DB) InsertIntoReadWrite(ev *NetworkEvent) { db.mu.Lock() db.readWriteTable = append(db.readWriteTable, ev) db.mu.Unlock() } // SelectAllFromReadWrite returns all I/O events. -func (db *dbSharedWithChildren) SelectAllFromReadWrite() (out []*NetworkEvent) { +func (db *DB) SelectAllFromReadWrite() (out []*NetworkEvent) { db.mu.Lock() out = append(out, db.readWriteTable...) db.mu.Unlock() @@ -195,14 +136,14 @@ func (db *dbSharedWithChildren) SelectAllFromReadWrite() (out []*NetworkEvent) { } // InsertIntoClose implements EventDB.InsertIntoClose. 
-func (db *dbSharedWithChildren) InsertIntoClose(ev *NetworkEvent) { +func (db *DB) InsertIntoClose(ev *NetworkEvent) { db.mu.Lock() db.closeTable = append(db.closeTable, ev) db.mu.Unlock() } // SelectAllFromClose returns all close events. -func (db *dbSharedWithChildren) SelectAllFromClose() (out []*NetworkEvent) { +func (db *DB) SelectAllFromClose() (out []*NetworkEvent) { db.mu.Lock() out = append(out, db.closeTable...) db.mu.Unlock() @@ -210,14 +151,14 @@ func (db *dbSharedWithChildren) SelectAllFromClose() (out []*NetworkEvent) { } // InsertIntoTLSHandshake implements EventDB.InsertIntoTLSHandshake. -func (db *dbSharedWithChildren) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { +func (db *DB) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { db.mu.Lock() db.tlsHandshakeTable = append(db.tlsHandshakeTable, ev) db.mu.Unlock() } // SelectAllFromTLSHandshake returns all TLS handshake events. -func (db *dbSharedWithChildren) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { +func (db *DB) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { db.mu.Lock() out = append(out, db.tlsHandshakeTable...) db.mu.Unlock() @@ -225,14 +166,14 @@ func (db *dbSharedWithChildren) SelectAllFromTLSHandshake() (out []*TLSHandshake } // InsertIntoLookupHost implements EventDB.InsertIntoLookupHost. -func (db *dbSharedWithChildren) InsertIntoLookupHost(ev *LookupHostEvent) { +func (db *DB) InsertIntoLookupHost(ev *LookupHostEvent) { db.mu.Lock() db.lookupHostTable = append(db.lookupHostTable, ev) db.mu.Unlock() } // SelectAllFromLookupHost returns all the lookup host events. -func (db *dbSharedWithChildren) SelectAllFromLookupHost() (out []*LookupHostEvent) { +func (db *DB) SelectAllFromLookupHost() (out []*LookupHostEvent) { db.mu.Lock() out = append(out, db.lookupHostTable...) 
db.mu.Unlock() @@ -240,14 +181,14 @@ func (db *dbSharedWithChildren) SelectAllFromLookupHost() (out []*LookupHostEven } // InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc -func (db *dbSharedWithChildren) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { +func (db *DB) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { db.mu.Lock() db.lookupHTTPSvcTable = append(db.lookupHTTPSvcTable, ev) db.mu.Unlock() } // SelectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. -func (db *dbSharedWithChildren) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { +func (db *DB) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { db.mu.Lock() out = append(out, db.lookupHTTPSvcTable...) db.mu.Unlock() @@ -255,14 +196,14 @@ func (db *dbSharedWithChildren) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTP } // InsertIntoDNSRoundTrip implements EventDB.InsertIntoDNSRoundTrip. -func (db *dbSharedWithChildren) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { +func (db *DB) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { db.mu.Lock() db.dnsRoundTripTable = append(db.dnsRoundTripTable, ev) db.mu.Unlock() } // SelectAllFromDNSRoundTrip returns all DNS round trip events. -func (db *dbSharedWithChildren) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { +func (db *DB) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { db.mu.Lock() out = append(out, db.dnsRoundTripTable...) db.mu.Unlock() @@ -270,14 +211,14 @@ func (db *dbSharedWithChildren) SelectAllFromDNSRoundTrip() (out []*DNSRoundTrip } // InsertIntoHTTPRoundTrip implements EventDB.InsertIntoHTTPRoundTrip. -func (db *dbSharedWithChildren) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { +func (db *DB) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { db.mu.Lock() db.httpRoundTripTable = append(db.httpRoundTripTable, ev) db.mu.Unlock() } // SelectAllFromHTTPRoundTrip returns all HTTP round trip events. 
-func (db *dbSharedWithChildren) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { +func (db *DB) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { db.mu.Lock() out = append(out, db.httpRoundTripTable...) db.mu.Unlock() @@ -285,14 +226,14 @@ func (db *dbSharedWithChildren) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTr } // InsertIntoHTTPRedirect implements EventDB.InsertIntoHTTPRedirect. -func (db *dbSharedWithChildren) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { +func (db *DB) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { db.mu.Lock() db.httpRedirectTable = append(db.httpRedirectTable, ev) db.mu.Unlock() } // SelectAllFromHTTPRedirect returns all HTTP redirections. -func (db *dbSharedWithChildren) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { +func (db *DB) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { db.mu.Lock() out = append(out, db.httpRedirectTable...) db.mu.Unlock() @@ -300,14 +241,14 @@ func (db *dbSharedWithChildren) SelectAllFromHTTPRedirect() (out []*HTTPRedirect } // InsertIntoQUICHandshake implements EventDB.InsertIntoQUICHandshake. -func (db *dbSharedWithChildren) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { +func (db *DB) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { db.mu.Lock() db.quicHandshakeTable = append(db.quicHandshakeTable, ev) db.mu.Unlock() } // SelectAllFromQUICHandshake returns all QUIC handshake events. -func (db *dbSharedWithChildren) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { +func (db *DB) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { db.mu.Lock() out = append(out, db.quicHandshakeTable...) db.mu.Unlock() @@ -329,7 +270,7 @@ func (ri *ResolverInfo) string() string { } // InsertIntoResolvers inserts a given resolver into the resolver's table. 
-func (db *dbSharedWithChildren) InsertIntoResolvers(network, address string) { +func (db *DB) InsertIntoResolvers(network, address string) { db.mu.Lock() db.resolversTable = append(db.resolversTable, &ResolverInfo{ Network: network, @@ -341,7 +282,7 @@ func (db *dbSharedWithChildren) InsertIntoResolvers(network, address string) { // SelectAllFromResolvers returns all the configured resolvers. This function // ensures that the system resolver is in the list and also ensures that we // return in output a list only containing unique resolvers. -func (db *dbSharedWithChildren) SelectAllFromResolvers() (out []*ResolverInfo) { +func (db *DB) SelectAllFromResolvers() (out []*ResolverInfo) { all := append([]*ResolverInfo{}, &ResolverInfo{Network: "system"}) db.mu.Lock() all = append(all, db.resolversTable...) @@ -372,7 +313,7 @@ func (ti *TestHelperInfo) string() string { } // InsertIntoTestHelpers inserts a given TH into the test helpers's table. -func (db *dbSharedWithChildren) InsertIntoTestHelpers(proto, URL string) { +func (db *DB) InsertIntoTestHelpers(proto, URL string) { db.mu.Lock() db.testHelpersTable = append(db.testHelpersTable, &TestHelperInfo{ Protocol: proto, @@ -383,7 +324,7 @@ func (db *dbSharedWithChildren) InsertIntoTestHelpers(proto, URL string) { // SelectAllFromTestHelperss returns all the configured THs. This function // ensures that we return in output a list only containing unique THs. -func (db *dbSharedWithChildren) SelectAllFromTestHelpers() (out []*TestHelperInfo) { +func (db *DB) SelectAllFromTestHelpers() (out []*TestHelperInfo) { var all []*TestHelperInfo db.mu.Lock() all = append(all, db.testHelpersTable...) @@ -400,7 +341,7 @@ func (db *dbSharedWithChildren) SelectAllFromTestHelpers() (out []*TestHelperInf } // NextConnID implements EventDB.NextConnID. 
-func (db *dbSharedWithChildren) NextConnID() (out int64) { +func (db *DB) NextConnID() (out int64) { db.mu.Lock() db.connID++ // start from 1 out = db.connID @@ -408,17 +349,9 @@ func (db *dbSharedWithChildren) NextConnID() (out int64) { return } -// MeasurementID implements EventDB.MeasurementID. -func (db *DB) MeasurementID() (out int64) { - db.mu.Lock() - out = db.measurementID - db.mu.Unlock() - return -} - -// NextMeasurement increments the internal MeasurementID and +// NextMeasurementID increments the internal MeasurementID and // returns it, so that later you can reference the current measurement. -func (db *DB) NextMeasurement() (out int64) { +func (db *DB) NextMeasurementID() (out int64) { db.mu.Lock() db.measurementID++ // start from 1 out = db.measurementID @@ -428,7 +361,7 @@ func (db *DB) NextMeasurement() (out int64) { // SelectAllFromDialWithMeasurementID calls SelectAllFromConnect // and filters the result by MeasurementID. -func (db *dbSharedWithChildren) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { +func (db *DB) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { for _, ev := range db.SelectAllFromDial() { if id == ev.MeasurementID { out = append(out, ev) @@ -439,7 +372,7 @@ func (db *dbSharedWithChildren) SelectAllFromDialWithMeasurementID(id int64) (ou // SelectAllFromReadWriteWithMeasurementID calls SelectAllFromReadWrite and // filters the result by MeasurementID. -func (db *dbSharedWithChildren) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { +func (db *DB) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { for _, ev := range db.SelectAllFromReadWrite() { if id == ev.MeasurementID { out = append(out, ev) @@ -450,7 +383,7 @@ func (db *dbSharedWithChildren) SelectAllFromReadWriteWithMeasurementID(id int64 // SelectAllFromCloseWithMeasurementID calls SelectAllFromClose // and filters the result by MeasurementID. 
-func (db *dbSharedWithChildren) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { +func (db *DB) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { for _, ev := range db.SelectAllFromClose() { if id == ev.MeasurementID { out = append(out, ev) @@ -461,7 +394,7 @@ func (db *dbSharedWithChildren) SelectAllFromCloseWithMeasurementID(id int64) (o // SelectAllFromTLSHandshakeWithMeasurementID calls SelectAllFromTLSHandshake // and filters the result by MeasurementID. -func (db *dbSharedWithChildren) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { +func (db *DB) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { for _, ev := range db.SelectAllFromTLSHandshake() { if id == ev.MeasurementID { out = append(out, ev) @@ -472,7 +405,7 @@ func (db *dbSharedWithChildren) SelectAllFromTLSHandshakeWithMeasurementID(id in // SelectAllFromQUICHandshakeWithMeasurementID calls SelectAllFromQUICSHandshake // and filters the result by MeasurementID. -func (db *dbSharedWithChildren) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { +func (db *DB) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { for _, ev := range db.SelectAllFromQUICHandshake() { if id == ev.MeasurementID { out = append(out, ev) @@ -483,7 +416,7 @@ func (db *dbSharedWithChildren) SelectAllFromQUICHandshakeWithMeasurementID(id i // SelectAllFromLookupHostWithMeasurementID calls SelectAllFromLookupHost // and filters the result by MeasurementID. 
-func (db *dbSharedWithChildren) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { +func (db *DB) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { for _, ev := range db.SelectAllFromLookupHost() { if id == ev.MeasurementID { out = append(out, ev) @@ -494,7 +427,7 @@ func (db *dbSharedWithChildren) SelectAllFromLookupHostWithMeasurementID(id int6 // SelectAllFromLookupHTTPSSvcWithMeasurementID calls SelectAllFromHTTPSSvc // and filters the result by MeasurementID. -func (db *dbSharedWithChildren) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { +func (db *DB) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { for _, ev := range db.SelectAllFromLookupHTTPSSvc() { if id == ev.MeasurementID { out = append(out, ev) @@ -505,7 +438,7 @@ func (db *dbSharedWithChildren) SelectAllFromLookupHTTPSSvcWithMeasurementID(id // SelectAllFromDNSRoundTripWithMeasurementID calls SelectAllFromDNSRoundTrip // and filters the result by MeasurementID. -func (db *dbSharedWithChildren) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { +func (db *DB) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { for _, ev := range db.SelectAllFromDNSRoundTrip() { if id == ev.MeasurementID { out = append(out, ev) @@ -516,7 +449,7 @@ func (db *dbSharedWithChildren) SelectAllFromDNSRoundTripWithMeasurementID(id in // SelectAllFromHTTPRoundTripWithMeasurementID calls SelectAllFromHTTPRoundTrip // and filters the result by MeasurementID. 
-func (db *dbSharedWithChildren) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { +func (db *DB) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { for _, ev := range db.SelectAllFromHTTPRoundTrip() { if id == ev.MeasurementID { out = append(out, ev) @@ -527,7 +460,7 @@ func (db *dbSharedWithChildren) SelectAllFromHTTPRoundTripWithMeasurementID(id i // SelectAllFromHTTPRedirectWithMeasurementID calls SelectAllFromHTTPRedirect // and filters the result by MeasurementID. -func (db *dbSharedWithChildren) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { +func (db *DB) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { for _, ev := range db.SelectAllFromHTTPRedirect() { if id == ev.MeasurementID { out = append(out, ev) @@ -569,14 +502,14 @@ func (e *Endpoint) String() string { // - domain is the domain we want to connect to; // // - port is the port for the endpoint. -func (db *dbSharedWithChildren) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { +func (db *DB) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { out = append(out, db.selectAllTCPEndpoints(domain, port)...) out = append(out, db.selectAllQUICEndpoints(domain, port)...) 
out = db.deduplicateEndpoints(out) return } -func (db *dbSharedWithChildren) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { +func (db *DB) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { for _, entry := range db.SelectAllFromLookupHost() { if domain != entry.Domain { continue @@ -591,7 +524,7 @@ func (db *dbSharedWithChildren) selectAllTCPEndpoints(domain, port string) (out return } -func (db *dbSharedWithChildren) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { +func (db *DB) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { for _, entry := range db.SelectAllFromLookupHTTPSSvc() { if domain != entry.Domain { continue @@ -607,7 +540,7 @@ func (db *dbSharedWithChildren) selectAllQUICEndpoints(domain, port string) (out return } -func (db *dbSharedWithChildren) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { +func (db *DB) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { duplicates := make(map[string]*Endpoint) for _, epnt := range epnts { duplicates[epnt.String()] = epnt @@ -618,11 +551,11 @@ func (db *dbSharedWithChildren) deduplicateEndpoints(epnts []*Endpoint) (out []* return } -func (db *dbSharedWithChildren) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { +func (db *DB) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} } -func (db *dbSharedWithChildren) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { +func (db *DB) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { for _, alpn := range entry.ALPN { switch alpn { case "h3": @@ -669,7 +602,7 @@ func (e *HTTPEndpoint) String() string { // - URL is the URL for which we want endpoints; // // Returns a list of endpoints or an error. 
-func (db *dbSharedWithChildren) SelectAllHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { +func (db *DB) SelectAllHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { domain := URL.Hostname() port, err := PortFromURL(URL) if err != nil { @@ -712,7 +645,7 @@ func PortFromURL(URL *url.URL) (string, error) { } } -func (db *dbSharedWithChildren) alpnForHTTPEndpoint(network EndpointNetwork) []string { +func (db *DB) alpnForHTTPEndpoint(network EndpointNetwork) []string { switch network { case NetworkQUIC: return []string{"h3"} diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index f9d1cdacbb..5a0afb88e0 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -45,20 +45,24 @@ type Dialer interface { // // Arguments: // -// - origin is either OriginProbe or OriginTH +// - measurementID is the measurement ID; // -// - db is the database in which to store measurements +// - origin is either OriginProbe or OriginTH; // -// - d is the underlying netxlite.Dialer to use -func WrapDialer(origin Origin, db EventDB, d netxlite.Dialer) Dialer { - return &dialerx{Dialer: d, db: db, origin: origin} +// - db is the database in which to store measurements; +// +// - d is the underlying netxlite.Dialer to use. +func WrapDialer(measurementID int64, + origin Origin, db EventDB, d netxlite.Dialer) Dialer { + return &dialerx{Dialer: d, db: db, origin: origin, mid: measurementID} } // NewDialerWithoutResolver is a convenience factory for creating // a dialer that saves measurements into the DB and that is not attached // to any resolver (hence only works when passed IP addresses). 
-func NewDialerWithoutResolver(origin Origin, db EventDB, logger Logger) Dialer { - return WrapDialer(origin, db, netxlite.NewDialerWithoutResolver( +func NewDialerWithoutResolver( + measurementID int64, origin Origin, db EventDB, logger Logger) Dialer { + return WrapDialer(measurementID, origin, db, netxlite.NewDialerWithoutResolver( logger, )) } @@ -76,9 +80,10 @@ func (d *netxliteDialerAdapter) DialContext( // NewDialerWithSystemResolver is a convenience factory for creating // a dialer that saves measurements into mx.DB and uses the system resolver. -func NewDialerWithSystemResolver(origin Origin, db EventDB, logger Logger) Dialer { - r := NewResolverSystem(origin, db, logger) - return WrapDialer(origin, db, netxlite.NewDialerWithResolver( +func NewDialerWithSystemResolver( + measurementID int64, origin Origin, db EventDB, logger Logger) Dialer { + r := NewResolverSystem(measurementID, origin, db, logger) + return WrapDialer(measurementID, origin, db, netxlite.NewDialerWithResolver( logger, r, )) } @@ -86,6 +91,7 @@ func NewDialerWithSystemResolver(origin Origin, db EventDB, logger Logger) Diale type dialerx struct { netxlite.Dialer db EventDB + mid int64 origin Origin } @@ -114,7 +120,7 @@ func (d *dialerx) DialContext( finished := d.db.ElapsedTime() d.db.InsertIntoDial(&NetworkEvent{ Origin: d.origin, - MeasurementID: d.db.MeasurementID(), + MeasurementID: d.mid, ConnID: connID, Operation: "connect", Network: network, @@ -137,6 +143,7 @@ func (d *dialerx) DialContext( localAddr: conn.LocalAddr().String(), network: network, origin: d.origin, + mid: d.mid, }, nil } @@ -169,6 +176,7 @@ type connx struct { connID int64 remoteAddr string localAddr string + mid int64 network string origin Origin } @@ -183,7 +191,7 @@ func (c *connx) Read(b []byte) (int, error) { finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, - MeasurementID: c.db.MeasurementID(), + MeasurementID: c.mid, ConnID: c.connID, Operation: "read", Network: 
c.network, @@ -203,7 +211,7 @@ func (c *connx) Write(b []byte) (int, error) { finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, - MeasurementID: c.db.MeasurementID(), + MeasurementID: c.mid, ConnID: c.connID, Operation: "write", Network: c.network, @@ -223,7 +231,7 @@ func (c *connx) Close() error { finished := c.db.ElapsedTime() c.db.InsertIntoClose(&NetworkEvent{ Origin: c.origin, - MeasurementID: c.db.MeasurementID(), + MeasurementID: c.mid, ConnID: c.connID, Operation: "close", Network: c.network, diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index 875ff0e0b2..fda8473ab0 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -14,13 +14,20 @@ type DNSTransport = dnsx.RoundTripper // WrapDNSXRoundTripper wraps a dnsx.RoundTripper and returns a // DNSTransport that saves DNSRoundTripEvents into the DB. -func WrapDNSXRoundTripper(origin Origin, db EventDB, rt dnsx.RoundTripper) DNSTransport { - return &dnsxTransportx{db: db, RoundTripper: rt, origin: origin} +func WrapDNSXRoundTripper( + measurementID int64, origin Origin, db EventDB, rt dnsx.RoundTripper) DNSTransport { + return &dnsxTransportx{ + db: db, + RoundTripper: rt, + origin: origin, + mid: measurementID, + } } type dnsxTransportx struct { dnsx.RoundTripper db EventDB + mid int64 origin Origin } @@ -45,7 +52,7 @@ func (txp *dnsxTransportx) RoundTrip(ctx context.Context, query []byte) ([]byte, finished := txp.db.ElapsedTime() txp.db.InsertIntoDNSRoundTrip(&DNSRoundTripEvent{ Origin: txp.origin, - MeasurementID: txp.db.MeasurementID(), + MeasurementID: txp.mid, Network: txp.RoundTripper.Network(), Address: txp.RoundTripper.Address(), Query: query, diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 0b164b3fef..c5b6b635ab 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -40,50 +40,59 @@ type HTTPTransport interface { // returns an HTTPTransport that uses the DB to save events occurring 
// during HTTP round trips. With this constructor the ConnID is // not set, hence ConnID will always return zero. -func WrapHTTPTransport( +func WrapHTTPTransport(measurementID int64, origin Origin, db EventDB, txp netxlite.HTTPTransport) HTTPTransport { - return WrapHTTPTransportWithConnID(origin, db, txp, 0) + return WrapHTTPTransportWithConnID(measurementID, origin, db, txp, 0) } // WrapHTTPTransportWithConnID is like WrapHTTPTransport but also // sets the conn ID, which is otherwise set to zero. -func WrapHTTPTransportWithConnID(origin Origin, +func WrapHTTPTransportWithConnID(measurementID int64, origin Origin, db EventDB, txp netxlite.HTTPTransport, connID int64) HTTPTransport { return &httpTransportx{ - HTTPTransport: txp, db: db, connID: connID, origin: origin} + HTTPTransport: txp, + db: db, + connID: connID, + mid: measurementID, + origin: origin, + } } // NewHTTPTransportWithConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. -func NewHTTPTransportWithConn( +func NewHTTPTransportWithConn(measurementID int64, origin Origin, logger Logger, db EventDB, conn Conn) HTTPTransport { txp := netxlite.NewHTTPTransport(logger, netxlite.NewSingleUseDialer(conn), netxlite.NewNullTLSDialer()) - return WrapHTTPTransportWithConnID(origin, db, txp, conn.ConnID()) + return WrapHTTPTransportWithConnID( + measurementID, origin, db, txp, conn.ConnID()) } // NewHTTPTransportWithTLSConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. 
-func NewHTTPTransportWithTLSConn( +func NewHTTPTransportWithTLSConn(measurementID int64, origin Origin, logger Logger, db EventDB, conn TLSConn) HTTPTransport { txp := netxlite.NewHTTPTransport(logger, netxlite.NewNullDialer(), netxlite.NewSingleUseTLSDialer(conn)) - return WrapHTTPTransportWithConnID(origin, db, txp, conn.ConnID()) + return WrapHTTPTransportWithConnID( + measurementID, origin, db, txp, conn.ConnID()) } // NewHTTPTransportWithQUICSess creates and wraps an HTTPTransport that // does not dial and only uses the given QUIC session. -func NewHTTPTransportWithQUICSess( +func NewHTTPTransportWithQUICSess(measurementID int64, origin Origin, logger Logger, db EventDB, sess QUICEarlySession) HTTPTransport { txp := netxlite.NewHTTP3Transport( logger, netxlite.NewSingleUseQUICDialer(sess), &tls.Config{}) - return WrapHTTPTransportWithConnID(origin, db, txp, sess.ConnID()) + return WrapHTTPTransportWithConnID( + measurementID, origin, db, txp, sess.ConnID()) } type httpTransportx struct { netxlite.HTTPTransport connID int64 db EventDB + mid int64 origin Origin } @@ -118,7 +127,7 @@ func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) resp, err := txp.HTTPTransport.RoundTrip(req) rt := &HTTPRoundTripEvent{ Origin: txp.origin, - MeasurementID: txp.db.MeasurementID(), + MeasurementID: txp.mid, ConnID: txp.connID, RequestMethod: req.Method, RequestURL: req.URL, @@ -184,16 +193,18 @@ type HTTPClient interface { // NewHTTPClient creates a new HTTPClient instance that // does not automatically perform redirects. 
-func NewHTTPClientWithoutRedirects( +func NewHTTPClientWithoutRedirects(measurementID int64, origin Origin, db EventDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { - return newHTTPClient(origin, db, jar, txp, http.ErrUseLastResponse) + return newHTTPClient( + measurementID, origin, db, jar, txp, http.ErrUseLastResponse) } // NewHTTPClientWithRedirects creates a new HTTPClient // instance that automatically perform redirects. -func NewHTTPClientWithRedirects( +func NewHTTPClientWithRedirects(measurementID int64, origin Origin, db EventDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { - return newHTTPClient(origin, db, jar, txp, nil) + return newHTTPClient( + measurementID, origin, db, jar, txp, nil) } // HTTPRedirectEvent records an HTTP redirect. @@ -232,7 +243,7 @@ type HTTPRedirectEvent struct { // would return when hitting too many redirects. var ErrHTTPTooManyRedirects = errors.New("stopped after 10 redirects") -func newHTTPClient(origin Origin, db EventDB, +func newHTTPClient(measurementID int64, origin Origin, db EventDB, cookiejar http.CookieJar, txp HTTPTransport, defaultErr error) HTTPClient { return &http.Client{ Transport: txp, @@ -244,7 +255,7 @@ func newHTTPClient(origin Origin, db EventDB, } db.InsertIntoHTTPRedirect(&HTTPRedirectEvent{ Origin: origin, - MeasurementID: db.MeasurementID(), + MeasurementID: measurementID, ConnID: txp.ConnID(), URL: via[0].URL, // bug in Go stdlib if we crash here Location: req.URL, diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index 36d80d0fbe..c829606e35 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -6,12 +6,6 @@ type Measurement struct { // MeasurementID is the measurement MeasurementID. MeasurementID int64 - // URL is the URL this measurement refers to. - URL string `json:",omitempty"` - - // Endpoint is the endpoint this measurement refers to. 
- Endpoint string `json:",omitempty"` - // Oddities lists all the oddities inside this measurement. See // newMeasurement's docs for more info. Oddities []Oddity diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 335c691c03..e417070b12 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -28,7 +28,7 @@ type Measurer struct { Origin Origin // TLSHandshaker is the MANDATORY TLS handshaker. - TLSHandshaker TLSHandshaker + TLSHandshaker netxlite.TLSHandshaker } // NewMeasurerWithDefaultSettings creates a new Measurer @@ -40,24 +40,12 @@ func NewMeasurerWithDefaultSettings() *Measurer { HTTPClient: &http.Client{}, Logger: log.Log, Origin: OriginProbe, - TLSHandshaker: NewTLSHandshakerStdlib(OriginProbe, db, log.Log), - } -} - -// Clone returns a clone of the current Measurer with the given -// DB instead of the DB used by the original Measurer. -func (mx *Measurer) Clone(db *DB) *Measurer { - return &Measurer{ - DB: db, - HTTPClient: mx.HTTPClient, - Logger: mx.Logger, - Origin: mx.Origin, - TLSHandshaker: mx.TLSHandshaker, + TLSHandshaker: netxlite.NewTLSHandshakerStdlib(log.Log), } } func (mx *Measurer) nextMeasurement() int64 { - return mx.DB.NextMeasurement() + return mx.DB.NextMeasurementID() } // LookupHostSystem performs a LookupHost using the system resolver. @@ -66,11 +54,11 @@ func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measur mx.Logf("LookupHostSystem domain=%s timeout=%s...", domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - r := NewResolverSystem(mx.Origin, mx.DB, mx.Logger) + mid := mx.nextMeasurement() + r := NewResolverSystem(mid, mx.Origin, mx.DB, mx.Logger) defer r.CloseIdleConnections() - id := mx.nextMeasurement() _, _ = r.LookupHost(ctx, domain) - return NewMeasurement(mx.DB, id) + return NewMeasurement(mx.DB, mid) } // LookupHostUDP is like LookupHostSystem but uses an UDP resolver. 
@@ -91,11 +79,11 @@ func (mx *Measurer) LookupHostUDP( address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - r := NewResolverUDP(mx.Origin, mx.DB, mx.Logger, address) + mid := mx.nextMeasurement() + r := NewResolverUDP(mid, mx.Origin, mx.DB, mx.Logger, address) defer r.CloseIdleConnections() - id := mx.nextMeasurement() _, _ = r.LookupHost(ctx, domain) - return NewMeasurement(mx.DB, id) + return NewMeasurement(mx.DB, mid) } // LookupHTTPSSvcUDP issues an HTTPSSvc query for the given domain. @@ -116,11 +104,11 @@ func (mx *Measurer) LookupHTTPSSvcUDP( address, domain, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - r := NewResolverUDP(mx.Origin, mx.DB, mx.Logger, address) + mid := mx.nextMeasurement() + r := NewResolverUDP(mid, mx.Origin, mx.DB, mx.Logger, address) defer r.CloseIdleConnections() - id := mx.nextMeasurement() _, _ = r.LookupHTTPSSvcWithoutRetry(ctx, domain) - return NewMeasurement(mx.DB, id) + return NewMeasurement(mx.DB, mid) } // TCPConnect establishes a connection with a TCP endpoint. @@ -133,9 +121,9 @@ func (mx *Measurer) LookupHTTPSSvcUDP( // // Returns a Measurement. func (mx *Measurer) TCPConnect(ctx context.Context, address string) *Measurement { - id := mx.nextMeasurement() - conn, _ := mx.tcpConnect(ctx, address) - measurement := NewMeasurement(mx.DB, id) + mid := mx.nextMeasurement() + conn, _ := mx.tcpConnect(ctx, mid, address) + measurement := NewMeasurement(mx.DB, mid) if conn != nil { conn.Close() } @@ -143,12 +131,13 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) *Measurement } // tcpConnect is like TCPConnect but does not create a new measurement. 
-func (mx *Measurer) tcpConnect(ctx context.Context, address string) (Conn, error) { +func (mx *Measurer) tcpConnect(ctx context.Context, + measurementID int64, address string) (Conn, error) { const timeout = 10 * time.Second mx.Logf("TCPConnect endpoint=%s timeout=%s...", address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - d := NewDialerWithoutResolver(mx.Origin, mx.DB, mx.Logger) + d := NewDialerWithoutResolver(measurementID, mx.Origin, mx.DB, mx.Logger) defer d.CloseIdleConnections() return d.DialContext(ctx, "tcp", address) } @@ -185,9 +174,9 @@ func (mx *Measurer) tcpConnect(ctx context.Context, address string) (Conn, error // Returns a Measurement. func (mx *Measurer) TLSConnectAndHandshake(ctx context.Context, address string, config *tls.Config) *Measurement { - id := mx.nextMeasurement() - conn, _ := mx.tlsConnectAndHandshake(ctx, address, config) - measurement := NewMeasurement(mx.DB, id) + mid := mx.nextMeasurement() + conn, _ := mx.tlsConnectAndHandshake(ctx, mid, address, config) + measurement := NewMeasurement(mx.DB, mid) if conn != nil { conn.Close() } @@ -197,8 +186,8 @@ func (mx *Measurer) TLSConnectAndHandshake(ctx context.Context, // tlsConnectAndHandshake is like TLSConnectAndHandshake // but does not create a new measurement. 
func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, - address string, config *tls.Config) (TLSConn, error) { - conn, err := mx.tcpConnect(ctx, address) + measurementID int64, address string, config *tls.Config) (TLSConn, error) { + conn, err := mx.tcpConnect(ctx, measurementID, address) if err != nil { return nil, err } @@ -207,7 +196,8 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, config.ServerName, config.NextProtos, address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - return mx.TLSHandshaker.Handshake(ctx, conn, config) + th := WrapTLSHandshaker(measurementID, mx.Origin, mx.DB, mx.TLSHandshaker) + return th.Handshake(ctx, conn, config) } // QUICHandshake connects and TLS handshakes with a QUIC endpoint. @@ -233,9 +223,9 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, // Returns a Measurement. func (mx *Measurer) QUICHandshake(ctx context.Context, address string, config *tls.Config) *Measurement { - id := mx.nextMeasurement() - sess, _ := mx.quicHandshake(ctx, address, config) - measurement := NewMeasurement(mx.DB, id) + mid := mx.nextMeasurement() + sess, _ := mx.quicHandshake(ctx, mid, address, config) + measurement := NewMeasurement(mx.DB, mid) if sess != nil { // TODO(bassosimone): close session with correct message sess.CloseWithError(0, "") @@ -244,17 +234,18 @@ func (mx *Measurer) QUICHandshake(ctx context.Context, address string, } // quicHandshake is like QUICHandshake but does not create a new measurement. 
-func (mx *Measurer) quicHandshake(ctx context.Context, +func (mx *Measurer) quicHandshake(ctx context.Context, measurementID int64, address string, config *tls.Config) (QUICEarlySession, error) { const timeout = 10 * time.Second mx.Logf("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", config.ServerName, config.NextProtos, address, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - qd := WrapQUICDialer(mx.Origin, mx.DB, netxlite.NewQUICDialerWithoutResolver( - WrapQUICListener(mx.Origin, mx.DB, netxlite.NewQUICListener()), - mx.Logger, - )) + qd := WrapQUICDialer(measurementID, mx.Origin, mx.DB, + netxlite.NewQUICDialerWithoutResolver(WrapQUICListener( + measurementID, mx.Origin, mx.DB, netxlite.NewQUICListener()), + mx.Logger, + )) defer qd.CloseIdleConnections() return qd.DialContext(ctx, address, config) } @@ -331,14 +322,14 @@ func (mx *Measurer) HTTPEndpointPrepareGet(ctx context.Context, // httpEndpointGet implements HTTPEndpointGet. func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (resp *http.Response, m *Measurement, err error) { - id := mx.nextMeasurement() + mid := mx.nextMeasurement() switch epnt.Network { case NetworkQUIC: - resp, err = mx.httpEndpointGetQUIC(ctx, epnt, jar) - m = NewMeasurement(mx.DB, id) + resp, err = mx.httpEndpointGetQUIC(ctx, mid, epnt, jar) + m = NewMeasurement(mx.DB, mid) case NetworkTCP: - resp, err = mx.httpEndpointGetTCP(ctx, epnt, jar) - m = NewMeasurement(mx.DB, id) + resp, err = mx.httpEndpointGetTCP(ctx, mid, epnt, jar) + m = NewMeasurement(mx.DB, mid) default: m, err = &Measurement{}, errUnknownHTTPEndpointNetwork } @@ -346,46 +337,46 @@ func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, } // httpEndpointGetTCP specializes HTTPSEndpointGet for HTTP and HTTPS. 
-func (mx *Measurer) httpEndpointGetTCP( - ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { +func (mx *Measurer) httpEndpointGetTCP(ctx context.Context, + measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { switch epnt.URL.Scheme { case "http": - return mx.httpEndpointGetHTTP(ctx, epnt, jar) + return mx.httpEndpointGetHTTP(ctx, measurementID, epnt, jar) case "https": - return mx.httpEndpointGetHTTPS(ctx, epnt, jar) + return mx.httpEndpointGetHTTPS(ctx, measurementID, epnt, jar) default: return nil, errUnknownHTTPEndpointURLScheme } } // httpEndpointGetHTTP specializes httpEndpointGetTCP for HTTP. -func (mx *Measurer) httpEndpointGetHTTP( - ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { +func (mx *Measurer) httpEndpointGetHTTP(ctx context.Context, + measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err } req.Header = epnt.Header - conn, err := mx.tcpConnect(ctx, epnt.Address) + conn, err := mx.tcpConnect(ctx, measurementID, epnt.Address) if err != nil { return nil, err } defer conn.Close() // we own it - clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, jar, - NewHTTPTransportWithConn(mx.Origin, mx.Logger, mx.DB, conn)) + clnt := NewHTTPClientWithoutRedirects(measurementID, mx.Origin, mx.DB, jar, + NewHTTPTransportWithConn(measurementID, mx.Origin, mx.Logger, mx.DB, conn)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) } // httpEndpointGetHTTPS specializes httpEndpointGetTCP for HTTPS. 
-func (mx *Measurer) httpEndpointGetHTTPS( - ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { +func (mx *Measurer) httpEndpointGetHTTPS(ctx context.Context, + measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err } req.Header = epnt.Header - conn, err := mx.tlsConnectAndHandshake(ctx, epnt.Address, &tls.Config{ + conn, err := mx.tlsConnectAndHandshake(ctx, measurementID, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -394,21 +385,21 @@ func (mx *Measurer) httpEndpointGetHTTPS( return nil, err } defer conn.Close() // we own it - clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, jar, - NewHTTPTransportWithTLSConn(mx.Origin, mx.Logger, mx.DB, conn)) + clnt := NewHTTPClientWithoutRedirects(measurementID, mx.Origin, mx.DB, jar, + NewHTTPTransportWithTLSConn(measurementID, mx.Origin, mx.Logger, mx.DB, conn)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) } // httpEndpointGetQUIC specializes httpEndpointGetTCP for QUIC. 
-func (mx *Measurer) httpEndpointGetQUIC( - ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { +func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, + measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err } req.Header = epnt.Header - sess, err := mx.quicHandshake(ctx, epnt.Address, &tls.Config{ + sess, err := mx.quicHandshake(ctx, measurementID, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -418,8 +409,8 @@ func (mx *Measurer) httpEndpointGetQUIC( } // TODO(bassosimone): close session with correct message defer sess.CloseWithError(0, "") // we own it - clnt := NewHTTPClientWithoutRedirects(mx.Origin, mx.DB, jar, - NewHTTPTransportWithQUICSess(mx.Origin, mx.Logger, mx.DB, sess)) + clnt := NewHTTPClientWithoutRedirects(measurementID, mx.Origin, mx.DB, jar, + NewHTTPTransportWithQUICSess(measurementID, mx.Origin, mx.Logger, mx.DB, sess)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) } @@ -459,10 +450,10 @@ func (mx *Measurer) LookupWCTH(ctx context.Context, URL *url.URL, WCTHURL, URL.String(), endpoints, port, timeout) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - w := NewWCTHWorker(mx.Logger, mx.DB, mx.HTTPClient, WCTHURL) - id := mx.nextMeasurement() + mid := mx.nextMeasurement() + w := NewWCTHWorker(mid, mx.Logger, mx.DB, mx.HTTPClient, WCTHURL) _, _ = w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints)) - return NewMeasurement(mx.DB, id) + return NewMeasurement(mx.DB, mid) } // onlyTCPEndpoints takes in input a list of endpoints and returns @@ -504,13 +495,8 @@ func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, const parallelism = 3 for i := 0; i < parallelism; i++ { go func() { - // Important: we need a children DB because we need a - // separate MeasurementID namespace. 
The whole package - // does not keep constant MeasurementID if you don't - // use this factory for creating a new child. - child := mx.Clone(mx.DB.NewChildDB()) for epnt := range input { - output <- child.HTTPEndpointGet(ctx, epnt, jar) + output <- mx.HTTPEndpointGet(ctx, epnt, jar) } done <- true }() @@ -553,13 +539,8 @@ func (mx *Measurer) LookupURLHostParallel( const parallelism = 3 for i := 0; i < parallelism; i++ { go func() { - // Important: we need a children DB because we need a - // separate MeasurementID namespace. The whole package - // does not keep constant MeasurementID if you don't - // use this factory for creating a new child. - child := mx.Clone(mx.DB.NewChildDB()) for reso := range resolvers { - child.lookupHostWithResolverInfo(ctx, reso, URL, output) + mx.lookupHostWithResolverInfo(ctx, reso, URL, output) } done <- true }() @@ -597,7 +578,7 @@ func (mx *Measurer) lookupHostWithResolverInfo( } } -// LookupostParallel is like LookupURLHostParallel but we only +// LookupHostParallel is like LookupURLHostParallel but we only // have in input an hostname rather than a URL. As such, we cannot // determine whether to perform HTTPSSvc lookups and so we aren't // going to perform this kind of lookups in this case. @@ -634,13 +615,8 @@ func (mx *Measurer) QueryTestHelperParallel( const parallelism = 1 // maybe raise in the future? for i := 0; i < parallelism; i++ { go func() { - // Important: we need a children DB because we need a - // separate MeasurementID namespace. The whole package - // does not keep constant MeasurementID if you don't - // use this factory for creating a new child. 
- child := mx.Clone(mx.DB.NewChildDB()) for th := range ths { - child.asyncTestHelperQuery(ctx, th, URL, output) + mx.asyncTestHelperQuery(ctx, th, URL, output) } done <- true }() diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index 483a2a1efb..21385e5797 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -17,10 +17,12 @@ type QUICListener = netxlite.QUICListener // WrapQUICListener takes in input a netxlite.QUICListener and returns // a new listener that saves measurements into the DB. -func WrapQUICListener(origin Origin, db EventDB, ql netxlite.QUICListener) QUICListener { +func WrapQUICListener(measurementID int64, + origin Origin, db EventDB, ql netxlite.QUICListener) QUICListener { return &quicListenerx{ QUICListener: ql, db: db, + mid: measurementID, origin: origin, } } @@ -28,6 +30,7 @@ func WrapQUICListener(origin Origin, db EventDB, ql netxlite.QUICListener) QUICL type quicListenerx struct { netxlite.QUICListener db EventDB + mid int64 origin Origin } @@ -45,6 +48,7 @@ func (ql *quicListenerx) Listen(addr *net.UDPAddr) (QUICPacketConn, error) { db: ql.db, localAddr: pconn.LocalAddr().String(), origin: ql.origin, + mid: ql.mid, }, nil } @@ -53,6 +57,7 @@ type quicUDPLikeConnx struct { connID int64 db EventDB localAddr string + mid int64 origin Origin } @@ -62,7 +67,7 @@ func (c *quicUDPLikeConnx) WriteTo(p []byte, addr net.Addr) (int, error) { finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, - MeasurementID: c.db.MeasurementID(), + MeasurementID: c.mid, ConnID: c.connID, Operation: "write_to", Network: string(NetworkQUIC), @@ -82,7 +87,7 @@ func (c *quicUDPLikeConnx) ReadFrom(b []byte) (int, net.Addr, error) { finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, - MeasurementID: c.db.MeasurementID(), + MeasurementID: c.mid, ConnID: c.connID, Operation: "read_from", Network: string(NetworkQUIC), @@ -109,7 +114,7 @@ func (c 
*quicUDPLikeConnx) Close() error { finished := c.db.ElapsedTime() c.db.InsertIntoReadWrite(&NetworkEvent{ Origin: c.origin, - MeasurementID: c.db.MeasurementID(), + MeasurementID: c.mid, ConnID: c.connID, Operation: "close", Network: string(NetworkQUIC), @@ -130,12 +135,17 @@ func (c *quicUDPLikeConnx) LocalAddr() net.Addr { if localAddr == nil { return nil } - return &quicLocalAddrx{Addr: localAddr, connID: c.connID} + return &quicLocalAddrx{ + Addr: localAddr, + connID: c.connID, + mid: c.mid, + } } type quicLocalAddrx struct { net.Addr connID int64 + mid int64 } // QUICEarlySession is the type we use to wrap quic.EarlySession. This @@ -178,17 +188,20 @@ type QUICHandshakeEvent struct { // WrapQUICDialer creates a new QUICDialer that will save // QUIC handshake events into the DB. -func WrapQUICDialer(origin Origin, db EventDB, dialer netxlite.QUICDialer) QUICDialer { +func WrapQUICDialer(measurementID int64, + origin Origin, db EventDB, dialer netxlite.QUICDialer) QUICDialer { return &quicDialerx{ QUICDialer: dialer, origin: origin, db: db, + mid: measurementID, } } type quicDialerx struct { netxlite.QUICDialer db EventDB + mid int64 origin Origin } @@ -217,7 +230,7 @@ func (qh *quicDialerx) DialContext(ctx context.Context, finished := qh.db.ElapsedTime() qh.db.InsertIntoQUICHandshake(&QUICHandshakeEvent{ Origin: qh.origin, - MeasurementID: qh.db.MeasurementID(), + MeasurementID: qh.mid, ConnID: qh.connIDIfNotNil(localAddr), Network: string(NetworkQUIC), RemoteAddr: address, diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 60024bac5d..d989a92fee 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -19,14 +19,17 @@ type Resolver interface { } // WrapResolver wraps a Resolver so that we save measurements into the DB. 
-func WrapResolver(origin Origin, db EventDB, r netxlite.Resolver) Resolver { - return &resolverx{Resolver: r, db: db, origin: origin} +func WrapResolver(measurementID int64, + origin Origin, db EventDB, r netxlite.Resolver) Resolver { + return &resolverx{Resolver: r, db: db, origin: origin, mid: measurementID} } // NewResolverSystem is a convenience factory for creating a // system resolver that saves measurements into a DB. -func NewResolverSystem(origin Origin, db EventDB, logger Logger) Resolver { - return WrapResolver(origin, db, netxlite.NewResolverStdlib(logger)) +func NewResolverSystem(measurementID int64, + origin Origin, db EventDB, logger Logger) Resolver { + return WrapResolver( + measurementID, origin, db, netxlite.NewResolverStdlib(logger)) } // NewResolverUDP is a convenience factory for creating a Resolver @@ -34,6 +37,8 @@ func NewResolverSystem(origin Origin, db EventDB, logger Logger) Resolver { // // Arguments: // +// - measurementID is the measurement ID; +// // - origin is OrigiProbe or OriginTH; // // - db is where to save events; @@ -41,12 +46,14 @@ func NewResolverSystem(origin Origin, db EventDB, logger Logger) Resolver { // - logger is the logger; // // - address is the resolver address (e.g., "1.1.1.1:53"). 
-func NewResolverUDP(origin Origin, db EventDB, logger Logger, address string) Resolver { - return WrapResolver(origin, db, &netxlite.ResolverLogger{ +func NewResolverUDP(measurementID int64, + origin Origin, db EventDB, logger Logger, address string) Resolver { + return WrapResolver(measurementID, origin, db, &netxlite.ResolverLogger{ Resolver: netxlite.WrapResolver(logger, dnsx.NewSerialResolver( - WrapDNSXRoundTripper(origin, db, dnsx.NewDNSOverUDP( + WrapDNSXRoundTripper(measurementID, origin, db, dnsx.NewDNSOverUDP( &netxliteDialerAdapter{ - NewDialerWithSystemResolver(origin, db, logger), + NewDialerWithSystemResolver( + measurementID, origin, db, logger), }, address, )))), @@ -57,6 +64,7 @@ func NewResolverUDP(origin Origin, db EventDB, logger Logger, address string) Re type resolverx struct { netxlite.Resolver db EventDB + mid int64 origin Origin } @@ -81,7 +89,7 @@ func (r *resolverx) LookupHost(ctx context.Context, domain string) ([]string, er finished := r.db.ElapsedTime() r.db.InsertIntoLookupHost(&LookupHostEvent{ Origin: r.origin, - MeasurementID: r.db.MeasurementID(), + MeasurementID: r.mid, Network: r.Resolver.Network(), Address: r.Resolver.Address(), Domain: domain, @@ -139,7 +147,7 @@ func (r *resolverx) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain strin finished := r.db.ElapsedTime() ev := &LookupHTTPSSvcEvent{ Origin: r.origin, - MeasurementID: r.db.MeasurementID(), + MeasurementID: r.mid, Network: r.Resolver.Network(), Address: r.Resolver.Address(), Domain: domain, diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index 953456b24b..a884164772 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -27,19 +27,28 @@ type TLSHandshaker interface { // WrapTLSHandshaker wraps a netxlite.TLSHandshaker to return a new // instance of TLSHandshaker that saves events into the DB. 
-func WrapTLSHandshaker(origin Origin, db EventDB, thx netxlite.TLSHandshaker) TLSHandshaker { - return &tlsHandshakerx{TLSHandshaker: thx, db: db, origin: origin} +func WrapTLSHandshaker(measurementID int64, + origin Origin, db EventDB, thx netxlite.TLSHandshaker) TLSHandshaker { + return &tlsHandshakerx{ + TLSHandshaker: thx, + db: db, + origin: origin, + mid: measurementID, + } } // NewTLSHandshakerStdlib creates a new TLS handshaker that // saves results into the DB and uses the stdlib for TLS. -func NewTLSHandshakerStdlib(origin Origin, db EventDB, logger Logger) TLSHandshaker { - return WrapTLSHandshaker(origin, db, netxlite.NewTLSHandshakerStdlib(logger)) +func NewTLSHandshakerStdlib(measurementID int64, + origin Origin, db EventDB, logger Logger) TLSHandshaker { + return WrapTLSHandshaker( + measurementID, origin, db, netxlite.NewTLSHandshakerStdlib(logger)) } type tlsHandshakerx struct { netxlite.TLSHandshaker db EventDB + mid int64 origin Origin } @@ -75,7 +84,7 @@ func (thx *tlsHandshakerx) Handshake(ctx context.Context, finished := thx.db.ElapsedTime() thx.db.InsertIntoTLSHandshake(&TLSHandshakeEvent{ Origin: thx.origin, - MeasurementID: thx.db.MeasurementID(), + MeasurementID: thx.mid, ConnID: conn.ConnID(), Engine: "", // TODO(bassosimone): add support Network: network, diff --git a/internal/measurex/wcth.go b/internal/measurex/wcth.go index 271faab85e..1eeaeeb650 100644 --- a/internal/measurex/wcth.go +++ b/internal/measurex/wcth.go @@ -18,9 +18,10 @@ import ( // WCTHWorker is the Web Connectivity test helper worker. type WCTHWorker struct { + clnt HTTPClient db EventDB logger Logger - clnt HTTPClient + mid int64 url string } @@ -29,6 +30,8 @@ type WCTHWorker struct { // // Arguments: // +// - measurementID is the measurement ID; +// // - logger is the logger to use; // // - db is the database to use; @@ -38,9 +41,15 @@ type WCTHWorker struct { // - URL is the WCTH service URL. // // All arguments are mandatory. 
-func NewWCTHWorker( +func NewWCTHWorker(measurementID int64, logger Logger, db EventDB, clnt HTTPClient, URL string) *WCTHWorker { - return &WCTHWorker{db: db, logger: logger, clnt: clnt, url: URL} + return &WCTHWorker{ + db: db, + logger: logger, + clnt: clnt, + url: URL, + mid: measurementID, + } } var errWCTHRequestFailed = errors.New("wcth: request failed") @@ -68,7 +77,7 @@ func (w *WCTHWorker) Run( func (w *WCTHWorker) parseResp(URL *url.URL, resp *WCTHResponse) { w.db.InsertIntoLookupHost(&LookupHostEvent{ Origin: OriginTH, - MeasurementID: w.db.MeasurementID(), + MeasurementID: w.mid, Network: "system", Address: "", Domain: URL.Hostname(), @@ -80,7 +89,7 @@ func (w *WCTHWorker) parseResp(URL *url.URL, resp *WCTHResponse) { for addr, status := range resp.TCPConnect { w.db.InsertIntoDial(&NetworkEvent{ Origin: OriginTH, - MeasurementID: w.db.MeasurementID(), + MeasurementID: w.mid, ConnID: 0, Operation: "connect", Network: "tcp", From d163f46bbe35ef54182a26281df61fb0057f00ae Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 19:20:06 +0200 Subject: [PATCH 18/53] also archival now is okay --- internal/measurex/archival.go | 479 ++++++++++++++++++++++++++++++++++ 1 file changed, 479 insertions(+) create mode 100644 internal/measurex/archival.go diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go new file mode 100644 index 0000000000..b76c4e89ce --- /dev/null +++ b/internal/measurex/archival.go @@ -0,0 +1,479 @@ +package measurex + +import ( + "net/http" + "strings" + "unicode/utf8" +) + +// ArchivalURLMeasurement is the archival format for URLMeasurement. 
+type ArchivalURLMeasurement struct { + URL string `json:"url"` + CannotParseURL bool `json:"cannot_parse_url"` + DNS []*ArchivalMeasurement `json:"dns"` + TH []*ArchivalMeasurement `json:"th"` + CannotGenerateEndpoints bool `json:"cannot_generate_endpoints"` + Endpoints []*ArchivalMeasurement `json:"endpoints"` +} + +// NewArchivalURLMeasurement constructs a new instance +// of the ArchivalURLMeasurement type. +func NewArchivalURLMeasurement(in *URLMeasurement) (out *ArchivalURLMeasurement) { + return &ArchivalURLMeasurement{ + URL: in.URL, + CannotParseURL: in.CannotParseURL, + DNS: NewArchivalMeasurementList(in.DNS...), + TH: NewArchivalMeasurementList(in.TH...), + CannotGenerateEndpoints: in.CannotGenerateEndpoints, + Endpoints: NewArchivalMeasurementList(in.Endpoints...), + } +} + +// ArchivalMeasurement is the archival type for Measurement. +type ArchivalMeasurement struct { + Oddities []Oddity `json:"oddities"` + Connect []*ArchivalNetworkEvent `json:"connect,omitempty"` + ReadWrite []*ArchivalNetworkEvent `json:"read_write,omitempty"` + TLSHandshake []*ArchivalTLSQUICHandshake `json:"tls_handshake,omitempty"` + QUICHandshake []*ArchivalTLSQUICHandshake `json:"quic_handshake,omitempty"` + LookupHost []*ArchivalDNSLookup `json:"lookup_host,omitempty"` + LookupHTTPSSvc []*ArchivalDNSLookup `json:"lookup_httpssvc,omitempty"` + DNSRoundTrip []*ArchivalDNSRoundTrip `json:"dns_round_trip,omitempty"` + HTTPRoundTrip []*ArchivalHTTPRoundTrip `json:"http_round_trip,omitempty"` +} + +// NewArchivalMeasurement constructs a new instance +// of the ArchivalMeasurement type. 
+func NewArchivalMeasurement(in *Measurement) (out *ArchivalMeasurement) { + return &ArchivalMeasurement{ + Oddities: in.Oddities, + Connect: NewArchivalNetworkEventList(in.Connect...), + ReadWrite: NewArchivalNetworkEventList(in.ReadWrite...), + TLSHandshake: NewArchivalTLSHandshakeList(in.TLSHandshake...), + QUICHandshake: NewArchivalQUICHandshakeList(in.QUICHandshake...), + LookupHost: NewArchivalLookupHostList(in.LookupHost...), + LookupHTTPSSvc: NewArchivalLookupHTTPSSvcList(in.LookupHTTPSSvc...), + DNSRoundTrip: NewArchivalDNSRoundTripList(in.DNSRoundTrip...), + HTTPRoundTrip: NewArchivalHTTPRoundTripList(in.HTTPRoundTrip...), + } +} + +// NewArchivalMeasurementList takes in input a list of +// Measurement and builds a list of ArchivalMeasurement. +func NewArchivalMeasurementList(in ...*Measurement) (out []*ArchivalMeasurement) { + for _, m := range in { + out = append(out, NewArchivalMeasurement(m)) + } + return +} + +// ArchivalNetworkEvent is the data format we use +// to archive all the network events. +type ArchivalNetworkEvent struct { + // JSON names compatible with df-008-netevents + RemoteAddr string `json:"address"` + ConnID int64 `json:"conn_id"` + Error error `json:"failure"` + Count int `json:"num_bytes,omitempty"` + Operation string `json:"operation"` + Network string `json:"proto"` + Finished float64 `json:"t"` + + // JSON names that are not part of the spec + Origin Origin `json:"origin"` + Started float64 `json:"started"` + Oddity Oddity `json:"oddity"` +} + +// NewArchivalNetworkEvent takes in input a NetworkEvent +// and emits in output an ArchivalNetworkEvent. 
+func NewArchivalNetworkEvent(in *NetworkEvent) (out *ArchivalNetworkEvent) { + return &ArchivalNetworkEvent{ + RemoteAddr: in.RemoteAddr, + ConnID: in.ConnID, + Error: in.Error, + Count: in.Count, + Operation: in.Operation, + Network: in.Network, + Finished: in.Finished.Seconds(), + Origin: in.Origin, + Started: in.Started.Seconds(), + Oddity: in.Oddity, + } +} + +// NewArchivalNetworkEventList takes in input a list of +// NetworkEvent and builds a list of ArchivalNetworkEvent. +func NewArchivalNetworkEventList(in ...*NetworkEvent) (out []*ArchivalNetworkEvent) { + for _, ev := range in { + out = append(out, NewArchivalNetworkEvent(ev)) + } + return +} + +// ArchivalTLSQUICHandshake is the archival format for TLSHandshakeEvent +// as well as for QUICHandshakeEvent. +type ArchivalTLSQUICHandshake struct { + // JSON names compatible with df-006-tlshandshake + CipherSuite string `json:"cipher_suite"` + ConnID int64 `json:"conn_id"` + Error error `json:"failure"` + NegotiatedProto string `json:"negotiated_protocol"` + PeerCerts []*ArchivalBinaryData `json:"peer_certificates"` + Finished float64 `json:"t"` + TLSVersion string `json:"tls_version"` + + // JSON names that are not part of the spec + Origin Origin `json:"origin"` + Engine string `json:"engine"` + RemoteAddr string `json:"address"` + SNI string `json:"server_name"` // already used in prod + ALPN []string `json:"alpn"` + SkipVerify bool `json:"no_tls_verify"` // already used in prod + Started float64 `json:"started"` + Oddity Oddity `json:"oddity"` + Network string `json:"network"` +} + +// NewArchivalTLSHandshakeList takes in input a list of +// TLSHandshakeEvent and builds a list of ArchivalTLSQUICHandshake. +func NewArchivalTLSHandshakeList(in ...*TLSHandshakeEvent) (out []*ArchivalTLSQUICHandshake) { + for _, ev := range in { + out = append(out, NewArchivalTLSHandshake(ev)) + } + return +} + +// NewArchivalTLSHandshake converts a TLSHandshakeEvent to +// its corresponding archival format. 
+func NewArchivalTLSHandshake(in *TLSHandshakeEvent) (out *ArchivalTLSQUICHandshake) { + return &ArchivalTLSQUICHandshake{ + CipherSuite: in.CipherSuite, + ConnID: in.ConnID, + Error: in.Error, + NegotiatedProto: in.NegotiatedProto, + PeerCerts: NewArchivalTLSCert(in.PeerCerts), + Finished: in.Finished.Seconds(), + TLSVersion: in.TLSVersion, + Origin: in.Origin, + Engine: in.Engine, + RemoteAddr: in.RemoteAddr, + SNI: in.SNI, + ALPN: in.ALPN, + SkipVerify: in.SkipVerify, + Started: in.Started.Seconds(), + Oddity: in.Oddity, + Network: in.Network, + } +} + +// NewArchivalQUICHandshakeList takes in input a list of +// QUICHandshakeEvent and builds a list of ArchivalTLSQUICHandshake. +func NewArchivalQUICHandshakeList(in ...*QUICHandshakeEvent) (out []*ArchivalTLSQUICHandshake) { + for _, ev := range in { + out = append(out, NewArchivalQUICHandshake(ev)) + } + return +} + +// NewArchivalQUICHandshake converts a QUICHandshakeEvent to +// its corresponding archival format. +func NewArchivalQUICHandshake(in *QUICHandshakeEvent) (out *ArchivalTLSQUICHandshake) { + return &ArchivalTLSQUICHandshake{ + CipherSuite: in.CipherSuite, + ConnID: in.ConnID, + Error: in.Error, + NegotiatedProto: in.NegotiatedProto, + PeerCerts: NewArchivalTLSCert(in.PeerCerts), + Finished: in.Finished.Seconds(), + TLSVersion: in.TLSVersion, + Origin: in.Origin, + RemoteAddr: in.RemoteAddr, + SNI: in.SNI, + ALPN: in.ALPN, + SkipVerify: in.SkipVerify, + Started: in.Started.Seconds(), + Oddity: in.Oddity, + Network: in.Network, + } +} + +// ArchivalBinaryData is the archival format for binary data. +type ArchivalBinaryData struct { + Data []byte + Format string +} + +// NewArchivalTLSCert builds a new []*ArchivalBinaryData +// from a list of raw x509 certificates data. 
+func NewArchivalTLSCert(in [][]byte) (out []*ArchivalBinaryData) { + for _, cert := range in { + out = append(out, &ArchivalBinaryData{ + Data: cert, + Format: "base64", + }) + } + return +} + +// ArchivalDNSLookup is the archival format for DNS. +type ArchivalDNSLookup struct { + // JSON names compatible with df-002-dnst's spec + Answers []*ArchivalDNSAnswer `json:"answers"` + Network string `json:"engine"` + Error error `json:"failure"` + Domain string `json:"hostname"` + QueryType string `json:"query_type"` + Address string `json:"resolver_address"` + Finished float64 `json:"t"` + + // Names not part of the spec. + Started float64 `json:"started"` + Origin Origin `json:"origin"` + Oddity Oddity `json:"oddity"` +} + +// ArchivalDNSAnswer is an answer inside ArchivalDNSLookup. +type ArchivalDNSAnswer struct { + // JSON names compatible with df-002-dnst's spec + Type string `json:"answer_type"` + IPv4 string `json:"ipv4,omitempty"` + IPv6 string `json:"ivp6,omitempty"` + + // Names not part of the spec. + ALPN string `json:"alpn,omitempty"` +} + +// NewArchivalLookupHost generates an ArchivalDNSLookup entry for the given +// LookupHost event and for the given query type. (OONI's DNS data +// format splits A and AAAA queries, so we need to run this func twice.) +func NewArchivalLookupHost(in *LookupHostEvent, qtype string) (out *ArchivalDNSLookup) { + return &ArchivalDNSLookup{ + Answers: NewArchivalDNSAnswersLookupHost(in.Addrs, qtype), + Network: in.Network, + Error: in.Error, + Domain: in.Domain, + QueryType: qtype, + Address: in.Address, + Finished: in.Finished.Seconds(), + Started: in.Started.Seconds(), + Origin: in.Origin, + Oddity: in.Oddity, + } +} + +// NewArchivalDNSAnswersLookupHost builds the ArchivalDNSAnswer +// vector for a LookupHost operation and a given query type. 
+func NewArchivalDNSAnswersLookupHost(addrs []string, qtype string) (out []*ArchivalDNSAnswer) { + for _, addr := range addrs { + switch qtype { + case "A": + if !strings.Contains(addr, ":") { + out = append(out, &ArchivalDNSAnswer{ + Type: qtype, + IPv4: addr, + }) + } + case "AAAA": + if strings.Contains(addr, ":") { + out = append(out, &ArchivalDNSAnswer{ + Type: qtype, + IPv6: addr, + }) + } + } + } + return +} + +// NewArchivalLookupHostList converts a []*LookupHostEvent +// to the corresponding archival format. +func NewArchivalLookupHostList(in ...*LookupHostEvent) (out []*ArchivalDNSLookup) { + for _, ev := range in { + out = append(out, NewArchivalLookupHost(ev, "A")) + out = append(out, NewArchivalLookupHost(ev, "AAAA")) + } + return +} + +// NewArchivalLookupHTTPSSvc generates an ArchivalDNSLookup entry for the given +// LookupHTTPSSvc event. +func NewArchivalLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out *ArchivalDNSLookup) { + return &ArchivalDNSLookup{ + Answers: NewArchivalDNSAnswersLookupHTTPSSvc(in), + Network: in.Network, + Error: in.Error, + Domain: in.Domain, + QueryType: "HTTPS", + Address: in.Address, + Finished: in.Finished.Seconds(), + Started: in.Started.Seconds(), + Origin: in.Origin, + Oddity: in.Oddity, + } +} + +// NewArchivalDNSAnswersLookupHTTPSSvc builds the ArchivalDNSAnswer +// vector for a LookupHTTPSSvc operation. +func NewArchivalDNSAnswersLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out []*ArchivalDNSAnswer) { + for _, addr := range in.IPv4 { + out = append(out, &ArchivalDNSAnswer{ + Type: "A", + IPv4: addr, + }) + } + for _, addr := range in.IPv6 { + out = append(out, &ArchivalDNSAnswer{ + Type: "AAAA", + IPv6: addr, + }) + } + for _, alpn := range in.ALPN { + out = append(out, &ArchivalDNSAnswer{ + Type: "ALPN", + ALPN: alpn, + }) + } + return +} + +// NewArchivalLookupHTTPSSvcList converts a []*LookupHTTPSSvcEvent +// to the corresponding archival format. 
+func NewArchivalLookupHTTPSSvcList(in ...*LookupHTTPSSvcEvent) (out []*ArchivalDNSLookup) { + for _, ev := range in { + out = append(out, NewArchivalLookupHTTPSSvc(ev)) + } + return +} + +// ArchivalDNSRoundTrip is the archival format for DNSRoundTripEvent. +type ArchivalDNSRoundTrip struct { + Origin Origin `json:"origin"` + Network string `json:"engine"` + Address string `json:"resolver_address"` + Query *ArchivalBinaryData `json:"raw_query"` + Started float64 `json:"started"` + Finished float64 `json:"t"` + Error error `json:"failure"` + Reply *ArchivalBinaryData `json:"raw_reply"` +} + +// NewArchivalBinaryData builds a new ArchivalBinaryData +// from an array of bytes. If the array is nil or empty, we return nil. +func NewArchivalBinaryData(data []byte) (out *ArchivalBinaryData) { + if len(data) > 0 { + out = &ArchivalBinaryData{ + Data: data, + Format: "base64", + } + } + return +} + +// NewArchivalDNSRoundTrip converts a DNSRoundTripEvent +// to the corresponding archival format. +func NewArchivalDNSRoundTrip(in *DNSRoundTripEvent) (out *ArchivalDNSRoundTrip) { + return &ArchivalDNSRoundTrip{ + Origin: in.Origin, + Network: in.Network, + Address: in.Address, + Query: NewArchivalBinaryData(in.Query), + Started: in.Started.Seconds(), + Finished: in.Finished.Seconds(), + Error: in.Error, + Reply: NewArchivalBinaryData(in.Reply), + } +} + +// NewArchivalDNSRoundTripList converts a []*DNSRoundTripEvent +// to the corresponding archival format. +func NewArchivalDNSRoundTripList(in ...*DNSRoundTripEvent) (out []*ArchivalDNSRoundTrip) { + for _, ev := range in { + out = append(out, NewArchivalDNSRoundTrip(ev)) + } + return +} + +// ArchivalHTTPRoundTrip is the archival format for HTTPRoundTripEvent. +type ArchivalHTTPRoundTrip struct { + // JSON names following the df-001-httpt data format. 
+ Error error `json:"failure"` + Request *ArchivalHTTPRequest `json:"request"` + Response *ArchivalHTTPResponse `json:"response"` + Finished float64 `json:"t"` + ConnID int64 `json:"conn_id"` + Started float64 `json:"started"` + + // Names not in the specification + Origin Origin `json:"origin"` + Oddity Oddity `json:"oddity"` +} + +// ArchivalHTTPRequest is the archival representation of a request. +type ArchivalHTTPRequest struct { + Method string `json:"method"` + URL string `json:"url"` + HeadersList [][]string `json:"headers_list"` +} + +// ArchivalHTTPResponse is the archival representation of a response. +type ArchivalHTTPResponse struct { + Code int64 `json:"code"` + HeadersList [][]string `json:"headers_list"` + Body interface{} `json:"body"` + BodyIsTruncated bool `json:"body_is_truncated"` +} + +// NewArchivalHTTPRoundTrip converts an HTTPRoundTripEvent +// to the corresponding archival format. +func NewArchivalHTTPRoundTrip(in *HTTPRoundTripEvent) (out *ArchivalHTTPRoundTrip) { + return &ArchivalHTTPRoundTrip{ + Error: in.Error, + Request: &ArchivalHTTPRequest{ + Method: in.RequestMethod, + URL: in.RequestURL.String(), + HeadersList: NewArchivalHeadersList(in.RequestHeader), + }, + Response: &ArchivalHTTPResponse{ + Code: int64(in.ResponseStatus), + HeadersList: NewArchivalHeadersList(in.ResponseHeader), + Body: NewArchivalHTTPBody(in.ResponseBodySnapshot), + BodyIsTruncated: int64(len(in.ResponseBodySnapshot)) >= in.MaxBodySnapshotSize, + }, + Finished: in.Finished.Seconds(), + ConnID: in.ConnID, + Started: in.Started.Seconds(), + Origin: in.Origin, + Oddity: in.Oddity, + } +} + +// NewArchivalHTTPBody builds a new HTTP body for archival from the body. +func NewArchivalHTTPBody(body []byte) interface{} { + if utf8.Valid(body) { + return string(body) + } + return &ArchivalBinaryData{ + Data: body, + Format: "base64", + } +} + +// NewArchivalHeadersList builds a new HeadersList from http.Header. 
+func NewArchivalHeadersList(in http.Header) (out [][]string) { + for k, vv := range in { + for _, v := range vv { + out = append(out, []string{k, v}) + } + } + return +} + +// NewArchivalHTTPRoundTripList converts a []*HTTPRoundTripEvent +// to the corresponding archival format. +func NewArchivalHTTPRoundTripList(in ...*HTTPRoundTripEvent) (out []*ArchivalHTTPRoundTrip) { + for _, ev := range in { + out = append(out, NewArchivalHTTPRoundTrip(ev)) + } + return +} From 57d9f5dc45f8b4b36274af78e216f36cdb8c43ff Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 19:39:13 +0200 Subject: [PATCH 19/53] start to commit the tutorials --- internal/tutorial/measurex/chapter01/main.go | 25 ++++++++++ internal/tutorial/measurex/chapter02/main.go | 25 ++++++++++ internal/tutorial/measurex/chapter03/main.go | 26 ++++++++++ internal/tutorial/measurex/chapter04/main.go | 32 ++++++++++++ internal/tutorial/measurex/chapter05/main.go | 32 ++++++++++++ internal/tutorial/measurex/chapter06/main.go | 51 ++++++++++++++++++++ internal/tutorial/measurex/chapter07/main.go | 41 ++++++++++++++++ internal/tutorial/measurex/chapter08/main.go | 42 ++++++++++++++++ internal/tutorial/measurex/chapter09/main.go | 42 ++++++++++++++++ internal/tutorial/measurex/chapter10/main.go | 43 +++++++++++++++++ internal/tutorial/measurex/chapter11/main.go | 48 ++++++++++++++++++ internal/tutorial/measurex/chapter12/main.go | 28 +++++++++++ internal/tutorial/measurex/chapter13/main.go | 35 ++++++++++++++ internal/tutorial/measurex/chapter14/main.go | 35 ++++++++++++++ 14 files changed, 505 insertions(+) create mode 100644 internal/tutorial/measurex/chapter01/main.go create mode 100644 internal/tutorial/measurex/chapter02/main.go create mode 100644 internal/tutorial/measurex/chapter03/main.go create mode 100644 internal/tutorial/measurex/chapter04/main.go create mode 100644 internal/tutorial/measurex/chapter05/main.go create mode 100644 internal/tutorial/measurex/chapter06/main.go create mode 
100644 internal/tutorial/measurex/chapter07/main.go create mode 100644 internal/tutorial/measurex/chapter08/main.go create mode 100644 internal/tutorial/measurex/chapter09/main.go create mode 100644 internal/tutorial/measurex/chapter10/main.go create mode 100644 internal/tutorial/measurex/chapter11/main.go create mode 100644 internal/tutorial/measurex/chapter12/main.go create mode 100644 internal/tutorial/measurex/chapter13/main.go create mode 100644 internal/tutorial/measurex/chapter14/main.go diff --git a/internal/tutorial/measurex/chapter01/main.go b/internal/tutorial/measurex/chapter01/main.go new file mode 100644 index 0000000000..cf25b9162d --- /dev/null +++ b/internal/tutorial/measurex/chapter01/main.go @@ -0,0 +1,25 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + domain := flag.String("domain", "example.com", "domain to resolve") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + m := mx.LookupHostSystem(ctx, *domain) + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter02/main.go b/internal/tutorial/measurex/chapter02/main.go new file mode 100644 index 0000000000..3fea2f7f55 --- /dev/null +++ b/internal/tutorial/measurex/chapter02/main.go @@ -0,0 +1,25 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + 
flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + m := mx.TCPConnect(ctx, *address) + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter03/main.go b/internal/tutorial/measurex/chapter03/main.go new file mode 100644 index 0000000000..c660fc8783 --- /dev/null +++ b/internal/tutorial/measurex/chapter03/main.go @@ -0,0 +1,26 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + query := flag.String("query", "example.com", "domain to resolver") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + m := mx.LookupHostUDP(ctx, *query, *address) + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter04/main.go b/internal/tutorial/measurex/chapter04/main.go new file mode 100644 index 0000000000..8a19f643b5 --- /dev/null +++ b/internal/tutorial/measurex/chapter04/main.go @@ -0,0 +1,32 @@ +package main + +import ( + "context" + "crypto/tls" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + sni := flag.String("sni", "dns.google", "domain to resolver") + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, 
"timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + m := mx.TLSConnectAndHandshake(ctx, *address, &tls.Config{ + ServerName: *sni, + NextProtos: []string{"h2", "http/1.1"}, + RootCAs: netxlite.NewDefaultCertPool(), + }) + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter05/main.go b/internal/tutorial/measurex/chapter05/main.go new file mode 100644 index 0000000000..9ab67606fc --- /dev/null +++ b/internal/tutorial/measurex/chapter05/main.go @@ -0,0 +1,32 @@ +package main + +import ( + "context" + "crypto/tls" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + sni := flag.String("sni", "dns.google", "value for SNI extension") + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + m := mx.QUICHandshake(ctx, *address, &tls.Config{ + ServerName: *sni, + NextProtos: []string{"h3"}, + RootCAs: netxlite.NewDefaultCertPool(), + }) + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter06/main.go b/internal/tutorial/measurex/chapter06/main.go new file mode 100644 index 0000000000..04d8e90211 --- /dev/null +++ b/internal/tutorial/measurex/chapter06/main.go @@ -0,0 +1,51 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + 
"github.com/ooni/probe-cli/v3/internal/netxlite/iox" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + sni := flag.String("sni", "dns.google", "value for SNI extension") + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + epnt := &measurex.HTTPEndpoint{ + Domain: *sni, + Network: "tcp", + Address: *address, + SNI: *sni, + ALPN: []string{"h2", "http/1.1"}, + URL: &url.URL{ + Scheme: "https", + Host: *sni, + Path: "/", + }, + Header: measurex.NewHTTPRequestHeaderForMeasuring(), + } + cookies := measurex.NewCookieJar() + prep := mx.HTTPEndpointPrepareGet(ctx, epnt, cookies) + m := prep.Measurement() + resp, err := prep.Resume() + if err == nil { + data, err := iox.ReadAllContext(ctx, resp.Body) + if err == nil { + fmt.Printf("{\"full body size\": %d}\n", len(data)) + } + resp.Body.Close() + } + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter07/main.go b/internal/tutorial/measurex/chapter07/main.go new file mode 100644 index 0000000000..379332c7d1 --- /dev/null +++ b/internal/tutorial/measurex/chapter07/main.go @@ -0,0 +1,41 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS *measurex.Measurement + Endpoints []*measurex.Measurement +} + +func main() { + URL := flag.String("url", "https://google.com/", "URL to fetch") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := 
context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} + m.DNS = mx.LookupHostUDP(ctx, parsed.Hostname(), *address) + httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") + cookies := measurex.NewCookieJar() + for _, epnt := range httpEndpoints { + m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGet(ctx, epnt, cookies)) + } + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter08/main.go b/internal/tutorial/measurex/chapter08/main.go new file mode 100644 index 0000000000..6ff73a7242 --- /dev/null +++ b/internal/tutorial/measurex/chapter08/main.go @@ -0,0 +1,42 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS []*measurex.Measurement + Endpoints []*measurex.Measurement +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} + m.DNS = append(m.DNS, mx.LookupHostUDP(ctx, parsed.Hostname(), *address)) + m.DNS = append(m.DNS, mx.LookupHTTPSSvcUDP(ctx, parsed.Hostname(), *address)) + httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + runtimex.PanicOnError(err, "cannot get all 
the HTTP endpoints") + cookies := measurex.NewCookieJar() + for _, epnt := range httpEndpoints { + m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGet(ctx, epnt, cookies)) + } + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter09/main.go b/internal/tutorial/measurex/chapter09/main.go new file mode 100644 index 0000000000..cf47314f62 --- /dev/null +++ b/internal/tutorial/measurex/chapter09/main.go @@ -0,0 +1,42 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS []*measurex.Measurement + Endpoints []*measurex.Measurement +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} + m.DNS = append(m.DNS, mx.LookupHostUDP(ctx, parsed.Hostname(), *address)) + m.DNS = append(m.DNS, mx.LookupHTTPSSvcUDP(ctx, parsed.Hostname(), *address)) + httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") + cookies := measurex.NewCookieJar() + for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) 
{ + m.Endpoints = append(m.Endpoints, epnt) + } + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter10/main.go b/internal/tutorial/measurex/chapter10/main.go new file mode 100644 index 0000000000..b15c2d2109 --- /dev/null +++ b/internal/tutorial/measurex/chapter10/main.go @@ -0,0 +1,43 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS []*measurex.Measurement + Endpoints []*measurex.Measurement +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} + mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") + for dns := range mx.LookupURLHostParallel(ctx, parsed) { + m.DNS = append(m.DNS, dns) + } + httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") + cookies := measurex.NewCookieJar() + for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) 
{ + m.Endpoints = append(m.Endpoints, epnt) + } + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter11/main.go b/internal/tutorial/measurex/chapter11/main.go new file mode 100644 index 0000000000..9a81114954 --- /dev/null +++ b/internal/tutorial/measurex/chapter11/main.go @@ -0,0 +1,48 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS []*measurex.Measurement + TH []*measurex.Measurement + Endpoints []*measurex.Measurement +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} + mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") + for dns := range mx.LookupURLHostParallel(ctx, parsed) { + m.DNS = append(m.DNS, dns) + } + mx.RegisterWCTH("https://wcth.ooni.io/") + for th := range mx.QueryTestHelperParallel(ctx, parsed) { + m.TH = append(m.TH, th) + } + httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") + cookies := measurex.NewCookieJar() + for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) 
{ + m.Endpoints = append(m.Endpoints, epnt) + } + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter12/main.go b/internal/tutorial/measurex/chapter12/main.go new file mode 100644 index 0000000000..352e5106c9 --- /dev/null +++ b/internal/tutorial/measurex/chapter12/main.go @@ -0,0 +1,28 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + mx.RegisterWCTH("https://wcth.ooni.io/") + mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") + cookies := measurex.NewCookieJar() + m := mx.MeasureURL(ctx, *URL, cookies) + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter13/main.go b/internal/tutorial/measurex/chapter13/main.go new file mode 100644 index 0000000000..bb28c2164c --- /dev/null +++ b/internal/tutorial/measurex/chapter13/main.go @@ -0,0 +1,35 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + URLs []*measurex.URLMeasurement +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx 
:= measurex.NewMeasurerWithDefaultSettings() + mx.RegisterWCTH("https://wcth.ooni.io/") + mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") + cookies := measurex.NewCookieJar() + all := &measurement{} + for m := range mx.MeasureHTTPURLAndFollowRedirections(ctx, *URL, cookies) { + all.URLs = append(all.URLs, m) + } + data, err := json.Marshal(all) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} diff --git a/internal/tutorial/measurex/chapter14/main.go b/internal/tutorial/measurex/chapter14/main.go new file mode 100644 index 0000000000..d3101169b3 --- /dev/null +++ b/internal/tutorial/measurex/chapter14/main.go @@ -0,0 +1,35 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + URLs []*measurex.ArchivalURLMeasurement `json:"urls"` +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() + mx.RegisterWCTH("https://wcth.ooni.io/") + mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") + cookies := measurex.NewCookieJar() + all := &measurement{} + for m := range mx.MeasureHTTPURLAndFollowRedirections(ctx, *URL, cookies) { + all.URLs = append(all.URLs, measurex.NewArchivalURLMeasurement(m)) + } + data, err := json.Marshal(all) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} From 91b0ae3bb4b6bd92c47f0524bc4371792bd43186 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 19:40:16 +0200 Subject: [PATCH 20/53] ignore jafar binary --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/.gitignore b/.gitignore index ce15ab4164..af5a368a29 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ /*.deb /debops-ci .DS_Store +/jafar +/jafar.exe /*.jsonl /miniooni /miniooni.exe From 6dd602ac7b4f3f735f301364dc9ecb1aae986551 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 21:28:34 +0200 Subject: [PATCH 21/53] teach miniooni to run websteps --- internal/cmd/miniooni/libminiooni.go | 9 +- internal/engine/allexperiments.go | 8 +- internal/engine/experiment.go | 96 ++++++- .../engine/experiment/webstepsx/webstepsx.go | 252 +++++++++--------- internal/engine/inputprocessor.go | 44 +-- internal/engine/model/experiment.go | 38 +++ 6 files changed, 279 insertions(+), 168 deletions(-) diff --git a/internal/cmd/miniooni/libminiooni.go b/internal/cmd/miniooni/libminiooni.go index 9f7213a660..387f917fa4 100644 --- a/internal/cmd/miniooni/libminiooni.go +++ b/internal/cmd/miniooni/libminiooni.go @@ -460,15 +460,12 @@ type experimentWrapper struct { total int } -func (ew *experimentWrapper) MeasureWithContext( - ctx context.Context, idx int, input string) (*model.Measurement, error) { +func (ew *experimentWrapper) MeasureAsync( + ctx context.Context, input string, idx int) (<-chan *model.Measurement, error) { if input != "" { log.Infof("[%d/%d] running with input: %s", idx+1, ew.total, input) } - measurement, err := ew.child.MeasureWithContext(ctx, idx, input) - warnOnError(err, "measurement failed") - // policy: we do not stop the loop if the measurement fails - return measurement, nil + return ew.child.MeasureAsync(ctx, input, idx) } type submitterWrapper struct { diff --git a/internal/engine/allexperiments.go b/internal/engine/allexperiments.go index d587d6e4d6..dd12d3d6cf 100644 --- a/internal/engine/allexperiments.go +++ b/internal/engine/allexperiments.go @@ -23,7 +23,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/engine/experiment/torsf" "github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter" 
"github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" - "github.com/ooni/probe-cli/v3/internal/engine/experiment/websteps" + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webstepsx" "github.com/ooni/probe-cli/v3/internal/engine/experiment/whatsapp" ) @@ -330,11 +330,11 @@ var experimentsByName = map[string]func(*Session) *ExperimentBuilder{ "websteps": func(session *Session) *ExperimentBuilder { return &ExperimentBuilder{ build: func(config interface{}) *Experiment { - return NewExperiment(session, websteps.NewExperimentMeasurer( - *config.(*websteps.Config), + return NewExperiment(session, webstepsx.NewExperimentMeasurer( + *config.(*webstepsx.Config), )) }, - config: &websteps.Config{}, + config: &webstepsx.Config{}, inputPolicy: InputOrQueryBackend, } }, diff --git a/internal/engine/experiment.go b/internal/engine/experiment.go index acf7150e2e..f28e52384b 100644 --- a/internal/engine/experiment.go +++ b/internal/engine/experiment.go @@ -92,28 +92,100 @@ func (e *Experiment) ReportID() string { // Measure performs a measurement with input. We assume that you have // configured the available test helpers, either manually or by calling // the session's MaybeLookupBackends() method. +// +// Deprecated: This function will return just the first measurement +// returned by the experiments that implement the model.ExperimentRunnerAsync +// interface. All the other measurements will be lost. To get all the +// measurements returned by such experiments, use MeasureAsync. func (e *Experiment) Measure(input string) (*model.Measurement, error) { return e.MeasureWithContext(context.Background(), input) } +// experimentAsyncWrapper makes a sync experiment behave like it was async +type experimentAsyncWrapper struct { + *Experiment +} + +var _ model.ExperimentMeasurerAsync = &experimentAsyncWrapper{} + +// RunAsync implements ExperimentMeasurerAsync.RunAsync. 
+func (eaw *experimentAsyncWrapper) RunAsync( + ctx context.Context, sess model.ExperimentSession, input string, + callbacks model.ExperimentCallbacks) (<-chan *model.ExperimentAsyncTestKeys, error) { + out := make(chan *model.ExperimentAsyncTestKeys) + measurement := eaw.newMeasurement(input) + start := time.Now() + err := eaw.measurer.Run(ctx, eaw.session, measurement, eaw.callbacks) + stop := time.Now() + if err != nil { + return nil, err + } + go func() { + out <- &model.ExperimentAsyncTestKeys{ + Extensions: measurement.Extensions, + MeasurementRuntime: stop.Sub(start).Seconds(), + TestKeys: measurement.TestKeys, + } + close(out) + }() + return out, nil +} + +func (e *Experiment) MeasureAsync( + ctx context.Context, input string) (<-chan *model.Measurement, error) { + err := e.session.MaybeLookupLocationContext(ctx) // this already tracks session bytes + if err != nil { + return nil, err + } + ctx = dialer.WithSessionByteCounter(ctx, e.session.byteCounter) + ctx = dialer.WithExperimentByteCounter(ctx, e.byteCounter) + var async model.ExperimentMeasurerAsync + if v, okay := e.measurer.(model.ExperimentMeasurerAsync); okay { + async = v + } else { + async = &experimentAsyncWrapper{e} + } + in, err := async.RunAsync(ctx, e.session, input, e.callbacks) + if err != nil { + return nil, err + } + out := make(chan *model.Measurement) + go func() { + defer close(out) + for tk := range in { + measurement := e.newMeasurement(input) + measurement.Extensions = tk.Extensions + measurement.MeasurementRuntime = tk.MeasurementRuntime + measurement.TestKeys = tk.TestKeys + if err := measurement.Scrub(e.session.ProbeIP()); err != nil { + continue + } + out <- measurement + } + }() + return out, nil +} + // MeasureWithContext is like Measure but with context. +// +// Deprecated: This function will return just the first measurement +// returned by the experiments that implement the model.ExperimentRunnerAsync +// interface. All the other measurements will be lost. 
To get all the +// measurements returned by such experiments, use MeasureAsync. func (e *Experiment) MeasureWithContext( ctx context.Context, input string, ) (measurement *model.Measurement, err error) { - err = e.session.MaybeLookupLocationContext(ctx) // this already tracks session bytes + out, err := e.MeasureAsync(ctx, input) if err != nil { - return + return nil, err } - ctx = dialer.WithSessionByteCounter(ctx, e.session.byteCounter) - ctx = dialer.WithExperimentByteCounter(ctx, e.byteCounter) - measurement = e.newMeasurement(input) - start := time.Now() - err = e.measurer.Run(ctx, e.session, measurement, e.callbacks) - stop := time.Now() - measurement.MeasurementRuntime = stop.Sub(start).Seconds() - scrubErr := measurement.Scrub(e.session.ProbeIP()) - if err == nil { - err = scrubErr + for m := range out { + if measurement == nil { + measurement = m // as documented + } + } + if measurement == nil { + err = errors.New("experiment returned no measurements") } return } diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index 39b7b5eef4..46c635a5aa 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -1,158 +1,154 @@ // Package webstepsx contains a websteps implementation // based on the internal/measurex package. +// +// This implementation does not follow any existing spec +// rather we are modeling the spec on this one. package webstepsx import ( "context" - "net/http" + "errors" "net/url" + "time" + "github.com/ooni/probe-cli/v3/internal/engine/model" + "github.com/ooni/probe-cli/v3/internal/engine/netx/archival" "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +const ( + testName = "websteps" + testVersion = "0.0.2" ) -// SingleStep contains the results of a single web step. -type SingleStep struct { - // URL is the URL this measurement refers to. 
- URL string `json:"url"` +// Config contains the experiment config. +type Config struct{} + +// TestKeys contains the experiment's test keys. +type TestKeys struct { + *measurex.ArchivalURLMeasurement +} + +// Measurer performs the measurement. +type Measurer struct { + Config Config +} - // Oddities contains all the oddities of all endpoints. - Oddities []measurex.Oddity `json:"oddities"` +var ( + _ model.ExperimentMeasurer = &Measurer{} + _ model.ExperimentMeasurerAsync = &Measurer{} +) - // DNS contains all the DNS measurements. - DNS []*measurex.Measurement `json:"dns"` +// NewExperimentMeasurer creates a new ExperimentMeasurer. +func NewExperimentMeasurer(config Config) model.ExperimentMeasurer { + return &Measurer{Config: config} +} - // Control contains all the control measurements. - Control []*measurex.Measurement `json:"control"` +// ExperimentName implements ExperimentMeasurer.ExperExperimentName. +func (mx *Measurer) ExperimentName() string { + return testName +} - // Endpoints contains a measurement for each endpoints (which - // may be empty if DNS lookup failed). - Endpoints []*measurex.Measurement `json:"endpoints"` +// ExperimentVersion implements ExperimentMeasurer.ExperExperimentVersion. +func (mx *Measurer) ExperimentVersion() string { + return testVersion } -// computeOddities computes the Oddities field my merging all -// the oddities appearing in the Endpoints list. -func (ss *SingleStep) computeOddities() { - unique := make(map[measurex.Oddity]bool) - for _, entry := range ss.DNS { - for _, oddity := range entry.Oddities { - unique[oddity] = true - } +var ( + // ErrNoAvailableTestHelpers is emitted when there are no available test helpers. + ErrNoAvailableTestHelpers = errors.New("no available helpers") + + // ErrNoInput indicates that no input was provided. + ErrNoInput = errors.New("no input provided") + + // ErrInputIsNotAnURL indicates that the input is not an URL. 
+ ErrInputIsNotAnURL = errors.New("input is not an URL") + + // ErrUnsupportedInput indicates that the input URL scheme is unsupported. + ErrUnsupportedInput = errors.New("unsupported input scheme") +) + +// RunAsync implements ExperimentMeasurerAsync.RunAsync. +func (mx *Measurer) RunAsync( + ctx context.Context, sess model.ExperimentSession, input string, + callbacks model.ExperimentCallbacks) (<-chan *model.ExperimentAsyncTestKeys, error) { + // 1. Parse and verify URL + URL, err := url.Parse(input) + if err != nil { + return nil, ErrInputIsNotAnURL } - for _, entry := range ss.Endpoints { - for _, oddity := range entry.Oddities { - unique[oddity] = true - } + if URL.Scheme != "http" && URL.Scheme != "https" { + return nil, ErrUnsupportedInput } - for oddity := range unique { - if oddity != "" { - ss.Oddities = append(ss.Oddities, oddity) + // 2. Find the testhelper + testhelpers, _ := sess.GetTestHelpersByName("web-connectivity") + var testhelper *model.Service + for _, th := range testhelpers { + if th.Type == "https" { + testhelper = &th + break } } + if testhelper == nil { + return nil, ErrNoAvailableTestHelpers + } + out := make(chan *model.ExperimentAsyncTestKeys) + go mx.runAsync(ctx, sess, input, testhelper, out) + return out, nil } -// URLMeasurer measures a single URL. -// -// Make sure you fill the fields marked as MANDATORY. -type URLMeasurer struct { - // DNSResolverUDP is the MANDATORY address of an DNS - // over UDP resolver (e.g., "8.8.4.4.:53"). - DNSResolverUDP string - - // Mx is the MANDATORY measurex.Measurer. 
- Mx *measurex.Measurer +func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, + URL string, th *model.Service, out chan<- *model.ExperimentAsyncTestKeys) { + defer close(out) + begin := time.Now() + db := measurex.NewDB(begin) + mmx := &measurex.Measurer{ + DB: db, + HTTPClient: sess.DefaultHTTPClient(), + Logger: sess.Logger(), + Origin: measurex.OriginProbe, + TLSHandshaker: netxlite.NewTLSHandshakerStdlib(sess.Logger()), + } + mmx.RegisterUDPResolvers("8.8.4.4:53", "8.8.8.8:53", "1.1.1.1:53", "1.0.0.1:53") + mmx.RegisterWCTH(th.Address) + cookies := measurex.NewCookieJar() + in := mmx.MeasureHTTPURLAndFollowRedirections(ctx, URL, cookies) + for m := range in { + out <- &model.ExperimentAsyncTestKeys{ + MeasurementRuntime: time.Since(begin).Seconds(), + TestKeys: &TestKeys{ + measurex.NewArchivalURLMeasurement(m), + }, + Extensions: map[string]int64{ + archival.ExtHTTP.Name: archival.ExtHTTP.V, + archival.ExtDNS.Name: archival.ExtDNS.V, + archival.ExtNetevents.Name: archival.ExtNetevents.V, + archival.ExtTCPConnect.Name: archival.ExtTCPConnect.V, + archival.ExtTLSHandshake.Name: archival.ExtTLSHandshake.V, + }, + } + } +} - // URL is the MANDATORY URL to measure. - URL *url.URL +// Run implements ExperimentMeasurer.Run. +func (mx *Measurer) Run(ctx context.Context, sess model.ExperimentSession, + measurement *model.Measurement, callbacks model.ExperimentCallbacks) error { + return errors.New("sync run is not implemented") } -// Run performs all the WebSteps step. -// -// We define "step" as the process by which we have an input URL -// and we perform the following operations: -// -// 1. lookup of all the possible endpoints for the URL; -// -// 2. measurement of each available endpoint. -// -// After a step has run, we search for all the redirection URLs -// and we run a new step with the new URLs. +// SummaryKeys contains summary keys for this experiment. 
// -// Return value: -// -// A list of SingleStep structures where the Endpoints array may be empty -// if we have no been able to discover endpoints. -func (um *URLMeasurer) Run(ctx context.Context) (v []*SingleStep) { - jar := measurex.NewCookieJar() - inputs := []*url.URL{um.URL} -Loop: - for len(inputs) > 0 { - dups := make(map[string]*url.URL) - for _, input := range inputs { - select { - case <-ctx.Done(): - break Loop - default: - um.Mx.Infof("RunSingleStep url=%s dnsResolverUDP=%s jar=%+v", - input, um.DNSResolverUDP, jar) - m := um.RunSingleStep(ctx, jar, input) - v = append(v, m) - for _, epnt := range m.Endpoints { - for _, redir := range epnt.HTTPRedirect { - dups[redir.Location.String()] = redir.Location - } - } - } - } - inputs = nil - for _, input := range dups { - um.Mx.Infof("newRedirection %s", input) - inputs = append(inputs, input) - } - } - return +// Note that this structure is part of the ABI contract with probe-cli +// therefore we should be careful when changing it. +type SummaryKeys struct { + Accessible bool `json:"accessible"` + Blocking string `json:"blocking"` + IsAnomaly bool `json:"-"` } -// RunSingleStep performs a single WebSteps step. -// -// This function DOES NOT automatically follow redirections. -// -// Arguments: -// -// - ctx is the context to implement timeouts; -// -// - cookiejar is the http.CookieJar for cookies; -// -// - URL is the URL to measure. -// -// Return value: -// -// A SingleStep structure where the Endpoints array may be empty -// if we have no been able to discover endpoints. 
-func (um *URLMeasurer) RunSingleStep(ctx context.Context, - cookiekar http.CookieJar, URL *url.URL) (m *SingleStep) { - m = &SingleStep{URL: URL.String()} - defer m.computeOddities() - port, err := measurex.PortFromURL(URL) - if err != nil { - return - } - switch URL.Scheme { - case "https": - m.DNS = append(m.DNS, um.Mx.LookupHTTPSSvcUDP( - ctx, URL.Hostname(), um.DNSResolverUDP)) - default: - // nothing to do - } - m.DNS = append(m.DNS, um.Mx.LookupHostSystem(ctx, URL.Hostname())) - m.DNS = append(m.DNS, um.Mx.LookupHostUDP(ctx, URL.Hostname(), um.DNSResolverUDP)) - endpoints := um.Mx.DB.SelectAllEndpointsForDomain(URL.Hostname(), port) - m.Control = append(m.Control, um.Mx.LookupWCTH(ctx, URL, endpoints, port)) - httpEndpoints, err := um.Mx.DB.SelectAllHTTPEndpointsForURL(URL) - if err != nil { - return - } - for _, epnt := range httpEndpoints { - m.Endpoints = append(m.Endpoints, um.Mx.HTTPEndpointGet(ctx, epnt, cookiekar)) - } - return +// GetSummaryKeys implements model.ExperimentMeasurer.GetSummaryKeys. +func (mx *Measurer) GetSummaryKeys(measurement *model.Measurement) (interface{}, error) { + sk := SummaryKeys{} + return sk, nil } diff --git a/internal/engine/inputprocessor.go b/internal/engine/inputprocessor.go index c828cacfd0..b9e5651429 100644 --- a/internal/engine/inputprocessor.go +++ b/internal/engine/inputprocessor.go @@ -10,15 +10,15 @@ import ( // InputProcessorExperiment is the Experiment // according to InputProcessor. type InputProcessorExperiment interface { - MeasureWithContext( - ctx context.Context, input string) (*model.Measurement, error) + MeasureAsync( + ctx context.Context, input string) (<-chan *model.Measurement, error) } // InputProcessorExperimentWrapper is a wrapper for an // Experiment that also allow to pass around the input index. 
type InputProcessorExperimentWrapper interface { - MeasureWithContext( - ctx context.Context, idx int, input string) (*model.Measurement, error) + MeasureAsync( + ctx context.Context, input string, idx int) (<-chan *model.Measurement, error) } // NewInputProcessorExperimentWrapper creates a new @@ -32,9 +32,9 @@ type inputProcessorExperimentWrapper struct { exp InputProcessorExperiment } -func (ipew inputProcessorExperimentWrapper) MeasureWithContext( - ctx context.Context, idx int, input string) (*model.Measurement, error) { - return ipew.exp.MeasureWithContext(ctx, input) +func (ipew inputProcessorExperimentWrapper) MeasureAsync( + ctx context.Context, input string, idx int) (<-chan *model.Measurement, error) { + return ipew.exp.MeasureAsync(ctx, input) } var _ InputProcessorExperimentWrapper = inputProcessorExperimentWrapper{} @@ -142,21 +142,29 @@ func (ip *InputProcessor) run(ctx context.Context) (int, error) { return stopMaxRuntime, nil } input := url.URL - meas, err := ip.Experiment.MeasureWithContext(ctx, idx, input) + var measurements []*model.Measurement + source, err := ip.Experiment.MeasureAsync(ctx, input, idx) if err != nil { return 0, err } - meas.AddAnnotations(ip.Annotations) - meas.Options = ip.Options - err = ip.Submitter.Submit(ctx, idx, meas) - if err != nil { - return 0, err + // NOTE: we don't want to intermix measuring with submitting + // therefore we collect all measurements first + for meas := range source { + measurements = append(measurements, meas) } - // Note: must be after submission because submission modifies - // the measurement to include the report ID. - err = ip.Saver.SaveMeasurement(idx, meas) - if err != nil { - return 0, err + for _, meas := range measurements { + meas.AddAnnotations(ip.Annotations) + meas.Options = ip.Options + err = ip.Submitter.Submit(ctx, idx, meas) + if err != nil { + return 0, err + } + // Note: must be after submission because submission modifies + // the measurement to include the report ID. 
+ err = ip.Saver.SaveMeasurement(idx, meas) + if err != nil { + return 0, err + } } } return stopNormal, nil diff --git a/internal/engine/model/experiment.go b/internal/engine/model/experiment.go index 053fe96928..d32456af8a 100644 --- a/internal/engine/model/experiment.go +++ b/internal/engine/model/experiment.go @@ -21,6 +21,44 @@ type ExperimentSession interface { UserAgent() string } +// ExperimentAsyncTestKeys is the type of test keys returned by an experiment +// when running in async fashion rather than in sync fashion. +type ExperimentAsyncTestKeys struct { + // MeasurementRuntime should return the total measurement runtime. + MeasurementRuntime float64 + + // TestKeys should return the actual test keys. + TestKeys interface{} + + // Extensions returns the extensions used by this experiment. + Extensions map[string]int64 +} + +// ExperimentMeasurerAsync is an experiment that can run in async fashion. +type ExperimentMeasurerAsync interface { + // RunAsync runs the experiment in async fashion. + // + // Arguments: + // + // - ctx is the context for deadline/timeout/cancellation + // + // - sess is the measurement session + // + // - input is the input URL to measure + // + // - callbacks contains the experiment callbacks + // + // Returns either a channel where TestKeys are posted or an error. + // + // An error indicate specific preconditions for running the experiment + // are not met (e.g., the input URL is invalid). + // + // On success, the experiment will post on the channel each new + // measurement until it is done and closes the channel. + RunAsync(ctx context.Context, sess ExperimentSession, input string, + callbacks ExperimentCallbacks) (<-chan *ExperimentAsyncTestKeys, error) +} + // ExperimentCallbacks contains experiment event-handling callbacks type ExperimentCallbacks interface { // OnProgress provides information about an experiment progress. 
From 4b8f005e480f9f1e81bd07ec4fdbeb87c2760766 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 21:55:47 +0200 Subject: [PATCH 22/53] fix: compute measurement runtime in a better way --- .../engine/experiment/webstepsx/webstepsx.go | 2 +- internal/measurex/measurement.go | 49 +++++++++++++++++++ internal/measurex/measurer.go | 46 +---------------- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index 46c635a5aa..9e51dadc87 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -116,7 +116,7 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, in := mmx.MeasureHTTPURLAndFollowRedirections(ctx, URL, cookies) for m := range in { out <- &model.ExperimentAsyncTestKeys{ - MeasurementRuntime: time.Since(begin).Seconds(), + MeasurementRuntime: m.Runtime.Seconds(), TestKeys: &TestKeys{ measurex.NewArchivalURLMeasurement(m), }, diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index c829606e35..2d72e9994d 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -1,5 +1,7 @@ package measurex +import "time" + // Measurement groups all the events that have the same MeasurementID. This // data format is not compatible with the OONI data format. type Measurement struct { @@ -107,3 +109,50 @@ func (m *Measurement) computeOddities() { } } } + +// URLMeasurement is the measurement of a whole URL. It contains +// a bunch of measurements detailing each measurement step. +type URLMeasurement struct { + // URL is the URL we're measuring. + URL string + + // CannotParseURL is true if the input URL could not be parsed. + CannotParseURL bool + + // DNS contains all the DNS related measurements. + DNS []*Measurement + + // TH contains all the measurements from the test helpers. 
+ TH []*Measurement + + // CannotGenerateEndpoints for URL is true if the code tasked of + // generating a list of endpoints for the URL fails. + CannotGenerateEndpoints bool + + // Endpoints contains a measurement for each endpoint + // that we discovered via DNS or TH. + Endpoints []*Measurement + + // RedirectURLs contain the URLs to which we should fetch + // if we choose to follow redirections. + RedirectURLs []string + + // Runtime is the total time to measure this URL. + Runtime time.Duration +} + +// fillRedirects takes in input a complete URLMeasurement and fills +// the field named Redirects with all redirections. +func (m *URLMeasurement) fillRedirects() { + dups := make(map[string]bool) + for _, epnt := range m.Endpoints { + for _, redir := range epnt.HTTPRedirect { + loc := redir.Location.String() + if _, found := dups[loc]; found { + continue + } + dups[loc] = true + m.RedirectURLs = append(m.RedirectURLs, loc) + } + } +} diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index e417070b12..d3dc76d5d8 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -646,34 +646,6 @@ func (mx *Measurer) asyncTestHelperQuery( } } -// URLMeasurement is the measurement of a whole URL. It contains -// a bunch of measurements detailing each measurement step. -type URLMeasurement struct { - // URL is the URL we're measuring. - URL string - - // CannotParseURL is true if the input URL could not be parsed. - CannotParseURL bool - - // DNS contains all the DNS related measurements. - DNS []*Measurement - - // TH contains all the measurements from the test helpers. - TH []*Measurement - - // CannotGenerateEndpoints for URL is true if the code tasked of - // generating a list of endpoints for the URL fails. - CannotGenerateEndpoints bool - - // Endpoints contains a measurement for each endpoint - // that we discovered via DNS or TH. 
- Endpoints []*Measurement - - // RedirectURLs contain the URLs to which we should fetch - // if we choose to follow redirections. - RedirectURLs []string -} - // MeasureURL measures an HTTP or HTTPS URL. The DNS resolvers // and the Test Helpers we use in this measurement are the ones // configured into the database. The default is to use the system @@ -703,6 +675,8 @@ func (mx *Measurer) MeasureURL( ctx context.Context, URL string, cookies http.CookieJar) *URLMeasurement { mx.Logf("MeasureURL url=%s", URL) m := &URLMeasurement{URL: URL} + begin := time.Now() + defer func() { m.Runtime = time.Since(begin) }() parsed, err := url.Parse(URL) if err != nil { m.CannotParseURL = true @@ -726,22 +700,6 @@ func (mx *Measurer) MeasureURL( return m } -// fillRedirects takes in input a complete URLMeasurement and fills -// the field named Redirects with all redirections. -func (m *URLMeasurement) fillRedirects() { - dups := make(map[string]bool) - for _, epnt := range m.Endpoints { - for _, redir := range epnt.HTTPRedirect { - loc := redir.Location.String() - if _, found := dups[loc]; found { - continue - } - dups[loc] = true - m.RedirectURLs = append(m.RedirectURLs, loc) - } - } -} - // redirectionQueue is the type we use to manage the redirection // queue and to follow a reasonable number of redirects. 
type redirectionQueue struct { From c9f88ccce0c54113554ebebad7e9e8d30b7abbe9 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 22:10:27 +0200 Subject: [PATCH 23/53] feat: add runtime metrics before experimental run --- internal/engine/experiment/webstepsx/webstepsx.go | 2 +- internal/measurex/archival.go | 6 ++++++ internal/measurex/measurement.go | 13 +++++++++++-- internal/measurex/measurer.go | 8 +++++++- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index 9e51dadc87..f01b334836 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -116,7 +116,7 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, in := mmx.MeasureHTTPURLAndFollowRedirections(ctx, URL, cookies) for m := range in { out <- &model.ExperimentAsyncTestKeys{ - MeasurementRuntime: m.Runtime.Seconds(), + MeasurementRuntime: m.TotalRuntime.Seconds(), TestKeys: &TestKeys{ measurex.NewArchivalURLMeasurement(m), }, diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index b76c4e89ce..2218a9a10d 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -14,6 +14,9 @@ type ArchivalURLMeasurement struct { TH []*ArchivalMeasurement `json:"th"` CannotGenerateEndpoints bool `json:"cannot_generate_endpoints"` Endpoints []*ArchivalMeasurement `json:"endpoints"` + DNSRuntime float64 `json:"dns_runtime"` + THRuntime float64 `json:"th_runtime"` + EpntsRuntime float64 `json:"epnts_runtime"` } // NewArchivalURLMeasurement constructs a new instance @@ -26,6 +29,9 @@ func NewArchivalURLMeasurement(in *URLMeasurement) (out *ArchivalURLMeasurement) TH: NewArchivalMeasurementList(in.TH...), CannotGenerateEndpoints: in.CannotGenerateEndpoints, Endpoints: NewArchivalMeasurementList(in.Endpoints...), + DNSRuntime: in.DNSRuntime.Seconds(), + THRuntime: 
in.THRuntime.Seconds(), + EpntsRuntime: in.EpntsRuntime.Seconds(), } } diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index 2d72e9994d..ef438b7c3b 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -137,8 +137,17 @@ type URLMeasurement struct { // if we choose to follow redirections. RedirectURLs []string - // Runtime is the total time to measure this URL. - Runtime time.Duration + // TotalRuntime is the total time to measure this URL. + TotalRuntime time.Duration + + // DNSRuntime is the time to run all DNS checks. + DNSRuntime time.Duration + + // THRuntime is the total time to invoke all test helpers. + THRuntime time.Duration + + // EpntsRuntime is the total time to check all the endpoints. + EpntsRuntime time.Duration } // fillRedirects takes in input a complete URLMeasurement and fills diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index d3dc76d5d8..ba1170812f 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -676,26 +676,32 @@ func (mx *Measurer) MeasureURL( mx.Logf("MeasureURL url=%s", URL) m := &URLMeasurement{URL: URL} begin := time.Now() - defer func() { m.Runtime = time.Since(begin) }() + defer func() { m.TotalRuntime = time.Since(begin) }() parsed, err := url.Parse(URL) if err != nil { m.CannotParseURL = true return m } + dnsBegin := time.Now() for dns := range mx.LookupURLHostParallel(ctx, parsed) { m.DNS = append(m.DNS, dns) } + m.DNSRuntime = time.Since(dnsBegin) + thBegin := time.Now() for th := range mx.QueryTestHelperParallel(ctx, parsed) { m.TH = append(m.TH, th) } + m.THRuntime = time.Since(thBegin) epnts, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) if err != nil { m.CannotGenerateEndpoints = true return m } + epntRuntime := time.Now() for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, epnts...) 
{ m.Endpoints = append(m.Endpoints, epnt) } + m.EpntsRuntime = time.Since(epntRuntime) m.fillRedirects() return m } From 9e11026a263e9e895220d9c11075741b53950fbd Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 23:31:43 +0200 Subject: [PATCH 24/53] fix: improve UX related to logging --- internal/measurex/logger.go | 62 +++++++++++++++++++++++++++++++++++ internal/measurex/measurer.go | 61 ++++++++++++++++++---------------- internal/measurex/resolver.go | 7 ++-- internal/netxlite/resolver.go | 4 +-- 4 files changed, 99 insertions(+), 35 deletions(-) diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go index 2443b7a42e..2233c03bb8 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -1,6 +1,10 @@ package measurex import ( + "fmt" + "sync" + "time" + "github.com/ooni/probe-cli/v3/internal/netxlite" ) @@ -12,3 +16,61 @@ type Logger interface { Info(msg string) Infof(format string, v ...interface{}) } + +// newOperationLogger creates a new logger that logs +// about an in-progress operation. +func newOperationLogger(logger Logger, format string, v ...interface{}) *operationLogger { + ol := &operationLogger{ + sighup: make(chan interface{}), + logger: logger, + once: &sync.Once{}, + message: fmt.Sprintf(format, v...), + wg: &sync.WaitGroup{}, + } + ol.wg.Add(1) + go ol.logloop() + return ol +} + +// operationLogger logs about an in-progress operation +type operationLogger struct { + logger Logger + message string + once *sync.Once + sighup chan interface{} + wg *sync.WaitGroup +} + +func (ol *operationLogger) logloop() { + defer ol.wg.Done() + timer := time.NewTimer(500 * time.Millisecond) + defer timer.Stop() + select { + case <-timer.C: + case <-ol.sighup: + return + } + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + for { + ol.logger.Infof("%s... 
in progress", ol.message) + select { + case <-ol.sighup: + return + case <-ticker.C: + // continue the loop + } + } +} + +func (ol *operationLogger) Stop(err error) { + ol.once.Do(func() { + close(ol.sighup) + ol.wg.Wait() + if err != nil { + ol.logger.Infof("%s... %s", ol.message, err.Error()) + return + } + ol.logger.Infof("%s... ok", ol.message) + }) +} diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index ba1170812f..c6aa3406d4 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -51,13 +51,14 @@ func (mx *Measurer) nextMeasurement() int64 { // LookupHostSystem performs a LookupHost using the system resolver. func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measurement { const timeout = 4 * time.Second - mx.Logf("LookupHostSystem domain=%s timeout=%s...", domain, timeout) + ol := newOperationLogger(mx.Logger, "LookupHost %s", domain) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() mid := mx.nextMeasurement() r := NewResolverSystem(mid, mx.Origin, mx.DB, mx.Logger) defer r.CloseIdleConnections() - _, _ = r.LookupHost(ctx, domain) + _, err := r.LookupHost(ctx, domain) + ol.Stop(err) return NewMeasurement(mx.DB, mid) } @@ -75,14 +76,14 @@ func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measur func (mx *Measurer) LookupHostUDP( ctx context.Context, domain, address string) *Measurement { const timeout = 4 * time.Second - mx.Logf("LookupHostUDP serverEndpoint=%s/udp domain=%s timeout=%s...", - address, domain, timeout) + ol := newOperationLogger(mx.Logger, "LookupHost %s with %s/udp", domain, address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() mid := mx.nextMeasurement() r := NewResolverUDP(mid, mx.Origin, mx.DB, mx.Logger, address) defer r.CloseIdleConnections() - _, _ = r.LookupHost(ctx, domain) + _, err := r.LookupHost(ctx, domain) + ol.Stop(err) return NewMeasurement(mx.DB, mid) } @@ -100,14 +101,14 @@ func (mx *Measurer) 
LookupHostUDP( func (mx *Measurer) LookupHTTPSSvcUDP( ctx context.Context, domain, address string) *Measurement { const timeout = 4 * time.Second - mx.Logf("LookupHTTPSSvcUDP engine=udp://%s domain=%s timeout=%s...", - address, domain, timeout) + ol := newOperationLogger(mx.Logger, "LookupHTTPSvc %s with %s/udp", domain, address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() mid := mx.nextMeasurement() r := NewResolverUDP(mid, mx.Origin, mx.DB, mx.Logger, address) defer r.CloseIdleConnections() - _, _ = r.LookupHTTPSSvcWithoutRetry(ctx, domain) + _, err := r.LookupHTTPSSvcWithoutRetry(ctx, domain) + ol.Stop(err) return NewMeasurement(mx.DB, mid) } @@ -134,12 +135,14 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) *Measurement func (mx *Measurer) tcpConnect(ctx context.Context, measurementID int64, address string) (Conn, error) { const timeout = 10 * time.Second - mx.Logf("TCPConnect endpoint=%s timeout=%s...", address, timeout) + ol := newOperationLogger(mx.Logger, "TCPConnect %s", address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() d := NewDialerWithoutResolver(measurementID, mx.Origin, mx.DB, mx.Logger) defer d.CloseIdleConnections() - return d.DialContext(ctx, "tcp", address) + conn, err := d.DialContext(ctx, "tcp", address) + ol.Stop(err) + return conn, err } // TLSConnectAndHandshake connects and TLS handshakes with a TCP endpoint. 
@@ -192,12 +195,14 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, return nil, err } const timeout = 10 * time.Second - mx.Logf("TLSHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", - config.ServerName, config.NextProtos, address, timeout) + ol := newOperationLogger(mx.Logger, + "TLSHandshake %s with sni=%s", address, config.ServerName) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() th := WrapTLSHandshaker(measurementID, mx.Origin, mx.DB, mx.TLSHandshaker) - return th.Handshake(ctx, conn, config) + tlsConn, err := th.Handshake(ctx, conn, config) + ol.Stop(err) + return tlsConn, err } // QUICHandshake connects and TLS handshakes with a QUIC endpoint. @@ -237,8 +242,8 @@ func (mx *Measurer) QUICHandshake(ctx context.Context, address string, func (mx *Measurer) quicHandshake(ctx context.Context, measurementID int64, address string, config *tls.Config) (QUICEarlySession, error) { const timeout = 10 * time.Second - mx.Logf("QUICHandshake sni=%s alpn=%+v endpoint=%s timeout=%s...", - config.ServerName, config.NextProtos, address, timeout) + ol := newOperationLogger(mx.Logger, + "QUICHandshake %s with sni=%s", address, config.ServerName) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() qd := WrapQUICDialer(measurementID, mx.Origin, mx.DB, @@ -247,7 +252,9 @@ func (mx *Measurer) quicHandshake(ctx context.Context, measurementID int64, mx.Logger, )) defer qd.CloseIdleConnections() - return qd.DialContext(ctx, address, config) + sess, err := qd.DialContext(ctx, address, config) + ol.Stop(err) + return sess, err } // HTTPEndpointGet performs a GET request for an HTTP endpoint. 
@@ -418,11 +425,13 @@ func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, epnt *HTTPEndpoint, req *http.Request) (*http.Response, error) { const timeout = 15 * time.Second - mx.Logf("httpClientDo endpoint=%s method=%s url=%s headers=%+v timeout=%s...", - epnt.String(), req.Method, req.URL.String(), req.Header, timeout) + ol := newOperationLogger(mx.Logger, + "%s %s with %s/%s", req.Method, req.URL.String(), epnt.Address, epnt.Network) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - return clnt.Do(req.WithContext(ctx)) + resp, err := clnt.Do(req.WithContext(ctx)) + ol.Stop(err) + return resp, err } // LookupWCTH performs an Endpoint lookup using the WCTH (i.e., @@ -446,13 +455,13 @@ func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, func (mx *Measurer) LookupWCTH(ctx context.Context, URL *url.URL, endpoints []*Endpoint, port string, WCTHURL string) *Measurement { const timeout = 30 * time.Second - mx.Logf("lookupWCTH backend=%s url=%s endpoints=%+v port=%s timeout=%s...", - WCTHURL, URL.String(), endpoints, port, timeout) + ol := newOperationLogger(mx.Logger, "WCTH %s with %s", URL.String(), WCTHURL) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() mid := mx.nextMeasurement() w := NewWCTHWorker(mid, mx.Logger, mx.DB, mx.HTTPClient, WCTHURL) - _, _ = w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints)) + _, err := w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints)) + ol.Stop(err) return NewMeasurement(mx.DB, mid) } @@ -468,12 +477,6 @@ func (mx *Measurer) onlyTCPEndpoints(endpoints []*Endpoint) (out []string) { return } -// Logf formats and logs a message using mx.Logger. All messages -// logged by Measurer should use this function to emit logs. -func (mx *Measurer) Logf(format string, v ...interface{}) { - mx.Logger.Infof(format, v...) 
-} - // HTTPEndpointGetParallel performs an HTTPEndpointGet for each // input endpoint using a pool of background goroutines. // @@ -673,7 +676,7 @@ func (mx *Measurer) asyncTestHelperQuery( // documented at https://github.com/ooni/probe/issues/1727. func (mx *Measurer) MeasureURL( ctx context.Context, URL string, cookies http.CookieJar) *URLMeasurement { - mx.Logf("MeasureURL url=%s", URL) + mx.Logger.Infof("MeasureURL url=%s", URL) m := &URLMeasurement{URL: URL} begin := time.Now() defer func() { m.TotalRuntime = time.Since(begin) }() diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index d989a92fee..87e8edebac 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -48,8 +48,8 @@ func NewResolverSystem(measurementID int64, // - address is the resolver address (e.g., "1.1.1.1:53"). func NewResolverUDP(measurementID int64, origin Origin, db EventDB, logger Logger, address string) Resolver { - return WrapResolver(measurementID, origin, db, &netxlite.ResolverLogger{ - Resolver: netxlite.WrapResolver(logger, dnsx.NewSerialResolver( + return WrapResolver(measurementID, origin, db, + netxlite.WrapResolver(logger, dnsx.NewSerialResolver( WrapDNSXRoundTripper(measurementID, origin, db, dnsx.NewDNSOverUDP( &netxliteDialerAdapter{ NewDialerWithSystemResolver( @@ -57,8 +57,7 @@ func NewResolverUDP(measurementID int64, }, address, )))), - Logger: logger, - }) + ) } type resolverx struct { diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index 6386545fe5..e1a289cd65 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -229,7 +229,7 @@ func (r *resolverIDNA) LookupHostWithoutRetry( return r.Resolver.LookupHostWithoutRetry(ctx, host, qtype) } -func (r *resolverIDNA) LookupHTTPSWithoutRetry( +func (r *resolverIDNA) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { host, err := idna.ToASCII(domain) if err != nil { @@ -310,7 +310,7 @@ func (r 
*resolverErrWrapper) LookupHostWithoutRetry( return addrs, nil } -func (r *resolverErrWrapper) LookupHTTPSWithoutRetry( +func (r *resolverErrWrapper) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { out, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) if err != nil { From 0ec2e7ab945b7e6fd5e7b2d09d25d50d11de0704 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 21 Sep 2021 23:42:02 +0200 Subject: [PATCH 25/53] more clarity --- internal/measurex/measurer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index c6aa3406d4..143b65b8bd 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -51,7 +51,7 @@ func (mx *Measurer) nextMeasurement() int64 { // LookupHostSystem performs a LookupHost using the system resolver. func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measurement { const timeout = 4 * time.Second - ol := newOperationLogger(mx.Logger, "LookupHost %s", domain) + ol := newOperationLogger(mx.Logger, "LookupHost %s with getaddrinfo", domain) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() mid := mx.nextMeasurement() From a4b1cfacc885d113ca4b33e295ce228af9a3e0a3 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 22 Sep 2021 01:31:42 +0200 Subject: [PATCH 26/53] improve logging after feedback and add bandwidth monitoring --- .gitignore | 17 +-- internal/cmd/miniooni/libminiooni.go | 9 ++ internal/measurex/logger.go | 14 +-- internal/netxlite/bwmon.go | 181 +++++++++++++++++++++++++++ internal/netxlite/dialer.go | 3 +- internal/netxlite/quic.go | 2 +- 6 files changed, 204 insertions(+), 22 deletions(-) create mode 100644 internal/netxlite/bwmon.go diff --git a/.gitignore b/.gitignore index af5a368a29..ffe7e715ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,26 +1,27 @@ +.DS_Store +/*.asc +/*.deb +/*.jsonl +/*.tar.gz +/*.zip /apitool /apitool.exe -/*.asc 
+/bandwidth.json /coverage.cov -/*.deb /debops-ci -.DS_Store /jafar /jafar.exe -/*.jsonl /miniooni /miniooni.exe /oohelper +/oohelper.exe /oohelperd /oohelperd.exe -/oohelper.exe /ooniprobe +/ooniprobe.exe /ooniprobe_checksums.txt /ooniprobe_checksums.txt.asc -/ooniprobe.exe /probe-cli.cov /ptxclient /ptxclient.exe -/*.tar.gz /testdata/gotmp -/*.zip diff --git a/internal/cmd/miniooni/libminiooni.go b/internal/cmd/miniooni/libminiooni.go index 387f917fa4..5be960787e 100644 --- a/internal/cmd/miniooni/libminiooni.go +++ b/internal/cmd/miniooni/libminiooni.go @@ -20,6 +20,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/engine/model" "github.com/ooni/probe-cli/v3/internal/humanize" "github.com/ooni/probe-cli/v3/internal/kvstore" + "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/version" "github.com/pborman/getopt/v2" ) @@ -27,6 +28,7 @@ import ( // Options contains the options you can set from the CLI. type Options struct { Annotations []string + BWMon string ExtraOptions []string HomeDir string Inputs []string @@ -61,6 +63,9 @@ func init() { getopt.FlagLong( &globalOptions.Annotations, "annotation", 'A', "Add annotaton", "KEY=VALUE", ) + getopt.FlagLong( + &globalOptions.BWMon, "bwmon", 0, "Monitor bandwidth and save results to file", "PATH", + ) getopt.FlagLong( &globalOptions.ExtraOptions, "option", 'O', "Pass an option to the experiment", "KEY=VALUE", @@ -295,6 +300,10 @@ func MainWithConfiguration(experimentName string, currentOptions Options) { ctx := context.Background() + if currentOptions.BWMon != "" { + netxlite.MonitorBandwidth(ctx, currentOptions.BWMon) + } + extraOptions := mustMakeMap(currentOptions.ExtraOptions) annotations := mustMakeMap(currentOptions.Annotations) diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go index 2233c03bb8..4ccbbc1579 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -47,19 +47,9 @@ func (ol *operationLogger) logloop() { defer 
timer.Stop() select { case <-timer.C: - case <-ol.sighup: - return - } - ticker := time.NewTicker(time.Second) - defer ticker.Stop() - for { ol.logger.Infof("%s... in progress", ol.message) - select { - case <-ol.sighup: - return - case <-ticker.C: - // continue the loop - } + case <-ol.sighup: + // we'll emit directly in stop } } diff --git a/internal/netxlite/bwmon.go b/internal/netxlite/bwmon.go new file mode 100644 index 0000000000..151e39dd95 --- /dev/null +++ b/internal/netxlite/bwmon.go @@ -0,0 +1,181 @@ +package netxlite + +import ( + "context" + "encoding/json" + "net" + "os" + "sync" + "time" + + "github.com/ooni/probe-cli/v3/internal/atomicx" + "github.com/ooni/probe-cli/v3/internal/netxlite/quicx" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +// bandwidthStats contains bandwidth stats. +type bandwidthStats struct { + // Timestamp is the timestamp when we saved this snapshot. + Timestamp time.Time + + // Read is the number of bytes read using Read. + Read int64 + + // ReadFrom is the number of bytes read using ReadFrom. + ReadFrom int64 + + // Write is the number of bytes written using Write. + Write int64 + + // WriteTo is the number of bytes written using WriteTo. + WriteTo int64 +} + +// bandwidthMonitor monitors the bandwidth usage. +type bandwidthMonitor struct { + enabled *atomicx.Int64 + stats bandwidthStats + mu sync.Mutex +} + +// MonitorBandwidth configures bandwidth monitoring. The filename +// argument is the name of the file where to write snapshots. By +// default bandwidth monitoring is disabled and you only enable it +// by calling this function once in your main function. +func MonitorBandwidth(ctx context.Context, filename string) { + bwmonitor.enabled.Add(1) + go bwmonitor.measure(ctx, filename) +} + +// measure performs periodic measurements. 
+func (bwmon *bandwidthMonitor) measure(ctx context.Context, filename string) { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + for { + select { + case t := <-ticker.C: + bwmon.saveSnapshot(t, filename) + case <-ctx.Done(): + return + } + } +} + +// saveSnapshot appends the snapshot to the snapshots file. +func (bwmon *bandwidthMonitor) saveSnapshot(t time.Time, filename string) { + bwmon.mu.Lock() + bwmon.stats.Timestamp = t + data, err := json.Marshal(bwmon.stats) + bwmon.stats = bandwidthStats{} + bwmon.mu.Unlock() + data = append(data, '\n') + runtimex.PanicOnError(err, "json.Marshal failed") + const flags = os.O_APPEND | os.O_CREATE | os.O_WRONLY + filep, err := os.OpenFile(filename, flags, 0644) + if err != nil { + return + } + defer filep.Close() + if _, err := filep.Write(data); err != nil { + filep.Close() + return + } +} + +// MaybeWrapConn possibly wraps a net.Conn to add bandwidth monitoring. If there is +// an error this function immediately returns an error. Bandwidth monitoring is +// disabled by default, but can be enabled when required. +func (bwmon *bandwidthMonitor) MaybeWrapConn(conn net.Conn, err error) (net.Conn, error) { + if bwmon.enabled.Load() == 0 { + return conn, err + } + if err != nil { + return nil, err + } + return &bwmonConn{Conn: conn, bwmon: bwmon}, nil +} + +// OnRead measures the results of Conn.Read. +func (bwmon *bandwidthMonitor) OnRead(count int, err error) (int, error) { + bwmon.mu.Lock() + bwmon.stats.Read += int64(count) + bwmon.mu.Unlock() + return count, err +} + +// OnWrite measures the results of Conn.Write. +func (bwmon *bandwidthMonitor) OnWrite(count int, err error) (int, error) { + bwmon.mu.Lock() + bwmon.stats.Write += int64(count) + bwmon.mu.Unlock() + return count, err +} + +// OnWriteTo measures the results of UDPLikeConn.WriteTo. 
+func (bwmon *bandwidthMonitor) OnWriteTo(count int, err error) (int, error) { + bwmon.mu.Lock() + bwmon.stats.WriteTo += int64(count) + bwmon.mu.Unlock() + return count, err +} + +// OnReadFrom measures the results of UDPLikeConn.ReadFrom. +func (bwmon *bandwidthMonitor) OnReadFrom( + count int, addr net.Addr, err error) (int, net.Addr, error) { + bwmon.mu.Lock() + bwmon.stats.ReadFrom += int64(count) + bwmon.mu.Unlock() + return count, addr, err +} + +// bwmonConn wraps a net.Conn to add bandwidth monitoring. +type bwmonConn struct { + net.Conn + bwmon *bandwidthMonitor +} + +// Read implements net.Conn.Read. +func (c *bwmonConn) Read(b []byte) (int, error) { + return c.bwmon.OnRead(c.Conn.Read(b)) +} + +// Read implements net.Conn.Read. +func (c *bwmonConn) Write(b []byte) (int, error) { + return c.bwmon.OnWrite(c.Conn.Write(b)) +} + +// MaybeWrapUDPLikeConn possibly wraps a quicx.UDPLikeConn to add bandwidth +// monitoring. If there is an error this function immediately returns an +// error. Bandwidth monitoring is disabled by default, but can be +// enabled when required. +func (bwmon *bandwidthMonitor) MaybeWrapUDPLikeConn( + conn quicx.UDPLikeConn, err error) (quicx.UDPLikeConn, error) { + if bwmon.enabled.Load() == 0 { + return conn, err + } + if err != nil { + return nil, err + } + return &bwmonUDPLikeConn{UDPLikeConn: conn, bwmon: bwmon}, nil +} + +// bwmonUDPLikeConn wraps a quicx.UDPLikeConn to add bandwidth monitoring. +type bwmonUDPLikeConn struct { + quicx.UDPLikeConn + bwmon *bandwidthMonitor +} + +// WriteTo implements quicx.UDPLikeConn.WriteTo. +func (c *bwmonUDPLikeConn) WriteTo(p []byte, addr net.Addr) (int, error) { + return c.bwmon.OnWriteTo(c.UDPLikeConn.WriteTo(p, addr)) +} + +// ReadFrom implements quicx.UDPLikeConn.ReadFrom. 
+func (c *bwmonUDPLikeConn) ReadFrom(b []byte) (int, net.Addr, error) { + return c.bwmon.OnReadFrom(c.UDPLikeConn.ReadFrom(b)) +} + +// bwmonitor is the bandwidth monitor singleton +var bwmonitor = &bandwidthMonitor{ + enabled: &atomicx.Int64{}, +} diff --git a/internal/netxlite/dialer.go b/internal/netxlite/dialer.go index 5cb173c0f0..9c9fb041ab 100644 --- a/internal/netxlite/dialer.go +++ b/internal/netxlite/dialer.go @@ -110,7 +110,8 @@ func (d *dialerSystem) newUnderlyingDialer() *net.Dialer { } func (d *dialerSystem) DialContext(ctx context.Context, network, address string) (net.Conn, error) { - return d.newUnderlyingDialer().DialContext(ctx, network, address) + return bwmonitor.MaybeWrapConn( + d.newUnderlyingDialer().DialContext(ctx, network, address)) } func (d *dialerSystem) CloseIdleConnections() { diff --git a/internal/netxlite/quic.go b/internal/netxlite/quic.go index ebb3182605..17d309e357 100644 --- a/internal/netxlite/quic.go +++ b/internal/netxlite/quic.go @@ -32,7 +32,7 @@ var _ QUICListener = &quicListenerStdlib{} // Listen implements QUICListener.Listen. func (qls *quicListenerStdlib) Listen(addr *net.UDPAddr) (quicx.UDPLikeConn, error) { - return net.ListenUDP("udp", addr) + return bwmonitor.MaybeWrapUDPLikeConn(net.ListenUDP("udp", addr)) } // QUICDialer dials QUIC sessions. From 74e8fa57ce4be1864de5f178548371960db7e6bc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 22 Sep 2021 02:37:18 +0200 Subject: [PATCH 27/53] fix: also save elapsed time Simplifies processing with python --- internal/netxlite/bwmon.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/netxlite/bwmon.go b/internal/netxlite/bwmon.go index 151e39dd95..b6b32374b2 100644 --- a/internal/netxlite/bwmon.go +++ b/internal/netxlite/bwmon.go @@ -18,6 +18,9 @@ type bandwidthStats struct { // Timestamp is the timestamp when we saved this snapshot. Timestamp time.Time + // Elapsed is the elapsed time since the beginning. 
+ Elapsed time.Duration + // Read is the number of bytes read using Read. Read int64 @@ -33,6 +36,7 @@ type bandwidthStats struct { // bandwidthMonitor monitors the bandwidth usage. type bandwidthMonitor struct { + begin time.Time enabled *atomicx.Int64 stats bandwidthStats mu sync.Mutex @@ -65,6 +69,7 @@ func (bwmon *bandwidthMonitor) measure(ctx context.Context, filename string) { func (bwmon *bandwidthMonitor) saveSnapshot(t time.Time, filename string) { bwmon.mu.Lock() bwmon.stats.Timestamp = t + bwmon.stats.Elapsed = t.Sub(bwmon.begin) data, err := json.Marshal(bwmon.stats) bwmon.stats = bandwidthStats{} bwmon.mu.Unlock() @@ -177,5 +182,6 @@ func (c *bwmonUDPLikeConn) ReadFrom(b []byte) (int, net.Addr, error) { // bwmonitor is the bandwidth monitor singleton var bwmonitor = &bandwidthMonitor{ + begin: time.Now(), enabled: &atomicx.Int64{}, } From a431a0d20cf1334dbaaba103d277eb6a8e6accd8 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 22 Sep 2021 16:15:56 +0200 Subject: [PATCH 28/53] implement and expose new websteps test helper We need to do something different now: simpler TH and basically no need to explore too much --- internal/cmd/oohelper/oohelper.go | 42 +- internal/cmd/oohelperd/oohelperd.go | 2 + internal/measurex/db.go | 13 +- internal/measurex/th.go | 612 ++++++++++++++++++++++++++++ internal/measurex/utils.go | 14 + 5 files changed, 665 insertions(+), 18 deletions(-) create mode 100644 internal/measurex/th.go create mode 100644 internal/measurex/utils.go diff --git a/internal/cmd/oohelper/oohelper.go b/internal/cmd/oohelper/oohelper.go index 46977bcc7a..97faa30706 100644 --- a/internal/cmd/oohelper/oohelper.go +++ b/internal/cmd/oohelper/oohelper.go @@ -12,6 +12,7 @@ import ( "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/cmd/oohelper/internal" "github.com/ooni/probe-cli/v3/internal/engine/netx" + "github.com/ooni/probe-cli/v3/internal/measurex" "github.com/ooni/probe-cli/v3/internal/runtimex" ) @@ -20,8 +21,9 @@ var ( debug = 
flag.Bool("debug", false, "Toggle debug mode") httpClient *http.Client resolver netx.Resolver - server = flag.String("server", "https://wcth.ooni.io/", "URL of the test helper") + server = flag.String("server", "", "URL of the test helper") target = flag.String("target", "", "Target URL for the test helper") + fwebsteps = flag.Bool("websteps", false, "Use the websteps TH") ) func newhttpclient() *http.Client { @@ -43,18 +45,46 @@ func init() { } func main() { + defer cancel() logmap := map[bool]log.Level{ true: log.DebugLevel, false: log.InfoLevel, } flag.Parse() log.SetLevel(logmap[*debug]) - clnt := internal.OOClient{HTTPClient: httpClient, Resolver: resolver} - config := internal.OOConfig{TargetURL: *target, ServerURL: *server} - defer cancel() - cresp, err := clnt.Do(ctx, config) - runtimex.PanicOnError(err, "client.Do failed") + apimap := map[bool]func() interface{}{ + false: wcth, + true: webstepsth, + } + cresp := apimap[*fwebsteps]() data, err := json.MarshalIndent(cresp, "", " ") runtimex.PanicOnError(err, "json.MarshalIndent failed") fmt.Printf("%s\n", string(data)) } + +func webstepsth() interface{} { + serverURL := *server + if serverURL == "" { + serverURL = "http://127.0.0.1:8080/api/v1/websteps" + } + clnt := &measurex.THClient{ + DNServers: []string{"8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53"}, + HTTPClient: httpClient, + ServerURL: serverURL, + } + cresp, err := clnt.Run(ctx, *target) + runtimex.PanicOnError(err, "client.Run failed") + return cresp +} + +func wcth() interface{} { + serverURL := *server + if serverURL == "" { + serverURL = "https://wcth.ooni.io/" + } + clnt := internal.OOClient{HTTPClient: httpClient, Resolver: resolver} + config := internal.OOConfig{TargetURL: *target, ServerURL: serverURL} + cresp, err := clnt.Do(ctx, config) + runtimex.PanicOnError(err, "client.Do failed") + return cresp +} diff --git a/internal/cmd/oohelperd/oohelperd.go b/internal/cmd/oohelperd/oohelperd.go index 189b0076ab..78d24819df 100644 --- 
a/internal/cmd/oohelperd/oohelperd.go +++ b/internal/cmd/oohelperd/oohelperd.go @@ -12,6 +12,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/cmd/oohelperd/internal/webconnectivity" "github.com/ooni/probe-cli/v3/internal/cmd/oohelperd/internal/websteps" "github.com/ooni/probe-cli/v3/internal/engine/netx" + "github.com/ooni/probe-cli/v3/internal/measurex" ) const maxAcceptableBody = 1 << 24 @@ -54,6 +55,7 @@ func main() { func testableMain() { mux := http.NewServeMux() mux.Handle("/api/unstable/websteps", &websteps.Handler{Config: &websteps.Config{}}) + mux.Handle("/api/v1/websteps", &measurex.THHandler{}) mux.Handle("/", webconnectivity.Handler{ Client: httpx, Dialer: dialer, diff --git a/internal/measurex/db.go b/internal/measurex/db.go index e5e3020e63..537d6f58c9 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -619,7 +619,7 @@ func (db *DB) SelectAllHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error Network: epnt.Network, Address: epnt.Address, SNI: domain, - ALPN: db.alpnForHTTPEndpoint(epnt.Network), + ALPN: alpnForHTTPEndpoint(epnt.Network), URL: URL, Header: NewHTTPRequestHeaderForMeasuring(), }) @@ -644,14 +644,3 @@ func PortFromURL(URL *url.URL) (string, error) { return "", ErrCannotDeterminePortFromURL } } - -func (db *DB) alpnForHTTPEndpoint(network EndpointNetwork) []string { - switch network { - case NetworkQUIC: - return []string{"h3"} - case NetworkTCP: - return []string{"h2", "http/1.1"} - default: - return nil - } -} diff --git a/internal/measurex/th.go b/internal/measurex/th.go new file mode 100644 index 0000000000..90f0cda39f --- /dev/null +++ b/internal/measurex/th.go @@ -0,0 +1,612 @@ +package measurex + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/apex/log" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" + "github.com/ooni/probe-cli/v3/internal/netxlite/iox" + 
"github.com/ooni/probe-cli/v3/internal/runtimex" + "github.com/ooni/probe-cli/v3/internal/version" +) + +// +// Messages exchanged by the TH client and server +// + +// THClientRequest is the request received by the test helper. +type THClientRequest struct { + // Endpoints is a list of endpoints to measure. + Endpoints []*Endpoint + + // URL is the URL we want to measure. + URL string + + // HTTPRequestHeaders contains the request headers. + HTTPRequestHeaders http.Header +} + +// THServerResponse is the response from the test helper. +type THServerResponse struct { + // DNS contains all the DNS related measurements. + DNS *THDNSMeasurement + + // Endpoints contains a measurement for each endpoint + // that was discovered by the probe or the TH. + Endpoints []*THEndpointMeasurement +} + +// THDNSMeasurement is a DNS measurement performed by the test helper. +type THDNSMeasurement struct { + // Oddities lists all the oddities inside this measurement. + Oddities []Oddity + + // LookupHost contains all the host lookups. + LookupHost []*THLookupHostEvent `json:",omitempty"` + + // LookupHTTPSSvc contains all the HTTPSSvc lookups. + LookupHTTPSSvc []*THLookupHTTPSSvcEvent `json:",omitempty"` +} + +// THLookupHostEvent is the LookupHost event sent +// back by the test helper. +type THLookupHostEvent struct { + Network string + Address string + Domain string + Error *string + Oddity Oddity + Addrs []string +} + +// THLookupHTTPSSvcEvent is the LookupHTTPSvc event sent +// back by the test helper. +type THLookupHTTPSSvcEvent struct { + Network string + Address string + Domain string + Error *string + Oddity Oddity + IPv4 []string + IPv6 []string + ALPN []string +} + +// THEndpointMeasurement is an endpoint measurement +// performed by the test helper. +type THEndpointMeasurement struct { + // Oddities lists all the oddities inside this measurement. + Oddities []Oddity + + // Connect contains all the connect operations. 
+ Connect []*THConnectEvent `json:",omitempty"` + + // TLSHandshake contains all the TLS handshakes. + TLSHandshake []*THHandshakeEvent `json:",omitempty"` + + // QUICHandshake contains all the QUIC handshakes. + QUICHandshake []*THHandshakeEvent `json:",omitempty"` + + // HTTPRoundTrip contains all the HTTP round trips. + HTTPRoundTrip []*THHTTPRoundTripEvent `json:",omitempty"` +} + +// THConnectEvent is the connect event sent back by the test helper. +type THConnectEvent struct { + Network string + RemoteAddr string + Error *string + Oddity Oddity +} + +// THHandshakeEvent is the handshake event sent +// back by the test helper. +type THHandshakeEvent struct { + Network string + RemoteAddr string + SNI string + ALPN []string + Error *string + Oddity Oddity + TLSVersion string + CipherSuite string + NegotiatedProto string +} + +// THHTTPRoundTripEvent is the HTTP round trip event +// sent back by the test helper. +type THHTTPRoundTripEvent struct { + RequestMethod string + RequestURL string + RequestHeader http.Header + Error *string + Oddity Oddity + ResponseStatus int64 + ResponseHeader http.Header + ResponseBodySnapshotSize int64 + MaxBodySnapshotSize int64 +} + +// thMaxAcceptableBodySize is the maximum acceptable body size by TH code. +const thMaxAcceptableBodySize = 1 << 20 + +// +// TH client implementation +// + +// THClient is the high-level API to invoke the TH. This API +// should be used by command line clients. +type THClient struct { + // DNSServers is the MANDATORY list of DNS-over-UDP + // servers to use to discover endpoints locally. + DNServers []string + + // HTTPClient is the MANDATORY HTTP client to + // use for contacting the TH. + HTTPClient HTTPClient + + // ServerURL is the MANDATORY URL of the TH HTTP endpoint. + ServerURL string +} + +// Run calls the TH and returns the response or an error. 
+// +// Arguments: +// +// - ctx is the context with timeout/deadline/cancellation +// +// - URL is the URL the TH server should measure for us +// +// Algorithm: +// +// - use DNSServers to discover extra endpoints for the target URL +// +// - call the TH using the HTTPClient and the ServerURL +// +// - return response or error. +func (c *THClient) Run(ctx context.Context, URL string) (*THServerResponse, error) { + parsed, err := url.Parse(URL) + if err != nil { + return nil, err + } + mx := NewMeasurerWithDefaultSettings() + mx.RegisterUDPResolvers(c.DNServers...) + mx.LookupURLHostParallel(ctx, parsed) + httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + if err != nil { + return nil, err + } + var endpoints []*Endpoint + for _, epnt := range httpEndpoints { + endpoints = append(endpoints, &Endpoint{ + Network: epnt.Network, + Address: epnt.Address, + }) + } + return (&THClientCall{ + Endpoints: endpoints, + HTTPClient: c.HTTPClient, + Header: NewHTTPRequestHeaderForMeasuring(), + THURL: c.ServerURL, + TargetURL: URL, + }).Call(ctx) +} + +// THClientCall allows to perform a single TH client call. Make sure +// you fill all the fields marked as MANDATORY before use. +// +// This is a low-level API for calling the TH. If you are writing +// a CLI client, use THClient. If you are writing code for the +// Measurer, use THMeasurerClientCall. +type THClientCall struct { + // Endpoints contains the MANDATORY endpoints we discovered. + Endpoints []*Endpoint + + // HTTPClient is the MANDATORY HTTP client to + // use for contacting the TH. + HTTPClient HTTPClient + + // Header contains the MANDATORY request headers. + Header http.Header + + // THURL is the MANDATORY test helper URL. + THURL string + + // TargetURL is the MANDATORY URL to measure. + TargetURL string +} + +// Call performs the specified TH call and returns either a response or an error. 
+func (c *THClientCall) Call(ctx context.Context) (*THServerResponse, error) { + creq := &THClientRequest{ + Endpoints: c.Endpoints, + URL: c.TargetURL, + HTTPRequestHeaders: c.Header, + } + reqBody, err := json.Marshal(creq) + if err != nil { + return nil, err + } + req, err := http.NewRequestWithContext( + ctx, "POST", c.THURL, bytes.NewReader(reqBody)) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", fmt.Sprintf("ooniprobe-cli/%s", version.Version)) + return c.httpClientDo(req) +} + +// errTHRequestFailed is the error returned if the TH response is not 200 Ok. +var errTHRequestFailed = errors.New("th: request failed") + +func (c *THClientCall) httpClientDo(req *http.Request) (*THServerResponse, error) { + resp, err := c.HTTPClient.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != 200 { // THHandler returns either 400 or 200 + return nil, errTHRequestFailed + } + defer resp.Body.Close() + r := io.LimitReader(resp.Body, thMaxAcceptableBodySize) + respBody, err := iox.ReadAllContext(req.Context(), r) + if err != nil { + return nil, err + } + var sresp THServerResponse + if err := json.Unmarshal(respBody, &sresp); err != nil { + return nil, err + } + return &sresp, nil +} + +// +// TH server implementation +// + +// THHandler implements the test helper API. +// +// This handler exposes a unique HTTP endpoint that you need to +// mount to the desired path when creating the server. +// +// The canonical mount point for the HTTP endpoint is /api/v1/websteps. +// +// Accepted methods and request body: +// +// - we only accept POST; +// +// - we expect a THClientRequest as the body. +// +// Status code and response body: +// +// - on success, status is 200 and THServerResponse is the body; +// +// - on failure, status is 400 and there is no body. +// +type THHandler struct{} + +// ServerHTTP implements http.Handler.ServeHTTP. 
+func (h *THHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { + w.Header().Add("Server", fmt.Sprintf("oohelperd/%s", version.Version)) + if req.Method != "POST" { + w.WriteHeader(400) + return + } + reader := io.LimitReader(req.Body, thMaxAcceptableBodySize) + data, err := iox.ReadAllContext(req.Context(), reader) + if err != nil { + w.WriteHeader(400) + return + } + var creq THClientRequest + if err := json.Unmarshal(data, &creq); err != nil { + w.WriteHeader(400) + return + } + cresp, err := h.singleStep(req.Context(), &creq) + if err != nil { + w.WriteHeader(400) + return + } + // We assume that the following call cannot fail because it's a + // clearly serializable data structure. + data, err = json.Marshal(cresp) + runtimex.PanicOnError(err, "json.Marshal failed") + w.Header().Add("Content-Type", "application/json") + w.Write(data) +} + +// singleStep performs a singleStep measurement. +// +// The function name derives from the definition (we invented) +// of "web steps". Each redirection is a step. For each step you +// need to figure out the endpoints to use with the DNS. After +// that, you need to check all endpoints. Because here we do not +// perform redirection, this is just a single "step". +// +// The algorithm is the following: +// +// 1. parse the URL and return error if it does not parse or +// the scheme is neither HTTP nor HTTPS; +// +// 2. discover additional endpoints using a suitable DoH +// resolver and the URL's hostname as the domain; +// +// 3. measure each discovered endpoint. +// +// The return value is either a THServerResponse or an error. 
+func (h *THHandler) singleStep( + ctx context.Context, req *THClientRequest) (*THServerResponse, error) { + parsedURL, err := url.Parse(req.URL) + if err != nil || (parsedURL.Scheme != "http" && parsedURL.Scheme != "https") { + return nil, errors.New("invalid request url") + } + epnts, dns := h.dohQuery(ctx, parsedURL) + m := &THServerResponse{DNS: dns} + epnts = h.prepareEnpoints( + epnts, parsedURL, req.Endpoints, req.HTTPRequestHeaders) + mx := NewMeasurerWithDefaultSettings() + jar := NewCookieJar() + for me := range mx.HTTPEndpointGetParallel(ctx, jar, epnts...) { + m.Endpoints = append(m.Endpoints, h.newTHEndpointMeasurement(me)) + } + return m, nil +} + +// prepareEnpoints takes in input a list of endpoints discovered +// so far by the TH and extends this list by adding the endpoints +// discovered by the client. Before returning, this function +// ensures that we don't have any duplicate endpoint. +// +// Arguments: +// +// - the list of endpoints discovered by the TH +// +// - the URL provided by the probe +// +// - the endpoints provided by the probe +// +// - the headers provided by the probe +// +// The return value may be an empty list if both the client +// and the TH failed to discover any endpoint. +// +// When the return value contains endpoints, we also fill +// the HTTPEndpoint.Header field using the header param +// provided by the client. We don't allow arbitrary headers: +// we only copy a subset of allowed headers. 
+func (h *THHandler) prepareEnpoints(epnts []*HTTPEndpoint, URL *url.URL, + clientEpnts []*Endpoint, header http.Header) (out []*HTTPEndpoint) { + for _, epnt := range clientEpnts { + epnts = append(epnts, &HTTPEndpoint{ + Domain: URL.Hostname(), + Network: epnt.Network, + Address: epnt.Address, + SNI: URL.Hostname(), + ALPN: alpnForHTTPEndpoint(epnt.Network), + URL: URL, + Header: http.Header{}, // see the loop below + }) + } + dups := make(map[string]bool) + for _, epnt := range epnts { + id := epnt.String() + if _, found := dups[id]; found { + continue + } + dups[id] = true + epnt.Header = h.onlyAllowedHeaders(header) + out = append(out, epnt) + } + return +} + +func (h *THHandler) onlyAllowedHeaders(header http.Header) (out http.Header) { + out = http.Header{} + for k, vv := range header { + switch strings.ToLower(k) { + case "accept", "accept-language", "user-agent": + for _, v := range vv { + out.Add(k, v) + } + default: + // ignore all the other headers + } + } + return +} + +// +// TH server: marshalling of endpoint measurements +// + +// newTHEndpointMeasurement takes in input an endpoint +// measurement performed by a measurer and emits in output +// the simplified THEndpointMeasurement equivalent. 
+func (h *THHandler) newTHEndpointMeasurement(in *Measurement) *THEndpointMeasurement { + return &THEndpointMeasurement{ + Oddities: in.Oddities, + Connect: h.newTHConnectEventList(in.Connect), + TLSHandshake: h.newTLSHandshakesList(in.TLSHandshake), + QUICHandshake: h.newQUICHandshakeList(in.QUICHandshake), + HTTPRoundTrip: h.newHTTPRoundTripList(in.HTTPRoundTrip), + } +} + +func (h *THHandler) newTHConnectEventList(in []*NetworkEvent) (out []*THConnectEvent) { + for _, e := range in { + out = append(out, &THConnectEvent{ + Network: e.Network, + RemoteAddr: e.RemoteAddr, + Error: h.errorToFailure(e.Error), + Oddity: e.Oddity, + }) + } + return +} + +func (h *THHandler) newTLSHandshakesList(in []*TLSHandshakeEvent) (out []*THHandshakeEvent) { + for _, e := range in { + out = append(out, &THHandshakeEvent{ + Network: e.Network, + RemoteAddr: e.RemoteAddr, + SNI: e.SNI, + ALPN: e.ALPN, + Error: h.errorToFailure(e.Error), + Oddity: e.Oddity, + TLSVersion: e.TLSVersion, + CipherSuite: e.CipherSuite, + NegotiatedProto: e.NegotiatedProto, + }) + } + return +} + +func (h *THHandler) newQUICHandshakeList(in []*QUICHandshakeEvent) (out []*THHandshakeEvent) { + for _, e := range in { + out = append(out, &THHandshakeEvent{ + Network: e.Network, + RemoteAddr: e.RemoteAddr, + SNI: e.SNI, + ALPN: e.ALPN, + Error: h.errorToFailure(e.Error), + Oddity: e.Oddity, + TLSVersion: e.TLSVersion, + CipherSuite: e.CipherSuite, + NegotiatedProto: e.NegotiatedProto, + }) + } + return +} + +func (h *THHandler) newHTTPRoundTripList(in []*HTTPRoundTripEvent) (out []*THHTTPRoundTripEvent) { + for _, e := range in { + out = append(out, &THHTTPRoundTripEvent{ + RequestMethod: e.RequestMethod, + RequestURL: e.RequestURL.String(), + RequestHeader: e.RequestHeader, + Error: h.errorToFailure(e.Error), + Oddity: e.Oddity, + ResponseStatus: int64(e.ResponseStatus), + ResponseHeader: e.ResponseHeader, + ResponseBodySnapshotSize: int64(len(e.ResponseBodySnapshot)), + MaxBodySnapshotSize: 
e.MaxBodySnapshotSize, + }) + } + return +} + +// +// TH server: DNS +// + +// thResolverURL is the DNS resolver URL used by the TH. We use an +// encrypted resolver to reduce the risk that there is DNS-over-UDP +// censorship in the place where we deploy the TH. +const thResolverURL = "https://dns.google/dns-query" + +// thResolver is the DNS resolver used by the TH. +// +// Here we're using github.com/apex/log as the logger, which +// is fine because this is backend only code. +var thResolver = netxlite.WrapResolver(log.Log, dnsx.NewSerialResolver( + dnsx.NewDNSOverHTTPS(http.DefaultClient, thResolverURL), +)) + +// dohQuery discovers endpoints for the URL's hostname using DoH. +// +// Arguments: +// +// - ctx is the context for deadline/cancellation/timeout +// +// - parsedURL is the parsed URL +// +// Returns: +// +// - a possibly empty list of HTTPEndpoints (this happens for +// example if the URL's hostname causes NXDOMAIN) +// +// - the THDNSMeasurement for the THServeResponse message +func (h *THHandler) dohQuery(ctx context.Context, URL *url.URL) ( + epnts []*HTTPEndpoint, meas *THDNSMeasurement) { + db := NewDB(time.Now()) // timing is not sent back to client + r := WrapResolver(0, OriginTH, db, thResolver) + meas = &THDNSMeasurement{} + op := newOperationLogger(log.Log, + "dohQuery A/AAAA for %s with %s", URL.Hostname(), r.Address()) + _, err := r.LookupHost(ctx, URL.Hostname()) + op.Stop(err) + meas.LookupHost = h.newTHLookupHostList(db) + switch URL.Scheme { + case "https": + op := newOperationLogger(log.Log, + "dohQuery HTTPSSvc for %s with %s", URL.Hostname(), r.Address()) + _, err = r.LookupHTTPSSvcWithoutRetry(ctx, URL.Hostname()) + op.Stop(err) + meas.LookupHTTPSSvc = h.newTHLookupHTTPSSvcList(db) + default: + // nothing + } + epnts, _ = db.SelectAllHTTPEndpointsForURL(URL) // nil on failure + return +} + +func (h *THHandler) newTHLookupHostList(db *DB) (out []*THLookupHostEvent) { + for _, entry := range db.SelectAllFromLookupHost() { + out = 
append(out, &THLookupHostEvent{ + Network: entry.Network, + Address: entry.Address, + Domain: entry.Domain, + Error: h.errorToFailure(entry.Error), + Oddity: entry.Oddity, + Addrs: entry.Addrs, + }) + } + return +} + +func (h *THHandler) newTHLookupHTTPSSvcList(db *DB) (out []*THLookupHTTPSSvcEvent) { + for _, entry := range db.SelectAllFromLookupHTTPSSvc() { + out = append(out, &THLookupHTTPSSvcEvent{ + Network: entry.Network, + Address: entry.Address, + Domain: entry.Domain, + Error: h.errorToFailure(entry.Error), + Oddity: entry.Oddity, + IPv4: entry.IPv4, + IPv6: entry.IPv6, + ALPN: entry.ALPN, + }) + } + return +} + +// +// TH server: utility functions +// + +// errorToFailure converts an error type to a failure type (which +// is loosely defined as a pointer to a string). +// +// When the error is nil, the string pointer is nil. When the error is +// not nil, the pointer points to the err.Error() string. +// +// We cannot unmarshal Go errors from JSON. Therefore, we need to +// convert to this type when we're marshalling. +func (h *THHandler) errorToFailure(err error) (out *string) { + if err != nil { + s := err.Error() + out = &s + } + return +} diff --git a/internal/measurex/utils.go b/internal/measurex/utils.go new file mode 100644 index 0000000000..c22c3c66bc --- /dev/null +++ b/internal/measurex/utils.go @@ -0,0 +1,14 @@ +package measurex + +// alpnForHTTPEndpoint selects the correct ALPN for an HTTP endpoint +// given the network. On failure, we return a nil list. 
+func alpnForHTTPEndpoint(network EndpointNetwork) []string { + switch network { + case NetworkQUIC: + return []string{"h3"} + case NetworkTCP: + return []string{"h2", "http/1.1"} + default: + return nil + } +} From 2a304713d33f3fdde44207d00e6e27ffb45b387a Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 22 Sep 2021 16:58:39 +0200 Subject: [PATCH 29/53] make sure we also parse alt-svc --- internal/measurex/th.go | 70 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/internal/measurex/th.go b/internal/measurex/th.go index 90f0cda39f..567f07a736 100644 --- a/internal/measurex/th.go +++ b/internal/measurex/th.go @@ -360,6 +360,7 @@ func (h *THHandler) singleStep( for me := range mx.HTTPEndpointGetParallel(ctx, jar, epnts...) { m.Endpoints = append(m.Endpoints, h.newTHEndpointMeasurement(me)) } + h.maybeQUICFollowUp(ctx, m, epnts...) return m, nil } @@ -426,6 +427,75 @@ func (h *THHandler) onlyAllowedHeaders(header http.Header) (out http.Header) { return } +// maybeQUICFollowUp checks whether we need to use Alt-Svc to check +// for QUIC. We query for HTTPSSvc but currently only Cloudflare +// implements this proposed standard. So, this function is +// where we take care of all the other servers implementing QUIC. +func (h *THHandler) maybeQUICFollowUp(ctx context.Context, + m *THServerResponse, epnts ...*HTTPEndpoint) { + altsvc := []string{} + for _, epnt := range m.Endpoints { + // Check whether we have a QUIC handshake. If so, then + // HTTPSSvc worked and we can stop here. + if epnt.QUICHandshake != nil { + return + } + for _, rtrip := range epnt.HTTPRoundTrip { + if v := rtrip.ResponseHeader.Get("alt-svc"); v != "" { + altsvc = append(altsvc, v) + } + } + } + // syntax: + // + // Alt-Svc: clear + // Alt-Svc: =; ma= + // Alt-Svc: =; ma=; persist=1 + // + // multiple entries may be separated by comma. 
+ // + // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Alt-Svc + for _, header := range altsvc { + entries := strings.Split(header, ",") + if len(entries) < 1 { + continue + } + for _, entry := range entries { + parts := strings.Split(entry, ";") + if len(parts) < 1 { + continue + } + if parts[0] == "h3=\":443\"" { + h.doQUICFollowUp(ctx, m, epnts...) + return + } + } + } +} + +// doQUICFollowUp runs when we know there's QUIC support via Alt-Svc. +func (h *THHandler) doQUICFollowUp(ctx context.Context, + m *THServerResponse, epnts ...*HTTPEndpoint) { + quicEpnts := []*HTTPEndpoint{} + // do not mutate the existing list rather create a new one + for _, epnt := range epnts { + quicEpnts = append(quicEpnts, &HTTPEndpoint{ + Domain: epnt.Domain, + Network: NetworkQUIC, + Address: epnt.Address, + SNI: epnt.SNI, + ALPN: []string{"h3"}, + URL: epnt.URL, + Header: epnt.Header, + }) + } + mx := NewMeasurerWithDefaultSettings() + jar := NewCookieJar() + for me := range mx.HTTPEndpointGetParallel(ctx, jar, quicEpnts...) 
{ + m.Endpoints = append(m.Endpoints, h.newTHEndpointMeasurement(me)) + } +} + // // TH server: marshalling of endpoint measurements // From a75f50f5e29e2172b278209a38bcb7e56de164fe Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 22 Sep 2021 17:24:57 +0200 Subject: [PATCH 30/53] document each file's content and rationale --- internal/measurex/archival.go | 6 ++++++ internal/measurex/bogon.go | 8 +++++++ internal/measurex/db.go | 13 +++++++++++ internal/measurex/dialer.go | 7 ++++++ internal/measurex/dnsx.go | 7 ++++++ internal/measurex/http.go | 13 +++++++++++ internal/measurex/logger.go | 6 ++++++ internal/measurex/measurement.go | 7 ++++++ internal/measurex/measurer.go | 9 ++++++++ internal/measurex/oddity.go | 6 ++++++ internal/measurex/origin.go | 6 ++++++ internal/measurex/quic.go | 7 ++++++ internal/measurex/resolver.go | 7 ++++++ internal/measurex/th.go | 37 ++++++++++++++++++++++++++++++++ internal/measurex/tls.go | 7 ++++++ internal/measurex/utils.go | 6 ++++++ internal/measurex/wcth.go | 11 ++++++++++ 17 files changed, 163 insertions(+) diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index 2218a9a10d..fb13601f0b 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -1,5 +1,11 @@ package measurex +// +// Archival +// +// This file defines how we serialize measurements to JSON. +// + import ( "net/http" "strings" diff --git a/internal/measurex/bogon.go b/internal/measurex/bogon.go index e2191b6fc0..e27d604b8b 100644 --- a/internal/measurex/bogon.go +++ b/internal/measurex/bogon.go @@ -1,5 +1,13 @@ package measurex +// +// Bogon +// +// This file helps us to decide if an IPAddr is a bogon. +// + +// TODO(bassosimone): code in engine/netx should use this file. 
+ import ( "net" diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 537d6f58c9..042473fd47 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -1,5 +1,18 @@ package measurex +// +// DB +// +// This file defines two types: +// +// - EventDB is the interface for storing events that +// we pass to the networking code +// +// - DB is a concrete implementation of EventDB that we +// use for building measurements and generally for extracting +// information useful for implementing experiments +// + import ( "errors" "fmt" diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index 5a0afb88e0..8d355aafaa 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -1,5 +1,12 @@ package measurex +// +// Dialer +// +// This file contains basic networking code. We wrap the fundamental +// netxlite.Dialer type to store measurements into an EventDB. +// + import ( "context" "net" diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index fda8473ab0..10e930c8dc 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -1,5 +1,12 @@ package measurex +// +// DNSX (DNS eXtensions) +// +// This file contains basic networking code. We wrap the fundamental +// dnsx.RoundTripper type to store measurements into an EventDB. +// + import ( "context" "time" diff --git a/internal/measurex/http.go b/internal/measurex/http.go index c5b6b635ab..5c9e93d791 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -1,5 +1,18 @@ package measurex +// +// HTTP +// +// This file contains basic networking code. 
We provide: +// +// - a wrapper for netxlite.HTTPTransport that stores +// round trip events into an EventDB +// +// - an interface that is http.Client like and one internal +// implementation of such an interface that helps us to +// store HTTP redirections info into an EventDB +// + import ( "bytes" "context" diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go index 4ccbbc1579..82f113ee80 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -1,5 +1,11 @@ package measurex +// +// Logger +// +// Code for logging +// + import ( "fmt" "sync" diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index ef438b7c3b..3d6248131e 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -1,5 +1,12 @@ package measurex +// +// Measurement +// +// Here we define the fundamental measurement types +// produced by this package. +// + import "time" // Measurement groups all the events that have the same MeasurementID. This diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 143b65b8bd..50d8e7a389 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -1,5 +1,14 @@ package measurex +// +// Measurer +// +// High-level API for running measurements. The code in here +// has been designed to easily implement the new websteps +// network experiment, which is quite complex. It should be +// possible to write most other experiments using a Measurer. +// + import ( "context" "crypto/tls" diff --git a/internal/measurex/oddity.go b/internal/measurex/oddity.go index b5f486eacf..cb4dda9e29 100644 --- a/internal/measurex/oddity.go +++ b/internal/measurex/oddity.go @@ -1,5 +1,11 @@ package measurex +// +// Oddity +// +// Here we define the oddity type. +// + // Oddity is an unexpected result on the probe or // or test helper side during a measurement. 
We will // promote the oddity to anomaly if the probe and diff --git a/internal/measurex/origin.go b/internal/measurex/origin.go index c93811f56b..ab9c7a06a6 100644 --- a/internal/measurex/origin.go +++ b/internal/measurex/origin.go @@ -1,5 +1,11 @@ package measurex +// +// Origin +// +// Here we define the origin type. +// + // Origin is the origin of a measurement. type Origin string diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index 21385e5797..c471fe9d8c 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -1,5 +1,12 @@ package measurex +// +// QUIC +// +// Wrappers for netxlite's QUIC code that are capable of +// saving interesting events into an EventDB. +// + import ( "context" "crypto/tls" diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 87e8edebac..6a2a7caa3b 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -1,5 +1,12 @@ package measurex +// +// Resolver +// +// Wrappers for netxlite's resolvers that are able +// to store events into an EventDB. +// + import ( "context" "time" diff --git a/internal/measurex/th.go b/internal/measurex/th.go index 567f07a736..250d745183 100644 --- a/internal/measurex/th.go +++ b/internal/measurex/th.go @@ -1,5 +1,42 @@ package measurex +// +// TH (Test Helper) +// +// This file contains an implementation of the +// (proposed) websteps test helper spec. +// +// Why is this code in this package? +// +// The measurex model allows you to define test +// helpers that run in the DNS lookup phase. This +// model is quite nice because it allows you to +// discover additional IP addresses for the domain +// you're testing. When your local resolver is +// censored, the TH is how we get extra IP addresses +// for the domain to test. +// +// The current TH code requires you to submit an +// HTTP or HTTPS URL. If we relax this constraint, +// we can have a more flexible test helper that +// may be useful also for other experiments. 
+// +// Here are some ideas: +// +// - `dnslookup://domain` lookups a domain according +// to the test helper's resolver; +// +// - `tlshandshake://endpoint` performs a domain +// lookup and then a TLS handshake; +// +// - `quichandshake://endpoint` likewise. +// +// To conclude, this code is here because its +// trajectory is that of making it the base +// building block for building several types +// of test helpers. +// + import ( "bytes" "context" diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index a884164772..5a6e58cb65 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -1,5 +1,12 @@ package measurex +// +// TLS +// +// Wrappers for netxlite's TLS that allow one to +// save network events into an EventDB type. +// + import ( "context" "crypto/tls" diff --git a/internal/measurex/utils.go b/internal/measurex/utils.go index c22c3c66bc..ea8fec6dd1 100644 --- a/internal/measurex/utils.go +++ b/internal/measurex/utils.go @@ -1,5 +1,11 @@ package measurex +// +// Utils +// +// This is where we put free functions. +// + // alpnForHTTPEndpoint selects the correct ALPN for an HTTP endpoint // given the network. On failure, we return a nil list. func alpnForHTTPEndpoint(network EndpointNetwork) []string { diff --git a/internal/measurex/wcth.go b/internal/measurex/wcth.go index 1eeaeeb650..a164c05a27 100644 --- a/internal/measurex/wcth.go +++ b/internal/measurex/wcth.go @@ -1,5 +1,16 @@ package measurex +// +// WCTH (Web Connectivity Test Helper) +// +// We use the WCTH as an alternative DNS for gathering +// additional IP addresses to test, which is useful when +// your local DNS is censored. +// +// This code is merely here to bootstrap websteps and +// should be removed when we have a proper test helper. 
+// + import ( "bytes" "context" From 00258d2eee35b112ccc33c9a3782136bf05c3b36 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 22 Sep 2021 23:56:35 +0200 Subject: [PATCH 31/53] heavy refactoring to simplify plan C --- .../engine/experiment/webstepsx/webstepsx.go | 27 +- internal/measurex/archival.go | 480 +++++---------- internal/measurex/db.go | 549 ++---------------- internal/measurex/dialer.go | 248 +++----- internal/measurex/dnsx.go | 78 ++- internal/measurex/endpoint.go | 67 +++ internal/measurex/http.go | 176 ++---- internal/measurex/logger.go | 3 + internal/measurex/measurement.go | 307 ++++++---- internal/measurex/measurer.go | 402 ++++++------- internal/measurex/origin.go | 18 - internal/measurex/quic.go | 285 +++------ internal/measurex/resolver.go | 189 +++--- internal/measurex/th.go | 33 +- internal/measurex/tls.go | 110 ++-- internal/measurex/utils.go | 57 ++ internal/measurex/wcth.go | 205 ------- 17 files changed, 1103 insertions(+), 2131 deletions(-) create mode 100644 internal/measurex/endpoint.go delete mode 100644 internal/measurex/origin.go delete mode 100644 internal/measurex/wcth.go diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index f01b334836..1e92bbce10 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -27,7 +27,7 @@ type Config struct{} // TestKeys contains the experiment's test keys. type TestKeys struct { - *measurex.ArchivalURLMeasurement + *measurex.URLMeasurement } // Measurer performs the measurement. 
@@ -101,25 +101,28 @@ func (mx *Measurer) RunAsync( func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, URL string, th *model.Service, out chan<- *model.ExperimentAsyncTestKeys) { defer close(out) - begin := time.Now() - db := measurex.NewDB(begin) mmx := &measurex.Measurer{ - DB: db, - HTTPClient: sess.DefaultHTTPClient(), - Logger: sess.Logger(), - Origin: measurex.OriginProbe, + Begin: time.Now(), + HTTPClient: sess.DefaultHTTPClient(), + Logger: sess.Logger(), + Resolvers: []*measurex.ResolverInfo{{ + Network: "system", + Address: "", + }, { + Network: "udp", + Address: "8.8.4.4:53", + }, { + Network: "udp", + Address: "1.1.1.1:53", + }}, TLSHandshaker: netxlite.NewTLSHandshakerStdlib(sess.Logger()), } - mmx.RegisterUDPResolvers("8.8.4.4:53", "8.8.8.8:53", "1.1.1.1:53", "1.0.0.1:53") - mmx.RegisterWCTH(th.Address) cookies := measurex.NewCookieJar() in := mmx.MeasureHTTPURLAndFollowRedirections(ctx, URL, cookies) for m := range in { out <- &model.ExperimentAsyncTestKeys{ MeasurementRuntime: m.TotalRuntime.Seconds(), - TestKeys: &TestKeys{ - measurex.NewArchivalURLMeasurement(m), - }, + TestKeys: &TestKeys{m}, Extensions: map[string]int64{ archival.ExtHTTP.Name: archival.ExtHTTP.V, archival.ExtDNS.Name: archival.ExtDNS.V, diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index fb13601f0b..0866c3295a 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -1,224 +1,159 @@ package measurex -// -// Archival -// -// This file defines how we serialize measurements to JSON. -// - import ( "net/http" "strings" "unicode/utf8" ) -// ArchivalURLMeasurement is the archival format for URLMeasurement. 
-type ArchivalURLMeasurement struct { - URL string `json:"url"` - CannotParseURL bool `json:"cannot_parse_url"` - DNS []*ArchivalMeasurement `json:"dns"` - TH []*ArchivalMeasurement `json:"th"` - CannotGenerateEndpoints bool `json:"cannot_generate_endpoints"` - Endpoints []*ArchivalMeasurement `json:"endpoints"` - DNSRuntime float64 `json:"dns_runtime"` - THRuntime float64 `json:"th_runtime"` - EpntsRuntime float64 `json:"epnts_runtime"` -} +// +// Archival +// +// This file defines helpers to serialize to the OONI data format. Some of +// our data structures are already pretty close to the desired format, while +// others are more flat, which makes processing simpler. So, when we need +// help we use routines from this file to serialize correctly. +// -// NewArchivalURLMeasurement constructs a new instance -// of the ArchivalURLMeasurement type. -func NewArchivalURLMeasurement(in *URLMeasurement) (out *ArchivalURLMeasurement) { - return &ArchivalURLMeasurement{ - URL: in.URL, - CannotParseURL: in.CannotParseURL, - DNS: NewArchivalMeasurementList(in.DNS...), - TH: NewArchivalMeasurementList(in.TH...), - CannotGenerateEndpoints: in.CannotGenerateEndpoints, - Endpoints: NewArchivalMeasurementList(in.Endpoints...), - DNSRuntime: in.DNSRuntime.Seconds(), - THRuntime: in.THRuntime.Seconds(), - EpntsRuntime: in.EpntsRuntime.Seconds(), - } -} +// +// DNSRoundTrip +// -// ArchivalMeasurement is the archival type for Measurement.
-type ArchivalMeasurement struct { - Oddities []Oddity `json:"oddities"` - Connect []*ArchivalNetworkEvent `json:"connect,omitempty"` - ReadWrite []*ArchivalNetworkEvent `json:"read_write,omitempty"` - TLSHandshake []*ArchivalTLSQUICHandshake `json:"tls_handshake,omitempty"` - QUICHandshake []*ArchivalTLSQUICHandshake `json:"quic_handshake,omitempty"` - LookupHost []*ArchivalDNSLookup `json:"lookup_host,omitempty"` - LookupHTTPSSvc []*ArchivalDNSLookup `json:"lookup_httpssvc,omitempty"` - DNSRoundTrip []*ArchivalDNSRoundTrip `json:"dns_round_trip,omitempty"` - HTTPRoundTrip []*ArchivalHTTPRoundTrip `json:"http_round_trip,omitempty"` -} +// TODO(bassosimone): this is a candidate to not be in archival but +// rather to be what we actually save into the WritableDB. -// NewArchivalMeasurement constructs a new instance -// of the ArchivalMeasurement type. -func NewArchivalMeasurement(in *Measurement) (out *ArchivalMeasurement) { - return &ArchivalMeasurement{ - Oddities: in.Oddities, - Connect: NewArchivalNetworkEventList(in.Connect...), - ReadWrite: NewArchivalNetworkEventList(in.ReadWrite...), - TLSHandshake: NewArchivalTLSHandshakeList(in.TLSHandshake...), - QUICHandshake: NewArchivalQUICHandshakeList(in.QUICHandshake...), - LookupHost: NewArchivalLookupHostList(in.LookupHost...), - LookupHTTPSSvc: NewArchivalLookupHTTPSSvcList(in.LookupHTTPSSvc...), - DNSRoundTrip: NewArchivalDNSRoundTripList(in.DNSRoundTrip...), - HTTPRoundTrip: NewArchivalHTTPRoundTripList(in.HTTPRoundTrip...), - } +// ArchivalDNSRoundTrip is the archival format for DNSRoundTripEvent. +type ArchivalDNSRoundTrip struct { + Network string `json:"engine"` + Address string `json:"resolver_address"` + Query *ArchivalBinaryData `json:"raw_query"` + Started float64 `json:"started"` + Finished float64 `json:"t"` + Error error `json:"failure"` + Reply *ArchivalBinaryData `json:"raw_reply"` } -// NewArchivalMeasurementList takes in input a list of -// Measurement and builds a list of ArchivalMeasurement.
-func NewArchivalMeasurementList(in ...*Measurement) (out []*ArchivalMeasurement) { - for _, m := range in { - out = append(out, NewArchivalMeasurement(m)) +// NewArchivalDNSRoundTrip converts a DNSRoundTripEvent +// to the corresponding archival format. +func NewArchivalDNSRoundTrip(in *DNSRoundTripEvent) (out *ArchivalDNSRoundTrip) { + return &ArchivalDNSRoundTrip{ + Network: in.Network, + Address: in.Address, + Query: NewArchivalBinaryData(in.Query), + Started: in.Started, + Finished: in.Finished, + Error: in.Error, + Reply: NewArchivalBinaryData(in.Reply), } - return } -// ArchivalNetworkEvent is the data format we use -// to archive all the network events. -type ArchivalNetworkEvent struct { - // JSON names compatible with df-008-netevents - RemoteAddr string `json:"address"` - ConnID int64 `json:"conn_id"` - Error error `json:"failure"` - Count int `json:"num_bytes,omitempty"` - Operation string `json:"operation"` - Network string `json:"proto"` - Finished float64 `json:"t"` - - // JSON names that are not part of the spec - Origin Origin `json:"origin"` - Started float64 `json:"started"` - Oddity Oddity `json:"oddity"` -} +// +// BinaryData +// -// NewArchivalNetworkEvent takes in input a NetworkEvent -// and emits in output an ArchivalNetworkEvent. -func NewArchivalNetworkEvent(in *NetworkEvent) (out *ArchivalNetworkEvent) { - return &ArchivalNetworkEvent{ - RemoteAddr: in.RemoteAddr, - ConnID: in.ConnID, - Error: in.Error, - Count: in.Count, - Operation: in.Operation, - Network: in.Network, - Finished: in.Finished.Seconds(), - Origin: in.Origin, - Started: in.Started.Seconds(), - Oddity: in.Oddity, - } +// ArchivalBinaryData is the archival format for binary data. +type ArchivalBinaryData struct { + Data []byte `json:"data"` + Format string `json:"format"` } -// NewArchivalNetworkEventList takes in input a list of -// NetworkEvent and builds a list of ArchivalNetworkEvent. 
-func NewArchivalNetworkEventList(in ...*NetworkEvent) (out []*ArchivalNetworkEvent) { - for _, ev := range in { - out = append(out, NewArchivalNetworkEvent(ev)) +// NewArchivalBinaryData builds a new ArchivalBinaryData +// from an array of bytes. If the array is nil or empty, we return nil. +func NewArchivalBinaryData(data []byte) (out *ArchivalBinaryData) { + if len(data) > 0 { + out = &ArchivalBinaryData{ + Data: data, + Format: "base64", + } } return } -// ArchivalTLSQUICHandshake is the archival format for TLSHandshakeEvent -// as well as for QUICHandshakeEvent. -type ArchivalTLSQUICHandshake struct { - // JSON names compatible with df-006-tlshandshake - CipherSuite string `json:"cipher_suite"` - ConnID int64 `json:"conn_id"` - Error error `json:"failure"` - NegotiatedProto string `json:"negotiated_protocol"` - PeerCerts []*ArchivalBinaryData `json:"peer_certificates"` - Finished float64 `json:"t"` - TLSVersion string `json:"tls_version"` +// +// HTTPRoundTrip +// + +// ArchivalHTTPRoundTrip is the archival format for HTTPRoundTripEvent. +type ArchivalHTTPRoundTrip struct { + // JSON names following the df-001-httpt data format. + Error error `json:"failure"` + Request *ArchivalHTTPRequest `json:"request"` + Response *ArchivalHTTPResponse `json:"response"` + Finished float64 `json:"t"` + Started float64 `json:"started"` - // JSON names that are not part of the spec - Origin Origin `json:"origin"` - Engine string `json:"engine"` - RemoteAddr string `json:"address"` - SNI string `json:"server_name"` // already used in prod - ALPN []string `json:"alpn"` - SkipVerify bool `json:"no_tls_verify"` // already used in prod - Started float64 `json:"started"` - Oddity Oddity `json:"oddity"` - Network string `json:"network"` + // Names not in the specification + Oddity Oddity `json:"oddity"` } -// NewArchivalTLSHandshakeList takes in input a list of -// TLSHandshakeEvent and builds a list of ArchivalTLSQUICHandshake.
-func NewArchivalTLSHandshakeList(in ...*TLSHandshakeEvent) (out []*ArchivalTLSQUICHandshake) { - for _, ev := range in { - out = append(out, NewArchivalTLSHandshake(ev)) - } - return +// ArchivalHTTPRequest is the archival representation of a request. +type ArchivalHTTPRequest struct { + Method string `json:"method"` + URL string `json:"url"` + HeadersList [][]string `json:"headers_list"` } -// NewArchivalTLSHandshake converts a TLSHandshakeEvent to -// its corresponding archival format. -func NewArchivalTLSHandshake(in *TLSHandshakeEvent) (out *ArchivalTLSQUICHandshake) { - return &ArchivalTLSQUICHandshake{ - CipherSuite: in.CipherSuite, - ConnID: in.ConnID, - Error: in.Error, - NegotiatedProto: in.NegotiatedProto, - PeerCerts: NewArchivalTLSCert(in.PeerCerts), - Finished: in.Finished.Seconds(), - TLSVersion: in.TLSVersion, - Origin: in.Origin, - Engine: in.Engine, - RemoteAddr: in.RemoteAddr, - SNI: in.SNI, - ALPN: in.ALPN, - SkipVerify: in.SkipVerify, - Started: in.Started.Seconds(), - Oddity: in.Oddity, - Network: in.Network, +// ArchivalHTTPResponse is the archival representation of a response. +type ArchivalHTTPResponse struct { + Code int64 `json:"code"` + HeadersList [][]string `json:"headers_list"` + Body interface{} `json:"body"` + BodyIsTruncated bool `json:"body_is_truncated"` +} + +// NewArchivalHTTPRoundTrip converts an HTTPRoundTripEvent +// to the corresponding archival format. 
+func NewArchivalHTTPRoundTrip(in *HTTPRoundTripEvent) (out *ArchivalHTTPRoundTrip) { + return &ArchivalHTTPRoundTrip{ + Error: in.Error, + Request: &ArchivalHTTPRequest{ + Method: in.RequestMethod, + URL: in.RequestURL.String(), + HeadersList: NewArchivalHeadersList(in.RequestHeader), + }, + Response: &ArchivalHTTPResponse{ + Code: int64(in.ResponseStatus), + HeadersList: NewArchivalHeadersList(in.ResponseHeader), + Body: NewArchivalHTTPBody(in.ResponseBodySnapshot), + BodyIsTruncated: int64(len(in.ResponseBodySnapshot)) >= in.MaxBodySnapshotSize, + }, + Finished: in.Finished, + Started: in.Started, + Oddity: in.Oddity, } } -// NewArchivalQUICHandshakeList takes in input a list of -// QUICHandshakeEvent and builds a list of ArchivalTLSQUICHandshake. -func NewArchivalQUICHandshakeList(in ...*QUICHandshakeEvent) (out []*ArchivalTLSQUICHandshake) { - for _, ev := range in { - out = append(out, NewArchivalQUICHandshake(ev)) +// NewArchivalHeadersList builds a new HeadersList from http.Header. +func NewArchivalHeadersList(in http.Header) (out [][]string) { + for k, vv := range in { + for _, v := range vv { + out = append(out, []string{k, v}) + } } return } -// NewArchivalQUICHandshake converts a QUICHandshakeEvent to -// its corresponding archival format. -func NewArchivalQUICHandshake(in *QUICHandshakeEvent) (out *ArchivalTLSQUICHandshake) { - return &ArchivalTLSQUICHandshake{ - CipherSuite: in.CipherSuite, - ConnID: in.ConnID, - Error: in.Error, - NegotiatedProto: in.NegotiatedProto, - PeerCerts: NewArchivalTLSCert(in.PeerCerts), - Finished: in.Finished.Seconds(), - TLSVersion: in.TLSVersion, - Origin: in.Origin, - RemoteAddr: in.RemoteAddr, - SNI: in.SNI, - ALPN: in.ALPN, - SkipVerify: in.SkipVerify, - Started: in.Started.Seconds(), - Oddity: in.Oddity, - Network: in.Network, +// NewArchivalHTTPBody builds a new HTTP body for archival from the body. 
+func NewArchivalHTTPBody(body []byte) (out interface{}) { + if body != nil { + if utf8.Valid(body) { + return string(body) + } + out = &ArchivalBinaryData{ + Data: body, + Format: "base64", + } } + return } -// ArchivalBinaryData is the archival format for binary data. -type ArchivalBinaryData struct { - Data []byte - Format string -} +// +// TLSCerts +// // NewArchivalTLSCertList builds a new []ArchivalBinaryData // from a list of raw x509 certificates data. -func NewArchivalTLSCert(in [][]byte) (out []*ArchivalBinaryData) { +func NewArchivalTLSCerts(in [][]byte) (out []*ArchivalBinaryData) { for _, cert := range in { out = append(out, &ArchivalBinaryData{ Data: cert, @@ -228,6 +163,10 @@ func NewArchivalTLSCert(in [][]byte) (out []*ArchivalBinaryData) { return } +// +// DNS LookupHost and LookupHTTPSSvc +// + // ArchivalDNSLookup is the archival format for DNS. type ArchivalDNSLookup struct { // JSON names compatible with df-002-dnst's spec @@ -241,7 +180,6 @@ type ArchivalDNSLookup struct { // Names not part of the spec. Started float64 `json:"started"` - Origin Origin `json:"origin"` Oddity Oddity `json:"oddity"` } @@ -256,6 +194,16 @@ type ArchivalDNSAnswer struct { ALPN string `json:"alpn,omitempty"` } +// NewArchivalLookupHostList converts a []*LookupHostEvent +// to the corresponding archival format. +func NewArchivalLookupHostList(in ...*LookupHostEvent) (out []*ArchivalDNSLookup) { + for _, ev := range in { + out = append(out, NewArchivalLookupHost(ev, "A")) + out = append(out, NewArchivalLookupHost(ev, "AAAA")) + } + return +} + // NewArchivalLookupHost generates an ArchivalDNS entry for the given // LookupHost event and for the given query type. (OONI's DNS data // format splits A and AAAA queries, so we need to run this func twice.) 
@@ -267,9 +215,8 @@ func NewArchivalLookupHost(in *LookupHostEvent, qtype string) (out *ArchivalDNSL Domain: in.Domain, QueryType: qtype, Address: in.Address, - Finished: in.Finished.Seconds(), - Started: in.Started.Seconds(), - Origin: in.Origin, + Finished: in.Finished, + Started: in.Started, Oddity: in.Oddity, } } @@ -298,16 +245,6 @@ func NewArchivalDNSAnswersLookupHost(addrs []string, qtype string) (out []*Archi return } -// NewArchivalLookupHostList converts a []*LookupHostEvent -// to the corresponding archival format. -func NewArchivalLookupHostList(in ...*LookupHostEvent) (out []*ArchivalDNSLookup) { - for _, ev := range in { - out = append(out, NewArchivalLookupHost(ev, "A")) - out = append(out, NewArchivalLookupHost(ev, "AAAA")) - } - return -} - // NewArchivalLookupHTTPSSvc generates an ArchivalDNS entry for the given // LookupHTTPSSvc event. func NewArchivalLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out *ArchivalDNSLookup) { @@ -318,13 +255,21 @@ func NewArchivalLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out *ArchivalDNSLookup) Domain: in.Domain, QueryType: "HTTPS", Address: in.Address, - Finished: in.Finished.Seconds(), - Started: in.Started.Seconds(), - Origin: in.Origin, + Finished: in.Finished, + Started: in.Started, Oddity: in.Oddity, } } +// NewArchivalLookupHTTPSSvcList converts a []*LookupHTTPSSvcEvent +// to the corresponding archival format. +func NewArchivalLookupHTTPSSvcList(in ...*LookupHTTPSSvcEvent) (out []*ArchivalDNSLookup) { + for _, ev := range in { + out = append(out, NewArchivalLookupHTTPSSvc(ev)) + } + return +} + // NewArchivalDNSAnswersLookupHTTPSSvc builds the ArchivalDNSAnswer // vector for a LookupHTTPSSvc operation. 
func NewArchivalDNSAnswersLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out []*ArchivalDNSAnswer) { @@ -348,144 +293,3 @@ func NewArchivalDNSAnswersLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out []*Archiv } return } - -// NewArchivalLookupHTTPSSvcList converts a []*LookupHTTPSSvcEvent -// to the corresponding archival format. -func NewArchivalLookupHTTPSSvcList(in ...*LookupHTTPSSvcEvent) (out []*ArchivalDNSLookup) { - for _, ev := range in { - out = append(out, NewArchivalLookupHTTPSSvc(ev)) - } - return -} - -// ArchivalDNSRoundTrip is the archival fromat for DNSRoundTripEvent. -type ArchivalDNSRoundTrip struct { - Origin Origin `json:"origin"` - Network string `json:"engine"` - Address string `json:"resolver_address"` - Query *ArchivalBinaryData `json:"raw_query"` - Started float64 `json:"started"` - Finished float64 `json:"t"` - Error error `json:"failure"` - Reply *ArchivalBinaryData `json:"raw_reply"` -} - -// NewArchivalBinaryData builds a new ArchivalBinaryData -// from an array of bytes. If the array is nil, we return nil. -func NewArchivalBinaryData(data []byte) (out *ArchivalBinaryData) { - if len(data) > 0 { - out = &ArchivalBinaryData{ - Data: data, - Format: "base64", - } - } - return -} - -// NewArchivalDNSRoundTrip converts a DNSRoundTripEvent -// to the corresponding archival format. -func NewArchivalDNSRoundTrip(in *DNSRoundTripEvent) (out *ArchivalDNSRoundTrip) { - return &ArchivalDNSRoundTrip{ - Origin: in.Origin, - Network: in.Network, - Address: in.Address, - Query: NewArchivalBinaryData(in.Query), - Started: in.Started.Seconds(), - Finished: in.Finished.Seconds(), - Error: in.Error, - Reply: NewArchivalBinaryData(in.Reply), - } -} - -// NewArchivalDNSRoundTripList converts a []*DNSRoundTripEvent -// to the corresponding archival format. 
-func NewArchivalDNSRoundTripList(in ...*DNSRoundTripEvent) (out []*ArchivalDNSRoundTrip) { - for _, ev := range in { - out = append(out, NewArchivalDNSRoundTrip(ev)) - } - return -} - -// ArchivalHTTPRoundTrip is the archival format for HTTPRoundTripEvent. -type ArchivalHTTPRoundTrip struct { - // JSON names following the df-001-httpt data format. - Error error `json:"failure"` - Request *ArchivalHTTPRequest `json:"request"` - Response *ArchivalHTTPResponse `json:"response"` - Finished float64 `json:"t"` - ConnID int64 `json:"conn_id"` - Started float64 `json:"started"` - - // Names not in the specification - Origin Origin `json:"origin"` - Oddity Oddity `json:"oddity"` -} - -// ArchivalHTTPRequest is the archival representation of a request. -type ArchivalHTTPRequest struct { - Method string `json:"method"` - URL string `json:"url"` - HeadersList [][]string `json:"headers_list"` -} - -// ArchivalHTTPResponse is the archival representation of a response. -type ArchivalHTTPResponse struct { - Code int64 `json:"code"` - HeadersList [][]string `json:"headers_list"` - Body interface{} `json:"body"` - BodyIsTruncated bool `json:"body_is_truncated"` -} - -// NewArchivalHTTPRoundTrip converts an HTTPRoundTripEvent -// to the corresponding archival format. 
-func NewArchivalHTTPRoundTrip(in *HTTPRoundTripEvent) (out *ArchivalHTTPRoundTrip) { - return &ArchivalHTTPRoundTrip{ - Error: in.Error, - Request: &ArchivalHTTPRequest{ - Method: in.RequestMethod, - URL: in.RequestURL.String(), - HeadersList: NewArchivalHeadersList(in.RequestHeader), - }, - Response: &ArchivalHTTPResponse{ - Code: int64(in.ResponseStatus), - HeadersList: NewArchivalHeadersList(in.ResponseHeader), - Body: NewArchivalHTTPBody(in.ResponseBodySnapshot), - BodyIsTruncated: int64(len(in.ResponseBodySnapshot)) >= in.MaxBodySnapshotSize, - }, - Finished: in.Finished.Seconds(), - ConnID: in.ConnID, - Started: in.Started.Seconds(), - Origin: in.Origin, - Oddity: in.Oddity, - } -} - -// NewArchivalHTTPBody builds a new HTTP body for archival from the body. -func NewArchivalHTTPBody(body []byte) interface{} { - if utf8.Valid(body) { - return string(body) - } - return &ArchivalBinaryData{ - Data: body, - Format: "base64", - } -} - -// NewArchivalHeadersList builds a new HeadersList from http.Header. -func NewArchivalHeadersList(in http.Header) (out [][]string) { - for k, vv := range in { - for _, v := range vv { - out = append(out, []string{k, v}) - } - } - return -} - -// NewArchivalHTTPRoundTripList converts a []*HTTPRoundTripEvent -// to the corresponding archival format. 
-func NewArchivalHTTPRoundTripList(in ...*HTTPRoundTripEvent) (out []*ArchivalHTTPRoundTrip) { - for _, ev := range in { - out = append(out, NewArchivalHTTPRoundTrip(ev)) - } - return -} diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 042473fd47..953ba82466 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -5,31 +5,20 @@ package measurex // // This file defines two types: // -// - EventDB is the interface for storing events that +// - WritableDB is the interface for storing events that // we pass to the networking code // -// - DB is a concrete implementation of EventDB that we -// use for building measurements and generally for extracting -// information useful for implementing experiments +// - MeasurementDB is a concrete database in which network +// code stores events and from which you can create a +// measurement with all the collected events // import ( - "errors" - "fmt" - "net" - "net/http" - "net/url" "sync" - "time" ) -// EventDB is a "database" holding events records as seen by the -// networking code that needs to save events. -type EventDB interface { - // ElapsedTime returns the elapsed time since the beginning - // of time as configured into the database. - ElapsedTime() time.Duration - +// WritableDB is a measurement database in which you can write. +type WritableDB interface { // InsertIntoDial saves a Dial event. InsertIntoDial(ev *NetworkEvent) @@ -59,14 +48,10 @@ type EventDB interface { // InsertIntoQUICHandshake saves a QUIC handshake event. InsertIntoQUICHandshake(ev *QUICHandshakeEvent) - - // NextConnID increments and returns the connection ID. - NextConnID() int64 } -// DB is an EventDB that saves events and also allows to -// ask questions regarding the saved events. -type DB struct { +// MeasurementDB is a database for assembling a measurement. 
+type MeasurementDB struct { // database tables dialTable []*NetworkEvent readWriteTable []*NetworkEvent @@ -78,582 +63,162 @@ type DB struct { httpRoundTripTable []*HTTPRoundTripEvent httpRedirectTable []*HTTPRedirectEvent quicHandshakeTable []*QUICHandshakeEvent - resolversTable []*ResolverInfo - testHelpersTable []*TestHelperInfo // mu protects all the fields mu sync.Mutex - - // non-table database fields - begin time.Time - connID int64 - measurementID int64 -} - -var _ EventDB = &DB{} - -// NewDB creates a new instance of DB. -func NewDB(begin time.Time) *DB { - return &DB{begin: begin} -} - -// ElapsedTime implements EventDB.ElapsedTime. -func (db *DB) ElapsedTime() time.Duration { - return time.Since(db.begin) } -// DeleteAll deletes all the saved data. -func (db *DB) DeleteAll() { - db.mu.Lock() - db.dialTable = nil - db.readWriteTable = nil - db.closeTable = nil - db.tlsHandshakeTable = nil - db.lookupHostTable = nil - db.lookupHTTPSvcTable = nil - db.dnsRoundTripTable = nil - db.httpRoundTripTable = nil - db.httpRedirectTable = nil - db.quicHandshakeTable = nil - db.mu.Unlock() -} +var _ WritableDB = &MeasurementDB{} // InsertIntoDial implements EventDB.InsertIntoDial. -func (db *DB) InsertIntoDial(ev *NetworkEvent) { +func (db *MeasurementDB) InsertIntoDial(ev *NetworkEvent) { db.mu.Lock() db.dialTable = append(db.dialTable, ev) db.mu.Unlock() } -// SelectAllFromDial returns all dial events. -func (db *DB) SelectAllFromDial() (out []*NetworkEvent) { - db.mu.Lock() +// selectAllFromDial returns all dial events. +func (db *MeasurementDB) selectAllFromDial() (out []*NetworkEvent) { out = append(out, db.dialTable...) - db.mu.Unlock() return } // InsertIntoReadWrite implements EventDB.InsertIntoReadWrite. -func (db *DB) InsertIntoReadWrite(ev *NetworkEvent) { +func (db *MeasurementDB) InsertIntoReadWrite(ev *NetworkEvent) { db.mu.Lock() db.readWriteTable = append(db.readWriteTable, ev) db.mu.Unlock() } -// SelectAllFromReadWrite returns all I/O events. 
-func (db *DB) SelectAllFromReadWrite() (out []*NetworkEvent) { - db.mu.Lock() +// selectAllFromReadWrite returns all I/O events. +func (db *MeasurementDB) selectAllFromReadWrite() (out []*NetworkEvent) { out = append(out, db.readWriteTable...) - db.mu.Unlock() return } // InsertIntoClose implements EventDB.InsertIntoClose. -func (db *DB) InsertIntoClose(ev *NetworkEvent) { +func (db *MeasurementDB) InsertIntoClose(ev *NetworkEvent) { db.mu.Lock() db.closeTable = append(db.closeTable, ev) db.mu.Unlock() } -// SelectAllFromClose returns all close events. -func (db *DB) SelectAllFromClose() (out []*NetworkEvent) { - db.mu.Lock() +// selectAllFromClose returns all close events. +func (db *MeasurementDB) selectAllFromClose() (out []*NetworkEvent) { out = append(out, db.closeTable...) - db.mu.Unlock() return } // InsertIntoTLSHandshake implements EventDB.InsertIntoTLSHandshake. -func (db *DB) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { +func (db *MeasurementDB) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { db.mu.Lock() db.tlsHandshakeTable = append(db.tlsHandshakeTable, ev) db.mu.Unlock() } -// SelectAllFromTLSHandshake returns all TLS handshake events. -func (db *DB) SelectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { - db.mu.Lock() +// selectAllFromTLSHandshake returns all TLS handshake events. +func (db *MeasurementDB) selectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { out = append(out, db.tlsHandshakeTable...) - db.mu.Unlock() return } // InsertIntoLookupHost implements EventDB.InsertIntoLookupHost. -func (db *DB) InsertIntoLookupHost(ev *LookupHostEvent) { +func (db *MeasurementDB) InsertIntoLookupHost(ev *LookupHostEvent) { db.mu.Lock() db.lookupHostTable = append(db.lookupHostTable, ev) db.mu.Unlock() } -// SelectAllFromLookupHost returns all the lookup host events. -func (db *DB) SelectAllFromLookupHost() (out []*LookupHostEvent) { - db.mu.Lock() +// selectAllFromLookupHost returns all the lookup host events. 
+func (db *MeasurementDB) selectAllFromLookupHost() (out []*LookupHostEvent) { out = append(out, db.lookupHostTable...) - db.mu.Unlock() return } // InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc -func (db *DB) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { +func (db *MeasurementDB) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { db.mu.Lock() db.lookupHTTPSvcTable = append(db.lookupHTTPSvcTable, ev) db.mu.Unlock() } -// SelectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. -func (db *DB) SelectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { - db.mu.Lock() +// selectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. +func (db *MeasurementDB) selectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { out = append(out, db.lookupHTTPSvcTable...) - db.mu.Unlock() return } // InsertIntoDNSRoundTrip implements EventDB.InsertIntoDNSRoundTrip. -func (db *DB) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { +func (db *MeasurementDB) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { db.mu.Lock() db.dnsRoundTripTable = append(db.dnsRoundTripTable, ev) db.mu.Unlock() } -// SelectAllFromDNSRoundTrip returns all DNS round trip events. -func (db *DB) SelectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { - db.mu.Lock() +// selectAllFromDNSRoundTrip returns all DNS round trip events. +func (db *MeasurementDB) selectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { out = append(out, db.dnsRoundTripTable...) - db.mu.Unlock() return } // InsertIntoHTTPRoundTrip implements EventDB.InsertIntoHTTPRoundTrip. -func (db *DB) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { +func (db *MeasurementDB) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { db.mu.Lock() db.httpRoundTripTable = append(db.httpRoundTripTable, ev) db.mu.Unlock() } -// SelectAllFromHTTPRoundTrip returns all HTTP round trip events. 
-func (db *DB) SelectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { - db.mu.Lock() +// selectAllFromHTTPRoundTrip returns all HTTP round trip events. +func (db *MeasurementDB) selectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { out = append(out, db.httpRoundTripTable...) - db.mu.Unlock() return } // InsertIntoHTTPRedirect implements EventDB.InsertIntoHTTPRedirect. -func (db *DB) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { +func (db *MeasurementDB) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { db.mu.Lock() db.httpRedirectTable = append(db.httpRedirectTable, ev) db.mu.Unlock() } -// SelectAllFromHTTPRedirect returns all HTTP redirections. -func (db *DB) SelectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { - db.mu.Lock() +// selectAllFromHTTPRedirect returns all HTTP redirections. +func (db *MeasurementDB) selectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { out = append(out, db.httpRedirectTable...) - db.mu.Unlock() return } // InsertIntoQUICHandshake implements EventDB.InsertIntoQUICHandshake. -func (db *DB) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { +func (db *MeasurementDB) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { db.mu.Lock() db.quicHandshakeTable = append(db.quicHandshakeTable, ev) db.mu.Unlock() } -// SelectAllFromQUICHandshake returns all QUIC handshake events. -func (db *DB) SelectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { - db.mu.Lock() +// selectAllFromQUICHandshake returns all QUIC handshake events. +func (db *MeasurementDB) selectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { out = append(out, db.quicHandshakeTable...) - db.mu.Unlock() - return -} - -// ResolverInfo contains info about a DNS resolver. -type ResolverInfo struct { - // Network is the resolver's network (e.g., "doh", "udp") - Network string - - // Address is the address (e.g., "1.1.1.1:53", "https://1.1.1.1/dns-query") - Address string -} - -// string returns a string representation of the resolver. 
-func (ri *ResolverInfo) string() string { - return fmt.Sprintf("%s@%s", ri.Network, ri.Address) -} - -// InsertIntoResolvers inserts a given resolver into the resolver's table. -func (db *DB) InsertIntoResolvers(network, address string) { - db.mu.Lock() - db.resolversTable = append(db.resolversTable, &ResolverInfo{ - Network: network, - Address: address, - }) - db.mu.Unlock() -} - -// SelectAllFromResolvers returns all the configured resolvers. This function -// ensures that the system resolver is in the list and also ensures that we -// return in output a list only containing unique resolvers. -func (db *DB) SelectAllFromResolvers() (out []*ResolverInfo) { - all := append([]*ResolverInfo{}, &ResolverInfo{Network: "system"}) - db.mu.Lock() - all = append(all, db.resolversTable...) - db.mu.Unlock() - unique := make(map[string]bool) - for _, reso := range all { - if _, found := unique[reso.string()]; found { - continue - } - unique[reso.string()] = true - out = append(out, reso) - } return } -// TestHelperInfo contains info about a test helper. -type TestHelperInfo struct { - // Protocol is the test helpers's protocol (e.g., "wcth") - Protocol string - - // URL is the URL (e.g., "https://wcth.ooni.io/") - URL string -} - -// string returns a string representation of the resolver. -func (ti *TestHelperInfo) string() string { - return fmt.Sprintf("%s@%s", ti.Protocol, ti.URL) -} - -// InsertIntoTestHelpers inserts a given TH into the test helpers's table. -func (db *DB) InsertIntoTestHelpers(proto, URL string) { - db.mu.Lock() - db.testHelpersTable = append(db.testHelpersTable, &TestHelperInfo{ - Protocol: proto, - URL: URL, - }) - db.mu.Unlock() -} - -// SelectAllFromTestHelperss returns all the configured THs. This function -// ensures that we return in output a list only containing unique THs. 
-func (db *DB) SelectAllFromTestHelpers() (out []*TestHelperInfo) { - var all []*TestHelperInfo +// AsMeasurement converts the current state of the database into +// a finalized Measurement structure. The original events will remain +// into the database. To start a new measurement cycle, just create +// a new MeasurementDB instance. You are not supposed to modify +// the Measurement returned by this method. +func (db *MeasurementDB) AsMeasurement() *Measurement { db.mu.Lock() - all = append(all, db.testHelpersTable...) - db.mu.Unlock() - unique := make(map[string]bool) - for _, th := range all { - if _, found := unique[th.string()]; found { - continue - } - unique[th.string()] = true - out = append(out, th) + meas := &Measurement{ + Connect: db.selectAllFromDial(), + ReadWrite: db.selectAllFromReadWrite(), + Close: db.selectAllFromClose(), + TLSHandshake: db.selectAllFromTLSHandshake(), + QUICHandshake: db.selectAllFromQUICHandshake(), + LookupHost: db.selectAllFromLookupHost(), + LookupHTTPSSvc: db.selectAllFromLookupHTTPSSvc(), + DNSRoundTrip: db.selectAllFromDNSRoundTrip(), + HTTPRoundTrip: db.selectAllFromHTTPRoundTrip(), + HTTPRedirect: db.selectAllFromHTTPRedirect(), } - return -} - -// NextConnID implements EventDB.NextConnID. -func (db *DB) NextConnID() (out int64) { - db.mu.Lock() - db.connID++ // start from 1 - out = db.connID db.mu.Unlock() - return -} - -// NextMeasurementID increments the internal MeasurementID and -// returns it, so that later you can reference the current measurement. -func (db *DB) NextMeasurementID() (out int64) { - db.mu.Lock() - db.measurementID++ // start from 1 - out = db.measurementID - db.mu.Unlock() - return -} - -// SelectAllFromDialWithMeasurementID calls SelectAllFromConnect -// and filters the result by MeasurementID. 
-func (db *DB) SelectAllFromDialWithMeasurementID(id int64) (out []*NetworkEvent) { - for _, ev := range db.SelectAllFromDial() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromReadWriteWithMeasurementID calls SelectAllFromReadWrite and -// filters the result by MeasurementID. -func (db *DB) SelectAllFromReadWriteWithMeasurementID(id int64) (out []*NetworkEvent) { - for _, ev := range db.SelectAllFromReadWrite() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromCloseWithMeasurementID calls SelectAllFromClose -// and filters the result by MeasurementID. -func (db *DB) SelectAllFromCloseWithMeasurementID(id int64) (out []*NetworkEvent) { - for _, ev := range db.SelectAllFromClose() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromTLSHandshakeWithMeasurementID calls SelectAllFromTLSHandshake -// and filters the result by MeasurementID. -func (db *DB) SelectAllFromTLSHandshakeWithMeasurementID(id int64) (out []*TLSHandshakeEvent) { - for _, ev := range db.SelectAllFromTLSHandshake() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromQUICHandshakeWithMeasurementID calls SelectAllFromQUICSHandshake -// and filters the result by MeasurementID. -func (db *DB) SelectAllFromQUICHandshakeWithMeasurementID(id int64) (out []*QUICHandshakeEvent) { - for _, ev := range db.SelectAllFromQUICHandshake() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromLookupHostWithMeasurementID calls SelectAllFromLookupHost -// and filters the result by MeasurementID. 
-func (db *DB) SelectAllFromLookupHostWithMeasurementID(id int64) (out []*LookupHostEvent) { - for _, ev := range db.SelectAllFromLookupHost() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromLookupHTTPSSvcWithMeasurementID calls SelectAllFromHTTPSSvc -// and filters the result by MeasurementID. -func (db *DB) SelectAllFromLookupHTTPSSvcWithMeasurementID(id int64) (out []*LookupHTTPSSvcEvent) { - for _, ev := range db.SelectAllFromLookupHTTPSSvc() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromDNSRoundTripWithMeasurementID calls SelectAllFromDNSRoundTrip -// and filters the result by MeasurementID. -func (db *DB) SelectAllFromDNSRoundTripWithMeasurementID(id int64) (out []*DNSRoundTripEvent) { - for _, ev := range db.SelectAllFromDNSRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromHTTPRoundTripWithMeasurementID calls SelectAllFromHTTPRoundTrip -// and filters the result by MeasurementID. -func (db *DB) SelectAllFromHTTPRoundTripWithMeasurementID(id int64) (out []*HTTPRoundTripEvent) { - for _, ev := range db.SelectAllFromHTTPRoundTrip() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// SelectAllFromHTTPRedirectWithMeasurementID calls SelectAllFromHTTPRedirect -// and filters the result by MeasurementID. -func (db *DB) SelectAllFromHTTPRedirectWithMeasurementID(id int64) (out []*HTTPRedirectEvent) { - for _, ev := range db.SelectAllFromHTTPRedirect() { - if id == ev.MeasurementID { - out = append(out, ev) - } - } - return -} - -// EndpointNetwork is the network of an endpoint. -type EndpointNetwork string - -const ( - // NetworkTCP identifies endpoints using TCP. - NetworkTCP = EndpointNetwork("tcp") - - // NetworkQUIC identifies endpoints using QUIC. - NetworkQUIC = EndpointNetwork("quic") -) - -// Endpoint is an endpoint for a domain. 
-type Endpoint struct { - // Network is the network (e.g., "tcp", "quic") - Network EndpointNetwork - - // Address is the endpoint address (e.g., "8.8.8.8:443") - Address string -} - -// String converts an endpoint to a string (e.g., "8.8.8.8:443/tcp") -func (e *Endpoint) String() string { - return fmt.Sprintf("%s/%s", e.Address, e.Network) -} - -// SelectAllEndpointsForDomain returns all the -// endpoints for a specific domain. -// -// Arguments: -// -// - domain is the domain we want to connect to; -// -// - port is the port for the endpoint. -func (db *DB) SelectAllEndpointsForDomain(domain, port string) (out []*Endpoint) { - out = append(out, db.selectAllTCPEndpoints(domain, port)...) - out = append(out, db.selectAllQUICEndpoints(domain, port)...) - out = db.deduplicateEndpoints(out) - return -} - -func (db *DB) selectAllTCPEndpoints(domain, port string) (out []*Endpoint) { - for _, entry := range db.SelectAllFromLookupHost() { - if domain != entry.Domain { - continue - } - for _, addr := range entry.Addrs { - if net.ParseIP(addr) == nil { - continue // skip CNAME entries courtesy the WCTH - } - out = append(out, db.newEndpoint(addr, port, NetworkTCP)) - } - } - return -} - -func (db *DB) selectAllQUICEndpoints(domain, port string) (out []*Endpoint) { - for _, entry := range db.SelectAllFromLookupHTTPSSvc() { - if domain != entry.Domain { - continue - } - if !db.supportsHTTP3(entry) { - continue - } - addrs := append([]string{}, entry.IPv4...) - for _, addr := range append(addrs, entry.IPv6...) 
{ - out = append(out, db.newEndpoint(addr, port, NetworkQUIC)) - } - } - return -} - -func (db *DB) deduplicateEndpoints(epnts []*Endpoint) (out []*Endpoint) { - duplicates := make(map[string]*Endpoint) - for _, epnt := range epnts { - duplicates[epnt.String()] = epnt - } - for _, epnt := range duplicates { - out = append(out, epnt) - } - return -} - -func (db *DB) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { - return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} -} - -func (db *DB) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { - for _, alpn := range entry.ALPN { - switch alpn { - case "h3": - return true - } - } - return false -} - -// HTTPEndpoint is an HTTP/HTTPS/HTTP3 endpoint. -type HTTPEndpoint struct { - // Domain is the endpoint domain (e.g., "dns.google"). - Domain string - - // Network is the network (e.g., "tcp" or "quic"). - Network EndpointNetwork - - // Address is the endpoint address (e.g., "8.8.8.8:443"). - Address string - - // SNI is the SNI to use (only used with URL.scheme == "https"). - SNI string - - // ALPN is the ALPN to use (only used with URL.scheme == "https"). - ALPN []string - - // URL is the endpoint URL. - URL *url.URL - - // Header contains request headers. - Header http.Header -} - -// String converts an HTTP endpoint to a string (e.g., "8.8.8.8:443/tcp") -func (e *HTTPEndpoint) String() string { - return fmt.Sprintf("%s/%s", e.Address, e.Network) -} - -// SelectAllHTTPEndpointsForURL returns all the -// HTTPEndpoints matching a specific URL' domain. -// -// Arguments: -// -// - URL is the URL for which we want endpoints; -// -// Returns a list of endpoints or an error. 
-func (db *DB) SelectAllHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { - domain := URL.Hostname() - port, err := PortFromURL(URL) - if err != nil { - return nil, err - } - epnts := db.SelectAllEndpointsForDomain(domain, port) - var out []*HTTPEndpoint - for _, epnt := range epnts { - if URL.Scheme != "https" && epnt.Network == NetworkQUIC { - continue // we'll only use QUIC with HTTPS - } - out = append(out, &HTTPEndpoint{ - Domain: domain, - Network: epnt.Network, - Address: epnt.Address, - SNI: domain, - ALPN: alpnForHTTPEndpoint(epnt.Network), - URL: URL, - Header: NewHTTPRequestHeaderForMeasuring(), - }) - } - return out, nil -} - -// ErrCannotDeterminePortFromURL indicates that we could not determine -// the correct port from the URL authority and scheme. -var ErrCannotDeterminePortFromURL = errors.New("cannot determine port from URL") - -// PortFromURL returns the port determined from the URL or an error. -func PortFromURL(URL *url.URL) (string, error) { - switch { - case URL.Port() != "": - return URL.Port(), nil - case URL.Scheme == "https": - return "443", nil - case URL.Scheme == "http": - return "80", nil - default: - return "", ErrCannotDeterminePortFromURL - } + return meas } diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index 8d355aafaa..f95eb74dfe 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -3,8 +3,7 @@ package measurex // // Dialer // -// This file contains basic networking code. We wrap the fundamental -// netxlite.Dialer type to store measurements into an EventDB. +// Wrappers for Dialer and Conn to store events into a WritableDB. // import ( @@ -16,152 +15,82 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) -// Conn is like net.Conn but also knows its ConnID and has a -// reference to its Dialer's database. On Read, Write, and Close, this -// Conn will write a specific event into the database. 
-type Conn interface { - net.Conn +// Conn is a network connection. +type Conn = net.Conn - // ConnID returns the ConnID. This should be a positive - // integer. A zero or negative value means "unknown". - ConnID() int64 -} +// Dialer dials network connections. +type Dialer = netxlite.Dialer -// Dialer is like netxlite.Dialer but dials connections of -// the Conn type defined inside this package. -type Dialer interface { - // DialContext wraps a netxlite.Dialer.DialContext - // and implements this algorithm: - // - // 1. perform TCP/UDP dial using the underlying netxlite.Dialer; - // - // 2. insert a DialEvent into the DB; - // - // 3. on error, return error; - // - // 4. otherwise, wrap the net.Conn to be a Conn and return it. - DialContext(ctx context.Context, network, address string) (Conn, error) - - // CloseIdleConnections calls the namesake method - // of the underlying netxlite.Dialer. - CloseIdleConnections() +// WrapDialer creates a new dialer that writes events +// into the given WritableDB. The net.Conns created by +// a wrapped dialer also write into the WritableDB. +func (mx *Measurer) WrapDialer(db WritableDB, dialer netxlite.Dialer) Dialer { + return &dialerDB{Dialer: dialer, db: db, begin: mx.Begin} } -// WrapDialer takes in input a netxlite.Dialer and returns -// in output a Dialer of the type used in this package. -// -// Arguments: -// -// - measurementID is the measurement ID; -// -// - origin is either OriginProbe or OriginTH; -// -// - db is the database in which to store measurements; -// -// - d is the underlying netxlite.Dialer to use. 
-func WrapDialer(measurementID int64, - origin Origin, db EventDB, d netxlite.Dialer) Dialer { - return &dialerx{Dialer: d, db: db, origin: origin, mid: measurementID} +// NewDialerWithSystemResolver creates a dialer that saves events into the given WritableDB and uses the system resolver. +func (mx *Measurer) NewDialerWithSystemResolver(db WritableDB, logger Logger) Dialer { + r := mx.NewResolverSystem(db, logger) + return mx.WrapDialer(db, netxlite.NewDialerWithResolver(logger, r)) } // NewDialerWithoutResolver is a convenience factory for creating // a dialer that saves measurements into the DB and that is not attached // to any resolver (hence only works when passed IP addresses). -func NewDialerWithoutResolver( - measurementID int64, origin Origin, db EventDB, logger Logger) Dialer { - return WrapDialer(measurementID, origin, db, netxlite.NewDialerWithoutResolver( - logger, - )) -} - -// netxliteDialerAdapter adapts measurex.Dialer to netxlite.Dialer. -type netxliteDialerAdapter struct { - Dialer +func (mx *Measurer) NewDialerWithoutResolver(db WritableDB, logger Logger) Dialer { + return mx.WrapDialer(db, netxlite.NewDialerWithoutResolver(logger)) } -// DialContext implements netxlite.Dialer.DialContext. -func (d *netxliteDialerAdapter) DialContext( - ctx context.Context, network, address string) (net.Conn, error) { - return d.Dialer.DialContext(ctx, network, address) -} - -// NewDialerWithSystemResolver is a convenience factory for creating -// a dialer that saves measurements into mx.DB and uses the system resolver. -func NewDialerWithSystemResolver( - measurementID int64, origin Origin, db EventDB, logger Logger) Dialer { - r := NewResolverSystem(measurementID, origin, db, logger) - return WrapDialer(measurementID, origin, db, netxlite.NewDialerWithResolver( - logger, r, - )) -} - -type dialerx struct { +type dialerDB struct { netxlite.Dialer - db EventDB - mid int64 - origin Origin + begin time.Time + db WritableDB } // NetworkEvent contains a network event. This kind of events // are generated by Dialer, QUICDialer, Conn, QUICConn. 
type NetworkEvent struct { - Origin Origin // OriginProbe or OriginTH - MeasurementID int64 // ID of the measurement - ConnID int64 // ID of the conn - Operation string // "read", "write", ... - Network string // "tcp", "udp" - RemoteAddr string // remote addr (e.g., "1.1.1.1:443") - LocalAddr string // local addr - Started time.Duration // when we called dial - Finished time.Duration // when dial returned - Error error // error or nil - Oddity Oddity // oddity classification - Count int // bytes sent or recv (where applicable) + // JSON names compatible with df-008-netevents + RemoteAddr string `json:"address"` + Error error `json:"failure"` + Count int `json:"num_bytes,omitempty"` + Operation string `json:"operation"` + Network string `json:"proto"` + Finished float64 `json:"t"` + Started float64 `json:"started"` + + // Names that are not part of the spec. + Oddity Oddity `json:"oddity"` } -func (d *dialerx) DialContext( +func (d *dialerDB) DialContext( ctx context.Context, network, address string) (Conn, error) { - connID := d.db.NextConnID() - started := d.db.ElapsedTime() + started := time.Since(d.begin).Seconds() conn, err := d.Dialer.DialContext(ctx, network, address) - finished := d.db.ElapsedTime() + finished := time.Since(d.begin).Seconds() d.db.InsertIntoDial(&NetworkEvent{ - Origin: d.origin, - MeasurementID: d.mid, - ConnID: connID, - Operation: "connect", - Network: network, - RemoteAddr: address, - LocalAddr: d.localAddrIfNotNil(conn), - Started: started, - Finished: finished, - Error: err, - Oddity: d.computeOddity(err), - Count: 0, + Operation: "connect", + Network: network, + RemoteAddr: address, + Started: started, + Finished: finished, + Error: err, + Oddity: d.computeOddity(err), + Count: 0, }) if err != nil { return nil, err } - return &connx{ + return &connDB{ Conn: conn, + begin: d.begin, db: d.db, - connID: connID, - remoteAddr: address, - localAddr: conn.LocalAddr().String(), network: network, - origin: d.origin, - mid: d.mid, + 
remoteAddr: address, }, nil } -func (c *dialerx) localAddrIfNotNil(conn net.Conn) (addr string) { - if conn != nil { - addr = conn.LocalAddr().String() - } - return -} - -func (c *dialerx) computeOddity(err error) Oddity { +func (c *dialerDB) computeOddity(err error) Oddity { if err == nil { return "" } @@ -177,77 +106,58 @@ func (c *dialerx) computeOddity(err error) Oddity { } } -type connx struct { +type connDB struct { net.Conn - db EventDB - connID int64 - remoteAddr string - localAddr string - mid int64 + begin time.Time + db WritableDB network string - origin Origin -} - -func (c *connx) ConnID() int64 { - return c.connID + remoteAddr string } -func (c *connx) Read(b []byte) (int, error) { - started := c.db.ElapsedTime() +func (c *connDB) Read(b []byte) (int, error) { + started := time.Since(c.begin).Seconds() count, err := c.Conn.Read(b) - finished := c.db.ElapsedTime() + finished := time.Since(c.begin).Seconds() c.db.InsertIntoReadWrite(&NetworkEvent{ - Origin: c.origin, - MeasurementID: c.mid, - ConnID: c.connID, - Operation: "read", - Network: c.network, - RemoteAddr: c.remoteAddr, - LocalAddr: c.localAddr, - Started: started, - Finished: finished, - Error: err, - Count: count, + Operation: "read", + Network: c.network, + RemoteAddr: c.remoteAddr, + Started: started, + Finished: finished, + Error: err, + Count: count, }) return count, err } -func (c *connx) Write(b []byte) (int, error) { - started := c.db.ElapsedTime() +func (c *connDB) Write(b []byte) (int, error) { + started := time.Since(c.begin).Seconds() count, err := c.Conn.Write(b) - finished := c.db.ElapsedTime() + finished := time.Since(c.begin).Seconds() c.db.InsertIntoReadWrite(&NetworkEvent{ - Origin: c.origin, - MeasurementID: c.mid, - ConnID: c.connID, - Operation: "write", - Network: c.network, - RemoteAddr: c.remoteAddr, - LocalAddr: c.localAddr, - Started: started, - Finished: finished, - Error: err, - Count: count, + Operation: "write", + Network: c.network, + RemoteAddr: c.remoteAddr, + 
Started: started, + Finished: finished, + Error: err, + Count: count, }) return count, err } -func (c *connx) Close() error { - started := c.db.ElapsedTime() +func (c *connDB) Close() error { + started := time.Since(c.begin).Seconds() err := c.Conn.Close() - finished := c.db.ElapsedTime() + finished := time.Since(c.begin).Seconds() c.db.InsertIntoClose(&NetworkEvent{ - Origin: c.origin, - MeasurementID: c.mid, - ConnID: c.connID, - Operation: "close", - Network: c.network, - RemoteAddr: c.remoteAddr, - LocalAddr: c.localAddr, - Started: started, - Finished: finished, - Error: err, - Count: 0, + Operation: "close", + Network: c.network, + RemoteAddr: c.remoteAddr, + Started: started, + Finished: finished, + Error: err, + Count: 0, }) return err } diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index 10e930c8dc..cbe3ed3d6a 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -3,70 +3,64 @@ package measurex // // DNSX (DNS eXtensions) // -// This file contains basic networking code. We wrap the fundamental -// dnsx.RoundTripper type to store measurements into an EventDB. +// We wrap dnsx.RoundTripper to store events into a WritableDB. // import ( "context" + "encoding/json" "time" "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" ) -// DNSTransport is a transport for sending raw DNS queries +// DNSXRoundTripper is a transport for sending raw DNS queries // and receiving raw DNS replies. The internal/netxlite/dnsx // package implements a bunch of these transports. -type DNSTransport = dnsx.RoundTripper +type DNSXRoundTripper = dnsx.RoundTripper -// WrapDNSXRoundTripper wraps a dnsx.RoundTripper and returns a -// DNSTransport that saves DNSRoundTripEvents into the DB. 
-func WrapDNSXRoundTripper( - measurementID int64, origin Origin, db EventDB, rt dnsx.RoundTripper) DNSTransport { - return &dnsxTransportx{ - db: db, - RoundTripper: rt, - origin: origin, - mid: measurementID, - } +// WrapDNSXRoundTripper creates a new DNSXRoundTripper that +// saves events into the given WritableDB. +func (mx *Measurer) WrapDNSXRoundTripper(db WritableDB, rtx dnsx.RoundTripper) DNSXRoundTripper { + return &dnsxRoundTripperDB{db: db, RoundTripper: rtx, begin: mx.Begin} } -type dnsxTransportx struct { +type dnsxRoundTripperDB struct { dnsx.RoundTripper - db EventDB - mid int64 - origin Origin + begin time.Time + db WritableDB } -// DNSRoundTripEvent contains the result of a DNS round trip. These -// events are generated by DNSTransport types. +// DNSRoundTripEvent contains the result of a DNS round trip. type DNSRoundTripEvent struct { - Origin Origin // OriginProbe or OriginTH - MeasurementID int64 // ID of the measurement - ConnID int64 // connID (typically zero) - Network string // DNS resolver's network (e.g., "dot", "doh") - Address string // DNS resolver's address or URL (for "doh") - Query []byte // Raw query - Started time.Duration // When we started the round trip - Finished time.Duration // When we were done - Error error // Error or nil - Reply []byte // Raw reply + Network string + Address string + Query []byte + Started float64 + Finished float64 + Error error + Reply []byte } -func (txp *dnsxTransportx) RoundTrip(ctx context.Context, query []byte) ([]byte, error) { - started := txp.db.ElapsedTime() +// MarshalJSON marshals a DNSRoundTripEvent to the archival +// format that is similar to df-002-dnst. 
+func (ev *DNSRoundTripEvent) MarshalJSON() ([]byte, error) { + archival := NewArchivalDNSRoundTrip(ev) + return json.Marshal(archival) +} + +func (txp *dnsxRoundTripperDB) RoundTrip(ctx context.Context, query []byte) ([]byte, error) { + started := time.Since(txp.begin).Seconds() reply, err := txp.RoundTripper.RoundTrip(ctx, query) - finished := txp.db.ElapsedTime() + finished := time.Since(txp.begin).Seconds() txp.db.InsertIntoDNSRoundTrip(&DNSRoundTripEvent{ - Origin: txp.origin, - MeasurementID: txp.mid, - Network: txp.RoundTripper.Network(), - Address: txp.RoundTripper.Address(), - Query: query, - Started: started, - Finished: finished, - Error: err, - Reply: reply, + Network: txp.RoundTripper.Network(), + Address: txp.RoundTripper.Address(), + Query: query, + Started: started, + Finished: finished, + Error: err, + Reply: reply, }) return reply, err } diff --git a/internal/measurex/endpoint.go b/internal/measurex/endpoint.go new file mode 100644 index 0000000000..9d374e0f06 --- /dev/null +++ b/internal/measurex/endpoint.go @@ -0,0 +1,67 @@ +package measurex + +import ( + "fmt" + "net/http" + "net/url" +) + +// +// Endpoint +// +// This file contains the definition of Endpoint and HTTPEndpoint +// + +// EndpointNetwork is the network of an endpoint. +type EndpointNetwork string + +const ( + // NetworkTCP identifies endpoints using TCP. + NetworkTCP = EndpointNetwork("tcp") + + // NetworkQUIC identifies endpoints using QUIC. + NetworkQUIC = EndpointNetwork("quic") +) + +// Endpoint is an endpoint for a domain. +type Endpoint struct { + // Network is the network (e.g., "tcp", "quic") + Network EndpointNetwork + + // Address is the endpoint address (e.g., "8.8.8.8:443") + Address string +} + +// String converts an endpoint to a string (e.g., "8.8.8.8:443/tcp") +func (e *Endpoint) String() string { + return fmt.Sprintf("%s/%s", e.Address, e.Network) +} + +// HTTPEndpoint is an HTTP/HTTPS/HTTP3 endpoint. 
+type HTTPEndpoint struct { + // Domain is the endpoint domain (e.g., "dns.google"). + Domain string + + // Network is the network (e.g., "tcp" or "quic"). + Network EndpointNetwork + + // Address is the endpoint address (e.g., "8.8.8.8:443"). + Address string + + // SNI is the SNI to use (only used with URL.scheme == "https"). + SNI string + + // ALPN is the ALPN to use (only used with URL.scheme == "https"). + ALPN []string + + // URL is the endpoint URL. + URL *url.URL + + // Header contains request headers. + Header http.Header +} + +// String converts an HTTP endpoint to a string (e.g., "8.8.8.8:443/tcp") +func (e *HTTPEndpoint) String() string { + return fmt.Sprintf("%s/%s", e.Address, e.Network) +} diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 5c9e93d791..0c72baa3f1 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -17,6 +17,7 @@ import ( "bytes" "context" "crypto/tls" + "encoding/json" "errors" "io" "net/http" @@ -24,6 +25,7 @@ import ( "net/url" "time" + "github.com/lucas-clemente/quic-go" "github.com/ooni/probe-cli/v3/internal/engine/httpheader" "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/netxlite/iox" @@ -31,103 +33,64 @@ import ( "golang.org/x/net/publicsuffix" ) -// HTTPTransport is the HTTP transport type we use. This transport -// is a normal netxlite.HTTPTransport but also knows about the ConnID. -// -// The RoundTrip method of this transport MAY read a small snapshot -// of the response body to include it into the measurement. When this -// happens, the transport will nonetheless return a response body -// that is suitable for reading the whole body again. The only difference -// with reading the body normally is timing. The snapshot will be read -// immediately because it's already cached in RAM. The rest of the -// body instead will be read normally, using the network. 
-type HTTPTransport interface { - netxlite.HTTPTransport - - // ConnID returns the connection ID. When this value is zero - // or negative it means it has not been set. - ConnID() int64 -} +// HTTPTransport is the HTTP transport type we use. +type HTTPTransport = netxlite.HTTPTransport -// WrapHTTPTransport takes in input a netxlite.HTTPTransport and -// returns an HTTPTransport that uses the DB to save events occurring -// during HTTP round trips. With this constructor the ConnID is -// not set, hence ConnID will always return zero. -func WrapHTTPTransport(measurementID int64, - origin Origin, db EventDB, txp netxlite.HTTPTransport) HTTPTransport { - return WrapHTTPTransportWithConnID(measurementID, origin, db, txp, 0) -} - -// WrapHTTPTransportWithConnID is like WrapHTTPTransport but also -// sets the conn ID, which is otherwise set to zero. -func WrapHTTPTransportWithConnID(measurementID int64, origin Origin, - db EventDB, txp netxlite.HTTPTransport, connID int64) HTTPTransport { - return &httpTransportx{ - HTTPTransport: txp, - db: db, - connID: connID, - mid: measurementID, - origin: origin, - } +// WrapHTTPTransport creates a new transport that saves +// HTTP events into the WritableDB. +func (mx *Measurer) WrapHTTPTransport(db WritableDB, txp HTTPTransport) HTTPTransport { + return &httpTransportDB{HTTPTransport: txp, db: db, begin: mx.Begin} } // NewHTTPTransportWithConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. 
-func NewHTTPTransportWithConn(measurementID int64, - origin Origin, logger Logger, db EventDB, conn Conn) HTTPTransport { - txp := netxlite.NewHTTPTransport(logger, netxlite.NewSingleUseDialer(conn), - netxlite.NewNullTLSDialer()) - return WrapHTTPTransportWithConnID( - measurementID, origin, db, txp, conn.ConnID()) +func (mx *Measurer) NewHTTPTransportWithConn(logger Logger, db WritableDB, conn Conn) HTTPTransport { + return mx.WrapHTTPTransport(db, netxlite.NewHTTPTransport( + logger, netxlite.NewSingleUseDialer(conn), netxlite.NewNullTLSDialer())) } // NewHTTPTransportWithTLSConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. -func NewHTTPTransportWithTLSConn(measurementID int64, - origin Origin, logger Logger, db EventDB, conn TLSConn) HTTPTransport { - txp := netxlite.NewHTTPTransport(logger, netxlite.NewNullDialer(), - netxlite.NewSingleUseTLSDialer(conn)) - return WrapHTTPTransportWithConnID( - measurementID, origin, db, txp, conn.ConnID()) +func (mx *Measurer) NewHTTPTransportWithTLSConn( + logger Logger, db WritableDB, conn netxlite.TLSConn) HTTPTransport { + return mx.WrapHTTPTransport(db, netxlite.NewHTTPTransport( + logger, netxlite.NewNullDialer(), netxlite.NewSingleUseTLSDialer(conn))) } // NewHTTPTransportWithQUICSess creates and wraps an HTTPTransport that // does not dial and only uses the given QUIC session. 
-func NewHTTPTransportWithQUICSess(measurementID int64, - origin Origin, logger Logger, db EventDB, sess QUICEarlySession) HTTPTransport { - txp := netxlite.NewHTTP3Transport( - logger, netxlite.NewSingleUseQUICDialer(sess), &tls.Config{}) - return WrapHTTPTransportWithConnID( - measurementID, origin, db, txp, sess.ConnID()) +func (mx *Measurer) NewHTTPTransportWithQUICSess( + logger Logger, db WritableDB, sess quic.EarlySession) HTTPTransport { + return mx.WrapHTTPTransport(db, netxlite.NewHTTP3Transport( + logger, netxlite.NewSingleUseQUICDialer(sess), &tls.Config{})) } -type httpTransportx struct { +type httpTransportDB struct { netxlite.HTTPTransport - connID int64 - db EventDB - mid int64 - origin Origin + begin time.Time + db WritableDB } // HTTPRoundTripEvent contains information about an HTTP round trip. -// -// If ConnID is zero or negative, it means undefined. This happens -// when we create a transport without knowing the ConnID. type HTTPRoundTripEvent struct { - Origin Origin // OriginProbe or OriginTH - MeasurementID int64 // ID of the measurement - ConnID int64 // ID of the conn (<= zero means undefined) - RequestMethod string // Request method - RequestURL *url.URL // Request URL - RequestHeader http.Header // Request headers - Started time.Duration // Beginning of round trip - Finished time.Duration // End of round trip - Error error // Error or nil - Oddity Oddity // Oddity classification - ResponseStatus int // Status code - ResponseHeader http.Header // Response headers - ResponseBodySnapshot []byte // Body snapshot - MaxBodySnapshotSize int64 // Max size for snapshot + RequestMethod string + RequestURL *url.URL + RequestHeader http.Header + Started float64 + Finished float64 + Error error + Oddity Oddity + ResponseStatus int + ResponseHeader http.Header + ResponseBodySnapshot []byte + MaxBodySnapshotSize int64 +} + +// MarshalJSON marshals a HTTPRoundTripEvent to the archival +// format that is similar to df-001-httpt. 
+func (ev *HTTPRoundTripEvent) MarshalJSON() ([]byte, error) { + archival := NewArchivalHTTPRoundTrip(ev) + return json.Marshal(archival) } // We only read a small snapshot of the body to keep measurements @@ -135,13 +98,10 @@ type HTTPRoundTripEvent struct { // but we'll also allow for reading more bytes from the conn. const maxBodySnapshot = 1 << 11 -func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) { - started := txp.db.ElapsedTime() +func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) { + started := time.Since(txp.begin).Seconds() resp, err := txp.HTTPTransport.RoundTrip(req) rt := &HTTPRoundTripEvent{ - Origin: txp.origin, - MeasurementID: txp.mid, - ConnID: txp.connID, RequestMethod: req.Method, RequestURL: req.URL, RequestHeader: req.Header, @@ -149,7 +109,7 @@ func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) MaxBodySnapshotSize: maxBodySnapshot, } if err != nil { - rt.Finished = txp.db.ElapsedTime() + rt.Finished = time.Since(txp.begin).Seconds() rt.Error = err txp.db.InsertIntoHTTPRoundTrip(rt) return nil, err @@ -172,7 +132,7 @@ func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) err = nil // we expected to see an EOF here, so no real error } if err != nil { - rt.Finished = txp.db.ElapsedTime() + rt.Finished = time.Since(txp.begin).Seconds() rt.Error = err txp.db.InsertIntoHTTPRoundTrip(rt) return nil, err @@ -182,7 +142,7 @@ func (txp *httpTransportx) RoundTrip(req *http.Request) (*http.Response, error) Closer: resp.Body, } rt.ResponseBodySnapshot = body - rt.Finished = txp.db.ElapsedTime() + rt.Finished = time.Since(txp.begin).Seconds() txp.db.InsertIntoHTTPRoundTrip(rt) return resp, nil } @@ -192,13 +152,9 @@ type httpTransportBody struct { io.Closer } -func (txp *httpTransportx) ConnID() int64 { - return txp.connID -} - // HTTPClient is the HTTP client type we use. This interface is // compatible with http.Client. 
What changes in this kind of clients -// is that we'll insert redirection events into the DB. +// is that we'll insert redirection events into the WritableDB. type HTTPClient interface { Do(req *http.Request) (*http.Response, error) CloseIdleConnections() @@ -206,33 +162,20 @@ type HTTPClient interface { // NewHTTPClient creates a new HTTPClient instance that // does not automatically perform redirects. -func NewHTTPClientWithoutRedirects(measurementID int64, - origin Origin, db EventDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { - return newHTTPClient( - measurementID, origin, db, jar, txp, http.ErrUseLastResponse) +func NewHTTPClientWithoutRedirects( + db WritableDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { + return newHTTPClient(db, jar, txp, http.ErrUseLastResponse) } // NewHTTPClientWithRedirects creates a new HTTPClient // instance that automatically perform redirects. -func NewHTTPClientWithRedirects(measurementID int64, - origin Origin, db EventDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { - return newHTTPClient( - measurementID, origin, db, jar, txp, nil) +func NewHTTPClientWithRedirects( + db WritableDB, jar http.CookieJar, txp HTTPTransport) HTTPClient { + return newHTTPClient(db, jar, txp, nil) } // HTTPRedirectEvent records an HTTP redirect. type HTTPRedirectEvent struct { - // Origin is the event origin ("probe" or "th") - Origin Origin - - // MeasurementID is the measurement inside which - // this event occurred. - MeasurementID int64 - - // ConnID is the ID of the connection we are using, - // which may be zero if undefined. - ConnID int64 - // URL is the URL triggering the redirect. URL *url.URL @@ -256,8 +199,8 @@ type HTTPRedirectEvent struct { // would return when hitting too many redirects. 
var ErrHTTPTooManyRedirects = errors.New("stopped after 10 redirects") -func newHTTPClient(measurementID int64, origin Origin, db EventDB, - cookiejar http.CookieJar, txp HTTPTransport, defaultErr error) HTTPClient { +func newHTTPClient(db WritableDB, cookiejar http.CookieJar, + txp HTTPTransport, defaultErr error) HTTPClient { return &http.Client{ Transport: txp, Jar: cookiejar, @@ -267,13 +210,10 @@ func newHTTPClient(measurementID int64, origin Origin, db EventDB, err = ErrHTTPTooManyRedirects } db.InsertIntoHTTPRedirect(&HTTPRedirectEvent{ - Origin: origin, - MeasurementID: measurementID, - ConnID: txp.ConnID(), - URL: via[0].URL, // bug in Go stdlib if we crash here - Location: req.URL, - Cookies: cookiejar.Cookies(req.URL), - Error: err, + URL: via[0].URL, // bug in Go stdlib if we crash here + Location: req.URL, + Cookies: cookiejar.Cookies(req.URL), + Error: err, }) return err }, diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go index 82f113ee80..112980c12d 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -21,6 +21,9 @@ type Logger interface { Info(msg string) Infof(format string, v ...interface{}) + + Warn(msg string) + Warnf(format string, v ...interface{}) } // newOperationLogger creates a new logger that logs diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index 3d6248131e..be3d324ec8 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -1,5 +1,11 @@ package measurex +import ( + "net" + "net/url" + "time" +) + // // Measurement // @@ -7,168 +13,231 @@ package measurex // produced by this package. // -import "time" +// URLMeasurement is the measurement of a whole URL. It contains +// a bunch of measurements detailing each measurement step. +type URLMeasurement struct { + // URL is the URL we're measuring. + URL string `json:"url"` + + // DNS contains all the DNS related measurements. 
+ DNS []*DNSMeasurement `json:"dns"` + + // Endpoints contains a measurement for each endpoint + // that we discovered via DNS or TH. + Endpoints []*HTTPEndpointMeasurement `json:"endpoints"` + + // RedirectURLs contain the URLs to which we should fetch + // if we choose to follow redirections. + RedirectURLs []string `json:"-"` + + // TotalRuntime is the total time to measure this URL. + TotalRuntime time.Duration `json:"-"` + + // DNSRuntime is the time to run all DNS checks. + DNSRuntime time.Duration `json:"x_dns_runtime"` + + // THRuntime is the total time to invoke all test helpers. + THRuntime time.Duration `json:"x_th_runtime"` + + // EpntsRuntime is the total time to check all the endpoints. + EpntsRuntime time.Duration `json:"x_epnts_runtime"` +} + +// fillRedirects takes in input a complete URLMeasurement and fills +// the field named Redirects with all redirections. +func (m *URLMeasurement) fillRedirects() { + dups := make(map[string]bool) + for _, epnt := range m.Endpoints { + for _, redir := range epnt.HTTPRedirect { + loc := redir.Location.String() + if _, found := dups[loc]; found { + continue + } + dups[loc] = true + m.RedirectURLs = append(m.RedirectURLs, loc) + } + } +} // Measurement groups all the events that have the same MeasurementID. This // data format is not compatible with the OONI data format. type Measurement struct { - // MeasurementID is the measurement MeasurementID. - MeasurementID int64 - - // Oddities lists all the oddities inside this measurement. See - // newMeasurement's docs for more info. - Oddities []Oddity - // Connect contains all the connect operations. - Connect []*NetworkEvent `json:",omitempty"` + Connect []*NetworkEvent `json:"connect,omitempty"` // ReadWrite contains all the read and write operations. - ReadWrite []*NetworkEvent `json:",omitempty"` + ReadWrite []*NetworkEvent `json:"read_write,omitempty"` // Close contains all the close operations. 
- Close []*NetworkEvent `json:",omitempty"` + Close []*NetworkEvent `json:"-"` // TLSHandshake contains all the TLS handshakes. - TLSHandshake []*TLSHandshakeEvent `json:",omitempty"` + TLSHandshake []*TLSHandshakeEvent `json:"tls_handshake,omitempty"` // QUICHandshake contains all the QUIC handshakes. - QUICHandshake []*QUICHandshakeEvent `json:",omitempty"` + QUICHandshake []*QUICHandshakeEvent `json:"quic_handshake,omitempty"` // LookupHost contains all the host lookups. - LookupHost []*LookupHostEvent `json:",omitempty"` + LookupHost []*LookupHostEvent `json:"lookup_host,omitempty"` // LookupHTTPSSvc contains all the HTTPSSvc lookups. - LookupHTTPSSvc []*LookupHTTPSSvcEvent `json:",omitempty"` + LookupHTTPSSvc []*LookupHTTPSSvcEvent `json:"lookup_httpssvc,omitempty"` // DNSRoundTrip contains all the DNS round trips. - DNSRoundTrip []*DNSRoundTripEvent `json:",omitempty"` + DNSRoundTrip []*DNSRoundTripEvent `json:"dns_round_trip,omitempty"` // HTTPRoundTrip contains all the HTTP round trips. - HTTPRoundTrip []*HTTPRoundTripEvent `json:",omitempty"` + HTTPRoundTrip []*HTTPRoundTripEvent `json:"http_round_trip,omitempty"` // HTTPRedirect contains all the redirections. - HTTPRedirect []*HTTPRedirectEvent `json:",omitempty"` + HTTPRedirect []*HTTPRedirectEvent `json:"-"` } -// NewMeasurement creates a new Measurement by gathering all the -// events inside the database with a given MeasurementID. -// -// As part of the process, this function computes the Oddities field by -// gathering the oddities of the following operations: -// -// - connect; -// -// - tlsHandshake; -// -// - quicHandshake; -// -// - lookupHost; -// -// - httpRoundTrip. +// DNSMeasurement is a DNS measurement. +type DNSMeasurement struct { + // Domain is the domain this measurement refers to. + Domain string `json:"domain"` + + // A DNSMeasurement is a Measurement. + *Measurement +} + +// allEndpointsForDomain returns all the endpoints for +// a specific domain contained in a measurement. 
// // Arguments: // -// - begin is the time when we started measuring; +// - domain is the domain we want to connect to; // -// - id is the MeasurementID. -// -// Returns a Measurement possibly containing empty lists of events. -func NewMeasurement(db *DB, id int64) *Measurement { - m := &Measurement{ - MeasurementID: id, - Connect: db.SelectAllFromDialWithMeasurementID(id), - ReadWrite: db.SelectAllFromReadWriteWithMeasurementID(id), - Close: db.SelectAllFromCloseWithMeasurementID(id), - TLSHandshake: db.SelectAllFromTLSHandshakeWithMeasurementID(id), - QUICHandshake: db.SelectAllFromQUICHandshakeWithMeasurementID(id), - LookupHost: db.SelectAllFromLookupHostWithMeasurementID(id), - LookupHTTPSSvc: db.SelectAllFromLookupHTTPSSvcWithMeasurementID(id), - DNSRoundTrip: db.SelectAllFromDNSRoundTripWithMeasurementID(id), - HTTPRoundTrip: db.SelectAllFromHTTPRoundTripWithMeasurementID(id), - HTTPRedirect: db.SelectAllFromHTTPRedirectWithMeasurementID(id), - } - m.computeOddities() - return m +// - port is the port for the endpoint. +func (m *DNSMeasurement) allEndpointsForDomain(domain, port string) (out []*Endpoint) { + out = append(out, m.allTCPEndpoints(domain, port)...) + out = append(out, m.allQUICEndpoints(domain, port)...) + return } -// computeOddities computes all the oddities inside m. See -// newMeasurement's docs for more information. -func (m *Measurement) computeOddities() { - unique := make(map[Oddity]bool) - for _, ev := range m.Connect { - unique[ev.Oddity] = true - } - for _, ev := range m.TLSHandshake { - unique[ev.Oddity] = true - } - for _, ev := range m.QUICHandshake { - unique[ev.Oddity] = true - } - for _, ev := range m.LookupHost { - unique[ev.Oddity] = true - } - for _, ev := range m.HTTPRoundTrip { - unique[ev.Oddity] = true +// AllEndpointsForDomain gathers all the endpoints for a given domain from +// a list of DNSMeasurements, removes duplicates and returns the result. 
+func AllEndpointsForDomain(domain, port string, meas ...*DNSMeasurement) ([]*Endpoint, error) { + var out []*Endpoint + for _, m := range meas { + epnt := m.allEndpointsForDomain(domain, port) + out = append(out, epnt...) } - for key := range unique { - if key != "" { - m.Oddities = append(m.Oddities, key) + return removeDuplicateEndpoints(out...), nil +} + +func (m *DNSMeasurement) allTCPEndpoints(domain, port string) (out []*Endpoint) { + for _, entry := range m.LookupHost { + if domain != entry.Domain { + continue + } + for _, addr := range entry.Addrs { + if net.ParseIP(addr) == nil { + continue // skip CNAME entries courtesy the WCTH + } + out = append(out, m.newEndpoint(addr, port, NetworkTCP)) } } + return } -// URLMeasurement is the measurement of a whole URL. It contains -// a bunch of measurements detailing each measurement step. -type URLMeasurement struct { - // URL is the URL we're measuring. - URL string - - // CannotParseURL is true if the input URL could not be parsed. - CannotParseURL bool - - // DNS contains all the DNS related measurements. - DNS []*Measurement +func (m *DNSMeasurement) allQUICEndpoints(domain, port string) (out []*Endpoint) { + for _, entry := range m.LookupHTTPSSvc { + if domain != entry.Domain { + continue + } + if !m.supportsHTTP3(entry) { + continue + } + addrs := append([]string{}, entry.IPv4...) + for _, addr := range append(addrs, entry.IPv6...) { + out = append(out, m.newEndpoint(addr, port, NetworkQUIC)) + } + } + return +} - // TH contains all the measurements from the test helpers. - TH []*Measurement +func (m *DNSMeasurement) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint { + return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} +} - // CannotGenerateEndpoints for URL is true if the code tasked of - // generating a list of endpoints for the URL fails. 
- CannotGenerateEndpoints bool +func (m *DNSMeasurement) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { + for _, alpn := range entry.ALPN { + switch alpn { + case "h3": + return true + } + } + return false +} - // Endpoints contains a measurement for each endpoint - // that we discovered via DNS or TH. - Endpoints []*Measurement +// allHTTPEndpointsForURL returns all the HTTPEndpoints matching +// a specific URL's domain inside this measurement. +// +// Arguments: +// +// - URL is the URL for which we want endpoints; +// +// Returns a list of endpoints or an error. +func (m *DNSMeasurement) allHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { + domain := URL.Hostname() + port, err := PortFromURL(URL) + if err != nil { + return nil, err + } + epnts := m.allEndpointsForDomain(domain, port) + var out []*HTTPEndpoint + for _, epnt := range epnts { + if URL.Scheme != "https" && epnt.Network == NetworkQUIC { + continue // we'll only use QUIC with HTTPS + } + out = append(out, &HTTPEndpoint{ + Domain: domain, + Network: epnt.Network, + Address: epnt.Address, + SNI: domain, + ALPN: alpnForHTTPEndpoint(epnt.Network), + URL: URL, + Header: NewHTTPRequestHeaderForMeasuring(), + }) + } + return out, nil +} - // RedirectURLs contain the URLs to which we should fetch - // if we choose to follow redirections. - RedirectURLs []string +// AllHTTPEndpointsForURL gathers all the HTTP endpoints for a given +// URL from a list of DNSMeasurements, removes duplicates and returns +// the result. This call may fail if we cannot determine the port +// from the URL, in which case we return an error. +func AllHTTPEndpointsForURL(URL *url.URL, meas ...*DNSMeasurement) ([]*HTTPEndpoint, error) { + var out []*HTTPEndpoint + for _, m := range meas { + epnt, err := m.allHTTPEndpointsForURL(URL) + if err != nil { + return nil, err + } + out = append(out, epnt...) + } + return removeDuplicateHTTPEndpoints(out...), nil +} - // TotalRuntime is the total time to measure this URL. 
- TotalRuntime time.Duration +// EndpointMeasurement is an endpoint measurement. +type EndpointMeasurement struct { + // Endpoint is the endpoint this measurement refers to. + Endpoint string `json:"endpoint"` - // DNSRuntime is the time to run all DNS checks. - DNSRuntime time.Duration + // An EndpointMeasurement is a Measurement. + *Measurement +} - // THRuntime is the total time to invoke all test helpers. - THRuntime time.Duration +// HTTPEndpointMeasurement is an HTTP endpoint measurement. +type HTTPEndpointMeasurement struct { + // URL is the URL this measurement refers to. + URL string `json:"url"` - // EpntsRuntime is the total time to check all the endpoints. - EpntsRuntime time.Duration -} + // Endpoint is the endpoint this measurement refers to. + Endpoint string `json:"endpoint"` -// fillRedirects takes in input a complete URLMeasurement and fills -// the field named Redirects with all redirections. -func (m *URLMeasurement) fillRedirects() { - dups := make(map[string]bool) - for _, epnt := range m.Endpoints { - for _, redir := range epnt.HTTPRedirect { - loc := redir.Location.String() - if _, found := dups[loc]; found { - continue - } - dups[loc] = true - m.RedirectURLs = append(m.RedirectURLs, loc) - } - } + // An HTTPEndpointMeasurement is a Measurement. + *Measurement } diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 50d8e7a389..0c7e17d217 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -19,13 +19,15 @@ import ( "time" "github.com/apex/log" + "github.com/lucas-clemente/quic-go" "github.com/ooni/probe-cli/v3/internal/netxlite" ) -// Measurer performs measurements. +// Measurer performs measurements. If you don't use a factory +// for creating this type, make sure you set all the MANDATORY fields. type Measurer struct { - // DB is the MANDATORY database to use. - DB *DB + // Begin is when we started measuring (this field is MANDATORY). 
+ Begin time.Time // HTTPClient is the MANDATORY HTTP client for the WCTH. HTTPClient HTTPClient @@ -33,8 +35,8 @@ type Measurer struct { // Logger is the MANDATORY logger to use. Logger Logger - // Origin is the MANDATORY measurements origin to use. - Origin Origin + // Resolvers is the MANDATORY list of resolvers. + Resolvers []*ResolverInfo // TLSHandshaker is the MANDATORY TLS handshaker. TLSHandshaker netxlite.TLSHandshaker @@ -43,32 +45,36 @@ type Measurer struct { // NewMeasurerWithDefaultSettings creates a new Measurer // instance using the most default settings. func NewMeasurerWithDefaultSettings() *Measurer { - db := NewDB(time.Now()) return &Measurer{ - DB: db, - HTTPClient: &http.Client{}, - Logger: log.Log, - Origin: OriginProbe, + Begin: time.Now(), + HTTPClient: &http.Client{}, + Logger: log.Log, + Resolvers: []*ResolverInfo{{ + Network: "system", + Address: "", + }, { + Network: "udp", + Address: "8.8.4.4:53", + }}, TLSHandshaker: netxlite.NewTLSHandshakerStdlib(log.Log), } } -func (mx *Measurer) nextMeasurement() int64 { - return mx.DB.NextMeasurementID() -} - // LookupHostSystem performs a LookupHost using the system resolver. -func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measurement { +func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *DNSMeasurement { const timeout = 4 * time.Second ol := newOperationLogger(mx.Logger, "LookupHost %s with getaddrinfo", domain) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - mid := mx.nextMeasurement() - r := NewResolverSystem(mid, mx.Origin, mx.DB, mx.Logger) + db := &MeasurementDB{} + r := mx.NewResolverSystem(db, mx.Logger) defer r.CloseIdleConnections() _, err := r.LookupHost(ctx, domain) ol.Stop(err) - return NewMeasurement(mx.DB, mid) + return &DNSMeasurement{ + Domain: domain, + Measurement: db.AsMeasurement(), + } } // LookupHostUDP is like LookupHostSystem but uses an UDP resolver. 
@@ -81,19 +87,22 @@ func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *Measur // // - address is the UDP resolver address (e.g., "dns.google:53"). // -// Returns a Measurement. +// Returns a DNSMeasurement. func (mx *Measurer) LookupHostUDP( - ctx context.Context, domain, address string) *Measurement { + ctx context.Context, domain, address string) *DNSMeasurement { const timeout = 4 * time.Second ol := newOperationLogger(mx.Logger, "LookupHost %s with %s/udp", domain, address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - mid := mx.nextMeasurement() - r := NewResolverUDP(mid, mx.Origin, mx.DB, mx.Logger, address) + db := &MeasurementDB{} + r := mx.NewResolverUDP(db, mx.Logger, address) defer r.CloseIdleConnections() _, err := r.LookupHost(ctx, domain) ol.Stop(err) - return NewMeasurement(mx.DB, mid) + return &DNSMeasurement{ + Domain: domain, + Measurement: db.AsMeasurement(), + } } // LookupHTTPSSvcUDP issues an HTTPSSvc query for the given domain. @@ -106,19 +115,22 @@ func (mx *Measurer) LookupHostUDP( // // - address is the UDP resolver address (e.g., "dns.google:53"). // -// Returns a Measurement. +// Returns a DNSMeasurement. func (mx *Measurer) LookupHTTPSSvcUDP( - ctx context.Context, domain, address string) *Measurement { + ctx context.Context, domain, address string) *DNSMeasurement { const timeout = 4 * time.Second ol := newOperationLogger(mx.Logger, "LookupHTTPSvc %s with %s/udp", domain, address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - mid := mx.nextMeasurement() - r := NewResolverUDP(mid, mx.Origin, mx.DB, mx.Logger, address) + db := &MeasurementDB{} + r := mx.NewResolverUDP(db, mx.Logger, address) defer r.CloseIdleConnections() _, err := r.LookupHTTPSSvcWithoutRetry(ctx, domain) ol.Stop(err) - return NewMeasurement(mx.DB, mid) + return &DNSMeasurement{ + Domain: domain, + Measurement: db.AsMeasurement(), + } } // TCPConnect establishes a connection with a TCP endpoint. 
@@ -129,25 +141,30 @@ func (mx *Measurer) LookupHTTPSSvcUDP( // // - address is the TCP endpoint address (e.g., "8.8.4.4:443"). // -// Returns a Measurement. -func (mx *Measurer) TCPConnect(ctx context.Context, address string) *Measurement { - mid := mx.nextMeasurement() - conn, _ := mx.tcpConnect(ctx, mid, address) - measurement := NewMeasurement(mx.DB, mid) +// Returns an EndpointMeasurement. +func (mx *Measurer) TCPConnect(ctx context.Context, address string) *EndpointMeasurement { + db := &MeasurementDB{} + conn, _ := mx.tcpConnect(ctx, db, address) + measurement := db.AsMeasurement() if conn != nil { conn.Close() } - return measurement + return &EndpointMeasurement{ + Endpoint: (&Endpoint{ + Network: NetworkTCP, + Address: address, + }).String(), + Measurement: measurement, + } } // tcpConnect is like TCPConnect but does not create a new measurement. -func (mx *Measurer) tcpConnect(ctx context.Context, - measurementID int64, address string) (Conn, error) { +func (mx *Measurer) tcpConnect(ctx context.Context, db WritableDB, address string) (Conn, error) { const timeout = 10 * time.Second ol := newOperationLogger(mx.Logger, "TCPConnect %s", address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - d := NewDialerWithoutResolver(measurementID, mx.Origin, mx.DB, mx.Logger) + d := mx.NewDialerWithoutResolver(db, mx.Logger) defer d.CloseIdleConnections() conn, err := d.DialContext(ctx, "tcp", address) ol.Stop(err) @@ -183,23 +200,29 @@ func (mx *Measurer) tcpConnect(ctx context.Context, // will not only depend on the config field but also on the // utls.ClientHelloID thay you're using. // -// Returns a Measurement. +// Returns an EndpointMeasurement. 
func (mx *Measurer) TLSConnectAndHandshake(ctx context.Context, - address string, config *tls.Config) *Measurement { - mid := mx.nextMeasurement() - conn, _ := mx.tlsConnectAndHandshake(ctx, mid, address, config) - measurement := NewMeasurement(mx.DB, mid) + address string, config *tls.Config) *EndpointMeasurement { + db := &MeasurementDB{} + conn, _ := mx.tlsConnectAndHandshake(ctx, db, address, config) + measurement := db.AsMeasurement() if conn != nil { conn.Close() } - return measurement + return &EndpointMeasurement{ + Endpoint: (&Endpoint{ + Network: NetworkTCP, + Address: address, + }).String(), + Measurement: measurement, + } } // tlsConnectAndHandshake is like TLSConnectAndHandshake // but does not create a new measurement. func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, - measurementID int64, address string, config *tls.Config) (TLSConn, error) { - conn, err := mx.tcpConnect(ctx, measurementID, address) + db WritableDB, address string, config *tls.Config) (netxlite.TLSConn, error) { + conn, err := mx.tcpConnect(ctx, db, address) if err != nil { return nil, err } @@ -208,10 +231,11 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, "TLSHandshake %s with sni=%s", address, config.ServerName) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - th := WrapTLSHandshaker(measurementID, mx.Origin, mx.DB, mx.TLSHandshaker) - tlsConn, err := th.Handshake(ctx, conn, config) + th := mx.WrapTLSHandshaker(db, mx.TLSHandshaker) + tlsConn, _, err := th.Handshake(ctx, conn, config) ol.Stop(err) - return tlsConn, err + // cast safe according to the docs of netxlite's handshaker + return tlsConn.(netxlite.TLSConn), err } // QUICHandshake connects and TLS handshakes with a QUIC endpoint. @@ -234,34 +258,36 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, // - NextProtos to the desired ALPN ([]string{"h2", "http/1.1"} for // HTTPS and []string{"dot"} for DNS-over-TLS). // -// Returns a Measurement. 
+// Returns an EndpointMeasurement. func (mx *Measurer) QUICHandshake(ctx context.Context, address string, - config *tls.Config) *Measurement { - mid := mx.nextMeasurement() - sess, _ := mx.quicHandshake(ctx, mid, address, config) - measurement := NewMeasurement(mx.DB, mid) + config *tls.Config) *EndpointMeasurement { + db := &MeasurementDB{} + sess, _ := mx.quicHandshake(ctx, db, address, config) + measurement := db.AsMeasurement() if sess != nil { // TODO(bassosimone): close session with correct message sess.CloseWithError(0, "") } - return measurement + return &EndpointMeasurement{ + Endpoint: (&Endpoint{ + Network: NetworkQUIC, + Address: address, + }).String(), + Measurement: measurement, + } } // quicHandshake is like QUICHandshake but does not create a new measurement. -func (mx *Measurer) quicHandshake(ctx context.Context, measurementID int64, - address string, config *tls.Config) (QUICEarlySession, error) { +func (mx *Measurer) quicHandshake(ctx context.Context, db WritableDB, + address string, config *tls.Config) (quic.EarlySession, error) { const timeout = 10 * time.Second ol := newOperationLogger(mx.Logger, "QUICHandshake %s with sni=%s", address, config.ServerName) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() - qd := WrapQUICDialer(measurementID, mx.Origin, mx.DB, - netxlite.NewQUICDialerWithoutResolver(WrapQUICListener( - measurementID, mx.Origin, mx.DB, netxlite.NewQUICListener()), - mx.Logger, - )) + qd := mx.NewQUICDialerWithoutResolver(db, mx.Logger) defer qd.CloseIdleConnections() - sess, err := qd.DialContext(ctx, address, config) + sess, err := qd.DialContext(ctx, "udp", address, config, &quic.Config{}) ol.Stop(err) return sess, err } @@ -282,7 +308,7 @@ func (mx *Measurer) quicHandshake(ctx context.Context, measurementID int64, // Returns a measurement. The returned measurement is empty if // the endpoint is misconfigured or the URL has an unknown scheme. 
func (mx *Measurer) HTTPEndpointGet( - ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) *Measurement { + ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) *HTTPEndpointMeasurement { resp, m, _ := mx.httpEndpointGet(ctx, epnt, jar) if resp != nil { resp.Body.Close() @@ -299,7 +325,7 @@ var ( // for you to Resume it to deliver a result. type HTTPPreparedRequest struct { resp *http.Response - m *Measurement + m *HTTPEndpointMeasurement err error } @@ -310,7 +336,7 @@ func (r *HTTPPreparedRequest) Resume() (*http.Response, error) { } // Measurement returns the associated measurement. -func (r *HTTPPreparedRequest) Measurement() *Measurement { +func (r *HTTPPreparedRequest) Measurement() *HTTPEndpointMeasurement { return r.m } @@ -337,15 +363,38 @@ func (mx *Measurer) HTTPEndpointPrepareGet(ctx context.Context, // httpEndpointGet implements HTTPEndpointGet. func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, + jar http.CookieJar) (*http.Response, *HTTPEndpointMeasurement, error) { + resp, m, err := mx.httpEndpointGetMeasurement(ctx, epnt, jar) + out := &HTTPEndpointMeasurement{ + URL: epnt.URL.String(), + Endpoint: (&Endpoint{ + Network: epnt.Network, + Address: epnt.Address, + }).String(), + Measurement: m, + } + return resp, out, err +} + +// httpEndpointGetMeasurement implements httpEndpointGet. 
+// +// This function returns a triple where: +// +// - the first element is a valid response on success a nil response on failure +// +// - the second element is always a valid Measurement +// +// - the third element is a nil error on success and an error on failure +func (mx *Measurer) httpEndpointGetMeasurement(ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (resp *http.Response, m *Measurement, err error) { - mid := mx.nextMeasurement() + db := &MeasurementDB{} switch epnt.Network { case NetworkQUIC: - resp, err = mx.httpEndpointGetQUIC(ctx, mid, epnt, jar) - m = NewMeasurement(mx.DB, mid) + resp, err = mx.httpEndpointGetQUIC(ctx, db, epnt, jar) + m = db.AsMeasurement() case NetworkTCP: - resp, err = mx.httpEndpointGetTCP(ctx, mid, epnt, jar) - m = NewMeasurement(mx.DB, mid) + resp, err = mx.httpEndpointGetTCP(ctx, db, epnt, jar) + m = db.AsMeasurement() default: m, err = &Measurement{}, errUnknownHTTPEndpointNetwork } @@ -354,12 +403,12 @@ func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, // httpEndpointGetTCP specializes HTTPSEndpointGet for HTTP and HTTPS. func (mx *Measurer) httpEndpointGetTCP(ctx context.Context, - measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { + db WritableDB, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { switch epnt.URL.Scheme { case "http": - return mx.httpEndpointGetHTTP(ctx, measurementID, epnt, jar) + return mx.httpEndpointGetHTTP(ctx, db, epnt, jar) case "https": - return mx.httpEndpointGetHTTPS(ctx, measurementID, epnt, jar) + return mx.httpEndpointGetHTTPS(ctx, db, epnt, jar) default: return nil, errUnknownHTTPEndpointURLScheme } @@ -367,32 +416,32 @@ func (mx *Measurer) httpEndpointGetTCP(ctx context.Context, // httpEndpointGetHTTP specializes httpEndpointGetTCP for HTTP. 
func (mx *Measurer) httpEndpointGetHTTP(ctx context.Context, - measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { + db WritableDB, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err } req.Header = epnt.Header - conn, err := mx.tcpConnect(ctx, measurementID, epnt.Address) + conn, err := mx.tcpConnect(ctx, db, epnt.Address) if err != nil { return nil, err } defer conn.Close() // we own it - clnt := NewHTTPClientWithoutRedirects(measurementID, mx.Origin, mx.DB, jar, - NewHTTPTransportWithConn(measurementID, mx.Origin, mx.Logger, mx.DB, conn)) + clnt := NewHTTPClientWithoutRedirects(db, jar, + mx.NewHTTPTransportWithConn(mx.Logger, db, conn)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) } // httpEndpointGetHTTPS specializes httpEndpointGetTCP for HTTPS. func (mx *Measurer) httpEndpointGetHTTPS(ctx context.Context, - measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { + db WritableDB, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err } req.Header = epnt.Header - conn, err := mx.tlsConnectAndHandshake(ctx, measurementID, epnt.Address, &tls.Config{ + conn, err := mx.tlsConnectAndHandshake(ctx, db, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -401,21 +450,21 @@ func (mx *Measurer) httpEndpointGetHTTPS(ctx context.Context, return nil, err } defer conn.Close() // we own it - clnt := NewHTTPClientWithoutRedirects(measurementID, mx.Origin, mx.DB, jar, - NewHTTPTransportWithTLSConn(measurementID, mx.Origin, mx.Logger, mx.DB, conn)) + clnt := NewHTTPClientWithoutRedirects(db, jar, + mx.NewHTTPTransportWithTLSConn(mx.Logger, db, conn)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, 
req) } // httpEndpointGetQUIC specializes httpEndpointGetTCP for QUIC. func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, - measurementID int64, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { + db WritableDB, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) if err != nil { return nil, err } req.Header = epnt.Header - sess, err := mx.quicHandshake(ctx, measurementID, epnt.Address, &tls.Config{ + sess, err := mx.quicHandshake(ctx, db, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -425,8 +474,8 @@ func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, } // TODO(bassosimone): close session with correct message defer sess.CloseWithError(0, "") // we own it - clnt := NewHTTPClientWithoutRedirects(measurementID, mx.Origin, mx.DB, jar, - NewHTTPTransportWithQUICSess(measurementID, mx.Origin, mx.Logger, mx.DB, sess)) + clnt := NewHTTPClientWithoutRedirects(db, jar, + mx.NewHTTPTransportWithQUICSess(mx.Logger, db, sess)) defer clnt.CloseIdleConnections() return mx.httpClientDo(ctx, clnt, epnt, req) } @@ -443,60 +492,17 @@ func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, return resp, err } -// LookupWCTH performs an Endpoint lookup using the WCTH (i.e., -// the Web Connectivity Test Helper) web service. -// -// Arguments: -// -// - ctx is the context carrying timeouts; -// -// - URL is the URL for which we're looking up endpoints; -// -// - endpoints is the list of endpoints discovered so far using -// the means available to the probe (e.g., DNS); -// -// - port is the port for the endpoints. -// -// This function will safely discard any non-TCP endpoints -// in the input list and will only use TCP endpoints. -// -// Returns a measurement. 
-func (mx *Measurer) LookupWCTH(ctx context.Context, URL *url.URL, - endpoints []*Endpoint, port string, WCTHURL string) *Measurement { - const timeout = 30 * time.Second - ol := newOperationLogger(mx.Logger, "WCTH %s with %s", URL.String(), WCTHURL) - ctx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - mid := mx.nextMeasurement() - w := NewWCTHWorker(mid, mx.Logger, mx.DB, mx.HTTPClient, WCTHURL) - _, err := w.Run(ctx, URL, mx.onlyTCPEndpoints(endpoints)) - ol.Stop(err) - return NewMeasurement(mx.DB, mid) -} - -// onlyTCPEndpoints takes in input a list of endpoints and returns -// in output a list of endpoints only containing the TCP ones. -func (mx *Measurer) onlyTCPEndpoints(endpoints []*Endpoint) (out []string) { - for _, epnt := range endpoints { - switch epnt.Network { - case NetworkTCP: - out = append(out, epnt.Address) - } - } - return -} - // HTTPEndpointGetParallel performs an HTTPEndpointGet for each // input endpoint using a pool of background goroutines. // // This function returns to the caller a channel where to read // measurements from. The channel is closed when done. func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, - jar http.CookieJar, epnts ...*HTTPEndpoint) <-chan *Measurement { + jar http.CookieJar, epnts ...*HTTPEndpoint) <-chan *HTTPEndpointMeasurement { var ( done = make(chan interface{}) input = make(chan *HTTPEndpoint) - output = make(chan *Measurement) + output = make(chan *HTTPEndpointMeasurement) ) go func() { defer close(input) @@ -522,29 +528,27 @@ func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, return output } -// RegisterUDPResolvers registers UDP resolvers into the DB. -func (mx *Measurer) RegisterUDPResolvers(resolvers ...string) { - for _, resolver := range resolvers { - mx.DB.InsertIntoResolvers("udp", resolver) - } +// ResolverInfo contains info about a DNS resolver. 
+type ResolverInfo struct { + // Network is the resolver's network (e.g., "doh", "udp") + Network string + + // Address is the address (e.g., "1.1.1.1:53", "https://1.1.1.1/dns-query") + Address string } // LookupURLHostParallel performs an LookupHost-like operation for each -// DNS resolver registered into the database using a pool of background -// goroutines. -// -// This function returns to the caller a channel where to read -// measurements from. The channel is closed when done. -func (mx *Measurer) LookupURLHostParallel( - ctx context.Context, URL *url.URL) <-chan *Measurement { +// resolver that you provide as argument using a pool of goroutines. +func (mx *Measurer) LookupURLHostParallel(ctx context.Context, + URL *url.URL, resos ...*ResolverInfo) <-chan *DNSMeasurement { var ( done = make(chan interface{}) resolvers = make(chan *ResolverInfo) - output = make(chan *Measurement) + output = make(chan *DNSMeasurement) ) go func() { defer close(resolvers) - for _, reso := range mx.DB.SelectAllFromResolvers() { + for _, reso := range resos { resolvers <- reso } }() @@ -570,7 +574,7 @@ func (mx *Measurer) LookupURLHostParallel( // operation using the given ResolverInfo. func (mx *Measurer) lookupHostWithResolverInfo( ctx context.Context, reso *ResolverInfo, URL *url.URL, - output chan<- *Measurement) { + output chan<- *DNSMeasurement) { switch reso.Network { case "system": output <- mx.LookupHostSystem(ctx, URL.Hostname()) @@ -595,67 +599,19 @@ func (mx *Measurer) lookupHostWithResolverInfo( // determine whether to perform HTTPSSvc lookups and so we aren't // going to perform this kind of lookups in this case. func (mx *Measurer) LookupHostParallel( - ctx context.Context, hostname, port string) <-chan *Measurement { - return mx.LookupURLHostParallel(ctx, &url.URL{ - Scheme: "", // so we don't see https and we don't try HTTPSSvc - Host: net.JoinHostPort(hostname, port), - }) -} - -// RegisterWCTH registers URLs for the WCTH. 
-func (mx *Measurer) RegisterWCTH(URLs ...string) { - for _, URL := range URLs { - mx.DB.InsertIntoTestHelpers("wcth", URL) - } -} - -// QueryTestHelperParallel performs a parallel query for the -// given URL to all known test helpers. -func (mx *Measurer) QueryTestHelperParallel( - ctx context.Context, URL *url.URL) <-chan *Measurement { - var ( - done = make(chan interface{}) - ths = make(chan *TestHelperInfo) - output = make(chan *Measurement) - ) + ctx context.Context, hostname, port string) <-chan *DNSMeasurement { + out := make(chan *DNSMeasurement) go func() { - defer close(ths) - for _, th := range mx.DB.SelectAllFromTestHelpers() { - ths <- th + defer close(out) + URL := &url.URL{ + Scheme: "", // so we don't see https and we don't try HTTPSSvc + Host: net.JoinHostPort(hostname, port), } - }() - const parallelism = 1 // maybe raise in the future? - for i := 0; i < parallelism; i++ { - go func() { - for th := range ths { - mx.asyncTestHelperQuery(ctx, th, URL, output) - } - done <- true - }() - } - go func() { - for i := 0; i < parallelism; i++ { - <-done + for m := range mx.LookupURLHostParallel(ctx, URL) { + out <- &DNSMeasurement{Domain: hostname, Measurement: m.Measurement} } - close(output) }() - return output -} - -func (mx *Measurer) asyncTestHelperQuery( - ctx context.Context, th *TestHelperInfo, URL *url.URL, - output chan<- *Measurement) { - switch th.Protocol { - case "wcth": - port, err := PortFromURL(URL) - if err != nil { - return // TODO(bassosimone): what to do about this error? - } - endpoints := mx.DB.SelectAllEndpointsForDomain(URL.Hostname(), port) - output <- mx.LookupWCTH(ctx, URL, endpoints, port, th.URL) - default: - // don't know what to do - } + return out } // MeasureURL measures an HTTP or HTTPS URL. The DNS resolvers @@ -684,30 +640,26 @@ func (mx *Measurer) asyncTestHelperQuery( // redirect properly without cookies. This has been // documented at https://github.com/ooni/probe/issues/1727. 
func (mx *Measurer) MeasureURL( - ctx context.Context, URL string, cookies http.CookieJar) *URLMeasurement { + ctx context.Context, URL string, cookies http.CookieJar) (*URLMeasurement, error) { mx.Logger.Infof("MeasureURL url=%s", URL) m := &URLMeasurement{URL: URL} begin := time.Now() defer func() { m.TotalRuntime = time.Since(begin) }() parsed, err := url.Parse(URL) if err != nil { - m.CannotParseURL = true - return m + return nil, err + } + if len(mx.Resolvers) < 1 { + return nil, errors.New("measurer: no configured resolver") } dnsBegin := time.Now() - for dns := range mx.LookupURLHostParallel(ctx, parsed) { + for dns := range mx.LookupURLHostParallel(ctx, parsed, mx.Resolvers...) { m.DNS = append(m.DNS, dns) } m.DNSRuntime = time.Since(dnsBegin) - thBegin := time.Now() - for th := range mx.QueryTestHelperParallel(ctx, parsed) { - m.TH = append(m.TH, th) - } - m.THRuntime = time.Since(thBegin) - epnts, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + epnts, err := AllHTTPEndpointsForURL(parsed, m.DNS...) if err != nil { - m.CannotGenerateEndpoints = true - return m + return nil, err } epntRuntime := time.Now() for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, epnts...) 
{ @@ -715,7 +667,7 @@ func (mx *Measurer) MeasureURL( } m.EpntsRuntime = time.Since(epntRuntime) m.fillRedirects() - return m + return m, nil } // redirectionQueue is the type we use to manage the redirection @@ -750,15 +702,23 @@ func (mx *Measurer) MeasureHTTPURLAndFollowRedirections(ctx context.Context, out := make(chan *URLMeasurement) go func() { defer close(out) - m := mx.MeasureURL(ctx, URL, cookies) - out <- m - rq := &redirectionQueue{q: m.RedirectURLs} + meas, err := mx.MeasureURL(ctx, URL, cookies) + if err != nil { + mx.Logger.Warnf("mx.MeasureURL failed: %s", err.Error()) + return + } + out <- meas + rq := &redirectionQueue{q: meas.RedirectURLs} const maxRedirects = 7 for !rq.empty() && rq.redirectionsCount() < maxRedirects { URL = rq.popleft() - m = mx.MeasureURL(ctx, URL, cookies) - out <- m - rq.append(m.RedirectURLs...) + meas, err = mx.MeasureURL(ctx, URL, cookies) + if err != nil { + mx.Logger.Warnf("mx.MeasureURL failed: %s", err.Error()) + return + } + out <- meas + rq.append(meas.RedirectURLs...) } }() return out diff --git a/internal/measurex/origin.go b/internal/measurex/origin.go deleted file mode 100644 index ab9c7a06a6..0000000000 --- a/internal/measurex/origin.go +++ /dev/null @@ -1,18 +0,0 @@ -package measurex - -// -// Origin -// -// Here we define the origin type. -// - -// Origin is the origin of a measurement. -type Origin string - -var ( - // OriginProbe means that the probe performed this measurement. - OriginProbe = Origin("probe") - - // OriginTH means that the test helper performed this measurement. - OriginTH = Origin("th") -) diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index c471fe9d8c..0728828269 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -3,8 +3,7 @@ package measurex // // QUIC // -// Wrappers for netxlite's QUIC code that are capable of -// saving interesting events into an EventDB. +// Wrappers for QUIC to store events into a WritableDB. 
// import ( @@ -19,229 +18,130 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite/quicx" ) +// QUICConn is the kind of conn used by QUIC. +type QUICConn = quicx.UDPLikeConn + +// QUICDialer creates QUICSesssions. +type QUICDialer = netxlite.QUICDialer + // QUICListener creates listening connections for QUIC. type QUICListener = netxlite.QUICListener -// WrapQUICListener takes in input a netxlite.QUICListener and returns -// a new listener that saves measurements into the DB. -func WrapQUICListener(measurementID int64, - origin Origin, db EventDB, ql netxlite.QUICListener) QUICListener { - return &quicListenerx{ - QUICListener: ql, - db: db, - mid: measurementID, - origin: origin, - } -} - -type quicListenerx struct { +type quicListenerDB struct { netxlite.QUICListener - db EventDB - mid int64 - origin Origin + begin time.Time + db WritableDB } -// QUICPacketConn is an UDP PacketConn used by QUIC. -type QUICPacketConn = quicx.UDPLikeConn - -func (ql *quicListenerx) Listen(addr *net.UDPAddr) (QUICPacketConn, error) { +func (ql *quicListenerDB) Listen(addr *net.UDPAddr) (QUICConn, error) { pconn, err := ql.QUICListener.Listen(addr) if err != nil { return nil, err } - return &quicUDPLikeConnx{ + return &udpLikeConnDB{ UDPLikeConn: pconn, - connID: ql.db.NextConnID(), + begin: ql.begin, db: ql.db, - localAddr: pconn.LocalAddr().String(), - origin: ql.origin, - mid: ql.mid, }, nil } -type quicUDPLikeConnx struct { +type udpLikeConnDB struct { quicx.UDPLikeConn - connID int64 - db EventDB - localAddr string - mid int64 - origin Origin + begin time.Time + db WritableDB } -func (c *quicUDPLikeConnx) WriteTo(p []byte, addr net.Addr) (int, error) { - started := c.db.ElapsedTime() +func (c *udpLikeConnDB) WriteTo(p []byte, addr net.Addr) (int, error) { + started := time.Since(c.begin).Seconds() count, err := c.UDPLikeConn.WriteTo(p, addr) - finished := c.db.ElapsedTime() + finished := time.Since(c.begin).Seconds() c.db.InsertIntoReadWrite(&NetworkEvent{ - Origin: 
c.origin, - MeasurementID: c.mid, - ConnID: c.connID, - Operation: "write_to", - Network: string(NetworkQUIC), - RemoteAddr: addr.String(), - LocalAddr: c.localAddr, - Started: started, - Finished: finished, - Error: err, - Count: count, + Operation: "write_to", + Network: "quic", + RemoteAddr: addr.String(), + Started: started, + Finished: finished, + Error: err, + Count: count, }) return count, err } -func (c *quicUDPLikeConnx) ReadFrom(b []byte) (int, net.Addr, error) { - started := c.db.ElapsedTime() +func (c *udpLikeConnDB) ReadFrom(b []byte) (int, net.Addr, error) { + started := time.Since(c.begin).Seconds() count, addr, err := c.UDPLikeConn.ReadFrom(b) - finished := c.db.ElapsedTime() + finished := time.Since(c.begin).Seconds() c.db.InsertIntoReadWrite(&NetworkEvent{ - Origin: c.origin, - MeasurementID: c.mid, - ConnID: c.connID, - Operation: "read_from", - Network: string(NetworkQUIC), - RemoteAddr: c.addrStringIfNotNil(addr), - LocalAddr: c.localAddr, - Started: started, - Finished: finished, - Error: err, - Count: count, + Operation: "read_from", + Network: "quic", + RemoteAddr: addrStringIfNotNil(addr), + Started: started, + Finished: finished, + Error: err, + Count: count, }) return count, addr, err } -func (c *quicUDPLikeConnx) addrStringIfNotNil(addr net.Addr) (out string) { - if addr != nil { - out = addr.String() - } - return -} - -func (c *quicUDPLikeConnx) Close() error { - started := c.db.ElapsedTime() +func (c *udpLikeConnDB) Close() error { + started := time.Since(c.begin).Seconds() err := c.UDPLikeConn.Close() - finished := c.db.ElapsedTime() + finished := time.Since(c.begin).Seconds() c.db.InsertIntoReadWrite(&NetworkEvent{ - Origin: c.origin, - MeasurementID: c.mid, - ConnID: c.connID, - Operation: "close", - Network: string(NetworkQUIC), - RemoteAddr: "", - LocalAddr: c.localAddr, - Started: started, - Finished: finished, - Error: err, - Count: 0, + Operation: "close", + Network: "quic", + RemoteAddr: "", + Started: started, + Finished: 
finished, + Error: err, + Count: 0, }) return err } -// LocalAddr returns the local address and also implements a -// hack to pass to the session the ConnID. -func (c *quicUDPLikeConnx) LocalAddr() net.Addr { - localAddr := c.UDPLikeConn.LocalAddr() - if localAddr == nil { - return nil - } - return &quicLocalAddrx{ - Addr: localAddr, - connID: c.connID, - mid: c.mid, - } -} - -type quicLocalAddrx struct { - net.Addr - connID int64 - mid int64 -} - -// QUICEarlySession is the type we use to wrap quic.EarlySession. This -// kind of session knows about the underlying ConnID. -type QUICEarlySession interface { - quic.EarlySession - - ConnID() int64 -} - -// QUICDialer creates QUIC sessions. This kind of dialer will -// save QUIC handshake measurements into the DB. -type QUICDialer interface { - DialContext(ctx context.Context, address string, - tlsConfig *tls.Config) (QUICEarlySession, error) - - CloseIdleConnections() -} - // QUICHandshakeEvent is the result of QUICHandshake. -type QUICHandshakeEvent struct { - Origin Origin - MeasurementID int64 - ConnID int64 - Network string - RemoteAddr string - LocalAddr string - SNI string - ALPN []string - SkipVerify bool - Started time.Duration - Finished time.Duration - Error error - Oddity Oddity - TLSVersion string - CipherSuite string - NegotiatedProto string - PeerCerts [][]byte -} +type QUICHandshakeEvent = TLSHandshakeEvent -// WrapQUICDialer creates a new QUICDialer that will save -// QUIC handshake events into the DB. -func WrapQUICDialer(measurementID int64, - origin Origin, db EventDB, dialer netxlite.QUICDialer) QUICDialer { - return &quicDialerx{ - QUICDialer: dialer, - origin: origin, - db: db, - mid: measurementID, - } +// NewQUICDialerWithoutResolver creates a new QUICDialer that is not +// attached to any resolver. This means that every attempt to dial any +// address containing a domain name will fail. This QUICDialer will +// save any event into the WritableDB. 
Any QUICConn created by it will +// likewise save any event into the WritableDB. +func (mx *Measurer) NewQUICDialerWithoutResolver(db WritableDB, logger Logger) QUICDialer { + return &quicDialerDB{db: db, logger: logger, begin: mx.Begin} } -type quicDialerx struct { +type quicDialerDB struct { netxlite.QUICDialer - db EventDB - mid int64 - origin Origin -} - -func (qh *quicDialerx) DialContext(ctx context.Context, - address string, tlsConfig *tls.Config) (QUICEarlySession, error) { - started := qh.db.ElapsedTime() - var ( - localAddr *quicLocalAddrx - state tls.ConnectionState - ) - sess, err := qh.QUICDialer.DialContext( - ctx, "udp", address, tlsConfig, &quic.Config{}) + begin time.Time + db WritableDB + logger Logger +} + +func (qh *quicDialerDB) DialContext(ctx context.Context, network, address string, + tlsConfig *tls.Config, quicConfig *quic.Config) (quic.EarlySession, error) { + started := time.Since(qh.begin).Seconds() + var state tls.ConnectionState + listener := &quicListenerDB{ + QUICListener: netxlite.NewQUICListener(), + begin: qh.begin, + db: qh.db, + } + dialer := netxlite.NewQUICDialerWithoutResolver(listener, qh.logger) + defer dialer.CloseIdleConnections() + sess, err := dialer.DialContext(ctx, network, address, tlsConfig, quicConfig) if err == nil { select { case <-sess.HandshakeComplete().Done(): state = sess.ConnectionState().TLS.ConnectionState - if addr := sess.LocalAddr(); addr != nil { - if laddr, ok := addr.(*quicLocalAddrx); ok { - localAddr = laddr - } - } case <-ctx.Done(): sess, err = nil, ctx.Err() } } - finished := qh.db.ElapsedTime() + finished := time.Since(qh.begin).Seconds() qh.db.InsertIntoQUICHandshake(&QUICHandshakeEvent{ - Origin: qh.origin, - MeasurementID: qh.mid, - ConnID: qh.connIDIfNotNil(localAddr), - Network: string(NetworkQUIC), + Network: "quic", RemoteAddr: address, - LocalAddr: qh.localAddrIfNotNil(localAddr), SNI: tlsConfig.ServerName, ALPN: tlsConfig.NextProtos, SkipVerify: tlsConfig.InsecureSkipVerify, @@ 
-252,16 +152,12 @@ func (qh *quicDialerx) DialContext(ctx context.Context, TLSVersion: netxlite.TLSVersionString(state.Version), CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), NegotiatedProto: state.NegotiatedProtocol, - PeerCerts: peerCerts(nil, &state), + PeerCerts: NewArchivalTLSCerts(peerCerts(nil, &state)), }) - if err != nil { - return nil, err - } - return &quicEarlySessionx{ - EarlySession: sess, connID: qh.connIDIfNotNil(localAddr)}, nil + return sess, err } -func (qh *quicDialerx) computeOddity(err error) Oddity { +func (qh *quicDialerDB) computeOddity(err error) Oddity { if err == nil { return "" } @@ -275,29 +171,6 @@ func (qh *quicDialerx) computeOddity(err error) Oddity { } } -func (qh *quicDialerx) connIDIfNotNil(addr *quicLocalAddrx) (out int64) { - if addr != nil { - out = addr.connID - } - return -} - -func (qh *quicDialerx) localAddrIfNotNil(addr *quicLocalAddrx) (out string) { - if addr != nil { - out = addr.String() - } - return -} - -func (qh *quicDialerx) CloseIdleConnections() { - qh.QUICDialer.CloseIdleConnections() -} - -type quicEarlySessionx struct { - quic.EarlySession - connID int64 -} - -func (qes *quicEarlySessionx) ConnID() int64 { - return qes.connID +func (qh *quicDialerDB) CloseIdleConnections() { + // nothing to do } diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 6a2a7caa3b..dec80b50ad 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -3,12 +3,12 @@ package measurex // // Resolver // -// Wrappers for netxlite's resolvers that are able -// to store events into an EventDB. +// Wrappers for Resolver to store events into a WritableDB. // import ( "context" + "encoding/json" "time" "github.com/ooni/probe-cli/v3/internal/netxlite" @@ -21,22 +21,17 @@ type HTTPSSvc = dnsx.HTTPSSvc // Resolver is the resolver type we use. This resolver will // store resolve events into the DB. 
-type Resolver interface { - netxlite.Resolver -} +type Resolver = netxlite.Resolver -// WrapResolver wraps a Resolver so that we save measurements into the DB. -func WrapResolver(measurementID int64, - origin Origin, db EventDB, r netxlite.Resolver) Resolver { - return &resolverx{Resolver: r, db: db, origin: origin, mid: measurementID} +// WrapResolver creates a new Resolver that saves events into the WritableDB. +func (mx *Measurer) WrapResolver(db WritableDB, r netxlite.Resolver) Resolver { + return &resolverDB{Resolver: r, db: db, begin: mx.Begin} } -// NewResolverSystem is a convenience factory for creating a -// system resolver that saves measurements into a DB. -func NewResolverSystem(measurementID int64, - origin Origin, db EventDB, logger Logger) Resolver { - return WrapResolver( - measurementID, origin, db, netxlite.NewResolverStdlib(logger)) +// NewResolverSystem creates a system resolver and then wraps +// it using the WrapResolver function/ +func (mx *Measurer) NewResolverSystem(db WritableDB, logger Logger) Resolver { + return mx.WrapResolver(db, netxlite.NewResolverStdlib(logger)) } // NewResolverUDP is a convenience factory for creating a Resolver @@ -44,123 +39,117 @@ func NewResolverSystem(measurementID int64, // // Arguments: // -// - measurementID is the measurement ID; -// -// - origin is OrigiProbe or OriginTH; -// // - db is where to save events; // // - logger is the logger; // // - address is the resolver address (e.g., "1.1.1.1:53"). 
-func NewResolverUDP(measurementID int64, - origin Origin, db EventDB, logger Logger, address string) Resolver { - return WrapResolver(measurementID, origin, db, - netxlite.WrapResolver(logger, dnsx.NewSerialResolver( - WrapDNSXRoundTripper(measurementID, origin, db, dnsx.NewDNSOverUDP( - &netxliteDialerAdapter{ - NewDialerWithSystemResolver( - measurementID, origin, db, logger), - }, +func (mx *Measurer) NewResolverUDP(db WritableDB, logger Logger, address string) Resolver { + return mx.WrapResolver(db, netxlite.WrapResolver( + logger, dnsx.NewSerialResolver( + mx.WrapDNSXRoundTripper(db, dnsx.NewDNSOverUDP( + mx.NewDialerWithSystemResolver(db, logger), address, )))), ) } -type resolverx struct { +type resolverDB struct { netxlite.Resolver - db EventDB - mid int64 - origin Origin + begin time.Time + db WritableDB } // LookupHostEvent contains the result of a host lookup. type LookupHostEvent struct { - Origin Origin - MeasurementID int64 - ConnID int64 // connID (typically zero) - Network string - Address string - Domain string - Started time.Duration - Finished time.Duration - Error error - Oddity Oddity - Addrs []string + Network string + Address string + Domain string + Started float64 + Finished float64 + Error error + Oddity Oddity + Addrs []string +} + +// MarshalJSON marshals a LookupHostEvent to the archival +// format compatible with df-002-dnst. 
+func (ev *LookupHostEvent) MarshalJSON() ([]byte, error) { + archival := NewArchivalLookupHostList(ev) + return json.Marshal(archival) } -func (r *resolverx) LookupHost(ctx context.Context, domain string) ([]string, error) { - started := r.db.ElapsedTime() +func (r *resolverDB) LookupHost(ctx context.Context, domain string) ([]string, error) { + started := time.Since(r.begin).Seconds() addrs, err := r.Resolver.LookupHost(ctx, domain) - finished := r.db.ElapsedTime() + finished := time.Since(r.begin).Seconds() r.db.InsertIntoLookupHost(&LookupHostEvent{ - Origin: r.origin, - MeasurementID: r.mid, - Network: r.Resolver.Network(), - Address: r.Resolver.Address(), - Domain: domain, - Started: started, - Finished: finished, - Error: err, - Oddity: r.computeOddityLookupHost(addrs, err), - Addrs: addrs, + Network: r.Resolver.Network(), + Address: r.Resolver.Address(), + Domain: domain, + Started: started, + Finished: finished, + Error: err, + Oddity: r.computeOddityLookupHost(addrs, err), + Addrs: addrs, }) return addrs, err } -func (r *resolverx) computeOddityLookupHost(addrs []string, err error) Oddity { - if err == nil { - for _, addr := range addrs { - if isBogon(addr) { - return OddityDNSLookupBogon - } +func (r *resolverDB) computeOddityLookupHost(addrs []string, err error) Oddity { + if err != nil { + switch err.Error() { + case errorsx.FailureGenericTimeoutError: + return OddityDNSLookupTimeout + case errorsx.FailureDNSNXDOMAINError: + return OddityDNSLookupNXDOMAIN + case errorsx.FailureDNSRefusedError: + return OddityDNSLookupRefused + default: + return OddityDNSLookupOther } - return "" } - switch err.Error() { - case errorsx.FailureGenericTimeoutError: - return OddityDNSLookupTimeout - case errorsx.FailureDNSNXDOMAINError: - return OddityDNSLookupNXDOMAIN - case errorsx.FailureDNSRefusedError: - return OddityDNSLookupRefused - default: - return OddityDNSLookupOther + for _, addr := range addrs { + if isBogon(addr) { + return OddityDNSLookupBogon + } } + 
return "" } -// LookupHTTPSSvcEvent is the event emitted when we perform -// an HTTPSSvc DNS query for a domain. +// LookupHTTPSSvcEvent contains the results of an HTTPSSvc lookup. type LookupHTTPSSvcEvent struct { - Origin Origin - MeasurementID int64 - ConnID int64 // connID (typically zero) - Network string - Address string - Domain string - Started time.Duration - Finished time.Duration - Error error - Oddity Oddity - IPv4 []string - IPv6 []string - ALPN []string + Network string + Address string + Domain string + Started float64 + Finished float64 + Error error + Oddity Oddity + IPv4 []string + IPv6 []string + ALPN []string +} + +// MarshalJSON marshals a LookupHTTPSSvcEvent to the archival +// format that is similar to df-002-dnst. +func (ev *LookupHTTPSSvcEvent) MarshalJSON() ([]byte, error) { + archival := NewArchivalLookupHTTPSSvcList(ev) + return json.Marshal(archival) } -func (r *resolverx) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain string) (HTTPSSvc, error) { - started := r.db.ElapsedTime() +func (r *resolverDB) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain string) (HTTPSSvc, error) { + started := time.Since(r.begin).Seconds() https, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) - finished := r.db.ElapsedTime() + finished := time.Since(r.begin).Seconds() ev := &LookupHTTPSSvcEvent{ - Origin: r.origin, - MeasurementID: r.mid, - Network: r.Resolver.Network(), - Address: r.Resolver.Address(), - Domain: domain, - Started: started, - Finished: finished, - Error: err, - Oddity: Oddity(r.computeOddityHTTPSSvc(https, err)), + Network: r.Resolver.Network(), + Address: r.Resolver.Address(), + Domain: domain, + Started: started, + Finished: finished, + Error: err, + Oddity: Oddity(r.computeOddityHTTPSSvc(https, err)), } if err == nil { ev.IPv4 = https.IPv4Hint() @@ -171,7 +160,7 @@ func (r *resolverx) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain strin return https, err } -func (r *resolverx) computeOddityHTTPSSvc(https 
HTTPSSvc, err error) Oddity { +func (r *resolverDB) computeOddityHTTPSSvc(https HTTPSSvc, err error) Oddity { if err != nil { return r.computeOddityLookupHost(nil, err) } diff --git a/internal/measurex/th.go b/internal/measurex/th.go index 250d745183..f12305410f 100644 --- a/internal/measurex/th.go +++ b/internal/measurex/th.go @@ -47,7 +47,6 @@ import ( "net/http" "net/url" "strings" - "time" "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/netxlite" @@ -186,7 +185,7 @@ const thMaxAcceptableBodySize = 1 << 20 type THClient struct { // DNSServers is the MANDATORY list of DNS-over-UDP // servers to use to discover endpoints locally. - DNServers []string + DNServers []*ResolverInfo // HTTPClient is the MANDATORY HTTP client to // use for contacting the TH. @@ -217,9 +216,11 @@ func (c *THClient) Run(ctx context.Context, URL string) (*THServerResponse, erro return nil, err } mx := NewMeasurerWithDefaultSettings() - mx.RegisterUDPResolvers(c.DNServers...) - mx.LookupURLHostParallel(ctx, parsed) - httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + var dns []*DNSMeasurement + for m := range mx.LookupURLHostParallel(ctx, parsed, c.DNServers...) { + dns = append(dns, m) + } + httpEndpoints, err := AllHTTPEndpointsForURL(parsed, dns...) if err != nil { return nil, err } @@ -540,9 +541,9 @@ func (h *THHandler) doQUICFollowUp(ctx context.Context, // newTHEndpointMeasurement takes in input an endpoint // measurement performed by a measurer and emits in output // the simplified THEndpointMeasurement equivalent. 
-func (h *THHandler) newTHEndpointMeasurement(in *Measurement) *THEndpointMeasurement { +func (h *THHandler) newTHEndpointMeasurement(in *HTTPEndpointMeasurement) *THEndpointMeasurement { return &THEndpointMeasurement{ - Oddities: in.Oddities, + // TODO(bassosimone): here we need to add more fields Connect: h.newTHConnectEventList(in.Connect), TLSHandshake: h.newTLSHandshakesList(in.TLSHandshake), QUICHandshake: h.newQUICHandshakeList(in.QUICHandshake), @@ -646,30 +647,30 @@ var thResolver = netxlite.WrapResolver(log.Log, dnsx.NewSerialResolver( // - the THDNSMeasurement for the THServeResponse message func (h *THHandler) dohQuery(ctx context.Context, URL *url.URL) ( epnts []*HTTPEndpoint, meas *THDNSMeasurement) { - db := NewDB(time.Now()) // timing is not sent back to client - r := WrapResolver(0, OriginTH, db, thResolver) + db := &MeasurementDB{} + r := NewMeasurerWithDefaultSettings().WrapResolver(db, thResolver) meas = &THDNSMeasurement{} op := newOperationLogger(log.Log, "dohQuery A/AAAA for %s with %s", URL.Hostname(), r.Address()) _, err := r.LookupHost(ctx, URL.Hostname()) op.Stop(err) - meas.LookupHost = h.newTHLookupHostList(db) + meas.LookupHost = h.newTHLookupHostList(db.AsMeasurement()) switch URL.Scheme { case "https": op := newOperationLogger(log.Log, "dohQuery HTTPSSvc for %s with %s", URL.Hostname(), r.Address()) _, err = r.LookupHTTPSSvcWithoutRetry(ctx, URL.Hostname()) op.Stop(err) - meas.LookupHTTPSSvc = h.newTHLookupHTTPSSvcList(db) + meas.LookupHTTPSSvc = h.newTHLookupHTTPSSvcList(db.AsMeasurement()) default: // nothing } - epnts, _ = db.SelectAllHTTPEndpointsForURL(URL) // nil on failure + epnts, _ = AllHTTPEndpointsForURL(URL) // nil on failure return } -func (h *THHandler) newTHLookupHostList(db *DB) (out []*THLookupHostEvent) { - for _, entry := range db.SelectAllFromLookupHost() { +func (h *THHandler) newTHLookupHostList(m *Measurement) (out []*THLookupHostEvent) { + for _, entry := range m.LookupHost { out = append(out, 
&THLookupHostEvent{ Network: entry.Network, Address: entry.Address, @@ -682,8 +683,8 @@ func (h *THHandler) newTHLookupHostList(db *DB) (out []*THLookupHostEvent) { return } -func (h *THHandler) newTHLookupHTTPSSvcList(db *DB) (out []*THLookupHTTPSSvcEvent) { - for _, entry := range db.SelectAllFromLookupHTTPSSvc() { +func (h *THHandler) newTHLookupHTTPSSvcList(m *Measurement) (out []*THLookupHTTPSSvcEvent) { + for _, entry := range m.LookupHTTPSSvc { out = append(out, &THLookupHTTPSSvcEvent{ Network: entry.Network, Address: entry.Address, diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index 5a6e58cb65..6375e14fd0 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -3,8 +3,7 @@ package measurex // // TLS // -// Wrappers for netxlite's TLS that allow one to -// save network events into an EventDB type. +// Wraps TLS code to write events into a WritableDB. // import ( @@ -12,91 +11,65 @@ import ( "crypto/tls" "crypto/x509" "errors" + "net" "time" "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) -// TLSConn is the TLS conn type we use. -type TLSConn interface { - netxlite.TLSConn - - // ConnID returns the connection ID. - ConnID() int64 -} - -// TLSHandshaker is the TLS handshaker type we use. This handshaker -// will save TLS handshake events into the DB. -type TLSHandshaker interface { - Handshake(ctx context.Context, conn Conn, config *tls.Config) (TLSConn, error) -} +// TLSHandshaker performs TLS handshakes. +type TLSHandshaker = netxlite.TLSHandshaker // WrapTLSHandshaker wraps a netxlite.TLSHandshaker to return a new // instance of TLSHandshaker that saves events into the DB. 
-func WrapTLSHandshaker(measurementID int64, - origin Origin, db EventDB, thx netxlite.TLSHandshaker) TLSHandshaker { - return &tlsHandshakerx{ - TLSHandshaker: thx, - db: db, - origin: origin, - mid: measurementID, - } +func (mx *Measurer) WrapTLSHandshaker(db WritableDB, thx netxlite.TLSHandshaker) TLSHandshaker { + return &tlsHandshakerDB{TLSHandshaker: thx, db: db, begin: mx.Begin} } // NewTLSHandshakerStdlib creates a new TLS handshaker that // saves results into the DB and uses the stdlib for TLS. -func NewTLSHandshakerStdlib(measurementID int64, - origin Origin, db EventDB, logger Logger) TLSHandshaker { - return WrapTLSHandshaker( - measurementID, origin, db, netxlite.NewTLSHandshakerStdlib(logger)) +func (mx *Measurer) NewTLSHandshakerStdlib(db WritableDB, logger Logger) TLSHandshaker { + return mx.WrapTLSHandshaker(db, netxlite.NewTLSHandshakerStdlib(logger)) } -type tlsHandshakerx struct { +type tlsHandshakerDB struct { netxlite.TLSHandshaker - db EventDB - mid int64 - origin Origin + begin time.Time + db WritableDB } // TLSHandshakeEvent contains a TLS handshake event. 
type TLSHandshakeEvent struct { - Origin Origin - MeasurementID int64 - ConnID int64 - Engine string - Network string - RemoteAddr string - LocalAddr string - SNI string - ALPN []string - SkipVerify bool - Started time.Duration - Finished time.Duration - Error error - Oddity Oddity - TLSVersion string - CipherSuite string - NegotiatedProto string - PeerCerts [][]byte + // JSON names compatible with df-006-tlshandshake + CipherSuite string `json:"cipher_suite"` + Error error `json:"failure"` + NegotiatedProto string `json:"negotiated_proto"` + TLSVersion string `json:"tls_version"` + PeerCerts []*ArchivalBinaryData `json:"peer_certificates"` + Finished float64 `json:"t"` + + // JSON names that are consistent with the + // spirit of the spec but are not in it + RemoteAddr string `json:"address"` + SNI string `json:"server_name"` // used in prod + ALPN []string `json:"alpn"` + SkipVerify bool `json:"no_tls_verify"` // used in prod + Oddity Oddity `json:"oddity"` + Network string `json:"proto"` + Started float64 `json:"started"` } -func (thx *tlsHandshakerx) Handshake(ctx context.Context, - conn Conn, config *tls.Config) (TLSConn, error) { +func (thx *tlsHandshakerDB) Handshake(ctx context.Context, + conn Conn, config *tls.Config) (net.Conn, tls.ConnectionState, error) { network := conn.RemoteAddr().Network() remoteAddr := conn.RemoteAddr().String() - localAddr := conn.LocalAddr().String() - started := thx.db.ElapsedTime() + started := time.Since(thx.begin).Seconds() tconn, state, err := thx.TLSHandshaker.Handshake(ctx, conn, config) - finished := thx.db.ElapsedTime() + finished := time.Since(thx.begin).Seconds() thx.db.InsertIntoTLSHandshake(&TLSHandshakeEvent{ - Origin: thx.origin, - MeasurementID: thx.mid, - ConnID: conn.ConnID(), - Engine: "", // TODO(bassosimone): add support Network: network, RemoteAddr: remoteAddr, - LocalAddr: localAddr, SNI: config.ServerName, ALPN: config.NextProtos, SkipVerify: config.InsecureSkipVerify, @@ -107,16 +80,12 @@ func (thx 
*tlsHandshakerx) Handshake(ctx context.Context, TLSVersion: netxlite.TLSVersionString(state.Version), CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), NegotiatedProto: state.NegotiatedProtocol, - PeerCerts: peerCerts(err, &state), + PeerCerts: NewArchivalTLSCerts(peerCerts(err, &state)), }) - if err != nil { - return nil, err - } - return &tlsConnx{ - TLSConn: tconn.(netxlite.TLSConn), connID: conn.ConnID()}, nil + return tconn, state, err } -func (thx *tlsHandshakerx) computeOddity(err error) Oddity { +func (thx *tlsHandshakerDB) computeOddity(err error) Oddity { if err == nil { return "" } @@ -136,15 +105,6 @@ func (thx *tlsHandshakerx) computeOddity(err error) Oddity { } } -type tlsConnx struct { - netxlite.TLSConn - connID int64 -} - -func (c *tlsConnx) ConnID() int64 { - return c.connID -} - func peerCerts(err error, state *tls.ConnectionState) (out [][]byte) { var x509HostnameError x509.HostnameError if errors.As(err, &x509HostnameError) { diff --git a/internal/measurex/utils.go b/internal/measurex/utils.go index ea8fec6dd1..a47d2e53fb 100644 --- a/internal/measurex/utils.go +++ b/internal/measurex/utils.go @@ -1,5 +1,11 @@ package measurex +import ( + "errors" + "net" + "net/url" +) + // // Utils // @@ -18,3 +24,54 @@ func alpnForHTTPEndpoint(network EndpointNetwork) []string { return nil } } + +// addrStringIfNotNil returns the string of the given addr +// unless the addr is nil, in which case it returns an empty string. +func addrStringIfNotNil(addr net.Addr) (out string) { + if addr != nil { + out = addr.String() + } + return +} + +// ErrCannotDeterminePortFromURL indicates that we could not determine +// the correct port from the URL authority and scheme. +var ErrCannotDeterminePortFromURL = errors.New("cannot determine port from URL") + +// PortFromURL returns the port determined from the URL or an error. 
+func PortFromURL(URL *url.URL) (string, error) { + switch { + case URL.Port() != "": + return URL.Port(), nil + case URL.Scheme == "https": + return "443", nil + case URL.Scheme == "http": + return "80", nil + default: + return "", ErrCannotDeterminePortFromURL + } +} + +// removeDuplicateEndpoints removes duplicate endpoints from a list of endpoints. +func removeDuplicateEndpoints(epnts ...*Endpoint) (out []*Endpoint) { + duplicates := make(map[string]*Endpoint) + for _, epnt := range epnts { + duplicates[epnt.String()] = epnt + } + for _, epnt := range duplicates { + out = append(out, epnt) + } + return +} + +// removeDuplicateHTTPEndpoints removes duplicate endpoints from a list of endpoints. +func removeDuplicateHTTPEndpoints(epnts ...*HTTPEndpoint) (out []*HTTPEndpoint) { + duplicates := make(map[string]*HTTPEndpoint) + for _, epnt := range epnts { + duplicates[epnt.String()] = epnt + } + for _, epnt := range duplicates { + out = append(out, epnt) + } + return +} diff --git a/internal/measurex/wcth.go b/internal/measurex/wcth.go deleted file mode 100644 index a164c05a27..0000000000 --- a/internal/measurex/wcth.go +++ /dev/null @@ -1,205 +0,0 @@ -package measurex - -// -// WCTH (Web Connectivity Test Helper) -// -// We use the WCTH as an alternative DNS for gathering -// additional IP addresses to test, which is useful when -// your local DNS is censored. -// -// This code is merely here to bootstrap websteps and -// should be removed when we have a proper test helper. -// - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "io" - "net" - "net/http" - "net/url" - - "github.com/ooni/probe-cli/v3/internal/netxlite/iox" - "github.com/ooni/probe-cli/v3/internal/runtimex" - "github.com/ooni/probe-cli/v3/internal/version" -) - -// WCTHWorker is the Web Connectivity test helper worker. 
-type WCTHWorker struct { - clnt HTTPClient - db EventDB - logger Logger - mid int64 - url string -} - -// NewWCTHWorker creates a new TestHelper instance using the -// web connectivity test helper protocol. -// -// Arguments: -// -// - measurementID is the measurement ID; -// -// - logger is the logger to use; -// -// - db is the database to use; -// -// - clnt is the HTTP client to use; -// -// - URL is the WCTH service URL. -// -// All arguments are mandatory. -func NewWCTHWorker(measurementID int64, - logger Logger, db EventDB, clnt HTTPClient, URL string) *WCTHWorker { - return &WCTHWorker{ - db: db, - logger: logger, - clnt: clnt, - url: URL, - mid: measurementID, - } -} - -var errWCTHRequestFailed = errors.New("wcth: request failed") - -// Run runs the WCTH for the given URL and endpoints and creates -// measurements into the DB that derive on the WCTH response. -// -// CAVEAT: this implementation is very inefficient because the -// WCTH will fetch the whole redirection chain for every request -// but the WCTH is already there and it can bootstrap us. 
-func (w *WCTHWorker) Run( - ctx context.Context, URL *url.URL, endpoints []string) (*WCTHResponse, error) { - req, err := w.newHTTPRequest(ctx, URL, endpoints) - if err != nil { - return nil, err - } - resp, err := w.do(req) - if err != nil { - return nil, err - } - w.parseResp(URL, resp) - return resp, nil -} - -func (w *WCTHWorker) parseResp(URL *url.URL, resp *WCTHResponse) { - w.db.InsertIntoLookupHost(&LookupHostEvent{ - Origin: OriginTH, - MeasurementID: w.mid, - Network: "system", - Address: "", - Domain: URL.Hostname(), - Started: 0, - Finished: 0, - Error: w.newError(resp.DNS.Failure), - Addrs: w.filterDNSAddrs(resp.DNS.Addrs), - }) - for addr, status := range resp.TCPConnect { - w.db.InsertIntoDial(&NetworkEvent{ - Origin: OriginTH, - MeasurementID: w.mid, - ConnID: 0, - Operation: "connect", - Network: "tcp", - RemoteAddr: addr, - LocalAddr: "", - Started: 0, - Finished: 0, - Error: w.newError(status.Failure), - Count: 0, - }) - } -} - -func (w *WCTHWorker) newHTTPRequest(ctx context.Context, - URL *url.URL, endpoints []string) (*http.Request, error) { - wtchReq := &wcthRequest{ - HTTPRequest: URL.String(), - HTTPRequestHeaders: NewHTTPRequestHeaderForMeasuring(), - TCPConnect: endpoints, - } - reqBody, err := json.Marshal(wtchReq) - runtimex.PanicOnError(err, "json.Marshal failed") - req, err := http.NewRequestWithContext(ctx, "POST", w.url, bytes.NewReader(reqBody)) - if err != nil { - return nil, err - } - req.Header.Set("User-Agent", fmt.Sprintf("miniooni/%s", version.Version)) - return req, nil -} - -func (w *WCTHWorker) do(req *http.Request) (*WCTHResponse, error) { - resp, err := w.clnt.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - if resp.StatusCode != 200 { - return nil, errWCTHRequestFailed - } - const maxResponseBodySize = 1 << 20 // limit test helper response size - r := io.LimitReader(resp.Body, maxResponseBodySize) - respBody, err := iox.ReadAllContext(req.Context(), r) - if err != nil { - return nil, err - } 
- var wcthResp WCTHResponse - if err := json.Unmarshal(respBody, &wcthResp); err != nil { - return nil, err - } - return &wcthResp, nil -} - -func (w *WCTHWorker) filterDNSAddrs(addrs []string) (out []string) { - for _, addr := range addrs { - if net.ParseIP(addr) == nil { - continue // WCTH also returns the CNAME - } - out = append(out, addr) - } - return -} - -func (w *WCTHWorker) newError(failure *string) error { - if failure != nil { - return errors.New(*failure) - } - return nil -} - -type wcthRequest struct { - HTTPRequest string `json:"http_request"` - HTTPRequestHeaders map[string][]string `json:"http_request_headers"` - TCPConnect []string `json:"tcp_connect"` -} - -// WCTHTCPConnectResult contains the TCP connect result. -type WCTHTCPConnectResult struct { - Status bool `json:"status"` - Failure *string `json:"failure"` -} - -// WCTHHTTPRequestResult contains the HTTP result. -type WCTHHTTPRequestResult struct { - BodyLength int64 `json:"body_length"` - Failure *string `json:"failure"` - Title string `json:"title"` - Headers map[string]string `json:"headers"` - StatusCode int64 `json:"status_code"` -} - -// WCTHDNSResult contains the DNS result. -type WCTHDNSResult struct { - Failure *string `json:"failure"` - Addrs []string `json:"addrs"` -} - -// WCTHResponse is the response from the WCTH service. 
-type WCTHResponse struct { - TCPConnect map[string]WCTHTCPConnectResult `json:"tcp_connect"` - HTTPRequest WCTHHTTPRequestResult `json:"http_request"` - DNS WCTHDNSResult `json:"dns"` -} From ab48a8bad57a2bf6da8ddcc34b719ab74ef27b50 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 00:33:15 +0200 Subject: [PATCH 32/53] start refactoring to simplify the TH --- .../engine/experiment/webstepsx/webstepsx.go | 3 +- internal/measurex/measurement.go | 33 ++++++-- internal/measurex/measurer.go | 78 ++++++++++++++++--- internal/measurex/th.go | 11 +-- 4 files changed, 101 insertions(+), 24 deletions(-) diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/webstepsx.go index 1e92bbce10..f25163b886 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/webstepsx.go @@ -118,7 +118,8 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, TLSHandshaker: netxlite.NewTLSHandshakerStdlib(sess.Logger()), } cookies := measurex.NewCookieJar() - in := mmx.MeasureHTTPURLAndFollowRedirections(ctx, URL, cookies) + in := mmx.MeasureHTTPURLAndFollowRedirections( + ctx, URL, measurex.NewHTTPRequestHeaderForMeasuring(), cookies) for m := range in { out <- &model.ExperimentAsyncTestKeys{ MeasurementRuntime: m.TotalRuntime.Seconds(), diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index be3d324ec8..99e037f020 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -2,6 +2,7 @@ package measurex import ( "net" + "net/http" "net/url" "time" ) @@ -179,8 +180,11 @@ func (m *DNSMeasurement) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { // // - URL is the URL for which we want endpoints; // +// - headers are the headers to use. +// // Returns a list of endpoints or an error. 
-func (m *DNSMeasurement) allHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, error) { +func (m *DNSMeasurement) allHTTPEndpointsForURL( + URL *url.URL, headers http.Header) ([]*HTTPEndpoint, error) { domain := URL.Hostname() port, err := PortFromURL(URL) if err != nil { @@ -199,7 +203,24 @@ func (m *DNSMeasurement) allHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, SNI: domain, ALPN: alpnForHTTPEndpoint(epnt.Network), URL: URL, - Header: NewHTTPRequestHeaderForMeasuring(), + Header: headers, + }) + } + return out, nil +} + +// AllEndpointsForURL is like AllHTTPEndpointsForURL but return +// simple Endpoints rather than HTTPEndpoints. +func AllEndpointsForURL(URL *url.URL, meas ...*DNSMeasurement) ([]*Endpoint, error) { + all, err := AllHTTPEndpointsForURL(URL, http.Header{}, meas...) + if err != nil { + return nil, err + } + var out []*Endpoint + for _, epnt := range all { + out = append(out, &Endpoint{ + Network: epnt.Network, + Address: epnt.Address, }) } return out, nil @@ -208,11 +229,13 @@ func (m *DNSMeasurement) allHTTPEndpointsForURL(URL *url.URL) ([]*HTTPEndpoint, // AllHTTPEndpointsForURL gathers all the HTTP endpoints for a given // URL from a list of DNSMeasurements, removes duplicates and returns // the result. This call may fail if we cannot determine the port -// from the URL, in which case we return an error. -func AllHTTPEndpointsForURL(URL *url.URL, meas ...*DNSMeasurement) ([]*HTTPEndpoint, error) { +// from the URL, in which case we return an error. You MUST supply +// the headers you want to use for measuring. 
+func AllHTTPEndpointsForURL(URL *url.URL, + headers http.Header, meas ...*DNSMeasurement) ([]*HTTPEndpoint, error) { var out []*HTTPEndpoint for _, m := range meas { - epnt, err := m.allHTTPEndpointsForURL(URL) + epnt, err := m.allHTTPEndpointsForURL(URL, headers) if err != nil { return nil, err } diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 0c7e17d217..9cd61e6720 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -77,6 +77,22 @@ func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *DNSMea } } +// lookupHostForeign performs a LookupHost using a "foreign" resolver. +func (mx *Measurer) lookupHostForeign( + ctx context.Context, domain string, r Resolver) *DNSMeasurement { + const timeout = 4 * time.Second + ol := newOperationLogger(mx.Logger, "LookupHost %s with %s", domain, r.Network()) + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + db := &MeasurementDB{} + _, err := mx.WrapResolver(db, r).LookupHost(ctx, domain) + ol.Stop(err) + return &DNSMeasurement{ + Domain: domain, + Measurement: db.AsMeasurement(), + } +} + // LookupHostUDP is like LookupHostSystem but uses an UDP resolver. // // Arguments: @@ -133,6 +149,23 @@ func (mx *Measurer) LookupHTTPSSvcUDP( } } +// lookupHTTPSSvcUDPForeign is like LookupHTTPSSvcUDP +// except that it uses a "foreign" resolver. +func (mx *Measurer) lookupHTTPSSvcUDPForeign( + ctx context.Context, domain string, r Resolver) *DNSMeasurement { + const timeout = 4 * time.Second + ol := newOperationLogger(mx.Logger, "LookupHTTPSvc %s with %s", domain, r.Address()) + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + db := &MeasurementDB{} + _, err := mx.WrapResolver(db, r).LookupHTTPSSvcWithoutRetry(ctx, domain) + ol.Stop(err) + return &DNSMeasurement{ + Domain: domain, + Measurement: db.AsMeasurement(), + } +} + // TCPConnect establishes a connection with a TCP endpoint. 
// // Arguments: @@ -528,13 +561,33 @@ func (mx *Measurer) HTTPEndpointGetParallel(ctx context.Context, return output } +// ResolverNetwork identifies the network of a resolver. +type ResolverNetwork string + +var ( + // ResolverSystem is the system resolver (i.e., getaddrinfo) + ResolverSystem = ResolverNetwork("system") + + // ResolverUDP is a resolver using DNS-over-UDP + ResolverUDP = ResolverNetwork("udp") + + // ResolverForeign is a resolver that is not managed by + // this package. We can wrap it, but we don't be able to + // observe any event but Lookup{Host,HTTPSvc} + ResolverForeign = ResolverNetwork("foreign") +) + // ResolverInfo contains info about a DNS resolver. type ResolverInfo struct { // Network is the resolver's network (e.g., "doh", "udp") - Network string + Network ResolverNetwork // Address is the address (e.g., "1.1.1.1:53", "https://1.1.1.1/dns-query") Address string + + // ForeignResolver is only used when Network's + // value equals the ResolverForeign constant. 
+ ForeignResolver Resolver } // LookupURLHostParallel performs an LookupHost-like operation for each @@ -576,10 +629,12 @@ func (mx *Measurer) lookupHostWithResolverInfo( ctx context.Context, reso *ResolverInfo, URL *url.URL, output chan<- *DNSMeasurement) { switch reso.Network { - case "system": + case ResolverSystem: output <- mx.LookupHostSystem(ctx, URL.Hostname()) - case "udp": + case ResolverUDP: output <- mx.LookupHostUDP(ctx, URL.Hostname(), reso.Address) + case ResolverForeign: + output <- mx.lookupHostForeign(ctx, URL.Hostname(), reso.ForeignResolver) default: return } @@ -589,8 +644,10 @@ func (mx *Measurer) lookupHostWithResolverInfo( return } switch reso.Network { - case "udp": + case ResolverUDP: output <- mx.LookupHTTPSSvcUDP(ctx, URL.Hostname(), reso.Address) + case ResolverForeign: + output <- mx.lookupHTTPSSvcUDPForeign(ctx, URL.Hostname(), reso.ForeignResolver) } } @@ -628,6 +685,8 @@ func (mx *Measurer) LookupHostParallel( // // - URL is the URL to measure // +// - header contains the HTTP headers for the request +// // - cookies contains the cookies we should use for measuring // this URL and possibly future redirections. // @@ -640,7 +699,8 @@ func (mx *Measurer) LookupHostParallel( // redirect properly without cookies. This has been // documented at https://github.com/ooni/probe/issues/1727. func (mx *Measurer) MeasureURL( - ctx context.Context, URL string, cookies http.CookieJar) (*URLMeasurement, error) { + ctx context.Context, URL string, headers http.Header, + cookies http.CookieJar) (*URLMeasurement, error) { mx.Logger.Infof("MeasureURL url=%s", URL) m := &URLMeasurement{URL: URL} begin := time.Now() @@ -657,7 +717,7 @@ func (mx *Measurer) MeasureURL( m.DNS = append(m.DNS, dns) } m.DNSRuntime = time.Since(dnsBegin) - epnts, err := AllHTTPEndpointsForURL(parsed, m.DNS...) + epnts, err := AllHTTPEndpointsForURL(parsed, headers, m.DNS...) 
if err != nil { return nil, err } @@ -698,11 +758,11 @@ func (r *redirectionQueue) redirectionsCount() int { // MeasureURLAndFollowRedirections is like MeasureURL except // that it _also_ follows all the HTTP redirections. func (mx *Measurer) MeasureHTTPURLAndFollowRedirections(ctx context.Context, - URL string, cookies http.CookieJar) <-chan *URLMeasurement { + URL string, headers http.Header, cookies http.CookieJar) <-chan *URLMeasurement { out := make(chan *URLMeasurement) go func() { defer close(out) - meas, err := mx.MeasureURL(ctx, URL, cookies) + meas, err := mx.MeasureURL(ctx, URL, headers, cookies) if err != nil { mx.Logger.Warnf("mx.MeasureURL failed: %s", err.Error()) return @@ -712,7 +772,7 @@ func (mx *Measurer) MeasureHTTPURLAndFollowRedirections(ctx context.Context, const maxRedirects = 7 for !rq.empty() && rq.redirectionsCount() < maxRedirects { URL = rq.popleft() - meas, err = mx.MeasureURL(ctx, URL, cookies) + meas, err = mx.MeasureURL(ctx, URL, headers, cookies) if err != nil { mx.Logger.Warnf("mx.MeasureURL failed: %s", err.Error()) return diff --git a/internal/measurex/th.go b/internal/measurex/th.go index f12305410f..0ae7d225b5 100644 --- a/internal/measurex/th.go +++ b/internal/measurex/th.go @@ -220,17 +220,10 @@ func (c *THClient) Run(ctx context.Context, URL string) (*THServerResponse, erro for m := range mx.LookupURLHostParallel(ctx, parsed, c.DNServers...) { dns = append(dns, m) } - httpEndpoints, err := AllHTTPEndpointsForURL(parsed, dns...) + endpoints, err := AllEndpointsForURL(parsed, dns...) 
if err != nil { return nil, err } - var endpoints []*Endpoint - for _, epnt := range httpEndpoints { - endpoints = append(endpoints, &Endpoint{ - Network: epnt.Network, - Address: epnt.Address, - }) - } return (&THClientCall{ Endpoints: endpoints, HTTPClient: c.HTTPClient, @@ -665,7 +658,7 @@ func (h *THHandler) dohQuery(ctx context.Context, URL *url.URL) ( default: // nothing } - epnts, _ = AllHTTPEndpointsForURL(URL) // nil on failure + epnts, _ = AllHTTPEndpointsForURL(URL, NewHTTPRequestHeaderForMeasuring()) // nil on failure return } From 02bb05a7a683efc7bde2afe73026f71c86eaae28 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 01:20:29 +0200 Subject: [PATCH 33/53] continue preparing for reducing th code complexity --- internal/measurex/measurer.go | 144 ++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 9cd61e6720..35cd556490 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -16,6 +16,7 @@ import ( "net" "net/http" "net/url" + "strings" "time" "github.com/apex/log" @@ -35,6 +36,11 @@ type Measurer struct { // Logger is the MANDATORY logger to use. Logger Logger + // MeasureURLHelper is the OPTIONAL test helper to use when + // we're measuring using the MeasureURL function. If this field + // is not set, we'll not be using any helper. + MeasureURLHelper MeasureURLHelper + // Resolvers is the MANDATORY list of resolvers. Resolvers []*ResolverInfo @@ -671,6 +677,47 @@ func (mx *Measurer) LookupHostParallel( return out } +// MeasureURLHelper is a Test Helper that discovers additional +// endpoints after MeasureURL has finished discovering endpoints +// via the usual DNS mechanism. 
The MeasureURLHelper: +// +// - is used by experiments to call a real test helper, i.e., +// a remote service providing extra endpoints +// +// - is used by test helpers to augment the set of endpoints +// discovered so far with the ones provided by a client. +type MeasureURLHelper interface { + // LookupExtraHTTPEndpoints searches for extra HTTP endpoints + // suitable for the given URL we're measuring. + // + // Arguments: + // + // - ctx is the context for timeout/cancellation/deadline + // + // - URL is the URL we're currently measuring + // + // - headers contains the HTTP headers we wish to use + // + // - epnts is the current list of endpoints + // + // This function SHOULD return a NEW list of extra endpoints + // it discovered and SHOULD NOT merge the epnts endpoints with + // extra endpoints it discovered. Therefore: + // + // - on any kind of error it MUST return nil, err + // + // - on success it MUST return the NEW endpoints it discovered + // + // It is the caller's responsibility to merge the NEW list of + // endpoints with the ones it passed as argument. + // + // It is also the caller's responsibility to ENSURE that the + // newly returned endpoints only use the few headers that our + // test helper protocol allows one to set. + LookupExtraHTTPEndpoints(ctx context.Context, URL *url.URL, + headers http.Header, epnts ...*HTTPEndpoint) ([]*HTTPEndpoint, error) +} + // MeasureURL measures an HTTP or HTTPS URL. The DNS resolvers // and the Test Helpers we use in this measurement are the ones // configured into the database. The default is to use the system @@ -721,15 +768,112 @@ func (mx *Measurer) MeasureURL( if err != nil { return nil, err } + if mx.MeasureURLHelper != nil { + thBegin := time.Now() + extraEpnts, _ := mx.MeasureURLHelper.LookupExtraHTTPEndpoints( + ctx, parsed, headers, epnts...) + epnts = removeDuplicateHTTPEndpoints(append(epnts, extraEpnts...)...) 
+ m.THRuntime = time.Since(thBegin) + mx.enforceAllowedHeadersOnly(epnts) + } epntRuntime := time.Now() for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, epnts...) { m.Endpoints = append(m.Endpoints, epnt) } + mx.maybeQUICFollowUp(ctx, m, cookies, epnts...) m.EpntsRuntime = time.Since(epntRuntime) m.fillRedirects() return m, nil } +// maybeQUICFollowUp checks whether we need to use Alt-Svc to check +// for QUIC. We query for HTTPSSvc but currently only Cloudflare +// implements this proposed standard. So, this function is +// where we take care of all the other servers implementing QUIC. +func (mx *Measurer) maybeQUICFollowUp(ctx context.Context, + m *URLMeasurement, cookies http.CookieJar, epnts ...*HTTPEndpoint) { + altsvc := []string{} + for _, epnt := range m.Endpoints { + // Check whether we have a QUIC handshake. If so, then + // HTTPSSvc worked and we can stop here. + if epnt.QUICHandshake != nil { + return + } + for _, rtrip := range epnt.HTTPRoundTrip { + if v := rtrip.ResponseHeader.Get("alt-svc"); v != "" { + altsvc = append(altsvc, v) + } + } + } + // syntax: + // + // Alt-Svc: clear + // Alt-Svc: =; ma= + // Alt-Svc: =; ma=; persist=1 + // + // multiple entries may be separated by comma. + // + // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Alt-Svc + for _, header := range altsvc { + entries := strings.Split(header, ",") + if len(entries) < 1 { + continue + } + for _, entry := range entries { + parts := strings.Split(entry, ";") + if len(parts) < 1 { + continue + } + if parts[0] == "h3=\":443\"" { + mx.doQUICFollowUp(ctx, m, cookies, epnts...) + return + } + } + } +} + +// doQUICFollowUp runs when we know there's QUIC support via Alt-Svc. 
+func (mx *Measurer) doQUICFollowUp(ctx context.Context, + m *URLMeasurement, cookies http.CookieJar, epnts ...*HTTPEndpoint) { + quicEpnts := []*HTTPEndpoint{} + // do not mutate the existing list rather create a new one + for _, epnt := range epnts { + quicEpnts = append(quicEpnts, &HTTPEndpoint{ + Domain: epnt.Domain, + Network: NetworkQUIC, + Address: epnt.Address, + SNI: epnt.SNI, + ALPN: []string{"h3"}, + URL: epnt.URL, + Header: epnt.Header, + }) + } + for mquic := range mx.HTTPEndpointGetParallel(ctx, cookies, quicEpnts...) { + m.Endpoints = append(m.Endpoints, mquic) + } +} + +func (mx *Measurer) enforceAllowedHeadersOnly(epnts []*HTTPEndpoint) { + for _, epnt := range epnts { + epnt.Header = mx.keepOnlyAllowedHeaders(epnt.Header) + } +} + +func (mx *Measurer) keepOnlyAllowedHeaders(header http.Header) (out http.Header) { + out = http.Header{} + for k, vv := range header { + switch strings.ToLower(k) { + case "accept", "accept-language", "cookie", "user-agent": + for _, v := range vv { + out.Add(k, v) + } + default: + // ignore all the other headers + } + } + return +} + // redirectionQueue is the type we use to manage the redirection // queue and to follow a reasonable number of redirects. type redirectionQueue struct { From 80681e58d612cfbade0647b914ee1d9ac7e3f1c4 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 01:24:54 +0200 Subject: [PATCH 34/53] simplify archival by making dnsx data format equal to archival --- internal/measurex/archival.go | 32 -------------------------------- internal/measurex/dnsx.go | 28 +++++++++++----------------- 2 files changed, 11 insertions(+), 49 deletions(-) diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index 0866c3295a..4b53498f63 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -15,38 +15,6 @@ import ( // help we use routines from this file to serialize correctly. 
// -// -// DNSRoundTrip -// - -// TODO(bassosimone): this is a candidate to not be in archival but -// rather to be what we actually save into the WritableDB. - -// ArchivalDNSRoundTrip is the archival fromat for DNSRoundTripEvent. -type ArchivalDNSRoundTrip struct { - Network string `json:"engine"` - Address string `json:"resolver_address"` - Query *ArchivalBinaryData `json:"raw_query"` - Started float64 `json:"started"` - Finished float64 `json:"t"` - Error error `json:"failure"` - Reply *ArchivalBinaryData `json:"raw_reply"` -} - -// NewArchivalDNSRoundTrip converts a DNSRoundTripEvent -// to the corresponding archival format. -func NewArchivalDNSRoundTrip(in *DNSRoundTripEvent) (out *ArchivalDNSRoundTrip) { - return &ArchivalDNSRoundTrip{ - Network: in.Network, - Address: in.Address, - Query: NewArchivalBinaryData(in.Query), - Started: in.Started, - Finished: in.Finished, - Error: in.Error, - Reply: NewArchivalBinaryData(in.Reply), - } -} - // // BinaryData // diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index cbe3ed3d6a..67a113f01c 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -8,7 +8,6 @@ package measurex import ( "context" - "encoding/json" "time" "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" @@ -33,20 +32,15 @@ type dnsxRoundTripperDB struct { // DNSRoundTripEvent contains the result of a DNS round trip. type DNSRoundTripEvent struct { - Network string - Address string - Query []byte - Started float64 - Finished float64 - Error error - Reply []byte -} - -// MarshalJSON marshals a DNSRoundTripEvent to the archival -// format that is similar to df-002-dnst. -func (ev *DNSRoundTripEvent) MarshalJSON() ([]byte, error) { - archival := NewArchivalDNSRoundTrip(ev) - return json.Marshal(archival) + // This data structure is not in df-002-dns but the names and + // semantics try to be consistent with such a spec. 
+ Network string `json:"engine"` + Address string `json:"resolver_address"` + Query *ArchivalBinaryData `json:"raw_query"` + Started float64 `json:"started"` + Finished float64 `json:"t"` + Error error `json:"failure"` + Reply *ArchivalBinaryData `json:"raw_reply"` } func (txp *dnsxRoundTripperDB) RoundTrip(ctx context.Context, query []byte) ([]byte, error) { @@ -56,11 +50,11 @@ func (txp *dnsxRoundTripperDB) RoundTrip(ctx context.Context, query []byte) ([]b txp.db.InsertIntoDNSRoundTrip(&DNSRoundTripEvent{ Network: txp.RoundTripper.Network(), Address: txp.RoundTripper.Address(), - Query: query, + Query: NewArchivalBinaryData(query), Started: started, Finished: finished, Error: err, - Reply: reply, + Reply: NewArchivalBinaryData(reply), }) return reply, err } From ad0a3cf4ce02b12aa1900aaa8e5cd1d7c2809ca4 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 02:17:29 +0200 Subject: [PATCH 35/53] now also http uses the ooni data format --- internal/measurex/archival.go | 82 ++++++++--------------------------- internal/measurex/http.go | 70 +++++++++++++++++++----------- internal/measurex/measurer.go | 9 +++- internal/measurex/th.go | 28 ++++++------ 4 files changed, 85 insertions(+), 104 deletions(-) diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index 4b53498f63..0e029d1c5d 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -1,9 +1,9 @@ package measurex import ( + "log" "net/http" "strings" - "unicode/utf8" ) // @@ -41,58 +41,28 @@ func NewArchivalBinaryData(data []byte) (out *ArchivalBinaryData) { // HTTPRoundTrip // -// ArchivalHTTPRoundTrip is the archival format for HTTPRoundTripEvent. -type ArchivalHTTPRoundTrip struct { - // JSON names following the df-001-httpt data format. 
- Error error `json:"failure"` - Request *ArchivalHTTPRequest `json:"request"` - Response *ArchivalHTTPResponse `json:"response"` - Finished float64 `json:"t"` - Started float64 `json:"started"` - - // Names not in the specification - Oddity Oddity `json:"oddity"` -} - -// ArchivalHTTPRequest is the archival representation of a request. -type ArchivalHTTPRequest struct { - Method string `json:"method"` - URL string `json:"url"` - HeadersList [][]string `json:"headers_list"` -} - -// ArchivalHTTPResponse is the archival representation of a response. -type ArchivalHTTPResponse struct { - Code int64 `json:"code"` - HeadersList [][]string `json:"headers_list"` - Body interface{} `json:"body"` - BodyIsTruncated bool `json:"body_is_truncated"` -} - -// NewArchivalHTTPRoundTrip converts an HTTPRoundTripEvent -// to the corresponding archival format. -func NewArchivalHTTPRoundTrip(in *HTTPRoundTripEvent) (out *ArchivalHTTPRoundTrip) { - return &ArchivalHTTPRoundTrip{ - Error: in.Error, - Request: &ArchivalHTTPRequest{ - Method: in.RequestMethod, - URL: in.RequestURL.String(), - HeadersList: NewArchivalHeadersList(in.RequestHeader), - }, - Response: &ArchivalHTTPResponse{ - Code: int64(in.ResponseStatus), - HeadersList: NewArchivalHeadersList(in.ResponseHeader), - Body: NewArchivalHTTPBody(in.ResponseBodySnapshot), - BodyIsTruncated: int64(len(in.ResponseBodySnapshot)) >= in.MaxBodySnapshotSize, - }, - Finished: in.Finished, - Started: in.Started, - Oddity: in.Oddity, +// ArchivalHeadersList is a list of HTTP headers. +type ArchivalHeadersList [][]string + +// Get searches for the first header with the named key +// and returns it. If not found, returns an empty string. 
+func (headers ArchivalHeadersList) Get(key string) string { + key = strings.ToLower(key) + for _, entry := range headers { + if len(entry) != 2 { + log.Printf("headers: malformed header: %+v", entry) + continue + } + headerKey, headerValue := entry[0], entry[1] + if strings.ToLower(headerKey) == key { + return headerValue + } } + return "" } // NewArchivalHeadersList builds a new HeadersList from http.Header. -func NewArchivalHeadersList(in http.Header) (out [][]string) { +func NewArchivalHeadersList(in http.Header) (out ArchivalHeadersList) { for k, vv := range in { for _, v := range vv { out = append(out, []string{k, v}) @@ -101,20 +71,6 @@ func NewArchivalHeadersList(in http.Header) (out [][]string) { return } -// NewArchivalHTTPBody builds a new HTTP body for archival from the body. -func NewArchivalHTTPBody(body []byte) (out interface{}) { - if body != nil { - if utf8.Valid(body) { - return string(body) - } - out = &ArchivalBinaryData{ - Data: body, - Format: "base64", - } - } - return -} - // // TLSCerts // diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 0c72baa3f1..6830b3eacb 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -17,13 +17,13 @@ import ( "bytes" "context" "crypto/tls" - "encoding/json" "errors" "io" "net/http" "net/http/cookiejar" "net/url" "time" + "unicode/utf8" "github.com/lucas-clemente/quic-go" "github.com/ooni/probe-cli/v3/internal/engine/httpheader" @@ -71,26 +71,38 @@ type httpTransportDB struct { db WritableDB } +// HTTPRequest is the HTTP request. +type HTTPRequest struct { + // Names consistent with df-001-http.md + Method string `json:"method"` + URL string `json:"url"` + HeadersList ArchivalHeadersList `json:"headers_list"` +} + +// HTTPResponse is the HTTP response. 
+type HTTPResponse struct { + // Names consistent with df-001-http.md + Code int64 `json:"code"` + HeadersList ArchivalHeadersList `json:"headers_list"` + Body *ArchivalBinaryData `json:"body"` + BodyIsTruncated bool `json:"body_is_truncated"` + + // Fields not part of the spec + BodyLength int64 `json:"x_body_length"` + BodyIsUTF8 bool `json:"x_body_is_utf8"` +} + // HTTPRoundTripEvent contains information about an HTTP round trip. type HTTPRoundTripEvent struct { - RequestMethod string - RequestURL *url.URL - RequestHeader http.Header - Started float64 - Finished float64 - Error error - Oddity Oddity - ResponseStatus int - ResponseHeader http.Header - ResponseBodySnapshot []byte - MaxBodySnapshotSize int64 -} + // JSON names following the df-001-httpt data format. + Error error `json:"failure"` + Request *HTTPRequest `json:"request"` + Response *HTTPResponse `json:"response"` + Finished float64 `json:"t"` + Started float64 `json:"started"` -// MarshalJSON marshals a HTTPRoundTripEvent to the archival -// format that is similar to df-001-httpt. 
-func (ev *HTTPRoundTripEvent) MarshalJSON() ([]byte, error) { - archival := NewArchivalHTTPRoundTrip(ev) - return json.Marshal(archival) + // Names not in the specification + Oddity Oddity `json:"oddity"` } // We only read a small snapshot of the body to keep measurements @@ -102,11 +114,12 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) started := time.Since(txp.begin).Seconds() resp, err := txp.HTTPTransport.RoundTrip(req) rt := &HTTPRoundTripEvent{ - RequestMethod: req.Method, - RequestURL: req.URL, - RequestHeader: req.Header, - Started: started, - MaxBodySnapshotSize: maxBodySnapshot, + Request: &HTTPRequest{ + Method: req.Method, + URL: req.URL.String(), + HeadersList: NewArchivalHeadersList(req.Header), + }, + Started: started, } if err != nil { rt.Finished = time.Since(txp.begin).Seconds() @@ -124,8 +137,10 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) case resp.StatusCode >= 400: rt.Oddity = OddityStatusOther } - rt.ResponseStatus = resp.StatusCode - rt.ResponseHeader = resp.Header + rt.Response = &HTTPResponse{ + Code: int64(resp.StatusCode), + HeadersList: NewArchivalHeadersList(resp.Header), + } r := io.LimitReader(resp.Body, maxBodySnapshot) body, err := iox.ReadAllContext(req.Context(), r) if errors.Is(err, io.EOF) && resp.Close { @@ -141,7 +156,10 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) Reader: io.MultiReader(bytes.NewReader(body), resp.Body), Closer: resp.Body, } - rt.ResponseBodySnapshot = body + rt.Response.Body = NewArchivalBinaryData(body) + rt.Response.BodyLength = int64(len(body)) + rt.Response.BodyIsTruncated = len(body) >= maxBodySnapshot + rt.Response.BodyIsUTF8 = utf8.Valid(body) rt.Finished = time.Since(txp.begin).Seconds() txp.db.InsertIntoHTTPRoundTrip(rt) return resp, nil diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 35cd556490..ad848a926d 100644 --- a/internal/measurex/measurer.go 
+++ b/internal/measurex/measurer.go @@ -13,6 +13,7 @@ import ( "context" "crypto/tls" "errors" + stdlog "log" "net" "net/http" "net/url" @@ -799,8 +800,12 @@ func (mx *Measurer) maybeQUICFollowUp(ctx context.Context, if epnt.QUICHandshake != nil { return } - for _, rtrip := range epnt.HTTPRoundTrip { - if v := rtrip.ResponseHeader.Get("alt-svc"); v != "" { + for idx, rtrip := range epnt.HTTPRoundTrip { + if rtrip.Response == nil { + stdlog.Printf("malformed HTTPRoundTrip@%d: %+v", idx, rtrip) + continue + } + if v := rtrip.Response.HeadersList.Get("alt-svc"); v != "" { altsvc = append(altsvc, v) } } diff --git a/internal/measurex/th.go b/internal/measurex/th.go index 0ae7d225b5..986bc6d0ea 100644 --- a/internal/measurex/th.go +++ b/internal/measurex/th.go @@ -591,19 +591,21 @@ func (h *THHandler) newQUICHandshakeList(in []*QUICHandshakeEvent) (out []*THHan } func (h *THHandler) newHTTPRoundTripList(in []*HTTPRoundTripEvent) (out []*THHTTPRoundTripEvent) { - for _, e := range in { - out = append(out, &THHTTPRoundTripEvent{ - RequestMethod: e.RequestMethod, - RequestURL: e.RequestURL.String(), - RequestHeader: e.RequestHeader, - Error: h.errorToFailure(e.Error), - Oddity: e.Oddity, - ResponseStatus: int64(e.ResponseStatus), - ResponseHeader: e.ResponseHeader, - ResponseBodySnapshotSize: int64(len(e.ResponseBodySnapshot)), - MaxBodySnapshotSize: e.MaxBodySnapshotSize, - }) - } + /* + for _, e := range in { + out = append(out, &THHTTPRoundTripEvent{ + RequestMethod: e.RequestMethod, + RequestURL: e.RequestURL.String(), + RequestHeader: e.RequestHeader, + Error: h.errorToFailure(e.Error), + Oddity: e.Oddity, + ResponseStatus: int64(e.ResponseStatus), + ResponseHeader: e.ResponseHeader, + ResponseBodySnapshotSize: int64(len(e.ResponseBodySnapshot)), + MaxBodySnapshotSize: e.MaxBodySnapshotSize, + }) + } + */ return } From cbcfb28559d38b04184fd4aa2b7a93a7f1a1918c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 02:54:41 +0200 Subject: [PATCH 36/53] 
finish converting to ooni data format --- internal/measurex/archival.go | 131 ------------- internal/measurex/db.go | 16 +- internal/measurex/measurement.go | 21 +-- internal/measurex/quic.go | 2 +- internal/measurex/resolver.go | 172 ++++++++++++------ internal/measurex/th.go | 10 +- internal/netxlite/errorsx/classify.go | 3 + internal/netxlite/errorsx/errno.go | 4 +- internal/netxlite/errorsx/errno_test.go | 2 +- internal/netxlite/errorsx/errno_unix.go | 2 +- internal/netxlite/errorsx/errno_windows.go | 2 +- .../errorsx/internal/generrno/main.go | 1 + internal/netxlite/http.go | 10 - internal/netxlite/quic.go | 10 + 14 files changed, 154 insertions(+), 232 deletions(-) diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index 0e029d1c5d..37ef309e5a 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -86,134 +86,3 @@ func NewArchivalTLSCerts(in [][]byte) (out []*ArchivalBinaryData) { } return } - -// -// DNS LookupHost and LookupHTTPSSvc -// - -// ArchivalDNSLookup is the archival format for DNS. -type ArchivalDNSLookup struct { - // JSON names compatible with df-002-dnst's spec - Answers []*ArchivalDNSAnswer `json:"answers"` - Network string `json:"engine"` - Error error `json:"failure"` - Domain string `json:"hostname"` - QueryType string `json:"query_type"` - Address string `json:"resolver_address"` - Finished float64 `json:"t"` - - // Names not part of the spec. - Started float64 `json:"started"` - Oddity Oddity `json:"oddity"` -} - -// ArchivalDNSAnswer is an answer inside ArchivalDNS. -type ArchivalDNSAnswer struct { - // JSON names compatible with df-002-dnst's spec - Type string `json:"answer_type"` - IPv4 string `json:"ipv4,omitempty"` - IPv6 string `json:"ivp6,omitempty"` - - // Names not part of the spec. - ALPN string `json:"alpn,omitempty"` -} - -// NewArchivalLookupHostList converts a []*LookupHostEvent -// to the corresponding archival format. 
-func NewArchivalLookupHostList(in ...*LookupHostEvent) (out []*ArchivalDNSLookup) { - for _, ev := range in { - out = append(out, NewArchivalLookupHost(ev, "A")) - out = append(out, NewArchivalLookupHost(ev, "AAAA")) - } - return -} - -// NewArchivalLookupHost generates an ArchivalDNS entry for the given -// LookupHost event and for the given query type. (OONI's DNS data -// format splits A and AAAA queries, so we need to run this func twice.) -func NewArchivalLookupHost(in *LookupHostEvent, qtype string) (out *ArchivalDNSLookup) { - return &ArchivalDNSLookup{ - Answers: NewArchivalDNSAnswersLookupHost(in.Addrs, qtype), - Network: in.Network, - Error: in.Error, - Domain: in.Domain, - QueryType: qtype, - Address: in.Address, - Finished: in.Finished, - Started: in.Started, - Oddity: in.Oddity, - } -} - -// NewArchivalDNSAnswersLookupHost builds the ArchivalDNSAnswer -// vector for a LookupHost operation and a given query type. -func NewArchivalDNSAnswersLookupHost(addrs []string, qtype string) (out []*ArchivalDNSAnswer) { - for _, addr := range addrs { - switch qtype { - case "A": - if !strings.Contains(addr, ":") { - out = append(out, &ArchivalDNSAnswer{ - Type: qtype, - IPv4: addr, - }) - } - case "AAAA": - if strings.Contains(addr, ":") { - out = append(out, &ArchivalDNSAnswer{ - Type: qtype, - IPv6: addr, - }) - } - } - } - return -} - -// NewArchivalLookupHTTPSSvc generates an ArchivalDNS entry for the given -// LookupHTTPSSvc event. -func NewArchivalLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out *ArchivalDNSLookup) { - return &ArchivalDNSLookup{ - Answers: NewArchivalDNSAnswersLookupHTTPSSvc(in), - Network: in.Network, - Error: in.Error, - Domain: in.Domain, - QueryType: "HTTPS", - Address: in.Address, - Finished: in.Finished, - Started: in.Started, - Oddity: in.Oddity, - } -} - -// NewArchivalLookupHTTPSSvcList converts a []*LookupHTTPSSvcEvent -// to the corresponding archival format. 
-func NewArchivalLookupHTTPSSvcList(in ...*LookupHTTPSSvcEvent) (out []*ArchivalDNSLookup) { - for _, ev := range in { - out = append(out, NewArchivalLookupHTTPSSvc(ev)) - } - return -} - -// NewArchivalDNSAnswersLookupHTTPSSvc builds the ArchivalDNSAnswer -// vector for a LookupHTTPSSvc operation. -func NewArchivalDNSAnswersLookupHTTPSSvc(in *LookupHTTPSSvcEvent) (out []*ArchivalDNSAnswer) { - for _, addr := range in.IPv4 { - out = append(out, &ArchivalDNSAnswer{ - Type: "A", - IPv4: addr, - }) - } - for _, addr := range in.IPv6 { - out = append(out, &ArchivalDNSAnswer{ - Type: "AAAA", - IPv6: addr, - }) - } - for _, alpn := range in.ALPN { - out = append(out, &ArchivalDNSAnswer{ - Type: "ALPN", - ALPN: alpn, - }) - } - return -} diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 953ba82466..6cc0bbd9a0 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -32,10 +32,10 @@ type WritableDB interface { InsertIntoTLSHandshake(ev *TLSHandshakeEvent) // InsertIntoLookupHost saves a lookup host event. - InsertIntoLookupHost(ev *LookupHostEvent) + InsertIntoLookupHost(ev *DNSLookupEvent) // InsertIntoLookupHTTPSvc saves an HTTPSvc lookup event. - InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) + InsertIntoLookupHTTPSSvc(ev *DNSLookupEvent) // InsertIntoDNSRoundTrip saves a DNS round trip event. InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) @@ -57,8 +57,8 @@ type MeasurementDB struct { readWriteTable []*NetworkEvent closeTable []*NetworkEvent tlsHandshakeTable []*TLSHandshakeEvent - lookupHostTable []*LookupHostEvent - lookupHTTPSvcTable []*LookupHTTPSSvcEvent + lookupHostTable []*DNSLookupEvent + lookupHTTPSvcTable []*DNSLookupEvent dnsRoundTripTable []*DNSRoundTripEvent httpRoundTripTable []*HTTPRoundTripEvent httpRedirectTable []*HTTPRedirectEvent @@ -123,27 +123,27 @@ func (db *MeasurementDB) selectAllFromTLSHandshake() (out []*TLSHandshakeEvent) } // InsertIntoLookupHost implements EventDB.InsertIntoLookupHost. 
-func (db *MeasurementDB) InsertIntoLookupHost(ev *LookupHostEvent) { +func (db *MeasurementDB) InsertIntoLookupHost(ev *DNSLookupEvent) { db.mu.Lock() db.lookupHostTable = append(db.lookupHostTable, ev) db.mu.Unlock() } // selectAllFromLookupHost returns all the lookup host events. -func (db *MeasurementDB) selectAllFromLookupHost() (out []*LookupHostEvent) { +func (db *MeasurementDB) selectAllFromLookupHost() (out []*DNSLookupEvent) { out = append(out, db.lookupHostTable...) return } // InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc -func (db *MeasurementDB) InsertIntoLookupHTTPSSvc(ev *LookupHTTPSSvcEvent) { +func (db *MeasurementDB) InsertIntoLookupHTTPSSvc(ev *DNSLookupEvent) { db.mu.Lock() db.lookupHTTPSvcTable = append(db.lookupHTTPSvcTable, ev) db.mu.Unlock() } // selectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. -func (db *MeasurementDB) selectAllFromLookupHTTPSSvc() (out []*LookupHTTPSSvcEvent) { +func (db *MeasurementDB) selectAllFromLookupHTTPSSvc() (out []*DNSLookupEvent) { out = append(out, db.lookupHTTPSvcTable...) return } diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index 99e037f020..0c230582e8 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -79,10 +79,10 @@ type Measurement struct { QUICHandshake []*QUICHandshakeEvent `json:"quic_handshake,omitempty"` // LookupHost contains all the host lookups. - LookupHost []*LookupHostEvent `json:"lookup_host,omitempty"` + LookupHost []*DNSLookupEvent `json:"lookup_host,omitempty"` // LookupHTTPSSvc contains all the HTTPSSvc lookups. - LookupHTTPSSvc []*LookupHTTPSSvcEvent `json:"lookup_httpssvc,omitempty"` + LookupHTTPSSvc []*DNSLookupEvent `json:"lookup_httpssvc,omitempty"` // DNSRoundTrip contains all the DNS round trips. 
DNSRoundTrip []*DNSRoundTripEvent `json:"dns_round_trip,omitempty"` @@ -133,7 +133,7 @@ func (m *DNSMeasurement) allTCPEndpoints(domain, port string) (out []*Endpoint) if domain != entry.Domain { continue } - for _, addr := range entry.Addrs { + for _, addr := range entry.Addrs() { if net.ParseIP(addr) == nil { continue // skip CNAME entries courtesy the WCTH } @@ -148,11 +148,10 @@ func (m *DNSMeasurement) allQUICEndpoints(domain, port string) (out []*Endpoint) if domain != entry.Domain { continue } - if !m.supportsHTTP3(entry) { + if !entry.SupportsHTTP3() { continue } - addrs := append([]string{}, entry.IPv4...) - for _, addr := range append(addrs, entry.IPv6...) { + for _, addr := range entry.Addrs() { out = append(out, m.newEndpoint(addr, port, NetworkQUIC)) } } @@ -163,16 +162,6 @@ func (m *DNSMeasurement) newEndpoint(addr, port string, network EndpointNetwork) return &Endpoint{Network: network, Address: net.JoinHostPort(addr, port)} } -func (m *DNSMeasurement) supportsHTTP3(entry *LookupHTTPSSvcEvent) bool { - for _, alpn := range entry.ALPN { - switch alpn { - case "h3": - return true - } - } - return false -} - // allHTTPEndpointsForURL returns all the HTTPEndpoints matching // a specific URL's domain inside this measurement. 
// diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index 0728828269..79c66d7b7d 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -87,7 +87,7 @@ func (c *udpLikeConnDB) Close() error { started := time.Since(c.begin).Seconds() err := c.UDPLikeConn.Close() finished := time.Since(c.begin).Seconds() - c.db.InsertIntoReadWrite(&NetworkEvent{ + c.db.InsertIntoClose(&NetworkEvent{ Operation: "close", Network: "quic", RemoteAddr: "", diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index dec80b50ad..affefb0616 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -8,7 +8,8 @@ package measurex import ( "context" - "encoding/json" + "net" + "strings" "time" "github.com/ooni/probe-cli/v3/internal/netxlite" @@ -60,42 +61,102 @@ type resolverDB struct { db WritableDB } -// LookupHostEvent contains the result of a host lookup. -type LookupHostEvent struct { - Network string - Address string - Domain string - Started float64 - Finished float64 - Error error - Oddity Oddity - Addrs []string +// DNSLookupAnswer is a DNS lookup answer. +type DNSLookupAnswer struct { + // JSON names compatible with df-002-dnst's spec + Type string `json:"answer_type"` + IPv4 string `json:"ipv4,omitempty"` + IPv6 string `json:"ivp6,omitempty"` + + // Names not part of the spec. + ALPN string `json:"alpn,omitempty"` } -// MarshalJSON marshals a LookupHostEvent to the archival -// format compatible with df-002-dnst. -func (ev *LookupHostEvent) MarshalJSON() ([]byte, error) { - archival := NewArchivalLookupHostList(ev) - return json.Marshal(archival) +// DNSLookupEvent contains the results of a DNS lookup. 
+type DNSLookupEvent struct { + // fields inside df-002-dnst + Answers []DNSLookupAnswer `json:"answers"` + Network string `json:"engine"` + Error error `json:"failure"` + Domain string `json:"hostname"` + QueryType string `json:"query_type"` + Address string `json:"resolver_address"` + Finished float64 `json:"t"` + + // Names not part of the spec. + Started float64 `json:"started"` + Oddity Oddity `json:"oddity"` +} + +// SupportsHTTP3 returns true if this query is for HTTPS and +// the answer contains an ALPN for "h3" +func (ev *DNSLookupEvent) SupportsHTTP3() bool { + if ev.QueryType != "HTTPS" { + return false + } + for _, ans := range ev.Answers { + switch ans.Type { + case "ALPN": + if ans.ALPN == "h3" { + return true + } + } + } + return false +} + +// Addrs returns all the IPv4/IPv6 addresses +func (ev *DNSLookupEvent) Addrs() (out []string) { + for _, ans := range ev.Answers { + switch ans.Type { + case "A": + if net.ParseIP(ans.IPv4) != nil { + out = append(out, ans.IPv4) + } + case "AAAA": + if net.ParseIP(ans.IPv6) != nil { + out = append(out, ans.IPv6) + } + } + } + return } func (r *resolverDB) LookupHost(ctx context.Context, domain string) ([]string, error) { started := time.Since(r.begin).Seconds() addrs, err := r.Resolver.LookupHost(ctx, domain) finished := time.Since(r.begin).Seconds() - r.db.InsertIntoLookupHost(&LookupHostEvent{ - Network: r.Resolver.Network(), - Address: r.Resolver.Address(), - Domain: domain, - Started: started, - Finished: finished, - Error: err, - Oddity: r.computeOddityLookupHost(addrs, err), - Addrs: addrs, - }) + for _, qtype := range []string{"A", "AAAA"} { + ev := &DNSLookupEvent{ + Answers: r.computeAnswers(addrs, qtype), + Network: r.Resolver.Network(), + Address: r.Resolver.Address(), + Error: err, + Domain: domain, + QueryType: qtype, + Finished: finished, + Started: started, + Oddity: r.computeOddityLookupHost(addrs, err), + } + r.db.InsertIntoLookupHost(ev) + } return addrs, err } +func (r *resolverDB) 
computeAnswers(addrs []string, qtype string) (out []DNSLookupAnswer) { + for _, addr := range addrs { + if qtype == "A" && !strings.Contains(addr, ":") { + out = append(out, DNSLookupAnswer{Type: qtype, IPv4: addr}) + continue + } + if qtype == "AAAA" && strings.Contains(addr, ":") { + out = append(out, DNSLookupAnswer{Type: qtype, IPv6: addr}) + continue + } + } + return +} + func (r *resolverDB) computeOddityLookupHost(addrs []string, err error) Oddity { if err != nil { switch err.Error() { @@ -117,44 +178,39 @@ func (r *resolverDB) computeOddityLookupHost(addrs []string, err error) Oddity { return "" } -// LookupHTTPSSvcEvent contains the results of an HTTPSSvc lookup. -type LookupHTTPSSvcEvent struct { - Network string - Address string - Domain string - Started float64 - Finished float64 - Error error - Oddity Oddity - IPv4 []string - IPv6 []string - ALPN []string -} - -// MarshalJSON marshals a LookupHTTPSSvcEvent to the archival -// format that is similar to df-002-dnst. -func (ev *LookupHTTPSSvcEvent) MarshalJSON() ([]byte, error) { - archival := NewArchivalLookupHTTPSSvcList(ev) - return json.Marshal(archival) -} - func (r *resolverDB) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain string) (HTTPSSvc, error) { started := time.Since(r.begin).Seconds() https, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) finished := time.Since(r.begin).Seconds() - ev := &LookupHTTPSSvcEvent{ - Network: r.Resolver.Network(), - Address: r.Resolver.Address(), - Domain: domain, - Started: started, - Finished: finished, - Error: err, - Oddity: Oddity(r.computeOddityHTTPSSvc(https, err)), + ev := &DNSLookupEvent{ + Network: r.Resolver.Network(), + Address: r.Resolver.Address(), + Domain: domain, + QueryType: "HTTPS", + Started: started, + Finished: finished, + Error: err, + Oddity: Oddity(r.computeOddityHTTPSSvc(https, err)), } if err == nil { - ev.IPv4 = https.IPv4Hint() - ev.IPv6 = https.IPv6Hint() - ev.ALPN = https.ALPN() + for _, addr := range 
https.IPv4Hint() { + ev.Answers = append(ev.Answers, DNSLookupAnswer{ + Type: "A", + IPv4: addr, + }) + } + for _, addr := range https.IPv6Hint() { + ev.Answers = append(ev.Answers, DNSLookupAnswer{ + Type: "AAAA", + IPv6: addr, + }) + } + for _, alpn := range https.ALPN() { + ev.Answers = append(ev.Answers, DNSLookupAnswer{ + Type: "ALPN", + ALPN: alpn, + }) + } } r.db.InsertIntoLookupHTTPSSvc(ev) return https, err diff --git a/internal/measurex/th.go b/internal/measurex/th.go index 986bc6d0ea..eac2cdd5c5 100644 --- a/internal/measurex/th.go +++ b/internal/measurex/th.go @@ -672,7 +672,7 @@ func (h *THHandler) newTHLookupHostList(m *Measurement) (out []*THLookupHostEven Domain: entry.Domain, Error: h.errorToFailure(entry.Error), Oddity: entry.Oddity, - Addrs: entry.Addrs, + //Addrs: entry.Addrs, }) } return @@ -686,9 +686,11 @@ func (h *THHandler) newTHLookupHTTPSSvcList(m *Measurement) (out []*THLookupHTTP Domain: entry.Domain, Error: h.errorToFailure(entry.Error), Oddity: entry.Oddity, - IPv4: entry.IPv4, - IPv6: entry.IPv6, - ALPN: entry.ALPN, + /* + IPv4: entry.IPv4, + IPv6: entry.IPv6, + ALPN: entry.ALPN, + */ }) } return diff --git a/internal/netxlite/errorsx/classify.go b/internal/netxlite/errorsx/classify.go index c9bd177258..2e0a70793d 100644 --- a/internal/netxlite/errorsx/classify.go +++ b/internal/netxlite/errorsx/classify.go @@ -96,6 +96,9 @@ func classifyWithStringSuffix(err error) string { if strings.HasSuffix(s, DNSNoAnswerSuffix) { return FailureDNSNoAnswer } + if strings.HasSuffix(s, "use of closed network connection") { + return FailureConnectionAlreadyClosed + } return "" // not found } diff --git a/internal/netxlite/errorsx/errno.go b/internal/netxlite/errorsx/errno.go index ddc0dd654e..1905719056 100644 --- a/internal/netxlite/errorsx/errno.go +++ b/internal/netxlite/errorsx/errno.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. 
-// Generated: 2021-09-15 01:19:41.870222 +0200 CEST m=+0.133778293 +// Generated: 2021-09-23 02:53:58.903572 +0200 CEST m=+0.155915001 package errorsx @@ -61,6 +61,7 @@ const ( FailureSSLUnknownAuthority = "ssl_unknown_authority" FailureSSLInvalidCertificate = "ssl_invalid_certificate" FailureJSONParseError = "json_parse_error" + FailureConnectionAlreadyClosed = "connection_already_closed" ) // failureMap lists all failures so we can match them @@ -107,6 +108,7 @@ var failuresMap = map[string]string{ "ssl_unknown_authority": "ssl_unknown_authority", "ssl_invalid_certificate": "ssl_invalid_certificate", "json_parse_error": "json_parse_error", + "connection_already_closed": "connection_already_closed", } // classifySyscallError converts a syscall error to the diff --git a/internal/netxlite/errorsx/errno_test.go b/internal/netxlite/errorsx/errno_test.go index f33de7e5e6..6bd6f8597a 100644 --- a/internal/netxlite/errorsx/errno_test.go +++ b/internal/netxlite/errorsx/errno_test.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2021-09-15 01:19:41.914231 +0200 CEST m=+0.177788626 +// Generated: 2021-09-23 02:53:58.950293 +0200 CEST m=+0.202636793 package errorsx diff --git a/internal/netxlite/errorsx/errno_unix.go b/internal/netxlite/errorsx/errno_unix.go index b9c76f7794..eff816fd23 100644 --- a/internal/netxlite/errorsx/errno_unix.go +++ b/internal/netxlite/errorsx/errno_unix.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2021-09-15 01:19:41.737047 +0200 CEST m=+0.000598959 +// Generated: 2021-09-23 02:53:58.748226 +0200 CEST m=+0.000564668 package errorsx diff --git a/internal/netxlite/errorsx/errno_windows.go b/internal/netxlite/errorsx/errno_windows.go index d789bbf1cc..c8f48ab61a 100644 --- a/internal/netxlite/errorsx/errno_windows.go +++ b/internal/netxlite/errorsx/errno_windows.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. 
-// Generated: 2021-09-15 01:19:41.845953 +0200 CEST m=+0.109508834 +// Generated: 2021-09-23 02:53:58.877247 +0200 CEST m=+0.129589085 package errorsx diff --git a/internal/netxlite/errorsx/internal/generrno/main.go b/internal/netxlite/errorsx/internal/generrno/main.go index 0cf23ecf44..dce774f438 100644 --- a/internal/netxlite/errorsx/internal/generrno/main.go +++ b/internal/netxlite/errorsx/internal/generrno/main.go @@ -104,6 +104,7 @@ var Specs = []*ErrorSpec{ NewLibraryError("SSL_unknown_authority"), NewLibraryError("SSL_invalid_certificate"), NewLibraryError("JSON_parse_error"), + NewLibraryError("connection_already_closed"), } func fileCreate(filename string) *os.File { diff --git a/internal/netxlite/http.go b/internal/netxlite/http.go index c408471a36..2150736a0d 100644 --- a/internal/netxlite/http.go +++ b/internal/netxlite/http.go @@ -31,16 +31,6 @@ type httpTransportLogger struct { var _ HTTPTransport = &httpTransportLogger{} func (txp *httpTransportLogger) RoundTrip(req *http.Request) (*http.Response, error) { - host := req.Host - if host == "" { - host = req.URL.Host - } - req.Header.Set("Host", host) // anticipate what Go would do - return txp.logTrip(req) -} - -// logTrip is an HTTP round trip with logging. -func (txp *httpTransportLogger) logTrip(req *http.Request) (*http.Response, error) { txp.Logger.Debugf("> %s %s", req.Method, req.URL.String()) for key, values := range req.Header { for _, value := range values { diff --git a/internal/netxlite/quic.go b/internal/netxlite/quic.go index 17d309e357..890a255698 100644 --- a/internal/netxlite/quic.go +++ b/internal/netxlite/quic.go @@ -394,6 +394,16 @@ func (c *quicErrWrapperUDPLikeConn) ReadFrom(b []byte) (int, net.Addr, error) { return n, addr, nil } +// Close implements quicx.UDPLikeConn.Close. 
+func (c *quicErrWrapperUDPLikeConn) Close() error { + err := c.UDPLikeConn.Close() + if err != nil { + return errorsx.NewErrWrapper( + errorsx.ClassifyGenericError, errorsx.ReadFromOperation, err) + } + return nil +} + // quicDialerErrWrapper is a dialer that performs quic err wrapping type quicDialerErrWrapper struct { QUICDialer From ca728c38cb5cbfaca5bfc60d746cf5594bd4e4ef Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 04:05:47 +0200 Subject: [PATCH 37/53] adjusted TH to use most abstract measurex --- internal/cmd/oohelper/oohelper.go | 5 +- internal/measurex/archival.go | 45 ++- internal/measurex/dialer.go | 10 +- internal/measurex/dnsx.go | 4 +- internal/measurex/http.go | 42 ++- internal/measurex/measurer.go | 2 +- internal/measurex/quic.go | 8 +- internal/measurex/resolver.go | 6 +- internal/measurex/th.go | 472 +++++------------------- internal/measurex/tls.go | 4 +- internal/netxlite/errorsx/errwrapper.go | 6 + 11 files changed, 167 insertions(+), 437 deletions(-) diff --git a/internal/cmd/oohelper/oohelper.go b/internal/cmd/oohelper/oohelper.go index 97faa30706..871a72eb93 100644 --- a/internal/cmd/oohelper/oohelper.go +++ b/internal/cmd/oohelper/oohelper.go @@ -68,7 +68,10 @@ func webstepsth() interface{} { serverURL = "http://127.0.0.1:8080/api/v1/websteps" } clnt := &measurex.THClient{ - DNServers: []string{"8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53"}, + DNServers: []*measurex.ResolverInfo{{ + Network: "udp", + Address: "8.8.4.4:53", + }}, HTTPClient: httpClient, ServerURL: serverURL, } diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index 37ef309e5a..515c724c2f 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -1,7 +1,6 @@ package measurex import ( - "log" "net/http" "strings" ) @@ -41,31 +40,26 @@ func NewArchivalBinaryData(data []byte) (out *ArchivalBinaryData) { // HTTPRoundTrip // -// ArchivalHeadersList is a list of HTTP headers. 
-type ArchivalHeadersList [][]string +// ArchivalHeaders is a list of HTTP headers. +type ArchivalHeaders map[string]string // Get searches for the first header with the named key // and returns it. If not found, returns an empty string. -func (headers ArchivalHeadersList) Get(key string) string { - key = strings.ToLower(key) - for _, entry := range headers { - if len(entry) != 2 { - log.Printf("headers: malformed header: %+v", entry) - continue - } - headerKey, headerValue := entry[0], entry[1] - if strings.ToLower(headerKey) == key { - return headerValue - } - } - return "" +func (headers ArchivalHeaders) Get(key string) string { + return headers[strings.ToLower(key)] } -// NewArchivalHeadersList builds a new HeadersList from http.Header. -func NewArchivalHeadersList(in http.Header) (out ArchivalHeadersList) { +// NewArchivalHeaders builds a new HeadersList from http.Header. +func NewArchivalHeaders(in http.Header) (out ArchivalHeaders) { + out = make(ArchivalHeaders) for k, vv := range in { for _, v := range vv { - out = append(out, []string{k, v}) + // It breaks my hearth a little bit to ignore + // subsequent headers, but this does not happen + // very frequently, and I know the pipeline + // parses the map headers format only. + out[strings.ToLower(k)] = v + break } } return @@ -86,3 +80,16 @@ func NewArchivalTLSCerts(in [][]byte) (out []*ArchivalBinaryData) { } return } + +// +// Failure +// + +// NewArchivalFailure creates an archival failure from an error. 
+func NewArchivalFailure(err error) *string { + if err == nil { + return nil + } + s := err.Error() + return &s +} diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index f95eb74dfe..d6d1d3a879 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -52,7 +52,7 @@ type dialerDB struct { type NetworkEvent struct { // JSON names compatible with df-008-netevents RemoteAddr string `json:"address"` - Error error `json:"failure"` + Failure *string `json:"failure"` Count int `json:"num_bytes,omitempty"` Operation string `json:"operation"` Network string `json:"proto"` @@ -74,7 +74,7 @@ func (d *dialerDB) DialContext( RemoteAddr: address, Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Oddity: d.computeOddity(err), Count: 0, }) @@ -124,7 +124,7 @@ func (c *connDB) Read(b []byte) (int, error) { RemoteAddr: c.remoteAddr, Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Count: count, }) return count, err @@ -140,7 +140,7 @@ func (c *connDB) Write(b []byte) (int, error) { RemoteAddr: c.remoteAddr, Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Count: count, }) return count, err @@ -156,7 +156,7 @@ func (c *connDB) Close() error { RemoteAddr: c.remoteAddr, Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Count: 0, }) return err diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index 67a113f01c..79fccbd327 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -39,7 +39,7 @@ type DNSRoundTripEvent struct { Query *ArchivalBinaryData `json:"raw_query"` Started float64 `json:"started"` Finished float64 `json:"t"` - Error error `json:"failure"` + Failure *string `json:"failure"` Reply *ArchivalBinaryData `json:"raw_reply"` } @@ -53,7 +53,7 @@ func (txp *dnsxRoundTripperDB) RoundTrip(ctx context.Context, query []byte) ([]b Query: 
NewArchivalBinaryData(query), Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Reply: NewArchivalBinaryData(reply), }) return reply, err diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 6830b3eacb..9229f25f59 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -28,6 +28,7 @@ import ( "github.com/lucas-clemente/quic-go" "github.com/ooni/probe-cli/v3/internal/engine/httpheader" "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" "github.com/ooni/probe-cli/v3/internal/netxlite/iox" "github.com/ooni/probe-cli/v3/internal/runtimex" "golang.org/x/net/publicsuffix" @@ -74,16 +75,16 @@ type httpTransportDB struct { // HTTPRequest is the HTTP request. type HTTPRequest struct { // Names consistent with df-001-http.md - Method string `json:"method"` - URL string `json:"url"` - HeadersList ArchivalHeadersList `json:"headers_list"` + Method string `json:"method"` + URL string `json:"url"` + Headers ArchivalHeaders `json:"headers"` } // HTTPResponse is the HTTP response. type HTTPResponse struct { // Names consistent with df-001-http.md Code int64 `json:"code"` - HeadersList ArchivalHeadersList `json:"headers_list"` + Headers ArchivalHeaders `json:"headers"` Body *ArchivalBinaryData `json:"body"` BodyIsTruncated bool `json:"body_is_truncated"` @@ -95,7 +96,7 @@ type HTTPResponse struct { // HTTPRoundTripEvent contains information about an HTTP round trip. type HTTPRoundTripEvent struct { // JSON names following the df-001-httpt data format. 
- Error error `json:"failure"` + Failure *string `json:"failure"` Request *HTTPRequest `json:"request"` Response *HTTPResponse `json:"response"` Finished float64 `json:"t"` @@ -115,15 +116,15 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) resp, err := txp.HTTPTransport.RoundTrip(req) rt := &HTTPRoundTripEvent{ Request: &HTTPRequest{ - Method: req.Method, - URL: req.URL.String(), - HeadersList: NewArchivalHeadersList(req.Header), + Method: req.Method, + URL: req.URL.String(), + Headers: NewArchivalHeaders(req.Header), }, Started: started, } if err != nil { rt.Finished = time.Since(txp.begin).Seconds() - rt.Error = err + rt.Failure = NewArchivalFailure(err) txp.db.InsertIntoHTTPRoundTrip(rt) return nil, err } @@ -138,8 +139,8 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) rt.Oddity = OddityStatusOther } rt.Response = &HTTPResponse{ - Code: int64(resp.StatusCode), - HeadersList: NewArchivalHeadersList(resp.Header), + Code: int64(resp.StatusCode), + Headers: NewArchivalHeaders(resp.Header), } r := io.LimitReader(resp.Body, maxBodySnapshot) body, err := iox.ReadAllContext(req.Context(), r) @@ -148,7 +149,7 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) } if err != nil { rt.Finished = time.Since(txp.begin).Seconds() - rt.Error = err + rt.Failure = NewArchivalFailure(err) txp.db.InsertIntoHTTPRoundTrip(rt) return nil, err } @@ -219,7 +220,7 @@ var ErrHTTPTooManyRedirects = errors.New("stopped after 10 redirects") func newHTTPClient(db WritableDB, cookiejar http.CookieJar, txp HTTPTransport, defaultErr error) HTTPClient { - return &http.Client{ + return &httpClientErrWrapper{&http.Client{ Transport: txp, Jar: cookiejar, CheckRedirect: func(req *http.Request, via []*http.Request) error { @@ -227,6 +228,9 @@ func newHTTPClient(db WritableDB, cookiejar http.CookieJar, if len(via) >= 10 { err = ErrHTTPTooManyRedirects } + if err != nil { + err = 
errorsx.NewTopLevelGenericErrWrapper(err) + } db.InsertIntoHTTPRedirect(&HTTPRedirectEvent{ URL: via[0].URL, // bug in Go stdlib if we crash here Location: req.URL, @@ -235,7 +239,19 @@ func newHTTPClient(db WritableDB, cookiejar http.CookieJar, }) return err }, + }} +} + +type httpClientErrWrapper struct { + HTTPClient +} + +func (c *httpClientErrWrapper) Do(req *http.Request) (*http.Response, error) { + resp, err := c.HTTPClient.Do(req) + if err != nil { + err = errorsx.NewTopLevelGenericErrWrapper(err) } + return resp, err } // NewCookieJar is a convenience factory for creating an http.CookieJar diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index ad848a926d..aca092d260 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -805,7 +805,7 @@ func (mx *Measurer) maybeQUICFollowUp(ctx context.Context, stdlog.Printf("malformed HTTPRoundTrip@%d: %+v", idx, rtrip) continue } - if v := rtrip.Response.HeadersList.Get("alt-svc"); v != "" { + if v := rtrip.Response.Headers.Get("alt-svc"); v != "" { altsvc = append(altsvc, v) } } diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index 79c66d7b7d..5ef2f0a837 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -61,7 +61,7 @@ func (c *udpLikeConnDB) WriteTo(p []byte, addr net.Addr) (int, error) { RemoteAddr: addr.String(), Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Count: count, }) return count, err @@ -77,7 +77,7 @@ func (c *udpLikeConnDB) ReadFrom(b []byte) (int, net.Addr, error) { RemoteAddr: addrStringIfNotNil(addr), Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Count: count, }) return count, addr, err @@ -93,7 +93,7 @@ func (c *udpLikeConnDB) Close() error { RemoteAddr: "", Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Count: 0, }) return err @@ -147,7 +147,7 @@ func (qh *quicDialerDB) DialContext(ctx 
context.Context, network, address string SkipVerify: tlsConfig.InsecureSkipVerify, Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Oddity: qh.computeOddity(err), TLSVersion: netxlite.TLSVersionString(state.Version), CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index affefb0616..1e8d0a6be2 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -77,7 +77,7 @@ type DNSLookupEvent struct { // fields inside df-002-dnst Answers []DNSLookupAnswer `json:"answers"` Network string `json:"engine"` - Error error `json:"failure"` + Failure *string `json:"failure"` Domain string `json:"hostname"` QueryType string `json:"query_type"` Address string `json:"resolver_address"` @@ -131,7 +131,7 @@ func (r *resolverDB) LookupHost(ctx context.Context, domain string) ([]string, e Answers: r.computeAnswers(addrs, qtype), Network: r.Resolver.Network(), Address: r.Resolver.Address(), - Error: err, + Failure: NewArchivalFailure(err), Domain: domain, QueryType: qtype, Finished: finished, @@ -189,7 +189,7 @@ func (r *resolverDB) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain stri QueryType: "HTTPS", Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Oddity: Oddity(r.computeOddityHTTPSSvc(https, err)), } if err == nil { diff --git a/internal/measurex/th.go b/internal/measurex/th.go index eac2cdd5c5..a58c0a1ad5 100644 --- a/internal/measurex/th.go +++ b/internal/measurex/th.go @@ -46,7 +46,6 @@ import ( "io" "net/http" "net/url" - "strings" "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/netxlite" @@ -74,103 +73,15 @@ type THClientRequest struct { // THServerResponse is the response from the test helper. type THServerResponse struct { + // URL is the URL this measurement refers to. + URL string `json:"url"` + // DNS contains all the DNS related measurements. 
- DNS *THDNSMeasurement + DNS []*DNSMeasurement `json:"dns"` // Endpoints contains a measurement for each endpoint // that was discovered by the probe or the TH. - Endpoints []*THEndpointMeasurement -} - -// THDNSMeasurement is a DNS measurement performed by the test helper. -type THDNSMeasurement struct { - // Oddities lists all the oddities inside this measurement. - Oddities []Oddity - - // LookupHost contains all the host lookups. - LookupHost []*THLookupHostEvent `json:",omitempty"` - - // LookupHTTPSSvc contains all the HTTPSSvc lookups. - LookupHTTPSSvc []*THLookupHTTPSSvcEvent `json:",omitempty"` -} - -// THLookupHostEvent is the LookupHost event sent -// back by the test helper. -type THLookupHostEvent struct { - Network string - Address string - Domain string - Error *string - Oddity Oddity - Addrs []string -} - -// THLookupHTTPSSvcEvent is the LookupHTTPSvc event sent -// back by the test helper. -type THLookupHTTPSSvcEvent struct { - Network string - Address string - Domain string - Error *string - Oddity Oddity - IPv4 []string - IPv6 []string - ALPN []string -} - -// THEndpointMeasurement is an endpoint measurement -// performed by the test helper. -type THEndpointMeasurement struct { - // Oddities lists all the oddities inside this measurement. - Oddities []Oddity - - // Connect contains all the connect operations. - Connect []*THConnectEvent `json:",omitempty"` - - // TLSHandshake contains all the TLS handshakes. - TLSHandshake []*THHandshakeEvent `json:",omitempty"` - - // QUICHandshake contains all the QUIC handshakes. - QUICHandshake []*THHandshakeEvent `json:",omitempty"` - - // HTTPRoundTrip contains all the HTTP round trips. - HTTPRoundTrip []*THHTTPRoundTripEvent `json:",omitempty"` -} - -// THConnectEvent is the connect event sent back by the test helper. -type THConnectEvent struct { - Network string - RemoteAddr string - Error *string - Oddity Oddity -} - -// THHandshakeEvent is the handshake event sent -// back by the test helper. 
-type THHandshakeEvent struct { - Network string - RemoteAddr string - SNI string - ALPN []string - Error *string - Oddity Oddity - TLSVersion string - CipherSuite string - NegotiatedProto string -} - -// THHTTPRoundTripEvent is the HTTP round trip event -// sent back by the test helper. -type THHTTPRoundTripEvent struct { - RequestMethod string - RequestURL string - RequestHeader http.Header - Error *string - Oddity Oddity - ResponseStatus int64 - ResponseHeader http.Header - ResponseBodySnapshotSize int64 - MaxBodySnapshotSize int64 + Endpoints []*HTTPEndpointMeasurement `json:"endpoints"` } // thMaxAcceptableBodySize is the maximum acceptable body size by TH code. @@ -378,241 +289,118 @@ func (h *THHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { // The return value is either a THServerResponse or an error. func (h *THHandler) singleStep( ctx context.Context, req *THClientRequest) (*THServerResponse, error) { - parsedURL, err := url.Parse(req.URL) - if err != nil || (parsedURL.Scheme != "http" && parsedURL.Scheme != "https") { - return nil, errors.New("invalid request url") - } - epnts, dns := h.dohQuery(ctx, parsedURL) - m := &THServerResponse{DNS: dns} - epnts = h.prepareEnpoints( - epnts, parsedURL, req.Endpoints, req.HTTPRequestHeaders) mx := NewMeasurerWithDefaultSettings() + mx.MeasureURLHelper = &thMeasureURLHelper{req.Endpoints} + mx.Resolvers = []*ResolverInfo{{ + Network: ResolverForeign, + ForeignResolver: thResolver, + }} jar := NewCookieJar() - for me := range mx.HTTPEndpointGetParallel(ctx, jar, epnts...) { - m.Endpoints = append(m.Endpoints, h.newTHEndpointMeasurement(me)) + meas, err := mx.MeasureURL(ctx, req.URL, req.HTTPRequestHeaders, jar) + if err != nil { + return nil, err } - h.maybeQUICFollowUp(ctx, m, epnts...) 
- return m, nil + return &THServerResponse{ + URL: req.URL, + DNS: meas.DNS, + Endpoints: h.simplifyEndpoints(meas.Endpoints), + }, nil } -// prepareEnpoints takes in input a list of endpoints discovered -// so far by the TH and extends this list by adding the endpoints -// discovered by the client. Before returning, this function -// ensures that we don't have any duplicate endpoint. -// -// Arguments: -// -// - the list of endpoints discovered by the TH -// -// - the URL provided by the probe -// -// - the endpoints provided by the probe -// -// - the headers provided by the probe -// -// The return value may be an empty list if both the client -// and the TH failed to discover any endpoint. -// -// When the return value contains endpoints, we also fill -// the HTTPEndpoint.Header field using the header param -// provided by the client. We don't allow arbitrary headers: -// we only copy a subset of allowed headers. -func (h *THHandler) prepareEnpoints(epnts []*HTTPEndpoint, URL *url.URL, - clientEpnts []*Endpoint, header http.Header) (out []*HTTPEndpoint) { - for _, epnt := range clientEpnts { - epnts = append(epnts, &HTTPEndpoint{ - Domain: URL.Hostname(), - Network: epnt.Network, - Address: epnt.Address, - SNI: URL.Hostname(), - ALPN: alpnForHTTPEndpoint(epnt.Network), - URL: URL, - Header: http.Header{}, // see the loop below +func (h *THHandler) simplifyEndpoints( + in []*HTTPEndpointMeasurement) (out []*HTTPEndpointMeasurement) { + for _, epnt := range in { + out = append(out, &HTTPEndpointMeasurement{ + URL: epnt.URL, + Endpoint: epnt.Endpoint, + Measurement: h.simplifyMeasurement(epnt.Measurement), }) } - dups := make(map[string]bool) - for _, epnt := range epnts { - id := epnt.String() - if _, found := dups[id]; found { - continue - } - dups[id] = true - epnt.Header = h.onlyAllowedHeaders(header) - out = append(out, epnt) - } return } -func (h *THHandler) onlyAllowedHeaders(header http.Header) (out http.Header) { - out = http.Header{} - for k, vv := range 
header { - switch strings.ToLower(k) { - case "accept", "accept-language", "user-agent": - for _, v := range vv { - out.Add(k, v) - } - default: - // ignore all the other headers - } +func (h *THHandler) simplifyMeasurement(in *Measurement) (out *Measurement) { + out = &Measurement{ + Connect: in.Connect, + TLSHandshake: h.simplifyHandshake(in.TLSHandshake), + QUICHandshake: h.simplifyHandshake(in.QUICHandshake), + LookupHost: in.LookupHost, + LookupHTTPSSvc: in.LookupHTTPSSvc, + HTTPRoundTrip: h.simplifyHTTPRoundTrip(in.HTTPRoundTrip), } return } -// maybeQUICFollowUp checks whether we need to use Alt-Svc to check -// for QUIC. We query for HTTPSSvc but currently only Cloudflare -// implements this proposed standard. So, this function is -// where we take care of all the other servers implementing QUIC. -func (h *THHandler) maybeQUICFollowUp(ctx context.Context, - m *THServerResponse, epnts ...*HTTPEndpoint) { - altsvc := []string{} - for _, epnt := range m.Endpoints { - // Check whether we have a QUIC handshake. If so, then - // HTTPSSvc worked and we can stop here. - if epnt.QUICHandshake != nil { - return - } - for _, rtrip := range epnt.HTTPRoundTrip { - if v := rtrip.ResponseHeader.Get("alt-svc"); v != "" { - altsvc = append(altsvc, v) - } - } - } - // syntax: - // - // Alt-Svc: clear - // Alt-Svc: =; ma= - // Alt-Svc: =; ma=; persist=1 - // - // multiple entries may be separated by comma. - // - // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Alt-Svc - for _, header := range altsvc { - entries := strings.Split(header, ",") - if len(entries) < 1 { - continue - } - for _, entry := range entries { - parts := strings.Split(entry, ";") - if len(parts) < 1 { - continue - } - if parts[0] == "h3=\":443\"" { - h.doQUICFollowUp(ctx, m, epnts...) - return - } - } - } -} - -// doQUICFollowUp runs when we know there's QUIC support via Alt-Svc. 
-func (h *THHandler) doQUICFollowUp(ctx context.Context, - m *THServerResponse, epnts ...*HTTPEndpoint) { - quicEpnts := []*HTTPEndpoint{} - // do not mutate the existing list rather create a new one - for _, epnt := range epnts { - quicEpnts = append(quicEpnts, &HTTPEndpoint{ - Domain: epnt.Domain, - Network: NetworkQUIC, - Address: epnt.Address, - SNI: epnt.SNI, - ALPN: []string{"h3"}, - URL: epnt.URL, - Header: epnt.Header, +func (h *THHandler) simplifyHandshake(in []*TLSHandshakeEvent) (out []*TLSHandshakeEvent) { + for _, ev := range in { + out = append(out, &TLSHandshakeEvent{ + CipherSuite: ev.CipherSuite, + Failure: ev.Failure, + NegotiatedProto: ev.NegotiatedProto, + TLSVersion: ev.TLSVersion, + PeerCerts: nil, + Finished: 0, + RemoteAddr: ev.RemoteAddr, + SNI: ev.SNI, + ALPN: ev.ALPN, + SkipVerify: ev.SkipVerify, + Oddity: ev.Oddity, + Network: ev.Network, + Started: 0, }) } - mx := NewMeasurerWithDefaultSettings() - jar := NewCookieJar() - for me := range mx.HTTPEndpointGetParallel(ctx, jar, quicEpnts...) { - m.Endpoints = append(m.Endpoints, h.newTHEndpointMeasurement(me)) - } -} - -// -// TH server: marshalling of endpoint measurements -// - -// newTHEndpointMeasurement takes in input an endpoint -// measurement performed by a measurer and emits in output -// the simplified THEndpointMeasurement equivalent. 
-func (h *THHandler) newTHEndpointMeasurement(in *HTTPEndpointMeasurement) *THEndpointMeasurement { - return &THEndpointMeasurement{ - // TODO(bassosimone): here we need to add more fields - Connect: h.newTHConnectEventList(in.Connect), - TLSHandshake: h.newTLSHandshakesList(in.TLSHandshake), - QUICHandshake: h.newQUICHandshakeList(in.QUICHandshake), - HTTPRoundTrip: h.newHTTPRoundTripList(in.HTTPRoundTrip), - } + return } -func (h *THHandler) newTHConnectEventList(in []*NetworkEvent) (out []*THConnectEvent) { - for _, e := range in { - out = append(out, &THConnectEvent{ - Network: e.Network, - RemoteAddr: e.RemoteAddr, - Error: h.errorToFailure(e.Error), - Oddity: e.Oddity, +func (h *THHandler) simplifyHTTPRoundTrip(in []*HTTPRoundTripEvent) (out []*HTTPRoundTripEvent) { + for _, ev := range in { + out = append(out, &HTTPRoundTripEvent{ + Failure: ev.Failure, + Request: ev.Request, + Response: h.simplifyHTTPResponse(ev.Response), + Finished: 0, + Started: 0, + Oddity: ev.Oddity, }) } return } -func (h *THHandler) newTLSHandshakesList(in []*TLSHandshakeEvent) (out []*THHandshakeEvent) { - for _, e := range in { - out = append(out, &THHandshakeEvent{ - Network: e.Network, - RemoteAddr: e.RemoteAddr, - SNI: e.SNI, - ALPN: e.ALPN, - Error: h.errorToFailure(e.Error), - Oddity: e.Oddity, - TLSVersion: e.TLSVersion, - CipherSuite: e.CipherSuite, - NegotiatedProto: e.NegotiatedProto, - }) +func (h *THHandler) simplifyHTTPResponse(in *HTTPResponse) (out *HTTPResponse) { + if in != nil { + out = &HTTPResponse{ + Code: in.Code, + Headers: in.Headers, + Body: nil, + BodyIsTruncated: in.BodyIsTruncated, + BodyLength: in.BodyLength, + BodyIsUTF8: in.BodyIsUTF8, + } } return } -func (h *THHandler) newQUICHandshakeList(in []*QUICHandshakeEvent) (out []*THHandshakeEvent) { - for _, e := range in { - out = append(out, &THHandshakeEvent{ - Network: e.Network, - RemoteAddr: e.RemoteAddr, - SNI: e.SNI, - ALPN: e.ALPN, - Error: h.errorToFailure(e.Error), - Oddity: e.Oddity, - 
TLSVersion: e.TLSVersion, - CipherSuite: e.CipherSuite, - NegotiatedProto: e.NegotiatedProto, - }) - } - return +type thMeasureURLHelper struct { + epnts []*Endpoint } -func (h *THHandler) newHTTPRoundTripList(in []*HTTPRoundTripEvent) (out []*THHTTPRoundTripEvent) { - /* - for _, e := range in { - out = append(out, &THHTTPRoundTripEvent{ - RequestMethod: e.RequestMethod, - RequestURL: e.RequestURL.String(), - RequestHeader: e.RequestHeader, - Error: h.errorToFailure(e.Error), - Oddity: e.Oddity, - ResponseStatus: int64(e.ResponseStatus), - ResponseHeader: e.ResponseHeader, - ResponseBodySnapshotSize: int64(len(e.ResponseBodySnapshot)), - MaxBodySnapshotSize: e.MaxBodySnapshotSize, - }) - } - */ +func (thh *thMeasureURLHelper) LookupExtraHTTPEndpoints( + ctx context.Context, URL *url.URL, headers http.Header, + serverEpnts ...*HTTPEndpoint) (epnts []*HTTPEndpoint, err error) { + for _, epnt := range thh.epnts { + epnts = append(epnts, &HTTPEndpoint{ + Domain: URL.Hostname(), + Network: epnt.Network, + Address: epnt.Address, + SNI: URL.Hostname(), + ALPN: alpnForHTTPEndpoint(epnt.Network), + URL: URL, + Header: headers, // but overriden later anyway + }) + } return } -// -// TH server: DNS -// - // thResolverURL is the DNS resolver URL used by the TH. We use an // encrypted resolver to reduce the risk that there is DNS-over-UDP // censorship in the place where we deploy the TH. @@ -625,93 +413,3 @@ const thResolverURL = "https://dns.google/dns-query" var thResolver = netxlite.WrapResolver(log.Log, dnsx.NewSerialResolver( dnsx.NewDNSOverHTTPS(http.DefaultClient, thResolverURL), )) - -// dohQuery discovers endpoints for the URL's hostname using DoH. 
-// -// Arguments: -// -// - ctx is the context for deadline/cancellation/timeout -// -// - parsedURL is the parsed URL -// -// Returns: -// -// - a possibly empty list of HTTPEndpoints (this happens for -// example if the URL's hostname causes NXDOMAIN) -// -// - the THDNSMeasurement for the THServeResponse message -func (h *THHandler) dohQuery(ctx context.Context, URL *url.URL) ( - epnts []*HTTPEndpoint, meas *THDNSMeasurement) { - db := &MeasurementDB{} - r := NewMeasurerWithDefaultSettings().WrapResolver(db, thResolver) - meas = &THDNSMeasurement{} - op := newOperationLogger(log.Log, - "dohQuery A/AAAA for %s with %s", URL.Hostname(), r.Address()) - _, err := r.LookupHost(ctx, URL.Hostname()) - op.Stop(err) - meas.LookupHost = h.newTHLookupHostList(db.AsMeasurement()) - switch URL.Scheme { - case "https": - op := newOperationLogger(log.Log, - "dohQuery HTTPSSvc for %s with %s", URL.Hostname(), r.Address()) - _, err = r.LookupHTTPSSvcWithoutRetry(ctx, URL.Hostname()) - op.Stop(err) - meas.LookupHTTPSSvc = h.newTHLookupHTTPSSvcList(db.AsMeasurement()) - default: - // nothing - } - epnts, _ = AllHTTPEndpointsForURL(URL, NewHTTPRequestHeaderForMeasuring()) // nil on failure - return -} - -func (h *THHandler) newTHLookupHostList(m *Measurement) (out []*THLookupHostEvent) { - for _, entry := range m.LookupHost { - out = append(out, &THLookupHostEvent{ - Network: entry.Network, - Address: entry.Address, - Domain: entry.Domain, - Error: h.errorToFailure(entry.Error), - Oddity: entry.Oddity, - //Addrs: entry.Addrs, - }) - } - return -} - -func (h *THHandler) newTHLookupHTTPSSvcList(m *Measurement) (out []*THLookupHTTPSSvcEvent) { - for _, entry := range m.LookupHTTPSSvc { - out = append(out, &THLookupHTTPSSvcEvent{ - Network: entry.Network, - Address: entry.Address, - Domain: entry.Domain, - Error: h.errorToFailure(entry.Error), - Oddity: entry.Oddity, - /* - IPv4: entry.IPv4, - IPv6: entry.IPv6, - ALPN: entry.ALPN, - */ - }) - } - return -} - -// -// TH server: utility 
functions -// - -// errorToFailure converts an error type to a failure type (which -// is loosely defined as a pointer to a string). -// -// When the error is nil, the string pointer is nil. When the error is -// not nil, the pointer points to the err.Error() string. -// -// We cannot unmarshal Go errors from JSON. Therefore, we need to -// convert to this type when we're marshalling. -func (h *THHandler) errorToFailure(err error) (out *string) { - if err != nil { - s := err.Error() - out = &s - } - return -} diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index 6375e14fd0..e013999823 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -43,7 +43,7 @@ type tlsHandshakerDB struct { type TLSHandshakeEvent struct { // JSON names compatible with df-006-tlshandshake CipherSuite string `json:"cipher_suite"` - Error error `json:"failure"` + Failure *string `json:"failure"` NegotiatedProto string `json:"negotiated_proto"` TLSVersion string `json:"tls_version"` PeerCerts []*ArchivalBinaryData `json:"peer_certificates"` @@ -75,7 +75,7 @@ func (thx *tlsHandshakerDB) Handshake(ctx context.Context, SkipVerify: config.InsecureSkipVerify, Started: started, Finished: finished, - Error: err, + Failure: NewArchivalFailure(err), Oddity: thx.computeOddity(err), TLSVersion: netxlite.TLSVersionString(state.Version), CipherSuite: netxlite.TLSCipherSuiteString(state.CipherSuite), diff --git a/internal/netxlite/errorsx/errwrapper.go b/internal/netxlite/errorsx/errwrapper.go index 81c3ac2bba..2c875e76b6 100644 --- a/internal/netxlite/errorsx/errwrapper.go +++ b/internal/netxlite/errorsx/errwrapper.go @@ -92,3 +92,9 @@ func NewErrWrapper(c Classifier, op string, err error) *ErrWrapper { WrappedErr: err, } } + +// NewTopLevelGenerciErrWrapper wraps an error occurring at top +// level using the most generic available classified. 
+func NewTopLevelGenericErrWrapper(err error) *ErrWrapper { + return NewErrWrapper(ClassifyGenericError, TopLevelOperation, err) +} From 19e45d57c787217c9964a09c5ee835a6c5c0ed24 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 05:35:10 +0200 Subject: [PATCH 38/53] make th work and fix a bunch of bugs --- internal/cmd/oohelper/oohelper.go | 3 +- internal/cmd/oohelperd/oohelperd.go | 4 +- internal/engine/experiment/webstepsx/doc.go | 6 + .../webstepsx/{webstepsx.go => measurer.go} | 108 ++++++++++++++---- .../experiment/webstepsx}/th.go | 107 ++++++----------- internal/measurex/http.go | 3 - internal/measurex/logger.go | 14 +-- internal/measurex/measurement.go | 16 ++- internal/measurex/measurer.go | 49 ++++---- internal/measurex/utils.go | 15 ++- 10 files changed, 188 insertions(+), 137 deletions(-) create mode 100644 internal/engine/experiment/webstepsx/doc.go rename internal/engine/experiment/webstepsx/{webstepsx.go => measurer.go} (63%) rename internal/{measurex => engine/experiment/webstepsx}/th.go (78%) diff --git a/internal/cmd/oohelper/oohelper.go b/internal/cmd/oohelper/oohelper.go index 871a72eb93..29fd00e541 100644 --- a/internal/cmd/oohelper/oohelper.go +++ b/internal/cmd/oohelper/oohelper.go @@ -11,6 +11,7 @@ import ( "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/cmd/oohelper/internal" + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webstepsx" "github.com/ooni/probe-cli/v3/internal/engine/netx" "github.com/ooni/probe-cli/v3/internal/measurex" "github.com/ooni/probe-cli/v3/internal/runtimex" @@ -67,7 +68,7 @@ func webstepsth() interface{} { if serverURL == "" { serverURL = "http://127.0.0.1:8080/api/v1/websteps" } - clnt := &measurex.THClient{ + clnt := &webstepsx.THClient{ DNServers: []*measurex.ResolverInfo{{ Network: "udp", Address: "8.8.4.4:53", diff --git a/internal/cmd/oohelperd/oohelperd.go b/internal/cmd/oohelperd/oohelperd.go index 78d24819df..f7f84f4b62 100644 --- a/internal/cmd/oohelperd/oohelperd.go 
+++ b/internal/cmd/oohelperd/oohelperd.go @@ -11,8 +11,8 @@ import ( "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/cmd/oohelperd/internal/webconnectivity" "github.com/ooni/probe-cli/v3/internal/cmd/oohelperd/internal/websteps" + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webstepsx" "github.com/ooni/probe-cli/v3/internal/engine/netx" - "github.com/ooni/probe-cli/v3/internal/measurex" ) const maxAcceptableBody = 1 << 24 @@ -55,7 +55,7 @@ func main() { func testableMain() { mux := http.NewServeMux() mux.Handle("/api/unstable/websteps", &websteps.Handler{Config: &websteps.Config{}}) - mux.Handle("/api/v1/websteps", &measurex.THHandler{}) + mux.Handle("/api/v1/websteps", &webstepsx.THHandler{}) mux.Handle("/", webconnectivity.Handler{ Client: httpx, Dialer: dialer, diff --git a/internal/engine/experiment/webstepsx/doc.go b/internal/engine/experiment/webstepsx/doc.go new file mode 100644 index 0000000000..5c3af36146 --- /dev/null +++ b/internal/engine/experiment/webstepsx/doc.go @@ -0,0 +1,6 @@ +// Package webstepsx contains a websteps implementation +// based on the internal/measurex package. +// +// This implementation does not follow any existing spec +// rather we are modeling the spec on this one. +package webstepsx diff --git a/internal/engine/experiment/webstepsx/webstepsx.go b/internal/engine/experiment/webstepsx/measurer.go similarity index 63% rename from internal/engine/experiment/webstepsx/webstepsx.go rename to internal/engine/experiment/webstepsx/measurer.go index f25163b886..1eb2b4980f 100644 --- a/internal/engine/experiment/webstepsx/webstepsx.go +++ b/internal/engine/experiment/webstepsx/measurer.go @@ -1,13 +1,15 @@ -// Package webstepsx contains a websteps implementation -// based on the internal/measurex package. -// -// This implementation does not follow any existing spec -// rather we are modeling the spec on this one. package webstepsx +// +// Measurer +// +// This file contains the client implementation. 
+// + import ( "context" "errors" + "net/http" "net/url" "time" @@ -28,6 +30,8 @@ type Config struct{} // TestKeys contains the experiment's test keys. type TestKeys struct { *measurex.URLMeasurement + + TH *THServerResponse `json:"th"` } // Measurer performs the measurement. @@ -93,29 +97,39 @@ func (mx *Measurer) RunAsync( if testhelper == nil { return nil, ErrNoAvailableTestHelpers } + testhelper.Address = "http://127.0.0.1:8080/api/v1/websteps" // TODO(bassosimone): remove! out := make(chan *model.ExperimentAsyncTestKeys) go mx.runAsync(ctx, sess, input, testhelper, out) return out, nil } +var measurerResolvers = []*measurex.ResolverInfo{{ + Network: "system", + Address: "", +}, { + Network: "udp", + Address: "8.8.4.4:53", +}, { + Network: "udp", + Address: "1.1.1.1:53", +}} + func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, URL string, th *model.Service, out chan<- *model.ExperimentAsyncTestKeys) { defer close(out) - mmx := &measurex.Measurer{ - Begin: time.Now(), - HTTPClient: sess.DefaultHTTPClient(), + helper := &measurerMeasureURLHelper{ + Clnt: sess.DefaultHTTPClient(), Logger: sess.Logger(), - Resolvers: []*measurex.ResolverInfo{{ - Network: "system", - Address: "", - }, { - Network: "udp", - Address: "8.8.4.4:53", - }, { - Network: "udp", - Address: "1.1.1.1:53", - }}, - TLSHandshaker: netxlite.NewTLSHandshakerStdlib(sess.Logger()), + THURL: th.Address, + thResponse: make(chan *THServerResponse, 1), // buffer + } + mmx := &measurex.Measurer{ + Begin: time.Now(), + HTTPClient: sess.DefaultHTTPClient(), + MeasureURLHelper: helper, + Logger: sess.Logger(), + Resolvers: measurerResolvers, + TLSHandshaker: netxlite.NewTLSHandshakerStdlib(sess.Logger()), } cookies := measurex.NewCookieJar() in := mmx.MeasureHTTPURLAndFollowRedirections( @@ -123,7 +137,10 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, for m := range in { out <- &model.ExperimentAsyncTestKeys{ MeasurementRuntime: 
m.TotalRuntime.Seconds(), - TestKeys: &TestKeys{m}, + TestKeys: &TestKeys{ + URLMeasurement: m, + TH: <-helper.thResponse, + }, Extensions: map[string]int64{ archival.ExtHTTP.Name: archival.ExtHTTP.V, archival.ExtDNS.Name: archival.ExtDNS.V, @@ -135,6 +152,57 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, } } +// measurerMeasureURLHelper injects the TH into the normal +// URL measurement flow implemented by measurex. +type measurerMeasureURLHelper struct { + // Clnt is the MANDATORY client to use + Clnt measurex.HTTPClient + + // Logger is the MANDATORY Logger to use + Logger model.Logger + + // THURL is the MANDATORY TH URL. + THURL string + + // thResponse is the response from the TH. + thResponse chan *THServerResponse +} + +func (mth *measurerMeasureURLHelper) LookupExtraHTTPEndpoints( + ctx context.Context, URL *url.URL, headers http.Header, + curEndpoints ...*measurex.HTTPEndpoint) ([]*measurex.HTTPEndpoint, error) { + cc := &THClientCall{ + Endpoints: measurex.HTTPEndpointsToEndpoints(curEndpoints), + HTTPClient: mth.Clnt, + Header: headers, + THURL: mth.THURL, + TargetURL: URL.String(), + } + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + ol := measurex.NewOperationLogger( + mth.Logger, "THClientCall %s", URL.String()) + resp, err := cc.Call(ctx) + ol.Stop(err) + mth.thResponse <- resp // note that nil is ~fine here + if err != nil { + return nil, err + } + var out []*measurex.HTTPEndpoint + for _, epnt := range resp.Endpoints { + out = append(out, &measurex.HTTPEndpoint{ + Domain: URL.Hostname(), + Network: epnt.Network, + Address: epnt.Address, + SNI: URL.Hostname(), + ALPN: measurex.ALPNForHTTPEndpoint(epnt.Network), + URL: URL, + Header: headers, + }) + } + return out, nil +} + // Run implements ExperimentMeasurer.Run. 
func (mx *Measurer) Run(ctx context.Context, sess model.ExperimentSession, measurement *model.Measurement, callbacks model.ExperimentCallbacks) error { diff --git a/internal/measurex/th.go b/internal/engine/experiment/webstepsx/th.go similarity index 78% rename from internal/measurex/th.go rename to internal/engine/experiment/webstepsx/th.go index a58c0a1ad5..90858412e1 100644 --- a/internal/measurex/th.go +++ b/internal/engine/experiment/webstepsx/th.go @@ -1,4 +1,4 @@ -package measurex +package webstepsx // // TH (Test Helper) @@ -6,36 +6,6 @@ package measurex // This file contains an implementation of the // (proposed) websteps test helper spec. // -// Why is this code in this package? -// -// The measurex model allows you to define test -// helpers that run in the DNS lookup phase. This -// model is quite nice because it allows you to -// discover additional IP addresses for the domain -// you're testing. When your local resolver is -// censored, the TH is how we get extra IP addresses -// for the domain to test. -// -// The current TH code requires you to submit an -// HTTP or HTTPS URL. If we relax this constraint, -// we can have a more flexible test helper that -// may be useful also for other experiments. -// -// Here are some ideas: -// -// - `dnslookup://domain` lookups a domain according -// to the test helper's resolver; -// -// - `tlshandshake://endpoint` performs a domain -// lookup and then a TLS handshake; -// -// - `quichandshake://endpoint` likewise. -// -// To conclude, this code is here because its -// trajectory is that of making it the base -// building block for building several types -// of test helpers. 
-// import ( "bytes" @@ -48,6 +18,7 @@ import ( "net/url" "github.com/apex/log" + "github.com/ooni/probe-cli/v3/internal/measurex" "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" "github.com/ooni/probe-cli/v3/internal/netxlite/iox" @@ -62,7 +33,7 @@ import ( // THClientRequest is the request received by the test helper. type THClientRequest struct { // Endpoints is a list of endpoints to measure. - Endpoints []*Endpoint + Endpoints []*measurex.Endpoint // URL is the URL we want to measure. URL string @@ -73,15 +44,12 @@ type THClientRequest struct { // THServerResponse is the response from the test helper. type THServerResponse struct { - // URL is the URL this measurement refers to. - URL string `json:"url"` - // DNS contains all the DNS related measurements. - DNS []*DNSMeasurement `json:"dns"` + DNS []*measurex.DNSMeasurement `json:"dns"` // Endpoints contains a measurement for each endpoint // that was discovered by the probe or the TH. - Endpoints []*HTTPEndpointMeasurement `json:"endpoints"` + Endpoints []*measurex.HTTPEndpointMeasurement `json:"endpoints"` } // thMaxAcceptableBodySize is the maximum acceptable body size by TH code. @@ -96,11 +64,11 @@ const thMaxAcceptableBodySize = 1 << 20 type THClient struct { // DNSServers is the MANDATORY list of DNS-over-UDP // servers to use to discover endpoints locally. - DNServers []*ResolverInfo + DNServers []*measurex.ResolverInfo // HTTPClient is the MANDATORY HTTP client to // use for contacting the TH. - HTTPClient HTTPClient + HTTPClient measurex.HTTPClient // ServerURL is the MANDATORY URL of the TH HTTP endpoint. 
ServerURL string @@ -126,19 +94,19 @@ func (c *THClient) Run(ctx context.Context, URL string) (*THServerResponse, erro if err != nil { return nil, err } - mx := NewMeasurerWithDefaultSettings() - var dns []*DNSMeasurement + mx := measurex.NewMeasurerWithDefaultSettings() + var dns []*measurex.DNSMeasurement for m := range mx.LookupURLHostParallel(ctx, parsed, c.DNServers...) { dns = append(dns, m) } - endpoints, err := AllEndpointsForURL(parsed, dns...) + endpoints, err := measurex.AllEndpointsForURL(parsed, dns...) if err != nil { return nil, err } return (&THClientCall{ Endpoints: endpoints, HTTPClient: c.HTTPClient, - Header: NewHTTPRequestHeaderForMeasuring(), + Header: measurex.NewHTTPRequestHeaderForMeasuring(), THURL: c.ServerURL, TargetURL: URL, }).Call(ctx) @@ -146,17 +114,13 @@ func (c *THClient) Run(ctx context.Context, URL string) (*THServerResponse, erro // THClientCall allows to perform a single TH client call. Make sure // you fill all the fields marked as MANDATORY before use. -// -// This is a low-level API for calling the TH. If you are writing -// a CLI client, use THClient. If you are writing code for the -// Measurer, use THMeasurerClientCall. type THClientCall struct { // Endpoints contains the MANDATORY endpoints we discovered. - Endpoints []*Endpoint + Endpoints []*measurex.Endpoint // HTTPClient is the MANDATORY HTTP client to // use for contacting the TH. - HTTPClient HTTPClient + HTTPClient measurex.HTTPClient // Header contains the MANDATORY request headers. 
Header http.Header @@ -196,10 +160,10 @@ func (c *THClientCall) httpClientDo(req *http.Request) (*THServerResponse, error if err != nil { return nil, err } + defer resp.Body.Close() if resp.StatusCode != 200 { // THHandler returns either 400 or 200 return nil, errTHRequestFailed } - defer resp.Body.Close() r := io.LimitReader(resp.Body, thMaxAcceptableBodySize) respBody, err := iox.ReadAllContext(req.Context(), r) if err != nil { @@ -289,38 +253,38 @@ func (h *THHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { // The return value is either a THServerResponse or an error. func (h *THHandler) singleStep( ctx context.Context, req *THClientRequest) (*THServerResponse, error) { - mx := NewMeasurerWithDefaultSettings() + mx := measurex.NewMeasurerWithDefaultSettings() mx.MeasureURLHelper = &thMeasureURLHelper{req.Endpoints} - mx.Resolvers = []*ResolverInfo{{ - Network: ResolverForeign, + mx.Resolvers = []*measurex.ResolverInfo{{ + Network: measurex.ResolverForeign, ForeignResolver: thResolver, }} - jar := NewCookieJar() + jar := measurex.NewCookieJar() meas, err := mx.MeasureURL(ctx, req.URL, req.HTTPRequestHeaders, jar) if err != nil { return nil, err } return &THServerResponse{ - URL: req.URL, DNS: meas.DNS, Endpoints: h.simplifyEndpoints(meas.Endpoints), }, nil } func (h *THHandler) simplifyEndpoints( - in []*HTTPEndpointMeasurement) (out []*HTTPEndpointMeasurement) { + in []*measurex.HTTPEndpointMeasurement) (out []*measurex.HTTPEndpointMeasurement) { for _, epnt := range in { - out = append(out, &HTTPEndpointMeasurement{ + out = append(out, &measurex.HTTPEndpointMeasurement{ URL: epnt.URL, - Endpoint: epnt.Endpoint, + Network: epnt.Network, + Address: epnt.Address, Measurement: h.simplifyMeasurement(epnt.Measurement), }) } return } -func (h *THHandler) simplifyMeasurement(in *Measurement) (out *Measurement) { - out = &Measurement{ +func (h *THHandler) simplifyMeasurement(in *measurex.Measurement) (out *measurex.Measurement) { + out = 
&measurex.Measurement{ Connect: in.Connect, TLSHandshake: h.simplifyHandshake(in.TLSHandshake), QUICHandshake: h.simplifyHandshake(in.QUICHandshake), @@ -331,9 +295,10 @@ func (h *THHandler) simplifyMeasurement(in *Measurement) (out *Measurement) { return } -func (h *THHandler) simplifyHandshake(in []*TLSHandshakeEvent) (out []*TLSHandshakeEvent) { +func (h *THHandler) simplifyHandshake( + in []*measurex.TLSHandshakeEvent) (out []*measurex.TLSHandshakeEvent) { for _, ev := range in { - out = append(out, &TLSHandshakeEvent{ + out = append(out, &measurex.TLSHandshakeEvent{ CipherSuite: ev.CipherSuite, Failure: ev.Failure, NegotiatedProto: ev.NegotiatedProto, @@ -352,9 +317,10 @@ func (h *THHandler) simplifyHandshake(in []*TLSHandshakeEvent) (out []*TLSHandsh return } -func (h *THHandler) simplifyHTTPRoundTrip(in []*HTTPRoundTripEvent) (out []*HTTPRoundTripEvent) { +func (h *THHandler) simplifyHTTPRoundTrip( + in []*measurex.HTTPRoundTripEvent) (out []*measurex.HTTPRoundTripEvent) { for _, ev := range in { - out = append(out, &HTTPRoundTripEvent{ + out = append(out, &measurex.HTTPRoundTripEvent{ Failure: ev.Failure, Request: ev.Request, Response: h.simplifyHTTPResponse(ev.Response), @@ -366,9 +332,10 @@ func (h *THHandler) simplifyHTTPRoundTrip(in []*HTTPRoundTripEvent) (out []*HTTP return } -func (h *THHandler) simplifyHTTPResponse(in *HTTPResponse) (out *HTTPResponse) { +func (h *THHandler) simplifyHTTPResponse( + in *measurex.HTTPResponse) (out *measurex.HTTPResponse) { if in != nil { - out = &HTTPResponse{ + out = &measurex.HTTPResponse{ Code: in.Code, Headers: in.Headers, Body: nil, @@ -381,19 +348,19 @@ func (h *THHandler) simplifyHTTPResponse(in *HTTPResponse) (out *HTTPResponse) { } type thMeasureURLHelper struct { - epnts []*Endpoint + epnts []*measurex.Endpoint } func (thh *thMeasureURLHelper) LookupExtraHTTPEndpoints( ctx context.Context, URL *url.URL, headers http.Header, - serverEpnts ...*HTTPEndpoint) (epnts []*HTTPEndpoint, err error) { + serverEpnts 
...*measurex.HTTPEndpoint) (epnts []*measurex.HTTPEndpoint, err error) { for _, epnt := range thh.epnts { - epnts = append(epnts, &HTTPEndpoint{ + epnts = append(epnts, &measurex.HTTPEndpoint{ Domain: URL.Hostname(), Network: epnt.Network, Address: epnt.Address, SNI: URL.Hostname(), - ALPN: alpnForHTTPEndpoint(epnt.Network), + ALPN: measurex.ALPNForHTTPEndpoint(epnt.Network), URL: URL, Header: headers, // but overriden later anyway }) diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 9229f25f59..f4f70c1c98 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -228,9 +228,6 @@ func newHTTPClient(db WritableDB, cookiejar http.CookieJar, if len(via) >= 10 { err = ErrHTTPTooManyRedirects } - if err != nil { - err = errorsx.NewTopLevelGenericErrWrapper(err) - } db.InsertIntoHTTPRedirect(&HTTPRedirectEvent{ URL: via[0].URL, // bug in Go stdlib if we crash here Location: req.URL, diff --git a/internal/measurex/logger.go b/internal/measurex/logger.go index 112980c12d..12a2488e9c 100644 --- a/internal/measurex/logger.go +++ b/internal/measurex/logger.go @@ -26,10 +26,10 @@ type Logger interface { Warnf(format string, v ...interface{}) } -// newOperationLogger creates a new logger that logs +// NewOperationLogger creates a new logger that logs // about an in-progress operation. 
-func newOperationLogger(logger Logger, format string, v ...interface{}) *operationLogger { - ol := &operationLogger{ +func NewOperationLogger(logger Logger, format string, v ...interface{}) *OperationLogger { + ol := &OperationLogger{ sighup: make(chan interface{}), logger: logger, once: &sync.Once{}, @@ -41,8 +41,8 @@ func newOperationLogger(logger Logger, format string, v ...interface{}) *operati return ol } -// operationLogger logs about an in-progress operation -type operationLogger struct { +// OperationLogger logs about an in-progress operation +type OperationLogger struct { logger Logger message string once *sync.Once @@ -50,7 +50,7 @@ type operationLogger struct { wg *sync.WaitGroup } -func (ol *operationLogger) logloop() { +func (ol *OperationLogger) logloop() { defer ol.wg.Done() timer := time.NewTimer(500 * time.Millisecond) defer timer.Stop() @@ -62,7 +62,7 @@ func (ol *operationLogger) logloop() { } } -func (ol *operationLogger) Stop(err error) { +func (ol *OperationLogger) Stop(err error) { ol.once.Do(func() { close(ol.sighup) ol.wg.Wait() diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index 0c230582e8..be106c2049 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -190,7 +190,7 @@ func (m *DNSMeasurement) allHTTPEndpointsForURL( Network: epnt.Network, Address: epnt.Address, SNI: domain, - ALPN: alpnForHTTPEndpoint(epnt.Network), + ALPN: ALPNForHTTPEndpoint(epnt.Network), URL: URL, Header: headers, }) @@ -235,8 +235,11 @@ func AllHTTPEndpointsForURL(URL *url.URL, // EndpointMeasurement is an endpoint measurement. type EndpointMeasurement struct { - // Endpoint is the endpoint this measurement refers to. - Endpoint string `json:"endpoint"` + // Network is the network of this endpoint. + Network EndpointNetwork `json:"network"` + + // Address is the address of this endpoint. + Address string `json:"address"` // An EndpointMeasurement is a Measurement. 
*Measurement @@ -247,8 +250,11 @@ type HTTPEndpointMeasurement struct { // URL is the URL this measurement refers to. URL string `json:"url"` - // Endpoint is the endpoint this measurement refers to. - Endpoint string `json:"endpoint"` + // Network is the network of this endpoint. + Network EndpointNetwork `json:"network"` + + // Address is the address of this endpoint. + Address string `json:"address"` // An HTTPEndpointMeasurement is a Measurement. *Measurement diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index aca092d260..f9ee672a14 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -70,7 +70,7 @@ func NewMeasurerWithDefaultSettings() *Measurer { // LookupHostSystem performs a LookupHost using the system resolver. func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *DNSMeasurement { const timeout = 4 * time.Second - ol := newOperationLogger(mx.Logger, "LookupHost %s with getaddrinfo", domain) + ol := NewOperationLogger(mx.Logger, "LookupHost %s with getaddrinfo", domain) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() db := &MeasurementDB{} @@ -88,7 +88,7 @@ func (mx *Measurer) LookupHostSystem(ctx context.Context, domain string) *DNSMea func (mx *Measurer) lookupHostForeign( ctx context.Context, domain string, r Resolver) *DNSMeasurement { const timeout = 4 * time.Second - ol := newOperationLogger(mx.Logger, "LookupHost %s with %s", domain, r.Network()) + ol := NewOperationLogger(mx.Logger, "LookupHost %s with %s", domain, r.Network()) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() db := &MeasurementDB{} @@ -114,7 +114,7 @@ func (mx *Measurer) lookupHostForeign( func (mx *Measurer) LookupHostUDP( ctx context.Context, domain, address string) *DNSMeasurement { const timeout = 4 * time.Second - ol := newOperationLogger(mx.Logger, "LookupHost %s with %s/udp", domain, address) + ol := NewOperationLogger(mx.Logger, "LookupHost %s with %s/udp", domain, 
address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() db := &MeasurementDB{} @@ -142,7 +142,7 @@ func (mx *Measurer) LookupHostUDP( func (mx *Measurer) LookupHTTPSSvcUDP( ctx context.Context, domain, address string) *DNSMeasurement { const timeout = 4 * time.Second - ol := newOperationLogger(mx.Logger, "LookupHTTPSvc %s with %s/udp", domain, address) + ol := NewOperationLogger(mx.Logger, "LookupHTTPSvc %s with %s/udp", domain, address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() db := &MeasurementDB{} @@ -161,7 +161,7 @@ func (mx *Measurer) LookupHTTPSSvcUDP( func (mx *Measurer) lookupHTTPSSvcUDPForeign( ctx context.Context, domain string, r Resolver) *DNSMeasurement { const timeout = 4 * time.Second - ol := newOperationLogger(mx.Logger, "LookupHTTPSvc %s with %s", domain, r.Address()) + ol := NewOperationLogger(mx.Logger, "LookupHTTPSvc %s with %s", domain, r.Address()) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() db := &MeasurementDB{} @@ -190,10 +190,8 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) *EndpointMea conn.Close() } return &EndpointMeasurement{ - Endpoint: (&Endpoint{ - Network: NetworkTCP, - Address: address, - }).String(), + Network: NetworkTCP, + Address: address, Measurement: measurement, } } @@ -201,7 +199,7 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) *EndpointMea // tcpConnect is like TCPConnect but does not create a new measurement. 
func (mx *Measurer) tcpConnect(ctx context.Context, db WritableDB, address string) (Conn, error) { const timeout = 10 * time.Second - ol := newOperationLogger(mx.Logger, "TCPConnect %s", address) + ol := NewOperationLogger(mx.Logger, "TCPConnect %s", address) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() d := mx.NewDialerWithoutResolver(db, mx.Logger) @@ -250,10 +248,8 @@ func (mx *Measurer) TLSConnectAndHandshake(ctx context.Context, conn.Close() } return &EndpointMeasurement{ - Endpoint: (&Endpoint{ - Network: NetworkTCP, - Address: address, - }).String(), + Network: NetworkTCP, + Address: address, Measurement: measurement, } } @@ -267,15 +263,18 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, return nil, err } const timeout = 10 * time.Second - ol := newOperationLogger(mx.Logger, + ol := NewOperationLogger(mx.Logger, "TLSHandshake %s with sni=%s", address, config.ServerName) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() th := mx.WrapTLSHandshaker(db, mx.TLSHandshaker) tlsConn, _, err := th.Handshake(ctx, conn, config) ol.Stop(err) + if err != nil { + return nil, err + } // cast safe according to the docs of netxlite's handshaker - return tlsConn.(netxlite.TLSConn), err + return tlsConn.(netxlite.TLSConn), nil } // QUICHandshake connects and TLS handshakes with a QUIC endpoint. 
@@ -309,10 +308,8 @@ func (mx *Measurer) QUICHandshake(ctx context.Context, address string, sess.CloseWithError(0, "") } return &EndpointMeasurement{ - Endpoint: (&Endpoint{ - Network: NetworkQUIC, - Address: address, - }).String(), + Network: NetworkQUIC, + Address: address, Measurement: measurement, } } @@ -321,7 +318,7 @@ func (mx *Measurer) QUICHandshake(ctx context.Context, address string, func (mx *Measurer) quicHandshake(ctx context.Context, db WritableDB, address string, config *tls.Config) (quic.EarlySession, error) { const timeout = 10 * time.Second - ol := newOperationLogger(mx.Logger, + ol := NewOperationLogger(mx.Logger, "QUICHandshake %s with sni=%s", address, config.ServerName) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -406,11 +403,9 @@ func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, *HTTPEndpointMeasurement, error) { resp, m, err := mx.httpEndpointGetMeasurement(ctx, epnt, jar) out := &HTTPEndpointMeasurement{ - URL: epnt.URL.String(), - Endpoint: (&Endpoint{ - Network: epnt.Network, - Address: epnt.Address, - }).String(), + URL: epnt.URL.String(), + Network: epnt.Network, + Address: epnt.Address, Measurement: m, } return resp, out, err @@ -523,7 +518,7 @@ func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, epnt *HTTPEndpoint, req *http.Request) (*http.Response, error) { const timeout = 15 * time.Second - ol := newOperationLogger(mx.Logger, + ol := NewOperationLogger(mx.Logger, "%s %s with %s/%s", req.Method, req.URL.String(), epnt.Address, epnt.Network) ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() diff --git a/internal/measurex/utils.go b/internal/measurex/utils.go index a47d2e53fb..29e5d48601 100644 --- a/internal/measurex/utils.go +++ b/internal/measurex/utils.go @@ -12,9 +12,9 @@ import ( // This is where we put free functions. 
// -// alpnForHTTPEndpoint selects the correct ALPN for an HTTP endpoint +// ALPNForHTTPEndpoint selects the correct ALPN for an HTTP endpoint // given the network. On failure, we return a nil list. -func alpnForHTTPEndpoint(network EndpointNetwork) []string { +func ALPNForHTTPEndpoint(network EndpointNetwork) []string { switch network { case NetworkQUIC: return []string{"h3"} @@ -75,3 +75,14 @@ func removeDuplicateHTTPEndpoints(epnts ...*HTTPEndpoint) (out []*HTTPEndpoint) } return } + +// HTTPEndpointsToEndpoints converts HTTPEndpoints to Endpoints +func HTTPEndpointsToEndpoints(in []*HTTPEndpoint) (out []*Endpoint) { + for _, epnt := range in { + out = append(out, &Endpoint{ + Network: epnt.Network, + Address: epnt.Address, + }) + } + return +} From a2c562844bdb69ac7446f5ddf631d51ba622f3f7 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 13:51:35 +0200 Subject: [PATCH 39/53] some readability fixes --- internal/measurex/archival.go | 25 ++--------- internal/measurex/db.go | 85 +++++++++++++++++------------------ 2 files changed, 46 insertions(+), 64 deletions(-) diff --git a/internal/measurex/archival.go b/internal/measurex/archival.go index 515c724c2f..1fe81368ea 100644 --- a/internal/measurex/archival.go +++ b/internal/measurex/archival.go @@ -8,14 +8,7 @@ import ( // // Archival // -// This file defines helpers to serialize to the OONI data format. Some of -// our data structure are already pretty close to the desired format, while -// other are more flat, which makes processing simpler. So, when we need -// help we use routines from this file to serialize correctly. -// - -// -// BinaryData +// This file defines helpers to serialize to the OONI data format. // // ArchivalBinaryData is the archival format for binary data. @@ -36,10 +29,6 @@ func NewArchivalBinaryData(data []byte) (out *ArchivalBinaryData) { return } -// -// HTTPRoundTrip -// - // ArchivalHeaders is a list of HTTP headers.
type ArchivalHeaders map[string]string @@ -65,10 +54,6 @@ func NewArchivalHeaders(in http.Header) (out ArchivalHeaders) { return } -// -// TLSCerts -// - // NewArchivalTLSCertList builds a new []ArchivalBinaryData // from a list of raw x509 certificates data. func NewArchivalTLSCerts(in [][]byte) (out []*ArchivalBinaryData) { @@ -81,11 +66,9 @@ func NewArchivalTLSCerts(in [][]byte) (out []*ArchivalBinaryData) { return } -// -// Failure -// - -// NewArchivalFailure creates an archival failure from an error. +// NewArchivalFailure creates an archival failure from an error. We +// cannot round trip an error using JSON, so we serialize to this +// intermediate format that is a sort of Optional. func NewArchivalFailure(err error) *string { if err == nil { return nil diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 6cc0bbd9a0..03f84d30dd 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -5,19 +5,18 @@ package measurex // // This file defines two types: // -// - WritableDB is the interface for storing events that -// we pass to the networking code +// - WritableDB is the interface allowing networking code +// (e.g., Dialer) to save measurement events; // -// - MeasurementDB is a concrete database in which network -// code stores events and from which you can create a -// measurement with all the collected events +// - MeasurementDB implements WritableDB and allows high-level +// code to generate a Measurement from all the events. // -import ( - "sync" -) +import "sync" -// WritableDB is a measurement database in which you can write. +// WritableDB is an events "database" in which networking code +// (e.g., Dialer) can save measurement events (e.g., the result +// of a connect, a TLS handshake, a read). type WritableDB interface { // InsertIntoDial saves a Dial event.
InsertIntoDial(ev *NetworkEvent) @@ -50,9 +49,10 @@ type WritableDB interface { InsertIntoQUICHandshake(ev *QUICHandshakeEvent) } -// MeasurementDB is a database for assembling a measurement. +// MeasurementDB is a WritableDB that also allows high-level code +// to generate a Measurement from all the saved events. type MeasurementDB struct { - // database tables + // database "tables" dialTable []*NetworkEvent readWriteTable []*NetworkEvent closeTable []*NetworkEvent @@ -77,8 +77,8 @@ func (db *MeasurementDB) InsertIntoDial(ev *NetworkEvent) { db.mu.Unlock() } -// selectAllFromDial returns all dial events. -func (db *MeasurementDB) selectAllFromDial() (out []*NetworkEvent) { +// selectAllFromDialUnlocked returns all dial events. +func (db *MeasurementDB) selectAllFromDialUnlocked() (out []*NetworkEvent) { out = append(out, db.dialTable...) return } @@ -90,8 +90,8 @@ func (db *MeasurementDB) InsertIntoReadWrite(ev *NetworkEvent) { db.mu.Unlock() } -// selectAllFromReadWrite returns all I/O events. -func (db *MeasurementDB) selectAllFromReadWrite() (out []*NetworkEvent) { +// selectAllFromReadWriteUnlocked returns all I/O events. +func (db *MeasurementDB) selectAllFromReadWriteUnlocked() (out []*NetworkEvent) { out = append(out, db.readWriteTable...) return } @@ -103,8 +103,8 @@ func (db *MeasurementDB) InsertIntoClose(ev *NetworkEvent) { db.mu.Unlock() } -// selectAllFromClose returns all close events. -func (db *MeasurementDB) selectAllFromClose() (out []*NetworkEvent) { +// selectAllFromCloseUnlocked returns all close events. +func (db *MeasurementDB) selectAllFromCloseUnlocked() (out []*NetworkEvent) { out = append(out, db.closeTable...) return } @@ -116,8 +116,8 @@ func (db *MeasurementDB) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) { db.mu.Unlock() } -// selectAllFromTLSHandshake returns all TLS handshake events. 
-func (db *MeasurementDB) selectAllFromTLSHandshake() (out []*TLSHandshakeEvent) { +// selectAllFromTLSHandshakeUnlocked returns all TLS handshake events. +func (db *MeasurementDB) selectAllFromTLSHandshakeUnlocked() (out []*TLSHandshakeEvent) { out = append(out, db.tlsHandshakeTable...) return } @@ -129,8 +129,8 @@ func (db *MeasurementDB) InsertIntoLookupHost(ev *DNSLookupEvent) { db.mu.Unlock() } -// selectAllFromLookupHost returns all the lookup host events. -func (db *MeasurementDB) selectAllFromLookupHost() (out []*DNSLookupEvent) { +// selectAllFromLookupHostUnlocked returns all the lookup host events. +func (db *MeasurementDB) selectAllFromLookupHostUnlocked() (out []*DNSLookupEvent) { out = append(out, db.lookupHostTable...) return } @@ -142,8 +142,8 @@ func (db *MeasurementDB) InsertIntoLookupHTTPSSvc(ev *DNSLookupEvent) { db.mu.Unlock() } -// selectAllFromLookupHTTPSSvc returns all HTTPSSvc lookup events. -func (db *MeasurementDB) selectAllFromLookupHTTPSSvc() (out []*DNSLookupEvent) { +// selectAllFromLookupHTTPSSvcUnlocked returns all HTTPSSvc lookup events. +func (db *MeasurementDB) selectAllFromLookupHTTPSSvcUnlocked() (out []*DNSLookupEvent) { out = append(out, db.lookupHTTPSvcTable...) return } @@ -155,8 +155,8 @@ func (db *MeasurementDB) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) { db.mu.Unlock() } -// selectAllFromDNSRoundTrip returns all DNS round trip events. -func (db *MeasurementDB) selectAllFromDNSRoundTrip() (out []*DNSRoundTripEvent) { +// selectAllFromDNSRoundTripUnlocked returns all DNS round trip events. +func (db *MeasurementDB) selectAllFromDNSRoundTripUnlocked() (out []*DNSRoundTripEvent) { out = append(out, db.dnsRoundTripTable...) return } @@ -168,8 +168,8 @@ func (db *MeasurementDB) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) { db.mu.Unlock() } -// selectAllFromHTTPRoundTrip returns all HTTP round trip events. 
-func (db *MeasurementDB) selectAllFromHTTPRoundTrip() (out []*HTTPRoundTripEvent) { +// selectAllFromHTTPRoundTripUnlocked returns all HTTP round trip events. +func (db *MeasurementDB) selectAllFromHTTPRoundTripUnlocked() (out []*HTTPRoundTripEvent) { out = append(out, db.httpRoundTripTable...) return } @@ -181,8 +181,8 @@ func (db *MeasurementDB) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) { db.mu.Unlock() } -// selectAllFromHTTPRedirect returns all HTTP redirections. -func (db *MeasurementDB) selectAllFromHTTPRedirect() (out []*HTTPRedirectEvent) { +// selectAllFromHTTPRedirectUnlocked returns all HTTP redirections. +func (db *MeasurementDB) selectAllFromHTTPRedirectUnlocked() (out []*HTTPRedirectEvent) { out = append(out, db.httpRedirectTable...) return } @@ -194,8 +194,8 @@ func (db *MeasurementDB) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) { db.mu.Unlock() } -// selectAllFromQUICHandshake returns all QUIC handshake events. -func (db *MeasurementDB) selectAllFromQUICHandshake() (out []*QUICHandshakeEvent) { +// selectAllFromQUICHandshakeUnlocked returns all QUIC handshake events. +func (db *MeasurementDB) selectAllFromQUICHandshakeUnlocked() (out []*QUICHandshakeEvent) { out = append(out, db.quicHandshakeTable...) return } @@ -203,21 +203,20 @@ func (db *MeasurementDB) selectAllFromQUICHandshake() (out []*QUICHandshakeEvent // AsMeasurement converts the current state of the database into // a finalized Measurement structure. The original events will remain // into the database. To start a new measurement cycle, just create -// a new MeasurementDB instance. You are not supposed to modify -// the Measurement returned by this method. +// a new MeasurementDB instance and use that. 
func (db *MeasurementDB) AsMeasurement() *Measurement { db.mu.Lock() meas := &Measurement{ - Connect: db.selectAllFromDial(), - ReadWrite: db.selectAllFromReadWrite(), - Close: db.selectAllFromClose(), - TLSHandshake: db.selectAllFromTLSHandshake(), - QUICHandshake: db.selectAllFromQUICHandshake(), - LookupHost: db.selectAllFromLookupHost(), - LookupHTTPSSvc: db.selectAllFromLookupHTTPSSvc(), - DNSRoundTrip: db.selectAllFromDNSRoundTrip(), - HTTPRoundTrip: db.selectAllFromHTTPRoundTrip(), - HTTPRedirect: db.selectAllFromHTTPRedirect(), + Connect: db.selectAllFromDialUnlocked(), + ReadWrite: db.selectAllFromReadWriteUnlocked(), + Close: db.selectAllFromCloseUnlocked(), + TLSHandshake: db.selectAllFromTLSHandshakeUnlocked(), + QUICHandshake: db.selectAllFromQUICHandshakeUnlocked(), + LookupHost: db.selectAllFromLookupHostUnlocked(), + LookupHTTPSSvc: db.selectAllFromLookupHTTPSSvcUnlocked(), + DNSRoundTrip: db.selectAllFromDNSRoundTripUnlocked(), + HTTPRoundTrip: db.selectAllFromHTTPRoundTripUnlocked(), + HTTPRedirect: db.selectAllFromHTTPRedirectUnlocked(), } db.mu.Unlock() return meas From 05e38f4914b297e8456d96d860ece53a5fd42cc5 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Sep 2021 20:59:30 +0200 Subject: [PATCH 40/53] continue improving docs --- internal/measurex/dialer.go | 7 +- internal/measurex/http.go | 72 ++- internal/measurex/measurer.go | 49 +- internal/measurex/resolver.go | 7 +- internal/measurex/tracing.go | 81 +++ internal/netxlite/dialer.go | 6 +- internal/tutorial/generator/main.go | 19 + .../tutorial/measurex/chapter01/README.md | 366 +++++++++++ internal/tutorial/measurex/chapter01/main.go | 343 +++++++++++ .../tutorial/measurex/chapter02/README.md | 224 +++++++ internal/tutorial/measurex/chapter02/main.go | 201 +++++++ .../tutorial/measurex/chapter03/README.md | 567 ++++++++++++++++++ internal/tutorial/measurex/chapter03/main.go | 543 +++++++++++++++++ .../tutorial/measurex/chapter04/README.md | 232 +++++++ 
internal/tutorial/measurex/chapter04/main.go | 216 +++++++ .../tutorial/measurex/chapter05/README.md | 0 .../tutorial/measurex/chapter06/README.md | 0 .../tutorial/measurex/chapter07/README.md | 0 internal/tutorial/measurex/chapter07/main.go | 7 +- .../tutorial/measurex/chapter08/README.md | 0 internal/tutorial/measurex/chapter08/main.go | 7 +- .../tutorial/measurex/chapter09/README.md | 0 internal/tutorial/measurex/chapter09/main.go | 7 +- .../tutorial/measurex/chapter10/README.md | 0 internal/tutorial/measurex/chapter10/main.go | 21 +- .../tutorial/measurex/chapter11/README.md | 0 internal/tutorial/measurex/chapter11/main.go | 26 +- .../tutorial/measurex/chapter12/README.md | 0 internal/tutorial/measurex/chapter12/main.go | 14 +- .../tutorial/measurex/chapter13/README.md | 0 internal/tutorial/measurex/chapter13/main.go | 53 +- 31 files changed, 2976 insertions(+), 92 deletions(-) create mode 100644 internal/measurex/tracing.go create mode 100644 internal/tutorial/measurex/chapter01/README.md create mode 100644 internal/tutorial/measurex/chapter02/README.md create mode 100644 internal/tutorial/measurex/chapter03/README.md create mode 100644 internal/tutorial/measurex/chapter04/README.md create mode 100644 internal/tutorial/measurex/chapter05/README.md create mode 100644 internal/tutorial/measurex/chapter06/README.md create mode 100644 internal/tutorial/measurex/chapter07/README.md create mode 100644 internal/tutorial/measurex/chapter08/README.md create mode 100644 internal/tutorial/measurex/chapter09/README.md create mode 100644 internal/tutorial/measurex/chapter10/README.md create mode 100644 internal/tutorial/measurex/chapter11/README.md create mode 100644 internal/tutorial/measurex/chapter12/README.md create mode 100644 internal/tutorial/measurex/chapter13/README.md diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index d6d1d3a879..9f2fad1095 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -25,7 +25,12 @@ type 
Dialer = netxlite.Dialer // into the given WritableDB. The net.Conns created by // a wrapped dialer also write into the WritableDB. func (mx *Measurer) WrapDialer(db WritableDB, dialer netxlite.Dialer) Dialer { - return &dialerDB{Dialer: dialer, db: db, begin: mx.Begin} + return WrapDialer(mx.Begin, db, dialer) +} + +// WrapDialer wraps a dialer. +func WrapDialer(begin time.Time, db WritableDB, dialer netxlite.Dialer) Dialer { + return &dialerDB{Dialer: dialer, db: db, begin: begin} } // NewDialerWithSystemResolver creates a diff --git a/internal/measurex/http.go b/internal/measurex/http.go index f4f70c1c98..6ab414b44e 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -39,13 +39,30 @@ type HTTPTransport = netxlite.HTTPTransport // WrapHTTPTransport creates a new transport that saves // HTTP events into the WritableDB. -func (mx *Measurer) WrapHTTPTransport(db WritableDB, txp HTTPTransport) HTTPTransport { - return &httpTransportDB{HTTPTransport: txp, db: db, begin: mx.Begin} +func (mx *Measurer) WrapHTTPTransport( + db WritableDB, txp HTTPTransport) *HTTPTransportDB { + return WrapHTTPTransport(mx.Begin, db, txp) +} + +// We only read a small snapshot of the body to keep measurements +// lean, since we're mostly interested in TLS interference nowadays +// but we'll also allow for reading more bytes from the conn. +const httpMaxBodySnapshot = 1 << 11 + +func WrapHTTPTransport( + begin time.Time, db WritableDB, txp HTTPTransport) *HTTPTransportDB { + return &HTTPTransportDB{ + HTTPTransport: txp, + Begin: begin, + DB: db, + MaxBodySnapshotSize: httpMaxBodySnapshot, + } } // NewHTTPTransportWithConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. 
-func (mx *Measurer) NewHTTPTransportWithConn(logger Logger, db WritableDB, conn Conn) HTTPTransport { +func (mx *Measurer) NewHTTPTransportWithConn( + logger Logger, db WritableDB, conn Conn) *HTTPTransportDB { return mx.WrapHTTPTransport(db, netxlite.NewHTTPTransport( logger, netxlite.NewSingleUseDialer(conn), netxlite.NewNullTLSDialer())) } @@ -53,7 +70,7 @@ func (mx *Measurer) NewHTTPTransportWithConn(logger Logger, db WritableDB, conn // NewHTTPTransportWithTLSConn creates and wraps an HTTPTransport that // does not dial and only uses the given conn. func (mx *Measurer) NewHTTPTransportWithTLSConn( - logger Logger, db WritableDB, conn netxlite.TLSConn) HTTPTransport { + logger Logger, db WritableDB, conn netxlite.TLSConn) *HTTPTransportDB { return mx.WrapHTTPTransport(db, netxlite.NewHTTPTransport( logger, netxlite.NewNullDialer(), netxlite.NewSingleUseTLSDialer(conn))) } @@ -61,15 +78,29 @@ func (mx *Measurer) NewHTTPTransportWithTLSConn( // NewHTTPTransportWithQUICSess creates and wraps an HTTPTransport that // does not dial and only uses the given QUIC session. func (mx *Measurer) NewHTTPTransportWithQUICSess( - logger Logger, db WritableDB, sess quic.EarlySession) HTTPTransport { + logger Logger, db WritableDB, sess quic.EarlySession) *HTTPTransportDB { return mx.WrapHTTPTransport(db, netxlite.NewHTTP3Transport( logger, netxlite.NewSingleUseQUICDialer(sess), &tls.Config{})) } -type httpTransportDB struct { +// HTTPTransportDB is an implementation of HTTPTransport that +// writes measurement events into a WritableDB. +// +// There are many factories to construct this data type. Otherwise, +// you can construct it manually. In which case, do not modify +// public fields during usage, since this may cause a data race. +type HTTPTransportDB struct { netxlite.HTTPTransport - begin time.Time - db WritableDB + + // Begin is when we started measuring. + Begin time.Time + + // DB is where to write events. 
+ DB WritableDB + + // MaxBodySnapshotSize is the maximum size of the body + // snapshot that we take during a round trip. + MaxBodySnapshotSize int64 } // HTTPRequest is the HTTP request. @@ -106,13 +137,8 @@ type HTTPRoundTripEvent struct { Oddity Oddity `json:"oddity"` } -// We only read a small snapshot of the body to keep measurements -// lean, since we're mostly interested in TLS interference nowadays -// but we'll also allow for reading more bytes from the conn. -const maxBodySnapshot = 1 << 11 - -func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) { - started := time.Since(txp.begin).Seconds() +func (txp *HTTPTransportDB) RoundTrip(req *http.Request) (*http.Response, error) { + started := time.Since(txp.Begin).Seconds() resp, err := txp.HTTPTransport.RoundTrip(req) rt := &HTTPRoundTripEvent{ Request: &HTTPRequest{ @@ -123,9 +149,9 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) Started: started, } if err != nil { - rt.Finished = time.Since(txp.begin).Seconds() + rt.Finished = time.Since(txp.Begin).Seconds() rt.Failure = NewArchivalFailure(err) - txp.db.InsertIntoHTTPRoundTrip(rt) + txp.DB.InsertIntoHTTPRoundTrip(rt) return nil, err } switch { @@ -142,15 +168,15 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) Code: int64(resp.StatusCode), Headers: NewArchivalHeaders(resp.Header), } - r := io.LimitReader(resp.Body, maxBodySnapshot) + r := io.LimitReader(resp.Body, txp.MaxBodySnapshotSize) body, err := iox.ReadAllContext(req.Context(), r) if errors.Is(err, io.EOF) && resp.Close { err = nil // we expected to see an EOF here, so no real error } if err != nil { - rt.Finished = time.Since(txp.begin).Seconds() + rt.Finished = time.Since(txp.Begin).Seconds() rt.Failure = NewArchivalFailure(err) - txp.db.InsertIntoHTTPRoundTrip(rt) + txp.DB.InsertIntoHTTPRoundTrip(rt) return nil, err } resp.Body = &httpTransportBody{ // allow for reading more if needed @@ -159,10 
+185,10 @@ func (txp *httpTransportDB) RoundTrip(req *http.Request) (*http.Response, error) } rt.Response.Body = NewArchivalBinaryData(body) rt.Response.BodyLength = int64(len(body)) - rt.Response.BodyIsTruncated = len(body) >= maxBodySnapshot + rt.Response.BodyIsTruncated = int64(len(body)) >= txp.MaxBodySnapshotSize rt.Response.BodyIsUTF8 = utf8.Valid(body) - rt.Finished = time.Since(txp.begin).Seconds() - txp.db.InsertIntoHTTPRoundTrip(rt) + rt.Finished = time.Since(txp.Begin).Seconds() + txp.DB.InsertIntoHTTPRoundTrip(rt) return resp, nil } diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index f9ee672a14..ebc2bbe6b7 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -184,7 +184,7 @@ func (mx *Measurer) lookupHTTPSSvcUDPForeign( // Returns an EndpointMeasurement. func (mx *Measurer) TCPConnect(ctx context.Context, address string) *EndpointMeasurement { db := &MeasurementDB{} - conn, _ := mx.tcpConnect(ctx, db, address) + conn, _ := mx.TCPConnectWithDB(ctx, db, address) measurement := db.AsMeasurement() if conn != nil { conn.Close() @@ -196,8 +196,9 @@ func (mx *Measurer) TCPConnect(ctx context.Context, address string) *EndpointMea } } -// tcpConnect is like TCPConnect but does not create a new measurement. -func (mx *Measurer) tcpConnect(ctx context.Context, db WritableDB, address string) (Conn, error) { +// TCPConnectWithDB is like TCPConnect but does not create a new measurement, +// rather it just stores the events inside of the given DB. 
+func (mx *Measurer) TCPConnectWithDB(ctx context.Context, db WritableDB, address string) (Conn, error) { const timeout = 10 * time.Second ol := NewOperationLogger(mx.Logger, "TCPConnect %s", address) ctx, cancel := context.WithTimeout(ctx, timeout) @@ -242,7 +243,7 @@ func (mx *Measurer) tcpConnect(ctx context.Context, db WritableDB, address strin func (mx *Measurer) TLSConnectAndHandshake(ctx context.Context, address string, config *tls.Config) *EndpointMeasurement { db := &MeasurementDB{} - conn, _ := mx.tlsConnectAndHandshake(ctx, db, address, config) + conn, _ := mx.TLSConnectAndHandshakeWithDB(ctx, db, address, config) measurement := db.AsMeasurement() if conn != nil { conn.Close() @@ -254,11 +255,11 @@ func (mx *Measurer) TLSConnectAndHandshake(ctx context.Context, } } -// tlsConnectAndHandshake is like TLSConnectAndHandshake -// but does not create a new measurement. -func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, +// TLSConnectAndHandshakeWithDB is like TLSConnectAndHandshake but +// uses the given DB instead of creating a new Measurement. +func (mx *Measurer) TLSConnectAndHandshakeWithDB(ctx context.Context, db WritableDB, address string, config *tls.Config) (netxlite.TLSConn, error) { - conn, err := mx.tcpConnect(ctx, db, address) + conn, err := mx.TCPConnectWithDB(ctx, db, address) if err != nil { return nil, err } @@ -301,7 +302,7 @@ func (mx *Measurer) tlsConnectAndHandshake(ctx context.Context, func (mx *Measurer) QUICHandshake(ctx context.Context, address string, config *tls.Config) *EndpointMeasurement { db := &MeasurementDB{} - sess, _ := mx.quicHandshake(ctx, db, address, config) + sess, _ := mx.QUICHandshakeWithDB(ctx, db, address, config) measurement := db.AsMeasurement() if sess != nil { // TODO(bassosimone): close session with correct message @@ -314,8 +315,10 @@ func (mx *Measurer) QUICHandshake(ctx context.Context, address string, } } -// quicHandshake is like QUICHandshake but does not create a new measurement. 
-func (mx *Measurer) quicHandshake(ctx context.Context, db WritableDB, +// QUICHandshakeWithDB is like QUICHandshake but uses the given +// db to store events rather than creating a temporary one and +// using it to generate a new Measurement. +func (mx *Measurer) QUICHandshakeWithDB(ctx context.Context, db WritableDB, address string, config *tls.Config) (quic.EarlySession, error) { const timeout = 10 * time.Second ol := NewOperationLogger(mx.Logger, @@ -355,7 +358,10 @@ func (mx *Measurer) HTTPEndpointGet( var ( errUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Scheme") - errUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") + + // ErrUnknownHTTPEndpointNetwork means that the given endpoint's + // network is of a type that we don't know how to handle. + ErrUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") ) // HTTPPreparedRequest is a suspended request that only awaits @@ -423,15 +429,22 @@ func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, func (mx *Measurer) httpEndpointGetMeasurement(ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (resp *http.Response, m *Measurement, err error) { db := &MeasurementDB{} + resp, err = mx.HTTPEndpointGetWithDB(ctx, epnt, db, jar) + m = db.AsMeasurement() + return +} + +// HTTPEndpointGetWithDB is an HTTPEndpointGet that stores the +// events into the given WritableDB.
+func (mx *Measurer) HTTPEndpointGetWithDB(ctx context.Context, epnt *HTTPEndpoint, + db WritableDB, jar http.CookieJar) (resp *http.Response, err error) { switch epnt.Network { case NetworkQUIC: resp, err = mx.httpEndpointGetQUIC(ctx, db, epnt, jar) - m = db.AsMeasurement() case NetworkTCP: resp, err = mx.httpEndpointGetTCP(ctx, db, epnt, jar) - m = db.AsMeasurement() default: - m, err = &Measurement{}, errUnknownHTTPEndpointNetwork + err = ErrUnknownHTTPEndpointNetwork } return } @@ -457,7 +470,7 @@ func (mx *Measurer) httpEndpointGetHTTP(ctx context.Context, return nil, err } req.Header = epnt.Header - conn, err := mx.tcpConnect(ctx, db, epnt.Address) + conn, err := mx.TCPConnectWithDB(ctx, db, epnt.Address) if err != nil { return nil, err } @@ -476,7 +489,7 @@ func (mx *Measurer) httpEndpointGetHTTPS(ctx context.Context, return nil, err } req.Header = epnt.Header - conn, err := mx.tlsConnectAndHandshake(ctx, db, epnt.Address, &tls.Config{ + conn, err := mx.TLSConnectAndHandshakeWithDB(ctx, db, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), @@ -499,7 +512,7 @@ func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, return nil, err } req.Header = epnt.Header - sess, err := mx.quicHandshake(ctx, db, epnt.Address, &tls.Config{ + sess, err := mx.QUICHandshakeWithDB(ctx, db, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, RootCAs: netxlite.NewDefaultCertPool(), diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 1e8d0a6be2..820551b55f 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -26,7 +26,12 @@ type Resolver = netxlite.Resolver // WrapResolver creates a new Resolver that saves events into the WritableDB. 
func (mx *Measurer) WrapResolver(db WritableDB, r netxlite.Resolver) Resolver { - return &resolverDB{Resolver: r, db: db, begin: mx.Begin} + return WrapResolver(mx.Begin, db, r) +} + +// WrapResolver wraps a resolver. +func WrapResolver(begin time.Time, db WritableDB, r netxlite.Resolver) Resolver { + return &resolverDB{Resolver: r, db: db, begin: begin} } // NewResolverSystem creates a system resolver and then wraps diff --git a/internal/measurex/tracing.go b/internal/measurex/tracing.go new file mode 100644 index 0000000000..3d8850a0fa --- /dev/null +++ b/internal/measurex/tracing.go @@ -0,0 +1,81 @@ +package measurex + +import ( + "net/http" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// NewTracingHTTPTransport creates a new HTTPTransport +// instance with events tracing. +// +// Arguments: +// +// - logger is the logger to use +// +// - begin is the zero time for measurements +// +// - db is the DB in which to write events that will +// eventually become the measurement +// +// - dialer is the base dialer to establish conns +// +// - resolver is the underlying resolver to use +// +// - handshake is the TLS handshaker to use +func NewTracingHTTPTransport(logger Logger, begin time.Time, db WritableDB, + resolver Resolver, dialer Dialer, handshaker TLSHandshaker) *HTTPTransportDB { + resolver = WrapResolver(begin, db, resolver) + dialer = netxlite.WrapDialer(logger, resolver, WrapDialer(begin, db, dialer)) + tlsDialer := netxlite.NewTLSDialer(dialer, handshaker) + return WrapHTTPTransport( + begin, db, netxlite.NewHTTPTransport(logger, dialer, tlsDialer)) +} + +// NewTracingHTTPTransportWithDefaultSettings creates a new +// HTTP transport with tracing capabilities and default settings. 
+// +// Arguments: +// +// - begin is the zero time for measurements +// +// - logger is the logger to use +// +// - db is the DB in which to write events that will +// eventually become the measurement +// +func NewTracingHTTPTransportWithDefaultSettings( + begin time.Time, logger Logger, db WritableDB) *HTTPTransportDB { + return NewTracingHTTPTransport(logger, begin, db, + netxlite.NewResolverStdlib(logger), + netxlite.NewDialerWithoutResolver(logger), + netxlite.NewTLSHandshakerStdlib(logger)) +} + +func (mx *Measurer) NewTracingHTTPTransportWithDefaultSettings( + logger Logger, db WritableDB) *HTTPTransportDB { + return NewTracingHTTPTransport( + mx.Logger, mx.Begin, db, mx.NewResolverSystem(db, mx.Logger), + mx.NewDialerWithoutResolver(db, mx.Logger), + mx.TLSHandshaker) +} + +// UnmeasuredHTTPEndpoints returns the endpoints whose IP address +// has been resolved but for which we don't have any measurement +// inside of the given database. The returned list will be +// empty if there is no such endpoint in the DB. This function will +// return an error if the URL is not valid or not HTTP/HTTPS. +func UnmeasuredHTTPEndpoints(db *MeasurementDB, URL string, + headers http.Header) ([]*HTTPEndpoint, error) { + parsedURL, err := url.Parse(URL) + if err != nil { + return nil, err + } + m := &DNSMeasurement{ + Domain: parsedURL.Hostname(), + Measurement: db.AsMeasurement(), + } + return AllHTTPEndpointsForURL(parsedURL, headers, m) +} diff --git a/internal/netxlite/dialer.go b/internal/netxlite/dialer.go index 9c9fb041ab..5cdc37cfc0 100644 --- a/internal/netxlite/dialer.go +++ b/internal/netxlite/dialer.go @@ -46,11 +46,15 @@ type Dialer interface { // the CloseIdleConnection call to its resolver (which is // instrumental to manage a DoH resolver connections properly). 
func NewDialerWithResolver(logger Logger, resolver Resolver) Dialer { + return WrapDialer(logger, resolver, &dialerSystem{}) +} + +func WrapDialer(logger Logger, resolver Resolver, dialer Dialer) Dialer { return &dialerLogger{ Dialer: &dialerResolver{ Dialer: &dialerLogger{ Dialer: &dialerErrWrapper{ - Dialer: &dialerSystem{}, + Dialer: dialer, }, Logger: logger, operationSuffix: "_address", diff --git a/internal/tutorial/generator/main.go b/internal/tutorial/generator/main.go index 1b82b2c4e8..df9ed0dbef 100644 --- a/internal/tutorial/generator/main.go +++ b/internal/tutorial/generator/main.go @@ -90,6 +90,25 @@ func gentorsf() { gen(path.Join(prefix, "chapter04"), "torsf.go") } +// genmeasurex generates the measurex chapters. +func genmeasurex() { + prefix := path.Join(".", "measurex") + gen(path.Join(prefix, "chapter01"), "main.go") + gen(path.Join(prefix, "chapter02"), "main.go") + gen(path.Join(prefix, "chapter03"), "main.go") + gen(path.Join(prefix, "chapter04"), "main.go") + gen(path.Join(prefix, "chapter05"), "main.go") + gen(path.Join(prefix, "chapter06"), "main.go") + gen(path.Join(prefix, "chapter07"), "main.go") + gen(path.Join(prefix, "chapter08"), "main.go") + gen(path.Join(prefix, "chapter09"), "main.go") + gen(path.Join(prefix, "chapter10"), "main.go") + gen(path.Join(prefix, "chapter11"), "main.go") + gen(path.Join(prefix, "chapter12"), "main.go") + gen(path.Join(prefix, "chapter13"), "main.go") +} + func main() { gentorsf() + genmeasurex() } diff --git a/internal/tutorial/measurex/chapter01/README.md b/internal/tutorial/measurex/chapter01/README.md new file mode 100644 index 0000000000..a61d1e3f2d --- /dev/null +++ b/internal/tutorial/measurex/chapter01/README.md @@ -0,0 +1,366 @@ + +# Chapter I: using the system resolver + +In this chapter we explain how to measure DNS resolutions performed +using the system resolver. *En passant*, we will also introduce you to +the `Measurer`, which we will use for the rest of the tutorial. 
+ +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter01/main.go`.) + +## The system resolver + +We define "system resolver" as the DNS resolver implemented by the C +library. On Unix, the most popular interface to such a resolver is the +`getaddrinfo(3)` C library function. + +Most OONI experiments (also known as nettests) use the system +resolver to map domain names to IP addresses. The advantage of +the system resolver is that it's provided by the system. So, +it should _generally_ work. Also, it is the resolver that the +user of the system will use every day, therefore its results +should be representative (even though the rise of DNS over +HTTPS embedded in browsers may make this statement less solid +than it appeared ten years ago). + +The disadvantage of the system resolver is that we do not +know how it is configured. Say the user has configured a +DNS over TLS resolver; then the measurements may miss censorship +that we would otherwise see if using a custom DNS resolver. + +Now that we have justified why the system resolver is +important for OONI, let us perform some measurements with it. + +We will first write a simple `main.go` file that shows how to use +this functionality. Then, we will show some runs of this file, and +we will comment on the output that we see. + +## main.go + +We declare the package and import useful packages. The most +important package we're importing here is, of course, `internal/measurex`. + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { +``` +### Setup + +We define command line flags useful to test this program. We use +the `flag` package for that. We want the user to be able to configure +both the domain name to resolve and the resolution timeout.
+ +```Go + domain := flag.String("domain", "example.com", "domain to resolve") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") +``` + +We call `flag.Parse` to parse the CLI flags. + +```Go + flag.Parse() +``` + +We create a context and we attach a timeout to it. (This is a pretty +standard way to configure a timeout in Go.) + +```Go + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() +``` + +### Creating a Measurer + +Now we create a `Measurer`. + +```Go + mx := measurex.NewMeasurerWithDefaultSettings() +``` + +The `Measurer` is a concrete type that contains many fields +requiring initialization. For this reason, we provide a factory +that creates one with default settings. The expected usage +pattern is that you do not modify a `Measurer`'s field after +initialization. Modifying them while the `Measurer` is in +use could, in fact, lead to races. + +Let's now invoke the system resolver to resolve `*domain`! + +### Invoking the system resolver + +We call the `LookupHostSystem` method of the `Measurer`. The +arguments are the Context, that in this case carries the timeout +we configured above, and the domain to resolve. + +The call itself is named `LookupHost` because this is the name +used by the Go function that performs a domain lookup. + +Under the hood, `mx.LookupHostSystem` will eventually call +`(*net.Resolver).LookupHost`. In turn, in the common case on +Unix, this function will eventually call `getaddrinfo(3)`. + +```Go + m := mx.LookupHostSystem(ctx, *domain) +``` + +The return value of `(*net.Resolver).LookupHost` is either a +list of IP addresses or an error. Our `LookupHostSystem` method, +instead, returns a `*measurex.DNSMeasurement` type. + +This is probably a good moment to remind you about Go's +built in help system. We could include a definition of the +`DNSMeasurement` structure, but since this definition is +just a comment in the main.go file, it may age badly. 
+ +Instead, if you run + +``` +go doc ./internal/measurex.DNSMeasurement +``` + +You get the current definition. As you can see, this type +is basically just a wrapper around `Measurement`. Now, +checking the docs of `Measurement` with + +``` +go doc ./internal/measurex.Measurement +``` + +we see a container of events +classified by event type. In our case, because we're +doing a `LookupHost`, we should have at least one entry +inside of the `Measurement.LookupHost` field. + +This entry is of type `DNSLookupEvent`. Let us check +together the definition of this type: + +``` +go doc ./internal/measurex.DNSLookupEvent +``` + +If you are familiar with [the OONI data format specs]( +https://github.com/ooni/spec/tree/master/data-formats), you +should probably recognize that this structure is the Go +representation of the `df-002-dnst` data format. + +In fact, every event field inside of a `Measurement` +should serialize nicely to JSON to one of the OONI data +formats. + +### Printing the measurement + +Because there is a close relationship between the +events inside a `Measurement` and the JSON OONI data +format, in the remainder of this program we're +going to serialize the `Measurement` to JSON and +print it to the standard output. + +```Go + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +``` + +As a final note, the `PanicOnError` is here because the +message `m` *can* be marshalled to JSON. It still feels a +bit better having an assertion for our assumptions than +outrightly ignoring the error code. (We tend to use such +a convention quite frequently in the OONI codebase.) + +```Go +} + +``` + +## Running the example program + +Let us run the program with default arguments first. 
You can do +this operation by running: + +```bash +go run -race ./internal/tutorial/measurex/chapter01 +``` + +If you do that you obtain some logging messages, which are out of +the scope of this tutorial, and the following JSON: + +```JSON +{ + "domain": "example.com", + "lookup_host": [ + { + "answers": [ + { + "answer_type": "A", + "ipv4": "93.184.216.34" + } + ], + "engine": "system", + "failure": null, + "hostname": "example.com", + "query_type": "A", + "resolver_address": "", + "t": 0.002996459, + "started": 9.8e-05, + "oddity": "" + }, + { + "answers": [ + { + "answer_type": "AAAA", + "ivp6": "2606:2800:220:1:248:1893:25c8:1946" + } + ], + "engine": "system", + "failure": null, + "hostname": "example.com", + "query_type": "AAAA", + "resolver_address": "", + "t": 0.002996459, + "started": 9.8e-05, + "oddity": "" + } + ] +} +``` + +You see that we have two messages here. OONI splits a DNS +resolution performed using the system resolver into two "fake" +DNS resolutions for A and AAAA. (Under the hood, this is +what the system resolver would most likely do.) + +The most important fields are: + +- _engine_, indicating that we are using the "system" resolver; + +- _hostname_, meaning that we wanted to resolve the "example.com" domain; + +- _answers_, which contains a list of answers; + +- _t_, which is the time when the LookupHost operation completed.
+ +### NXDOMAIN measurement + +Let us now change the domain to resolve to be `antani.ooni.org` (a +nonexistent domain), which we can do by running this command: + +```bash +go run -race ./internal/tutorial/measurex/chapter01 -domain antani.ooni.org +``` + +This is the output JSON: + +```JSON +{ + "domain": "antani.ooni.org", + "lookup_host": [ + { + "answers": null, + "engine": "system", + "failure": "dns_nxdomain_error", + "hostname": "antani.ooni.org", + "query_type": "A", + "resolver_address": "", + "t": 0.072963834, + "started": 0.000125417, + "oddity": "dns.lookup.nxdomain" + }, + { + "answers": null, + "engine": "system", + "failure": "dns_nxdomain_error", + "hostname": "antani.ooni.org", + "query_type": "AAAA", + "resolver_address": "", + "t": 0.072963834, + "started": 0.000125417, + "oddity": "dns.lookup.nxdomain" + } + ] +} +``` + +So we see a failure that says there was indeed an NXDOMAIN +error and we also see a field named `oddity`. + +What is an oddity? We define an oddity as something unexpected that +may be explained by censorship as well as by a transient failure +or other normal network conditions. (In this case, the result +is perfectly normal since we're looking up a nonexistent domain.) + +The difference between failure and oddity is that the failure +indicates the error that occurred, while the oddity classifies +the error in the context of the operation during which it +occurred. (In this case the difference is subtle, but we'll +have a better example later, when we'll see what happens on timeout.) + +Failures are specified in +[df-007-errors](https://github.com/ooni/spec/blob/master/data-formats/df-007-errors.md). +Inside the `internal/netxlite/errorsx` +package, there is code that maps Go errors to failures. (The +`netxlite` package is the fundamental network package we use, on +top of which `measurex` is written.)
+ +### Measurement with timeout + +Let us now try with an insanely low timeout: + +```bash +go run -race ./internal/tutorial/measurex/chapter01 -timeout 250us +``` + +To get this JSON: + +```JSON +{ + "domain": "example.com", + "lookup_host": [ + { + "answers": null, + "engine": "system", + "failure": "generic_timeout_error", + "hostname": "example.com", + "query_type": "A", + "resolver_address": "", + "t": 0.000489167, + "started": 9.2583e-05, + "oddity": "dns.lookup.timeout" + }, + { + "answers": null, + "engine": "system", + "failure": "generic_timeout_error", + "hostname": "example.com", + "query_type": "AAAA", + "resolver_address": "", + "t": 0.000489167, + "started": 9.2583e-05, + "oddity": "dns.lookup.timeout" + } + ] +} +``` + +You should now better see the difference between a failure and +an oddity. The context timeout maps to a `generic_timeout_error` while +the oddity clearly indicates the timeout happens during a DNS +lookup. As we mentioned above, the failure is just an error while +an oddity is an error put in context. + +## Conclusions + +This is it. We have seen how to measure the system resolver and we have +also seen which easy-to-provoke errors we can get. + diff --git a/internal/tutorial/measurex/chapter01/main.go b/internal/tutorial/measurex/chapter01/main.go index cf25b9162d..6184c11161 100644 --- a/internal/tutorial/measurex/chapter01/main.go +++ b/internal/tutorial/measurex/chapter01/main.go @@ -1,3 +1,47 @@ +// -=-=- StartHere -=-=- +// +// # Chapter I: using the system resolver +// +// In this chapter we explain how to measure DNS resolutions performed +// using the system resolver. *En passant*, we will also introduce you to +// the `Measurer`, which we will use for the rest of the tutorial. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter01/main.go`.) 
+// +// ## The system resolver +// +// We define "system resolver" as the DNS resolver implemented by the C +// library. On Unix, the most popular interface to such a resolver is the +// `getaddrinfo(3)` C library function. +// +// Most OONI experiments (also known as nettests) use the system +// resolver to map domain names to IP addresses. The advantage of +// the system resolver is that it's provided by the system. So, +// it should _generally_ work. Also, it is the resolver that the +// user of the system will use every day, therefore its results +// should be representative (even though the rise of DNS over +// HTTPS embedded in browsers may make this statement less solid +// than it appeared ten years ago). +// +// The disadvantage of the system resolver is that we do not +// know how it is configured. Say the user has configured a +// DNS over TLS resolver; then the measurements may miss censorship +// that we would otherwise see if using a custom DNS resolver. +// +// Now that we have justified why the system resolver is +// important for OONI, let us perform some measurements with it. +// +// We will first write a simple `main.go` file that shows how to use +// this functionality. Then, we will show some runs of this file, and +// we will comment the output that we see. +// +// ## main.go +// +// We declare the package and import useful packages. The most +// important package we're importing here is, of course, `internal/measurex`. +// +// ```Go package main import ( @@ -12,14 +56,313 @@ import ( ) func main() { + // ``` + // ### Setup + // + // We define command line flags useful to test this program. We use + // the `flag` package for that. We want the user to be able to configure + // both the domain name to resolve and the resolution timeout. + // + // ```Go domain := flag.String("domain", "example.com", "domain to resolve") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + // ``` + // + // We call `flag.Parse` to parse the CLI flags.
+ // + // ```Go flag.Parse() + // ``` + // + // We create a context and we attach a timeout to it. (This is a pretty + // standard way to configure a timeout in Go.) + // + // ```Go ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() + // ``` + // + // ### Creating a Measurer + // + // Now we create a `Measurer`. + // + // ```Go mx := measurex.NewMeasurerWithDefaultSettings() + // ``` + // + // The `Measurer` is a concrete type that contains many fields + // requiring initialization. For this reason, we provide a factory + // that creates one with default settings. The expected usage + // pattern is that you do not modify a `Measurer`'s field after + // initialization. Modifying them while the `Measurer` is in + // use could, in fact, lead to races. + // + // Let's now invoke the system resolver to resolve `*domain`! + // + // ### Invoking the system resolver + // + // We call the `LookupHostSystem` method of the `Measurer`. The + // arguments are the Context, that in this case carries the timeout + // we configured above, and the domain to resolve. + // + // The call itself is named `LookupHost` because this is the name + // used by the Go function that performs a domain lookup. + // + // Under the hood, `mx.LookupHostSystem` will eventually call + // `(*net.Resolver).LookupHost`. In turn, in the common case on + // Unix, this function will eventually call `getaddrinfo(3)`. + // + // ```Go m := mx.LookupHostSystem(ctx, *domain) + // ``` + // + // The return value of `(*net.Resolver).LookupHost` is either a + // list of IP addresses or an error. Our `LookupHostSystem` method, + // instead, returns a `*measurex.DNSMeasurement` type. + // + // This is probably a good moment to remind you about Go's + // built in help system. We could include a definition of the + // `DNSMeasurement` structure, but since this definition is + // just a comment in the main.go file, it may age badly. 
+ // + // Instead, if you run + // + // ``` + // go doc ./internal/measurex.DNSMeasurement + // ``` + // + // You get the current definition. As you can see, this type + // is basically just a wrapper around `Measurement`. Now, + // checking the docs of `Measurement` with + // + // ``` + // go doc ./internal/measurex.Measurement + // ``` + // + // we see a container of events + // classified by event type. In our case, because we're + // doing a `LookupHost`, we should have at least one entry + // inside of the `Measurement.LookupHost` field. + // + // This entry is of type `DNSLookupEvent`. Let us check + // together the definition of this type: + // + // ``` + // go doc ./internal/measurex.DNSLookupEvent + // ``` + // + // If you are familiar with [the OONI data format specs]( + // https://github.com/ooni/spec/tree/master/data-formats), you + // should probably recognize that this structure is the Go + // representation of the `df-002-dnst` data format. + // + // In fact, every event field inside of a `Measurement` + // should serialize nicely to JSON to one of the OONI data + // formats. + // + // ### Printing the measurement + // + // Because there is a close relationship between the + // events inside a `Measurement` and the JSON OONI data + // format, in the remainder of this program we're + // going to serialize the `Measurement` to JSON and + // print it to the standard output. + // + // ```Go data, err := json.Marshal(m) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) + // ``` + // + // As a final note, the `PanicOnError` is here because the + // message `m` *can* be marshalled to JSON. It still feels a + // bit better having an assertion for our assumptions than + // outrightly ignoring the error code. (We tend to use such + // a convention quite frequently in the OONI codebase.) + // + // ```Go } + +// ``` +// +// ## Running the example program +// +// Let us run the program with default arguments first. 
You can do +// this operation by running: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter01 +// ``` +// +// If you do that you obtain some logging messages, which are out of +// the scope of this tutorial, and the following JSON: +// +// ```JSON +// { +// "domain": "example.com", +// "lookup_host": [ +// { +// "answers": [ +// { +// "answer_type": "A", +// "ipv4": "93.184.216.34" +// } +// ], +// "engine": "system", +// "failure": null, +// "hostname": "example.com", +// "query_type": "A", +// "resolver_address": "", +// "t": 0.002996459, +// "started": 9.8e-05, +// "oddity": "" +// }, +// { +// "answers": [ +// { +// "answer_type": "AAAA", +// "ivp6": "2606:2800:220:1:248:1893:25c8:1946" +// } +// ], +// "engine": "system", +// "failure": null, +// "hostname": "example.com", +// "query_type": "AAAA", +// "resolver_address": "", +// "t": 0.002996459, +// "started": 9.8e-05, +// "oddity": "" +// } +// ] +// } +// ``` +// +// You see that we have two messages here. OONI splits a DNS +// resolution performed using the system resolver into two "fake" +// DNS resolutions for A and AAAA. (Under the hood, this is +// what the system resolver would most likely do.) +// +// The most important fields are: +// +// - _engine_, indicating that we are using the "system" resolver; +// +// - _hostname_, meaning that we wanted to resolve the "example.com" domain; +// +// - _answers_, which contains a list of answers; +// +// - _t_, which is the time when the LookupHost operation completed.
+// +// ### NXDOMAIN measurement +// +// Let us now change the domain to resolve to be `antani.ooni.org` (a +// nonexisting domain), which we can do by running this command: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter01 -domain antani.ooni.org +// ``` +// +// This is the output JSON: +// +// ```JSON +// { +// "domain": "antani.ooni.org", +// "lookup_host": [ +// { +// "answers": null, +// "engine": "system", +// "failure": "dns_nxdomain_error", +// "hostname": "antani.ooni.org", +// "query_type": "A", +// "resolver_address": "", +// "t": 0.072963834, +// "started": 0.000125417, +// "oddity": "dns.lookup.nxdomain" +// }, +// { +// "answers": null, +// "engine": "system", +// "failure": "dns_nxdomain_error", +// "hostname": "antani.ooni.org", +// "query_type": "AAAA", +// "resolver_address": "", +// "t": 0.072963834, +// "started": 0.000125417, +// "oddity": "dns.lookup.nxdomain" +// } +// ] +// } +// ``` +// +// So we see a failure that says there was indeed an NXDOMAIN +// error and we also see a field named `oddity`. +// +// What is an oddity? We define an oddity as something unexpected that +// may be explained by censorship as well as by a transient failure +// or other normal network conditions. (In this case, the result +// is perfectly normal since we're looking up a nonexistent domain.) +// +// The difference between failure and oddity is that the failure +// indicates the error that occurred, while the oddity classifies +// the error in the context of the operation during which it +// occurred. (In this case the difference is subtle, but we'll +// have a better example later, when we'll see what happens on timeout.) +// +// Failures are specified in +// [df-007-errors](https://github.com/ooni/spec/blob/master/data-formats/df-007-errors.md). +// Inside the `internal/netxlite/errorsx` +// package, there is code that maps Go errors to failures.
(The +// `netxlite` package is the fundamental network package we use, on +// top of which `measurex` is written.) +// +// ### Measurement with timeout +// +// Let us now try with an insanely low timeout: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter01 -timeout 250us +// ``` +// +// To get this JSON: +// +// ```JSON +// { +// "domain": "example.com", +// "lookup_host": [ +// { +// "answers": null, +// "engine": "system", +// "failure": "generic_timeout_error", +// "hostname": "example.com", +// "query_type": "A", +// "resolver_address": "", +// "t": 0.000489167, +// "started": 9.2583e-05, +// "oddity": "dns.lookup.timeout" +// }, +// { +// "answers": null, +// "engine": "system", +// "failure": "generic_timeout_error", +// "hostname": "example.com", +// "query_type": "AAAA", +// "resolver_address": "", +// "t": 0.000489167, +// "started": 9.2583e-05, +// "oddity": "dns.lookup.timeout" +// } +// ] +// } +// ``` +// +// You should now better see the difference between a failure and +// an oddity. The context timeout maps to a `generic_timeout_error` while +// the oddity clearly indicates the timeout happens during a DNS +// lookup. As we mentioned above, the failure is just an error while +// an oddity is an error put in context. +// +// ## Conclusions +// +// This is it. We have seen how to measure the system resolver and we have +// also seen which easy-to-provoke errors we can get. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter02/README.md b/internal/tutorial/measurex/chapter02/README.md new file mode 100644 index 0000000000..0d53158484 --- /dev/null +++ b/internal/tutorial/measurex/chapter02/README.md @@ -0,0 +1,224 @@ + +# Chapter II: establishing TCP connections + +In this chapter we explain how to measure establishing TCP connections. + +We will first write a simple `main.go` file that shows how to use +this functionality. 
Then, we will show some runs of this file, and +we will comment the output that we see. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter02/main.go`.) + +## main.go + +We declare the package and import useful packages. The most +important package we're importing here is, of course, `internal/measurex`. + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { +``` +### Setup + +This first part of `main.go` is really similar to the previous +chapter, so there is not much new to say here. + +```Go + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() +``` + +### Creating a Measurer + +We create a `Measurer` like we did in the previous chapter. + +```Go + mx := measurex.NewMeasurerWithDefaultSettings() +``` + +### Establishing a TCP connection. + +We then call `TCPConnect`, which establishes a connection +and returns the corresponding measurement. + +The arguments are the context (for timeouts), and the address +of the endpoint to which we want to connect. (Here and in +most of this tutorial with "endpoint" we mean an IP address +and a port, serialized as "ADDRESS:PORT", where the +address is quoted with "[" and "]" if IPv6, e.g., `[::1]:53`.) + +```Go + m := mx.TCPConnect(ctx, *address) +``` + +### Printing the measurement + +The rest of the main function is just like in the previous chapter. + +```Go + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +``` + +## Running the example program + +Let us run the program with default arguments first.
You can do +this operation by running: + +```bash +go run -race ./internal/tutorial/measurex/chapter02 +``` + +Here is the JSON we obtain in output: + +```JSON +{ + "network": "tcp", + "address": "8.8.4.4:443", + "connect": [ + { + "address": "8.8.4.4:443", + "failure": null, + "operation": "connect", + "proto": "tcp", + "t": 0.026879041, + "started": 8.8625e-05, + "oddity": "" + } + ] +} +``` + +This is what it says: + +- we are connecting a "tcp" socket; + +- the destination endpoint address is "8.8.4.4:443"; + +- connect terminated ~0.027 seconds into the program life; + +- the operation succeeded (`failure` is `nil`). + +Let us now see if we can provoke some errors and timeouts. + +### Measurement with connection refused + +Let us start with an IP address where there's no listening socket: + +```bash +go run -race ./internal/tutorial/measurex/chapter02 -address 127.0.0.1:1 +``` + +We get this JSON: + +```JSON +{ + "network": "tcp", + "address": "127.0.0.1:1", + "connect": [ + { + "address": "127.0.0.1:1", + "failure": "connection_refused", + "operation": "connect", + "proto": "tcp", + "t": 0.000372167, + "started": 8.4917e-05, + "oddity": "tcp.connect.refused" + } + ] +} + +``` + +And here's an error telling us the connection was refused and +the oddity that classifies the error. + +### Measurement with timeouts + +Let us now try to obtain a timeout: + +```bash +go run -race ./internal/tutorial/measurex/chapter02 -address 8.8.4.4:1 +``` + +We get this JSON: + +```JSON +{ + "network": "tcp", + "address": "8.8.4.4:1", + "connect": [ + { + "address": "8.8.4.4:1", + "failure": "generic_timeout_error", + "operation": "connect", + "proto": "tcp", + "t": 10.005494583, + "started": 8.4833e-05, + "oddity": "tcp.connect.timeout" + } + ] +} +``` + +So, we clearly see from the value of `t` that our 60 seconds +default timeout did not hit, because there is a lower watchdog +timeout (10 s). We also see again how the oddity is more +precise than just the error alone. 
+ +Let us now use a very small timeout: + +```bash +go run -race ./internal/tutorial/measurex/chapter02 -address 8.8.4.4:1 -timeout 100ms +``` + +To get this JSON: + +```JSON +{ + "network": "tcp", + "address": "8.8.4.4:1", + "connect": [ + { + "address": "8.8.4.4:1", + "failure": "generic_timeout_error", + "operation": "connect", + "proto": "tcp", + "t": 0.10148025, + "started": 0.000122375, + "oddity": "tcp.connect.timeout" + } + ] +} +``` + +We see a timeout after ~0.1s. We enforce a reasonably small +timeout for connecting, equal to 10 s, because we want to +guarantee that measurements eventually terminate. Also, since +often censorship is implemented by timing out, we don't want +to spend too much time waiting for a timeout to expire. + +## Conclusions + +We have seen how to measure the operation of connecting +to a specific TCP endpoint. + diff --git a/internal/tutorial/measurex/chapter02/main.go b/internal/tutorial/measurex/chapter02/main.go index 3fea2f7f55..94d8271afa 100644 --- a/internal/tutorial/measurex/chapter02/main.go +++ b/internal/tutorial/measurex/chapter02/main.go @@ -1,3 +1,22 @@ +// -=-=- StartHere -=-=- +// +// # Chapter II: establishing TCP connections +// +// In this chapter we explain how to measure establishing TCP connections. +// +// We will first write a simple `main.go` file that shows how to use +// this functionality. Then, we will show some runs of this file, and +// we will comment the output that we see. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter02/main.go`.) +// +// ## main.go +// +// We declare the package and import useful packages. The most +// important package we're importing here is, of course, `internal/measurex`.
+// +// ```Go package main import ( @@ -12,14 +31,196 @@ import ( ) func main() { + // ``` + // ### Setup + // + // This first part of `main.go` is really similar to the previous + // chapter, so there is not much new to say here. + // + // ```Go address := flag.String("address", "8.8.4.4:443", "remote endpoint address") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") flag.Parse() ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() + // ``` + // + // ### Creating a Measurer + // + // We create a `Measurer` like we did in the previous chapter. + // + // ```Go mx := measurex.NewMeasurerWithDefaultSettings() + // ``` + // + // ### Establishing a TCP connection. + // + // We then call `TCPConnect`, which establishes a connection + // and returns the corresponding measurement. + // + // The arguments are the context (for timeouts), and the address + // of the endpoint to which we want to connect. (Here and in + // most of this tutorial with "endpoint" we mean an IP address + // and a port, serialized as "ADDRESS:PORT", where the + // address is quoted with "[" and "]" if IPv6, e.g., `[::1]:53`.) + // + // ```Go m := mx.TCPConnect(ctx, *address) + // ``` + // + // ### Printing the measurement + // + // The rest of the main function is just like in the previous chapter. + // + // ```Go data, err := json.Marshal(m) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) } + +// ``` +// +// ## Running the example program +// +// Let us run the program with default arguments first.
You can do +// this operation by running: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter02 +// ``` +// +// Here is the JSON we obtain in output: +// +// ```JSON +// { +// "network": "tcp", +// "address": "8.8.4.4:443", +// "connect": [ +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "operation": "connect", +// "proto": "tcp", +// "t": 0.026879041, +// "started": 8.8625e-05, +// "oddity": "" +// } +// ] +// } +// ``` +// +// This is what it says: +// +// - we are connecting a "tcp" socket; +// +// - the destination endpoint address is "8.8.4.4:443"; +// +// - connect terminated ~0.027 seconds into the program life; +// +// - the operation succeeded (`failure` is `nil`). +// +// Let us now see if we can provoke some errors and timeouts. +// +// ### Measurement with connection refused +// +// Let us start with an IP address where there's no listening socket: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter02 -address 127.0.0.1:1 +// ``` +// +// We get this JSON: +// +// ```JSON +// { +// "network": "tcp", +// "address": "127.0.0.1:1", +// "connect": [ +// { +// "address": "127.0.0.1:1", +// "failure": "connection_refused", +// "operation": "connect", +// "proto": "tcp", +// "t": 0.000372167, +// "started": 8.4917e-05, +// "oddity": "tcp.connect.refused" +// } +// ] +// } +// +// ``` +// +// And here's an error telling us the connection was refused and +// the oddity that classifies the error. 
+// +// ### Measurement with timeouts +// +// Let us now try to obtain a timeout: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter02 -address 8.8.4.4:1 +// ``` +// +// We get this JSON: +// +// ```JSON +// { +// "network": "tcp", +// "address": "8.8.4.4:1", +// "connect": [ +// { +// "address": "8.8.4.4:1", +// "failure": "generic_timeout_error", +// "operation": "connect", +// "proto": "tcp", +// "t": 10.005494583, +// "started": 8.4833e-05, +// "oddity": "tcp.connect.timeout" +// } +// ] +// } +// ``` +// +// So, we clearly see from the value of `t` that our 60 seconds +// default timeout did not hit, because there is a lower watchdog +// timeout (10 s). We also see again how the oddity is more +// precise than just the error alone. +// +// Let us now use a very small timeout: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter02 -address 8.8.4.4:1 -timeout 100ms +// ``` +// +// To get this JSON: +// +// ```JSON +// { +// "network": "tcp", +// "address": "8.8.4.4:1", +// "connect": [ +// { +// "address": "8.8.4.4:1", +// "failure": "generic_timeout_error", +// "operation": "connect", +// "proto": "tcp", +// "t": 0.10148025, +// "started": 0.000122375, +// "oddity": "tcp.connect.timeout" +// } +// ] +// } +// ``` +// +// We see a timeout after ~0.1s. We enforce a reasonably small +// timeout for connecting, equal to 10 s, because we want to +// guarantee that measurements eventually terminate. Also, since +// often censorship is implemented by timing out, we don't want +// to spend too much time waiting for a timeout to expire. +// +// ## Conclusions +// +// We have seen how to measure the operation of connecting +// to a specific TCP endpoint.
+// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter03/README.md b/internal/tutorial/measurex/chapter03/README.md new file mode 100644 index 0000000000..d6b88c1b2e --- /dev/null +++ b/internal/tutorial/measurex/chapter03/README.md @@ -0,0 +1,567 @@ + +# Chapter III: using a custom DNS-over-UDP resolver + +In this chapter we learn how to measure sending DNS queries to +a DNS server speaking the DNS-over-UDP protocol. + +Without further ado, let's describe our example `main.go` program +and let's use it to better understand this flow. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter03/main.go`.) + +## main.go + +The initial part of the program is pretty much the same as the one +used in previous chapters, so I will not add further comments. + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + query := flag.String("query", "example.com", "domain to resolver") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() +``` + +### Using a custom UDP resolver + +We now invoke `LookupHostUDP`. We specify: + +- a context for timeout information; + +- the domain to query for; + +- the address of the DNS-over-UDP server endpoint. + +```Go + m := mx.LookupHostUDP(ctx, *query, *address) +``` + +Also this operation returns a measurement, which +we print using the usual three-liner. 
+ +```Go + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +``` + +## Running the example program + +As before, let us start off with a vanilla run: + +```bash +go run -race ./internal/tutorial/measurex/chapter03 +``` + +This time we get a much larger JSON, so I will pretend it is +actually JavaScript and add comments to explain it inline. + +(This is the first case in which we see how a single +method call for measurer causes several events to +be generated and inserted into a `Measurement`.) + +```JavaScript +{ + "domain": "example.com", + + // This block tells us about the UDP connect events + // where we bind to the server's endpoint + "connect": [ + { + "address": "8.8.4.4:53", + "failure": null, + "operation": "connect", + "proto": "udp", + "t": 0.00043175, + "started": 0.000191958, + "oddity": "" + }, + { + "address": "8.8.4.4:53", + "failure": null, + "operation": "connect", + "proto": "udp", + "t": 0.042198458, + "started": 0.042113208, + "oddity": "" + } + ], + + // This block shows the read and write events + // occurred on the sockets (because we control + // in full the implementation of this DNS + // over UDP resolver, we can see these events) + "read_write": [ + { + "address": "8.8.4.4:53", + "failure": null, + "num_bytes": 29, + "operation": "write", + "proto": "udp", + "t": 0.000459583, + "started": 0.00043825, + "oddity": "" + }, + { + "address": "8.8.4.4:53", + "failure": null, + "num_bytes": 45, + "operation": "read", + "proto": "udp", + "t": 0.041955792, + "started": 0.000471833, + "oddity": "" + }, + { + "address": "8.8.4.4:53", + "failure": null, + "num_bytes": 29, + "operation": "write", + "proto": "udp", + "t": 0.042218917, + "started": 0.042203, + "oddity": "" + }, + { + "address": "8.8.4.4:53", + "failure": null, + "num_bytes": 57, + "operation": "read", + "proto": "udp", + "t": 0.196646583, + "started": 0.042233167, + "oddity": "" + } + ], + + // This is the same 
kind of result as before, we + // show the emitted queries and the resolved addrs. + // + // Also note how here the resolver_address is the + // correct endpoint address and the engine tells us + // that we're using DNS over UDP. + "lookup_host": [ + { + "answers": [ + { + "answer_type": "A", + "ipv4": "93.184.216.34" + } + ], + "engine": "udp", + "failure": null, + "hostname": "example.com", + "query_type": "A", + "resolver_address": "8.8.4.4:53", + "t": 0.196777042, + "started": 0.000118542, + "oddity": "" + }, + { + "answers": [ + { + "answer_type": "AAAA", + "ivp6": "2606:2800:220:1:248:1893:25c8:1946" + } + ], + "engine": "udp", + "failure": null, + "hostname": "example.com", + "query_type": "AAAA", + "resolver_address": "8.8.4.4:53", + "t": 0.196777042, + "started": 0.000118542, + "oddity": "" + } + ], + + // This block shows the query we sent (encoded as base64) + // and the response we received. Here we clearly see + // that we perform two DNS "round trip" (i.e., send request + // and receive response) to resolve a domain: one for + // A and the other for AAAA. + "dns_round_trip": [ + { + "engine": "udp", + "resolver_address": "8.8.4.4:53", + "raw_query": { + "data": "PrcBAAABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", + "format": "base64" + }, + "started": 0.000191625, + "t": 0.041998667, + "failure": null, + "raw_reply": { + "data": "PreBgAABAAEAAAAAB2V4YW1wbGUDY29tAAABAAHADAABAAEAAE8BAARduNgi", + "format": "base64" + } + }, + { + "engine": "udp", + "resolver_address": "8.8.4.4:53", + "raw_query": { + "data": "LAwBAAABAAAAAAAAB2V4YW1wbGUDY29tAAAcAAE=", + "format": "base64" + }, + "started": 0.04210775, + "t": 0.196701333, + "failure": null, + "raw_reply": { + "data": "LAyBgAABAAEAAAAAB2V4YW1wbGUDY29tAAAcAAHADAAcAAEAAE6nABAmBigAAiAAAQJIGJMlyBlG", + "format": "base64" + } + } + ] +} +``` + +This data format is really an extension of the `LookupHostSystem` +one. 
It just adds more fields that clarify what happened at low +level in terms of socket I/O and queries sent and received. + +Let us now try to provoke some errors and see how the +output JSON changes because of them. + +### Measurement with NXDOMAIN + +Let us try to get a NXDOMAIN error. + +```bash +go run -race ./internal/tutorial/measurex/chapter03 -query antani.ooni.org +``` + +This produces the following JSON: + +```JavaScript +{ + "domain": "antani.ooni.org", + "connect": [ /* snip */ ], + "read_write": [ /* snip */ ], + "lookup_host": [ + { + "answers": null, + "engine": "udp", + "failure": "dns_nxdomain_error", + "hostname": "antani.ooni.org", + "query_type": "A", + "resolver_address": "8.8.4.4:53", + "t": 0.098208709, + "started": 8.95e-05, + "oddity": "dns.lookup.nxdomain" + }, + { + "answers": null, + "engine": "udp", + "failure": "dns_nxdomain_error", + "hostname": "antani.ooni.org", + "query_type": "AAAA", + "resolver_address": "8.8.4.4:53", + "t": 0.098208709, + "started": 8.95e-05, + "oddity": "dns.lookup.nxdomain" + } + ], + "dns_round_trip": [ + { + "engine": "udp", + "resolver_address": "8.8.4.4:53", + "raw_query": { + "data": "jLIBAAABAAAAAAAABmFudGFuaQRvb25pA29yZwAAAQAB", + "format": "base64" + }, + "started": 0.000141542, + "t": 0.034689417, + "failure": null, + "raw_reply": { + "data": "jLKBgwABAAAAAQAABmFudGFuaQRvb25pA29yZwAAAQABwBMABgABAAAHCAA9BGRuczERcmVnaXN0cmFyLXNlcnZlcnMDY29tAApob3N0bWFzdGVywDJhABz8AACowAAADhAACTqAAAAOEQ==", + "format": "base64" + } + }, + { + "engine": "udp", + "resolver_address": "8.8.4.4:53", + "raw_query": { + "data": "azEBAAABAAAAAAAABmFudGFuaQRvb25pA29yZwAAHAAB", + "format": "base64" + }, + "started": 0.034776709, + "t": 0.098170542, + "failure": null, + "raw_reply": { + "data": "azGBgwABAAAAAQAABmFudGFuaQRvb25pA29yZwAAHAABwBMABgABAAAHCAA9BGRuczERcmVnaXN0cmFyLXNlcnZlcnMDY29tAApob3N0bWFzdGVywDJhABz8AACowAAADhAACTqAAAAOEQ==", + "format": "base64" + } + } + ] +} +``` + +We indeed get a NXDOMAIN error as the failure in 
`lookup_host`. + +Let us now decode one of the replies by using this program: + +``` +package main + +import ( +"fmt" +"encoding/base64" + +"github.com/miekg/dns" +) + +func main() { + const query = "azGBgwABAAAAAQAABmFudGFuaQRvb25pA29yZwAAHAABwBMABgABAAAHCAA9BGRuczERcmVnaXN0cmFyLXNlcnZlcnMDY29tAApob3N0bWFzdGVywDJhABz8AACowAAADhAACTqAAAAOEQ==" + data, _ := base64.StdEncoding.DecodeString(query) + msg := new(dns.Msg) + _ = msg.Unpack(data) + fmt.Printf("%s\n", msg) +} +``` + +where `query` is one of the replies. If we run this program +we get as the output: + +``` +;; opcode: QUERY, status: NXDOMAIN, id: 27441 +;; flags: qr rd ra; QUERY: 1, ANSWER: 0, AUTHORITY: 1, ADDITIONAL: 0 + +;; QUESTION SECTION: +;antani.ooni.org. IN AAAA + +;; AUTHORITY SECTION: +ooni.org. 1800 IN SOA dns1.registrar-servers.com. hostmaster.registrar-servers.com. 1627397372 43200 3600 604800 3601 +``` + +### Measurement with timeout + +Let us now query an IP address known for not responding +to DNS queries, to get a timeout. 
+ +```bash +go run -race ./internal/tutorial/measurex/chapter03 -address 182.92.22.222:53 +``` + +Here's the corresponding JSON: + +```JavaScript +{ + "domain": "example.com", + "connect": [ /* snip */ ], + "read_write": [ + { + "address": "182.92.22.222:53", + "failure": null, + "num_bytes": 29, + "operation": "write", + "proto": "udp", + "t": 0.0005275, + "started": 0.000500209, + "oddity": "" + }, + { + "address": "182.92.22.222:53", + "failure": "generic_timeout_error", /* <--- */ + "operation": "read", + "proto": "udp", + "t": 5.001140125, + "started": 0.000544042, + "oddity": "" + } + ], + "lookup_host": [ + { + "answers": null, + "engine": "udp", + "failure": "generic_timeout_error", /* <--- */ + "hostname": "example.com", + "query_type": "A", + "resolver_address": "182.92.22.222:53", + "t": 5.001462084, + "started": 0.000127917, + "oddity": "dns.lookup.timeout" /* <--- */ + }, + { + "answers": null, + "engine": "udp", + "failure": "generic_timeout_error", + "hostname": "example.com", + "query_type": "AAAA", + "resolver_address": "182.92.22.222:53", + "t": 5.001462084, + "started": 0.000127917, + "oddity": "dns.lookup.timeout" + } + ], + "dns_round_trip": [ + { + "engine": "udp", + "resolver_address": "182.92.22.222:53", + "raw_query": { + "data": "ej8BAAABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", + "format": "base64" + }, + "started": 0.000220584, + "t": 5.001317417, + "failure": "generic_timeout_error", + "raw_reply": null + } + ] +} +``` + +We see that we do fail with a timeout (I have marked some of them +with comments inside the JSON). We see the timeout at three different +levels of abstraction (from lower to higher abstraction): at the socket layer, +during the DNS round trip, during the DNS lookup. + +What we also see is that `t`'s value is ~5s when the `read` event +fails, which tells us about the socket's read timeout.
+ +### Measurement with REFUSED error + +Let us now try to get a REFUSED DNS Rcode, again from servers +that are, let's say, kind enough to easily help. + +```bash +go run -race ./internal/tutorial/measurex/chapter03 -address 180.97.36.63:53 +``` + +Here's the answer I get: + +```JavaScript +{ + "domain": "example.com", + "connect": [ /* snip */ ], + + // The I/O events look normal this time + "read_write": [ + { + "address": "180.97.36.63:53", + "failure": null, + "num_bytes": 29, + "operation": "write", + "proto": "udp", + "t": 0.000333583, + "started": 0.000312125, + "oddity": "" + }, + { + "address": "180.97.36.63:53", + "failure": null, + "num_bytes": 29, + "operation": "read", + "proto": "udp", + "t": 0.334948125, + "started": 0.000366625, + "oddity": "" + }, + { + "address": "180.97.36.63:53", + "failure": null, + "num_bytes": 29, + "operation": "write", + "proto": "udp", + "t": 0.3358025, + "started": 0.335725958, + "oddity": "" + }, + { + "address": "180.97.36.63:53", + "failure": null, + "num_bytes": 29, + "operation": "read", + "proto": "udp", + "t": 0.739987666, + "started": 0.335863875, + "oddity": "" + } + ], + + // But we see both in the error and in the oddity + // that the response was "REFUSED" + "lookup_host": [ + { + "answers": null, + "engine": "udp", + "failure": "dns_refused_error", + "hostname": "example.com", + "query_type": "A", + "resolver_address": "180.97.36.63:53", + "t": 0.7402975, + "started": 7.2291e-05, + "oddity": "dns.lookup.refused" + }, + { + "answers": null, + "engine": "udp", + "failure": "dns_refused_error", + "hostname": "example.com", + "query_type": "AAAA", + "resolver_address": "180.97.36.63:53", + "t": 0.7402975, + "started": 7.2291e-05, + "oddity": "dns.lookup.refused" + } + ], + + // Exercise: do like I did before and decode the messages + "dns_round_trip": [ + { + "engine": "udp", + "resolver_address": "180.97.36.63:53", + "raw_query": { + "data": "crkBAAABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", + "format": "base64" + }, 
+ "started": 0.000130666, + "t": 0.33509925, + "failure": null, + "raw_reply": { + "data": "crmBBQABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", + "format": "base64" + } + }, + { + "engine": "udp", + "resolver_address": "180.97.36.63:53", + "raw_query": { + "data": "ywcBAAABAAAAAAAAB2V4YW1wbGUDY29tAAAcAAE=", + "format": "base64" + }, + "started": 0.335321333, + "t": 0.740152375, + "failure": null, + "raw_reply": { + "data": "yweBBQABAAAAAAAAB2V4YW1wbGUDY29tAAAcAAE=", + "format": "base64" + } + } + ] +} +``` + +## Conclusion + +We have seen how we can configure and use the flow for +sending DNS queries over UDP and we have seen some common errors. + diff --git a/internal/tutorial/measurex/chapter03/main.go b/internal/tutorial/measurex/chapter03/main.go index c660fc8783..faba4da39b 100644 --- a/internal/tutorial/measurex/chapter03/main.go +++ b/internal/tutorial/measurex/chapter03/main.go @@ -1,3 +1,22 @@ +// -=-=- StartHere -=-=- +// +// # Chapter III: using a custom DNS-over-UDP resolver +// +// In this chapter we learn how to measure sending DNS queries to +// a DNS server speaking the DNS-over-UDP protocol. +// +// Without further ado, let's describe our example `main.go` program +// and let's use it to better understand this flow. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter03/main.go`.) +// +// ## main.go +// +// The initial part of the program is pretty much the same as the one +// used in previous chapters, so I will not add further comments. +// +// ```Go package main import ( @@ -19,8 +38,532 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() mx := measurex.NewMeasurerWithDefaultSettings() + // ``` + // + // ### Using a custom UDP resolver + // + // We now invoke `LookupHostUDP`. 
We specify: + // + // - a context for timeout information; + // + // - the domain to query for; + // + // - the address of the DNS-over-UDP server endpoint. + // + // ```Go m := mx.LookupHostUDP(ctx, *query, *address) + // ``` + // + // Also this operation returns a measurement, which + // we print using the usual three-liner. + // + // ```Go data, err := json.Marshal(m) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) } + +// ``` +// +// ## Running the example program +// +// As before, let us start off with a vanilla run: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter03 +// ``` +// +// This time we get a much larger JSON, so I will pretend it is +// actually JavaScript and add comments to explain it inline. +// +// (This is the first case in which we see how a single +// method call for measurer causes several events to +// be generated and inserted into a `Measurement`.) +// +// ```JavaScript +// { +// "domain": "example.com", +// +// // This block tells us about the UDP connect events +// // where we bind to the server's endpoint +// "connect": [ +// { +// "address": "8.8.4.4:53", +// "failure": null, +// "operation": "connect", +// "proto": "udp", +// "t": 0.00043175, +// "started": 0.000191958, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:53", +// "failure": null, +// "operation": "connect", +// "proto": "udp", +// "t": 0.042198458, +// "started": 0.042113208, +// "oddity": "" +// } +// ], +// +// // This block shows the read and write events +// // occurred on the sockets (because we control +// // in full the implementation of this DNS +// // over UDP resolver, we can see these events) +// "read_write": [ +// { +// "address": "8.8.4.4:53", +// "failure": null, +// "num_bytes": 29, +// "operation": "write", +// "proto": "udp", +// "t": 0.000459583, +// "started": 0.00043825, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:53", +// "failure": null, +// "num_bytes": 45, +// "operation": 
"read", +// "proto": "udp", +// "t": 0.041955792, +// "started": 0.000471833, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:53", +// "failure": null, +// "num_bytes": 29, +// "operation": "write", +// "proto": "udp", +// "t": 0.042218917, +// "started": 0.042203, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:53", +// "failure": null, +// "num_bytes": 57, +// "operation": "read", +// "proto": "udp", +// "t": 0.196646583, +// "started": 0.042233167, +// "oddity": "" +// } +// ], +// +// // This is the same kind of result as before, we +// // show the emitted queries and the resolved addrs. +// // +// // Also note how here the resolver_address is the +// // correct endpoint address and the engine tells us +// // that we're using DNS over UDP. +// "lookup_host": [ +// { +// "answers": [ +// { +// "answer_type": "A", +// "ipv4": "93.184.216.34" +// } +// ], +// "engine": "udp", +// "failure": null, +// "hostname": "example.com", +// "query_type": "A", +// "resolver_address": "8.8.4.4:53", +// "t": 0.196777042, +// "started": 0.000118542, +// "oddity": "" +// }, +// { +// "answers": [ +// { +// "answer_type": "AAAA", +// "ivp6": "2606:2800:220:1:248:1893:25c8:1946" +// } +// ], +// "engine": "udp", +// "failure": null, +// "hostname": "example.com", +// "query_type": "AAAA", +// "resolver_address": "8.8.4.4:53", +// "t": 0.196777042, +// "started": 0.000118542, +// "oddity": "" +// } +// ], +// +// // This block shows the query we sent (encoded as base64) +// // and the response we received. Here we clearly see +// // that we perform two DNS "round trip" (i.e., send request +// // and receive response) to resolve a domain: one for +// // A and the other for AAAA. 
+// "dns_round_trip": [ +// { +// "engine": "udp", +// "resolver_address": "8.8.4.4:53", +// "raw_query": { +// "data": "PrcBAAABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", +// "format": "base64" +// }, +// "started": 0.000191625, +// "t": 0.041998667, +// "failure": null, +// "raw_reply": { +// "data": "PreBgAABAAEAAAAAB2V4YW1wbGUDY29tAAABAAHADAABAAEAAE8BAARduNgi", +// "format": "base64" +// } +// }, +// { +// "engine": "udp", +// "resolver_address": "8.8.4.4:53", +// "raw_query": { +// "data": "LAwBAAABAAAAAAAAB2V4YW1wbGUDY29tAAAcAAE=", +// "format": "base64" +// }, +// "started": 0.04210775, +// "t": 0.196701333, +// "failure": null, +// "raw_reply": { +// "data": "LAyBgAABAAEAAAAAB2V4YW1wbGUDY29tAAAcAAHADAAcAAEAAE6nABAmBigAAiAAAQJIGJMlyBlG", +// "format": "base64" +// } +// } +// ] +// } +// ``` +// +// This data format is really an extension of the `LookupHostSystem` +// one. It just adds more fields that clarify what happened at low +// level in terms of socket I/O and queries sent and received. +// +// Let us now try to provoke some errors and see how the +// output JSON changes because of them. +// +// ### Measurement with NXDOMAIN +// +// Let us try to get a NXDOMAIN error. 
+// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter03 -query antani.ooni.org +// ``` +// +// This produces the following JSON: +// +// ```JavaScript +// { +// "domain": "antani.ooni.org", +// "connect": [ /* snip */ ], +// "read_write": [ /* snip */ ], +// "lookup_host": [ +// { +// "answers": null, +// "engine": "udp", +// "failure": "dns_nxdomain_error", +// "hostname": "antani.ooni.org", +// "query_type": "A", +// "resolver_address": "8.8.4.4:53", +// "t": 0.098208709, +// "started": 8.95e-05, +// "oddity": "dns.lookup.nxdomain" +// }, +// { +// "answers": null, +// "engine": "udp", +// "failure": "dns_nxdomain_error", +// "hostname": "antani.ooni.org", +// "query_type": "AAAA", +// "resolver_address": "8.8.4.4:53", +// "t": 0.098208709, +// "started": 8.95e-05, +// "oddity": "dns.lookup.nxdomain" +// } +// ], +// "dns_round_trip": [ +// { +// "engine": "udp", +// "resolver_address": "8.8.4.4:53", +// "raw_query": { +// "data": "jLIBAAABAAAAAAAABmFudGFuaQRvb25pA29yZwAAAQAB", +// "format": "base64" +// }, +// "started": 0.000141542, +// "t": 0.034689417, +// "failure": null, +// "raw_reply": { +// "data": "jLKBgwABAAAAAQAABmFudGFuaQRvb25pA29yZwAAAQABwBMABgABAAAHCAA9BGRuczERcmVnaXN0cmFyLXNlcnZlcnMDY29tAApob3N0bWFzdGVywDJhABz8AACowAAADhAACTqAAAAOEQ==", +// "format": "base64" +// } +// }, +// { +// "engine": "udp", +// "resolver_address": "8.8.4.4:53", +// "raw_query": { +// "data": "azEBAAABAAAAAAAABmFudGFuaQRvb25pA29yZwAAHAAB", +// "format": "base64" +// }, +// "started": 0.034776709, +// "t": 0.098170542, +// "failure": null, +// "raw_reply": { +// "data": "azGBgwABAAAAAQAABmFudGFuaQRvb25pA29yZwAAHAABwBMABgABAAAHCAA9BGRuczERcmVnaXN0cmFyLXNlcnZlcnMDY29tAApob3N0bWFzdGVywDJhABz8AACowAAADhAACTqAAAAOEQ==", +// "format": "base64" +// } +// } +// ] +// } +// ``` +// +// We indeed get a NXDOMAIN error as the failure in `lookup_host`. 
+// +// Let us now decode one of the replies by using this program: +// +// ``` +// package main +// +// import ( +// "fmt" +// "encoding/base64" +// +// "github.com/miekg/dns" +// ) +// +// func main() { +// const query = "azGBgwABAAAAAQAABmFudGFuaQRvb25pA29yZwAAHAABwBMABgABAAAHCAA9BGRuczERcmVnaXN0cmFyLXNlcnZlcnMDY29tAApob3N0bWFzdGVywDJhABz8AACowAAADhAACTqAAAAOEQ==" +// data, _ := base64.StdEncoding.DecodeString(query) +// msg := new(dns.Msg) +// _ = msg.Unpack(data) +// fmt.Printf("%s\n", msg) +//} +// ``` +// +// where `query` is one of the replies. If we run this program +// we get as the output: +// +// ``` +// ;; opcode: QUERY, status: NXDOMAIN, id: 27441 +// ;; flags: qr rd ra; QUERY: 1, ANSWER: 0, AUTHORITY: 1, ADDITIONAL: 0 +// +// ;; QUESTION SECTION: +// ;antani.ooni.org. IN AAAA +// +// ;; AUTHORITY SECTION: +// ooni.org. 1800 IN SOA dns1.registrar-servers.com. hostmaster.registrar-servers.com. 1627397372 43200 3600 604800 3601 +// ``` +// +// ### Measurement with timeout +// +// Let us now query an IP address known for not responding +// to DNS queries, to get a timeout. 
+// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter03 -address 182.92.22.222:53 +// ``` +// +// Here's the corresponding JSON: +// +// ```JavaScript +// { +// "domain": "example.com", +// "connect": [ /* snip */ ], +// "read_write": [ +// { +// "address": "182.92.22.222:53", +// "failure": null, +// "num_bytes": 29, +// "operation": "write", +// "proto": "udp", +// "t": 0.0005275, +// "started": 0.000500209, +// "oddity": "" +// }, +// { +// "address": "182.92.22.222:53", +// "failure": "generic_timeout_error", /* <--- */ +// "operation": "read", +// "proto": "udp", +// "t": 5.001140125, +// "started": 0.000544042, +// "oddity": "" +// } +// ], +// "lookup_host": [ +// { +// "answers": null, +// "engine": "udp", +// "failure": "generic_timeout_error", /* <--- */ +// "hostname": "example.com", +// "query_type": "A", +// "resolver_address": "182.92.22.222:53", +// "t": 5.001462084, +// "started": 0.000127917, +// "oddity": "dns.lookup.timeout" /* <--- */ +// }, +// { +// "answers": null, +// "engine": "udp", +// "failure": "generic_timeout_error", +// "hostname": "example.com", +// "query_type": "AAAA", +// "resolver_address": "182.92.22.222:53", +// "t": 5.001462084, +// "started": 0.000127917, +// "oddity": "dns.lookup.timeout" +// } +// ], +// "dns_round_trip": [ +// { +// "engine": "udp", +// "resolver_address": "182.92.22.222:53", +// "raw_query": { +// "data": "ej8BAAABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", +// "format": "base64" +// }, +// "started": 0.000220584, +// "t": 5.001317417, +// "failure": "generic_timeout_error", +// "raw_reply": null +// } +// ] +// } +// ``` +// +// We see that we do fail with a timeout (I have marked some of them +// with comments inside the JSON). We see the timeout at three different +// levels of abstraction (from lower to higher abstraction): at the socket layer, +// during the DNS round trip, and during the DNS lookup. 
+// +// What we also see is that `t`'s value is ~5s when the `read` event +// fails, which tells us about the socket's read timeout. +// +// ### Measurement with REFUSED error +// +// Let us now try to get a REFUSED DNS Rcode, again from servers +// that are, let's say, kind enough to easily help. +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter03 -address 180.97.36.63:53 +// ``` +// +// Here's the answer I get: +// +// ```JavaScript +// { +// "domain": "example.com", +// "connect": [ /* snip */ ], +// +// // The I/O events look normal this time +// "read_write": [ +// { +// "address": "180.97.36.63:53", +// "failure": null, +// "num_bytes": 29, +// "operation": "write", +// "proto": "udp", +// "t": 0.000333583, +// "started": 0.000312125, +// "oddity": "" +// }, +// { +// "address": "180.97.36.63:53", +// "failure": null, +// "num_bytes": 29, +// "operation": "read", +// "proto": "udp", +// "t": 0.334948125, +// "started": 0.000366625, +// "oddity": "" +// }, +// { +// "address": "180.97.36.63:53", +// "failure": null, +// "num_bytes": 29, +// "operation": "write", +// "proto": "udp", +// "t": 0.3358025, +// "started": 0.335725958, +// "oddity": "" +// }, +// { +// "address": "180.97.36.63:53", +// "failure": null, +// "num_bytes": 29, +// "operation": "read", +// "proto": "udp", +// "t": 0.739987666, +// "started": 0.335863875, +// "oddity": "" +// } +// ], +// +// // But we see both in the error and in the oddity +// // that the response was "REFUSED" +// "lookup_host": [ +// { +// "answers": null, +// "engine": "udp", +// "failure": "dns_refused_error", +// "hostname": "example.com", +// "query_type": "A", +// "resolver_address": "180.97.36.63:53", +// "t": 0.7402975, +// "started": 7.2291e-05, +// "oddity": "dns.lookup.refused" +// }, +// { +// "answers": null, +// "engine": "udp", +// "failure": "dns_refused_error", +// "hostname": "example.com", +// "query_type": "AAAA", +// "resolver_address": "180.97.36.63:53", +// "t": 0.7402975, +// 
"started": 7.2291e-05, +// "oddity": "dns.lookup.refused" +// } +// ], +// +// // Exercise: do like I did before and decode the messages +// "dns_round_trip": [ +// { +// "engine": "udp", +// "resolver_address": "180.97.36.63:53", +// "raw_query": { +// "data": "crkBAAABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", +// "format": "base64" +// }, +// "started": 0.000130666, +// "t": 0.33509925, +// "failure": null, +// "raw_reply": { +// "data": "crmBBQABAAAAAAAAB2V4YW1wbGUDY29tAAABAAE=", +// "format": "base64" +// } +// }, +// { +// "engine": "udp", +// "resolver_address": "180.97.36.63:53", +// "raw_query": { +// "data": "ywcBAAABAAAAAAAAB2V4YW1wbGUDY29tAAAcAAE=", +// "format": "base64" +// }, +// "started": 0.335321333, +// "t": 0.740152375, +// "failure": null, +// "raw_reply": { +// "data": "yweBBQABAAAAAAAAB2V4YW1wbGUDY29tAAAcAAE=", +// "format": "base64" +// } +// } +// ] +// } +// ``` +// +// ## Conclusion +// +// We have seen how we can configure and use the flow for +// sending DNS queries over UDP and we have seen some common errors. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter04/README.md b/internal/tutorial/measurex/chapter04/README.md new file mode 100644 index 0000000000..a62a9f4795 --- /dev/null +++ b/internal/tutorial/measurex/chapter04/README.md @@ -0,0 +1,232 @@ + +# Chapter IV: TLS handshaking + +This chapter describes measuring TLS handshakes. + +Without further ado, let's describe our example `main.go` program +and let's use it to better understand how to measure that. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter04/main.go`.) + +## main.go + +The initial part of the program is pretty much the same as the one +used in previous chapters, so I will not add further comments. 
+ +```Go +package main + +import ( + "context" + "crypto/tls" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + sni := flag.String("sni", "dns.google", "domain to resolver") + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() +``` + +### Connecting and handshaking. + +We call the `TLSConnectAndHandshake` method. The arguments +are the context, the address, and a TLS config. + +```Go + m := mx.TLSConnectAndHandshake(ctx, *address, &tls.Config{ + ServerName: *sni, + NextProtos: []string{"h2", "http/1.1"}, + RootCAs: netxlite.NewDefaultCertPool(), + }) +``` + +The three fields above are the fields you should always set +in a TLS config when doing handshakes manually. The `ServerName` +field forces the SNI, the `NextProtos` field forces the ALPN, +and the `RootCAs` field is overridden so that we use the +CA bundle that we ship with OONI. (This CA bundle is the +same one you can find at https://curl.haxx.se/ca/.) + +As usual, the method to perform a measurement returns +the measurement itself, which we print below. 
+ +``` + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +``` +## Running the example program + +As before, let us start off with a vanilla run: + +```bash +go run -race ./internal/tutorial/measurex/chapter04 +``` + +Let us comment the JSON in detail: + +```JavaScript +{ + "network": "tcp", + "address": "8.8.4.4:443", + + // This block is generated when connecting to a TCP + // socket, as we've already seen in chapter02 + "connect": [ + { + "address": "8.8.4.4:443", + "failure": null, + "operation": "connect", + "proto": "tcp", + "t": 0.046959084, + "started": 0.022998875, + "oddity": "" + } + ], + + // These are the I/O events during the handshake + "read_write": [ + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 280, + "operation": "write", + "proto": "tcp", + "t": 0.048752875, + "started": 0.04874125, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 517, + "operation": "read", + "proto": "tcp", + "t": 0.087221334, + "started": 0.048760417, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 4301, + "operation": "read", + "proto": "tcp", + "t": 0.088843584, + "started": 0.088830959, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 64, + "operation": "write", + "proto": "tcp", + "t": 0.092078042, + "started": 0.092064042, + "oddity": "" + } + ], + + // This block contains information about the handshake + "tls_handshake": [ + { + "cipher_suite": "TLS_AES_128_GCM_SHA256", + "failure": null, + "negotiated_proto": "h2", + "tls_version": "TLSv1.3", + "peer_certificates": [ + { + "data": 
"MIIF4TCCBMmgAwIBAgIQGa7QSAXLo6sKAAAAAPz4cjANBgkqhkiG9w0BAQsFADBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzAeFw0yMTA4MzAwNDAwMDBaFw0yMTExMjIwMzU5NTlaMBUxEzARBgNVBAMTCmRucy5nb29nbGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC8cttrGHp3SS9YGYgsNLXt43dhW4d8FPULk0n6WYWC+EbMLkLnYXHLZHXJEz1Tor5hrCfHEVyX4xmhY2LCt0jprP6Gfo+gkKyjSV3LO65aWx6ezejvIdQBiLhSo/R5E3NwjMUAbm9PoNfSZSLiP3RjC3Px1vXFVmlcap4bUHnv9OvcPvwV1wmw5IMVzCuGBjCzJ4c4fxgyyggES1mbXZpYcDO4YKhSqIJx2D0gop9wzBQevI/kb35miN1pAvIKK2lgf7kZvYa7HH5vJ+vtn3Vkr34dKUAc/cO62t+NVufADPwn2/Tx8y8fPxlnCmoJeI+MPsw+StTYDawxajkjvZfdAgMBAAGjggL6MIIC9jAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUooaIxGAth6+bJh0JHYVWccyuoUcwHwYDVR0jBBgwFoAUinR/r4XN7pXNPZzQ4kYU83E1HScwagYIKwYBBQUHAQEEXjBcMCcGCCsGAQUFBzABhhtodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHMxYzMwMQYIKwYBBQUHMAKGJWh0dHA6Ly9wa2kuZ29vZy9yZXBvL2NlcnRzL2d0czFjMy5kZXIwgawGA1UdEQSBpDCBoYIKZG5zLmdvb2dsZYIOZG5zLmdvb2dsZS5jb22CECouZG5zLmdvb2dsZS5jb22CCzg4ODguZ29vZ2xlghBkbnM2NC5kbnMuZ29vZ2xlhwQICAgIhwQICAQEhxAgAUhgSGAAAAAAAAAAAIiIhxAgAUhgSGAAAAAAAAAAAIhEhxAgAUhgSGAAAAAAAAAAAGRkhxAgAUhgSGAAAAAAAAAAAABkMCEGA1UdIAQaMBgwCAYGZ4EMAQIBMAwGCisGAQQB1nkCBQMwPAYDVR0fBDUwMzAxoC+gLYYraHR0cDovL2NybHMucGtpLmdvb2cvZ3RzMWMzL2ZWSnhiVi1LdG1rLmNybDCCAQMGCisGAQQB1nkCBAIEgfQEgfEA7wB1AH0+8viP/4hVaCTCwMqeUol5K8UOeAl/LmqXaJl+IvDXAAABe5VtuiwAAAQDAEYwRAIgAwzr02ayTnNk/G+HDP50WTZUls3g+9P1fTGR9PEywpYCIAIOIQJ7nJTlcJdSyyOvgzX4BxJDr18mOKJPHlJs1naIAHYAXNxDkv7mq0VEsV6a1FbmEDf71fpH3KFzlLJe5vbHDsoAAAF7lW26IQAABAMARzBFAiAtlIkbCH+QgiO6T6Y/+UAf+eqHB2wdzMNfOoo4SnUhVgIhALPiRtyPMo8fPPxN3VgiXBqVF7tzLWTJUjprOe4kQUCgMA0GCSqGSIb3DQEBCwUAA4IBAQDVq3WWgg6eYSpFLfNgo2KzLKDPkWZx42gW2Tum6JZd6O/Nj+mjYGOyXyryTslUwmONxiq2Ip3PLA/qlbPdYic1F1mDwMHSzRteSe7axwEP6RkoxhMy5zuI4hfijhSrfhVUZF299PesDf2gI+Vh30s6muHVfQjbXOl/AkAqIPLSetv2mS9MHQLeHcCCXpwsXQJwusZ3+ILrgCRAGv6NLXwbfE0t3OjXV0gnNRp3DWEaF+yrfjE0oU1myeYDNtugsw8VRwTzCM53Nqf/BJffnuShmBBZfZ2jlsPnLys0UqCZo2dg5wdwj3DaKtHO5Pofq6P8r4w6W/a
UZCTLUi1jZ3Gc", + "format": "base64" + }, + { + "data": "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAwMDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzpkgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsXlOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcmBA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKAgOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwLtmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1UdDwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYDVR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYGCCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcwAoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQtMCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcGA1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3BraS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcNAQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQcSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrLRklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U+o53UxtkOCcXCb1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2YrPxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IERlQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGsYye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjOz23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJGAJ2xDx8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKwjuDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd", + "format": "base64" + }, + 
{ + "data": "MIIFYjCCBEqgAwIBAgIQd70NbNs2+RrqIQ/E8FjTDTANBgkqhkiG9w0BAQsFADBXMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1zYTEQMA4GA1UECxMHUm9vdCBDQTEbMBkGA1UEAxMSR2xvYmFsU2lnbiBSb290IENBMB4XDTIwMDYxOTAwMDA0MloXDTI4MDEyODAwMDA0MlowRzELMAkGA1UEBhMCVVMxIjAgBgNVBAoTGUdvb2dsZSBUcnVzdCBTZXJ2aWNlcyBMTEMxFDASBgNVBAMTC0dUUyBSb290IFIxMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAthECix7joXebO9y/lD63ladAPKH9gvl9MgaCcfb2jH/76Nu8ai6Xl6OMS/kr9rH5zoQdsfnFl97vufKj6bwSiV6nqlKr+CMny6SxnGPb15l+8Ape62im9MZaRw1NEDPjTrETo8gYbEvs/AmQ351kKSUjB6G00j0uYODP0gmHu81I8E3CwnqIiru6z1kZ1q+PsAewnjHxgsHA3y6mbWwZDrXYfiYaRQM9sHmklCitD38m5agI/pboPGiUU+6DOogrFZYJsuB6jC511pzrp1Zkj5ZPaK49l8KEj8C8QMALXL32h7M1bKwYUH+E4EzNktMg6TO8UpmvMrUpsyUqtEj5cuHKZPfmghCN6J3Cioj6OGaK/GP5Afl4/Xtcd/p2h/rs37EOeZVXtL0m79YB0esWCruOC7XFxYpVq9Os6pFLKcwZpDIlTirxZUTQAs6qzkm06p98g7BAe+dDq6dso499iYH6TKX/1Y7DzkvgtdizjkXPdsDtQCv9Uw+wp9U7DbGKogPeMa3Md+pvez7W35EiEua++tgy/BBjFFFy3l3WFpO9KWgz7zpm7AeKJt8T11dleCfeXkkUAKIAf5qoIbapsZWwpbkNFhHax2xIPEDgfg1azVY80ZcFuctL7TlLnMQ/0lUTbiSw1nH69MG6zO0b9f6BQdgAmD06yK56mDcYBZUCAwEAAaOCATgwggE0MA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTkrysmcRorSCeFL1JmLO/wiRNxPjAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30EzTSo//z9SzBgBggrBgEFBQcBAQRUMFIwJQYIKwYBBQUHMAGGGWh0dHA6Ly9vY3NwLnBraS5nb29nL2dzcjEwKQYIKwYBBQUHMAKGHWh0dHA6Ly9wa2kuZ29vZy9nc3IxL2dzcjEuY3J0MDIGA1UdHwQrMCkwJ6AloCOGIWh0dHA6Ly9jcmwucGtpLmdvb2cvZ3NyMS9nc3IxLmNybDA7BgNVHSAENDAyMAgGBmeBDAECATAIBgZngQwBAgIwDQYLKwYBBAHWeQIFAwIwDQYLKwYBBAHWeQIFAwMwDQYJKoZIhvcNAQELBQADggEBADSkHrEoo9C0dhemMXoh6dFSPsjbdBZBiLg9NR3t5P+T4Vxfq7vqfM/b5A3Ri1fyJm9bvhdGaJQ3b2t6yMAYN/olUazsaL+yyEn9WprKASOshIArAoyZl+tJaox118fessmXn1hIVw41oeQa1v1vg4Fv74zPl6/AhSrw9U5pCZEt4Wi4wStz6dTZ/CLANx8LZh1J7QJVj2fhMtfTJr9w4z30Z209fOU0iOMy+qduBmpvvYuR7hZL6Dupszfnw0Skfths18dG9ZKb59UhvmaSGZRVbNQpsg3BZlvid0lIKO2d1xozclOzgjXPYovJJIultzkMu34qQb9Sz/yilrbCgj8=", + "format": "base64" + } + ], + "t": 0.092117709, + "address": "8.8.4.4:443", + "server_name": "dns.google", + "alpn": [ + "h2", + 
"http/1.1" + ], + "no_tls_verify": false, + "oddity": "", + "proto": "tcp", + "started": 0.047288542 + } + ] +} +``` + +All the data formats we're using here are, by the way, +compatible with the data formats specified at +https://github.com/ooni/spec/tree/master/data-formats. + +### Exercises + +Try to run experiments in the following scenarios, and +check the output JSON to familiarize yourself with what changes in +different error conditions. + +1. measurement that causes timeout + +2. measurement with wrong SNI + +3. measurement with self-signed certificate + +4. measurement with expired certificate + +5. measurement with connection reset during handshake + +6. measurement with timeout during handshake + +To emulate the last two scenarios, if you're on Linux, a +possibility is building Jafar with this command: + +``` +go build -v ./internal/cmd/jafar +``` + +Then, for example, to provoke a connection reset you +can run in a terminal: + +``` +sudo ./jafar -iptables-reset-keyword dns.google +``` + +and you can run this tutorial with `dns.google` as +the SNI in another terminal. + +Likewise, you can obtain a timeout using the +`-iptables-drop-keyword` flag instead. + +(Jafar runs forever and censors. You need to use +`^C` to stop it.) + +## Conclusion + +We have seen how to measure TLS handshakes. We have seen how +this flow produces different output on different error conditions. + diff --git a/internal/tutorial/measurex/chapter04/main.go b/internal/tutorial/measurex/chapter04/main.go index 8a19f643b5..bce84145fc 100644 --- a/internal/tutorial/measurex/chapter04/main.go +++ b/internal/tutorial/measurex/chapter04/main.go @@ -1,3 +1,21 @@ +// -=-=- StartHere -=-=- +// +// # Chapter IV: TLS handshaking +// +// This chapter describes measuring TLS handshakes. +// +// Without further ado, let's describe our example `main.go` program +// and let's use it to better understand how to measure that. +// +// (This file is auto-generated. Do not edit it directly! 
To apply +// changes you need to modify `./internal/tutorial/measurex/chapter04/main.go`.) +// +// ## main.go +// +// The initial part of the program is pretty much the same as the one +// used in previous chapters, so I will not add further comments. +// +// ```Go package main import ( @@ -21,12 +39,210 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() mx := measurex.NewMeasurerWithDefaultSettings() + // ``` + // + // ### Connecting and handshaking. + // + // We call the `TLSConnectAndHandshake` method. The arguments + // are the context, the address, and a TLS config. + // + // ```Go m := mx.TLSConnectAndHandshake(ctx, *address, &tls.Config{ ServerName: *sni, NextProtos: []string{"h2", "http/1.1"}, RootCAs: netxlite.NewDefaultCertPool(), }) + // ``` + // + // The three fields above are the fields you should always set + // in a TLS config when doing handshakes manually. The `ServerName` + // field forces the SNI, the `NextProtos` field forces the ALPN, + // and the `RootCAs` field is overridden so that we use the + // CA bundle that we ship with OONI. (This CA bundle is the + // same one you can find at https://curl.haxx.se/ca/.) + // + // As usual, the method to perform a measurement returns + // the measurement itself, which we print below. 
+ // + // ``` data, err := json.Marshal(m) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) } + +// ``` +// ## Running the example program +// +// As before, let us start off with a vanilla run: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter04 +// ``` +// +// Let us comment the JSON in detail: +// +// ```JavaScript +// { +// "network": "tcp", +// "address": "8.8.4.4:443", +// +// // This block is generated when connecting to a TCP +// // socket, as we've already seen in chapter02 +// "connect": [ +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "operation": "connect", +// "proto": "tcp", +// "t": 0.046959084, +// "started": 0.022998875, +// "oddity": "" +// } +// ], +// +// // These are the I/O events during the handshake +// "read_write": [ +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 280, +// "operation": "write", +// "proto": "tcp", +// "t": 0.048752875, +// "started": 0.04874125, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 517, +// "operation": "read", +// "proto": "tcp", +// "t": 0.087221334, +// "started": 0.048760417, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 4301, +// "operation": "read", +// "proto": "tcp", +// "t": 0.088843584, +// "started": 0.088830959, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 64, +// "operation": "write", +// "proto": "tcp", +// "t": 0.092078042, +// "started": 0.092064042, +// "oddity": "" +// } +// ], +// +// // This block contains information about the handshake +// "tls_handshake": [ +// { +// "cipher_suite": "TLS_AES_128_GCM_SHA256", +// "failure": null, +// "negotiated_proto": "h2", +// "tls_version": "TLSv1.3", +// "peer_certificates": [ +// { +// "data": 
"MIIF4TCCBMmgAwIBAgIQGa7QSAXLo6sKAAAAAPz4cjANBgkqhkiG9w0BAQsFADBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzAeFw0yMTA4MzAwNDAwMDBaFw0yMTExMjIwMzU5NTlaMBUxEzARBgNVBAMTCmRucy5nb29nbGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC8cttrGHp3SS9YGYgsNLXt43dhW4d8FPULk0n6WYWC+EbMLkLnYXHLZHXJEz1Tor5hrCfHEVyX4xmhY2LCt0jprP6Gfo+gkKyjSV3LO65aWx6ezejvIdQBiLhSo/R5E3NwjMUAbm9PoNfSZSLiP3RjC3Px1vXFVmlcap4bUHnv9OvcPvwV1wmw5IMVzCuGBjCzJ4c4fxgyyggES1mbXZpYcDO4YKhSqIJx2D0gop9wzBQevI/kb35miN1pAvIKK2lgf7kZvYa7HH5vJ+vtn3Vkr34dKUAc/cO62t+NVufADPwn2/Tx8y8fPxlnCmoJeI+MPsw+StTYDawxajkjvZfdAgMBAAGjggL6MIIC9jAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUooaIxGAth6+bJh0JHYVWccyuoUcwHwYDVR0jBBgwFoAUinR/r4XN7pXNPZzQ4kYU83E1HScwagYIKwYBBQUHAQEEXjBcMCcGCCsGAQUFBzABhhtodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHMxYzMwMQYIKwYBBQUHMAKGJWh0dHA6Ly9wa2kuZ29vZy9yZXBvL2NlcnRzL2d0czFjMy5kZXIwgawGA1UdEQSBpDCBoYIKZG5zLmdvb2dsZYIOZG5zLmdvb2dsZS5jb22CECouZG5zLmdvb2dsZS5jb22CCzg4ODguZ29vZ2xlghBkbnM2NC5kbnMuZ29vZ2xlhwQICAgIhwQICAQEhxAgAUhgSGAAAAAAAAAAAIiIhxAgAUhgSGAAAAAAAAAAAIhEhxAgAUhgSGAAAAAAAAAAAGRkhxAgAUhgSGAAAAAAAAAAAABkMCEGA1UdIAQaMBgwCAYGZ4EMAQIBMAwGCisGAQQB1nkCBQMwPAYDVR0fBDUwMzAxoC+gLYYraHR0cDovL2NybHMucGtpLmdvb2cvZ3RzMWMzL2ZWSnhiVi1LdG1rLmNybDCCAQMGCisGAQQB1nkCBAIEgfQEgfEA7wB1AH0+8viP/4hVaCTCwMqeUol5K8UOeAl/LmqXaJl+IvDXAAABe5VtuiwAAAQDAEYwRAIgAwzr02ayTnNk/G+HDP50WTZUls3g+9P1fTGR9PEywpYCIAIOIQJ7nJTlcJdSyyOvgzX4BxJDr18mOKJPHlJs1naIAHYAXNxDkv7mq0VEsV6a1FbmEDf71fpH3KFzlLJe5vbHDsoAAAF7lW26IQAABAMARzBFAiAtlIkbCH+QgiO6T6Y/+UAf+eqHB2wdzMNfOoo4SnUhVgIhALPiRtyPMo8fPPxN3VgiXBqVF7tzLWTJUjprOe4kQUCgMA0GCSqGSIb3DQEBCwUAA4IBAQDVq3WWgg6eYSpFLfNgo2KzLKDPkWZx42gW2Tum6JZd6O/Nj+mjYGOyXyryTslUwmONxiq2Ip3PLA/qlbPdYic1F1mDwMHSzRteSe7axwEP6RkoxhMy5zuI4hfijhSrfhVUZF299PesDf2gI+Vh30s6muHVfQjbXOl/AkAqIPLSetv2mS9MHQLeHcCCXpwsXQJwusZ3+ILrgCRAGv6NLXwbfE0t3OjXV0gnNRp3DWEaF+yrfjE0oU1myeYDNtugsw8VRwTzCM53Nqf/BJffnuShmBBZfZ2jlsPnLys0UqCZo2dg5wdwj3DaKtHO5Pofq6P8r4w6W/a
UZCTLUi1jZ3Gc", +// "format": "base64" +// }, +// { +// "data": "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAwMDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzpkgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsXlOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcmBA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKAgOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwLtmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1UdDwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYDVR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYGCCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcwAoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQtMCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcGA1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3BraS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcNAQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQcSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrLRklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U+o53UxtkOCcXCb1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2YrPxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IERlQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGsYye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjOz23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJGAJ2xDx8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKwjuDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd", +// "format": 
"base64" +// }, +// { +// "data": "MIIFYjCCBEqgAwIBAgIQd70NbNs2+RrqIQ/E8FjTDTANBgkqhkiG9w0BAQsFADBXMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1zYTEQMA4GA1UECxMHUm9vdCBDQTEbMBkGA1UEAxMSR2xvYmFsU2lnbiBSb290IENBMB4XDTIwMDYxOTAwMDA0MloXDTI4MDEyODAwMDA0MlowRzELMAkGA1UEBhMCVVMxIjAgBgNVBAoTGUdvb2dsZSBUcnVzdCBTZXJ2aWNlcyBMTEMxFDASBgNVBAMTC0dUUyBSb290IFIxMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAthECix7joXebO9y/lD63ladAPKH9gvl9MgaCcfb2jH/76Nu8ai6Xl6OMS/kr9rH5zoQdsfnFl97vufKj6bwSiV6nqlKr+CMny6SxnGPb15l+8Ape62im9MZaRw1NEDPjTrETo8gYbEvs/AmQ351kKSUjB6G00j0uYODP0gmHu81I8E3CwnqIiru6z1kZ1q+PsAewnjHxgsHA3y6mbWwZDrXYfiYaRQM9sHmklCitD38m5agI/pboPGiUU+6DOogrFZYJsuB6jC511pzrp1Zkj5ZPaK49l8KEj8C8QMALXL32h7M1bKwYUH+E4EzNktMg6TO8UpmvMrUpsyUqtEj5cuHKZPfmghCN6J3Cioj6OGaK/GP5Afl4/Xtcd/p2h/rs37EOeZVXtL0m79YB0esWCruOC7XFxYpVq9Os6pFLKcwZpDIlTirxZUTQAs6qzkm06p98g7BAe+dDq6dso499iYH6TKX/1Y7DzkvgtdizjkXPdsDtQCv9Uw+wp9U7DbGKogPeMa3Md+pvez7W35EiEua++tgy/BBjFFFy3l3WFpO9KWgz7zpm7AeKJt8T11dleCfeXkkUAKIAf5qoIbapsZWwpbkNFhHax2xIPEDgfg1azVY80ZcFuctL7TlLnMQ/0lUTbiSw1nH69MG6zO0b9f6BQdgAmD06yK56mDcYBZUCAwEAAaOCATgwggE0MA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTkrysmcRorSCeFL1JmLO/wiRNxPjAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30EzTSo//z9SzBgBggrBgEFBQcBAQRUMFIwJQYIKwYBBQUHMAGGGWh0dHA6Ly9vY3NwLnBraS5nb29nL2dzcjEwKQYIKwYBBQUHMAKGHWh0dHA6Ly9wa2kuZ29vZy9nc3IxL2dzcjEuY3J0MDIGA1UdHwQrMCkwJ6AloCOGIWh0dHA6Ly9jcmwucGtpLmdvb2cvZ3NyMS9nc3IxLmNybDA7BgNVHSAENDAyMAgGBmeBDAECATAIBgZngQwBAgIwDQYLKwYBBAHWeQIFAwIwDQYLKwYBBAHWeQIFAwMwDQYJKoZIhvcNAQELBQADggEBADSkHrEoo9C0dhemMXoh6dFSPsjbdBZBiLg9NR3t5P+T4Vxfq7vqfM/b5A3Ri1fyJm9bvhdGaJQ3b2t6yMAYN/olUazsaL+yyEn9WprKASOshIArAoyZl+tJaox118fessmXn1hIVw41oeQa1v1vg4Fv74zPl6/AhSrw9U5pCZEt4Wi4wStz6dTZ/CLANx8LZh1J7QJVj2fhMtfTJr9w4z30Z209fOU0iOMy+qduBmpvvYuR7hZL6Dupszfnw0Skfths18dG9ZKb59UhvmaSGZRVbNQpsg3BZlvid0lIKO2d1xozclOzgjXPYovJJIultzkMu34qQb9Sz/yilrbCgj8=", +// "format": "base64" +// } +// ], +// "t": 0.092117709, +// "address": "8.8.4.4:443", +// "server_name": 
"dns.google", +// "alpn": [ +// "h2", +// "http/1.1" +// ], +// "no_tls_verify": false, +// "oddity": "", +// "proto": "tcp", +// "started": 0.047288542 +// } +// ] +// } +// ``` +// +// All the data formats we're using here are, by the way, +// compatible with the data formats specified at +// https://github.com/ooni/spec/tree/master/data-formats. +// +// ### Exercises +// +// Try to run experiments in the following scenarios, and +// check the output JSON to familiarize with what changes in +// different error conditions. +// +// 1. measurement that causes timeout +// +// 2. measurement with wrong SNI +// +// 3. measurement with self-signed certificate +// +// 4. measurement with expired certificate +// +// 5. measurement with connection reset during handshake +// +// 6. measurement with timeout during handshake +// +// To emulate the two last scenario, if you're on Linux, a +// possibility is building Jafar with this command: +// +// ``` +// go build -v ./internal/cmd/jafar +// ``` +// +// Then, for example, to provoke a connection reset you +// can run in a terminal: +// +// ``` +// sudo ./jafar -iptables-reset-keyword dns.google +// ``` +// +// and you can run this tutorial with `dns.google` as +// the SNI in another terminal. +// +// Likewise, you can obtain a timeout using the +// `-iptables-drop-keyword` flag instead. +// +// (Jafar runs forever and censors. You need to use +// `^C` to terminate it from running.) +// +// ## Conclusion +// +// We have seen how to measure TLS handshakes. We have seen how +// this flow produces different output on different error conditions. +// +// -=-=- StopHere -=-=- +// +// Here are the commands I used for each proposed exercise: +// +// 1. go run -race ./internal/tutorial/measurex/chapter04 -address 8.8.4.4:1 +// +// 2. go run -race ./internal/tutorial/measurex/chapter04 -sni example.org +// +// 3. go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni self-signed.badssl.com +// +// 4. 
go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni expire.badssl.com +// +// 5. use jafar as suggested with vanilla go run ... command +// +// 6. use jafar as suggested with vanilla go run ... command diff --git a/internal/tutorial/measurex/chapter05/README.md b/internal/tutorial/measurex/chapter05/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter06/README.md b/internal/tutorial/measurex/chapter06/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter07/README.md b/internal/tutorial/measurex/chapter07/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter07/main.go b/internal/tutorial/measurex/chapter07/main.go index 379332c7d1..da5191a16c 100644 --- a/internal/tutorial/measurex/chapter07/main.go +++ b/internal/tutorial/measurex/chapter07/main.go @@ -13,8 +13,8 @@ import ( ) type measurement struct { - DNS *measurex.Measurement - Endpoints []*measurex.Measurement + DNS *measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement } func main() { @@ -29,7 +29,8 @@ func main() { mx := measurex.NewMeasurerWithDefaultSettings() m := &measurement{} m.DNS = mx.LookupHostUDP(ctx, parsed.Hostname(), *address) - httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + headers := measurex.NewHTTPRequestHeaderForMeasuring() + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS) runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") cookies := measurex.NewCookieJar() for _, epnt := range httpEndpoints { diff --git a/internal/tutorial/measurex/chapter08/README.md b/internal/tutorial/measurex/chapter08/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter08/main.go b/internal/tutorial/measurex/chapter08/main.go index 6ff73a7242..74c90749b7 100644 --- 
a/internal/tutorial/measurex/chapter08/main.go +++ b/internal/tutorial/measurex/chapter08/main.go @@ -13,8 +13,8 @@ import ( ) type measurement struct { - DNS []*measurex.Measurement - Endpoints []*measurex.Measurement + DNS []*measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement } func main() { @@ -30,7 +30,8 @@ func main() { m := &measurement{} m.DNS = append(m.DNS, mx.LookupHostUDP(ctx, parsed.Hostname(), *address)) m.DNS = append(m.DNS, mx.LookupHTTPSSvcUDP(ctx, parsed.Hostname(), *address)) - httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + headers := measurex.NewHTTPRequestHeaderForMeasuring() + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") cookies := measurex.NewCookieJar() for _, epnt := range httpEndpoints { diff --git a/internal/tutorial/measurex/chapter09/README.md b/internal/tutorial/measurex/chapter09/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter09/main.go b/internal/tutorial/measurex/chapter09/main.go index cf47314f62..2dde889807 100644 --- a/internal/tutorial/measurex/chapter09/main.go +++ b/internal/tutorial/measurex/chapter09/main.go @@ -13,8 +13,8 @@ import ( ) type measurement struct { - DNS []*measurex.Measurement - Endpoints []*measurex.Measurement + DNS []*measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement } func main() { @@ -30,7 +30,8 @@ func main() { m := &measurement{} m.DNS = append(m.DNS, mx.LookupHostUDP(ctx, parsed.Hostname(), *address)) m.DNS = append(m.DNS, mx.LookupHTTPSSvcUDP(ctx, parsed.Hostname(), *address)) - httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + headers := measurex.NewHTTPRequestHeaderForMeasuring() + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) 
runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") cookies := measurex.NewCookieJar() for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) { diff --git a/internal/tutorial/measurex/chapter10/README.md b/internal/tutorial/measurex/chapter10/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter10/main.go b/internal/tutorial/measurex/chapter10/main.go index b15c2d2109..9643d4a15f 100644 --- a/internal/tutorial/measurex/chapter10/main.go +++ b/internal/tutorial/measurex/chapter10/main.go @@ -13,8 +13,8 @@ import ( ) type measurement struct { - DNS []*measurex.Measurement - Endpoints []*measurex.Measurement + DNS []*measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement } func main() { @@ -26,12 +26,25 @@ func main() { parsed, err := url.Parse(*URL) runtimex.PanicOnError(err, "url.Parse failed") mx := measurex.NewMeasurerWithDefaultSettings() + mx.Resolvers = []*measurex.ResolverInfo{{ + Network: measurex.ResolverUDP, + Address: "8.8.8.8:53", + }, { + Network: measurex.ResolverUDP, + Address: "8.8.4.4:53", + }, { + Network: measurex.ResolverUDP, + Address: "1.1.1.1:53", + }, { + Network: measurex.ResolverUDP, + Address: "1.0.0.1:53", + }} m := &measurement{} - mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") for dns := range mx.LookupURLHostParallel(ctx, parsed) { m.DNS = append(m.DNS, dns) } - httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) + headers := measurex.NewHTTPRequestHeaderForMeasuring() + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") cookies := measurex.NewCookieJar() for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) 
{ diff --git a/internal/tutorial/measurex/chapter11/README.md b/internal/tutorial/measurex/chapter11/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter11/main.go b/internal/tutorial/measurex/chapter11/main.go index 9a81114954..162407526a 100644 --- a/internal/tutorial/measurex/chapter11/main.go +++ b/internal/tutorial/measurex/chapter11/main.go @@ -5,43 +5,23 @@ import ( "encoding/json" "flag" "fmt" - "net/url" "time" "github.com/ooni/probe-cli/v3/internal/measurex" "github.com/ooni/probe-cli/v3/internal/runtimex" ) -type measurement struct { - DNS []*measurex.Measurement - TH []*measurex.Measurement - Endpoints []*measurex.Measurement -} - func main() { URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") flag.Parse() ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() - parsed, err := url.Parse(*URL) - runtimex.PanicOnError(err, "url.Parse failed") mx := measurex.NewMeasurerWithDefaultSettings() - m := &measurement{} - mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") - for dns := range mx.LookupURLHostParallel(ctx, parsed) { - m.DNS = append(m.DNS, dns) - } - mx.RegisterWCTH("https://wcth.ooni.io/") - for th := range mx.QueryTestHelperParallel(ctx, parsed) { - m.TH = append(m.TH, th) - } - httpEndpoints, err := mx.DB.SelectAllHTTPEndpointsForURL(parsed) - runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") cookies := measurex.NewCookieJar() - for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) 
{ - m.Endpoints = append(m.Endpoints, epnt) - } + headers := measurex.NewHTTPRequestHeaderForMeasuring() + m, err := mx.MeasureURL(ctx, *URL, headers, cookies) + runtimex.PanicOnError(err, "mx.MeasureURL failed") data, err := json.Marshal(m) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) diff --git a/internal/tutorial/measurex/chapter12/README.md b/internal/tutorial/measurex/chapter12/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter12/main.go b/internal/tutorial/measurex/chapter12/main.go index 352e5106c9..a3cd8a2662 100644 --- a/internal/tutorial/measurex/chapter12/main.go +++ b/internal/tutorial/measurex/chapter12/main.go @@ -11,6 +11,10 @@ import ( "github.com/ooni/probe-cli/v3/internal/runtimex" ) +type measurement struct { + URLs []*measurex.URLMeasurement +} + func main() { URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") @@ -18,11 +22,13 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() mx := measurex.NewMeasurerWithDefaultSettings() - mx.RegisterWCTH("https://wcth.ooni.io/") - mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") cookies := measurex.NewCookieJar() - m := mx.MeasureURL(ctx, *URL, cookies) - data, err := json.Marshal(m) + all := &measurement{} + headers := measurex.NewHTTPRequestHeaderForMeasuring() + for m := range mx.MeasureHTTPURLAndFollowRedirections(ctx, *URL, headers, cookies) { + all.URLs = append(all.URLs, m) + } + data, err := json.Marshal(all) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) } diff --git a/internal/tutorial/measurex/chapter13/README.md b/internal/tutorial/measurex/chapter13/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/internal/tutorial/measurex/chapter13/main.go 
b/internal/tutorial/measurex/chapter13/main.go index bb28c2164c..443e807cb3 100644 --- a/internal/tutorial/measurex/chapter13/main.go +++ b/internal/tutorial/measurex/chapter13/main.go @@ -5,31 +5,68 @@ import ( "encoding/json" "flag" "fmt" + "net/http" "time" + "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/measurex" "github.com/ooni/probe-cli/v3/internal/runtimex" ) type measurement struct { - URLs []*measurex.URLMeasurement + Queries []*measurex.DNSLookupEvent `json:"queries"` + TCPConnect []*measurex.NetworkEvent `json:"tcp_connect"` + TLSHandshakes []*measurex.TLSHandshakeEvent `json:"tls_handshakes"` + Requests []*measurex.HTTPRoundTripEvent `json:"requests"` +} + +func (m *measurement) addQueries(dm *measurex.DNSMeasurement) { + m.Queries = append(m.Queries, dm.LookupHost...) +} + +func (m *measurement) addEndpointCheck(em *measurex.EndpointMeasurement) { + for _, ev := range em.Connect { + switch ev.Network { + case "tcp": + m.TCPConnect = append(m.TCPConnect, ev) + } + } + m.TLSHandshakes = append(m.TLSHandshakes, em.TLSHandshake...) +} + +func (m *measurement) addHTTPCheck(hem *measurex.Measurement) { + m.Requests = append(m.Requests, hem.HTTPRoundTrip...) 
} func main() { - URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + URL := flag.String("url", "https://www.google.com/", "URL to fetch") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") flag.Parse() ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() mx := measurex.NewMeasurerWithDefaultSettings() - mx.RegisterWCTH("https://wcth.ooni.io/") - mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") cookies := measurex.NewCookieJar() - all := &measurement{} - for m := range mx.MeasureHTTPURLAndFollowRedirections(ctx, *URL, cookies) { - all.URLs = append(all.URLs, m) + db := &measurex.MeasurementDB{} + txp := mx.NewTracingHTTPTransportWithDefaultSettings(log.Log, db) + txp.MaxBodySnapshotSize = 1 << 14 + client := &http.Client{Jar: cookies, Transport: txp} + req, err := measurex.NewHTTPGetRequest(ctx, *URL) + runtimex.PanicOnError(err, "NewHTTPGetRequest failed") + resp, err := client.Do(req) + if err == nil { + resp.Body.Close() // be tidy + } + httpEndpoints, err := measurex.UnmeasuredHTTPEndpoints( + db, *URL, measurex.NewHTTPRequestHeaderForMeasuring()) + runtimex.PanicOnError(err, "cannot determine unmeasured HTTP endpoints") + for _, epnt := range httpEndpoints { + resp, err = mx.HTTPEndpointGetWithDB(ctx, epnt, db, cookies) + if err == nil { + resp.Body.Close() // be tidy + } } - data, err := json.Marshal(all) + m := db.AsMeasurement() + data, err := json.Marshal(m) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) } From e155a08d83f116676fdd619a20665695aac8820d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 24 Sep 2021 15:11:22 +0200 Subject: [PATCH 41/53] the tutorials are now ready --- .../engine/experiment/webstepsx/measurer.go | 2 +- internal/measurex/db.go | 16 + internal/measurex/measurer.go | 109 ++-- internal/tutorial/README.md | 2 + internal/tutorial/generator/main.go | 1 + internal/tutorial/measurex/README.md | 
104 ++++ .../tutorial/measurex/chapter02/README.md | 6 +- internal/tutorial/measurex/chapter02/main.go | 6 +- .../tutorial/measurex/chapter03/README.md | 4 +- internal/tutorial/measurex/chapter03/main.go | 4 +- .../tutorial/measurex/chapter04/README.md | 19 +- internal/tutorial/measurex/chapter04/main.go | 33 +- .../tutorial/measurex/chapter05/README.md | 280 ++++++++++ internal/tutorial/measurex/chapter05/main.go | 250 +++++++++ .../tutorial/measurex/chapter06/README.md | 486 ++++++++++++++++++ internal/tutorial/measurex/chapter06/main.go | 467 ++++++++++++++++- .../tutorial/measurex/chapter07/README.md | 142 +++++ internal/tutorial/measurex/chapter07/main.go | 112 +++- .../tutorial/measurex/chapter08/README.md | 132 +++++ internal/tutorial/measurex/chapter08/main.go | 101 +++- .../tutorial/measurex/chapter09/README.md | 124 +++++ internal/tutorial/measurex/chapter09/main.go | 89 +++- .../tutorial/measurex/chapter10/README.md | 127 +++++ internal/tutorial/measurex/chapter10/main.go | 85 ++- .../tutorial/measurex/chapter11/README.md | 126 +++++ internal/tutorial/measurex/chapter11/main.go | 108 +++- .../tutorial/measurex/chapter12/README.md | 94 ++++ internal/tutorial/measurex/chapter12/main.go | 74 ++- .../tutorial/measurex/chapter13/README.md | 97 ++++ internal/tutorial/measurex/chapter13/main.go | 162 +++--- .../tutorial/measurex/chapter14/README.md | 318 ++++++++++++ internal/tutorial/measurex/chapter14/main.go | 311 ++++++++++- 32 files changed, 3779 insertions(+), 212 deletions(-) create mode 100644 internal/tutorial/measurex/README.md create mode 100644 internal/tutorial/measurex/chapter14/README.md diff --git a/internal/engine/experiment/webstepsx/measurer.go b/internal/engine/experiment/webstepsx/measurer.go index 1eb2b4980f..b6490f0a10 100644 --- a/internal/engine/experiment/webstepsx/measurer.go +++ b/internal/engine/experiment/webstepsx/measurer.go @@ -132,7 +132,7 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, 
TLSHandshaker: netxlite.NewTLSHandshakerStdlib(sess.Logger()), } cookies := measurex.NewCookieJar() - in := mmx.MeasureHTTPURLAndFollowRedirections( + in := mmx.MeasureURLAndFollowRedirections( ctx, URL, measurex.NewHTTPRequestHeaderForMeasuring(), cookies) for m := range in { out <- &model.ExperimentAsyncTestKeys{ diff --git a/internal/measurex/db.go b/internal/measurex/db.go index 03f84d30dd..7260e07cca 100644 --- a/internal/measurex/db.go +++ b/internal/measurex/db.go @@ -70,6 +70,22 @@ type MeasurementDB struct { var _ WritableDB = &MeasurementDB{} +// DeleteAll deletes all the content of the DB. +func (db *MeasurementDB) DeleteAll() { + db.mu.Lock() + db.dialTable = nil + db.readWriteTable = nil + db.closeTable = nil + db.tlsHandshakeTable = nil + db.lookupHostTable = nil + db.lookupHTTPSvcTable = nil + db.dnsRoundTripTable = nil + db.httpRoundTripTable = nil + db.httpRedirectTable = nil + db.quicHandshakeTable = nil + db.mu.Unlock() +} + // InsertIntoDial implements EventDB.InsertIntoDial. func (db *MeasurementDB) InsertIntoDial(ev *NetworkEvent) { db.mu.Lock() diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index ebc2bbe6b7..588f6161c1 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -356,6 +356,13 @@ func (mx *Measurer) HTTPEndpointGet( return m } +// HTTPEndpointGetWithoutCookies is like HTTPEndpointGet +// but does not require you to provide a CookieJar. +func (mx *Measurer) HTTPEndpointGetWithoutCookies( + ctx context.Context, epnt *HTTPEndpoint) *HTTPEndpointMeasurement { + return mx.HTTPEndpointGet(ctx, epnt, NewCookieJar()) +} + var ( errUnknownHTTPEndpointURLScheme = errors.New("unknown HTTPEndpoint.URL.Scheme") @@ -364,46 +371,6 @@ var ( ErrUnknownHTTPEndpointNetwork = errors.New("unknown HTTPEndpoint.Network") ) -// HTTPPreparedRequest is a suspended request that only awaits -// for you to Resume it to deliver a result. 
-type HTTPPreparedRequest struct { - resp *http.Response - m *HTTPEndpointMeasurement - err error -} - -// Resume resumes the request and yields either a response or an error. You -// shall not call this function more than once. -func (r *HTTPPreparedRequest) Resume() (*http.Response, error) { - return r.resp, r.err -} - -// Measurement returns the associated measurement. -func (r *HTTPPreparedRequest) Measurement() *HTTPEndpointMeasurement { - return r.m -} - -// HTTPEndpointPrepareGet prepares a GET request for an HTTP endpoint. -// -// This prepared request WILL NOT follow redirects. If there is a redirect -// you will see it inside the specific database table. -// -// Arguments: -// -// - ctx is the context allowing to timeout the operation; -// -// - epnt is the HTTP endpoint; -// -// - jar is the cookie jar to use. -// -// Returns either a prepared request or an error. -func (mx *Measurer) HTTPEndpointPrepareGet(ctx context.Context, - epnt *HTTPEndpoint, jar http.CookieJar) *HTTPPreparedRequest { - out := &HTTPPreparedRequest{} - out.resp, out.m, out.err = mx.httpEndpointGet(ctx, epnt, jar) - return out -} - // httpEndpointGet implements HTTPEndpointGet. func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, *HTTPEndpointMeasurement, error) { @@ -429,7 +396,7 @@ func (mx *Measurer) httpEndpointGet(ctx context.Context, epnt *HTTPEndpoint, func (mx *Measurer) httpEndpointGetMeasurement(ctx context.Context, epnt *HTTPEndpoint, jar http.CookieJar) (resp *http.Response, m *Measurement, err error) { db := &MeasurementDB{} - resp, err = mx.HTTPEndpointGetWithDB(ctx, epnt, db, jar) + resp, err = mx.httpEndpointGetWithDB(ctx, epnt, db, jar) m = db.AsMeasurement() return } @@ -437,6 +404,21 @@ func (mx *Measurer) httpEndpointGetMeasurement(ctx context.Context, epnt *HTTPEn // HTTPEndpointGetWithDB is an HTTPEndpointGet that stores the // events into the given WritableDB. 
func (mx *Measurer) HTTPEndpointGetWithDB(ctx context.Context, epnt *HTTPEndpoint, + db WritableDB, jar http.CookieJar) (err error) { + switch epnt.Network { + case NetworkQUIC: + _, err = mx.httpEndpointGetQUIC(ctx, db, epnt, jar) + case NetworkTCP: + _, err = mx.httpEndpointGetTCP(ctx, db, epnt, jar) + default: + err = ErrUnknownHTTPEndpointNetwork + } + return +} + +// httpEndpointGetWithDB is an HTTPEndpointGet that stores the +// events into the given WritableDB. +func (mx *Measurer) httpEndpointGetWithDB(ctx context.Context, epnt *HTTPEndpoint, db WritableDB, jar http.CookieJar) (resp *http.Response, err error) { switch epnt.Network { case NetworkQUIC: @@ -465,11 +447,6 @@ func (mx *Measurer) httpEndpointGetTCP(ctx context.Context, // httpEndpointGetHTTP specializes httpEndpointGetTCP for HTTP. func (mx *Measurer) httpEndpointGetHTTP(ctx context.Context, db WritableDB, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { - req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) - if err != nil { - return nil, err - } - req.Header = epnt.Header conn, err := mx.TCPConnectWithDB(ctx, db, epnt.Address) if err != nil { return nil, err @@ -478,17 +455,12 @@ func (mx *Measurer) httpEndpointGetHTTP(ctx context.Context, clnt := NewHTTPClientWithoutRedirects(db, jar, mx.NewHTTPTransportWithConn(mx.Logger, db, conn)) defer clnt.CloseIdleConnections() - return mx.httpClientDo(ctx, clnt, epnt, req) + return mx.httpClientDo(ctx, clnt, epnt) } // httpEndpointGetHTTPS specializes httpEndpointGetTCP for HTTPS. 
func (mx *Measurer) httpEndpointGetHTTPS(ctx context.Context, db WritableDB, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { - req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) - if err != nil { - return nil, err - } - req.Header = epnt.Header conn, err := mx.TLSConnectAndHandshakeWithDB(ctx, db, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, @@ -501,17 +473,12 @@ func (mx *Measurer) httpEndpointGetHTTPS(ctx context.Context, clnt := NewHTTPClientWithoutRedirects(db, jar, mx.NewHTTPTransportWithTLSConn(mx.Logger, db, conn)) defer clnt.CloseIdleConnections() - return mx.httpClientDo(ctx, clnt, epnt, req) + return mx.httpClientDo(ctx, clnt, epnt) } // httpEndpointGetQUIC specializes httpEndpointGetTCP for QUIC. func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, db WritableDB, epnt *HTTPEndpoint, jar http.CookieJar) (*http.Response, error) { - req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) - if err != nil { - return nil, err - } - req.Header = epnt.Header sess, err := mx.QUICHandshakeWithDB(ctx, db, epnt.Address, &tls.Config{ ServerName: epnt.SNI, NextProtos: epnt.ALPN, @@ -525,11 +492,29 @@ func (mx *Measurer) httpEndpointGetQUIC(ctx context.Context, clnt := NewHTTPClientWithoutRedirects(db, jar, mx.NewHTTPTransportWithQUICSess(mx.Logger, db, sess)) defer clnt.CloseIdleConnections() - return mx.httpClientDo(ctx, clnt, epnt, req) + return mx.httpClientDo(ctx, clnt, epnt) +} + +func (mx *Measurer) HTTPClientGET( + ctx context.Context, clnt HTTPClient, URL *url.URL) (*http.Response, error) { + return mx.httpClientDo(ctx, clnt, &HTTPEndpoint{ + Domain: URL.Hostname(), + Network: "tcp", + Address: URL.Hostname(), + SNI: "", // not needed + ALPN: []string{}, // not needed + URL: URL, + Header: NewHTTPRequestHeaderForMeasuring(), + }) } -func (mx *Measurer) httpClientDo(ctx context.Context, clnt HTTPClient, - epnt *HTTPEndpoint, req *http.Request) (*http.Response, error) { +func (mx *Measurer) httpClientDo(ctx 
context.Context, + clnt HTTPClient, epnt *HTTPEndpoint) (*http.Response, error) { + req, err := NewHTTPGetRequest(ctx, epnt.URL.String()) + if err != nil { + return nil, err + } + req.Header = epnt.Header const timeout = 15 * time.Second ol := NewOperationLogger(mx.Logger, "%s %s with %s/%s", req.Method, req.URL.String(), epnt.Address, epnt.Network) @@ -914,7 +899,7 @@ func (r *redirectionQueue) redirectionsCount() int { // MeasureURLAndFollowRedirections is like MeasureURL except // that it _also_ follows all the HTTP redirections. -func (mx *Measurer) MeasureHTTPURLAndFollowRedirections(ctx context.Context, +func (mx *Measurer) MeasureURLAndFollowRedirections(ctx context.Context, URL string, headers http.Header, cookies http.CookieJar) <-chan *URLMeasurement { out := make(chan *URLMeasurement) go func() { diff --git a/internal/tutorial/README.md b/internal/tutorial/README.md index 22f10701bc..a66c0a06e6 100644 --- a/internal/tutorial/README.md +++ b/internal/tutorial/README.md @@ -10,6 +10,8 @@ real OONI code, it should always be up to date. 
- [Rewriting the torsf experiment](experiment/torsf/) +- [Using the measurex package to write network experiments](measurex) + ## Regenerating the tutorials diff --git a/internal/tutorial/generator/main.go b/internal/tutorial/generator/main.go index df9ed0dbef..be2d3c9d30 100644 --- a/internal/tutorial/generator/main.go +++ b/internal/tutorial/generator/main.go @@ -106,6 +106,7 @@ func genmeasurex() { gen(path.Join(prefix, "chapter11"), "main.go") gen(path.Join(prefix, "chapter12"), "main.go") gen(path.Join(prefix, "chapter13"), "main.go") + gen(path.Join(prefix, "chapter14"), "main.go") } func main() { diff --git a/internal/tutorial/measurex/README.md b/internal/tutorial/measurex/README.md new file mode 100644 index 0000000000..f89f9aa476 --- /dev/null +++ b/internal/tutorial/measurex/README.md @@ -0,0 +1,104 @@ +# Using the measurex package to write network experiments + +This tutorial teaches you how to write OONI network +experiments using the primitives in the `./internal/measurex` +package. The name of this package means either "measure +extensions" or "measure crossover". + +The measure extension interpretation of the name explains +what this package does. It contains extensions to our +basic networking code (`./internal/netxlite`) that allow +us to perform OONI measurements. + +The measure crossover interpretation explains its history. Since +OONI has been written in Go, we've mostly performed measurements +using "tracing". That is, by registering hooks that run when +specific operations happen (e.g., TCP connect or TLS handshake) +and then making sense of the network trace. This is the approach +with which most experiments are written as of 2021-09-24. But +we have also seen that in several cases a step-by-step approach +is preferable. Under this approach, you perform individual +operations and record their result right away.
So, for example, +you have a connection and a TLS config, you perform a TLS +handshake, and immediately after you create and store somewhere +a data structure containing the result. This package is at the +crossover of these two approaches, because basically it contains +enough primitives to support both. + +What we are going to do in this tutorial is the following: + +- we will start from very simple step-by-step measurements such +as TCP connect, DNS lookup, and TLS handshake; + +- we will see how `measurex` provides support for composing +these primitives in larger steps, which will lead us to +eventually perform all the measurements that matter for a +given input URL (including discovering QUIC endpoints +and following redirections); + +- finally, as an exercise, we will use the knowledge +acquired in the rest of the tutorial to rewrite a +subset of the Web Connectivity experiment (as of 2021-09-24 +the flagship OONI experiment). This will be an opportunity +to explore more low-level aspects of `measurex`. + +As part of the process, we'll introduce you to the data +format used by OONI and there will be proposed exercises +where we simulate censorship conditions and we see how +that impacts the generated measurements. + +Every chapter will show how to write a simple `main.go` +program that explains how to use some primitives. The +chapter text itself is autogenerated from comments inside +the actual `main.go` that we describe in the chapter. + +For this reason, if you need to change a chapter, you +need to change the corresponding `main.go` file and then +follow the instructions at `./internal/tutorial/generator` +to regenerate the markdown text of the chapter.
+ +More in detail, here's the index: + +- [chapter01](chapter01) explains how to use the "system" resolver + +- [chapter02](chapter02) deals with establishing TCP connections + +- [chapter03](chapter03) is about using custom DNS-over-UDP resolvers + +- [chapter04](chapter04) shows how to measure TLS handshakes + +- [chapter05](chapter05) is about the QUIC handshake + +- [chapter06](chapter06) shows how to get a webpage knowing its +URL and the endpoint (i.e., IP address and TCP/UDP port) + +- [chapter07](chapter07) shows how to extend what we did in +chapter06 to cover _all_ the IP addresses in the URL's domain + +- [chapter08](chapter08) is about HTTPSSvc DNS queries and +how they can be used to discover and test QUIC endpoints, thus +extending the work done in chapter07 + +- [chapter09](chapter09) improves upon chapter08 showing +how to run endpoints measurements in parallel + +- [chapter10](chapter10) improves upon chapter09 by +also running DNS queries in parallel + +- [chapter11](chapter11) tells you that all the code we +have been writing so far, and specifically the code we have +in chapter10, is actually the implementation of an API +of `measurex` called `MeasureURL`, and then shows you how +you can simplify the code in chapter10 by using this API. + +- [chapter12](chapter12) extends the work done in +chapter11 by teaching you about a more high-level API +that discovers and follows all redirections, calling +`MeasureURL` for each redirection. + +- [chapter13](chapter13) contains the exercise regarding +rewriting WebConnectivity using all the tools you have +learned so far and pointing you at additional `measurex` +API that could be useful to solve the problem. + +- [chapter14](chapter14) contains our solution to the exercise. 
diff --git a/internal/tutorial/measurex/chapter02/README.md b/internal/tutorial/measurex/chapter02/README.md index 0d53158484..78fda595c4 100644 --- a/internal/tutorial/measurex/chapter02/README.md +++ b/internal/tutorial/measurex/chapter02/README.md @@ -90,10 +90,14 @@ go run -race ./internal/tutorial/measurex/chapter02 Here is the JSON we obtain in output: -```JSON +```JavaScript { + // These two fields identify the endpoint "network": "tcp", "address": "8.8.4.4:443", + + // This block contains the results of the connect syscall + // using the df-008-netevents data format. "connect": [ { "address": "8.8.4.4:443", diff --git a/internal/tutorial/measurex/chapter02/main.go b/internal/tutorial/measurex/chapter02/main.go index 94d8271afa..fa035ae808 100644 --- a/internal/tutorial/measurex/chapter02/main.go +++ b/internal/tutorial/measurex/chapter02/main.go @@ -91,10 +91,14 @@ func main() { // // Here is the JSON we obtain in output: // -// ```JSON +// ```JavaScript // { +// // These two fields identify the endpoint // "network": "tcp", // "address": "8.8.4.4:443", +// +// // This block contains the results of the connect syscall +// // using the df-008-netevents data format. // "connect": [ // { // "address": "8.8.4.4:443", diff --git a/internal/tutorial/measurex/chapter03/README.md b/internal/tutorial/measurex/chapter03/README.md index d6b88c1b2e..1c7e4f3c34 100644 --- a/internal/tutorial/measurex/chapter03/README.md +++ b/internal/tutorial/measurex/chapter03/README.md @@ -562,6 +562,6 @@ Here's the answer I get: ## Conclusion -We have seen how we can configure and use the flow for -sending DNS queries over UDP and we have seen some common errors. +We have seen how to send DNS queries over UDP, measure the +results, and what happens on common error conditions. 
diff --git a/internal/tutorial/measurex/chapter03/main.go b/internal/tutorial/measurex/chapter03/main.go index faba4da39b..8d000c4479 100644 --- a/internal/tutorial/measurex/chapter03/main.go +++ b/internal/tutorial/measurex/chapter03/main.go @@ -563,7 +563,7 @@ func main() { // // ## Conclusion // -// We have seen how we can configure and use the flow for -// sending DNS queries over UDP and we have seen some common errors. +// We have seen how to send DNS queries over UDP, measure the +// results, and what happens on common error conditions. // // -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter04/README.md b/internal/tutorial/measurex/chapter04/README.md index a62a9f4795..970d8602d2 100644 --- a/internal/tutorial/measurex/chapter04/README.md +++ b/internal/tutorial/measurex/chapter04/README.md @@ -45,6 +45,10 @@ func main() { We call the `ConnectAndHandshake` method. The arguments are the context, the address, and a TLS config. +Under the hood, the code will call the TCP connect functionality +we have seen in chapter02, using the address argument. Then, if +successful, it will TLS handshake using the given TLS config. + ```Go m := mx.TLSConnectAndHandshake(ctx, *address, &tls.Config{ ServerName: *sni, @@ -53,7 +57,8 @@ are the context, the address, and a TLS config. }) ``` -The three fields above are the files you should always set +Regarding the TLS config, in particular, +the three fields above are the files you should always set in a TLS config when doing handshakes manually. The `ServerName` field forces the SNI, the NextProtos field forces the ALPN, and the `RootCAs` field is overridden so that we use the @@ -184,7 +189,7 @@ All the data formats we're using here are, by the way, compatible with the data formats specified at https://github.com/ooni/spec/tree/master/data-formats. 
-### Exercises +### Suggested follow-up experiments Try to run experiments in the following scenarios, and check the output JSON to familiarize with what changes in @@ -202,6 +207,16 @@ different error conditions. 6. measurement with timeout during handshake +Here are the commands I used for each proposed exercise: + +1. go run -race ./internal/tutorial/measurex/chapter04 -address 8.8.4.4:1 + +2. go run -race ./internal/tutorial/measurex/chapter04 -sni example.org + +3. go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni self-signed.badssl.com + +4. go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni expire.badssl.com + To emulate the two last scenario, if you're on Linux, a possibility is building Jafar with this command: diff --git a/internal/tutorial/measurex/chapter04/main.go b/internal/tutorial/measurex/chapter04/main.go index bce84145fc..3ca7bfc91a 100644 --- a/internal/tutorial/measurex/chapter04/main.go +++ b/internal/tutorial/measurex/chapter04/main.go @@ -46,6 +46,10 @@ func main() { // We call the `ConnectAndHandshake` method. The arguments // are the context, the address, and a TLS config. // + // Under the hood, the code will call the TCP connect functionality + // we have seen in chapter02, using the address argument. Then, if + // successful, it will TLS handshake using the given TLS config. + // // ```Go m := mx.TLSConnectAndHandshake(ctx, *address, &tls.Config{ ServerName: *sni, @@ -54,7 +58,8 @@ func main() { }) // ``` // - // The three fields above are the files you should always set + // Regarding the TLS config, in particular, + // the three fields above are the files you should always set // in a TLS config when doing handshakes manually. 
The `ServerName` // field forces the SNI, the NextProtos field forces the ALPN, // and the `RootCAs` field is overridden so that we use the @@ -185,7 +190,7 @@ func main() { // compatible with the data formats specified at // https://github.com/ooni/spec/tree/master/data-formats. // -// ### Exercises +// ### Suggested follow-up experiments // // Try to run experiments in the following scenarios, and // check the output JSON to familiarize with what changes in @@ -203,6 +208,16 @@ func main() { // // 6. measurement with timeout during handshake // +// Here are the commands I used for each proposed exercise: +// +// 1. go run -race ./internal/tutorial/measurex/chapter04 -address 8.8.4.4:1 +// +// 2. go run -race ./internal/tutorial/measurex/chapter04 -sni example.org +// +// 3. go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni self-signed.badssl.com +// +// 4. go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni expire.badssl.com +// // To emulate the two last scenario, if you're on Linux, a // possibility is building Jafar with this command: // @@ -232,17 +247,3 @@ func main() { // this flow produces different output on different error conditions. // // -=-=- StopHere -=-=- -// -// Here are the commands I used for each proposed exercise: -// -// 1. go run -race ./internal/tutorial/measurex/chapter04 -address 8.8.4.4:1 -// -// 2. go run -race ./internal/tutorial/measurex/chapter04 -sni example.org -// -// 3. go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni self-signed.badssl.com -// -// 4. go run -race ./internal/tutorial/measurex/chapter04 -address 104.154.89.105:443 -sni expire.badssl.com -// -// 5. use jafar as suggested with vanilla go run ... command -// -// 6. use jafar as suggested with vanilla go run ... 
command diff --git a/internal/tutorial/measurex/chapter05/README.md b/internal/tutorial/measurex/chapter05/README.md index e69de29bb2..9898dfc7d5 100644 --- a/internal/tutorial/measurex/chapter05/README.md +++ b/internal/tutorial/measurex/chapter05/README.md @@ -0,0 +1,280 @@ + +# Chapter V: QUIC handshaking + +This chapter describes measuring QUIC handshakes. Conceptually, +and code wise, this is very similar to the previous chapter. +The API call, in fact, has exactly the same structure, though +under the hood QUIC is different because there are no +separate connection establishment and handshake primitives. +For this reason, we will not see a connect event, but we +will only see a "QUIC handshake event". + +Having said that, let us now move on and see the code of +the simple program that shows this functionality. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter05/main.go`.) + +## main.go + +The initial part of the program is pretty much the same as the one +used in previous chapters, so I will not add further comments. + +```Go +package main + +import ( + "context" + "crypto/tls" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func main() { + sni := flag.String("sni", "dns.google", "value for SNI extension") + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() +``` + +### Handshaking with QUIC + +The API signature is indeed the same as the previous chapter, +except that here we call the `QUICHandshake` function. 
+ +```Go + m := mx.QUICHandshake(ctx, *address, &tls.Config{ + ServerName: *sni, + NextProtos: []string{"h3"}, + RootCAs: netxlite.NewDefaultCertPool(), + }) +``` + +The same remarks mentioned in the previous chapter regarding +the arguments for the TLS config also apply here. We need +to specify the SNI (`ServerName`), the ALPN (`NextProtos`), +and the CA pool we want to use. Here, again, we're using +the CA pool from cURL that we bundle with ooniprobe. + +As we did in the previous chapters, here's the usual three +lines of code for printing the resulting measurement. + +``` + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +``` + +## Running the example program + +As before, let us start off with a vanilla run: + +```bash +go run -race ./internal/tutorial/measurex/chapter05 +``` + +Produces this JSON: + +```JavaScript +{ + // In chapter02 these two fields were similar but + // the network was "tcp" as opposed to "quic" + "network": "quic", + "address": "8.8.4.4:443", + + // This block contains I/O operations. Note that + // the protocol is "quic" and that the syscalls + // are "read_from" and "write_to" because QUIC does + // not bind/connect sockets. (The real syscalls + // are actually `recvfrom` and `sendto` but here + // we follow the Go convention of using read/write + // more frequently than send/recv.) 
+ "read_write": [ + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 1252, + "operation": "write_to", + "proto": "quic", + "t": 0.003903167, + "started": 0.0037395, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 1252, + "operation": "read_from", + "proto": "quic", + "t": 0.029389125, + "started": 0.002954792, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 1252, + "operation": "write_to", + "proto": "quic", + "t": 0.029757584, + "started": 0.02972325, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 1252, + "operation": "read_from", + "proto": "quic", + "t": 0.045039875, + "started": 0.029424792, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 1252, + "operation": "read_from", + "proto": "quic", + "t": 0.045055334, + "started": 0.045049625, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 1252, + "operation": "read_from", + "proto": "quic", + "t": 0.045073917, + "started": 0.045069667, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 1233, + "operation": "read_from", + "proto": "quic", + "t": 0.04508, + "started": 0.045075292, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 64, + "operation": "read_from", + "proto": "quic", + "t": 0.045088167, + "started": 0.045081167, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 44, + "operation": "write_to", + "proto": "quic", + "t": 0.045370417, + "started": 0.045338667, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 44, + "operation": "write_to", + "proto": "quic", + "t": 0.045392125, + "started": 0.045380959, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 83, + "operation": "write_to", + "proto": "quic", + "t": 0.047042542, + "started": 
0.047001917, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 33, + "operation": "write_to", + "proto": "quic", + "t": 0.047060834, + "started": 0.047046875, + "oddity": "" + } + ], + + // This section describes the QUIC handshake and it has + // basically the same fields of the TLS handshake. + "quic_handshake": [ + { + "cipher_suite": "TLS_CHACHA20_POLY1305_SHA256", + "failure": null, + "negotiated_proto": "h3", + "tls_version": "TLSv1.3", + "peer_certificates": [ + { + "data": "MIIF4TCCBMmgAwIBAgIQGa7QSAXLo6sKAAAAAPz4cjANBgkqhkiG9w0BAQsFADBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzAeFw0yMTA4MzAwNDAwMDBaFw0yMTExMjIwMzU5NTlaMBUxEzARBgNVBAMTCmRucy5nb29nbGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC8cttrGHp3SS9YGYgsNLXt43dhW4d8FPULk0n6WYWC+EbMLkLnYXHLZHXJEz1Tor5hrCfHEVyX4xmhY2LCt0jprP6Gfo+gkKyjSV3LO65aWx6ezejvIdQBiLhSo/R5E3NwjMUAbm9PoNfSZSLiP3RjC3Px1vXFVmlcap4bUHnv9OvcPvwV1wmw5IMVzCuGBjCzJ4c4fxgyyggES1mbXZpYcDO4YKhSqIJx2D0gop9wzBQevI/kb35miN1pAvIKK2lgf7kZvYa7HH5vJ+vtn3Vkr34dKUAc/cO62t+NVufADPwn2/Tx8y8fPxlnCmoJeI+MPsw+StTYDawxajkjvZfdAgMBAAGjggL6MIIC9jAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUooaIxGAth6+bJh0JHYVWccyuoUcwHwYDVR0jBBgwFoAUinR/r4XN7pXNPZzQ4kYU83E1HScwagYIKwYBBQUHAQEEXjBcMCcGCCsGAQUFBzABhhtodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHMxYzMwMQYIKwYBBQUHMAKGJWh0dHA6Ly9wa2kuZ29vZy9yZXBvL2NlcnRzL2d0czFjMy5kZXIwgawGA1UdEQSBpDCBoYIKZG5zLmdvb2dsZYIOZG5zLmdvb2dsZS5jb22CECouZG5zLmdvb2dsZS5jb22CCzg4ODguZ29vZ2xlghBkbnM2NC5kbnMuZ29vZ2xlhwQICAgIhwQICAQEhxAgAUhgSGAAAAAAAAAAAIiIhxAgAUhgSGAAAAAAAAAAAIhEhxAgAUhgSGAAAAAAAAAAAGRkhxAgAUhgSGAAAAAAAAAAAABkMCEGA1UdIAQaMBgwCAYGZ4EMAQIBMAwGCisGAQQB1nkCBQMwPAYDVR0fBDUwMzAxoC+gLYYraHR0cDovL2NybHMucGtpLmdvb2cvZ3RzMWMzL2ZWSnhiVi1LdG1rLmNybDCCAQMGCisGAQQB1nkCBAIEgfQEgfEA7wB1AH0+8viP/4hVaCTCwMqeUol5K8UOeAl/LmqXaJl+IvDXAAABe5VtuiwAAAQDAEYwRAIgAwzr02ayTnNk/G+HDP50WTZUls3g+9P1fTGR9PEywpYCIAIOIQJ7nJTlcJdSyyOvgzX4BxJDr18mOKJPH
lJs1naIAHYAXNxDkv7mq0VEsV6a1FbmEDf71fpH3KFzlLJe5vbHDsoAAAF7lW26IQAABAMARzBFAiAtlIkbCH+QgiO6T6Y/+UAf+eqHB2wdzMNfOoo4SnUhVgIhALPiRtyPMo8fPPxN3VgiXBqVF7tzLWTJUjprOe4kQUCgMA0GCSqGSIb3DQEBCwUAA4IBAQDVq3WWgg6eYSpFLfNgo2KzLKDPkWZx42gW2Tum6JZd6O/Nj+mjYGOyXyryTslUwmONxiq2Ip3PLA/qlbPdYic1F1mDwMHSzRteSe7axwEP6RkoxhMy5zuI4hfijhSrfhVUZF299PesDf2gI+Vh30s6muHVfQjbXOl/AkAqIPLSetv2mS9MHQLeHcCCXpwsXQJwusZ3+ILrgCRAGv6NLXwbfE0t3OjXV0gnNRp3DWEaF+yrfjE0oU1myeYDNtugsw8VRwTzCM53Nqf/BJffnuShmBBZfZ2jlsPnLys0UqCZo2dg5wdwj3DaKtHO5Pofq6P8r4w6W/aUZCTLUi1jZ3Gc", + "format": "base64" + }, + { + "data": "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAwMDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzpkgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsXlOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcmBA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKAgOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwLtmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1UdDwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYDVR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYGCCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcwAoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQtMCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcGA1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3BraS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcNAQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQcSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrLRklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U+o53UxtkOCcXC
b1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2YrPxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IERlQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGsYye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjOz23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJGAJ2xDx8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKwjuDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd", + "format": "base64" + }, + { + "data": "MIIFYjCCBEqgAwIBAgIQd70NbNs2+RrqIQ/E8FjTDTANBgkqhkiG9w0BAQsFADBXMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1zYTEQMA4GA1UECxMHUm9vdCBDQTEbMBkGA1UEAxMSR2xvYmFsU2lnbiBSb290IENBMB4XDTIwMDYxOTAwMDA0MloXDTI4MDEyODAwMDA0MlowRzELMAkGA1UEBhMCVVMxIjAgBgNVBAoTGUdvb2dsZSBUcnVzdCBTZXJ2aWNlcyBMTEMxFDASBgNVBAMTC0dUUyBSb290IFIxMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAthECix7joXebO9y/lD63ladAPKH9gvl9MgaCcfb2jH/76Nu8ai6Xl6OMS/kr9rH5zoQdsfnFl97vufKj6bwSiV6nqlKr+CMny6SxnGPb15l+8Ape62im9MZaRw1NEDPjTrETo8gYbEvs/AmQ351kKSUjB6G00j0uYODP0gmHu81I8E3CwnqIiru6z1kZ1q+PsAewnjHxgsHA3y6mbWwZDrXYfiYaRQM9sHmklCitD38m5agI/pboPGiUU+6DOogrFZYJsuB6jC511pzrp1Zkj5ZPaK49l8KEj8C8QMALXL32h7M1bKwYUH+E4EzNktMg6TO8UpmvMrUpsyUqtEj5cuHKZPfmghCN6J3Cioj6OGaK/GP5Afl4/Xtcd/p2h/rs37EOeZVXtL0m79YB0esWCruOC7XFxYpVq9Os6pFLKcwZpDIlTirxZUTQAs6qzkm06p98g7BAe+dDq6dso499iYH6TKX/1Y7DzkvgtdizjkXPdsDtQCv9Uw+wp9U7DbGKogPeMa3Md+pvez7W35EiEua++tgy/BBjFFFy3l3WFpO9KWgz7zpm7AeKJt8T11dleCfeXkkUAKIAf5qoIbapsZWwpbkNFhHax2xIPEDgfg1azVY80ZcFuctL7TlLnMQ/0lUTbiSw1nH69MG6zO0b9f6BQdgAmD06yK56mDcYBZUCAwEAAaOCATgwggE0MA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTkrysmcRorSCeFL1JmLO/wiRNxPjAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30EzTSo//z9SzBgBggrBgEFBQcBAQRUMFIwJQYIKwYBBQUHMAGGGWh0dHA6Ly9vY3NwLnBraS5nb29nL2dzcjEwKQYIKwYBBQUHMAKGHWh0dHA6Ly9wa2kuZ29vZy9nc3IxL2dzcjEuY3J0MDIGA1UdHwQrMCkwJ6AloCOGIWh0dHA6Ly9jcmwucGtpLmdvb2cvZ3NyMS9nc3IxLmNybDA7BgNVHSAENDAyMAgGBmeBDAECATAIBgZngQwBAgIwDQYLKwYBBAHWeQIFAwIwDQYLKwYBB
AHWeQIFAwMwDQYJKoZIhvcNAQELBQADggEBADSkHrEoo9C0dhemMXoh6dFSPsjbdBZBiLg9NR3t5P+T4Vxfq7vqfM/b5A3Ri1fyJm9bvhdGaJQ3b2t6yMAYN/olUazsaL+yyEn9WprKASOshIArAoyZl+tJaox118fessmXn1hIVw41oeQa1v1vg4Fv74zPl6/AhSrw9U5pCZEt4Wi4wStz6dTZ/CLANx8LZh1J7QJVj2fhMtfTJr9w4z30Z209fOU0iOMy+qduBmpvvYuR7hZL6Dupszfnw0Skfths18dG9ZKb59UhvmaSGZRVbNQpsg3BZlvid0lIKO2d1xozclOzgjXPYovJJIultzkMu34qQb9Sz/yilrbCgj8=", + "format": "base64" + } + ], + "t": 0.047042459, + "address": "8.8.4.4:443", + "server_name": "dns.google", + "alpn": [ + "h3" + ], + "no_tls_verify": false, + "oddity": "", + "proto": "quic", + "started": 0.002154834 + } + ] +} +``` + +Here are some suggestions on other experiments to run: + +1. obtain a timeout by connecting on a port that is not +actually listening for QUIC; + +2. obtain a certificate validation error by forcing +a different SNI; + +3. use a different ALPN (by changing the code), and see +how the error and the oddity are handled. Can we do +anything about this by changing `./internal/netxlite/errorx` +to better support for this specific error condition? + +## Conclusion + +We have seen how to perform QUIC handshake and +collect measurements. + diff --git a/internal/tutorial/measurex/chapter05/main.go b/internal/tutorial/measurex/chapter05/main.go index 9ab67606fc..8d92d6bf56 100644 --- a/internal/tutorial/measurex/chapter05/main.go +++ b/internal/tutorial/measurex/chapter05/main.go @@ -1,3 +1,27 @@ +// -=-=- StartHere -=-=- +// +// # Chapter V: QUIC handshaking +// +// This chapter describes measuring QUIC handshakes. Conceptually, +// and code wise, this is very similar to the previous chapter. +// The API call, in fact, has exactly the same structure, though +// under the hood QUIC is different because there are no +// separate connection establishment and handshake primitives. +// For this reason, we will not see a connect event, but we +// will only see a "QUIC handshake event". 
+// +// Having said that, let us now move on and see the code of +// the simple program that shows this functionality. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter05/main.go`.) +// +// ## main.go +// +// The initial part of the program is pretty much the same as the one +// used in previous chapters, so I will not add further comments. +// +// ```Go package main import ( @@ -21,12 +45,238 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() mx := measurex.NewMeasurerWithDefaultSettings() + // ``` + // + // ### Handshaking with QUIC + // + // The API signature is indeed the same as the previous chapter, + // except that here we call the `QUICHandshake` function. + // + // ```Go m := mx.QUICHandshake(ctx, *address, &tls.Config{ ServerName: *sni, NextProtos: []string{"h3"}, RootCAs: netxlite.NewDefaultCertPool(), }) + // ``` + // + // The same remarks mentioned in the previous chapter regarding + // the arguments for the TLS config also apply here. We need + // to specify the SNI (`ServerName`), the ALPN (`NextProtos`), + // and the CA pool we want to use. Here, again, we're using + // the CA pool from cURL that we bundle with ooniprobe. + // + // As we did in the previous chapters, here's the usual three + // lines of code for printing the resulting measurement. + // + // ``` data, err := json.Marshal(m) runtimex.PanicOnError(err, "json.Marshal failed") fmt.Printf("%s\n", string(data)) } + +// ``` +// +// ## Running the example program +// +// As before, let us start off with a vanilla run: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter05 +// ``` +// +// Produces this JSON: +// +// ```JavaScript +// { +// // In chapter02 these two fields were similar but +// // the network was "tcp" as opposed to "quic" +// "network": "quic", +// "address": "8.8.4.4:443", +// +// // This block contains I/O operations. 
Note that +// // the protocol is "quic" and that the syscalls +// // are "read_from" and "write_to" because QUIC does +// // not bind/connect sockets. (The real syscalls +// // are actually `recvfrom` and `sendto` but here +// // we follow the Go convention of using read/write +// // more frequently than send/recv.) +// "read_write": [ +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 1252, +// "operation": "write_to", +// "proto": "quic", +// "t": 0.003903167, +// "started": 0.0037395, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 1252, +// "operation": "read_from", +// "proto": "quic", +// "t": 0.029389125, +// "started": 0.002954792, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 1252, +// "operation": "write_to", +// "proto": "quic", +// "t": 0.029757584, +// "started": 0.02972325, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 1252, +// "operation": "read_from", +// "proto": "quic", +// "t": 0.045039875, +// "started": 0.029424792, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 1252, +// "operation": "read_from", +// "proto": "quic", +// "t": 0.045055334, +// "started": 0.045049625, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 1252, +// "operation": "read_from", +// "proto": "quic", +// "t": 0.045073917, +// "started": 0.045069667, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 1233, +// "operation": "read_from", +// "proto": "quic", +// "t": 0.04508, +// "started": 0.045075292, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 64, +// "operation": "read_from", +// "proto": "quic", +// "t": 0.045088167, +// "started": 0.045081167, +// "oddity": "" +// }, +// { +// "address": 
"8.8.4.4:443", +// "failure": null, +// "num_bytes": 44, +// "operation": "write_to", +// "proto": "quic", +// "t": 0.045370417, +// "started": 0.045338667, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 44, +// "operation": "write_to", +// "proto": "quic", +// "t": 0.045392125, +// "started": 0.045380959, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 83, +// "operation": "write_to", +// "proto": "quic", +// "t": 0.047042542, +// "started": 0.047001917, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 33, +// "operation": "write_to", +// "proto": "quic", +// "t": 0.047060834, +// "started": 0.047046875, +// "oddity": "" +// } +// ], +// +// // This section describes the QUIC handshake and it has +// // basically the same fields of the TLS handshake. +// "quic_handshake": [ +// { +// "cipher_suite": "TLS_CHACHA20_POLY1305_SHA256", +// "failure": null, +// "negotiated_proto": "h3", +// "tls_version": "TLSv1.3", +// "peer_certificates": [ +// { +// "data": 
"MIIF4TCCBMmgAwIBAgIQGa7QSAXLo6sKAAAAAPz4cjANBgkqhkiG9w0BAQsFADBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzAeFw0yMTA4MzAwNDAwMDBaFw0yMTExMjIwMzU5NTlaMBUxEzARBgNVBAMTCmRucy5nb29nbGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC8cttrGHp3SS9YGYgsNLXt43dhW4d8FPULk0n6WYWC+EbMLkLnYXHLZHXJEz1Tor5hrCfHEVyX4xmhY2LCt0jprP6Gfo+gkKyjSV3LO65aWx6ezejvIdQBiLhSo/R5E3NwjMUAbm9PoNfSZSLiP3RjC3Px1vXFVmlcap4bUHnv9OvcPvwV1wmw5IMVzCuGBjCzJ4c4fxgyyggES1mbXZpYcDO4YKhSqIJx2D0gop9wzBQevI/kb35miN1pAvIKK2lgf7kZvYa7HH5vJ+vtn3Vkr34dKUAc/cO62t+NVufADPwn2/Tx8y8fPxlnCmoJeI+MPsw+StTYDawxajkjvZfdAgMBAAGjggL6MIIC9jAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUooaIxGAth6+bJh0JHYVWccyuoUcwHwYDVR0jBBgwFoAUinR/r4XN7pXNPZzQ4kYU83E1HScwagYIKwYBBQUHAQEEXjBcMCcGCCsGAQUFBzABhhtodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHMxYzMwMQYIKwYBBQUHMAKGJWh0dHA6Ly9wa2kuZ29vZy9yZXBvL2NlcnRzL2d0czFjMy5kZXIwgawGA1UdEQSBpDCBoYIKZG5zLmdvb2dsZYIOZG5zLmdvb2dsZS5jb22CECouZG5zLmdvb2dsZS5jb22CCzg4ODguZ29vZ2xlghBkbnM2NC5kbnMuZ29vZ2xlhwQICAgIhwQICAQEhxAgAUhgSGAAAAAAAAAAAIiIhxAgAUhgSGAAAAAAAAAAAIhEhxAgAUhgSGAAAAAAAAAAAGRkhxAgAUhgSGAAAAAAAAAAAABkMCEGA1UdIAQaMBgwCAYGZ4EMAQIBMAwGCisGAQQB1nkCBQMwPAYDVR0fBDUwMzAxoC+gLYYraHR0cDovL2NybHMucGtpLmdvb2cvZ3RzMWMzL2ZWSnhiVi1LdG1rLmNybDCCAQMGCisGAQQB1nkCBAIEgfQEgfEA7wB1AH0+8viP/4hVaCTCwMqeUol5K8UOeAl/LmqXaJl+IvDXAAABe5VtuiwAAAQDAEYwRAIgAwzr02ayTnNk/G+HDP50WTZUls3g+9P1fTGR9PEywpYCIAIOIQJ7nJTlcJdSyyOvgzX4BxJDr18mOKJPHlJs1naIAHYAXNxDkv7mq0VEsV6a1FbmEDf71fpH3KFzlLJe5vbHDsoAAAF7lW26IQAABAMARzBFAiAtlIkbCH+QgiO6T6Y/+UAf+eqHB2wdzMNfOoo4SnUhVgIhALPiRtyPMo8fPPxN3VgiXBqVF7tzLWTJUjprOe4kQUCgMA0GCSqGSIb3DQEBCwUAA4IBAQDVq3WWgg6eYSpFLfNgo2KzLKDPkWZx42gW2Tum6JZd6O/Nj+mjYGOyXyryTslUwmONxiq2Ip3PLA/qlbPdYic1F1mDwMHSzRteSe7axwEP6RkoxhMy5zuI4hfijhSrfhVUZF299PesDf2gI+Vh30s6muHVfQjbXOl/AkAqIPLSetv2mS9MHQLeHcCCXpwsXQJwusZ3+ILrgCRAGv6NLXwbfE0t3OjXV0gnNRp3DWEaF+yrfjE0oU1myeYDNtugsw8VRwTzCM53Nqf/BJffnuShmBBZfZ2jlsPnLys0UqCZo2dg5wdwj3DaKtHO5Pofq6P8r4w6W/a
UZCTLUi1jZ3Gc", +// "format": "base64" +// }, +// { +// "data": "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAwMDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzpkgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsXlOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcmBA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKAgOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwLtmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1UdDwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYDVR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYGCCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcwAoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQtMCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcGA1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3BraS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcNAQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQcSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrLRklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U+o53UxtkOCcXCb1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2YrPxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IERlQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGsYye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjOz23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJGAJ2xDx8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKwjuDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd", +// "format": 
"base64" +// }, +// { +// "data": "MIIFYjCCBEqgAwIBAgIQd70NbNs2+RrqIQ/E8FjTDTANBgkqhkiG9w0BAQsFADBXMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1zYTEQMA4GA1UECxMHUm9vdCBDQTEbMBkGA1UEAxMSR2xvYmFsU2lnbiBSb290IENBMB4XDTIwMDYxOTAwMDA0MloXDTI4MDEyODAwMDA0MlowRzELMAkGA1UEBhMCVVMxIjAgBgNVBAoTGUdvb2dsZSBUcnVzdCBTZXJ2aWNlcyBMTEMxFDASBgNVBAMTC0dUUyBSb290IFIxMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAthECix7joXebO9y/lD63ladAPKH9gvl9MgaCcfb2jH/76Nu8ai6Xl6OMS/kr9rH5zoQdsfnFl97vufKj6bwSiV6nqlKr+CMny6SxnGPb15l+8Ape62im9MZaRw1NEDPjTrETo8gYbEvs/AmQ351kKSUjB6G00j0uYODP0gmHu81I8E3CwnqIiru6z1kZ1q+PsAewnjHxgsHA3y6mbWwZDrXYfiYaRQM9sHmklCitD38m5agI/pboPGiUU+6DOogrFZYJsuB6jC511pzrp1Zkj5ZPaK49l8KEj8C8QMALXL32h7M1bKwYUH+E4EzNktMg6TO8UpmvMrUpsyUqtEj5cuHKZPfmghCN6J3Cioj6OGaK/GP5Afl4/Xtcd/p2h/rs37EOeZVXtL0m79YB0esWCruOC7XFxYpVq9Os6pFLKcwZpDIlTirxZUTQAs6qzkm06p98g7BAe+dDq6dso499iYH6TKX/1Y7DzkvgtdizjkXPdsDtQCv9Uw+wp9U7DbGKogPeMa3Md+pvez7W35EiEua++tgy/BBjFFFy3l3WFpO9KWgz7zpm7AeKJt8T11dleCfeXkkUAKIAf5qoIbapsZWwpbkNFhHax2xIPEDgfg1azVY80ZcFuctL7TlLnMQ/0lUTbiSw1nH69MG6zO0b9f6BQdgAmD06yK56mDcYBZUCAwEAAaOCATgwggE0MA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTkrysmcRorSCeFL1JmLO/wiRNxPjAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30EzTSo//z9SzBgBggrBgEFBQcBAQRUMFIwJQYIKwYBBQUHMAGGGWh0dHA6Ly9vY3NwLnBraS5nb29nL2dzcjEwKQYIKwYBBQUHMAKGHWh0dHA6Ly9wa2kuZ29vZy9nc3IxL2dzcjEuY3J0MDIGA1UdHwQrMCkwJ6AloCOGIWh0dHA6Ly9jcmwucGtpLmdvb2cvZ3NyMS9nc3IxLmNybDA7BgNVHSAENDAyMAgGBmeBDAECATAIBgZngQwBAgIwDQYLKwYBBAHWeQIFAwIwDQYLKwYBBAHWeQIFAwMwDQYJKoZIhvcNAQELBQADggEBADSkHrEoo9C0dhemMXoh6dFSPsjbdBZBiLg9NR3t5P+T4Vxfq7vqfM/b5A3Ri1fyJm9bvhdGaJQ3b2t6yMAYN/olUazsaL+yyEn9WprKASOshIArAoyZl+tJaox118fessmXn1hIVw41oeQa1v1vg4Fv74zPl6/AhSrw9U5pCZEt4Wi4wStz6dTZ/CLANx8LZh1J7QJVj2fhMtfTJr9w4z30Z209fOU0iOMy+qduBmpvvYuR7hZL6Dupszfnw0Skfths18dG9ZKb59UhvmaSGZRVbNQpsg3BZlvid0lIKO2d1xozclOzgjXPYovJJIultzkMu34qQb9Sz/yilrbCgj8=", +// "format": "base64" +// } +// ], +// "t": 0.047042459, +// "address": "8.8.4.4:443", +// "server_name": 
"dns.google", +// "alpn": [ +// "h3" +// ], +// "no_tls_verify": false, +// "oddity": "", +// "proto": "quic", +// "started": 0.002154834 +// } +// ] +// } +// ``` +// +// Here are some suggestions on other experiments to run: +// +// 1. obtain a timeout by connecting on a port that is not +// actually listening for QUIC; +// +// 2. obtain a certificate validation error by forcing +// a different SNI; +// +// 3. use a different ALPN (by changing the code), and see +// how the error and the oddity are handled. Can we do +// anything about this by changing `./internal/netxlite/errorsx` +// to better support this specific error condition? +// +// ## Conclusion +// +// We have seen how to perform QUIC handshake and +// collect measurements. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter06/README.md b/internal/tutorial/measurex/chapter06/README.md index e69de29bb2..f37b8cc6ac 100644 --- a/internal/tutorial/measurex/chapter06/README.md +++ b/internal/tutorial/measurex/chapter06/README.md @@ -0,0 +1,486 @@ + +# Chapter VI: Getting a webpage from an HTTP/HTTPS/HTTP3 endpoint. + +This chapter describes measuring getting a webpage from an +HTTPS endpoint. We have seen how to TCP connect, we have +seen how to TLS handshake, now it's time to see how we can +combine these operations with fetching a webpage from a +given TCP endpoint speaking HTTP and TLS. (As well as to +provide you with information on how to otherwise fetch +from HTTP and HTTP/3 endpoints.) + +The program we're going to write, `main.go`, will show a +high-level operation to perform this measurement in a +single API call. The code implementing this API call will +combine the operations we have seen in previous chapter +with the "give me the webpage" operation. We are still +quite far away from the ability of "measuring a URL" but +we are increasingly moving towards more complex operations. + +(This file is auto-generated. Do not edit it directly!
To apply +changes you need to modify `./internal/tutorial/measurex/chapter06/main.go`.) + +## main.go + +We have package declaration and imports as usual. + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +``` + +We have factored the three lines to print a measurement +into the following utility function called `print`. + +```Go +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +``` + +The initial part of the program is pretty much the same as the one +used in previous chapters, except that we have a few more command line +flags now, so I will not add further comments. + +```Go +func main() { + sni := flag.String("sni", "dns.google", "value for SNI extension") + address := flag.String("address", "8.8.4.4:443", "remote endpoint address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + mx := measurex.NewMeasurerWithDefaultSettings() +``` + +### HTTPEndpoint: a description of what to measure + +First of all, let us create a description of the endpoint +for `measurex`. Up to this point we have used endpoint +to describe a level-4-like address. Therefore, we have seen +TCP endpoints being used for the TCP connect and the TLS +handshake, and we have seen QUIC endpoints being used +for the QUIC handshake. Now, however, we are going +to need more information to characterize the endpoint.
+ +```Go + epnt := &measurex.HTTPEndpoint{ + Domain: *sni, + Network: "tcp", + Address: *address, + SNI: *sni, + ALPN: []string{"h2", "http/1.1"}, + URL: &url.URL{ + Scheme: "https", + Host: *sni, + Path: "/", + }, + Header: measurex.NewHTTPRequestHeaderForMeasuring(), + } +``` + +In fact, in the above definition we recognize fields +we have already discussed, such as: + +- `Network`, describing whether to use "tcp" or "quic"; + +- `Address`, which is the endpoint address. + +But we also need to combine into this view of the +endpoint additional fields for TLS: + +- `SNI`, to set the SNI; + +- `ALPN`, to set the ALPN; + +But then we also need to specify: + +- the URL to use; + +- the headers to use (for which we're using a handy +factory for creating reasonable defaults for measuring). + +This API is not the highest level API with which to do +the job, but it's still handy to introduce the +`measurex.HTTPEndpoint` data structure since it's +used by higher level APIs. + +(You may also be wondering about the CA pool. It turns +out that for APIs such as this one and for higher +level APIs, the default is to always use the bundled +Mozilla CA pool, because this is what we use in +most cases for performing measurements.) 
+ +### HTTPEndpointGetWithoutCookies + +When used with an HTTP URL, the `HTTPEndpointGetWithoutCookies` +method combines two operations: + +- TCP connect + +- HTTP GET + +When the URL is HTTPS, we do: + +- TCP connect + +- TLS handshake + +- HTTP GET (or HTTP/2 GET depending on the ALPN) + +When the `HTTPEndpoint.Network` field value +is QUIC, instead we do: + +- QUIC handshake + +- HTTP/3 GET + +```Go + m := mx.HTTPEndpointGetWithoutCookies(ctx, epnt) +``` + +The arguments for `HTTPEndpointGetWithoutCookies` are: + +- the context for deadline/timeout + +- the HTTPEndpoint descriptor + +The result is an `HTTPEndpointMeasurement` which +you can inspect with + +``` +go doc ./internal/measurex.HTTPEndpointMeasurement +``` + +### Printing the measurement + +Let us now print the resulting measurement. + +```Go + print(m) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter06 +``` + +This is the JSON output. Let us comment it in detail: + +```Javascript +{ + // The returned type is called HTTPEndpointMeasurement + // and you see that here on top we indeed have the + // information on the endpoint and the URL.
+ "url": "https://dns.google/", + "network": "tcp", + "address": "8.8.4.4:443", + + // Internally, HTTPEndpointGetWithoutCookies calls + // TCPConnect and here we see the corresponding event + "connect": [ + { + "address": "8.8.4.4:443", + "failure": null, + "operation": "connect", + "proto": "tcp", + "t": 0.02422375, + "started": 0.002269291, + "oddity": "" + } + ], + + // These are the I/O operations we have already seen + // in previous chapters + "read_write": [ + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 280, + "operation": "write", + "proto": "tcp", + "t": 0.024931791, + "started": 0.024910416, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 517, + "operation": "read", + "proto": "tcp", + "t": 0.063629791, + "started": 0.024935666, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 4301, + "operation": "read", + "proto": "tcp", + "t": 0.064183, + "started": 0.064144208, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 64, + "operation": "write", + "proto": "tcp", + "t": 0.065464041, + "started": 0.065441333, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 86, + "operation": "write", + "proto": "tcp", + "t": 0.067256083, + "started": 0.067224375, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 201, + "operation": "write", + "proto": "tcp", + "t": 0.067674416, + "started": 0.067652375, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 93, + "operation": "read", + "proto": "tcp", + "t": 0.086618708, + "started": 0.067599208, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 31, + "operation": "write", + "proto": "tcp", + "t": 0.086703625, + "started": 0.0866745, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 2028, + "operation": "read", + "proto": 
"tcp", + "t": 0.337785916, + "started": 0.086717333, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 39, + "operation": "write", + "proto": "tcp", + "t": 0.338514916, + "started": 0.338485375, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": null, + "num_bytes": 24, + "operation": "write", + "proto": "tcp", + "t": 0.338800833, + "started": 0.338788625, + "oddity": "" + }, + { + "address": "8.8.4.4:443", + "failure": "connection_already_closed", + "operation": "read", + "proto": "tcp", + "t": 0.338888041, + "started": 0.338523291, + "oddity": "" + } + ], + + // Internally, HTTPEndpointGetWithoutCookies calls TLSConnectAndHandshake, + // and here's the resulting handshake event. Of course, if we + // specified a QUIC endpoint we would instead see here a + // QUIC handshake event. And, we would not see any handshake + // if the URL was instead an HTTP URL. + "tls_handshake": [ + { + "cipher_suite": "TLS_AES_128_GCM_SHA256", + "failure": null, + "negotiated_proto": "h2", + "tls_version": "TLSv1.3", + "peer_certificates": [ + { + "data": 
"MIIF4TCCBMmgAwIBAgIQGa7QSAXLo6sKAAAAAPz4cjANBgkqhkiG9w0BAQsFADBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzAeFw0yMTA4MzAwNDAwMDBaFw0yMTExMjIwMzU5NTlaMBUxEzARBgNVBAMTCmRucy5nb29nbGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC8cttrGHp3SS9YGYgsNLXt43dhW4d8FPULk0n6WYWC+EbMLkLnYXHLZHXJEz1Tor5hrCfHEVyX4xmhY2LCt0jprP6Gfo+gkKyjSV3LO65aWx6ezejvIdQBiLhSo/R5E3NwjMUAbm9PoNfSZSLiP3RjC3Px1vXFVmlcap4bUHnv9OvcPvwV1wmw5IMVzCuGBjCzJ4c4fxgyyggES1mbXZpYcDO4YKhSqIJx2D0gop9wzBQevI/kb35miN1pAvIKK2lgf7kZvYa7HH5vJ+vtn3Vkr34dKUAc/cO62t+NVufADPwn2/Tx8y8fPxlnCmoJeI+MPsw+StTYDawxajkjvZfdAgMBAAGjggL6MIIC9jAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUooaIxGAth6+bJh0JHYVWccyuoUcwHwYDVR0jBBgwFoAUinR/r4XN7pXNPZzQ4kYU83E1HScwagYIKwYBBQUHAQEEXjBcMCcGCCsGAQUFBzABhhtodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHMxYzMwMQYIKwYBBQUHMAKGJWh0dHA6Ly9wa2kuZ29vZy9yZXBvL2NlcnRzL2d0czFjMy5kZXIwgawGA1UdEQSBpDCBoYIKZG5zLmdvb2dsZYIOZG5zLmdvb2dsZS5jb22CECouZG5zLmdvb2dsZS5jb22CCzg4ODguZ29vZ2xlghBkbnM2NC5kbnMuZ29vZ2xlhwQICAgIhwQICAQEhxAgAUhgSGAAAAAAAAAAAIiIhxAgAUhgSGAAAAAAAAAAAIhEhxAgAUhgSGAAAAAAAAAAAGRkhxAgAUhgSGAAAAAAAAAAAABkMCEGA1UdIAQaMBgwCAYGZ4EMAQIBMAwGCisGAQQB1nkCBQMwPAYDVR0fBDUwMzAxoC+gLYYraHR0cDovL2NybHMucGtpLmdvb2cvZ3RzMWMzL2ZWSnhiVi1LdG1rLmNybDCCAQMGCisGAQQB1nkCBAIEgfQEgfEA7wB1AH0+8viP/4hVaCTCwMqeUol5K8UOeAl/LmqXaJl+IvDXAAABe5VtuiwAAAQDAEYwRAIgAwzr02ayTnNk/G+HDP50WTZUls3g+9P1fTGR9PEywpYCIAIOIQJ7nJTlcJdSyyOvgzX4BxJDr18mOKJPHlJs1naIAHYAXNxDkv7mq0VEsV6a1FbmEDf71fpH3KFzlLJe5vbHDsoAAAF7lW26IQAABAMARzBFAiAtlIkbCH+QgiO6T6Y/+UAf+eqHB2wdzMNfOoo4SnUhVgIhALPiRtyPMo8fPPxN3VgiXBqVF7tzLWTJUjprOe4kQUCgMA0GCSqGSIb3DQEBCwUAA4IBAQDVq3WWgg6eYSpFLfNgo2KzLKDPkWZx42gW2Tum6JZd6O/Nj+mjYGOyXyryTslUwmONxiq2Ip3PLA/qlbPdYic1F1mDwMHSzRteSe7axwEP6RkoxhMy5zuI4hfijhSrfhVUZF299PesDf2gI+Vh30s6muHVfQjbXOl/AkAqIPLSetv2mS9MHQLeHcCCXpwsXQJwusZ3+ILrgCRAGv6NLXwbfE0t3OjXV0gnNRp3DWEaF+yrfjE0oU1myeYDNtugsw8VRwTzCM53Nqf/BJffnuShmBBZfZ2jlsPnLys0UqCZo2dg5wdwj3DaKtHO5Pofq6P8r4w6W/a
UZCTLUi1jZ3Gc", + "format": "base64" + }, + { + "data": "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAwMDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzpkgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsXlOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcmBA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKAgOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwLtmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1UdDwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYDVR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYGCCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcwAoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQtMCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcGA1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3BraS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcNAQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQcSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrLRklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U+o53UxtkOCcXCb1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2YrPxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IERlQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGsYye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjOz23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJGAJ2xDx8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKwjuDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd", + "format": "base64" + }, + 
{ + "data": "MIIFYjCCBEqgAwIBAgIQd70NbNs2+RrqIQ/E8FjTDTANBgkqhkiG9w0BAQsFADBXMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1zYTEQMA4GA1UECxMHUm9vdCBDQTEbMBkGA1UEAxMSR2xvYmFsU2lnbiBSb290IENBMB4XDTIwMDYxOTAwMDA0MloXDTI4MDEyODAwMDA0MlowRzELMAkGA1UEBhMCVVMxIjAgBgNVBAoTGUdvb2dsZSBUcnVzdCBTZXJ2aWNlcyBMTEMxFDASBgNVBAMTC0dUUyBSb290IFIxMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAthECix7joXebO9y/lD63ladAPKH9gvl9MgaCcfb2jH/76Nu8ai6Xl6OMS/kr9rH5zoQdsfnFl97vufKj6bwSiV6nqlKr+CMny6SxnGPb15l+8Ape62im9MZaRw1NEDPjTrETo8gYbEvs/AmQ351kKSUjB6G00j0uYODP0gmHu81I8E3CwnqIiru6z1kZ1q+PsAewnjHxgsHA3y6mbWwZDrXYfiYaRQM9sHmklCitD38m5agI/pboPGiUU+6DOogrFZYJsuB6jC511pzrp1Zkj5ZPaK49l8KEj8C8QMALXL32h7M1bKwYUH+E4EzNktMg6TO8UpmvMrUpsyUqtEj5cuHKZPfmghCN6J3Cioj6OGaK/GP5Afl4/Xtcd/p2h/rs37EOeZVXtL0m79YB0esWCruOC7XFxYpVq9Os6pFLKcwZpDIlTirxZUTQAs6qzkm06p98g7BAe+dDq6dso499iYH6TKX/1Y7DzkvgtdizjkXPdsDtQCv9Uw+wp9U7DbGKogPeMa3Md+pvez7W35EiEua++tgy/BBjFFFy3l3WFpO9KWgz7zpm7AeKJt8T11dleCfeXkkUAKIAf5qoIbapsZWwpbkNFhHax2xIPEDgfg1azVY80ZcFuctL7TlLnMQ/0lUTbiSw1nH69MG6zO0b9f6BQdgAmD06yK56mDcYBZUCAwEAAaOCATgwggE0MA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTkrysmcRorSCeFL1JmLO/wiRNxPjAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30EzTSo//z9SzBgBggrBgEFBQcBAQRUMFIwJQYIKwYBBQUHMAGGGWh0dHA6Ly9vY3NwLnBraS5nb29nL2dzcjEwKQYIKwYBBQUHMAKGHWh0dHA6Ly9wa2kuZ29vZy9nc3IxL2dzcjEuY3J0MDIGA1UdHwQrMCkwJ6AloCOGIWh0dHA6Ly9jcmwucGtpLmdvb2cvZ3NyMS9nc3IxLmNybDA7BgNVHSAENDAyMAgGBmeBDAECATAIBgZngQwBAgIwDQYLKwYBBAHWeQIFAwIwDQYLKwYBBAHWeQIFAwMwDQYJKoZIhvcNAQELBQADggEBADSkHrEoo9C0dhemMXoh6dFSPsjbdBZBiLg9NR3t5P+T4Vxfq7vqfM/b5A3Ri1fyJm9bvhdGaJQ3b2t6yMAYN/olUazsaL+yyEn9WprKASOshIArAoyZl+tJaox118fessmXn1hIVw41oeQa1v1vg4Fv74zPl6/AhSrw9U5pCZEt4Wi4wStz6dTZ/CLANx8LZh1J7QJVj2fhMtfTJr9w4z30Z209fOU0iOMy+qduBmpvvYuR7hZL6Dupszfnw0Skfths18dG9ZKb59UhvmaSGZRVbNQpsg3BZlvid0lIKO2d1xozclOzgjXPYovJJIultzkMu34qQb9Sz/yilrbCgj8=", + "format": "base64" + } + ], + "t": 0.065514708, + "address": "8.8.4.4:443", + "server_name": "dns.google", + "alpn": [ + "h2", + 
"http/1.1" + ], + "no_tls_verify": false, + "oddity": "", + "proto": "tcp", + "started": 0.024404083 + } + ], + + // Finally here we see information about the round trip, which + // is formatted according the df-001-httpt data format: + "http_round_trip": [ + { + + // This field indicates whether there was an error during + // the HTTP round trip: + "failure": null, + + // This field contains the request method, URL, and HTTP headers + "request": { + "method": "GET", + "url": "https://dns.google/", + "headers": { + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "accept-language": "en-US;q=0.8,en;q=0.5", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36" + } + }, + + // This field contains the response status code, body, + // and headers. + "response": { + "code": 200, + "headers": { + "accept-ranges": "none", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000,h3-T051=\":443\"; ma=2592000,h3-Q050=\":443\"; ma=2592000,h3-Q046=\":443\"; ma=2592000,h3-Q043=\":443\"; ma=2592000,quic=\":443\"; ma=2592000; v=\"46,43\"", + "cache-control": "private", + "content-security-policy": "object-src 'none';base-uri 'self';script-src 'nonce-bSLcJjaotppZl3Y2moIaxg==' 'strict-dynamic' 'report-sample' 'unsafe-eval' 'unsafe-inline' https: http:;report-uri https://csp.withgoogle.com/csp/honest_dns/1_0;frame-ancestors 'none'", + "content-type": "text/html; charset=UTF-8", + "date": "Fri, 24 Sep 2021 08:51:01 GMT", + "server": "scaffolding on HTTPServer2", + "strict-transport-security": "max-age=31536000; includeSubDomains; preload", + "vary": "Accept-Encoding", + "x-content-type-options": "nosniff", + "x-frame-options": "SAMEORIGIN", + "x-xss-protection": "0" + }, + + // The body in particular is a snapshot of the response + // body: we don't want to read and submit to the OONI + // collector large bodies. 
+ "body": { + "data": "PCFET0NUWVBFIGh0bWw+CjxodG1sIGxhbmc9ImVuIj4gPGhlYWQ+IDx0aXRsZT5Hb29nbGUgUHVibGljIEROUzwvdGl0bGU+ICA8bWV0YSBjaGFyc2V0PSJVVEYtOCI+IDxsaW5rIGhyZWY9Ii9zdGF0aWMvOTNkZDU5NTQvZmF2aWNvbi5wbmciIHJlbD0ic2hvcnRjdXQgaWNvbiIgdHlwZT0iaW1hZ2UvcG5nIj4gPGxpbmsgaHJlZj0iL3N0YXRpYy84MzZhZWJjNi9tYXR0ZXIubWluLmNzcyIgcmVsPSJzdHlsZXNoZWV0Ij4gPGxpbmsgaHJlZj0iL3N0YXRpYy9iODUzNmMzNy9zaGFyZWQuY3NzIiByZWw9InN0eWxlc2hlZXQiPiA8bWV0YSBuYW1lPSJ2aWV3cG9ydCIgY29udGVudD0id2lkdGg9ZGV2aWNlLXdpZHRoLCBpbml0aWFsLXNjYWxlPTEiPiAgPGxpbmsgaHJlZj0iL3N0YXRpYy9kMDVjZDZiYS9yb290LmNzcyIgcmVsPSJzdHlsZXNoZWV0Ij4gPC9oZWFkPiA8Ym9keT4gPHNwYW4gY2xhc3M9ImZpbGxlciB0b3AiPjwvc3Bhbj4gICA8ZGl2IGNsYXNzPSJsb2dvIiB0aXRsZT0iR29vZ2xlIFB1YmxpYyBETlMiPiA8ZGl2IGNsYXNzPSJsb2dvLXRleHQiPjxzcGFuPlB1YmxpYyBETlM8L3NwYW4+PC9kaXY+IDwvZGl2PiAgPGZvcm0gYWN0aW9uPSIvcXVlcnkiIG1ldGhvZD0iR0VUIj4gIDxkaXYgY2xhc3M9InJvdyI+IDxsYWJlbCBjbGFzcz0ibWF0dGVyLXRleHRmaWVsZC1vdXRsaW5lZCI+IDxpbnB1dCB0eXBlPSJ0ZXh0IiBuYW1lPSJuYW1lIiBwbGFjZWhvbGRlcj0iJm5ic3A7Ij4gPHNwYW4+RE5TIE5hbWU8L3NwYW4+IDxwIGNsYXNzPSJoZWxwIj4gRW50ZXIgYSBkb21haW4gKGxpa2UgZXhhbXBsZS5jb20pIG9yIElQIGFkZHJlc3MgKGxpa2UgOC44LjguOCBvciAyMDAxOjQ4NjA6NDg2MDo6ODg0NCkgaGVyZS4gPC9wPiA8L2xhYmVsPiA8YnV0dG9uIGNsYXNzPSJtYXR0ZXItYnV0dG9uLWNvbnRhaW5lZCBtYXR0ZXItcHJpbWFyeSIgdHlwZT0ic3VibWl0Ij5SZXNvbHZlPC9idXR0b24+IDwvZGl2PiA8L2Zvcm0+ICA8c3BhbiBjbGFzcz0iZmlsbGVyIGJvdHRvbSI+PC9zcGFuPiA8Zm9vdGVyIGNsYXNzPSJyb3ciPiA8YSBocmVmPSJodHRwczovL2RldmVsb3BlcnMuZ29vZ2xlLmNvbS9zcGVlZC9wdWJsaWMtZG5zIj5IZWxwPC9hPiA8YSBocmVmPSIvY2FjaGUiPkNhY2hlIEZsdXNoPC9hPiA8c3BhbiBjbGFzcz0iZmlsbGVyIj48L3NwYW4+IDxhIGhyZWY9Imh0dHBzOi8vZGV2ZWxvcGVycy5nb29nbGUuY29tL3NwZWVkL3B1YmxpYy1kbnMvZG9jcy91c2luZyI+IEdldCBTdGFydGVkIHdpdGggR29vZ2xlIFB1YmxpYyBETlMgPC9hPiA8L2Zvb3Rlcj4gICA8c2NyaXB0IG5vbmNlPSJiU0xjSmphb3RwcFpsM1kybW9JYXhnPT0iPmRvY3VtZW50LmZvcm1zWzBdLm5hbWUuZm9jdXMoKTs8L3NjcmlwdD4gPC9ib2R5PiA8L2h0bWw+", + "format": "base64" + }, + + // This field tells us whether the size of the read + // snapshot was smaller than the snapshot 
size. If + // not, then the body has been truncated. + "body_is_truncated": false, + + // These extra fields are not part of the spec and + // hence we prefix them with `x_`. They tell us + // the length of the body and whether the content + // of the body is valid UTF8. + "x_body_length": 1383, + "x_body_is_utf8": true + }, + + // The t field is the moment where we finished the + // round trip and saved the event. The started field + // is instead when we started the round trip. + + // You may notice that the start of the round trip + // is after the `t` of the handshake. This tells us + // that the code first connects, then handshakes, and + // finally creates HTTP code for performing the + // round trip. + "t": 0.338674625, + "started": 0.065926625, + + // As usual we also compute an oddity value related + // in this case to the HTTP round trip. + "oddity": "" + } + ] +} +``` + +Here are some suggestions for follow up measurements: + +1. provoke a connect error by using: + +``` +go run -race ./internal/tutorial/measurex/chapter06 -address 127.0.0.1:1 +``` + +2. provoke a TLS handshake error by using: + +``` +go run -race ./internal/tutorial/measurex/chapter06 -sni example.com +``` + +3. provoke an HTTP round trip error by using: + +``` +go run -race ./internal/tutorial/measurex/chapter06 -address 8.8.8.8:853 +``` + +4. modify the code to fetch an HTTP endpoint instead (hint: you +need to change the HTTPEndpoint's URL scheme); + +5. modify the code to use QUIC and HTTP/3 instead (hint: you need to +change the HTTPEndpoint's network and... is this enough?). + +## Conclusion + +We have seen how to measure the flow of fetching a +specific webpage from an HTTPEndpoint.
+ diff --git a/internal/tutorial/measurex/chapter06/main.go b/internal/tutorial/measurex/chapter06/main.go index 04d8e90211..42ff264345 100644 --- a/internal/tutorial/measurex/chapter06/main.go +++ b/internal/tutorial/measurex/chapter06/main.go @@ -1,3 +1,31 @@ +// -=-=- StartHere -=-=- +// +// # Chapter VI: Getting a webpage from an HTTP/HTTPS/HTTP3 endpoint. +// +// This chapter describes measuring getting a webpage from an +// HTTPS endpoint. We have seen how to TCP connect, we have +// seen how to TLS handshake, now it's time to see how we can +// combine these operations with fetching a webpage from a +// given TCP endpoint speaking HTTP and TLS. (As well as to +// provide you with information on how to otherwise fetch +// from HTTP and HTTP/3 endpoints.) +// +// The program we're going to write, `main.go`, will show a +// high-level operation to perform this measurement in a +// single API call. The code implementing this API call will +// combine the operations we have seen in previous chapter +// with the "give me the webpage" operation. We are still +// quite far away from the ability of "measuring a URL" but +// we are increasingly moving towards more complex operations. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter06/main.go`.) +// +// ## main.go +// +// We have package declaration and imports as usual. +// +// ```Go package main import ( @@ -9,10 +37,28 @@ import ( "time" "github.com/ooni/probe-cli/v3/internal/measurex" - "github.com/ooni/probe-cli/v3/internal/netxlite/iox" "github.com/ooni/probe-cli/v3/internal/runtimex" ) +// ``` +// +// We have factored the three lines to print a measurement +// into the following utility function called `print`. 
+// +// ```Go +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +// ``` +// +// The initial part of the program is pretty much the same as the one +// used in previous chapters, except that we have a few more command line +// flags now, so I will not add further comments. +// +// ```Go func main() { sni := flag.String("sni", "dns.google", "value for SNI extension") address := flag.String("address", "8.8.4.4:443", "remote endpoint address") @@ -21,6 +67,19 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() mx := measurex.NewMeasurerWithDefaultSettings() + // ``` + // + // ### HTTPEndpoint: a description of what to measure + // + // First of all, let us create a description of the endpoint + // for `measurex`. Up to this point we have used endpoint + // to describe a level-4-like address. Therefore, we have seen + // TCP endpoints being used for the TCP connect and the TLS + // handshake, and we have seen QUIC endpoints being used + // for the QUIC handshake. Now, however, we are going + // to need more information to characterize the endpoint.
+ // + // ```Go epnt := &measurex.HTTPEndpoint{ Domain: *sni, Network: "tcp", @@ -34,18 +93,396 @@ func main() { }, Header: measurex.NewHTTPRequestHeaderForMeasuring(), } - cookies := measurex.NewCookieJar() - prep := mx.HTTPEndpointPrepareGet(ctx, epnt, cookies) - m := prep.Measurement() - resp, err := prep.Resume() - if err == nil { - data, err := iox.ReadAllContext(ctx, resp.Body) - if err == nil { - fmt.Printf("{\"full body size\": %d}\n", len(data)) - } - resp.Body.Close() - } - data, err := json.Marshal(m) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + // ``` + // + // In fact, in the above definition we recognize fields + // we have already discussed, such as: + // + // - `Network`, describing whether to use "tcp" or "quic"; + // + // - `Address`, which is the endpoint address. + // + // But we also need to combine into this view of the + // endpoint additional fields for TLS: + // + // - `SNI`, to set the SNI; + // + // - `ALPN`, to set the ALPN; + // + // But then we also need to specify: + // + // - the URL to use; + // + // - the headers to use (for which we're using a handy + // factory for creating reasonable defaults for measuring). + // + // This API is not the highest level API with which to do + // the job, but it's still handy to introduce the + // `measurex.HTTPEndpoint` data structure since it's + // used by higher level APIs. + // + // (You may also be wondering about the CA pool. It turns + // out that for APIs such as this one and for higher + // level APIs, the default is to always use the bundled + // Mozilla CA pool, because this is what we use in + // most cases for performing measurements.) 
+ // + // ### HTTPEndpointGetWithoutCookies + // + // When used with an HTTP URL, the `HTTPEndpointGetWithoutCookies` + // method combines two operations: + // + // - TCP connect + // + // - HTTP GET + // + // When the URL is HTTPS, we do: + // + // - TCP connect + // + // - TLS handshake + // + // - HTTP GET (or HTTP/2 GET depending on the ALPN) + // + // When the `HTTPEndpoint.Network` field value + // is QUIC, instead we do: + // + // - QUIC handshake + // + // - HTTP/3 GET + // + // ```Go + m := mx.HTTPEndpointGetWithoutCookies(ctx, epnt) + // ``` + // + // The arguments for `HTTPEndpointGetWithoutCookies` are: + // + // - the context for deadline/timeout + // + // - the HTTPEndpoint descriptor + // + // The result is an `HTTPEndpointMeasurement` which + // you can inspect with + // + // ``` + // go doc ./internal/measurex.HTTPEndpointMeasurement + // ``` + // + // ### Printing the measurement + // + // Let us now print the resulting measurement. + // + // ```Go + print(m) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter06 +// ``` +// +// This is the JSON output. Let us comment it in detail: +// +// ```Javascript +// { +// // The returned type is called HTTPEndpointMeasurement +// // and you see that here on top we indeed have the +// // information on the endpoint and the URL.
+// "url": "https://dns.google/", +// "network": "tcp", +// "address": "8.8.4.4:443", +// +// // Internally, HTTPEndpointGetWithoutCookies calls +// // TCPConnect and here we see the corresponding event +// "connect": [ +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "operation": "connect", +// "proto": "tcp", +// "t": 0.02422375, +// "started": 0.002269291, +// "oddity": "" +// } +// ], +// +// // These are the I/O operations we have already seen +// // in previous chapters +// "read_write": [ +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 280, +// "operation": "write", +// "proto": "tcp", +// "t": 0.024931791, +// "started": 0.024910416, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 517, +// "operation": "read", +// "proto": "tcp", +// "t": 0.063629791, +// "started": 0.024935666, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 4301, +// "operation": "read", +// "proto": "tcp", +// "t": 0.064183, +// "started": 0.064144208, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 64, +// "operation": "write", +// "proto": "tcp", +// "t": 0.065464041, +// "started": 0.065441333, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 86, +// "operation": "write", +// "proto": "tcp", +// "t": 0.067256083, +// "started": 0.067224375, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 201, +// "operation": "write", +// "proto": "tcp", +// "t": 0.067674416, +// "started": 0.067652375, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 93, +// "operation": "read", +// "proto": "tcp", +// "t": 0.086618708, +// "started": 0.067599208, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 31, +// "operation": 
"write", +// "proto": "tcp", +// "t": 0.086703625, +// "started": 0.0866745, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 2028, +// "operation": "read", +// "proto": "tcp", +// "t": 0.337785916, +// "started": 0.086717333, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 39, +// "operation": "write", +// "proto": "tcp", +// "t": 0.338514916, +// "started": 0.338485375, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": null, +// "num_bytes": 24, +// "operation": "write", +// "proto": "tcp", +// "t": 0.338800833, +// "started": 0.338788625, +// "oddity": "" +// }, +// { +// "address": "8.8.4.4:443", +// "failure": "connection_already_closed", +// "operation": "read", +// "proto": "tcp", +// "t": 0.338888041, +// "started": 0.338523291, +// "oddity": "" +// } +// ], +// +// // Internally, HTTPEndpointGetWithoutCookies calls TLSConnectAndHandshake, +// // and here's the resulting handshake event. Of course, if we +// // specified a QUIC endpoint we would instead see here a +// // QUIC handshake event. And, we would not see any handshake +// // if the URL was instead an HTTP URL. 
+// "tls_handshake": [ +// { +// "cipher_suite": "TLS_AES_128_GCM_SHA256", +// "failure": null, +// "negotiated_proto": "h2", +// "tls_version": "TLSv1.3", +// "peer_certificates": [ +// { +// "data": "MIIF4TCCBMmgAwIBAgIQGa7QSAXLo6sKAAAAAPz4cjANBgkqhkiG9w0BAQsFADBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzAeFw0yMTA4MzAwNDAwMDBaFw0yMTExMjIwMzU5NTlaMBUxEzARBgNVBAMTCmRucy5nb29nbGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC8cttrGHp3SS9YGYgsNLXt43dhW4d8FPULk0n6WYWC+EbMLkLnYXHLZHXJEz1Tor5hrCfHEVyX4xmhY2LCt0jprP6Gfo+gkKyjSV3LO65aWx6ezejvIdQBiLhSo/R5E3NwjMUAbm9PoNfSZSLiP3RjC3Px1vXFVmlcap4bUHnv9OvcPvwV1wmw5IMVzCuGBjCzJ4c4fxgyyggES1mbXZpYcDO4YKhSqIJx2D0gop9wzBQevI/kb35miN1pAvIKK2lgf7kZvYa7HH5vJ+vtn3Vkr34dKUAc/cO62t+NVufADPwn2/Tx8y8fPxlnCmoJeI+MPsw+StTYDawxajkjvZfdAgMBAAGjggL6MIIC9jAOBgNVHQ8BAf8EBAMCBaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUooaIxGAth6+bJh0JHYVWccyuoUcwHwYDVR0jBBgwFoAUinR/r4XN7pXNPZzQ4kYU83E1HScwagYIKwYBBQUHAQEEXjBcMCcGCCsGAQUFBzABhhtodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHMxYzMwMQYIKwYBBQUHMAKGJWh0dHA6Ly9wa2kuZ29vZy9yZXBvL2NlcnRzL2d0czFjMy5kZXIwgawGA1UdEQSBpDCBoYIKZG5zLmdvb2dsZYIOZG5zLmdvb2dsZS5jb22CECouZG5zLmdvb2dsZS5jb22CCzg4ODguZ29vZ2xlghBkbnM2NC5kbnMuZ29vZ2xlhwQICAgIhwQICAQEhxAgAUhgSGAAAAAAAAAAAIiIhxAgAUhgSGAAAAAAAAAAAIhEhxAgAUhgSGAAAAAAAAAAAGRkhxAgAUhgSGAAAAAAAAAAAABkMCEGA1UdIAQaMBgwCAYGZ4EMAQIBMAwGCisGAQQB1nkCBQMwPAYDVR0fBDUwMzAxoC+gLYYraHR0cDovL2NybHMucGtpLmdvb2cvZ3RzMWMzL2ZWSnhiVi1LdG1rLmNybDCCAQMGCisGAQQB1nkCBAIEgfQEgfEA7wB1AH0+8viP/4hVaCTCwMqeUol5K8UOeAl/LmqXaJl+IvDXAAABe5VtuiwAAAQDAEYwRAIgAwzr02ayTnNk/G+HDP50WTZUls3g+9P1fTGR9PEywpYCIAIOIQJ7nJTlcJdSyyOvgzX4BxJDr18mOKJPHlJs1naIAHYAXNxDkv7mq0VEsV6a1FbmEDf71fpH3KFzlLJe5vbHDsoAAAF7lW26IQAABAMARzBFAiAtlIkbCH+QgiO6T6Y/+UAf+eqHB2wdzMNfOoo4SnUhVgIhALPiRtyPMo8fPPxN3VgiXBqVF7tzLWTJUjprOe4kQUCgMA0GCSqGSIb3DQEBCwUAA4IBAQDVq3WWgg6eYSpFLfNgo2KzLKDPkWZx42gW2Tum6JZd6O/Nj+mjYGOyXyryTslUwmONxiq2Ip3PLA/qlbPdYic1F1mDwMHSzRteSe7axwEP6RkoxhMy5zuI4hfijhSrfh
VUZF299PesDf2gI+Vh30s6muHVfQjbXOl/AkAqIPLSetv2mS9MHQLeHcCCXpwsXQJwusZ3+ILrgCRAGv6NLXwbfE0t3OjXV0gnNRp3DWEaF+yrfjE0oU1myeYDNtugsw8VRwTzCM53Nqf/BJffnuShmBBZfZ2jlsPnLys0UqCZo2dg5wdwj3DaKtHO5Pofq6P8r4w6W/aUZCTLUi1jZ3Gc", +// "format": "base64" +// }, +// { +// "data": "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAwMDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzpkgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsXlOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcmBA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKAgOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwLtmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1UdDwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYDVR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYGCCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcwAoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQtMCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcGA1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3BraS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcNAQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQcSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrLRklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U+o53UxtkOCcXCb1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2YrPxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IERlQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGsYye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjOz23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJGAJ2xDx
8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKwjuDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd", +// "format": "base64" +// }, +// { +// "data": "MIIFYjCCBEqgAwIBAgIQd70NbNs2+RrqIQ/E8FjTDTANBgkqhkiG9w0BAQsFADBXMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1zYTEQMA4GA1UECxMHUm9vdCBDQTEbMBkGA1UEAxMSR2xvYmFsU2lnbiBSb290IENBMB4XDTIwMDYxOTAwMDA0MloXDTI4MDEyODAwMDA0MlowRzELMAkGA1UEBhMCVVMxIjAgBgNVBAoTGUdvb2dsZSBUcnVzdCBTZXJ2aWNlcyBMTEMxFDASBgNVBAMTC0dUUyBSb290IFIxMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAthECix7joXebO9y/lD63ladAPKH9gvl9MgaCcfb2jH/76Nu8ai6Xl6OMS/kr9rH5zoQdsfnFl97vufKj6bwSiV6nqlKr+CMny6SxnGPb15l+8Ape62im9MZaRw1NEDPjTrETo8gYbEvs/AmQ351kKSUjB6G00j0uYODP0gmHu81I8E3CwnqIiru6z1kZ1q+PsAewnjHxgsHA3y6mbWwZDrXYfiYaRQM9sHmklCitD38m5agI/pboPGiUU+6DOogrFZYJsuB6jC511pzrp1Zkj5ZPaK49l8KEj8C8QMALXL32h7M1bKwYUH+E4EzNktMg6TO8UpmvMrUpsyUqtEj5cuHKZPfmghCN6J3Cioj6OGaK/GP5Afl4/Xtcd/p2h/rs37EOeZVXtL0m79YB0esWCruOC7XFxYpVq9Os6pFLKcwZpDIlTirxZUTQAs6qzkm06p98g7BAe+dDq6dso499iYH6TKX/1Y7DzkvgtdizjkXPdsDtQCv9Uw+wp9U7DbGKogPeMa3Md+pvez7W35EiEua++tgy/BBjFFFy3l3WFpO9KWgz7zpm7AeKJt8T11dleCfeXkkUAKIAf5qoIbapsZWwpbkNFhHax2xIPEDgfg1azVY80ZcFuctL7TlLnMQ/0lUTbiSw1nH69MG6zO0b9f6BQdgAmD06yK56mDcYBZUCAwEAAaOCATgwggE0MA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTkrysmcRorSCeFL1JmLO/wiRNxPjAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30EzTSo//z9SzBgBggrBgEFBQcBAQRUMFIwJQYIKwYBBQUHMAGGGWh0dHA6Ly9vY3NwLnBraS5nb29nL2dzcjEwKQYIKwYBBQUHMAKGHWh0dHA6Ly9wa2kuZ29vZy9nc3IxL2dzcjEuY3J0MDIGA1UdHwQrMCkwJ6AloCOGIWh0dHA6Ly9jcmwucGtpLmdvb2cvZ3NyMS9nc3IxLmNybDA7BgNVHSAENDAyMAgGBmeBDAECATAIBgZngQwBAgIwDQYLKwYBBAHWeQIFAwIwDQYLKwYBBAHWeQIFAwMwDQYJKoZIhvcNAQELBQADggEBADSkHrEoo9C0dhemMXoh6dFSPsjbdBZBiLg9NR3t5P+T4Vxfq7vqfM/b5A3Ri1fyJm9bvhdGaJQ3b2t6yMAYN/olUazsaL+yyEn9WprKASOshIArAoyZl+tJaox118fessmXn1hIVw41oeQa1v1vg4Fv74zPl6/AhSrw9U5pCZEt4Wi4wStz6dTZ/CLANx8LZh1J7QJVj2fhMtfTJr9w4z30Z209fOU0iOMy+qduBmpvvYuR7hZL6Dupszfnw0Skfths18dG9ZKb59
UhvmaSGZRVbNQpsg3BZlvid0lIKO2d1xozclOzgjXPYovJJIultzkMu34qQb9Sz/yilrbCgj8=", +// "format": "base64" +// } +// ], +// "t": 0.065514708, +// "address": "8.8.4.4:443", +// "server_name": "dns.google", +// "alpn": [ +// "h2", +// "http/1.1" +// ], +// "no_tls_verify": false, +// "oddity": "", +// "proto": "tcp", +// "started": 0.024404083 +// } +// ], +// +// // Finally here we see information about the round trip, which +// // is formatted according to the df-001-httpt data format: +// "http_round_trip": [ +// { +// +// // This field indicates whether there was an error during +// // the HTTP round trip: +// "failure": null, +// +// // This field contains the request method, URL, and HTTP headers +// "request": { +// "method": "GET", +// "url": "https://dns.google/", +// "headers": { +// "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", +// "accept-language": "en-US;q=0.8,en;q=0.5", +// "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36" +// } +// }, +// +// // This field contains the response status code, body, +// // and headers.
+// "response": { +// "code": 200, +// "headers": { +// "accept-ranges": "none", +// "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000,h3-T051=\":443\"; ma=2592000,h3-Q050=\":443\"; ma=2592000,h3-Q046=\":443\"; ma=2592000,h3-Q043=\":443\"; ma=2592000,quic=\":443\"; ma=2592000; v=\"46,43\"", +// "cache-control": "private", +// "content-security-policy": "object-src 'none';base-uri 'self';script-src 'nonce-bSLcJjaotppZl3Y2moIaxg==' 'strict-dynamic' 'report-sample' 'unsafe-eval' 'unsafe-inline' https: http:;report-uri https://csp.withgoogle.com/csp/honest_dns/1_0;frame-ancestors 'none'", +// "content-type": "text/html; charset=UTF-8", +// "date": "Fri, 24 Sep 2021 08:51:01 GMT", +// "server": "scaffolding on HTTPServer2", +// "strict-transport-security": "max-age=31536000; includeSubDomains; preload", +// "vary": "Accept-Encoding", +// "x-content-type-options": "nosniff", +// "x-frame-options": "SAMEORIGIN", +// "x-xss-protection": "0" +// }, +// +// // The body in particular is a snapshot of the response +// // body: we don't want to read and submit to the OONI +// // collector large bodies. 
+// "body": { +// "data": "PCFET0NUWVBFIGh0bWw+CjxodG1sIGxhbmc9ImVuIj4gPGhlYWQ+IDx0aXRsZT5Hb29nbGUgUHVibGljIEROUzwvdGl0bGU+ICA8bWV0YSBjaGFyc2V0PSJVVEYtOCI+IDxsaW5rIGhyZWY9Ii9zdGF0aWMvOTNkZDU5NTQvZmF2aWNvbi5wbmciIHJlbD0ic2hvcnRjdXQgaWNvbiIgdHlwZT0iaW1hZ2UvcG5nIj4gPGxpbmsgaHJlZj0iL3N0YXRpYy84MzZhZWJjNi9tYXR0ZXIubWluLmNzcyIgcmVsPSJzdHlsZXNoZWV0Ij4gPGxpbmsgaHJlZj0iL3N0YXRpYy9iODUzNmMzNy9zaGFyZWQuY3NzIiByZWw9InN0eWxlc2hlZXQiPiA8bWV0YSBuYW1lPSJ2aWV3cG9ydCIgY29udGVudD0id2lkdGg9ZGV2aWNlLXdpZHRoLCBpbml0aWFsLXNjYWxlPTEiPiAgPGxpbmsgaHJlZj0iL3N0YXRpYy9kMDVjZDZiYS9yb290LmNzcyIgcmVsPSJzdHlsZXNoZWV0Ij4gPC9oZWFkPiA8Ym9keT4gPHNwYW4gY2xhc3M9ImZpbGxlciB0b3AiPjwvc3Bhbj4gICA8ZGl2IGNsYXNzPSJsb2dvIiB0aXRsZT0iR29vZ2xlIFB1YmxpYyBETlMiPiA8ZGl2IGNsYXNzPSJsb2dvLXRleHQiPjxzcGFuPlB1YmxpYyBETlM8L3NwYW4+PC9kaXY+IDwvZGl2PiAgPGZvcm0gYWN0aW9uPSIvcXVlcnkiIG1ldGhvZD0iR0VUIj4gIDxkaXYgY2xhc3M9InJvdyI+IDxsYWJlbCBjbGFzcz0ibWF0dGVyLXRleHRmaWVsZC1vdXRsaW5lZCI+IDxpbnB1dCB0eXBlPSJ0ZXh0IiBuYW1lPSJuYW1lIiBwbGFjZWhvbGRlcj0iJm5ic3A7Ij4gPHNwYW4+RE5TIE5hbWU8L3NwYW4+IDxwIGNsYXNzPSJoZWxwIj4gRW50ZXIgYSBkb21haW4gKGxpa2UgZXhhbXBsZS5jb20pIG9yIElQIGFkZHJlc3MgKGxpa2UgOC44LjguOCBvciAyMDAxOjQ4NjA6NDg2MDo6ODg0NCkgaGVyZS4gPC9wPiA8L2xhYmVsPiA8YnV0dG9uIGNsYXNzPSJtYXR0ZXItYnV0dG9uLWNvbnRhaW5lZCBtYXR0ZXItcHJpbWFyeSIgdHlwZT0ic3VibWl0Ij5SZXNvbHZlPC9idXR0b24+IDwvZGl2PiA8L2Zvcm0+ICA8c3BhbiBjbGFzcz0iZmlsbGVyIGJvdHRvbSI+PC9zcGFuPiA8Zm9vdGVyIGNsYXNzPSJyb3ciPiA8YSBocmVmPSJodHRwczovL2RldmVsb3BlcnMuZ29vZ2xlLmNvbS9zcGVlZC9wdWJsaWMtZG5zIj5IZWxwPC9hPiA8YSBocmVmPSIvY2FjaGUiPkNhY2hlIEZsdXNoPC9hPiA8c3BhbiBjbGFzcz0iZmlsbGVyIj48L3NwYW4+IDxhIGhyZWY9Imh0dHBzOi8vZGV2ZWxvcGVycy5nb29nbGUuY29tL3NwZWVkL3B1YmxpYy1kbnMvZG9jcy91c2luZyI+IEdldCBTdGFydGVkIHdpdGggR29vZ2xlIFB1YmxpYyBETlMgPC9hPiA8L2Zvb3Rlcj4gICA8c2NyaXB0IG5vbmNlPSJiU0xjSmphb3RwcFpsM1kybW9JYXhnPT0iPmRvY3VtZW50LmZvcm1zWzBdLm5hbWUuZm9jdXMoKTs8L3NjcmlwdD4gPC9ib2R5PiA8L2h0bWw+", +// "format": "base64" +// }, +// +// // This field tells us whether the size of the read +// // snapshot was smaller than 
the snapshot size. If +// // not, then the body has been truncated. +// "body_is_truncated": false, +// +// // These extra fields are not part of the spec and +// // hence we prefix them with `x_`. They tell us +// // the length of the body and whether the content +// // of the body is valid UTF8. +// "x_body_length": 1383, +// "x_body_is_utf8": true +// }, +// +// // The t field is the moment where we finished the +// // round trip and saved the event. The started field +// // is instead when we started the round trip. +// +// // You may notice that the start of the round trip +// // is after the `t` of the handshake. This tells us +// // that the code first connects, then handshakes, and +// // finally creates HTTP code for performing the +// // round trip. +// "t": 0.338674625, +// "started": 0.065926625, +// +// // As usual we also compute an oddity value related +// // in this case to the HTTP round trip. +// "oddity": "" +// } +// ] +// } +// ``` +// +// Here are some suggestions for follow up measurements: +// +// 1. provoke a connect error by using: +// +// ``` +// go run -race ./internal/tutorial/measurex/chapter06 -address 127.0.0.1:1 +// ``` +// +// 2. provoke a TLS handshake error by using: +// +// ``` +// go run -race ./internal/tutorial/measurex/chapter06 -sni example.com +// ``` +// +// 3. provoke an HTTP round trip error by using: +// +// ``` +// go run -race ./internal/tutorial/measurex/chapter06 -address 8.8.8.8:853 +// ``` +// +// 4. modify the code to fetch an HTTP endpoint instead (hint: you +// need to change the HTTPEndpoint's URL scheme); +// +// 5. modify the code to use QUIC and HTTP/3 instead (hint: you need to +// change the HTTPEndpoint's network and... is this enough?). +// +// ## Conclusion +// +// We have seen how to measure the flow of fetching a +// specific webpage from an HTTPEndpoint.
+// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter07/README.md b/internal/tutorial/measurex/chapter07/README.md index e69de29bb2..7879009639 100644 --- a/internal/tutorial/measurex/chapter07/README.md +++ b/internal/tutorial/measurex/chapter07/README.md @@ -0,0 +1,142 @@ + +# Chapter VII: Measuring all the HTTPEndpoints for a domain + +We are now going to combine DNS resolutions with getting +HTTPEndpoints. Conceptually, the DNS resolution yields +us a list of IP addresses. For each address, we build the +HTTPEndpoint and fetch it like we did in chapter06. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter07/main.go`.) + +## main.go + +We have package declaration and imports as usual. + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +``` + +Here we define a helper type for containing the DNS +measurement and the subsequent endpoints measurements. + +```Go +type measurement struct { + DNS *measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement +} + +``` + +The rest of the program is quite similar to what we had before.
+ +```Go +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +func main() { + URL := flag.String("url", "https://google.com/", "URL to fetch") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() +``` + +This is where the main.go file starts to diverge. We create an +instance of our measurement type to hold the results. + +```Go + m := &measurement{} +``` + +Then we perform a DNS lookup using UDP like we saw in chapter03. + +```Go + m.DNS = mx.LookupHostUDP(ctx, parsed.Hostname(), *address) +``` + +Like we did in the previous chapter, we create suitable HTTP +headers for performing an HTTP measurement. + +```Go + headers := measurex.NewHTTPRequestHeaderForMeasuring() +``` + +The following is an entirely new function we're learning +about just now. `AllHTTPEndpointsForURL` is a free function +in `measurex` that given: + +- an already parsed HTTP/HTTPS URL + +- headers we want to use + +- the result of one or more DNS queries + +builds us a list of HTTPEndpoint data structures. + +```Go + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") +``` + +This function may fail if, for example, the URL is not HTTP/HTTPS. We +handle the error panicking, because this is an example program. + +We are almost done now: we loop over all the endpoints and apply the +`HTTPEndpointGetWithoutCookies` method we have seen in chapter06. 
+ +```Go + for _, epnt := range httpEndpoints { + m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGetWithoutCookies(ctx, epnt)) + } +``` + +Finally, we print the results. + +```Go + print(m) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter07 +``` + +Please, check the JSON output. Do you recognize the fields +we have described in previous chapters? + +Can you provoke common errors such as DNS resolution +errors, TCP connect errors, TLS handshake errors, and +HTTP round trip errors? How does the JSON change? + +## Conclusion + +We have seen how to combine DNS resolutions (chapter01 and +chapter03) with HTTPEndpoint GET (chapter06) to measure +all the HTTP endpoints for a given domain. + diff --git a/internal/tutorial/measurex/chapter07/main.go b/internal/tutorial/measurex/chapter07/main.go index da5191a16c..851df2d664 100644 --- a/internal/tutorial/measurex/chapter07/main.go +++ b/internal/tutorial/measurex/chapter07/main.go @@ -1,3 +1,20 @@ +// -=-=- StartHere -=-=- +// +// # Chapter VII: Measuring all the HTTPEndpoints for a domain +// +// We are now going to combine DNS resolutions with getting +// HTTPEndpoints. Conceptually, the DNS resolution yields +// us a list of IP addresses. For each address, we build the +// HTTPEndpoint and fetch it like we did in chapter06. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter07/main.go`.) +// +// ## main.go +// +// We have package declaration and imports as usual. +// +// ```Go package main import ( @@ -12,11 +29,28 @@ import ( "github.com/ooni/probe-cli/v3/internal/runtimex" ) +// ``` +// +// Here we define a helper type for containing the DNS +// measurement and the subsequent endpoints measurements.
+// +// ```Go type measurement struct { DNS *measurex.DNSMeasurement Endpoints []*measurex.HTTPEndpointMeasurement } +// ``` +// +// The rest of the program is quite similar to what we had before. +// +// ```Go +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + func main() { URL := flag.String("url", "https://google.com/", "URL to fetch") address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") @@ -27,16 +61,84 @@ func main() { parsed, err := url.Parse(*URL) runtimex.PanicOnError(err, "url.Parse failed") mx := measurex.NewMeasurerWithDefaultSettings() + // ``` + // + // This is where the main.go file starts to diverge. We create an + // instance of our measurement type to hold the results. + // + // ```Go m := &measurement{} + // ``` + // + // Then we perform a DNS lookup using UDP like we saw in chapter03. + // + // ```Go m.DNS = mx.LookupHostUDP(ctx, parsed.Hostname(), *address) + // ``` + // + // Like we did in the previous chapter, we create suitable HTTP + // headers for performing an HTTP measurement. + // + // ```Go headers := measurex.NewHTTPRequestHeaderForMeasuring() + // ``` + // + // The following is an entirely new function we're learning + // about just now. `AllHTTPEndpointsForURL` is a free function + // in `measurex` that given: + // + // - an already parsed HTTP/HTTPS URL + // + // - headers we want to use + // + // - the result of one or more DNS queries + // + // builds us a list of HTTPEndpoint data structures. + // + // ```Go httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS) runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") - cookies := measurex.NewCookieJar() + // ``` + // + // This function may fail if, for example, the URL is not HTTP/HTTPS. We + // handle the error panicking, because this is an example program. 
+ // + // We are almost done now: we loop over all the endpoints and apply the + // `HTTPEndpointGetWithoutCookies` method we have seen in chapter06. + // + // ```Go for _, epnt := range httpEndpoints { - m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGet(ctx, epnt, cookies)) + m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGetWithoutCookies(ctx, epnt)) } - data, err := json.Marshal(m) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + // ``` + // + // Finally, we print the results. + // + // ```Go + print(m) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter07 +// ``` +// +// Please, check the JSON output. Do you recognize the fields +// we have described in previous chapters? +// +// Can you provoke common errors such as DNS resolution +// errors, TCP connect errors, TLS handshake errors, and +// HTTP round trip errors? How does the JSON change? +// +// ## Conclusion +// +// We have seen how to combine DNS resolutions (chapter01 and +// chapter03) with HTTPEndpoint GET (chapter06) to measure +// all the HTTP endpoints for a given domain. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter08/README.md b/internal/tutorial/measurex/chapter08/README.md index e69de29bb2..5baba856e0 100644 --- a/internal/tutorial/measurex/chapter08/README.md +++ b/internal/tutorial/measurex/chapter08/README.md @@ -0,0 +1,132 @@ + +# Chapter VIII: HTTPSSvc DNS queries + +The program we see here is _really_ similar to the one we +discussed in the previous chapter. The main difference +is the following: now we also issue HTTPSSvc DNS queries +to discover HTTP/3 endpoints. (Because HTTPSSvc is +still a draft and is mostly implemented by Cloudflare +at this point, we are going to use as the example +input URL a Cloudflare URL.) + +(This file is auto-generated. Do not edit it directly!
To apply +changes you need to modify `./internal/tutorial/measurex/chapter08/main.go`.) + +## main.go + +The beginning of the program is pretty much the same. We +have just amended our `measurement` type to contain multiple +`DNSMeasurement` results. + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS []*measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement +} + +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} +``` +### Call LookupHTTPSSvc + +Here we perform the `LookupHostUDP` we performed in the +previous chapter and then we call `LookupHTTPSSvcUDP`. + +```Go + m.DNS = append(m.DNS, mx.LookupHostUDP(ctx, parsed.Hostname(), *address)) + m.DNS = append(m.DNS, mx.LookupHTTPSSvcUDP(ctx, parsed.Hostname(), *address)) +``` + +The `LookupHTTPSSvcUDP` function has the same signature +as `LookupHostUDP` _but_ it behaves differently. Rather than +querying for `A` and `AAAA`, it performs an `HTTPS` DNS +lookup. This query returns: + +1. a list of ALPNs for the domain; + +2. a list of IPv4 addresses; + +3. a list of IPv6 addresses. + +### Build an []HTTPEndpoint and run serial measurements + +Here we call `AllHTTPEndpointsForURL` like we did in the +previous chapter.
However, note that we pass to it the +whole content of `m.DNS`, which now contains not only the +A/AAAA lookup results but also the HTTPS lookup results. + +The `AllHTTPEndpointsForURL` function will recognize that +we also have HTTPS lookups and, if the "h3" ALPN is +present, will _also_ build HTTP/3 endpoints using "quic" +as the `HTTPEndpoint.Network`. + +```Go + headers := measurex.NewHTTPRequestHeaderForMeasuring() + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") +``` + +This is it. The rest of the program is exactly the same. + +```Go + for _, epnt := range httpEndpoints { + m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGetWithoutCookies(ctx, epnt)) + } + print(m) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter08 +``` + +Please, check the JSON output. Do you recognize the fields +we have described in previous chapters? You should see +that, compared to previous chapters, now we're also testing +QUIC/HTTP3 endpoints. + +Can you provoke common errors such as DNS resolution +errors, TCP connect errors, TLS handshake errors, and +HTTP round trip errors? What is a good way to cause +timeout and SNI mismatch errors for QUIC? + +## Conclusion + +We have seen how to extend fetching all the HTTPS +endpoints to include the QUIC/HTTP3 endpoints discovered +using HTTPSSvc. + diff --git a/internal/tutorial/measurex/chapter08/main.go b/internal/tutorial/measurex/chapter08/main.go index 74c90749b7..eb1c6cd244 100644 --- a/internal/tutorial/measurex/chapter08/main.go +++ b/internal/tutorial/measurex/chapter08/main.go @@ -1,3 +1,25 @@ +// -=-=- StartHere -=-=- +// +// # Chapter VIII: HTTPSSvc DNS queries +// +// The program we see here is _really_ similar to the one we +// discussed in the previous chapter.
The main difference +// is the following: now we also issue HTTPSSvc DNS queries +// to discover HTTP/3 endpoints. (Because HTTPSSvc is +// still a draft and is mostly implemented by Cloudflare +// at this point, we are going to use as the example +// input URL a Cloudflare URL.) +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter08/main.go`.) +// +// ## main.go +// +// The beginning of the program is pretty much the same. We +// have just amended our `measurement` type to contain multiple +// `DNSMeasurement` results. +// +// ```Go package main import ( @@ -17,6 +39,12 @@ type measurement struct { Endpoints []*measurex.HTTPEndpointMeasurement } +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + func main() { URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") @@ -28,16 +56,79 @@ func main() { runtimex.PanicOnError(err, "url.Parse failed") mx := measurex.NewMeasurerWithDefaultSettings() m := &measurement{} + // ``` + // ### Call LookupHTTPSSvc + // + // Here we perform the `LookupHostUDP` we performed in the + // previous chapter and then we call `LookupHTTPSSvcUDP`. + // + // ```Go m.DNS = append(m.DNS, mx.LookupHostUDP(ctx, parsed.Hostname(), *address)) m.DNS = append(m.DNS, mx.LookupHTTPSSvcUDP(ctx, parsed.Hostname(), *address)) + // ``` + // + // The `LookupHTTPSSvcUDP` function has the same signature + // as `LookupHostUDP` _but_ it behaves differently. Rather than + // querying for `A` and `AAAA`, it performs an `HTTPS` DNS + // lookup. This query returns: + // + // 1. a list of ALPNs for the domain; + // + // 2. a list of IPv4 addresses; + // + // 3. a list of IPv6 addresses.
+ // + // ### Build an []HTTPEndpoint and run serial measurements + // + // Here we call `AllHTTPEndpointsForURL` like we did in the + // previous chapter. However, note that we pass to it the + // whole content of `m.DNS`, which now contains not only the + // A/AAAA lookups results but also the HTTPS lookup results. + // + // The `AllHTTPEndpointsForURL` function will recognize that + // we also have HTTPS lookups and, if the "h3" ALPN is + // present, will _also_ build HTTP/3 endpoints using "quic" + // as the `HTTPEndpoint.Network`. + // + // ```Go headers := measurex.NewHTTPRequestHeaderForMeasuring() httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") - cookies := measurex.NewCookieJar() + // ``` + // + // This is it. The rest of the program is exactly the same. + // + // ```Go for _, epnt := range httpEndpoints { - m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGet(ctx, epnt, cookies)) + m.Endpoints = append(m.Endpoints, mx.HTTPEndpointGetWithoutCookies(ctx, epnt)) } - data, err := json.Marshal(m) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + print(m) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter08 +// ``` +// +// Please, check the JSON output. Do you recognize the fields +// we have described in previous chapters? You should see +// that, compared to previous chapters, now we're also testing +// QUIC/HTTP3 endpoints. +// +// Can you provoke common errors such as DNS resolution +// errors, TCP connect errors, TLS handshake errors, and +// HTTP round trip errors? What is a good way to cause +// timeout and SNI mismatch errors for QUIC? +// +// ## Conclusion +// +// We have seen how to extend fetching all the HTTPS +// endpoints to include the QUIC/HTTP3 endpoints discovered +// using HTTPSSvc. 
+// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter09/README.md b/internal/tutorial/measurex/chapter09/README.md index e69de29bb2..fe20445e18 100644 --- a/internal/tutorial/measurex/chapter09/README.md +++ b/internal/tutorial/measurex/chapter09/README.md @@ -0,0 +1,124 @@ + +# Chapter IX: Parallel HTTPEndpoint measurements + +The program we see here is _really_ similar to the one we +discussed in the previous chapter. The main difference +is the following: rather than looping through the list of +HTTPEndpoint, we call a function that runs through the +list of endpoints using a small pool of background workers. + +There is a trade off between quick measurements and +false positives. A timeout is one of the most common +ways of censoring HTTPS and HTTP3 endpoints. So, if +we run measurements sequentially, a whole scan could +in principle take a long time. On the other hand, +if we run too many parallel measurements, we may cause +our own congestion and maybe some measurements will +fail because of that. Our solution to this problem is +to have low parallelism: at the moment of writing +this note, we have three workers. If you submit +more than three HTTPEndpoint at a time, we will +service the first three immediately and all the +other endpoints will be queued for later measurement. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter09/main.go`.) + +## main.go + +The beginning of the program is pretty much the same.
+ +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS []*measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement +} + +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} + m.DNS = append(m.DNS, mx.LookupHostUDP(ctx, parsed.Hostname(), *address)) + m.DNS = append(m.DNS, mx.LookupHTTPSSvcUDP(ctx, parsed.Hostname(), *address)) + headers := measurex.NewHTTPRequestHeaderForMeasuring() + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") +``` + +This is where the program changes. First, we need to create a jar +for cookies because the API we're about to call requires a +cookie jar. (We mostly use this API with redirects and we want +to have cookies with redirects because a small portion of the +URLs we typically test require cookies to properly redirect, +see https://github.com/ooni/probe/issues/1727 for more information). + +Then, we call `HTTPEndpointGetParallel`. The arguments are: + +- as usual, the context + +- the cookie jar + +- all the endpoints to measure + +```Go + cookies := measurex.NewCookieJar() + for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) 
{ + m.Endpoints = append(m.Endpoints, epnt) + } +``` + +The `HTTPEndpointGetParallel` method returns a channel where it +posts `HTTPEndpointMeasurements`. Once the input list has been +fully measured, this method closes the returned channel. + +Like we did before, we append the resulting measurements to +our `m` container and we print it. + +```Go + print(m) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter09 +``` + +Take a look at the JSON output. Can you spot that +endpoints measurements are run in parallel? + +## Conclusion + +We have seen how to run HTTPEndpoint measurements in parallel. + diff --git a/internal/tutorial/measurex/chapter09/main.go b/internal/tutorial/measurex/chapter09/main.go index 2dde889807..f431ff3a62 100644 --- a/internal/tutorial/measurex/chapter09/main.go +++ b/internal/tutorial/measurex/chapter09/main.go @@ -1,3 +1,35 @@ +// -=-=- StartHere -=-=- +// +// # Chapter IX: Parallel HTTPEndpoint measurements +// +// The program we see here is _really_ similar to the one we +// discussed in the previous chapter. The main difference +// is the following: rather than looping through the list of +// HTTPEndpoint, we call a function that runs through the +// list of endpoints using a small pool of background workers. +// +// There is a trade off between quick measurements and +// false positives. A timeout is one of the most common +// ways of censoring HTTPS and HTTP3 endpoints. So, if +// we run measurements sequentially, a whole scan could +// in principle take a long time. On the other hand, +// if we run too many parallel measurements, we may cause +// our own congestion and maybe some measurements will +// fail because of that. Our solution to this problem is +// to have low parallelism: at the moment of writing +// this note, we have three workers. 
If you submit +// more than three HTTPEndpoint at a time, we will +// service the first three immediately and all the +// other endpoints will be queued for later measurement. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter09/main.go`.) +// +// ## main.go +// +// The beginning of the program is pretty much the same. +// +// ```Go package main import ( @@ -17,6 +49,12 @@ type measurement struct { Endpoints []*measurex.HTTPEndpointMeasurement } +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + func main() { URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") address := flag.String("address", "8.8.4.4:53", "DNS-over-UDP server address") @@ -33,11 +71,56 @@ func main() { headers := measurex.NewHTTPRequestHeaderForMeasuring() httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") + // ``` + // + // This is where the program changes. First, we need to create a jar + // for cookies because the API we're about to call requires a + // cookie jar. (We mostly use this API with redirects and we want + // to have cookies with redirects because a small portion of the + // URLs we typically test require cookies to properly redirect, + // see https://github.com/ooni/probe/issues/1727 for more information). + // + // Then, we call `HTTPEndpointGetParallel`. The arguments are: + // + // - as usual, the context + // + // - the cookie jar + // + // - all the endpoints to measure + // + // ```Go cookies := measurex.NewCookieJar() for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) 
{ m.Endpoints = append(m.Endpoints, epnt) } - data, err := json.Marshal(m) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + // ``` + // + // The `HTTPEndpointGetParallel` method returns a channel where it + // posts `HTTPEndpointMeasurements`. Once the input list has been + // fully measured, this method closes the returned channel. + // + // Like we did before, we append the resulting measurements to + // our `m` container and we print it. + // + // ```Go + print(m) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter09 +// ``` +// +// Take a look at the JSON output. Can you spot that +// endpoint measurements are run in parallel? +// +// ## Conclusion +// +// We have seen how to run HTTPEndpoint measurements in parallel. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter10/README.md b/internal/tutorial/measurex/chapter10/README.md index e69de29bb2..d851a06da6 100644 --- a/internal/tutorial/measurex/chapter10/README.md +++ b/internal/tutorial/measurex/chapter10/README.md @@ -0,0 +1,127 @@ + +# Chapter X: Parallel DNS lookups + +The program we see here is _really_ similar to the one we +discussed in the previous chapter. The main difference +is the following: rather than performing DNS lookups +sequentially, we call a function that runs through the +list of resolvers and runs them in parallel. + +Again, we are going to use low parallelism for the same +rationale mentioned in chapter09. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter10/main.go`.) + +## main.go + +The beginning of the program is pretty much the same. 
+ +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + DNS []*measurex.DNSMeasurement + Endpoints []*measurex.HTTPEndpointMeasurement +} + +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +func main() { + URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + parsed, err := url.Parse(*URL) + runtimex.PanicOnError(err, "url.Parse failed") + mx := measurex.NewMeasurerWithDefaultSettings() + m := &measurement{} +``` + +The bulk of the difference is here. We create +a list of DNS resolvers. For each of them, we specify +the type and the endpoint address. (There is no +endpoint address for the system resolver, therefore +we leave its address empty.) + +```Go + resolvers := []*measurex.ResolverInfo{{ + Network: measurex.ResolverUDP, + Address: "8.8.8.8:53", + }, { + Network: measurex.ResolverUDP, + Address: "8.8.4.4:53", + }, { + Network: measurex.ResolverUDP, + Address: "1.1.1.1:53", + }, { + Network: measurex.ResolverUDP, + Address: "1.0.0.1:53", + }, { + Network: measurex.ResolverSystem, + Address: "", + }} +``` + +Then we call `LookupURLHostParallel`. This function runs +the queries that make sense given the input URL using a +pool of (currently three) background goroutines. + +When I say "queries that make sense", I mostly mean +that we only query for HTTPSSvc when the input URL +scheme is "https". Otherwise, if it's just "http", it +does not make sense to send this query. + +```Go + for dns := range mx.LookupURLHostParallel(ctx, parsed, resolvers...) 
{ + m.DNS = append(m.DNS, dns) + } +``` + +The rest of the program is exactly like in chapter09. + +```Go + headers := measurex.NewHTTPRequestHeaderForMeasuring() + httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) + runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") + cookies := measurex.NewCookieJar() + for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) { + m.Endpoints = append(m.Endpoints, epnt) + } + print(m) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter10 +``` + +Take a look at the JSON output. Can you spot that +DNS queries are run in parallel? + +## Conclusion + +We have seen how to run parallel DNS queries. + diff --git a/internal/tutorial/measurex/chapter10/main.go b/internal/tutorial/measurex/chapter10/main.go index 9643d4a15f..566100b6b1 100644 --- a/internal/tutorial/measurex/chapter10/main.go +++ b/internal/tutorial/measurex/chapter10/main.go @@ -1,3 +1,24 @@ +// -=-=- StartHere -=-=- +// +// # Chapter X: Parallel DNS lookups +// +// The program we see here is _really_ similar to the one we +// discussed in the previous chapter. The main difference +// is the following: rather than performing DNS lookups +// sequentially, we call a function that runs through the +// list of resolvers and runs them in parallel. +// +// Again, we are going to use low parallelism for the same +// rationale mentioned in chapter09. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter10/main.go`.) +// +// ## main.go +// +// The beginning of the program is pretty much the same. 
+// +// ```Go package main import ( @@ -17,6 +38,12 @@ type measurement struct { Endpoints []*measurex.HTTPEndpointMeasurement } +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + func main() { URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") @@ -26,7 +53,17 @@ func main() { parsed, err := url.Parse(*URL) runtimex.PanicOnError(err, "url.Parse failed") mx := measurex.NewMeasurerWithDefaultSettings() - mx.Resolvers = []*measurex.ResolverInfo{{ + m := &measurement{} + // ``` + // + // The bulk of the difference is here. We create + // a list of DNS resolvers. For each of them, we specify + // the type and the endpoint address. (There is no + // endpoint address for the system resolver, therefore + // we leave its address empty.) + // + // ```Go + resolvers := []*measurex.ResolverInfo{{ Network: measurex.ResolverUDP, Address: "8.8.8.8:53", }, { @@ -38,11 +75,30 @@ func main() { }, { Network: measurex.ResolverUDP, Address: "1.0.0.1:53", + }, { + Network: measurex.ResolverSystem, + Address: "", }} - m := &measurement{} - for dns := range mx.LookupURLHostParallel(ctx, parsed) { + // ``` + // + // Then we call `LookupURLHostParallel`. This function runs + // the queries that make sense given the input URL using a + // pool of (currently three) background goroutines. + // + // When I say "queries that make sense", I mostly mean + // that we only query for HTTPSSvc when the input URL + // scheme is "https". Otherwise, if it's just "http", it + // does not make sense to send this query. + // + // ```Go + for dns := range mx.LookupURLHostParallel(ctx, parsed, resolvers...) { m.DNS = append(m.DNS, dns) } + // ``` + // + // The rest of the program is exactly like in chapter09. 
+ // + // ```Go headers := measurex.NewHTTPRequestHeaderForMeasuring() httpEndpoints, err := measurex.AllHTTPEndpointsForURL(parsed, headers, m.DNS...) runtimex.PanicOnError(err, "cannot get all the HTTP endpoints") @@ -50,7 +106,24 @@ func main() { for epnt := range mx.HTTPEndpointGetParallel(ctx, cookies, httpEndpoints...) { m.Endpoints = append(m.Endpoints, epnt) } - data, err := json.Marshal(m) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + print(m) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter10 +// ``` +// +// Take a look at the JSON output. Can you spot that +// DNS queries are run in parallel? +// +// ## Conclusion +// +// We have seen how to run parallel DNS queries. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter11/README.md b/internal/tutorial/measurex/chapter11/README.md index e69de29bb2..482e7dd222 100644 --- a/internal/tutorial/measurex/chapter11/README.md +++ b/internal/tutorial/measurex/chapter11/README.md @@ -0,0 +1,126 @@ + +# Chapter XI: Measuring a URL + +This program shows how to measure an HTTP/HTTPS URL. We +are going to call an API whose implementation is +basically the same code we have seen in the previous +chapter, to obtain an URL measurement in a more compact +way. (As an historical note, the API we are going to +call has indeed been written as a refactoring of +the code we introduced in the previous chapter.) + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter11/main.go`.) + +## main.go + +The beginning of the program is much simpler. We have removed +out custom measurement type. 
We are now going to use the +`URLMeasurement` type (`go doc ./internal/measurex.URLMeasurement`), +which has the same fields of `measurement` in chapter10 _plus_ +some extra fields that we'll examine in a later chapter. + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +func main() { + URL := flag.String("url", "https://www.google.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() +``` + +We create a measurer, cookies, and headers like we +saw in the previous chapter. + +```Go + mx := measurex.NewMeasurerWithDefaultSettings() + cookies := measurex.NewCookieJar() + headers := measurex.NewHTTPRequestHeaderForMeasuring() +``` + +Then we call `MeasureURL`. This function's implementation +is in `./internal/measurex/measurer.go` and is pretty +much a refactoring of the code in chapter10. + +The arguments are: + +- the context as usual + +- the unparsed URL to measure + +- the headers we want to use + +- a jar for cookies + +```Go + m, err := mx.MeasureURL(ctx, *URL, headers, cookies) +``` +The return value is either an `URLMeasurement` +or an error. The error happens, for example, if +the input URL scheme is not "http" or "https" (which +we handled by panicking in chapter10). + +Now, rather than panicking inside `MeasureURL`, we +return the error to the caller and we `panic` +here on `main` using the `PanicOnError` function. 
+ +```Go + runtimex.PanicOnError(err, "mx.MeasureURL failed") + print(m) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter11 +``` + +Take a look at the JSON output and compare it with: + +```bash +go run -race ./internal/tutorial/measurex/chapter10 -url https://www.google.com +``` + +(which is basically forcing chapter10 to run with the +the default URL we use in this chapter). + +Can you explain why we are able to measure more endpoints +in this chapter by checking the implementation of `MeasureURL` +and compare it to the code written in chapter10? + +Now run: + +```bash +go run -race ./internal/tutorial/measurex/chapter11 -url https://google.com +``` + +Do you see the opportunity there for following redirections? :^). + +## Conclusion + +We have introduced `MeasureURL`, the top-level API for +measuring a single URL. + diff --git a/internal/tutorial/measurex/chapter11/main.go b/internal/tutorial/measurex/chapter11/main.go index 162407526a..6f37a171db 100644 --- a/internal/tutorial/measurex/chapter11/main.go +++ b/internal/tutorial/measurex/chapter11/main.go @@ -1,3 +1,27 @@ +// -=-=- StartHere -=-=- +// +// # Chapter XI: Measuring a URL +// +// This program shows how to measure an HTTP/HTTPS URL. We +// are going to call an API whose implementation is +// basically the same code we have seen in the previous +// chapter, to obtain an URL measurement in a more compact +// way. (As an historical note, the API we are going to +// call has indeed been written as a refactoring of +// the code we introduced in the previous chapter.) +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter11/main.go`.) +// +// ## main.go +// +// The beginning of the program is much simpler. We have removed +// out custom measurement type. 
We are now going to use the +// `URLMeasurement` type (`go doc ./internal/measurex.URLMeasurement`), +// which has the same fields of `measurement` in chapter10 _plus_ +// some extra fields that we'll examine in a later chapter. +// +// ```Go package main import ( @@ -11,18 +35,94 @@ import ( "github.com/ooni/probe-cli/v3/internal/runtimex" ) +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + func main() { - URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + URL := flag.String("url", "https://www.google.com/", "URL to fetch") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") flag.Parse() ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() + // ``` + // + // We create a measurer, cookies, and headers like we + // saw in the previous chapter. + // + // ```Go mx := measurex.NewMeasurerWithDefaultSettings() cookies := measurex.NewCookieJar() headers := measurex.NewHTTPRequestHeaderForMeasuring() + // ``` + // + // Then we call `MeasureURL`. This function's implementation + // is in `./internal/measurex/measurer.go` and is pretty + // much a refactoring of the code in chapter10. + // + // The arguments are: + // + // - the context as usual + // + // - the unparsed URL to measure + // + // - the headers we want to use + // + // - a jar for cookies + // + // ```Go m, err := mx.MeasureURL(ctx, *URL, headers, cookies) + // ``` + // The return value is either an `URLMeasurement` + // or an error. The error happens, for example, if + // the input URL scheme is not "http" or "https" (which + // we handled by panicking in chapter10). + // + // Now, rather than panicking inside `MeasureURL`, we + // return the error to the caller and we `panic` + // here on `main` using the `PanicOnError` function. 
+ // + // ```Go runtimex.PanicOnError(err, "mx.MeasureURL failed") - data, err := json.Marshal(m) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + print(m) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter11 +// ``` +// +// Take a look at the JSON output and compare it with: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter10 -url https://www.google.com +// ``` +// +// (which is basically forcing chapter10 to run with the +// the default URL we use in this chapter). +// +// Can you explain why we are able to measure more endpoints +// in this chapter by checking the implementation of `MeasureURL` +// and compare it to the code written in chapter10? +// +// Now run: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter11 -url https://google.com +// ``` +// +// Do you see the opportunity there for following redirections? :^). +// +// ## Conclusion +// +// We have introduced `MeasureURL`, the top-level API for +// measuring a single URL. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter12/README.md b/internal/tutorial/measurex/chapter12/README.md index e69de29bb2..c5051ff0b4 100644 --- a/internal/tutorial/measurex/chapter12/README.md +++ b/internal/tutorial/measurex/chapter12/README.md @@ -0,0 +1,94 @@ + +# Chapter XII: Following redirections. + +This program shows how to combine the URL measurement +"step" introduced in the previous chapter with +following redirections. If we say that the previous +chapter performed a "web step", then we can say +that here we're performing multiple "web steps". + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter12/main.go`.) 
+ +## main.go + +The beginning of the program is pretty much the +same, except that here we need to define a +`measurement` container type that will contain +the result of each "web step". + +```Go +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +type measurement struct { + URLs []*measurex.URLMeasurement +} + +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +func main() { + URL := flag.String("url", "http://facebook.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + all := &measurement{} + mx := measurex.NewMeasurerWithDefaultSettings() + cookies := measurex.NewCookieJar() + headers := measurex.NewHTTPRequestHeaderForMeasuring() +``` + +Everything above this line is like in chapter11. What changes +now is that we're calling `MeasureURLAndFollowRedirections` +instead of `MeasureURL`. + +Rather than returning a single measurement, this function +returns a channel where it posts the result of measuring +the original URL along with all its redirections. Internally, +`MeasureURLAndFollowRedirections` calls `MeasureURL`. + +We accumulate the results in `URLs` and print `m`. The channel +is closed when done by `MeasureURLAndFollowRedirections`, so we leave the loop. + +```Go + for m := range mx.MeasureURLAndFollowRedirections(ctx, *URL, headers, cookies) { + all.URLs = append(all.URLs, m) + } + print(all) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter12 +``` + +Take a look at the JSON. 
You should see several redirects +and that we measure each endpoint of each redirect, including +QUIC endpoints that we discover on the way. + +## Conclusion + +We have introduced `MeasureURLAndFollowRedirect`, the +top-level API for fully measuring a URL and all the URLs +that derive from such an URL via redirection. + diff --git a/internal/tutorial/measurex/chapter12/main.go b/internal/tutorial/measurex/chapter12/main.go index a3cd8a2662..42a3638fe9 100644 --- a/internal/tutorial/measurex/chapter12/main.go +++ b/internal/tutorial/measurex/chapter12/main.go @@ -1,3 +1,24 @@ +// -=-=- StartHere -=-=- +// +// # Chapter XII: Following redirections. +// +// This program shows how to combine the URL measurement +// "step" introduced in the previous chapter with +// following redirections. If we say that the previous +// chapter performed a "web step", then we can say +// that here we're performing multiple "web steps". +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter12/main.go`.) +// +// ## main.go +// +// The beginning of the program is pretty much the +// same, except that here we need to define a +// `measurement` container type that will contain +// the result of each "web step". 
+// +// ```Go package main import ( @@ -15,20 +36,61 @@ type measurement struct { URLs []*measurex.URLMeasurement } +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + func main() { - URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + URL := flag.String("url", "http://facebook.com/", "URL to fetch") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") flag.Parse() ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() + all := &measurement{} mx := measurex.NewMeasurerWithDefaultSettings() cookies := measurex.NewCookieJar() - all := &measurement{} headers := measurex.NewHTTPRequestHeaderForMeasuring() - for m := range mx.MeasureHTTPURLAndFollowRedirections(ctx, *URL, headers, cookies) { + // ``` + // + // Everything above this line is like in chapter11. What changes + // now is that we're calling `MeasureURLAndFollowRedirections` + // instead of `MeasureURL`. + // + // Rather than returning a single measurement, this function + // returns a channel where it posts the result of measuring + // the original URL along with all its redirections. Internally, + // `MeasureURLAndFollowRedirections` calls `MeasureURL`. + // + // We accumulate the results in `URLs` and print `m`. The channel + // is closed when done by `MeasureURLAndFollowRedirections`, so we leave the loop. + // + // ```Go + for m := range mx.MeasureURLAndFollowRedirections(ctx, *URL, headers, cookies) { all.URLs = append(all.URLs, m) } - data, err := json.Marshal(all) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + print(all) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// ```bash +// go run -race ./internal/tutorial/measurex/chapter12 +// ``` +// +// Take a look at the JSON. 
You should see several redirects +// and that we measure each endpoint of each redirect, including +// QUIC endpoints that we discover on the way. +// +// ## Conclusion +// +// We have introduced `MeasureURLAndFollowRedirections`, the +// top-level API for fully measuring a URL and all the URLs +// that derive from such an URL via redirection. +// +// -=-=- StopHere -=-=- diff --git a/internal/tutorial/measurex/chapter13/README.md b/internal/tutorial/measurex/chapter13/README.md index e69de29bb2..d31604e336 100644 --- a/internal/tutorial/measurex/chapter13/README.md +++ b/internal/tutorial/measurex/chapter13/README.md @@ -0,0 +1,97 @@ + +# Chapter XIII: Rewriting Web Connectivity + +This chapter contains an exercise. We are going to +use the `measurex` API to rewrite part of the +Web Connectivity network experiment. +(This is probably the right place to prod you +to go to the [ooni/spec](https://github.com/ooni/spec) +repository, locate the ts-017-web-connectivity.md +spec, and read it.) + +Read the spec? Good, so +what we are more precisely going to do here +is implement the network measurement part of +Web Connectivity where we: + +1. enumerate all the IP addresses of the target +URL using the system resolver; + +2. build endpoints with such IPs with a suitable +port, thus obtaining a list of HTTP endpoints; + +3. TCP connect each of the endpoints and save the +results into a measurement object compatible +with Web Connectivity's data format; + +4. TLS handshake each endpoint (only if this +makes sense, of course); + +5. HTTP GET the URL and follow redirects until +we reach a webpage, fetch the body, and store it +for later analysis (which we'll not implement +as part of this exercise). + +Let us now provide extra context that should +help you figure out how to solve this exercise. + +## Regarding points 3-4 + +You already know all the primitives. + +## Regarding point 5 + +Historically this point has always been +performed by a separate HTTP client. 
This +means that any implementation: + +- will not include any TCP or TLS event +generated during point 5 in the measurement; + +- most likely will resolve the URL's domain +again (even though the probe-cli implementation +uses a fake Resolver to avoid that); + +- tries every available IP address and stops +at the first one to which it can connect to (which +is what a naive HTTP client does, whereas a more +advanced one likely tries a couple of addrs in +parallel, especially when both IPv4 and IPv6 +are supported - this is also known as happy eyeballs). + +In terms of `measurex`, the best API to do what +you're required to do in point 5 is probably +`NewTracingHTTPTransportWithDefaultSettings`, which +allows you to trace only the HTTP round trip and +ignores any other event. + +Once you have such a transport, the best `Measurer` +API for the task is probably `HTTPClientGET`. + +## Other remarks + +You also need to learn about how to measure +events at low level, which entails creating an +instance of `MeasurementDB`, passing it to +the relevant networking code, and then calling +its `AsMeasurement` method to get back a +measurement. (You can probably get an idea +of how this is done in general by checking the +implementation of `Measurer.TCPConnect`.) + +Hopefully, this should be enough information +to help you tackle this task. As you see +below, the main function is there empty waiting +for your implementation. We will provide our +own solution to this problem in the next chapter. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter13/main.go`.) 
+ +## The main.go file + +```Go +package main + +func main() { +} diff --git a/internal/tutorial/measurex/chapter13/main.go b/internal/tutorial/measurex/chapter13/main.go index 443e807cb3..1aff64c7e1 100644 --- a/internal/tutorial/measurex/chapter13/main.go +++ b/internal/tutorial/measurex/chapter13/main.go @@ -1,72 +1,98 @@ +// -=-=- StartHere -=-=- +// +// # Chapter XIII: Rewriting Web Connectivity +// +// This chapter contains an exercise. We are going to +// use the `measurex` API to rewrite part of the +// Web Connectivity network experiment. +// (This is probably the right place to prod you +// to go to the [ooni/spec](https://github.com/ooni/spec) +// repository, locate the ts-017-web-connectivity.md +// spec, and read it.) +// +// Read the spec? Good, so +// what we are more precisely going to do here +// is implement the network measurement part of +// Web Connectivity where we: +// +// 1. enumerate all the IP addresses of the target +// URL using the system resolver; +// +// 2. build endpoints with such IPs with a suitable +// port, thus obtaining a list of HTTP endpoints; +// +// 3. TCP connect each of the endpoints and save the +// results into a measurement object compatible +// with Web Connectivity's data format; +// +// 4. TLS handshake each endpoint (only if this +// makes sense, of course); +// +// 5. HTTP GET the URL and follow redirects until +// we reach a webpage, fetch the body, and store it +// for later analysis (which we'll not implement +// as part of this exercise). +// +// Let us now provide extra context that should +// help you figure out how to solve this exercise. +// +// ## Regarding points 3-4 +// +// You already know all the primitives. +// +// ## Regarding point 5 +// +// Historically this point has always been +// performed by a separate HTTP client. 
This +// means that any implementation: +// +// - will not include any TCP or TLS event +// generated during point 5 in the measurement; +// +// - most likely will resolve the URL's domain +// again (even though the probe-cli implementation +// uses a fake Resolver to avoid that); +// +// - tries every available IP address and stops +// at the first one to which it can connect to (which +// is what a naive HTTP client does, whereas a more +// advanced one likely tries a couple of addrs in +// parallel, especially when both IPv4 and IPv6 +// are supported - this is also known as happy eyeballs). +// +// In terms of `measurex`, the best API to do what +// you're required to do in point 5 is probably +// `NewTracingHTTPTransportWithDefaultSettings`, which +// allows you to trace only the HTTP round trip and +// ignores any other event. +// +// Once you have such a transport, the best `Measurer` +// API for the task is probably `HTTPClientGET`. +// +// ## Other remarks +// +// You also need to learn about how to measure +// events at low level, which entails creating an +// instance of `MeasurementDB`, passing it to +// the relevant networking code, and then calling +// its `AsMeasurement` method to get back a +// measurement. (You can probably get an idea +// of how this is done in general by checking the +// implementation of `Measurer.TCPConnect`.) +// +// Hopefully, this should be enough information +// to help you tackle this task. As you see +// below, the main function is there empty waiting +// for your implementation. We will provide our +// own solution to this problem in the next chapter. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter13/main.go`.) 
+// +// ## The main.go file +// +// ```Go package main -import ( - "context" - "encoding/json" - "flag" - "fmt" - "net/http" - "time" - - "github.com/apex/log" - "github.com/ooni/probe-cli/v3/internal/measurex" - "github.com/ooni/probe-cli/v3/internal/runtimex" -) - -type measurement struct { - Queries []*measurex.DNSLookupEvent `json:"queries"` - TCPConnect []*measurex.NetworkEvent `json:"tcp_connect"` - TLSHandshakes []*measurex.TLSHandshakeEvent `json:"tls_handshakes"` - Requests []*measurex.HTTPRoundTripEvent `json:"requests"` -} - -func (m *measurement) addQueries(dm *measurex.DNSMeasurement) { - m.Queries = append(m.Queries, dm.LookupHost...) -} - -func (m *measurement) addEndpointCheck(em *measurex.EndpointMeasurement) { - for _, ev := range em.Connect { - switch ev.Network { - case "tcp": - m.TCPConnect = append(m.TCPConnect, ev) - } - } - m.TLSHandshakes = append(m.TLSHandshakes, em.TLSHandshake...) -} - -func (m *measurement) addHTTPCheck(hem *measurex.Measurement) { - m.Requests = append(m.Requests, hem.HTTPRoundTrip...) 
-} - func main() { - URL := flag.String("url", "https://www.google.com/", "URL to fetch") - timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") - flag.Parse() - ctx, cancel := context.WithTimeout(context.Background(), *timeout) - defer cancel() - mx := measurex.NewMeasurerWithDefaultSettings() - cookies := measurex.NewCookieJar() - db := &measurex.MeasurementDB{} - txp := mx.NewTracingHTTPTransportWithDefaultSettings(log.Log, db) - txp.MaxBodySnapshotSize = 1 << 14 - client := &http.Client{Jar: cookies, Transport: txp} - req, err := measurex.NewHTTPGetRequest(ctx, *URL) - runtimex.PanicOnError(err, "NewHTTPGetRequest failed") - resp, err := client.Do(req) - if err == nil { - resp.Body.Close() // be tidy - } - httpEndpoints, err := measurex.UnmeasuredHTTPEndpoints( - db, *URL, measurex.NewHTTPRequestHeaderForMeasuring()) - runtimex.PanicOnError(err, "cannot determine unmeasured HTTP endpoints") - for _, epnt := range httpEndpoints { - resp, err = mx.HTTPEndpointGetWithDB(ctx, epnt, db, cookies) - if err == nil { - resp.Body.Close() // be tidy - } - } - m := db.AsMeasurement() - data, err := json.Marshal(m) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) } diff --git a/internal/tutorial/measurex/chapter14/README.md b/internal/tutorial/measurex/chapter14/README.md new file mode 100644 index 0000000000..348a4d4f37 --- /dev/null +++ b/internal/tutorial/measurex/chapter14/README.md @@ -0,0 +1,318 @@ + +# Chapter XIV: A possible rewrite of Web Connectivity + +In this chapter we try to solve the exercise laid out in +the previous chapter, using `measurex` primitives. + +(This file is auto-generated. Do not edit it directly! To apply +changes you need to modify `./internal/tutorial/measurex/chapter14/main.go`.) + +## main.go + +The beginning of the file is always pretty much the same. 
+ +```Go +package main + +import ( + "context" + "crypto/tls" + "encoding/json" + "flag" + "fmt" + "net/http" + "net/url" + "time" + + "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +``` + +## measurement type + +We define a measurement type with the fields +that a Web Connectivity measurement should have. + +```Go + +type measurement struct { + Queries []*measurex.DNSLookupEvent `json:"queries"` + TCPConnect []*measurex.NetworkEvent `json:"tcp_connect"` + TLSHandshakes []*measurex.TLSHandshakeEvent `json:"tls_handshakes"` + Requests []*measurex.HTTPRoundTripEvent `json:"requests"` +} + +``` + +## WebConnectivity implementation + +We define a function that takes in input a context and a URL to +measure and returns a measurement or an error. + +We will only error out in case the input does not allow us to +proceed (i.e., invalid input URL). + +```Go + +func webConnectivity(ctx context.Context, URL string) (*measurement, error) { +``` + +We start by parsing the input URL. If we cannot parse it, of +course this is a hard error and we cannot continue. + +```Go + parsedURL, err := url.Parse(URL) + if err != nil { + return nil, err + } + +``` + +We create an empty measurement and a measurer with +default settings like we did in the previous chapters. + +```Go + m := &measurement{} + mx := measurex.NewMeasurerWithDefaultSettings() + +``` + +Now it's time to start measuring. We will address all +the points laid out in the previous chapter. + +### 1. Enumerating IP addrs + +Let us enumerate all the IP addresses for +the input URL's domain using the system resolver. + +```Go + dns := mx.LookupHostSystem(ctx, parsedURL.Hostname()) + m.Queries = append(m.Queries, dns.LookupHost...) 
+ +``` + +This is code we have already seen in previous chapter. + + +### 2. Building a list of endpoints + +```Go + epnts, err := measurex.AllHTTPEndpointsForURL(parsedURL, http.Header{}, dns) + if err != nil { + return nil, err + } + +``` + +This is also code we have seen in previous chapters. The only +difference is that we supply empty headers since we're not going +to actually use the headers inside the endpoints. + +### 3 and 4. Measure each endpoint + +We will loop through the endpoints in the previous point +and issue the correct TCP or TLS primitive depending on +whether the input URL is HTTP or HTTPS. + +```Go + for _, epnt := range epnts { + switch parsedURL.Scheme { + case "http": + tcp := mx.TCPConnect(ctx, epnt.Address) + m.TCPConnect = append(m.TCPConnect, tcp.Connect...) + case "https": + config := &tls.Config{ + ServerName: parsedURL.Hostname(), + NextProtos: []string{"h2", "http/1.1"}, + RootCAs: netxlite.NewDefaultCertPool(), + } + tls := mx.TLSConnectAndHandshake(ctx, epnt.Address, config) + m.TCPConnect = append(m.TCPConnect, tls.Connect...) + m.TLSHandshakes = append(m.TLSHandshakes, tls.TLSHandshake...) + } + } + +``` + +At this point we've addressed points 1-4. So let's +now focus on the last point: + +### 5. HTTP measurement + +We need to manually build a `MeasurementDB`. This is a +"database" where networking code will store events. + +```Go + + db := &measurex.MeasurementDB{} + +``` + +Following the hint from the previous chapter we use the +`NewTracingHTTPTransportWithDefaultSettings` factory +to create an `http.Transport`-like object that will trace +HTTP round trip events writing them into `db`. + + +```Go + + txp := measurex.NewTracingHTTPTransportWithDefaultSettings(mx.Begin, mx.Logger, db) + +``` + +We now build an `http.Client` using the transport +we've just created and a cookie jar (which we +use because otherwise some redirects will lead +to a redirect loop, as mentioned in previous chapters). 
+ +```Go + + clnt := &http.Client{ + Transport: txp, + Jar: measurex.NewCookieJar(), + } + +``` + +Now we use a method of the measurer that allows us to +perform an HTTP GET with an existing HTTP client +and a URL. This method will set a timeout and perform +the round trip. Reading a snapshot of the response +body is not implemented by this function but rather +is a property of the "tracing" HTTP transport we +created above (this type of transport is the one we +have been internally using in all the examples +presented so far.) + +```Go + + resp, _ := mx.HTTPClientGET(ctx, clnt, parsedURL) + +``` + +To be tidy, we also close the response body in case +we have a response. We don't really need to read +the body here. As mentioned previously, we're already +using an HTTP transport reading a body snapshot. + +```Go + + if resp != nil { + resp.Body.Close() // tidy + } + +``` + +Finally, we append the round trips we performed into +the right field and return the measurement. + +To this end, we're using the `db.AsMeasurement` method that +takes the current set of events into `db` and assembles +them into the `Measurement` struct we've been using in all +the chapters we have seen so far. + +```Go + + m.Requests = append(m.Requests, db.AsMeasurement().HTTPRoundTrip...) + return m, nil +} + +``` + +The rest of the program is pretty straightforward. + +```Go + +func main() { + URL := flag.String("url", "https://www.google.com/", "URL to fetch") + timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") + flag.Parse() + ctx, cancel := context.WithTimeout(context.Background(), *timeout) + defer cancel() + m, err := webConnectivity(ctx, *URL) + runtimex.PanicOnError(err, "invalid arguments to webConnectivity (wrong URL?)") + print(m) +} + +``` + +## Running the example program + +Let us perform a vanilla run first: + +```bash +go run -race ./internal/tutorial/measurex/chapter14 +``` + +Take a look at the JSON. 
+ +Now try running the program with `http://gmail.com` as +input. Take note of the redirect chain. See how the +domain changes during the redirect. Take note of the +fact that we are not measuring any TLS handshake. See +how we're not trying QUIC endpoints. These are, in +fact, some of the limitations of Web Connectivity that +we were trying to address when we wrote `measurex`. + +Also, build the miniooni research client: + +``` +go build -v ./internal/cmd/miniooni +``` + +Run Web Connectivity with: + +``` +./miniooni -ni http://gmail.com web_connectivity +``` + +This writes the report in a file named `report.jsonl`. + +Check the content of the file and match it with the +output of this chapter. Are there other notable +differences between the two outputs? + +### Bonus question + +The solution we presented is true to the original +spirit of Web Connectivity, where we first perform +separate DNS, TCP/TLS steps, and then we also +perform a separate HTTP step. Is there in `measurex` +an API allowing you to invert the order of the +operations, that is: + +1. build a full-fledged HTTP client where we can +trace _any_ operation; + +2. use such client to measure the URL; + +3. figure out what TCP endpoints we did not +test for TCP/TLS during this process and run +TCP/TLS testing only for them? + +If such an API exists, can you write a simple +main.go client that implements points 1-3 above? + +## Conclusion + +We have presented the solution to the exercise +proposed in the previous chapter, i.e., how +to rewrite Web Connectivity using `measurex` API. + +You have now been exposed to some complexity and +APIs to perform OONI measurements. So you should now +be ready to help us write new and maintain existing +network experiments. + +If you have further questions, please [contact us]( +https://ooni.org/about/). 
+ diff --git a/internal/tutorial/measurex/chapter14/main.go b/internal/tutorial/measurex/chapter14/main.go index d3101169b3..0c073019a1 100644 --- a/internal/tutorial/measurex/chapter14/main.go +++ b/internal/tutorial/measurex/chapter14/main.go @@ -1,35 +1,320 @@ +// -=-=- StartHere -=-=- +// +// # Chapter XIV: A possible rewrite of Web Connectivity +// +// In this chapter we try to solve the exercise laid out in +// the previous chapter, using `measurex` primitives. +// +// (This file is auto-generated. Do not edit it directly! To apply +// changes you need to modify `./internal/tutorial/measurex/chapter14/main.go`.) +// +// ## main.go +// +// The beginning of the file is always pretty much the same. +// +// ```Go package main import ( "context" + "crypto/tls" "encoding/json" "flag" "fmt" + "net/http" + "net/url" "time" "github.com/ooni/probe-cli/v3/internal/measurex" + "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/runtimex" ) +func print(v interface{}) { + data, err := json.Marshal(v) + runtimex.PanicOnError(err, "json.Marshal failed") + fmt.Printf("%s\n", string(data)) +} + +// ``` +// +// ## measurement type +// +// We define a measurement type with the fields +// that a Web Connectivity measurement should have. +// +// ```Go + type measurement struct { - URLs []*measurex.ArchivalURLMeasurement `json:"urls"` + Queries []*measurex.DNSLookupEvent `json:"queries"` + TCPConnect []*measurex.NetworkEvent `json:"tcp_connect"` + TLSHandshakes []*measurex.TLSHandshakeEvent `json:"tls_handshakes"` + Requests []*measurex.HTTPRoundTripEvent `json:"requests"` +} + +// ``` +// +// ## WebConnectivity implementation +// +// We define a function that takes in input a context and a URL to +// measure and returns a measurement or an error. +// +// We will only error out in case the input does not allow us to +// proceed (i.e., invalid input URL). 
+// +// ```Go + +func webConnectivity(ctx context.Context, URL string) (*measurement, error) { + // ``` + // + // We start by parsing the input URL. If we cannot parse it, of + // course this is a hard error and we cannot continue. + // + // ```Go + parsedURL, err := url.Parse(URL) + if err != nil { + return nil, err + } + + // ``` + // + // We create an empty measurement and a measurer with + // default settings like we did in the previous chapters. + // + // ```Go + m := &measurement{} + mx := measurex.NewMeasurerWithDefaultSettings() + + // ``` + // + // Now it's time to start measuring. We will address all + // the points laid out in the previous chapter. + // + // ### 1. Enumerating IP addrs + // + // Let us enumerate all the IP addresses for + // the input URL's domain using the system resolver. + // + // ```Go + dns := mx.LookupHostSystem(ctx, parsedURL.Hostname()) + m.Queries = append(m.Queries, dns.LookupHost...) + + // ``` + // + // This is code we have already seen in previous chapter. + // + // + // ### 2. Building a list of endpoints + // + // ```Go + epnts, err := measurex.AllHTTPEndpointsForURL(parsedURL, http.Header{}, dns) + if err != nil { + return nil, err + } + + // ``` + // + // This is also code we have seen in previous chapters. The only + // difference is that we supply empty headers since we're not going + // to actually use the headers inside the endpoints. + // + // ### 3 and 4. Measure each endpoint + // + // We will loop through the endpoints in the previous point + // and issue the correct TCP or TLS primitive depending on + // whether the input URL is HTTP or HTTPS. + // + // ```Go + for _, epnt := range epnts { + switch parsedURL.Scheme { + case "http": + tcp := mx.TCPConnect(ctx, epnt.Address) + m.TCPConnect = append(m.TCPConnect, tcp.Connect...) 
+ case "https": + config := &tls.Config{ + ServerName: parsedURL.Hostname(), + NextProtos: []string{"h2", "http/1.1"}, + RootCAs: netxlite.NewDefaultCertPool(), + } + tls := mx.TLSConnectAndHandshake(ctx, epnt.Address, config) + m.TCPConnect = append(m.TCPConnect, tls.Connect...) + m.TLSHandshakes = append(m.TLSHandshakes, tls.TLSHandshake...) + } + } + + // ``` + // + // At this point we've addressed points 1-4. So let's + // now focus on the last point: + // + // ### 5. HTTP measurement + // + // We need to manually build a `MeasurementDB`. This is a + // "database" where networking code will store events. + // + // ```Go + + db := &measurex.MeasurementDB{} + + // ``` + // + // Following the hint from the previous chapter we use the + // `NewTracingHTTPTransportWithDefaultSettings` factory + // to create an `http.Transport`-like object that will trace + // HTTP round trip events writing them into `db`. + // + // + // ```Go + + txp := measurex.NewTracingHTTPTransportWithDefaultSettings(mx.Begin, mx.Logger, db) + + // ``` + // + // We now build an `http.Client` using the transport + // we've just created and a cookie jar (which we + // use because otherwise some redirects will lead + // to a redirect loop, as mentioned in previous chapters). + // + // ```Go + + clnt := &http.Client{ + Transport: txp, + Jar: measurex.NewCookieJar(), + } + + // ``` + // + // Now we use a method of the measurer that allows us to + // perform an HTTP GET with an existing HTTP client + // and a URL. This method will set a timeout and perform + // the round trip. Reading a snapshot of the response + // body is not implemented by this function but rather + // is a property of the "tracing" HTTP transport we + // created above (this type of transport is the one we + // have been internally using in all the examples + // presented so far.) 
+ // + // ```Go + + resp, _ := mx.HTTPClientGET(ctx, clnt, parsedURL) + + // ``` + // + // To be tidy, we also close the response body in case + // we have a response. We don't really need to read + // the body here. As mentioned previously, we're already + // using an HTTP transport reading a body snapshot. + // + // ```Go + + if resp != nil { + resp.Body.Close() // tidy + } + + // ``` + // + // Finally, we append the round trips we performed into + // the right field and return the measurement. + // + // To this end, we're using the `db.AsMeasurement` method that + // takes the current set of events into `db` and assembles + // them into the `Measurement` struct we've been using in all + // the chapters we have seen so far. + // + // ```Go + + m.Requests = append(m.Requests, db.AsMeasurement().HTTPRoundTrip...) + return m, nil } +// ``` +// +// The rest of the program is pretty straightforward. +// +// ```Go + func main() { - URL := flag.String("url", "https://blog.cloudflare.com/", "URL to fetch") + URL := flag.String("url", "https://www.google.com/", "URL to fetch") timeout := flag.Duration("timeout", 60*time.Second, "timeout to use") flag.Parse() ctx, cancel := context.WithTimeout(context.Background(), *timeout) defer cancel() - mx := measurex.NewMeasurerWithDefaultSettings() - mx.RegisterWCTH("https://wcth.ooni.io/") - mx.RegisterUDPResolvers("8.8.8.8:53", "8.8.4.4:53", "1.1.1.1:53", "1.0.0.1:53") - cookies := measurex.NewCookieJar() - all := &measurement{} - for m := range mx.MeasureHTTPURLAndFollowRedirections(ctx, *URL, cookies) { - all.URLs = append(all.URLs, measurex.NewArchivalURLMeasurement(m)) - } - data, err := json.Marshal(all) - runtimex.PanicOnError(err, "json.Marshal failed") - fmt.Printf("%s\n", string(data)) + m, err := webConnectivity(ctx, *URL) + runtimex.PanicOnError(err, "invalid arguments to webConnectivity (wrong URL?)") + print(m) } + +// ``` +// +// ## Running the example program +// +// Let us perform a vanilla run first: +// +// 
```bash +// go run -race ./internal/tutorial/measurex/chapter14 +// ``` +// +// Take a look at the JSON. +// +// Now try running the program with `http://gmail.com` as +// input. Take note of the redirect chain. See how the +// domain changes during the redirect. Take note of the +// fact that we are not measuring any TLS handshake. See +// how we're not trying QUIC endpoints. These are, in +// fact, some of the limitations of Web Connectivity that +// we were trying to address when we wrote `measurex`. +// +// Also, build the miniooni research client: +// +// ``` +// go build -v ./internal/cmd/miniooni +// ``` +// +// Run Web Connectivity with: +// +// ``` +// ./miniooni -ni http://gmail.com web_connectivity +// ``` +// +// This writes the report in a file named `report.jsonl`. +// +// Check the content of the file and match it with the +// output of this chapter. Are there other notable +// differences between the two outputs? +// +// ### Bonus question +// +// The solution we presented is true to the original +// spirit of Web Connectivity, where we first perform +// separate DNS, TCP/TLS steps, and then we also +// perform a separate HTTP step. Is there in `measurex` +// an API allowing you to invert the order of the +// operations, that is: +// +// 1. build a full-fledged HTTP client where we can +// trace _any_ operation; +// +// 2. use such client to measure the URL; +// +// 3. figure out what TCP endpoints we did not +// test for TCP/TLS during this process and run +// TCP/TLS testing only for them? +// +// If such an API exists, can you write a simple +// main.go client that implements points 1-3 above? +// +// ## Conclusion +// +// We have presented the solution to the exercise +// proposed in the previous chapter, i.e., how +// to rewrite Web Connectivity using `measurex` API. +// +// You have now been exposed to some complexity and +// APIs to perform OONI measurements. 
So you should now +// be ready to help us write new and maintain existing +// network experiments. +// +// If you have further questions, please [contact us]( +// https://ooni.org/about/). +// +// -=-=- StopHere -=-=- From eccaa47d5d69bd93237a32c98494a57b0b295083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 24 Sep 2021 15:52:12 +0200 Subject: [PATCH 42/53] Minor edits to chapter01 --- internal/tutorial/measurex/chapter01/main.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/tutorial/measurex/chapter01/main.go b/internal/tutorial/measurex/chapter01/main.go index 6184c11161..1c99f9c089 100644 --- a/internal/tutorial/measurex/chapter01/main.go +++ b/internal/tutorial/measurex/chapter01/main.go @@ -11,9 +11,9 @@ // // ## The system resolver // -// We define "system resolver" the DNS resolver implemented by the C +// We define "system resolver" as the DNS resolver implemented by the C // library. On Unix, the most popular interface to such a resolver is -// `getaddrinfo(3)` C library function. +// the `getaddrinfo(3)` C library function. // // Most OONI experiments (also known as nettests) use the system // resolver to map domain names to IP addresses. The advantage of @@ -21,8 +21,8 @@ // it should _generally_ work. Also, it is the resolver that the // user of the system will use every day, therefore its results // should be representative (even though the rise of DNS over -// HTTPS embedded in browser may make this statement less solid -// than it appeared ten years ago). +// HTTPS embedded in browsers may make this statement less solid +// than it was ten years ago). // // The disadvantage of the system resolver is that we do not // know how it is configured. Say the user has configured a @@ -75,7 +75,7 @@ func main() { // ``` // // We create a context and we attach a timeout to it. (This is a pretty - // standard way to configure a timeout in Go.) 
+ // standard way of configuring a timeout in Go.) // // ```Go ctx, cancel := context.WithTimeout(context.Background(), *timeout) @@ -120,10 +120,10 @@ func main() { // list of IP addresses or an error. Our `LookupHostSystem` method, // instead, returns a `*measurex.DNSMeasurement` type. // - // This is probably a good moment to remind you about Go's + // This is probably a good moment to remind you of Go's // built in help system. We could include a definition of the // `DNSMeasurement` structure, but since this definition is - // just a comment in the main.go file, it may age badly. + // just a comment in the main.go file, it might age badly. // // Instead, if you run // @@ -139,7 +139,7 @@ func main() { // go doc ./internal/measurex.Measurement // ``` // - // we see a container of events + // we can see a container of events // classified by event type. In our case, because we're // doing a `LookupHost`, we should have at least one entry // inside of the `Measurement.LookupHost` field. @@ -244,7 +244,7 @@ func main() { // // The most important fields are: // -// - _engine_, indidcating that we are using the "system" resolver; +// - _engine_, indicating that we are using the "system" resolver; // // - _hostname_, meaning that we wanted to resolve the "example.com" domain; // @@ -362,7 +362,7 @@ func main() { // // ## Conclusions // -// This is it. We have seen how to measure the system resolver and we have +// This is it. We have seen how to measure with the system resolver and we have // also seen which easy-to-provoke errors we can get. 
// // -=-=- StopHere -=-=- From f83c5a8e2226f3360cb232edfa4e406576973d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 24 Sep 2021 16:02:00 +0200 Subject: [PATCH 43/53] Minor edits to chapter02 --- internal/tutorial/measurex/chapter02/main.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/tutorial/measurex/chapter02/main.go b/internal/tutorial/measurex/chapter02/main.go index fa035ae808..0d19971447 100644 --- a/internal/tutorial/measurex/chapter02/main.go +++ b/internal/tutorial/measurex/chapter02/main.go @@ -45,7 +45,7 @@ func main() { defer cancel() // ``` // - // ### Creaging a Measurer + // ### Creating a Measurer // // We create a `Measurer` like we did in the previous chapter. // @@ -53,10 +53,10 @@ func main() { mx := measurex.NewMeasurerWithDefaultSettings() // ``` // - // ### Establishing a TCP connection. + // ### Establishing a TCP connection // // We then call `TCPConnect`, which establishes a connection - // and returns the corresponding measurent. + // and returns the corresponding measurement. // // The arguments are the context (for timeouts), and the address // of the endpoint to which we want to connect. (Here and in @@ -119,7 +119,7 @@ func main() { // // - the destination endpoint address is "8.8.4.4:443"; // -// - connect terminated ~0.027 seconds into the program life; +// - connect terminated ~0.027 seconds into the program's life; // // - the operation succeeded (`failure` is `nil`). 
// From cb30de8b678b04216306ecbd3a63e6f280e7b636 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 27 Sep 2021 11:34:41 +0200 Subject: [PATCH 44/53] netxlite: start documenting new factories --- internal/netxlite/dialer.go | 37 ++++++++--------------------------- internal/netxlite/http.go | 23 ++++++++++++---------- internal/netxlite/resolver.go | 17 ++++++++-------- 3 files changed, 30 insertions(+), 47 deletions(-) diff --git a/internal/netxlite/dialer.go b/internal/netxlite/dialer.go index 5cdc37cfc0..fe18a11a1a 100644 --- a/internal/netxlite/dialer.go +++ b/internal/netxlite/dialer.go @@ -19,8 +19,14 @@ type Dialer interface { CloseIdleConnections() } -// NewDialerWithResolver creates a new Dialer. The returned Dialer -// has the following properties: +// NewDialerWithResolver is a convenience factory that calls +// WrapDialer for a stdlib dialer type. +func NewDialerWithResolver(logger Logger, resolver Resolver) Dialer { + return WrapDialer(logger, resolver, &dialerSystem{}) +} + +// WrapDialer creates a new Dialer that wraps the given +// Dialer. The returned Dialer has the following properties: // // 1. logs events using the given logger; // @@ -45,10 +51,6 @@ type Dialer interface { // 6. if a dialer wraps a resolver, the dialer will forward // the CloseIdleConnection call to its resolver (which is // instrumental to manage a DoH resolver connections properly). 
-func NewDialerWithResolver(logger Logger, resolver Resolver) Dialer { - return WrapDialer(logger, resolver, &dialerSystem{}) -} - func WrapDialer(logger Logger, resolver Resolver, dialer Dialer) Dialer { return &dialerLogger{ Dialer: &dialerResolver{ @@ -71,29 +73,6 @@ func NewDialerWithoutResolver(logger Logger) Dialer { return NewDialerWithResolver(logger, &nullResolver{}) } -type Connector = Dialer - -func NewDialerWithConnector( - logger Logger, resolver Resolver, connector Connector) Dialer { - return &dialerLogger{ - Dialer: &dialerResolver{ - Dialer: connector, - Resolver: resolver, - }, - Logger: logger, - } -} - -func NewConnector(logger Logger) Connector { - return &dialerLogger{ - Dialer: &dialerErrWrapper{ - Dialer: &dialerSystem{}, - }, - Logger: logger, - operationSuffix: "_address", - } -} - // dialerSystem uses system facilities to perform domain name // resolution and guarantees we have a dialer timeout. type dialerSystem struct { diff --git a/internal/netxlite/http.go b/internal/netxlite/http.go index 2150736a0d..708701daf0 100644 --- a/internal/netxlite/http.go +++ b/internal/netxlite/http.go @@ -72,11 +72,15 @@ func (txp *httpTransportConnectionsCloser) CloseIdleConnections() { txp.TLSDialer.CloseIdleConnections() } -// NewHTTPTransport creates a new HTTP transport using the given +// NewHTTPTransport combines NewOOHTTPBaseTransport and +// WrapHTTPTransport to construct a new HTTPTransport. +func NewHTTPTransport(logger Logger, dialer Dialer, tlsDialer TLSDialer) HTTPTransport { + return WrapHTTPTransport(logger, NewOOHTTPBaseTransport(dialer, tlsDialer)) +} + +// NewOOHTTPBaseTransport creates a new HTTP transport using the given // dialer and TLS dialer to create connections. // -// The returned transport will use the given Logger for logging. -// // The returned transport will gracefully handle TLS connections // created using gitlab.com/yawning/utls.git. 
// @@ -96,13 +100,6 @@ func (txp *httpTransportConnectionsCloser) CloseIdleConnections() { // necessary to perform sane measurements with tracing. We will be // able to possibly relax this requirement after we change the // way in which we perform measurements. -// -// The returned transport will set a default user agent if the -// request has not already set a user agent. -func NewHTTPTransport(logger Logger, dialer Dialer, tlsDialer TLSDialer) HTTPTransport { - return WrapHTTPTransport(logger, NewOOHTTPBaseTransport(dialer, tlsDialer)) -} - func NewOOHTTPBaseTransport(dialer Dialer, tlsDialer TLSDialer) HTTPTransport { // Using oohttp to support any TLS library. txp := oohttp.DefaultTransport.(*oohttp.Transport).Clone() @@ -132,6 +129,7 @@ func NewOOHTTPBaseTransport(dialer Dialer, tlsDialer TLSDialer) HTTPTransport { // upon us when we are using TLS parroting). txp.ForceAttemptHTTP2 = true + // Ensure we correctly forward CloseIdleConnections. return &httpTransportConnectionsCloser{ HTTPTransport: &oohttp.StdlibTransport{Transport: txp}, Dialer: dialer, @@ -139,6 +137,11 @@ func NewOOHTTPBaseTransport(dialer Dialer, tlsDialer TLSDialer) HTTPTransport { } } +// WrapHTTPTransport creates a new HTTP transport using +// the given logger for logging. +// +// The returned transport will set a default user agent if the +// request has not already set a user agent. func WrapHTTPTransport(logger Logger, txp HTTPTransport) HTTPTransport { // Ensure we correctly forward CloseIdleConnections and compose // with a logging transport thus enabling logging. diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index e1a289cd65..8ac24c55c0 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -46,10 +46,15 @@ type Resolver interface { // cannot be performed because we're using the "system" resolver. 
var ErrNoDNSTransport = errors.New("operation requires a DNS transport") -// NewResolverStdlib creates a new resolver using system -// facilities for resolving domain names (e.g., getaddrinfo). -// -// The resolver will provide the following guarantees: +// NewResolverStdlib creates a new Resolver by combining +// WrapResolver with an internal "system" resolver type that +// adds extra functionality to net.Resolver. +func NewResolverStdlib(logger Logger) Resolver { + return WrapResolver(logger, &resolverSystem{}) +} + +// WrapResolver creates a new resolver that wraps an +// existing resolver to add these properties: // // 1. handles IDNA; // @@ -62,10 +67,6 @@ var ErrNoDNSTransport = errors.New("operation requires a DNS transport") // // 5. enforces reasonable timeouts ( // see https://github.com/ooni/probe/issues/1726). -func NewResolverStdlib(logger Logger) Resolver { - return WrapResolver(logger, &resolverSystem{}) -} - func WrapResolver(logger Logger, resolver Resolver) Resolver { return &resolverIDNA{ Resolver: &resolverLogger{ From 06761790f0f671824860cf0c540b79dd2d0ff7cc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 27 Sep 2021 12:56:38 +0200 Subject: [PATCH 45/53] transport: do not mutate outgoing request --- internal/netxlite/http.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/internal/netxlite/http.go b/internal/netxlite/http.go index 030a96353f..33e3e0be78 100644 --- a/internal/netxlite/http.go +++ b/internal/netxlite/http.go @@ -139,15 +139,10 @@ func NewOOHTTPBaseTransport(dialer Dialer, tlsDialer TLSDialer) HTTPTransport { // WrapHTTPTransport creates a new HTTP transport using // the given logger for logging. -// -// The returned transport will set a default user agent if the -// request has not already set a user agent. 
func WrapHTTPTransport(logger Logger, txp HTTPTransport) HTTPTransport { - return &httpUserAgentTransport{ - HTTPTransport: &httpTransportLogger{ - HTTPTransport: txp, - Logger: logger, - }, + return &httpTransportLogger{ + HTTPTransport: txp, + Logger: logger, } } @@ -240,6 +235,10 @@ func (c *httpTLSConnWithReadTimeout) Read(b []byte) (int, error) { // httpUserAgentTransport is a transport that ensures that we always // set an OONI specific default User-Agent header. +// +// Deprecated: this transport mutates its own request, which is not +// what a transport should do. We should add headers when we are +// creating requests rather than using this transport. type httpUserAgentTransport struct { HTTPTransport } From 90234cc7dc787ba274b3b969737f541a64443a89 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 27 Sep 2021 13:40:08 +0200 Subject: [PATCH 46/53] typo --- internal/netxlite/errorsx/errwrapper.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/netxlite/errorsx/errwrapper.go b/internal/netxlite/errorsx/errwrapper.go index 2c875e76b6..87025c2d3f 100644 --- a/internal/netxlite/errorsx/errwrapper.go +++ b/internal/netxlite/errorsx/errwrapper.go @@ -93,7 +93,7 @@ func NewErrWrapper(c Classifier, op string, err error) *ErrWrapper { } } -// NewTopLevelGenerciErrWrapper wraps an error occurring at top +// NewTopLevelGenericErrWrapper wraps an error occurring at top // level using the most generic available classified. 
func NewTopLevelGenericErrWrapper(err error) *ErrWrapper { return NewErrWrapper(ClassifyGenericError, TopLevelOperation, err) From 633befd91c6e5bcd418c56ad8963f6c6cb9cb177 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 27 Sep 2021 16:55:17 +0200 Subject: [PATCH 47/53] remove LookupHostWithoutRetry --- internal/netxlite/legacy.go | 5 --- internal/netxlite/mocks/resolver.go | 5 --- internal/netxlite/resolver.go | 51 ----------------------------- 3 files changed, 61 deletions(-) diff --git a/internal/netxlite/legacy.go b/internal/netxlite/legacy.go index 8f7a47327d..ed9a03575d 100644 --- a/internal/netxlite/legacy.go +++ b/internal/netxlite/legacy.go @@ -97,11 +97,6 @@ func (r *ResolverLegacyAdapter) CloseIdleConnections() { } } -func (r *ResolverLegacyAdapter) LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) { - return nil, ErrNoDNSTransport -} - func (r *ResolverLegacyAdapter) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport diff --git a/internal/netxlite/mocks/resolver.go b/internal/netxlite/mocks/resolver.go index 9aa61fcb0d..de0ff353a6 100644 --- a/internal/netxlite/mocks/resolver.go +++ b/internal/netxlite/mocks/resolver.go @@ -34,11 +34,6 @@ func (r *Resolver) CloseIdleConnections() { r.MockCloseIdleConnections() } -func (r *Resolver) LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) { - panic("not yet implemented") -} - // HTTPSSvc is an HTTPSSvc reply. type HTTPSSvc = model.HTTPSSvc diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index 8ac24c55c0..4a8a216876 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -30,12 +30,6 @@ type Resolver interface { // CloseIdleConnections closes idle connections, if any. 
CloseIdleConnections() - // LookupHostWithoutRetry issues a single lookup host query - // for the given qtype (dns.TypeA or dns.TypeAAAA) without any - // retry mechanism whatsoever. - LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) - // LookupHTTPSSvcWithoutRetry issues a single HTTPS query for // a domain without any retry mechanism whatsoever. LookupHTTPSSvcWithoutRetry( @@ -140,11 +134,6 @@ func (r *resolverSystem) CloseIdleConnections() { // nothing to do } -func (r *resolverSystem) LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) { - return nil, ErrNoDNSTransport -} - func (r *resolverSystem) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport @@ -172,22 +161,6 @@ func (r *resolverLogger) LookupHost(ctx context.Context, hostname string) ([]str return addrs, nil } -func (r *resolverLogger) LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) { - qtypename := dns.TypeToString[qtype] - prefix := fmt.Sprintf("resolve[%s] %s with %s (%s)", qtypename, domain, r.Network(), r.Address()) - r.Logger.Debugf("%s...", prefix) - start := time.Now() - addrs, err := r.Resolver.LookupHostWithoutRetry(ctx, domain, qtype) - elapsed := time.Since(start) - if err != nil { - r.Logger.Debugf("%s... %s in %s", prefix, err, elapsed) - return nil, err - } - r.Logger.Debugf("%s... 
%+v in %s", prefix, addrs, elapsed) - return addrs, nil -} - func (r *resolverLogger) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { prefix := fmt.Sprintf("resolve[HTTPS] %s with %s (%s)", domain, r.Network(), r.Address()) @@ -221,15 +194,6 @@ func (r *resolverIDNA) LookupHost(ctx context.Context, hostname string) ([]strin return r.Resolver.LookupHost(ctx, host) } -func (r *resolverIDNA) LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) { - host, err := idna.ToASCII(domain) - if err != nil { - return nil, err - } - return r.Resolver.LookupHostWithoutRetry(ctx, host, qtype) -} - func (r *resolverIDNA) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { host, err := idna.ToASCII(domain) @@ -275,11 +239,6 @@ func (r *nullResolver) CloseIdleConnections() { // nothing to do } -func (r *nullResolver) LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) { - return nil, ErrNoDNSTransport -} - func (r *nullResolver) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport @@ -301,16 +260,6 @@ func (r *resolverErrWrapper) LookupHost(ctx context.Context, hostname string) ([ return addrs, nil } -func (r *resolverErrWrapper) LookupHostWithoutRetry( - ctx context.Context, domain string, qtype uint16) ([]string, error) { - addrs, err := r.Resolver.LookupHostWithoutRetry(ctx, domain, qtype) - if err != nil { - return nil, errorsx.NewErrWrapper( - errorsx.ClassifyResolverError, errorsx.ResolveOperation, err) - } - return addrs, nil -} - func (r *resolverErrWrapper) LookupHTTPSSvcWithoutRetry( ctx context.Context, domain string) (HTTPSSvc, error) { out, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) From 45dc835e49484eccda768723afa93e466149f89b Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 27 Sep 2021 16:57:11 +0200 Subject: [PATCH 48/53] Rename to 
LookupHTTPS (simpler, shorter name) --- internal/measurex/measurer.go | 4 ++-- internal/measurex/resolver.go | 4 ++-- internal/netxlite/dnsx/serial.go | 4 ++-- internal/netxlite/legacy.go | 2 +- internal/netxlite/mocks/resolver.go | 2 +- internal/netxlite/resolver.go | 20 ++++++++++---------- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 588f6161c1..653540bab8 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -148,7 +148,7 @@ func (mx *Measurer) LookupHTTPSSvcUDP( db := &MeasurementDB{} r := mx.NewResolverUDP(db, mx.Logger, address) defer r.CloseIdleConnections() - _, err := r.LookupHTTPSSvcWithoutRetry(ctx, domain) + _, err := r.LookupHTTPS(ctx, domain) ol.Stop(err) return &DNSMeasurement{ Domain: domain, @@ -165,7 +165,7 @@ func (mx *Measurer) lookupHTTPSSvcUDPForeign( ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() db := &MeasurementDB{} - _, err := mx.WrapResolver(db, r).LookupHTTPSSvcWithoutRetry(ctx, domain) + _, err := mx.WrapResolver(db, r).LookupHTTPS(ctx, domain) ol.Stop(err) return &DNSMeasurement{ Domain: domain, diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 820551b55f..1e57b11e6e 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -183,9 +183,9 @@ func (r *resolverDB) computeOddityLookupHost(addrs []string, err error) Oddity { return "" } -func (r *resolverDB) LookupHTTPSSvcWithoutRetry(ctx context.Context, domain string) (HTTPSSvc, error) { +func (r *resolverDB) LookupHTTPS(ctx context.Context, domain string) (HTTPSSvc, error) { started := time.Since(r.begin).Seconds() - https, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) + https, err := r.Resolver.LookupHTTPS(ctx, domain) finished := time.Since(r.begin).Seconds() ev := &DNSLookupEvent{ Network: r.Resolver.Network(), diff --git a/internal/netxlite/dnsx/serial.go 
b/internal/netxlite/dnsx/serial.go index d937d2d5d3..e7735cdef0 100644 --- a/internal/netxlite/dnsx/serial.go +++ b/internal/netxlite/dnsx/serial.go @@ -61,8 +61,8 @@ func (r *SerialResolver) LookupHost(ctx context.Context, hostname string) ([]str return addrs, nil } -// LookupHTTPSSvcWithoutRetry issues an HTTPS query without retrying on failure. -func (r *SerialResolver) LookupHTTPSSvcWithoutRetry( +// LookupHTTPS issues an HTTPS query without retrying on failure. +func (r *SerialResolver) LookupHTTPS( ctx context.Context, hostname string) (HTTPSSvc, error) { querydata, err := r.Encoder.Encode( hostname, dns.TypeHTTPS, r.Txp.RequiresPadding()) diff --git a/internal/netxlite/legacy.go b/internal/netxlite/legacy.go index ed9a03575d..5928679043 100644 --- a/internal/netxlite/legacy.go +++ b/internal/netxlite/legacy.go @@ -97,7 +97,7 @@ func (r *ResolverLegacyAdapter) CloseIdleConnections() { } } -func (r *ResolverLegacyAdapter) LookupHTTPSSvcWithoutRetry( +func (r *ResolverLegacyAdapter) LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport } diff --git a/internal/netxlite/mocks/resolver.go b/internal/netxlite/mocks/resolver.go index de0ff353a6..a353922e73 100644 --- a/internal/netxlite/mocks/resolver.go +++ b/internal/netxlite/mocks/resolver.go @@ -37,7 +37,7 @@ func (r *Resolver) CloseIdleConnections() { // HTTPSSvc is an HTTPSSvc reply. type HTTPSSvc = model.HTTPSSvc -func (r *Resolver) LookupHTTPSSvcWithoutRetry( +func (r *Resolver) LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) { panic("not yet implemented") } diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index 4a8a216876..788584ff8e 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -30,9 +30,9 @@ type Resolver interface { // CloseIdleConnections closes idle connections, if any. 
CloseIdleConnections() - // LookupHTTPSSvcWithoutRetry issues a single HTTPS query for + // LookupHTTPS issues a single HTTPS query for // a domain without any retry mechanism whatsoever. - LookupHTTPSSvcWithoutRetry( + LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) } @@ -134,7 +134,7 @@ func (r *resolverSystem) CloseIdleConnections() { // nothing to do } -func (r *resolverSystem) LookupHTTPSSvcWithoutRetry( +func (r *resolverSystem) LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport } @@ -161,12 +161,12 @@ func (r *resolverLogger) LookupHost(ctx context.Context, hostname string) ([]str return addrs, nil } -func (r *resolverLogger) LookupHTTPSSvcWithoutRetry( +func (r *resolverLogger) LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) { prefix := fmt.Sprintf("resolve[HTTPS] %s with %s (%s)", domain, r.Network(), r.Address()) r.Logger.Debugf("%s...", prefix) start := time.Now() - https, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) + https, err := r.Resolver.LookupHTTPS(ctx, domain) elapsed := time.Since(start) if err != nil { r.Logger.Debugf("%s... 
%s in %s", prefix, err, elapsed) @@ -194,13 +194,13 @@ func (r *resolverIDNA) LookupHost(ctx context.Context, hostname string) ([]strin return r.Resolver.LookupHost(ctx, host) } -func (r *resolverIDNA) LookupHTTPSSvcWithoutRetry( +func (r *resolverIDNA) LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) { host, err := idna.ToASCII(domain) if err != nil { return nil, err } - return r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, host) + return r.Resolver.LookupHTTPS(ctx, host) } // resolverShortCircuitIPAddr recognizes when the input hostname is an @@ -239,7 +239,7 @@ func (r *nullResolver) CloseIdleConnections() { // nothing to do } -func (r *nullResolver) LookupHTTPSSvcWithoutRetry( +func (r *nullResolver) LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) { return nil, ErrNoDNSTransport } @@ -260,9 +260,9 @@ func (r *resolverErrWrapper) LookupHost(ctx context.Context, hostname string) ([ return addrs, nil } -func (r *resolverErrWrapper) LookupHTTPSSvcWithoutRetry( +func (r *resolverErrWrapper) LookupHTTPS( ctx context.Context, domain string) (HTTPSSvc, error) { - out, err := r.Resolver.LookupHTTPSSvcWithoutRetry(ctx, domain) + out, err := r.Resolver.LookupHTTPS(ctx, domain) if err != nil { return nil, errorsx.NewErrWrapper( errorsx.ClassifyResolverError, errorsx.ResolveOperation, err) From 2b4f8781c430261206f3e2a6c59b72f7cfc19b57 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 27 Sep 2021 17:03:45 +0200 Subject: [PATCH 49/53] fix: simplify LookupHTTPS API --- internal/measurex/resolver.go | 14 +++++------ internal/netxlite/dnsx/decoder.go | 34 ++++++--------------------- internal/netxlite/dnsx/model/model.go | 14 +++++------ internal/netxlite/dnsx/serial.go | 2 +- internal/netxlite/legacy.go | 2 +- internal/netxlite/mocks/resolver.go | 2 +- internal/netxlite/resolver.go | 19 +++++++-------- 7 files changed, 33 insertions(+), 54 deletions(-) diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 
1e57b11e6e..374e878b39 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -183,7 +183,7 @@ func (r *resolverDB) computeOddityLookupHost(addrs []string, err error) Oddity { return "" } -func (r *resolverDB) LookupHTTPS(ctx context.Context, domain string) (HTTPSSvc, error) { +func (r *resolverDB) LookupHTTPS(ctx context.Context, domain string) (*HTTPSSvc, error) { started := time.Since(r.begin).Seconds() https, err := r.Resolver.LookupHTTPS(ctx, domain) finished := time.Since(r.begin).Seconds() @@ -198,19 +198,19 @@ func (r *resolverDB) LookupHTTPS(ctx context.Context, domain string) (HTTPSSvc, Oddity: Oddity(r.computeOddityHTTPSSvc(https, err)), } if err == nil { - for _, addr := range https.IPv4Hint() { + for _, addr := range https.IPv4 { ev.Answers = append(ev.Answers, DNSLookupAnswer{ Type: "A", IPv4: addr, }) } - for _, addr := range https.IPv6Hint() { + for _, addr := range https.IPv6 { ev.Answers = append(ev.Answers, DNSLookupAnswer{ Type: "AAAA", IPv6: addr, }) } - for _, alpn := range https.ALPN() { + for _, alpn := range https.ALPN { ev.Answers = append(ev.Answers, DNSLookupAnswer{ Type: "ALPN", ALPN: alpn, @@ -221,12 +221,12 @@ func (r *resolverDB) LookupHTTPS(ctx context.Context, domain string) (HTTPSSvc, return https, err } -func (r *resolverDB) computeOddityHTTPSSvc(https HTTPSSvc, err error) Oddity { +func (r *resolverDB) computeOddityHTTPSSvc(https *HTTPSSvc, err error) Oddity { if err != nil { return r.computeOddityLookupHost(nil, err) } var addrs []string - addrs = append(addrs, https.IPv4Hint()...) - addrs = append(addrs, https.IPv6Hint()...) + addrs = append(addrs, https.IPv4...) + addrs = append(addrs, https.IPv6...) 
return r.computeOddityLookupHost(addrs, nil) } diff --git a/internal/netxlite/dnsx/decoder.go b/internal/netxlite/dnsx/decoder.go index 6e6fcd3517..9e539d305b 100644 --- a/internal/netxlite/dnsx/decoder.go +++ b/internal/netxlite/dnsx/decoder.go @@ -9,33 +9,13 @@ import ( // HTTPSSvc is an HTTPSSvc reply. type HTTPSSvc = model.HTTPSSvc -type https struct { - alpn []string - ipv4hint []string - ipv6hint []string -} - -var _ HTTPSSvc = &https{} - -func (h *https) ALPN() []string { - return h.alpn -} - -func (h *https) IPv4Hint() []string { - return h.ipv4hint -} - -func (h *https) IPv6Hint() []string { - return h.ipv6hint -} - // The Decoder decodes DNS replies. type Decoder interface { // DecodeLookupHost decodes an A or AAAA reply. DecodeLookupHost(qtype uint16, data []byte) ([]string, error) // DecodeHTTPS decodes an HTTPS reply. - DecodeHTTPS(data []byte) (HTTPSSvc, error) + DecodeHTTPS(data []byte) (*HTTPSSvc, error) } // MiekgDecoder uses github.com/miekg/dns to implement the Decoder. 
@@ -60,32 +40,32 @@ func (d *MiekgDecoder) parseReply(data []byte) (*dns.Msg, error) { } } -func (d *MiekgDecoder) DecodeHTTPS(data []byte) (HTTPSSvc, error) { +func (d *MiekgDecoder) DecodeHTTPS(data []byte) (*HTTPSSvc, error) { reply, err := d.parseReply(data) if err != nil { return nil, err } - out := &https{} + out := &HTTPSSvc{} for _, answer := range reply.Answer { switch avalue := answer.(type) { case *dns.HTTPS: for _, v := range avalue.Value { switch extv := v.(type) { case *dns.SVCBAlpn: - out.alpn = extv.Alpn + out.ALPN = extv.Alpn case *dns.SVCBIPv4Hint: for _, ip := range extv.Hint { - out.ipv4hint = append(out.ipv4hint, ip.String()) + out.IPv4 = append(out.IPv4, ip.String()) } case *dns.SVCBIPv6Hint: for _, ip := range extv.Hint { - out.ipv6hint = append(out.ipv6hint, ip.String()) + out.IPv6 = append(out.IPv6, ip.String()) } } } } } - if len(out.alpn) <= 0 { + if len(out.ALPN) <= 0 { return nil, errorsx.ErrOODNSNoAnswer } return out, nil diff --git a/internal/netxlite/dnsx/model/model.go b/internal/netxlite/dnsx/model/model.go index 3360faebf9..75e9055722 100644 --- a/internal/netxlite/dnsx/model/model.go +++ b/internal/netxlite/dnsx/model/model.go @@ -2,13 +2,13 @@ package model // HTTPSSvc is an HTTPSSvc reply. -type HTTPSSvc interface { - // ALPN returns the ALPNs inside the SVCBAlpn structure - ALPN() []string +type HTTPSSvc struct { + // ALPN contains the ALPNs inside the HTTPS reply + ALPN []string - // IPv4Hint returns the IPv4 hints. - IPv4Hint() []string + // IPv4 contains the IPv4 hints. + IPv4 []string - // IPv6Hint returns the IPv6 hints. - IPv6Hint() []string + // IPv6 contains the IPv6 hints. 
+ IPv6 []string } diff --git a/internal/netxlite/dnsx/serial.go b/internal/netxlite/dnsx/serial.go index e7735cdef0..3f8eac21d1 100644 --- a/internal/netxlite/dnsx/serial.go +++ b/internal/netxlite/dnsx/serial.go @@ -63,7 +63,7 @@ func (r *SerialResolver) LookupHost(ctx context.Context, hostname string) ([]str // LookupHTTPS issues an HTTPS query without retrying on failure. func (r *SerialResolver) LookupHTTPS( - ctx context.Context, hostname string) (HTTPSSvc, error) { + ctx context.Context, hostname string) (*HTTPSSvc, error) { querydata, err := r.Encoder.Encode( hostname, dns.TypeHTTPS, r.Txp.RequiresPadding()) if err != nil { diff --git a/internal/netxlite/legacy.go b/internal/netxlite/legacy.go index 5928679043..9af9092bcd 100644 --- a/internal/netxlite/legacy.go +++ b/internal/netxlite/legacy.go @@ -98,7 +98,7 @@ func (r *ResolverLegacyAdapter) CloseIdleConnections() { } func (r *ResolverLegacyAdapter) LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) { + ctx context.Context, domain string) (*HTTPSSvc, error) { return nil, ErrNoDNSTransport } diff --git a/internal/netxlite/mocks/resolver.go b/internal/netxlite/mocks/resolver.go index a353922e73..f0049f1ab5 100644 --- a/internal/netxlite/mocks/resolver.go +++ b/internal/netxlite/mocks/resolver.go @@ -38,6 +38,6 @@ func (r *Resolver) CloseIdleConnections() { type HTTPSSvc = model.HTTPSSvc func (r *Resolver) LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) { + ctx context.Context, domain string) (*HTTPSSvc, error) { panic("not yet implemented") } diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index 788584ff8e..8abd89ea41 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -7,7 +7,6 @@ import ( "net" "time" - "github.com/miekg/dns" "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" "golang.org/x/net/idna" @@ -33,7 +32,7 @@ type Resolver interface { // 
LookupHTTPS issues a single HTTPS query for // a domain without any retry mechanism whatsoever. LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) + ctx context.Context, domain string) (*HTTPSSvc, error) } // ErrNoDNSTransport indicates that the requested Resolver operation @@ -135,7 +134,7 @@ func (r *resolverSystem) CloseIdleConnections() { } func (r *resolverSystem) LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) { + ctx context.Context, domain string) (*HTTPSSvc, error) { return nil, ErrNoDNSTransport } @@ -162,7 +161,7 @@ func (r *resolverLogger) LookupHost(ctx context.Context, hostname string) ([]str } func (r *resolverLogger) LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) { + ctx context.Context, domain string) (*HTTPSSvc, error) { prefix := fmt.Sprintf("resolve[HTTPS] %s with %s (%s)", domain, r.Network(), r.Address()) r.Logger.Debugf("%s...", prefix) start := time.Now() @@ -172,9 +171,9 @@ func (r *resolverLogger) LookupHTTPS( r.Logger.Debugf("%s... %s in %s", prefix, err, elapsed) return nil, err } - alpn := https.ALPN() - a := https.IPv4Hint() - aaaa := https.IPv6Hint() + alpn := https.ALPN + a := https.IPv4 + aaaa := https.IPv6 r.Logger.Debugf("%s... 
%+v %+v %+v in %s", prefix, alpn, a, aaaa, elapsed) return https, nil } @@ -195,7 +194,7 @@ func (r *resolverIDNA) LookupHost(ctx context.Context, hostname string) ([]strin } func (r *resolverIDNA) LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) { + ctx context.Context, domain string) (*HTTPSSvc, error) { host, err := idna.ToASCII(domain) if err != nil { return nil, err @@ -240,7 +239,7 @@ func (r *nullResolver) CloseIdleConnections() { } func (r *nullResolver) LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) { + ctx context.Context, domain string) (*HTTPSSvc, error) { return nil, ErrNoDNSTransport } @@ -261,7 +260,7 @@ func (r *resolverErrWrapper) LookupHost(ctx context.Context, hostname string) ([ } func (r *resolverErrWrapper) LookupHTTPS( - ctx context.Context, domain string) (HTTPSSvc, error) { + ctx context.Context, domain string) (*HTTPSSvc, error) { out, err := r.Resolver.LookupHTTPS(ctx, domain) if err != nil { return nil, errorsx.NewErrWrapper( From d46f1a72a3051acbd393b84aec0acb9a8b6ec6ee Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 28 Sep 2021 12:51:10 +0200 Subject: [PATCH 50/53] refactor(measurex,webstepsx): use new netxlite API --- internal/engine/experiment/webstepsx/th.go | 10 ++++------ internal/measurex/dialer.go | 7 +++---- internal/measurex/dnsx.go | 16 ++++++++-------- internal/measurex/http.go | 6 ++---- internal/measurex/quic.go | 5 ++--- internal/measurex/resolver.go | 11 +++++------ internal/measurex/tls.go | 11 +++++------ 7 files changed, 29 insertions(+), 37 deletions(-) diff --git a/internal/engine/experiment/webstepsx/th.go b/internal/engine/experiment/webstepsx/th.go index 90858412e1..f6f17b6669 100644 --- a/internal/engine/experiment/webstepsx/th.go +++ b/internal/engine/experiment/webstepsx/th.go @@ -20,8 +20,6 @@ import ( "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/measurex" "github.com/ooni/probe-cli/v3/internal/netxlite" - 
"github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" - "github.com/ooni/probe-cli/v3/internal/netxlite/iox" "github.com/ooni/probe-cli/v3/internal/runtimex" "github.com/ooni/probe-cli/v3/internal/version" ) @@ -165,7 +163,7 @@ func (c *THClientCall) httpClientDo(req *http.Request) (*THServerResponse, error return nil, errTHRequestFailed } r := io.LimitReader(resp.Body, thMaxAcceptableBodySize) - respBody, err := iox.ReadAllContext(req.Context(), r) + respBody, err := netxlite.ReadAllContext(req.Context(), r) if err != nil { return nil, err } @@ -209,7 +207,7 @@ func (h *THHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { return } reader := io.LimitReader(req.Body, thMaxAcceptableBodySize) - data, err := iox.ReadAllContext(req.Context(), reader) + data, err := netxlite.ReadAllContext(req.Context(), reader) if err != nil { w.WriteHeader(400) return @@ -377,6 +375,6 @@ const thResolverURL = "https://dns.google/dns-query" // // Here we're using github.com/apex/log as the logger, which // is fine because this is backend only code. -var thResolver = netxlite.WrapResolver(log.Log, dnsx.NewSerialResolver( - dnsx.NewDNSOverHTTPS(http.DefaultClient, thResolverURL), +var thResolver = netxlite.WrapResolver(log.Log, netxlite.NewSerialResolver( + netxlite.NewDNSOverHTTPS(http.DefaultClient, thResolverURL), )) diff --git a/internal/measurex/dialer.go b/internal/measurex/dialer.go index 9f2fad1095..6484857d3f 100644 --- a/internal/measurex/dialer.go +++ b/internal/measurex/dialer.go @@ -12,7 +12,6 @@ import ( "time" "github.com/ooni/probe-cli/v3/internal/netxlite" - "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) // Conn is a network connection. 
@@ -100,11 +99,11 @@ func (c *dialerDB) computeOddity(err error) Oddity { return "" } switch err.Error() { - case errorsx.FailureGenericTimeoutError: + case netxlite.FailureGenericTimeoutError: return OddityTCPConnectTimeout - case errorsx.FailureConnectionRefused: + case netxlite.FailureConnectionRefused: return OddityTCPConnectRefused - case errorsx.FailureHostUnreachable: + case netxlite.FailureHostUnreachable: return OddityTCPConnectHostUnreachable default: return OddityTCPConnectOher diff --git a/internal/measurex/dnsx.go b/internal/measurex/dnsx.go index 79fccbd327..8eae17437d 100644 --- a/internal/measurex/dnsx.go +++ b/internal/measurex/dnsx.go @@ -10,22 +10,22 @@ import ( "context" "time" - "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" + "github.com/ooni/probe-cli/v3/internal/netxlite" ) // DNSXRoundTripper is a transport for sending raw DNS queries // and receiving raw DNS replies. The internal/netxlite/dnsx // package implements a bunch of these transports. -type DNSXRoundTripper = dnsx.RoundTripper +type DNSTransport = netxlite.DNSTransport // WrapDNSXRoundTripper creates a new DNSXRoundTripper that // saves events into the given WritableDB. 
-func (mx *Measurer) WrapDNSXRoundTripper(db WritableDB, rtx dnsx.RoundTripper) DNSXRoundTripper { - return &dnsxRoundTripperDB{db: db, RoundTripper: rtx, begin: mx.Begin} +func (mx *Measurer) WrapDNSXRoundTripper(db WritableDB, rtx netxlite.DNSTransport) DNSTransport { + return &dnsxRoundTripperDB{db: db, DNSTransport: rtx, begin: mx.Begin} } type dnsxRoundTripperDB struct { - dnsx.RoundTripper + netxlite.DNSTransport begin time.Time db WritableDB } @@ -45,11 +45,11 @@ type DNSRoundTripEvent struct { func (txp *dnsxRoundTripperDB) RoundTrip(ctx context.Context, query []byte) ([]byte, error) { started := time.Since(txp.begin).Seconds() - reply, err := txp.RoundTripper.RoundTrip(ctx, query) + reply, err := txp.DNSTransport.RoundTrip(ctx, query) finished := time.Since(txp.begin).Seconds() txp.db.InsertIntoDNSRoundTrip(&DNSRoundTripEvent{ - Network: txp.RoundTripper.Network(), - Address: txp.RoundTripper.Address(), + Network: txp.DNSTransport.Network(), + Address: txp.DNSTransport.Address(), Query: NewArchivalBinaryData(query), Started: started, Finished: finished, diff --git a/internal/measurex/http.go b/internal/measurex/http.go index 6ab414b44e..2003e208c2 100644 --- a/internal/measurex/http.go +++ b/internal/measurex/http.go @@ -28,8 +28,6 @@ import ( "github.com/lucas-clemente/quic-go" "github.com/ooni/probe-cli/v3/internal/engine/httpheader" "github.com/ooni/probe-cli/v3/internal/netxlite" - "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" - "github.com/ooni/probe-cli/v3/internal/netxlite/iox" "github.com/ooni/probe-cli/v3/internal/runtimex" "golang.org/x/net/publicsuffix" ) @@ -169,7 +167,7 @@ func (txp *HTTPTransportDB) RoundTrip(req *http.Request) (*http.Response, error) Headers: NewArchivalHeaders(resp.Header), } r := io.LimitReader(resp.Body, txp.MaxBodySnapshotSize) - body, err := iox.ReadAllContext(req.Context(), r) + body, err := netxlite.ReadAllContext(req.Context(), r) if errors.Is(err, io.EOF) && resp.Close { err = nil // we expected to see an 
EOF here, so no real error } @@ -272,7 +270,7 @@ type httpClientErrWrapper struct { func (c *httpClientErrWrapper) Do(req *http.Request) (*http.Response, error) { resp, err := c.HTTPClient.Do(req) if err != nil { - err = errorsx.NewTopLevelGenericErrWrapper(err) + err = netxlite.NewTopLevelGenericErrWrapper(err) } return resp, err } diff --git a/internal/measurex/quic.go b/internal/measurex/quic.go index 5ef2f0a837..eee7a7f495 100644 --- a/internal/measurex/quic.go +++ b/internal/measurex/quic.go @@ -14,7 +14,6 @@ import ( "github.com/lucas-clemente/quic-go" "github.com/ooni/probe-cli/v3/internal/netxlite" - "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" "github.com/ooni/probe-cli/v3/internal/netxlite/quicx" ) @@ -162,9 +161,9 @@ func (qh *quicDialerDB) computeOddity(err error) Oddity { return "" } switch err.Error() { - case errorsx.FailureGenericTimeoutError: + case netxlite.FailureGenericTimeoutError: return OddityQUICHandshakeTimeout - case errorsx.FailureHostUnreachable: + case netxlite.FailureHostUnreachable: return OddityQUICHandshakeHostUnreachable default: return OddityQUICHandshakeOther diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 374e878b39..7d48b4098f 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -14,7 +14,6 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/netxlite/dnsx" - "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) // HTTPSSvc is the result returned by HTTPSSvc queries. @@ -52,8 +51,8 @@ func (mx *Measurer) NewResolverSystem(db WritableDB, logger Logger) Resolver { // - address is the resolver address (e.g., "1.1.1.1:53"). 
func (mx *Measurer) NewResolverUDP(db WritableDB, logger Logger, address string) Resolver { return mx.WrapResolver(db, netxlite.WrapResolver( - logger, dnsx.NewSerialResolver( - mx.WrapDNSXRoundTripper(db, dnsx.NewDNSOverUDP( + logger, netxlite.NewSerialResolver( + mx.WrapDNSXRoundTripper(db, netxlite.NewDNSOverUDP( mx.NewDialerWithSystemResolver(db, logger), address, )))), @@ -165,11 +164,11 @@ func (r *resolverDB) computeAnswers(addrs []string, qtype string) (out []DNSLook func (r *resolverDB) computeOddityLookupHost(addrs []string, err error) Oddity { if err != nil { switch err.Error() { - case errorsx.FailureGenericTimeoutError: + case netxlite.FailureGenericTimeoutError: return OddityDNSLookupTimeout - case errorsx.FailureDNSNXDOMAINError: + case netxlite.FailureDNSNXDOMAINError: return OddityDNSLookupNXDOMAIN - case errorsx.FailureDNSRefusedError: + case netxlite.FailureDNSRefusedError: return OddityDNSLookupRefused default: return OddityDNSLookupOther diff --git a/internal/measurex/tls.go b/internal/measurex/tls.go index e013999823..8ef85fa08b 100644 --- a/internal/measurex/tls.go +++ b/internal/measurex/tls.go @@ -15,7 +15,6 @@ import ( "time" "github.com/ooni/probe-cli/v3/internal/netxlite" - "github.com/ooni/probe-cli/v3/internal/netxlite/errorsx" ) // TLSHandshaker performs TLS handshakes. 
@@ -90,15 +89,15 @@ func (thx *tlsHandshakerDB) computeOddity(err error) Oddity { return "" } switch err.Error() { - case errorsx.FailureGenericTimeoutError: + case netxlite.FailureGenericTimeoutError: return OddityTLSHandshakeTimeout - case errorsx.FailureConnectionReset: + case netxlite.FailureConnectionReset: return OddityTLSHandshakeReset - case errorsx.FailureEOFError: + case netxlite.FailureEOFError: return OddityTLSHandshakeUnexpectedEOF - case errorsx.FailureSSLInvalidHostname: + case netxlite.FailureSSLInvalidHostname: return OddityTLSHandshakeInvalidHostname - case errorsx.FailureSSLUnknownAuthority: + case netxlite.FailureSSLUnknownAuthority: return OddityTLSHandshakeUnknownAuthority default: return OddityTLSHandshakeOther From 8b0071af124c72ecc47ed94746057b64f592fe88 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 29 Sep 2021 16:29:21 +0200 Subject: [PATCH 51/53] measurex: make th interface more practical It's easier to screw up with channels than it is with interface{} --- .../engine/experiment/webstepsx/measurer.go | 25 ++++++------------- internal/engine/experiment/webstepsx/th.go | 3 ++- internal/measurex/measurement.go | 3 +++ internal/measurex/measurer.go | 10 +++++--- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/internal/engine/experiment/webstepsx/measurer.go b/internal/engine/experiment/webstepsx/measurer.go index b6490f0a10..720111f9d0 100644 --- a/internal/engine/experiment/webstepsx/measurer.go +++ b/internal/engine/experiment/webstepsx/measurer.go @@ -30,8 +30,6 @@ type Config struct{} // TestKeys contains the experiment's test keys. type TestKeys struct { *measurex.URLMeasurement - - TH *THServerResponse `json:"th"` } // Measurer performs the measurement. 
@@ -118,10 +116,9 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, URL string, th *model.Service, out chan<- *model.ExperimentAsyncTestKeys) { defer close(out) helper := &measurerMeasureURLHelper{ - Clnt: sess.DefaultHTTPClient(), - Logger: sess.Logger(), - THURL: th.Address, - thResponse: make(chan *THServerResponse, 1), // buffer + Clnt: sess.DefaultHTTPClient(), + Logger: sess.Logger(), + THURL: th.Address, } mmx := &measurex.Measurer{ Begin: time.Now(), @@ -137,10 +134,7 @@ func (mx *Measurer) runAsync(ctx context.Context, sess model.ExperimentSession, for m := range in { out <- &model.ExperimentAsyncTestKeys{ MeasurementRuntime: m.TotalRuntime.Seconds(), - TestKeys: &TestKeys{ - URLMeasurement: m, - TH: <-helper.thResponse, - }, + TestKeys: &TestKeys{URLMeasurement: m}, Extensions: map[string]int64{ archival.ExtHTTP.Name: archival.ExtHTTP.V, archival.ExtDNS.Name: archival.ExtDNS.V, @@ -163,14 +157,12 @@ type measurerMeasureURLHelper struct { // THURL is the MANDATORY TH URL. THURL string - - // thResponse is the response from the TH. 
- thResponse chan *THServerResponse } func (mth *measurerMeasureURLHelper) LookupExtraHTTPEndpoints( ctx context.Context, URL *url.URL, headers http.Header, - curEndpoints ...*measurex.HTTPEndpoint) ([]*measurex.HTTPEndpoint, error) { + curEndpoints ...*measurex.HTTPEndpoint) ( + []*measurex.HTTPEndpoint, interface{}, error) { cc := &THClientCall{ Endpoints: measurex.HTTPEndpointsToEndpoints(curEndpoints), HTTPClient: mth.Clnt, @@ -184,9 +176,8 @@ func (mth *measurerMeasureURLHelper) LookupExtraHTTPEndpoints( mth.Logger, "THClientCall %s", URL.String()) resp, err := cc.Call(ctx) ol.Stop(err) - mth.thResponse <- resp // note that nil is ~fine here if err != nil { - return nil, err + return nil, resp, err } var out []*measurex.HTTPEndpoint for _, epnt := range resp.Endpoints { @@ -200,7 +191,7 @@ func (mth *measurerMeasureURLHelper) LookupExtraHTTPEndpoints( Header: headers, }) } - return out, nil + return out, resp, nil } // Run implements ExperimentMeasurer.Run. diff --git a/internal/engine/experiment/webstepsx/th.go b/internal/engine/experiment/webstepsx/th.go index f6f17b6669..20978b475a 100644 --- a/internal/engine/experiment/webstepsx/th.go +++ b/internal/engine/experiment/webstepsx/th.go @@ -351,7 +351,8 @@ type thMeasureURLHelper struct { func (thh *thMeasureURLHelper) LookupExtraHTTPEndpoints( ctx context.Context, URL *url.URL, headers http.Header, - serverEpnts ...*measurex.HTTPEndpoint) (epnts []*measurex.HTTPEndpoint, err error) { + serverEpnts ...*measurex.HTTPEndpoint) ( + epnts []*measurex.HTTPEndpoint, thMeaurement interface{}, err error) { for _, epnt := range thh.epnts { epnts = append(epnts, &measurex.HTTPEndpoint{ Domain: URL.Hostname(), diff --git a/internal/measurex/measurement.go b/internal/measurex/measurement.go index be106c2049..0141a2201c 100644 --- a/internal/measurex/measurement.go +++ b/internal/measurex/measurement.go @@ -31,6 +31,9 @@ type URLMeasurement struct { // if we choose to follow redirections. 
RedirectURLs []string `json:"-"` + // THMeasurement is the measurement collected by the TH. + TH interface{} `json:"th,omitempty"` + // TotalRuntime is the total time to measure this URL. TotalRuntime time.Duration `json:"-"` diff --git a/internal/measurex/measurer.go b/internal/measurex/measurer.go index 653540bab8..09b8db4117 100644 --- a/internal/measurex/measurer.go +++ b/internal/measurex/measurer.go @@ -701,6 +701,8 @@ type MeasureURLHelper interface { // - on any kind of error it MUST return nil, err // // - on success it MUST return the NEW endpoints it discovered + // as well as the TH measurement to be added to the measurement + // that the URL measurer is constructing. // // It is the caller's responsibility to merge the NEW list of // endpoints with the ones it passed as argument. @@ -709,7 +711,8 @@ type MeasureURLHelper interface { // newly returned endpoints only use the few headers that our // test helper protocol allows one to set. LookupExtraHTTPEndpoints(ctx context.Context, URL *url.URL, - headers http.Header, epnts ...*HTTPEndpoint) ([]*HTTPEndpoint, error) + headers http.Header, epnts ...*HTTPEndpoint) ( + newEpnts []*HTTPEndpoint, thMeasurement interface{}, err error) } // MeasureURL measures an HTTP or HTTPS URL. The DNS resolvers @@ -764,10 +767,11 @@ func (mx *Measurer) MeasureURL( } if mx.MeasureURLHelper != nil { thBegin := time.Now() - extraEpnts, _ := mx.MeasureURLHelper.LookupExtraHTTPEndpoints( + extraEpnts, thMeasurement, _ := mx.MeasureURLHelper.LookupExtraHTTPEndpoints( ctx, parsed, headers, epnts...) - epnts = removeDuplicateHTTPEndpoints(append(epnts, extraEpnts...)...) m.THRuntime = time.Since(thBegin) + epnts = removeDuplicateHTTPEndpoints(append(epnts, extraEpnts...)...) 
+ m.TH = thMeasurement mx.enforceAllowedHeadersOnly(epnts) } epntRuntime := time.Now() From cc3a78e5dddf40d323e83d9a282f119cfa283254 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 30 Sep 2021 01:37:42 +0200 Subject: [PATCH 52/53] fix wrong diff with master --- internal/tutorial/generator/main.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/internal/tutorial/generator/main.go b/internal/tutorial/generator/main.go index ac33092ec5..b35316f2e2 100644 --- a/internal/tutorial/generator/main.go +++ b/internal/tutorial/generator/main.go @@ -120,12 +120,6 @@ func gennetxlite() { gen(path.Join(prefix, "chapter06"), "main.go") gen(path.Join(prefix, "chapter07"), "main.go") gen(path.Join(prefix, "chapter08"), "main.go") - gen(path.Join(prefix, "chapter09"), "main.go") - gen(path.Join(prefix, "chapter10"), "main.go") - gen(path.Join(prefix, "chapter11"), "main.go") - gen(path.Join(prefix, "chapter12"), "main.go") - gen(path.Join(prefix, "chapter13"), "main.go") - gen(path.Join(prefix, "chapter14"), "main.go") } func main() { From 1744cd0ec04ed2626a30768e10c8bb5d5dc9c5fc Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 30 Sep 2021 01:47:41 +0200 Subject: [PATCH 53/53] fix(webstepsx): point code to roaming th --- internal/engine/experiment/webstepsx/measurer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/engine/experiment/webstepsx/measurer.go b/internal/engine/experiment/webstepsx/measurer.go index 720111f9d0..e5d4ad1975 100644 --- a/internal/engine/experiment/webstepsx/measurer.go +++ b/internal/engine/experiment/webstepsx/measurer.go @@ -95,7 +95,7 @@ func (mx *Measurer) RunAsync( if testhelper == nil { return nil, ErrNoAvailableTestHelpers } - testhelper.Address = "http://127.0.0.1:8080/api/v1/websteps" // TODO(bassosimone): remove! + testhelper.Address = "https://1.th.ooni.org/api/v1/websteps" // TODO(bassosimone): remove! 
out := make(chan *model.ExperimentAsyncTestKeys) go mx.runAsync(ctx, sess, input, testhelper, out) return out, nil