Skip to content

Commit

Permalink
cli: new testhelper and the websteps experiment prototype (ooni#432)
Browse files Browse the repository at this point in the history
This is the extension of ooni#431, and my final deliverable for GSoC 2021.

The diff introduces:

1) The new `testhelper` which supports testing multiple IP endpoints per domain and introduces HTTP/3 control measurements. The specification of the `testhelper` can be found at ooni/spec#219. The `testhelper` algorithm consists of three main steps:

   * `InitialChecks` verifies that the input URL can be parsed, has an expected scheme, and contains a valid domain name.

   * `Explore` enumerates all the URLs that it discovers by redirection from the original URL, or by detecting h3 support at the target host.

   * `Generate` performs a step-by-step measurement of each discovered URL.

2) A prototype of the corresponding new experiment `websteps` which uses the control measurement of the `testhelper` to know which URLs to measure, and what to expect. The prototype does not yet have:

   * unit and integration tests,

   * an analysis tool to compare the control and the probe measurement.

This PR is my final deliverable as it is the outcome of the trials, considerations and efforts of my GSoC weeks at OONI. 
It fully integrates HTTP/3 (QUIC) support, which until now has only been used in the `urlgetter` experiment.

Related issues: ooni/probe#1729 and ooni/probe#1733.
  • Loading branch information
kelmenhorst authored Aug 17, 2021
1 parent be3262e commit d75de2f
Show file tree
Hide file tree
Showing 28 changed files with 2,736 additions and 8 deletions.
4 changes: 1 addition & 3 deletions internal/cmd/oohelperd/internal/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ func HTTPDo(ctx context.Context, config *HTTPConfig) {
// we're implementing (for now?) a more liberal approach.
for k, vs := range config.Headers {
switch strings.ToLower(k) {
case "user-agent":
case "accept":
case "accept-language":
case "user-agent", "accept", "accept-language":
for _, v := range vs {
req.Header.Add(k, v)
}
Expand Down
8 changes: 3 additions & 5 deletions internal/cmd/oohelperd/internal/internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/ooni/probe-cli/v3/internal/engine/netx"
"github.com/ooni/probe-cli/v3/internal/iox"
"github.com/ooni/probe-cli/v3/internal/runtimex"
"github.com/ooni/probe-cli/v3/internal/version"
)

Expand All @@ -27,10 +28,6 @@ func (h Handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
w.WriteHeader(400)
return
}
if req.Header.Get("content-type") != "application/json" {
w.WriteHeader(400)
return
}
reader := &io.LimitedReader{R: req.Body, N: h.MaxAcceptableBody}
data, err := iox.ReadAllContext(req.Context(), reader)
if err != nil {
Expand All @@ -50,7 +47,8 @@ func (h Handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
}
// We assume that the following call cannot fail because it's a
// clearly serializable data structure.
data, _ = json.Marshal(cresp)
data, err = json.Marshal(cresp)
runtimex.PanicOnError(err, "json.Marshal failed")
w.Header().Add("Content-Type", "application/json")
w.Write(data)
}
166 changes: 166 additions & 0 deletions internal/cmd/oohelperd/internal/nwcth/explore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
package nwcth

import (
"crypto/tls"
"net/http"
"net/http/cookiejar"
"net/url"
"sort"
"strings"

"github.com/ooni/probe-cli/v3/internal/netxlite"
"github.com/ooni/probe-cli/v3/internal/runtimex"
)

// Explore is the second step of the test helper algorithm. Its objective
// is to enumerate all the URLs we can discover by redirection from
// the original URL in the test list. Because the test list contains by
// definition noisy data, we need this preprocessing step to learn all
// the URLs that are actually implied by the original URL.

// Explorer is the interface responsible for running Explore.
//
// Implementations take the URL to explore plus an optional set of
// request headers and return the list of round trips they discovered,
// or an error when the exploration could not be performed.
type Explorer interface {
	Explore(URL *url.URL, headers map[string][]string) ([]*RoundTrip, error)
}

// DefaultExplorer is the default Explorer.
type DefaultExplorer struct {
	// resolver is handed to the dialers that get and getH3 build
	// for their HTTP transports.
	resolver netxlite.Resolver
}

// Explore fetches URL using the optional headers provided by the
// CtrlRequest and returns the observed round trips, ordered so that
// the first round trip is the first element of the list.
//
// When an HTTP/3 URL can be derived from the final response, Explore
// also fetches it over HTTP/3 and appends the resulting round trips.
//
// A failure of the initial fetch is returned as (nil, err). Failures
// in the HTTP/3 part are not reported: the round trips collected so
// far are returned with a nil error.
func (e *DefaultExplorer) Explore(URL *url.URL, headers map[string][]string) ([]*RoundTrip, error) {
	finalResp, err := e.get(URL, headers)
	if err != nil {
		return nil, err
	}
	roundTrips := e.rearrange(finalResp, nil)
	h3u, err := getH3URL(finalResp)
	if err != nil {
		// Without an HTTP/3 URL there is nothing more to explore, so
		// we keep going with the URLs discovered so far.
		return roundTrips, nil
	}
	h3Resp, err := e.getH3(h3u, headers)
	if err != nil {
		// The HTTP/3 chain could not be followed, so we keep going
		// with the URLs discovered so far.
		return roundTrips, nil
	}
	return append(roundTrips, e.rearrange(h3Resp, h3u)...), nil
}

// rearrange walks backwards from the final response of an HTTP
// transaction, following each request's Response link, and returns
// one RoundTrip per step, sorted so that the first round trip of the
// transaction is the first element of out.
//
// The optional h3URL carries the HTTP/3 protocol flavour (i.e. h3 or
// h3-29). When it is non-nil, its proto is used for every round trip;
// otherwise the scheme of each request URL is used.
func (e *DefaultExplorer) rearrange(resp *http.Response, h3URL *h3URL) (out []*RoundTrip) {
	for position, cur := 0, resp; cur != nil && cur.Request != nil; position, cur = position+1, cur.Request.Response {
		rt := &RoundTrip{
			Proto:     cur.Request.URL.Scheme,
			SortIndex: position,
			Request:   cur.Request,
			Response:  cur,
		}
		if h3URL != nil {
			rt.Proto = h3URL.proto
		}
		out = append(out, rt)
	}
	// The walk started from the last response, so the sort step is
	// what puts the earliest round trip at the front.
	sort.Sort(&sortHelper{out})
	return out
}

// sortHelper is the helper structure to sort round trips. It
// implements sort.Interface and orders the round trips by
// descending SortIndex.
type sortHelper struct {
	v []*RoundTrip
}

// Len implements sort.Interface.Len.
func (sh *sortHelper) Len() int {
	return len(sh.v)
}

// Less implements sort.Interface.Less. The element with the larger
// SortIndex sorts first. The comparison is strict: sort.Interface
// treats two elements as equal only when neither is less than the
// other, so elements with the same SortIndex must both report false.
func (sh *sortHelper) Less(i, j int) bool {
	return sh.v[i].SortIndex > sh.v[j].SortIndex
}

// Swap implements sort.Interface.Swap.
func (sh *sortHelper) Swap(i, j int) {
	sh.v[i], sh.v[j] = sh.v[j], sh.v[i]
}

// get gets the given URL and returns the final response after
// redirection, and an error. If the error is nil, the final response is valid.
//
// The request offers "h2" and "http/1.1" via ALPN and carries only the
// headers that newRequest lets through. A fresh cookie jar is used for
// each call. The body of the returned response is already closed:
// callers must rely only on the response metadata and on the chain of
// requests reachable through resp.Request.
func (e *DefaultExplorer) get(URL *url.URL, headers map[string][]string) (*http.Response, error) {
	tlsConf := &tls.Config{
		NextProtos: []string{"h2", "http/1.1"},
	}
	transport := netxlite.NewHTTPTransport(NewDialerResolver(e.resolver), tlsConf, &netxlite.TLSHandshakerConfigurable{})
	// Do not silently drop the error: a failure here would otherwise
	// surface later as a client without a usable cookie jar.
	jarjar, err := cookiejar.New(nil)
	runtimex.PanicOnError(err, "cookiejar.New failed")
	clnt := &http.Client{
		Transport: transport,
		Jar:       jarjar,
	}
	// TODO(bassosimone): document why e.newRequest cannot fail.
	req, err := e.newRequest(URL, headers)
	runtimex.PanicOnError(err, "newRequest failed")
	resp, err := clnt.Do(req)
	if err != nil {
		return nil, err
	}
	// Note that we ignore the response body: it is closed before the
	// response is handed back to the caller.
	defer resp.Body.Close()
	return resp, nil
}

// getH3 uses HTTP/3 to get the given URL and returns the final
// response after redirection, and an error. If the error is nil, the final response is valid.
//
// The ALPN protocol offered is the one recorded in h3URL.proto, and the
// request carries only the headers that newRequest lets through. A
// fresh cookie jar is used for each call. The body of the returned
// response is already closed: callers must rely only on the response
// metadata and on the chain of requests reachable through resp.Request.
func (e *DefaultExplorer) getH3(h3URL *h3URL, headers map[string][]string) (*http.Response, error) {
	dialer := NewQUICDialerResolver(e.resolver)
	tlsConf := &tls.Config{
		NextProtos: []string{h3URL.proto},
	}
	transport := netxlite.NewHTTP3Transport(dialer, tlsConf)
	// Do not silently drop the error: a failure here would otherwise
	// surface later as a client without a usable cookie jar.
	jarjar, err := cookiejar.New(nil)
	runtimex.PanicOnError(err, "cookiejar.New failed")
	clnt := &http.Client{
		Transport: transport,
		Jar:       jarjar,
	}
	// TODO(bassosimone): document why e.newRequest cannot fail.
	req, err := e.newRequest(h3URL.URL, headers)
	runtimex.PanicOnError(err, "newRequest failed")
	resp, err := clnt.Do(req)
	if err != nil {
		return nil, err
	}
	// Note that we ignore the response body: it is closed before the
	// response is handed back to the caller.
	defer resp.Body.Close()
	return resp, nil
}

// newRequest builds a GET request for URL. Only the headers whose
// name matches, ignoring case, one of "user-agent", "accept" or
// "accept-language" are copied into the request; every other header
// is dropped. It returns the error of http.NewRequest, if any.
func (e *DefaultExplorer) newRequest(URL *url.URL, headers map[string][]string) (*http.Request, error) {
	request, err := http.NewRequest("GET", URL.String(), nil)
	if err != nil {
		return nil, err
	}
	for name, values := range headers {
		lowered := strings.ToLower(name)
		if lowered != "user-agent" && lowered != "accept" && lowered != "accept-language" {
			continue
		}
		for _, value := range values {
			request.Header.Add(name, value)
		}
	}
	return request, nil
}
156 changes: 156 additions & 0 deletions internal/cmd/oohelperd/internal/nwcth/explore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package nwcth

import (
"net/http"
"net/url"
"testing"

"github.com/ooni/probe-cli/v3/internal/runtimex"
)

// explorer is the DefaultExplorer instance shared by the tests in this file.
var explorer = &DefaultExplorer{resolver: newResolver()}

// TestExploreSuccess checks that exploring https://example.com
// succeeds and yields exactly one round trip.
func TestExploreSuccess(t *testing.T) {
	target, err := url.Parse("https://example.com")
	runtimex.PanicOnError(err, "url.Parse failed")
	roundTrips, err := explorer.Explore(target, nil)
	switch {
	case err != nil:
		t.Fatal("unexpected error")
	case len(roundTrips) != 1:
		t.Fatal("unexpected number of roundtrips")
	}
}

// TestExploreFailure checks that exploring https://example.example
// returns an error and no round trips.
func TestExploreFailure(t *testing.T) {
	target, err := url.Parse("https://example.example")
	runtimex.PanicOnError(err, "url.Parse failed")
	roundTrips, err := explorer.Explore(target, nil)
	switch {
	case err == nil:
		t.Fatal("expected an error here")
	case roundTrips != nil:
		t.Fatal("rts should be nil")
	}
}

// TestExploreSuccessWithH3 checks that exploring https://www.google.com
// yields two round trips: the first with protocol "https" and the
// second with protocol "h3". The test is currently skipped.
func TestExploreSuccessWithH3(t *testing.T) {
	// TODO(bassosimone): figure out why this happens.
	t.Skip("this test does not work in GHA")
	target, err := url.Parse("https://www.google.com")
	runtimex.PanicOnError(err, "url.Parse failed")
	roundTrips, err := explorer.Explore(target, nil)
	if err != nil {
		t.Fatal("unexpected error")
	}
	if len(roundTrips) != 2 {
		t.Fatal("unexpected number of roundtrips")
	}
	for idx, want := range []string{"https", "h3"} {
		if roundTrips[idx].Proto != want {
			t.Fatal("unexpected protocol")
		}
	}
}

func TestGetSuccess(t *testing.T) {
u, err := url.Parse("https://example.com")
resp, err := explorer.get(u, nil)
if err != nil {
t.Fatal("unexpected error")
}
if resp == nil {
t.Fatal("unexpected nil response")
}
buf := make([]byte, 100)
if n, _ := resp.Body.Read(buf); n != 0 {
t.Fatal("expected response body tom be closed")
}

}

func TestGetFailure(t *testing.T) {
u, err := url.Parse("https://example.example")
resp, err := explorer.get(u, nil)
if err == nil {
t.Fatal("expected an error here")
}
if resp != nil {
t.Fatal("response should be nil")
}
}

func TestGetH3Success(t *testing.T) {
u, err := url.Parse("https://www.google.com")
h3u := &h3URL{URL: u, proto: "h3"}
resp, err := explorer.getH3(h3u, nil)
if err != nil {
t.Fatal("unexpected error")
}
if resp == nil {
t.Fatal("unexpected nil response")
}
buf := make([]byte, 100)
if n, _ := resp.Body.Read(buf); n != 0 {
t.Fatal("expected response body tom be closed")
}

}

func TestGetH3Failure(t *testing.T) {
u, err := url.Parse("https://www.google.google")
h3u := &h3URL{URL: u, proto: "h3"}
resp, err := explorer.getH3(h3u, nil)
if err == nil {
t.Fatal("expected an error here")
}
if resp != nil {
t.Fatal("response should be nil")
}
}

// TestRearrange builds a chain of three responses linked through
// Request.Response and checks that rearrange returns three round
// trips, ordered from the innermost (earliest) to the outermost
// (final) one, each carrying the protocol of the provided h3URL.
func TestRearrange(t *testing.T) {
	u, err := url.Parse("https://example.com")
	runtimex.PanicOnError(err, "url.Parse failed")
	resp := &http.Response{
		// the ProtoMajor field identifies the request/response structs and indicates the correct order
		ProtoMajor: 2,
		Request: &http.Request{
			ProtoMajor: 2,
			URL:        u,
			Response: &http.Response{
				ProtoMajor: 1,
				Request: &http.Request{
					ProtoMajor: 1,
					URL:        u,
					Response: &http.Response{
						ProtoMajor: 0,
						Request: &http.Request{
							ProtoMajor: 0,
							URL:        u,
						},
					},
				},
			},
		},
	}
	h3u := &h3URL{URL: u, proto: "expected"}
	rts := explorer.rearrange(resp, h3u)
	// Without this check an empty result would pass the loop below
	// vacuously.
	if len(rts) != 3 {
		t.Fatal("unexpected number of roundtrips")
	}
	expectedIndex := 0
	for _, rt := range rts {
		if rt.Request == nil || rt.Response == nil {
			t.Fatal("unexpected nil value")
		}
		if rt.Request.ProtoMajor != expectedIndex {
			t.Fatal("unexpected order")
		}
		if rt.Response.ProtoMajor != expectedIndex {
			t.Fatal("unexpected order")
		}
		if rt.Proto != h3u.proto {
			t.Fatal("unexpected protocol")
		}
		expectedIndex++
	}
}
Loading

0 comments on commit d75de2f

Please sign in to comment.