Skip to content

Commit

Permalink
feat(measurex): refactored measurement library (ooni#528)
Browse files Browse the repository at this point in the history
This commit introduce a measurement library that consists of
refactored code from earlier websteps experiments.

I am not going to add tests for the time being, because this library
is still a bit in flux, as we finalize websteps.

I will soon though commit documentation explaining in detail how
to use it, which currrently is at ooni#506
and adds a new directory to internal/tutorial.

The core idea of this measurement library is to allow two
measurement modes:

1. tracing, which is what we're currently doing now, and the
tutorial shows how we can rewrite the measurement part of web
connectivity with measurex using less code. Under a tracing
approach, we construct a normal http.Client that however has
tracing configured, we gather events for resolve, connect, TLS
handshake, QUIC handshake, HTTP round trip, etc. and then we
try to make sense of what happened from the events stream;

2. step-by-step, which is what websteps does, and basically
means that after each operation you immediately write into
a Measurement structure its results and immediately draw the
conclusions on what seems odd (which later may become an
anomaly if we see what the test helper measured).

This library is also such that it produces a data format
compatible with the current OONI spec.

This work is part of ooni/probe#1733.
  • Loading branch information
bassosimone authored Sep 29, 2021
1 parent 74621a0 commit d29a3ad
Show file tree
Hide file tree
Showing 17 changed files with 3,008 additions and 0 deletions.
78 changes: 78 additions & 0 deletions internal/measurex/archival.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package measurex

import (
"net/http"
"strings"
)

//
// Archival
//
// This file defines helpers to serialize to the OONI data format.
//

// ArchivalBinaryData is the archival format for binary data.
type ArchivalBinaryData struct {
Data []byte `json:"data"`
Format string `json:"format"`
}

// NewArchivalBinaryData builds a new ArchivalBinaryData
// from an array of bytes. If the array is nil, we return nil.
func NewArchivalBinaryData(data []byte) (out *ArchivalBinaryData) {
if len(data) > 0 {
out = &ArchivalBinaryData{
Data: data,
Format: "base64",
}
}
return
}

// ArchivalHeaders is a list of HTTP headers.
type ArchivalHeaders map[string]string

// Get searches for the first header with the named key
// and returns it. If not found, returns an empty string.
func (headers ArchivalHeaders) Get(key string) string {
return headers[strings.ToLower(key)]
}

// NewArchivalHeaders builds a new HeadersList from http.Header.
func NewArchivalHeaders(in http.Header) (out ArchivalHeaders) {
out = make(ArchivalHeaders)
for k, vv := range in {
for _, v := range vv {
// It breaks my hearth a little bit to ignore
// subsequent headers, but this does not happen
// very frequently, and I know the pipeline
// parses the map headers format only.
out[strings.ToLower(k)] = v
break
}
}
return
}

// NewArchivalTLSCertList builds a new []ArchivalBinaryData
// from a list of raw x509 certificates data.
func NewArchivalTLSCerts(in [][]byte) (out []*ArchivalBinaryData) {
for _, cert := range in {
out = append(out, &ArchivalBinaryData{
Data: cert,
Format: "base64",
})
}
return
}

// NewArchivalFailure creates an archival failure from an error. We
// cannot round trip an error using JSON, so we serialize to this
// intermediate format that is a sort of Optional<string>.
func NewArchivalFailure(err error) *string {
if err == nil {
return nil
}
s := err.Error()
return &s
}
56 changes: 56 additions & 0 deletions internal/measurex/bogon.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package measurex

//
// Bogon
//
// This file helps us to decide if an IPAddr is a bogon.
//

// TODO(bassosimone): code in engine/netx should use this file.

import (
"net"

"github.com/ooni/probe-cli/v3/internal/runtimex"
)

// isBogon returns whether if an IP address is bogon. Passing to this
// function a non-IP address causes it to return true.
func isBogon(address string) bool {
ip := net.ParseIP(address)
return ip == nil || isPrivate(ip)
}

var privateIPBlocks []*net.IPNet

func init() {
for _, cidr := range []string{
"0.0.0.0/8", // "This" network (however, Linux...)
"10.0.0.0/8", // RFC1918
"100.64.0.0/10", // Carrier grade NAT
"127.0.0.0/8", // IPv4 loopback
"169.254.0.0/16", // RFC3927 link-local
"172.16.0.0/12", // RFC1918
"192.168.0.0/16", // RFC1918
"224.0.0.0/4", // Multicast
"::1/128", // IPv6 loopback
"fe80::/10", // IPv6 link-local
"fc00::/7", // IPv6 unique local addr
} {
_, block, err := net.ParseCIDR(cidr)
runtimex.PanicOnError(err, "net.ParseCIDR failed")
privateIPBlocks = append(privateIPBlocks, block)
}
}

func isPrivate(ip net.IP) bool {
if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
return true
}
for _, block := range privateIPBlocks {
if block.Contains(ip) {
return true
}
}
return false
}
239 changes: 239 additions & 0 deletions internal/measurex/db.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
package measurex

//
// DB
//
// This file defines two types:
//
// - WritableDB is the interface allowing networking code
// (e.g., Dialer to save measurement events);
//
// - MeasurementDB implements WritableDB and allows high-level
// code to generate a Measurement from all the events.
//

import "sync"

// WritableDB is an events "database" in which networking code
// (e.g., Dialer) can save measurement events (e.g., the result
// of a connect, a TLS handshake, a read).
type WritableDB interface {
// InsertIntoDial saves a Dial event.
InsertIntoDial(ev *NetworkEvent)

// InsertIntoReadWrite saves an I/O event.
InsertIntoReadWrite(ev *NetworkEvent)

// InsertIntoClose saves a close event.
InsertIntoClose(ev *NetworkEvent)

// InsertIntoTLSHandshake saves a TLS handshake event.
InsertIntoTLSHandshake(ev *TLSHandshakeEvent)

// InsertIntoLookupHost saves a lookup host event.
InsertIntoLookupHost(ev *DNSLookupEvent)

// InsertIntoLookupHTTPSvc saves an HTTPSvc lookup event.
InsertIntoLookupHTTPSSvc(ev *DNSLookupEvent)

// InsertIntoDNSRoundTrip saves a DNS round trip event.
InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent)

// InsertIntoHTTPRoundTrip saves an HTTP round trip event.
InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent)

// InsertIntoHTTPRedirect saves an HTTP redirect event.
InsertIntoHTTPRedirect(ev *HTTPRedirectEvent)

// InsertIntoQUICHandshake saves a QUIC handshake event.
InsertIntoQUICHandshake(ev *QUICHandshakeEvent)
}

// MeasurementDB is a WritableDB that also allows high-level code
// to generate a Measurement from all the saved events.
type MeasurementDB struct {
// database "tables"
dialTable []*NetworkEvent
readWriteTable []*NetworkEvent
closeTable []*NetworkEvent
tlsHandshakeTable []*TLSHandshakeEvent
lookupHostTable []*DNSLookupEvent
lookupHTTPSvcTable []*DNSLookupEvent
dnsRoundTripTable []*DNSRoundTripEvent
httpRoundTripTable []*HTTPRoundTripEvent
httpRedirectTable []*HTTPRedirectEvent
quicHandshakeTable []*QUICHandshakeEvent

// mu protects all the fields
mu sync.Mutex
}

var _ WritableDB = &MeasurementDB{}

// DeleteAll deletes all the content of the DB.
func (db *MeasurementDB) DeleteAll() {
db.mu.Lock()
db.dialTable = nil
db.readWriteTable = nil
db.closeTable = nil
db.tlsHandshakeTable = nil
db.lookupHostTable = nil
db.lookupHTTPSvcTable = nil
db.dnsRoundTripTable = nil
db.httpRoundTripTable = nil
db.httpRedirectTable = nil
db.quicHandshakeTable = nil
db.mu.Unlock()
}

// InsertIntoDial implements EventDB.InsertIntoDial.
func (db *MeasurementDB) InsertIntoDial(ev *NetworkEvent) {
db.mu.Lock()
db.dialTable = append(db.dialTable, ev)
db.mu.Unlock()
}

// selectAllFromDialUnlocked returns all dial events.
func (db *MeasurementDB) selectAllFromDialUnlocked() (out []*NetworkEvent) {
out = append(out, db.dialTable...)
return
}

// InsertIntoReadWrite implements EventDB.InsertIntoReadWrite.
func (db *MeasurementDB) InsertIntoReadWrite(ev *NetworkEvent) {
db.mu.Lock()
db.readWriteTable = append(db.readWriteTable, ev)
db.mu.Unlock()
}

// selectAllFromReadWriteUnlocked returns all I/O events.
func (db *MeasurementDB) selectAllFromReadWriteUnlocked() (out []*NetworkEvent) {
out = append(out, db.readWriteTable...)
return
}

// InsertIntoClose implements EventDB.InsertIntoClose.
func (db *MeasurementDB) InsertIntoClose(ev *NetworkEvent) {
db.mu.Lock()
db.closeTable = append(db.closeTable, ev)
db.mu.Unlock()
}

// selectAllFromCloseUnlocked returns all close events.
func (db *MeasurementDB) selectAllFromCloseUnlocked() (out []*NetworkEvent) {
out = append(out, db.closeTable...)
return
}

// InsertIntoTLSHandshake implements EventDB.InsertIntoTLSHandshake.
func (db *MeasurementDB) InsertIntoTLSHandshake(ev *TLSHandshakeEvent) {
db.mu.Lock()
db.tlsHandshakeTable = append(db.tlsHandshakeTable, ev)
db.mu.Unlock()
}

// selectAllFromTLSHandshakeUnlocked returns all TLS handshake events.
func (db *MeasurementDB) selectAllFromTLSHandshakeUnlocked() (out []*TLSHandshakeEvent) {
out = append(out, db.tlsHandshakeTable...)
return
}

// InsertIntoLookupHost implements EventDB.InsertIntoLookupHost.
func (db *MeasurementDB) InsertIntoLookupHost(ev *DNSLookupEvent) {
db.mu.Lock()
db.lookupHostTable = append(db.lookupHostTable, ev)
db.mu.Unlock()
}

// selectAllFromLookupHostUnlocked returns all the lookup host events.
func (db *MeasurementDB) selectAllFromLookupHostUnlocked() (out []*DNSLookupEvent) {
out = append(out, db.lookupHostTable...)
return
}

// InsertIntoHTTPSSvc implements EventDB.InsertIntoHTTPSSvc
func (db *MeasurementDB) InsertIntoLookupHTTPSSvc(ev *DNSLookupEvent) {
db.mu.Lock()
db.lookupHTTPSvcTable = append(db.lookupHTTPSvcTable, ev)
db.mu.Unlock()
}

// selectAllFromLookupHTTPSSvcUnlocked returns all HTTPSSvc lookup events.
func (db *MeasurementDB) selectAllFromLookupHTTPSSvcUnlocked() (out []*DNSLookupEvent) {
out = append(out, db.lookupHTTPSvcTable...)
return
}

// InsertIntoDNSRoundTrip implements EventDB.InsertIntoDNSRoundTrip.
func (db *MeasurementDB) InsertIntoDNSRoundTrip(ev *DNSRoundTripEvent) {
db.mu.Lock()
db.dnsRoundTripTable = append(db.dnsRoundTripTable, ev)
db.mu.Unlock()
}

// selectAllFromDNSRoundTripUnlocked returns all DNS round trip events.
func (db *MeasurementDB) selectAllFromDNSRoundTripUnlocked() (out []*DNSRoundTripEvent) {
out = append(out, db.dnsRoundTripTable...)
return
}

// InsertIntoHTTPRoundTrip implements EventDB.InsertIntoHTTPRoundTrip.
func (db *MeasurementDB) InsertIntoHTTPRoundTrip(ev *HTTPRoundTripEvent) {
db.mu.Lock()
db.httpRoundTripTable = append(db.httpRoundTripTable, ev)
db.mu.Unlock()
}

// selectAllFromHTTPRoundTripUnlocked returns all HTTP round trip events.
func (db *MeasurementDB) selectAllFromHTTPRoundTripUnlocked() (out []*HTTPRoundTripEvent) {
out = append(out, db.httpRoundTripTable...)
return
}

// InsertIntoHTTPRedirect implements EventDB.InsertIntoHTTPRedirect.
func (db *MeasurementDB) InsertIntoHTTPRedirect(ev *HTTPRedirectEvent) {
db.mu.Lock()
db.httpRedirectTable = append(db.httpRedirectTable, ev)
db.mu.Unlock()
}

// selectAllFromHTTPRedirectUnlocked returns all HTTP redirections.
func (db *MeasurementDB) selectAllFromHTTPRedirectUnlocked() (out []*HTTPRedirectEvent) {
out = append(out, db.httpRedirectTable...)
return
}

// InsertIntoQUICHandshake implements EventDB.InsertIntoQUICHandshake.
func (db *MeasurementDB) InsertIntoQUICHandshake(ev *QUICHandshakeEvent) {
db.mu.Lock()
db.quicHandshakeTable = append(db.quicHandshakeTable, ev)
db.mu.Unlock()
}

// selectAllFromQUICHandshakeUnlocked returns all QUIC handshake events.
func (db *MeasurementDB) selectAllFromQUICHandshakeUnlocked() (out []*QUICHandshakeEvent) {
out = append(out, db.quicHandshakeTable...)
return
}

// AsMeasurement converts the current state of the database into
// a finalized Measurement structure. The original events will remain
// into the database. To start a new measurement cycle, just create
// a new MeasurementDB instance and use that.
func (db *MeasurementDB) AsMeasurement() *Measurement {
db.mu.Lock()
meas := &Measurement{
Connect: db.selectAllFromDialUnlocked(),
ReadWrite: db.selectAllFromReadWriteUnlocked(),
Close: db.selectAllFromCloseUnlocked(),
TLSHandshake: db.selectAllFromTLSHandshakeUnlocked(),
QUICHandshake: db.selectAllFromQUICHandshakeUnlocked(),
LookupHost: db.selectAllFromLookupHostUnlocked(),
LookupHTTPSSvc: db.selectAllFromLookupHTTPSSvcUnlocked(),
DNSRoundTrip: db.selectAllFromDNSRoundTripUnlocked(),
HTTPRoundTrip: db.selectAllFromHTTPRoundTripUnlocked(),
HTTPRedirect: db.selectAllFromHTTPRedirectUnlocked(),
}
db.mu.Unlock()
return meas
}
Loading

0 comments on commit d29a3ad

Please sign in to comment.