ooni · bassosimone · Jan 19, 2024 · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024
@@ -1,5 +1,12 @@
 package webconnectivitylte
 
+//
+// The "classic" analysis engine.
+//
+// We try to emulate results produced by v0.4 of Web Connectivity and
+// also attempt to provide a more fine-grained view of the results.
+//
+
 import (
 	"github.com/ooni/probe-cli/v3/internal/minipipeline"
 	"github.com/ooni/probe-cli/v3/internal/model"
@@ -8,7 +15,8 @@ import (
 )
 
 // AnalysisEngineClassic is an alternative analysis engine that aims to produce
-// results that are backward compatible with Web Connectivity v0.4.
+// results that are backward compatible with Web Connectivity v0.4 while also
+// producing more fine-grained blocking flags.
 func AnalysisEngineClassic(tk *TestKeys, logger model.Logger) {
 	tk.analysisClassic(logger)
 }
@@ -31,26 +39,29 @@ func (tk *TestKeys) analysisClassic(logger model.Logger) {
 		runtimex.Try0(container.IngestControlMessages(tk.ControlRequest, tk.Control))
 	}
 
-	// 2. filter observations to only include results collected by the
+	// 2. compute extended analysis flags
+	analysisExtMain(tk, container)
+
+	// 3. filter observations to only include results collected by the
 	// system resolver, which approximates v0.4's results
 	classic := minipipeline.ClassicFilter(container)
 
 	// 4. produce a web observations analysis based on the web observations
 	woa := minipipeline.AnalyzeWebObservationsWithLinearAnalysis(classic)
 
-	// 4. determine the DNS consistency
+	// 5. determine the DNS consistency
 	tk.DNSConsistency = analysisClassicDNSConsistency(woa)
 
-	// 5. set DNSExperimentFailure
+	// 6. set DNSExperimentFailure
 	if !woa.DNSExperimentFailure.IsNone() && woa.DNSExperimentFailure.Unwrap() != "" {
 		value := woa.DNSExperimentFailure.Unwrap()
 		tk.DNSExperimentFailure = &value
 	}
 
-	// 6. compute the HTTPDiff values
+	// 7. compute the HTTPDiff values
 	tk.setHTTPDiffValues(woa)
 
-	// 7. compute blocking & accessible
+	// 8. compute blocking & accessible
 	analysisClassicComputeBlockingAccessible(woa, tk)
 }
 
@@ -72,6 +83,7 @@ func analysisClassicDNSConsistency(woa *minipipeline.WebAnalysis) optional.Value
 }
 
 func (tk *TestKeys) setHTTPDiffValues(woa *minipipeline.WebAnalysis) {
+	// TODO(bassosimone): this code should use [newAnalysisHTTPDiffStatus].
 	const bodyProportionFactor = 0.7
 	if !woa.HTTPFinalResponseDiffBodyProportionFactor.IsNone() {
 		tk.BodyProportion = woa.HTTPFinalResponseDiffBodyProportionFactor.Unwrap()
@@ -116,6 +128,7 @@ var _ analysisClassicTestKeysProxy = &TestKeys{}
 
 // httpDiff implements analysisClassicTestKeysProxy.
 func (tk *TestKeys) httpDiff() bool {
+	// TODO(bassosimone): this code should use [newAnalysisHTTPDiffStatus].
 	if tk.StatusCodeMatch != nil && *tk.StatusCodeMatch {
 		if tk.BodyLengthMatch != nil && *tk.BodyLengthMatch {
 			return false

@@ -0,0 +1,246 @@
+package webconnectivitylte
+
+//
+// The extended ("ext") analysis engine.
+//
+// We analyze all the produced observations without limiting ourselves to
+// analyzing observations rooted in getaddrinfo lookups.
+//
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/ooni/probe-cli/v3/internal/minipipeline"
+)
+
+// analysisExtMain computes the extended analysis using all the observations in the container.
+//
+// This function MUTATES the [*TestKeys] and prints the findings, if any, to the standard output.
+func analysisExtMain(tk *TestKeys, container *minipipeline.WebObservationsContainer) {
+	// compute the web analysis (note: the container is passed as-is, without filtering)
+	analysis := minipipeline.AnalyzeWebObservationsWithoutLinearAnalysis(container)
+
+	// collect the informational messages written by the analysis steps below
+	var info strings.Builder
+
+	// DNS & address analysis matching with control info (i.e., analysis
+	// of what happened during the 0th redirect)
+	analysisExtDNS(tk, analysis, &info)
+
+	// endpoint (TCP, TLS, HTTP) failure analysis matching with control info (i.e., analysis
+	// of what happened during the 0th redirect)
+	analysisExtEndpointFailure(tk, analysis, &info)
+
+	// error occurring during redirects (which we can possibly explain if the control
+	// succeeded in getting a webpage from the target server)
+	analysisExtRedirectErrors(tk, analysis, &info)
+
+	// HTTP success analysis (i.e., only if we manage to get an HTTP response)
+	analysisExtHTTPFinalResponse(tk, analysis, &info)
+
+	// TODO(bassosimone): we need to also compute the null-null flags here
+
+	// print the content of the analysis (to the standard output) only if there's some content to print
+	if content := info.String(); content != "" {
+		fmt.Printf("\n")
+		fmt.Printf("Extended Analysis\n")
+		fmt.Printf("-----------------\n")
+		fmt.Printf("%s", content)
+		fmt.Printf("\n\n")
+	}
+}
+
+func analysisExtDNS(tk *TestKeys, analysis *minipipeline.WebAnalysis, info io.Writer) {
+	// sets the DNS-related bits of tk.BlockingFlags and tk.DNSFlags and writes one line per finding to info.
+	// note: we check every condition (no early return) because we're processing N >= 1 DNS lookups.
+
+	if analysis.DNSLookupSuccessWithBogonAddresses.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagDNSBlocking
+		tk.DNSFlags |= AnalysisFlagDNSBogon // NOTE(review): named AnalysisFlagDNS*, unlike the AnalysisDNSFlag* constants used below — confirm
+		fmt.Fprintf(
+			info, "- transactions with bogon IP addresses: %s\n",
+			analysis.DNSLookupSuccessWithBogonAddresses.String(),
+		)
+	}
+
+	if analysis.DNSLookupUnexpectedFailure.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagDNSBlocking
+		tk.DNSFlags |= AnalysisDNSFlagUnexpectedFailure
+		fmt.Fprintf(
+			info, "- transactions with unexpected DNS lookup failures: %s\n",
+			analysis.DNSLookupUnexpectedFailure.String(),
+		)
+	}
+
+	if analysis.DNSLookupSuccessWithInvalidAddresses.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagDNSBlocking
+		tk.DNSFlags |= AnalysisDNSFlagUnexpectedAddrs
+		fmt.Fprintf(
+			info, "- transactions with invalid IP addrs: %s\n",
+			analysis.DNSLookupSuccessWithInvalidAddresses.String(),
+		)
+	}
+}
+
+func analysisExtEndpointFailure(tk *TestKeys, analysis *minipipeline.WebAnalysis, info io.Writer) {
+	// sets the TCP/IP, TLS, and HTTP bits of tk.BlockingFlags and writes one line per finding to info.
+	// note: we match all the possible conditions because we're processing N >= 1 endpoint
+	// measurements (with the exception of HTTP, but it makes sense to also process HTTP failures here).
+	//
+	// also note that the definition of "unexpected" implies that we could
+	// use the TH (test helper) to establish some expectations.
+
+	// TCP analysis
+	if analysis.TCPConnectUnexpectedFailure.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagTCPIPBlocking
+		fmt.Fprintf(
+			info, "- transactions with unexpected TCP connect failures: %s\n",
+			analysis.TCPConnectUnexpectedFailure.String(),
+		)
+	}
+
+	// TLS analysis
+	if analysis.TLSHandshakeUnexpectedFailure.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagTLSBlocking
+		fmt.Fprintf(
+			info, "- transactions with unexpected TLS handshake failures: %s\n",
+			analysis.TLSHandshakeUnexpectedFailure.String(),
+		)
+	}
+
+	// HTTP failure analysis
+	if analysis.HTTPRoundTripUnexpectedFailure.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagHTTPBlocking
+		fmt.Fprintf(
+			info, "- transactions with unexpected HTTP round trip failures: %s\n",
+			analysis.HTTPRoundTripUnexpectedFailure.String(),
+		)
+	}
+}
+
+func analysisExtHTTPFinalResponse(tk *TestKeys, analysis *minipipeline.WebAnalysis, info io.Writer) {
+	switch { // only the first matching case runs: it reports to info and possibly sets tk.BlockingFlags
+	// case #1: HTTP final response without control
+	//
+	// we don't know what to do in this case, so we only emit a message and leave the flags untouched.
+	case !analysis.HTTPFinalResponseSuccessTCPWithoutControl.IsNone():
+		txID := analysis.HTTPFinalResponseSuccessTCPWithoutControl.Unwrap()
+		fmt.Fprintf(
+			info,
+			"- there is no control information to compare to the final response (transaction: %d)\n",
+			txID,
+		)
+		return
+
+	// case #2: HTTPS final response without control
+	//
+	// this is automatic success (we set the success flag without comparing to the control).
+	case !analysis.HTTPFinalResponseSuccessTLSWithoutControl.IsNone():
+		txID := analysis.HTTPFinalResponseSuccessTLSWithoutControl.Unwrap()
+		fmt.Fprintf(info, "- the final response (transaction: %d) uses TLS: automatic success\n", txID)
+		tk.BlockingFlags |= AnalysisBlockingFlagSuccess
+		return
+
+	// case #3: HTTPS final response with control
+	//
+	// this is also automatic success (the control response is not inspected here).
+	case !analysis.HTTPFinalResponseSuccessTLSWithControl.IsNone():
+		txID := analysis.HTTPFinalResponseSuccessTLSWithControl.Unwrap()
+		fmt.Fprintf(info, "- the final response (transaction: %d) uses TLS: automatic success\n", txID)
+		tk.BlockingFlags |= AnalysisBlockingFlagSuccess
+		return
+
+	// case #4: HTTP final response with control
+	//
+	// we need to run HTTPDiff to choose between the HTTP diff flag and the success flag
+	case !analysis.HTTPFinalResponseSuccessTCPWithControl.IsNone():
+		txID := analysis.HTTPFinalResponseSuccessTCPWithControl.Unwrap()
+		hds := newAnalysisHTTPDiffStatus(analysis)
+		if hds.httpDiff() {
+			tk.BlockingFlags |= AnalysisBlockingFlagHTTPDiff
+			fmt.Fprintf(info, "- the final response (transaction: %d) differs from the control response\n", txID)
+			return
+		}
+		fmt.Fprintf(info, "- the final response (transaction: %d) matches the control response\n", txID)
+		tk.BlockingFlags |= AnalysisBlockingFlagSuccess
+		return
+
+	// case #5: we don't know (none of the final-response fields above is set, so we do nothing)
+	default:
+		return
+	}
+}
+
+func analysisExtRedirectErrors(tk *TestKeys, analysis *minipipeline.WebAnalysis, info io.Writer) {
+	// Implementation note: we care about cases in which we don't have a final response
+	// to compare to and we have unexplained failures. We define "unexplained failure" as a
+	// failure for which there's no corresponding control information. If we have test
+	// helper information telling us that the control server could fetch the final webpage
+	// then we can turn these unexplained errors into explained errors.
+
+	switch {
+	// case #1: there is a successful final response with or without control (nothing to do here)
+	case !analysis.HTTPFinalResponseSuccessTCPWithoutControl.IsNone():
+		return
+	case !analysis.HTTPFinalResponseSuccessTLSWithoutControl.IsNone():
+		return
+	case !analysis.HTTPFinalResponseSuccessTLSWithControl.IsNone():
+		return
+	case !analysis.HTTPFinalResponseSuccessTCPWithControl.IsNone():
+		return
+
+	// case #2: no final response, which is what we care about
+	default:
+		// continue with the checks below
+	}
+
+	// we care about cases in which the TH succeeded (i.e., expectations exist and their failure is the empty string)
+	if analysis.ControlFinalResponseExpectations.IsNone() {
+		return
+	}
+	expect := analysis.ControlFinalResponseExpectations.Unwrap()
+	if expect.Failure.IsNone() {
+		return
+	}
+	if expect.Failure.Unwrap() != "" {
+		return
+	}
+
+	// okay, now we're in business and we can explain what happened
+	//
+	// these cases are NOT MUTUALLY EXCLUSIVE because we may have different
+	// DNS lookups or endpoints failing in different ways here
+	if failures := analysis.DNSLookupUnexplainedFailure; failures.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagDNSBlocking
+		fmt.Fprintf(
+			info, "- transactions with unexplained DNS lookup failures and successful control: %s\n",
+			failures.String(),
+		)
+	}
+
+	if failures := analysis.TCPConnectUnexplainedFailure; failures.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagTCPIPBlocking
+		fmt.Fprintf(
+			info, "- transactions with unexplained TCP connect failures and successful control: %s\n",
+			failures.String(),
+		)
+	}
+
+	if failures := analysis.TLSHandshakeUnexplainedFailure; failures.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagTLSBlocking
+		fmt.Fprintf(
+			info, "- transactions with unexplained TLS handshake failures and successful control: %s\n",
+			failures.String(),
+		)
+	}
+
+	if failures := analysis.HTTPRoundTripUnexplainedFailure; failures.Len() > 0 {
+		tk.BlockingFlags |= AnalysisBlockingFlagHTTPBlocking
+		fmt.Fprintf(
+			info, "- transactions with unexplained HTTP round trip failures and successful control: %s\n",
+			failures.String(),
+		)
+	}
+}
@@ -11,10 +11,71 @@ import (
 
 	"github.com/ooni/probe-cli/v3/internal/experiment/webconnectivity"
 	"github.com/ooni/probe-cli/v3/internal/measurexlite"
+	"github.com/ooni/probe-cli/v3/internal/minipipeline"
 	"github.com/ooni/probe-cli/v3/internal/model"
+	"github.com/ooni/probe-cli/v3/internal/optional"
 	"github.com/ooni/probe-cli/v3/internal/runtimex"
 )
 
+// analysisHTTPDiffStatus contains the status relevant to compute HTTP diff (see [newAnalysisHTTPDiffStatus]).
+type analysisHTTPDiffStatus struct {
+	BodyProportion  optional.Value[float64] `json:"body_proportion"`   // body proportion factor copied from the web analysis
+	BodyLengthMatch optional.Value[bool]    `json:"body_length_match"` // true when BodyProportion is above 0.7
+	HeadersMatch    optional.Value[bool]    `json:"headers_match"`     // true when the uncommon headers intersection is not empty
+	StatusCodeMatch optional.Value[bool]    `json:"status_code_match"` // status code match copied from the web analysis
+	TitleMatch      optional.Value[bool]    `json:"title_match"`       // true when the titles have no different long words
+}
+
+// newAnalysisHTTPDiffStatus constructs a new [*analysisHTTPDiffStatus], only setting the fields for which analysis has data.
+func newAnalysisHTTPDiffStatus(analysis *minipipeline.WebAnalysis) *analysisHTTPDiffStatus {
+	hds := &analysisHTTPDiffStatus{}
+
+	// BodyProportion & BodyLengthMatch (the lengths match when the proportion is above the factor)
+	const bodyProportionFactor = 0.7
+	if !analysis.HTTPFinalResponseDiffBodyProportionFactor.IsNone() {
+		hds.BodyProportion = analysis.HTTPFinalResponseDiffBodyProportionFactor
+		value := hds.BodyProportion.Unwrap() > bodyProportionFactor
+		hds.BodyLengthMatch = optional.Some(value)
+	}
+
+	// HeadersMatch (true when the uncommon headers intersection is not empty)
+	if !analysis.HTTPFinalResponseDiffUncommonHeadersIntersection.IsNone() {
+		value := len(analysis.HTTPFinalResponseDiffUncommonHeadersIntersection.Unwrap()) > 0
+		hds.HeadersMatch = optional.Some(value)
+	}
+
+	// StatusCodeMatch (copied as-is from the analysis)
+	if !analysis.HTTPFinalResponseDiffStatusCodeMatch.IsNone() {
+		value := analysis.HTTPFinalResponseDiffStatusCodeMatch.Unwrap()
+		hds.StatusCodeMatch = optional.Some(value)
+	}
+
+	// TitleMatch (true when there are no different long words in the titles)
+	if !analysis.HTTPFinalResponseDiffTitleDifferentLongWords.IsNone() {
+		value := len(analysis.HTTPFinalResponseDiffTitleDifferentLongWords.Unwrap()) <= 0
+		hds.TitleMatch = optional.Some(value)
+	}
+
+	return hds
+}
+
+// httpDiff computes whether there is HTTP diff: it returns false only when the status code matches and at least one of body length, headers, or title is known to match.
+func (hds *analysisHTTPDiffStatus) httpDiff() bool {
+	if !hds.StatusCodeMatch.IsNone() && hds.StatusCodeMatch.Unwrap() {
+		if !hds.BodyLengthMatch.IsNone() && hds.BodyLengthMatch.Unwrap() {
+			return false
+		}
+		if !hds.HeadersMatch.IsNone() && hds.HeadersMatch.Unwrap() {
+			return false
+		}
+		if !hds.TitleMatch.IsNone() && hds.TitleMatch.Unwrap() {
+			return false
+		}
+		// fallthrough: the status code matches but nothing else is known to match
+	}
+	return true
+}
+
 // analysisHTTPDiff computes the HTTP diff between the final request-response
 // observed by the probe and the TH's result. The caller is responsible of passing
 // us a valid probe observation and a valid TH observation with nil failure.