sql: add datadriven telemetry tests

This commit adds `sql.TestTelemetry` which uses a datadriven approach to telemetry testing. See the comment for that function for more details. `TestCBOReportUsage` is converted to a datadriven test. Similar tests should be moved over in subsequent changes. Informs cockroachdb#46730. Release note: None
RaduBerinde · Apr 7, 2020 · 0ca3976 · 0ca3976
1 parent 345e787
commit 0ca3976
Show file tree

Hide file tree

Showing 5 changed files with 322 additions and 157 deletions.
diff --git a/pkg/server/updates_test.go b/pkg/server/updates_test.go
@@ -35,7 +35,6 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/testutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/diagutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
-	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
 	"github.com/kr/pretty"
 	"github.com/pkg/errors"
@@ -207,158 +206,6 @@ func TestUsageQuantization(t *testing.T) {
 	}
 }
 
-func TestCBOReportUsage(t *testing.T) {
-	defer leaktest.AfterTest(t)()
-
-	const elemName = "somestring"
-	ctx := context.TODO()
-
-	r := diagutils.NewServer()
-	defer r.Close()
-
-	st := cluster.MakeTestingClusterSettings()
-
-	url := r.URL()
-	storeSpec := base.DefaultTestStoreSpec
-	storeSpec.Attributes = roachpb.Attributes{Attrs: []string{elemName}}
-	params := base.TestServerArgs{
-		StoreSpecs: []base.StoreSpec{
-			storeSpec,
-			base.DefaultTestStoreSpec,
-		},
-		Settings: st,
-		Locality: roachpb.Locality{
-			Tiers: []roachpb.Tier{
-				{Key: "region", Value: "east"},
-				{Key: "zone", Value: elemName},
-				{Key: "state", Value: "ny"},
-				{Key: "city", Value: "nyc"},
-			},
-		},
-		Knobs: base.TestingKnobs{
-			SQLLeaseManager: &sql.LeaseManagerTestingKnobs{
-				// Disable SELECT called for delete orphaned leases to keep
-				// query stats stable.
-				DisableDeleteOrphanedLeases: true,
-			},
-			Server: &TestingKnobs{
-				DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{
-					OverrideReportingURL: &url,
-				},
-			},
-		},
-	}
-
-	s, db, _ := serverutils.StartServer(t, params)
-	// Stopper will wait for the update/report loop to finish too.
-	defer s.Stopper().Stop(context.TODO())
-	defer db.Close()
-	ts := s.(*TestServer)
-
-	// Disable periodic reporting so it doesn't interfere with the test.
-	if _, err := db.Exec(`SET CLUSTER SETTING diagnostics.reporting.enabled = false`); err != nil {
-		t.Fatal(err)
-	}
-
-	// make sure the test's generated activity is the only activity we measure.
-	telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ResetCounts)
-
-	sqlDB := sqlutils.MakeSQLRunner(db)
-
-	sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %s`, elemName))
-	sqlDB.Exec(t, `CREATE TABLE x (a INT PRIMARY KEY)`)
-	sqlDB.Exec(t, `CREATE STATISTICS stats FROM x`)
-	// Run a variety of hinted queries.
-	sqlDB.Exec(t, `SELECT * FROM x@primary`)
-	sqlDB.Exec(t, `EXPLAIN SELECT * FROM x`)
-	sqlDB.Exec(t, `EXPLAIN (distsql) SELECT * FROM x`)
-	sqlDB.Exec(t, `EXPLAIN ANALYZE SELECT * FROM x`)
-	sqlDB.Exec(t, `EXPLAIN ANALYZE (DEBUG) SELECT * FROM x`)
-	sqlDB.Exec(t, `EXPLAIN (opt) SELECT * FROM x`)
-	sqlDB.Exec(t, `EXPLAIN (opt, verbose) SELECT * FROM x`)
-	// Do joins different numbers of times to disambiguate them in the expected,
-	// output but make sure the query strings are different each time so that the
-	// plan cache doesn't affect our results.
-	sqlDB.Exec(
-		t,
-		`SELECT x FROM (VALUES (1)) AS a(x) INNER HASH JOIN (VALUES (1)) AS b(y) ON x = y`,
-	)
-	for i := 0; i < 2; i++ {
-		sqlDB.Exec(
-			t,
-			fmt.Sprintf(
-				`SELECT x FROM (VALUES (%d)) AS a(x) INNER MERGE JOIN (VALUES (1)) AS b(y) ON x = y`,
-				i,
-			),
-		)
-	}
-	for i := 0; i < 3; i++ {
-		sqlDB.Exec(
-			t,
-			fmt.Sprintf(
-				`SELECT a FROM (VALUES (%d)) AS b(y) INNER LOOKUP JOIN x ON y = a`,
-				i,
-			),
-		)
-	}
-
-	for i := 0; i < 20; i += 3 {
-		sqlDB.Exec(
-			t,
-			fmt.Sprintf(
-				`SET CLUSTER SETTING sql.defaults.reorder_joins_limit = %d`,
-				i,
-			),
-		)
-	}
-
-	sqlDB.Exec(
-		t,
-		`SET CLUSTER SETTING sql.stats.automatic_collection.enabled = on`,
-	)
-
-	sqlDB.Exec(
-		t,
-		`SET CLUSTER SETTING sql.stats.automatic_collection.enabled = off`,
-	)
-
-	if _, err := db.Exec(`RESET CLUSTER SETTING sql.stats.automatic_collection.enabled`); err != nil {
-		t.Fatal(err)
-	}
-
-	ts.ReportDiagnostics(ctx)
-
-	expectedFeatureUsage := map[string]int32{
-		"sql.plan.hints.hash-join":              1,
-		"sql.plan.hints.merge-join":             2,
-		"sql.plan.hints.lookup-join":            3,
-		"sql.plan.hints.index":                  1,
-		"sql.plan.reorder-joins.set-limit-0":    1,
-		"sql.plan.reorder-joins.set-limit-3":    1,
-		"sql.plan.reorder-joins.set-limit-6":    1,
-		"sql.plan.reorder-joins.set-limit-9":    1,
-		"sql.plan.reorder-joins.set-limit-more": 3,
-		"sql.plan.automatic-stats.enabled":      2,
-		"sql.plan.automatic-stats.disabled":     1,
-		"sql.plan.stats.created":                1,
-		"sql.plan.explain":                      1,
-		"sql.plan.explain-analyze":              1,
-		"sql.plan.explain-analyze-debug":        1,
-		"sql.plan.explain-opt":                  1,
-		"sql.plan.explain-opt-verbose":          1,
-		"sql.plan.explain-distsql":              1,
-	}
-
-	last := r.LastRequestData()
-	for key, expected := range expectedFeatureUsage {
-		if got, ok := last.FeatureUsage[key]; !ok {
-			t.Fatalf("expected report of feature %q", key)
-		} else if got != expected {
-			t.Fatalf("expected reported value of feature %q to be %d not %d", key, expected, got)
-		}
-	}
-}
-
 func TestReportUsage(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 

diff --git a/pkg/sql/telemetry_test.go b/pkg/sql/telemetry_test.go
@@ -0,0 +1,156 @@
+// Copyright 2020 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package sql_test
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+	"testing"
+	"text/tabwriter"
+
+	"github.com/cockroachdb/cockroach/pkg/base"
+	"github.com/cockroachdb/cockroach/pkg/server"
+	"github.com/cockroachdb/cockroach/pkg/server/diagnosticspb"
+	"github.com/cockroachdb/cockroach/pkg/testutils/diagutils"
+	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
+	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
+	"github.com/cockroachdb/cockroach/pkg/util/cloudinfo"
+	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
+	"github.com/cockroachdb/datadriven"
+	"github.com/cockroachdb/errors"
+)
+
+// TestTelemetry runs the datadriven telemetry tests. The test sets up a
+// database and a testing diagnostics reporting server. The test implements the
+// following data-driven commands:
+//
+//  - exec
+//
+//    Executes SQL statements against the database. Outputs no results on
+//    success. In case of error, outputs the error message.
+//
+//  - feature-whitelist
+//
+//    The input for this command is not SQL, but just a list of counter values.
+//    Tests that follow (until the next feature-whitelist command) will only
+//    output counters in this white list.
+//
+//  - feature-usage, feature-counters
+//
+//    Executes SQL statements and then outputs the feature counters from the
+//    white list that have been reported to the diagnostic server. The first
+//    variant outputs only the names of the counters that changed; the second
+//    variant outputs the counts as well. It is necessary to use
+//    feature-whitelist before these commands to avoid test flakes (e.g. because
+//    of counters that are changed by looking up descriptors)
+//
+func TestTelemetry(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	ctx := context.Background()
+	// Note: these tests cannot be run in parallel (with each other or with other
+	// tests) because telemetry counters are global.
+	datadriven.Walk(t, "testdata/telemetry", func(t *testing.T, path string) {
+		// Disable cloud info reporting (it would make these tests really slow).
+		defer cloudinfo.Disable()()
+
+		diagSrv := diagutils.NewServer()
+		defer diagSrv.Close()
+
+		diagSrvURL := diagSrv.URL()
+		params := base.TestServerArgs{
+			Knobs: base.TestingKnobs{
+				Server: &server.TestingKnobs{
+					DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{
+						OverrideReportingURL: &diagSrvURL,
+					},
+				},
+			},
+		}
+		s, sqlConn, _ := serverutils.StartServer(t, params)
+		defer s.Stopper().Stop(ctx)
+
+		runner := sqlutils.MakeSQLRunner(sqlConn)
+		// Disable automatic reporting so it doesn't interfere with the test.
+		runner.Exec(t, "SET CLUSTER SETTING diagnostics.reporting.enabled = false")
+		runner.Exec(t, "SET CLUSTER SETTING diagnostics.reporting.send_crash_reports = false")
+		// Disable plan caching to get accurate counts if the same statement is
+		// issued multiple times.
+		runner.Exec(t, "SET CLUSTER SETTING sql.query_cache.enabled = false")
+
+		var featureKeyWhitelist map[string]struct{}
+		datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string {
+			switch td.Cmd {
+			case "exec":
+				_, err := sqlConn.Exec(td.Input)
+				if err != nil {
+					if errors.HasAssertionFailure(err) {
+						td.Fatalf(t, "%+v", err)
+					}
+					return fmt.Sprintf("error: %v\n", err)
+				}
+				return ""
+
+			case "feature-whitelist":
+				featureKeyWhitelist = make(map[string]struct{})
+				for _, k := range strings.Split(td.Input, "\n") {
+					featureKeyWhitelist[k] = struct{}{}
+				}
+				return ""
+
+			case "feature-usage", "feature-counters":
+				// Report diagnostics once to reset the counters.
+				s.ReportDiagnostics(ctx)
+				_, err := sqlConn.Exec(td.Input)
+				var buf bytes.Buffer
+				if err != nil {
+					fmt.Fprintf(&buf, "error: %v\n", err)
+				}
+				s.ReportDiagnostics(ctx)
+				last := diagSrv.LastRequestData()
+				usage := last.FeatureUsage
+				keys := make([]string, 0, len(usage))
+				for k, v := range usage {
+					if v == 0 {
+						// Ignore zero values (shouldn't happen in practice)
+						continue
+					}
+					if featureKeyWhitelist != nil {
+						if _, ok := featureKeyWhitelist[k]; !ok {
+							// Feature key not in whitelist.
+							continue
+						}
+					}
+					keys = append(keys, k)
+				}
+				sort.Strings(keys)
+				tw := tabwriter.NewWriter(&buf, 2, 1, 2, ' ', 0)
+				for _, k := range keys {
+					// Report either just the key or the key and the count.
+					if td.Cmd == "feature-counters" {
+						fmt.Fprintf(tw, "%s\t%d\n", k, usage[k])
+					} else {
+						fmt.Fprintf(tw, "%s\n", k)
+					}
+				}
+				_ = tw.Flush()
+				return buf.String()
+
+			default:
+				td.Fatalf(t, "unknown command %s", td.Cmd)
+				return ""
+			}
+		})
+	})
+}