From 73b40cc68c6ecf6b48b5ba4d0e6d7d57a2dfe857 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Sun, 5 Apr 2020 19:45:32 -0700 Subject: [PATCH] sql: add datadriven telemetry tests This commit adds `sql.TestTelemetry` which uses a datadriven approach to telemetry testing. See the comment for that function for more details. `TestCBOReportUsage` is converted to a datadriven test. Similar tests should be moved over in subsequent changes. Informs #46730. Release note: None --- pkg/server/updates_test.go | 153 ----------------- pkg/sql/telemetry_test.go | 156 ++++++++++++++++++ pkg/sql/testdata/telemetry/planning | 143 ++++++++++++++++ pkg/testutils/serverutils/test_server_shim.go | 14 +- pkg/util/cloudinfo/cloudinfo.go | 13 ++ 5 files changed, 322 insertions(+), 157 deletions(-) create mode 100644 pkg/sql/telemetry_test.go create mode 100644 pkg/sql/testdata/telemetry/planning diff --git a/pkg/server/updates_test.go b/pkg/server/updates_test.go index 8942a4d03208..b0bf82082da2 100644 --- a/pkg/server/updates_test.go +++ b/pkg/server/updates_test.go @@ -35,7 +35,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/diagutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" - "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/kr/pretty" "github.com/pkg/errors" @@ -207,158 +206,6 @@ func TestUsageQuantization(t *testing.T) { } } -func TestCBOReportUsage(t *testing.T) { - defer leaktest.AfterTest(t)() - - const elemName = "somestring" - ctx := context.TODO() - - r := diagutils.NewServer() - defer r.Close() - - st := cluster.MakeTestingClusterSettings() - - url := r.URL() - storeSpec := base.DefaultTestStoreSpec - storeSpec.Attributes = roachpb.Attributes{Attrs: []string{elemName}} - params := base.TestServerArgs{ - StoreSpecs: []base.StoreSpec{ - storeSpec, - base.DefaultTestStoreSpec, - }, - Settings: st, - Locality: roachpb.Locality{ - Tiers: []roachpb.Tier{ - {Key: "region", Value: "east"}, - {Key: "zone", Value: elemName}, - {Key: "state", Value: "ny"}, - {Key: "city", Value: "nyc"}, - }, - }, - Knobs: base.TestingKnobs{ - SQLLeaseManager: &sql.LeaseManagerTestingKnobs{ - // Disable SELECT called for delete orphaned leases to keep - // query stats stable. - DisableDeleteOrphanedLeases: true, - }, - Server: &TestingKnobs{ - DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{ - OverrideReportingURL: &url, - }, - }, - }, - } - - s, db, _ := serverutils.StartServer(t, params) - // Stopper will wait for the update/report loop to finish too. - defer s.Stopper().Stop(context.TODO()) - defer db.Close() - ts := s.(*TestServer) - - // Disable periodic reporting so it doesn't interfere with the test. - if _, err := db.Exec(`SET CLUSTER SETTING diagnostics.reporting.enabled = false`); err != nil { - t.Fatal(err) - } - - // make sure the test's generated activity is the only activity we measure. - telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ResetCounts) - - sqlDB := sqlutils.MakeSQLRunner(db) - - sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %s`, elemName)) - sqlDB.Exec(t, `CREATE TABLE x (a INT PRIMARY KEY)`) - sqlDB.Exec(t, `CREATE STATISTICS stats FROM x`) - // Run a variety of hinted queries. - sqlDB.Exec(t, `SELECT * FROM x@primary`) - sqlDB.Exec(t, `EXPLAIN SELECT * FROM x`) - sqlDB.Exec(t, `EXPLAIN (distsql) SELECT * FROM x`) - sqlDB.Exec(t, `EXPLAIN ANALYZE SELECT * FROM x`) - sqlDB.Exec(t, `EXPLAIN ANALYZE (DEBUG) SELECT * FROM x`) - sqlDB.Exec(t, `EXPLAIN (opt) SELECT * FROM x`) - sqlDB.Exec(t, `EXPLAIN (opt, verbose) SELECT * FROM x`) - // Do joins different numbers of times to disambiguate them in the expected, - // output but make sure the query strings are different each time so that the - // plan cache doesn't affect our results. - sqlDB.Exec( - t, - `SELECT x FROM (VALUES (1)) AS a(x) INNER HASH JOIN (VALUES (1)) AS b(y) ON x = y`, - ) - for i := 0; i < 2; i++ { - sqlDB.Exec( - t, - fmt.Sprintf( - `SELECT x FROM (VALUES (%d)) AS a(x) INNER MERGE JOIN (VALUES (1)) AS b(y) ON x = y`, - i, - ), - ) - } - for i := 0; i < 3; i++ { - sqlDB.Exec( - t, - fmt.Sprintf( - `SELECT a FROM (VALUES (%d)) AS b(y) INNER LOOKUP JOIN x ON y = a`, - i, - ), - ) - } - - for i := 0; i < 20; i += 3 { - sqlDB.Exec( - t, - fmt.Sprintf( - `SET CLUSTER SETTING sql.defaults.reorder_joins_limit = %d`, - i, - ), - ) - } - - sqlDB.Exec( - t, - `SET CLUSTER SETTING sql.stats.automatic_collection.enabled = on`, - ) - - sqlDB.Exec( - t, - `SET CLUSTER SETTING sql.stats.automatic_collection.enabled = off`, - ) - - if _, err := db.Exec(`RESET CLUSTER SETTING sql.stats.automatic_collection.enabled`); err != nil { - t.Fatal(err) - } - - ts.ReportDiagnostics(ctx) - - expectedFeatureUsage := map[string]int32{ - "sql.plan.hints.hash-join": 1, - "sql.plan.hints.merge-join": 2, - "sql.plan.hints.lookup-join": 3, - "sql.plan.hints.index": 1, - "sql.plan.reorder-joins.set-limit-0": 1, - "sql.plan.reorder-joins.set-limit-3": 1, - "sql.plan.reorder-joins.set-limit-6": 1, - "sql.plan.reorder-joins.set-limit-9": 1, - "sql.plan.reorder-joins.set-limit-more": 3, - "sql.plan.automatic-stats.enabled": 2, - "sql.plan.automatic-stats.disabled": 1, - "sql.plan.stats.created": 1, - "sql.plan.explain": 1, - "sql.plan.explain-analyze": 1, - "sql.plan.explain-analyze-debug": 1, - "sql.plan.explain-opt": 1, - "sql.plan.explain-opt-verbose": 1, - "sql.plan.explain-distsql": 1, - } - - last := r.LastRequestData() - for key, expected := range expectedFeatureUsage { - if got, ok := last.FeatureUsage[key]; !ok { - t.Fatalf("expected report of feature %q", key) - } else if got != expected { - t.Fatalf("expected reported value of feature %q to be %d not %d", key, expected, got) - } - } -} - func TestReportUsage(t *testing.T) { defer leaktest.AfterTest(t)() diff --git a/pkg/sql/telemetry_test.go b/pkg/sql/telemetry_test.go new file mode 100644 index 000000000000..4978dc3d6eda --- /dev/null +++ b/pkg/sql/telemetry_test.go @@ -0,0 +1,156 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package sql_test + +import ( + "bytes" + "context" + "fmt" + "sort" + "strings" + "testing" + "text/tabwriter" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/server" + "github.com/cockroachdb/cockroach/pkg/server/diagnosticspb" + "github.com/cockroachdb/cockroach/pkg/testutils/diagutils" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/util/cloudinfo" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" +) + +// TestTelemetry runs the datadriven telemetry tests. The test sets up a +// database and a testing diagnostics reporting server. The test implements the +// following data-driven commands: +// +// - exec +// +// Executes SQL statements against the database. Outputs no results on +// success. In case of error, outputs the error message. +// +// - feature-whitelist +// +// The input for this command is not SQL, but just a list of counter values. +// Tests that follow (until the next feature-whitelist command) will only +// output counters in this white list. +// +// - feature-usage, feature-counters +// +// Executes SQL statements and then outputs the feature counters from the +// white list that have been reported to the diagnostic server. The first +// variant outputs only the names of the counters that changed; the second +// variant outputs the counts as well. It is necessary to use +// feature-whitelist before these commands to avoid test flakes (e.g. because +// of counters that are changed by looking up descriptors) +// +func TestTelemetry(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + // Note: these tests cannot be run in parallel (with each other or with other + // tests) because telemetry counters are global. + datadriven.Walk(t, "testdata/telemetry", func(t *testing.T, path string) { + // Disable cloud info reporting (it would make these tests really slow). + defer cloudinfo.Disable()() + + diagSrv := diagutils.NewServer() + defer diagSrv.Close() + + diagSrvURL := diagSrv.URL() + params := base.TestServerArgs{ + Knobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DiagnosticsTestingKnobs: diagnosticspb.TestingKnobs{ + OverrideReportingURL: &diagSrvURL, + }, + }, + }, + } + s, sqlConn, _ := serverutils.StartServer(t, params) + defer s.Stopper().Stop(ctx) + + runner := sqlutils.MakeSQLRunner(sqlConn) + // Disable automatic reporting so it doesn't interfere with the test. + runner.Exec(t, "SET CLUSTER SETTING diagnostics.reporting.enabled = false") + runner.Exec(t, "SET CLUSTER SETTING diagnostics.reporting.send_crash_reports = false") + // Disable plan caching to get accurate counts if the same statement is + // issued multiple times. + runner.Exec(t, "SET CLUSTER SETTING sql.query_cache.enabled = false") + + var featureKeyWhitelist map[string]struct{} + datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "exec": + _, err := sqlConn.Exec(td.Input) + if err != nil { + if errors.HasAssertionFailure(err) { + td.Fatalf(t, "%+v", err) + } + return fmt.Sprintf("error: %v\n", err) + } + return "" + + case "feature-whitelist": + featureKeyWhitelist = make(map[string]struct{}) + for _, k := range strings.Split(td.Input, "\n") { + featureKeyWhitelist[k] = struct{}{} + } + return "" + + case "feature-usage", "feature-counters": + // Report diagnostics once to reset the counters. + s.ReportDiagnostics(ctx) + _, err := sqlConn.Exec(td.Input) + var buf bytes.Buffer + if err != nil { + fmt.Fprintf(&buf, "error: %v\n", err) + } + s.ReportDiagnostics(ctx) + last := diagSrv.LastRequestData() + usage := last.FeatureUsage + keys := make([]string, 0, len(usage)) + for k, v := range usage { + if v == 0 { + // Ignore zero values (shouldn't happen in practice) + continue + } + if featureKeyWhitelist != nil { + if _, ok := featureKeyWhitelist[k]; !ok { + // Feature key not in whitelist. + continue + } + } + keys = append(keys, k) + } + sort.Strings(keys) + tw := tabwriter.NewWriter(&buf, 2, 1, 2, ' ', 0) + for _, k := range keys { + // Report either just the key or the key and the count. + if td.Cmd == "feature-counters" { + fmt.Fprintf(tw, "%s\t%d\n", k, usage[k]) + } else { + fmt.Fprintf(tw, "%s\n", k) + } + } + _ = tw.Flush() + return buf.String() + + default: + td.Fatalf(t, "unknown command %s", td.Cmd) + return "" + } + }) + }) +} diff --git a/pkg/sql/testdata/telemetry/planning b/pkg/sql/testdata/telemetry/planning new file mode 100644 index 000000000000..e15e2dc5b283 --- /dev/null +++ b/pkg/sql/testdata/telemetry/planning @@ -0,0 +1,143 @@ +exec +CREATE TABLE x (a INT PRIMARY KEY) +---- + +# Tests for EXPLAIN counters. + +feature-whitelist +sql.plan.explain +sql.plan.explain-analyze +sql.plan.explain-opt +sql.plan.explain-opt-verbose +sql.plan.explain-distsql +---- + +feature-usage +EXPLAIN SELECT * FROM x +---- +sql.plan.explain + +feature-usage +EXPLAIN (DISTSQL) SELECT * FROM x +---- +sql.plan.explain-distsql + +feature-usage +EXPLAIN ANALYZE SELECT * FROM x +---- +sql.plan.explain-analyze + +feature-usage +EXPLAIN ANALYZE (DEBUG) SELECT * FROM x +---- + +feature-usage +EXPLAIN (OPT) SELECT * FROM x +---- +sql.plan.explain-opt + +feature-usage +EXPLAIN (OPT, VERBOSE) SELECT * FROM x +---- +sql.plan.explain-opt-verbose + +# Tests for hints. + +feature-whitelist +sql.plan.hints.hash-join +sql.plan.hints.merge-join +sql.plan.hints.lookup-join +sql.plan.hints.index +sql.plan.hints.index.select +sql.plan.hints.index.update +sql.plan.hints.index.delete +---- + +feature-usage +SELECT x FROM (VALUES (1)) AS a(x) INNER HASH JOIN (VALUES (1)) AS b(y) ON x = y +---- +sql.plan.hints.hash-join + +feature-usage +SELECT x FROM (VALUES (1)) AS a(x) INNER MERGE JOIN (VALUES (1)) AS b(y) ON x = y +---- +sql.plan.hints.merge-join + +feature-usage +SELECT a FROM (VALUES (1)) AS b(y) INNER LOOKUP JOIN x ON y = a +---- +sql.plan.hints.lookup-join + +feature-usage +SELECT * FROM x@primary +---- +sql.plan.hints.index +sql.plan.hints.index.select + +feature-usage +UPDATE x@primary SET a=1 WHERE a>1 +---- +sql.plan.hints.index +sql.plan.hints.index.update + +feature-usage +DELETE FROM x@primary WHERE a>1 +---- +sql.plan.hints.index +sql.plan.hints.index.delete + +# Tests for tracking important setting changes. + +feature-whitelist +sql.plan.reorder-joins.set-limit-0 +sql.plan.reorder-joins.set-limit-3 +sql.plan.reorder-joins.set-limit-6 +sql.plan.reorder-joins.set-limit-more +sql.plan.automatic-stats.enabled +sql.plan.automatic-stats.disabled +---- + +feature-usage +SET CLUSTER SETTING sql.defaults.reorder_joins_limit = 0 +---- +sql.plan.reorder-joins.set-limit-0 + +feature-usage +SET CLUSTER SETTING sql.defaults.reorder_joins_limit = 3 +---- +sql.plan.reorder-joins.set-limit-3 + +feature-usage +SET CLUSTER SETTING sql.defaults.reorder_joins_limit = 6 +---- +sql.plan.reorder-joins.set-limit-6 + +feature-usage +SET CLUSTER SETTING sql.defaults.reorder_joins_limit = 20 +---- +sql.plan.reorder-joins.set-limit-more + +feature-usage +SET CLUSTER SETTING sql.stats.automatic_collection.enabled = on +---- +sql.plan.automatic-stats.enabled + +feature-usage +SET CLUSTER SETTING sql.stats.automatic_collection.enabled = off +---- +sql.plan.automatic-stats.disabled + +feature-usage +RESET CLUSTER SETTING sql.stats.automatic_collection.enabled +---- +sql.plan.automatic-stats.enabled + +# Test telemetry for manual statistics creation. +feature-whitelist +sql.plan.stats.created +---- + +feature-usage +CREATE STATISTICS stats FROM x +---- +sql.plan.stats.created diff --git a/pkg/testutils/serverutils/test_server_shim.go b/pkg/testutils/serverutils/test_server_shim.go index 70ee2dac0f68..4fabed433a0e 100644 --- a/pkg/testutils/serverutils/test_server_shim.go +++ b/pkg/testutils/serverutils/test_server_shim.go @@ -179,14 +179,20 @@ type TestServerInterface interface { // CheckForUpdates phones home to check for updates and report usage. // - // If using this, consider setting DiagnosticsReportingEnabled to false so the - // periodic check doesn't interfere with the test. + // When using this for testing, consider setting DiagnosticsReportingEnabled + // to false so the periodic check doesn't interfere with the test. + // + // This can be slow because of cloud detection; use cloudinfo.Disable() in + // tests to avoid that. CheckForUpdates(ctx context.Context) // ReportDiagnostics phones home to report diagnostics. // - // If using this, consider setting DiagnosticsReportingEnabled to false so the - // periodic reporting doesn't interfere with the test. + // If using this for testing, consider setting DiagnosticsReportingEnabled to + // false so the periodic reporting doesn't interfere with the test. + // + // This can be slow because of cloud detection; use cloudinfo.Disable() in + // tests to avoid that. ReportDiagnostics(ctx context.Context) } diff --git a/pkg/util/cloudinfo/cloudinfo.go b/pkg/util/cloudinfo/cloudinfo.go index c3b72886473c..16b3f8e26253 100644 --- a/pkg/util/cloudinfo/cloudinfo.go +++ b/pkg/util/cloudinfo/cloudinfo.go @@ -31,6 +31,15 @@ const ( region = "region" ) +var enabled = true + +// Disable disables cloud detection until the returned function is called. +// Used for tests that trigger diagnostics updates. +func Disable() (restore func()) { + enabled = false + return func() { enabled = true } +} + // client is necessary to provide a struct for mocking http requests // in testing. type client struct { @@ -181,6 +190,10 @@ func (cli *client) getInstanceMetadata( // the node is running on, as well as the value of the requested metadata // element. func getCloudInfo(ctx context.Context, metadataElement string) (provider string, element string) { + if !enabled { + return "", "" + } + const timeout = 500 * time.Millisecond cli := client{httputil.NewClientWithTimeout(timeout)}