Merge #89256

89256: sql: output RU estimate for EXPLAIN ANALYZE on tenants r=DrewKimball a=DrewKimball **sql: surface query request units consumed by network egress** This commit adds a top-level field to the output of `EXPLAIN ANALYZE` that shows the estimated number of RUs that would be consumed due to network egress to the client. The estimate is obtained by measuring the in-memory size of the query result, and passing that to the tenant cost config's `PGWireEgressCost` method. **sql: surface query request units consumed due to cpu usage** This commit adds the ability for clients to estimate the number of RUs consumed by a query due to CPU usage. This is accomplished by keeping a moving average of the CPU usage for the entire tenant process, then using that to obtain an estimate for what the CPU usage *would* be if the query wasn't running. This is then compared against the actual measured CPU usage during the query's execution to get the estimate. For local flows this is done at the `connExecutor` level; for remote flows this is handled by the last outbox on the node (which gathers and sends the flow's metadata). The resulting RU estimate is added to the existing estimate from network egress and displayed in the output of `EXPLAIN ANALYZE`. **sql: surface query request units consumed by IO** This commit adds tracking for request units consumed by IO operations for all execution operators that perform KV operations. The corresponding RU count is recorded in the span and later aggregated with the RU consumption due to network egress and CPU usage. The resulting query RU consumption estimate is visible in the output of `EXPLAIN ANALYZE`. **multitenantccl: add sanity testing for ru estimation** This commit adds a sanity test for the RU estimates produced by running queries with `EXPLAIN ANALYZE` on a tenant. The test runs each test query several times with `EXPLAIN ANALYZE`, then runs all test queries without `EXPLAIN ANALYZE` and compares the resulting actual RU measurement to the aggregated estimates. Informs #74441 Release note (sql change): Added an estimate for the number of request units consumed by a query to the output of `EXPLAIN ANALYZE` for tenant sessions. Co-authored-by: Drew Kimball <drewk@cockroachlabs.com>
cockroachdb · Nov 18, 2022 · a035d2e · a035d2e
2 parents 8814521 + f95b046
commit a035d2e
Show file tree

Hide file tree

Showing 37 changed files with 678 additions and 56 deletions.
diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel
@@ -1236,6 +1236,7 @@ GO_TARGETS = [
     "//pkg/kv/kvserver:kvserver_test",
     "//pkg/kv:kv",
     "//pkg/kv:kv_test",
+    "//pkg/multitenant/multitenantcpu:multitenantcpu",
     "//pkg/multitenant/multitenantio:multitenantio",
     "//pkg/multitenant/tenantcostmodel:tenantcostmodel",
     "//pkg/multitenant:multitenant",
@@ -2535,6 +2536,7 @@ GET_X_DATA_TARGETS = [
     "//pkg/kv/kvserver/txnwait:get_x_data",
     "//pkg/kv/kvserver/uncertainty:get_x_data",
     "//pkg/multitenant:get_x_data",
+    "//pkg/multitenant/multitenantcpu:get_x_data",
     "//pkg/multitenant/multitenantio:get_x_data",
     "//pkg/multitenant/tenantcostmodel:get_x_data",
     "//pkg/obs:get_x_data",

diff --git a/pkg/ccl/multitenantccl/tenantcostclient/BUILD.bazel b/pkg/ccl/multitenantccl/tenantcostclient/BUILD.bazel
@@ -26,6 +26,8 @@ go_library(
         "//pkg/util/stop",
         "//pkg/util/syncutil",
         "//pkg/util/timeutil",
+        "//pkg/util/tracing",
+        "//pkg/util/tracing/tracingpb",
         "@com_github_cockroachdb_errors//:errors",
         "@com_github_cockroachdb_errors//errorspb",
     ],
@@ -36,6 +38,7 @@ go_test(
     srcs = [
         "limiter_test.go",
         "main_test.go",
+        "query_ru_estimate_test.go",
         "tenant_side_test.go",
         "token_bucket_test.go",
     ],
@@ -47,6 +50,8 @@ go_test(
         "//pkg/blobs",
         "//pkg/ccl",
         "//pkg/ccl/changefeedccl",
+        "//pkg/ccl/kvccl/kvtenantccl",
+        "//pkg/ccl/multitenantccl/tenantcostserver",
         "//pkg/ccl/utilccl",
         "//pkg/cloud",
         "//pkg/cloud/nodelocal",
@@ -72,12 +77,14 @@ go_test(
         "//pkg/sql/stats",
         "//pkg/testutils",
         "//pkg/testutils/serverutils",
+        "//pkg/testutils/skip",
         "//pkg/testutils/sqlutils",
         "//pkg/testutils/testcluster",
         "//pkg/util/ctxgroup",
         "//pkg/util/ioctx",
         "//pkg/util/leaktest",
         "//pkg/util/log",
+        "//pkg/util/protoutil",
         "//pkg/util/randutil",
         "//pkg/util/stop",
         "//pkg/util/syncutil",

diff --git a/pkg/ccl/multitenantccl/tenantcostclient/query_ru_estimate_test.go b/pkg/ccl/multitenantccl/tenantcostclient/query_ru_estimate_test.go
@@ -0,0 +1,193 @@
+// Copyright 2022 The Cockroach Authors.
+//
+// Licensed as a CockroachDB Enterprise file under the Cockroach Community
+// License (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
+
+package tenantcostclient_test
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/cockroachdb/cockroach/pkg/base"
+	_ "github.com/cockroachdb/cockroach/pkg/ccl" // ccl init hooks
+	_ "github.com/cockroachdb/cockroach/pkg/ccl/kvccl/kvtenantccl"
+	"github.com/cockroachdb/cockroach/pkg/ccl/multitenantccl/tenantcostclient"
+	_ "github.com/cockroachdb/cockroach/pkg/ccl/multitenantccl/tenantcostserver"
+	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
+	"github.com/cockroachdb/cockroach/pkg/sql/stats"
+	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
+	"github.com/cockroachdb/cockroach/pkg/testutils/skip"
+	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
+	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
+	"github.com/cockroachdb/cockroach/pkg/util/log"
+	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
+	"github.com/stretchr/testify/require"
+)
+
+// TestEstimateQueryRUConsumption is a sanity check for the RU estimates
+// produced for queries that are run by a tenant under EXPLAIN ANALYZE. The RU
+// consumption of a query is not deterministic, since it depends on inexact
+// quantities like the (already estimated) CPU usage. Therefore, the test runs
+// each query multiple times and then checks that the total estimated RU
+// consumption is within reasonable distance from the actual measured RUs for
+// the tenant.
+func TestEstimateQueryRUConsumption(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	// This test becomes flaky when the machine/cluster is under significant
+	// background load, so it should only be run manually.
+	skip.IgnoreLint(t, "intended to be manually run as a sanity test")
+
+	ctx := context.Background()
+
+	st := cluster.MakeTestingClusterSettings()
+	stats.AutomaticStatisticsClusterMode.Override(ctx, &st.SV, false)
+	stats.UseStatisticsOnSystemTables.Override(ctx, &st.SV, false)
+	stats.AutomaticStatisticsOnSystemTables.Override(ctx, &st.SV, false)
+
+	// Lower the target duration for reporting tenant usage so that it can be
+	// measured accurately. Avoid decreasing too far, since doing so can add
+	// measurable overhead.
+	tenantcostclient.TargetPeriodSetting.Override(ctx, &st.SV, time.Millisecond*500)
+
+	params := base.TestServerArgs{
+		Settings:                 st,
+		DisableDefaultTestTenant: true,
+	}
+
+	s, mainDB, _ := serverutils.StartServer(t, params)
+	defer s.Stopper().Stop(ctx)
+	sysDB := sqlutils.MakeSQLRunner(mainDB)
+
+	tenantID := serverutils.TestTenantID()
+	tenant1, tenantDB1 := serverutils.StartTenant(t, s, base.TestTenantArgs{
+		TenantID: tenantID,
+		Settings: st,
+	})
+	defer tenant1.Stopper().Stop(ctx)
+	defer tenantDB1.Close()
+	tdb := sqlutils.MakeSQLRunner(tenantDB1)
+	tdb.Exec(t, "SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false")
+	tdb.Exec(t, "CREATE TABLE abcd (a INT, b INT, c INT, d INT, INDEX (a, b, c))")
+
+	type testCase struct {
+		sql   string
+		count int
+	}
+	testCases := []testCase{
+		{ // Insert statement
+			sql:   "INSERT INTO abcd (SELECT t%2, t%3, t, -t FROM generate_series(1,50000) g(t))",
+			count: 1,
+		},
+		{ // Point query
+			sql:   "SELECT a FROM abcd WHERE (a, b) = (1, 1)",
+			count: 10,
+		},
+		{ // Range query
+			sql:   "SELECT a FROM abcd WHERE (a, b) = (1, 1) AND c > 0 AND c < 10000",
+			count: 10,
+		},
+		{ // Aggregate
+			sql:   "SELECT count(*) FROM abcd",
+			count: 10,
+		},
+		{ // Distinct
+			sql:   "SELECT DISTINCT ON (a, b) * FROM abcd",
+			count: 10,
+		},
+		{ // Full table scan
+			sql:   "SELECT a FROM abcd",
+			count: 10,
+		},
+		{ // Lookup join
+			sql:   "SELECT a FROM (VALUES (1, 1), (0, 2)) v(x, y) INNER LOOKUP JOIN abcd ON (a, b) = (x, y)",
+			count: 10,
+		},
+		{ // Index join
+			sql:   "SELECT * FROM abcd WHERE (a, b) = (0, 0)",
+			count: 10,
+		},
+		{ // No kv IO, lots of network egress.
+			sql:   "SELECT 'deadbeef' FROM generate_series(1, 50000)",
+			count: 10,
+		},
+	}
+
+	var err error
+	var tenantEstimatedRUs int
+	for _, tc := range testCases {
+		for i := 0; i < tc.count; i++ {
+			output := tdb.QueryStr(t, "EXPLAIN ANALYZE "+tc.sql)
+			var estimatedRU int
+			for _, row := range output {
+				if len(row) != 1 {
+					t.Fatalf("expected one column")
+				}
+				val := row[0]
+				if strings.Contains(val, "estimated RUs consumed") {
+					substr := strings.Split(val, " ")
+					require.Equalf(t, 4, len(substr), "expected RU consumption message to have four words")
+					ruCountStr := strings.Replace(strings.TrimSpace(substr[3]), ",", "", -1)
+					estimatedRU, err = strconv.Atoi(ruCountStr)
+					require.NoError(t, err, "failed to retrieve estimated RUs")
+					break
+				}
+			}
+			tenantEstimatedRUs += estimatedRU
+		}
+	}
+
+	getTenantRUs := func() float64 {
+		// Sleep to ensure the measured RU consumption gets recorded in the
+		// tenant_usage table.
+		time.Sleep(time.Second)
+		var consumptionBytes []byte
+		var consumption roachpb.TenantConsumption
+		var tenantRUs float64
+		rows := sysDB.Query(t,
+			fmt.Sprintf(
+				"SELECT total_consumption FROM system.tenant_usage WHERE tenant_id = %d AND instance_id = 0",
+				tenantID.ToUint64(),
+			),
+		)
+		for rows.Next() {
+			require.NoError(t, rows.Scan(&consumptionBytes))
+			if len(consumptionBytes) == 0 {
+				continue
+			}
+			require.NoError(t, protoutil.Unmarshal(consumptionBytes, &consumption))
+			tenantRUs += consumption.RU
+		}
+		return tenantRUs
+	}
+	tenantStartRUs := getTenantRUs()
+
+	var tenantMeasuredRUs float64
+	for _, tc := range testCases {
+		for i := 0; i < tc.count; i++ {
+			tdb.QueryStr(t, tc.sql)
+		}
+	}
+
+	// Check the estimated RU aggregate for all the queries against the actual
+	// measured RU consumption for the tenant.
+	tenantMeasuredRUs = getTenantRUs() - tenantStartRUs
+	const deltaFraction = 0.25
+	allowedDelta := tenantMeasuredRUs * deltaFraction
+	require.InDeltaf(t, tenantMeasuredRUs, tenantEstimatedRUs, allowedDelta,
+		"estimated RUs (%d) were not within %f RUs of the expected value (%f)",
+		tenantEstimatedRUs,
+		allowedDelta,
+		tenantMeasuredRUs,
+	)
+}
diff --git a/pkg/ccl/multitenantccl/tenantcostclient/tenant_side.go b/pkg/ccl/multitenantccl/tenantcostclient/tenant_side.go
@@ -25,6 +25,8 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/stop"
 	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
+	"github.com/cockroachdb/cockroach/pkg/util/tracing"
+	"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
 	"github.com/cockroachdb/errors"
 	"github.com/cockroachdb/errors/errorspb"
 )
@@ -129,6 +131,9 @@ const defaultTickInterval = time.Second
 //	0.5^(1 second / tickInterval)
 const movingAvgRUPerSecFactor = 0.5
 
+// movingAvgCPUPerSecFactor is the weight applied to a new sample of CPU usage.
+const movingAvgCPUPerSecFactor = 0.5
+
 // We request more tokens when the available RUs go below a threshold. The
 // threshold is a fraction of the last granted RUs.
 const notifyFraction = 0.1
@@ -260,6 +265,11 @@ type tenantSideCostController struct {
 		// It is read and written on multiple goroutines and so must be protected
 		// by a mutex.
 		consumption roachpb.TenantConsumption
+
+		// avgCPUPerSec is an exponentially-weighted moving average of the CPU usage
+		// per second; used to estimate the CPU usage of a query. It is only written
+		// in the main loop, but can be read by multiple goroutines so is protected.
+		avgCPUPerSec float64
 	}
 
 	// lowRUNotifyChan is used when the number of available RUs is running low and
@@ -389,23 +399,28 @@ func (c *tenantSideCostController) onTick(ctx context.Context, newTime time.Time
 	// Update CPU consumption.
 	deltaCPU := newExternalUsage.CPUSecs - c.run.externalUsage.CPUSecs
 
-	// Subtract any allowance that we consider free background usage.
 	deltaTime := newTime.Sub(c.run.lastTick)
 	if deltaTime > 0 {
+		// Subtract any allowance that we consider free background usage.
 		allowance := CPUUsageAllowance.Get(&c.settings.SV).Seconds() * deltaTime.Seconds()
 		deltaCPU -= allowance
 
+		avgCPU := deltaCPU / deltaTime.Seconds()
+
+		c.mu.Lock()
 		// If total CPU usage is small (less than 3% of a single CPU by default)
 		// and there have been no recent read/write operations, then ignore the
 		// recent usage altogether. This is intended to minimize RU usage when the
 		// cluster is idle.
-		c.mu.Lock()
 		if deltaCPU < allowance*2 {
 			if c.mu.consumption.ReadBatches == c.run.consumption.ReadBatches &&
 				c.mu.consumption.WriteBatches == c.run.consumption.WriteBatches {
 				deltaCPU = 0
 			}
 		}
+		// Keep track of an exponential moving average of CPU usage.
+		c.mu.avgCPUPerSec *= 1 - movingAvgCPUPerSecFactor
+		c.mu.avgCPUPerSec += avgCPU * movingAvgCPUPerSecFactor
 		c.mu.Unlock()
 	}
 	if deltaCPU < 0 {
@@ -753,7 +768,8 @@ func (c *tenantSideCostController) OnRequestWait(ctx context.Context) error {
 	return c.limiter.Wait(ctx, 0)
 }
 
-// OnResponse is part of the multitenant.TenantSideBatchInterceptor interface.
+// OnResponseWait is part of the multitenant.TenantSideBatchInterceptor
+// interface.
 func (c *tenantSideCostController) OnResponseWait(
 	ctx context.Context, req tenantcostmodel.RequestInfo, resp tenantcostmodel.ResponseInfo,
 ) error {
@@ -773,6 +789,13 @@ func (c *tenantSideCostController) OnResponseWait(
 		return err
 	}
 
+	// Record the number of RUs consumed by the IO request.
+	if sp := tracing.SpanFromContext(ctx); sp != nil && sp.RecordingType() != tracingpb.RecordingOff {
+		sp.RecordStructured(&roachpb.TenantConsumption{
+			RU: float64(totalRU),
+		})
+	}
+
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
@@ -814,7 +837,7 @@ func (c *tenantSideCostController) OnExternalIOWait(
 }
 
 // OnExternalIO is part of the multitenant.TenantSideExternalIORecorder
-// interface.
+// interface. TODO(drewk): collect this for queries.
 func (c *tenantSideCostController) OnExternalIO(
 	ctx context.Context, usage multitenant.ExternalIOUsage,
 ) {
@@ -853,3 +876,16 @@ func (c *tenantSideCostController) onExternalIO(
 
 	return nil
 }
+
+// GetCPUMovingAvg is used to obtain an exponential moving average estimate
+// for the CPU usage in seconds per each second of wall-clock time.
+func (c *tenantSideCostController) GetCPUMovingAvg() float64 {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.mu.avgCPUPerSec
+}
+
+// GetCostConfig is part of the multitenant.TenantSideCostController interface.
+func (c *tenantSideCostController) GetCostConfig() *tenantcostmodel.Config {
+	return &c.costCfg
+}
diff --git a/pkg/multitenant/cost_controller.go b/pkg/multitenant/cost_controller.go
@@ -32,6 +32,14 @@ type TenantSideCostController interface {
 		nextLiveInstanceIDFn NextLiveInstanceIDFn,
 	) error
 
+	// GetCPUMovingAvg returns an exponential moving average used for estimating
+	// the CPU usage (in CPU secs) per wall-clock second.
+	GetCPUMovingAvg() float64
+
+	// GetCostConfig returns the cost model config this TenantSideCostController
+	// is using.
+	GetCostConfig() *tenantcostmodel.Config
+
 	TenantSideKVInterceptor
 
 	TenantSideExternalIORecorder

diff --git a/pkg/multitenant/multitenantcpu/BUILD.bazel b/pkg/multitenant/multitenantcpu/BUILD.bazel
@@ -0,0 +1,17 @@
+load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "multitenantcpu",
+    srcs = ["cpu_usage.go"],
+    importpath = "github.com/cockroachdb/cockroach/pkg/multitenant/multitenantcpu",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/multitenant",
+        "//pkg/server/status",
+        "//pkg/util/log",
+        "//pkg/util/timeutil",
+    ],
+)
+
+get_x_data(name = "get_x_data")