Skip to content

Commit

Permalink
multitenantccl: add sanity testing for ru estimation
Browse files Browse the repository at this point in the history
This commit adds a sanity test for the RU estimates produced by running
queries with `EXPLAIN ANALYZE` on a tenant. The test runs each test query
several times, ensuring that the standard deviation of the estimates does not
exceed 5% of the mean (or 10 RUs, whichever is larger). It then runs all test
queries without `EXPLAIN ANALYZE` and compares the resulting actual RU
measurement to the aggregated estimates.

Release note: None
  • Loading branch information
DrewKimball committed Oct 25, 2022
1 parent 1543bb0 commit 29969e8
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 0 deletions.
3 changes: 3 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ ALL_TESTS = [
"//pkg/kv/kvserver/uncertainty:uncertainty_test",
"//pkg/kv/kvserver:kvserver_test",
"//pkg/kv:kv_test",
"//pkg/multitenant/multitenant_test:multitenant_test_test",
"//pkg/obsservice/obslib/ingest:ingest_test",
"//pkg/roachpb:roachpb_disallowed_imports_test",
"//pkg/roachpb:roachpb_test",
Expand Down Expand Up @@ -1224,6 +1225,7 @@ GO_TARGETS = [
"//pkg/kv/kvserver:kvserver_test",
"//pkg/kv:kv",
"//pkg/kv:kv_test",
"//pkg/multitenant/multitenant_test:multitenant_test_test",
"//pkg/multitenant/multitenantio:multitenantio",
"//pkg/multitenant/tenantcostmodel:tenantcostmodel",
"//pkg/multitenant:multitenant",
Expand Down Expand Up @@ -2510,6 +2512,7 @@ GET_X_DATA_TARGETS = [
"//pkg/kv/kvserver/txnwait:get_x_data",
"//pkg/kv/kvserver/uncertainty:get_x_data",
"//pkg/multitenant:get_x_data",
"//pkg/multitenant/multitenant_test:get_x_data",
"//pkg/multitenant/multitenantio:get_x_data",
"//pkg/multitenant/tenantcostmodel:get_x_data",
"//pkg/obs:get_x_data",
Expand Down
6 changes: 6 additions & 0 deletions pkg/ccl/multitenantccl/tenantcostclient/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ go_test(
srcs = [
"limiter_test.go",
"main_test.go",
"query_ru_estimate_test.go",
"tenant_side_test.go",
"token_bucket_test.go",
],
Expand All @@ -49,6 +50,8 @@ go_test(
"//pkg/blobs",
"//pkg/ccl",
"//pkg/ccl/changefeedccl",
"//pkg/ccl/kvccl/kvtenantccl",
"//pkg/ccl/multitenantccl/tenantcostserver",
"//pkg/ccl/utilccl",
"//pkg/cloud",
"//pkg/cloud/nodelocal",
Expand All @@ -74,18 +77,21 @@ go_test(
"//pkg/sql/stats",
"//pkg/testutils",
"//pkg/testutils/serverutils",
"//pkg/testutils/skip",
"//pkg/testutils/sqlutils",
"//pkg/testutils/testcluster",
"//pkg/util/ctxgroup",
"//pkg/util/ioctx",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/protoutil",
"//pkg/util/randutil",
"//pkg/util/stop",
"//pkg/util/syncutil",
"//pkg/util/timeutil",
"@com_github_cockroachdb_datadriven//:datadriven",
"@com_github_cockroachdb_errors//:errors",
"@com_github_montanaflynn_stats//:stats",
"@com_github_stretchr_testify//require",
"@in_gopkg_yaml_v2//:yaml_v2",
],
Expand Down
209 changes: 209 additions & 0 deletions pkg/ccl/multitenantccl/tenantcostclient/query_ru_estimate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
package tenantcostclient_test

import (
"context"
"fmt"
"math"
"strconv"
"strings"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
_ "github.com/cockroachdb/cockroach/pkg/ccl" // ccl init hooks
_ "github.com/cockroachdb/cockroach/pkg/ccl/kvccl/kvtenantccl"
"github.com/cockroachdb/cockroach/pkg/ccl/multitenantccl/tenantcostclient"
_ "github.com/cockroachdb/cockroach/pkg/ccl/multitenantccl/tenantcostserver"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
stats2 "github.com/montanaflynn/stats"
"github.com/stretchr/testify/require"
)

// TestEstimateQueryRUConsumption is a sanity check for the RU estimates
// produced for queries that are run by a tenant under EXPLAIN ANALYZE. The RU
// consumption of a query is not deterministic, since it depends on inexact
// quantities like the (already estimated) CPU usage and the memory size of the
// results returned to the client. Therefore, the test proceeds in two phases:
//
//  1. Each query is run several times under EXPLAIN ANALYZE, and the standard
//     deviation of its RU estimates must stay below max(5% of the mean, 10).
//  2. Every query is then run the same number of times without EXPLAIN
//     ANALYZE, and the sum of the estimates from phase 1 must be within 50% of
//     the RU consumption recorded for the tenant in system.tenant_usage.
func TestEstimateQueryRUConsumption(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)
	skip.UnderStress(t, "the test is sensitive to background activity and may fail under stress")
	skip.UnderShort(t)

	ctx := context.Background()

	// Turn off the automatic-statistics settings; presumably this keeps
	// background stats work from perturbing the RU measurements (the test is
	// sensitive to background activity, see the skip above).
	st := cluster.MakeTestingClusterSettings()
	stats.AutomaticStatisticsClusterMode.Override(ctx, &st.SV, false)
	stats.UseStatisticsOnSystemTables.Override(ctx, &st.SV, false)
	stats.AutomaticStatisticsOnSystemTables.Override(ctx, &st.SV, false)

	// Lower the target duration for reporting tenant usage so that it can be
	// measured accurately. Avoid decreasing too far, since doing so can add
	// measurable overhead.
	tenantcostclient.TargetPeriodSetting.Override(ctx, &st.SV, time.Millisecond*500)

	params := base.TestServerArgs{
		Settings:                 st,
		DisableDefaultTestTenant: true,
	}
	s, mainDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop(ctx)
	sysDB := sqlutils.MakeSQLRunner(mainDB)

	tenantID := serverutils.TestTenantID()
	tenant1, tenantDB1 := serverutils.StartTenant(t, s, base.TestTenantArgs{
		TenantID: tenantID,
		Settings: st,
	})
	defer tenant1.Stopper().Stop(ctx)
	defer tenantDB1.Close()
	tdb := sqlutils.MakeSQLRunner(tenantDB1)
	tdb.Exec(t, "SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false")
	tdb.Exec(t, "CREATE TABLE abcd (a INT, b INT, c INT, d INT, INDEX (a, b, c))")
	tdb.Exec(t, "INSERT INTO abcd (SELECT t%2, t%3, t, -t FROM generate_series(1,100000) g(t))")

	// testCase is a query together with the number of times it is executed in
	// each phase of the test.
	type testCase struct {
		sql   string
		count int
	}
	testCases := []testCase{
		{ // Point query
			sql:   "SELECT a FROM abcd WHERE (a, b) = (1, 1)",
			count: 10,
		},
		{ // Range query
			sql:   "SELECT a FROM abcd WHERE (a, b) = (1, 1) AND c > 0 AND c < 10000",
			count: 10,
		},
		{ // Aggregate
			sql:   "SELECT count(*) FROM abcd",
			count: 10,
		},
		{ // Distinct
			sql:   "SELECT DISTINCT ON (a, b) * FROM abcd",
			count: 10,
		},
		{ // Full table scan
			sql:   "SELECT a FROM abcd",
			count: 10,
		},
		{ // Lookup join
			sql:   "SELECT a FROM (VALUES (1, 1), (0, 2)) v(x, y) INNER LOOKUP JOIN abcd ON (a, b) = (x, y)",
			count: 10,
		},
		{ // Index join
			sql:   "SELECT * FROM abcd WHERE (a, b) = (0, 0)",
			count: 10,
		},
		{ // No kv IO, lots of network egress.
			sql:   "SELECT 'deadbeef' FROM generate_series(1, 50000)",
			count: 10,
		},
	}

	// Phase 1: collect the RU estimates reported by EXPLAIN ANALYZE and check
	// that repeated runs of the same query agree with each other.
	var tenantEstimatedRUs int
	for tcNum, tc := range testCases {
		testCaseRUEstimates := make([]float64, tc.count)
		for i := range testCaseRUEstimates {
			output := tdb.QueryStr(t, "EXPLAIN ANALYZE "+tc.sql)
			// estimatedRU stays zero when no parsable estimate line is found.
			var estimatedRU int
			for _, row := range output {
				if len(row) != 1 {
					t.Fatalf("expected one column")
				}
				val := row[0]
				if !strings.Contains(val, "estimated RUs consumed") {
					continue
				}
				// Only lines made of exactly four space-separated fields are
				// parsed; the last field is the RU count, possibly with
				// thousands separators.
				substr := strings.Split(val, " ")
				if len(substr) != 4 {
					continue
				}
				ruCountStr := strings.ReplaceAll(strings.TrimSpace(substr[3]), ",", "")
				parsedRU, err := strconv.Atoi(ruCountStr)
				require.NoError(t, err, "failed to retrieve estimated RUs")
				estimatedRU = parsedRU
				break
			}
			tenantEstimatedRUs += estimatedRU
			testCaseRUEstimates[i] = float64(estimatedRU)
		}

		// Check the error before looking at the mean itself.
		mean, err := stats2.Mean(testCaseRUEstimates)
		require.NoError(t, err, "failed to calculate mean for test case %d", tcNum)
		if mean == 0 {
			// Sufficiently cheap queries will return zero as the RU estimate.
			continue
		}
		variance, err := stats2.Variance(testCaseRUEstimates)
		require.NoError(t, err, "failed to calculate variance for test case %d", tcNum)
		stdDev := math.Sqrt(variance)

		// The allowed standard deviation is a fraction of the mean, with an
		// absolute floor so that small estimates are not held to a bound of
		// only a few RUs.
		const minAllowedStdDev = 10
		const maxStdDevFraction = 0.05
		maxAllowedStdDev := mean * maxStdDevFraction
		if maxAllowedStdDev < minAllowedStdDev {
			maxAllowedStdDev = minAllowedStdDev
		}
		require.Lessf(t, stdDev, maxAllowedStdDev,
			"standard deviation of RU estimates is %f%% of the mean RUs (%f) for test case %d",
			(stdDev/mean)*100,
			mean,
			tcNum,
		)
	}

	// getTenantRUs returns the sum of the RU field over all non-empty
	// total_consumption values stored in system.tenant_usage for the tenant
	// (rows with instance_id = 0).
	getTenantRUs := func() float64 {
		// Sleep to ensure the measured RU consumption gets recorded in the
		// tenant_usage table.
		time.Sleep(time.Second)
		var consumptionBytes []byte
		var consumption roachpb.TenantConsumption
		var tenantRUs float64
		rows := sysDB.Query(t,
			fmt.Sprintf(
				"SELECT total_consumption FROM system.tenant_usage WHERE tenant_id = %d AND instance_id = 0",
				tenantID.ToUint64(),
			),
		)
		// Release the result set even if an assertion below aborts the test.
		defer rows.Close()
		for rows.Next() {
			require.NoError(t, rows.Scan(&consumptionBytes))
			if len(consumptionBytes) == 0 {
				continue
			}
			require.NoError(t, protoutil.Unmarshal(consumptionBytes, &consumption))
			tenantRUs += consumption.RU
		}
		// Next returning false can also mean that iteration failed; surface
		// that instead of silently reporting a partial sum.
		require.NoError(t, rows.Err())
		return tenantRUs
	}
	tenantStartRUs := getTenantRUs()

	// Phase 2: run the same workload without EXPLAIN ANALYZE so that its
	// actual RU consumption is recorded for the tenant.
	for _, tc := range testCases {
		for i := 0; i < tc.count; i++ {
			tdb.QueryStr(t, tc.sql)
		}
	}

	// Check the estimated RU aggregate for all the queries against the actual
	// measured RU consumption for the tenant.
	tenantMeasuredRUs := getTenantRUs() - tenantStartRUs
	const deltaFraction = 0.5
	allowedDelta := tenantMeasuredRUs * deltaFraction
	require.InDeltaf(t, tenantMeasuredRUs, tenantEstimatedRUs, allowedDelta,
		"estimated RUs (%d) were not within %f RUs of the expected value (%f)",
		tenantEstimatedRUs,
		allowedDelta,
		tenantMeasuredRUs,
	)
}

0 comments on commit 29969e8

Please sign in to comment.