From 13c3b7c74f7841420990a06cf3c268f693bdf69b Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Mon, 31 Jan 2022 10:48:07 -0500 Subject: [PATCH] sql: fix the sql.trace.stmt.enable_threshold cluster setting This setting had rotted, leading to a span use-after-Finish. This was caught by the recently-unskipped follower reads roachtests (roachtests panic on use-after-finish), accounting for all the issues below. The last entry in these issues refers to the nodes crashing because of the bug fixed here. Some of these issues have been open for a long time for more relevant failures - those have been fixed in #72296. Fixes #75716 Fixes #75715 Fixes #75714 Fixes #71244 Fixes #71126 Fixes #70818 Fixes #70191 Fixes #70011 Fixes #70010 Fixes #69952 Fixes #69951 Release note: None --- pkg/sql/conn_executor_exec.go | 50 ++++++++++++++++++++++------------- pkg/sql/exec_util.go | 4 +-- pkg/sql/trace_test.go | 21 +++++++++++++++ 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index f83c4c0ed1b5..357407555ea6 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -490,7 +490,7 @@ func (ex *connExecutor) execStmtInOpenState( }) } - defer func() { + defer func(ctx context.Context) { if filter := ex.server.cfg.TestingKnobs.StatementFilter; retErr == nil && filter != nil { var execErr error if perr, ok := retPayload.(payloadWithError); ok { @@ -506,9 +506,8 @@ func (ex *connExecutor) execStmtInOpenState( } if canAutoCommit && !isExtendedProtocol { retEv, retPayload = ex.handleAutoCommit(ctx, ast) - return } - }() + }(ctx) switch s := ast.(type) { case *tree.BeginTransaction: @@ -707,25 +706,38 @@ func (ex *connExecutor) execStmtInOpenState( var stmtThresholdSpan *tracing.Span alreadyRecording := ex.transitionCtx.sessionTracing.Enabled() - stmtTraceThreshold := traceStmtThreshold.Get(&ex.planner.execCfg.Settings.SV) - if !alreadyRecording && stmtTraceThreshold > 0 { - ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer, tracing.WithRecording(tracing.RecordingVerbose)) - } + stmtTraceThreshold := TraceStmtThreshold.Get(&ex.planner.execCfg.Settings.SV) + { + origCtx := ctx + // TODO(andrei): I think we should do this even if alreadyRecording == true. + if !alreadyRecording && stmtTraceThreshold > 0 { + ctx, stmtThresholdSpan = createRootOrChildSpan(ctx, "trace-stmt-threshold", ex.transitionCtx.tracer, tracing.WithRecording(tracing.RecordingVerbose)) + } - if err := ex.dispatchToExecutionEngine(ctx, p, res); err != nil { - stmtThresholdSpan.Finish() - return nil, nil, err - } + if err := ex.dispatchToExecutionEngine(ctx, p, res); err != nil { + stmtThresholdSpan.Finish() + return nil, nil, err + } + ctx = origCtx + } if stmtThresholdSpan != nil { - stmtThresholdSpan.Finish() - logTraceAboveThreshold( - ctx, - stmtThresholdSpan.GetRecording(tracing.RecordingVerbose), - fmt.Sprintf("SQL stmt %s", stmt.AST.String()), - stmtTraceThreshold, - timeutil.Since(ex.phaseTimes.GetSessionPhaseTime(sessionphase.SessionQueryReceived)), - ) + stmtDur := timeutil.Since(ex.phaseTimes.GetSessionPhaseTime(sessionphase.SessionQueryReceived)) + needRecording := stmtTraceThreshold < stmtDur + if needRecording { + rec := stmtThresholdSpan.FinishAndGetRecording(tracing.RecordingVerbose) + // NB: This recording does not include the commit for implicit + // transactions if the statement didn't auto-commit. + logTraceAboveThreshold( + ctx, + rec, + fmt.Sprintf("SQL stmt %s", stmt.AST.String()), + stmtTraceThreshold, + stmtDur, + ) + } else { + stmtThresholdSpan.Finish() + } } if err := res.Err(); err != nil { diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index c6fe3d88ab8f..79cccb1f2519 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -217,11 +217,11 @@ var traceTxnThreshold = settings.RegisterDurationSetting( "within a transaction as well as client communication (e.g. retries).", 0, ).WithPublic() -// traceStmtThreshold is identical to traceTxnThreshold except it applies to +// TraceStmtThreshold is identical to traceTxnThreshold except it applies to // individual statements in a transaction. The motivation for this setting is // to be able to reduce the noise associated with a larger transaction (e.g. // round trips to client). -var traceStmtThreshold = settings.RegisterDurationSetting( +var TraceStmtThreshold = settings.RegisterDurationSetting( settings.TenantWritable, "sql.trace.stmt.enable_threshold", "duration beyond which all statements are traced (set to 0 to disable). "+ diff --git a/pkg/sql/trace_test.go b/pkg/sql/trace_test.go index ba8d543f50f2..e9136b5d22e5 100644 --- a/pkg/sql/trace_test.go +++ b/pkg/sql/trace_test.go @@ -18,9 +18,11 @@ import ( "sort" "strings" "testing" + "time" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/security" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/skip" @@ -633,3 +635,22 @@ func TestTraceDistSQL(t *testing.T) { // Check that the table reader indeed came from a remote note. require.Equal(t, "2", sp.Tags["node"]) } + +// Test the sql.trace.stmt.enable_threshold cluster setting. +func TestStatementThreshold(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + settings := cluster.MakeTestingClusterSettings() + sql.TraceStmtThreshold.Override(ctx, &settings.SV, 1*time.Nanosecond) + args := base.TestServerArgs{ + Settings: settings, + } + // Check that the server starts (no crash). + s, db, _ := serverutils.StartServer(t, args) + defer s.Stopper().Stop(ctx) + r := sqlutils.MakeSQLRunner(db) + r.Exec(t, "select 1") + // TODO(andrei): check the logs for traces somehow. +}