Skip to content

Commit

Permalink
Merge #44947
Browse files Browse the repository at this point in the history
44947: sql: add infrastructure for tracing queries on demand r=andreimatei a=andreimatei

This patch adds some infrastructure, to be hooked up to the AdminUI in
the future. The system.statement_diagnostics_requests and
system.statement_diagnostics tables are introduced. The idea is that all
nodes poll the former for fingerprints of requests that need information
collected and, when nodes then see a query with such a fingerprint, they
race to write the requested info (initially, a trace) into the latter.

system.statement_bundle_chunks is also introduced, to be used in the
future for gatherig more generic data about a query.

Release note: None

Co-authored-by: Radu Berinde <radu@cockroachlabs.com>
  • Loading branch information
craig[bot] and RaduBerinde committed Feb 27, 2020
2 parents 9c72e72 + 10d989f commit 6595c62
Show file tree
Hide file tree
Showing 35 changed files with 3,283 additions and 798 deletions.
793 changes: 653 additions & 140 deletions c-deps/libroach/protos/util/tracing/recorded_span.pb.cc

Large diffs are not rendered by default.

786 changes: 610 additions & 176 deletions c-deps/libroach/protos/util/tracing/recorded_span.pb.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,6 @@
<tr><td><code>trace.debug.enable</code></td><td>boolean</td><td><code>false</code></td><td>if set, traces for recent requests can be seen in the /debug page</td></tr>
<tr><td><code>trace.lightstep.token</code></td><td>string</td><td><code></code></td><td>if set, traces go to Lightstep using this token</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'); ignored if trace.lightstep.token is set</td></tr>
<tr><td><code>version</code></td><td>custom validation</td><td><code>19.2-13</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>custom validation</td><td><code>19.2-14</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
30 changes: 24 additions & 6 deletions pkg/cli/zip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ requesting heap profile for node 1... writing: debug/nodes/1/heap.pprof
requesting heap files for node 1... 0 found
requesting goroutine files for node 1... 0 found
requesting log file ...
requesting ranges... 29 found
requesting ranges... 32 found
writing: debug/nodes/1/ranges/1.json
writing: debug/nodes/1/ranges/2.json
writing: debug/nodes/1/ranges/3.json
Expand Down Expand Up @@ -194,13 +194,16 @@ writing: debug/nodes/1/ranges/26.json
writing: debug/nodes/1/ranges/27.json
writing: debug/nodes/1/ranges/28.json
writing: debug/nodes/1/ranges/29.json
writing: debug/nodes/1/ranges/30.json
writing: debug/nodes/1/ranges/31.json
writing: debug/nodes/1/ranges/32.json
requesting list of SQL databases... 3 found
requesting database details for defaultdb... writing: debug/schema/defaultdb@details.json
0 tables found
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/system@details.json
23 tables found
26 tables found
requesting table details for system.comments... writing: debug/schema/system/comments.json
requesting table details for system.descriptor... writing: debug/schema/system/descriptor.json
requesting table details for system.eventlog... writing: debug/schema/system/eventlog.json
Expand All @@ -219,6 +222,9 @@ requesting table details for system.reports_meta... writing: debug/schema/system
requesting table details for system.role_members... writing: debug/schema/system/role_members.json
requesting table details for system.role_options... writing: debug/schema/system/role_options.json
requesting table details for system.settings... writing: debug/schema/system/settings.json
requesting table details for system.statement_bundle_chunks... writing: debug/schema/system/statement_bundle_chunks.json
requesting table details for system.statement_diagnostics... writing: debug/schema/system/statement_diagnostics.json
requesting table details for system.statement_diagnostics_requests... writing: debug/schema/system/statement_diagnostics_requests.json
requesting table details for system.table_statistics... writing: debug/schema/system/table_statistics.json
requesting table details for system.ui... writing: debug/schema/system/ui.json
requesting table details for system.users... writing: debug/schema/system/users.json
Expand Down Expand Up @@ -284,7 +290,7 @@ requesting table details for defaultdb.pg_catalog.pg_class... writing: debug/sch
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/system-1@details.json
23 tables found
26 tables found
requesting table details for system.comments... writing: debug/schema/system-1/comments.json
requesting table details for system.descriptor... writing: debug/schema/system-1/descriptor.json
requesting table details for system.eventlog... writing: debug/schema/system-1/eventlog.json
Expand All @@ -303,6 +309,9 @@ requesting table details for system.reports_meta... writing: debug/schema/system
requesting table details for system.role_members... writing: debug/schema/system-1/role_members.json
requesting table details for system.role_options... writing: debug/schema/system-1/role_options.json
requesting table details for system.settings... writing: debug/schema/system-1/settings.json
requesting table details for system.statement_bundle_chunks... writing: debug/schema/system-1/statement_bundle_chunks.json
requesting table details for system.statement_diagnostics... writing: debug/schema/system-1/statement_diagnostics.json
requesting table details for system.statement_diagnostics_requests... writing: debug/schema/system-1/statement_diagnostics_requests.json
requesting table details for system.table_statistics... writing: debug/schema/system-1/table_statistics.json
requesting table details for system.ui... writing: debug/schema/system-1/ui.json
requesting table details for system.users... writing: debug/schema/system-1/users.json
Expand Down Expand Up @@ -561,7 +570,7 @@ requesting heap profile for node 1... writing: debug/nodes/1/heap.pprof
requesting heap files for node 1... 0 found
requesting goroutine files for node 1... 0 found
requesting log file ...
requesting ranges... 29 found
requesting ranges... 32 found
writing: debug/nodes/1/ranges/1.json
writing: debug/nodes/1/ranges/2.json
writing: debug/nodes/1/ranges/3.json
Expand Down Expand Up @@ -591,6 +600,9 @@ writing: debug/nodes/1/ranges/26.json
writing: debug/nodes/1/ranges/27.json
writing: debug/nodes/1/ranges/28.json
writing: debug/nodes/1/ranges/29.json
writing: debug/nodes/1/ranges/30.json
writing: debug/nodes/1/ranges/31.json
writing: debug/nodes/1/ranges/32.json
writing: debug/nodes/2/status.json
using SQL connection URL for node 2: postgresql://...
retrieving SQL data for crdb_internal.feature_usage... writing: debug/nodes/2/crdb_internal.feature_usage.txt
Expand Down Expand Up @@ -660,7 +672,7 @@ requesting heap profile for node 3... writing: debug/nodes/3/heap.pprof
requesting heap files for node 3... 0 found
requesting goroutine files for node 3... 0 found
requesting log file ...
requesting ranges... 29 found
requesting ranges... 32 found
writing: debug/nodes/3/ranges/1.json
writing: debug/nodes/3/ranges/2.json
writing: debug/nodes/3/ranges/3.json
Expand Down Expand Up @@ -690,13 +702,16 @@ writing: debug/nodes/3/ranges/26.json
writing: debug/nodes/3/ranges/27.json
writing: debug/nodes/3/ranges/28.json
writing: debug/nodes/3/ranges/29.json
writing: debug/nodes/3/ranges/30.json
writing: debug/nodes/3/ranges/31.json
writing: debug/nodes/3/ranges/32.json
requesting list of SQL databases... 3 found
requesting database details for defaultdb... writing: debug/schema/defaultdb@details.json
0 tables found
requesting database details for postgres... writing: debug/schema/postgres@details.json
0 tables found
requesting database details for system... writing: debug/schema/system@details.json
23 tables found
26 tables found
requesting table details for system.comments... writing: debug/schema/system/comments.json
requesting table details for system.descriptor... writing: debug/schema/system/descriptor.json
requesting table details for system.eventlog... writing: debug/schema/system/eventlog.json
Expand All @@ -715,6 +730,9 @@ requesting table details for system.reports_meta... writing: debug/schema/system
requesting table details for system.role_members... writing: debug/schema/system/role_members.json
requesting table details for system.role_options... writing: debug/schema/system/role_options.json
requesting table details for system.settings... writing: debug/schema/system/settings.json
requesting table details for system.statement_bundle_chunks... writing: debug/schema/system/statement_bundle_chunks.json
requesting table details for system.statement_diagnostics... writing: debug/schema/system/statement_diagnostics.json
requesting table details for system.statement_diagnostics_requests... writing: debug/schema/system/statement_diagnostics_requests.json
requesting table details for system.table_statistics... writing: debug/schema/system/table_statistics.json
requesting table details for system.ui... writing: debug/schema/system/ui.json
requesting table details for system.users... writing: debug/schema/system/users.json
Expand Down
7 changes: 7 additions & 0 deletions pkg/gossip/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ const (
// client connections a node has open. This is used by other nodes in the
// cluster to build a map of the gossip network.
KeyGossipClientsPrefix = "gossip-clients"

// KeyGossipStatementDiagnosticsRequest is the gossip key for new statement
// diagnostics requests. The values is the id of the request that generated
// the notification, as a little-endian-encoded uint64.
// stmtDiagnosticsRequestRegistry listens for notifications and responds by
// polling for new requests.
KeyGossipStatementDiagnosticsRequest = "stmt-diag-req"
)

// MakeKey creates a canonical key under which to gossip a piece of
Expand Down
4 changes: 4 additions & 0 deletions pkg/keys/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,10 @@ const (

RoleOptionsTableID = 33

StatementBundleChunksTableID = 34
StatementDiagnosticsRequestsTableID = 35
StatementDiagnosticsTableID = 36

// CommentType is type for system.comments
DatabaseCommentType = 0
TableCommentType = 1
Expand Down
2 changes: 1 addition & 1 deletion pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
)
s.internalExecutor = internalExecutor
execCfg.InternalExecutor = internalExecutor

s.status.stmtDiagnosticsRequester = execCfg.NewStmtDiagnosticsRequestRegistry()
s.execCfg = &execCfg

s.leaseMgr.SetInternalExecutor(execCfg.InternalExecutor)
Expand Down
27 changes: 14 additions & 13 deletions pkg/server/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,19 +128,20 @@ func propagateGatewayMetadata(ctx context.Context) context.Context {
type statusServer struct {
log.AmbientContext

st *cluster.Settings
cfg *base.Config
admin *adminServer
db *client.DB
gossip *gossip.Gossip
metricSource metricMarshaler
nodeLiveness *storage.NodeLiveness
storePool *storage.StorePool
rpcCtx *rpc.Context
stores *storage.Stores
stopper *stop.Stopper
sessionRegistry *sql.SessionRegistry
si systemInfoOnce
st *cluster.Settings
cfg *base.Config
admin *adminServer
db *client.DB
gossip *gossip.Gossip
metricSource metricMarshaler
nodeLiveness *storage.NodeLiveness
storePool *storage.StorePool
rpcCtx *rpc.Context
stores *storage.Stores
stopper *stop.Stopper
sessionRegistry *sql.SessionRegistry
si systemInfoOnce
stmtDiagnosticsRequester sql.StmtDiagnosticsRequester
}

// newStatusServer allocates and returns a statusServer.
Expand Down
5 changes: 5 additions & 0 deletions pkg/server/testserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,11 @@ func (ts *TestServer) DistSender() *kv.DistSender {
return ts.DistSenderI().(*kv.DistSender)
}

// SQLServer is part of TestServerInterface.
func (ts *TestServer) SQLServer() interface{} {
return ts.PGServer().SQLServer
}

// DistSQLServer is part of TestServerInterface.
func (ts *TestServer) DistSQLServer() interface{} {
return ts.distSQLServer
Expand Down
10 changes: 10 additions & 0 deletions pkg/settings/cluster/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const (
VersionNoExplicitForeignKeyIndexIDs
VersionHashShardedIndexes
VersionCreateRolePrivilege
VersionStatementDiagnosticsSystemTables

// Add new versions here (step one of two).
)
Expand Down Expand Up @@ -417,6 +418,15 @@ var versionsSingleton = keyedVersions([]keyedVersion{
Key: VersionCreateRolePrivilege,
Version: roachpb.Version{Major: 19, Minor: 2, Unstable: 13},
},
{
// VersionStatementDiagnosticsSystemTables introduces the system tables for
// storing statement information (like traces, bundles).
// In this version and later the system.statement_diagnostics_requests,
// system.statement_diagnostics and system.statement_bundle_chunks tables
// are part of the system bootstap schema.
Key: VersionStatementDiagnosticsSystemTables,
Version: roachpb.Version{Major: 19, Minor: 2, Unstable: 14},
},
// Add new versions here (step two of two).

})
Expand Down
5 changes: 3 additions & 2 deletions pkg/settings/cluster/versionkey_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions pkg/sql/conn_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,8 @@ func (s *Server) Start(ctx context.Context, stopper *stop.Stopper) {
s.PeriodicallyClearSQLStats(ctx, stopper, maxSQLStatReset, &s.reportedStats)
// Start a second loop to clear SQL stats at the requested interval.
s.PeriodicallyClearSQLStats(ctx, stopper, sqlStatReset, &s.sqlStats)

s.PeriodicallyPollForStatementInfoRequests(ctx, stopper)
}

// ResetSQLStats resets the executor's collected sql statistics.
Expand Down Expand Up @@ -599,6 +601,7 @@ func (s *Server) newConnExecutor(
ctxHolder: ctxHolder{connCtx: ctx},
executorType: executorTypeExec,
hasCreatedTemporarySchema: false,
stmtInfoRegistry: s.cfg.stmtInfoRequestRegistry,
}

ex.state.txnAbortCount = ex.metrics.EngineMetrics.TxnAbortCount
Expand Down Expand Up @@ -775,6 +778,30 @@ func (s *Server) PeriodicallyClearSQLStats(
})
}

// PeriodicallyPollForStatementInfoRequests runs a worker that periodically
// polls system.statement_diagnostics_requests.
func (s *Server) PeriodicallyPollForStatementInfoRequests(
ctx context.Context, stopper *stop.Stopper,
) {
pollingInterval := 10 * time.Second
stopper.RunWorker(ctx, func(ctx context.Context) {
ctx, _ = stopper.WithCancelOnQuiesce(ctx)
var timer timeutil.Timer
for {
if err := s.cfg.stmtInfoRequestRegistry.pollRequests(ctx); err != nil {
log.Warningf(ctx, "error polling for statement diagnostics requests: %s", err)
}
timer.Reset(pollingInterval)
select {
case <-stopper.ShouldQuiesce():
return
case <-timer.C:
timer.Read = true
}
}
})
}

type closeType int

const (
Expand Down Expand Up @@ -1067,6 +1094,10 @@ type connExecutor struct {
// hasCreatedTemporarySchema is set if the executor has created a
// temporary schema, which requires special cleanup on close.
hasCreatedTemporarySchema bool

// stmtInfoRequestRegistry is used to track which queries need to have
// information collected.
stmtInfoRegistry *stmtDiagnosticsRequestRegistry
}

// ctxHolder contains a connection's context and, while session tracing is
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
"github.com/opentracing/opentracing-go"
)

// execStmt executes one statement by dispatching according to the current
Expand Down Expand Up @@ -179,6 +180,21 @@ func (ex *connExecutor) execStmtInOpenState(
}
}()

shouldCollectInfo, recordStmtInfoFn := ex.stmtInfoRegistry.shouldCollectDiagnostics(ctx, stmt.AST)
if shouldCollectInfo {
tr := ex.server.cfg.AmbientCtx.Tracer
origCtx := ctx
var sp opentracing.Span
ctx, sp = tracing.StartSnowballTrace(ctx, tr, "traced statement")
defer func() {
// Record the statement information that we've collected.
// Note that in case of implicit transactions, the trace contains the auto-commit too.
sp.Finish()
trace := tracing.GetRecording(sp)
recordStmtInfoFn(origCtx, trace)
}()
}

if ex.sessionData.StmtTimeout > 0 {
timeoutTicker = time.AfterFunc(
ex.sessionData.StmtTimeout-timeutil.Since(ex.phaseTimes[sessionQueryReceived]),
Expand Down Expand Up @@ -1146,6 +1162,7 @@ func (ex *connExecutor) handleAutoCommit(
) (fsm.Event, fsm.EventPayload) {
txn := ex.state.mu.txn
if txn.IsCommitted() {
log.Event(ctx, "statement execution committed the txn")
return eventTxnFinish{}, eventTxnFinishPayload{commit: true}
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/sql/conn_executor_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,9 @@ func startConnExecutor(
dummyLivenessProvider{}, /* liveness */
nil, /* nodeDialer */
),
QueryCache: querycache.New(0),
TestingKnobs: ExecutorTestingKnobs{},
QueryCache: querycache.New(0),
TestingKnobs: ExecutorTestingKnobs{},
stmtInfoRequestRegistry: newStmtDiagnosticsRequestRegistry(nil, nil, nil, 0),
}
pool := mon.MakeUnlimitedMonitor(
context.Background(), "test", mon.MemoryResource,
Expand Down
19 changes: 16 additions & 3 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,11 +570,24 @@ type ExecutorConfig struct {

// ProtectedTimestampProvider encapsulates the protected timestamp subsystem.
ProtectedTimestampProvider protectedts.Provider

stmtInfoRequestRegistry *stmtDiagnosticsRequestRegistry
}

// Organization returns the value of cluster.organization.
func (ec *ExecutorConfig) Organization() string {
return ClusterOrganization.Get(&ec.Settings.SV)
func (cfg *ExecutorConfig) Organization() string {
return ClusterOrganization.Get(&cfg.Settings.SV)
}

// NewStmtDiagnosticsRequestRegistry initializes cfg.stmtInfoRequestRegistry and
// returns it as the publicly-accessible StmtDiagnosticsRequester.
func (cfg *ExecutorConfig) NewStmtDiagnosticsRequestRegistry() StmtDiagnosticsRequester {
if cfg.InternalExecutor == nil {
panic("cfg.InternalExecutor not initialized")
}
cfg.stmtInfoRequestRegistry = newStmtDiagnosticsRequestRegistry(
cfg.InternalExecutor, cfg.DB, cfg.Gossip, cfg.NodeID.Get())
return cfg.stmtInfoRequestRegistry
}

var _ base.ModuleTestingKnobs = &ExecutorTestingKnobs{}
Expand Down Expand Up @@ -1491,7 +1504,7 @@ func (st *SessionTracing) TraceExecEnd(ctx context.Context, err error, count int

// extractMsgFromRecord extracts the message of the event, which is either in an
// "event" or "error" field.
func extractMsgFromRecord(rec tracing.RecordedSpan_LogRecord) string {
func extractMsgFromRecord(rec tracing.LogRecord) string {
for _, f := range rec.Fields {
key := f.Key
if key == "event" {
Expand Down
Loading

0 comments on commit 6595c62

Please sign in to comment.