diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 24c48ce4a678..87140ec213ab 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -101,6 +101,6 @@ trace.debug.enablebooleanfalseif set, traces for recent requests can be seen at https:///debug/requests trace.lightstep.tokenstringif set, traces go to Lightstep using this token trace.zipkin.collectorstringif set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'); ignored if trace.lightstep.token is set -versionversion20.2-26set the active cluster version in the format '.' +versionversion20.2-28set the active cluster version in the format '.' diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md index 93497b21bd88..fd830a090539 100644 --- a/docs/generated/sql/functions.md +++ b/docs/generated/sql/functions.md @@ -2648,6 +2648,8 @@ SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)

crdb_internal.set_vmodule(vmodule_string: string) → int

Set the equivalent of the --vmodule flag on the gateway node processing this request; it affords control over the logging verbosity of different files. Example syntax: crdb_internal.set_vmodule('recordio=2,file=1,gfs*=3'). Reset with: crdb_internal.set_vmodule(''). Raising the verbosity can severely affect performance.

+crdb_internal.trace_id() → int

Returns the current trace ID or an error if no trace is open.

+
current_database() → string

Returns the current database.

current_schema() → string

Returns the current schema.

diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index 91b6e0617480..7f5f56467ba7 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -237,6 +237,10 @@ const ( PostTruncatedAndRangeAppliedStateMigration // SeparatedIntents allows the writing of separated intents/locks. SeparatedIntents + // TracingVerbosityIndependentSemantics marks a change in which trace spans + // are propagated across RPC boundaries independently of their verbosity setting. + // This requires a version gate this violates implicit assumptions in v20.2. + TracingVerbosityIndependentSemantics // Step (1): Add new versions here. ) @@ -395,6 +399,10 @@ var versionsSingleton = keyedVersions([]keyedVersion{ Key: SeparatedIntents, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 26}, }, + { + Key: TracingVerbosityIndependentSemantics, + Version: roachpb.Version{Major: 20, Minor: 2, Internal: 28}, + }, // Step (2): Add new versions here. }) diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index fd75c9e515ab..79613367595e 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -41,11 +41,12 @@ func _() { _ = x[TruncatedAndRangeAppliedStateMigration-30] _ = x[PostTruncatedAndRangeAppliedStateMigration-31] _ = x[SeparatedIntents-32] + _ = x[TracingVerbosityIndependentSemantics-33] } -const _Key_name = "NamespaceTableWithSchemasStart20_2GeospatialTypeEnumsRangefeedLeasesAlterColumnTypeGeneralAlterSystemJobsAddCreatedByColumnsAddScheduledJobsTableUserDefinedSchemasNoOriginFKIndexesNodeMembershipStatusMinPasswordLengthAbortSpanBytesAlterSystemJobsAddSqllivenessColumnsAddNewSystemSqllivenessTableMaterializedViewsBox2DTypeUpdateScheduledJobsSchemaCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1EmptyArraysInInvertedIndexesUniqueWithoutIndexConstraintsVirtualComputedColumnsCPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationNewSchemaChangerLongRunningMigrationsTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntents" +const _Key_name = "NamespaceTableWithSchemasStart20_2GeospatialTypeEnumsRangefeedLeasesAlterColumnTypeGeneralAlterSystemJobsAddCreatedByColumnsAddScheduledJobsTableUserDefinedSchemasNoOriginFKIndexesNodeMembershipStatusMinPasswordLengthAbortSpanBytesAlterSystemJobsAddSqllivenessColumnsAddNewSystemSqllivenessTableMaterializedViewsBox2DTypeUpdateScheduledJobsSchemaCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1EmptyArraysInInvertedIndexesUniqueWithoutIndexConstraintsVirtualComputedColumnsCPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationNewSchemaChangerLongRunningMigrationsTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemantics" -var _Key_index = [...]uint16{0, 25, 34, 48, 53, 68, 90, 124, 145, 163, 180, 200, 217, 231, 295, 312, 321, 346, 366, 378, 383, 392, 420, 449, 471, 481, 496, 542, 592, 608, 629, 667, 709, 725} +var _Key_index = [...]uint16{0, 25, 34, 48, 53, 68, 90, 124, 145, 163, 180, 200, 217, 231, 295, 312, 321, 346, 366, 378, 383, 392, 420, 449, 471, 481, 496, 542, 592, 608, 629, 667, 709, 725, 761} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/cmd/roachtest/versionupgrade.go b/pkg/cmd/roachtest/versionupgrade.go index 201e0d54377e..e63960b2446d 100644 --- a/pkg/cmd/roachtest/versionupgrade.go +++ b/pkg/cmd/roachtest/versionupgrade.go @@ -175,6 +175,14 @@ func runVersionUpgrade(ctx context.Context, t *test, c *cluster, buildVersion ve testFeaturesStep, // schemaChangeStep, backupStep, + // Turn tracing on globally to give it a fighting chance at exposing any + // crash-inducing incompatibilities or horrendous memory leaks. (It won't + // catch most memory leaks since this test doesn't run for too long or does + // too much work). Then, run the previous tests again. + enableTracingGloballyStep, + testFeaturesStep, + // schemaChangeStep, + backupStep, ) u.run(ctx, t) @@ -354,6 +362,15 @@ func binaryUpgradeStep(nodes nodeListOption, newVersion string) versionStep { } } +func enableTracingGloballyStep(ctx context.Context, t *test, u *versionUpgradeTest) { + db := u.conn(ctx, t, 1) + // NB: this enables net/trace, and as a side effect creates verbose trace spans everywhere. + _, err := db.ExecContext(ctx, `SET CLUSTER SETTING trace.debug.enable = $1`, true) + if err != nil { + t.Fatal(err) + } +} + func preventAutoUpgradeStep(node int) versionStep { return func(ctx context.Context, t *test, u *versionUpgradeTest) { db := u.conn(ctx, t, node) diff --git a/pkg/jobs/jobs.go b/pkg/jobs/jobs.go index 49497c5e2d5a..78fbeb92deb9 100644 --- a/pkg/jobs/jobs.go +++ b/pkg/jobs/jobs.go @@ -915,6 +915,12 @@ func (sj *StartableJob) CleanupOnRollback(ctx context.Context) error { "cannot call CleanupOnRollback for a StartableJob with a non-finalized transaction") } sj.registry.unregister(*sj.ID()) + if sj.span != nil { + sj.span.Finish() + } + if sj.cancel != nil { + sj.cancel() + } return nil } diff --git a/pkg/jobs/registry.go b/pkg/jobs/registry.go index 603e778c91af..5cf53dc33c1f 100644 --- a/pkg/jobs/registry.go +++ b/pkg/jobs/registry.go @@ -517,6 +517,14 @@ func (r *Registry) CreateStartableJobWithTxn( _, span := tracing.ForkSpan(ctx, "job") if span != nil { resumerCtx = tracing.ContextWithSpan(resumerCtx, span) + + // This trace span unfortunately is sometimes never finished. + // As a hack/workaround, finish it now so that it leaves the + // tracer registry. + // + // Remove this when this issue is fixed: + // https://github.com/cockroachdb/cockroach/issues/60671 + span.Finish() } if r.startUsingSQLLivenessAdoption(ctx) { diff --git a/pkg/kv/kvnemesis/kvnemesis_test.go b/pkg/kv/kvnemesis/kvnemesis_test.go index 5c8fd5c9dcc0..86c8153f3a9d 100644 --- a/pkg/kv/kvnemesis/kvnemesis_test.go +++ b/pkg/kv/kvnemesis/kvnemesis_test.go @@ -70,6 +70,9 @@ func TestKVNemesisMultiNode(t *testing.T) { sqlDBs[i] = tc.ServerConn(i) } sqlutils.MakeSQLRunner(sqlDBs[0]).Exec(t, `SET CLUSTER SETTING kv.rangefeed.enabled = true`) + // Turn net/trace on, which results in real trace spans created throughout. + // This gives kvnemesis a chance to hit NPEs related to tracing. + sqlutils.MakeSQLRunner(sqlDBs[0]).Exec(t, `SET CLUSTER SETTING trace.debug.enable = true`) config := NewDefaultConfig() config.NumNodes, config.NumReplicas = numNodes, 3 diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index 09d5d86bb504..ca0b62679e25 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ b/pkg/kv/kvserver/replica_raft.go @@ -129,6 +129,13 @@ func (r *Replica) evalAndPropose( // Fork the proposal's context span so that the proposal's context // can outlive the original proposer's context. proposal.ctx, proposal.sp = tracing.ForkSpan(ctx, "async consensus") + { + // This span sometimes leaks. Disable it for the time being. + // + // Tracked in: https://github.com/cockroachdb/cockroach/issues/60677 + proposal.sp.Finish() + proposal.sp = nil + } // Signal the proposal's response channel immediately. reply := *proposal.Local.Reply diff --git a/pkg/settings/cluster/cluster_settings.go b/pkg/settings/cluster/cluster_settings.go index c1e96a86f948..7d6c461ba62a 100644 --- a/pkg/settings/cluster/cluster_settings.go +++ b/pkg/settings/cluster/cluster_settings.go @@ -121,6 +121,10 @@ func MakeClusterSettings() *Settings { sv.Init(s.Version) s.Tracer = tracing.NewTracer() + s.Tracer.TracingVerbosityIndependentSemanticsIsActive = func() bool { + return s.Version.IsActive(context.Background(), + clusterversion.TracingVerbosityIndependentSemantics) + } s.Tracer.Configure(sv) return s diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index b8ae839749dc..85c50e07cebc 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -592,9 +592,10 @@ func (s *Server) newConnExecutor( nodeIDOrZero: nodeIDOrZero, clock: s.cfg.Clock, // Future transaction's monitors will inherits from sessionRootMon. - connMon: sessionRootMon, - tracer: s.cfg.AmbientCtx.Tracer, - settings: s.cfg.Settings, + connMon: sessionRootMon, + tracer: s.cfg.AmbientCtx.Tracer, + settings: s.cfg.Settings, + execTestingKnobs: s.GetExecutorConfig().TestingKnobs, }, memMetrics: memMetrics, planner: planner{execCfg: s.cfg, alloc: &rowenc.DatumAlloc{}}, diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index a55f39acf1fe..040584710734 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -66,6 +66,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/errors" + ptypes "github.com/gogo/protobuf/types" ) // CrdbInternalName is the name of the crdb_internal schema. @@ -1148,9 +1149,18 @@ CREATE TABLE crdb_internal.node_inflight_trace_spans ( duration INTERVAL, -- The span's duration, measured from start to Finish(). -- A span whose recording is collected before it's finished will -- have the duration set as the "time of collection - start time". + num_payloads INT NOT NULL, -- The number of structured payloads in this span. operation STRING NULL -- The span's operation. )`, populate: func(ctx context.Context, p *planner, _ *dbdesc.Immutable, addRow func(...tree.Datum) error) error { + hasAdmin, err := p.HasAdminRole(ctx) + if err != nil { + return err + } + if !hasAdmin { + return pgerror.Newf(pgcode.InsufficientPrivilege, + "only users with the admin role are allowed to read crdb_internal.node_inflight_trace_spans") + } return p.ExecCfg().Settings.Tracer.VisitSpans(func(span *tracing.Span) error { for _, rec := range span.GetRecording() { traceID := rec.TraceID @@ -1167,7 +1177,14 @@ CREATE TABLE crdb_internal.node_inflight_trace_spans ( spanDuration := rec.Duration operation := rec.Operation + var numStructured int + rec.Structured(func(any *ptypes.Any) { + numStructured++ + }) + if err := addRow( + // TODO(angelapwen): we're casting uint64s to int64 here, + // is that ok? tree.NewDInt(tree.DInt(traceID)), tree.NewDInt(tree.DInt(parentSpanID)), tree.NewDInt(tree.DInt(spanID)), @@ -1178,6 +1195,7 @@ CREATE TABLE crdb_internal.node_inflight_trace_spans ( duration.MakeDuration(spanDuration.Nanoseconds(), 0, 0), types.DefaultIntervalTypeMetadata, ), + tree.NewDInt(tree.DInt(numStructured)), tree.NewDString(operation), ); err != nil { return err diff --git a/pkg/sql/distsql_running.go b/pkg/sql/distsql_running.go index 119119cb7ba0..854ca435563b 100644 --- a/pkg/sql/distsql_running.go +++ b/pkg/sql/distsql_running.go @@ -670,7 +670,9 @@ func (r *DistSQLReceiver) Push( r.contendedQueryMetric = nil } if err := r.contentionRegistry.AddContentionEvent(ev); err != nil { - r.resultWriter.SetError(errors.Wrap(err, "unable to add contention event to registry")) + // TODO(asubiotto): see https://github.com/cockroachdb/cockroach/issues/60669 + // r.resultWriter.SetError(errors.Wrap(err, "unable to add contention event to registry")) + _ = err } }) } diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 0dfb85233cfa..ccbc30b4b0bb 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -921,6 +921,12 @@ type ExecutorTestingKnobs struct { // DeterministicExplainAnalyze, if set, will result in overriding fields in // EXPLAIN ANALYZE (PLAN) that can vary between runs (like elapsed times). DeterministicExplainAnalyze bool + + // Pretend59315IsFixed pretends that this issue is fixed: + // https://github.com/cockroachdb/cockroach/issues/59315 + // which means that we don't need the WithBypassRegistry option + // in resetForNewSQLTxn. + Pretend59315IsFixed bool } // PGWireTestingKnobs contains knobs for the pgwire module. diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index 4a0ef22f08fe..c4f175876e28 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -465,7 +465,8 @@ type testClusterConfig struct { isCCLConfig bool // localities is set if nodes should be set to a particular locality. // Nodes are 1-indexed. - localities map[int]roachpb.Locality + localities map[int]roachpb.Locality + pretend59315IsFixed bool } const threeNodeTenantConfigName = "3node-tenant" @@ -562,6 +563,13 @@ var logicTestConfigs = []testClusterConfig{ overrideDistSQLMode: "on", overrideAutoStats: "false", }, + { + name: "5node-pretend59315IsFixed", + numNodes: 5, + overrideDistSQLMode: "on", + overrideAutoStats: "false", + pretend59315IsFixed: true, + }, { name: "5node-metadata", numNodes: 5, @@ -1332,6 +1340,7 @@ func (t *logicTest) newCluster(serverArgs TestServerArgs) { }, SQLExecutor: &sql.ExecutorTestingKnobs{ DeterministicExplainAnalyze: true, + Pretend59315IsFixed: t.cfg.pretend59315IsFixed, }, }, ClusterName: "testclustername", diff --git a/pkg/sql/logictest/testdata/logic_test/contention_event b/pkg/sql/logictest/testdata/logic_test/contention_event index 9b46cec3953f..ede473134a10 100644 --- a/pkg/sql/logictest/testdata/logic_test/contention_event +++ b/pkg/sql/logictest/testdata/logic_test/contention_event @@ -1,3 +1,5 @@ +# LogicTest: 5node-pretend59315IsFixed +# # Verify that ContentionEvents are emitted. This is mostly a sanity check - look # for the datadriven tests in `pkg/kv/kvserver/concurrency` for the actual events # that do get emitted in various contention scenarios. @@ -6,8 +8,8 @@ statement ok GRANT ADMIN TO testuser statement ok -CREATE TABLE kv (k VARCHAR PRIMARY KEY, v VARCHAR) - +CREATE TABLE kv (k VARCHAR PRIMARY KEY, v VARCHAR); +ALTER TABLE kv SPLIT AT VALUES ('b'), ('d'), ('q'), ('z'); query TT SELECT * FROM kv @@ -23,41 +25,34 @@ INSERT INTO kv VALUES('k', 'v') user root +# Scan all ranges of the table (note that we have intentionally +# split it into at least six ranges). This is better than a point +# lookup in that it gives tracing more of a chance to get things +# wrong due to DistSender parallelism. statement ok -SET tracing=on - -statement ok -SET TRACING=on; BEGIN; SET TRANSACTION PRIORITY HIGH; -INSERT INTO kv VALUES('k', 'my v'); -COMMIT; -SET TRACING=off; - -# Check for the message emitted by (roachpb.ContentionEvent).String() as proof that a ContentionEvent -# payload was added to the trace. -# -# NB: if the matcher here ever gets updated, make sure to update throughout this file. -query I -SELECT count(message) FROM [ SHOW TRACE FOR SESSION ] WHERE message LIKE '%conflicted with % on % for %' ----- -1 +SELECT * FROM kv; -# Clean up. user testuser statement ok ROLLBACK -# Do a straightforward non-conflicting insert; we want to check that there's *no* ContentionEvent -# emitted for those. There's no reason to believe it would be - this is a pure sanity check. - -statement ok -SET TRACING=on; -INSERT INTO kv VALUES ('l', 'lll'); -SET TRACING=off; +user root -query I -SELECT count(message) FROM [ SHOW TRACE FOR SESSION ] WHERE message LIKE '%conflicted with % on % for %' +# Check that the number of payloads in the open trace is at least 1. +# TODO(angelapwen): when we have a way to pull particular payloads for +# a trace, we should verify that we're seeing the right contention event. +# As is, the payloads could be something else (though we verified manually +# and there is a contention event). +# +# NB: the contention event is not in our trace span but in one of its +# children, so it wouldn't be found if we filtered by the trace span ID. +# +# NB: this needs the 5node-pretend59315 config because otherwise the span is not +# tracked. +query B +SELECT count(num_payloads) > 0 FROM crdb_internal.node_inflight_trace_spans WHERE trace_id = crdb_internal.trace_id(); ---- -0 +true diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal b/pkg/sql/logictest/testdata/logic_test/crdb_internal index 0aa5a3111c18..d6c36a2eba62 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal @@ -250,10 +250,10 @@ SELECT * FROM crdb_internal.zones WHERE false zone_id subzone_id target range_name database_name table_name index_name partition_name raw_config_yaml raw_config_sql raw_config_protobuf full_config_yaml full_config_sql -query IIIIBTTT colnames +query IIIIBTTIT colnames SELECT * FROM crdb_internal.node_inflight_trace_spans WHERE span_id < 0 ---- -trace_id parent_span_id span_id goroutine_id finished start_time duration operation +trace_id parent_span_id span_id goroutine_id finished start_time duration num_payloads operation query ITTTTITTTTTTTTTI colnames SELECT * FROM crdb_internal.ranges WHERE range_id < 0 @@ -477,6 +477,9 @@ select * from crdb_internal.kv_store_status query error pq: only users with the admin role are allowed to read crdb_internal.gossip_alerts select * from crdb_internal.gossip_alerts +query error pq: only users with the admin role are allowed to read crdb_internal.node_inflight_trace_spans +select * from crdb_internal.node_inflight_trace_spans + # Anyone can see the executable version. query T select regexp_replace(crdb_internal.node_executable_version()::string, '(-\d+)?$', ''); diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal_tenant b/pkg/sql/logictest/testdata/logic_test/crdb_internal_tenant index 2d4b9d49e710..c54eda79e560 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal_tenant +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal_tenant @@ -262,10 +262,10 @@ SELECT * FROM crdb_internal.zones WHERE false zone_id subzone_id target range_name database_name table_name index_name partition_name raw_config_yaml raw_config_sql raw_config_protobuf full_config_yaml full_config_sql -query IIIIBTTT colnames +query IIIIBTTIT colnames SELECT * FROM crdb_internal.node_inflight_trace_spans WHERE span_id < 0 ---- -trace_id parent_span_id span_id goroutine_id finished start_time duration operation +trace_id parent_span_id span_id goroutine_id finished start_time duration num_payloads operation statement error not fully contained in tenant keyspace SELECT * FROM crdb_internal.ranges WHERE range_id < 0 diff --git a/pkg/sql/opt/exec/execbuilder/testdata/autocommit_nonmetamorphic b/pkg/sql/opt/exec/execbuilder/testdata/autocommit_nonmetamorphic index 7191149f250a..0a016b360620 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/autocommit_nonmetamorphic +++ b/pkg/sql/opt/exec/execbuilder/testdata/autocommit_nonmetamorphic @@ -110,6 +110,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 2 CPut, 1 EndTxn to (n1,s1):1 # TODO(radu): allow non-side-effecting projections. @@ -133,6 +134,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND operation NOT LIKE '%async%' ---- dist sender send r36: sending batch 2 CPut to (n1,s1):1 +dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 # Insert with RETURNING statement with side-effects should not auto-commit. @@ -157,6 +159,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND operation NOT LIKE '%async%' ---- dist sender send r36: sending batch 2 CPut to (n1,s1):1 +dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 # Another way to test the scenario above: generate an error and ensure that the @@ -192,6 +195,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 1 Put, 1 EndTxn to (n1,s1):1 # Multi-row upsert should auto-commit. @@ -213,6 +217,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 2 Put, 1 EndTxn to (n1,s1):1 # No auto-commit inside a transaction. @@ -261,6 +266,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 2 Put, 1 EndTxn to (n1,s1):1 # TODO(radu): allow non-side-effecting projections. @@ -284,6 +290,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND operation NOT LIKE '%async%' ---- dist sender send r36: sending batch 2 Put to (n1,s1):1 +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 # Upsert with RETURNING statement with side-effects should not auto-commit. @@ -308,6 +315,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND operation NOT LIKE '%async%' ---- dist sender send r36: sending batch 2 Put to (n1,s1):1 +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 # Another way to test the scenario above: generate an error and ensure that the @@ -343,6 +351,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 2 Put, 1 EndTxn to (n1,s1):1 @@ -394,6 +403,8 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' ---- dist sender send r36: sending batch 1 Scan to (n1,s1):1 +dist sender send r36: sending batch 2 Put to (n1,s1):1 +dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 2 Put, 1 EndTxn to (n1,s1):1 # TODO(radu): allow non-side-effecting projections. @@ -418,6 +429,8 @@ WHERE message LIKE '%r$rangeid: sending batch%' ---- dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 2 Put to (n1,s1):1 +dist sender send r36: sending batch 1 Scan to (n1,s1):1 +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 # Update with RETURNING statement with side-effects should not auto-commit. @@ -443,6 +456,8 @@ WHERE message LIKE '%r$rangeid: sending batch%' ---- dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 2 Put to (n1,s1):1 +dist sender send r36: sending batch 1 Scan to (n1,s1):1 +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 # Another way to test the scenario above: generate an error and ensure that the @@ -478,6 +493,8 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 1 Scan to (n1,s1):1 +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 1 DelRng, 1 EndTxn to (n1,s1):1 # Multi-row delete should auto-commit. @@ -499,6 +516,8 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 1 Scan to (n1,s1):1 +dist sender send r36: sending batch 2 Put to (n1,s1):1 dist sender send r36: sending batch 1 DelRng, 1 EndTxn to (n1,s1):1 # No auto-commit inside a transaction. @@ -547,6 +566,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%PushTxn%' AND message NOT LIKE '%QueryTxn%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 2 Del, 1 EndTxn to (n1,s1):1 @@ -570,6 +590,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' AND operation NOT LIKE '%async%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 2 Del to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 @@ -595,6 +616,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' AND operation NOT LIKE '%async%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 2 Del to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 @@ -644,6 +666,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' AND operation NOT LIKE '%async%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 2 Scan to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 @@ -667,6 +690,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' AND operation NOT LIKE '%async%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 1 Put to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 @@ -692,6 +716,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' AND operation NOT LIKE '%async%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 1 Del to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 @@ -720,6 +745,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND operation NOT LIKE '%async%' ---- dist sender send r36: sending batch 1 DelRng to (n1,s1):1 +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 1 Scan to (n1,s1):1 dist sender send r36: sending batch 1 Del, 1 EndTxn to (n1,s1):1 @@ -749,6 +775,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' AND operation NOT LIKE '%async%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 @@ -774,6 +801,7 @@ WHERE message LIKE '%r$rangeid: sending batch%' AND message NOT LIKE '%QueryTxn%' AND operation NOT LIKE '%async%' ---- +dist sender send r36: sending batch 1 DelRng to (n1,s1):1 dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 2 CPut to (n1,s1):1 dist sender send r36: sending batch 1 EndTxn to (n1,s1):1 diff --git a/pkg/sql/sem/builtins/BUILD.bazel b/pkg/sql/sem/builtins/BUILD.bazel index 9c7b8bc3e8ef..b699dd2b53ba 100644 --- a/pkg/sql/sem/builtins/BUILD.bazel +++ b/pkg/sql/sem/builtins/BUILD.bazel @@ -79,6 +79,7 @@ go_library( "//pkg/util/timeofday", "//pkg/util/timetz", "//pkg/util/timeutil", + "//pkg/util/tracing", "//pkg/util/unaccent", "//pkg/util/uuid", "@com_github_cockroachdb_apd_v2//:apd", diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index dce6113a236a..31f371c294a0 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -69,6 +69,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeofday" "github.com/cockroachdb/cockroach/pkg/util/timetz" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/cockroach/pkg/util/unaccent" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" @@ -3578,6 +3579,25 @@ may increase either contention or retry errors, or both.`, }, ), + // Get the current trace ID. + "crdb_internal.trace_id": makeBuiltin( + tree.FunctionProperties{Category: categorySystemInfo}, + tree.Overload{ + Types: tree.ArgTypes{}, + ReturnType: tree.FixedReturnType(types.Int), + Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + sp := tracing.SpanFromContext(ctx.Context) + if sp == nil { + return tree.DNull, nil + } + return tree.NewDInt(tree.DInt(sp.GetRecording()[0].TraceID)), nil + }, + Info: "Returns the current trace ID or an error if no trace is open.", + // NB: possibly this is or could be made stable, but it's not worth it. + Volatility: tree.VolatilityVolatile, + }, + ), + "crdb_internal.locality_value": makeBuiltin( tree.FunctionProperties{Category: categorySystemInfo}, tree.Overload{ diff --git a/pkg/sql/txn_state.go b/pkg/sql/txn_state.go index d56e300e923e..9d0d867e463e 100644 --- a/pkg/sql/txn_state.go +++ b/pkg/sql/txn_state.go @@ -157,7 +157,13 @@ func (ts *txnState) resetForNewSQLTxn( // TODO(andrei): figure out how to close these spans on server shutdown? Ties // into a larger discussion about how to drain SQL and rollback open txns. opName := sqlTxnName - txnCtx, sp := createRootOrChildSpan(connCtx, opName, tranCtx.tracer, tracing.WithBypassRegistry()) + var traceOpts []tracing.SpanOption + if !tranCtx.execTestingKnobs.Pretend59315IsFixed { + // The surrounding conditional and this option can be removed once #59315 + // is addressed. + traceOpts = append(traceOpts, tracing.WithBypassRegistry()) + } + txnCtx, sp := createRootOrChildSpan(connCtx, opName, tranCtx.tracer, traceOpts...) if txnType == implicitTxn { sp.SetTag("implicit", "true") } @@ -384,8 +390,9 @@ type transitionCtx struct { tracer *tracing.Tracer // sessionTracing provides access to the session's tracing interface. The // state machine needs to see if session tracing is enabled. - sessionTracing *SessionTracing - settings *cluster.Settings + sessionTracing *SessionTracing + settings *cluster.Settings + execTestingKnobs ExecutorTestingKnobs } var noRewind = rewindCapability{} diff --git a/pkg/testutils/grpcutils/BUILD.bazel b/pkg/testutils/grpcutils/BUILD.bazel new file mode 100644 index 000000000000..07469ebd0436 --- /dev/null +++ b/pkg/testutils/grpcutils/BUILD.bazel @@ -0,0 +1,28 @@ +load("@rules_proto//proto:defs.bzl", "proto_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") + +proto_library( + name = "grpcutils_proto", + srcs = ["testservice.proto"], + strip_import_prefix = "/pkg", + visibility = ["//visibility:public"], + deps = ["@com_google_protobuf//:any_proto"], +) + +go_proto_library( + name = "grpcutils_go_proto", + compilers = ["//pkg/cmd/protoc-gen-gogoroach:protoc-gen-gogoroach_grpc_compiler"], + importpath = "github.com/cockroachdb/cockroach/pkg/testutils/grpcutils", + proto = ":grpcutils_proto", + visibility = ["//visibility:public"], +) + +go_library( + name = "grpcutils", + srcs = ["testserver.go"], + embed = [":grpcutils_go_proto"], + importpath = "github.com/cockroachdb/cockroach/pkg/testutils/grpcutils", + visibility = ["//visibility:public"], + deps = ["@com_github_gogo_protobuf//types"], +) diff --git a/pkg/testutils/grpcutils/testserver.go b/pkg/testutils/grpcutils/testserver.go new file mode 100644 index 000000000000..6d7e411c3e5a --- /dev/null +++ b/pkg/testutils/grpcutils/testserver.go @@ -0,0 +1,47 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package grpcutils + +import ( + "context" + + "github.com/gogo/protobuf/types" +) + +// TestServerImpl backs the Test service. +type TestServerImpl struct { + UU func(context.Context, *types.Any) (*types.Any, error) // UnaryUnary + US func(*types.Any, GRPCTest_UnaryStreamServer) error // UnaryStream + SU func(server GRPCTest_StreamUnaryServer) error // StreamUnary + SS func(server GRPCTest_StreamStreamServer) error // StreamStream +} + +var _ GRPCTestServer = (*TestServerImpl)(nil) + +// UnaryUnary implements GRPCTestServer. +func (s *TestServerImpl) UnaryUnary(ctx context.Context, any *types.Any) (*types.Any, error) { + return s.UU(ctx, any) +} + +// UnaryStream implements GRPCTestServer. +func (s *TestServerImpl) UnaryStream(any *types.Any, server GRPCTest_UnaryStreamServer) error { + return s.US(any, server) +} + +// StreamUnary implements GRPCTestServer. +func (s *TestServerImpl) StreamUnary(server GRPCTest_StreamUnaryServer) error { + return s.SU(server) +} + +// StreamStream implements GRPCTestServer. +func (s *TestServerImpl) StreamStream(server GRPCTest_StreamStreamServer) error { + return s.SS(server) +} diff --git a/pkg/testutils/grpcutils/testservice.pb.go b/pkg/testutils/grpcutils/testservice.pb.go new file mode 100644 index 000000000000..5813f3a4f01e --- /dev/null +++ b/pkg/testutils/grpcutils/testservice.pb.go @@ -0,0 +1,312 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: testutils/grpcutils/testservice.proto + +package grpcutils + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" +import types "github.com/gogo/protobuf/types" + +import ( + context "context" + grpc "google.golang.org/grpc" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +// Reference imports to suppress errors if they are not otherwise used. +var _ context.Context +var _ grpc.ClientConn + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +const _ = grpc.SupportPackageIsVersion4 + +// GRPCTestClient is the client API for GRPCTest service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. +type GRPCTestClient interface { + UnaryUnary(ctx context.Context, in *types.Any, opts ...grpc.CallOption) (*types.Any, error) + UnaryStream(ctx context.Context, in *types.Any, opts ...grpc.CallOption) (GRPCTest_UnaryStreamClient, error) + StreamUnary(ctx context.Context, opts ...grpc.CallOption) (GRPCTest_StreamUnaryClient, error) + StreamStream(ctx context.Context, opts ...grpc.CallOption) (GRPCTest_StreamStreamClient, error) +} + +type gRPCTestClient struct { + cc *grpc.ClientConn +} + +func NewGRPCTestClient(cc *grpc.ClientConn) GRPCTestClient { + return &gRPCTestClient{cc} +} + +func (c *gRPCTestClient) UnaryUnary(ctx context.Context, in *types.Any, opts ...grpc.CallOption) (*types.Any, error) { + out := new(types.Any) + err := c.cc.Invoke(ctx, "/cockroach.testutils.grpcutils.GRPCTest/UnaryUnary", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *gRPCTestClient) UnaryStream(ctx context.Context, in *types.Any, opts ...grpc.CallOption) (GRPCTest_UnaryStreamClient, error) { + stream, err := c.cc.NewStream(ctx, &_GRPCTest_serviceDesc.Streams[0], "/cockroach.testutils.grpcutils.GRPCTest/UnaryStream", opts...) + if err != nil { + return nil, err + } + x := &gRPCTestUnaryStreamClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type GRPCTest_UnaryStreamClient interface { + Recv() (*types.Any, error) + grpc.ClientStream +} + +type gRPCTestUnaryStreamClient struct { + grpc.ClientStream +} + +func (x *gRPCTestUnaryStreamClient) Recv() (*types.Any, error) { + m := new(types.Any) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *gRPCTestClient) StreamUnary(ctx context.Context, opts ...grpc.CallOption) (GRPCTest_StreamUnaryClient, error) { + stream, err := c.cc.NewStream(ctx, &_GRPCTest_serviceDesc.Streams[1], "/cockroach.testutils.grpcutils.GRPCTest/StreamUnary", opts...) + if err != nil { + return nil, err + } + x := &gRPCTestStreamUnaryClient{stream} + return x, nil +} + +type GRPCTest_StreamUnaryClient interface { + Send(*types.Any) error + CloseAndRecv() (*types.Any, error) + grpc.ClientStream +} + +type gRPCTestStreamUnaryClient struct { + grpc.ClientStream +} + +func (x *gRPCTestStreamUnaryClient) Send(m *types.Any) error { + return x.ClientStream.SendMsg(m) +} + +func (x *gRPCTestStreamUnaryClient) CloseAndRecv() (*types.Any, error) { + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + m := new(types.Any) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *gRPCTestClient) StreamStream(ctx context.Context, opts ...grpc.CallOption) (GRPCTest_StreamStreamClient, error) { + stream, err := c.cc.NewStream(ctx, &_GRPCTest_serviceDesc.Streams[2], "/cockroach.testutils.grpcutils.GRPCTest/StreamStream", opts...) + if err != nil { + return nil, err + } + x := &gRPCTestStreamStreamClient{stream} + return x, nil +} + +type GRPCTest_StreamStreamClient interface { + Send(*types.Any) error + Recv() (*types.Any, error) + grpc.ClientStream +} + +type gRPCTestStreamStreamClient struct { + grpc.ClientStream +} + +func (x *gRPCTestStreamStreamClient) Send(m *types.Any) error { + return x.ClientStream.SendMsg(m) +} + +func (x *gRPCTestStreamStreamClient) Recv() (*types.Any, error) { + m := new(types.Any) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// GRPCTestServer is the server API for GRPCTest service. +type GRPCTestServer interface { + UnaryUnary(context.Context, *types.Any) (*types.Any, error) + UnaryStream(*types.Any, GRPCTest_UnaryStreamServer) error + StreamUnary(GRPCTest_StreamUnaryServer) error + StreamStream(GRPCTest_StreamStreamServer) error +} + +func RegisterGRPCTestServer(s *grpc.Server, srv GRPCTestServer) { + s.RegisterService(&_GRPCTest_serviceDesc, srv) +} + +func _GRPCTest_UnaryUnary_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(types.Any) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(GRPCTestServer).UnaryUnary(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/cockroach.testutils.grpcutils.GRPCTest/UnaryUnary", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(GRPCTestServer).UnaryUnary(ctx, req.(*types.Any)) + } + return interceptor(ctx, in, info, handler) +} + +func _GRPCTest_UnaryStream_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(types.Any) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(GRPCTestServer).UnaryStream(m, &gRPCTestUnaryStreamServer{stream}) +} + +type GRPCTest_UnaryStreamServer interface { + Send(*types.Any) error + grpc.ServerStream +} + +type gRPCTestUnaryStreamServer struct { + grpc.ServerStream +} + +func (x *gRPCTestUnaryStreamServer) Send(m *types.Any) error { + return x.ServerStream.SendMsg(m) +} + +func _GRPCTest_StreamUnary_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(GRPCTestServer).StreamUnary(&gRPCTestStreamUnaryServer{stream}) +} + +type GRPCTest_StreamUnaryServer interface { + SendAndClose(*types.Any) error + Recv() (*types.Any, error) + grpc.ServerStream +} + +type gRPCTestStreamUnaryServer struct { + grpc.ServerStream +} + +func (x *gRPCTestStreamUnaryServer) SendAndClose(m *types.Any) error { + return x.ServerStream.SendMsg(m) +} + +func (x *gRPCTestStreamUnaryServer) Recv() (*types.Any, error) { + m := new(types.Any) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func _GRPCTest_StreamStream_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(GRPCTestServer).StreamStream(&gRPCTestStreamStreamServer{stream}) +} + +type GRPCTest_StreamStreamServer interface { + Send(*types.Any) error + Recv() (*types.Any, error) + grpc.ServerStream +} + +type gRPCTestStreamStreamServer struct { + grpc.ServerStream +} + +func (x *gRPCTestStreamStreamServer) Send(m *types.Any) error { + return x.ServerStream.SendMsg(m) +} + +func (x *gRPCTestStreamStreamServer) Recv() (*types.Any, error) { + m := new(types.Any) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +var _GRPCTest_serviceDesc = grpc.ServiceDesc{ + ServiceName: "cockroach.testutils.grpcutils.GRPCTest", + HandlerType: (*GRPCTestServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "UnaryUnary", + Handler: _GRPCTest_UnaryUnary_Handler, + }, + }, + Streams: []grpc.StreamDesc{ + { + StreamName: "UnaryStream", + Handler: _GRPCTest_UnaryStream_Handler, + ServerStreams: true, + }, + { + StreamName: "StreamUnary", + Handler: _GRPCTest_StreamUnary_Handler, + ClientStreams: true, + }, + { + StreamName: "StreamStream", + Handler: _GRPCTest_StreamStream_Handler, + ServerStreams: true, + ClientStreams: true, + }, + }, + Metadata: "testutils/grpcutils/testservice.proto", +} + +func init() { + proto.RegisterFile("testutils/grpcutils/testservice.proto", fileDescriptor_testservice_e326d78c9e5539cb) +} + +var fileDescriptor_testservice_e326d78c9e5539cb = []byte{ + // 217 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x52, 0x2d, 0x49, 0x2d, 0x2e, + 0x29, 0x2d, 0xc9, 0xcc, 0x29, 0xd6, 0x4f, 0x2f, 0x2a, 0x48, 0x86, 0xb0, 0x40, 0x62, 0xc5, 0xa9, + 0x45, 0x65, 0x99, 0xc9, 0xa9, 0x7a, 0x05, 0x45, 0xf9, 0x25, 0xf9, 0x42, 0xb2, 0xc9, 0xf9, 0xc9, + 0xd9, 0x45, 0xf9, 0x89, 0xc9, 0x19, 0x7a, 0x70, 0x0d, 0x7a, 0x70, 0x0d, 0x52, 0x92, 0xe9, 0xf9, + 0xf9, 0xe9, 0x39, 0xa9, 0xfa, 0x60, 0xc5, 0x49, 0xa5, 0x69, 0xfa, 0x89, 0x79, 0x95, 0x10, 0x9d, + 0x46, 0x6d, 0x4c, 0x5c, 0x1c, 0xee, 0x41, 0x01, 0xce, 0x21, 0xa9, 0xc5, 0x25, 0x42, 0x56, 0x5c, + 0x5c, 0xa1, 0x79, 0x89, 0x45, 0x95, 0x60, 0x42, 0x48, 0x44, 0x0f, 0xa2, 0x4d, 0x0f, 0xa6, 0x4d, + 0xcf, 0x31, 0xaf, 0x52, 0x0a, 0xab, 0xa8, 0x12, 0x83, 0x90, 0x2d, 0x17, 0x37, 0x58, 0x5b, 0x70, + 0x49, 0x51, 0x6a, 0x62, 0x2e, 0x69, 0x9a, 0x0d, 0x18, 0x41, 0xda, 0x21, 0x3a, 0xc9, 0xb0, 0x5b, + 0x83, 0x51, 0xc8, 0x81, 0x8b, 0x07, 0xa2, 0x9d, 0x1c, 0xeb, 0x35, 0x18, 0x0d, 0x18, 0x9d, 0xb4, + 0x4f, 0x3c, 0x94, 0x63, 0x38, 0xf1, 0x48, 0x8e, 0xf1, 0xc2, 0x23, 0x39, 0xc6, 0x1b, 0x8f, 0xe4, + 0x18, 0x1f, 0x3c, 0x92, 0x63, 0x9c, 0xf0, 0x58, 0x8e, 0xe1, 0xc2, 0x63, 0x39, 0x86, 0x1b, 0x8f, + 0xe5, 0x18, 0xa2, 0x38, 0xe1, 0x01, 0x9a, 0xc4, 0x06, 0x36, 0xc0, 0x18, 0x10, 0x00, 0x00, 0xff, + 0xff, 0x29, 0xc3, 0x4b, 0x1d, 0x9f, 0x01, 0x00, 0x00, +} diff --git a/pkg/testutils/grpcutils/testservice.proto b/pkg/testutils/grpcutils/testservice.proto new file mode 100644 index 000000000000..ce1ebf1293dd --- /dev/null +++ b/pkg/testutils/grpcutils/testservice.proto @@ -0,0 +1,25 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +syntax = "proto3"; +package cockroach.testutils.grpcutils; +option go_package = "grpcutils"; + +import "google/protobuf/any.proto"; + +// GRPCTest is a generic test service that operates on arbitrary protobufs (as +// Anys) and exposes all combinations of streaming and unary RPCs. +service GRPCTest { + rpc UnaryUnary (google.protobuf.Any) returns (google.protobuf.Any) {} + rpc UnaryStream (google.protobuf.Any) returns (stream google.protobuf.Any) {} + rpc StreamUnary (stream google.protobuf.Any) returns (google.protobuf.Any) {} + rpc StreamStream (stream google.protobuf.Any) returns (stream google.protobuf.Any) {} +} + diff --git a/pkg/testutils/lint/lint_test.go b/pkg/testutils/lint/lint_test.go index 308df3312a95..7eba8d5f20fd 100644 --- a/pkg/testutils/lint/lint_test.go +++ b/pkg/testutils/lint/lint_test.go @@ -814,6 +814,7 @@ func TestLint(t *testing.T) { ":!util/grpcutil/grpc_util_test.go", ":!cli/systembench/network_test_server.go", ":!server/testserver.go", + ":!util/tracing/*_test.go", ) if err != nil { t.Fatal(err) diff --git a/pkg/testutils/testcluster/testcluster.go b/pkg/testutils/testcluster/testcluster.go index f4d4827eec34..9fa3cb4cc09e 100644 --- a/pkg/testutils/testcluster/testcluster.go +++ b/pkg/testutils/testcluster/testcluster.go @@ -16,6 +16,7 @@ import ( "fmt" "net" "reflect" + "strings" "sync" "testing" "time" @@ -131,9 +132,21 @@ func (tc *TestCluster) stopServers(ctx context.Context) { // [1]: cleanupSessionTempObjects tracer := tc.Server(i).Tracer().(*tracing.Tracer) testutils.SucceedsSoon(tc.t, func() error { - return tracer.VisitSpans(func(span *tracing.Span) error { - return errors.Newf("expected to find no active spans, found %s", span.Meta()) + var sps []*tracing.Span + _ = tracer.VisitSpans(func(span *tracing.Span) error { + sps = append(sps, span) + return nil }) + if len(sps) == 0 { + return nil + } + var buf strings.Builder + buf.WriteString("unexpectedly found active spans:\n") + for _, sp := range sps { + fmt.Fprintln(&buf, sp.GetRecording()) + fmt.Fprintln(&buf) + } + return errors.Newf("%s", buf.String()) }) } } diff --git a/pkg/util/tracing/BUILD.bazel b/pkg/util/tracing/BUILD.bazel index 67efe9734509..1a28592f8a3e 100644 --- a/pkg/util/tracing/BUILD.bazel +++ b/pkg/util/tracing/BUILD.bazel @@ -52,6 +52,7 @@ go_test( size = "small", srcs = [ "alloc_test.go", + "grpc_interceptor_test.go", "helpers_test.go", "span_test.go", "tags_test.go", @@ -60,7 +61,13 @@ go_test( embed = [":tracing"], deps = [ "//pkg/settings", + "//pkg/testutils", + "//pkg/testutils/grpcutils", + "//pkg/util", "//pkg/util/iterutil", + "//pkg/util/leaktest", + "//pkg/util/stop", + "//pkg/util/tracing/tracingpb", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_logtags//:logtags", "@com_github_gogo_protobuf//types", @@ -68,6 +75,8 @@ go_test( "@com_github_opentracing_opentracing_go//:opentracing-go", "@com_github_opentracing_opentracing_go//log", "@com_github_stretchr_testify//require", + "@org_golang_google_grpc//:go_default_library", "@org_golang_google_grpc//metadata", + "@org_golang_x_net//trace", ], ) diff --git a/pkg/util/tracing/crdbspan.go b/pkg/util/tracing/crdbspan.go index 9be388dc021b..a2ced2b9b2dc 100644 --- a/pkg/util/tracing/crdbspan.go +++ b/pkg/util/tracing/crdbspan.go @@ -135,19 +135,27 @@ func (s *crdbSpan) disableRecording() { } } -func (s *crdbSpan) getRecording(m mode) Recording { +func (s *crdbSpan) getRecording(m mode, everyoneIsV211 bool) Recording { if s == nil { - return nil + return nil // noop span } - if m == modeLegacy && s.recordingType() == RecordingOff { - // In legacy tracing (pre always-on), we avoid allocations when the - // Span is not actively recording. + + s.mu.Lock() + + if !everyoneIsV211 { + // The cluster may contain nodes that are running v20.2. Unfortunately that + // version can easily crash when a peer returns a recording that that node + // did not expect would get created. To circumvent this, retain the v20.2 + // behavior of eliding recordings when verbosity is off until we're sure + // that v20.2 is not around any longer. // - // TODO(tbg): we could consider doing the same when background tracing - // is on but the current span contains "nothing of interest". - return nil + // TODO(tbg): remove this in the v21.2 cycle. + if m == modeLegacy && s.recordingType() == RecordingOff { + s.mu.Unlock() + return nil + } } - s.mu.Lock() + // The capacity here is approximate since we don't know how many grandchildren // there are. result := make(Recording, 0, 1+len(s.mu.recording.children)+len(s.mu.recording.remoteSpans)) @@ -158,7 +166,7 @@ func (s *crdbSpan) getRecording(m mode) Recording { s.mu.Unlock() for _, child := range children { - result = append(result, child.getRecording(m)...) + result = append(result, child.getRecording(m, everyoneIsV211)...) } // Sort the spans by StartTime, except the first Span (the root of this diff --git a/pkg/util/tracing/grpc_interceptor.go b/pkg/util/tracing/grpc_interceptor.go index e25393b59e74..271b4cabd68a 100644 --- a/pkg/util/tracing/grpc_interceptor.go +++ b/pkg/util/tracing/grpc_interceptor.go @@ -73,7 +73,7 @@ func spanInclusionFuncForServer(t *Tracer, spanMeta *SpanMeta) bool { // configured to always trace, return true. The second part is particularly // useful for calls coming through the HTTP->RPC gateway (i.e. the AdminUI), // where client is never tracing. - return !spanMeta.isNilOrNoop() || t.AlwaysTrace() + return spanMeta != nil || t.AlwaysTrace() } // setSpanTags sets one or more tags on the given span according to the @@ -351,7 +351,7 @@ func newTracingClientStream( // The client span is being finished by another code path; hence, no // action is necessary. case <-cs.Context().Done(): - finishFunc(cs.Context().Err()) + finishFunc(nil) } }() otcs := &tracingClientStream{ diff --git a/pkg/util/tracing/grpc_interceptor_test.go b/pkg/util/tracing/grpc_interceptor_test.go new file mode 100644 index 000000000000..507eabcb7c1e --- /dev/null +++ b/pkg/util/tracing/grpc_interceptor_test.go @@ -0,0 +1,225 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tracing_test + +import ( + "context" + "fmt" + "net" + "testing" + + "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/grpcutils" + "github.com/cockroachdb/cockroach/pkg/util" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/stop" + "github.com/cockroachdb/cockroach/pkg/util/tracing" + "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" + "github.com/cockroachdb/errors" + "github.com/gogo/protobuf/types" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" +) + +// TestGRPCInterceptors verifies that the streaming and unary tracing +// interceptors work as advertised. We expect to see a span on the client side +// and a span on the server side. +func TestGRPCInterceptors(t *testing.T) { + defer leaktest.AfterTest(t)() + + const ( + k = "test-baggage-key" + v = "test-baggage-value" + magicValue = 150 + ) + + checkForSpanAndReturnRecording := func(ctx context.Context) (*types.Any, error) { + sp := tracing.SpanFromContext(ctx) + if sp == nil { + return nil, errors.New("no span in ctx") + } + actV, ok := sp.GetRecording()[0].Baggage[k] + if !ok { + return nil, errors.Newf("%s not set in baggage", k) + } + if v != actV { + return nil, errors.Newf("expected %v, got %v instead", v, actV) + } + + sp.RecordStructured(&types.Int32Value{Value: magicValue}) + recs := sp.GetRecording() + if len(recs) != 1 { + return nil, errors.Newf("expected exactly one recorded span, not %+v", recs) + } + return types.MarshalAny(&recs[0]) + } + + s := stop.NewStopper() + defer s.Stop(context.Background()) + + tr := tracing.NewTracer() + srv := grpc.NewServer( + grpc.UnaryInterceptor(tracing.ServerInterceptor(tr)), + grpc.StreamInterceptor(tracing.StreamServerInterceptor(tr)), + ) + impl := &grpcutils.TestServerImpl{ + UU: func(ctx context.Context, any *types.Any) (*types.Any, error) { + return checkForSpanAndReturnRecording(ctx) + }, + US: func(_ *types.Any, server grpcutils.GRPCTest_UnaryStreamServer) error { + any, err := checkForSpanAndReturnRecording(server.Context()) + if err != nil { + return err + } + return server.Send(any) + }, + SU: func(server grpcutils.GRPCTest_StreamUnaryServer) error { + _, err := server.Recv() + if err != nil { + return err + } + any, err := checkForSpanAndReturnRecording(server.Context()) + if err != nil { + return err + } + return server.SendAndClose(any) + }, + SS: func(server grpcutils.GRPCTest_StreamStreamServer) error { + _, err := server.Recv() + if err != nil { + return err + } + any, err := checkForSpanAndReturnRecording(server.Context()) + if err != nil { + return err + } + return server.Send(any) + }, + } + grpcutils.RegisterGRPCTestServer(srv, impl) + defer srv.GracefulStop() + ln, err := net.Listen(util.TestAddr.Network(), util.TestAddr.String()) + require.NoError(t, err) + require.NoError(t, s.RunAsyncTask(context.Background(), "serve", func(ctx context.Context) { + if err := srv.Serve(ln); err != nil { + t.Error(err) + } + })) + conn, err := grpc.DialContext(context.Background(), ln.Addr().String(), + grpc.WithInsecure(), + grpc.WithUnaryInterceptor(tracing.ClientInterceptor( + tr, func(sp *tracing.Span) { + sp.SetBaggageItem(k, v) + })), + grpc.WithStreamInterceptor(tracing.StreamClientInterceptor(tr, func(sp *tracing.Span) { + sp.SetBaggageItem(k, v) + })), + ) + require.NoError(t, err) + defer func() { + _ = conn.Close() // nolint:grpcconnclose + }() + + c := grpcutils.NewGRPCTestClient(conn) + unusedAny, err := types.MarshalAny(&types.Empty{}) + require.NoError(t, err) + + for _, tc := range []struct { + name string + do func(context.Context) (*types.Any, error) + }{ + { + name: "UnaryUnary", + do: func(ctx context.Context) (*types.Any, error) { + return c.UnaryUnary(ctx, unusedAny) + }, + }, + { + name: "UnaryStream", + do: func(ctx context.Context) (*types.Any, error) { + sc, err := c.UnaryStream(ctx, unusedAny) + if err != nil { + return nil, err + } + any, err := sc.Recv() + if err != nil { + return nil, err + } + return any, sc.CloseSend() + }, + }, + { + name: "StreamUnary", + do: func(ctx context.Context) (*types.Any, error) { + sc, err := c.StreamUnary(ctx) + if err != nil { + return nil, err + } + if err := sc.Send(unusedAny); err != nil { + return nil, err + } + return sc.CloseAndRecv() + }, + }, + { + name: "StreamStream", + do: func(ctx context.Context) (*types.Any, error) { + sc, err := c.StreamStream(ctx) + if err != nil { + return nil, err + } + if err := sc.Send(unusedAny); err != nil { + return nil, err + } + return sc.Recv() + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + ctx, sp := tr.StartSpanCtx(context.Background(), tc.name, tracing.WithForceRealSpan()) + recAny, err := tc.do(ctx) + require.NoError(t, err) + var rec tracingpb.RecordedSpan + require.NoError(t, types.UnmarshalAny(recAny, &rec)) + require.Len(t, rec.InternalStructured, 1) + require.NoError(t, sp.ImportRemoteSpans([]tracingpb.RecordedSpan{rec})) + sp.Finish() + var n int + finalRecs := sp.GetRecording() + for _, rec := range finalRecs { + n += len(rec.InternalStructured) + // Remove all of the _unfinished tags. These crop up because + // in this test we are pulling the recorder in the handler impl, + // but the span is only closed in the interceptor. Additionally, + // this differs between the streaming and unary interceptor, and + // it's not worth having to have a separate expectation for each. + // Note that we check that we're not leaking spans at the end of + // the test. + delete(rec.Tags, "_unfinished") + } + require.Equal(t, 1, n) + + exp := fmt.Sprintf(` +Span %[1]s: +Span /cockroach.testutils.grpcutils.GRPCTest/%[1]s: + tags: component=gRPC span.kind=client test-baggage-key=test-baggage-value +Span /cockroach.testutils.grpcutils.GRPCTest/%[1]s: + tags: component=gRPC span.kind=server test-baggage-key=test-baggage-value`, + tc.name) + require.NoError(t, tracing.TestingCheckRecordedSpans(finalRecs, exp)) + }) + } + testutils.SucceedsSoon(t, func() error { + return tr.VisitSpans(func(sp *tracing.Span) error { + return errors.Newf("leaked span: %s", sp.GetRecording()[0].Operation) + }) + }) +} diff --git a/pkg/util/tracing/span.go b/pkg/util/tracing/span.go index 1ef06781e234..bcdd03bfe766 100644 --- a/pkg/util/tracing/span.go +++ b/pkg/util/tracing/span.go @@ -149,16 +149,13 @@ func (s *Span) SetVerbose(to bool) { // enabled. This can be called while spans that are part of the recording are // still open; it can run concurrently with operations on those spans. func (s *Span) GetRecording() Recording { - return s.crdb.getRecording(s.tracer.mode()) + return s.crdb.getRecording(s.tracer.mode(), s.tracer.TracingVerbosityIndependentSemanticsIsActive()) } // ImportRemoteSpans adds RecordedSpan data to the recording of the given Span; // these spans will be part of the result of GetRecording. Used to import // recorded traces from other nodes. func (s *Span) ImportRemoteSpans(remoteSpans []tracingpb.RecordedSpan) error { - if s.tracer.mode() == modeLegacy && s.crdb.recordingType() == RecordingOff { - return nil - } return s.crdb.importRemoteSpans(remoteSpans) } @@ -174,13 +171,6 @@ func (s *Span) IsBlackHole() bool { return s.crdb.recordingType() == RecordingOff && s.netTr == nil && s.ot == (otSpan{}) } -// isNilOrNoop returns true if the Span context is either nil -// or corresponds to a "no-op" Span. If this is true, any Span -// derived from this context will be a "black hole Span". -func (sm *SpanMeta) isNilOrNoop() bool { - return sm == nil || (sm.recordingType == RecordingOff && sm.shadowTracerType == "") -} - // SpanStats are stats that can be added to a Span. type SpanStats interface { protoutil.Message @@ -218,9 +208,21 @@ func (s *Span) Finish() { return } finishTime := time.Now() + s.crdb.mu.Lock() + if alreadyFinished := s.crdb.mu.duration >= 0; alreadyFinished { + s.crdb.mu.Unlock() + + // External spans and net/trace are not always forgiving about spans getting + // finished twice, but it may happen so let's be resilient to it. + return + } s.crdb.mu.duration = finishTime.Sub(s.crdb.startTime) + if s.crdb.mu.duration == 0 { + s.crdb.mu.duration = time.Nanosecond + } s.crdb.mu.Unlock() + if s.ot.shadowSpan != nil { s.ot.shadowSpan.Finish() } diff --git a/pkg/util/tracing/span_test.go b/pkg/util/tracing/span_test.go index 0866092f8f24..da647ad23367 100644 --- a/pkg/util/tracing/span_test.go +++ b/pkg/util/tracing/span_test.go @@ -20,6 +20,7 @@ import ( "github.com/cockroachdb/errors" "github.com/gogo/protobuf/types" "github.com/stretchr/testify/require" + "golang.org/x/net/trace" "google.golang.org/grpc/metadata" ) @@ -183,9 +184,28 @@ Span grandchild: require.Equal(t, exp, recToStrippedString(childRec)) } +func TestSpan_ImportRemoteSpans(t *testing.T) { + // Verify that GetRecording propagates the recording even when the + // receiving Span isn't verbose. + tr := NewTracer() + sp := tr.StartSpan("root", WithForceRealSpan()) + ch := tr.StartSpan("child", WithParentAndManualCollection(sp.Meta())) + ch.SetVerbose(true) + ch.Record("foo") + ch.SetVerbose(false) + ch.Finish() + require.NoError(t, sp.ImportRemoteSpans(ch.GetRecording())) + sp.Finish() + + require.NoError(t, TestingCheckRecordedSpans(sp.GetRecording(), ` +Span root: +Span child: + event: foo +`)) +} + func TestSpanRecordStructured(t *testing.T) { tr := NewTracer() - tr._mode = int32(modeBackground) sp := tr.StartSpan("root", WithForceRealSpan()) defer sp.Finish() @@ -231,3 +251,25 @@ func TestSpanMaxChildren(t *testing.T) { require.Len(t, sp.crdb.mu.recording.children, exp) } } + +type countingNetTrace struct { + trace.Trace + n int +} + +func (nt *countingNetTrace) Finish() { + nt.n++ + nt.Trace.Finish() +} + +func TestSpan_FinishTwice(t *testing.T) { + tr := NewTracer() + tr._useNetTrace = 1 + sp := tr.StartSpan("foo", WithForceRealSpan()) + require.NotNil(t, sp.netTr) + nt := &countingNetTrace{Trace: sp.netTr} + sp.netTr = nt + sp.Finish() + sp.Finish() + require.Equal(t, 1, nt.n) +} diff --git a/pkg/util/tracing/tracer.go b/pkg/util/tracing/tracer.go index e0e6ec4238a1..3ed10b929521 100644 --- a/pkg/util/tracing/tracer.go +++ b/pkg/util/tracing/tracer.go @@ -167,6 +167,11 @@ type Tracer struct { syncutil.Mutex m map[uint64]*Span } + // TracingVerbosityIndependentSemanticsIsActive is really + // version.IsActive(TracingVerbosityIndependentSemanticsIsActive) + // but gets injected this way to avoid import cycles. It defaults + // to a function that returns `true`. + TracingVerbosityIndependentSemanticsIsActive func() bool includeAsyncSpansInRecordings bool // see TestingIncludeAsyncSpansInRecordings } @@ -177,6 +182,7 @@ type Tracer struct { func NewTracer() *Tracer { t := &Tracer{} t.activeSpans.m = make(map[uint64]*Span) + t.TracingVerbosityIndependentSemanticsIsActive = func() bool { return true } t.noopSpan = &Span{tracer: t} return t } @@ -293,15 +299,9 @@ func (t *Tracer) startSpanGeneric( opts.LogTags = logtags.FromContext(ctx) } - // Avoid creating a real span when possible. If tracing is globally - // enabled, we always need to create spans. If the incoming - // span is recording (which implies that there is a parent) then - // we also have to create a real child. Additionally, if the - // caller explicitly asked for a real span they need to get one. - // In all other cases, a noop span will do. - if !t.AlwaysTrace() && - opts.recordingType() == RecordingOff && - !opts.ForceRealSpan { + // Are we tracing everything, or have a parent, or want a real span? Then + // we create a real trace span. In all other cases, a noop span will do. + if !(t.AlwaysTrace() || opts.parentTraceID() != 0 || opts.ForceRealSpan) { return maybeWrapCtx(ctx, nil /* octx */, t.noopSpan) } @@ -532,7 +532,7 @@ func (fn textMapWriterFn) Set(key, val string) { // Carrier. This, alongside ExtractMetaFrom, can be used to carry span metadata // across process boundaries. See serializationFormat for more details. func (t *Tracer) InjectMetaInto(sm *SpanMeta, carrier Carrier) error { - if sm.isNilOrNoop() { + if sm == nil { // Fast path when tracing is disabled. ExtractMetaFrom will accept an // empty map as a noop context. return nil @@ -575,7 +575,7 @@ func (t *Tracer) InjectMetaInto(sm *SpanMeta, carrier Carrier) error { return nil } -var noopSpanMeta = &SpanMeta{} +var noopSpanMeta = (*SpanMeta)(nil) // ExtractMetaFrom is used to deserialize a span metadata (if any) from the // given Carrier. This, alongside InjectMetaFrom, can be used to carry span diff --git a/pkg/util/tracing/tracer_test.go b/pkg/util/tracing/tracer_test.go index 8bc59728aeb2..e96efc584948 100644 --- a/pkg/util/tracing/tracer_test.go +++ b/pkg/util/tracing/tracer_test.go @@ -46,6 +46,9 @@ func TestTracerRecording(t *testing.T) { } noop1.Record("hello") + // Noop span returns empty recording. + require.Equal(t, Recording(nil), noop1.GetRecording()) + noop2 := tr.StartSpan("noop2", WithParentAndManualCollection(noop1.Meta())) if !noop2.isNoop() { t.Error("expected noop child Span") @@ -61,12 +64,20 @@ func TestTracerRecording(t *testing.T) { t.Error("WithForceRealSpan Span should be black hole") } - // Unless recording is actually started, child spans are still noop. - noop3 := tr.StartSpan("noop3", WithParentAndManualCollection(s1.Meta())) - if !noop3.isNoop() { - t.Error("expected noop child Span") + // Initial recording of this fresh (real) span. + if err := TestingCheckRecordedSpans(s1.GetRecording(), ` + Span a: + tags: _unfinished=1 + `); err != nil { + t.Fatal(err) } - noop3.Finish() + + // Real parent --> real child. + real3 := tr.StartSpan("noop3", WithParentAndManualCollection(s1.Meta())) + if real3.isNoop() { + t.Error("expected real child Span") + } + real3.Finish() s1.Recordf("x=%d", 1) s1.SetVerbose(true) @@ -116,7 +127,7 @@ func TestTracerRecording(t *testing.T) { t.Fatal(err) } s3.Finish() - if err := TestingCheckRecordedSpans(s1.GetRecording(), ` + expS1v := ` Span a: tags: _unfinished=1 _verbose=1 event: x=2 @@ -126,12 +137,26 @@ func TestTracerRecording(t *testing.T) { Span c: tags: _verbose=1 tag=val event: x=4 - `); err != nil { + ` + if err := TestingCheckRecordedSpans(s1.GetRecording(), expS1v); err != nil { t.Fatal(err) } + // When we turn off verbosity and add more verbose logs, they get + // dropped, i.e. the recording is unchanged mod the _verbose flag. + expS1n := ` + Span a: + tags: _unfinished=1 + event: x=2 + Span b: + tags: _verbose=1 + event: x=3 + Span c: + tags: _verbose=1 tag=val + event: x=4 + ` s1.SetVerbose(false) s1.Recordf("x=%d", 100) - if err := TestingCheckRecordedSpans(s1.GetRecording(), ``); err != nil { + if err := TestingCheckRecordedSpans(s1.GetRecording(), expS1n); err != nil { t.Fatal(err) } @@ -223,7 +248,7 @@ func TestTracerInjectExtract(t *testing.T) { if err != nil { t.Fatal(err) } - if !wireSpanMeta.isNilOrNoop() { + if wireSpanMeta != nil { t.Errorf("expected noop context: %v", wireSpanMeta) } noop2 := tr2.StartSpan("remote op", WithParentAndManualCollection(wireSpanMeta)) @@ -292,6 +317,27 @@ func TestTracerInjectExtract(t *testing.T) { } } +func TestTracer_PropagateNonRecordingRealSpanAcrossRPCBoundaries(t *testing.T) { + // Verify that when a span is put on the wire on one end, and is checked + // against the span inclusion functions both on the client and server, a real + // span results in a real span. + tr1 := NewTracer() + sp1 := tr1.StartSpan("tr1.root", WithForceRealSpan()) + defer sp1.Finish() + carrier := metadataCarrier{MD: metadata.MD{}} + require.True(t, spanInclusionFuncForClient(sp1)) + require.NoError(t, tr1.InjectMetaInto(sp1.Meta(), carrier)) + require.Equal(t, 2, carrier.Len(), "%+v", carrier) // trace id and span id + + tr2 := NewTracer() + meta, err := tr2.ExtractMetaFrom(carrier) + require.NoError(t, err) + require.True(t, spanInclusionFuncForServer(tr2, meta)) + sp2 := tr2.StartSpan("tr2.child", WithParentAndManualCollection(meta)) + defer sp2.Finish() + require.NotZero(t, sp2.crdb.spanID) +} + func TestLightstepContext(t *testing.T) { tr := NewTracer() lsTr := lightstep.NewTracer(lightstep.Options{ @@ -492,3 +538,19 @@ func TestSpanRecordingFinished(t *testing.T) { spanOpsWithFinished = getSpanOpsWithFinished(t, tr1) require.Len(t, spanOpsWithFinished, 0) } + +func TestTracer_TracingVerbosityIndependentSemanticsIsActive(t *testing.T) { + // Verify that GetRecording() returns nil for non-verbose spans if we're in + // mixed-version mode. + tr := NewTracer() + tr.TracingVerbosityIndependentSemanticsIsActive = func() bool { return false } + sp := tr.StartSpan("root", WithForceRealSpan()) + defer sp.Finish() + sp.SetVerbose(true) + sp.Record("foo") + require.NotNil(t, sp.GetRecording()) + sp.SetVerbose(false) + require.Nil(t, sp.GetRecording()) + tr.TracingVerbosityIndependentSemanticsIsActive = func() bool { return true } + require.NotNil(t, sp.GetRecording()) +}