Skip to content

Commit

Permalink
server, tracing: add cluster setting to enable tracer redactability
Browse files Browse the repository at this point in the history
Prior commits added the ability to set a flag on the Tracer to determine
whether Span's should be marked redactable or not. This commit adds
a cluster setting that will enable fine-grained trace redactability. The
intended use is for multi-tenant deployments where KV traces should be
redacted prior to being returned to tenants. Enabling the flag will enable
fine-grained redaction so that tenant operators can continue to debug
their queries via redacted kv trace information instead of having that
information stripped out.

The desired outcome is to preserve redactability for tenant traces while
preserving a non-redactable code path for requests using the system
tenant. This preserves existing functionality in production while
keeping the current performance hit of fine grained redaction from
propagating to non-tenant deployments.

Benchmarking this against master results in the following improvements.
Verbose redaction is enabled in the test via SET tracing = on.

* verbose-redaction-always.txt refers to master @ a64af2c
which has redaction turned on always
* verbose-redaction-conditional-off.txt refers to the top of this PR @
51a8c72 which implements conditional redaction and turns it off for the
test

We gain back a significant portion of the performance hit.

```
10:39:06 ❯ benchstat verbose-redaction-always.txt  verbose-redaction-conditional-off.txt
name                                                        old time/op    new time/op    delta
Tracing/1node/scan/trace=100%/redactable=on/verbose=on-8       890µs ± 8%     795µs ±10%  -10.68%  (p=0.000 n=10+10)
Tracing/1node/insert/trace=100%/redactable=on/verbose=on-8    1.05ms ± 4%    0.95ms ± 1%   -9.17%  (p=0.000 n=9+8)
Tracing/3node/scan/trace=100%/redactable=on/verbose=on-8       921µs ± 4%     875µs ± 9%   -5.09%  (p=0.006 n=9+9)
Tracing/3node/insert/trace=100%/redactable=on/verbose=on-8    1.12ms ±21%    1.02ms ± 5%   -9.35%  (p=0.010 n=10+9)

name                                                        old alloc/op   new alloc/op   delta
Tracing/1node/scan/trace=100%/redactable=on/verbose=on-8       133kB ± 0%     135kB ± 0%   +1.76%  (p=0.000 n=9+9)
Tracing/1node/insert/trace=100%/redactable=on/verbose=on-8     165kB ± 0%     164kB ± 0%   -0.47%  (p=0.000 n=10+9)
Tracing/3node/scan/trace=100%/redactable=on/verbose=on-8       170kB ± 0%     170kB ± 1%     ~     (p=0.796 n=10+10)
Tracing/3node/insert/trace=100%/redactable=on/verbose=on-8     154kB ± 2%     155kB ± 1%     ~     (p=0.796 n=10+10)

name                                                        old allocs/op  new allocs/op  delta
Tracing/1node/scan/trace=100%/redactable=on/verbose=on-8       2.00k ± 0%     1.76k ± 0%  -12.02%  (p=0.000 n=9+8)
Tracing/1node/insert/trace=100%/redactable=on/verbose=on-8     2.36k ± 0%     2.01k ± 0%  -14.64%  (p=0.000 n=10+10)
Tracing/3node/scan/trace=100%/redactable=on/verbose=on-8       1.92k ± 0%     1.81k ± 1%   -5.69%  (p=0.000 n=10+10)
Tracing/3node/insert/trace=100%/redactable=on/verbose=on-8     1.68k ± 1%     1.56k ± 1%   -7.45%  (p=0.000 n=10+10)
```

Resolves cockroachdb#71694

Release note: None
  • Loading branch information
dhartunian committed Dec 9, 2021
1 parent 07c52a5 commit 7c768ac
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 10 deletions.
15 changes: 10 additions & 5 deletions pkg/ccl/kvccl/kvtenantccl/tenant_trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
Expand Down Expand Up @@ -72,6 +73,8 @@ SET tracing = off;
tc.Server(0).TracerI().(*tracing.Tracer).SetRedactable(redactable)
defer tc.Stopper().Stop(ctx)

// Queries from the system tenant will receive unredacted traces
// since the tracer will not have the redactable flag set.
t.Run("system-tenant", func(t *testing.T) {
db := tc.ServerConn(0)
defer db.Close()
Expand All @@ -97,7 +100,6 @@ SET tracing = off;
defer tenDB.Close()
results := getTrace(t, tenDB)

const redactedMarkerString = "verbose trace message redacted"
var found bool
var foundRedactedMarker bool
for _, sl := range results {
Expand All @@ -111,26 +113,29 @@ SET tracing = off;
if strings.Contains(s, visibleString) {
found = true
}
if strings.Contains(s, redactedMarkerString) {
if strings.Contains(s, string(server.TraceRedactedMarker)) {
foundRedactedMarker = true
}
}
}

// In both cases we don't expect to see the `TraceRedactedMarker`
// since that's only shown when the server is in an inconsistent
// state or if there's a version mismatch between client and server.
if redactable {
// If redaction was on, we expect the tenant to see safe information in its
// trace.
require.True(t, found, "did not see expected trace message '%q':\n%s",
visibleString, sqlutils.MatrixToStr(results))
require.False(t, foundRedactedMarker, "unexpectedly found '%q':\n%s",
redactedMarkerString, sqlutils.MatrixToStr(results))
string(server.TraceRedactedMarker), sqlutils.MatrixToStr(results))
} else {
// Otherwise, expect the opposite: not even safe information makes it through,
// because it gets replaced with foundRedactedMarker.
require.False(t, found, "unexpectedly saw message '%q':\n%s",
visibleString, sqlutils.MatrixToStr(results))
require.True(t, foundRedactedMarker, "not not find expected message '%q':\n%s",
redactedMarkerString, sqlutils.MatrixToStr(results))
require.False(t, foundRedactedMarker, "unexpectedly found '%q':\n%s",
string(server.TraceRedactedMarker), sqlutils.MatrixToStr(results))
}
})
}
2 changes: 1 addition & 1 deletion pkg/kv/txn.go
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ func (txn *Txn) exec(ctx context.Context, fn func(context.Context, *Txn) error)
if err == nil {
if !txn.IsCommitted() {
err = txn.Commit(ctx)
log.Eventf(ctx, "client.Txn did AutoCommit. err: %v\n", err)
log.Eventf(ctx, "client.Txn did AutoCommit. err: %v", err)
if err != nil {
if !errors.HasType(err, (*roachpb.TransactionRetryWithProtoRefreshError)(nil)) {
// We can't retry, so let the caller know we tried to
Expand Down
1 change: 0 additions & 1 deletion pkg/kv/txn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ func TestTxnVerboseTrace(t *testing.T) {
`(?s)`+
`.*event:[^:]*:\d+ inside txn\n`+
`.*event:[^:]*:\d+ client\.Txn did AutoCommit\. err: <nil>\n`+
`.*\n`+
`.*event:[^:]*:\d+ txn complete.*`,
dump)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,7 @@ func (n *Node) setupSpanForIncomingRPC(
// remoteTrace case below.
const opName = "/cockroach.roachpb.Internal/Batch"
tr := n.storeCfg.AmbientCtx.Tracer

// newSpan is set if we end up creating a new span.
var newSpan *tracing.Span
parentSpan := tracing.SpanFromContext(ctx)
Expand Down
7 changes: 4 additions & 3 deletions pkg/server/node_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ import (
"github.com/cockroachdb/redact"
)

const sRedactedMarker = redact.RedactableString("verbose trace message redacted")
// TraceRedactedMarker is used to replace logs that weren't redacted.
const TraceRedactedMarker = redact.RedactableString("verbose trace message redacted")

// redactRecordingForTenant redacts the sensitive parts of log messages in the
// recording if the tenant to which this recording is intended is not the system
Expand Down Expand Up @@ -49,7 +50,7 @@ func redactRecordingForTenant(tenID roachpb.TenantID, rec tracing.Recording) err
if field.Key != tracingpb.LogMessageField {
// We don't have any of these fields, but let's not take any
// chances (our dependencies might slip them in).
field.Value = sRedactedMarker
field.Value = TraceRedactedMarker
continue
}
if !sp.RedactableLogs {
Expand All @@ -58,7 +59,7 @@ func redactRecordingForTenant(tenID roachpb.TenantID, rec tracing.Recording) err
// stripped. Note that this is not the common path here, as most
// information in the trace will be from the local node, which
// always creates redactable logs.
field.Value = sRedactedMarker
field.Value = TraceRedactedMarker
continue
}
field.Value = field.Value.Redact()
Expand Down
16 changes: 16 additions & 0 deletions pkg/util/tracing/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,18 @@ const (
spanKindTagKey = "span.kind"
)

// TODO(davidh): Once performance issues around redaction are
// resolved via #58610, this setting can be removed so that all traces
// have redactability enabled.
var enableTraceRedactable = settings.RegisterBoolSetting(
"trace.redactable",
"set to true to enable redactability for unstructured events "+
"in traces and to redact traces sent to tenants. "+
"Set to false to coarsely mark unstructured events as redactable "+
" and eliminate them from tenant traces.",
false,
)

var enableNetTrace = settings.RegisterBoolSetting(
"trace.debug.enable",
"if set, traces for recent requests can be seen at https://<ui>/debug/requests",
Expand Down Expand Up @@ -412,6 +424,9 @@ func (t *Tracer) Configure(ctx context.Context, sv *settings.Values) {
jaegerAgentAddr := jaegerAgent.Get(sv)
otlpCollectorAddr := openTelemetryCollector.Get(sv)
zipkinAddr := ZipkinCollector.Get(sv)
enableRedactable := enableTraceRedactable.Get(sv)

t.SetRedactable(enableRedactable)

var nt int32
if enableNetTrace.Get(sv) {
Expand Down Expand Up @@ -494,6 +509,7 @@ func (t *Tracer) Configure(ctx context.Context, sv *settings.Values) {
openTelemetryCollector.SetOnChange(sv, reconfigure)
ZipkinCollector.SetOnChange(sv, reconfigure)
jaegerAgent.SetOnChange(sv, reconfigure)
enableTraceRedactable.SetOnChange(sv, reconfigure)
}

func createOTLPSpanProcessor(
Expand Down

0 comments on commit 7c768ac

Please sign in to comment.