From 973ca514d4c12af80b234900cbf4c43124a9758b Mon Sep 17 00:00:00 2001 From: Mario Date: Wed, 26 Jun 2024 12:56:35 +0200 Subject: [PATCH 1/2] Add data quality metric to measure traces without a root --- pkg/dataquality/warnings.go | 5 +++++ tempodb/compactor.go | 3 +++ tempodb/encoding/common/interfaces.go | 1 + tempodb/encoding/vparquet3/compactor.go | 3 +++ tempodb/encoding/vparquet3/wal_block.go | 3 +++ tempodb/encoding/vparquet4/compactor.go | 3 +++ tempodb/encoding/vparquet4/wal_block.go | 3 +++ 7 files changed, 21 insertions(+) diff --git a/pkg/dataquality/warnings.go b/pkg/dataquality/warnings.go index 4d133ecab05..19b6b8e90d7 100644 --- a/pkg/dataquality/warnings.go +++ b/pkg/dataquality/warnings.go @@ -8,6 +8,7 @@ import ( const ( reasonOutsideIngestionSlack = "outside_ingestion_time_slack" reasonDisconnectedTrace = "disconnected_trace" + reasonRootlessTrace = "rootless_trace" PhaseTraceFlushedToWal = "_flushed_to_wal" PhaseTraceWalToComplete = "_wal_to_complete" @@ -27,3 +28,7 @@ func WarnOutsideIngestionSlack(tenant string) { func WarnDisconnectedTrace(tenant string, phase string) { metric.WithLabelValues(tenant, reasonDisconnectedTrace+phase).Inc() } + +func WarnRootlessTrace(tenant string, phase string) { + metric.WithLabelValues(tenant, reasonRootlessTrace+phase).Inc() +} diff --git a/tempodb/compactor.go b/tempodb/compactor.go index b20418a7e12..e9aa78132b0 100644 --- a/tempodb/compactor.go +++ b/tempodb/compactor.go @@ -236,6 +236,9 @@ func (rw *readerWriter) compact(ctx context.Context, blockMetas []*backend.Block DisconnectedTrace: func() { dataquality.WarnDisconnectedTrace(tenantID, dataquality.PhaseTraceCompactorCombine) }, + RootlessTrace: func() { + dataquality.WarnRootlessTrace(tenantID, dataquality.PhaseTraceCompactorCombine) + }, } compactor := enc.NewCompactor(opts) diff --git a/tempodb/encoding/common/interfaces.go b/tempodb/encoding/common/interfaces.go index f34b5c09c5b..cf0b29af406 100644 --- a/tempodb/encoding/common/interfaces.go +++ b/tempodb/encoding/common/interfaces.go @@ -80,6 +80,7 @@ type CompactionOptions struct { BytesWritten func(compactionLevel, bytes int) SpansDiscarded func(traceID string, rootSpanName string, rootServiceName string, spans int) DisconnectedTrace func() + RootlessTrace func() } type Iterator interface { diff --git a/tempodb/encoding/vparquet3/compactor.go b/tempodb/encoding/vparquet3/compactor.go index 81cbf66e97f..130be9dd727 100644 --- a/tempodb/encoding/vparquet3/compactor.go +++ b/tempodb/encoding/vparquet3/compactor.go @@ -124,6 +124,9 @@ func (c *Compactor) Compact(ctx context.Context, l log.Logger, r backend.Reader, if !connected { c.opts.DisconnectedTrace() } + if tr != nil && tr.RootSpanName == "" { + c.opts.RootlessTrace() + } c.opts.ObjectsCombined(int(compactionLevel), 1) return sch.Deconstruct(pool.Get(), tr), nil diff --git a/tempodb/encoding/vparquet3/wal_block.go b/tempodb/encoding/vparquet3/wal_block.go index d17ef9723b9..47221fca114 100644 --- a/tempodb/encoding/vparquet3/wal_block.go +++ b/tempodb/encoding/vparquet3/wal_block.go @@ -335,6 +335,9 @@ func (b *walBlock) AppendTrace(id common.ID, trace *tempopb.Trace, start, end ui if !connected { dataquality.WarnDisconnectedTrace(b.meta.TenantID, dataquality.PhaseTraceFlushedToWal) } + if b.buffer != nil && b.buffer.RootSpanName == "" { + dataquality.WarnRootlessTrace(b.meta.TenantID, dataquality.PhaseTraceFlushedToWal) + } start, end = b.adjustTimeRangeForSlack(start, end, 0) diff --git a/tempodb/encoding/vparquet4/compactor.go b/tempodb/encoding/vparquet4/compactor.go index 3bf46c2f30b..0770e9c0d17 100644 --- a/tempodb/encoding/vparquet4/compactor.go +++ b/tempodb/encoding/vparquet4/compactor.go @@ -124,6 +124,9 @@ func (c *Compactor) Compact(ctx context.Context, l log.Logger, r backend.Reader, if !connected { c.opts.DisconnectedTrace() } + if tr != nil && tr.RootSpanName == "" { + c.opts.RootlessTrace() + } c.opts.ObjectsCombined(int(compactionLevel), 1) return sch.Deconstruct(pool.Get(), tr), nil diff --git a/tempodb/encoding/vparquet4/wal_block.go b/tempodb/encoding/vparquet4/wal_block.go index afc07006b3c..c88a3af576c 100644 --- a/tempodb/encoding/vparquet4/wal_block.go +++ b/tempodb/encoding/vparquet4/wal_block.go @@ -335,6 +335,9 @@ func (b *walBlock) AppendTrace(id common.ID, trace *tempopb.Trace, start, end ui if !connected { dataquality.WarnDisconnectedTrace(b.meta.TenantID, dataquality.PhaseTraceFlushedToWal) } + if b.buffer != nil && b.buffer.RootSpanName == "" { + dataquality.WarnRootlessTrace(b.meta.TenantID, dataquality.PhaseTraceFlushedToWal) + } start, end = b.adjustTimeRangeForSlack(start, end, 0) From bd0141919f6de70bbf4803e198d0ebd7ded9a6a6 Mon Sep 17 00:00:00 2001 From: Mario Date: Wed, 26 Jun 2024 12:59:53 +0200 Subject: [PATCH 2/2] chlog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30e58a4460c..cfe67f442ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * [ENHANCEMENT] Protect ingesters from panics by adding defer/recover to all read path methods. [#3790](https://github.com/grafana/tempo/pull/3790) (@joe-elliott) * [ENHANCEMENT] Added a boolean flag to enable or disable dualstack mode on Storage block config for S3 [#3721](https://github.com/grafana/tempo/pull/3721) (@sid-jar, @mapno) * [ENHANCEMENT] Add caching to query range queries [#3796](https://github.com/grafana/tempo/pull/3796) (@mapno) +* [ENHANCEMENT] Add data quality metric to measure traces without a root [#3812](https://github.com/grafana/tempo/pull/3812) (@mapno) * [BUGFIX] Fix metrics queries when grouping by attributes that may not exist [#3734](https://github.com/grafana/tempo/pull/3734) (@mdisibio) * [BUGFIX] Fix frontend parsing error on cached responses [#3759](https://github.com/grafana/tempo/pull/3759) (@mdisibio) * [BUGFIX] max_global_traces_per_user: take into account ingestion.tenant_shard_size when converting to local limit [#3618](https://github.com/grafana/tempo/pull/3618) (@kvrhdn)