From da87572d7bbc062ea672ef9a99913b1d75cee7bf Mon Sep 17 00:00:00 2001
From: Arthur Silva Sens <arthursens2005@gmail.com>
Date: Fri, 16 Feb 2024 11:52:25 -0300
Subject: [PATCH] [processor/tailsampling] Add metric for sampled/not sampled
 spans (#30485)

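**Description:**
Add a `count_spans_sampled` metric to the tail sampling processor. It counts
the spans that were sampled or not sampled per sampling policy, complementing
the existing `count_traces_sampled` metric, which counts traces. The new metric
is only registered and recorded when the alpha feature gate
`processor.tailsamplingprocessor.metricstatcountspanssampled` is enabled.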

**Link to tracking Issue:**
Fixes
https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/30482

**Testing:** <Describe what testing was performed and which tests were
added.>
None

**Documentation:** <Describe the documentation added.>
None

---------

Signed-off-by: Arthur Silva Sens <arthur.sens@coralogix.com>
---
 .chloggen/sampled_spans_metrics.yaml         |  27 ++++
 processor/tailsamplingprocessor/factory.go   |  11 ++
 processor/tailsamplingprocessor/go.mod       |   2 +
 processor/tailsamplingprocessor/go.sum       |   4 +
 processor/tailsamplingprocessor/metrics.go   | 159 +++++++++----------
 processor/tailsamplingprocessor/processor.go |  14 ++
 6 files changed, 134 insertions(+), 83 deletions(-)
 create mode 100644 .chloggen/sampled_spans_metrics.yaml
diff --git a/.chloggen/sampled_spans_metrics.yaml b/.chloggen/sampled_spans_metrics.yaml
new file mode 100644
index 000000000000..6f1d0222dd96
--- /dev/null
+++ b/.chloggen/sampled_spans_metrics.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: processor/tail_sampling
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: "Add metrics that measure the number of sampled spans and the number of spans that are dropped due to sampling decisions."
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [30482]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
diff --git a/processor/tailsamplingprocessor/factory.go b/processor/tailsamplingprocessor/factory.go
index ba608001bc88..c078a8627887 100644
--- a/processor/tailsamplingprocessor/factory.go
+++ b/processor/tailsamplingprocessor/factory.go
@@ -14,6 +14,7 @@ import (
 	"go.opentelemetry.io/collector/component"
 	"go.opentelemetry.io/collector/config/configtelemetry"
 	"go.opentelemetry.io/collector/consumer"
+	"go.opentelemetry.io/collector/featuregate"
 	"go.opentelemetry.io/collector/processor"
 
 	"github.com/open-telemetry/opentelemetry-collector-contrib/processor/tailsamplingprocessor/internal/metadata"
@@ -21,6 +22,21 @@ import (
 
 var onceMetrics sync.Once
 
+// metricStatCountSpansSampledFeatureGate is the alpha-stage feature gate that
+// guards the span-level count_spans_sampled metric: the metric view is only
+// created, and span counts are only recorded, while this gate is enabled.
+var metricStatCountSpansSampledFeatureGate = featuregate.GlobalRegistry().MustRegister(
+	"processor.tailsamplingprocessor.metricstatcountspanssampled",
+	featuregate.StageAlpha,
+	featuregate.WithRegisterDescription("When enabled, a new metric count_spans_sampled will be available in the tail sampling processor. Unlike count_traces_sampled, which counts traces, this metric counts the number of spans that were sampled or not sampled per sampling policy."),
+)
+
+// isMetricStatCountSpansSampledEnabled reports whether the feature gate for
+// the count_spans_sampled metric is currently enabled.
+func isMetricStatCountSpansSampledEnabled() bool {
+	return metricStatCountSpansSampledFeatureGate.IsEnabled()
+}
+
 // NewFactory returns a new factory for the Tail Sampling processor.
 func NewFactory() processor.Factory {
 	onceMetrics.Do(func() {
diff --git a/processor/tailsamplingprocessor/go.mod b/processor/tailsamplingprocessor/go.mod
index 9bc5d027984a..9d742d180379 100644
--- a/processor/tailsamplingprocessor/go.mod
+++ b/processor/tailsamplingprocessor/go.mod
@@ -14,6 +14,7 @@ require (
 	go.opentelemetry.io/collector/config/configtelemetry v0.94.1
 	go.opentelemetry.io/collector/confmap v0.94.1
 	go.opentelemetry.io/collector/consumer v0.94.1
+	go.opentelemetry.io/collector/featuregate v1.1.0
 	go.opentelemetry.io/collector/pdata v1.1.0
 	go.opentelemetry.io/collector/processor v0.94.1
 	go.opentelemetry.io/otel/metric v1.23.1
@@ -33,6 +34,7 @@ require (
 	github.com/gobwas/glob v0.2.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/hashicorp/go-version v1.6.0 // indirect
 	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
 	github.com/iancoleman/strcase v0.3.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
diff --git a/processor/tailsamplingprocessor/go.sum b/processor/tailsamplingprocessor/go.sum
index d83011749f5b..d7b4982354d7 100644
--- a/processor/tailsamplingprocessor/go.sum
+++ b/processor/tailsamplingprocessor/go.sum
@@ -61,6 +61,8 @@ github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/
 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
+github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
 github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
@@ -128,6 +130,8 @@ go.opentelemetry.io/collector/confmap v0.94.1 h1:O69bkeyR1YPAFz+jMd45aDZc1DtYnwb
 go.opentelemetry.io/collector/confmap v0.94.1/go.mod h1:pCT5UtcHaHVJ5BIILv1Z2VQyjZzmT9uTdBmC9+Z0AgA=
 go.opentelemetry.io/collector/consumer v0.94.1 h1:l/9h5L71xr/d93snQ9fdxgz64C4UuB8mEDxpp456X8o=
 go.opentelemetry.io/collector/consumer v0.94.1/go.mod h1:BIPWmw8wES6jlPTPC+acJxLvUzIdOm6uh/p/X85ALsY=
+go.opentelemetry.io/collector/featuregate v1.1.0 h1:W+/FKvRxHMFC6MuTTEgrHINCf1vFBvLH7stSOEar6zU=
+go.opentelemetry.io/collector/featuregate v1.1.0/go.mod h1:QQXjP4etmJQhkQ20j4P/rapWuItYxoFozg/iIwuKnYg=
 go.opentelemetry.io/collector/pdata v1.1.0 h1:cE6Al1rQieUjMHro6p6cKwcu3sjHXGG59BZ3kRVUvsM=
 go.opentelemetry.io/collector/pdata v1.1.0/go.mod h1:IDkDj+B4Fp4wWOclBELN97zcb98HugJ8Q2gA4ZFsN8Q=
 go.opentelemetry.io/collector/processor v0.94.1 h1:cNlGox8fN85KhtUq6yuqgPM9KDCQ4O5aDQ864joc4JQ=
diff --git a/processor/tailsamplingprocessor/metrics.go b/processor/tailsamplingprocessor/metrics.go
index 9cefb51fd596..9a7e320063fe 100644
--- a/processor/tailsamplingprocessor/metrics.go
+++ b/processor/tailsamplingprocessor/metrics.go
@@ -28,6 +28,7 @@ var (
 	statPolicyEvaluationErrorCount = stats.Int64("sampling_policy_evaluation_error", "Count of sampling policy evaluation errors", stats.UnitDimensionless)
 
 	statCountTracesSampled       = stats.Int64("count_traces_sampled", "Count of traces that were sampled or not per sampling policy", stats.UnitDimensionless)
+	statCountSpansSampled        = stats.Int64("count_spans_sampled", "Count of spans that were sampled or not per sampling policy", stats.UnitDimensionless)
 	statCountGlobalTracesSampled = stats.Int64("global_count_traces_sampled", "Global count of traces that were sampled or not by at least one policy", stats.UnitDimensionless)
 
 	statDroppedTooEarlyCount    = stats.Int64("sampling_trace_dropped_too_early", "Count of traces that needed to be dropped before the configured wait time", stats.UnitDimensionless)
@@ -46,90 +47,82 @@ func samplingProcessorMetricViews(level configtelemetry.Level) []*view.View {
 	latencyDistributionAggregation := view.Distribution(1, 2, 5, 10, 25, 50, 75, 100, 150, 200, 300, 400, 500, 750, 1000, 2000, 3000, 4000, 5000, 10000, 20000, 30000, 50000)
 	ageDistributionAggregation := view.Distribution(1, 2, 5, 10, 20, 30, 40, 50, 60, 90, 120, 180, 300, 600, 1800, 3600, 7200)
 
-	decisionLatencyView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDecisionLatencyMicroSec.Name()),
-		Measure:     statDecisionLatencyMicroSec,
-		Description: statDecisionLatencyMicroSec.Description(),
-		TagKeys:     policyTagKeys,
-		Aggregation: latencyDistributionAggregation,
-	}
-	overallDecisionLatencyView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statOverallDecisionLatencyUs.Name()),
-		Measure:     statOverallDecisionLatencyUs,
-		Description: statOverallDecisionLatencyUs.Description(),
-		Aggregation: latencyDistributionAggregation,
-	}
-
-	traceRemovalAgeView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTraceRemovalAgeSec.Name()),
-		Measure:     statTraceRemovalAgeSec,
-		Description: statTraceRemovalAgeSec.Description(),
-		Aggregation: ageDistributionAggregation,
-	}
-	lateSpanArrivalView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statLateSpanArrivalAfterDecision.Name()),
-		Measure:     statLateSpanArrivalAfterDecision,
-		Description: statLateSpanArrivalAfterDecision.Description(),
-		Aggregation: ageDistributionAggregation,
-	}
-
-	countPolicyEvaluationErrorView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statPolicyEvaluationErrorCount.Name()),
-		Measure:     statPolicyEvaluationErrorCount,
-		Description: statPolicyEvaluationErrorCount.Description(),
-		Aggregation: view.Sum(),
-	}
-
+	views := make([]*view.View, 0)
 	sampledTagKeys := []tag.Key{tagPolicyKey, tagSampledKey}
-	countTracesSampledView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountTracesSampled.Name()),
-		Measure:     statCountTracesSampled,
-		Description: statCountTracesSampled.Description(),
-		TagKeys:     sampledTagKeys,
-		Aggregation: view.Sum(),
-	}
-
-	countGlobalTracesSampledView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountGlobalTracesSampled.Name()),
-		Measure:     statCountGlobalTracesSampled,
-		Description: statCountGlobalTracesSampled.Description(),
-		TagKeys:     []tag.Key{tagSampledKey},
-		Aggregation: view.Sum(),
+	views = append(views,
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDecisionLatencyMicroSec.Name()),
+			Measure:     statDecisionLatencyMicroSec,
+			Description: statDecisionLatencyMicroSec.Description(),
+			TagKeys:     policyTagKeys,
+			Aggregation: latencyDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statOverallDecisionLatencyUs.Name()),
+			Measure:     statOverallDecisionLatencyUs,
+			Description: statOverallDecisionLatencyUs.Description(),
+			Aggregation: latencyDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTraceRemovalAgeSec.Name()),
+			Measure:     statTraceRemovalAgeSec,
+			Description: statTraceRemovalAgeSec.Description(),
+			Aggregation: ageDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statLateSpanArrivalAfterDecision.Name()),
+			Measure:     statLateSpanArrivalAfterDecision,
+			Description: statLateSpanArrivalAfterDecision.Description(),
+			Aggregation: ageDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statPolicyEvaluationErrorCount.Name()),
+			Measure:     statPolicyEvaluationErrorCount,
+			Description: statPolicyEvaluationErrorCount.Description(),
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountTracesSampled.Name()),
+			Measure:     statCountTracesSampled,
+			Description: statCountTracesSampled.Description(),
+			TagKeys:     sampledTagKeys,
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountGlobalTracesSampled.Name()),
+			Measure:     statCountGlobalTracesSampled,
+			Description: statCountGlobalTracesSampled.Description(),
+			TagKeys:     []tag.Key{tagSampledKey},
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDroppedTooEarlyCount.Name()),
+			Measure:     statDroppedTooEarlyCount,
+			Description: statDroppedTooEarlyCount.Description(),
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statNewTraceIDReceivedCount.Name()),
+			Measure:     statNewTraceIDReceivedCount,
+			Description: statNewTraceIDReceivedCount.Description(),
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTracesOnMemoryGauge.Name()),
+			Measure:     statTracesOnMemoryGauge,
+			Description: statTracesOnMemoryGauge.Description(),
+			Aggregation: view.LastValue(),
+		})
+
+	if isMetricStatCountSpansSampledEnabled() {
+		views = append(views, &view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountSpansSampled.Name()),
+			Measure:     statCountSpansSampled,
+			Description: statCountSpansSampled.Description(),
+			TagKeys:     sampledTagKeys,
+			Aggregation: view.Sum(),
+		})
 	}
 
-	countTraceDroppedTooEarlyView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDroppedTooEarlyCount.Name()),
-		Measure:     statDroppedTooEarlyCount,
-		Description: statDroppedTooEarlyCount.Description(),
-		Aggregation: view.Sum(),
-	}
-	countTraceIDArrivalView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statNewTraceIDReceivedCount.Name()),
-		Measure:     statNewTraceIDReceivedCount,
-		Description: statNewTraceIDReceivedCount.Description(),
-		Aggregation: view.Sum(),
-	}
-	trackTracesOnMemorylView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTracesOnMemoryGauge.Name()),
-		Measure:     statTracesOnMemoryGauge,
-		Description: statTracesOnMemoryGauge.Description(),
-		Aggregation: view.LastValue(),
-	}
-
-	return []*view.View{
-		decisionLatencyView,
-		overallDecisionLatencyView,
-
-		traceRemovalAgeView,
-		lateSpanArrivalView,
-
-		countPolicyEvaluationErrorView,
-
-		countTracesSampledView,
-		countGlobalTracesSampledView,
-
-		countTraceDroppedTooEarlyView,
-		countTraceIDArrivalView,
-		trackTracesOnMemorylView,
-	}
+	return views
 }
diff --git a/processor/tailsamplingprocessor/processor.go b/processor/tailsamplingprocessor/processor.go
index 75d5b55559c3..9ffcb13846e0 100644
--- a/processor/tailsamplingprocessor/processor.go
+++ b/processor/tailsamplingprocessor/processor.go
@@ -308,6 +308,13 @@ func (tsp *tailSamplingSpanProcessor) makeDecision(id pcommon.TraceID, trace *sa
 				mutators,
 				statCountTracesSampled.M(int64(1)),
 			)
+			if isMetricStatCountSpansSampledEnabled() {
+				_ = stats.RecordWithTags(
+					p.ctx,
+					mutators,
+					statCountSpansSampled.M(trace.SpanCount.Load()),
+				)
+			}
 			metrics.decisionSampled++
 
 		case sampling.NotSampled:
@@ -317,6 +324,13 @@ func (tsp *tailSamplingSpanProcessor) makeDecision(id pcommon.TraceID, trace *sa
 				mutators,
 				statCountTracesSampled.M(int64(1)),
 			)
+			if isMetricStatCountSpansSampledEnabled() {
+				_ = stats.RecordWithTags(
+					p.ctx,
+					mutators,
+					statCountSpansSampled.M(trace.SpanCount.Load()),
+				)
+			}
 			metrics.decisionNotSampled++
 		}
 	}