From 9424ae3d9b9430d00f05c0d6172fc48a0d3c8566 Mon Sep 17 00:00:00 2001
From: Anup Rout <arout@wayfair.com>
Date: Thu, 7 Sep 2023 11:56:14 -0400
Subject: [PATCH] [receiver/statsdreceiver] add support for distribution type
 metrics (#26107)

**Description:**

Add support for StatsD `distribution` type metrics (wire type `d`).
Distributions share the observer settings of `histogram` (`h`) metrics, so a
`timer_histogram_mapping` entry for either type configures both.

**Link to tracking Issue:**
[24768](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/24768)

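**Testing:** Added unit tests covering parsing of `d` metrics, loading a
`distribution` mapping from config, and aggregation of distributions with the
gauge, summary and histogram observers. Also verified manually as shown below.

Test config: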
```
receivers:
  statsd:
    endpoint: "localhost:8125"
    aggregation_interval: 10s
    enable_metric_type: true
    is_monotonic_counter: true
    timer_histogram_mapping:
      - statsd_type: "distribution"
        observer_type: "histogram"
        histogram:
          max_size: 50

exporters:
  logging:
    verbosity: detailed

service:
  pipelines:
    metrics:
      receivers: [statsd]
      processors: []
      exporters: [logging]

```
test data:
```
test.statsdreceiver.dist:1|d|#tag1:Val1
test.statsdreceiver.dist:2|d|#tag1:Val1
test.statsdreceiver.dist:3|d|#tag1:Val1
```
output:
```
ResourceMetrics #0
Resource SchemaURL:
ScopeMetrics #0
ScopeMetrics SchemaURL:
InstrumentationScope otelcol/statsdreceiver 0.83.0-dev
Metric #0
Descriptor:
     -> Name: test.statsdreceiver.dist
     -> Description:
     -> Unit:
     -> DataType: ExponentialHistogram
     -> AggregationTemporality: Delta
ExponentialHistogramDataPoints #0
Data point attributes:
     -> metric_type: Str(distribution)
     -> tag1: Str(Val1)
StartTimestamp: 2023-08-25 15:49:29.595594 +0000 UTC
Timestamp: 2023-08-25 15:49:39.596418 +0000 UTC
Count: 3
Sum: 6.000000
Min: 1.000000
Max: 3.000000
Bucket (0.957603, 1.000000], Count: 1
Bucket (1.000000, 1.044274], Count: 0
Bucket (1.044274, 1.090508], Count: 0
Bucket (1.090508, 1.138789], Count: 0
Bucket (1.138789, 1.189207], Count: 0
Bucket (1.189207, 1.241858], Count: 0
Bucket (1.241858, 1.296840], Count: 0
Bucket (1.296840, 1.354256], Count: 0
Bucket (1.354256, 1.414214], Count: 0
Bucket (1.414214, 1.476826], Count: 0
Bucket (1.476826, 1.542211], Count: 0
Bucket (1.542211, 1.610490], Count: 0
Bucket (1.610490, 1.681793], Count: 0
Bucket (1.681793, 1.756252], Count: 0
Bucket (1.756252, 1.834008], Count: 0
Bucket (1.834008, 1.915207], Count: 0
Bucket (1.915207, 2.000000], Count: 1
Bucket (2.000000, 2.088548], Count: 0
Bucket (2.088548, 2.181015], Count: 0
Bucket (2.181015, 2.277577], Count: 0
Bucket (2.277577, 2.378414], Count: 0
Bucket (2.378414, 2.483716], Count: 0
Bucket (2.483716, 2.593679], Count: 0
Bucket (2.593679, 2.708511], Count: 0
Bucket (2.708511, 2.828427], Count: 0
Bucket (2.828427, 2.953652], Count: 0
Bucket (2.953652, 3.084422], Count: 1
	{"kind": "exporter", "data_type": "metrics", "name": "logging"}
```

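**Documentation:** Updated the receiver README to list `"distribution"` as a
possible `statsd_type` value and added a `distribution` entry to the example
`timer_histogram_mapping` configurations.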
---
 ..._statsd_receiver_distribution_support.yaml | 27 ++++++++
 receiver/statsdreceiver/README.md             | 10 ++-
 receiver/statsdreceiver/config.go             |  2 +-
 receiver/statsdreceiver/config_test.go        |  7 ++
 receiver/statsdreceiver/factory.go            |  2 +-
 .../internal/protocol/statsd_parser.go        | 32 +++++----
 .../internal/protocol/statsd_parser_test.go   | 68 ++++++++++++++++++-
 receiver/statsdreceiver/testdata/config.yaml  |  4 ++
 8 files changed, 134 insertions(+), 18 deletions(-)
 create mode 100755 .chloggen/anup_statsd_receiver_distribution_support.yaml
diff --git a/.chloggen/anup_statsd_receiver_distribution_support.yaml b/.chloggen/anup_statsd_receiver_distribution_support.yaml
new file mode 100755
index 000000000000..4181e1fbdb8b
--- /dev/null
+++ b/.chloggen/anup_statsd_receiver_distribution_support.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: receiver/statsdreceiver
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: "Add support for distribution type metrics in the statsdreceiver."
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [24768]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
diff --git a/receiver/statsdreceiver/README.md b/receiver/statsdreceiver/README.md
index d6dce9aab499..db55b5aa773c 100644
--- a/receiver/statsdreceiver/README.md
+++ b/receiver/statsdreceiver/README.md
@@ -38,7 +38,7 @@ The Following settings are optional:
 - `timer_histogram_mapping:`(default value is below): Specify what OTLP type to convert received timing/histogram data to.
 
 
-`"statsd_type"` specifies received Statsd data type. Possible values for this setting are `"timing"`, `"timer"` and `"histogram"`.
+`"statsd_type"` specifies received Statsd data type. Possible values for this setting are `"timing"`, `"timer"`, `"histogram"` and `"distribution"`.
 
 `"observer_type"` specifies OTLP data type to convert to. We support `"gauge"`, `"summary"`, and `"histogram"`. For `"gauge"`, it does not perform any aggregation.
 For `"summary`, the statsD receiver will aggregate to one OTLP summary metric for one metric description (the same metric name with the same tags). It will send percentile 0, 10, 50, 90, 95, 100 to the downstream.  The `"histogram"` setting selects an [auto-scaling exponential histogram configured with only a maximum size](https://github.com/lightstep/go-expohisto#readme), as shown in the example below.
@@ -61,6 +61,10 @@ receivers:
         observer_type: "histogram"
         histogram: 
           max_size: 100
+      - statsd_type: "distribution"
+        observer_type: "histogram"
+        histogram: 
+          max_size: 50    
 ```
 
 The full list of settings exposed for this receiver are documented [here](./config.go)
@@ -138,6 +142,10 @@ receivers:
         observer_type: "histogram"
         histogram:
           max_size: 50
+      - statsd_type: "distribution"
+        observer_type: "histogram"
+        histogram: 
+          max_size: 50    
       - statsd_type: "timing"
         observer_type: "summary"
 
diff --git a/receiver/statsdreceiver/config.go b/receiver/statsdreceiver/config.go
index 67a92b493f27..76dd700b6374 100644
--- a/receiver/statsdreceiver/config.go
+++ b/receiver/statsdreceiver/config.go
@@ -39,7 +39,7 @@ func (c *Config) Validate() error {
 		}
 
 		switch eachMap.StatsdType {
-		case protocol.TimingTypeName, protocol.TimingAltTypeName, protocol.HistogramTypeName:
+		case protocol.TimingTypeName, protocol.TimingAltTypeName, protocol.HistogramTypeName, protocol.DistributionTypeName:
 			// do nothing
 		case protocol.CounterTypeName, protocol.GaugeTypeName:
 			fallthrough
diff --git a/receiver/statsdreceiver/config_test.go b/receiver/statsdreceiver/config_test.go
index 73cad547990b..ecfb38ecc0e6 100644
--- a/receiver/statsdreceiver/config_test.go
+++ b/receiver/statsdreceiver/config_test.go
@@ -54,6 +54,13 @@ func TestLoadConfig(t *testing.T) {
 							MaxSize: 170,
 						},
 					},
+					{
+						StatsdType:   "distribution",
+						ObserverType: "histogram",
+						Histogram: protocol.HistogramConfig{
+							MaxSize: 170,
+						},
+					},
 				},
 			},
 		},
diff --git a/receiver/statsdreceiver/factory.go b/receiver/statsdreceiver/factory.go
index cf2ac87e154e..d3b66404221c 100644
--- a/receiver/statsdreceiver/factory.go
+++ b/receiver/statsdreceiver/factory.go
@@ -25,7 +25,7 @@ const (
 )
 
 var (
-	defaultTimerHistogramMapping = []protocol.TimerHistogramMapping{{StatsdType: "timer", ObserverType: "gauge"}, {StatsdType: "histogram", ObserverType: "gauge"}}
+	defaultTimerHistogramMapping = []protocol.TimerHistogramMapping{{StatsdType: "timer", ObserverType: "gauge"}, {StatsdType: "histogram", ObserverType: "gauge"}, {StatsdType: "distribution", ObserverType: "gauge"}}
 )
 
 // NewFactory creates a factory for the StatsD receiver.
diff --git a/receiver/statsdreceiver/internal/protocol/statsd_parser.go b/receiver/statsdreceiver/internal/protocol/statsd_parser.go
index 6969de362c4d..d375f950ed3c 100644
--- a/receiver/statsdreceiver/internal/protocol/statsd_parser.go
+++ b/receiver/statsdreceiver/internal/protocol/statsd_parser.go
@@ -33,16 +33,18 @@ type (
 const (
 	tagMetricType = "metric_type"
 
-	CounterType   MetricType = "c"
-	GaugeType     MetricType = "g"
-	HistogramType MetricType = "h"
-	TimingType    MetricType = "ms"
-
-	CounterTypeName   TypeName = "counter"
-	GaugeTypeName     TypeName = "gauge"
-	HistogramTypeName TypeName = "histogram"
-	TimingTypeName    TypeName = "timing"
-	TimingAltTypeName TypeName = "timer"
+	CounterType      MetricType = "c"
+	GaugeType        MetricType = "g"
+	HistogramType    MetricType = "h"
+	TimingType       MetricType = "ms"
+	DistributionType MetricType = "d"
+
+	CounterTypeName      TypeName = "counter"
+	GaugeTypeName        TypeName = "gauge"
+	HistogramTypeName    TypeName = "histogram"
+	TimingTypeName       TypeName = "timing"
+	TimingAltTypeName    TypeName = "timer"
+	DistributionTypeName TypeName = "distribution"
 
 	GaugeObserver     ObserverType = "gauge"
 	SummaryObserver   ObserverType = "summary"
@@ -143,6 +145,8 @@ func (t MetricType) FullName() TypeName {
 		return TimingTypeName
 	case HistogramType:
 		return HistogramTypeName
+	case DistributionType:
+		return DistributionTypeName
 	}
 	return TypeName(fmt.Sprintf("unknown(%s)", t))
 }
@@ -162,7 +166,7 @@ func (p *StatsDParser) Initialize(enableMetricType bool, isMonotonicCounter bool
 	// Note: validation occurs in ("../".Config).validate()
 	for _, eachMap := range sendTimerHistogram {
 		switch eachMap.StatsdType {
-		case HistogramTypeName:
+		case HistogramTypeName, DistributionTypeName:
 			p.histogramEvents.method = eachMap.ObserverType
 			p.histogramEvents.histogramConfig = expoHistogramConfig(eachMap.Histogram)
 		case TimingTypeName, TimingAltTypeName:
@@ -255,7 +259,7 @@ var timeNowFunc = time.Now
 
 func (p *StatsDParser) observerCategoryFor(t MetricType) ObserverCategory {
 	switch t {
-	case HistogramType:
+	case HistogramType, DistributionType:
 		return p.histogramEvents
 	case TimingType:
 		return p.timerEvents
@@ -301,7 +305,7 @@ func (p *StatsDParser) Aggregate(line string, addr net.Addr) error {
 			point.SetIntValue(point.IntValue() + parsedMetric.counterValue())
 		}
 
-	case TimingType, HistogramType:
+	case TimingType, HistogramType, DistributionType:
 		category := p.observerCategoryFor(parsedMetric.description.metricType)
 		switch category.method {
 		case GaugeObserver:
@@ -372,7 +376,7 @@ func parseMessageToMetric(line string, enableMetricType bool) (statsDMetric, err
 
 	inType := MetricType(parts[1])
 	switch inType {
-	case CounterType, GaugeType, HistogramType, TimingType:
+	case CounterType, GaugeType, HistogramType, TimingType, DistributionType:
 		result.description.metricType = inType
 	default:
 		return result, fmt.Errorf("unsupported metric type: %s", inType)
diff --git a/receiver/statsdreceiver/internal/protocol/statsd_parser_test.go b/receiver/statsdreceiver/internal/protocol/statsd_parser_test.go
index d2b0e5a6704b..db562409c2ed 100644
--- a/receiver/statsdreceiver/internal/protocol/statsd_parser_test.go
+++ b/receiver/statsdreceiver/internal/protocol/statsd_parser_test.go
@@ -409,6 +409,17 @@ func Test_ParseMessageToMetricWithMetricType(t *testing.T) {
 				[]string{"metric_type"},
 				[]string{"histogram"}),
 		},
+		{
+			name:  "int distribution",
+			input: "test.metric:42|d",
+			wantMetric: testStatsDMetric(
+				"test.metric",
+				42,
+				false,
+				"d", 0,
+				[]string{"metric_type"},
+				[]string{"distribution"}),
+		},
 	}
 
 	for _, tt := range tests {
@@ -632,12 +643,14 @@ func TestStatsDParser_Aggregate(t *testing.T) {
 			},
 		},
 		{
-			name: "counter and gauge: 2 timings and 2 histograms",
+			name: "counter and gauge: 2 timings and 2 histograms and 2 distribution",
 			input: []string{
 				"statsdTestMetric1:500|ms|#mykey:myvalue",
 				"statsdTestMetric1:400|h|#mykey:myvalue",
 				"statsdTestMetric1:300|ms|#mykey:myvalue",
 				"statsdTestMetric1:10|h|@0.1|#mykey:myvalue",
+				"statsdTestMetric1:100|d|#mykey:myvalue",
+				"statsdTestMetric1:200|d|#mykey:myvalue",
 			},
 			expectedGauges:   map[statsDMetricDescription]pmetric.ScopeMetrics{},
 			expectedCounters: map[statsDMetricDescription]pmetric.ScopeMetrics{},
@@ -646,6 +659,8 @@ func TestStatsDParser_Aggregate(t *testing.T) {
 				buildGaugeMetric(testStatsDMetric("statsdTestMetric1", 400, false, "h", 0, []string{"mykey"}, []string{"myvalue"}), time.Unix(711, 0)),
 				buildGaugeMetric(testStatsDMetric("statsdTestMetric1", 300, false, "ms", 0, []string{"mykey"}, []string{"myvalue"}), time.Unix(711, 0)),
 				buildGaugeMetric(testStatsDMetric("statsdTestMetric1", 10, false, "h", 0, []string{"mykey"}, []string{"myvalue"}), time.Unix(711, 0)),
+				buildGaugeMetric(testStatsDMetric("statsdTestMetric1", 100, false, "d", 0, []string{"mykey"}, []string{"myvalue"}), time.Unix(711, 0)),
+				buildGaugeMetric(testStatsDMetric("statsdTestMetric1", 200, false, "d", 0, []string{"mykey"}, []string{"myvalue"}), time.Unix(711, 0)),
 			},
 		},
 	}
@@ -933,6 +948,30 @@ func TestStatsDParser_AggregateTimerWithSummary(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "distribution",
+			input: []string{
+				"statsdTestMetric1:1|d|#mykey:myvalue",
+				"statsdTestMetric2:2|d|#mykey:myvalue",
+				"statsdTestMetric1:1|d|#mykey:myvalue",
+				"statsdTestMetric1:10|d|#mykey:myvalue",
+				"statsdTestMetric1:20|d|#mykey:myvalue",
+				"statsdTestMetric2:5|d|#mykey:myvalue",
+				"statsdTestMetric2:10|d|#mykey:myvalue",
+			},
+			expectedSummaries: map[statsDMetricDescription]summaryMetric{
+				testDescription("statsdTestMetric1", "d",
+					[]string{"mykey"}, []string{"myvalue"}): {
+					points:  []float64{1, 1, 10, 20},
+					weights: []float64{1, 1, 1, 1},
+				},
+				testDescription("statsdTestMetric2", "d",
+					[]string{"mykey"}, []string{"myvalue"}): {
+					points:  []float64{2, 5, 10},
+					weights: []float64{1, 1, 1},
+				},
+			},
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -1319,6 +1358,33 @@ func TestStatsDParser_AggregateTimerWithHistogram(t *testing.T) {
 			}(),
 			mapping: normalMapping,
 		},
+		{
+			name: "one_each_distribution",
+			input: []string{
+				"expohisto:1|d|#mykey:myvalue",
+				"expohisto:0|d|#mykey:myvalue",
+				"expohisto:-1|d|#mykey:myvalue",
+			},
+			expected: func() pmetric.Metrics {
+				data, dp := newPoint()
+				dp.SetCount(3)
+				dp.SetSum(0)
+				dp.SetMin(-1)
+				dp.SetMax(1)
+				dp.SetZeroCount(1)
+				dp.SetScale(logarithm.MaxScale)
+				dp.Positive().SetOffset(-1)
+				dp.Negative().SetOffset(-1)
+				dp.Positive().BucketCounts().FromRaw([]uint64{
+					1,
+				})
+				dp.Negative().BucketCounts().FromRaw([]uint64{
+					1,
+				})
+				return data
+			}(),
+			mapping: normalMapping,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
diff --git a/receiver/statsdreceiver/testdata/config.yaml b/receiver/statsdreceiver/testdata/config.yaml
index cec588b989aa..d3984fb9684c 100644
--- a/receiver/statsdreceiver/testdata/config.yaml
+++ b/receiver/statsdreceiver/testdata/config.yaml
@@ -11,3 +11,7 @@ statsd/receiver_settings:
       observer_type: "histogram"
       histogram:
         max_size: 170
+    - statsd_type: "distribution"
+      observer_type: "histogram"
+      histogram:
+        max_size: 170