From 0e21c70f87ace760b01f8cef02c9e6bf4a13852c Mon Sep 17 00:00:00 2001 From: Diego Hurtado Date: Fri, 8 Sep 2023 13:59:16 +0200 Subject: [PATCH] Fix histogram aggregation Fixes #3407 --- .../sdk/metrics/_internal/aggregation.py | 133 ++++------ ...t_explicit_bucket_histogram_aggregation.py | 250 ++++++++++++++++++ .../integration_test/test_histogram_export.py | 15 +- .../tests/metrics/test_aggregation.py | 8 +- 4 files changed, 319 insertions(+), 87 deletions(-) create mode 100644 opentelemetry-sdk/tests/metrics/integration_test/test_explicit_bucket_histogram_aggregation.py diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/aggregation.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/aggregation.py index ae21db907dd..18394f4beea 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/aggregation.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/aggregation.py @@ -252,33 +252,36 @@ def __init__( record_min_max: bool = True, ): super().__init__(attributes) + + self._start_time_unix_nano = start_time_unix_nano self._boundaries = tuple(boundaries) - self._bucket_counts = self._get_empty_bucket_counts() + self._record_min_max = record_min_max + + self._current_value = None self._min = inf self._max = -inf self._sum = 0 - self._record_min_max = record_min_max - self._start_time_unix_nano = start_time_unix_nano - # It is assumed that the "natural" aggregation temporality for a - # Histogram instrument is DELTA, like the "natural" aggregation - # temporality for a Counter is DELTA and the "natural" aggregation - # temporality for an ObservableCounter is CUMULATIVE. - self._instrument_temporality = AggregationTemporality.DELTA + + self._previous_collection_start_nano = self._start_time_unix_nano def _get_empty_bucket_counts(self) -> List[int]: return [0] * (len(self._boundaries) + 1) def aggregate(self, measurement: Measurement) -> None: - value = measurement.value + with self._lock: + + if self._current_value is None: + self._current_value = self._get_empty_bucket_counts() - if self._record_min_max: - self._min = min(self._min, value) - self._max = max(self._max, value) + value = measurement.value - self._sum += value + if self._record_min_max: + self._min = min(self._min, value) + self._max = max(self._max, value) - self._bucket_counts[bisect_left(self._boundaries, value)] += 1 + self._current_value[bisect_left(self._boundaries, value)] += 1 + self._sum = self._sum + value def collect( self, @@ -289,79 +292,49 @@ def collect( Atomically return a point for the current value of the metric. 
""" with self._lock: - if not any(self._bucket_counts): - return None - bucket_counts = self._bucket_counts - start_time_unix_nano = self._start_time_unix_nano - sum_ = self._sum - max_ = self._max - min_ = self._min + if aggregation_temporality is AggregationTemporality.DELTA: - self._bucket_counts = self._get_empty_bucket_counts() - self._start_time_unix_nano = collection_start_nano - self._sum = 0 - self._min = inf - self._max = -inf + current_value = self._current_value + min_ = self._min + max_ = self._max + sum_ = self._sum + previous_collection_start_nano = ( + self._previous_collection_start_nano + ) - current_point = HistogramDataPoint( - attributes=self._attributes, - start_time_unix_nano=start_time_unix_nano, - time_unix_nano=collection_start_nano, - count=sum(bucket_counts), - sum=sum_, - bucket_counts=tuple(bucket_counts), - explicit_bounds=self._boundaries, - min=min_, - max=max_, - ) + self._current_value = None + self._min = inf + self._max = -inf + self._sum = 0 + self._previous_collection_start_nano = collection_start_nano - if self._previous_point is None or ( - self._instrument_temporality is aggregation_temporality - ): - self._previous_point = current_point - return current_point + if current_value is None: + return None - max_ = current_point.max - min_ = current_point.min - - if aggregation_temporality is AggregationTemporality.CUMULATIVE: - start_time_unix_nano = self._previous_point.start_time_unix_nano - sum_ = current_point.sum + self._previous_point.sum - # Only update min/max on delta -> cumulative - max_ = max(current_point.max, self._previous_point.max) - min_ = min(current_point.min, self._previous_point.min) - bucket_counts = [ - curr_count + prev_count - for curr_count, prev_count in zip( - current_point.bucket_counts, - self._previous_point.bucket_counts, - ) - ] - else: - start_time_unix_nano = self._previous_point.time_unix_nano - sum_ = current_point.sum - self._previous_point.sum - bucket_counts = [ - curr_count - prev_count - for curr_count, prev_count in zip( - current_point.bucket_counts, - self._previous_point.bucket_counts, + return HistogramDataPoint( + attributes=self._attributes, + start_time_unix_nano=previous_collection_start_nano, + time_unix_nano=collection_start_nano, + count=sum(current_value), + sum=sum_, + bucket_counts=tuple(current_value), + explicit_bounds=self._boundaries, + min=min_, + max=max_, ) - ] - current_point = HistogramDataPoint( - attributes=self._attributes, - start_time_unix_nano=start_time_unix_nano, - time_unix_nano=current_point.time_unix_nano, - count=sum(bucket_counts), - sum=sum_, - bucket_counts=tuple(bucket_counts), - explicit_bounds=current_point.explicit_bounds, - min=min_, - max=max_, - ) - self._previous_point = current_point - return current_point + return HistogramDataPoint( + attributes=self._attributes, + start_time_unix_nano=self._start_time_unix_nano, + time_unix_nano=collection_start_nano, + count=sum(self._current_value), + sum=self._sum, + bucket_counts=tuple(self._current_value), + explicit_bounds=self._boundaries, + min=self._min, + max=self._max, + ) # pylint: disable=protected-access diff --git a/opentelemetry-sdk/tests/metrics/integration_test/test_explicit_bucket_histogram_aggregation.py b/opentelemetry-sdk/tests/metrics/integration_test/test_explicit_bucket_histogram_aggregation.py new file mode 100644 index 00000000000..052c5b4edbc --- /dev/null +++ b/opentelemetry-sdk/tests/metrics/integration_test/test_explicit_bucket_histogram_aggregation.py @@ -0,0 +1,250 @@ +# Copyright The 
OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from platform import system +from unittest import TestCase + +from pytest import mark + +from opentelemetry.sdk.metrics import Histogram, MeterProvider +from opentelemetry.sdk.metrics.export import ( + AggregationTemporality, + InMemoryMetricReader, +) +from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation + + +class TestExplicitBucketHistogramAggregation(TestCase): + + values = [ + 1.0, + 6.0, + 11.0, + 26.0, + 51.0, + 76.0, + 101.0, + 251.0, + 501.0, + 751.0, + ] + + @mark.skipif( + system() != "Linux", + reason=( + "Tests fail because Windows time_ns resolution is too low so " + "two different time measurements may end up having the exact same" + "value." + ), + ) + def test_synchronous_delta_temporality(self): + + aggregation = ExplicitBucketHistogramAggregation() + + reader = InMemoryMetricReader( + preferred_aggregation={Histogram: aggregation}, + preferred_temporality={Histogram: AggregationTemporality.DELTA}, + ) + + provider = MeterProvider(metric_readers=[reader]) + meter = provider.get_meter("name", "version") + + histogram = meter.create_histogram("histogram") + + results = [] + + for _ in range(10): + + results.append(reader.get_metrics_data()) + + for metrics_data in results: + self.assertIsNone(metrics_data) + + results = [] + + for value in self.values: + histogram.record(value) + results.append(reader.get_metrics_data()) + + previous_time_unix_nano = ( + results[0] + .resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + .time_unix_nano + ) + + self.assertEqual( + ( + results[0] + .resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + .bucket_counts + ), + (0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + ) + + self.assertLess( + ( + results[0] + .resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + .start_time_unix_nano + ), + previous_time_unix_nano, + ) + + for index, metrics_data in enumerate(results[1:]): + + metric_data = ( + metrics_data.resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + ) + + self.assertEqual( + previous_time_unix_nano, metric_data.start_time_unix_nano + ) + previous_time_unix_nano = metric_data.time_unix_nano + + self.assertEqual( + metric_data.bucket_counts, + tuple([1 if value == index + 2 else 0 for value in range(16)]), + ) + self.assertLess( + metric_data.start_time_unix_nano, metric_data.time_unix_nano + ) + + results = [] + + for _ in range(10): + + results.append(reader.get_metrics_data()) + + provider.shutdown() + + for metrics_data in results: + self.assertIsNone(metrics_data) + + @mark.skipif( + system() != "Linux", + reason=( + "Tests fail because Windows time_ns resolution is too low so " + "two different time measurements may end up having the exact same" + "value." 
+ ), + ) + def test_synchronous_cumulative_temporality(self): + + aggregation = ExplicitBucketHistogramAggregation() + + reader = InMemoryMetricReader( + preferred_aggregation={Histogram: aggregation}, + preferred_temporality={ + Histogram: AggregationTemporality.CUMULATIVE + }, + ) + + provider = MeterProvider(metric_readers=[reader]) + meter = provider.get_meter("name", "version") + + histogram = meter.create_histogram("histogram") + + results = [] + + for _ in range(10): + + results.append(reader.get_metrics_data()) + + for metrics_data in results: + self.assertIsNone(metrics_data) + + results = [] + + for value in self.values: + histogram.record(value) + results.append(reader.get_metrics_data()) + + start_time_unix_nano = ( + results[0] + .resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + .start_time_unix_nano + ) + + for index, metrics_data in enumerate(results): + + metric_data = ( + metrics_data.resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + ) + + self.assertEqual( + start_time_unix_nano, metric_data.start_time_unix_nano + ) + self.assertEqual( + metric_data.bucket_counts, + tuple( + [ + 1 + if inner_index <= index + 1 and inner_index > 0 + else 0 + for inner_index, value in enumerate(range(16)) + ] + ), + ) + + results = [] + + for _ in range(10): + + results.append(reader.get_metrics_data()) + + provider.shutdown() + + start_time_unix_nano = ( + results[0] + .resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + .start_time_unix_nano + ) + + for metrics_data in results: + + metric_data = ( + metrics_data.resource_metrics[0] + .scope_metrics[0] + .metrics[0] + .data.data_points[0] + ) + + self.assertEqual( + start_time_unix_nano, metric_data.start_time_unix_nano + ) + self.assertEqual( + metric_data.bucket_counts, + (0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0), + ) diff --git a/opentelemetry-sdk/tests/metrics/integration_test/test_histogram_export.py b/opentelemetry-sdk/tests/metrics/integration_test/test_histogram_export.py index 81d419819a4..eef81524d18 100644 --- a/opentelemetry-sdk/tests/metrics/integration_test/test_histogram_export.py +++ b/opentelemetry-sdk/tests/metrics/integration_test/test_histogram_export.py @@ -65,17 +65,26 @@ def test_histogram_counter_collection(self): metric_data = in_memory_metric_reader.get_metrics_data() - # FIXME ExplicitBucketHistogramAggregation is resetting counts to zero - # even if aggregation temporality is cumulative. 
self.assertEqual( - len(metric_data.resource_metrics[0].scope_metrics[0].metrics), 1 + len(metric_data.resource_metrics[0].scope_metrics[0].metrics), 2 ) + self.assertEqual( ( metric_data.resource_metrics[0] .scope_metrics[0] .metrics[0] .data.data_points[0] + .bucket_counts + ), + (0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + ) + self.assertEqual( + ( + metric_data.resource_metrics[0] + .scope_metrics[0] + .metrics[1] + .data.data_points[0] .value ), 1, diff --git a/opentelemetry-sdk/tests/metrics/test_aggregation.py b/opentelemetry-sdk/tests/metrics/test_aggregation.py index 9c9de1f2cbb..2de9936feef 100644 --- a/opentelemetry-sdk/tests/metrics/test_aggregation.py +++ b/opentelemetry-sdk/tests/metrics/test_aggregation.py @@ -283,22 +283,22 @@ def test_aggregate(self): # The first bucket keeps count of values between (-inf, 0] (-1 and 0) self.assertEqual( - explicit_bucket_histogram_aggregation._bucket_counts[0], 2 + explicit_bucket_histogram_aggregation._current_value[0], 2 ) # The second bucket keeps count of values between (0, 2] (1 and 2) self.assertEqual( - explicit_bucket_histogram_aggregation._bucket_counts[1], 2 + explicit_bucket_histogram_aggregation._current_value[1], 2 ) # The third bucket keeps count of values between (2, 4] (3 and 4) self.assertEqual( - explicit_bucket_histogram_aggregation._bucket_counts[2], 2 + explicit_bucket_histogram_aggregation._current_value[2], 2 ) # The fourth bucket keeps count of values between (4, inf) (3 and 4) self.assertEqual( - explicit_bucket_histogram_aggregation._bucket_counts[3], 1 + explicit_bucket_histogram_aggregation._current_value[3], 1 ) histo = explicit_bucket_histogram_aggregation.collect(
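
For reference, a minimal sketch of the fixed delta-temporality behavior, condensed from the new integration test added in this patch (illustrative only, not part of the commit). With this change, a delta histogram emits a point only when something was recorded since the previous collection, its start_time_unix_nano is the previous collection time, and an idle collection yields no data point instead of an all-zero histogram:

    from opentelemetry.sdk.metrics import Histogram, MeterProvider
    from opentelemetry.sdk.metrics.export import (
        AggregationTemporality,
        InMemoryMetricReader,
    )
    from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation

    reader = InMemoryMetricReader(
        preferred_aggregation={Histogram: ExplicitBucketHistogramAggregation()},
        preferred_temporality={Histogram: AggregationTemporality.DELTA},
    )
    provider = MeterProvider(metric_readers=[reader])
    meter = provider.get_meter("name", "version")
    histogram = meter.create_histogram("histogram")

    # No recordings since the last collection: the reader now reports nothing
    # rather than an all-zero histogram point.
    assert reader.get_metrics_data() is None

    histogram.record(6.0)
    point = (
        reader.get_metrics_data()
        .resource_metrics[0]
        .scope_metrics[0]
        .metrics[0]
        .data.data_points[0]
    )
    # 6.0 lands in the (5.0, 10.0] bucket of the default explicit boundaries.
    assert point.bucket_counts[2] == 1
    assert point.count == 1 and point.sum == 6.0

    provider.shutdown()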