From 15b63348816edb9c87c75fc06ae44368624f97be Mon Sep 17 00:00:00 2001 From: "Nathan J. Mehl" Date: Fri, 9 Dec 2022 15:44:34 -0500 Subject: [PATCH 1/3] Record counters as CUMULATIVE metrics This addresses https://github.com/google/go-metrics-stackdriver/issues/18 While using `|align delta_gauge()` in MQL is potentially a workaround, it suffers from the drawback of producing huge negative deltas when the counter resets, and the current behavior is unhappily surprising to first-time users who would reasonably assume that `IncrCounter()` would produce a CUMULATIVE metric. Signed-off-by: Nathan J. Mehl --- stackdriver.go | 27 ++++++++++++++++++++++----- stackdriver_test.go | 12 ++++++------ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/stackdriver.go b/stackdriver.go index 9331dca..f4f7f7a 100644 --- a/stackdriver.go +++ b/stackdriver.go @@ -325,8 +325,9 @@ func (s *Sink) deep() (time.Time, map[string]*gauge, map[string]*counter, map[st } for k, v := range s.counters { rCounters[k] = &counter{ - name: v.name, - value: v.value, + name: v.name, + value: v.value, + startTime: v.startTime, } } for k, v := range s.histograms { @@ -365,11 +366,14 @@ func (s *Sink) report(ctx context.Context) { Type: fmt.Sprintf("custom.googleapis.com/%s%s", s.prefix, name), Labels: labels, }, - MetricKind: metricpb.MetricDescriptor_GAUGE, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, Resource: resource, Points: []*monitoringpb.Point{ { Interval: &monitoringpb.TimeInterval{ + StartTime: &googlepb.Timestamp{ + Seconds: v.startTime.Unix(), + }, EndTime: &googlepb.Timestamp{ Seconds: end.Unix(), }, @@ -537,11 +541,23 @@ func (s *Sink) IncrCounterWithLabels(key []string, val float32, labels []metrics c, ok := s.counters[n.hash] if ok { + // counter exists; increment value c.value += float64(val) + // start times cannot be over 25 hours old; reset after 24h + age := time.Now().Unix() - c.startTime.Unix() + if age > 86400 { + // Start times _must_ be before the end time (which is set in Report()), + // so backdate our new start time to 1ms before the observed time. + c.startTime = time.Now().Add(time.Millisecond * -1) + } } else { + // init new counter s.counters[n.hash] = &counter{ name: n, value: float64(val), + // startTime must predate what GCM believes is "now" when we create the interval; + // so backdate by 1ms + startTime: time.Now().Add(time.Millisecond * -1), } } } @@ -645,8 +661,9 @@ type gauge struct { // https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TimeSeries#point type counter struct { - name *series - value float64 + name *series + value float64 + startTime time.Time } // https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TimeSeries#distribution diff --git a/stackdriver_test.go b/stackdriver_test.go index d8aa9fd..d8171f4 100644 --- a/stackdriver_test.go +++ b/stackdriver_test.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -365,7 +365,7 @@ func TestSample(t *testing.T) { Metric: &metricpb.Metric{ Type: "custom.googleapis.com/go-metrics/foo_bar_counter", }, - MetricKind: metricpb.MetricDescriptor_GAUGE, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, Points: []*monitoringpb.Point{ { Value: &monitoringpb.TypedValue{ @@ -402,7 +402,7 @@ func TestSample(t *testing.T) { "env": "dev", }, }, - MetricKind: metricpb.MetricDescriptor_GAUGE, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, Points: []*monitoringpb.Point{ { Value: &monitoringpb.TypedValue{ @@ -438,7 +438,7 @@ func TestSample(t *testing.T) { Metric: &metricpb.Metric{ Type: "custom.googleapis.com/go-metrics/foo_bar_counter", }, - MetricKind: metricpb.MetricDescriptor_GAUGE, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, Points: []*monitoringpb.Point{ { Value: &monitoringpb.TypedValue{ @@ -773,7 +773,7 @@ func TestExtract(t *testing.T) { "method": "bar", }, }, - MetricKind: metricpb.MetricDescriptor_GAUGE, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, Points: []*monitoringpb.Point{ { Value: &monitoringpb.TypedValue{ @@ -914,7 +914,7 @@ func TestExtract(t *testing.T) { "method": "baz", }, }, - MetricKind: metricpb.MetricDescriptor_GAUGE, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, Points: []*monitoringpb.Point{ { Value: &monitoringpb.TypedValue{ From cfc6ea5fa7079d4e0e27ca7c1c93954da46db72a Mon Sep 17 00:00:00 2001 From: "Nathan J. Mehl" Date: Mon, 2 Jan 2023 11:21:28 -0500 Subject: [PATCH 2/3] update README --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 0927208..d2345c4 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,15 @@ In general the author of this package would recommend instrumenting custom metri This package is intended as a way to publish metrics for applications that are _already_ instrumented with `go-metrics` without having to use a sidecar process like [stackdriver-prometheus-sidecar](https://github.com/Stackdriver/stackdriver-prometheus-sidecar). +## 🚨 Upgrading + +Between v0.5.0 and v0.6.0, the behavior of the `IncrCounter` method changed: previously it would create a `GAUGE` [metric kind](https://cloud.google.com/monitoring/api/v3/kinds-and-types), but from v0.6.0 forward it will create a `CUMULATIVE` metric kind. (See https://github.com/google/go-metrics-stackdriver/issues/18 for a discussion.) + +However, once a [MetricDescriptor](https://cloud.google.com/logging/docs/reference/v2/rest/v2/projects.metrics#MetricDescriptor) has been created in Google Cloud Monitoring, its `metricKind` field cannot be changed. So if you have any _existing_ `GAUGE` metrics that were created by `IncrCounter`, you will see errors in your logs when the v0.6.0 client attempts to update them and fails. Your options for handling this are: + +1. Change the name of the metric you are passing to `IncrCounter` (creating a new metricDescriptor with a different name), or: +2. Delete the existing metricDescriptor using the [delete API](https://cloud.google.com/monitoring/api/ref_v3/rest/v3/projects.metricDescriptors/delete) and let go-metrics re-create it as a `CUMULATIVE` metric + ## Details [stackdriver.NewSink](https://godoc.org/github.com/google/go-metrics-stackdriver#NewSink)'s return value satisfies the go-metrics library's [MetricSink](https://godoc.org/github.com/armon/go-metrics#MetricSink) interface. When providing a `stackdriver.Sink` to libraries and applications instrumented against `MetricSink`, the metrics will be aggregated within this library and written to stackdriver as [Generic Task](https://cloud.google.com/monitoring/api/resources#tag_generic_task) timeseries metrics. From 5a05d309d561d6a837991e1a9c0799926a0bde95 Mon Sep 17 00:00:00 2001 From: "Nathan J. Mehl" Date: Mon, 2 Jan 2023 16:32:33 -0500 Subject: [PATCH 3/3] add resetcounter methods --- README.md | 6 ++-- stackdriver.go | 28 +++++++++++++++++ stackdriver_test.go | 75 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d2345c4..6b70871 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,15 @@ This package is intended as a way to publish metrics for applications that are _ ## 🚨 Upgrading -Between v0.5.0 and v0.6.0, the behavior of the `IncrCounter` method changed: previously it would create a `GAUGE` [metric kind](https://cloud.google.com/monitoring/api/v3/kinds-and-types), but from v0.6.0 forward it will create a `CUMULATIVE` metric kind. (See https://github.com/google/go-metrics-stackdriver/issues/18 for a discussion.) +Between v0.5.0 and v0.6.0, the behavior of the `IncrCounter()` method changed: previously it would create a `GAUGE` [metric kind](https://cloud.google.com/monitoring/api/v3/kinds-and-types), but from v0.6.0 forward it will create a `CUMULATIVE` metric kind. (See https://github.com/google/go-metrics-stackdriver/issues/18 for a discussion.) -However, once a [MetricDescriptor](https://cloud.google.com/logging/docs/reference/v2/rest/v2/projects.metrics#MetricDescriptor) has been created in Google Cloud Monitoring, its `metricKind` field cannot be changed. So if you have any _existing_ `GAUGE` metrics that were created by `IncrCounter`, you will see errors in your logs when the v0.6.0 client attempts to update them and fails. Your options for handling this are: +However, once a [MetricDescriptor](https://cloud.google.com/logging/docs/reference/v2/rest/v2/projects.metrics#MetricDescriptor) has been created in Google Cloud Monitoring, its `metricKind` field cannot be changed. So if you have any _existing_ `GAUGE` metrics that were created by `IncrCounter()`, you will see errors in your logs when the v0.6.0 client attempts to update them and fails. Your options for handling this are: 1. Change the name of the metric you are passing to `IncrCounter` (creating a new metricDescriptor with a different name), or: 2. Delete the existing metricDescriptor using the [delete API](https://cloud.google.com/monitoring/api/ref_v3/rest/v3/projects.metricDescriptors/delete) and let go-metrics re-create it as a `CUMULATIVE` metric +Additionally, v0.6.0 adds `ResetCounter()` and `ResetCounterWithLabels()` methods: calling these methods resets the counter value to zero. + ## Details [stackdriver.NewSink](https://godoc.org/github.com/google/go-metrics-stackdriver#NewSink)'s return value satisfies the go-metrics library's [MetricSink](https://godoc.org/github.com/armon/go-metrics#MetricSink) interface. When providing a `stackdriver.Sink` to libraries and applications instrumented against `MetricSink`, the metrics will be aggregated within this library and written to stackdriver as [Generic Task](https://cloud.google.com/monitoring/api/resources#tag_generic_task) timeseries metrics. diff --git a/stackdriver.go b/stackdriver.go index f4f7f7a..fae04b7 100644 --- a/stackdriver.go +++ b/stackdriver.go @@ -562,6 +562,34 @@ func (s *Sink) IncrCounterWithLabels(key []string, val float32, labels []metrics } } +// ResetCounter resets a counter to zero +func (s *Sink) ResetCounter(key []string) { + s.ResetCounterWithLabels(key, nil) +} + +// ResetCounterWithLabels resets a counter to zero +func (s *Sink) ResetCounterWithLabels(key []string, labels []metrics.Label) { + n := newSeries(key, labels) + s.mu.Lock() + defer s.mu.Unlock() + + initVal := float64(0) + startTime := time.Now().Add(time.Millisecond * -1) + c, ok := s.counters[n.hash] + if ok { + // counter exists, reset to 0 and reset startTime + c.value = initVal + c.startTime = startTime + } else { + // counter did not exist, init at 0 value + s.counters[n.hash] = &counter{ + name: n, + value: initVal, + startTime: startTime, + } + } +} + // AddSample adds a sample to a histogram metric. func (s *Sink) AddSample(key []string, val float32) { s.AddSampleWithLabels(key, val, nil) diff --git a/stackdriver_test.go b/stackdriver_test.go index d8171f4..8aefbd1 100644 --- a/stackdriver_test.go +++ b/stackdriver_test.go @@ -458,6 +458,81 @@ func TestSample(t *testing.T) { } }, }, + { + name: "reset counter", + collect: func() { + ss.IncrCounter([]string{"foo", "bar"}, 1.0) + ss.IncrCounter([]string{"foo", "bar"}, 1.0) + ss.ResetCounter([]string{"foo", "bar"}) + }, + createFn: func(t *testing.T) func(context.Context, *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) { + return func(_ context.Context, req *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) { + want := &monitoringpb.CreateTimeSeriesRequest{ + Name: "projects/foo", + TimeSeries: []*monitoringpb.TimeSeries{ + { + Metric: &metricpb.Metric{ + Type: "custom.googleapis.com/go-metrics/foo_bar_counter", + }, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, + Points: []*monitoringpb.Point{ + { + Value: &monitoringpb.TypedValue{ + Value: &monitoringpb.TypedValue_DoubleValue{ + DoubleValue: 0.0, + }, + }, + }, + }, + }, + }, + } + if diff := diffCreateMsg(want, req); diff != "" { + t.Errorf("unexpected CreateTimeSeriesRequest (-want +got):\n%s", diff) + } + return &emptypb.Empty{}, nil + } + }, + }, + { + name: "reset counter with label", + collect: func() { + ss.IncrCounterWithLabels([]string{"foo", "bar"}, 1.0, []metrics.Label{{Name: "env", Value: "dev"}}) + ss.IncrCounterWithLabels([]string{"foo", "bar"}, 1.0, []metrics.Label{{Name: "env", Value: "dev"}}) + ss.ResetCounterWithLabels([]string{"foo", "bar"}, []metrics.Label{{Name: "env", Value: "dev"}}) + }, + createFn: func(t *testing.T) func(context.Context, *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) { + return func(_ context.Context, req *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) { + want := &monitoringpb.CreateTimeSeriesRequest{ + Name: "projects/foo", + TimeSeries: []*monitoringpb.TimeSeries{ + { + Metric: &metricpb.Metric{ + Type: "custom.googleapis.com/go-metrics/foo_bar_counter", + Labels: map[string]string{ + "env": "dev", + }, + }, + MetricKind: metricpb.MetricDescriptor_CUMULATIVE, + Points: []*monitoringpb.Point{ + { + Value: &monitoringpb.TypedValue{ + Value: &monitoringpb.TypedValue_DoubleValue{ + DoubleValue: 0.0, + }, + }, + }, + }, + }, + }, + } + if diff := diffCreateMsg(want, req); diff != "" { + t.Errorf("unexpected CreateTimeSeriesRequest (-want +got):\n%s", diff) + } + return &emptypb.Empty{}, nil + } + }, + }, { name: "gauge", collect: func() {