Skip to content
This repository was archived by the owner on Dec 3, 2024. It is now read-only.

Record counters as CUMULATIVE metrics #24

Merged
merged 3 commits into from
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ In general the author of this package would recommend instrumenting custom metri

This package is intended as a way to publish metrics for applications that are _already_ instrumented with `go-metrics` without having to use a sidecar process like [stackdriver-prometheus-sidecar](https://github.com/Stackdriver/stackdriver-prometheus-sidecar).

## 🚨 Upgrading

Between v0.5.0 and v0.6.0, the behavior of the `IncrCounter()` method changed: previously it would create a `GAUGE` [metric kind](https://cloud.google.com/monitoring/api/v3/kinds-and-types), but from v0.6.0 forward it will create a `CUMULATIVE` metric kind. (See https://github.com/google/go-metrics-stackdriver/issues/18 for a discussion.)

However, once a [MetricDescriptor](https://cloud.google.com/logging/docs/reference/v2/rest/v2/projects.metrics#MetricDescriptor) has been created in Google Cloud Monitoring, its `metricKind` field cannot be changed. So if you have any _existing_ `GAUGE` metrics that were created by `IncrCounter()`, you will see errors in your logs when the v0.6.0 client attempts to update them and fails. Your options for handling this are:

1. Change the name of the metric you are passing to `IncrCounter` (creating a new metricDescriptor with a different name), or:
2. Delete the existing metricDescriptor using the [delete API](https://cloud.google.com/monitoring/api/ref_v3/rest/v3/projects.metricDescriptors/delete) and let go-metrics re-create it as a `CUMULATIVE` metric

Additionally, v0.6.0 adds `ResetCounter()` and `ResetCounterWithLabels()` methods: calling these methods resets the counter value to zero.

## Details

[stackdriver.NewSink](https://godoc.org/github.com/google/go-metrics-stackdriver#NewSink)'s return value satisfies the go-metrics library's [MetricSink](https://godoc.org/github.com/armon/go-metrics#MetricSink) interface. When providing a `stackdriver.Sink` to libraries and applications instrumented against `MetricSink`, the metrics will be aggregated within this library and written to stackdriver as [Generic Task](https://cloud.google.com/monitoring/api/resources#tag_generic_task) timeseries metrics.
Expand Down
55 changes: 50 additions & 5 deletions stackdriver.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,9 @@ func (s *Sink) deep() (time.Time, map[string]*gauge, map[string]*counter, map[st
}
for k, v := range s.counters {
rCounters[k] = &counter{
name: v.name,
value: v.value,
name: v.name,
value: v.value,
startTime: v.startTime,
}
}
for k, v := range s.histograms {
Expand Down Expand Up @@ -365,11 +366,14 @@ func (s *Sink) report(ctx context.Context) {
Type: fmt.Sprintf("custom.googleapis.com/%s%s", s.prefix, name),
Labels: labels,
},
MetricKind: metricpb.MetricDescriptor_GAUGE,
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Resource: resource,
Points: []*monitoringpb.Point{
{
Interval: &monitoringpb.TimeInterval{
StartTime: &googlepb.Timestamp{
Seconds: v.startTime.Unix(),
},
EndTime: &googlepb.Timestamp{
Seconds: end.Unix(),
},
Expand Down Expand Up @@ -537,11 +541,51 @@ func (s *Sink) IncrCounterWithLabels(key []string, val float32, labels []metrics

c, ok := s.counters[n.hash]
if ok {
// counter exists; increment value
c.value += float64(val)
// start times cannot be over 25 hours old; reset after 24h
age := time.Now().Unix() - c.startTime.Unix()
if age > 86400 {
// Start times _must_ be before the end time (which is set in Report()),
// so backdate our new start time to 1ms before the observed time.
c.startTime = time.Now().Add(time.Millisecond * -1)
}
} else {
// init new counter
s.counters[n.hash] = &counter{
name: n,
value: float64(val),
// startTime must predate what GCM believes is "now" when we create the interval;
// so backdate by 1ms
startTime: time.Now().Add(time.Millisecond * -1),
}
}
}

// ResetCounter resets a counter to zero
func (s *Sink) ResetCounter(key []string) {
s.ResetCounterWithLabels(key, nil)
}

// ResetCounterWithLabels resets a counter to zero
func (s *Sink) ResetCounterWithLabels(key []string, labels []metrics.Label) {
n := newSeries(key, labels)
s.mu.Lock()
defer s.mu.Unlock()

initVal := float64(0)
startTime := time.Now().Add(time.Millisecond * -1)
c, ok := s.counters[n.hash]
if ok {
// counter exists, reset to 0 and reset startTime
c.value = initVal
c.startTime = startTime
} else {
// counter did not exist, init at 0 value
s.counters[n.hash] = &counter{
name: n,
value: initVal,
startTime: startTime,
}
}
}
Expand Down Expand Up @@ -645,8 +689,9 @@ type gauge struct {

// https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TimeSeries#point
type counter struct {
name *series
value float64
name *series
value float64
startTime time.Time
}

// https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TimeSeries#distribution
Expand Down
87 changes: 81 additions & 6 deletions stackdriver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
Expand Down Expand Up @@ -365,7 +365,7 @@ func TestSample(t *testing.T) {
Metric: &metricpb.Metric{
Type: "custom.googleapis.com/go-metrics/foo_bar_counter",
},
MetricKind: metricpb.MetricDescriptor_GAUGE,
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Points: []*monitoringpb.Point{
{
Value: &monitoringpb.TypedValue{
Expand Down Expand Up @@ -402,7 +402,7 @@ func TestSample(t *testing.T) {
"env": "dev",
},
},
MetricKind: metricpb.MetricDescriptor_GAUGE,
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Points: []*monitoringpb.Point{
{
Value: &monitoringpb.TypedValue{
Expand Down Expand Up @@ -438,7 +438,7 @@ func TestSample(t *testing.T) {
Metric: &metricpb.Metric{
Type: "custom.googleapis.com/go-metrics/foo_bar_counter",
},
MetricKind: metricpb.MetricDescriptor_GAUGE,
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Points: []*monitoringpb.Point{
{
Value: &monitoringpb.TypedValue{
Expand All @@ -458,6 +458,81 @@ func TestSample(t *testing.T) {
}
},
},
{
name: "reset counter",
collect: func() {
ss.IncrCounter([]string{"foo", "bar"}, 1.0)
ss.IncrCounter([]string{"foo", "bar"}, 1.0)
ss.ResetCounter([]string{"foo", "bar"})
},
createFn: func(t *testing.T) func(context.Context, *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) {
return func(_ context.Context, req *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) {
want := &monitoringpb.CreateTimeSeriesRequest{
Name: "projects/foo",
TimeSeries: []*monitoringpb.TimeSeries{
{
Metric: &metricpb.Metric{
Type: "custom.googleapis.com/go-metrics/foo_bar_counter",
},
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Points: []*monitoringpb.Point{
{
Value: &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_DoubleValue{
DoubleValue: 0.0,
},
},
},
},
},
},
}
if diff := diffCreateMsg(want, req); diff != "" {
t.Errorf("unexpected CreateTimeSeriesRequest (-want +got):\n%s", diff)
}
return &emptypb.Empty{}, nil
}
},
},
{
name: "reset counter with label",
collect: func() {
ss.IncrCounterWithLabels([]string{"foo", "bar"}, 1.0, []metrics.Label{{Name: "env", Value: "dev"}})
ss.IncrCounterWithLabels([]string{"foo", "bar"}, 1.0, []metrics.Label{{Name: "env", Value: "dev"}})
ss.ResetCounterWithLabels([]string{"foo", "bar"}, []metrics.Label{{Name: "env", Value: "dev"}})
},
createFn: func(t *testing.T) func(context.Context, *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) {
return func(_ context.Context, req *monitoringpb.CreateTimeSeriesRequest) (*emptypb.Empty, error) {
want := &monitoringpb.CreateTimeSeriesRequest{
Name: "projects/foo",
TimeSeries: []*monitoringpb.TimeSeries{
{
Metric: &metricpb.Metric{
Type: "custom.googleapis.com/go-metrics/foo_bar_counter",
Labels: map[string]string{
"env": "dev",
},
},
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Points: []*monitoringpb.Point{
{
Value: &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_DoubleValue{
DoubleValue: 0.0,
},
},
},
},
},
},
}
if diff := diffCreateMsg(want, req); diff != "" {
t.Errorf("unexpected CreateTimeSeriesRequest (-want +got):\n%s", diff)
}
return &emptypb.Empty{}, nil
}
},
},
{
name: "gauge",
collect: func() {
Expand Down Expand Up @@ -773,7 +848,7 @@ func TestExtract(t *testing.T) {
"method": "bar",
},
},
MetricKind: metricpb.MetricDescriptor_GAUGE,
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Points: []*monitoringpb.Point{
{
Value: &monitoringpb.TypedValue{
Expand Down Expand Up @@ -914,7 +989,7 @@ func TestExtract(t *testing.T) {
"method": "baz",
},
},
MetricKind: metricpb.MetricDescriptor_GAUGE,
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Points: []*monitoringpb.Point{
{
Value: &monitoringpb.TypedValue{
Expand Down