From 1009e52472013af2dc29d37c5f92767e1fddf1a4 Mon Sep 17 00:00:00 2001 From: Patrick Robinson Date: Thu, 24 Sep 2020 15:28:45 +1000 Subject: [PATCH 1/4] Specify StorageResolution If metrics are retrieved at a higher resolution than 1min we should store them at a higher resolution --- backend/cloudwatch.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/backend/cloudwatch.go b/backend/cloudwatch.go index 7abad8d1..b221ff86 100644 --- a/backend/cloudwatch.go +++ b/backend/cloudwatch.go @@ -62,6 +62,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { svc := cloudwatch.New(sess) metrics := []*cloudwatch.MetricDatum{} + duration := r.PollDuration.Milliseconds() / 1000 // Set the baseline org dimension dimensions := []*cloudwatch.Dimension{ @@ -78,7 +79,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { } // Add total metrics - metrics = append(metrics, cloudwatchMetrics(r.Totals, nil)...) + metrics = append(metrics, cloudwatchMetrics(r.Totals, nil, duration)...) for name, c := range r.Queues { queueDimensions := dimensions @@ -89,7 +90,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { ) // Add per-queue metrics - metrics = append(metrics, cloudwatchMetrics(c, queueDimensions)...) + metrics = append(metrics, cloudwatchMetrics(c, queueDimensions, duration)...) 
} log.Printf("Extracted %d cloudwatch metrics from results", len(metrics)) @@ -109,15 +110,16 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { return nil } -func cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension) []*cloudwatch.MetricDatum { +func cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension, duration int64) []*cloudwatch.MetricDatum { m := []*cloudwatch.MetricDatum{} for k, v := range counts { m = append(m, &cloudwatch.MetricDatum{ - MetricName: aws.String(k), - Dimensions: dimensions, - Value: aws.Float64(float64(v)), - Unit: aws.String("Count"), + MetricName: aws.String(k), + Dimensions: dimensions, + Value: aws.Float64(float64(v)), + Unit: aws.String("Count"), + StorageResolution: &duration, }) } From 55073033a7f754d495d4aa3b1030ce574c1f1217 Mon Sep 17 00:00:00 2001 From: Michael Pearson Date: Wed, 30 Sep 2020 17:33:55 +1000 Subject: [PATCH 2/4] Fix CW metrics put to send as high frequency metrics, add note to README --- README.md | 26 ++++++++++++++------------ backend/cloudwatch.go | 7 +++++++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 4888aace..3d51eea9 100644 --- a/README.md +++ b/README.md @@ -47,34 +47,34 @@ It's entrypoint is `handler`, it requires a `go1.x` environment and respects the - `BUILDKITE_BACKEND` : The name of the backend to use (e.g. `cloudwatch`, `statsd`, `prometheus` or `stackdriver`). - `BUILDKITE_QUEUE` : A comma separated list of Buildkite queues to process (e.g. `backend-deploy,ui-deploy`). - `BUILDKITE_QUIET` : A boolean specifying that only `ERROR` log lines must be printed. (e.g. `1`, `true`). - - `BUILDKITE_CLOUDWATCH_DIMENSIONS` : A comma separated list in the form of Key=Value, Other=Value containing the Cloudwatch dimensions to index metrics under. 
- + - `BUILDKITE_CLOUDWATCH_DIMENSIONS` : A comma separated list in the form of Key=Value, Other=Value containing the Cloudwatch dimensions to index metrics under. + Additionally, one of the following groups of environment variables must be set in order to define how the Lambda function should obtain the required Buildkite API token: - + ##### Option 1 - Provide the token as plain-text - + - `BUILDKITE_AGENT_TOKEN` : The Buildkite agent API token to use. - + #### Option 2 - Retrieve token from AWS Systems Manager -- `BUILDKITE_AGENT_TOKEN_SSM_KEY` : The parameter name which contains the token value in AWS -Systems Manager. - -**Note**: Parameters stored as `String` and `SecureString` are currently supported. +- `BUILDKITE_AGENT_TOKEN_SSM_KEY` : The parameter name which contains the token value in AWS +Systems Manager. + +**Note**: Parameters stored as `String` and `SecureString` are currently supported. #### Option 3 - Retrieve token from AWS Secrets Manager - `BUILDKITE_AGENT_SECRETS_MANAGER_SECRET_ID`: The id of the secret which contains the token value -in AWS Secrets Manager. +in AWS Secrets Manager. - (Optional) `BUILDKITE_AGENT_SECRETS_MANAGER_JSON_KEY`: The JSON key containing the token value in the secret JSON blob. **Note 1**: Both `SecretBinary` and `SecretString` are supported. In the case of `SecretBinary`, the secret payload will be automatically decoded and returned as a plain-text string. -**Note 2**: `BUILDKITE_AGENT_SECRETS_MANAGER_JSON_KEY` can be used on secrets of type `SecretBinary` only if their -binary payload corresponds to a valid JSON object containing the provided key. +**Note 2**: `BUILDKITE_AGENT_SECRETS_MANAGER_JSON_KEY` can be used on secrets of type `SecretBinary` only if their +binary payload corresponds to a valid JSON object containing the provided key. 
```bash aws lambda create-function \ @@ -110,6 +110,8 @@ The Cloudwatch backend supports the following arguments: * `-cloudwatch-dimensions`: A optional custom dimension in the form of `Key=Value, Key=Value` +If `-interval` is less than 60 seconds the metrics will be sent to CloudWatch as [High-Resolution Metrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics). + The StatsD backend supports the following arguments: * `-statsd-host HOST`: The StatsD host and port (defaults to `127.0.0.1:8125`). diff --git a/backend/cloudwatch.go b/backend/cloudwatch.go index b221ff86..7ec04394 100644 --- a/backend/cloudwatch.go +++ b/backend/cloudwatch.go @@ -113,6 +113,13 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { func cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension, duration int64) []*cloudwatch.MetricDatum { m := []*cloudwatch.MetricDatum{} + if duration < 60 { + // PutMetricData supports either normal (60s) or high frequency (1s) + // metrics - other values result in an error. 
+ duration = 1 + } else { + duration = 60 + } for k, v := range counts { m = append(m, &cloudwatch.MetricDatum{ MetricName: aws.String(k), From 39a928a3a2e6a7631a87b88986a88fa530aa2242 Mon Sep 17 00:00:00 2001 From: Patrick Robinson Date: Fri, 12 Jul 2024 15:55:32 +1000 Subject: [PATCH 3/4] Determine interval since last run via command line or lambda global variable The API poll duration is only a lower bound --- backend/cloudwatch.go | 10 ++++++---- lambda/main.go | 8 +++++--- main.go | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/backend/cloudwatch.go b/backend/cloudwatch.go index 7ec04394..84293b08 100644 --- a/backend/cloudwatch.go +++ b/backend/cloudwatch.go @@ -42,13 +42,15 @@ func ParseCloudWatchDimensions(ds string) ([]CloudWatchDimension, error) { type CloudWatchBackend struct { region string dimensions []CloudWatchDimension + interval int64 } // NewCloudWatchBackend returns a new CloudWatchBackend with optional dimensions -func NewCloudWatchBackend(region string, dimensions []CloudWatchDimension) *CloudWatchBackend { +func NewCloudWatchBackend(region string, dimensions []CloudWatchDimension, interval int64) *CloudWatchBackend { return &CloudWatchBackend{ region: region, dimensions: dimensions, + interval: interval, } } @@ -62,7 +64,6 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { svc := cloudwatch.New(sess) metrics := []*cloudwatch.MetricDatum{} - duration := r.PollDuration.Milliseconds() / 1000 // Set the baseline org dimension dimensions := []*cloudwatch.Dimension{ @@ -79,7 +80,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { } // Add total metrics - metrics = append(metrics, cloudwatchMetrics(r.Totals, nil, cb.interval)...) 
for name, c := range r.Queues { queueDimensions := dimensions @@ -90,7 +91,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { ) // Add per-queue metrics - metrics = append(metrics, cloudwatchMetrics(c, queueDimensions, duration)...) + metrics = append(metrics, cloudwatchMetrics(c, queueDimensions, cb.interval)...) } log.Printf("Extracted %d cloudwatch metrics from results", len(metrics)) @@ -120,6 +121,7 @@ func cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension } else { duration = 60 } + for k, v := range counts { m = append(m, &cloudwatch.MetricDatum{ MetricName: aws.String(k), diff --git a/lambda/main.go b/lambda/main.go index 5b19e12a..bce50c64 100644 --- a/lambda/main.go +++ b/lambda/main.go @@ -31,6 +31,7 @@ const ( var ( nextPollTime time.Time + lastPollTime time.Time ) func main() { @@ -117,7 +118,7 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) { if err != nil { return "", err } - b = backend.NewCloudWatchBackend(awsRegion, dimensions) + b = backend.NewCloudWatchBackend(awsRegion, dimensions, int64(time.Since(lastPollTime).Seconds())) } res, err := c.Collect() @@ -140,10 +141,11 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) { } } - log.Printf("Finished in %s", time.Now().Sub(t)) + lastPollTime = time.Now() + log.Printf("Finished in %s", lastPollTime.Sub(t)) // Store the next acceptable poll time in global state - nextPollTime = time.Now().Add(res.PollDuration) + nextPollTime = lastPollTime.Add(res.PollDuration) return "", nil } diff --git a/main.go b/main.go index e5c348de..67af3089 100644 --- a/main.go +++ b/main.go @@ -74,7 +74,7 @@ func main() { fmt.Println(err) os.Exit(1) } - bk = backend.NewCloudWatchBackend(region, dimensions) + bk = backend.NewCloudWatchBackend(region, dimensions, int64(interval.Seconds())) case "statsd": bk, err = backend.NewStatsDBackend(*statsdHost, *statsdTags) if err != nil { From c30c4e4377e6d52cb6541db0b4ee672b988c5172 Mon Sep 
17 00:00:00 2001 From: Patrick Robinson Date: Fri, 12 Jul 2024 16:04:47 +1000 Subject: [PATCH 4/4] Put it behind a feature flag Avoid accidentally incurring more charges unless it's explicitly enabled --- README.md | 7 ++++--- backend/cloudwatch.go | 13 ++++++++----- lambda/main.go | 4 +++- main.go | 3 ++- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index b1688ae0..e4884cdd 100644 --- a/README.md +++ b/README.md @@ -47,10 +47,11 @@ It's entrypoint is `handler`, it requires a `go1.x` environment and respects the - `BUILDKITE_QUEUE` : A comma separated list of Buildkite queues to process (e.g. `backend-deploy,ui-deploy`). - `BUILDKITE_QUIET` : A boolean specifying that only `ERROR` log lines must be printed. (e.g. `1`, `true`). - `BUILDKITE_CLOUDWATCH_DIMENSIONS` : A comma separated list in the form of Key=Value, Other=Value containing the Cloudwatch dimensions to index metrics under. + - `BUILDKITE_CLOUDWATCH_HIGH_RESOLUTION` : Whether to enable [High-Resolution Metrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics) which incurs additional charges. Additionally, one of the following groups of environment variables must be set in order to define how the Lambda function should obtain the required Buildkite Agent API token: - + ##### Option 1 - Provide the token as plain-text - `BUILDKITE_AGENT_TOKEN` : The Buildkite Agent API token to use. @@ -121,6 +122,8 @@ Usage of buildkite-agent-metrics: A custom Buildkite Agent API endpoint (default "https://agent.buildkite.com/v3") -interval duration Update metrics every interval, rather than once + -cloudwatch-high-resolution + If `-interval` is less than 60 seconds send metrics to CloudWatch as [High-Resolution Metrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics) which incurs additional charges. 
-newrelic-app-name string New Relic application name for metric events -newrelic-license-key string @@ -153,8 +156,6 @@ The Cloudwatch backend supports the following arguments: * `-cloudwatch-dimensions`: A optional custom dimension in the form of `Key=Value, Key=Value` -If `-interval` is less than 60 seconds the metrics will be sent to CloudWatch as [High-Resolution Metrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics). - The StatsD backend supports the following arguments: * `-statsd-host HOST`: The StatsD host and port (defaults to `127.0.0.1:8125`). diff --git a/backend/cloudwatch.go b/backend/cloudwatch.go index 84293b08..616794a1 100644 --- a/backend/cloudwatch.go +++ b/backend/cloudwatch.go @@ -43,14 +43,16 @@ type CloudWatchBackend struct { region string dimensions []CloudWatchDimension interval int64 + enableHighResolution bool } // NewCloudWatchBackend returns a new CloudWatchBackend with optional dimensions -func NewCloudWatchBackend(region string, dimensions []CloudWatchDimension, interval int64) *CloudWatchBackend { +func NewCloudWatchBackend(region string, dimensions []CloudWatchDimension, interval int64, enableHighResolution bool) *CloudWatchBackend { return &CloudWatchBackend{ region: region, dimensions: dimensions, interval: interval, + enableHighResolution: enableHighResolution, } } @@ -80,7 +82,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { } // Add total metrics - metrics = append(metrics, cloudwatchMetrics(r.Totals, nil, cb.interval)...) + metrics = append(metrics, cb.cloudwatchMetrics(r.Totals, nil)...) for name, c := range r.Queues { queueDimensions := dimensions @@ -91,7 +93,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { ) // Add per-queue metrics - metrics = append(metrics, cloudwatchMetrics(c, queueDimensions, cb.interval)...) + metrics = append(metrics, cb.cloudwatchMetrics(c, queueDimensions)...) 
} log.Printf("Extracted %d cloudwatch metrics from results", len(metrics)) @@ -111,10 +113,11 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error { return nil } -func cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension, duration int64) []*cloudwatch.MetricDatum { +func (cb *CloudWatchBackend) cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension) []*cloudwatch.MetricDatum { m := []*cloudwatch.MetricDatum{} - if duration < 60 { + var duration int64 + if cb.interval < 60 && cb.enableHighResolution { // PutMetricData supports either normal (60s) or high frequency (1s) // metrics - other values result in an error. duration = 1 diff --git a/lambda/main.go b/lambda/main.go index bce50c64..5fd46b89 100644 --- a/lambda/main.go +++ b/lambda/main.go @@ -56,7 +56,9 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) { queue := os.Getenv("BUILDKITE_QUEUE") clwDimensions := os.Getenv("BUILDKITE_CLOUDWATCH_DIMENSIONS") quietString := os.Getenv("BUILDKITE_QUIET") + enableHighResolutionString := os.Getenv("BUILDKITE_CLOUDWATCH_HIGH_RESOLUTION") quiet := quietString == "1" || quietString == "true" + enableHighResolution := enableHighResolutionString == "1" || enableHighResolutionString == "true" if quiet { log.SetOutput(ioutil.Discard) @@ -118,7 +120,7 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) { if err != nil { return "", err } - b = backend.NewCloudWatchBackend(awsRegion, dimensions, int64(time.Since(lastPollTime).Seconds())) + b = backend.NewCloudWatchBackend(awsRegion, dimensions, int64(time.Since(lastPollTime).Seconds()), enableHighResolution) } res, err := c.Collect() diff --git a/main.go b/main.go index 67af3089..15add93d 100644 --- a/main.go +++ b/main.go @@ -35,6 +35,7 @@ func main() { prometheusPath = flag.String("prometheus-path", "/metrics", "Prometheus metrics transport path") clwRegion = flag.String("cloudwatch-region", "", "AWS Region to connect to, 
defaults to $AWS_REGION or us-east-1") clwDimensions = flag.String("cloudwatch-dimensions", "", "Cloudwatch dimensions to index metrics under, in the form of Key=Value, Other=Value") + clwHighResolution = flag.Bool("cloudwatch-high-resolution", false, "Send metrics at a high-resolution, which incurs extra costs") gcpProjectID = flag.String("stackdriver-projectid", "", "Specify Stackdriver Project ID") nrAppName = flag.String("newrelic-app-name", "", "New Relic application name for metric events") nrLicenseKey = flag.String("newrelic-license-key", "", "New Relic license key for publishing events") @@ -74,7 +75,7 @@ func main() { fmt.Println(err) os.Exit(1) } - bk = backend.NewCloudWatchBackend(region, dimensions, int64(interval.Seconds())) + bk = backend.NewCloudWatchBackend(region, dimensions, int64(interval.Seconds()), *clwHighResolution) case "statsd": bk, err = backend.NewStatsDBackend(*statsdHost, *statsdTags) if err != nil {