Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify storage resolution #125

Merged
merged 6 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ It requires a `provided.al2` environment and respects the following env vars:
- `BUILDKITE_CLOUDWATCH_DIMENSIONS` : A comma separated list in the form of
`Key=Value,Other=Value` containing the Cloudwatch dimensions to index metrics
under.
- `BUILDKITE_CLOUDWATCH_HIGH_RESOLUTION` : Set to `1` or `true` to enable [High-Resolution Metrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics), which incur additional charges.

To adjust timeouts, and connection pooling in the HTTP client use the following env vars:

Expand Down Expand Up @@ -160,7 +161,9 @@ Usage of buildkite-agent-metrics:
-endpoint string
A custom Buildkite Agent API endpoint (default "https://agent.buildkite.com/v3")
-interval duration
Update metrics every interval, rather than once
Update metrics every interval, rather than once
-cloudwatch-high-resolution
If `-interval` is less than 60 seconds, send metrics to CloudWatch as [High-Resolution Metrics](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics), which incur additional charges.
-max-idle-conns int
Maximum number of idle (keep-alive) HTTP connections for Buildkite Agent API. Zero means no limit, -1 disables connection reuse. (default 100)
-newrelic-app-name string
Expand Down
30 changes: 22 additions & 8 deletions backend/cloudwatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,17 @@ func ParseCloudWatchDimensions(ds string) ([]CloudWatchDimension, error) {
type CloudWatchBackend struct {
// region is the AWS region given to NewCloudWatchBackend.
region string
// dimensions are the CloudWatch dimensions given to NewCloudWatchBackend.
dimensions []CloudWatchDimension
// interval is the time between collections, in seconds, as reported by
// the caller. It is compared against 60 when choosing the storage
// resolution of emitted metrics.
interval int64
// enableHighResolution opts in to a 1-second storage resolution. It only
// takes effect when interval is below 60; otherwise 60 is used.
enableHighResolution bool
}

// NewCloudWatchBackend returns a new CloudWatchBackend with optional dimensions.
// interval is the time between collections in seconds. enableHighResolution
// requests a 1-second storage resolution for emitted metrics, which is only
// applied when interval is below 60; in every other case 60 is used.
func NewCloudWatchBackend(region string, dimensions []CloudWatchDimension) *CloudWatchBackend {
func NewCloudWatchBackend(region string, dimensions []CloudWatchDimension, interval int64, enableHighResolution bool) *CloudWatchBackend {
return &CloudWatchBackend{
region: region,
dimensions: dimensions,
interval: interval,
enableHighResolution: enableHighResolution,
}
}

Expand Down Expand Up @@ -89,7 +93,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error {
}

// Add total metrics
metrics = append(metrics, cloudwatchMetrics(r.Totals, nil)...)
metrics = append(metrics, cb.cloudwatchMetrics(r.Totals, nil)...)

for name, c := range r.Queues {
queueDimensions := dimensions
Expand All @@ -100,7 +104,7 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error {
)

// Add per-queue metrics
metrics = append(metrics, cloudwatchMetrics(c, queueDimensions)...)
metrics = append(metrics, cb.cloudwatchMetrics(c, queueDimensions)...)
}

log.Printf("Extracted %d cloudwatch metrics from results", len(metrics))
Expand All @@ -120,15 +124,25 @@ func (cb *CloudWatchBackend) Collect(r *collector.Result) error {
return nil
}

func cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension) []*cloudwatch.MetricDatum {
func (cb *CloudWatchBackend) cloudwatchMetrics(counts map[string]int, dimensions []*cloudwatch.Dimension) []*cloudwatch.MetricDatum {
m := []*cloudwatch.MetricDatum{}

var duration int64
if cb.interval < 60 && cb.enableHighResolution {
// PutMetricData supports either normal (60s) or high frequency (1s)
// metrics - other values result in an error.
duration = 1
} else {
duration = 60
}

for k, v := range counts {
m = append(m, &cloudwatch.MetricDatum{
MetricName: aws.String(k),
Dimensions: dimensions,
Value: aws.Float64(float64(v)),
Unit: aws.String("Count"),
MetricName: aws.String(k),
Dimensions: dimensions,
Value: aws.Float64(float64(v)),
Unit: aws.String("Count"),
StorageResolution: &duration,
})
}

Expand Down
6 changes: 5 additions & 1 deletion lambda/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (

var (
nextPollTime time.Time
lastPollTime time.Time
)

func main() {
Expand All @@ -55,6 +56,8 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) {
clwDimensions := os.Getenv("BUILDKITE_CLOUDWATCH_DIMENSIONS")
quietString := os.Getenv("BUILDKITE_QUIET")
quiet := quietString == "1" || quietString == "true"
enableHighResolutionString := os.Getenv("BUILDKITE_CLOUDWATCH_HIGH_RESOLUTION")
enableHighResolution := enableHighResolutionString == "1" || enableHighResolutionString == "true"
timeout := os.Getenv("BUILDKITE_AGENT_METRICS_TIMEOUT")
maxIdleConns := os.Getenv("BUILDKITE_AGENT_METRICS_MAX_IDLE_CONNS")

Expand Down Expand Up @@ -146,7 +149,7 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) {
if err != nil {
return "", err
}
metricsBackend = backend.NewCloudWatchBackend(awsRegion, dimensions)
metricsBackend = backend.NewCloudWatchBackend(awsRegion, dimensions, int64(time.Since(lastPollTime).Seconds()), enableHighResolution)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first time this runs, `lastPollTime` will be the zero `time.Time`, so the computed interval will be a very large number of seconds (e.g. 1720764897 or larger) rather than the real polling interval. But since we only care whether it's less than or more than 60 seconds, that's inconsequential. The first metric we send within this Lambda context won't be high-resolution, but that should be OK.

}

// minimum res.PollDuration across collectors
Expand Down Expand Up @@ -177,6 +180,7 @@ func Handler(ctx context.Context, evt json.RawMessage) (string, error) {
}
}

lastPollTime = time.Now()
log.Printf("Finished in %s", time.Since(startTime))

// Store the next acceptable poll time in global state
Expand Down
4 changes: 2 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func main() {
prometheusPath = flag.String("prometheus-path", "/metrics", "Prometheus metrics transport path")
clwRegion = flag.String("cloudwatch-region", "", "AWS Region to connect to, defaults to $AWS_REGION or us-east-1")
clwDimensions = flag.String("cloudwatch-dimensions", "", "Cloudwatch dimensions to index metrics under, in the form of Key=Value, Other=Value")
clwHighResolution = flag.Bool("cloudwatch-high-resolution", false, "Send metrics at a high-resolution, which incurs extra costs")
gcpProjectID = flag.String("stackdriver-projectid", "", "Specify Stackdriver Project ID")
nrAppName = flag.String("newrelic-app-name", "", "New Relic application name for metric events")
nrLicenseKey = flag.String("newrelic-license-key", "", "New Relic license key for publishing events")
Expand Down Expand Up @@ -86,8 +87,7 @@ func main() {
fmt.Println(err)
os.Exit(1)
}
metricsBackend = backend.NewCloudWatchBackend(region, dimensions)

metricsBackend = backend.NewCloudWatchBackend(region, dimensions, int64(interval.Seconds()), *clwHighResolution)
case "statsd":
metricsBackend, err = backend.NewStatsDBackend(*statsdHost, *statsdTags)
if err != nil {
Expand Down