Skip to content

Commit

Permalink
status: Remove activeRevision label on all but one metric (open-polic…
Browse files Browse the repository at this point in the history
…y-agent#4600)

Having one activeRevision label on each of the prometheus metrics emitted
by the status plugin has proven to be problematic with a large number of
bundles. So with this change,

1. we keep the activeRevision label only on the last_success_bundle_activation metric.
2. the gauge gets reset, so we only keep the last active_revision instead of keeping
   them all; this avoids the situation where the /metrics output grows indefinitely.

Fixes open-policy-agent#4584.

Signed-off-by: cmuraru <cmuraru@adobe.com>
  • Loading branch information
costimuraru authored and rokkiter committed Apr 26, 2022
1 parent d609b26 commit 18205d8
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 16 deletions.
6 changes: 3 additions & 3 deletions docs/content/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ When Prometheus is enabled in the status plugin (see [Configuration](../configur
| bundle_loaded_counter | counter | Number of bundles loaded with success. | EXPERIMENTAL |
| bundle_failed_load_counter | counter | Number of bundles that failed to load. | EXPERIMENTAL |
| last_bundle_request | gauge | Last bundle request in UNIX nanoseconds. | EXPERIMENTAL |
| last_success_bundle_activation | gauge | Last successfully bundle activation in UNIX nanoseconds. | EXPERIMENTAL |
| last_success_bundle_download | gauge | Last successfully bundle download in UNIX nanoseconds. | EXPERIMENTAL |
| last_success_bundle_request | gauge | Last successfully bundle request in UNIX nanoseconds. | EXPERIMENTAL |
| last_success_bundle_activation | gauge | Last successful bundle activation in UNIX nanoseconds. | EXPERIMENTAL |
| last_success_bundle_download | gauge | Last successful bundle download in UNIX nanoseconds. | EXPERIMENTAL |
| last_success_bundle_request | gauge | Last successful bundle request in UNIX nanoseconds. | EXPERIMENTAL |
| bundle_loading_duration_ns | histogram | A histogram of duration for bundle loading. | EXPERIMENTAL |


Expand Down
2 changes: 1 addition & 1 deletion plugins/bundle/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ func (p *Plugin) checkPluginReadiness() {
}

func (p *Plugin) activate(ctx context.Context, name string, b *bundle.Bundle) error {
p.log(name).Debug("Bundle activation in progress. Opening storage transaction.")
p.log(name).Debug("Bundle activation in progress (%v). Opening storage transaction.", b.Manifest.Revision)

params := storage.WriteParams
params.Context = storage.NewContext().WithMetrics(p.status[name].Metrics)
Expand Down
12 changes: 6 additions & 6 deletions plugins/status/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ var (
prometheus.CounterOpts{
Name: "bundle_loaded_counter",
Help: "Counter for the bundle loaded."},
[]string{"name", "active_revision"},
[]string{"name"},
)
failLoad = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "bundle_failed_load_counter",
Help: "Counter for the failed bundle load."},
[]string{"name", "active_revision", "code", "message"},
[]string{"name", "code", "message"},
)
lastRequest = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "last_bundle_request",
Help: "Gauge for the last bundle request."},
[]string{"name", "active_revision"},
[]string{"name"},
)
lastSuccessfulActivation = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand All @@ -39,17 +39,17 @@ var (
prometheus.GaugeOpts{
Name: "last_success_bundle_download",
Help: "Gauge for the last success bundle download."},
[]string{"name", "active_revision"},
[]string{"name"},
)
lastSuccessfulRequest = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "last_success_bundle_request",
Help: "Gauge for the last success bundle request."},
[]string{"name", "active_revision"},
[]string{"name"},
)
bundleLoadDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "bundle_loading_duration_ns",
Help: "Histogram for the bundle loading duration by stage.",
Buckets: prometheus.ExponentialBuckets(1000, 2, 20),
}, []string{"name", "active_revision", "stage"})
}, []string{"name", "stage"})
)
13 changes: 7 additions & 6 deletions plugins/status/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,21 +480,22 @@ func updatePrometheusMetrics(u *UpdateRequestV1) {
for name, plugin := range u.Plugins {
pluginStatus.WithLabelValues(name, string(plugin.State)).Set(1)
}
lastSuccessfulActivation.Reset()
for _, bundle := range u.Bundles {
if bundle.Code == "" && bundle.ActiveRevision != "" {
loaded.WithLabelValues(bundle.Name, bundle.ActiveRevision).Inc()
loaded.WithLabelValues(bundle.Name).Inc()
} else {
failLoad.WithLabelValues(bundle.Name, bundle.ActiveRevision, bundle.Code, bundle.Message).Inc()
failLoad.WithLabelValues(bundle.Name, bundle.Code, bundle.Message).Inc()
}
lastSuccessfulActivation.WithLabelValues(bundle.Name, bundle.ActiveRevision).Set(float64(bundle.LastSuccessfulActivation.UnixNano()))
lastSuccessfulDownload.WithLabelValues(bundle.Name, bundle.ActiveRevision).Set(float64(bundle.LastSuccessfulDownload.UnixNano()))
lastSuccessfulRequest.WithLabelValues(bundle.Name, bundle.ActiveRevision).Set(float64(bundle.LastSuccessfulRequest.UnixNano()))
lastRequest.WithLabelValues(bundle.Name, bundle.ActiveRevision).Set(float64(bundle.LastRequest.UnixNano()))
lastSuccessfulDownload.WithLabelValues(bundle.Name).Set(float64(bundle.LastSuccessfulDownload.UnixNano()))
lastSuccessfulRequest.WithLabelValues(bundle.Name).Set(float64(bundle.LastSuccessfulRequest.UnixNano()))
lastRequest.WithLabelValues(bundle.Name).Set(float64(bundle.LastRequest.UnixNano()))
if bundle.Metrics != nil {
for stage, metric := range bundle.Metrics.All() {
switch stage {
case "timer_bundle_request_ns", "timer_rego_data_parse_ns", "timer_rego_module_parse_ns", "timer_rego_module_compile_ns", "timer_rego_load_bundles_ns":
bundleLoadDuration.WithLabelValues(bundle.Name, bundle.ActiveRevision, stage).Observe(float64(metric.(int64)))
bundleLoadDuration.WithLabelValues(bundle.Name, stage).Observe(float64(metric.(int64)))
}
}
}
Expand Down

0 comments on commit 18205d8

Please sign in to comment.