Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Customize status bundle_loading_duration_ns #7156

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/content/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,7 @@ included in the actual bundle gzipped tarball.
| `status.partition_name` | `string` | No | Path segment to include in status updates. |
| `status.console` | `boolean` | No (default: `false`) | Log the status updates locally to the console. When enabled alongside a remote status update API the `service` must be configured, the default `service` selection will be disabled. |
| `status.prometheus` | `boolean` | No (default: `false`) | Export the status (bundle and plugin) metrics to prometheus (see [the monitoring documentation](../monitoring/#prometheus)). When enabled alongside a remote status update API the `service` must be configured, the default `service` selection will be disabled. |
| `status.prometheus_config.collectors.bundle_loading_duration_ns.buckets` | `[]float64` | No (only used when `status.prometheus` is `true`; default: `[1000, 2000, 4000, 8000, 16_000, 32_000, 64_000, 128_000, 256_000, 512_000, 1_024_000, 2_048_000, 4_096_000, 8_192_000, 16_384_000, 32_768_000, 65_536_000, 131_072_000, 262_144_000, 524_288_000]`) | Specifies the histogram buckets for the `bundle_loading_duration_ns` metric. Each value is a float expressed in nanoseconds. |
| `status.plugin` | `string` | No | Use the named plugin for status updates. If this field exists, the other configuration fields are not required. |
| `status.trigger` | `string` (default: `periodic`) | No | Controls how status updates are reported to the remote server. Allowed values are `periodic` and `manual` (`manual` triggers are only possible when using OPA as a Go package). |

Expand Down
4 changes: 4 additions & 0 deletions docs/content/management-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@ Example of minimal config to enable:
```yaml
status:
prometheus: true
prometheus_config:
jwu730-1 marked this conversation as resolved.
Show resolved Hide resolved
collectors:
bundle_loading_duration_ns:
buckets: [1, 1000, 10_000, 1e8]
```

When enabled, the OPA instance's Prometheus endpoint exposes the metrics described in [the monitoring documentation](../monitoring/#status-metrics).
Expand Down
160 changes: 126 additions & 34 deletions plugins/status/metrics.go
Original file line number Diff line number Diff line change
@@ -1,82 +1,174 @@
package status

import (
"github.com/open-policy-agent/opa/logging"
"github.com/open-policy-agent/opa/version"
"github.com/prometheus/client_golang/prometheus"
)

var (
opaInfo = prometheus.NewGauge(
// defaultBundleLoadStageBuckets is the bucket layout used for the
// bundle_loading_duration_ns histogram when the configuration supplies none:
// 20 exponential buckets starting at 1000 and doubling at each step.
var defaultBundleLoadStageBuckets = prometheus.ExponentialBuckets(1000, 2, 20)

// PrometheusConfig holds the status plugin's Prometheus-specific settings
// (the `prometheus_config` section of the status configuration).
type PrometheusConfig struct {
// Collectors carries per-collector settings; omitted from JSON when nil.
Collectors *Collectors `json:"collectors,omitempty"`
}

// Collectors groups the configurable settings of individual status metric
// collectors. Only the bundle_loading_duration_ns histogram is configurable
// here.
type Collectors struct {
// BundleLoadDurationNanoseconds configures the bundle_loading_duration_ns
// histogram; omitted from JSON when nil.
BundleLoadDurationNanoseconds *BundleLoadDurationNanoseconds `json:"bundle_loading_duration_ns,omitempty"`
}

// injectDefaultDurationBuckets returns p untouched when it already carries
// buckets for the bundle load duration histogram. In every other case (p nil,
// or any link of the chain down to Buckets nil) it returns a freshly built
// configuration whose buckets are defaultBundleLoadStageBuckets.
func injectDefaultDurationBuckets(p *PrometheusConfig) *PrometheusConfig {
	needsDefaults := p == nil ||
		p.Collectors == nil ||
		p.Collectors.BundleLoadDurationNanoseconds == nil ||
		p.Collectors.BundleLoadDurationNanoseconds.Buckets == nil
	if !needsDefaults {
		return p
	}

	histogramDefaults := &BundleLoadDurationNanoseconds{
		Buckets: defaultBundleLoadStageBuckets,
	}
	return &PrometheusConfig{
		Collectors: &Collectors{
			BundleLoadDurationNanoseconds: histogramDefaults,
		},
	}
}

// collectors groups every Prometheus collector maintained by the status plugin.
// Note: when adding a new collector field, make sure to also add it to toList;
// RegisterAll and UnregisterAll only walk that list, so a collector missing
// from it is never registered or unregistered.
type collectors struct {
opaInfo prometheus.Gauge
pluginStatus *prometheus.GaugeVec
loaded *prometheus.CounterVec
failLoad *prometheus.CounterVec
lastRequest *prometheus.GaugeVec
lastSuccessfulActivation *prometheus.GaugeVec
lastSuccessfulDownload *prometheus.GaugeVec
lastSuccessfulRequest *prometheus.GaugeVec
bundleLoadDuration *prometheus.HistogramVec
}

// newCollectors builds a fresh set of status plugin collectors: the opa_info
// gauge (set to 1 once, labelled with the OPA version), the plugin status
// gauge, the bundle load/failure counters, the last-request/activation/
// download gauges, and the bundle load duration histogram, which is created
// by newBundleLoadDurationCollector from prometheusConfig. Nothing is
// registered here; see RegisterAll.
//
// NOTE(review): this span interleaves removed and added diff lines (for
// example both `pluginStatus = ...` and `pluginStatus := ...`, and duplicated
// `Help:` entries). Only the `:=` / multi-line forms belong to the new code.
func newCollectors(prometheusConfig *PrometheusConfig) *collectors {
opaInfo := prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "opa_info",
Help: "Information about the OPA environment.",
ConstLabels: map[string]string{"version": version.Version},
},
)
pluginStatus = prometheus.NewGaugeVec(
opaInfo.Set(1) // only publish once

pluginStatus := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "plugin_status_gauge",
Help: "Gauge for the plugin by status."},
Help: "Gauge for the plugin by status.",
},
[]string{"name", "status"},
)
loaded = prometheus.NewCounterVec(
loaded := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "bundle_loaded_counter",
Help: "Counter for the bundle loaded."},
Help: "Counter for the bundle loaded.",
},
[]string{"name"},
)
failLoad = prometheus.NewCounterVec(
failLoad := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "bundle_failed_load_counter",
Help: "Counter for the failed bundle load."},
Help: "Counter for the failed bundle load.",
},
[]string{"name", "code", "message"},
)
lastRequest = prometheus.NewGaugeVec(
lastRequest := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "last_bundle_request",
Help: "Gauge for the last bundle request."},
Help: "Gauge for the last bundle request.",
},
[]string{"name"},
)
lastSuccessfulActivation = prometheus.NewGaugeVec(
lastSuccessfulActivation := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "last_success_bundle_activation",
Help: "Gauge for the last success bundle activation."},
Help: "Gauge for the last success bundle activation.",
},
[]string{"name", "active_revision"},
)
lastSuccessfulDownload = prometheus.NewGaugeVec(
lastSuccessfulDownload := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "last_success_bundle_download",
Help: "Gauge for the last success bundle download."},
Help: "Gauge for the last success bundle download.",
},
[]string{"name"},
)
lastSuccessfulRequest = prometheus.NewGaugeVec(
lastSuccessfulRequest := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "last_success_bundle_request",
Help: "Gauge for the last success bundle request."},
Help: "Gauge for the last success bundle request.",
},
[]string{"name"},
)
bundleLoadDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{

bundleLoadDuration := newBundleLoadDurationCollector(prometheusConfig)

return &collectors{
opaInfo: opaInfo,
pluginStatus: pluginStatus,
loaded: loaded,
failLoad: failLoad,
lastRequest: lastRequest,
lastSuccessfulActivation: lastSuccessfulActivation,
lastSuccessfulDownload: lastSuccessfulDownload,
lastSuccessfulRequest: lastSuccessfulRequest,
bundleLoadDuration: bundleLoadDuration,
}
}

// newBundleLoadDurationCollector builds the bundle_loading_duration_ns
// histogram, labelled by bundle "name" and loading "stage".
//
// The bucket boundaries come from
// prometheusConfig.Collectors.BundleLoadDurationNanoseconds.Buckets. The
// configuration is first passed through injectDefaultDurationBuckets, so a nil
// config (or one with no buckets set) falls back to
// defaultBundleLoadStageBuckets instead of panicking on a nil dereference.
// A config that already carries buckets is used as-is.
func newBundleLoadDurationCollector(prometheusConfig *PrometheusConfig) *prometheus.HistogramVec {
	cfg := injectDefaultDurationBuckets(prometheusConfig)

	return prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name: "bundle_loading_duration_ns",
		Help: "Histogram for the bundle loading duration by stage.",
		// Only the configured buckets are set; the previous hard-coded
		// ExponentialBuckets(1000, 2, 20) now lives in the defaults.
		Buckets: cfg.Collectors.BundleLoadDurationNanoseconds.Buckets,
	}, []string{"name", "stage"})
}

// allCollectors is a list of all collectors maintained by the status plugin.
// Note: when adding a new collector, make sure to also add it to this list,
// or it won't survive status plugin reconfigure events.
allCollectors = []prometheus.Collector{
opaInfo,
pluginStatus,
loaded,
failLoad,
lastRequest,
lastSuccessfulActivation,
lastSuccessfulDownload,
lastSuccessfulRequest,
bundleLoadDuration,
// RegisterAll registers every collector returned by toList with register.
// A nil register makes the call a no-op. A failed registration is logged
// through logger and does not stop the remaining collectors from being
// registered.
//
// NOTE(review): the lone `)` below is a leftover of the removed package-level
// `var ( ... )` block in this diff view, not part of this method.
func (c *collectors) RegisterAll(register prometheus.Registerer, logger logging.Logger) {
if register == nil {
return
}
)
for _, collector := range c.toList() {
if err := register.Register(collector); err != nil {
logger.Error("Status metric failed to register on prometheus :%v.", err)
}
}
}

func init() {
opaInfo.Set(1)
// UnregisterAll removes every collector returned by toList from register.
// Calling it with a nil register does nothing.
func (c *collectors) UnregisterAll(register prometheus.Registerer) {
	if register == nil {
		return
	}

	all := c.toList()
	for i := range all {
		register.Unregister(all[i])
	}
}

// ReregisterBundleLoadDuration rebuilds the bundleLoadDuration histogram from
// config and swaps it into register: the current histogram is unregistered,
// a new one is created with newBundleLoadDurationCollector, and the new one is
// registered. A registration failure is logged through logger.
//
// A nil register is tolerated, matching RegisterAll and UnregisterAll: the
// histogram is still rebuilt so it reflects config, but no registry calls are
// made (previously this panicked on register.Unregister).
//
// NOTE(review): config is handed straight to newBundleLoadDurationCollector;
// presumably callers normalize it with injectDefaultDurationBuckets first —
// confirm.
func (c *collectors) ReregisterBundleLoadDuration(register prometheus.Registerer, config *PrometheusConfig, logger logging.Logger) {
	logger.Debug("Re-register bundleLoadDuration collector")

	if register == nil {
		// Nothing to unregister from or register with; just refresh the collector.
		c.bundleLoadDuration = newBundleLoadDurationCollector(config)
		return
	}

	// Unregister the old histogram first: a histogram with the same name but
	// different buckets cannot be registered alongside it.
	register.Unregister(c.bundleLoadDuration)
	c.bundleLoadDuration = newBundleLoadDurationCollector(config)
	if err := register.Register(c.bundleLoadDuration); err != nil {
		logger.Error("Status metric failed to register bundleLoadDuration collector on prometheus :%v.", err)
	}
}

// toList returns all collectors held by c as a single slice, in field order.
// RegisterAll and UnregisterAll iterate over this slice.
func (c *collectors) toList() []prometheus.Collector {
	list := make([]prometheus.Collector, 0, 9)
	list = append(list,
		c.opaInfo,
		c.pluginStatus,
		c.loaded,
		c.failLoad,
		c.lastRequest,
		c.lastSuccessfulActivation,
		c.lastSuccessfulDownload,
		c.lastSuccessfulRequest,
		c.bundleLoadDuration,
	)
	return list
}
Loading