Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: scope metrics to active config, add optional per-host metrics #6531

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
3 changes: 2 additions & 1 deletion caddy.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,8 @@ func run(newCfg *Config, start bool) (Context, error) {
if err != nil {
return ctx, err
}

globalMetrics.configSuccess.Set(1)
globalMetrics.configSuccessTime.SetToCurrentTime()
// now that the user's config is running, finish setting up anything else,
// such as remote admin endpoint, config loader, etc.
return ctx, finishSettingUp(ctx, ctx.cfg)
Expand Down
12 changes: 6 additions & 6 deletions caddyconfig/httpcaddyfile/serveroptions.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,13 @@ func unmarshalCaddyfileServerOptions(d *caddyfile.Dispenser) (any, error) {
}

case "metrics":
if d.NextArg() {
return nil, d.ArgErr()
}
if nesting := d.Nesting(); d.NextBlock(nesting) {
return nil, d.ArgErr()
}
serverOpts.Metrics = new(caddyhttp.Metrics)
for nesting := d.Nesting(); d.NextBlock(nesting); {
switch d.Val() {
case "per_host":
serverOpts.Metrics.PerHost = true
}
}

case "trace":
if d.NextArg() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
servers :80 {
metrics {
per_host
}
}
}
:80 {
respond "Hello"
}
----------
{
"apps": {
"http": {
"servers": {
"srv0": {
"listen": [
":80"
],
"routes": [
{
"handle": [
{
"body": "Hello",
"handler": "static_response"
}
]
}
],
"metrics": {
"per_host": true
}
}
}
}
}
}
23 changes: 22 additions & 1 deletion context.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"reflect"

"github.com/caddyserver/certmagic"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"go.uber.org/zap"
"go.uber.org/zap/exp/zapslog"

Expand All @@ -47,6 +49,7 @@ type Context struct {
ancestry []Module
cleanupFuncs []func() // invoked at every config unload
exitFuncs []func(context.Context) // invoked at config unload ONLY IF the process is exiting (EXPERIMENTAL)
metricsRegistry *prometheus.Registry
}

// NewContext provides a new context derived from the given
Expand All @@ -58,7 +61,7 @@ type Context struct {
// modules which are loaded will be properly unloaded.
// See standard library context package's documentation.
func NewContext(ctx Context) (Context, context.CancelFunc) {
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg}
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg, metricsRegistry: prometheus.NewPedanticRegistry()}
c, cancel := context.WithCancel(ctx.Context)
wrappedCancel := func() {
cancel()
Expand All @@ -77,8 +80,10 @@ func NewContext(ctx Context) (Context, context.CancelFunc) {
}
}
}
globalMetrics.configSuccess.Set(0)
}
newCtx.Context = c
newCtx.initMetrics()
return newCtx, wrappedCancel
}

Expand All @@ -97,6 +102,22 @@ func (ctx *Context) Filesystems() FileSystems {
return ctx.cfg.filesystems
}

// Returns the active metrics registry for the context
// EXPERIMENTAL: This API is subject to change.
func (ctx *Context) GetMetricsRegistry() *prometheus.Registry {
return ctx.metricsRegistry
}

func (ctx *Context) initMetrics() {
ctx.metricsRegistry.MustRegister(
collectors.NewBuildInfoCollector(),
adminMetrics.requestCount,
adminMetrics.requestErrors,
globalMetrics.configSuccess,
globalMetrics.configSuccessTime,
)
}

// OnExit executes f when the process exits gracefully.
// The function is only executed if the process is gracefully
// shut down while this context is active.
Expand Down
23 changes: 16 additions & 7 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,33 @@ import (
"net/http"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/caddyserver/caddy/v2/internal/metrics"
)

// define and register the metrics used in this package.
func init() {
prometheus.MustRegister(collectors.NewBuildInfoCollector())

const ns, sub = "caddy", "admin"

adminMetrics.requestCount = promauto.NewCounterVec(prometheus.CounterOpts{
adminMetrics.requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "http_requests_total",
Help: "Counter of requests made to the Admin API's HTTP endpoints.",
}, []string{"handler", "path", "code", "method"})
adminMetrics.requestErrors = promauto.NewCounterVec(prometheus.CounterOpts{
adminMetrics.requestErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "http_request_errors_total",
Help: "Number of requests resulting in middleware errors.",
}, []string{"handler", "path", "method"})
globalMetrics.configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "caddy_config_last_reload_successful",
Help: "Whether the last configuration reload attempt was successful.",
})
globalMetrics.configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "caddy_config_last_reload_success_timestamp_seconds",
Help: "Timestamp of the last successful configuration reload.",
})
}

// adminMetrics is a collection of metrics that can be tracked for the admin API.
Expand All @@ -36,6 +39,12 @@ var adminMetrics = struct {
requestErrors *prometheus.CounterVec
}{}

// globalMetrics is a collection of metrics that can be tracked for Caddy global state
var globalMetrics = struct {
configSuccess prometheus.Gauge
configSuccessTime prometheus.Gauge
}{}

// Similar to promhttp.InstrumentHandlerCounter, but upper-cases method names
// instead of lower-casing them.
//
Expand Down
4 changes: 4 additions & 0 deletions modules/caddyhttp/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,10 @@ func (app *App) Provision(ctx caddy.Context) error {
// route handler so that important security checks are done, etc.
primaryRoute := emptyHandler
if srv.Routes != nil {
if srv.Metrics != nil {
srv.Metrics.init = sync.Once{}
srv.Metrics.httpMetrics = &httpMetrics{}
}
err := srv.Routes.ProvisionHandlers(ctx, srv.Metrics)
if err != nil {
return fmt.Errorf("server %s: setting up route handlers: %v", srvName, err)
Expand Down
79 changes: 54 additions & 25 deletions modules/caddyhttp/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,43 +10,54 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/internal/metrics"
)

// Metrics configures metrics observations.
// EXPERIMENTAL and subject to change or removal.
type Metrics struct{}
type Metrics struct {
// Enable per-host metrics. Enabling this option may
// incur high-memory consumption, depending on the number of hosts
// managed by Caddy.
PerHost bool `json:"per_host,omitempty"`

init sync.Once
configSuccess prometheus.Gauge `json:"-"` // TODO:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// TODO: what's to do?

configSuccessTime prometheus.Gauge `json:"-"`
httpMetrics *httpMetrics `json:"-"`
}

var httpMetrics = struct {
init sync.Once
type httpMetrics struct {
requestInFlight *prometheus.GaugeVec
requestCount *prometheus.CounterVec
requestErrors *prometheus.CounterVec
requestDuration *prometheus.HistogramVec
requestSize *prometheus.HistogramVec
responseSize *prometheus.HistogramVec
responseDuration *prometheus.HistogramVec
}{
init: sync.Once{},
}

func initHTTPMetrics() {
func initHTTPMetrics(ctx caddy.Context, metrics *Metrics) {
const ns, sub = "caddy", "http"

registry := ctx.GetMetricsRegistry()
basicLabels := []string{"server", "handler"}
httpMetrics.requestInFlight = promauto.NewGaugeVec(prometheus.GaugeOpts{
if metrics.PerHost {
basicLabels = append(basicLabels, "host")
}
metrics.httpMetrics.requestInFlight = promauto.With(registry).NewGaugeVec(prometheus.GaugeOpts{
Namespace: ns,
Subsystem: sub,
Name: "requests_in_flight",
Help: "Number of requests currently handled by this server.",
}, basicLabels)
httpMetrics.requestErrors = promauto.NewCounterVec(prometheus.CounterOpts{
metrics.httpMetrics.requestErrors = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "request_errors_total",
Help: "Number of requests resulting in middleware errors.",
}, basicLabels)
httpMetrics.requestCount = promauto.NewCounterVec(prometheus.CounterOpts{
metrics.httpMetrics.requestCount = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "requests_total",
Expand All @@ -58,34 +69,46 @@ func initHTTPMetrics() {
sizeBuckets := prometheus.ExponentialBuckets(256, 4, 8)

httpLabels := []string{"server", "handler", "code", "method"}
httpMetrics.requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
if metrics.PerHost {
httpLabels = append(httpLabels, "host")
}
metrics.httpMetrics.requestDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "request_duration_seconds",
Help: "Histogram of round-trip request durations.",
Buckets: durationBuckets,
}, httpLabels)
httpMetrics.requestSize = promauto.NewHistogramVec(prometheus.HistogramOpts{
metrics.httpMetrics.requestSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "request_size_bytes",
Help: "Total size of the request. Includes body",
Buckets: sizeBuckets,
}, httpLabels)
httpMetrics.responseSize = promauto.NewHistogramVec(prometheus.HistogramOpts{
metrics.httpMetrics.responseSize = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "response_size_bytes",
Help: "Size of the returned response.",
Buckets: sizeBuckets,
}, httpLabels)
httpMetrics.responseDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
metrics.httpMetrics.responseDuration = promauto.With(registry).NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: sub,
Name: "response_duration_seconds",
Help: "Histogram of times to first byte in response bodies.",
Buckets: durationBuckets,
}, httpLabels)

metrics.configSuccess = promauto.With(registry).NewGauge(prometheus.GaugeOpts{
Name: "prometheus_config_last_reload_successful",
Help: "Whether the last configuration reload attempt was successful.",
})
metrics.configSuccessTime = promauto.With(registry).NewGauge(prometheus.GaugeOpts{
Name: "prometheus_config_last_reload_success_timestamp_seconds",
Help: "Timestamp of the last successful configuration reload.",
})
}

// serverNameFromContext extracts the current server name from the context.
Expand All @@ -101,14 +124,15 @@ func serverNameFromContext(ctx context.Context) string {
type metricsInstrumentedHandler struct {
handler string
mh MiddlewareHandler
metrics *Metrics
}

func newMetricsInstrumentedHandler(handler string, mh MiddlewareHandler) *metricsInstrumentedHandler {
httpMetrics.init.Do(func() {
initHTTPMetrics()
func newMetricsInstrumentedHandler(ctx caddy.Context, handler string, mh MiddlewareHandler, metrics *Metrics) *metricsInstrumentedHandler {
metrics.init.Do(func() {
initHTTPMetrics(ctx, metrics)
})

return &metricsInstrumentedHandler{handler, mh}
return &metricsInstrumentedHandler{handler, mh, metrics}
}

func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Request, next Handler) error {
Expand All @@ -119,7 +143,12 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re
// of a panic
statusLabels := prometheus.Labels{"server": server, "handler": h.handler, "method": method, "code": ""}

inFlight := httpMetrics.requestInFlight.With(labels)
if h.metrics.PerHost {
labels["host"] = r.Host
statusLabels["host"] = r.Host
}

inFlight := h.metrics.httpMetrics.requestInFlight.With(labels)
inFlight.Inc()
defer inFlight.Dec()

Expand All @@ -131,13 +160,13 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re
writeHeaderRecorder := ShouldBufferFunc(func(status int, header http.Header) bool {
statusLabels["code"] = metrics.SanitizeCode(status)
ttfb := time.Since(start).Seconds()
httpMetrics.responseDuration.With(statusLabels).Observe(ttfb)
h.metrics.httpMetrics.responseDuration.With(statusLabels).Observe(ttfb)
return false
})
wrec := NewResponseRecorder(w, nil, writeHeaderRecorder)
err := h.mh.ServeHTTP(wrec, r, next)
dur := time.Since(start).Seconds()
httpMetrics.requestCount.With(labels).Inc()
h.metrics.httpMetrics.requestCount.With(labels).Inc()

observeRequest := func(status int) {
// If the code hasn't been set yet, and we didn't encounter an error, we're
Expand All @@ -148,9 +177,9 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re
statusLabels["code"] = metrics.SanitizeCode(status)
}

httpMetrics.requestDuration.With(statusLabels).Observe(dur)
httpMetrics.requestSize.With(statusLabels).Observe(float64(computeApproximateRequestSize(r)))
httpMetrics.responseSize.With(statusLabels).Observe(float64(wrec.Size()))
h.metrics.httpMetrics.requestDuration.With(statusLabels).Observe(dur)
h.metrics.httpMetrics.requestSize.With(statusLabels).Observe(float64(computeApproximateRequestSize(r)))
h.metrics.httpMetrics.responseSize.With(statusLabels).Observe(float64(wrec.Size()))
}

if err != nil {
Expand All @@ -159,7 +188,7 @@ func (h *metricsInstrumentedHandler) ServeHTTP(w http.ResponseWriter, r *http.Re
observeRequest(handlerErr.StatusCode)
}

httpMetrics.requestErrors.With(labels).Inc()
h.metrics.httpMetrics.requestErrors.With(labels).Inc()

return err
}
Expand Down
Loading
Loading