From 9e264a442dd1e6f131be9a6390c815cc8045211c Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 22 Nov 2024 21:35:01 +0100 Subject: [PATCH] feat(acme): improved logging and http metrics rerequisite for productization and monitoring this is necessary minimum to gain some understanding about how long requests take, how many are in flight /v1/health endpoint added for monitoring by load-balancer --- README.md | 24 ++++++++++++++++++++++++ acme/metrics.go | 6 ------ acme/writer.go | 40 +++++++++++++++++++++++++++++----------- docs/METRICS.md | 8 +++++++- e2e_test.go | 4 ++-- go.mod | 5 +++-- go.sum | 10 ++++++---- 7 files changed, 71 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index fe44713..5630616 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,25 @@ This is the backend of [`AutoTLS` feature introduced in Kubo 0.32.0-rc1](https://github.com/ipfs/kubo/blob/master/docs/config.md#autotls). It is deployed at `libp2p.direct` and maintained by [Interplanetary Shipyard](https://github.com/ipshipyard). +- [High-level Design](#high-level-design) + - [Peer Authentication and DNS-01 Challenge and Certificate Issuance](#peer-authentication-and-dns-01-challenge-and-certificate-issuance) + - [DNS Resolution and TLS Connection](#dns-resolution-and-tls-connection) +- [Build](#build) +- [Install](#install) + - [From source](#from-source) +- [Usage](#usage) + - [Local testing](#local-testing) + - [Docker](#docker) + - [Configuration](#configuration) + - [ipparser Syntax](#ipparser-syntax) + - [acme Syntax](#acme-syntax) + - [Example](#example) + - [Handled DNS records](#handled-dns-records) + - [IPv4 subdomain handling](#ipv4-subdomain-handling) + - [IPv6 subdomain handling](#ipv6-subdomain-handling) + - [Submitting Challenge Records](#submitting-challenge-records) + - [Health Check](#health-check) + ## High-level Design The following diagrams show the high-level design of how p2p-forge works. @@ -211,3 +230,8 @@ curl -X POST "https://registration.libp2p.direct/v1/_acme-challenge" \ ``` Where the bearer token is derived via the [libp2p HTTP PeerID Auth Specification](https://github.com/libp2p/specs/blob/master/http/peer-id-auth.md). + +### Health Check + +`/v1/health` will always respond with HTTP 204 + diff --git a/acme/metrics.go b/acme/metrics.go index e693358..b80d13f 100644 --- a/acme/metrics.go +++ b/acme/metrics.go @@ -10,12 +10,6 @@ import ( ) var ( - registrationRequestCount = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: plugin.Namespace, - Subsystem: "forge_" + pluginName, - Name: "registrations_total", - Help: "Counter of ACME DNS-01 broker registration requests made to this p2p-forge instance.", - }, []string{"status"}) dns01ResponseCount = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: plugin.Namespace, Subsystem: "forge_" + pluginName, diff --git a/acme/writer.go b/acme/writer.go index ac1dd42..6452582 100644 --- a/acme/writer.go +++ b/acme/writer.go @@ -12,7 +12,6 @@ import ( "net" "net/http" "os" - "strconv" "strings" "time" @@ -20,6 +19,11 @@ import ( "github.com/coredns/coredns/plugin/pkg/reuseport" "github.com/felixge/httpsnoop" "github.com/ipshipyard/p2p-forge/client" + "github.com/prometheus/client_golang/prometheus" + + metrics "github.com/slok/go-http-metrics/metrics/prometheus" + "github.com/slok/go-http-metrics/middleware" + "github.com/slok/go-http-metrics/middleware/std" "github.com/caddyserver/certmagic" @@ -34,6 +38,7 @@ import ( var log = clog.NewWithPlugin(pluginName) const registrationApiPath = "/v1/_acme-challenge" +const healthcheckApiPath = "/v1/health" // acmeWriter implements writing of ACME Challenge DNS records by exporting an HTTP endpoint. type acmeWriter struct { @@ -88,8 +93,6 @@ func (c *acmeWriter) OnStartup() error { } c.ln = ln - - mux := http.NewServeMux() c.nlSetup = true // server side secret key and peerID not particularly relevant, so we can generate new ones as needed @@ -171,11 +174,24 @@ func (c *acmeWriter) OnStartup() error { } } + // middleware with prometheus recorder + httpMetricsMiddleware := middleware.New(middleware.Config{ + Recorder: metrics.NewRecorder(metrics.Config{ + Registry: prometheus.DefaultRegisterer.(*prometheus.Registry), + Prefix: "coredns_forge_" + pluginName, + DurationBuckets: []float64{0.1, 0.5, 1, 2, 5, 8, 10, 20, 30}, // TODO: remove this comment if we are ok with these buckets + }), + DisableMeasureSize: true, // not meaningful for the registration api + }) + // register handlers - mux.Handle(registrationApiPath, authPeer) + mux := http.NewServeMux() + mux.Handle(registrationApiPath, std.Handler(registrationApiPath, httpMetricsMiddleware, authPeer)) + mux.HandleFunc(healthcheckApiPath, func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNoContent) + }) - // wrap handler in metrics meter - c.handler = withRequestMetrics(mux) + c.handler = withRequestLogger(mux) go func() { log.Infof("Registration HTTP API (%s) listener at %s", registrationApiPath, c.ln.Addr().String()) @@ -185,12 +201,14 @@ func (c *acmeWriter) OnStartup() error { return nil } -func withRequestMetrics(next http.Handler) http.Handler { +func withRequestLogger(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - m := httpsnoop.CaptureMetrics(next, w, r) - registrationRequestCount.WithLabelValues(strconv.Itoa(m.Code)).Add(1) - // TODO: decide if we keep below logger - log.Infof("%s %s (status=%d dt=%s ua=%q)", r.Method, r.URL, m.Code, m.Duration, r.UserAgent()) + // we skip healthcheck endpoint because its spammed + if !strings.HasPrefix(r.URL.Path, healthcheckApiPath) { + // TODO: decide if we want to keep this logger enabled by default, or move it to debug + m := httpsnoop.CaptureMetrics(next, w, r) + log.Infof("%s %s (status=%d dt=%s ua=%q)", r.Method, r.URL, m.Code, m.Duration, r.UserAgent()) + } }) } diff --git a/docs/METRICS.md b/docs/METRICS.md index ed34108..9a86963 100644 --- a/docs/METRICS.md +++ b/docs/METRICS.md @@ -6,9 +6,15 @@ It includes default [Prometheus Glient metrics](https://prometheus.io/docs/guide ### Forge metrics +#### `ipparser` plugin (DNS A/AAAA) + - `coredns_forge_ipparser_requests_total{type}` - dynamic DNS `A`/`AAAA` responses generated by `ipparser` plugin for `ip.peerid.domain` and `peerid.domain`, including `NODATA-*` ones. + +#### `acme` plugin (HTTP/ACME registration) + +- `coredns_forge_acme_http_request_duration_seconds{code, handler}` - Histogram with the status, count and latency of the HTTP requests sent to path specified in `handler` (like `/v1/_acme-challenge`). +- `coredns_forge_acme_http_requests_inflight{handler}` - The number of inflight requests being handled by `handler`. - `coredns_forge_acme_dns01_responses_total{type}` - DNS `TXT` responses generated by `acme` plugin for DNS-01 challenge at `_acme-challenge.peerid.domain`, including `NODATA-*` ones. -- `coredns_forge_acme_registrations_total{status}` - registration API responses generated by `acme` plugin when a client attempts to register DNS-01 challenge for their PeerID at `_acme-challenge.peerid.domain`. - `coredns_forge_acme_libp2p_probe_total{result, agent}` - libp2p probe results `acme` plugin when testing connectivity before accepting DNS-01 challenge for a PeerID. `status` is either `ok` or `error` and `agent` value is limited to well known agents, `other` and `unknown`. ### CoreDNS metrics diff --git a/e2e_test.go b/e2e_test.go index ede2884..2d55db4 100644 --- a/e2e_test.go +++ b/e2e_test.go @@ -371,7 +371,7 @@ func TestIPv4Lookup(t *testing.T) { return } - expectedAnswer := fmt.Sprintf(`%s 3600 IN A %s`, m.Question[0].Name, tt.expectedAddress) + expectedAnswer := fmt.Sprintf(`%s 86400 IN A %s`, m.Question[0].Name, tt.expectedAddress) if r.Answer[0].String() != expectedAnswer { t.Fatalf("Expected %s reply, got %s", expectedAnswer, r.Answer[0].String()) } @@ -487,7 +487,7 @@ func TestIPv6Lookup(t *testing.T) { return } - expectedAnswer := fmt.Sprintf(`%s 3600 IN AAAA %s`, m.Question[0].Name, tt.expectedAddress) + expectedAnswer := fmt.Sprintf(`%s 86400 IN AAAA %s`, m.Question[0].Name, tt.expectedAddress) if r.Answer[0].String() != expectedAnswer { t.Fatalf("Expected %s reply, got %s", expectedAnswer, r.Answer[0].String()) } diff --git a/go.mod b/go.mod index 996f770..23a3126 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/multiformats/go-multiaddr-dns v0.4.0 github.com/multiformats/go-multibase v0.2.0 github.com/prometheus/client_golang v1.20.5 + github.com/slok/go-http-metrics v0.13.0 go.uber.org/zap v1.27.0 ) @@ -63,7 +64,7 @@ require ( github.com/dustin/go-humanize v1.0.1 // indirect github.com/ebitengine/purego v0.6.0-alpha.5 // indirect github.com/elastic/gosigar v0.14.3 // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect github.com/farsightsec/golang-framestream v0.3.0 // indirect github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 // indirect github.com/flynn/noise v1.1.0 // indirect @@ -203,7 +204,7 @@ require ( golang.org/x/sys v0.26.0 // indirect golang.org/x/term v0.25.0 // indirect golang.org/x/text v0.19.0 // indirect - golang.org/x/time v0.5.0 // indirect + golang.org/x/time v0.6.0 // indirect golang.org/x/tools v0.26.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/api v0.172.0 // indirect diff --git a/go.sum b/go.sum index 918097c..9cfc790 100644 --- a/go.sum +++ b/go.sum @@ -128,8 +128,8 @@ github.com/ebitengine/purego v0.6.0-alpha.5/go.mod h1:ah1In8AOtksoNK6yk5z1HTJeUk github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo= github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -531,6 +531,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV github.com/shurcooL/users v0.0.0-20180125191416-49c67e49c537/go.mod h1:QJTqeLYEDaXHZDBsXlPCDqdhQuJkuw4NOtaxYe3xii4= github.com/shurcooL/webdavfs v0.0.0-20170829043945-18c3829fa133/go.mod h1:hKmq5kWdCj2z2KEozexVbfEZIWiTjhE0+UjmZgPqehw= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/slok/go-http-metrics v0.13.0 h1:lQDyJJx9wKhmbliyUsZ2l6peGnXRHjsjoqPt5VYzcP8= +github.com/slok/go-http-metrics v0.13.0/go.mod h1:HIr7t/HbN2sJaunvnt9wKP9xoBBVZFo1/KiHU3b0w+4= github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE= github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= @@ -751,8 +753,8 @@ golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= +golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030000716-a0a13e073c7b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=