From 799204fe6fa639924abb0f0cf0534853d6378a7c Mon Sep 17 00:00:00 2001 From: jiceatscion <139873336+jiceatscion@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:14:23 +0200 Subject: [PATCH 1/5] monitoring: give a timeout to our prometheus request handler (#4575) Set a 1 m timeout to all prometheus incoming requests. Fixes #1971 --- private/env/BUILD.bazel | 1 + private/env/env.go | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/private/env/BUILD.bazel b/private/env/BUILD.bazel index 796ad79db1..57482bffd9 100644 --- a/private/env/BUILD.bazel +++ b/private/env/BUILD.bazel @@ -19,6 +19,7 @@ go_library( "//pkg/scrypto:go_default_library", "//private/config:go_default_library", "@com_github_opentracing_opentracing_go//:go_default_library", + "@com_github_prometheus_client_golang//prometheus:go_default_library", "@com_github_prometheus_client_golang//prometheus/promhttp:go_default_library", "@com_github_uber_jaeger_client_go//:go_default_library", "@com_github_uber_jaeger_client_go//config:go_default_library", diff --git a/private/env/env.go b/private/env/env.go index c8b0843216..fdba5dc47f 100644 --- a/private/env/env.go +++ b/private/env/env.go @@ -34,6 +34,7 @@ import ( "time" opentracing "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" jaeger "github.com/uber/jaeger-client-go" jaegercfg "github.com/uber/jaeger-client-go/config" @@ -61,6 +62,10 @@ const ( // ShutdownGraceInterval is the time applications wait after issuing a // clean shutdown signal, before forcerfully tearing down the application. ShutdownGraceInterval = 5 * time.Second + + // HandlerTimeout is the time after which the http handler gives up on a request and + // returns an error instead. + HandlerTimeout = time.Minute ) var sighupC chan os.Signal @@ -185,7 +190,14 @@ func (cfg *Metrics) ServePrometheus(ctx context.Context) error { if cfg.Prometheus == "" { return nil } - http.Handle("/metrics", promhttp.Handler()) + handler := promhttp.InstrumentMetricHandler( + prometheus.DefaultRegisterer, + promhttp.HandlerFor( + prometheus.DefaultGatherer, + promhttp.HandlerOpts{Timeout: HandlerTimeout}, + ), + ) + http.Handle("/metrics", handler) log.Info("Exporting prometheus metrics", "addr", cfg.Prometheus) server := &http.Server{Addr: cfg.Prometheus} From ce54172e019168ad531c8f98646d10bc4642518c Mon Sep 17 00:00:00 2001 From: Matthias Frei Date: Tue, 16 Jul 2024 14:29:55 +0200 Subject: [PATCH 2/5] github: add check for PR title (#4574) Github workflow to check that the PR title follows the contribution guidelines: - Starts with `:` - Uses lowercase letters for subject line (and does not end with a period) Could also be done in buildkite, but github action seems appropriate. --- .github/workflows/pr-title.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/pr-title.yml diff --git a/.github/workflows/pr-title.yml b/.github/workflows/pr-title.yml new file mode 100644 index 0000000000..41a54c144c --- /dev/null +++ b/.github/workflows/pr-title.yml @@ -0,0 +1,31 @@ +name: Check the pull request title + +on: + pull_request: + types: [opened, edited, reopened, synchronize] + +jobs: + check: + runs-on: ubuntu-latest + steps: + - name : Check the PR title + env: + TITLE: ${{ github.event.pull_request.title }} + run: | + # Check that PR is of the form `: ` + + url='https://docs.scion.org/en/latest/dev/git.html#good-commit-messages' + if [[ ! "$TITLE" =~ ^[a-z0-9,/]*:[[:space:]] ]]; then + echo '::error::The PR title should start with `: `. See '"$url" + exit 1 + fi + # Title should be lower case; initialisms and identifiers still occur occasionally and should be allowed. + # -> enforce only the first word + if [[ ! "$TITLE" =~ ^[a-z0-9,/]*:[[:space:]][a-z] ]]; then + echo '::error::The PR title should be lower case (enforced on first letter). See '"$url" + exit 1 + fi + if [[ $TITLE =~ \.[[:space:]]*$ ]]; then + echo '::error::The PR title should not end with a ".". See '"$url" + exit 1 + fi From cb634d7eac13dcad30ed2c7e8710a7a228d024fb Mon Sep 17 00:00:00 2001 From: Matthias Frei Date: Tue, 16 Jul 2024 18:36:19 +0200 Subject: [PATCH 3/5] build: replace dummy build target with bazel info (#4577) Just a tiny, random cleanup; I've noticed to obtain the effective go tags in the Makefile and scripts, instead of building a dummy target ":dummy_setting" we can also use the bazel info subcommand. --- BUILD.bazel | 10 ---------- Makefile | 2 +- tools/update_testdata.sh | 2 +- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index f63e8c8a85..a142069715 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -63,16 +63,6 @@ config_setting( }, ) -# This is a dummy target so Make can "blaze build --announce_rc -# Where something truly does nothing that we may care about. - -config_setting( - name = "dummy_setting", - define_values = { - "whatever": "whatever", - }, -) - gazelle( name = "gazelle", build_tags = select({ diff --git a/Makefile b/Makefile index f34413c519..1def0cf11f 100644 --- a/Makefile +++ b/Makefile @@ -115,7 +115,7 @@ lint-go-bazel: $(info ==> $@) @tools/quiet bazel test --config lint //... -GO_BUILD_TAGS_ARG=$(shell bazel build --ui_event_filters=-stdout,-stderr --announce_rc --noshow_progress :dummy_setting 2>&1 | grep "'build' options" | sed -n "s/^.*--define gotags=\(\S*\).*/--build-tags \1/p" ) +GO_BUILD_TAGS_ARG=$(shell bazel info --ui_event_filters=-stdout,-stderr --announce_rc --noshow_progress 2>&1 | grep "'build' options" | sed -n "s/^.*--define gotags=\(\S*\).*/--build-tags \1/p" ) lint-go-golangci: $(info ==> $@) diff --git a/tools/update_testdata.sh b/tools/update_testdata.sh index 1d4874a31b..4a828803a0 100755 --- a/tools/update_testdata.sh +++ b/tools/update_testdata.sh @@ -10,7 +10,7 @@ folders=$(grep \ --exclude-dir=bazel-\* \ "xtest.UpdateGoldenFiles()" . | xargs dirname | sort | uniq ) -GO_BUILD_TAGS_ARG=$(bazel build --ui_event_filters=-stdout,-stderr --announce_rc --noshow_progress :dummy_setting 2>&1 | grep "'build' options" | sed -n "s/^.*--define gotags=\(\S*\).*/-tags \1/p") +GO_BUILD_TAGS_ARG=$(bazel info --ui_event_filters=-stdout,-stderr --announce_rc --noshow_progress 2>&1 | grep "'build' options" | sed -n "s/^.*--define gotags=\(\S*\).*/-tags \1/p") echo $folders -update | xargs go test ${GO_BUILD_TAGS_ARG} echo $folders -count=1 | xargs go test ${GO_BUILD_TAGS_ARG} From 8ba6b38202a24b79b6875562f6b1233932406a1c Mon Sep 17 00:00:00 2001 From: jiceatscion <139873336+jiceatscion@users.noreply.github.com> Date: Wed, 17 Jul 2024 11:28:40 +0200 Subject: [PATCH 4/5] router: implement a proper shutdown mechanism (#4578) The "running" flag is now accessed as an atomic (so with sequential ordering per the Go memory model). Added a shutdown entry point which cannot race with initialization. Fixes #4116 --- router/dataplane.go | 53 ++++++++++++++++++++++++------- router/dataplane_internal_test.go | 12 +++---- router/export_test.go | 2 +- 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/router/dataplane.go b/router/dataplane.go index 2fac11ac1c..8617230efa 100644 --- a/router/dataplane.go +++ b/router/dataplane.go @@ -27,6 +27,7 @@ import ( "net" "net/netip" "sync" + "sync/atomic" "time" "unsafe" @@ -184,7 +185,7 @@ type DataPlane struct { bfdSessions map[uint16]bfdSession localIA addr.IA mtx sync.Mutex - running bool + running atomic.Bool Metrics *Metrics forwardingMetrics map[uint16]interfaceMetrics dispatchedPortStart uint16 @@ -243,11 +244,39 @@ type drkeyProvider interface { ) (drkey.ASHostKey, error) } +// setRunning() Configures the running state of the data plane to true. setRunning() is called once +// the dataplane is finished initializing and is ready to process packets. +func (d *DataPlane) setRunning() { + d.running.Store(true) +} + +// setStopping() Configures the running state of the data plane to false. This should not be called +// during the dataplane initialization. Calling this before initialization starts has no effect. +func (d *DataPlane) setStopping() { + d.running.Store(false) +} + +// IsRunning() Indicates the running state of the data plane. If true, the dataplane is initialized +// and ready to process or already processing packets. In this case some configuration changes are +// not permitted. If false, the data plane is not ready to process packets yet, or is shutting +// down. +func (d *DataPlane) IsRunning() bool { + return d.running.Load() +} + +// Shutdown() causes the dataplane to stop accepting packets and then terminate. Note that +// in that case the router is committed to shutting down. There is no mechanism to restart it. +func (d *DataPlane) Shutdown() { + d.mtx.Lock() // make sure we're nor racing with initialization. + defer d.mtx.Unlock() + d.setStopping() +} + // SetIA sets the local IA for the dataplane. func (d *DataPlane) SetIA(ia addr.IA) error { d.mtx.Lock() defer d.mtx.Unlock() - if d.running { + if d.IsRunning() { return modifyExisting } if ia.IsZero() { @@ -265,7 +294,7 @@ func (d *DataPlane) SetIA(ia addr.IA) error { func (d *DataPlane) SetKey(key []byte) error { d.mtx.Lock() defer d.mtx.Unlock() - if d.running { + if d.IsRunning() { return modifyExisting } if len(key) == 0 { @@ -297,7 +326,7 @@ func (d *DataPlane) SetPortRange(start, end uint16) { func (d *DataPlane) AddInternalInterface(conn BatchConn, ip netip.Addr) error { d.mtx.Lock() defer d.mtx.Unlock() - if d.running { + if d.IsRunning() { return modifyExisting } if conn == nil { @@ -324,7 +353,7 @@ func (d *DataPlane) AddExternalInterface(ifID uint16, conn BatchConn, d.mtx.Lock() defer d.mtx.Unlock() - if d.running { + if d.IsRunning() { return modifyExisting } if conn == nil || !src.Addr.IsValid() || !dst.Addr.IsValid() { @@ -354,7 +383,7 @@ func (d *DataPlane) AddExternalInterface(ifID uint16, conn BatchConn, func (d *DataPlane) AddNeighborIA(ifID uint16, remote addr.IA) error { d.mtx.Lock() defer d.mtx.Unlock() - if d.running { + if d.IsRunning() { return modifyExisting } if remote.IsZero() { @@ -515,7 +544,7 @@ func (d *DataPlane) AddNextHop(ifID uint16, src, dst netip.AddrPort, cfg control d.mtx.Lock() defer d.mtx.Unlock() - if d.running { + if d.IsRunning() { return modifyExisting } if !dst.IsValid() || !src.IsValid() { @@ -585,7 +614,7 @@ type RunConfig struct { func (d *DataPlane) Run(ctx context.Context, cfg *RunConfig) error { d.mtx.Lock() - d.running = true + d.setRunning() d.initMetrics() processorQueueSize := max( @@ -718,7 +747,7 @@ func (d *DataPlane) runReceiver(ifID uint16, conn BatchConn, cfg *RunConfig, } } - for d.running { + for d.IsRunning() { // collect packets. // Give a new buffer to the msgs elements that have been used in the previous loop. @@ -778,7 +807,7 @@ func (d *DataPlane) runProcessor(id int, q <-chan *packet, log.Debug("Initialize processor with", "id", id) processor := newPacketProcessor(d) - for d.running { + for d.IsRunning() { p, ok := <-q if !ok { continue @@ -835,7 +864,7 @@ func (d *DataPlane) runSlowPathProcessor(id int, q <-chan *packet, log.Debug("Initialize slow-path processor with", "id", id) processor := newSlowPathProcessor(d) - for d.running { + for d.IsRunning() { p, ok := <-q if !ok { continue @@ -1012,7 +1041,7 @@ func (d *DataPlane) runForwarder(ifID uint16, conn BatchConn, cfg *RunConfig, c metrics := d.forwardingMetrics[ifID] toWrite := 0 - for d.running { + for d.IsRunning() { toWrite += readUpTo(c, cfg.BatchSize-toWrite, toWrite == 0, pkts[toWrite:]) // Turn the packets into underlay messages that WriteBatch can send. diff --git a/router/dataplane_internal_test.go b/router/dataplane_internal_test.go index 6c96b149bb..5c02410a87 100644 --- a/router/dataplane_internal_test.go +++ b/router/dataplane_internal_test.go @@ -71,7 +71,7 @@ func TestReceiver(t *testing.T) { ).Times(2) mInternal.EXPECT().ReadBatch(gomock.Any()).DoAndReturn( func(m underlayconn.Messages) (int, error) { - dp.running = false + dp.setStopping() done <- true return 0, nil }, @@ -86,7 +86,7 @@ func TestReceiver(t *testing.T) { dp.initPacketPool(runConfig, 64) procCh, _, _ := initQueues(runConfig, dp.interfaces, 64) initialPoolSize := len(dp.packetPool) - dp.running = true + dp.setRunning() dp.initMetrics() go func() { dp.runReceiver(0, dp.internal, runConfig, procCh) @@ -109,7 +109,7 @@ func TestReceiver(t *testing.T) { // make sure that the processing routine received exactly 20 messages if i != 20 { t.Fail() - dp.running = false + dp.setStopping() } } } @@ -160,7 +160,7 @@ func TestForwarder(t *testing.T) { } } if totalCount == 255 { - ret.running = false + ret.setStopping() done <- struct{}{} } if len(ms) == 0 { @@ -180,7 +180,7 @@ func TestForwarder(t *testing.T) { dp.initPacketPool(runConfig, 64) _, fwCh, _ := initQueues(runConfig, dp.interfaces, 64) initialPoolSize := len(dp.packetPool) - dp.running = true + dp.setRunning() dp.initMetrics() go dp.runForwarder(0, dp.internal, runConfig, fwCh[0]) @@ -211,7 +211,7 @@ func TestForwarder(t *testing.T) { assert.Equal(t, initialPoolSize, len(dp.packetPool)) case <-time.After(100 * time.Millisecond): t.Fail() - dp.running = false + dp.setStopping() } } diff --git a/router/export_test.go b/router/export_test.go index b175754402..e5f4cc0d18 100644 --- a/router/export_test.go +++ b/router/export_test.go @@ -96,7 +96,7 @@ func NewDP( } func (d *DataPlane) FakeStart() { - d.running = true + d.setRunning() } func (d *DataPlane) ProcessPkt(pkt *Packet) Disposition { From ad0d3bc87a29e80b4462dd8e9d1003d03ef6fbc5 Mon Sep 17 00:00:00 2001 From: jiceatscion <139873336+jiceatscion@users.noreply.github.com> Date: Wed, 17 Jul 2024 16:32:30 +0200 Subject: [PATCH 5/5] pkg/slayers: remove unnecessary sub-slicing (#4576) Fixes: #4094 --- pkg/slayers/path/scion/decoded.go | 2 +- pkg/slayers/path/scion/raw.go | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pkg/slayers/path/scion/decoded.go b/pkg/slayers/path/scion/decoded.go index b9db4888fd..627cb6adde 100644 --- a/pkg/slayers/path/scion/decoded.go +++ b/pkg/slayers/path/scion/decoded.go @@ -64,7 +64,7 @@ func (s *Decoded) SerializeTo(b []byte) error { return serrors.New("buffer too small to serialize path.", "expected", s.Len(), "actual", len(b)) } - if err := s.PathMeta.SerializeTo(b[:MetaLen]); err != nil { + if err := s.PathMeta.SerializeTo(b); err != nil { return err } offset := MetaLen diff --git a/pkg/slayers/path/scion/raw.go b/pkg/slayers/path/scion/raw.go index c8e9796298..5a758347bd 100644 --- a/pkg/slayers/path/scion/raw.go +++ b/pkg/slayers/path/scion/raw.go @@ -51,7 +51,10 @@ func (s *Raw) SerializeTo(b []byte) error { } // XXX(roosd): This modifies the underlying buffer. Consider writing to data // directly. - if err := s.PathMeta.SerializeTo(s.Raw[:MetaLen]); err != nil { + // TODO(matzf, jiceatscion): it is not clear whether updating pathmeta in s.Raw is desirable + // or not. It migh be best to make that question moot by not keeping the path meta header as + // raw bytes at all. However that's a viral change. + if err := s.PathMeta.SerializeTo(s.Raw); err != nil { return err } copy(b, s.Raw) @@ -82,7 +85,7 @@ func (s *Raw) Reverse() (path.Path, error) { // ToDecoded transforms a scion.Raw to a scion.Decoded. func (s *Raw) ToDecoded() (*Decoded, error) { // Serialize PathMeta to ensure potential changes are reflected Raw. - if err := s.PathMeta.SerializeTo(s.Raw[:MetaLen]); err != nil { + if err := s.PathMeta.SerializeTo(s.Raw); err != nil { return nil, err } decoded := &Decoded{} @@ -97,7 +100,7 @@ func (s *Raw) IncPath() error { if err := s.Base.IncPath(); err != nil { return err } - return s.PathMeta.SerializeTo(s.Raw[:MetaLen]) + return s.PathMeta.SerializeTo(s.Raw) } // GetInfoField returns the InfoField at a given index.