From 320f2aa8b940bb04ec1096caa3826de9a5e07fd3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 1 Oct 2024 19:47:18 +0000 Subject: [PATCH 1/5] tetragon: Cleanup policy/sensor/progs directories We did not properly cleanup the directory hierarchy we create for policy/sensor/program. Adding the missing cleanup and adding error check when creating the directories. Signed-off-by: Jiri Olsa --- pkg/sensors/load.go | 38 +++++++++++++++++++++++++++++++++++--- pkg/sensors/sensors.go | 2 ++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/pkg/sensors/load.go b/pkg/sensors/load.go index c7b6ff8d76f..f68fc33bc96 100644 --- a/pkg/sensors/load.go +++ b/pkg/sensors/load.go @@ -63,13 +63,43 @@ func LoadConfig(bpfDir string, sens []*Sensor) error { return nil } -func (s *Sensor) setupProgsPinPath(bpfDir string) { +func (s *Sensor) createDirs(bpfDir string) { for _, p := range s.Progs { // setup sensor based program pin path p.PinPath = filepath.Join(sanitize(s.Policy), s.Name, p.PinName) // and make the path - os.MkdirAll(filepath.Join(bpfDir, p.PinPath), os.ModeDir) + if err := os.MkdirAll(filepath.Join(bpfDir, p.PinPath), os.ModeDir); err != nil { + logger.GetLogger().WithError(err). + WithField("prog", p.PinName). + WithField("dir", p.PinPath). + Warn("Failed to create program dir") + } } + s.BpfDir = bpfDir +} + +func (s *Sensor) removeDirs() { + // Remove all the program dirs + for _, p := range s.Progs { + if err := os.Remove(filepath.Join(s.BpfDir, p.PinPath)); err != nil { + logger.GetLogger().WithError(err). + WithField("prog", p.PinName). + WithField("dir", p.PinPath). + Warn("Failed to remove program dir") + } + } + // Remove sensor dir + if err := os.Remove(filepath.Join(s.BpfDir, sanitize(s.Policy), s.Name)); err != nil { + logger.GetLogger().WithError(err). + WithField("sensor", s.Name). + WithField("dir", filepath.Join(sanitize(s.Policy), s.Name)). + Warn("Failed to remove sensor dir") + } + + // For policy dir the last one switches off the light.. there still + // might be other sensors in the policy, so the last sensors removed + // will succeed in removal policy dir. + os.Remove(filepath.Join(s.BpfDir, sanitize(s.Policy))) } // Load loads the sensor, by loading all the BPF programs and maps. @@ -87,7 +117,7 @@ func (s *Sensor) Load(bpfDir string) error { return fmt.Errorf("tetragon, aborting minimum requirements not met: %w", err) } - s.setupProgsPinPath(bpfDir) + s.createDirs(bpfDir) l := logger.GetLogger() @@ -155,6 +185,8 @@ func (s *Sensor) Unload() error { } } + s.removeDirs() + s.Loaded = false if s.PostUnloadHook != nil { diff --git a/pkg/sensors/sensors.go b/pkg/sensors/sensors.go index 56b1917b999..ff4ca2025dd 100644 --- a/pkg/sensors/sensors.go +++ b/pkg/sensors/sensors.go @@ -42,6 +42,8 @@ type Sensor struct { Name string // Policy name the sensor is part of. Policy string + // When loaded this contains bpffs root directory + BpfDir string // Progs are all the BPF programs that exist on the filesystem. Progs []*program.Program // Maps are all the BPF Maps that the progs use. From 6154d19f6c89f622a61f2c1e8da47c304af0fe78 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Oct 2024 11:22:44 +0000 Subject: [PATCH 2/5] tetragon: Cleanup directories fater failed sensor load Currently we do not remove map/progs bpffs hierarchy directories if we fail to load the sensor for some reason, fixing that. Signed-off-by: Jiri Olsa --- pkg/sensors/load.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pkg/sensors/load.go b/pkg/sensors/load.go index f68fc33bc96..43341b9994e 100644 --- a/pkg/sensors/load.go +++ b/pkg/sensors/load.go @@ -103,7 +103,7 @@ func (s *Sensor) removeDirs() { } // Load loads the sensor, by loading all the BPF programs and maps. -func (s *Sensor) Load(bpfDir string) error { +func (s *Sensor) Load(bpfDir string) (err error) { if s == nil { return nil } @@ -113,11 +113,16 @@ func (s *Sensor) Load(bpfDir string) error { } logger.GetLogger().WithField("metadata", cachedbtf.GetCachedBTFFile()).Info("BTF file: using metadata file") - if _, err := observerMinReqs(); err != nil { + if _, err = observerMinReqs(); err != nil { return fmt.Errorf("tetragon, aborting minimum requirements not met: %w", err) } s.createDirs(bpfDir) + defer func() { + if err != nil { + s.removeDirs() + } + }() l := logger.GetLogger() @@ -129,11 +134,11 @@ func (s *Sensor) Load(bpfDir string) error { _, verStr, _ := kernels.GetKernelVersion(option.Config.KernelVersion, option.Config.ProcFS) l.Infof("Loading kernel version %s", verStr) - if err := s.FindPrograms(); err != nil { + if err = s.FindPrograms(); err != nil { return fmt.Errorf("tetragon, aborting could not find BPF programs: %w", err) } - if err := s.loadMaps(bpfDir); err != nil { + if err = s.loadMaps(bpfDir); err != nil { return fmt.Errorf("tetragon, aborting could not load sensor BPF maps: %w", err) } @@ -144,7 +149,7 @@ func (s *Sensor) Load(bpfDir string) error { continue } - if err := observerLoadInstance(bpfDir, p); err != nil { + if err = observerLoadInstance(bpfDir, p); err != nil { return err } p.LoadState.RefInc() From 78d10e0bb13ffd1e29f75727bece5b28efde1027 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Oct 2024 12:24:52 +0000 Subject: [PATCH 3/5] tetragon: Move loop out of loadMaps function Moving loop out of loadMaps function, it will ease up following patch, there's no functional change. Signed-off-by: Jiri Olsa --- pkg/sensors/load.go | 120 ++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/pkg/sensors/load.go b/pkg/sensors/load.go index 43341b9994e..165e161c892 100644 --- a/pkg/sensors/load.go +++ b/pkg/sensors/load.go @@ -138,8 +138,10 @@ func (s *Sensor) Load(bpfDir string) (err error) { return fmt.Errorf("tetragon, aborting could not find BPF programs: %w", err) } - if err = s.loadMaps(bpfDir); err != nil { - return fmt.Errorf("tetragon, aborting could not load sensor BPF maps: %w", err) + for _, m := range s.Maps { + if err = s.loadMap(bpfDir, m); err != nil { + return fmt.Errorf("tetragon, aborting could not load sensor BPF maps: %w", err) + } } for _, p := range s.Progs { @@ -273,76 +275,74 @@ func (s *Sensor) setMapPinPath(m *program.Map) { } } -// loadMaps loads all the BPF maps in the sensor. -func (s *Sensor) loadMaps(bpfDir string) error { +// loadMap loads BPF map in the sensor. +func (s *Sensor) loadMap(bpfDir string, m *program.Map) error { l := logger.GetLogger() - for _, m := range s.Maps { - if m.PinState.IsLoaded() { - l.WithFields(logrus.Fields{ - "sensor": s.Name, - "map": m.Name, - }).Info("map is already loaded, incrementing reference count") - m.PinState.RefInc() - continue - } - - spec, err := ebpf.LoadCollectionSpec(m.Prog.Name) - if err != nil { - return fmt.Errorf("failed to open collection '%s': %w", m.Prog.Name, err) - } - mapSpec, ok := spec.Maps[m.Name] - if !ok { - return fmt.Errorf("map '%s' not found from '%s'", m.Name, m.Prog.Name) - } + if m.PinState.IsLoaded() { + l.WithFields(logrus.Fields{ + "sensor": s.Name, + "map": m.Name, + }).Info("map is already loaded, incrementing reference count") + m.PinState.RefInc() + return nil + } - s.setMapPinPath(m) - pinPath := filepath.Join(bpfDir, m.PinPath) + spec, err := ebpf.LoadCollectionSpec(m.Prog.Name) + if err != nil { + return fmt.Errorf("failed to open collection '%s': %w", m.Prog.Name, err) + } + mapSpec, ok := spec.Maps[m.Name] + if !ok { + return fmt.Errorf("map '%s' not found from '%s'", m.Name, m.Prog.Name) + } - if m.IsOwner() { - // If map is the owner we set configured max entries - // directly to map spec. - if max, ok := m.GetMaxEntries(); ok { - mapSpec.MaxEntries = max - } + s.setMapPinPath(m) + pinPath := filepath.Join(bpfDir, m.PinPath) - if innerMax, ok := m.GetMaxInnerEntries(); ok { - if innerMs := mapSpec.InnerMap; innerMs != nil { - mapSpec.InnerMap.MaxEntries = innerMax - } - } - } else { - // If map is NOT the owner we follow the max entries - // of the pinned map and update the spec with that. - max, err := program.GetMaxEntriesPinnedMap(pinPath) - if err != nil { - return err - } + if m.IsOwner() { + // If map is the owner we set configured max entries + // directly to map spec. + if max, ok := m.GetMaxEntries(); ok { mapSpec.MaxEntries = max + } - // 'm' is not the owner but for some reason requires max - // entries setup, make sure it matches the pinned map. - if max, ok := m.GetMaxEntries(); ok { - if mapSpec.MaxEntries != max { - return fmt.Errorf("failed to load map '%s' max entries mismatch: %d %d", - m.Name, mapSpec.MaxEntries, max) - } + if innerMax, ok := m.GetMaxInnerEntries(); ok { + if innerMs := mapSpec.InnerMap; innerMs != nil { + mapSpec.InnerMap.MaxEntries = innerMax } - - m.SetMaxEntries(int(max)) } - - if err := m.LoadOrCreatePinnedMap(pinPath, mapSpec); err != nil { - return fmt.Errorf("failed to load map '%s' for sensor '%s': %w", m.Name, s.Name, err) + } else { + // If map is NOT the owner we follow the max entries + // of the pinned map and update the spec with that. + max, err := program.GetMaxEntriesPinnedMap(pinPath) + if err != nil { + return err + } + mapSpec.MaxEntries = max + + // 'm' is not the owner but for some reason requires max + // entries setup, make sure it matches the pinned map. + if max, ok := m.GetMaxEntries(); ok { + if mapSpec.MaxEntries != max { + return fmt.Errorf("failed to load map '%s' max entries mismatch: %d %d", + m.Name, mapSpec.MaxEntries, max) + } } - l.WithFields(logrus.Fields{ - "sensor": s.Name, - "map": m.Name, - "path": pinPath, - "max": m.Entries, - }).Info("tetragon, map loaded.") + m.SetMaxEntries(int(max)) } + if err := m.LoadOrCreatePinnedMap(pinPath, mapSpec); err != nil { + return fmt.Errorf("failed to load map '%s' for sensor '%s': %w", m.Name, s.Name, err) + } + + l.WithFields(logrus.Fields{ + "sensor": s.Name, + "map": m.Name, + "path": pinPath, + "max": m.Entries, + }).Info("tetragon, map loaded.") + return nil } From 10790d2a902b1b52f4af89e7a41779bb91b4efd7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Oct 2024 12:25:54 +0000 Subject: [PATCH 4/5] tetragon: Cleanup progs/maps after failed sensor load Currently we do not unload progs/maps that were loaded prior the fail to load the sensor for some reason, fixing that. Signed-off-by: Jiri Olsa --- pkg/sensors/load.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/sensors/load.go b/pkg/sensors/load.go index 165e161c892..274c34a7c36 100644 --- a/pkg/sensors/load.go +++ b/pkg/sensors/load.go @@ -117,9 +117,20 @@ func (s *Sensor) Load(bpfDir string) (err error) { return fmt.Errorf("tetragon, aborting minimum requirements not met: %w", err) } + var ( + loadedMaps []*program.Map + loadedProgs []*program.Program + ) + s.createDirs(bpfDir) defer func() { if err != nil { + for _, m := range loadedMaps { + m.Unload() + } + for _, p := range loadedProgs { + unloadProgram(p) + } s.removeDirs() } }() @@ -142,6 +153,7 @@ func (s *Sensor) Load(bpfDir string) (err error) { if err = s.loadMap(bpfDir, m); err != nil { return fmt.Errorf("tetragon, aborting could not load sensor BPF maps: %w", err) } + loadedMaps = append(loadedMaps, m) } for _, p := range s.Progs { @@ -155,6 +167,7 @@ func (s *Sensor) Load(bpfDir string) (err error) { return err } p.LoadState.RefInc() + loadedProgs = append(loadedProgs, p) l.WithField("prog", p.Name).WithField("label", p.Label).Debugf("BPF prog was loaded") } From 53097375dde74a5493ebf002def6d12b1b8d0e19 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Oct 2024 12:41:52 +0000 Subject: [PATCH 5/5] tetragon: Add tests for proper cleanup after sensor unload Adding tests for proper cleanup after sensor unload. Signed-off-by: Jiri Olsa --- pkg/sensors/test/sensors_test.go | 130 +++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/pkg/sensors/test/sensors_test.go b/pkg/sensors/test/sensors_test.go index 4d6a7e5eeaa..d75a5dfbe5c 100644 --- a/pkg/sensors/test/sensors_test.go +++ b/pkg/sensors/test/sensors_test.go @@ -4,6 +4,7 @@ package test import ( + "os" "path/filepath" "testing" @@ -500,3 +501,132 @@ func TestMaxEntriesInnerMulti(t *testing.T) { // TODO, we need to check BTF for inner map max entries t.Skip() } + +func TestCleanup(t *testing.T) { + p1 := program.Builder( + "bpf_map_test_p1.o", + "wake_up_new_task", + "kprobe/wake_up_new_task", + "p1", + "kprobe", + ) + p2 := program.Builder( + "bpf_map_test_p2.o", + "wake_up_new_task", + "kprobe/wake_up_new_task", + "p2", + "kprobe", + ) + p3 := program.Builder( + "bpf_map_test_p3.o", + "wake_up_new_task", + "kprobe/wake_up_new_task", + "p3", + "badtype", + ) + + var err error + + verifyRemoved := func(files ...string) { + for _, f := range files { + _, err := os.Stat(filepath.Join(bpf.MapPrefixPath(), f)) + t.Logf("checking path: '%s'\n", f) + assert.Error(t, err) + } + } + + t.Run("single_ok", func(t *testing.T) { + m1 := program.MapBuilder("m1", p1) + m2 := program.MapBuilder("m2", p2) + + s1 := &sensors.Sensor{ + Name: "sensor1", + Progs: []*program.Program{p1}, + Maps: []*program.Map{m1, m2}, + Policy: "policy", + } + + err = s1.Load(bpf.MapPrefixPath()) + assert.NoError(t, err) + + s1.Unload() + verifyRemoved("m1", "m2", "policy") + }) + + t.Run("multi_ok", func(t *testing.T) { + m1 := program.MapBuilder("m1", p1) + m2 := program.MapBuilder("m2", p2) + + s1 := &sensors.Sensor{ + Name: "sensor1", + Progs: []*program.Program{p1}, + Maps: []*program.Map{m1, m2}, + Policy: "policy", + } + + s2 := &sensors.Sensor{ + Name: "sensor2", + Progs: []*program.Program{p2}, + Maps: []*program.Map{m1, m2}, + Policy: "policy", + } + + err = s1.Load(bpf.MapPrefixPath()) + assert.NoError(t, err) + err = s2.Load(bpf.MapPrefixPath()) + assert.NoError(t, err) + + s1.Unload() + verifyRemoved("policy/sensor1") + + s2.Unload() + verifyRemoved("m1", "m2", "policy") + }) + + t.Run("map_fail", func(t *testing.T) { + m1 := program.MapBuilder("m1", p1, p2) + // map with wrong name + m2 := program.MapBuilder("non-existing", p1, p2) + + s1 := &sensors.Sensor{ + Name: "sensor1", + Progs: []*program.Program{p1}, + Maps: []*program.Map{m1, m2}, + Policy: "policy", + } + + err = s1.Load(bpf.MapPrefixPath()) + assert.Error(t, err) + if err == nil { + defer s1.Unload() + } + + verifyRemoved("m1", "m2", "policy") + }) + + t.Run("prog_fail", func(t *testing.T) { + m1 := program.MapBuilder("m1", p1, p3) + s1 := &sensors.Sensor{ + Name: "sensor1", + Progs: []*program.Program{p1}, + Maps: []*program.Map{m1}, + Policy: "policy", + } + + s3 := &sensors.Sensor{ + Name: "sensor3", + Progs: []*program.Program{p3}, + Maps: []*program.Map{m1}, + Policy: "policy", + } + + err = s1.Load(bpf.MapPrefixPath()) + assert.NoError(t, err) + + err = s3.Load(bpf.MapPrefixPath()) + assert.Error(t, err) + + s1.Unload() + verifyRemoved("m1", "policy") + }) +}