Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ebpf): ignore kthreads, do full pids cleanup #2778

Merged
merged 4 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions ebpf/bpf/profile.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include "bpf_tracing.h"
#include "profile.bpf.h"
#include "pid.h"
#include "ume.h"

#define PF_KTHREAD 0x00200000

SEC("perf_event")
int do_perf_event(struct bpf_perf_event_data *ctx) {
Expand All @@ -14,9 +17,20 @@ int do_perf_event(struct bpf_perf_event_data *ctx) {
struct sample_key key = {};
u32 *val, one = 1;

if (tgid == 0) {
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
if (tgid == 0 || task == 0) {
return 0;
}
int flags = 0;
if (pyro_bpf_core_read(&flags, sizeof(flags), &task->flags)) {
bpf_dbg_printk("failed to read task->flags\n");
return 0;
}
if (flags & PF_KTHREAD) {
bpf_dbg_printk("skipping kthread %d\n", tgid);
return 0;
}

struct pid_config *config = bpf_map_lookup_elem(&pids, &tgid);
if (config == NULL) {
struct pid_config unknown = {
Expand All @@ -25,7 +39,10 @@ int do_perf_event(struct bpf_perf_event_data *ctx) {
.collect_user = 0,
.padding_ = 0
};
bpf_map_update_elem(&pids, &tgid, &unknown, BPF_NOEXIST);
if (bpf_map_update_elem(&pids, &tgid, &unknown, BPF_NOEXIST)) {
bpf_dbg_printk("failed to update pids map. probably concurrent update\n");
return 0;
}
struct pid_event event = {
.op = OP_REQUEST_UNKNOWN_PROCESS_INFO,
.pid = tgid
Expand Down
Binary file modified ebpf/pyrobpf/profile_bpfel_arm64.o
Binary file not shown.
Binary file modified ebpf/pyrobpf/profile_bpfel_x86.o
Binary file not shown.
38 changes: 38 additions & 0 deletions ebpf/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,44 @@ func (s *session) cleanup() {
}
}
}

if s.roundNumber%10 == 0 {
s.checkStalePids()
}
}

// iterate over all pids and check if they are alive
// it is only needed in case disassociate_ctty hook somehow mises a process death
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// it is only needed in case disassociate_ctty hook somehow mises a process death
// it is only needed in case disassociate_ctty hook somehow misses a process death

func (s *session) checkStalePids() {
var (
m = s.bpf.Pids
mapSize = m.MaxEntries()
nextKey = uint32(0)
)
keys := make([]uint32, mapSize)
values := make([]pyrobpf.ProfilePidConfig, mapSize)
n, err := m.BatchLookup(nil, &nextKey, keys, values, new(ebpf.BatchOptions))
_ = level.Debug(s.logger).Log("msg", "check stale pids", "count", n)
for i := 0; i < n; i++ {
_, err := os.Stat(fmt.Sprintf("/proc/%d/status", keys[i]))
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
_ = level.Error(s.logger).Log("msg", "check stale pids", "err", err)
}
if err := m.Delete(keys[i]); err != nil && !errors.Is(err, ebpf.ErrKeyNotExist) {
_ = level.Error(s.logger).Log("msg", "delete stale pid", "pid", keys[i], "err", err)
}
_ = level.Debug(s.logger).Log("msg", "stale pid deleted", "pid", keys[i])
continue
} else {
_ = level.Debug(s.logger).Log("msg", "stale pid check : alive", "pid", keys[i], "config", fmt.Sprintf("%+v", values[i]))
}
}
if err != nil {
if !errors.Is(err, ebpf.ErrKeyNotExist) {
_ = level.Error(s.logger).Log("msg", "check stale pids", "err", err)
}
}
}

type stackBuilder struct {
Expand Down
31 changes: 17 additions & 14 deletions ebpf/symtab/elf.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ func (et *ElfTable) load() {

me, err := elf2.NewMMapedElfFile(fsElfFilePath)
if err != nil {
et.err = err
et.onLoadError()
et.onLoadError(err)
return
}
defer me.Close() // todo do not close if it is the selected elf
Expand All @@ -104,8 +103,7 @@ func (et *ElfTable) load() {
}
buildID, err := me.BuildID()
if err != nil && !errors.Is(err, elf2.ErrNoBuildIDSection) {
et.err = err
et.onLoadError()
et.onLoadError(err)
return
}

Expand All @@ -117,8 +115,7 @@ func (et *ElfTable) load() {
}
fileInfo, err := os.Stat(fsElfFilePath)
if err != nil {
et.err = err
et.onLoadError()
et.onLoadError(err)
return
}
symbols = et.options.ElfCache.GetSymbolsByStat(statFromFileInfo(fileInfo))
Expand All @@ -132,16 +129,14 @@ func (et *ElfTable) load() {
if debugFilePath != "" {
debugMe, err := elf2.NewMMapedElfFile(path.Join(et.fs, debugFilePath))
if err != nil {
et.err = err
et.onLoadError()
et.onLoadError(err)
return
}
defer debugMe.Close() // todo do not close if it is the selected elf

symbols, err = et.createSymbolTable(debugMe)
if err != nil {
et.err = err
et.onLoadError()
et.onLoadError(err)
return
}
et.table = symbols
Expand All @@ -152,8 +147,7 @@ func (et *ElfTable) load() {
symbols, err = et.createSymbolTable(me)
level.Debug(et.logger).Log("msg", "create symbol table", "f", me.FilePath())
if err != nil {
et.err = err
et.onLoadError()
et.onLoadError(err)
return
}

Expand Down Expand Up @@ -326,8 +320,17 @@ func (et *ElfTable) DebugInfo() elf2.SymTabDebugInfo {
return et.table.DebugInfo()
}

func (et *ElfTable) onLoadError() {
level.Error(et.logger).Log("msg", "failed to load elf table", "err", et.err,
func (et *ElfTable) onLoadError(err error) {
et.err = err
var l log.Logger
if errors.Is(err, os.ErrNotExist) {
l = level.Debug(et.logger)
} else {
l = level.Error(et.logger)
}
l.Log(
"msg", "failed to load elf table",
"err", et.err,
"f", et.elfFilePath,
"fs", et.fs)
if et.options.Metrics != nil {
Expand Down
Loading