From 1e144175002c29614908e4e6972548a1abb252e6 Mon Sep 17 00:00:00 2001 From: Christiano Haesbaert Date: Fri, 27 Sep 2024 14:42:38 +0200 Subject: [PATCH] Support older perf rings, needed for centos7 This is one step towards making centos7 kprobes work; the btfhub PR is still missing, but I've tested and it works, trust me, I'm an engineer. centos7 perf rings don't support clockid and comm_exec; we might need to add something back when comm_exec is removed, but for now let us get the probes to at least install. The offsets for `task_struct.pids` were wrong: I was mistaken in my first attempt because in newer kernels that member is an array of pointers, but in v3.10 it is an array of struct pid_link: https://elixir.bootlin.com/linux/v3.10/source/include/linux/pid.h#L69 and sizeof(struct pid_link) is 24: 16 for hlist_node, 8 for the pid pointer. This took me a while to see because they declare the two members of hlist_node on the same line, disappointing. --- kprobe_defs.h | 4 ++-- kprobe_queue.c | 40 +++++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/kprobe_defs.h b/kprobe_defs.h index e44470a..1245538 100644 --- a/kprobe_defs.h +++ b/kprobe_defs.h @@ -31,11 +31,11 @@ #define PWD_S(_t, _o) "task_struct.fs fs_struct.pwd.dentry " XS(RPT(_t, _o, dentry.d_parent)) " dentry.d_name.name +0" struct kprobe_arg ka_task_old_pgid = { - "pgid", XS(ARG_0), "u32", "task_struct.group_leader (task_struct.pids+8) (pid.numbers+0).upid.nr" + "pgid", XS(ARG_0), "u32", "task_struct.group_leader (task_struct.pids+40) (pid.numbers+0).upid.nr" }; struct kprobe_arg ka_task_old_sid = { - "sid", XS(ARG_0), "u32", "task_struct.group_leader (task_struct.pids+16) (pid.numbers+0).upid.nr" + "sid", XS(ARG_0), "u32", "task_struct.group_leader (task_struct.pids+64) (pid.numbers+0).upid.nr" }; struct kprobe_arg ka_task_new_pgid = { diff --git a/kprobe_queue.c b/kprobe_queue.c index 615dc3e..f2f516f 100644 --- a/kprobe_queue.c +++ b/kprobe_queue.c @@ -635,10 
+635,37 @@ perf_mmap_consume(struct perf_mmap *mmap)
 }
 
 static int
-perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
+perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
     int group_fd, unsigned long flags)
 {
-	return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/*
+ * perf_event_open() for kernels that may lack newer attr bits: on EINVAL
+ * retry with comm_exec cleared, then with use_clockid/clockid cleared.
+ * Returns the new fd (>= 0), or -1 with errno set; attr is updated in place.
+ */
+static int
+perf_event_open_degradable(struct perf_event_attr *attr, pid_t pid, int cpu,
+    int group_fd, unsigned long flags)
+{
+	int r;
+
+	for (;;) {
+		r = perf_event_open(attr, pid, cpu, group_fd, flags);
+		/* an fd (>= 0) is success; only EINVAL warrants degrading */
+		if (r >= 0 || errno != EINVAL)
+			return (r);
+		if (attr->comm_exec) {
+			attr->comm_exec = 0;
+		} else if (attr->use_clockid) {
+			attr->use_clockid = 0;
+			attr->clockid = 0;
+		} else {
+			return (-1);
+		}
+	}
 }
 
 static int
@@ -1071,9 +1098,6 @@ perf_attr_init(struct perf_event_attr *attr, int id)
 	    PERF_SAMPLE_CPU |
 	    PERF_SAMPLE_RAW;
 
-	/* attr->read_format = PERF_FORMAT_LOST; */
-	/* attr->mmap2 */
-	/* XXX Should we set clock in the child as well? XXX */
 	attr->use_clockid = 1;
 	attr->clockid = CLOCK_MONOTONIC;
 	attr->disabled = 1;
@@ -1105,8 +1129,9 @@ perf_open_group_leader(struct kprobe_queue *kqq, int cpu)
 	pgl->attr.watermark = 1;
 	pgl->attr.wakeup_watermark = (PERF_MMAP_PAGES * getpagesize()) / 10;;
 
-	pgl->fd = perf_event_open(&pgl->attr, -1, cpu, -1, 0);
+	pgl->fd = perf_event_open_degradable(&pgl->attr, -1, cpu, -1, 0);
 	if (pgl->fd == -1) {
+		warn("perf_event_open_degradable");
 		free(pgl);
 		return (NULL);
 	}
@@ -1145,8 +1170,9 @@ perf_open_kprobe(struct kprobe_queue *kqq, struct kprobe *k,
 		return (NULL);
 	}
 	perf_attr_init(&ks->attr, id);
-	ks->fd = perf_event_open(&ks->attr, -1, cpu, group_fd, 0);
+	ks->fd = perf_event_open_degradable(&ks->attr, -1, cpu, group_fd, 0);
 	if (ks->fd == -1) {
+		warn("perf_event_open_degradable");
 		free(ks);
 		return (NULL);
 	}