Skip to content

Commit 7a978f9

Browse files
committed
chore(proctree): move hash computation to userspace
1 parent 6e46b3e commit 7a978f9

15 files changed

+286
-224
lines changed

pkg/ebpf/c/tracee.bpf.c

+45-45
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ int tracepoint__sched__sched_process_fork(struct bpf_raw_tracepoint_args *ctx)
706706
save_to_submit_buf(&p.event->args_buf, (void *) &parent_ns_pid, sizeof(int), 3);
707707
save_to_submit_buf(&p.event->args_buf, (void *) &parent_start_time, sizeof(u64), 4);
708708

709-
// Child (might be a lwp or a process, sched_process_fork trace is calle by clone() also).
709+
// Child (might be a lwp or a process, sched_process_fork trace is called by clone() also).
710710
save_to_submit_buf(&p.event->args_buf, (void *) &child_tid, sizeof(int), 5);
711711
save_to_submit_buf(&p.event->args_buf, (void *) &child_ns_tid, sizeof(int), 6);
712712
save_to_submit_buf(&p.event->args_buf, (void *) &child_pid, sizeof(int), 7);
@@ -719,7 +719,7 @@ int tracepoint__sched__sched_process_fork(struct bpf_raw_tracepoint_args *ctx)
719719
// as a parent of the child in the hierarchy), are needed by the userland process tree.
720720
// The userland process tree default source of events is the signal events, but there is
721721
// an option to use regular event for maintaining it as well (and it is needed for some
722-
// situatins). These arguments will always be removed by userland event processors.
722+
// situations). These arguments will always be removed by userland event processors.
723723
struct task_struct *leader = get_leader_task(child);
724724
struct task_struct *parent_process = get_leader_task(get_parent_task(leader));
725725

@@ -7035,26 +7035,31 @@ int sched_process_exec_signal(struct bpf_raw_tracepoint_args *ctx)
70357035
if (unlikely(signal == NULL))
70367036
return 0;
70377037

7038-
// Hashes
7039-
70407038
struct task_struct *task = (struct task_struct *) ctx->args[0];
70417039
if (task == NULL)
70427040
return -1;
70437041
struct task_struct *leader = get_leader_task(task);
70447042
struct task_struct *parent = get_leader_task(get_parent_task(leader));
70457043

7046-
// The hash is always calculated with "task_struct->pid + start_time".
7047-
u32 task_hash = hash_task_id(get_task_host_pid(task), get_task_start_time(task));
7048-
u32 parent_hash = hash_task_id(get_task_host_pid(parent), get_task_start_time(parent));
7049-
u32 leader_hash = hash_task_id(get_task_host_pid(leader), get_task_start_time(leader));
7050-
70517044
// The event timestamp, so process tree info can be changelog'ed.
70527045
u64 timestamp = get_current_time_in_ns();
70537046
save_to_submit_buf(&signal->args_buf, &timestamp, sizeof(u64), 0);
70547047

7055-
save_to_submit_buf(&signal->args_buf, (void *) &task_hash, sizeof(u32), 1);
7056-
save_to_submit_buf(&signal->args_buf, (void *) &parent_hash, sizeof(u32), 2);
7057-
save_to_submit_buf(&signal->args_buf, (void *) &leader_hash, sizeof(u32), 3);
7048+
// Fields required for hash computation (start times and pids)
7049+
7050+
u64 task_start_time = get_task_start_time(task);
7051+
u64 parent_start_time = get_task_start_time(parent);
7052+
u64 leader_start_time = get_task_start_time(leader);
7053+
save_to_submit_buf(&signal->args_buf, &task_start_time, sizeof(task_start_time), 1);
7054+
save_to_submit_buf(&signal->args_buf, &parent_start_time, sizeof(parent_start_time), 2);
7055+
save_to_submit_buf(&signal->args_buf, &leader_start_time, sizeof(leader_start_time), 3);
7056+
7057+
u32 task_pid = get_task_host_pid(task);
7058+
u32 parent_pid = get_task_host_pid(parent);
7059+
u32 leader_pid = get_task_host_pid(leader);
7060+
save_to_submit_buf(&signal->args_buf, &task_pid, sizeof(task_pid), 4);
7061+
save_to_submit_buf(&signal->args_buf, &parent_pid, sizeof(parent_pid), 5);
7062+
save_to_submit_buf(&signal->args_buf, &leader_pid, sizeof(leader_pid), 6);
70587063

70597064
// Exec logic
70607065

@@ -7081,18 +7086,18 @@ int sched_process_exec_signal(struct bpf_raw_tracepoint_args *ctx)
70817086
u64 ctime = get_ctime_nanosec_from_file(file);
70827087
umode_t inode_mode = get_inode_mode_from_file(file);
70837088

7084-
save_str_to_buf(&signal->args_buf, (void *) filename, 4); // executable name
7085-
save_str_to_buf(&signal->args_buf, file_path, 5); // executable path
7086-
save_to_submit_buf(&signal->args_buf, &s_dev, sizeof(dev_t), 6); // device number
7087-
save_to_submit_buf(&signal->args_buf, &inode_nr, sizeof(unsigned long), 7); // inode number
7088-
save_to_submit_buf(&signal->args_buf, &ctime, sizeof(u64), 8); // creation time
7089-
save_to_submit_buf(&signal->args_buf, &inode_mode, sizeof(umode_t), 9); // inode mode
7089+
save_str_to_buf(&signal->args_buf, (void *) filename, 7); // executable name
7090+
save_str_to_buf(&signal->args_buf, file_path, 8); // executable path
7091+
save_to_submit_buf(&signal->args_buf, &s_dev, sizeof(dev_t), 9); // device number
7092+
save_to_submit_buf(&signal->args_buf, &inode_nr, sizeof(unsigned long), 10); // inode number
7093+
save_to_submit_buf(&signal->args_buf, &ctime, sizeof(u64), 11); // creation time
7094+
save_to_submit_buf(&signal->args_buf, &inode_mode, sizeof(umode_t), 12); // inode mode
70907095

7091-
// The proc_info interpreter field is set by "load_elf_phdrs" kprobe program.
7092-
save_str_to_buf(&signal->args_buf, &proc_info->interpreter.pathname, 10); // interpreter path
7093-
save_to_submit_buf(&signal->args_buf, &proc_info->interpreter.id.device, sizeof(dev_t), 11); // interpreter device number
7094-
save_to_submit_buf(&signal->args_buf, &proc_info->interpreter.id.inode, sizeof(u64), 12); // interpreter inode number
7095-
save_to_submit_buf(&signal->args_buf, &proc_info->interpreter.id.ctime, sizeof(u64), 13); // interpreter creation time
7096+
// The proc_info interpreter field is set by "load_elf_phdrs" kprobe program
7097+
save_str_to_buf(&signal->args_buf, &proc_info->interpreter.pathname, 13); // interpreter path
7098+
save_to_submit_buf(&signal->args_buf, &proc_info->interpreter.id.device, sizeof(dev_t), 14); // interpreter device number
7099+
save_to_submit_buf(&signal->args_buf, &proc_info->interpreter.id.inode, sizeof(u64), 15); // interpreter inode number
7100+
save_to_submit_buf(&signal->args_buf, &proc_info->interpreter.id.ctime, sizeof(u64), 16); // interpreter creation time
70967101

70977102
struct mm_struct *mm = get_mm_from_task(task); // bprm->mm is null here, but task->mm is not
70987103

@@ -7108,12 +7113,12 @@ int sched_process_exec_signal(struct bpf_raw_tracepoint_args *ctx)
71087113

71097114
bool invoked_from_kernel = !!(get_task_parent_flags(task) & PF_KTHREAD);
71107115

7111-
save_args_str_arr_to_buf(&signal->args_buf, (void *) arg_start, (void *) arg_end, argc, 14); // argv
7112-
save_str_to_buf(&signal->args_buf, (void *) interp, 15); // interp
7113-
save_to_submit_buf(&signal->args_buf, &stdin_type, sizeof(unsigned short), 16); // stdin type
7114-
save_str_to_buf(&signal->args_buf, stdin_path, 17); // stdin path
7115-
save_to_submit_buf(&signal->args_buf, &invoked_from_kernel, sizeof(bool), 18); // invoked from kernel ?
7116-
7116+
save_args_str_arr_to_buf(&signal->args_buf, (void *) arg_start, (void *) arg_end, argc, 17); // argv
7117+
save_str_to_buf(&signal->args_buf, (void *) interp, 18); // interp
7118+
save_to_submit_buf(&signal->args_buf, &stdin_type, sizeof(unsigned short), 19); // stdin type
7119+
save_str_to_buf(&signal->args_buf, stdin_path, 20); // stdin path
7120+
save_to_submit_buf(&signal->args_buf, &invoked_from_kernel, sizeof(bool), 21); // invoked from kernel ?
7121+
71177122
signal_perf_submit(ctx, signal);
71187123

71197124
return 0;
@@ -7128,28 +7133,23 @@ int sched_process_exit_signal(struct bpf_raw_tracepoint_args *ctx)
71287133
if (unlikely(signal == NULL))
71297134
return 0;
71307135

7131-
// Hashes
7132-
71337136
struct task_struct *task = (struct task_struct *) bpf_get_current_task();
71347137
if (task == NULL)
71357138
return -1;
7136-
struct task_struct *leader = get_leader_task(task);
7137-
struct task_struct *parent = get_leader_task(get_parent_task(leader));
7138-
7139-
// The hash is always calculated with "task_struct->pid + start_time".
7140-
u32 task_hash = hash_task_id(get_task_host_pid(task), get_task_start_time(task));
7141-
u32 parent_hash = hash_task_id(get_task_host_pid(parent), get_task_start_time(parent));
7142-
u32 leader_hash = hash_task_id(get_task_host_pid(leader), get_task_start_time(leader));
71437139

71447140
// The event timestamp, so process tree info can be changelog'ed.
71457141
u64 timestamp = get_current_time_in_ns();
71467142
save_to_submit_buf(&signal->args_buf, &timestamp, sizeof(u64), 0);
71477143

7148-
save_to_submit_buf(&signal->args_buf, (void *) &task_hash, sizeof(u32), 1);
7149-
save_to_submit_buf(&signal->args_buf, (void *) &parent_hash, sizeof(u32), 2);
7150-
save_to_submit_buf(&signal->args_buf, (void *) &leader_hash, sizeof(u32), 3);
7144+
// Fields required for hash computation (start times and pids)
7145+
7146+
u64 task_start_time = get_task_start_time(task);
7147+
save_to_submit_buf(&signal->args_buf, &task_start_time, sizeof(task_start_time), 1);
7148+
7149+
pid_t task_pid = (pid_t) get_task_host_pid(task);
7150+
save_to_submit_buf(&signal->args_buf, &task_pid, sizeof(task_pid), 2);
71517151

7152-
// Exit logic.
7152+
// Exit logic
71537153

71547154
bool group_dead = false;
71557155
struct signal_struct *s = BPF_CORE_READ(task, signal);
@@ -7163,12 +7163,12 @@ int sched_process_exit_signal(struct bpf_raw_tracepoint_args *ctx)
71637163
int exit_code = get_task_exit_code(task);
71647164
int exit_code_real = exit_code >> 8;
71657165

7166-
save_to_submit_buf(&signal->args_buf, (void *) &exit_code_real, sizeof(int), 4);
7166+
save_to_submit_buf(&signal->args_buf, (void *) &exit_code_real, sizeof(int), 3);
71677167
if (task_flags & PF_SIGNALED) {
71687168
int signal_code = exit_code & 0xFF;
7169-
save_to_submit_buf(&signal->args_buf, (void *) &signal_code, sizeof(int), 5);
7169+
save_to_submit_buf(&signal->args_buf, (void *) &signal_code, sizeof(int), 4);
71707170
}
7171-
save_to_submit_buf(&signal->args_buf, (void *) &group_dead, sizeof(bool), 6);
7171+
save_to_submit_buf(&signal->args_buf, (void *) &group_dead, sizeof(bool), 5);
71727172

71737173
signal_perf_submit(ctx, signal);
71747174

pkg/ebpf/controlplane/controller.go

+41
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"github.com/aquasecurity/libbpfgo"
1010

1111
"github.com/aquasecurity/tracee/pkg/containers"
12+
"github.com/aquasecurity/tracee/pkg/errfmt"
1213
"github.com/aquasecurity/tracee/pkg/events"
1314
"github.com/aquasecurity/tracee/pkg/logger"
1415
"github.com/aquasecurity/tracee/pkg/proctree"
@@ -116,10 +117,50 @@ func (ctrl *Controller) processSignal(signal *signal) error {
116117
case events.SignalCgroupRmdir:
117118
return ctrl.processCgroupRmdir(signal.args)
118119
case events.SignalSchedProcessFork:
120+
err := events.NormalizeTimeArgs(
121+
signal.args,
122+
[]string{
123+
"timestamp",
124+
"parent_process_start_time",
125+
"leader_start_time",
126+
"start_time",
127+
},
128+
)
129+
if err != nil {
130+
signalName := events.Core.GetDefinitionByID(signal.id).GetName()
131+
return errfmt.Errorf("error normalizing time args for signal %s: %v", signalName, err)
132+
}
133+
119134
return ctrl.procTreeForkProcessor(signal.args)
120135
case events.SignalSchedProcessExec:
136+
err := events.NormalizeTimeArgs(
137+
signal.args,
138+
[]string{
139+
"timestamp",
140+
"task_start_time",
141+
"parent_start_time",
142+
"leader_start_time",
143+
},
144+
)
145+
if err != nil {
146+
signalName := events.Core.GetDefinitionByID(signal.id).GetName()
147+
return errfmt.Errorf("error normalizing time args for signal %s: %v", signalName, err)
148+
}
149+
121150
return ctrl.procTreeExecProcessor(signal.args)
122151
case events.SignalSchedProcessExit:
152+
err := events.NormalizeTimeArgs(
153+
signal.args,
154+
[]string{
155+
"timestamp",
156+
"task_start_time",
157+
},
158+
)
159+
if err != nil {
160+
signalName := events.Core.GetDefinitionByID(signal.id).GetName()
161+
return errfmt.Errorf("error normalizing time args for signal %s: %v", signalName, err)
162+
}
163+
123164
return ctrl.procTreeExitProcessor(signal.args)
124165
}
125166

pkg/ebpf/controlplane/processes.go

+40-30
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package controlplane
22

33
import (
44
"github.com/aquasecurity/tracee/pkg/events/parse"
5-
"github.com/aquasecurity/tracee/pkg/time"
65
"github.com/aquasecurity/tracee/pkg/utils"
76
"github.com/aquasecurity/tracee/types/trace"
87
)
@@ -95,19 +94,11 @@ func (ctrl *Controller) procTreeForkProcessor(args []trace.Argument) error {
9594
return err
9695
}
9796

98-
// Compute hashes using raw kernel start times without normalization.
99-
// The sched_process_fork signal is the only one that doesn't compute the hash in the kernel,
100-
// so it must be handled here.
97+
// Hashes
10198
forkFeed.ParentHash = utils.HashTaskID(uint32(forkFeed.ParentTid), forkFeed.ParentStartTime)
10299
forkFeed.LeaderHash = utils.HashTaskID(uint32(forkFeed.LeaderTid), forkFeed.LeaderStartTime)
103100
forkFeed.ChildHash = utils.HashTaskID(uint32(forkFeed.ChildTid), forkFeed.ChildStartTime)
104101

105-
// Normalize times
106-
forkFeed.TimeStamp = time.BootToEpochNS(forkFeed.TimeStamp)
107-
forkFeed.ParentStartTime = time.BootToEpochNS(forkFeed.ParentStartTime)
108-
forkFeed.LeaderStartTime = time.BootToEpochNS(forkFeed.LeaderStartTime)
109-
forkFeed.ChildStartTime = time.BootToEpochNS(forkFeed.ChildStartTime)
110-
111102
return ctrl.processTree.FeedFromFork(forkFeed)
112103
}
113104

@@ -123,24 +114,40 @@ func (ctrl *Controller) procTreeExecProcessor(args []trace.Argument) error {
123114
defer ctrl.processTree.PutExecFeedInPool(execFeed)
124115

125116
// not available from this signal
126-
execFeed.StartTime = 0
127117
execFeed.Pid = -1
128118
execFeed.Tid = -1
129119
execFeed.PPid = -1
130-
execFeed.HostPid = -1
131-
execFeed.HostTid = -1
132-
execFeed.HostPPid = -1
133120

134-
// Process & Event identification arguments (won't exist for regular events)
121+
// Process & Event identification arguments
135122
execFeed.TimeStamp, err = parse.ArgVal[uint64](args, "timestamp")
136123
if err != nil {
137124
return err
138125
}
139-
execFeed.TimeStamp = time.BootToEpochNS(execFeed.TimeStamp) // normalize time
126+
execFeed.StartTime, err = parse.ArgVal[uint64](args, "task_start_time")
127+
if err != nil {
128+
return err
129+
}
130+
parentStartTime, err := parse.ArgVal[uint64](args, "parent_start_time")
131+
if err != nil {
132+
return err
133+
}
134+
leaderStartTime, err := parse.ArgVal[uint64](args, "leader_start_time")
135+
if err != nil {
136+
return err
137+
}
140138

141-
execFeed.TaskHash, _ = parse.ArgVal[uint32](args, "task_hash")
142-
execFeed.ParentHash, _ = parse.ArgVal[uint32](args, "parent_hash")
143-
execFeed.LeaderHash, _ = parse.ArgVal[uint32](args, "leader_hash")
139+
execFeed.HostTid, err = parse.ArgVal[int32](args, "task_pid")
140+
if err != nil {
141+
return err
142+
}
143+
execFeed.HostPPid, err = parse.ArgVal[int32](args, "parent_pid")
144+
if err != nil {
145+
return err
146+
}
147+
execFeed.HostPid, err = parse.ArgVal[int32](args, "leader_pid")
148+
if err != nil {
149+
return err
150+
}
144151

145152
// Executable
146153
execFeed.CmdPath, err = parse.ArgVal[string](args, "cmdpath")
@@ -194,6 +201,11 @@ func (ctrl *Controller) procTreeExecProcessor(args []trace.Argument) error {
194201
return err
195202
}
196203

204+
// Hashes
205+
execFeed.TaskHash = utils.HashTaskID(uint32(execFeed.HostTid), execFeed.StartTime)
206+
execFeed.ParentHash = utils.HashTaskID(uint32(execFeed.HostPPid), parentStartTime)
207+
execFeed.LeaderHash = utils.HashTaskID(uint32(execFeed.HostPid), leaderStartTime)
208+
197209
return ctrl.processTree.FeedFromExec(execFeed)
198210
}
199211

@@ -212,25 +224,20 @@ func (ctrl *Controller) procTreeExitProcessor(args []trace.Argument) error {
212224
exitFeed := ctrl.processTree.GetExitFeedFromPool()
213225
defer ctrl.processTree.PutExitFeedInPool(exitFeed)
214226

215-
// Process & Event identification arguments (won't exist for regular events)
227+
// Process & Event identification arguments
216228
exitFeed.TimeStamp, err = parse.ArgVal[uint64](args, "timestamp")
217229
if err != nil {
218230
return err
219231
}
220-
exitFeed.TimeStamp = time.BootToEpochNS(exitFeed.TimeStamp) // normalize time
232+
startTime, err := parse.ArgVal[uint64](args, "task_start_time")
233+
if err != nil {
234+
return err
235+
}
221236

222-
exitFeed.TaskHash, err = parse.ArgVal[uint32](args, "task_hash")
237+
taskPid, err := parse.ArgVal[int32](args, "task_pid")
223238
if err != nil {
224239
return err
225240
}
226-
// exitFeed.ParentHash, err = parse.ArgVal[uint32](args, "parent_hash")
227-
// if err != nil {
228-
// return err
229-
// }
230-
// exitFeed.LeaderHash, err = parse.ArgVal[uint32](args, "leader_hash")
231-
// if err != nil {
232-
// return err
233-
// }
234241

235242
// // Exit logic arguments
236243
// exitFeed.ExitCode, err = parse.ArgVal[int32](args, "exit_code")
@@ -246,5 +253,8 @@ func (ctrl *Controller) procTreeExitProcessor(args []trace.Argument) error {
246253
// return err
247254
// }
248255

256+
// Hash
257+
exitFeed.TaskHash = utils.HashTaskID(uint32(taskPid), startTime)
258+
249259
return ctrl.processTree.FeedFromExit(exitFeed)
250260
}

pkg/ebpf/events_pipeline.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -273,10 +273,10 @@ func (t *Tracee) decodeEvents(ctx context.Context, sourceChan chan []byte) (<-ch
273273
evt.ContextFlags = flags
274274
evt.Syscall = syscall
275275
evt.Metadata = nil
276-
// compute hashes using kernel start times as-is, to be consistent with signals
277-
evt.ThreadEntityId = utils.HashTaskID(eCtx.HostTid, eCtx.StartTime)
278-
evt.ProcessEntityId = utils.HashTaskID(eCtx.HostPid, eCtx.LeaderStartTime)
279-
evt.ParentEntityId = utils.HashTaskID(eCtx.HostPpid, eCtx.ParentStartTime)
276+
// compute hashes using normalized times
277+
evt.ThreadEntityId = utils.HashTaskID(eCtx.HostTid, uint64(evt.ThreadStartTime))
278+
evt.ProcessEntityId = utils.HashTaskID(eCtx.HostPid, traceetime.BootToEpochNS(eCtx.LeaderStartTime))
279+
evt.ParentEntityId = utils.HashTaskID(eCtx.HostPpid, traceetime.BootToEpochNS(eCtx.ParentStartTime))
280280

281281
// If there aren't any policies that need filtering in userland, tracee **may** skip
282282
// this event, as long as there aren't any derivatives or signatures that depend on it.

pkg/ebpf/net_capture.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func (t *Tracee) processNetCapEvent(event *trace.Event) {
9797

9898
// sanity checks
9999

100-
payloadArg := events.GetArg(event, "payload")
100+
payloadArg := events.GetArg(event.Args, "payload")
101101
if payloadArg == nil {
102102
logger.Debugw("Network capture: no payload packet")
103103
return

0 commit comments

Comments
 (0)