Skip to content

Commit

Permalink
ebpf: aggregate python stacks in ebpf program instead of userspace (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
korniltsev authored Feb 19, 2024
1 parent 6d987ed commit 764a83f
Show file tree
Hide file tree
Showing 20 changed files with 444 additions and 470 deletions.
44 changes: 44 additions & 0 deletions ebpf/bpf/hash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@


// MurmurHash64A (MurmurHash2, 64-bit variant) adapted for eBPF from
// https://github.com/aappleby/smhasher/blob/92cf3702fcfaadc84eb7bef59825a23e0cd84f56/src/MurmurHash2.cpp
// via https://github.com/parca-dev/parca-agent/blob/main/bpf/unwinders/hash.h

// BPF headers normally provide __always_inline; define it when absent so the
// header is also usable standalone.
#ifndef __always_inline
#define __always_inline inline __attribute__((always_inline))
#endif

// Hash limit in bytes, set to the byte size of the python stack
// (PYTHON_STACK_MAX_LEN * sizeof(py_symbol_id); pyperf.bpf.c has a
// compile-time check that the two stay in sync). Parenthesized so the macro
// expands safely inside larger expressions.
#define HASH_LIMIT (32 * 3 * 4)

// Hash `len` bytes of `key` with the given `seed`.
// len must be a multiple of 4 and at most HASH_LIMIT; at most HASH_LIMIT
// bytes are ever read.
static __always_inline uint64_t MurmurHash64A ( const void * key, uint64_t len, uint64_t seed )
{
    const uint64_t m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;

    uint64_t h = seed ^ (len * m);

    const uint64_t * data = key;
    int i = 0;
    // The constant HASH_LIMIT/8 bound gives the loop a compile-time trip
    // count so the eBPF verifier can prove termination.
    for (; i < len/8 && i < HASH_LIMIT/8; i++)
    {
        uint64_t k = data[i];

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    // Tail: since len is a multiple of 4, the remainder is either 0 or 4
    // bytes, so a single 32-bit load suffices. (This intentionally differs
    // from the canonical byte-by-byte tail of MurmurHash64A.)
    const unsigned char * data2 = (const unsigned char*)&data[i];
    if (len & 7)
    {
        h ^= (uint64_t)(((uint32_t*)data2)[0]);
        h *= m;
    }

    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}
12 changes: 0 additions & 12 deletions ebpf/bpf/profile.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@



struct sample_key {
__u32 pid;
__u32 flags;
__s64 kern_stack;
__s64 user_stack;
};

#define PROFILING_TYPE_UNKNOWN 1
#define PROFILING_TYPE_FRAMEPOINTERS 2
Expand Down Expand Up @@ -65,11 +59,5 @@ struct {
#include "stacks.h"


struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct sample_key);
__type(value, u32);
__uint(max_entries, PROFILE_MAPS_SIZE);
} counts SEC(".maps");

#endif // PROFILE_BPF_H
115 changes: 65 additions & 50 deletions ebpf/bpf/pyperf.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,15 @@
#include "stacks.h"
#include "pystr.h"
#include "pyoffsets.h"
#include "hash.h"

#define PYTHON_STACK_FRAMES_PER_PROG 25
#define PYTHON_STACK_FRAMES_PER_PROG 32
#define PYTHON_STACK_PROG_CNT 3
#define PYTHON_STACK_MAX_LEN (PYTHON_STACK_FRAMES_PER_PROG * PYTHON_STACK_PROG_CNT)
#define PYTHON_CLASS_NAME_LEN 32
#define PYTHON_FUNCTION_NAME_LEN 64
#define PYTHON_FILE_NAME_LEN 128

enum {
STACK_STATUS_COMPLETE = 0,
STACK_STATUS_ERROR = 1,
STACK_STATUS_TRUNCATED = 2,
};

enum {
PY_ERROR_GENERIC = 1,
PY_ERROR_THREAD_STATE = 2,
Expand All @@ -42,6 +37,14 @@ enum {

};

// Logging switches, presumably written by the userspace loader into the
// program's read-only data before attach (const volatile rodata pattern) —
// TODO confirm against the loader code.
struct global_config_t {
    uint8_t bpf_log_err;   // non-zero: emit error-level bpf_printk output
    uint8_t bpf_log_debug; // non-zero: emit debug-level bpf_printk output
};

const volatile struct global_config_t global_config;
// Wrapped in do { } while (0) so each macro behaves as a single statement
// and is safe inside an unbraced if/else (the bare-if form would capture a
// following `else`).
#define log_error(fmt, ...) do { if (global_config.bpf_log_err) bpf_printk(fmt, ##__VA_ARGS__); } while (0)
#define log_debug(fmt, ...) do { if (global_config.bpf_log_debug) bpf_printk(fmt, ##__VA_ARGS__); } while (0)

typedef struct {
uint32_t major;
Expand All @@ -54,6 +57,7 @@ typedef struct {
py_version version;
struct libc libc;
int32_t tssKey;
uint8_t collect_kernel;
} py_pid_data;

typedef struct {
Expand All @@ -72,17 +76,14 @@ typedef struct {
} py_symbol;


typedef uint32_t py_symbol_id;

typedef struct {
uint8_t stack_status;
uint8_t err;
uint8_t reserved2;
uint8_t reserved3;
uint32_t pid;
int64_t kern_stack;
struct sample_key k;
uint32_t stack_len;
// instead of storing symbol name here directly, we add it to another
// hashmap with Symbols and only store the ids here
uint32_t stack_len;
uint32_t stack[PYTHON_STACK_MAX_LEN];
py_symbol_id stack[PYTHON_STACK_MAX_LEN];
} py_event;

#define _STR_CONCAT(str1, str2) str1##str2
Expand All @@ -93,24 +94,33 @@ typedef struct {
} STR_CONCAT(compile_time_condition_check, __COUNTER__);
// See comments in get_frame_data
FAIL_COMPILATION_IF(sizeof(py_symbol) == sizeof(struct bpf_perf_event_value))
FAIL_COMPILATION_IF(HASH_LIMIT != PYTHON_STACK_MAX_LEN * sizeof(py_symbol_id))

typedef struct {
int64_t symbol_counter;
py_offset_config offsets;
uint32_t cur_cpu;
uint64_t frame_ptr;
int64_t python_stack_prog_call_cnt;
py_symbol sym;
py_event event;
uint64_t padding;// satisfy verifier for hash function
} py_sample_state_t;

// Deduplicated python stacks: murmur hash of the symbol-id array -> the
// array itself (PYTHON_STACK_MAX_LEN py_symbol_id entries), written by
// submit_sample and resolved by userspace via sample_key.user_stack.
// NOTE(review): key_size is sizeof(u32) but submit_sample passes a u64
// murmur hash as the key — confirm truncation to 32 bits is intended.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(u32));
__uint(value_size, PYTHON_STACK_MAX_LEN * sizeof(py_symbol_id));
__uint(max_entries, PROFILE_MAPS_SIZE);
} python_stacks SEC(".maps");

// Single-slot per-CPU scratch area holding the in-progress sample state
// (py_sample_state_t is too large for the BPF stack, so it lives here).
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
__type(value, py_sample_state_t);
__uint(max_entries, 1);
} py_state_heap SEC(".maps");

typedef uint32_t py_symbol_id;

struct {
__uint(type, BPF_MAP_TYPE_HASH);
Expand Down Expand Up @@ -144,32 +154,38 @@ struct {
};


struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(u32));
} py_events SEC(".maps");

// Read the current thread's PyThreadState pointer out of thread-local
// storage via pthread_getspecific emulation, using the process's stored
// tssKey. Returns 0 on success (result in *out_thread_state), non-zero on
// failure.
static __always_inline int get_thread_state(
py_pid_data *pid_data,
void **out_thread_state) {
return pyro_pthread_getspecific(&pid_data->libc, pid_data->tssKey, out_thread_state);
}

static __always_inline int submit_sample(
void *ctx,
py_sample_state_t *state) {
bpf_perf_event_output(ctx, &py_events, BPF_F_CURRENT_CPU, &state->event, sizeof(py_event));
py_sample_state_t* state) {
uint32_t one = 1;
if (state->event.stack_len < PYTHON_STACK_MAX_LEN) {
state->event.stack[state->event.stack_len] = 0;
}
u64 h = MurmurHash64A(&state->event.stack, state->event.stack_len * sizeof(state->event.stack[0]), 0);
state->event.k.user_stack = h;
if (bpf_map_update_elem(&python_stacks, &h, &state->event.stack, BPF_ANY)) {
return -1;
}
uint32_t* val = bpf_map_lookup_elem(&counts, &state->event.k);
if (val) {
(*val)++;
}
else {
bpf_map_update_elem(&counts, &state->event.k, &one, BPF_NOEXIST);
}


return 0;
}

static __always_inline int submit_error_sample(
void *ctx,
py_sample_state_t *state, uint8_t err) {
state->event.stack_status = STACK_STATUS_ERROR;
state->event.err = err;
bpf_perf_event_output(ctx, &py_events, BPF_F_CURRENT_CPU, &state->event,
offsetof(py_event, kern_stack) + sizeof(state->event.kern_stack));
uint8_t err) { //todo replace with more useful log
log_error("pyperf_err: %d\n", err);
return -1;
}

Expand Down Expand Up @@ -218,7 +234,7 @@ static __always_inline int get_top_frame(py_pid_data *pid_data, py_sample_state_
return 0;
}

static __always_inline int pyperf_collect_impl(struct bpf_perf_event_data *ctx, pid_t pid, bool collect_kern_stack) {
static __always_inline int pyperf_collect_impl(struct bpf_perf_event_data* ctx, pid_t pid) {
py_pid_data *pid_data = bpf_map_lookup_elem(&py_pid_config, &pid);
if (!pid_data) {
return 0;
Expand All @@ -231,33 +247,32 @@ static __always_inline int pyperf_collect_impl(struct bpf_perf_event_data *ctx,
state->python_stack_prog_call_cnt = 0;

py_event *event = &state->event;
event->pid = pid;
if (collect_kern_stack) {
event->kern_stack = bpf_get_stackid(ctx, &stacks, KERN_STACKID_FLAGS);
event->k.pid = pid;
if (pid_data->collect_kernel) {
event->k.kern_stack = bpf_get_stackid(ctx, &stacks, KERN_STACKID_FLAGS);
} else {
event->kern_stack = -1;
event->k.kern_stack = -1;
}


// Read PyThreadState of this Thread from TLS
void *thread_state;
if (get_thread_state(pid_data, &thread_state)) {
return submit_error_sample(ctx, state, PY_ERROR_THREAD_STATE);
return submit_error_sample(PY_ERROR_THREAD_STATE);
}

// pre-initialize event struct in case any subprogram below fails
event->stack_status = STACK_STATUS_COMPLETE;
event->stack_len = 0;

if (thread_state != 0) {
if (get_top_frame(pid_data, state, thread_state)) {
return submit_error_sample(ctx, state, PY_ERROR_TOP_FRAME);
return submit_error_sample(PY_ERROR_TOP_FRAME);
}
// jump to reading first set of Python frames
bpf_tail_call(ctx, &py_progs, PYTHON_PROG_IDX_READ_PYTHON_STACK);
// we won't ever get here
}
return submit_error_sample(ctx, state, PY_ERROR_THREAD_STATE_NULL);
return submit_error_sample(PY_ERROR_THREAD_STATE_NULL);
}

SEC("perf_event")
Expand All @@ -267,7 +282,7 @@ int pyperf_collect(struct bpf_perf_event_data *ctx) {
if (pid == 0) {
return 0;
}
return pyperf_collect_impl(ctx, (pid_t) pid, false); // todo allow configuring it
return pyperf_collect_impl(ctx, (pid_t) pid);
}


Expand Down Expand Up @@ -510,21 +525,21 @@ int read_python_stack(struct bpf_perf_event_data *ctx) {
state->python_stack_prog_call_cnt++;
py_event *sample = &state->event;

py_symbol sym = {};
int last_res;
py_symbol *sym = &state->sym;
#pragma unroll
for (int i = 0; i < PYTHON_STACK_FRAMES_PER_PROG; i++) {
last_res = get_frame_data((void **) &state->frame_ptr, &state->offsets, &sym, ctx);
last_res = get_frame_data((void **) &state->frame_ptr, &state->offsets, sym, ctx);
if (last_res < 0) {
return submit_error_sample(ctx, state, (uint8_t) (-last_res));
return submit_error_sample((uint8_t) (-last_res));
}
if (last_res == 0) {
break;
}
if (last_res == 1) {
py_symbol_id symbol_id;
if (get_symbol_id(state, &sym, &symbol_id)) {
return submit_error_sample(ctx, state, PY_ERROR_SYMBOL);
if (get_symbol_id(state, sym, &symbol_id)) {
return submit_error_sample(PY_ERROR_SYMBOL);
}
uint32_t cur_len = sample->stack_len;
if (cur_len < PYTHON_STACK_MAX_LEN) {
Expand All @@ -535,19 +550,19 @@ int read_python_stack(struct bpf_perf_event_data *ctx) {
}

if (last_res == 0) {
sample->stack_status = STACK_STATUS_COMPLETE;
sample->k.flags = SAMPLE_KEY_FLAG_PYTHON_STACK;
} else {
sample->stack_status = STACK_STATUS_TRUNCATED;
sample->k.flags = (SAMPLE_KEY_FLAG_PYTHON_STACK|SAMPLE_KEY_FLAG_STACK_TRUNCATED);
}

if (sample->stack_status == STACK_STATUS_TRUNCATED &&
if (sample->k.flags == (SAMPLE_KEY_FLAG_PYTHON_STACK|SAMPLE_KEY_FLAG_STACK_TRUNCATED) &&
state->python_stack_prog_call_cnt < PYTHON_STACK_PROG_CNT) {
// read next batch of frames
bpf_tail_call(ctx, &py_progs, PYTHON_PROG_IDX_READ_PYTHON_STACK);
return -1;
}

return submit_sample(ctx, state);
return submit_sample(state);
}

#endif // PYPERF_H
Expand Down
17 changes: 17 additions & 0 deletions ebpf/bpf/stacks.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)

// Bit flags for sample_key.flags.
#define SAMPLE_KEY_FLAG_PYTHON_STACK 1    // user_stack holds a python stack hash, not a stackmap id
#define SAMPLE_KEY_FLAG_STACK_TRUNCATED 2 // python stack exceeded PYTHON_STACK_MAX_LEN and was cut short

// Aggregation key for the counts map: one entry per unique
// (pid, flags, kernel stack, user stack) combination.
struct sample_key {
__u32 pid;        // target process id
__u32 flags;      // SAMPLE_KEY_FLAG_* bits
__s64 kern_stack; // stackmap id from bpf_get_stackid(), or -1 when kernel stacks are not collected
__s64 user_stack; // stackmap id, or the python stack's murmur hash when SAMPLE_KEY_FLAG_PYTHON_STACK is set
};

struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
Expand All @@ -16,4 +25,12 @@ struct {
} stacks SEC(".maps");


// Per-sample_key hit counters, incremented in the BPF programs
// (see submit_sample) and presumably read out by userspace when building
// the profile — confirm against the Go collector.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct sample_key);
__type(value, u32);
__uint(max_entries, PROFILE_MAPS_SIZE);
} counts SEC(".maps");


#endif
Loading

0 comments on commit 764a83f

Please sign in to comment.