Skip to content

Commit

Permalink
bpf: permit multiple bpf attachments for a single perf event
Browse files Browse the repository at this point in the history
This patch enables multiple bpf attachments for a single
kprobe/uprobe/tracepoint trace event.
Each trace_event keeps a list of attached perf events.
When an event happens, all attached bpf programs will
be executed in the order of attachment.

A global bpf_event_mutex lock is introduced to protect
prog_array attaching and detaching. An alternative would
be to introduce a mutex lock in every trace_event_call
structure, but that would take a lot of extra memory.
So a global bpf_event_mutex lock is a good compromise.

The bpf prog detachment involves allocation of memory.
If the allocation fails, a dummy do-nothing program
will replace the to-be-detached program in place.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
yonghong-song authored and davem330 committed Oct 25, 2017
1 parent 0b4c684 commit e87c6bc
Show file tree
Hide file tree
Showing 9 changed files with 255 additions and 56 deletions.
30 changes: 25 additions & 5 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
__u32 __user *prog_ids, u32 cnt);

#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
struct bpf_prog *old_prog);
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);

#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
struct bpf_prog **_prog; \
struct bpf_prog **_prog, *__prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
rcu_read_lock(); \
_prog = rcu_dereference(array)->progs; \
for (; *_prog; _prog++) \
_ret &= func(*_prog, ctx); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
goto _out; \
_prog = _array->progs; \
while ((__prog = READ_ONCE(*_prog))) { \
_ret &= func(__prog, ctx); \
_prog++; \
} \
_out: \
rcu_read_unlock(); \
_ret; \
})

/*
 * Run the programs in @array with @ctx through @func.
 * Passes check_non_null == false, so the dereferenced array pointer is
 * used without a NULL check.
 */
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
__BPF_PROG_RUN_ARRAY(array, ctx, func, false)

/*
 * Same as BPF_PROG_RUN_ARRAY(), but passes check_non_null == true:
 * when the dereferenced array pointer is NULL no program is run and the
 * expression evaluates to 1.
 */
#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \
__BPF_PROG_RUN_ARRAY(array, ctx, func, true)

#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);

Expand Down
43 changes: 39 additions & 4 deletions include/linux/trace_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,14 +271,37 @@ struct trace_event_call {
#ifdef CONFIG_PERF_EVENTS
int perf_refcount;
struct hlist_head __percpu *perf_events;
struct bpf_prog *prog;
struct perf_event *bpf_prog_owner;
struct bpf_prog_array __rcu *prog_array;

int (*perf_perm)(struct trace_event_call *,
struct perf_event *);
#endif
};

#ifdef CONFIG_PERF_EVENTS
static inline bool bpf_prog_array_valid(struct trace_event_call *call)
{
/*
* This inline function checks whether call->prog_array
* is valid or not. The function is called in various places,
* outside rcu_read_lock/unlock, as a heuristic to speed up execution.
*
* If this function returns true, and later call->prog_array
* becomes false inside rcu_read_lock/unlock region,
* we bail out then. If this function return false,
* there is a risk that we might miss a few events if the checking
* were delayed until inside rcu_read_lock/unlock region and
* call->prog_array happened to become non-NULL then.
*
* Here, READ_ONCE() is used instead of rcu_access_pointer().
* rcu_access_pointer() requires the actual definition of
* "struct bpf_prog_array" while READ_ONCE() only needs
* a declaration of the same type.
*/
return !!READ_ONCE(call->prog_array);
}
#endif

static inline const char *
trace_event_name(struct trace_event_call *call)
{
Expand Down Expand Up @@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
}

#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
void perf_event_detach_bpf_prog(struct perf_event *event);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
return 1;
}

/*
 * Stub for the #else branch of CONFIG_BPF_EVENTS: attaching a program
 * always fails with -EOPNOTSUPP.
 */
static inline int
perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
{
return -EOPNOTSUPP;
}

/* Stub for the #else branch of CONFIG_BPF_EVENTS: does nothing. */
static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }

#endif

enum {
Expand Down Expand Up @@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
{
perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
}

#endif

#endif /* _LINUX_TRACE_EVENT_H */
6 changes: 3 additions & 3 deletions include/trace/perf.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto) \
struct trace_event_call *event_call = __data; \
struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
struct trace_event_raw_##call *entry; \
struct bpf_prog *prog = event_call->prog; \
struct pt_regs *__regs; \
u64 __count = 1; \
struct task_struct *__task = NULL; \
Expand All @@ -46,8 +45,9 @@ perf_trace_##call(void *__data, proto) \
__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
\
head = this_cpu_ptr(event_call->perf_events); \
if (!prog && __builtin_constant_p(!__task) && !__task && \
hlist_empty(head)) \
if (!bpf_prog_array_valid(event_call) && \
__builtin_constant_p(!__task) && !__task && \
hlist_empty(head)) \
return; \
\
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
Expand Down
81 changes: 81 additions & 0 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

/*
 * Do-nothing program body: both arguments are ignored and the result
 * is always 1.
 */
static unsigned int
__bpf_prog_ret1(const void *ctx, const struct bpf_insn *insn)
{
	return 1;
}

static struct bpf_prog_dummy {
struct bpf_prog prog;
} dummy_bpf_prog = {
.prog = {
.bpf_func = __bpf_prog_ret1,
},
};

/* to avoid allocating empty bpf_prog_array for cgroups that
* don't have bpf program attached use one global 'empty_prog_array'
* It will not be modified the caller of bpf_prog_array_alloc()
Expand Down Expand Up @@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
return 0;
}

/*
 * Overwrite @old_prog in @progs, in place, with the dummy program.
 *
 * Only the first slot holding @old_prog is replaced; the array is left
 * untouched when @old_prog is not found. Nothing is allocated here.
 */
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
				struct bpf_prog *old_prog)
{
	struct bpf_prog **slot;

	for (slot = progs->progs; *slot; slot++) {
		if (*slot != old_prog)
			continue;

		WRITE_ONCE(*slot, &dummy_bpf_prog.prog);
		return;
	}
}

/*
 * Build a new NULL-terminated program array derived from @old_array.
 *
 * Programs in @old_array are carried over in their existing order,
 * except @exclude_prog and any dummy placeholder entries. When
 * @include_prog is non-NULL it is appended after the carried programs.
 * @old_array may be NULL and is not modified.
 *
 * Returns 0 on success and stores the result in *@new_array; the stored
 * pointer is NULL when the new array would contain no programs.
 * Returns -EEXIST if @include_prog is already present in @old_array and
 * -ENOMEM if the allocation fails; *@new_array is not written on error.
 */
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			struct bpf_prog_array **new_array)
{
	struct bpf_prog *placeholder = &dummy_bpf_prog.prog;
	struct bpf_prog_array *fresh;
	struct bpf_prog **src;
	struct bpf_prog *cur;
	int kept = 0;
	int total;
	int out = 0;

	/* First pass: reject a duplicate attach and count the survivors. */
	if (old_array) {
		for (src = old_array->progs; *src; src++) {
			cur = *src;
			if (cur == include_prog)
				return -EEXIST;
			if (cur == exclude_prog || cur == placeholder)
				continue;
			kept++;
		}
	}

	/* Number of real (non-NULL) entries the new array will hold. */
	total = kept + (include_prog ? 1 : 0);
	if (total == 0) {
		/* Nothing left to run: hand back "no array" instead. */
		*new_array = NULL;
		return 0;
	}

	/* +1 as the end of prog_array is marked with NULL */
	fresh = bpf_prog_array_alloc(total + 1, GFP_KERNEL);
	if (!fresh)
		return -ENOMEM;

	/* Second pass: copy the survivors, preserving their order. */
	if (kept) {
		for (src = old_array->progs; *src; src++) {
			cur = *src;
			if (cur == exclude_prog || cur == placeholder)
				continue;
			fresh->progs[out++] = cur;
		}
	}

	if (include_prog)
		fresh->progs[out++] = include_prog;
	fresh->progs[out] = NULL;

	*new_array = fresh;
	return 0;
}

static void bpf_prog_free_deferred(struct work_struct *work)
{
struct bpf_prog_aux *aux;
Expand Down
26 changes: 8 additions & 18 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -7954,11 +7954,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
struct pt_regs *regs, struct hlist_head *head,
struct task_struct *task)
{
struct bpf_prog *prog = call->prog;

if (prog) {
if (bpf_prog_array_valid(call)) {
*(struct pt_regs **)raw_data = regs;
if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
perf_swevent_put_recursion_context(rctx);
return;
}
Expand Down Expand Up @@ -8147,13 +8145,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
bool is_kprobe, is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
int ret;

if (event->attr.type != PERF_TYPE_TRACEPOINT)
return perf_event_set_bpf_handler(event, prog_fd);

if (event->tp_event->prog)
return -EEXIST;

is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
is_syscall_tp = is_syscall_trace_event(event->tp_event);
Expand Down Expand Up @@ -8181,26 +8177,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
return -EACCES;
}
}
event->tp_event->prog = prog;
event->tp_event->bpf_prog_owner = event;

return 0;
ret = perf_event_attach_bpf_prog(event, prog);
if (ret)
bpf_prog_put(prog);
return ret;
}

static void perf_event_free_bpf_prog(struct perf_event *event)
{
struct bpf_prog *prog;

if (event->attr.type != PERF_TYPE_TRACEPOINT) {
perf_event_free_bpf_handler(event);
return;
}

prog = event->tp_event->prog;
if (prog && event->tp_event->bpf_prog_owner == event) {
event->tp_event->prog = NULL;
bpf_prog_put(prog);
}
perf_event_detach_bpf_prog(event);
}

#else
Expand Down
Loading

0 comments on commit e87c6bc

Please sign in to comment.