
Commit d445fbf

laoar authored and Kernel Patches Daemon committed
bpf: Add bpf_for_each_cpu helper
Some statistical data is stored in percpu pointers, but the kernel does not consolidate it into a single value; the data stored within struct psi_group_cpu is one example. To facilitate obtaining the sum of this data, a new bpf helper, bpf_for_each_cpu, is introduced.

This new helper implements for_each_{possible, present, online}_cpu, allowing the user to traverse CPUs conveniently. For instance, it enables walking through the CPUs of a cpuset cgroup when the task is within that cgroup.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
1 parent d7e241d commit d445fbf
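
For orientation, below is a minimal usage sketch of the new helper from the BPF side. It is not part of this commit: it borrows the percpu-ksym pattern the BPF selftests already use with bpf_per_cpu_ptr() (the real runqueues percpu symbol), the attach point is arbitrary, and direct BPF_CORE_READ() access to the per-CPU pointer inside the callback is an assumption about how the verifier types that argument.

// SPDX-License-Identifier: GPL-2.0
/* Sketch only: assumes vmlinux.h and the helper definitions have been
 * regenerated from this patched UAPI header, so bpf_for_each_cpu() and
 * enum bpf_cpu_mask_type are visible to the program. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

extern const struct rq runqueues __ksym;	/* real percpu kernel symbol */

struct sum_ctx {
	__u64 nr_running;
};

/* Matches the documented prototype:
 * long (*callback_fn)(u32 cpu, void *ctx, const void *ptr) */
static long cpu_cb(__u32 cpu, void *ctx, const void *ptr)
{
	struct sum_ctx *c = ctx;
	const struct rq *rq = ptr;	/* this CPU's instance of runqueues */

	c->nr_running += BPF_CORE_READ(rq, nr_running);
	return 0;			/* 0: continue, 1: stop early */
}

SEC("fentry/vfs_read")			/* arbitrary attach point for the sketch */
int BPF_PROG(sum_nr_running)
{
	struct sum_ctx c = {};
	long cpus;

	/* target must be 0 for every type except CPU_MASK_TASK */
	cpus = bpf_for_each_cpu(cpu_cb, &c, &runqueues, CPU_MASK_ONLINE, 0);
	if (cpus >= 0)
		bpf_printk("cpus=%ld nr_running=%llu", cpus, c.nr_running);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";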

6 files changed, +167 -1 lines changed


include/linux/bpf.h

Lines changed: 1 addition & 0 deletions
@@ -2951,6 +2951,7 @@ extern const struct bpf_func_proto bpf_get_retval_proto;
 extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto;
 extern const struct bpf_func_proto bpf_cgrp_storage_get_proto;
 extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto;
+extern const struct bpf_func_proto bpf_for_each_cpu_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);

include/uapi/linux/bpf.h

Lines changed: 32 additions & 0 deletions
@@ -1669,6 +1669,14 @@ union bpf_attr {
 
 } __attribute__((aligned(8)));
 
+enum bpf_cpu_mask_type {
+	CPU_MASK_UNSPEC = 0,
+	CPU_MASK_POSSIBLE = 1,
+	CPU_MASK_ONLINE = 2,
+	CPU_MASK_PRESENT = 3,
+	CPU_MASK_TASK = 4,	/* cpu mask of a task */
+};
+
 /* The description below is an attempt at providing documentation to eBPF
  * developers about the multiple available eBPF helper functions. It can be
  * parsed and used to produce a manual page. The workflow is the following,
@@ -5615,6 +5623,29 @@ union bpf_attr {
 *		0 on success.
 *
 *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_for_each_cpu(void *callback_fn, void *callback_ctx, const void *pcpu_ptr, u32 type, u32 target)
+ *	Description
+ *		Walk the percpu pointer **pcpu_ptr** with the callback function **callback_fn**.
+ *		**callback_fn** should be a static function and **callback_ctx** should
+ *		be a pointer to the stack.
+ *		**callback_ctx** is the context parameter.
+ *		**type** and **target** specify which CPUs to walk. If **target** is specified,
+ *		the cpumask is taken from the associated target.
+ *
+ *		long (\*callback_fn)(u32 cpu, void \*ctx, const void \*ptr);
+ *
+ *		where **cpu** is the current CPU in the walk, **ctx** is the **callback_ctx**,
+ *		and **ptr** is the address of **pcpu_ptr** on the current CPU.
+ *
+ *		If **callback_fn** returns 0, the helper continues with the next
+ *		CPU. If it returns 1, the helper skips the remaining CPUs and
+ *		returns. Other return values are not used now and will be
+ *		rejected by the verifier.
+ *
+ *	Return
+ *		The number of CPUs walked, or **-EINVAL** for an invalid **type**,
+ *		**target**, or **pcpu_ptr**.
 */
 #define ___BPF_FUNC_MAPPER(FN, ctx...) \
 	FN(unspec, 0, ##ctx) \
@@ -5829,6 +5860,7 @@ union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx) \
 	FN(cgrp_storage_get, 210, ##ctx) \
 	FN(cgrp_storage_delete, 211, ##ctx) \
+	FN(for_each_cpu, 212, ##ctx) \
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
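
Building on the sketch shown after the commit message, the fragment below illustrates the CPU_MASK_TASK mode and the early-stop contract documented above. It is hypothetical: target_pid stands in for a pid obtained elsewhere, and runqueues is reused as a convenient percpu symbol.

static long find_busy_cpu(__u32 cpu, void *ctx, const void *ptr)
{
	__u32 *found_cpu = ctx;
	const struct rq *rq = ptr;

	if (BPF_CORE_READ(rq, nr_running) > 0) {
		*found_cpu = cpu;
		return 1;	/* stop: the remaining CPUs are skipped */
	}
	return 0;		/* continue with the next allowed CPU */
}

/* In the program body: walk only the CPUs in target_pid's affinity mask.
 * For CPU_MASK_TASK the target (a pid) must be non-zero; for the other
 * mask types it must be 0. */
__u32 found_cpu = (__u32)-1;
long ret = bpf_for_each_cpu(find_busy_cpu, &found_cpu, &runqueues,
			    CPU_MASK_TASK, target_pid);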

kernel/bpf/bpf_iter.c

Lines changed: 72 additions & 0 deletions
@@ -6,6 +6,8 @@
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/rcupdate_trace.h>
+#include <linux/btf.h>
+#include <linux/cpumask.h>
 
 struct bpf_iter_target_info {
 	struct list_head list;
@@ -777,6 +779,76 @@ const struct bpf_func_proto bpf_loop_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_for_each_cpu, void *, callback_fn, void *, callback_ctx,
+	   const void *, pcpu_ptr, u32, type, u32, target)
+{
+	bpf_callback_t callback = (bpf_callback_t)callback_fn;
+	struct task_struct *task = NULL;
+	const cpumask_t *mask;
+	const void *ptr;
+	u64 ret;
+	u32 cpu;
+
+	if (!pcpu_ptr)
+		return -EINVAL;
+
+	if ((type != CPU_MASK_TASK && target) || (type == CPU_MASK_TASK && !target))
+		return -EINVAL;
+
+	switch (type) {
+	case CPU_MASK_POSSIBLE:
+		mask = cpu_possible_mask;
+		break;
+	case CPU_MASK_ONLINE:
+		mask = cpu_online_mask;
+		break;
+	case CPU_MASK_PRESENT:
+		mask = cpu_present_mask;
+		break;
+	case CPU_MASK_TASK:
+		rcu_read_lock();
+		task = get_pid_task(find_vpid(target), PIDTYPE_PID);
+		rcu_read_unlock();
+		if (!task)
+			return -EINVAL;
+		mask = task->cpus_ptr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for_each_cpu(cpu, mask) {
+		ptr = per_cpu_ptr((const void __percpu *)pcpu_ptr, cpu);
+		if (!ptr) {
+			if (task)
+				put_task_struct(task);
+			return cpu + 1;
+		}
+
+		ret = callback((u64)cpu, (u64)(long)callback_ctx, (u64)(long)ptr, 0, 0);
+		if (ret) {
+			if (task)
+				put_task_struct(task);
+			return cpu + 1;
+		}
+	}
+
+	if (task)
+		put_task_struct(task);
+	return cpu;
+}
+
+const struct bpf_func_proto bpf_for_each_cpu_proto = {
+	.func		= bpf_for_each_cpu,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_FUNC,
+	.arg2_type	= ARG_PTR_TO_STACK_OR_NULL,
+	.arg3_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 struct bpf_iter_num_kern {
 	int cur; /* current value, inclusive */
 	int end; /* final value, exclusive */

kernel/bpf/helpers.c

Lines changed: 2 additions & 0 deletions
@@ -1768,6 +1768,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_get_current_ancestor_cgroup_id:
 		return &bpf_get_current_ancestor_cgroup_id_proto;
 #endif
+	case BPF_FUNC_for_each_cpu:
+		return &bpf_for_each_cpu_proto;
 	default:
 		break;
 	}

kernel/bpf/verifier.c

Lines changed: 28 additions & 1 deletion
@@ -549,7 +549,8 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
 	       func_id == BPF_FUNC_timer_set_callback ||
 	       func_id == BPF_FUNC_find_vma ||
 	       func_id == BPF_FUNC_loop ||
-	       func_id == BPF_FUNC_user_ringbuf_drain;
+	       func_id == BPF_FUNC_user_ringbuf_drain ||
+	       func_id == BPF_FUNC_for_each_cpu;
 }
 
 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
@@ -9179,6 +9180,28 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int set_for_each_cpu_callback_state(struct bpf_verifier_env *env,
+					   struct bpf_func_state *caller,
+					   struct bpf_func_state *callee,
+					   int insn_idx)
+{
+	/* long bpf_for_each_cpu(bpf_callback_t callback_fn, void *callback_ctx,
+	 *			 const void *pcpu_ptr, u32 type, u32 target)
+	 * callback_fn(u64 cpu, void *callback_ctx, const void *pcpu_ptr);
+	 */
+	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
+	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_2];
+	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_3];
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+	callee->in_callback_fn = true;
+	callee->callback_ret_range = tnum_range(0, 1);
+	return 0;
+}
+
 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
 					 struct bpf_func_state *caller,
 					 struct bpf_func_state *callee,
@@ -9776,6 +9799,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 					set_user_ringbuf_callback_state);
 		break;
+	case BPF_FUNC_for_each_cpu:
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_for_each_cpu_callback_state);
+		break;
 	}
 
 	if (err)
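
One practical consequence of callback_ret_range = tnum_range(0, 1): a callback whose return value can fall outside {0, 1} is rejected at load time. A hypothetical callback that would fail verification:

static long bad_cb(__u32 cpu, void *ctx, const void *ptr)
{
	return 2;	/* rejected: only 0 (continue) and 1 (stop) are allowed */
}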

tools/include/uapi/linux/bpf.h

Lines changed: 32 additions & 0 deletions
@@ -1669,6 +1669,14 @@ union bpf_attr {
 
 } __attribute__((aligned(8)));
 
+enum bpf_cpu_mask_type {
+	CPU_MASK_UNSPEC = 0,
+	CPU_MASK_POSSIBLE = 1,
+	CPU_MASK_ONLINE = 2,
+	CPU_MASK_PRESENT = 3,
+	CPU_MASK_TASK = 4,	/* cpu mask of a task */
+};
+
 /* The description below is an attempt at providing documentation to eBPF
  * developers about the multiple available eBPF helper functions. It can be
  * parsed and used to produce a manual page. The workflow is the following,
@@ -5615,6 +5623,29 @@ union bpf_attr {
 *		0 on success.
 *
 *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_for_each_cpu(void *callback_fn, void *callback_ctx, const void *pcpu_ptr, u32 type, u32 target)
+ *	Description
+ *		Walk the percpu pointer **pcpu_ptr** with the callback function **callback_fn**.
+ *		**callback_fn** should be a static function and **callback_ctx** should
+ *		be a pointer to the stack.
+ *		**callback_ctx** is the context parameter.
+ *		**type** and **target** specify which CPUs to walk. If **target** is specified,
+ *		the cpumask is taken from the associated target.
+ *
+ *		long (\*callback_fn)(u32 cpu, void \*ctx, const void \*ptr);
+ *
+ *		where **cpu** is the current CPU in the walk, **ctx** is the **callback_ctx**,
+ *		and **ptr** is the address of **pcpu_ptr** on the current CPU.
+ *
+ *		If **callback_fn** returns 0, the helper continues with the next
+ *		CPU. If it returns 1, the helper skips the remaining CPUs and
+ *		returns. Other return values are not used now and will be
+ *		rejected by the verifier.
+ *
+ *	Return
+ *		The number of CPUs walked, or **-EINVAL** for an invalid **type**,
+ *		**target**, or **pcpu_ptr**.
 */
 #define ___BPF_FUNC_MAPPER(FN, ctx...) \
 	FN(unspec, 0, ##ctx) \
@@ -5829,6 +5860,7 @@ union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx) \
 	FN(cgrp_storage_get, 210, ##ctx) \
 	FN(cgrp_storage_delete, 211, ##ctx) \
+	FN(for_each_cpu, 212, ##ctx) \
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
