Commit e8ec1c9

Author: Alexei Starovoitov
Merge branch 'bpf-reduce-the-use-of-migrate_-disable-enable'
Hou Tao says:

====================
The use of the migrate_{disable|enable} pair in BPF is mainly due to the
introduction of the bpf memory allocator and the use of per-CPU data
structures in its internal implementation. The caller needs to disable
migration before invoking the alloc or free APIs of the bpf memory
allocator, and to enable migration again after the invocation.

The main users of the bpf memory allocator are the various kinds of bpf
maps in which the map values, or the special fields in the map values,
are allocated through the bpf memory allocator.

At present, the running context of a bpf program has already disabled
migration explicitly or implicitly, so when these maps are manipulated
from a bpf program it is OK not to invoke the migrate_disable() and
migrate_enable() pair. However, that is not always the case when these
maps are manipulated through the bpf syscall, so many
migrate_{disable|enable} pairs were added wherever a map can be
manipulated by either a BPF program or the BPF syscall.

The initial idea of reducing the use of migrate_{disable|enable} comes
from Alexei [1]. I turned it into a patch set that achieves the goal
through the following three methods:

1. Remove the unnecessary migrate_{disable|enable} pair.
   When the BPF syscall path also disables migration, it is OK to remove
   the pair. Patches #1~#3 fall into this category, while patches #4~#5
   are partially included.

2. Move the migrate_{disable|enable} pair from the inner callee to the
   outer caller.
   Instead of invoking migrate_disable() in the inner callee, invoke it
   in the outer caller to simplify reasoning about when migrate_disable()
   is needed. Patches #4~#5 and #6~#19 belong to this category.

3. Add a cant_migrate() check in the inner callee.
   The cant_migrate() check in the inner callee ensures that the
   guarantee that migration is disabled is not broken. Patches #1~#5,
   #13, and #16~#19 also belong to this category.

Please check the individual patches for more details. Comments are
always welcome.

Change Log:
v2:
  * squash the ->map_free related patches (#10~#12, #15) into one patch
  * remove unnecessary cant_migrate() checks

v1: https://lore.kernel.org/bpf/20250106081900.1665573-1-houtao@huaweicloud.com
====================

Link: https://patch.msgid.link/20250108010728.207536-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents bfaac2a + d86088e commit e8ec1c9

12 files changed: +55 -88 lines
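The pattern applied across the series can be sketched in a few lines of C. This is an illustrative example only, not code from the commit: the demo_* names are hypothetical, while cant_migrate(), migrate_disable()/migrate_enable() and the per-CPU helpers are the real kernel APIs the patches rely on. The inner callees merely assert that migration is already disabled (method 3), and the outermost function reachable from the syscall path owns the migrate_{disable|enable} pair (method 2).

/* Illustrative sketch only -- the demo_* helpers are hypothetical. */
#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>

static DEFINE_PER_CPU(int, demo_busy);

/* Inner callee: relies on the caller having disabled migration. */
static void demo_lock(void)
{
        cant_migrate();                 /* assertion, replaces migrate_disable() */
        this_cpu_inc(demo_busy);
}

static void demo_unlock(void)
{
        this_cpu_dec(demo_busy);        /* no migrate_enable() here any more */
}

/* Outer caller on the syscall path: the only place that toggles migration. */
void demo_free(void *owner)
{
        migrate_disable();              /* covers every callee below */
        rcu_read_lock();

        demo_lock();
        /* ... free per-CPU state reachable from owner ... */
        demo_unlock();

        rcu_read_unlock();
        migrate_enable();
}

BPF program contexts already run with migration disabled, so only the syscall-side entry points need the explicit pair; cant_migrate() documents that assumption in the callees and enforces it on debug kernels.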

kernel/bpf/arraymap.c

+2 -4

@@ -735,13 +735,13 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback
         u64 ret = 0;
         void *val;

+        cant_migrate();
+
         if (flags != 0)
                 return -EINVAL;

         is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
         array = container_of(map, struct bpf_array, map);
-        if (is_percpu)
-                migrate_disable();
         for (i = 0; i < map->max_entries; i++) {
                 if (is_percpu)
                         val = this_cpu_ptr(array->pptrs[i]);
@@ -756,8 +756,6 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback
                         break;
         }

-        if (is_percpu)
-                migrate_enable();
         return num_elems;
 }

kernel/bpf/bpf_cgrp_storage.c

+7 -8

@@ -15,22 +15,20 @@ static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy);

 static void bpf_cgrp_storage_lock(void)
 {
-        migrate_disable();
+        cant_migrate();
         this_cpu_inc(bpf_cgrp_storage_busy);
 }

 static void bpf_cgrp_storage_unlock(void)
 {
         this_cpu_dec(bpf_cgrp_storage_busy);
-        migrate_enable();
 }

 static bool bpf_cgrp_storage_trylock(void)
 {
-        migrate_disable();
+        cant_migrate();
         if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) {
                 this_cpu_dec(bpf_cgrp_storage_busy);
-                migrate_enable();
                 return false;
         }
         return true;
@@ -47,17 +45,18 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
 {
         struct bpf_local_storage *local_storage;

+        migrate_disable();
         rcu_read_lock();
         local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
-        if (!local_storage) {
-                rcu_read_unlock();
-                return;
-        }
+        if (!local_storage)
+                goto out;

         bpf_cgrp_storage_lock();
         bpf_local_storage_destroy(local_storage);
         bpf_cgrp_storage_unlock();
+out:
         rcu_read_unlock();
+        migrate_enable();
 }

 static struct bpf_local_storage_data *

kernel/bpf/bpf_inode_storage.c

+5 -4

@@ -62,16 +62,17 @@ void bpf_inode_storage_free(struct inode *inode)
         if (!bsb)
                 return;

+        migrate_disable();
         rcu_read_lock();

         local_storage = rcu_dereference(bsb->storage);
-        if (!local_storage) {
-                rcu_read_unlock();
-                return;
-        }
+        if (!local_storage)
+                goto out;

         bpf_local_storage_destroy(local_storage);
+out:
         rcu_read_unlock();
+        migrate_enable();
 }

 static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)

kernel/bpf/bpf_local_storage.c

+9 -21

@@ -81,9 +81,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
                 return NULL;

         if (smap->bpf_ma) {
-                migrate_disable();
                 selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
-                migrate_enable();
                 if (selem)
                         /* Keep the original bpf_map_kzalloc behavior
                          * before started using the bpf_mem_cache_alloc.
@@ -174,17 +172,14 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
                 return;
         }

-        if (smap) {
-                migrate_disable();
+        if (smap)
                 bpf_mem_cache_free(&smap->storage_ma, local_storage);
-                migrate_enable();
-        } else {
+        else
                 /* smap could be NULL if the selem that triggered
                  * this 'local_storage' creation had been long gone.
                  * In this case, directly do call_rcu().
                  */
                 call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
-        }
 }

 /* rcu tasks trace callback for bpf_ma == false */
@@ -217,7 +212,10 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
         selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
         /* The bpf_local_storage_map_free will wait for rcu_barrier */
         smap = rcu_dereference_check(SDATA(selem)->smap, 1);
+
+        migrate_disable();
         bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+        migrate_enable();
         bpf_mem_cache_raw_free(selem);
 }

@@ -256,9 +254,7 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
                  * bpf_mem_cache_free will be able to reuse selem
                  * immediately.
                  */
-                migrate_disable();
                 bpf_mem_cache_free(&smap->selem_ma, selem);
-                migrate_enable();
                 return;
         }

@@ -497,15 +493,11 @@ int bpf_local_storage_alloc(void *owner,
         if (err)
                 return err;

-        if (smap->bpf_ma) {
-                migrate_disable();
+        if (smap->bpf_ma)
                 storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags);
-                migrate_enable();
-        } else {
+        else
                 storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
                                           gfp_flags | __GFP_NOWARN);
-        }
-
         if (!storage) {
                 err = -ENOMEM;
                 goto uncharge;
@@ -902,15 +894,11 @@ void bpf_local_storage_map_free(struct bpf_map *map,
                 while ((selem = hlist_entry_safe(
                                 rcu_dereference_raw(hlist_first_rcu(&b->list)),
                                 struct bpf_local_storage_elem, map_node))) {
-                        if (busy_counter) {
-                                migrate_disable();
+                        if (busy_counter)
                                 this_cpu_inc(*busy_counter);
-                        }
                         bpf_selem_unlink(selem, true);
-                        if (busy_counter) {
+                        if (busy_counter)
                                 this_cpu_dec(*busy_counter);
-                                migrate_enable();
-                        }
                         cond_resched_rcu();
                 }
                 rcu_read_unlock();

kernel/bpf/bpf_task_storage.c

+7 -8

@@ -24,22 +24,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy);

 static void bpf_task_storage_lock(void)
 {
-        migrate_disable();
+        cant_migrate();
         this_cpu_inc(bpf_task_storage_busy);
 }

 static void bpf_task_storage_unlock(void)
 {
         this_cpu_dec(bpf_task_storage_busy);
-        migrate_enable();
 }

 static bool bpf_task_storage_trylock(void)
 {
-        migrate_disable();
+        cant_migrate();
         if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
                 this_cpu_dec(bpf_task_storage_busy);
-                migrate_enable();
                 return false;
         }
         return true;
@@ -72,18 +70,19 @@ void bpf_task_storage_free(struct task_struct *task)
 {
         struct bpf_local_storage *local_storage;

+        migrate_disable();
         rcu_read_lock();

         local_storage = rcu_dereference(task->bpf_storage);
-        if (!local_storage) {
-                rcu_read_unlock();
-                return;
-        }
+        if (!local_storage)
+                goto out;

         bpf_task_storage_lock();
         bpf_local_storage_destroy(local_storage);
         bpf_task_storage_unlock();
+out:
         rcu_read_unlock();
+        migrate_enable();
 }

 static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)

kernel/bpf/cpumask.c

-2

@@ -91,9 +91,7 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
         if (!refcount_dec_and_test(&cpumask->usage))
                 return;

-        migrate_disable();
         bpf_mem_cache_free_rcu(&bpf_cpumask_ma, cpumask);
-        migrate_enable();
 }

 __bpf_kfunc void bpf_cpumask_release_dtor(void *cpumask)

kernel/bpf/hashtab.c

+7 -12

@@ -897,11 +897,9 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 {
         check_and_free_fields(htab, l);

-        migrate_disable();
         if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
                 bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
         bpf_mem_cache_free(&htab->ma, l);
-        migrate_enable();
 }

 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
@@ -1502,10 +1500,9 @@ static void delete_all_elements(struct bpf_htab *htab)
 {
         int i;

-        /* It's called from a worker thread, so disable migration here,
-         * since bpf_mem_cache_free() relies on that.
+        /* It's called from a worker thread and migration has been disabled,
+         * therefore, it is OK to invoke bpf_mem_cache_free() directly.
          */
-        migrate_disable();
         for (i = 0; i < htab->n_buckets; i++) {
                 struct hlist_nulls_head *head = select_bucket(htab, i);
                 struct hlist_nulls_node *n;
@@ -1517,7 +1514,6 @@ static void delete_all_elements(struct bpf_htab *htab)
                 }
                 cond_resched();
         }
-        migrate_enable();
 }

 static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
@@ -2208,17 +2204,18 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
         bool is_percpu;
         u64 ret = 0;

+        cant_migrate();
+
         if (flags != 0)
                 return -EINVAL;

         is_percpu = htab_is_percpu(htab);

         roundup_key_size = round_up(map->key_size, 8);
-        /* disable migration so percpu value prepared here will be the
-         * same as the one seen by the bpf program with bpf_map_lookup_elem().
+        /* migration has been disabled, so percpu value prepared here will be
+         * the same as the one seen by the bpf program with
+         * bpf_map_lookup_elem().
          */
-        if (is_percpu)
-                migrate_disable();
         for (i = 0; i < htab->n_buckets; i++) {
                 b = &htab->buckets[i];
                 rcu_read_lock();
@@ -2244,8 +2241,6 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
                 rcu_read_unlock();
         }
 out:
-        if (is_percpu)
-                migrate_enable();
         return num_elems;
 }

kernel/bpf/helpers.c

-4

@@ -2066,9 +2066,7 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
                 /* The contained type can also have resources, including a
                  * bpf_list_head which needs to be freed.
                  */
-                migrate_disable();
                 __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
-                migrate_enable();
         }
 }

@@ -2105,9 +2103,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
                 obj -= field->graph_root.node_offset;


-                migrate_disable();
                 __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
-                migrate_enable();
         }
 }

kernel/bpf/lpm_trie.c

+4 -16

@@ -289,16 +289,11 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
 }

 static struct lpm_trie_node *lpm_trie_node_alloc(struct lpm_trie *trie,
-                                                 const void *value,
-                                                 bool disable_migration)
+                                                 const void *value)
 {
         struct lpm_trie_node *node;

-        if (disable_migration)
-                migrate_disable();
         node = bpf_mem_cache_alloc(&trie->ma);
-        if (disable_migration)
-                migrate_enable();

         if (!node)
                 return NULL;
@@ -342,10 +337,8 @@ static long trie_update_elem(struct bpf_map *map,
         if (key->prefixlen > trie->max_prefixlen)
                 return -EINVAL;

-        /* Allocate and fill a new node. Need to disable migration before
-         * invoking bpf_mem_cache_alloc().
-         */
-        new_node = lpm_trie_node_alloc(trie, value, true);
+        /* Allocate and fill a new node */
+        new_node = lpm_trie_node_alloc(trie, value);
         if (!new_node)
                 return -ENOMEM;

@@ -425,8 +418,7 @@ static long trie_update_elem(struct bpf_map *map,
                 goto out;
         }

-        /* migration is disabled within the locked scope */
-        im_node = lpm_trie_node_alloc(trie, NULL, false);
+        im_node = lpm_trie_node_alloc(trie, NULL);
         if (!im_node) {
                 trie->n_entries--;
                 ret = -ENOMEM;
@@ -452,11 +444,9 @@ static long trie_update_elem(struct bpf_map *map,
 out:
         raw_spin_unlock_irqrestore(&trie->lock, irq_flags);

-        migrate_disable();
         if (ret)
                 bpf_mem_cache_free(&trie->ma, new_node);
         bpf_mem_cache_free_rcu(&trie->ma, free_node);
-        migrate_enable();

         return ret;
 }
@@ -555,10 +545,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 out:
         raw_spin_unlock_irqrestore(&trie->lock, irq_flags);

-        migrate_disable();
         bpf_mem_cache_free_rcu(&trie->ma, free_parent);
         bpf_mem_cache_free_rcu(&trie->ma, free_node);
-        migrate_enable();

         return ret;
 }

kernel/bpf/range_tree.c

-2

@@ -259,9 +259,7 @@ void range_tree_destroy(struct range_tree *rt)

         while ((rn = range_it_iter_first(rt, 0, -1U))) {
                 range_it_remove(rn, rt);
-                migrate_disable();
                 bpf_mem_free(&bpf_global_ma, rn);
-                migrate_enable();
         }
 }
