Skip to content

Commit 378b770

Browse files
image-dragon authored and Peter Zijlstra committed
sched: Make migrate_{en,dis}able() inline
For now, migrate_enable and migrate_disable are global, which makes them become hotspots in some case. Take BPF for example, the function calling to migrate_enable and migrate_disable in BPF trampoline can introduce significant overhead, and following is the 'perf top' of FENTRY's benchmark (./tools/testing/selftests/bpf/bench trig-fentry): 54.63% bpf_prog_2dcccf652aac1793_bench_trigger_fentry [k] bpf_prog_2dcccf652aac1793_bench_trigger_fentry 10.43% [kernel] [k] migrate_enable 10.07% bpf_trampoline_6442517037 [k] bpf_trampoline_6442517037 8.06% [kernel] [k] __bpf_prog_exit_recur 4.11% libc.so.6 [.] syscall 2.15% [kernel] [k] entry_SYSCALL_64 1.48% [kernel] [k] memchr_inv 1.32% [kernel] [k] fput 1.16% [kernel] [k] _copy_to_user 0.73% [kernel] [k] bpf_prog_test_run_raw_tp So in this commit, we make migrate_enable/migrate_disable inline to obtain better performance. The struct rq is defined internally in kernel/sched/sched.h, and the field "nr_pinned" is accessed in migrate_enable/migrate_disable, which makes it hard to make them inline. Alexei Starovoitov suggests to generate the offset of "nr_pinned" in [1], so we can define the migrate_enable/migrate_disable in include/linux/sched.h and access "this_rq()->nr_pinned" with "(void *)this_rq() + RQ_nr_pinned". The offset of "nr_pinned" is generated in include/generated/rq-offsets.h by kernel/sched/rq-offsets.c. Generally speaking, we move the definition of migrate_enable and migrate_disable to include/linux/sched.h from kernel/sched/core.c. The calling to __set_cpus_allowed_ptr() is leaved in ___migrate_enable(). The "struct rq" is not available in include/linux/sched.h, so we can't access the "runqueues" with this_cpu_ptr(), as the compilation will fail in this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): typeof((ptr) + 0) So we introduce the this_rq_raw() and access the runqueues with arch_raw_cpu_ptr/PERCPU_PTR directly. 
The variable "runqueues" is not visible to kernel modules, and exporting it is not a good idea. As Peter Zijlstra advised in [2], we define and export migrate_enable/migrate_disable in kernel/sched/core.c too, and use them for the modules. Before this patch, the performance of BPF FENTRY is: fentry : 113.030 ± 0.149M/s fentry : 112.501 ± 0.187M/s fentry : 112.828 ± 0.267M/s fentry : 115.287 ± 0.241M/s After this patch, the performance of BPF FENTRY increases to: fentry : 143.644 ± 0.670M/s fentry : 149.764 ± 0.362M/s fentry : 149.642 ± 0.156M/s fentry : 145.263 ± 0.221M/s Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/20250819123214.GH4067720@noisy.programming.kicks-ass.net/ [2]
1 parent 88a9031 commit 378b770

File tree

6 files changed

+152
-53
lines changed

6 files changed

+152
-53
lines changed

Kbuild

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,24 @@ arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file)
3434
$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
3535
$(call filechk,offsets,__ASM_OFFSETS_H__)
3636

37+
# Generate rq-offsets.h
38+
39+
rq-offsets-file := include/generated/rq-offsets.h
40+
41+
targets += kernel/sched/rq-offsets.s
42+
43+
kernel/sched/rq-offsets.s: $(offsets-file)
44+
45+
$(rq-offsets-file): kernel/sched/rq-offsets.s FORCE
46+
$(call filechk,offsets,__RQ_OFFSETS_H__)
47+
3748
# Check for missing system calls
3849

3950
quiet_cmd_syscalls = CALL $<
4051
cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
4152

4253
PHONY += missing-syscalls
43-
missing-syscalls: scripts/checksyscalls.sh $(offsets-file)
54+
missing-syscalls: scripts/checksyscalls.sh $(rq-offsets-file)
4455
$(call cmd,syscalls)
4556

4657
# Check the manual modification of atomic headers

include/linux/preempt.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
424424
* work-conserving schedulers.
425425
*
426426
*/
427-
extern void migrate_disable(void);
428-
extern void migrate_enable(void);
429427

430428
/**
431429
* preempt_disable_nested - Disable preemption inside a normally preempt disabled section
@@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void)
471469

472470
DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
473471
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
474-
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
475472

476473
#ifdef CONFIG_PREEMPT_DYNAMIC
477474

include/linux/sched.h

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
#include <linux/tracepoint-defs.h>
5050
#include <linux/unwind_deferred_types.h>
5151
#include <asm/kmap_size.h>
52+
#ifndef COMPILE_OFFSETS
53+
#include <generated/rq-offsets.h>
54+
#endif
5255

5356
/* task_struct member predeclarations (sorted alphabetically): */
5457
struct audit_context;
@@ -2317,4 +2320,114 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
23172320
#define alloc_tag_restore(_tag, _old) do {} while (0)
23182321
#endif
23192322

2323+
#ifndef MODULE
2324+
#ifndef COMPILE_OFFSETS
2325+
2326+
extern void ___migrate_enable(void);
2327+
2328+
struct rq;
2329+
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
2330+
2331+
/*
2332+
* The "struct rq" is not available here, so we can't access the
2333+
* "runqueues" with this_cpu_ptr(), as the compilation will fail in
2334+
* this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
2335+
* typeof((ptr) + 0)
2336+
*
2337+
* So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here.
2338+
*/
2339+
#ifdef CONFIG_SMP
2340+
#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
2341+
#else
2342+
#define this_rq_raw() PERCPU_PTR(&runqueues)
2343+
#endif
2344+
#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))
2345+
2346+
static inline void __migrate_enable(void)
2347+
{
2348+
struct task_struct *p = current;
2349+
2350+
#ifdef CONFIG_DEBUG_PREEMPT
2351+
/*
2352+
* Check both overflow from migrate_disable() and superfluous
2353+
* migrate_enable().
2354+
*/
2355+
if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
2356+
return;
2357+
#endif
2358+
2359+
if (p->migration_disabled > 1) {
2360+
p->migration_disabled--;
2361+
return;
2362+
}
2363+
2364+
/*
2365+
* Ensure stop_task runs either before or after this, and that
2366+
* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
2367+
*/
2368+
guard(preempt)();
2369+
if (unlikely(p->cpus_ptr != &p->cpus_mask))
2370+
___migrate_enable();
2371+
/*
2372+
* Mustn't clear migration_disabled() until cpus_ptr points back at the
2373+
* regular cpus_mask, otherwise things that race (eg.
2374+
* select_fallback_rq) get confused.
2375+
*/
2376+
barrier();
2377+
p->migration_disabled = 0;
2378+
this_rq_pinned()--;
2379+
}
2380+
2381+
static inline void __migrate_disable(void)
2382+
{
2383+
struct task_struct *p = current;
2384+
2385+
if (p->migration_disabled) {
2386+
#ifdef CONFIG_DEBUG_PREEMPT
2387+
/*
2388+
*Warn about overflow half-way through the range.
2389+
*/
2390+
WARN_ON_ONCE((s16)p->migration_disabled < 0);
2391+
#endif
2392+
p->migration_disabled++;
2393+
return;
2394+
}
2395+
2396+
guard(preempt)();
2397+
this_rq_pinned()++;
2398+
p->migration_disabled = 1;
2399+
}
2400+
#else /* !COMPILE_OFFSETS */
2401+
static inline void __migrate_disable(void) { }
2402+
static inline void __migrate_enable(void) { }
2403+
#endif /* !COMPILE_OFFSETS */
2404+
2405+
/*
2406+
* So that it is possible to not export the runqueues variable, define and
2407+
* export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
2408+
* them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will
2409+
* be defined in kernel/sched/core.c.
2410+
*/
2411+
#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
2412+
static inline void migrate_disable(void)
2413+
{
2414+
__migrate_disable();
2415+
}
2416+
2417+
static inline void migrate_enable(void)
2418+
{
2419+
__migrate_enable();
2420+
}
2421+
#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
2422+
extern void migrate_disable(void);
2423+
extern void migrate_enable(void);
2424+
#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
2425+
2426+
#else /* MODULE */
2427+
extern void migrate_disable(void);
2428+
extern void migrate_enable(void);
2429+
#endif /* MODULE */
2430+
2431+
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
2432+
23202433
#endif

kernel/bpf/verifier.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23855,6 +23855,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
2385523855
BTF_SET_START(btf_id_deny)
2385623856
BTF_ID_UNUSED
2385723857
#ifdef CONFIG_SMP
23858+
BTF_ID(func, ___migrate_enable)
2385823859
BTF_ID(func, migrate_disable)
2385923860
BTF_ID(func, migrate_enable)
2386023861
#endif

kernel/sched/core.c

Lines changed: 14 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
* Copyright (C) 1991-2002 Linus Torvalds
88
* Copyright (C) 1998-2024 Ingo Molnar, Red Hat
99
*/
10+
#define INSTANTIATE_EXPORTED_MIGRATE_DISABLE
11+
#include <linux/sched.h>
1012
#include <linux/highmem.h>
1113
#include <linux/hrtimer_api.h>
1214
#include <linux/ktime_api.h>
@@ -2381,64 +2383,27 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
23812383
__do_set_cpus_allowed(p, &ac);
23822384
}
23832385

2384-
void migrate_disable(void)
2385-
{
2386-
struct task_struct *p = current;
2387-
2388-
if (p->migration_disabled) {
2389-
#ifdef CONFIG_DEBUG_PREEMPT
2390-
/*
2391-
*Warn about overflow half-way through the range.
2392-
*/
2393-
WARN_ON_ONCE((s16)p->migration_disabled < 0);
2394-
#endif
2395-
p->migration_disabled++;
2396-
return;
2397-
}
2398-
2399-
guard(preempt)();
2400-
this_rq()->nr_pinned++;
2401-
p->migration_disabled = 1;
2402-
}
2403-
EXPORT_SYMBOL_GPL(migrate_disable);
2404-
2405-
void migrate_enable(void)
2386+
void ___migrate_enable(void)
24062387
{
24072388
struct task_struct *p = current;
24082389
struct affinity_context ac = {
24092390
.new_mask = &p->cpus_mask,
24102391
.flags = SCA_MIGRATE_ENABLE,
24112392
};
24122393

2413-
#ifdef CONFIG_DEBUG_PREEMPT
2414-
/*
2415-
* Check both overflow from migrate_disable() and superfluous
2416-
* migrate_enable().
2417-
*/
2418-
if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
2419-
return;
2420-
#endif
2394+
__set_cpus_allowed_ptr(p, &ac);
2395+
}
2396+
EXPORT_SYMBOL_GPL(___migrate_enable);
24212397

2422-
if (p->migration_disabled > 1) {
2423-
p->migration_disabled--;
2424-
return;
2425-
}
2398+
void migrate_disable(void)
2399+
{
2400+
__migrate_disable();
2401+
}
2402+
EXPORT_SYMBOL_GPL(migrate_disable);
24262403

2427-
/*
2428-
* Ensure stop_task runs either before or after this, and that
2429-
* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
2430-
*/
2431-
guard(preempt)();
2432-
if (p->cpus_ptr != &p->cpus_mask)
2433-
__set_cpus_allowed_ptr(p, &ac);
2434-
/*
2435-
* Mustn't clear migration_disabled() until cpus_ptr points back at the
2436-
* regular cpus_mask, otherwise things that race (eg.
2437-
* select_fallback_rq) get confused.
2438-
*/
2439-
barrier();
2440-
p->migration_disabled = 0;
2441-
this_rq()->nr_pinned--;
2404+
void migrate_enable(void)
2405+
{
2406+
__migrate_enable();
24422407
}
24432408
EXPORT_SYMBOL_GPL(migrate_enable);
24442409

kernel/sched/rq-offsets.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#define COMPILE_OFFSETS
3+
#include <linux/kbuild.h>
4+
#include <linux/types.h>
5+
#include "sched.h"
6+
7+
int main(void)
8+
{
9+
DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));
10+
11+
return 0;
12+
}

0 commit comments

Comments
 (0)