Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scx_layered: Make layered work in pid namespaces #1099

Merged
merged 1 commit into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions scheds/include/scx/namespace.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@
#ifndef __SCHED_EXT_NAMESPACE_BPF_H
#define __SCHED_EXT_NAMESPACE_BPF_H

#ifdef LSP
#define __bpf__
#include "../vmlinux.h"
#else
#include "vmlinux.h"
#endif

/*
 * PID-namespace helper prototypes. Only the declarations are visible here;
 * the descriptions below are inferred from the signatures and names.
 *
 * NOTE(review): get_task_pid_ns() and get_task_ns_pid() are each declared
 * twice below with different parameter lists. C has no overloading, so only
 * one declaration of each can be in effect. Presumably this view shows the
 * old and the new line of a change side by side; confirm which is current.
 */
struct pid_namespace* get_task_pid_ns(const struct task_struct* task);
/* NOTE(review): the second parameter is unnamed in this declaration. */
struct pid_namespace* get_task_pid_ns(const struct task_struct* task, enum pid_type);
/* Presumably returns the struct pid of @task for the given @type; verify. */
struct pid* get_task_pid_ptr(const struct task_struct* task, enum pid_type type);
pid_t get_task_ns_pid(const struct task_struct* task, enum pid_type type);
pid_t get_task_ns_pid(const struct task_struct* task);

/* Presumably the numeric id of @pid as seen from namespace @ns; verify. */
pid_t get_pid_nr_ns(struct pid* pid, struct pid_namespace* ns);
/* Takes no arguments and returns a pid_t; semantics not visible here. */
pid_t get_ns_pid(void);
Expand Down
26 changes: 24 additions & 2 deletions scheds/rust/scx_layered/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ char _license[] SEC("license") = "GPL";
extern unsigned CONFIG_HZ __kconfig;

const volatile u32 debug;
const volatile s32 layered_tgid;
const volatile u64 slice_ns;
const volatile u64 max_exec_ns;
const volatile u32 nr_cpu_ids = 1;
Expand Down Expand Up @@ -59,6 +58,7 @@ private(big_cpumask) struct bpf_cpumask __kptr *big_cpumask;
struct layer layers[MAX_LAYERS];
u32 fallback_cpu;
static u32 preempt_cursor;
/*
 * tgid used by is_scheduler_task() to recognise tasks of the scheduler's own
 * process. It starts at 0 and is overwritten by the initialize_pid_namespace
 * syscall program with the tgid read from the task that invokes it.
 */
u32 layered_root_tgid = 0;

u32 empty_layer_ids[MAX_LAYERS];
u32 nr_empty_layer_ids;
Expand Down Expand Up @@ -143,6 +143,11 @@ static inline bool is_fb_dsq(u64 dsq_id)
return dsq_id & (HI_FB_DSQ_BASE | LO_FB_DSQ_BASE);
}

static __always_inline bool is_scheduler_task(struct task_struct *p)
{
return (u32)p->tgid == layered_root_tgid;
}

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
Expand Down Expand Up @@ -485,6 +490,23 @@ int BPF_PROG(tp_task_rename, struct task_struct *p, const char *buf)
return 0;
}

/*
 * Syscall program that prepares the scheduler for running inside a pid
 * namespace: it stores the tgid read from the task invoking the program in
 * layered_root_tgid and traces the stored value.
 *
 * Returns 0 on success, or -ENOENT if the current task cannot be obtained.
 */
SEC("syscall")
int BPF_PROG(initialize_pid_namespace)
{
	struct task_struct *current_task;

	current_task = (struct task_struct *)bpf_get_current_task_btf();
	if (!current_task)
		return -ENOENT;

	layered_root_tgid = BPF_PROBE_READ(current_task, tgid);
	trace("CFG layered running with tgid: %d", layered_root_tgid);

	return 0;
}

static bool should_refresh_cached_cpus(struct cached_cpus *ccpus, s64 id, u64 cpus_seq)
{
return ccpus->id != id || ccpus->seq != cpus_seq;
Expand Down Expand Up @@ -1081,7 +1103,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
* are usually important for system performance and responsiveness.
*/
if (((p->flags & PF_KTHREAD) && p->nr_cpus_allowed < nr_possible_cpus) ||
p->tgid == layered_tgid) {
is_scheduler_task(p)) {
struct cpumask *layer_cpumask;

if (layer->kind == LAYER_KIND_CONFINED &&
Expand Down
10 changes: 7 additions & 3 deletions scheds/rust/scx_layered/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1568,9 +1568,6 @@ impl<'a> Scheduler<'a> {
skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len;

skel.maps.rodata_data.debug = opts.verbose as u32;
// Running scx_layered inside a PID namespace would break the
// following.
skel.maps.rodata_data.layered_tgid = std::process::id() as i32;
skel.maps.rodata_data.slice_ns = opts.slice_us * 1000;
skel.maps.rodata_data.max_exec_ns = if opts.max_exec_us > 0 {
opts.max_exec_us * 1000
Expand Down Expand Up @@ -1637,6 +1634,13 @@ impl<'a> Scheduler<'a> {
// Other stuff.
let proc_reader = procfs::ProcReader::new();

// Handle setup if layered is running in a pid namespace.
let input = ProgramInput {
..Default::default()
};
let prog = &mut skel.progs.initialize_pid_namespace;
let _ = prog.test_run(input);

// XXX If we try to refresh the cpumasks here before attaching, we
// sometimes (non-deterministically) don't see the updated values in
// BPF. It would be better to update the cpumasks here before we
Expand Down