Skip to content

Commit

Permalink
Merge pull request #1099 from hodgesds/layered-namespace-aware
Browse files Browse the repository at this point in the history
scx_layered: Make layered work in pid namespaces
  • Loading branch information
hodgesds authored Dec 12, 2024
2 parents aa7b375 + 6879929 commit d3ec7b7
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 7 deletions.
9 changes: 7 additions & 2 deletions scheds/include/scx/namespace.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@
#ifndef __SCHED_EXT_NAMESPACE_BPF_H
#define __SCHED_EXT_NAMESPACE_BPF_H

#ifdef LSP
#define __bpf__
#include "../vmlinux.h"
#else
#include "vmlinux.h"
#endif

struct pid_namespace* get_task_pid_ns(const struct task_struct* task);
struct pid_namespace* get_task_pid_ns(const struct task_struct* task, enum pid_type);
struct pid* get_task_pid_ptr(const struct task_struct* task, enum pid_type type);
pid_t get_task_ns_pid(const struct task_struct* task, enum pid_type type);
pid_t get_task_ns_pid(const struct task_struct* task);

pid_t get_pid_nr_ns(struct pid* pid, struct pid_namespace* ns);
pid_t get_ns_pid(void);
Expand Down
26 changes: 24 additions & 2 deletions scheds/rust/scx_layered/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ char _license[] SEC("license") = "GPL";
extern unsigned CONFIG_HZ __kconfig;

const volatile u32 debug;
const volatile s32 layered_tgid;
const volatile u64 slice_ns;
const volatile u64 max_exec_ns;
const volatile u32 nr_cpu_ids = 1;
Expand Down Expand Up @@ -59,6 +58,7 @@ private(big_cpumask) struct bpf_cpumask __kptr *big_cpumask;
struct layer layers[MAX_LAYERS];
u32 fallback_cpu;
static u32 preempt_cursor;
u32 layered_root_tgid = 0;

u32 empty_layer_ids[MAX_LAYERS];
u32 nr_empty_layer_ids;
Expand Down Expand Up @@ -143,6 +143,11 @@ static inline bool is_fb_dsq(u64 dsq_id)
return dsq_id & (HI_FB_DSQ_BASE | LO_FB_DSQ_BASE);
}

/*
 * Return true when @p is a thread of the scheduler's own userspace
 * process. layered_root_tgid holds the scheduler's tgid as recorded by
 * the initialize_pid_namespace syscall program, which makes this check
 * valid even when layered was started inside a pid namespace.
 */
static __always_inline bool is_scheduler_task(struct task_struct *p)
{
	u32 tgid = (u32)p->tgid;

	return tgid == layered_root_tgid;
}

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
Expand Down Expand Up @@ -485,6 +490,23 @@ int BPF_PROG(tp_task_rename, struct task_struct *p, const char *buf)
return 0;
}

/*
 * Initializes the scheduler to support running in a pid namespace.
 *
 * Run once from userspace (via BPF_PROG_TEST_RUN) before attaching the
 * struct_ops. The calling task is the scheduler process itself, so its
 * tgid — as observed from the root pid namespace on the BPF side — is
 * stashed in layered_root_tgid for later identification of the
 * scheduler's own tasks.
 */
SEC("syscall")
int BPF_PROG(initialize_pid_namespace)
{
	struct task_struct *task;

	task = (struct task_struct *)bpf_get_current_task_btf();
	if (!task)
		return -ENOENT;

	layered_root_tgid = BPF_PROBE_READ(task, tgid);
	trace("CFG layered running with tgid: %d", layered_root_tgid);

	return 0;
}

static bool should_refresh_cached_cpus(struct cached_cpus *ccpus, s64 id, u64 cpus_seq)
{
return ccpus->id != id || ccpus->seq != cpus_seq;
Expand Down Expand Up @@ -1081,7 +1103,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
* are usually important for system performance and responsiveness.
*/
if (((p->flags & PF_KTHREAD) && p->nr_cpus_allowed < nr_possible_cpus) ||
p->tgid == layered_tgid) {
is_scheduler_task(p)) {
struct cpumask *layer_cpumask;

if (layer->kind == LAYER_KIND_CONFINED &&
Expand Down
10 changes: 7 additions & 3 deletions scheds/rust/scx_layered/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1568,9 +1568,6 @@ impl<'a> Scheduler<'a> {
skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len;

skel.maps.rodata_data.debug = opts.verbose as u32;
// Running scx_layered inside a PID namespace would break the
// following.
skel.maps.rodata_data.layered_tgid = std::process::id() as i32;
skel.maps.rodata_data.slice_ns = opts.slice_us * 1000;
skel.maps.rodata_data.max_exec_ns = if opts.max_exec_us > 0 {
opts.max_exec_us * 1000
Expand Down Expand Up @@ -1637,6 +1634,13 @@ impl<'a> Scheduler<'a> {
// Other stuff.
let proc_reader = procfs::ProcReader::new();

// Handle setup if layered is running in a pid namespace.
let input = ProgramInput {
..Default::default()
};
let prog = &mut skel.progs.initialize_pid_namespace;
let _ = prog.test_run(input);

// XXX If we try to refresh the cpumasks here before attaching, we
// sometimes (non-deterministically) don't see the updated values in
// BPF. It would be better to update the cpumasks here before we
Expand Down

0 comments on commit d3ec7b7

Please sign in to comment.