scx_sync: Sync scheduler changes from https://github.com/sched-ext/scx

htejun committed Dec 4, 2023
1 parent f0566ba commit 234eb2c
Showing 2 changed files with 32 additions and 6 deletions.
20 changes: 16 additions & 4 deletions tools/sched_ext/scx_layered/src/bpf/main.bpf.c
@@ -250,10 +250,20 @@ struct layer *lookup_layer(int idx)
return &layers[idx];
}

+/*
+ * Because the layer membership is by the default hierarchy cgroups rather than
+ * the CPU controller membership, we can't use ops.cgroup_move(). Let's iterate
+ * the tasks manually and set refresh_layer.
+ *
+ * The iteration isn't synchronized and may fail spuriously. It's not a big
+ * practical problem as process migrations are very rare in most modern systems.
+ * That said, we eventually want this to be based on CPU controller membership.
+ */
SEC("tp_btf/cgroup_attach_task")
int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
struct task_struct *leader, bool threadgroup)
{
+struct list_head *thread_head;
struct task_struct *next;
struct task_ctx *tctx;
int leader_pid = leader->pid;
@@ -265,6 +275,8 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
if (!threadgroup)
return 0;

+thread_head = &leader->signal->thread_head;
+
if (!(next = bpf_task_acquire(leader))) {
scx_bpf_error("failed to acquire leader");
return 0;
@@ -274,18 +286,18 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
struct task_struct *p;
int pid;

-p = container_of(next->thread_group.next, struct task_struct, thread_group);
+p = container_of(next->thread_node.next, struct task_struct, thread_node);
bpf_task_release(next);

-pid = BPF_CORE_READ(p, pid);
-if (pid == leader_pid) {
+if (&p->thread_node == thread_head) {
next = NULL;
break;
}

+pid = BPF_CORE_READ(p, pid);
next = bpf_task_from_pid(pid);
if (!next) {
-scx_bpf_error("thread iteration failed");
+bpf_printk("scx_layered: tp_cgroup_attach_task: thread iteration failed");
break;
}

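For context, a minimal sketch (not part of the commit) of how the reworked thread walk in tp_cgroup_attach_task() reads once the hunks above are applied, assuming the file's existing includes and definitions. The bpf_repeat()/MAX_TASKS loop bound and the lookup_task_ctx()/refresh_layer step at the bottom of the loop are assumed placeholders for code the diff does not show; the list traversal itself mirrors the added lines.

	/*
	 * Illustrative sketch only: walk the leader's thread list via
	 * signal->thread_head and task_struct->thread_node, re-acquiring a
	 * reference through the PID on every step.
	 */
	struct list_head *thread_head = &leader->signal->thread_head;
	struct task_struct *next, *p;
	struct task_ctx *tctx;

	if (!(next = bpf_task_acquire(leader))) {
		scx_bpf_error("failed to acquire leader");
		return 0;
	}

	bpf_repeat(MAX_TASKS) {	/* assumed loop construct and bound */
		/* step to the next entry on the leader's thread list */
		p = container_of(next->thread_node.next, struct task_struct, thread_node);
		bpf_task_release(next);

		/* wrapped back around to the list head: every thread has been visited */
		if (&p->thread_node == thread_head) {
			next = NULL;
			break;
		}

		/* take a proper reference via the PID before touching the task */
		next = bpf_task_from_pid(BPF_CORE_READ(p, pid));
		if (!next) {
			bpf_printk("scx_layered: tp_cgroup_attach_task: thread iteration failed");
			break;
		}

		/* lookup_task_ctx() is an assumed helper; mark the task so its
		 * layer is re-evaluated on the next scheduling event */
		if ((tctx = lookup_task_ctx(next)))
			tctx->refresh_layer = true;
	}

	if (next)
		bpf_task_release(next);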
18 changes: 16 additions & 2 deletions tools/sched_ext/scx_rusty/src/bpf/main.bpf.c
@@ -966,7 +966,13 @@ s32 BPF_STRUCT_OPS(rusty_prep_enable, struct task_struct *p,
pid_t pid;

pid = p->pid;
-ret = bpf_map_update_elem(&task_data, &pid, &taskc, BPF_NOEXIST);
+
+/*
+ * XXX - We want BPF_NOEXIST but bpf_map_delete_elem() in .disable() may
+ * fail spuriously due to BPF recursion protection triggering
+ * unnecessarily.
+ */
+ret = bpf_map_update_elem(&task_data, &pid, &taskc, 0 /*BPF_NOEXIST*/);
if (ret) {
stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
return ret;
@@ -1003,7 +1009,15 @@ s32 BPF_STRUCT_OPS(rusty_prep_enable, struct task_struct *p,
void BPF_STRUCT_OPS(rusty_disable, struct task_struct *p)
{
pid_t pid = p->pid;
-long ret = bpf_map_delete_elem(&task_data, &pid);
+long ret;
+
+/*
+ * XXX - There's no reason delete should fail here but BPF's recursion
+ * protection can unnecessarily fail the operation. The fact that
+ * deletions aren't reliable means that we sometimes leak task_ctx and
+ * can't use BPF_NOEXIST on allocation in .prep_enable().
+ */
+ret = bpf_map_delete_elem(&task_data, &pid);
if (ret) {
stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
return;
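As a concrete illustration of the workaround both comments describe, here is a minimal sketch (not from the commit, assuming the file's existing includes): inserts use flag 0 (BPF_ANY) so an entry leaked by a delete that tripped BPF's recursion protection is silently overwritten, and the delete in .disable() stays best-effort. The map definition, max_entries, and the helper names are assumptions; only the flag choice mirrors the actual change.

	/*
	 * Illustrative sketch only. task_ctx_insert()/task_ctx_remove() and the
	 * map definition are assumed; stat_add() and RUSTY_STAT_TASK_GET_ERR
	 * come from the surrounding scheduler code as used in the diff.
	 */
	struct task_ctx {
		/* per-task scheduling state tracked by scx_rusty (assumed layout) */
		u32 dom_id;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 8192);		/* assumed size */
		__type(key, pid_t);
		__type(value, struct task_ctx);
	} task_data SEC(".maps");

	/* called from .prep_enable(): insert, overwriting any entry leaked by a
	 * delete that failed under BPF's recursion protection */
	static int task_ctx_insert(pid_t pid, const struct task_ctx *taskc)
	{
		/* 0 == BPF_ANY; BPF_NOEXIST would spuriously fail after a leaked entry */
		return bpf_map_update_elem(&task_data, &pid, taskc, 0);
	}

	/* called from .disable(): best-effort removal, failures are only counted */
	static void task_ctx_remove(pid_t pid)
	{
		if (bpf_map_delete_elem(&task_data, &pid))
			stat_add(RUSTY_STAT_TASK_GET_ERR, 1);
	}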
