From 30530b6ec795ed6e62c67a71fa7ce05c687f5b68 Mon Sep 17 00:00:00 2001
From: Changwoo Min
Date: Sat, 1 Feb 2025 01:38:30 +0900
Subject: [PATCH] scx_lavd: Do not set task's time slice to zero at the
 ops.enqueue() path

Note that at the ops.enqueue() path, setting a task's time slice to
zero is risky because we don't know the exact status of the task, so
it could cause a zero time slice error as follows:

[ 8271.001818] sched_ext: ksoftirqd/1[70] has zero slice in pick_task_scx()

The zero slice warning is harmful because the sched_ext core ends up
setting the time slice to SCX_SLICE_DFL (20 msec), increasing latency
spikes. Thus, we no longer set the time slice to 0 at the ops.enqueue()
path and rely on scx_bpf_kick_cpu(SCX_KICK_PREEMPT) all the time. Also,
use 1 (instead of 0) as a marker indicating that scx_bpf_kick_cpu()
should be performed.

This should solve the following issue:
https://github.com/sched-ext/scx/issues/1283

Signed-off-by: Changwoo Min
---
 scheds/rust/scx_lavd/src/bpf/preempt.bpf.c | 45 ++++-------
 1 file changed, 8 insertions(+), 37 deletions(-)

diff --git a/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c b/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
index 955178d4f..b5dc25daa 100644
--- a/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
+++ b/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
@@ -130,18 +130,8 @@ static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
 	cur_cpu = cpuc->cpu_id;
 
 	/*
-	 * First, test the current CPU since it can skip the expensive IPI.
-	 */
-	if (can_cpu_be_kicked(now, cpuc) &&
-	    bpf_cpumask_test_cpu(cur_cpu, cpumask) &&
-	    can_cpu1_kick_cpu2(&prm_task, &prm_cpus[0], cpuc)) {
-		victim_cpu = &prm_task;
-		goto bingo_out;
-	}
-
-	/*
-	 * If the current CPU cannot be a victim, let's check if it is worth to
-	 * try to kick other CPU at the expense of IPI.
+	 * First, check if it is worth trying to kick another CPU
+	 * at the expense of an IPI.
 	 */
 	if (!is_worth_kick_other_task(taskc))
 		goto null_out;
@@ -216,11 +206,6 @@ static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
 	return NULL;
 }
 
-static void kick_current_cpu(struct task_struct *p)
-{
-	WRITE_ONCE(p->scx.slice, 0);
-}
-
 static bool try_kick_cpu(struct task_struct *p, struct cpu_ctx *cpuc_cur,
 			 struct cpu_ctx *victim_cpuc)
 {
@@ -234,29 +219,15 @@ static bool try_kick_cpu(struct task_struct *p, struct cpu_ctx *cpuc_cur,
 	u64 old;
 	bool ret = false;
 
-	/*
-	 * If the current CPU is a victim, we just reset the current task's
-	 * time slice as an optimization. Othewise, kick the remote CPU for
-	 * preemption.
-	 *
-	 * Resetting task's time slice to zero does not trigger an immediate
-	 * preemption. However, the cost of self-IPI is prohibitively expensive
-	 * for some scenarios. The actual preemption will happen at the next
-	 * ops.tick().
-	 */
-	if (cpuc_cur->cpu_id == victim_cpuc->cpu_id) {
-		struct task_struct *tsk = bpf_get_current_task_btf();
-		kick_current_cpu(tsk);
-		return true;
-	}
-
 	/*
 	 * Kick a victim CPU if it is not victimized yet by another
 	 * concurrent kick task.
+	 *
+	 *
 	 */
 	old = p->scx.slice;
-	if (old != 0)
-		ret = __sync_bool_compare_and_swap(&p->scx.slice, old, 0);
+	if (old != 1 && old != 0)
+		ret = __sync_bool_compare_and_swap(&p->scx.slice, old, 1);
 
 	/*
 	 * Kick the remote CPU for preemption.
@@ -329,7 +300,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 	 * give up its extended time slice for fairness.
 	 */
 	if (taskc_run->lock_holder_xted) {
-		kick_current_cpu(p_run);
+		p_run->scx.slice = 0;
 		return true;
 	}
 
@@ -367,7 +338,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 	bpf_rcu_read_unlock();
 
 	if (ret)
-		kick_current_cpu(p_run);
+		p_run->scx.slice = 0;
 
 	return ret;
 }
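
For reference, the preemption protocol the patch converges on can be
sketched as below. This is a minimal illustration rather than code from
the tree: the helper name kick_victim_cpu() and its victim_cpu argument
are made up for the example, while p->scx.slice,
__sync_bool_compare_and_swap(), scx_bpf_kick_cpu(), and
SCX_KICK_PREEMPT are the actual field, compiler builtin, and sched_ext
kfunc/flag the patch relies on.

	/*
	 * Illustrative sketch only, not code from the tree. Assumes a
	 * victim CPU has already been chosen (e.g., by find_victim_cpu()).
	 */
	static bool kick_victim_cpu(struct task_struct *p, s32 victim_cpu)
	{
		u64 old = p->scx.slice;

		/*
		 * 1 acts as the "kick already requested" marker. 0 is
		 * never written on this path, so the sched_ext core can
		 * not observe a zero slice in pick_task_scx() and refill
		 * it with SCX_SLICE_DFL.
		 */
		if (old == 0 || old == 1)
			return false;

		/*
		 * The CAS guarantees that, among concurrent kickers,
		 * exactly one claims the victim and pays for the IPI.
		 */
		if (!__sync_bool_compare_and_swap(&p->scx.slice, old, 1))
			return false;

		/*
		 * SCX_KICK_PREEMPT forces the victim CPU to reschedule
		 * right away rather than at the next ops.tick().
		 */
		scx_bpf_kick_cpu(victim_cpu, SCX_KICK_PREEMPT);
		return true;
	}

The two direct writes of p_run->scx.slice = 0 that remain in
try_yield_current_cpu() are presumably safe under the same reasoning:
there p_run is the task currently running on the local CPU, so its
status is known, unlike on the ops.enqueue() path.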