scx_lavd: Do not set task's time slice to zero at the ops.enqueue() path
Note that at the ops.enqueue() path, setting a task's slice to zero is
risky because we do not know the task's exact status, so it can cause a
zero time slice error as follows:

   [ 8271.001818] sched_ext: ksoftirqd/1[70] has zero slice in pick_task_scx()

The zero-slice warning is harmful because the sched_ext core then ends up
setting the time slice to SCX_SLICE_DFL (20 msec), which increases latency
spikes.

Thus, we no longer set the time slice to 0 at the ops.enqueue() path and
instead always rely on scx_bpf_kick_cpu(SCX_KICK_PREEMPT). Also, we use 1
(instead of 0) as the marker for performing scx_bpf_kick_cpu().
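
For illustration, a minimal sketch of the resulting kick path in the
style of preempt.bpf.c. This is not the literal patch below: the helper
name try_kick_victim() and the victim_cpu parameter are hypothetical,
and the usual BPF scaffolding (vmlinux.h, scx headers) is assumed:

    static bool try_kick_victim(struct task_struct *p, s32 victim_cpu)
    {
            u64 old = p->scx.slice;
            bool ret = false;

            /*
             * Mark the victim's slice with 1, not 0: it stays non-zero,
             * so pick_task_scx() never sees a zero slice, yet it still
             * lets concurrent kickers detect an in-flight kick.
             */
            if (old != 1 && old != 0)
                    ret = __sync_bool_compare_and_swap(&p->scx.slice, old, 1);

            /*
             * Preempt the victim CPU immediately rather than waiting
             * for the next ops.tick().
             */
            if (ret)
                    scx_bpf_kick_cpu(victim_cpu, SCX_KICK_PREEMPT);

            return ret;
    }

Because the slice is never written as 0 on this path, the zero-slice
warning cannot fire, and the explicit SCX_KICK_PREEMPT makes the
preemption immediate instead of tick-deferred.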

This should solve the following issue:

   #1283

Signed-off-by: Changwoo Min <changwoo@igalia.com>
Changwoo Min committed Jan 31, 2025
1 parent 0d5c496 commit 30530b6
Showing 1 changed file with 8 additions and 37 deletions.

scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
@@ -130,18 +130,8 @@ static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
 	cur_cpu = cpuc->cpu_id;
 
 	/*
-	 * First, test the current CPU since it can skip the expensive IPI.
-	 */
-	if (can_cpu_be_kicked(now, cpuc) &&
-	    bpf_cpumask_test_cpu(cur_cpu, cpumask) &&
-	    can_cpu1_kick_cpu2(&prm_task, &prm_cpus[0], cpuc)) {
-		victim_cpu = &prm_task;
-		goto bingo_out;
-	}
-
-	/*
-	 * If the current CPU cannot be a victim, let's check if it is worth to
-	 * try to kick other CPU at the expense of IPI.
+	 * First check if it is worth to try to kick other CPU
+	 * at the expense of IPI.
 	 */
 	if (!is_worth_kick_other_task(taskc))
 		goto null_out;
@@ -216,11 +206,6 @@ static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
 	return NULL;
 }
 
-static void kick_current_cpu(struct task_struct *p)
-{
-	WRITE_ONCE(p->scx.slice, 0);
-}
-
 static bool try_kick_cpu(struct task_struct *p, struct cpu_ctx *cpuc_cur,
 			 struct cpu_ctx *victim_cpuc)
 {
@@ -234,29 +219,15 @@ static bool try_kick_cpu(struct task_struct *p, struct cpu_ctx *cpuc_cur,
 	u64 old;
 	bool ret = false;
 
-	/*
-	 * If the current CPU is a victim, we just reset the current task's
-	 * time slice as an optimization. Othewise, kick the remote CPU for
-	 * preemption.
-	 *
-	 * Resetting task's time slice to zero does not trigger an immediate
-	 * preemption. However, the cost of self-IPI is prohibitively expensive
-	 * for some scenarios. The actual preemption will happen at the next
-	 * ops.tick().
-	 */
-	if (cpuc_cur->cpu_id == victim_cpuc->cpu_id) {
-		struct task_struct *tsk = bpf_get_current_task_btf();
-		kick_current_cpu(tsk);
-		return true;
-	}
-
 	/*
 	 * Kick a victim CPU if it is not victimized yet by another
 	 * concurrent kick task.
+	 *
+	 *
 	 */
 	old = p->scx.slice;
-	if (old != 0)
-		ret = __sync_bool_compare_and_swap(&p->scx.slice, old, 0);
+	if (old != 1 && old != 0)
+		ret = __sync_bool_compare_and_swap(&p->scx.slice, old, 1);
 
 	/*
 	 * Kick the remote CPU for preemption.
@@ -329,7 +300,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 	 * give up its extended time slice for fairness.
 	 */
 	if (taskc_run->lock_holder_xted) {
-		kick_current_cpu(p_run);
+		p_run->scx.slice = 0;
 		return true;
 	}
 
@@ -367,7 +338,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 	bpf_rcu_read_unlock();
 
 	if (ret)
-		kick_current_cpu(p_run);
+		p_run->scx.slice = 0;
 
 	return ret;
 }
