Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

Commit

Permalink
scx: fix NULL pointer dereference with scx_exit_info
Browse files Browse the repository at this point in the history
Trying to load multiple schedulers at the same time can trigger the
following NULL pointer dereference:

[22053.942960] BUG: kernel NULL pointer dereference, address: 0000000000000000
[22053.942966] #PF: supervisor write access in kernel mode
[22053.942968] #PF: error_code(0x0002) - not-present page
[22053.942969] PGD 0 P4D 0
[22053.942972] Oops: 0002 [#1] PREEMPT SMP NOPTI
[22053.942976] CPU: 6 PID: 3550 Comm: sched_ext_ops_h Tainted: G           O       6.7.0-4-generic #4+scx3-Ubuntu
[22053.942978] Hardware name: GPD G1621-02/G1621-02, BIOS 2.04 09/01/2022
[22053.942980] Sched_ext: central (enabled+all)
[22053.942981] RIP: 0010:scx_ops_disable_workfn+0x85/0x610
[22053.942987] Code: 89 df f3 48 ab b9 01 00 00 00 83 fa 01 0f 86 c3 00 00 00 89 d0 f0 0f b1 0d 60 ff 11 02 0f 85 10 05 00 00 48 8b 85 90 fe ff ff <89> 10 81 fa 00 04 00 00 0f 84 ef 04 00 00 0f 8e cb 00 00 00 48 c7
[22053.942989] RSP: 0018:ffffad420257fd30 EFLAGS: 00010246
[22053.942991] RAX: 0000000000000000 RBX: ffffad420257fd58 RCX: 0000000000000001
[22053.942993] RDX: 0000000000000040 RSI: 0000000000000000 RDI: ffffad420257fd98
[22053.942994] RBP: ffffad420257fea8 R08: 0000000000000000 R09: 0000000000000000
[22053.942996] R10: 0000000000000000 R11: 0000000000000000 R12: ffffad420257fd98
[22053.942998] R13: ffff96a4d07eddc0 R14: ffff96a4d07eddc4 R15: ffffffff8897c3b0
[22053.942999] FS:  0000000000000000(0000) GS:ffff96a7dfb00000(0000) knlGS:0000000000000000
[22053.943002] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[22053.943003] CR2: 0000000000000000 CR3: 00000001f6a3c001 CR4: 0000000000f70ef0
[22053.943005] PKRU: 55555554
[22053.943006] Call Trace:
[22053.943008]  <TASK>
[22053.943012]  ? show_regs+0x6d/0x80
[22053.943016]  ? __die+0x24/0x80
[22053.943018]  ? page_fault_oops+0x99/0x1b0
[22053.943021]  ? do_user_addr_fault+0x2ee/0x6b0
[22053.943024]  ? exc_page_fault+0x83/0x1b0
[22053.943028]  ? asm_exc_page_fault+0x27/0x30
[22053.943030]  ? __pfx_scx_ops_disable_workfn+0x10/0x10
[22053.943034]  ? scx_ops_disable_workfn+0x85/0x610
[22053.943036]  ? asm_sysvec_irq_work+0x1b/0x20
[22053.943042]  ? __pfx_scx_ops_disable_workfn+0x10/0x10
[22053.943043]  kthread_worker_fn+0x9e/0x230
[22053.943048]  ? __pfx_kthread_worker_fn+0x10/0x10
[22053.943050]  kthread+0xef/0x120
[22053.943053]  ? __pfx_kthread+0x10/0x10
[22053.943056]  ret_from_fork+0x44/0x70
[22053.943058]  ? __pfx_kthread+0x10/0x10
[22053.943061]  ret_from_fork_asm+0x1b/0x30
[22053.943064]  </TASK>

This happens because in scx_ops_enable(), if a scheduler is already
running, we are freeing scx_exit_info, that is still owned by the
running scheduler.

Therefore, as soon as we stop the running scheduler we can hit the NULL
pointer dereference.

Reproducer:
 - start any scheduler
 - try to start another scheduler
 - stop the running scheduler
 - BUG

Fix this by not freeing scx_exit_info in error path of scx_ops_enable()
when there is a running scheduler.

Moreover, also make sure to access scx_exit_info with the
scx_ops_enable_mutex held in scx_ops_disable_workfn(), to prevent other
potential race conditions.

Fixes: 26ae1b0 ("scx: Make scx_exit_info fields dynamically allocated")
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
  • Loading branch information
Andrea Righi committed Jan 26, 2024
1 parent 58ac752 commit 57d57d8
Showing 1 changed file with 20 additions and 13 deletions.
33 changes: 20 additions & 13 deletions kernel/sched/ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -3272,7 +3272,7 @@ static const char *scx_exit_reason(enum scx_exit_kind kind)

static void scx_ops_disable_workfn(struct kthread_work *work)
{
struct scx_exit_info *ei = scx_exit_info;
struct scx_exit_info *ei;
struct scx_task_iter sti;
struct task_struct *p;
struct rhashtable_iter rht_iter;
Expand All @@ -3291,14 +3291,27 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
if (atomic_try_cmpxchg(&scx_exit_kind, &kind, SCX_EXIT_DONE))
break;
}

/* guarantee forward progress by bypassing scx_ops */
scx_ops_bypass(true);

/*
* Here, every runnable task is guaranteed to make forward progress and
* we can safely use blocking synchronization constructs. Actually
* disable ops.
*/
mutex_lock(&scx_ops_enable_mutex);

ei = scx_exit_info;
if (!ei) {
mutex_unlock(&scx_ops_enable_mutex);
goto done;
}
ei->kind = kind;
ei->reason = scx_exit_reason(ei->kind);

cancel_delayed_work_sync(&scx_watchdog_work);

/* guarantee forward progress by bypassing scx_ops */
scx_ops_bypass(true);

switch (scx_ops_set_enable_state(SCX_OPS_DISABLING)) {
case SCX_OPS_DISABLING:
WARN_ONCE(true, "sched_ext: duplicate disabling instance?");
Expand All @@ -3313,13 +3326,6 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
break;
}

/*
* Here, every runnable task is guaranteed to make forward progress and
* we can safely use blocking synchronization constructs. Actually
* disable ops.
*/
mutex_lock(&scx_ops_enable_mutex);

static_branch_disable(&__scx_switched_all);
WRITE_ONCE(scx_switching_all, false);

Expand Down Expand Up @@ -3614,13 +3620,13 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
scx_create_rt_helper("sched_ext_ops_helper"));
if (!scx_ops_helper) {
ret = -ENOMEM;
goto err;
goto err_unlock;
}
}

if (scx_ops_enable_state() != SCX_OPS_DISABLED) {
ret = -EBUSY;
goto err;
goto err_unlock;
}

scx_exit_info = alloc_exit_info();
Expand Down Expand Up @@ -3868,6 +3874,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
free_exit_info(scx_exit_info);
scx_exit_info = NULL;
}
err_unlock:
mutex_unlock(&scx_ops_enable_mutex);
return ret;

Expand Down

0 comments on commit 57d57d8

Please sign in to comment.