From fa75b1fa5ce2e3f5cbe29416baa60e72ae46e2db Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Fri, 26 Jan 2024 22:50:40 +0100
Subject: [PATCH] scx: fix NULL pointer dereference with scx_exit_info

Trying to load multiple schedulers at the same time can trigger the
following NULL pointer dereference:

[22053.942960] BUG: kernel NULL pointer dereference, address: 0000000000000000
[22053.942966] #PF: supervisor write access in kernel mode
[22053.942968] #PF: error_code(0x0002) - not-present page
[22053.942969] PGD 0 P4D 0
[22053.942972] Oops: 0002 [#1] PREEMPT SMP NOPTI
[22053.942976] CPU: 6 PID: 3550 Comm: sched_ext_ops_h Tainted: G           O       6.7.0-4-generic #4+scx3-Ubuntu
[22053.942978] Hardware name: GPD G1621-02/G1621-02, BIOS 2.04 09/01/2022
[22053.942980] Sched_ext: central (enabled+all)
[22053.942981] RIP: 0010:scx_ops_disable_workfn+0x85/0x610
[22053.942987] Code: 89 df f3 48 ab b9 01 00 00 00 83 fa 01 0f 86 c3 00 00 00 89 d0 f0 0f b1 0d 60 ff 11 02 0f 85 10 05 00 00 48 8b 85 90 fe ff ff <89> 10 81 fa 00 04 00 00 0f 84 ef 04 00 00 0f 8e cb 00 00 00 48 c7
[22053.942989] RSP: 0018:ffffad420257fd30 EFLAGS: 00010246
[22053.942991] RAX: 0000000000000000 RBX: ffffad420257fd58 RCX: 0000000000000001
[22053.942993] RDX: 0000000000000040 RSI: 0000000000000000 RDI: ffffad420257fd98
[22053.942994] RBP: ffffad420257fea8 R08: 0000000000000000 R09: 0000000000000000
[22053.942996] R10: 0000000000000000 R11: 0000000000000000 R12: ffffad420257fd98
[22053.942998] R13: ffff96a4d07eddc0 R14: ffff96a4d07eddc4 R15: ffffffff8897c3b0
[22053.942999] FS:  0000000000000000(0000) GS:ffff96a7dfb00000(0000) knlGS:0000000000000000
[22053.943002] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[22053.943003] CR2: 0000000000000000 CR3: 00000001f6a3c001 CR4: 0000000000f70ef0
[22053.943005] PKRU: 55555554
[22053.943006] Call Trace:
[22053.943008]  <TASK>
[22053.943012]  ? show_regs+0x6d/0x80
[22053.943016]  ? __die+0x24/0x80
[22053.943018]  ? page_fault_oops+0x99/0x1b0
[22053.943021]  ? do_user_addr_fault+0x2ee/0x6b0
[22053.943024]  ? exc_page_fault+0x83/0x1b0
[22053.943028]  ? asm_exc_page_fault+0x27/0x30
[22053.943030]  ? __pfx_scx_ops_disable_workfn+0x10/0x10
[22053.943034]  ? scx_ops_disable_workfn+0x85/0x610
[22053.943036]  ? asm_sysvec_irq_work+0x1b/0x20
[22053.943042]  ? __pfx_scx_ops_disable_workfn+0x10/0x10
[22053.943043]  kthread_worker_fn+0x9e/0x230
[22053.943048]  ? __pfx_kthread_worker_fn+0x10/0x10
[22053.943050]  kthread+0xef/0x120
[22053.943053]  ? __pfx_kthread+0x10/0x10
[22053.943056]  ret_from_fork+0x44/0x70
[22053.943058]  ? __pfx_kthread+0x10/0x10
[22053.943061]  ret_from_fork_asm+0x1b/0x30
[22053.943064]  </TASK>

This happens because in scx_ops_enable(), if a scheduler is already
running, we are freeing scx_exit_info, that is still owned by the
running scheduler.

Therefore, as soon as we stop the running scheduler we can hit the NULL
pointer dereference.

Reproducer:
 - start any scheduler
 - try to start another scheduler
 - stop the running scheduler
 - BUG

Fix this by not freeing scx_exit_info in error path of scx_ops_enable()
when there is a running scheduler.

Fixes: 26ae1b03568b ("scx: Make scx_exit_info fields dynamically allocated")
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
---
 kernel/sched/ext.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index e45a3058d2cff..785da129de46b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3614,13 +3614,13 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 			   scx_create_rt_helper("sched_ext_ops_helper"));
 		if (!scx_ops_helper) {
 			ret = -ENOMEM;
-			goto err;
+			goto err_unlock;
 		}
 	}
 
 	if (scx_ops_enable_state() != SCX_OPS_DISABLED) {
 		ret = -EBUSY;
-		goto err;
+		goto err_unlock;
 	}
 
 	scx_exit_info = alloc_exit_info();
@@ -3868,6 +3868,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 		free_exit_info(scx_exit_info);
 		scx_exit_info = NULL;
 	}
+err_unlock:
 	mutex_unlock(&scx_ops_enable_mutex);
 	return ret;