Skip to content

Commit

Permalink
kcsan: Turn report_filterlist_lock into a raw_spinlock
Browse files Browse the repository at this point in the history
Ran Xiaokai reports that with a KCSAN-enabled PREEMPT_RT kernel, we can see
splats like:

| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
| in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
| preempt_count: 10002, expected: 0
| RCU nest depth: 0, expected: 0
| no locks held by swapper/1/0.
| irq event stamp: 156674
| hardirqs last  enabled at (156673): [<ffffffff81130bd9>] do_idle+0x1f9/0x240
| hardirqs last disabled at (156674): [<ffffffff82254f84>] sysvec_apic_timer_interrupt+0x14/0xc0
| softirqs last  enabled at (0): [<ffffffff81099f47>] copy_process+0xfc7/0x4b60
| softirqs last disabled at (0): [<0000000000000000>] 0x0
| Preemption disabled at:
| [<ffffffff814a3e2a>] paint_ptr+0x2a/0x90
| CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.11.0+ #3
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
| Call Trace:
|  <IRQ>
|  dump_stack_lvl+0x7e/0xc0
|  dump_stack+0x1d/0x30
|  __might_resched+0x1a2/0x270
|  rt_spin_lock+0x68/0x170
|  kcsan_skip_report_debugfs+0x43/0xe0
|  print_report+0xb5/0x590
|  kcsan_report_known_origin+0x1b1/0x1d0
|  kcsan_setup_watchpoint+0x348/0x650
|  __tsan_unaligned_write1+0x16d/0x1d0
|  hrtimer_interrupt+0x3d6/0x430
|  __sysvec_apic_timer_interrupt+0xe8/0x3a0
|  sysvec_apic_timer_interrupt+0x97/0xc0
|  </IRQ>

On a detected data race, KCSAN's reporting logic checks if it should
filter the report. That list is protected by the report_filterlist_lock
*non-raw* spinlock which may sleep on RT kernels.

Since KCSAN may report data races in any context, convert it to a
raw_spinlock.

This requires being careful about when to allocate memory for the filter
list itself which can be done via KCSAN's debugfs interface. Concurrent
modification of the filter list via debugfs should be rare: the chosen
strategy is to optimistically pre-allocate memory before the critical
section and discard if unused.

Link: https://lore.kernel.org/all/20240925143154.2322926-1-ranxiaokai627@163.com/
Reported-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Tested-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Signed-off-by: Marco Elver <elver@google.com>
  • Loading branch information
melver committed Oct 14, 2024
1 parent 8cf0b93 commit 59458fa
Showing 1 changed file with 36 additions and 38 deletions.
74 changes: 36 additions & 38 deletions kernel/kcsan/debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,8 @@ static struct {
int used; /* number of elements used */
bool sorted; /* if elements are sorted */
bool whitelist; /* if list is a blacklist or whitelist */
} report_filterlist = {
.addrs = NULL,
.size = 8, /* small initial size */
.used = 0,
.sorted = false,
.whitelist = false, /* default is blacklist */
};
static DEFINE_SPINLOCK(report_filterlist_lock);
} report_filterlist;
static DEFINE_RAW_SPINLOCK(report_filterlist_lock);

/*
* The microbenchmark allows benchmarking KCSAN core runtime only. To run
Expand Down Expand Up @@ -110,7 +104,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
return false;
func_addr -= offset; /* Get function start */

spin_lock_irqsave(&report_filterlist_lock, flags);
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
if (report_filterlist.used == 0)
goto out;

Expand All @@ -127,67 +121,71 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
ret = !ret;

out:
spin_unlock_irqrestore(&report_filterlist_lock, flags);
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
return ret;
}

static void set_report_filterlist_whitelist(bool whitelist)
{
unsigned long flags;

spin_lock_irqsave(&report_filterlist_lock, flags);
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
report_filterlist.whitelist = whitelist;
spin_unlock_irqrestore(&report_filterlist_lock, flags);
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
}

/* Returns 0 on success, error-code otherwise. */
static ssize_t insert_report_filterlist(const char *func)
{
unsigned long flags;
unsigned long addr = kallsyms_lookup_name(func);
unsigned long *delay_free = NULL;
unsigned long *new_addrs = NULL;
size_t new_size = 0;
ssize_t ret = 0;

if (!addr) {
pr_err("could not find function: '%s'\n", func);
return -ENOENT;
}

spin_lock_irqsave(&report_filterlist_lock, flags);
retry_alloc:
/*
* Check if we need an allocation, and re-validate under the lock. Since
* the report_filterlist_lock is a raw, cannot allocate under the lock.
*/
if (data_race(report_filterlist.used == report_filterlist.size)) {
new_size = (report_filterlist.size ?: 4) * 2;
delay_free = new_addrs = kmalloc_array(new_size, sizeof(unsigned long), GFP_KERNEL);
if (!new_addrs)
return -ENOMEM;
}

if (report_filterlist.addrs == NULL) {
/* initial allocation */
report_filterlist.addrs =
kmalloc_array(report_filterlist.size,
sizeof(unsigned long), GFP_ATOMIC);
if (report_filterlist.addrs == NULL) {
ret = -ENOMEM;
goto out;
}
} else if (report_filterlist.used == report_filterlist.size) {
/* resize filterlist */
size_t new_size = report_filterlist.size * 2;
unsigned long *new_addrs =
krealloc(report_filterlist.addrs,
new_size * sizeof(unsigned long), GFP_ATOMIC);

if (new_addrs == NULL) {
/* leave filterlist itself untouched */
ret = -ENOMEM;
goto out;
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
if (report_filterlist.used == report_filterlist.size) {
/* Check we pre-allocated enough, and retry if not. */
if (report_filterlist.used >= new_size) {
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
kfree(new_addrs); /* kfree(NULL) is safe */
delay_free = new_addrs = NULL;
goto retry_alloc;
}

if (report_filterlist.used)
memcpy(new_addrs, report_filterlist.addrs, report_filterlist.used * sizeof(unsigned long));
delay_free = report_filterlist.addrs; /* free the old list */
report_filterlist.addrs = new_addrs; /* switch to the new list */
report_filterlist.size = new_size;
report_filterlist.addrs = new_addrs;
}

/* Note: deduplicating should be done in userspace. */
report_filterlist.addrs[report_filterlist.used++] =
kallsyms_lookup_name(func);
report_filterlist.sorted = false;

out:
spin_unlock_irqrestore(&report_filterlist_lock, flags);
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);

kfree(delay_free);
return ret;
}

Expand All @@ -204,13 +202,13 @@ static int show_info(struct seq_file *file, void *v)
}

/* show filter functions, and filter type */
spin_lock_irqsave(&report_filterlist_lock, flags);
raw_spin_lock_irqsave(&report_filterlist_lock, flags);
seq_printf(file, "\n%s functions: %s\n",
report_filterlist.whitelist ? "whitelisted" : "blacklisted",
report_filterlist.used == 0 ? "none" : "");
for (i = 0; i < report_filterlist.used; ++i)
seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]);
spin_unlock_irqrestore(&report_filterlist_lock, flags);
raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);

return 0;
}
Expand Down

0 comments on commit 59458fa

Please sign in to comment.