Skip to content

Commit 01daccf

Browse files
Mukesh Ojhagregkh
authored andcommitted
devcoredump : Serialize devcd_del work
In following scenario(diagram), when one thread X running dev_coredumpm() adds devcd device to the framework which sends uevent notification to userspace and another thread Y reads this uevent and call to devcd_data_write() which eventually try to delete the queued timer that is not initialized/queued yet. So, debug object reports some warning and in the meantime, timer is initialized and queued from X path. and from Y path, it gets reinitialized again and timer->entry.pprev=NULL and try_to_grab_pending() stucks. To fix this, introduce mutex and a boolean flag to serialize the behaviour. cpu0(X) cpu1(Y) dev_coredump() uevent sent to user space device_add() ======================> user space process Y reads the uevents writes to devcd fd which results into writes to devcd_data_write() mod_delayed_work() try_to_grab_pending() del_timer() debug_assert_init() INIT_DELAYED_WORK() schedule_delayed_work() debug_object_fixup() timer_fixup_assert_init() timer_setup() do_init_timer() /* Above call reinitializes the timer to timer->entry.pprev=NULL and this will be checked later in timer_pending() call. */ timer_pending() !hlist_unhashed_lockless(&timer->entry) !h->pprev /* del_timer() checks h->pprev and finds it to be NULL due to which try_to_grab_pending() stucks. */ Link: https://lore.kernel.org/lkml/2e1f81e2-428c-f11f-ce92-eb11048cb271@quicinc.com/ Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com> Link: https://lore.kernel.org/r/1663073424-13663-1-git-send-email-quic_mojha@quicinc.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent b8de524 commit 01daccf

File tree

1 file changed

+81
-2
lines changed

1 file changed

+81
-2
lines changed

drivers/base/devcoredump.c

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,47 @@ struct devcd_entry {
2525
struct device devcd_dev;
2626
void *data;
2727
size_t datalen;
28+
/*
29+
* Here, mutex is required to serialize the calls to del_wk work between
30+
* user/kernel space which happens when devcd is added with device_add()
31+
* and that sends uevent to user space. User space reads the uevents,
32+
* and calls to devcd_data_write() which try to modify the work which is
33+
* not even initialized/queued from devcoredump.
34+
*
35+
*
36+
*
37+
* cpu0(X) cpu1(Y)
38+
*
39+
* dev_coredump() uevent sent to user space
40+
* device_add() ======================> user space process Y reads the
41+
* uevents writes to devcd fd
42+
* which results into writes to
43+
*
44+
* devcd_data_write()
45+
* mod_delayed_work()
46+
* try_to_grab_pending()
47+
* del_timer()
48+
* debug_assert_init()
49+
* INIT_DELAYED_WORK()
50+
* schedule_delayed_work()
51+
*
52+
*
53+
* Also, mutex alone would not be enough to avoid scheduling of
54+
* del_wk work after it get flush from a call to devcd_free()
55+
* mentioned as below.
56+
*
57+
* disabled_store()
58+
* devcd_free()
59+
* mutex_lock() devcd_data_write()
60+
* flush_delayed_work()
61+
* mutex_unlock()
62+
* mutex_lock()
63+
* mod_delayed_work()
64+
* mutex_unlock()
65+
* So, delete_work flag is required.
66+
*/
67+
struct mutex mutex;
68+
bool delete_work;
2869
struct module *owner;
2970
ssize_t (*read)(char *buffer, loff_t offset, size_t count,
3071
void *data, size_t datalen);
@@ -84,7 +125,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
84125
struct device *dev = kobj_to_dev(kobj);
85126
struct devcd_entry *devcd = dev_to_devcd(dev);
86127

87-
mod_delayed_work(system_wq, &devcd->del_wk, 0);
128+
mutex_lock(&devcd->mutex);
129+
if (!devcd->delete_work) {
130+
devcd->delete_work = true;
131+
mod_delayed_work(system_wq, &devcd->del_wk, 0);
132+
}
133+
mutex_unlock(&devcd->mutex);
88134

89135
return count;
90136
}
@@ -112,7 +158,12 @@ static int devcd_free(struct device *dev, void *data)
112158
{
113159
struct devcd_entry *devcd = dev_to_devcd(dev);
114160

161+
mutex_lock(&devcd->mutex);
162+
if (!devcd->delete_work)
163+
devcd->delete_work = true;
164+
115165
flush_delayed_work(&devcd->del_wk);
166+
mutex_unlock(&devcd->mutex);
116167
return 0;
117168
}
118169

@@ -122,6 +173,30 @@ static ssize_t disabled_show(struct class *class, struct class_attribute *attr,
122173
return sysfs_emit(buf, "%d\n", devcd_disabled);
123174
}
124175

176+
/*
177+
*
178+
* disabled_store() worker()
179+
* class_for_each_device(&devcd_class,
180+
* NULL, NULL, devcd_free)
181+
* ...
182+
* ...
183+
* while ((dev = class_dev_iter_next(&iter))
184+
* devcd_del()
185+
* device_del()
186+
* put_device() <- last reference
187+
* error = fn(dev, data) devcd_dev_release()
188+
* devcd_free(dev, data) kfree(devcd)
189+
* mutex_lock(&devcd->mutex);
190+
*
191+
*
192+
* In the above diagram, It looks like disabled_store() would be racing with parallely
193+
* running devcd_del() and result in memory abort while acquiring devcd->mutex which
194+
* is called after kfree of devcd memory after dropping its last reference with
195+
* put_device(). However, this will not happens as fn(dev, data) runs
196+
* with its own reference to device via klist_node so it is not its last reference.
197+
* so, above situation would not occur.
198+
*/
199+
125200
static ssize_t disabled_store(struct class *class, struct class_attribute *attr,
126201
const char *buf, size_t count)
127202
{
@@ -278,13 +353,16 @@ void dev_coredumpm(struct device *dev, struct module *owner,
278353
devcd->read = read;
279354
devcd->free = free;
280355
devcd->failing_dev = get_device(dev);
356+
devcd->delete_work = false;
281357

358+
mutex_init(&devcd->mutex);
282359
device_initialize(&devcd->devcd_dev);
283360

284361
dev_set_name(&devcd->devcd_dev, "devcd%d",
285362
atomic_inc_return(&devcd_count));
286363
devcd->devcd_dev.class = &devcd_class;
287364

365+
mutex_lock(&devcd->mutex);
288366
if (device_add(&devcd->devcd_dev))
289367
goto put_device;
290368

@@ -301,10 +379,11 @@ void dev_coredumpm(struct device *dev, struct module *owner,
301379

302380
INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
303381
schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
304-
382+
mutex_unlock(&devcd->mutex);
305383
return;
306384
put_device:
307385
put_device(&devcd->devcd_dev);
386+
mutex_unlock(&devcd->mutex);
308387
put_module:
309388
module_put(owner);
310389
free:

0 commit comments

Comments
 (0)