
Commit ea0ecba

jankara authored and smb49 committed
mbcache: automatically delete entries from cache on freeing
BugLink: https://bugs.launchpad.net/bugs/2003914

[ Upstream commit 307af6c ]

Use the fact that entries with elevated refcount are not removed from
the hash and just move removal of the entry from the hash to the entry
freeing time. When doing this we also change the generic code to hold
one reference to the cache entry, not two of them, which makes code
somewhat more obvious.

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20220712105436.32204-10-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Stable-dep-of: a44e84a ("ext4: fix deadlock due to mbcache entry corruption")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
1 parent f83c67b commit ea0ecba

File tree: 2 files changed, +55 −77 lines changed

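The commit message describes the new lifetime rule: the hash table holds exactly one reference, lookups take extra references only via an inc-not-zero, and whoever drops the last reference unhashes and frees the entry. Below is a minimal, hedged userspace sketch of that scheme using C11 atomics. The type and helpers (struct entry, entry_get(), entry_put(), entry_delete_or_get()) are illustrative stand-ins, not the mbcache API, and the real code in the diff below additionally handles the hash chain bitlock and the LRU list.

/*
 * Hedged sketch only: a userspace model (C11 atomics) of the refcount
 * scheme, not kernel code. struct entry and the helpers are illustrative
 * names, not the mbcache API.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	atomic_int refcnt;	/* 1 == only the "hash table" reference is left */
	unsigned int key;
};

/* Model of atomic_inc_not_zero(): take a reference only while the entry lives. */
static bool entry_get(struct entry *e)
{
	int old = atomic_load(&e->refcnt);

	while (old > 0) {
		if (atomic_compare_exchange_weak(&e->refcnt, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;	/* refcount already hit zero: entry is being freed */
}

/* Freeing implies unhashing; here "unhashing" is just a message. */
static void entry_free(struct entry *e)
{
	printf("unhash and free entry %u\n", e->key);
	free(e);
}

/* Drop one reference; the last reference frees (and thus unhashes) the entry. */
static void entry_put(struct entry *e)
{
	if (atomic_fetch_sub(&e->refcnt, 1) == 1)
		entry_free(e);
}

/*
 * Model of the new mb_cache_entry_delete_or_get() logic: the caller holds
 * one reference and the hash holds the other, so seeing exactly 2 means we
 * are the last user and may tear the entry down in one step.
 */
static struct entry *entry_delete_or_get(struct entry *e)
{
	int expected = 2;

	if (!atomic_compare_exchange_strong(&e->refcnt, &expected, 0))
		return e;	/* still in use elsewhere; caller keeps its reference */
	entry_free(e);
	return NULL;
}

int main(void)
{
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return 1;
	atomic_init(&e->refcnt, 1);	/* initial hash reference only */
	e->key = 42;

	if (entry_get(e)) {		/* lookup: refcnt 1 -> 2 */
		printf("got entry, refcnt=%d\n", atomic_load(&e->refcnt));
		entry_put(e);		/* done with it: back to 1 */
	}

	if (entry_get(e) && entry_delete_or_get(e) == NULL)
		printf("entry deleted as last user\n");
	return 0;
}

In the actual patch, entry_get() corresponds to the atomic_inc_not_zero() checks in __entry_find() and mb_cache_entry_get(), and the atomic_cmpxchg() against 1 in mb_cache_shrink() plays the same "last user" trick when only the hash reference remains.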

fs/mbcache.c

Lines changed: 39 additions & 69 deletions
@@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&entry->e_list);
-	/* One ref for hash, one ref returned */
+	/* Initial hash reference */
 	atomic_set(&entry->e_refcnt, 1);
 	entry->e_key = key;
 	entry->e_value = value;
@@ -106,21 +106,28 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 		}
 	}
 	hlist_bl_add_head(&entry->e_hash_list, head);
-	hlist_bl_unlock(head);
-
+	/*
+	 * Add entry to LRU list before it can be found by
+	 * mb_cache_entry_delete() to avoid races
+	 */
 	spin_lock(&cache->c_list_lock);
 	list_add_tail(&entry->e_list, &cache->c_list);
-	/* Grab ref for LRU list */
-	atomic_inc(&entry->e_refcnt);
 	cache->c_entry_count++;
 	spin_unlock(&cache->c_list_lock);
+	hlist_bl_unlock(head);
 
 	return 0;
 }
 EXPORT_SYMBOL(mb_cache_entry_create);
 
-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
 {
+	struct hlist_bl_head *head;
+
+	head = mb_cache_entry_head(cache, entry->e_key);
+	hlist_bl_lock(head);
+	hlist_bl_del(&entry->e_hash_list);
+	hlist_bl_unlock(head);
 	kmem_cache_free(mb_entry_cache, entry);
 }
 EXPORT_SYMBOL(__mb_cache_entry_free);
@@ -134,7 +141,7 @@ EXPORT_SYMBOL(__mb_cache_entry_free);
  */
 void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
 {
-	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
+	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
 }
 EXPORT_SYMBOL(mb_cache_entry_wait_unused);
 
@@ -155,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
 	while (node) {
 		entry = hlist_bl_entry(node, struct mb_cache_entry,
 				       e_hash_list);
-		if (entry->e_key == key && entry->e_reusable) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key && entry->e_reusable &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 		node = node->next;
 	}
 	entry = NULL;
@@ -218,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-		if (entry->e_key == key && entry->e_value == value) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key && entry->e_value == value &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 	}
 	entry = NULL;
 out:
@@ -281,37 +286,25 @@ EXPORT_SYMBOL(mb_cache_entry_delete);
 struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
 						    u32 key, u64 value)
 {
-	struct hlist_bl_node *node;
-	struct hlist_bl_head *head;
 	struct mb_cache_entry *entry;
 
-	head = mb_cache_entry_head(cache, key);
-	hlist_bl_lock(head);
-	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-		if (entry->e_key == key && entry->e_value == value) {
-			if (atomic_read(&entry->e_refcnt) > 2) {
-				atomic_inc(&entry->e_refcnt);
-				hlist_bl_unlock(head);
-				return entry;
-			}
-			/* We keep hash list reference to keep entry alive */
-			hlist_bl_del_init(&entry->e_hash_list);
-			hlist_bl_unlock(head);
-			spin_lock(&cache->c_list_lock);
-			if (!list_empty(&entry->e_list)) {
-				list_del_init(&entry->e_list);
-				if (!WARN_ONCE(cache->c_entry_count == 0,
-		"mbcache: attempt to decrement c_entry_count past zero"))
-					cache->c_entry_count--;
-				atomic_dec(&entry->e_refcnt);
-			}
-			spin_unlock(&cache->c_list_lock);
-			mb_cache_entry_put(cache, entry);
-			return NULL;
-		}
-	}
-	hlist_bl_unlock(head);
+	entry = mb_cache_entry_get(cache, key, value);
+	if (!entry)
+		return NULL;
 
+	/*
+	 * Drop the ref we got from mb_cache_entry_get() and the initial hash
+	 * ref if we are the last user
+	 */
+	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+		return entry;
+
+	spin_lock(&cache->c_list_lock);
+	if (!list_empty(&entry->e_list))
+		list_del_init(&entry->e_list);
+	cache->c_entry_count--;
+	spin_unlock(&cache->c_list_lock);
+	__mb_cache_entry_free(cache, entry);
 	return NULL;
 }
 EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
@@ -343,42 +336,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
 				     unsigned long nr_to_scan)
 {
 	struct mb_cache_entry *entry;
-	struct hlist_bl_head *head;
 	unsigned long shrunk = 0;
 
 	spin_lock(&cache->c_list_lock);
 	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
 		entry = list_first_entry(&cache->c_list,
 					 struct mb_cache_entry, e_list);
-		if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
+		/* Drop initial hash reference if there is no user */
+		if (entry->e_referenced ||
+		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
 			entry->e_referenced = 0;
 			list_move_tail(&entry->e_list, &cache->c_list);
 			continue;
 		}
 		list_del_init(&entry->e_list);
 		cache->c_entry_count--;
-		/*
-		 * We keep LRU list reference so that entry doesn't go away
-		 * from under us.
-		 */
 		spin_unlock(&cache->c_list_lock);
-		head = mb_cache_entry_head(cache, entry->e_key);
-		hlist_bl_lock(head);
-		/* Now a reliable check if the entry didn't get used... */
-		if (atomic_read(&entry->e_refcnt) > 2) {
-			hlist_bl_unlock(head);
-			spin_lock(&cache->c_list_lock);
-			list_add_tail(&entry->e_list, &cache->c_list);
-			cache->c_entry_count++;
-			continue;
-		}
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		}
-		hlist_bl_unlock(head);
-		if (mb_cache_entry_put(cache, entry))
-			shrunk++;
+		__mb_cache_entry_free(cache, entry);
+		shrunk++;
 		cond_resched();
 		spin_lock(&cache->c_list_lock);
 	}
@@ -470,11 +445,6 @@ void mb_cache_destroy(struct mb_cache *cache)
 	 * point.
 	 */
 	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		} else
-			WARN_ON(1);
 		list_del(&entry->e_list);
 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
 		mb_cache_entry_put(cache, entry);

include/linux/mbcache.h

Lines changed: 16 additions & 8 deletions
@@ -13,8 +13,16 @@ struct mb_cache;
 struct mb_cache_entry {
 	/* List of entries in cache - protected by cache->c_list_lock */
 	struct list_head e_list;
-	/* Hash table list - protected by hash chain bitlock */
+	/*
+	 * Hash table list - protected by hash chain bitlock. The entry is
+	 * guaranteed to be hashed while e_refcnt > 0.
+	 */
 	struct hlist_bl_node e_hash_list;
+	/*
+	 * Entry refcount. Once it reaches zero, entry is unhashed and freed.
+	 * While refcount > 0, the entry is guaranteed to stay in the hash and
+	 * e.g. mb_cache_entry_try_delete() will fail.
+	 */
 	atomic_t e_refcnt;
 	/* Key in hash - stable during lifetime of the entry */
 	u32 e_key;
@@ -29,20 +37,20 @@ void mb_cache_destroy(struct mb_cache *cache);
 
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 			  u64 value, bool reusable);
-void __mb_cache_entry_free(struct mb_cache_entry *entry);
+void __mb_cache_entry_free(struct mb_cache *cache,
+			   struct mb_cache_entry *entry);
 void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
-static inline int mb_cache_entry_put(struct mb_cache *cache,
-				     struct mb_cache_entry *entry)
+static inline void mb_cache_entry_put(struct mb_cache *cache,
+				      struct mb_cache_entry *entry)
 {
 	unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
 
 	if (cnt > 0) {
-		if (cnt <= 3)
+		if (cnt <= 2)
 			wake_up_var(&entry->e_refcnt);
-		return 0;
+		return;
 	}
-	__mb_cache_entry_free(entry);
-	return 1;
+	__mb_cache_entry_free(cache, entry);
 }
 
 struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
