Skip to content

Commit 12dcb0e

Browse files
yosrym93 authored and akpm00 committed
mm: zswap: properly synchronize freeing resources during CPU hotunplug
In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the current CPU at the beginning of the operation is retrieved and used throughout. However, since neither preemption nor migration are disabled, it is possible that the operation continues on a different CPU. If the original CPU is hotunplugged while the acomp_ctx is still in use, we run into a UAF bug as some of the resources attached to the acomp_ctx are freed during hotunplug in zswap_cpu_comp_dead() (i.e. acomp_ctx.buffer, acomp_ctx.req, or acomp_ctx.acomp). The problem was introduced in commit 1ec3b5f ("mm/zswap: move to use crypto_acomp API for hardware acceleration") when the switch to the crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was retrieved using get_cpu_ptr() which disables preemption and makes sure the CPU cannot go away from under us. Preemption cannot be disabled with the crypto_acomp API as a sleepable context is needed. Use the acomp_ctx.mutex to synchronize CPU hotplug callbacks allocating and freeing resources with compression/decompression paths. Make sure that acomp_ctx.req is NULL when the resources are freed. In the compression/decompression paths, check if acomp_ctx.req is NULL after acquiring the mutex (meaning the CPU was offlined) and retry on the new CPU. The initialization of acomp_ctx.mutex is moved from the CPU hotplug callback to the pool initialization where it belongs (where the mutex is allocated). In addition to adding clarity, this makes sure that CPU hotplug cannot reinitialize a mutex that is already locked by compression/decompression. Previously a fix was attempted by holding cpus_read_lock() [1]. This would have caused a potential deadlock as it is possible for code already holding the lock to fall into reclaim and enter zswap (causing a deadlock). A fix was also attempted using SRCU for synchronization, but Johannes pointed out that synchronize_srcu() cannot be used in CPU hotplug notifiers [2]. 
Alternative fixes that were considered/attempted and could have worked: - Refcounting the per-CPU acomp_ctx. This involves complexity in handling the race between the refcount dropping to zero in zswap_[de]compress() and the refcount being re-initialized when the CPU is onlined. - Disabling migration before getting the per-CPU acomp_ctx [3], but that's discouraged and is a much bigger hammer than needed, and could result in subtle performance issues. [1] https://lkml.kernel.org/r/20241219212437.2714151-1-yosryahmed@google.com/ [2] https://lkml.kernel.org/r/20250107074724.1756696-2-yosryahmed@google.com/ [3] https://lkml.kernel.org/r/20250107222236.2715883-2-yosryahmed@google.com/ [yosryahmed@google.com: remove comment] Link: https://lkml.kernel.org/r/CAJD7tkaxS1wjn+swugt8QCvQ-rVF5RZnjxwPGX17k8x9zSManA@mail.gmail.com Link: https://lkml.kernel.org/r/20250108222441.3622031-1-yosryahmed@google.com Fixes: 1ec3b5f ("mm/zswap: move to use crypto_acomp API for hardware acceleration") Signed-off-by: Yosry Ahmed <yosryahmed@google.com> Reported-by: Johannes Weiner <hannes@cmpxchg.org> Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/ Reported-by: Sam Sun <samsun1006219@gmail.com> Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tPg6OaQ@mail.gmail.com/ Cc: Barry Song <baohua@kernel.org> Cc: Chengming Zhou <chengming.zhou@linux.dev> Cc: Kanchana P Sridhar <kanchana.p.sridhar@intel.com> Cc: Nhat Pham <nphamcs@gmail.com> Cc: Vitaly Wool <vitalywool@gmail.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 4dff389 commit 12dcb0e

File tree

1 file changed

+44
-14
lines changed

1 file changed

+44
-14
lines changed

Diff for: mm/zswap.c

+44-14
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
251251
struct zswap_pool *pool;
252252
char name[38]; /* 'zswap' + 32 char (max) num + \0 */
253253
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
254-
int ret;
254+
int ret, cpu;
255255

256256
if (!zswap_has_pool) {
257257
/* if either are unset, pool initialization failed, and we
@@ -285,6 +285,9 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
285285
goto error;
286286
}
287287

288+
for_each_possible_cpu(cpu)
289+
mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
290+
288291
ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
289292
&pool->node);
290293
if (ret)
@@ -821,11 +824,12 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
821824
struct acomp_req *req;
822825
int ret;
823826

824-
mutex_init(&acomp_ctx->mutex);
825-
827+
mutex_lock(&acomp_ctx->mutex);
826828
acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
827-
if (!acomp_ctx->buffer)
828-
return -ENOMEM;
829+
if (!acomp_ctx->buffer) {
830+
ret = -ENOMEM;
831+
goto buffer_fail;
832+
}
829833

830834
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
831835
if (IS_ERR(acomp)) {
@@ -855,12 +859,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
855859
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
856860
crypto_req_done, &acomp_ctx->wait);
857861

862+
mutex_unlock(&acomp_ctx->mutex);
858863
return 0;
859864

860865
req_fail:
861866
crypto_free_acomp(acomp_ctx->acomp);
862867
acomp_fail:
863868
kfree(acomp_ctx->buffer);
869+
buffer_fail:
870+
mutex_unlock(&acomp_ctx->mutex);
864871
return ret;
865872
}
866873

@@ -869,17 +876,45 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
869876
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
870877
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
871878

879+
mutex_lock(&acomp_ctx->mutex);
872880
if (!IS_ERR_OR_NULL(acomp_ctx)) {
873881
if (!IS_ERR_OR_NULL(acomp_ctx->req))
874882
acomp_request_free(acomp_ctx->req);
883+
acomp_ctx->req = NULL;
875884
if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
876885
crypto_free_acomp(acomp_ctx->acomp);
877886
kfree(acomp_ctx->buffer);
878887
}
888+
mutex_unlock(&acomp_ctx->mutex);
879889

880890
return 0;
881891
}
882892

893+
static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
894+
{
895+
struct crypto_acomp_ctx *acomp_ctx;
896+
897+
for (;;) {
898+
acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
899+
mutex_lock(&acomp_ctx->mutex);
900+
if (likely(acomp_ctx->req))
901+
return acomp_ctx;
902+
/*
903+
* It is possible that we were migrated to a different CPU after
904+
* getting the per-CPU ctx but before the mutex was acquired. If
905+
* the old CPU got offlined, zswap_cpu_comp_dead() could have
906+
* already freed ctx->req (among other things) and set it to
907+
* NULL. Just try again on the new CPU that we ended up on.
908+
*/
909+
mutex_unlock(&acomp_ctx->mutex);
910+
}
911+
}
912+
913+
static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
914+
{
915+
mutex_unlock(&acomp_ctx->mutex);
916+
}
917+
883918
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
884919
struct zswap_pool *pool)
885920
{
@@ -893,10 +928,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
893928
gfp_t gfp;
894929
u8 *dst;
895930

896-
acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
897-
898-
mutex_lock(&acomp_ctx->mutex);
899-
931+
acomp_ctx = acomp_ctx_get_cpu_lock(pool);
900932
dst = acomp_ctx->buffer;
901933
sg_init_table(&input, 1);
902934
sg_set_page(&input, page, PAGE_SIZE, 0);
@@ -949,7 +981,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
949981
else if (alloc_ret)
950982
zswap_reject_alloc_fail++;
951983

952-
mutex_unlock(&acomp_ctx->mutex);
984+
acomp_ctx_put_unlock(acomp_ctx);
953985
return comp_ret == 0 && alloc_ret == 0;
954986
}
955987

@@ -960,9 +992,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
960992
struct crypto_acomp_ctx *acomp_ctx;
961993
u8 *src;
962994

963-
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
964-
mutex_lock(&acomp_ctx->mutex);
965-
995+
acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
966996
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
967997
/*
968998
* If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
@@ -986,10 +1016,10 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
9861016
acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
9871017
BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
9881018
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
989-
mutex_unlock(&acomp_ctx->mutex);
9901019

9911020
if (src != acomp_ctx->buffer)
9921021
zpool_unmap_handle(zpool, entry->handle);
1022+
acomp_ctx_put_unlock(acomp_ctx);
9931023
}
9941024

9951025
/*********************************

0 commit comments

Comments
 (0)