Skip to content

Commit

Permalink
ZIL: Fix config lock deadlock.
Browse files Browse the repository at this point in the history
When we have some LWBs closed and their ZIOs ready to be issued, we
can not afford sleeping on config lock if somebody else try to lock
it as writer, or it will cause a deadlock.

To solve it, move spa_config_enter() from zil_lwb_write_issue() to
zil_lwb_write_close() under zl_issuer_lock to enforce lock ordering
with other threads.  Now if we can't immediately lock config, issue
all previously closed LWBs so that they could drop their config
locks after completion, and only then allow sleeping on our lock.

Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #15078
Closes #15080
  • Loading branch information
amotin authored Jul 24, 2023
1 parent fb344f5 commit 2cb992a
Showing 1 changed file with 27 additions and 7 deletions.
34 changes: 27 additions & 7 deletions module/zfs/zil.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;

static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb);
static itx_t *zil_itx_clone(itx_t *oitx);

static int
Expand Down Expand Up @@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
* Has to be called under zl_issuer_lock to chain more lwbs.
*/
static lwb_t *
zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
{
lwb_t *nlwb = NULL;
zil_chain_t *zilc;
Expand Down Expand Up @@ -1870,6 +1871,27 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)

dmu_tx_commit(tx);

/*
* We need to acquire the config lock for the lwb to issue it later.
* However, if we already have a queue of closed parent lwbs already
* holding the config lock (but not yet issued), we can't block here
* waiting on the lock or we will deadlock. In that case we must
* first issue to parent IOs before waiting on the lock.
*/
if (ilwbs && !list_is_empty(ilwbs)) {
if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) {
lwb_t *tlwb;
while ((tlwb = list_remove_head(ilwbs)) != NULL)
zil_lwb_write_issue(zilog, tlwb);
spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
}
} else {
spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
}

if (ilwbs)
list_insert_tail(ilwbs, lwb);

/*
* If there was an allocation failure then nlwb will be null which
* forces a txg_wait_synced().
Expand Down Expand Up @@ -1933,7 +1955,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
BP_GET_LSIZE(&lwb->lwb_blk));
}
spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER));
zil_lwb_add_block(lwb, &lwb->lwb_blk);
lwb->lwb_issued_timestamp = gethrtime();
zio_nowait(lwb->lwb_root_zio);
Expand Down Expand Up @@ -2037,8 +2059,7 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
lwb_sp < zil_max_waste_space(zilog) &&
(dlen % max_log_data == 0 ||
lwb_sp < reclen + dlen % max_log_data))) {
list_insert_tail(ilwbs, lwb);
lwb = zil_lwb_write_close(zilog, lwb);
lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
if (lwb == NULL)
return (NULL);
zil_lwb_write_open(zilog, lwb);
Expand Down Expand Up @@ -2937,8 +2958,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
zfs_commit_timeout_pct / 100;
if (sleep < zil_min_commit_timeout ||
lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
list_insert_tail(ilwbs, lwb);
lwb = zil_lwb_write_close(zilog, lwb);
lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
zilog->zl_cur_used = 0;
if (lwb == NULL) {
while ((lwb = list_remove_head(ilwbs))
Expand Down Expand Up @@ -3096,7 +3116,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* since we've reached the commit waiter's timeout and it still
* hasn't been issued.
*/
lwb_t *nlwb = zil_lwb_write_close(zilog, lwb);
lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL);

ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);

Expand Down

0 comments on commit 2cb992a

Please sign in to comment.