Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix zil replay panic when TX_REMOVE followed by TX_CREATE #9145

Merged
merged 1 commit into from
Aug 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ extern "C" {
*/
#define DNODE_MUST_BE_ALLOCATED 1
#define DNODE_MUST_BE_FREE 2
#define DNODE_DRY_RUN 4

/*
* dnode_next_offset() flags.
Expand Down Expand Up @@ -415,6 +416,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots,
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
int dnode_try_claim(objset_t *os, uint64_t object, int slots);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
Expand Down Expand Up @@ -531,11 +533,6 @@ typedef struct dnode_stats {
* a range of dnode slots which would overflow the dnode_phys_t.
*/
kstat_named_t dnode_hold_free_overflow;
/*
* Number of times a dnode_hold(...) was attempted on a dnode
* which had already been unlinked in an earlier txg.
*/
kstat_named_t dnode_hold_free_txg;
/*
* Number of times dnode_free_interior_slots() needed to retry
* acquiring a slot zrl lock due to contention.
Expand Down
49 changes: 37 additions & 12 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ dnode_stats_t dnode_stats = {
{ "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_overflow", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_refcount", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_txg", KSTAT_DATA_UINT64 },
{ "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 },
{ "dnode_allocate", KSTAT_DATA_UINT64 },
{ "dnode_reallocate", KSTAT_DATA_UINT64 },
Expand Down Expand Up @@ -1263,6 +1262,10 @@ dnode_buf_evict_async(void *dbu)
* as an extra dnode slot by an large dnode, in which case it returns
* ENOENT.
*
* If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just
* return whether the hold would succeed or not. tag and dnp should set to
* NULL in this case.
*
* errors:
* EINVAL - Invalid object number or flags.
* ENOSPC - Hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE)
Expand Down Expand Up @@ -1291,6 +1294,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,

ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0));
ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0));
IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL));

/*
* If you are holding the spa config lock as writer, you shouldn't
Expand Down Expand Up @@ -1320,8 +1324,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
return (SET_ERROR(EEXIST));
DNODE_VERIFY(dn);
(void) zfs_refcount_add(&dn->dn_holds, tag);
*dnp = dn;
/* Don't actually hold if dry run, just return 0 */
if (!(flag & DNODE_DRY_RUN)) {
(void) zfs_refcount_add(&dn->dn_holds, tag);
*dnp = dn;
}
return (0);
}

Expand Down Expand Up @@ -1462,6 +1469,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(ENOENT));
}

/* Don't actually hold if dry run, just return 0 */
if (flag & DNODE_DRY_RUN) {
mutex_exit(&dn->dn_mtx);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (0);
}

DNODE_STAT_BUMP(dnode_hold_alloc_hits);
} else if (flag & DNODE_MUST_BE_FREE) {

Expand Down Expand Up @@ -1519,22 +1534,22 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(EEXIST));
}

/* Don't actually hold if dry run, just return 0 */
if (flag & DNODE_DRY_RUN) {
tuxoko marked this conversation as resolved.
Show resolved Hide resolved
mutex_exit(&dn->dn_mtx);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (0);
}

dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR);
DNODE_STAT_BUMP(dnode_hold_free_hits);
} else {
dbuf_rele(db, FTAG);
return (SET_ERROR(EINVAL));
}

if (dn->dn_free_txg) {
DNODE_STAT_BUMP(dnode_hold_free_txg);
tuxoko marked this conversation as resolved.
Show resolved Hide resolved
type = dn->dn_type;
mutex_exit(&dn->dn_mtx);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ?
ENOENT : EEXIST));
}
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
ASSERT0(dn->dn_free_txg);

if (zfs_refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh);
Expand Down Expand Up @@ -1625,6 +1640,16 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
}
}

/*
* Test whether we can create a dnode at the specified location.
*/
int
dnode_try_claim(objset_t *os, uint64_t object, int slots)
{
return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN,
slots, NULL, NULL));
}

void
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
{
Expand Down
8 changes: 4 additions & 4 deletions module/zfs/zfs_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
xva.xva_vattr.va_nblocks = lr->lr_gen;
xva.xva_vattr.va_fsid = dnodesize;

error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
if (error != ENOENT)
error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
if (error)
goto bail;

if (lr->lr_common.lrc_txtype & TX_CI)
Expand Down Expand Up @@ -473,8 +473,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
xva.xva_vattr.va_nblocks = lr->lr_gen;
xva.xva_vattr.va_fsid = dnodesize;

error = dmu_object_info(zfsvfs->z_os, objid, NULL);
if (error != ENOENT)
error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
if (error)
goto out;

if (lr->lr_common.lrc_txtype & TX_CI)
Expand Down
4 changes: 2 additions & 2 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -835,8 +835,8 @@ tags = ['functional', 'scrub_mirror']
tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs',
'slog_replay_volume']
'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001',
'slog_replay_fs_002', 'slog_replay_volume']
tags = ['functional', 'slog']

[tests/functional/snapshot]
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/slog/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ dist_pkgdata_SCRIPTS = \
slog_013_pos.ksh \
slog_014_pos.ksh \
slog_015_neg.ksh \
slog_replay_fs.ksh \
slog_replay_fs_001.ksh \
slog_replay_fs_002.ksh \
slog_replay_volume.ksh

dist_pkgdata_DATA = \
Expand Down
137 changes: 137 additions & 0 deletions tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

. $STF_SUITE/tests/functional/slog/slog.kshlib

#
# DESCRIPTION:
# Verify slog replay correctly when TX_REMOVEs are followed by
# TX_CREATEs.
#
# STRATEGY:
# 1. Create a file system (TESTFS) with a lot of files
# 2. Freeze TESTFS
# 3. Remove all files then create a lot of files
# 4. Copy TESTFS to temporary location (TESTDIR/copy)
# 5. Unmount filesystem
# <at this stage TESTFS is empty again and unfrozen, and the
# intent log contains a complete set of deltas to replay it>
# 6. Remount TESTFS <which replays the intent log>
# 7. Compare TESTFS against the TESTDIR/copy
#

verify_runnable "global"

function cleanup_fs
{
cleanup
}

log_assert "Replay of intent log succeeds."
log_onexit cleanup_fs
log_must setup

#
# 1. Create a file system (TESTFS) with a lot of files
#
log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
log_must zfs set compression=on $TESTPOOL
log_must zfs create $TESTPOOL/$TESTFS

# Prep for the test of TX_REMOVE followed by TX_CREATE
dnsize=(legacy auto 1k 2k 4k 8k 16k)
NFILES=200
log_must mkdir /$TESTPOOL/$TESTFS/dir0
log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done'

#
# Reimport to reset dnode allocation pointer.
# This is to make sure we will have TX_REMOVE and TX_CREATE on same id
#
log_must zpool export $TESTPOOL
log_must zpool import -f -d $VDIR $TESTPOOL

#
# This dd command works around an issue where ZIL records aren't created
# after freezing the pool unless a ZIL header already exists. Create a file
# synchronously to force ZFS to write one out.
#
log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \
conv=fdatasync,fsync bs=1 count=1

#
# 2. Freeze TESTFS
#
log_must zpool freeze $TESTPOOL

#
# 3. Remove all files then create a lot of files
#
# TX_REMOVE followed by TX_CREATE
log_must eval 'rm -f /$TESTPOOL/$TESTFS/dir0/*'
log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done'

#
# 4. Copy TESTFS to temporary location (TESTDIR/copy)
#
log_must mkdir -p $TESTDIR/copy
log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/

#
# 5. Unmount filesystem and export the pool
#
# At this stage TESTFS is empty again and frozen, the intent log contains
# a complete set of deltas to replay.
#
log_must zfs unmount /$TESTPOOL/$TESTFS

log_note "Verify transactions to replay:"
log_must zdb -iv $TESTPOOL/$TESTFS

log_must zpool export $TESTPOOL

#
# 6. Remount TESTFS <which replays the intent log>
#
# Import the pool to unfreeze it and claim log blocks. It has to be
# `zpool import -f` because we can't write a frozen pool's labels!
#
log_must zpool import -f -d $VDIR $TESTPOOL

#
# 7. Compare TESTFS against the TESTDIR/copy
#
log_note "Verify current block usage:"
log_must zdb -bcv $TESTPOOL

log_note "Verify number of files"
log_must test "$(ls /$TESTPOOL/$TESTFS/dir0 | wc -l)" -eq $NFILES

log_note "Verify working set diff:"
log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy

log_pass "Replay of intent log succeeds."