Skip to content

Commit

Permalink
Fix zil replay panic when TX_REMOVE followed by TX_CREATE
Browse files Browse the repository at this point in the history
If TX_REMOVE is followed by TX_CREATE on the same object id, we need to
make sure the object removal is completely finished before creation. The
current implementation relies on dnode_hold_impl with
DNODE_MUST_BE_ALLOCATED returning ENOENT. While this check seems to work
fine before, in current version it does not guarantee the object removal
is completed.

We fix this by checking if DNODE_MUST_BE_FREE returns successful
instead. Also add test and remove dead code in dnode_hold_impl.

Closes #7151
Closes #8910
Closes #9123
Signed-off-by: Chunwei Chen <david.chen@nutanix.com>
  • Loading branch information
davidchenntnx committed Aug 21, 2019
1 parent c81f179 commit 9d54188
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 16 deletions.
10 changes: 1 addition & 9 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1518,15 +1518,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
return (SET_ERROR(EINVAL));
}

if (dn->dn_free_txg) {
DNODE_STAT_BUMP(dnode_hold_free_txg);
type = dn->dn_type;
mutex_exit(&dn->dn_mtx);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ?
ENOENT : EEXIST));
}
ASSERT0(dn->dn_free_txg);

if (zfs_refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh);
Expand Down
16 changes: 12 additions & 4 deletions module/zfs/zfs_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
uint64_t objid;
uint64_t dnodesize;
int error;
dnode_t *dn;

txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
Expand Down Expand Up @@ -337,10 +338,13 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
xva.xva_vattr.va_nblocks = lr->lr_gen;
xva.xva_vattr.va_fsid = dnodesize;

error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
if (error != ENOENT)
error = dnode_hold_impl(zfsvfs->z_os, objid, DNODE_MUST_BE_FREE,
dnodesize >> DNODE_SHIFT, FTAG, &dn);
if (error)
goto bail;

dnode_rele(dn, FTAG);

if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
switch (txtype) {
Expand Down Expand Up @@ -442,6 +446,7 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
uint64_t objid;
uint64_t dnodesize;
int error;
dnode_t *dn;

txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
Expand Down Expand Up @@ -473,10 +478,13 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
xva.xva_vattr.va_nblocks = lr->lr_gen;
xva.xva_vattr.va_fsid = dnodesize;

error = dmu_object_info(zfsvfs->z_os, objid, NULL);
if (error != ENOENT)
error = dnode_hold_impl(zfsvfs->z_os, objid, DNODE_MUST_BE_FREE,
dnodesize >> DNODE_SHIFT, FTAG, &dn);
if (error)
goto out;

dnode_rele(dn, FTAG);

if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;

Expand Down
4 changes: 2 additions & 2 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -833,8 +833,8 @@ tags = ['functional', 'scrub_mirror']
tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs',
'slog_replay_volume']
'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001',
'slog_replay_fs_002', 'slog_replay_volume']
tags = ['functional', 'slog']

[tests/functional/snapshot]
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/slog/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ dist_pkgdata_SCRIPTS = \
slog_013_pos.ksh \
slog_014_pos.ksh \
slog_015_neg.ksh \
slog_replay_fs.ksh \
slog_replay_fs_001.ksh \
slog_replay_fs_002.ksh \
slog_replay_volume.ksh

dist_pkgdata_DATA = \
Expand Down
134 changes: 134 additions & 0 deletions tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

. $STF_SUITE/tests/functional/slog/slog.kshlib

#
# DESCRIPTION:
# Verify slog replay correctly when TX_REMOVEs are followed by
# TX_CREATEs.
#
# STRATEGY:
# 1. Create a file system (TESTFS) with a lot of files
# 2. Freeze TESTFS
# 3. Remove all files then create a lot of files
# 4. Copy TESTFS to temporary location (TESTDIR/copy)
# 5. Unmount filesystem
# <at this stage TESTFS is empty again and unfrozen, and the
# intent log contains a complete set of deltas to replay it>
# 6. Remount TESTFS <which replays the intent log>
# 7. Compare TESTFS against the TESTDIR/copy
#

verify_runnable "global"

function cleanup_fs
{
cleanup
}

log_assert "Replay of intent log succeeds."
log_onexit cleanup_fs

#
# 1. Create a file system (TESTFS) with a lot of files
#
log_must zpool create $TESTPOOL $VDEV log mirror $LDEV
log_must zfs set compression=on $TESTPOOL
log_must zfs create $TESTPOOL/$TESTFS

# Prep for the test of TX_REMOVE followed by TX_CREATE
log_must mkdir /$TESTPOOL/$TESTFS/dir0
log_must eval 'for i in $(seq 1000); do touch /$TESTPOOL/$TESTFS/dir0/file.$i; done'

#
# Reimport to reset dnode allocation pointer.
# This is to make sure we will have TX_REMOVE and TX_CREATE on same id
#
log_must zpool export $TESTPOOL
log_must zpool import -f -d $VDIR $TESTPOOL

#
# This dd command works around an issue where ZIL records aren't created
# after freezing the pool unless a ZIL header already exists. Create a file
# synchronously to force ZFS to write one out.
#
log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \
conv=fdatasync,fsync bs=1 count=1

#
# 2. Freeze TESTFS
#
log_must zpool freeze $TESTPOOL

#
# 3. Remove all files then create a lot of files
#
# TX_REMOVE followed by TX_CREATE
log_must eval 'rm -f /$TESTPOOL/$TESTFS/dir0/*'
log_must eval 'for i in $(seq 1000); do touch /$TESTPOOL/$TESTFS/dir0/file.$i; done'

#
# 4. Copy TESTFS to temporary location (TESTDIR/copy)
#
log_must mkdir -p $TESTDIR/copy
log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/

#
# 5. Unmount filesystem and export the pool
#
# At this stage TESTFS is empty again and frozen, the intent log contains
# a complete set of deltas to replay.
#
log_must zfs unmount /$TESTPOOL/$TESTFS

log_note "Verify transactions to replay:"
log_must zdb -iv $TESTPOOL/$TESTFS

log_must zpool export $TESTPOOL

#
# 6. Remount TESTFS <which replays the intent log>
#
# Import the pool to unfreeze it and claim log blocks. It has to be
# `zpool import -f` because we can't write a frozen pool's labels!
#
log_must zpool import -f -d $VDIR $TESTPOOL

#
# 7. Compare TESTFS against the TESTDIR/copy
#
log_note "Verify current block usage:"
log_must zdb -bcv $TESTPOOL

log_note "Verify number of files"
log_must test "$(ls /$TESTPOOL/$TESTFS/dir0 | wc -l)" -eq 1000

log_note "Verify working set diff:"
log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy

log_pass "Replay of intent log succeeds."

0 comments on commit 9d54188

Please sign in to comment.