Skip to content

Commit

Permalink
OpenZFS 9403 - assertion failed in arc_buf_destroy() when concurrentl…
Browse files Browse the repository at this point in the history
…y reading block with checksum error

Porting notes:
* zio_decompress_fail_fraction has been set to (1 << 18) by default
  in ztest.
* The ability to zinject decompression errors has been added, but
  this only works at the zio_decompress() level, where we have all
  of the inf we need to match against the user's zinject options.
* The decompress_fault test has been added to test the new zinject
  functionality

Authored by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Approved by: Matt Ahrens <mahrens@delphix.com>
Ported-by: Tom Caputi <tcaputi@datto.com>

OpenZFS-issue: https://illumos.org/issues/9403
OpenZFS-commit: openzfs/openzfs@fa98e487a9
  • Loading branch information
Tom Caputi committed Aug 24, 2018
1 parent 4338c5c commit c27703e
Show file tree
Hide file tree
Showing 12 changed files with 172 additions and 39 deletions.
10 changes: 6 additions & 4 deletions cmd/zinject/zinject.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@
* specified.
*
* The '-e' option takes a string describing the errno to simulate. This must
* be one of 'io', 'checksum', or 'decrypt'. In most cases this will result
* in the same behavior, but RAID-Z will produce a different set of ereports
* for this situation.
* be one of 'io', 'checksum', 'decompress', or 'decrypt'. In most cases this
* will result in the same behavior, but RAID-Z will produce a different set of
* ereports for this situation.
*
* The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
* specified, then the ARC cache is flushed appropriately. If '-u' is
Expand Down Expand Up @@ -300,7 +300,7 @@ usage(void)
"\n"
"\t\t-q\tQuiet mode. Only print out the handler number added.\n"
"\t\t-e\tInject a specific error. Must be one of 'io',\n"
"\t\t\t'checksum', or decrypt. Default is 'io'.\n"
"\t\t\t'checksum', 'decompress', or decrypt. Default is 'io'.\n"
"\t\t-l\tInject error at a particular block level. Default is "
"0.\n"
"\t\t-m\tAutomatically remount underlying filesystem.\n"
Expand Down Expand Up @@ -774,6 +774,8 @@ main(int argc, char **argv)
error = EIO;
} else if (strcasecmp(optarg, "checksum") == 0) {
error = ECKSUM;
} else if (strcasecmp(optarg, "decompress") == 0) {
error = EINVAL;
} else if (strcasecmp(optarg, "decrypt") == 0) {
error = EACCES;
} else if (strcasecmp(optarg, "nxio") == 0) {
Expand Down
13 changes: 9 additions & 4 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
.zo_init = 1,
.zo_time = 300, /* 5 minutes */
.zo_maxloops = 50, /* max loops during spa_freeze() */
.zo_metaslab_force_ganging = 32 << 10
.zo_metaslab_force_ganging = 32 << 10,
};

extern uint64_t metaslab_force_ganging;
Expand All @@ -204,6 +204,7 @@ extern boolean_t zfs_compressed_arc_enabled;
extern int zfs_abd_scatter_enabled;
extern int dmu_object_alloc_chunk_shift;
extern boolean_t zfs_force_some_double_word_sm_entries;
extern unsigned long zio_decompress_fail_fraction;

static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
Expand Down Expand Up @@ -483,9 +484,6 @@ static int zc_cb_counter = 0;
*/
#define ZTEST_COMMIT_CB_THRESH (TXG_CONCURRENT_STATES + 1000)

extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;

enum ztest_object {
ZTEST_META_DNODE = 0,
ZTEST_DIROBJ,
Expand Down Expand Up @@ -7367,6 +7365,13 @@ main(int argc, char **argv)
*/
zfs_force_some_double_word_sm_entries = B_TRUE;

/*
* Force rare decompression failures to exercise those code paths.
* This will touch many more code paths than zinjecting "decompress"
* errors since those only operate at the zio_decompress() level.
*/
zio_decompress_fail_fraction = (1 << 18);

action.sa_handler = sig_handler;
sigemptyset(&action.sa_mask);
action.sa_flags = 0;
Expand Down
13 changes: 13 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -2511,6 +2511,19 @@ to limit potential SLOG device abuse by single active ZIL writer.
Default value: \fB786,432\fR.
.RE

.sp
.ne 2
.na
\fBzio_decompress_fail_fraction\fR (int)
.ad
.RS 12n
If non-zero, this value represents the denominator of the probability that zfs
should induce a decompression failure. For instance, for a 5% decompression
failure rate, this value should be set to 20.
.sp
Default value: \fB0\fR.
.RE

.sp
.ne 2
.na
Expand Down
1 change: 1 addition & 0 deletions man/man8/zinject.8
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ A vdev specified by path or GUID.
.BI "\-e" " device_error"
Specify
.BR "checksum" " for an ECKSUM error,"
.BR "decompress" " for a data decompression error,"
.BR "decrypt" " for a data decryption error,"
.BR "corrupt" " to flip a bit in the data after a read,"
.BR "dtl" " for an ECHILD error,"
Expand Down
51 changes: 41 additions & 10 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
Expand Down Expand Up @@ -5803,10 +5803,12 @@ arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
arc_buf_t **bufp = arg;

if (buf == NULL) {
ASSERT(zio == NULL || zio->io_error != 0);
*bufp = NULL;
} else {
ASSERT(zio == NULL || zio->io_error == 0);
*bufp = buf;
ASSERT(buf->b_data);
ASSERT(buf->b_data != NULL);
}
}

Expand Down Expand Up @@ -5934,12 +5936,6 @@ arc_read_done(zio_t *zio)
&acb->acb_zb, acb->acb_private, acb->acb_encrypted,
acb->acb_compressed, acb->acb_noauth, B_TRUE,
&acb->acb_buf);
if (error != 0) {
(void) remove_reference(hdr, hash_lock,
acb->acb_private);
arc_buf_destroy_impl(acb->acb_buf);
acb->acb_buf = NULL;
}

/*
* Assert non-speculative zios didn't fail because an
Expand All @@ -5962,9 +5958,34 @@ arc_read_done(zio_t *zio)
}
}

if (zio->io_error == 0)
if (error != 0) {
/*
* Decompression or decryption failed. Set
* io_error so that when we call acb_done
* (below), we will indicate that the read
* failed. Note that in the unusual case
* where one callback is compressed and another
* uncompressed, we will mark all of them
* as failed, even though the uncompressed
* one can't actually fail. In this case,
* the hdr will not be anonymous, because
* if there are multiple callbacks, it's
* because multiple threads found the same
* arc buf in the hash table.
*/
zio->io_error = error;
}
}

/*
* If there are multiple callbacks, we must have the hash lock,
* because the only way for multiple threads to find this hdr is
* in the hash table. This ensures that if there are multiple
* callbacks, the hdr is not anonymous. If it were anonymous,
* we couldn't use arc_buf_destroy() in the error case below.
*/
ASSERT(callback_cnt < 2 || hash_lock != NULL);

hdr->b_l1hdr.b_acb = NULL;
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
if (callback_cnt == 0)
Expand Down Expand Up @@ -6006,7 +6027,17 @@ arc_read_done(zio_t *zio)

/* execute each callback and free its structure */
while ((acb = callback_list) != NULL) {
if (acb->acb_done) {
if (acb->acb_done != NULL) {
if (zio->io_error != 0 && acb->acb_buf != NULL) {
/*
* If arc_buf_alloc_impl() fails during
* decompression, the buf will still be
* allocated, and needs to be freed here.
*/
arc_buf_destroy(acb->acb_buf,
acb->acb_private);
acb->acb_buf = NULL;
}
acb->acb_done(zio, &zio->io_bookmark, zio->io_bp,
acb->acb_buf, acb->acb_private);
}
Expand Down
37 changes: 20 additions & 17 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
Expand Down Expand Up @@ -1191,25 +1191,26 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
ASSERT(refcount_count(&db->db_holds) > 0);
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
if (db->db_level == 0 && db->db_freed_in_flight) {
/* we were freed in flight; disregard any error */
if (buf == NULL) {
buf = arc_alloc_buf(db->db_objset->os_spa,
db, DBUF_GET_BUFC_TYPE(db), db->db.db_size);
}
if (buf == NULL) {
/* i/o error */
ASSERT(zio == NULL || zio->io_error != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT3P(db->db_buf, ==, NULL);
db->db_state = DB_UNCACHED;
} else if (db->db_level == 0 && db->db_freed_in_flight) {
/* freed in flight */
ASSERT(zio == NULL || zio->io_error == 0);
arc_release(buf, db);
bzero(buf->b_data, db->db.db_size);
arc_buf_freeze(buf);
db->db_freed_in_flight = FALSE;
dbuf_set_data(db, buf);
db->db_state = DB_CACHED;
} else if (buf != NULL) {
} else {
/* success */
ASSERT(zio == NULL || zio->io_error == 0);
dbuf_set_data(db, buf);
db->db_state = DB_CACHED;
} else {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT3P(db->db_buf, ==, NULL);
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
dbuf_rele_and_unlock(db, NULL, B_FALSE);
Expand Down Expand Up @@ -2848,6 +2849,13 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
ASSERT3S(dpa->dpa_zb.zb_level, <, dpa->dpa_curlevel);
ASSERT3S(dpa->dpa_curlevel, >, 0);

if (abuf == NULL) {
ASSERT(zio == NULL || zio->io_error != 0);
kmem_free(dpa, sizeof (*dpa));
return;
}
ASSERT(zio == NULL || zio->io_error == 0);

/*
* The dpa_dnode is only valid if we are called with a NULL
* zio. This indicates that the arc_read() returned without
Expand Down Expand Up @@ -2880,11 +2888,6 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
dbuf_rele(db, FTAG);
}

if (abuf == NULL) {
kmem_free(dpa, sizeof (*dpa));
return;
}

dpa->dpa_curlevel--;
uint64_t nextblkid = dpa->dpa_zb.zb_blkid >>
(dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level));
Expand Down
6 changes: 6 additions & 0 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,12 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
zio->io_abd, tmp, zio->io_size, size);
abd_return_buf_copy(data, tmp, size);

#ifdef _KERNEL
printk(KERN_DEBUG "HERE1: %d %d\n", zio_injection_enabled, ret);
#endif
if (zio_injection_enabled && ret == 0)
ret = zio_handle_fault_injection(zio, EINVAL);

if (ret != 0)
zio->io_error = SET_ERROR(EIO);
}
Expand Down
18 changes: 17 additions & 1 deletion module/zfs/zio_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
*/

/*
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
*/

#include <sys/zfs_context.h>
Expand All @@ -37,6 +37,12 @@
#include <sys/zio.h>
#include <sys/zio_compress.h>

/*
* If nonzero, every 1/X decompression attempts will fail, simulating
* an undetected memory error.
*/
unsigned long zio_decompress_fail_fraction = 0;

/*
* Compression vectors.
*/
Expand Down Expand Up @@ -148,5 +154,15 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len);
abd_return_buf(src, tmp, s_len);

/*
* Decompression shouldn't fail, because we've already verifyied
* the checksum. However, for extra protection (e.g. against bitflips
* in non-ECC RAM), we handle this error (and test it).
*/
ASSERT0(ret);
if (zio_decompress_fail_fraction != 0 &&
spa_get_random(zio_decompress_fail_fraction) == 0)
ret = SET_ERROR(EINVAL);

return (ret);
}
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ tags = ['functional', 'exec']
[tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple',
'scrub_after_resilver', 'decrypt_fault']
'scrub_after_resilver', 'decrypt_fault', 'decompress_fault']
tags = ['functional', 'fault']

[tests/functional/features/async_destroy]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/fault/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dist_pkgdata_SCRIPTS = \
auto_spare_ashift.ksh \
auto_spare_multiple.ksh \
decrypt_fault.ksh \
decompress_fault.ksh \
scrub_after_resilver.ksh

dist_pkgdata_DATA = \
Expand Down
55 changes: 55 additions & 0 deletions tests/zfs-tests/tests/functional/fault/decompress_fault.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2018 by Datto Inc.
# All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Test that injected decompression errors are handled correctly.
#
# STRATEGY:
# 1. Create an compressed dataset with a test file
# 2. Inject decompression errors on the file 20% of the time
# 3. Read the file to confirm that errors are handled correctly
# 4. Confirm that the decompression injection was added to the ZED logs
#

log_assert "Testing that injected decompression errors are handled correctly"

function cleanup
{
log_must eval "echo 1 > /sys/module/zfs/parameters/zfs_compressed_arc_enabled"
log_must zinject -c all
default_cleanup_noexit
}

log_onexit cleanup

default_mirror_setup_noexit $DISK1 $DISK2
log_must eval "echo 0 > /sys/module/zfs/parameters/zfs_compressed_arc_enabled"
log_must zfs create -o compression=on $TESTPOOL/fs
mntpt=$(get_prop mountpoint $TESTPOOL/fs)
write_compressible $mntpt 32m 1 0 "testfile"
log_must sync
log_must zfs umount $TESTPOOL/fs
log_must zfs mount $TESTPOOL/fs
log_must zinject -a -t data -e decompress -f 20 $mntpt/testfile.0
log_mustnot eval "cat $mntpt/testfile.0 > /dev/null"
log_must eval "zpool events $TESTPOOL | grep -q 'data'"

log_pass "Injected decompression errors are handled correctly"
Loading

0 comments on commit c27703e

Please sign in to comment.