Skip to content

Commit

Permalink
Enable the head_errlog feature to remove errors
Browse files Browse the repository at this point in the history
In case check_filesystem() does not error out and does not report
an error, remove that error block from error lists and logs
without requiring a scrub. This can happen when the original file and
all snapshots/clones referencing it have been removed.

Otherwise zpool status will still report that "Permanent errors have
been detected..." without actually reporting any of them.

To implement this change the functions introduced in corrective
receive were modified to take into account the head_errlog feature.

Before this change:
=============================
pool: test
 state: ONLINE
status: One or more devices has experienced an error resulting in data
        corruption.  Applications may be affected.
action: Restore the file in question if possible.  Otherwise restore the
        entire pool from backup.
   see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-8A
config:

        NAME                   STATE     READ WRITE CKSUM
        test                   ONLINE       0     0     0
          /home/user/vdev_a    ONLINE       0     0     2

errors: Permanent errors have been detected in the following files:

=============================

After this change:
=============================
  pool: test
 state: ONLINE
status: One or more devices has experienced an unrecoverable error.  An
        attempt was made to correct the error.  Applications are
unaffected.
action: Determine if the device needs to be replaced, and clear the
errors
        using 'zpool clear' or replace the device with 'zpool replace'.
   see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-9P
config:

        NAME                   STATE     READ WRITE CKSUM
        test                   ONLINE       0     0     0
          /home/user/vdev_a    ONLINE       0     0     2

errors: No known data errors
=============================

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #14813
  • Loading branch information
gamanakis authored May 9, 2023
1 parent 4eca03f commit 6839ec6
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 37 deletions.
3 changes: 2 additions & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,8 @@ extern const char *spa_state_to_name(spa_t *spa);
struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb,
const uint64_t *birth);
extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb);
extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb,
const uint64_t *birth);
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
Expand Down
3 changes: 3 additions & 0 deletions man/man8/zpool-status.8
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ See
.It Fl v
Displays verbose data error information, printing out a complete list of all
data errors since the last complete pool scrub.
If the head_errlog feature is enabled and files containing errors have been
removed then the respective filenames will not be reported in subsequent runs
of this command.
.It Fl x
Only display status for pools that are exhibiting errors or are otherwise
unavailable.
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dmu_recv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ corrective_read_done(zio_t *zio)
cr_cb_data_t *data = zio->io_private;
/* Corruption corrected; update error log if needed */
if (zio->io_error == 0)
spa_remove_error(data->spa, &data->zb);
spa_remove_error(data->spa, &data->zb, &zio->io_bp->blk_birth);
kmem_free(data, sizeof (cr_cb_data_t));
abd_free(zio->io_abd);
}
Expand Down
175 changes: 140 additions & 35 deletions module/zfs/spa_errlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
}

uint64_t top_affected_fs;
uint64_t init_count = *count;
int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
if (error == 0) {
clones_t *ct;
Expand Down Expand Up @@ -520,6 +521,16 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,

list_destroy(&clones_list);
}
if (error == 0 && init_count == *count) {
/*
* If we reach this point, no errors have been detected
* in the checked filesystems/snapshots. Before returning mark
* the error block to be removed from the error lists and logs.
*/
zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb);
spa_remove_error(spa, &zb, &zep->zb_birth);
}

return (error);
}
Expand All @@ -530,37 +541,111 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
* so that we can later remove the related log entries in sync context.
*/
static void
spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb)
spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb,
const uint64_t *birth)
{
char name[NAME_MAX_LEN];

if (obj == 0)
return;

bookmark_to_name(healed_zb, name, sizeof (name));
mutex_enter(&spa->spa_errlog_lock);
if (zap_contains(spa->spa_meta_objset, obj, name) == 0) {
/*
* Found an error matching healed zb, add zb to our
* tree of healed errors
*/
avl_tree_t *tree = &spa->spa_errlist_healed;
spa_error_entry_t search;
spa_error_entry_t *new;
avl_index_t where;
search.se_bookmark = *healed_zb;
mutex_enter(&spa->spa_errlist_lock);
if (avl_find(tree, &search, &where) != NULL) {
mutex_exit(&spa->spa_errlist_lock);
mutex_exit(&spa->spa_errlog_lock);
return;
boolean_t held_list = B_FALSE;
boolean_t held_log = B_FALSE;

if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
bookmark_to_name(healed_zb, name, sizeof (name));

if (zap_contains(spa->spa_meta_objset, healed_zb->zb_objset,
name) == 0) {
if (!MUTEX_HELD(&spa->spa_errlog_lock)) {
mutex_enter(&spa->spa_errlog_lock);
held_log = B_TRUE;
}

/*
* Found an error matching healed zb, add zb to our
* tree of healed errors
*/
avl_tree_t *tree = &spa->spa_errlist_healed;
spa_error_entry_t search;
spa_error_entry_t *new;
avl_index_t where;
search.se_bookmark = *healed_zb;
if (!MUTEX_HELD(&spa->spa_errlist_lock)) {
mutex_enter(&spa->spa_errlist_lock);
held_list = B_TRUE;
}
if (avl_find(tree, &search, &where) != NULL) {
if (held_list)
mutex_exit(&spa->spa_errlist_lock);
if (held_log)
mutex_exit(&spa->spa_errlog_lock);
return;
}
new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
new->se_bookmark = *healed_zb;
avl_insert(tree, new, where);
if (held_list)
mutex_exit(&spa->spa_errlist_lock);
if (held_log)
mutex_exit(&spa->spa_errlog_lock);
}
new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
new->se_bookmark = *healed_zb;
avl_insert(tree, new, where);
mutex_exit(&spa->spa_errlist_lock);
return;
}
mutex_exit(&spa->spa_errlog_lock);

zbookmark_err_phys_t healed_zep;
healed_zep.zb_object = healed_zb->zb_object;
healed_zep.zb_level = healed_zb->zb_level;
healed_zep.zb_blkid = healed_zb->zb_blkid;

if (birth != NULL)
healed_zep.zb_birth = *birth;
else
healed_zep.zb_birth = 0;

errphys_to_name(&healed_zep, name, sizeof (name));

zap_cursor_t zc;
zap_attribute_t za;
for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_errlog_last);
zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) {
if (zap_contains(spa->spa_meta_objset, za.za_first_integer,
name) == 0) {
if (!MUTEX_HELD(&spa->spa_errlog_lock)) {
mutex_enter(&spa->spa_errlog_lock);
held_log = B_TRUE;
}

avl_tree_t *tree = &spa->spa_errlist_healed;
spa_error_entry_t search;
spa_error_entry_t *new;
avl_index_t where;
search.se_bookmark = *healed_zb;

if (!MUTEX_HELD(&spa->spa_errlist_lock)) {
mutex_enter(&spa->spa_errlist_lock);
held_list = B_TRUE;
}

if (avl_find(tree, &search, &where) != NULL) {
if (held_list)
mutex_exit(&spa->spa_errlist_lock);
if (held_log)
mutex_exit(&spa->spa_errlog_lock);
continue;
}
new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
new->se_bookmark = *healed_zb;
new->se_zep = healed_zep;
avl_insert(tree, new, where);

if (held_list)
mutex_exit(&spa->spa_errlist_lock);
if (held_log)
mutex_exit(&spa->spa_errlog_lock);
}
}
zap_cursor_fini(&zc);
}

/*
Expand Down Expand Up @@ -598,12 +683,36 @@ spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx)
&cookie)) != NULL) {
remove_error_from_list(spa, s, &se->se_bookmark);
remove_error_from_list(spa, l, &se->se_bookmark);
bookmark_to_name(&se->se_bookmark, name, sizeof (name));
kmem_free(se, sizeof (spa_error_entry_t));

This comment has been minimized.

Copy link
@lattera

lattera May 13, 2023

I suspect this call to kmem_free might be best performed after the conditional statements below.

#14813 (comment)

(void) zap_remove(spa->spa_meta_objset,
spa->spa_errlog_last, name, tx);
(void) zap_remove(spa->spa_meta_objset,
spa->spa_errlog_scrub, name, tx);

if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
bookmark_to_name(&se->se_bookmark, name, sizeof (name));
(void) zap_remove(spa->spa_meta_objset,
spa->spa_errlog_last, name, tx);
(void) zap_remove(spa->spa_meta_objset,
spa->spa_errlog_scrub, name, tx);
} else {
errphys_to_name(&se->se_zep, name, sizeof (name));
zap_cursor_t zc;
zap_attribute_t za;
for (zap_cursor_init(&zc, spa->spa_meta_objset,
spa->spa_errlog_last);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
zap_remove(spa->spa_meta_objset,
za.za_first_integer, name, tx);
}
zap_cursor_fini(&zc);

for (zap_cursor_init(&zc, spa->spa_meta_objset,
spa->spa_errlog_scrub);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
zap_remove(spa->spa_meta_objset,
za.za_first_integer, name, tx);
}
zap_cursor_fini(&zc);
}
}
}

Expand All @@ -612,14 +721,10 @@ spa_remove_healed_errors(spa_t *spa, avl_tree_t *s, avl_tree_t *l, dmu_tx_t *tx)
* later in spa_remove_healed_errors().
*/
void
spa_remove_error(spa_t *spa, zbookmark_phys_t *zb)
spa_remove_error(spa_t *spa, zbookmark_phys_t *zb, const uint64_t *birth)
{
char name[NAME_MAX_LEN];

bookmark_to_name(zb, name, sizeof (name));

spa_add_healed_error(spa, spa->spa_errlog_last, zb);
spa_add_healed_error(spa, spa->spa_errlog_scrub, zb);
spa_add_healed_error(spa, spa->spa_errlog_last, zb, birth);
spa_add_healed_error(spa, spa->spa_errlog_scrub, zb, birth);
}

static uint64_t
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
# 7. Verify we report errors in the pool in 'zpool status -v'
# 8. Promote clone1
# 9. Verify we report errors in the pool in 'zpool status -v'
# 10. Delete the corrupted file and origin snapshots.
# 11. Verify we do not report data errors anymore, without requiring
# a scrub.

. $STF_SUITE/include/libtest.shlib

Expand Down Expand Up @@ -95,4 +98,14 @@ log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clonexx/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone3/$TESTFILE0'"

log_must rm /$TESTPOOL2/clone1/$TESTFILE0
log_must zfs destroy -R $TESTPOOL2/clone1@snap1
log_must zfs destroy -R $TESTPOOL2/clone1@snap2
log_must zfs list -r $TESTPOOL2
log_must zpool status -v $TESTPOOL2
log_must zpool sync
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v $TESTPOOL2 | \
grep \"No known data errors\""

log_pass "Verify reporting errors when deleting corrupted files after scrub"

0 comments on commit 6839ec6

Please sign in to comment.