From 1722451cf7e0588632f116c488009e68dd1f8dcf Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 20 Apr 2021 15:16:12 -0700 Subject: [PATCH] Fix lseek() SEEK_DATA returning ENXIO The dn_struct_rwlock lock which prevents the dnode structure from changing should be held over the dirty check and the subsequent call to dnode_next_offset(). This ensures that a clean dnode can't be dirtied before the data/hole is located. Furthermore, refactor the code to provide a dnode_is_dirty() helper function which performs the multilist_link_active() check while holding the correct multilist lock. Signed-off-by: Brian Behlendorf Issue #11900 --- include/sys/dnode.h | 1 + module/zfs/dmu.c | 49 ++++++++++++++++++++------------------------- module/zfs/dnode.c | 24 ++++++++++++++++++++++ 3 files changed, 47 insertions(+), 27 deletions(-) diff --git a/include/sys/dnode.h b/include/sys/dnode.h index e7cccd044abf..3f5fcc958c36 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -425,6 +425,7 @@ boolean_t dnode_add_ref(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref); void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting); int dnode_try_claim(objset_t *os, uint64_t object, int slots); +boolean_t dnode_is_dirty(dnode_t *dn); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, void *tag); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 1c47430953b1..6f14cd69b317 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2095,42 +2095,37 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; - int i, err; - boolean_t clean = B_TRUE; + int err; +restart: err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); - /* - * Check if dnode is dirty - */ - for (i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i])) { - clean = B_FALSE; - break; - } - } + rw_enter(&dn->dn_struct_rwlock, RW_READER); - /* - * If compatibility option is on, sync any current changes before - * we go trundling through the block pointers. - */ - if (!clean && zfs_dmu_offset_next_sync) { - clean = B_TRUE; - dnode_rele(dn, FTAG); - txg_wait_synced(dmu_objset_pool(os), 0); - err = dnode_hold(os, object, FTAG, &dn); - if (err) - return (err); - } + if (dnode_is_dirty(dn)) { + /* + * If the zfs_dmu_offset_next_sync module option is enabled + * then strict hole reporting has been requested. Dirty + * dnodes must be synced to disk to accurately report all + * holes. When disabled (the default) dirty dnodes are + * reported to not have any holes which is always safe. + */ + if (zfs_dmu_offset_next_sync) { + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + txg_wait_synced(dmu_objset_pool(os), 0); + goto restart; + } - if (clean) - err = dnode_next_offset(dn, - (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); - else err = SET_ERROR(EBUSY); + } else { + err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK | + (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); + } + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (err); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 7f741542ce02..f3c8c0861df8 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1648,6 +1648,30 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots) slots, NULL, NULL)); } +/* + * Checks if the dnode contains any uncommitted dirty records. + */ +boolean_t +dnode_is_dirty(dnode_t *dn) +{ + multilist_t *dirtylist; + multilist_sublist_t *mls; + + for (int i = 0; i < TXG_SIZE; i++) { + dirtylist = &dn->dn_objset->os_dirty_dnodes[i]; + mls = multilist_sublist_lock_obj(dirtylist, dn); + + if (multilist_link_active(&dn->dn_dirty_link[i])) { + multilist_sublist_unlock(mls); + return (B_TRUE); + } + + multilist_sublist_unlock(mls); + } + + return (B_FALSE); +} + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) {