Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip dnode handles use when not needed #16374

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,15 @@ typedef struct dmu_buf_impl {
struct objset *db_objset;

/*
* handle to safely access the dnode we belong to (NULL when evicted)
* Handle to safely access the dnode we belong to (NULL when evicted)
* if dnode_move() is used on the platform, or just dnode otherwise.
*/
#if !defined(__linux__) && !defined(__FreeBSD__)
#define USE_DNODE_HANDLE 1
struct dnode_handle *db_dnode_handle;
#else
struct dnode *db_dnode;
#endif

/*
* our parent buffer; if the dnode points to us directly,
Expand Down Expand Up @@ -417,11 +423,19 @@ void dbuf_stats_destroy(void);
int dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid,
blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift);

#ifdef USE_DNODE_HANDLE
#define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode)
#define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock)
#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db)))
#define DB_DNODE_EXIT(_db) (zrl_remove(&DB_DNODE_LOCK(_db)))
#define DB_DNODE_HELD(_db) (!zrl_is_zero(&DB_DNODE_LOCK(_db)))
#else
#define DB_DNODE(_db) ((_db)->db_dnode)
#define DB_DNODE_LOCK(_db)
#define DB_DNODE_ENTER(_db)
#define DB_DNODE_EXIT(_db)
#define DB_DNODE_HELD(_db) (B_TRUE)
#endif

void dbuf_init(void);
void dbuf_fini(void);
Expand Down
19 changes: 12 additions & 7 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3103,7 +3103,11 @@ dbuf_destroy(dmu_buf_impl_t *db)
*/
mutex_enter(&dn->dn_mtx);
dnode_rele_and_unlock(dn, db, B_TRUE);
#ifdef USE_DNODE_HANDLE
db->db_dnode_handle = NULL;
#else
db->db_dnode = NULL;
#endif

dbuf_hash_remove(db);
} else {
Expand Down Expand Up @@ -3252,7 +3256,11 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_level = level;
db->db_blkid = blkid;
db->db_dirtycnt = 0;
#ifdef USE_DNODE_HANDLE
db->db_dnode_handle = dn->dn_handle;
#else
db->db_dnode = dn;
#endif
db->db_parent = parent;
db->db_blkptr = blkptr;
db->db_hash = hash;
Expand Down Expand Up @@ -4390,7 +4398,7 @@ dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
dmu_buf_impl_t *parent_db = dr->dr_parent->dr_dbuf;
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
VERIFY3U(parent_db->db_level, ==, 1);
VERIFY3P(parent_db->db_dnode_handle->dnh_dnode, ==, dn);
VERIFY3P(DB_DNODE(parent_db), ==, dn);
VERIFY3U(dr->dt.dll.dr_blkid >> epbs, ==, parent_db->db_blkid);
blkptr_t *bp = parent_db->db.db_data;
return (&bp[dr->dt.dll.dr_blkid & ((1 << epbs) - 1)]);
Expand Down Expand Up @@ -4813,14 +4821,13 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
{
(void) zio, (void) buf;
dmu_buf_impl_t *db = vdb;
dnode_t *dn;
blkptr_t *bp;
unsigned int epbs, i;

ASSERT3U(db->db_level, >, 0);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
epbs = DB_DNODE(db)->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
DB_DNODE_EXIT(db);
ASSERT3U(epbs, <, 31);

/* Determine if all our children are holes */
Expand All @@ -4843,7 +4850,6 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
memset(db->db.db_data, 0, db->db.db_size);
rw_exit(&db->db_rwlock);
}
DB_DNODE_EXIT(db);
}

static void
Expand Down Expand Up @@ -5062,8 +5068,7 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
}
} else if (db->db.db_object == DMU_META_DNODE_OBJECT) {
dnode_phys_t *dnp = db->db.db_data;
ASSERT3U(db->db_dnode_handle->dnh_dnode->dn_type, ==,
DMU_OT_DNODE);
ASSERT3U(dn->dn_type, ==, DMU_OT_DNODE);
for (int i = 0; i < db->db.db_size >> DNODE_SHIFT;
i += dnp[i].dn_extra_slots + 1) {
for (int j = 0; j < dnp[i].dn_nblkptr; j++) {
Expand Down
61 changes: 26 additions & 35 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,14 @@ dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx)
dnode_t *dn;
int error;

if (newsize < 0 || newsize > db_fake->db_size)
return (SET_ERROR(EINVAL));

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);

if (dn->dn_bonus != db) {
error = SET_ERROR(EINVAL);
} else if (newsize < 0 || newsize > db_fake->db_size) {
error = SET_ERROR(EINVAL);
} else {
dnode_setbonuslen(dn, newsize, tx);
error = 0;
Expand All @@ -299,12 +300,13 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
dnode_t *dn;
int error;

if (!DMU_OT_IS_VALID(type))
return (SET_ERROR(EINVAL));

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);

if (!DMU_OT_IS_VALID(type)) {
error = SET_ERROR(EINVAL);
} else if (dn->dn_bonus != db) {
if (dn->dn_bonus != db) {
error = SET_ERROR(EINVAL);
} else {
dnode_setbonus_type(dn, type, tx);
Expand All @@ -319,12 +321,10 @@ dmu_object_type_t
dmu_get_bonustype(dmu_buf_t *db_fake)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
dnode_t *dn;
dmu_object_type_t type;

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
type = dn->dn_bonustype;
type = DB_DNODE(db)->dn_bonustype;
DB_DNODE_EXIT(db);

return (type);
Expand Down Expand Up @@ -486,16 +486,14 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag,
dmu_buf_t **dbp)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus;
dnode_t *dn;
int err;
uint32_t db_flags = DB_RF_CANFAIL;

if (flags & DMU_READ_NO_DECRYPT)
db_flags |= DB_RF_NO_DECRYPT;

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
err = dmu_spill_hold_by_dnode(dn, db_flags, tag, dbp);
err = dmu_spill_hold_by_dnode(DB_DNODE(db), db_flags, tag, dbp);
DB_DNODE_EXIT(db);

return (err);
Expand Down Expand Up @@ -668,13 +666,11 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
dmu_buf_t ***dbpp)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
dnode_t *dn;
int err;

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
numbufsp, dbpp, DMU_READ_PREFETCH);
err = dmu_buf_hold_array_by_dnode(DB_DNODE(db), offset, length, read,
tag, numbufsp, dbpp, DMU_READ_PREFETCH);
DB_DNODE_EXIT(db);

return (err);
Expand Down Expand Up @@ -1301,15 +1297,13 @@ int
dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
dnode_t *dn;
int err;

if (size == 0)
return (0);

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
err = dmu_read_uio_dnode(dn, uio, size);
err = dmu_read_uio_dnode(DB_DNODE(db), uio, size);
DB_DNODE_EXIT(db);

return (err);
Expand Down Expand Up @@ -1403,15 +1397,13 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb;
dnode_t *dn;
int err;

if (size == 0)
return (0);

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
err = dmu_write_uio_dnode(dn, uio, size, tx);
err = dmu_write_uio_dnode(DB_DNODE(db), uio, size, tx);
DB_DNODE_EXIT(db);

return (err);
Expand Down Expand Up @@ -1539,11 +1531,11 @@ dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
dmu_tx_t *tx)
{
int err;
dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;

DB_DNODE_ENTER(dbuf);
err = dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx);
DB_DNODE_EXIT(dbuf);
DB_DNODE_ENTER(db);
err = dmu_assign_arcbuf_by_dnode(DB_DNODE(db), offset, buf, tx);
DB_DNODE_EXIT(db);

return (err);
}
Expand Down Expand Up @@ -1782,7 +1774,6 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dmu_sync_arg_t *dsa;
zbookmark_phys_t zb;
zio_prop_t zp;
dnode_t *dn;

ASSERT(pio != NULL);
ASSERT(txg != 0);
Expand All @@ -1791,8 +1782,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
db->db.db_object, db->db_level, db->db_blkid);

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp);
dmu_write_policy(os, DB_DNODE(db), db->db_level, WP_DMU_SYNC, &zp);
DB_DNODE_EXIT(db);

/*
Expand Down Expand Up @@ -1877,11 +1867,14 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
* zio_done(), which VERIFYs that the override BP is identical
* to the on-disk BP.
*/
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
if (dr_next != NULL || dnode_block_freed(dn, db->db_blkid))
if (dr_next != NULL) {
zp.zp_nopwrite = B_FALSE;
DB_DNODE_EXIT(db);
} else {
DB_DNODE_ENTER(db);
if (dnode_block_freed(DB_DNODE(db), db->db_blkid))
zp.zp_nopwrite = B_FALSE;
DB_DNODE_EXIT(db);
}

ASSERT(dr->dr_txg == txg);
if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC ||
Expand Down Expand Up @@ -2487,11 +2480,9 @@ void
dmu_object_dnsize_from_db(dmu_buf_t *db_fake, int *dnsize)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
dnode_t *dn;

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
*dnsize = dn->dn_num_slots << DNODE_SHIFT;
*dnsize = DB_DNODE(db)->dn_num_slots << DNODE_SHIFT;
DB_DNODE_EXIT(db);
}

Expand Down
5 changes: 1 addition & 4 deletions module/zfs/dmu_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1520,11 +1520,8 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
ASSERT(tx->tx_txg == 0);
dmu_tx_hold_spill(tx, object);
} else {
dnode_t *dn;

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
if (dn->dn_have_spill) {
if (DB_DNODE(db)->dn_have_spill) {
ASSERT(tx->tx_txg == 0);
dmu_tx_hold_spill(tx, object);
}
Expand Down
15 changes: 14 additions & 1 deletion module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,19 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
int64_t refcount;
uint32_t dbufs;

#ifndef USE_DNODE_HANDLE
/*
* We can't move dnodes if dbufs reference them directly without
* using handles and respecitve locking. Unless USE_DNODE_HANDLE
* is defined the code below is only to make sure it still builds,
* but it should never be used, since it is unsafe.
*/
#ifdef ZFS_DEBUG
PANIC("dnode_move() called without USE_DNODE_HANDLE");
#endif
return (KMEM_CBRC_NO);
#endif

/*
* The dnode is on the objset's list of known dnodes if the objset
* pointer is valid. We set the low bit of the objset pointer when
Expand Down Expand Up @@ -1757,7 +1770,7 @@ dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting)
* handle.
*/
#ifdef ZFS_DEBUG
ASSERT(refs > 0 || dnh->dnh_zrlock.zr_owner != curthread);
ASSERT(refs > 0 || zrl_owner(&dnh->dnh_zrlock) != curthread);
#endif

/* NOTE: the DNODE_DNODE does not have a dn_dbuf */
Expand Down
4 changes: 1 addition & 3 deletions module/zfs/sa.c
Original file line number Diff line number Diff line change
Expand Up @@ -1852,7 +1852,6 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
{
sa_os_t *sa = hdl->sa_os->os_sa;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
dnode_t *dn;
sa_bulk_attr_t *attr_desc;
void *old_data[2];
int bonus_attr_count = 0;
Expand All @@ -1872,8 +1871,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
/* First make of copy of the old data */

DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
if (dn->dn_bonuslen != 0) {
if (DB_DNODE(db)->dn_bonuslen != 0) {
bonus_data_size = hdl->sa_bonus->db_size;
old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
memcpy(old_data[0], hdl->sa_bonus->db_data,
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -665,13 +665,13 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
DB_DNODE_ENTER(db);
err = dmu_read_by_dnode(DB_DNODE(db), off, len, lr + 1,
DMU_READ_NO_PREFETCH);
DB_DNODE_EXIT(db);
if (err != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
wr_state = WR_NEED_COPY;
}
DB_DNODE_EXIT(db);
}

itx->itx_wr_state = wr_state;
Expand Down
Loading