diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 index d9ac03fa8ec2..bd49e1b53770 100755 --- a/cmd/arc_summary/arc_summary3 +++ b/cmd/arc_summary/arc_summary3 @@ -58,7 +58,6 @@ SECTION_PATHS = {'arc': 'arcstats', 'dmu': 'dmu_tx', 'l2arc': 'arcstats', # L2ARC stuff lives in arcstats 'vdev': 'vdev_cache_stats', - 'xuio': 'xuio_stats', 'zfetch': 'zfetchstats', 'zil': 'zil'} diff --git a/include/os/freebsd/spl/sys/uio.h b/include/os/freebsd/spl/sys/uio.h index d9dab652c3dd..81c7ab64ca2b 100644 --- a/include/os/freebsd/spl/sys/uio.h +++ b/include/os/freebsd/spl/sys/uio.h @@ -43,27 +43,6 @@ typedef struct uio uio_t; typedef struct iovec iovec_t; typedef enum uio_seg uio_seg_t; -typedef enum xuio_type { - UIOTYPE_ASYNCIO, - UIOTYPE_ZEROCOPY -} xuio_type_t; - -typedef struct xuio { - uio_t xu_uio; - - /* Extended uio fields */ - enum xuio_type xu_type; /* What kind of uio structure? */ - union { - struct { - int xu_zc_rw; - void *xu_zc_priv; - } xu_zc; - } xu_ext; -} xuio_t; - -#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv -#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw - static __inline int zfs_uiomove(void *cp, size_t n, enum uio_rw dir, uio_t *uio) { diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index ce3b38fcde2c..9bd358dc3d83 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -63,49 +63,6 @@ typedef struct uio { size_t uio_skip; } uio_t; -typedef struct aio_req { - uio_t *aio_uio; - void *aio_private; -} aio_req_t; - -typedef enum xuio_type { - UIOTYPE_ASYNCIO, - UIOTYPE_ZEROCOPY, -} xuio_type_t; - - -#define UIOA_IOV_MAX 16 - -typedef struct uioa_page_s { - int uioa_pfncnt; - void **uioa_ppp; - caddr_t uioa_base; - size_t uioa_len; -} uioa_page_t; - -typedef struct xuio { - uio_t xu_uio; - enum xuio_type xu_type; - union { - struct { - uint32_t xu_a_state; - ssize_t xu_a_mbytes; - uioa_page_t *xu_a_lcur; - void **xu_a_lppp; - void *xu_a_hwst[4]; - uioa_page_t xu_a_locked[UIOA_IOV_MAX]; - } xu_aio; - - struct { - int xu_zc_rw; - void *xu_zc_priv; - } xu_zc; - } xu_ext; -} xuio_t; - -#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv -#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw - #define uio_segflg(uio) (uio)->uio_segflg #define uio_offset(uio) (uio)->uio_loffset #define uio_resid(uio) (uio)->uio_resid diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 54fdbc9ad227..0c50d0409b2b 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -864,18 +864,6 @@ int dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, dmu_tx_t *tx); #define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf -#ifdef HAVE_UIO_ZEROCOPY -int dmu_xuio_init(struct xuio *uio, int niov); -void dmu_xuio_fini(struct xuio *uio); -int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, - size_t n); -int dmu_xuio_cnt(struct xuio *uio); -struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i); -void dmu_xuio_clear(struct xuio *uio, int i); -#endif /* HAVE_UIO_ZEROCOPY */ -void xuio_stat_wbuf_copied(void); -void xuio_stat_wbuf_nocopy(void); - extern int zfs_prefetch_disable; extern int zfs_max_recordsize; diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 0c6273a3a727..def4aadba1d0 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -237,13 +237,6 @@ extern "C" { struct objset; struct dmu_pool; -typedef struct dmu_xuio { - int next; - int cnt; - struct arc_buf **bufs; - iovec_t *iovp; -} dmu_xuio_t; - typedef struct dmu_sendstatus { list_node_t dss_link; int dss_outfd; diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h index 482ffef6f473..99a5a4d2ab7b 100644 --- a/lib/libspl/include/sys/uio.h +++ b/lib/libspl/include/sys/uio.h @@ -76,45 +76,6 @@ typedef struct uio { ssize_t uio_resid; /* residual count */ } uio_t; -typedef enum xuio_type { - UIOTYPE_ASYNCIO, - UIOTYPE_ZEROCOPY, -} xuio_type_t; - -#define UIOA_IOV_MAX 16 - -typedef struct uioa_page_s { /* locked uio_iov state */ - int uioa_pfncnt; /* count of pfn_t(s) in *uioa_ppp */ - void **uioa_ppp; /* page_t or pfn_t array */ - caddr_t uioa_base; /* address base */ - size_t uioa_len; /* span length */ -} uioa_page_t; - -typedef struct xuio { - uio_t xu_uio; /* embedded UIO structure */ - - /* Extended uio fields */ - enum xuio_type xu_type; /* uio type */ - union { - struct { - uint32_t xu_a_state; /* state of async i/o */ - ssize_t xu_a_mbytes; /* bytes moved */ - uioa_page_t *xu_a_lcur; /* uioa_locked[] pointer */ - void **xu_a_lppp; /* lcur->uioa_pppp[] pointer */ - void *xu_a_hwst[4]; /* opaque hardware state */ - uioa_page_t xu_a_locked[UIOA_IOV_MAX]; - } xu_aio; - - struct { - int xu_zc_rw; /* read or write buffer */ - void *xu_zc_priv; /* fs specific */ - } xu_zc; - } xu_ext; -} xuio_t; - -#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv -#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw - #define uio_segflg(uio) (uio)->uio_segflg #define uio_offset(uio) (uio)->uio_loffset #define uio_resid(uio) (uio)->uio_resid diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 2469769bc78a..0b72c0356917 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -4211,164 +4211,6 @@ zfs_fid(struct inode *ip, fid_t *fidp) return (0); } -#ifdef HAVE_UIO_ZEROCOPY -/* - * The smallest read we may consider to loan out an arcbuf. - * This must be a power of 2. - */ -int zcr_blksz_min = (1 << 10); /* 1K */ -/* - * If set to less than the file block size, allow loaning out of an - * arcbuf for a partial block read. This must be a power of 2. - */ -int zcr_blksz_max = (1 << 17); /* 128K */ - - -/*ARGSUSED*/ -static int -zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int max_blksz = zfsvfs->z_max_blksz; - uio_t *uio = &xuio->xu_uio; - ssize_t size = uio->uio_resid; - offset_t offset = uio->uio_loffset; - int blksz; - int fullblk, i; - arc_buf_t *abuf; - ssize_t maxsize; - int preamble, postamble; - - if (xuio->xu_type != UIOTYPE_ZEROCOPY) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - switch (ioflag) { - case UIO_WRITE: - /* - * Loan out an arc_buf for write if write size is bigger than - * max_blksz, and the file's block size is also max_blksz. - */ - blksz = max_blksz; - if (size < blksz || zp->z_blksz != blksz) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - /* - * Caller requests buffers for write before knowing where the - * write offset might be (e.g. NFS TCP write). - */ - if (offset == -1) { - preamble = 0; - } else { - preamble = P2PHASE(offset, blksz); - if (preamble) { - preamble = blksz - preamble; - size -= preamble; - } - } - - postamble = P2PHASE(size, blksz); - size -= postamble; - - fullblk = size / blksz; - (void) dmu_xuio_init(xuio, - (preamble != 0) + fullblk + (postamble != 0)); - - /* - * Have to fix iov base/len for partial buffers. They - * currently represent full arc_buf's. - */ - if (preamble) { - /* data begins in the middle of the arc_buf */ - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz); - ASSERT(abuf); - (void) dmu_xuio_add(xuio, abuf, - blksz - preamble, preamble); - } - - for (i = 0; i < fullblk; i++) { - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz); - ASSERT(abuf); - (void) dmu_xuio_add(xuio, abuf, 0, blksz); - } - - if (postamble) { - /* data ends in the middle of the arc_buf */ - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz); - ASSERT(abuf); - (void) dmu_xuio_add(xuio, abuf, 0, postamble); - } - break; - case UIO_READ: - /* - * Loan out an arc_buf for read if the read size is larger than - * the current file block size. Block alignment is not - * considered. Partial arc_buf will be loaned out for read. - */ - blksz = zp->z_blksz; - if (blksz < zcr_blksz_min) - blksz = zcr_blksz_min; - if (blksz > zcr_blksz_max) - blksz = zcr_blksz_max; - /* avoid potential complexity of dealing with it */ - if (blksz > max_blksz) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - maxsize = zp->z_size - uio->uio_loffset; - if (size > maxsize) - size = maxsize; - - if (size < blksz) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - break; - default: - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - uio->uio_extflg = UIO_XUIO; - XUIO_XUZC_RW(xuio) = ioflag; - ZFS_EXIT(zfsvfs); - return (0); -} - -/*ARGSUSED*/ -static int -zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr) -{ - int i; - arc_buf_t *abuf; - int ioflag = XUIO_XUZC_RW(xuio); - - ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); - - i = dmu_xuio_cnt(xuio); - while (i-- > 0) { - abuf = dmu_xuio_arcbuf(xuio, i); - /* - * if abuf == NULL, it must be a write buffer - * that has been returned in zfs_write(). - */ - if (abuf) - dmu_return_arcbuf(abuf); - ASSERT(abuf || ioflag == UIO_WRITE); - } - - dmu_xuio_fini(xuio); - return (0); -} -#endif /* HAVE_UIO_ZEROCOPY */ - #if defined(_KERNEL) EXPORT_SYMBOL(zfs_open); EXPORT_SYMBOL(zfs_close); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 9b6481d22322..82bb7206c1ac 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2627,11 +2627,9 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) (void) dbuf_dirty(db, tx); bcopy(buf->b_data, db->db.db_data, db->db.db_size); arc_buf_destroy(buf, db); - xuio_stat_wbuf_copied(); return; } - xuio_stat_wbuf_nocopy(); if (db->db_state == DB_CACHED) { dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 2c96645214f8..14d864fbcda9 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1165,165 +1165,12 @@ dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_buf_rele_array(dbp, numbufs, FTAG); } -/* - * DMU support for xuio - */ -kstat_t *xuio_ksp = NULL; - -typedef struct xuio_stats { - /* loaned yet not returned arc_buf */ - kstat_named_t xuiostat_onloan_rbuf; - kstat_named_t xuiostat_onloan_wbuf; - /* whether a copy is made when loaning out a read buffer */ - kstat_named_t xuiostat_rbuf_copied; - kstat_named_t xuiostat_rbuf_nocopy; - /* whether a copy is made when assigning a write buffer */ - kstat_named_t xuiostat_wbuf_copied; - kstat_named_t xuiostat_wbuf_nocopy; -} xuio_stats_t; - -static xuio_stats_t xuio_stats = { - { "onloan_read_buf", KSTAT_DATA_UINT64 }, - { "onloan_write_buf", KSTAT_DATA_UINT64 }, - { "read_buf_copied", KSTAT_DATA_UINT64 }, - { "read_buf_nocopy", KSTAT_DATA_UINT64 }, - { "write_buf_copied", KSTAT_DATA_UINT64 }, - { "write_buf_nocopy", KSTAT_DATA_UINT64 } -}; - -#define XUIOSTAT_INCR(stat, val) \ - atomic_add_64(&xuio_stats.stat.value.ui64, (val)) -#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1) - -#ifdef HAVE_UIO_ZEROCOPY -int -dmu_xuio_init(xuio_t *xuio, int nblk) -{ - dmu_xuio_t *priv; - uio_t *uio = &xuio->xu_uio; - - uio->uio_iovcnt = nblk; - uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_SLEEP); - - priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP); - priv->cnt = nblk; - priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP); - priv->iovp = (iovec_t *)uio->uio_iov; - XUIO_XUZC_PRIV(xuio) = priv; - - if (XUIO_XUZC_RW(xuio) == UIO_READ) - XUIOSTAT_INCR(xuiostat_onloan_rbuf, nblk); - else - XUIOSTAT_INCR(xuiostat_onloan_wbuf, nblk); - - return (0); -} - -void -dmu_xuio_fini(xuio_t *xuio) -{ - dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); - int nblk = priv->cnt; - - kmem_free(priv->iovp, nblk * sizeof (iovec_t)); - kmem_free(priv->bufs, nblk * sizeof (arc_buf_t *)); - kmem_free(priv, sizeof (dmu_xuio_t)); - - if (XUIO_XUZC_RW(xuio) == UIO_READ) - XUIOSTAT_INCR(xuiostat_onloan_rbuf, -nblk); - else - XUIOSTAT_INCR(xuiostat_onloan_wbuf, -nblk); -} - -/* - * Initialize iov[priv->next] and priv->bufs[priv->next] with { off, n, abuf } - * and increase priv->next by 1. - */ -int -dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n) -{ - struct iovec *iov; - uio_t *uio = &xuio->xu_uio; - dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); - int i = priv->next++; - - ASSERT(i < priv->cnt); - ASSERT(off + n <= arc_buf_lsize(abuf)); - iov = (iovec_t *)uio->uio_iov + i; - iov->iov_base = (char *)abuf->b_data + off; - iov->iov_len = n; - priv->bufs[i] = abuf; - return (0); -} - -int -dmu_xuio_cnt(xuio_t *xuio) -{ - dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); - return (priv->cnt); -} - -arc_buf_t * -dmu_xuio_arcbuf(xuio_t *xuio, int i) -{ - dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); - - ASSERT(i < priv->cnt); - return (priv->bufs[i]); -} - -void -dmu_xuio_clear(xuio_t *xuio, int i) -{ - dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); - - ASSERT(i < priv->cnt); - priv->bufs[i] = NULL; -} -#endif /* HAVE_UIO_ZEROCOPY */ - -static void -xuio_stat_init(void) -{ - xuio_ksp = kstat_create("zfs", 0, "xuio_stats", "misc", - KSTAT_TYPE_NAMED, sizeof (xuio_stats) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - if (xuio_ksp != NULL) { - xuio_ksp->ks_data = &xuio_stats; - kstat_install(xuio_ksp); - } -} - -static void -xuio_stat_fini(void) -{ - if (xuio_ksp != NULL) { - kstat_delete(xuio_ksp); - xuio_ksp = NULL; - } -} - -void -xuio_stat_wbuf_copied(void) -{ - XUIOSTAT_BUMP(xuiostat_wbuf_copied); -} - -void -xuio_stat_wbuf_nocopy(void) -{ - XUIOSTAT_BUMP(xuiostat_wbuf_nocopy); -} - #ifdef _KERNEL int dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) { dmu_buf_t **dbp; int numbufs, i, err; -#ifdef HAVE_UIO_ZEROCOPY - xuio_t *xuio = NULL; -#endif /* * NB: we could do this block-at-a-time, but it's nice @@ -1344,21 +1191,6 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) bufoff = uio_offset(uio) - db->db_offset; tocpy = MIN(db->db_size - bufoff, size); -#ifdef HAVE_UIO_ZEROCOPY - if (xuio) { - dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db; - arc_buf_t *dbuf_abuf = dbi->db_buf; - arc_buf_t *abuf = dbuf_loan_arcbuf(dbi); - err = dmu_xuio_add(xuio, abuf, bufoff, tocpy); - if (!err) - uio_advance(uio, tocpy); - - if (abuf == dbuf_abuf) - XUIOSTAT_BUMP(xuiostat_rbuf_nocopy); - else - XUIOSTAT_BUMP(xuiostat_rbuf_copied); - } else -#endif #ifdef __FreeBSD__ err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy, uio); @@ -1597,7 +1429,6 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, dbuf_rele(db, FTAG); dmu_write(os, object, offset, blksz, buf->b_data, tx); dmu_return_arcbuf(buf); - XUIOSTAT_BUMP(xuiostat_wbuf_copied); } return (0); @@ -2409,7 +2240,6 @@ dmu_init(void) abd_init(); zfs_dbgmsg_init(); sa_cache_init(); - xuio_stat_init(); dmu_objset_init(); dnode_init(); zfetch_init(); @@ -2429,7 +2259,6 @@ dmu_fini(void) dbuf_fini(); dnode_fini(); dmu_objset_fini(); - xuio_stat_fini(); sa_cache_fini(); zfs_dbgmsg_fini(); abd_fini();