diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml new file mode 100644 index 000000000000..6193c8afeae9 --- /dev/null +++ b/.github/workflows/labels.yml @@ -0,0 +1,49 @@ +name: labels + +on: + pull_request_target: + types: [ opened, synchronize, reopened, converted_to_draft, ready_for_review ] + +permissions: + pull-requests: write + +jobs: + open: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'opened' && github.event.pull_request.draft }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --add-label "Status: Work in Progress" + + push: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'synchronize' || github.event.action == 'reopened' }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --remove-label "Status: Accepted,Status: Inactive,Status: Revision Needed,Status: Stale" + + draft: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'converted_to_draft' }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --remove-label "Status: Accepted,Status: Code Review Needed,Status: Inactive,Status: Revision Needed,Status: Stale" --add-label "Status: Work in Progress" + + rfr: + runs-on: ubuntu-latest + if: ${{ github.event.action == 'ready_for_review' }} + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ github.event.pull_request.html_url }} + run: | + gh pr edit $ISSUE --remove-label "Status: Accepted,Status: Inactive,Status: Revision Needed,Status: Stale,Status: Work in Progress" --add-label "Status: Code Review Needed" diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 84e13832d10f..39ac92107b71 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -52,16 +52,16 @@ case "$OS" in OSNAME="Debian 12" URL="https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-generic-amd64.qcow2" ;; - fedora39) - OSNAME="Fedora 39" - OSv="fedora39" - URL="https://download.fedoraproject.org/pub/fedora/linux/releases/39/Cloud/x86_64/images/Fedora-Cloud-Base-39-1.5.x86_64.qcow2" - ;; fedora40) OSNAME="Fedora 40" - OSv="fedora39" + OSv="fedora-unknown" URL="https://download.fedoraproject.org/pub/fedora/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2" ;; + fedora41) + OSNAME="Fedora 41" + OSv="fedora-unknown" + URL="https://download.fedoraproject.org/pub/fedora/linux/releases/41/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-41-1.4.x86_64.qcow2" + ;; freebsd13-3r) OSNAME="FreeBSD 13.3-RELEASE" OSv="freebsd13.0" diff --git a/.github/workflows/scripts/qemu-3-deps.sh b/.github/workflows/scripts/qemu-3-deps.sh index bf06c1892ceb..96979cd02e09 100755 --- a/.github/workflows/scripts/qemu-3-deps.sh +++ b/.github/workflows/scripts/qemu-3-deps.sh @@ -66,7 +66,13 @@ function rhel() { echo "##[endgroup]" echo "##[group]Install Development Tools" - sudo dnf group install -y "Development Tools" + + # Alma wants "Development Tools", Fedora 41 wants "development-tools" + if ! sudo dnf group install -y "Development Tools" ; then + echo "Trying 'development-tools' instead of 'Development Tools'" + sudo dnf group install -y development-tools + fi + sudo dnf install -y \ acl attr bc bzip2 cryptsetup curl dbench dkms elfutils-libelf-devel fio \ gdb git jq kernel-rpm-macros ksh libacl-devel libaio-devel \ diff --git a/.github/workflows/scripts/qemu-4-build.sh b/.github/workflows/scripts/qemu-4-build.sh index 1051ee1f4deb..955f605f5bce 100755 --- a/.github/workflows/scripts/qemu-4-build.sh +++ b/.github/workflows/scripts/qemu-4-build.sh @@ -83,7 +83,7 @@ function rpm_build_and_install() { echo "##[endgroup]" echo "##[group]Install" - run sudo dnf -y --skip-broken localinstall $(ls *.rpm | grep -v src.rpm) + run sudo dnf -y --nobest install $(ls *.rpm | grep -v src.rpm) echo "##[endgroup]" } diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index f819e9938e31..e90030f4c02e 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -22,8 +22,8 @@ jobs: - name: Generate OS config and CI type id: os run: | - FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13-4r", "freebsd14-0r", "freebsd14-1s", "ubuntu20", "ubuntu22", "ubuntu24"]' - QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13-3r", "freebsd14-1r", "ubuntu24"]' + FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora40", "fedora41", "freebsd13-4r", "freebsd14-0r", "freebsd14-1s", "ubuntu20", "ubuntu22", "ubuntu24"]' + QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora41", "freebsd13-3r", "freebsd14-1r", "ubuntu24"]' # determine CI type when running on PR ci_type="full" if ${{ github.event_name == 'pull_request' }}; then @@ -46,7 +46,7 @@ jobs: strategy: fail-fast: false matrix: - # rhl: almalinux8, almalinux9, centos-stream9, fedora39, fedora40 + # rhl: almalinux8, almalinux9, centos-stream9, fedora40, fedora41 # debian: debian11, debian12, ubuntu20, ubuntu22, ubuntu24 # misc: archlinux, tumbleweed # FreeBSD Release: freebsd13-3r, freebsd13-4r, freebsd14-0r, freebsd14-1r diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 42b82c08249f..46587671202a 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2152,14 +2152,21 @@ dump_brt(spa_t *spa) if (dump_opt['T'] < 3) return; + /* -TTT shows a per-vdev histograms; -TTTT shows all entries */ + boolean_t do_histo = dump_opt['T'] == 3; + char dva[64]; - printf("\n%-16s %-10s\n", "DVA", "REFCNT"); + + if (!do_histo) + printf("\n%-16s %-10s\n", "DVA", "REFCNT"); for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) { brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid]; if (brtvd == NULL || !brtvd->bv_initiated) continue; + uint64_t counts[64] = {}; + zap_cursor_t zc; zap_attribute_t *za = zap_attribute_alloc(); for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries); @@ -2172,14 +2179,26 @@ dump_brt(spa_t *spa) za->za_integer_length, za->za_num_integers, &refcnt)); - uint64_t offset = *(const uint64_t *)za->za_name; + if (do_histo) + counts[highbit64(refcnt)]++; + else { + uint64_t offset = + *(const uint64_t *)za->za_name; - snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", vdevid, - (u_longlong_t)offset); - printf("%-16s %-10llu\n", dva, (u_longlong_t)refcnt); + snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", + vdevid, (u_longlong_t)offset); + printf("%-16s %-10llu\n", dva, + (u_longlong_t)refcnt); + } } zap_cursor_fini(&zc); zap_attribute_free(za); + + if (do_histo) { + printf("\nBRT: vdev %" PRIu64 + ": DVAs with 2^n refcnts:\n", vdevid); + dump_histogram(counts, 64, 0); + } } } diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c index 32a8789d3001..4a2b7398e922 100644 --- a/cmd/zed/zed_disk_event.c +++ b/cmd/zed/zed_disk_event.c @@ -139,7 +139,8 @@ dev_event_nvlist(struct udev_device *dev) * is /dev/sda. */ struct udev_device *parent_dev = udev_device_get_parent(dev); - if ((value = udev_device_get_sysattr_value(parent_dev, "size")) + if (parent_dev != NULL && + (value = udev_device_get_sysattr_value(parent_dev, "size")) != NULL) { uint64_t numval = DEV_BSIZE; diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index ea180f6b705e..6a45a063d91a 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -7966,8 +7966,11 @@ zpool_do_online(int argc, char **argv) poolname = argv[0]; - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { + (void) fprintf(stderr, gettext("failed to open pool " + "\"%s\""), poolname); return (1); + } for (i = 1; i < argc; i++) { vdev_state_t oldstate; @@ -7988,12 +7991,15 @@ zpool_do_online(int argc, char **argv) &l2cache, NULL); if (tgt == NULL) { ret = 1; + (void) fprintf(stderr, gettext("couldn't find device " + "\"%s\" in pool \"%s\"\n"), argv[i], poolname); continue; } uint_t vsc; oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt, ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state; - if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { + if ((rc = zpool_vdev_online(zhp, argv[i], flags, + &newstate)) == 0) { if (newstate != VDEV_STATE_HEALTHY) { (void) printf(gettext("warning: device '%s' " "onlined, but remains in faulted state\n"), @@ -8019,6 +8025,9 @@ zpool_do_online(int argc, char **argv) } } } else { + (void) fprintf(stderr, gettext("Failed to online " + "\"%s\" in pool \"%s\": %d\n"), + argv[i], poolname, rc); ret = 1; } } @@ -8103,8 +8112,11 @@ zpool_do_offline(int argc, char **argv) poolname = argv[0]; - if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { + (void) fprintf(stderr, gettext("failed to open pool " + "\"%s\""), poolname); return (1); + } for (i = 1; i < argc; i++) { uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); diff --git a/config/kernel-vfs-invalidate_folio.m4 b/config/kernel-vfs-invalidate_folio.m4 deleted file mode 100644 index 61a5c8478af1..000000000000 --- a/config/kernel-vfs-invalidate_folio.m4 +++ /dev/null @@ -1,33 +0,0 @@ -dnl # -dnl # Linux 5.18 uses invalidate_folio in lieu of invalidate_page -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_INVALIDATE_FOLIO], [ - ZFS_LINUX_TEST_SRC([vfs_has_invalidate_folio], [ - #include - - static void - test_invalidate_folio(struct folio *folio, size_t offset, - size_t len) { - (void) folio; (void) offset; (void) len; - return; - } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .invalidate_folio = test_invalidate_folio, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_INVALIDATE_FOLIO], [ - dnl # - dnl # Linux 5.18 uses invalidate_folio in lieu of invalidate_page - dnl # - AC_MSG_CHECKING([whether invalidate_folio exists]) - ZFS_LINUX_TEST_RESULT([vfs_has_invalidate_folio], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_INVALIDATE_FOLIO, 1, [invalidate_folio exists]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-vfs-migrate_folio.m4 b/config/kernel-vfs-migrate_folio.m4 new file mode 100644 index 000000000000..186cd0581a17 --- /dev/null +++ b/config/kernel-vfs-migrate_folio.m4 @@ -0,0 +1,27 @@ +dnl # +dnl # Linux 6.0 uses migrate_folio in lieu of migrate_page +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO], [ + ZFS_LINUX_TEST_SRC([vfs_has_migrate_folio], [ + #include + #include + + static const struct address_space_operations + aops __attribute__ ((unused)) = { + .migrate_folio = migrate_folio, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO], [ + dnl # + dnl # Linux 6.0 uses migrate_folio in lieu of migrate_page + dnl # + AC_MSG_CHECKING([whether migrate_folio exists]) + ZFS_LINUX_TEST_RESULT([vfs_has_migrate_folio], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_MIGRATE_FOLIO, 1, [migrate_folio exists]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel-vfs-release_folio.m4 b/config/kernel-vfs-release_folio.m4 deleted file mode 100644 index f31db5677fd3..000000000000 --- a/config/kernel-vfs-release_folio.m4 +++ /dev/null @@ -1,32 +0,0 @@ -dnl # -dnl # Linux 5.19 uses release_folio in lieu of releasepage -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RELEASE_FOLIO], [ - ZFS_LINUX_TEST_SRC([vfs_has_release_folio], [ - #include - - static bool - test_release_folio(struct folio *folio, gfp_t gfp) { - (void) folio; (void) gfp; - return (0); - } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .release_folio = test_release_folio, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_RELEASE_FOLIO], [ - dnl # - dnl # Linux 5.19 uses release_folio in lieu of releasepage - dnl # - AC_MSG_CHECKING([whether release_folio exists]) - ZFS_LINUX_TEST_RESULT([vfs_has_release_folio], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_RELEASE_FOLIO, 1, [release_folio exists]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 556df58082f9..78f178ff27ac 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -77,8 +77,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO - ZFS_AC_KERNEL_SRC_VFS_RELEASE_FOLIO - ZFS_AC_KERNEL_SRC_VFS_INVALIDATE_FOLIO + ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO ZFS_AC_KERNEL_SRC_VFS_READPAGES @@ -189,8 +188,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO - ZFS_AC_KERNEL_VFS_RELEASE_FOLIO - ZFS_AC_KERNEL_VFS_INVALIDATE_FOLIO + ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_DIRECT_IO ZFS_AC_KERNEL_VFS_READPAGES diff --git a/include/os/freebsd/spl/sys/simd.h b/include/os/freebsd/spl/sys/simd.h index 6bc46755c4e3..d16e1db5e826 100644 --- a/include/os/freebsd/spl/sys/simd.h +++ b/include/os/freebsd/spl/sys/simd.h @@ -50,7 +50,7 @@ #define kfpu_fini() do {} while (0) #endif -#define simd_stat_init() 0 -#define simd_stat_fini() 0 +#define simd_stat_init() do {} while (0) +#define simd_stat_fini() do {} while (0) #endif diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index c9bfb9a8026c..e69464809a42 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -171,7 +171,6 @@ typedef struct dbuf_dirty_record { * gets COW'd in a subsequent transaction group. */ arc_buf_t *dr_data; - blkptr_t dr_overridden_by; override_states_t dr_override_state; uint8_t dr_copies; boolean_t dr_nopwrite; @@ -179,14 +178,21 @@ typedef struct dbuf_dirty_record { boolean_t dr_diowrite; boolean_t dr_has_raw_params; - /* - * If dr_has_raw_params is set, the following crypt - * params will be set on the BP that's written. - */ - boolean_t dr_byteorder; - uint8_t dr_salt[ZIO_DATA_SALT_LEN]; - uint8_t dr_iv[ZIO_DATA_IV_LEN]; - uint8_t dr_mac[ZIO_DATA_MAC_LEN]; + /* Override and raw params are mutually exclusive. */ + union { + blkptr_t dr_overridden_by; + struct { + /* + * If dr_has_raw_params is set, the + * following crypt params will be set + * on the BP that's written. + */ + boolean_t dr_byteorder; + uint8_t dr_salt[ZIO_DATA_SALT_LEN]; + uint8_t dr_iv[ZIO_DATA_IV_LEN]; + uint8_t dr_mac[ZIO_DATA_MAC_LEN]; + }; + }; } dl; struct dirty_lightweight_leaf { /* @@ -346,6 +352,8 @@ typedef struct dbuf_hash_table { typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t); +extern kmem_cache_t *dbuf_dirty_kmem_cache; + uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level, const uint64_t offset); diff --git a/include/sys/u8_textprep.h b/include/sys/u8_textprep.h index e82037de4fe4..b0bae5833acd 100644 --- a/include/sys/u8_textprep.h +++ b/include/sys/u8_textprep.h @@ -36,28 +36,6 @@ extern "C" { #endif -/* - * Unicode encoding conversion functions and their macros. - */ -#define UCONV_IN_BIG_ENDIAN 0x0001 -#define UCONV_OUT_BIG_ENDIAN 0x0002 -#define UCONV_IN_SYSTEM_ENDIAN 0x0004 -#define UCONV_OUT_SYSTEM_ENDIAN 0x0008 -#define UCONV_IN_LITTLE_ENDIAN 0x0010 -#define UCONV_OUT_LITTLE_ENDIAN 0x0020 -#define UCONV_IGNORE_NULL 0x0040 -#define UCONV_IN_ACCEPT_BOM 0x0080 -#define UCONV_OUT_EMIT_BOM 0x0100 - -extern int uconv_u16tou32(const uint16_t *, size_t *, uint32_t *, size_t *, - int); -extern int uconv_u16tou8(const uint16_t *, size_t *, uchar_t *, size_t *, int); -extern int uconv_u32tou16(const uint32_t *, size_t *, uint16_t *, size_t *, - int); -extern int uconv_u32tou8(const uint32_t *, size_t *, uchar_t *, size_t *, int); -extern int uconv_u8tou16(const uchar_t *, size_t *, uint16_t *, size_t *, int); -extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int); - /* * UTF-8 text preparation functions and their macros. * @@ -67,7 +45,9 @@ extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int); */ #define U8_STRCMP_CS (0x00000001) #define U8_STRCMP_CI_UPPER (0x00000002) +#if 0 #define U8_STRCMP_CI_LOWER (0x00000004) +#endif #define U8_CANON_DECOMP (0x00000010) #define U8_COMPAT_DECOMP (0x00000020) @@ -79,7 +59,9 @@ extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int); #define U8_STRCMP_NFKC (U8_COMPAT_DECOMP | U8_CANON_COMP) #define U8_TEXTPREP_TOUPPER (U8_STRCMP_CI_UPPER) +#ifdef U8_STRCMP_CI_LOWER #define U8_TEXTPREP_TOLOWER (U8_STRCMP_CI_LOWER) +#endif #define U8_TEXTPREP_NFD (U8_STRCMP_NFD) #define U8_TEXTPREP_NFC (U8_STRCMP_NFC) @@ -90,8 +72,12 @@ extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int); #define U8_TEXTPREP_IGNORE_INVALID (0x00020000) #define U8_TEXTPREP_NOWAIT (0x00040000) +#if 0 #define U8_UNICODE_320 (0) #define U8_UNICODE_500 (1) +#else +#define U8_UNICODE_500 (0) +#endif #define U8_UNICODE_LATEST (U8_UNICODE_500) #define U8_VALIDATE_ENTIRE (0x00100000) diff --git a/include/sys/u8_textprep_data.h b/include/sys/u8_textprep_data.h index 2a97966ee56e..bd6af0e8f661 100644 --- a/include/sys/u8_textprep_data.h +++ b/include/sys/u8_textprep_data.h @@ -146,7 +146,8 @@ typedef struct { * The common b1_tbl for combining class, decompositions, tolower, and * toupper case conversion mappings. */ -static const uchar_t u8_common_b1_tbl[2][256] = { +static const uchar_t u8_common_b1_tbl[U8_UNICODE_LATEST + 1][256] = { +#ifdef U8_UNICODE_320 { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -181,6 +182,7 @@ static const uchar_t u8_common_b1_tbl[2][256] = { 1, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, }, +#endif { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -217,7 +219,9 @@ static const uchar_t u8_common_b1_tbl[2][256] = { }, }; -static const uchar_t u8_combining_class_b2_tbl[2][2][256] = { +static const uchar_t u8_combining_class_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = +{ +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -289,6 +293,7 @@ static const uchar_t u8_combining_class_b2_tbl[2][2][256] = { }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -363,7 +368,9 @@ static const uchar_t u8_combining_class_b2_tbl[2][2][256] = { }; -static const uchar_t u8_combining_class_b3_tbl[2][9][256] = { +static const uchar_t u8_combining_class_b3_tbl[U8_UNICODE_LATEST + 1][9][256] = +{ +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ N_, N_, N_, N_, N_, N_, N_, N_, @@ -672,6 +679,7 @@ static const uchar_t u8_combining_class_b3_tbl[2][9][256] = { N_, N_, N_, N_, N_, N_, N_, N_, }, }, +#endif { { /* Third byte table 0. */ N_, N_, N_, N_, N_, N_, N_, N_, @@ -986,7 +994,9 @@ static const uchar_t u8_combining_class_b3_tbl[2][9][256] = { * Unlike other b4_tbl, the b4_tbl for combining class data has * the combining class values not indices to the final tables. */ -static const uchar_t u8_combining_class_b4_tbl[2][55][256] = { +static const uchar_t u8_combining_class_b4_tbl[U8_UNICODE_LATEST + 1][55][256] = +{ +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -2859,6 +2869,7 @@ static const uchar_t u8_combining_class_b4_tbl[2][55][256] = { 0, 0, 0, 0, 0, 0, 0, 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -4733,7 +4744,8 @@ static const uchar_t u8_combining_class_b4_tbl[2][55][256] = { }, }; -static const uchar_t u8_composition_b1_tbl[2][256] = { +static const uchar_t u8_composition_b1_tbl[U8_UNICODE_LATEST + 1][256] = { +#ifdef U8_UNICODE_320 { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -4768,6 +4780,7 @@ static const uchar_t u8_composition_b1_tbl[2][256] = { N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, }, +#endif { 0, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, @@ -4804,7 +4817,8 @@ static const uchar_t u8_composition_b1_tbl[2][256] = { }, }; -static const uchar_t u8_composition_b2_tbl[2][1][256] = { +static const uchar_t u8_composition_b2_tbl[U8_UNICODE_LATEST + 1][1][256] = { +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -4842,6 +4856,7 @@ static const uchar_t u8_composition_b2_tbl[2][1][256] = { }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -4882,7 +4897,10 @@ static const uchar_t u8_composition_b2_tbl[2][1][256] = { }; -static const u8_displacement_t u8_composition_b3_tbl[2][5][256] = { +static const u8_displacement_t u8_composition_b3_tbl[ + U8_UNICODE_LATEST + 1][5][256] = +{ +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, @@ -5325,6 +5343,7 @@ static const u8_displacement_t u8_composition_b3_tbl[2][5][256] = { { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, @@ -5769,7 +5788,8 @@ static const u8_displacement_t u8_composition_b3_tbl[2][5][256] = { }, }; -static const uchar_t u8_composition_b4_tbl[2][41][257] = { +static const uchar_t u8_composition_b4_tbl[U8_UNICODE_LATEST + 1][41][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -7207,6 +7227,7 @@ static const uchar_t u8_composition_b4_tbl[2][41][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -8646,7 +8667,10 @@ static const uchar_t u8_composition_b4_tbl[2][41][257] = { }, }; -static const uint16_t u8_composition_b4_16bit_tbl[2][5][257] = { +static const uint16_t u8_composition_b4_16bit_tbl[ + U8_UNICODE_LATEST + 1][5][257] = +{ +#ifdef U8_UNICODE_320 { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -8824,6 +8848,7 @@ static const uint16_t u8_composition_b4_16bit_tbl[2][5][257] = { 362, }, }, +#endif { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -9003,7 +9028,8 @@ static const uint16_t u8_composition_b4_16bit_tbl[2][5][257] = { }, }; -static const uchar_t u8_composition_final_tbl[2][6623] = { +static const uchar_t u8_composition_final_tbl[U8_UNICODE_LATEST + 1][6623] = { +#ifdef U8_UNICODE_320 { 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, @@ -9834,6 +9860,7 @@ static const uchar_t u8_composition_final_tbl[2][6623] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, @@ -10666,7 +10693,8 @@ static const uchar_t u8_composition_final_tbl[2][6623] = { }, }; -static const uchar_t u8_decomp_b2_tbl[2][2][256] = { +static const uchar_t u8_decomp_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -10738,6 +10766,7 @@ static const uchar_t u8_decomp_b2_tbl[2][2][256] = { }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -10812,7 +10841,9 @@ static const uchar_t u8_decomp_b2_tbl[2][2][256] = { }; -static const u8_displacement_t u8_decomp_b3_tbl[2][8][256] = { +static const u8_displacement_t u8_decomp_b3_tbl[U8_UNICODE_LATEST + 1][8][256] = +{ +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -11519,6 +11550,7 @@ static const u8_displacement_t u8_decomp_b3_tbl[2][8][256] = { { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -12227,7 +12259,8 @@ static const u8_displacement_t u8_decomp_b3_tbl[2][8][256] = { }, }; -static const uchar_t u8_decomp_b4_tbl[2][118][257] = { +static const uchar_t u8_decomp_b4_tbl[U8_UNICODE_LATEST + 1][118][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -16360,6 +16393,7 @@ static const uchar_t u8_decomp_b4_tbl[2][118][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -20494,7 +20528,8 @@ static const uchar_t u8_decomp_b4_tbl[2][118][257] = { }, }; -static const uint16_t u8_decomp_b4_16bit_tbl[2][30][257] = { +static const uint16_t u8_decomp_b4_16bit_tbl[U8_UNICODE_LATEST + 1][30][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -21547,6 +21582,7 @@ static const uint16_t u8_decomp_b4_16bit_tbl[2][30][257] = { 0, }, }, +#endif { { /* Fourth byte 16-bit table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -22601,7 +22637,8 @@ static const uint16_t u8_decomp_b4_16bit_tbl[2][30][257] = { }, }; -static const uchar_t u8_decomp_final_tbl[2][19370] = { +static const uchar_t u8_decomp_final_tbl[U8_UNICODE_LATEST + 1][19370] = { +#ifdef U8_UNICODE_320 { 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, @@ -25026,6 +25063,7 @@ static const uchar_t u8_decomp_final_tbl[2][19370] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, @@ -27452,7 +27490,8 @@ static const uchar_t u8_decomp_final_tbl[2][19370] = { }, }; -static const uchar_t u8_case_common_b2_tbl[2][2][256] = { +static const uchar_t u8_case_common_b2_tbl[U8_UNICODE_LATEST + 1][2][256] = { +#ifdef U8_UNICODE_320 { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -27524,6 +27563,7 @@ static const uchar_t u8_case_common_b2_tbl[2][2][256] = { }, }, +#endif { { 0, N_, N_, N_, N_, N_, N_, N_, @@ -27598,7 +27638,11 @@ static const uchar_t u8_case_common_b2_tbl[2][2][256] = { }; -static const u8_displacement_t u8_tolower_b3_tbl[2][5][256] = { +#ifdef U8_STRCMP_CI_LOWER +static const u8_displacement_t u8_tolower_b3_tbl[ + U8_UNICODE_LATEST + 1][5][256] = +{ +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -27931,6 +27975,7 @@ static const u8_displacement_t u8_tolower_b3_tbl[2][5][256] = { { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -28265,7 +28310,8 @@ static const u8_displacement_t u8_tolower_b3_tbl[2][5][256] = { }, }; -static const uchar_t u8_tolower_b4_tbl[2][36][257] = { +static const uchar_t u8_tolower_b4_tbl[U8_UNICODE_LATEST + 1][36][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -29528,6 +29574,7 @@ static const uchar_t u8_tolower_b4_tbl[2][36][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -30792,7 +30839,8 @@ static const uchar_t u8_tolower_b4_tbl[2][36][257] = { }, }; -static const uchar_t u8_tolower_final_tbl[2][2299] = { +static const uchar_t u8_tolower_final_tbl[U8_UNICODE_LATEST + 1][2299] = { +#ifdef U8_UNICODE_320 { 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, @@ -31083,6 +31131,7 @@ static const uchar_t u8_tolower_final_tbl[2][2299] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, @@ -31374,8 +31423,12 @@ static const uchar_t u8_tolower_final_tbl[2][2299] = { 0x90, 0x91, 0x8F, }, }; +#endif -static const u8_displacement_t u8_toupper_b3_tbl[2][5][256] = { +static const u8_displacement_t u8_toupper_b3_tbl[ + U8_UNICODE_LATEST + 1][5][256] = +{ +#ifdef U8_UNICODE_320 { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -31708,6 +31761,7 @@ static const u8_displacement_t u8_toupper_b3_tbl[2][5][256] = { { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, }, }, +#endif { { /* Third byte table 0. */ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, @@ -32042,7 +32096,8 @@ static const u8_displacement_t u8_toupper_b3_tbl[2][5][256] = { }, }; -static const uchar_t u8_toupper_b4_tbl[2][39][257] = { +static const uchar_t u8_toupper_b4_tbl[U8_UNICODE_LATEST + 1][39][257] = { +#ifdef U8_UNICODE_320 { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -33410,6 +33465,7 @@ static const uchar_t u8_toupper_b4_tbl[2][39][257] = { 0, }, }, +#endif { { /* Fourth byte table 0. */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -34779,7 +34835,8 @@ static const uchar_t u8_toupper_b4_tbl[2][39][257] = { }, }; -static const uchar_t u8_toupper_final_tbl[2][2318] = { +static const uchar_t u8_toupper_final_tbl[U8_UNICODE_LATEST + 1][2318] = { +#ifdef U8_UNICODE_320 { 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, @@ -35072,6 +35129,7 @@ static const uchar_t u8_toupper_final_tbl[2][2318] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, +#endif { 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, diff --git a/lib/libunicode/Makefile.am b/lib/libunicode/Makefile.am index 906759471163..e1ac666a5e60 100644 --- a/lib/libunicode/Makefile.am +++ b/lib/libunicode/Makefile.am @@ -3,5 +3,4 @@ libunicode_la_CFLAGS = $(AM_CFLAGS) $(KERNEL_CFLAGS) $(LIBRARY_CFLAGS) noinst_LTLIBRARIES += libunicode.la nodist_libunicode_la_SOURCES = \ - module/unicode/u8_textprep.c \ - module/unicode/uconv.c + module/unicode/u8_textprep.c diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index c9f6ed0dece3..da027798f962 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -18,7 +18,7 @@ .\" .\" Copyright (c) 2024, Klara, Inc. .\" -.Dd October 2, 2024 +.Dd November 1, 2024 .Dt ZFS 4 .Os . @@ -1333,9 +1333,10 @@ results in vector instructions from the respective CPU instruction set being used. . .It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int -Enable the experimental block cloning feature. +Enables access to the block cloning feature. If this setting is 0, then even if feature@block_cloning is enabled, -attempts to clone blocks will act as though the feature is disabled. +using functions and system calls that attempt to clone blocks will act as +though the feature is disabled. . .It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be diff --git a/man/man8/zdb.8 b/man/man8/zdb.8 index 08f5a3f70040..ae35454ad083 100644 --- a/man/man8/zdb.8 +++ b/man/man8/zdb.8 @@ -14,7 +14,7 @@ .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC. .\" Copyright (c) 2017 Intel Corporation. .\" -.Dd November 18, 2023 +.Dd October 27, 2024 .Dt ZDB 8 .Os . @@ -408,6 +408,8 @@ blocks cloned, the space saving as a result of cloning, and the saving ratio. .It Fl TT Display the per-vdev BRT statistics, including total references. .It Fl TTT +Display histograms of per-vdev BRT refcounts. +.It Fl TTTT Dump the contents of the block reference tables. .It Fl u , -uberblock Display the current uberblock. diff --git a/module/Kbuild.in b/module/Kbuild.in index dcbdbc912f6d..fc14d5cb535e 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -224,8 +224,7 @@ zfs-objs += $(addprefix nvpair/,$(NVPAIR_OBJS)) UNICODE_OBJS := \ - u8_textprep.o \ - uconv.o + u8_textprep.o zfs-objs += $(addprefix unicode/,$(UNICODE_OBJS)) diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 61a664c5bf66..c605069d07d3 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -217,8 +217,7 @@ SRCS+= abd_os.c \ zvol_os.c #unicode -SRCS+= u8_textprep.c \ - uconv.c +SRCS+= u8_textprep.c #zcommon SRCS+= cityhash.c \ diff --git a/module/os/freebsd/zfs/zio_crypt.c b/module/os/freebsd/zfs/zio_crypt.c index 2b62abcccb78..feaca93fb933 100644 --- a/module/os/freebsd/zfs/zio_crypt.c +++ b/module/os/freebsd/zfs/zio_crypt.c @@ -1686,11 +1686,10 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, freebsd_crypt_session_t *tmpl = NULL; uint8_t *authbuf = NULL; - + memset(&puio_s, 0, sizeof (puio_s)); + memset(&cuio_s, 0, sizeof (cuio_s)); zfs_uio_init(&puio, &puio_s); zfs_uio_init(&cuio, &cuio_s); - memset(GET_UIO_STRUCT(&puio), 0, sizeof (struct uio)); - memset(GET_UIO_STRUCT(&cuio), 0, sizeof (struct uio)); #ifdef FCRYPTO_DEBUG printf("%s(%s, %p, %p, %d, %p, %p, %u, %s, %p, %p, %p)\n", diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index a6271d3a7df1..6a66a72b91a9 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -801,24 +801,13 @@ vbio_completion(struct bio *bio) bio_put(bio); /* - * If we copied the ABD before issuing it, clean up and return the copy - * to the ADB, with changes if appropriate. + * We're likely in an interrupt context so we can't do ABD/memory work + * here; instead we stash vbio on the zio and take care of it in the + * done callback. */ - if (vbio->vbio_abd != NULL) { - void *buf = abd_to_buf(vbio->vbio_abd); - abd_free(vbio->vbio_abd); - vbio->vbio_abd = NULL; - - if (zio->io_type == ZIO_TYPE_READ) - abd_return_buf_copy(zio->io_abd, buf, zio->io_size); - else - abd_return_buf(zio->io_abd, buf, zio->io_size); - } - - /* Final cleanup */ - kmem_free(vbio, sizeof (vbio_t)); + ASSERT3P(zio->io_bio, ==, NULL); + zio->io_bio = vbio; - /* All done, submit for processing */ zio_delay_interrupt(zio); } @@ -834,38 +823,61 @@ vbio_completion(struct bio *bio) * NOTE: if you change this function, change the copy in * tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c, and add test * data there to validate the change you're making. - * */ typedef struct { - uint_t bmask; - uint_t npages; - uint_t end; -} vdev_disk_check_pages_t; + size_t blocksize; + int seen_first; + int seen_last; +} vdev_disk_check_alignment_t; static int -vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) +vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len, + void *priv) { (void) page; - vdev_disk_check_pages_t *s = priv; + vdev_disk_check_alignment_t *s = priv; /* - * If we didn't finish on a block size boundary last time, then there - * would be a gap if we tried to use this ABD as-is, so abort. + * The cardinal rule: a single on-disk block must never cross an + * physical (order-0) page boundary, as the kernel expects to be able + * to split at both LBS and page boundaries. + * + * This implies various alignment rules for the blocks in this + * (possibly compound) page, which we can check for. */ - if (s->end != 0) - return (1); /* - * Note if we're taking less than a full block, so we can check it - * above on the next call. + * If the previous page did not end on a page boundary, then we + * can't proceed without creating a hole. */ - s->end = (off+len) & s->bmask; + if (s->seen_last) + return (1); - /* All blocks after the first must start on a block size boundary. */ - if (s->npages != 0 && (off & s->bmask) != 0) + /* This page must contain only whole LBS-sized blocks. */ + if (!IS_P2ALIGNED(len, s->blocksize)) return (1); - s->npages++; + /* + * If this is not the first page in the ABD, then the data must start + * on a page-aligned boundary (so the kernel can split on page + * boundaries without having to deal with a hole). If it is, then + * it can start on LBS-alignment. + */ + if (s->seen_first) { + if (!IS_P2ALIGNED(off, PAGESIZE)) + return (1); + } else { + if (!IS_P2ALIGNED(off, s->blocksize)) + return (1); + s->seen_first = 1; + } + + /* + * If this data does not end on a page-aligned boundary, then this + * must be the last page in the ABD, for the same reason. + */ + s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE); + return (0); } @@ -874,15 +886,14 @@ vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) * the number of pages, or 0 if it can't be submitted like this. */ static boolean_t -vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev) +vdev_disk_check_alignment(abd_t *abd, uint64_t size, struct block_device *bdev) { - vdev_disk_check_pages_t s = { - .bmask = bdev_logical_block_size(bdev)-1, - .npages = 0, - .end = 0, + vdev_disk_check_alignment_t s = { + .blocksize = bdev_logical_block_size(bdev), }; - if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s)) + if (abd_iterate_page_func(abd, 0, size, + vdev_disk_check_alignment_cb, &s)) return (B_FALSE); return (B_TRUE); @@ -916,37 +927,32 @@ vdev_disk_io_rw(zio_t *zio) /* * Check alignment of the incoming ABD. If any part of it would require - * submitting a page that is not aligned to the logical block size, - * then we take a copy into a linear buffer and submit that instead. - * This should be impossible on a 512b LBS, and fairly rare on 4K, - * usually requiring abnormally-small data blocks (eg gang blocks) - * mixed into the same ABD as larger ones (eg aggregated). + * submitting a page that is not aligned to both the logical block size + * and the page size, then we take a copy into a new memory region with + * correct alignment. This should be impossible on a 512b LBS. On + * larger blocks, this can happen at least when a small number of + * blocks (usually 1) are allocated from a shared slab, or when + * abnormally-small data regions (eg gang headers) are mixed into the + * same ABD as larger allocations (eg aggregations). */ abd_t *abd = zio->io_abd; - if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) { - void *buf; - if (zio->io_type == ZIO_TYPE_READ) - buf = abd_borrow_buf(zio->io_abd, zio->io_size); - else - buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); + if (!vdev_disk_check_alignment(abd, zio->io_size, bdev)) { + /* Allocate a new memory region with guaranteed alignment */ + abd = abd_alloc_for_io(zio->io_size, + zio->io_abd->abd_flags & ABD_FLAG_META); - /* - * Wrap the copy in an abd_t, so we can use the same iterators - * to count and fill the vbio later. - */ - abd = abd_get_from_buf(buf, zio->io_size); + /* If we're writing copy our data into it */ + if (zio->io_type == ZIO_TYPE_WRITE) + abd_copy(abd, zio->io_abd, zio->io_size); /* - * False here would mean the borrowed copy has an invalid - * alignment too, which would mean we've somehow been passed a - * linear ABD with an interior page that has a non-zero offset - * or a size not a multiple of PAGE_SIZE. This is not possible. - * It would mean either zio_buf_alloc() or its underlying - * allocators have done something extremely strange, or our - * math in vdev_disk_check_pages() is wrong. In either case, + * False here would mean the new allocation has an invalid + * alignment too, which would mean that abd_alloc() is not + * guaranteeing this, or our logic in + * vdev_disk_check_alignment() is wrong. In either case, * something in seriously wrong and its not safe to continue. */ - VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev)); + VERIFY(vdev_disk_check_alignment(abd, zio->io_size, bdev)); } /* Allocate vbio, with a pointer to the borrowed ABD if necessary */ @@ -1437,6 +1443,28 @@ vdev_disk_io_start(zio_t *zio) static void vdev_disk_io_done(zio_t *zio) { + /* If this was a read or write, we need to clean up the vbio */ + if (zio->io_bio != NULL) { + vbio_t *vbio = zio->io_bio; + zio->io_bio = NULL; + + /* + * If we copied the ABD before issuing it, clean up and return + * the copy to the ADB, with changes if appropriate. + */ + if (vbio->vbio_abd != NULL) { + if (zio->io_type == ZIO_TYPE_READ) + abd_copy(zio->io_abd, vbio->vbio_abd, + zio->io_size); + + abd_free(vbio->vbio_abd); + vbio->vbio_abd = NULL; + } + + /* Final cleanup */ + kmem_free(vbio, sizeof (vbio_t)); + } + /* * If the device returned EIO, we revalidate the media. If it is * determined the media has changed this triggers the asynchronous diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 469197220859..dd9fd760b9c2 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -260,15 +260,6 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os) } else { ClearPageError(pp); SetPageUptodate(pp); - if (!PagePrivate(pp)) { - /* - * Set private bit so page migration - * will wait for us to finish writeback - * before calling migrate_folio(). - */ - SetPagePrivate(pp); - get_page(pp); - } if (mapping_writably_mapped(mp)) flush_dcache_page(pp); @@ -4090,14 +4081,6 @@ zfs_fillpage(struct inode *ip, struct page *pp) } else { ClearPageError(pp); SetPageUptodate(pp); - if (!PagePrivate(pp)) { - /* - * Set private bit so page migration will wait for us to - * finish writeback before calling migrate_folio(). - */ - SetPagePrivate(pp); - get_page(pp); - } } return (error); diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index bc1e17f086d9..bbaca2f58394 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -1577,14 +1577,6 @@ zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) mark_page_accessed(pp); SetPageUptodate(pp); ClearPageError(pp); - if (!PagePrivate(pp)) { - /* - * Set private bit so page migration will wait for us to - * finish writeback before calling migrate_folio(). - */ - SetPagePrivate(pp); - get_page(pp); - } unlock_page(pp); put_page(pp); } diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 50c63695dcc8..f6e014327717 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -28,6 +28,7 @@ #include #endif #include +#include #include #include #include @@ -607,42 +608,6 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) return (zpl_putpage(pp, wbc, &for_sync)); } -static int -zpl_releasepage(struct page *pp, gfp_t gfp) -{ - if (PagePrivate(pp)) { - ClearPagePrivate(pp); - put_page(pp); - } - return (1); -} - -#ifdef HAVE_VFS_RELEASE_FOLIO -static bool -zpl_release_folio(struct folio *folio, gfp_t gfp) -{ - return (zpl_releasepage(&folio->page, gfp)); -} -#endif - -#ifdef HAVE_VFS_INVALIDATE_FOLIO -static void -zpl_invalidate_folio(struct folio *folio, size_t offset, size_t len) -{ - if ((offset == 0) && (len == PAGE_SIZE)) { - zpl_releasepage(&folio->page, 0); - } -} -#else -static void -zpl_invalidatepage(struct page *pp, unsigned int offset, unsigned int len) -{ - if ((offset == 0) && (len == PAGE_SIZE)) { - zpl_releasepage(pp, 0); - } -} -#endif - /* * The flag combination which matches the behavior of zfs_space() is * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE @@ -1126,15 +1091,10 @@ const struct address_space_operations zpl_address_space_operations = { #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO .dirty_folio = filemap_dirty_folio, #endif -#ifdef HAVE_VFS_RELEASE_FOLIO - .release_folio = zpl_release_folio, -#else - .releasepage = zpl_releasepage, -#endif -#ifdef HAVE_VFS_INVALIDATE_FOLIO - .invalidate_folio = zpl_invalidate_folio, +#ifdef HAVE_VFS_MIGRATE_FOLIO + .migrate_folio = migrate_folio, #else - .invalidatepage = zpl_invalidatepage, + .migratepage = migrate_page, #endif }; diff --git a/module/unicode/u8_textprep.c b/module/unicode/u8_textprep.c index 49e22c88cde7..281396678ec2 100644 --- a/module/unicode/u8_textprep.c +++ b/module/unicode/u8_textprep.c @@ -527,6 +527,7 @@ do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper) for (i = 0; start_id < end_id; start_id++) u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id]; } else { +#ifdef U8_STRCMP_CI_LOWER b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id; if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) return ((size_t)sz); @@ -541,6 +542,9 @@ do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper) for (i = 0; start_id < end_id; start_id++) u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id]; +#else + __builtin_unreachable(); +#endif } /* @@ -1753,7 +1757,11 @@ do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2, s2last = s2 + n2; is_it_toupper = flag & U8_TEXTPREP_TOUPPER; +#ifdef U8_STRCMP_CI_LOWER is_it_tolower = flag & U8_TEXTPREP_TOLOWER; +#else + is_it_tolower = 0; +#endif canonical_decomposition = flag & U8_CANON_DECOMP; compatibility_decomposition = flag & U8_COMPAT_DECOMP; canonical_composition = flag & U8_CANON_COMP; @@ -1870,12 +1878,22 @@ u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv, if (flag == 0) { flag = U8_STRCMP_CS; } else { - f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER | - U8_STRCMP_CI_LOWER); +#ifdef U8_STRCMP_CI_LOWER + f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER + | U8_STRCMP_CI_LOWER); +#else + f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER); +#endif if (f == 0) { flag |= U8_STRCMP_CS; - } else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER && - f != U8_STRCMP_CI_LOWER) { + } +#ifdef U8_STRCMP_CI_LOWER + else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER && + f != U8_STRCMP_CI_LOWER) +#else + else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER) +#endif + { *errnum = EBADF; flag = U8_STRCMP_CS; } @@ -1908,10 +1926,13 @@ u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv, if (flag == U8_STRCMP_CI_UPPER) { return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, B_TRUE, errnum)); - } else if (flag == U8_STRCMP_CI_LOWER) { + } +#ifdef U8_STRCMP_CI_LOWER + else if (flag == U8_STRCMP_CI_LOWER) { return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, B_FALSE, errnum)); } +#endif return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, flag, errnum)); @@ -1945,11 +1966,13 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, return ((size_t)-1); } +#ifdef U8_TEXTPREP_TOLOWER f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER); if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) { *errnum = EBADF; return ((size_t)-1); } +#endif f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC && @@ -1974,7 +1997,11 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL); do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID); is_it_toupper = flag & U8_TEXTPREP_TOUPPER; +#ifdef U8_TEXTPREP_TOLOWER is_it_tolower = flag & U8_TEXTPREP_TOLOWER; +#else + is_it_tolower = 0; +#endif ret_val = 0; diff --git a/module/unicode/uconv.c b/module/unicode/uconv.c deleted file mode 100644 index 4bd19ebdd242..000000000000 --- a/module/unicode/uconv.c +++ /dev/null @@ -1,859 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - - - -/* - * Unicode encoding conversion functions among UTF-8, UTF-16, and UTF-32. - * (PSARC/2005/446, PSARC/2007/038, PSARC/2007/517) - * Man pages: uconv_u16tou32(9F), uconv_u16tou8(9F), uconv_u32tou16(9F), - * uconv_u32tou8(9F), uconv_u8tou16(9F), and uconv_u8tou32(9F). See also - * the section 3C man pages. - * Interface stability: Committed - */ - -#include -#ifdef _KERNEL -#include -#include -#include -#include -#include -#else -#include -#endif /* _KERNEL */ -#include -#include - - -/* - * The max and min values of high and low surrogate pairs of UTF-16, - * UTF-16 bit shift value, bit mask, and starting value outside of BMP. - */ -#define UCONV_U16_HI_MIN (0xd800U) -#define UCONV_U16_HI_MAX (0xdbffU) -#define UCONV_U16_LO_MIN (0xdc00U) -#define UCONV_U16_LO_MAX (0xdfffU) -#define UCONV_U16_BIT_SHIFT (0x0400U) -#define UCONV_U16_BIT_MASK (0x0fffffU) -#define UCONV_U16_START (0x010000U) - -/* The maximum value of Unicode coding space and ASCII coding space. */ -#define UCONV_UNICODE_MAX (0x10ffffU) -#define UCONV_ASCII_MAX (0x7fU) - -/* The mask values for input and output endians. */ -#define UCONV_IN_ENDIAN_MASKS (UCONV_IN_BIG_ENDIAN | UCONV_IN_LITTLE_ENDIAN) -#define UCONV_OUT_ENDIAN_MASKS (UCONV_OUT_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN) - -/* Native and reversed endian macros. */ -#ifdef _ZFS_BIG_ENDIAN -#define UCONV_IN_NAT_ENDIAN UCONV_IN_BIG_ENDIAN -#define UCONV_IN_REV_ENDIAN UCONV_IN_LITTLE_ENDIAN -#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_BIG_ENDIAN -#define UCONV_OUT_REV_ENDIAN UCONV_OUT_LITTLE_ENDIAN -#else -#define UCONV_IN_NAT_ENDIAN UCONV_IN_LITTLE_ENDIAN -#define UCONV_IN_REV_ENDIAN UCONV_IN_BIG_ENDIAN -#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_LITTLE_ENDIAN -#define UCONV_OUT_REV_ENDIAN UCONV_OUT_BIG_ENDIAN -#endif /* _BIG_ENDIAN */ - -/* The Byte Order Mark (BOM) character in normal and reversed byte orderings. */ -#define UCONV_BOM_NORMAL (0xfeffU) -#define UCONV_BOM_SWAPPED (0xfffeU) -#define UCONV_BOM_SWAPPED_32 (0xfffe0000U) - -/* UTF-32 boundaries based on UTF-8 character byte lengths. */ -#define UCONV_U8_ONE_BYTE (0x7fU) -#define UCONV_U8_TWO_BYTES (0x7ffU) -#define UCONV_U8_THREE_BYTES (0xffffU) -#define UCONV_U8_FOUR_BYTES (0x10ffffU) - -/* The common minimum and maximum values at the UTF-8 character bytes. */ -#define UCONV_U8_BYTE_MIN (0x80U) -#define UCONV_U8_BYTE_MAX (0xbfU) - -/* - * The following "6" and "0x3f" came from "10xx xxxx" bit representation of - * UTF-8 character bytes. - */ -#define UCONV_U8_BIT_SHIFT 6 -#define UCONV_U8_BIT_MASK 0x3f - -/* - * The following vector shows remaining bytes in a UTF-8 character. - * Index will be the first byte of the character. - */ -static const uchar_t remaining_bytes_tbl[0x100] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -/* C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF */ - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -/* D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -/* E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - -/* F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF */ - 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* - * The following is a vector of bit-masks to get used bits in - * the first byte of a UTF-8 character. Index is remaining bytes at above of - * the character. - */ -static const uchar_t u8_masks_tbl[6] = { 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; - -/* - * The following two vectors are to provide valid minimum and - * maximum values for the 2'nd byte of a multibyte UTF-8 character for - * better illegal sequence checking. The index value must be the value of - * the first byte of the UTF-8 character. - */ -static const uchar_t valid_min_2nd_byte[0x100] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - -/* C0 C1 C2 C3 C4 C5 C6 C7 */ - 0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* C8 C9 CA CB CC CD CE CF */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* D0 D1 D2 D3 D4 D5 D6 D7 */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* D8 D9 DA DB DC DD DE DF */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* E0 E1 E2 E3 E4 E5 E6 E7 */ - 0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* E8 E9 EA EB EC ED EE EF */ - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - -/* F0 F1 F2 F3 F4 F5 F6 F7 */ - 0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const uchar_t valid_max_2nd_byte[0x100] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - -/* C0 C1 C2 C3 C4 C5 C6 C7 */ - 0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* C8 C9 CA CB CC CD CE CF */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* D0 D1 D2 D3 D4 D5 D6 D7 */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* D8 D9 DA DB DC DD DE DF */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* E0 E1 E2 E3 E4 E5 E6 E7 */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, - -/* E8 E9 EA EB EC ED EE EF */ - 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf, - -/* F0 F1 F2 F3 F4 F5 F6 F7 */ - 0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0 -}; - - -static int -check_endian(int flag, int *in, int *out) -{ - *in = flag & UCONV_IN_ENDIAN_MASKS; - - /* You cannot have both. */ - if (*in == UCONV_IN_ENDIAN_MASKS) - return (EBADF); - - if (*in == 0) - *in = UCONV_IN_NAT_ENDIAN; - - *out = flag & UCONV_OUT_ENDIAN_MASKS; - - /* You cannot have both. */ - if (*out == UCONV_OUT_ENDIAN_MASKS) - return (EBADF); - - if (*out == 0) - *out = UCONV_OUT_NAT_ENDIAN; - - return (0); -} - -static boolean_t -check_bom16(const uint16_t *u16s, size_t u16l, int *in) -{ - if (u16l > 0) { - if (*u16s == UCONV_BOM_NORMAL) { - *in = UCONV_IN_NAT_ENDIAN; - return (B_TRUE); - } - if (*u16s == UCONV_BOM_SWAPPED) { - *in = UCONV_IN_REV_ENDIAN; - return (B_TRUE); - } - } - - return (B_FALSE); -} - -static boolean_t -check_bom32(const uint32_t *u32s, size_t u32l, int *in) -{ - if (u32l > 0) { - if (*u32s == UCONV_BOM_NORMAL) { - *in = UCONV_IN_NAT_ENDIAN; - return (B_TRUE); - } - if (*u32s == UCONV_BOM_SWAPPED_32) { - *in = UCONV_IN_REV_ENDIAN; - return (B_TRUE); - } - } - - return (B_FALSE); -} - -int -uconv_u16tou32(const uint16_t *u16s, size_t *utf16len, - uint32_t *u32s, size_t *utf32len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u32l; - uint32_t hi; - uint32_t lo; - boolean_t do_not_ignore_null; - - /* - * Do preliminary validity checks on parameters and collect info on - * endians. - */ - if (u16s == NULL || utf16len == NULL) - return (EILSEQ); - - if (u32s == NULL || utf32len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - /* - * Initialize input and output parameter buffer indices and - * temporary variables. - */ - u16l = u32l = 0; - hi = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - /* - * Check on the BOM at the beginning of the input buffer if required - * and if there is indeed one, process it. - */ - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom16(u16s, *utf16len, &inendian)) - u16l++; - - /* - * Reset inendian and outendian so that after this point, those can be - * used as condition values. - */ - inendian &= UCONV_IN_NAT_ENDIAN; - outendian &= UCONV_OUT_NAT_ENDIAN; - - /* - * If there is something in the input buffer and if necessary and - * requested, save the BOM at the output buffer. - */ - if (*utf16len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED_32; - - /* - * Do conversion; if encounter a surrogate pair, assemble high and - * low pair values to form a UTF-32 character. If a half of a pair - * exists alone, then, either it is an illegal (EILSEQ) or - * invalid (EINVAL) value. - */ - for (; u16l < *utf16len; u16l++) { - if (u16s[u16l] == 0 && do_not_ignore_null) - break; - - lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l])); - - if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) { - if (hi) - return (EILSEQ); - hi = lo; - continue; - } else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) { - if (! hi) - return (EILSEQ); - lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT + - lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK) - + UCONV_U16_START; - hi = 0; - } else if (hi) { - return (EILSEQ); - } - - if (u32l >= *utf32len) - return (E2BIG); - - u32s[u32l++] = (outendian) ? lo : BSWAP_32(lo); - } - - /* - * If high half didn't see low half, then, it's most likely the input - * parameter is incomplete. - */ - if (hi) - return (EINVAL); - - /* - * Save the number of consumed and saved characters. They do not - * include terminating NULL character (U+0000) at the end of - * the input buffer (even when UCONV_IGNORE_NULL isn't specified and - * the input buffer length is big enough to include the terminating - * NULL character). - */ - *utf16len = u16l; - *utf32len = u32l; - - return (0); -} - -int -uconv_u16tou8(const uint16_t *u16s, size_t *utf16len, - uchar_t *u8s, size_t *utf8len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u8l; - uint32_t hi; - uint32_t lo; - boolean_t do_not_ignore_null; - - if (u16s == NULL || utf16len == NULL) - return (EILSEQ); - - if (u8s == NULL || utf8len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u16l = u8l = 0; - hi = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom16(u16s, *utf16len, &inendian)) - u16l++; - - inendian &= UCONV_IN_NAT_ENDIAN; - - for (; u16l < *utf16len; u16l++) { - if (u16s[u16l] == 0 && do_not_ignore_null) - break; - - lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l])); - - if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) { - if (hi) - return (EILSEQ); - hi = lo; - continue; - } else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) { - if (! hi) - return (EILSEQ); - lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT + - lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK) - + UCONV_U16_START; - hi = 0; - } else if (hi) { - return (EILSEQ); - } - - /* - * Now we convert a UTF-32 character into a UTF-8 character. - * Unicode coding space is between U+0000 and U+10FFFF; - * anything bigger is an illegal character. - */ - if (lo <= UCONV_U8_ONE_BYTE) { - if (u8l >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)lo; - } else if (lo <= UCONV_U8_TWO_BYTES) { - if ((u8l + 1) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f)); - } else if (lo <= UCONV_U8_THREE_BYTES) { - if ((u8l + 2) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f)); - } else if (lo <= UCONV_U8_FOUR_BYTES) { - if ((u8l + 3) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f)); - } else { - return (EILSEQ); - } - } - - if (hi) - return (EINVAL); - - *utf16len = u16l; - *utf8len = u8l; - - return (0); -} - -int -uconv_u32tou16(const uint32_t *u32s, size_t *utf32len, - uint16_t *u16s, size_t *utf16len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u32l; - uint32_t hi; - uint32_t lo; - boolean_t do_not_ignore_null; - - if (u32s == NULL || utf32len == NULL) - return (EILSEQ); - - if (u16s == NULL || utf16len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u16l = u32l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom32(u32s, *utf32len, &inendian)) - u32l++; - - inendian &= UCONV_IN_NAT_ENDIAN; - outendian &= UCONV_OUT_NAT_ENDIAN; - - if (*utf32len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED; - - for (; u32l < *utf32len; u32l++) { - if (u32s[u32l] == 0 && do_not_ignore_null) - break; - - hi = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]); - - /* - * Anything bigger than the Unicode coding space, i.e., - * Unicode scalar value bigger than U+10FFFF, is an illegal - * character. - */ - if (hi > UCONV_UNICODE_MAX) - return (EILSEQ); - - /* - * Anything bigger than U+FFFF must be converted into - * a surrogate pair in UTF-16. - */ - if (hi >= UCONV_U16_START) { - lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) + - UCONV_U16_LO_MIN; - hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) + - UCONV_U16_HI_MIN; - - if ((u16l + 1) >= *utf16len) - return (E2BIG); - - if (outendian) { - u16s[u16l++] = (uint16_t)hi; - u16s[u16l++] = (uint16_t)lo; - } else { - u16s[u16l++] = BSWAP_16(((uint16_t)hi)); - u16s[u16l++] = BSWAP_16(((uint16_t)lo)); - } - } else { - if (u16l >= *utf16len) - return (E2BIG); - u16s[u16l++] = (outendian) ? (uint16_t)hi : - BSWAP_16(((uint16_t)hi)); - } - } - - *utf16len = u16l; - *utf32len = u32l; - - return (0); -} - -int -uconv_u32tou8(const uint32_t *u32s, size_t *utf32len, - uchar_t *u8s, size_t *utf8len, int flag) -{ - int inendian; - int outendian; - size_t u32l; - size_t u8l; - uint32_t lo; - boolean_t do_not_ignore_null; - - if (u32s == NULL || utf32len == NULL) - return (EILSEQ); - - if (u8s == NULL || utf8len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u32l = u8l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - if ((flag & UCONV_IN_ACCEPT_BOM) && - check_bom32(u32s, *utf32len, &inendian)) - u32l++; - - inendian &= UCONV_IN_NAT_ENDIAN; - - for (; u32l < *utf32len; u32l++) { - if (u32s[u32l] == 0 && do_not_ignore_null) - break; - - lo = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]); - - if (lo <= UCONV_U8_ONE_BYTE) { - if (u8l >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)lo; - } else if (lo <= UCONV_U8_TWO_BYTES) { - if ((u8l + 1) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f)); - } else if (lo <= UCONV_U8_THREE_BYTES) { - if ((u8l + 2) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f)); - } else if (lo <= UCONV_U8_FOUR_BYTES) { - if ((u8l + 3) >= *utf8len) - return (E2BIG); - u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12)); - u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6)); - u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f)); - } else { - return (EILSEQ); - } - } - - *utf32len = u32l; - *utf8len = u8l; - - return (0); -} - -int -uconv_u8tou16(const uchar_t *u8s, size_t *utf8len, - uint16_t *u16s, size_t *utf16len, int flag) -{ - int inendian; - int outendian; - size_t u16l; - size_t u8l; - uint32_t hi; - uint32_t lo; - int remaining_bytes; - int first_b; - boolean_t do_not_ignore_null; - - if (u8s == NULL || utf8len == NULL) - return (EILSEQ); - - if (u16s == NULL || utf16len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u16l = u8l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - outendian &= UCONV_OUT_NAT_ENDIAN; - - if (*utf8len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED; - - for (; u8l < *utf8len; ) { - if (u8s[u8l] == 0 && do_not_ignore_null) - break; - - /* - * Collect a UTF-8 character and convert it to a UTF-32 - * character. In doing so, we screen out illegally formed - * UTF-8 characters and treat such as illegal characters. - * The algorithm at below also screens out anything bigger - * than the U+10FFFF. - * - * See Unicode 3.1 UTF-8 Corrigendum and Unicode 3.2 for - * more details on the illegal values of UTF-8 character - * bytes. - */ - hi = (uint32_t)u8s[u8l++]; - - if (hi > UCONV_ASCII_MAX) { - if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0) - return (EILSEQ); - - first_b = hi; - hi = hi & u8_masks_tbl[remaining_bytes]; - - for (; remaining_bytes > 0; remaining_bytes--) { - /* - * If we have no more bytes, the current - * UTF-8 character is incomplete. - */ - if (u8l >= *utf8len) - return (EINVAL); - - lo = (uint32_t)u8s[u8l++]; - - if (first_b) { - if (lo < valid_min_2nd_byte[first_b] || - lo > valid_max_2nd_byte[first_b]) - return (EILSEQ); - first_b = 0; - } else if (lo < UCONV_U8_BYTE_MIN || - lo > UCONV_U8_BYTE_MAX) { - return (EILSEQ); - } - hi = (hi << UCONV_U8_BIT_SHIFT) | - (lo & UCONV_U8_BIT_MASK); - } - } - - if (hi >= UCONV_U16_START) { - lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) + - UCONV_U16_LO_MIN; - hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) + - UCONV_U16_HI_MIN; - - if ((u16l + 1) >= *utf16len) - return (E2BIG); - - if (outendian) { - u16s[u16l++] = (uint16_t)hi; - u16s[u16l++] = (uint16_t)lo; - } else { - u16s[u16l++] = BSWAP_16(((uint16_t)hi)); - u16s[u16l++] = BSWAP_16(((uint16_t)lo)); - } - } else { - if (u16l >= *utf16len) - return (E2BIG); - - u16s[u16l++] = (outendian) ? (uint16_t)hi : - BSWAP_16(((uint16_t)hi)); - } - } - - *utf16len = u16l; - *utf8len = u8l; - - return (0); -} - -int -uconv_u8tou32(const uchar_t *u8s, size_t *utf8len, - uint32_t *u32s, size_t *utf32len, int flag) -{ - int inendian; - int outendian; - size_t u32l; - size_t u8l; - uint32_t hi; - uint32_t c; - int remaining_bytes; - int first_b; - boolean_t do_not_ignore_null; - - if (u8s == NULL || utf8len == NULL) - return (EILSEQ); - - if (u32s == NULL || utf32len == NULL) - return (E2BIG); - - if (check_endian(flag, &inendian, &outendian) != 0) - return (EBADF); - - u32l = u8l = 0; - do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); - - outendian &= UCONV_OUT_NAT_ENDIAN; - - if (*utf8len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM)) - u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL : - UCONV_BOM_SWAPPED_32; - - for (; u8l < *utf8len; ) { - if (u8s[u8l] == 0 && do_not_ignore_null) - break; - - hi = (uint32_t)u8s[u8l++]; - - if (hi > UCONV_ASCII_MAX) { - if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0) - return (EILSEQ); - - first_b = hi; - hi = hi & u8_masks_tbl[remaining_bytes]; - - for (; remaining_bytes > 0; remaining_bytes--) { - if (u8l >= *utf8len) - return (EINVAL); - - c = (uint32_t)u8s[u8l++]; - - if (first_b) { - if (c < valid_min_2nd_byte[first_b] || - c > valid_max_2nd_byte[first_b]) - return (EILSEQ); - first_b = 0; - } else if (c < UCONV_U8_BYTE_MIN || - c > UCONV_U8_BYTE_MAX) { - return (EILSEQ); - } - hi = (hi << UCONV_U8_BIT_SHIFT) | - (c & UCONV_U8_BIT_MASK); - } - } - - if (u32l >= *utf32len) - return (E2BIG); - - u32s[u32l++] = (outendian) ? hi : BSWAP_32(hi); - } - - *utf32len = u32l; - *utf8len = u8l; - - return (0); -} - -#if defined(_KERNEL) -EXPORT_SYMBOL(uconv_u16tou32); -EXPORT_SYMBOL(uconv_u16tou8); -EXPORT_SYMBOL(uconv_u32tou16); -EXPORT_SYMBOL(uconv_u32tou8); -EXPORT_SYMBOL(uconv_u8tou16); -EXPORT_SYMBOL(uconv_u8tou32); -#endif diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index df9368fc8bdb..b1419d96f4ef 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -182,6 +182,7 @@ static void dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr); * Global data structures and functions for the dbuf cache. */ static kmem_cache_t *dbuf_kmem_cache; +kmem_cache_t *dbuf_dirty_kmem_cache; static taskq_t *dbu_evict_taskq; static kthread_t *dbuf_cache_evict_thread; @@ -966,6 +967,8 @@ dbuf_init(void) dbuf_kmem_cache = kmem_cache_create("dmu_buf_impl_t", sizeof (dmu_buf_impl_t), 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); + dbuf_dirty_kmem_cache = kmem_cache_create("dbuf_dirty_record_t", + sizeof (dbuf_dirty_record_t), 0, NULL, NULL, NULL, NULL, NULL, 0); for (int i = 0; i < hmsize; i++) mutex_init(&h->hash_mutexes[i], NULL, MUTEX_NOLOCKDEP, NULL); @@ -1041,6 +1044,7 @@ dbuf_fini(void) sizeof (kmutex_t)); kmem_cache_destroy(dbuf_kmem_cache); + kmem_cache_destroy(dbuf_dirty_kmem_cache); taskq_destroy(dbu_evict_taskq); mutex_enter(&dbuf_evict_lock); @@ -2343,7 +2347,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * to make a copy of it so that the changes we make in this * transaction group won't leak out when we sync the older txg. */ - dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); + dr = kmem_cache_alloc(dbuf_dirty_kmem_cache, KM_SLEEP); + memset(dr, 0, sizeof (*dr)); list_link_init(&dr->dr_dirty_node); list_link_init(&dr->dr_dbuf_node); dr->dr_dnode = dn; @@ -2526,7 +2531,7 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr) mutex_destroy(&dr->dt.di.dr_mtx); list_destroy(&dr->dt.di.dr_children); } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); ASSERT3U(db->db_dirtycnt, >, 0); db->db_dirtycnt -= 1; } @@ -2616,7 +2621,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) } } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); ASSERT(db->db_dirtycnt > 0); db->db_dirtycnt -= 1; @@ -2941,7 +2946,7 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder, * (see dbuf_sync_dnode_leaf_crypt()). */ ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT); - ASSERT3U(db->db_level, ==, 0); + ASSERT0(db->db_level); ASSERT(db->db_objset->os_raw_receive); dmu_buf_will_dirty_impl(db_fake, @@ -2950,6 +2955,7 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder, dr = dbuf_find_dirty_eq(db, tx->tx_txg); ASSERT3P(dr, !=, NULL); + ASSERT3U(dr->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN); dr->dt.dl.dr_has_raw_params = B_TRUE; dr->dt.dl.dr_byteorder = byteorder; @@ -2964,10 +2970,14 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx) struct dirty_leaf *dl; dbuf_dirty_record_t *dr; + ASSERT3U(db->db.db_object, !=, DMU_META_DNODE_OBJECT); + ASSERT0(db->db_level); + dr = list_head(&db->db_dirty_records); ASSERT3P(dr, !=, NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; + ASSERT0(dl->dr_has_raw_params); dl->dr_overridden_by = *bp; dl->dr_override_state = DR_OVERRIDDEN; BP_SET_LOGICAL_BIRTH(&dl->dr_overridden_by, dr->dr_txg); @@ -3040,6 +3050,7 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, ASSERT3P(dr, !=, NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; + ASSERT0(dl->dr_has_raw_params); encode_embedded_bp_compressed(&dl->dr_overridden_by, data, comp, uncompressed_size, compressed_size); BPE_SET_ETYPE(&dl->dr_overridden_by, etype); @@ -5083,7 +5094,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted, zio->io_txg); - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); } static void diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 3f87cfe6bee9..362415a25895 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1895,6 +1895,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) mutex_enter(&db->db_mtx); ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC); if (zio->io_error == 0) { + ASSERT0(dr->dt.dl.dr_has_raw_params); dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE); if (dr->dt.dl.dr_nopwrite) { blkptr_t *bp = zio->io_bp; @@ -2190,6 +2191,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) return (SET_ERROR(EALREADY)); } + ASSERT0(dr->dt.dl.dr_has_raw_params); ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC; mutex_exit(&db->db_mtx); @@ -2657,6 +2659,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, db = (dmu_buf_impl_t *)dbuf; bp = &bps[i]; + ASSERT3U(db->db.db_object, !=, DMU_META_DNODE_OBJECT); ASSERT0(db->db_level); ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(db->db_blkid != DMU_SPILL_BLKID); @@ -2672,11 +2675,6 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, db = (dmu_buf_impl_t *)dbuf; bp = &bps[i]; - ASSERT0(db->db_level); - ASSERT(db->db_blkid != DMU_BONUS_BLKID); - ASSERT(db->db_blkid != DMU_SPILL_BLKID); - ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp)); - dmu_buf_will_clone_or_dio(dbuf, tx); mutex_enter(&db->db_mtx); @@ -2685,6 +2683,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, VERIFY(dr != NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dl = &dr->dt.dl; + ASSERT0(dl->dr_has_raw_params); dl->dr_overridden_by = *bp; if (!BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) != 0) { if (!BP_IS_EMBEDDED(bp)) { diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c index ed96e7515bc7..40b78b519f49 100644 --- a/module/zfs/dmu_direct.c +++ b/module/zfs/dmu_direct.c @@ -180,6 +180,7 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx) if (list_next(&db->db_dirty_records, dr_head) != NULL) zp.zp_nopwrite = B_FALSE; + ASSERT0(dr_head->dt.dl.dr_has_raw_params); ASSERT3S(dr_head->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN); dr_head->dt.dl.dr_override_state = DR_IN_DMU_SYNC; diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index a174972e9b57..aeb8ff3b6688 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -180,6 +180,8 @@ struct send_range { */ dnode_phys_t *dnp; blkptr_t bp; + /* Piggyback unmodified spill block */ + struct send_range *spill_range; } object; struct srr { uint32_t datablksz; @@ -231,6 +233,8 @@ range_free(struct send_range *range) size_t size = sizeof (dnode_phys_t) * (range->sru.object.dnp->dn_extra_slots + 1); kmem_free(range->sru.object.dnp, size); + if (range->sru.object.spill_range) + range_free(range->sru.object.spill_range); } else if (range->type == DATA) { mutex_enter(&range->sru.data.lock); while (range->sru.data.io_outstanding) @@ -617,7 +621,7 @@ dump_spill(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object, drrs->drr_length = blksz; drrs->drr_toguid = dscp->dsc_toguid; - /* See comment in dump_dnode() for full details */ + /* See comment in piggyback_unmodified_spill() for full details */ if (zfs_send_unmodified_spill_blocks && (BP_GET_LOGICAL_BIRTH(bp) <= dscp->dsc_fromtxg)) { drrs->drr_flags |= DRR_SPILL_UNMODIFIED; @@ -793,35 +797,6 @@ dump_dnode(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object, (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0) return (SET_ERROR(EINTR)); - /* - * Send DRR_SPILL records for unmodified spill blocks. This is useful - * because changing certain attributes of the object (e.g. blocksize) - * can cause old versions of ZFS to incorrectly remove a spill block. - * Including these records in the stream forces an up to date version - * to always be written ensuring they're never lost. Current versions - * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can - * ignore these unmodified spill blocks. - */ - if (zfs_send_unmodified_spill_blocks && - (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && - (BP_GET_LOGICAL_BIRTH(DN_SPILL_BLKPTR(dnp)) <= dscp->dsc_fromtxg)) { - struct send_range record; - blkptr_t *bp = DN_SPILL_BLKPTR(dnp); - - memset(&record, 0, sizeof (struct send_range)); - record.type = DATA; - record.object = object; - record.eos_marker = B_FALSE; - record.start_blkid = DMU_SPILL_BLKID; - record.end_blkid = record.start_blkid + 1; - record.sru.data.bp = *bp; - record.sru.data.obj_type = dnp->dn_type; - record.sru.data.datablksz = BP_GET_LSIZE(bp); - - if (do_dump(dscp, &record) != 0) - return (SET_ERROR(EINTR)); - } - if (dscp->dsc_err != 0) return (SET_ERROR(EINTR)); @@ -911,6 +886,9 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) case OBJECT: err = dump_dnode(dscp, &range->sru.object.bp, range->object, range->sru.object.dnp); + /* Dump piggybacked unmodified spill block */ + if (!err && range->sru.object.spill_range) + err = do_dump(dscp, range->sru.object.spill_range); return (err); case OBJECT_RANGE: { ASSERT3U(range->start_blkid + 1, ==, range->end_blkid); @@ -939,34 +917,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) ASSERT3U(srdp->datablksz, ==, BP_GET_LSIZE(bp)); ASSERT3U(range->start_blkid + 1, ==, range->end_blkid); - if (BP_GET_TYPE(bp) == DMU_OT_SA) { - arc_flags_t aflags = ARC_FLAG_WAIT; - zio_flag_t zioflags = ZIO_FLAG_CANFAIL; - if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) { - ASSERT(BP_IS_PROTECTED(bp)); - zioflags |= ZIO_FLAG_RAW; - } - - zbookmark_phys_t zb; - ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID); - zb.zb_objset = dmu_objset_id(dscp->dsc_os); - zb.zb_object = range->object; - zb.zb_level = 0; - zb.zb_blkid = range->start_blkid; - - arc_buf_t *abuf = NULL; - if (!dscp->dsc_dso->dso_dryrun && arc_read(NULL, spa, - bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - zioflags, &aflags, &zb) != 0) - return (SET_ERROR(EIO)); - - err = dump_spill(dscp, bp, zb.zb_object, - (abuf == NULL ? NULL : abuf->b_data)); - if (abuf != NULL) - arc_buf_destroy(abuf, &abuf); - return (err); - } if (send_do_embed(bp, dscp->dsc_featureflags)) { err = dump_write_embedded(dscp, range->object, range->start_blkid * srdp->datablksz, @@ -975,8 +926,9 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) } ASSERT(range->object > dscp->dsc_resume_object || (range->object == dscp->dsc_resume_object && + (range->start_blkid == DMU_SPILL_BLKID || range->start_blkid * srdp->datablksz >= - dscp->dsc_resume_offset)); + dscp->dsc_resume_offset))); /* it's a level-0 block of a regular object */ mutex_enter(&srdp->lock); @@ -1006,8 +958,6 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) ASSERT(dscp->dsc_dso->dso_dryrun || srdp->abuf != NULL || srdp->abd != NULL); - uint64_t offset = range->start_blkid * srdp->datablksz; - char *data = NULL; if (srdp->abd != NULL) { data = abd_to_buf(srdp->abd); @@ -1016,6 +966,14 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range) data = srdp->abuf->b_data; } + if (BP_GET_TYPE(bp) == DMU_OT_SA) { + ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID); + err = dump_spill(dscp, bp, range->object, data); + return (err); + } + + uint64_t offset = range->start_blkid * srdp->datablksz; + /* * If we have large blocks stored on disk but the send flags * don't allow us to send large blocks, we split the data from @@ -1098,6 +1056,8 @@ range_alloc(enum type type, uint64_t object, uint64_t start_blkid, range->sru.data.io_outstanding = 0; range->sru.data.io_err = 0; range->sru.data.io_compressed = B_FALSE; + } else if (type == OBJECT) { + range->sru.object.spill_range = NULL; } return (range); } @@ -1742,6 +1702,45 @@ enqueue_range(struct send_reader_thread_arg *srta, bqueue_t *q, dnode_t *dn, bqueue_enqueue(q, range, datablksz); } +/* + * Send DRR_SPILL records for unmodified spill blocks. This is useful + * because changing certain attributes of the object (e.g. blocksize) + * can cause old versions of ZFS to incorrectly remove a spill block. + * Including these records in the stream forces an up to date version + * to always be written ensuring they're never lost. Current versions + * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can + * ignore these unmodified spill blocks. + * + * We piggyback the spill_range to dnode range instead of enqueueing it + * so send_range_after won't complain. + */ +static uint64_t +piggyback_unmodified_spill(struct send_reader_thread_arg *srta, + struct send_range *range) +{ + ASSERT3U(range->type, ==, OBJECT); + + dnode_phys_t *dnp = range->sru.object.dnp; + uint64_t fromtxg = srta->smta->to_arg->fromtxg; + + if (!zfs_send_unmodified_spill_blocks || + !(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) || + !(BP_GET_LOGICAL_BIRTH(DN_SPILL_BLKPTR(dnp)) <= fromtxg)) + return (0); + + blkptr_t *bp = DN_SPILL_BLKPTR(dnp); + struct send_range *spill_range = range_alloc(DATA, range->object, + DMU_SPILL_BLKID, DMU_SPILL_BLKID+1, B_FALSE); + spill_range->sru.data.bp = *bp; + spill_range->sru.data.obj_type = dnp->dn_type; + spill_range->sru.data.datablksz = BP_GET_LSIZE(bp); + + issue_data_read(srta, spill_range); + range->sru.object.spill_range = spill_range; + + return (BP_GET_LSIZE(bp)); +} + /* * This thread is responsible for two things: First, it retrieves the correct * blkptr in the to ds if we need to send the data because of something from @@ -1773,17 +1772,20 @@ send_reader_thread(void *arg) uint64_t last_obj_exists = B_TRUE; while (!range->eos_marker && !srta->cancel && smta->error == 0 && err == 0) { + uint64_t spill = 0; switch (range->type) { case DATA: issue_data_read(srta, range); bqueue_enqueue(outq, range, range->sru.data.datablksz); range = get_next_range_nofree(inq, range); break; - case HOLE: case OBJECT: + spill = piggyback_unmodified_spill(srta, range); + zfs_fallthrough; + case HOLE: case OBJECT_RANGE: case REDACT: // Redacted blocks must exist - bqueue_enqueue(outq, range, sizeof (*range)); + bqueue_enqueue(outq, range, sizeof (*range) + spill); range = get_next_range_nofree(inq, range); break; case PREVIOUSLY_REDACTED: { diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 3fdcebdff918..6aee7afb6954 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -1377,6 +1377,13 @@ dmu_tx_pool(dmu_tx_t *tx) return (tx->tx_pool); } +/* + * Register a callback to be executed at the end of a TXG. + * + * Note: This currently exists for outside consumers, specifically the ZFS OSD + * for Lustre. Please do not remove before checking that project. For examples + * on how to use this see `ztest_commit_callback`. + */ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) { diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index f67dad002319..122d7d0d17d8 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -566,7 +566,7 @@ dnode_undirty_dbufs(list_t *list) mutex_destroy(&dr->dt.di.dr_mtx); list_destroy(&dr->dt.di.dr_children); } - kmem_free(dr, sizeof (dbuf_dirty_record_t)); + kmem_cache_free(dbuf_dirty_kmem_cache, dr); dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE); } } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index bcab46c63bfa..983f444d79b0 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2205,10 +2205,11 @@ vdev_open(vdev_t *vd) vd->vdev_max_asize = max_asize; /* - * If the vdev_ashift was not overridden at creation time, + * If the vdev_ashift was not overridden at creation time + * (0) or the override value is impossible for the device, * then set it the logical ashift and optimize the ashift. */ - if (vd->vdev_ashift == 0) { + if (vd->vdev_ashift < vd->vdev_logical_ashift) { vd->vdev_ashift = vd->vdev_logical_ashift; if (vd->vdev_logical_ashift > ASHIFT_MAX) { diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 0799f17758bf..c01a9cf5d0b2 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -58,9 +58,9 @@ #include /* - * Enable the experimental block cloning feature. If this setting is 0, then - * even if feature@block_cloning is enabled, attempts to clone blocks will act - * as though the feature is disabled. + * Enables access to the block cloning feature. If this setting is 0, then even + * if feature@block_cloning is enabled, using functions and system calls that + * attempt to clone blocks will act as though the feature is disabled. */ int zfs_bclone_enabled = 1; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 3c7305e0e724..a5daf73d59ba 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -187,20 +187,6 @@ zio_init(void) continue; #endif -#if defined(__linux__) && defined(_KERNEL) - /* - * Workaround issue of Linux vdev_disk.c, in some cases not - * linearizing buffers with disk sector crossing a page - * boundary. It is fine for hardware, but somehow required by - * LUKS. It is not typical for ZFS to produce such buffers, but - * it may happen if 6KB block is compressed to 4KB, while still - * having 2KB alignment. Banning the 6KB buffers helps vdevs - * with ashifh=12. - */ - if (size > PAGESIZE && !IS_P2ALIGNED(size, PAGESIZE)) - continue; -#endif - if (IS_P2ALIGNED(size, PAGESIZE)) align = PAGESIZE; else diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh index fa1ce9c64d33..3b06e8c35c77 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/zpool_add_dryrun_output.ksh @@ -148,9 +148,9 @@ done # Foreach test create pool, add -n devices and check output. for (( i=0; i < ${#tests[@]}; i+=1 )); do - typeset tree="${tests[$i].tree}" - typeset add="${tests[$i].add}" - typeset want="${tests[$i].want}" + tree="${tests[$i].tree}" + add="${tests[$i].add}" + want="${tests[$i].want}" log_must eval zpool create "$TESTPOOL" $tree log_must poolexists "$TESTPOOL" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh index 485891c945a3..0671ea618e05 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_dryrun_output.ksh @@ -124,8 +124,8 @@ done # Foreach test create pool, add -n devices and check output. for (( i=0; i < ${#tests[@]}; i+=1 )); do - typeset tree="${tests[$i].tree}" - typeset want="${tests[$i].want}" + tree="${tests[$i].tree}" + want="${tests[$i].want}" typeset out="$(log_must eval "zpool create -n '$TESTPOOL' $tree" | \ sed /^SUCCESS/d)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh index 410b1fe7a03e..def1d154387d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_split/zpool_split_dryrun_output.ksh @@ -133,9 +133,9 @@ done # Foreach test create pool, add -n devices and check output. for (( i=0; i < ${#tests[@]}; i+=1 )); do - typeset tree="${tests[$i].tree}" - typeset devs="${tests[$i].devs}" - typeset want="${tests[$i].want}" + tree="${tests[$i].tree}" + devs="${tests[$i].devs}" + want="${tests[$i].want}" log_must eval zpool create "$TESTPOOL" $tree log_must poolexists "$TESTPOOL" diff --git a/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c b/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c index 5c6d28eb2c44..7b926da6c01c 100644 --- a/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c +++ b/tests/zfs-tests/tests/functional/vdev_disk/page_alignment.c @@ -30,7 +30,7 @@ /* * This tests the vdev_disk page alignment check callback - * vdev_disk_check_pages_cb(). For now, this test includes a copy of that + * vdev_disk_check_alignment_cb(). For now, this test includes a copy of that * function from module/os/linux/zfs/vdev_disk.c. If you change it here, * remember to change it there too, and add tests data here to validate the * change you're making. @@ -38,36 +38,69 @@ struct page; +/* + * This is spl_pagesize() in userspace, which requires linking libspl, but + * would also then use the platform page size, which isn't what we want for + * a test. To keep the check callback the same as the real one, we just + * redefine it. + */ +#undef PAGESIZE +#define PAGESIZE (4096) + typedef struct { - uint32_t bmask; - uint32_t npages; - uint32_t end; -} vdev_disk_check_pages_t; + size_t blocksize; + int seen_first; + int seen_last; +} vdev_disk_check_alignment_t; static int -vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv) +vdev_disk_check_alignment_cb(struct page *page, size_t off, size_t len, + void *priv) { (void) page; - vdev_disk_check_pages_t *s = priv; + vdev_disk_check_alignment_t *s = priv; /* - * If we didn't finish on a block size boundary last time, then there - * would be a gap if we tried to use this ABD as-is, so abort. + * The cardinal rule: a single on-disk block must never cross an + * physical (order-0) page boundary, as the kernel expects to be able + * to split at both LBS and page boundaries. + * + * This implies various alignment rules for the blocks in this + * (possibly compound) page, which we can check for. */ - if (s->end != 0) - return (1); /* - * Note if we're taking less than a full block, so we can check it - * above on the next call. + * If the previous page did not end on a page boundary, then we + * can't proceed without creating a hole. */ - s->end = (off+len) & s->bmask; + if (s->seen_last) + return (1); - /* All blocks after the first must start on a block size boundary. */ - if (s->npages != 0 && (off & s->bmask) != 0) + /* This page must contain only whole LBS-sized blocks. */ + if (!IS_P2ALIGNED(len, s->blocksize)) return (1); - s->npages++; + /* + * If this is not the first page in the ABD, then the data must start + * on a page-aligned boundary (so the kernel can split on page + * boundaries without having to deal with a hole). If it is, then + * it can start on LBS-alignment. + */ + if (s->seen_first) { + if (!IS_P2ALIGNED(off, PAGESIZE)) + return (1); + } else { + if (!IS_P2ALIGNED(off, s->blocksize)) + return (1); + s->seen_first = 1; + } + + /* + * If this data does not end on a page-aligned boundary, then this + * must be the last page in the ABD, for the same reason. + */ + s->seen_last = !IS_P2ALIGNED(off+len, PAGESIZE); + return (0); } @@ -75,8 +108,8 @@ typedef struct { /* test name */ const char *name; - /* blocks size mask */ - uint32_t mask; + /* stored block size */ + uint32_t blocksize; /* amount of data to take */ size_t size; @@ -89,39 +122,39 @@ static const page_test_t valid_tests[] = { /* 512B block tests */ { "512B blocks, 4K single page", - 0x1ff, 0x1000, { + 512, 0x1000, { { 0x0, 0x1000 }, }, }, { "512B blocks, 1K at start of page", - 0x1ff, 0x400, { + 512, 0x400, { { 0x0, 0x1000 }, }, }, { "512B blocks, 1K at end of page", - 0x1ff, 0x400, { + 512, 0x400, { { 0x0c00, 0x0400 }, }, }, { "512B blocks, 1K within page, 512B start offset", - 0x1ff, 0x400, { + 512, 0x400, { { 0x0200, 0x0e00 }, }, }, { "512B blocks, 8K across 2x4K pages", - 0x1ff, 0x2000, { + 512, 0x2000, { { 0x0, 0x1000 }, { 0x0, 0x1000 }, }, }, { "512B blocks, 4K across two pages, 2K start offset", - 0x1ff, 0x1000, { + 512, 0x1000, { { 0x0800, 0x0800 }, { 0x0, 0x0800 }, }, }, { "512B blocks, 16K across 5x4K pages, 512B start offset", - 0x1ff, 0x4000, { + 512, 0x4000, { { 0x0200, 0x0e00 }, { 0x0, 0x1000 }, { 0x0, 0x1000 }, @@ -130,7 +163,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, 8x8K compound pages", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0, 0x2000 }, { 0x0, 0x2000 }, { 0x0, 0x2000 }, @@ -142,7 +175,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, 9x8K compound pages, 512B start offset", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0200, 0x1e00 }, { 0x0, 0x2000 }, { 0x0, 0x2000 }, @@ -155,7 +188,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, 2x16K compound pages, 8x4K pages", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0, 0x8000 }, { 0x0, 0x8000 }, { 0x0, 0x1000 }, @@ -169,7 +202,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, mixed 4K/8K/16K pages", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0, 0x1000 }, { 0x0, 0x2000 }, { 0x0, 0x1000 }, @@ -183,7 +216,7 @@ static const page_test_t valid_tests[] = { }, }, { "512B blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset", - 0x1ff, 0x10000, { + 512, 0x10000, { { 0x0400, 0x0c00 }, { 0x0, 0x1000 }, { 0x0, 0x1000 }, @@ -200,48 +233,18 @@ static const page_test_t valid_tests[] = { /* 4K block tests */ { "4K blocks, 4K single page", - 0xfff, 0x1000, { - { 0x0, 0x1000 }, - }, - }, { - "4K blocks, 1K at start of page", - 0xfff, 0x400, { + 4096, 0x1000, { { 0x0, 0x1000 }, }, - }, { - "4K blocks, 1K at end of page", - 0xfff, 0x400, { - { 0x0c00, 0x0400 }, - }, - }, { - "4K blocks, 1K within page, 512B start offset", - 0xfff, 0x400, { - { 0x0200, 0x0e00 }, - }, }, { "4K blocks, 8K across 2x4K pages", - 0xfff, 0x2000, { + 4096, 0x2000, { { 0x0, 0x1000 }, { 0x0, 0x1000 }, }, - }, { - "4K blocks, 4K across two pages, 2K start offset", - 0xfff, 0x1000, { - { 0x0800, 0x0800 }, - { 0x0, 0x0800 }, - }, - }, { - "4K blocks, 16K across 5x4K pages, 512B start offset", - 0xfff, 0x4000, { - { 0x0200, 0x0e00 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x0200 }, - }, }, { "4K blocks, 64K data, 8x8K compound pages", - 0xfff, 0x10000, { + 4096, 0x10000, { { 0x0, 0x2000 }, { 0x0, 0x2000 }, { 0x0, 0x2000 }, @@ -251,22 +254,9 @@ static const page_test_t valid_tests[] = { { 0x0, 0x2000 }, { 0x0, 0x2000 }, }, - }, { - "4K blocks, 64K data, 9x8K compound pages, 512B start offset", - 0xfff, 0x10000, { - { 0x0200, 0x1e00 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x0200 }, - }, }, { "4K blocks, 64K data, 2x16K compound pages, 8x4K pages", - 0xfff, 0x10000, { + 4096, 0x10000, { { 0x0, 0x8000 }, { 0x0, 0x8000 }, { 0x0, 0x1000 }, @@ -280,7 +270,7 @@ static const page_test_t valid_tests[] = { }, }, { "4K blocks, 64K data, mixed 4K/8K/16K pages", - 0xfff, 0x10000, { + 4096, 0x10000, { { 0x0, 0x1000 }, { 0x0, 0x2000 }, { 0x0, 0x1000 }, @@ -292,29 +282,19 @@ static const page_test_t valid_tests[] = { { 0x0, 0x1000 }, { 0x0, 0x2000 }, }, - }, { - "4K blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset", - 0xfff, 0x10000, { - { 0x0400, 0x0c00 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x1000 }, - { 0x0, 0x2000 }, - { 0x0, 0x2000 }, - { 0x0, 0x1000 }, - { 0x0, 0x8000 }, - { 0x0, 0x1000 }, - { 0x0, 0x0400 }, - }, }, { 0 }, }; static const page_test_t invalid_tests[] = { + /* + * Gang tests. Composed of lots of smaller allocations, rarely properly + * aligned. + */ { "512B blocks, 16K data, 512 leader (gang block simulation)", - 0x1ff, 0x8000, { + 512, 0x8000, { { 0x0, 0x0200 }, { 0x0, 0x1000 }, { 0x0, 0x1000 }, @@ -324,7 +304,7 @@ static const page_test_t invalid_tests[] = { }, { "4K blocks, 32K data, 2 incompatible spans " "(gang abd simulation)", - 0xfff, 0x8000, { + 4096, 0x8000, { { 0x0800, 0x0800 }, { 0x0, 0x1000 }, { 0x0, 0x1000 }, @@ -337,6 +317,90 @@ static const page_test_t invalid_tests[] = { { 0x0, 0x0800 }, }, }, + + /* + * Blocks must not span multiple physical pages. These tests used to + * be considered valid, but were since found to be invalid and were + * moved here. + */ + { + "4K blocks, 4K across two pages, 2K start offset", + 4096, 0x1000, { + { 0x0800, 0x0800 }, + { 0x0, 0x0800 }, + }, + }, { + "4K blocks, 16K across 5x4K pages, 512B start offset", + 4096, 0x4000, { + { 0x0200, 0x0e00 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x0200 }, + }, + }, { + "4K blocks, 64K data, 9x8K compound pages, 512B start offset", + 4096, 0x10000, { + { 0x0200, 0x1e00 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x0200 }, + }, + }, { + "4K blocks, 64K data, mixed 4K/8K/16K pages, 1K start offset", + 4096, 0x10000, { + { 0x0400, 0x0c00 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x1000 }, + { 0x0, 0x2000 }, + { 0x0, 0x2000 }, + { 0x0, 0x1000 }, + { 0x0, 0x8000 }, + { 0x0, 0x1000 }, + { 0x0, 0x0400 }, + }, + }, + + /* + * This is the very typical case of a 4K block being allocated from + * the middle of a mixed-used slab backed by a higher-order compound + * page. + */ + { + "4K blocks, 4K data from compound slab, 2K-align offset", + 4096, 0x1000, { + { 0x1800, 0x6800 } + } + }, + + /* + * Blocks smaller than LBS should never be possible, but used to be by + * accident (see GH#16990). We test for and reject them just to be + * sure. + */ + { + "4K blocks, 1K at end of page", + 4096, 0x400, { + { 0x0c00, 0x0400 }, + }, + }, { + "4K blocks, 1K at start of page", + 4096, 0x400, { + { 0x0, 0x1000 }, + }, + }, { + "4K blocks, 1K within page, 512B start offset", + 4096, 0x400, { + { 0x0200, 0x0e00 }, + }, + }, + { 0 }, }; @@ -345,10 +409,8 @@ run_test(const page_test_t *test, bool verbose) { size_t rem = test->size; - vdev_disk_check_pages_t s = { - .bmask = 0xfff, - .npages = 0, - .end = 0, + vdev_disk_check_alignment_t s = { + .blocksize = test->blocksize, }; for (int i = 0; test->pages[i][1] > 0; i++) { @@ -362,7 +424,7 @@ run_test(const page_test_t *test, bool verbose) "rem %lx, take %lx\n", i, off, len, rem, take); - if (vdev_disk_check_pages_cb(NULL, off, take, &s)) { + if (vdev_disk_check_alignment_cb(NULL, off, take, &s)) { if (verbose) printf(" ABORT: misalignment detected, " "rem %lx\n", rem); @@ -389,7 +451,7 @@ run_test_set(const page_test_t *tests, bool want, int *ntests, int *npassed) for (const page_test_t *test = &tests[0]; test->name; test++) { bool pass = (run_test(test, false) == want); if (pass) { - printf("%s: PASS\n", test->name); + printf("%c %s: PASS\n", want ? '+' : '-', test->name); (*npassed)++; } else { printf("%s: FAIL [expected %s, got %s]\n", test->name,