diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 924a4d4aa6f9..64cccc3d3c5a 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -342,8 +342,8 @@ get_usage(zfs_help_t idx) return (gettext("\tunload-key [-r] " "<-a | filesystem|volume>\n")); case HELP_CHANGE_KEY: - return (gettext("\tchange-key [-l] [-o keyformat=]" - "\t [-o keylocation=] [-o pbkfd2iters=]" + return (gettext("\tchange-key [-l] [-o keyformat=]\n" + "\t [-o keylocation=] [-o pbkfd2iters=]\n" "\t \n" "\tchange-key -i [-l] \n")); } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 4905927fca3f..4d9e6bbb5515 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -2121,6 +2121,15 @@ show_import(nvlist_t *config) "updating.\n")); break; + case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: + (void) printf(gettext(" action: Existing " + "encrypted datasets contain an on-disk " + "incompatibility, which\n\tneeds to be " + "corrected. Backup these datasets to new " + "encrypted datasets\n\tand destroy the " + "old ones.\n")); + break; + default: /* * All errata must contain an action message. @@ -6499,6 +6508,17 @@ status_callback(zpool_handle_t *zhp, void *data) "run 'zpool scrub'.\n")); break; + case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: + (void) printf(gettext("\tExisting encrypted datasets " + "contain an on-disk incompatibility\n\twhich " + "needs to be corrected.\n")); + (void) printf(gettext("action: To correct the issue " + "backup existing encrypted datasets to new\n\t" + "encrypted datasets and destroy the old ones. 
" + "'zfs mount -o ro' can\n\tbe used to temporarily " + "mount existing encrypted datasets readonly.\n")); + break; + default: /* * All errata which allow the pool to be imported diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index f7bba4c8cf16..4c33e0a5a92d 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -443,6 +443,8 @@ main(int argc, char *argv[]) drro->drr_raw_bonuslen = BSWAP_32(drro->drr_raw_bonuslen); drro->drr_toguid = BSWAP_64(drro->drr_toguid); + drro->drr_maxblkid = + BSWAP_64(drro->drr_maxblkid); } payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); @@ -451,7 +453,8 @@ main(int argc, char *argv[]) (void) printf("OBJECT object = %llu type = %u " "bonustype = %u blksz = %u bonuslen = %u " "dn_slots = %u raw_bonuslen = %u " - "flags = %u indblkshift = %u nlevels = %u " + "flags = %u maxblkid = %llu " + "indblkshift = %u nlevels = %u " "nblkptr = %u\n", (u_longlong_t)drro->drr_object, drro->drr_type, @@ -461,6 +464,7 @@ main(int argc, char *argv[]) drro->drr_dn_slots, drro->drr_raw_bonuslen, drro->drr_flags, + (u_longlong_t)drro->drr_maxblkid, drro->drr_indblkshift, drro->drr_nlevels, drro->drr_nblkptr); diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index a8b5418b60b2..8a21bf3781c8 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -200,7 +200,8 @@ extern uint64_t metaslab_df_alloc_threshold; extern unsigned long zfs_deadman_synctime_ms; extern int metaslab_preload_limit; extern boolean_t zfs_compressed_arc_enabled; -extern int zfs_abd_scatter_enabled; +extern int zfs_abd_scatter_enabled; +extern int dmu_object_alloc_chunk_shift; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; @@ -314,6 +315,7 @@ static ztest_shared_callstate_t *ztest_shared_callstate; ztest_func_t ztest_dmu_read_write; ztest_func_t ztest_dmu_write_parallel; ztest_func_t ztest_dmu_object_alloc_free; +ztest_func_t ztest_dmu_object_next_chunk; ztest_func_t ztest_dmu_commit_callbacks; 
ztest_func_t ztest_zap; ztest_func_t ztest_zap_parallel; @@ -361,6 +363,7 @@ ztest_info_t ztest_info[] = { ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always), ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always), ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always), + ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes), ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always), ZTI_INIT(ztest_zap, 30, &zopt_always), ZTI_INIT(ztest_zap_parallel, 100, &zopt_always), @@ -4055,6 +4058,26 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) umem_free(od, size); } +/* + * Rewind the global allocator to verify object allocation backfilling. + */ +void +ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id) +{ + objset_t *os = zd->zd_os; + int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift; + uint64_t object; + + /* + * Rewind the global allocator randomly back to a lower object number + * to force backfilling and reclamation of recently freed dnodes. + */ + mutex_enter(&os->os_obj_lock); + object = ztest_random(os->os_obj_next_chunk); + os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk); + mutex_exit(&os->os_obj_lock); +} + #undef OD_ARRAY_SIZE #define OD_ARRAY_SIZE 2 diff --git a/contrib/dracut/90zfs/zfs-load-key.sh b/contrib/dracut/90zfs/zfs-load-key.sh new file mode 100644 index 000000000000..d86763fcc91e --- /dev/null +++ b/contrib/dracut/90zfs/zfs-load-key.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# This script only gets executed on systemd systems, see mount-zfs.sh for non-systemd systems + +# import the libs now that we know the pool imported +[ -f /lib/dracut-lib.sh ] && dracutlib=/lib/dracut-lib.sh +[ -f /usr/lib/dracut/modules.d/99base/dracut-lib.sh ] && dracutlib=/usr/lib/dracut/modules.d/99base/dracut-lib.sh +. "$dracutlib" + +# load the kernel command line vars +[ -z "$root" ] && root=$(getarg root=) +# If root is not ZFS= or zfs: or rootfstype is not zfs then we are not supposed to handle it. 
+[ "${root##zfs:}" = "${root}" -a "${root##ZFS=}" = "${root}" -a "$rootfstype" != "zfs" ] && exit 0 + +# There is a race between the zpool import and the pre-mount hooks, so we wait for a pool to be imported +while true; do + zpool list -H | grep -q -v '^$' && break + [[ $(systemctl is-failed zfs-import-cache.service) == 'failed' ]] && exit 1 + [[ $(systemctl is-failed zfs-import-scan.service) == 'failed' ]] && exit 1 + sleep 0.1s +done + +# run this after import as zfs-import-cache/scan service is confirmed good +if [[ "${root}" = "zfs:AUTO" ]] ; then + root=$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}') +else + root="${root##zfs:}" + root="${root##ZFS=}" +fi + +# if pool encryption is active and the zfs command understands '-o encryption' +if [[ $(zpool list -H -o feature@encryption $(echo "${root}" | awk -F\/ '{print $1}')) == 'active' ]]; then + # check if root dataset has encryption enabled + if $(zfs list -H -o encryption "${root}" | grep -q -v off); then + # figure out where the root dataset has its key, the keylocation should not be none + while true; do + if [[ $(zfs list -H -o keylocation "${root}") == 'none' ]]; then + root=$(echo -n "${root}" | awk 'BEGIN{FS=OFS="/"}{NF--; print}') + [[ "${root}" == '' ]] && exit 1 + else + break + fi + done + # decrypt them + TRY_COUNT=5 + while [ $TRY_COUNT != 0 ]; do + zfs load-key "$root" <<< $(systemd-ask-password "Encrypted ZFS password for ${root}: ") + [[ $? == 0 ]] && break + ((TRY_COUNT-=1)) + done + fi +fi diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 61c02e8a7683..1da43d3276be 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -434,6 +434,13 @@ int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx); +/* + * Manually set the maxblkid on a dnode. This will adjust nlevels accordingly + * to accommodate the change. 
+ */ +int dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid, + dmu_tx_t *tx); + /* * Set the checksum property on a dnode. The new checksum algorithm will * apply to all newly written blocks; existing blocks will not be affected. diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 11b8fc625795..7ee992f3116e 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -127,7 +127,7 @@ struct objset { boolean_t os_rescan_dnodes; /* os_phys_buf should be written raw next txg */ - boolean_t os_next_write_raw; + boolean_t os_next_write_raw[TXG_SIZE]; /* Protected by os_obj_lock */ kmutex_t os_obj_lock; @@ -217,6 +217,7 @@ boolean_t dmu_objset_userobjused_enabled(objset_t *os); boolean_t dmu_objset_userobjspace_upgradable(objset_t *os); void dmu_objset_userobjspace_upgrade(objset_t *os); boolean_t dmu_objset_userobjspace_present(objset_t *os); +boolean_t dmu_objset_incompatible_encryption_version(objset_t *os); int dmu_fsname(const char *snapname, char *buf); diff --git a/include/sys/dnode.h b/include/sys/dnode.h index e5e39b18c9e9..691fd443a260 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -221,6 +221,13 @@ typedef struct dnode_phys { uint64_t dn_maxblkid; /* largest allocated block ID */ uint64_t dn_used; /* bytes (or sectors) of disk space */ + /* + * Both dn_pad2 and dn_pad3 are protected by the block's MAC. This + * allows us to protect any fields that might be added here in the + * future. In either case, developers will want to check + * zio_crypt_init_uios_dnode() to ensure the new field is being + * protected properly. 
+ */ uint64_t dn_pad3[4]; /* @@ -301,6 +308,7 @@ struct dnode { uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */ uint16_t dn_next_bonuslen[TXG_SIZE]; uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */ + uint64_t dn_next_maxblkid[TXG_SIZE]; /* next maxblkid (block ID) */ /* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */ uint32_t dn_dbufs_count; /* count of dn_dbufs */ @@ -416,6 +424,7 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off, int minlvl, uint64_t blkfill, uint64_t txg); void dnode_evict_dbufs(dnode_t *dn); void dnode_evict_bonus(dnode_t *dn); +void dnode_free_interior_slots(dnode_t *dn); #define DNODE_IS_CACHEABLE(_dn) \ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ @@ -509,6 +518,11 @@ typedef struct dnode_stats { * which had already been unlinked in an earlier txg. */ kstat_named_t dnode_hold_free_txg; + /* + * Number of times dnode_free_interior_slots() needed to retry + * acquiring a slot zrl lock due to contention. + */ + kstat_named_t dnode_free_interior_lock_retry; /* * Number of new dnodes allocated by dnode_allocate(). */ diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h index 6fb91f67d1ab..d0c789035f3d 100644 --- a/include/sys/dsl_crypt.h +++ b/include/sys/dsl_crypt.h @@ -39,7 +39,7 @@ #define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1" #define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ" #define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT" - +#define DSL_CRYPTO_KEY_VERSION "DSL_CRYPTO_VERSION" /* * In-memory representation of a wrapping key.
One of these structs will exist @@ -169,6 +169,7 @@ int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv); int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation); +boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd); void spa_keystore_init(spa_keystore_t *sk); void spa_keystore_fini(spa_keystore_t *sk); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 6b1c3bb565b9..611279d6b822 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -891,6 +891,7 @@ typedef enum zpool_errata { ZPOOL_ERRATA_NONE, ZPOOL_ERRATA_ZOL_2094_SCRUB, ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY, + ZPOOL_ERRATA_ZOL_6845_ENCRYPTION, } zpool_errata_t; /* diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index 6924280c4a47..827f619d9670 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -219,10 +219,12 @@ typedef struct dmu_replay_record { uint8_t drr_flags; uint32_t drr_raw_bonuslen; uint64_t drr_toguid; - /* only nonzero for raw streams */ + /* only (possibly) nonzero for raw streams */ uint8_t drr_indblkshift; uint8_t drr_nlevels; uint8_t drr_nblkptr; + uint8_t drr_pad[5]; + uint64_t drr_maxblkid; /* bonus content follows */ } drr_object; struct drr_freeobjects { diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h index 9cf9a17c2c13..57b4c1e7c322 100644 --- a/include/sys/zio_crypt.h +++ b/include/sys/zio_crypt.h @@ -36,6 +36,8 @@ struct zbookmark_phys; #define MASTER_KEY_MAX_LEN 32 #define SHA512_HMAC_KEYLEN 64 +#define ZIO_CRYPT_KEY_CURRENT_VERSION 1ULL + typedef enum zio_crypt_type { ZC_TYPE_NONE = 0, ZC_TYPE_CCM, @@ -64,6 +66,9 @@ typedef struct zio_crypt_key { /* encryption algorithm */ uint64_t zk_crypt; + /* on-disk format version */ + uint64_t zk_version; + /* GUID for uniquely identifying this key. Not encrypted on disk. 
*/ uint64_t zk_guid; @@ -104,9 +109,9 @@ int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out); int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); -int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, - uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, - zio_crypt_key_t *key); +int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key); int zio_crypt_generate_iv(uint8_t *ivbuf); int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, uint8_t *ivbuf, uint8_t *salt); diff --git a/lib/libzfs/libzfs_crypto.c b/lib/libzfs/libzfs_crypto.c index b1fac2f62138..6ccee740f6e8 100644 --- a/lib/libzfs/libzfs_crypto.c +++ b/lib/libzfs/libzfs_crypto.c @@ -1054,7 +1054,7 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation) } try_again: - /* fetching and deriving the key are correctible errors. set the flag */ + /* fetching and deriving the key are correctable errors. set the flag */ correctible = B_TRUE; /* get key material from key format and location */ @@ -1110,22 +1110,25 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation) error: zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); - if (key_material != NULL) + if (key_material != NULL) { free(key_material); - if (key_data != NULL) + key_material = NULL; + } + if (key_data != NULL) { free(key_data); + key_data = NULL; + } /* * Here we decide if it is ok to allow the user to retry entering their * key. The can_retry flag will be set if the user is entering their - * key from an interactive prompt. The correctible flag will only be - * set if an error that occured could be corrected by retrying. Both + * key from an interactive prompt. 
The correctable flag will only be + * set if an error that occurred could be corrected by retrying. Both * flags are needed to allow the user to attempt key entry again */ - if (can_retry && correctible && attempts <= MAX_KEY_PROMPT_ATTEMPTS) { - attempts++; + attempts++; + if (can_retry && correctible && attempts < MAX_KEY_PROMPT_ATTEMPTS) goto try_again; - } return (ret); } diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index 320783523b7d..f900ac723107 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -351,6 +351,15 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap) if (find_vdev_problem(nvroot, vdev_removed)) return (ZPOOL_STATUS_REMOVED_DEV); + /* + * Informational errata available. + */ + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata); + if (errata) { + *erratap = errata; + return (ZPOOL_STATUS_ERRATA); + } + /* * Outdated, but usable, version */ @@ -382,15 +391,6 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap) } } - /* - * Informational errata available. 
- */ - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata); - if (errata) { - *erratap = errata; - return (ZPOOL_STATUS_ERRATA); - } - return (ZPOOL_STATUS_OK); } diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 45b0abe7fd6c..2f3fe97719d5 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1229,6 +1229,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) arc_buf_hdr_t *hdr = vbuf; bzero(hdr, HDR_FULL_SIZE); + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); refcount_create(&hdr->b_l1hdr.b_refcnt); mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); @@ -3246,9 +3247,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata); IMPLY(alloc_rdata, HDR_PROTECTED(hdr)); - if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - if (alloc_rdata) { size = HDR_GET_PSIZE(hdr); ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); @@ -6751,6 +6749,17 @@ arc_write_ready(zio_t *zio) ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); ASSERT(HDR_PROTECTED(hdr)); + if (BP_SHOULD_BYTESWAP(bp)) { + if (BP_GET_LEVEL(bp) > 0) { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64; + } else { + hdr->b_l1hdr.b_byteswap = + DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); + } + } else { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + } + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt, diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 517a284de2be..e805681ce2a8 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2067,6 +2067,7 @@ dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx) ASSERT3P(dr, !=, NULL); ASSERT3U(dr->dr_txg, ==, tx->tx_txg); dr->dt.dl.dr_raw = B_TRUE; + db->db_objset->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; } #pragma weak dmu_buf_fill_done = dbuf_fill_done diff --git 
a/module/zfs/dmu.c b/module/zfs/dmu.c index 56740ae375a8..20ed3ebffcac 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2029,6 +2029,23 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, return (err); } +int +dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid, + dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(dn, maxblkid, tx, B_FALSE); + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + return (0); +} + void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, dmu_tx_t *tx) @@ -2214,8 +2231,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) dedup = B_FALSE; } - if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET) + if (level <= 0 && + (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) { compress = ZIO_COMPRESS_EMPTY; + } } zp->zp_compress = compress; @@ -2488,6 +2507,7 @@ EXPORT_SYMBOL(dmu_object_size_from_db); EXPORT_SYMBOL(dmu_object_dnsize_from_db); EXPORT_SYMBOL(dmu_object_set_nlevels); EXPORT_SYMBOL(dmu_object_set_blocksize); +EXPORT_SYMBOL(dmu_object_set_maxblkid); EXPORT_SYMBOL(dmu_object_set_checksum); EXPORT_SYMBOL(dmu_object_set_compress); EXPORT_SYMBOL(dmu_write_policy); diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index e7412b7509f4..f53da407fd40 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -275,7 +275,6 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, return (err); } - int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx) { diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index e596b70e9ca0..befce9be6bc8 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -663,6 +663,9 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, return (SET_ERROR(EINVAL)); } else if (!readonly && 
dsl_dataset_is_snapshot(ds)) { return (SET_ERROR(EROFS)); + } else if (!readonly && decrypt && + dsl_dir_incompatible_encryption_version(ds->ds_dir)) { + return (SET_ERROR(EROFS)); } /* if we are decrypting, we can now check MACs in os->os_phys_buf */ @@ -1505,9 +1508,9 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) * the os_phys_buf raw. Neither of these actions will effect the MAC * at this point. */ - if (arc_is_unauthenticated(os->os_phys_buf) || os->os_next_write_raw) { + if (os->os_next_write_raw[tx->tx_txg & TXG_MASK]) { ASSERT(os->os_encrypted); - os->os_next_write_raw = B_FALSE; + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE; arc_convert_to_raw(os->os_phys_buf, os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER, DMU_OT_OBJSET, NULL, NULL, NULL); @@ -2635,6 +2638,13 @@ dmu_objset_find(char *name, int func(const char *, void *), void *arg, return (error); } +boolean_t +dmu_objset_incompatible_encryption_version(objset_t *os) +{ + return (dsl_dir_incompatible_encryption_version( + os->os_dsl_dataset->ds_dir)); +} + void dmu_objset_set_user(objset_t *os, void *user_ptr) { diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 09d79742bae6..2c2ed8fb3191 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -570,6 +570,7 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, drro->drr_flags |= DRR_RAW_BYTESWAP; /* needed for reconstructing dnp on recv side */ + drro->drr_maxblkid = dnp->dn_maxblkid; drro->drr_indblkshift = dnp->dn_indblkshift; drro->drr_nlevels = dnp->dn_nlevels; drro->drr_nblkptr = dnp->dn_nblkptr; @@ -2294,6 +2295,7 @@ byteswap_record(dmu_replay_record_t *drr) DO32(drr_object.drr_bonuslen); DO32(drr_object.drr_raw_bonuslen); DO64(drr_object.drr_toguid); + DO64(drr_object.drr_maxblkid); break; case DRR_FREEOBJECTS: DO64(drr_freeobjects.drr_firstobj); @@ -2453,10 +2455,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, } err = dmu_object_info(rwa->os, 
drro->drr_object, &doi); - - if (err != 0 && err != ENOENT) + if (err != 0 && err != ENOENT && err != EEXIST) return (SET_ERROR(EINVAL)); - object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; if (drro->drr_object > rwa->max_object) rwa->max_object = drro->drr_object; @@ -2474,20 +2474,99 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, int nblkptr = deduce_nblkptr(drro->drr_bonustype, drro->drr_bonuslen); + object = drro->drr_object; + /* nblkptr will be bounded by the bonus size and type */ if (rwa->raw && nblkptr != drro->drr_nblkptr) return (SET_ERROR(EINVAL)); - if (drro->drr_blksz != doi.doi_data_block_size || + if (rwa->raw && + (drro->drr_blksz != doi.doi_data_block_size || + nblkptr < doi.doi_nblkptr || + indblksz != doi.doi_metadata_block_size || + drro->drr_nlevels < doi.doi_indirection || + drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT)) { + err = dmu_free_long_range_raw(rwa->os, + drro->drr_object, 0, DMU_OBJECT_END); + if (err != 0) + return (SET_ERROR(EINVAL)); + } else if (drro->drr_blksz != doi.doi_data_block_size || nblkptr < doi.doi_nblkptr || - (rwa->raw && - (indblksz != doi.doi_metadata_block_size || - drro->drr_nlevels < doi.doi_indirection))) { + drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) { err = dmu_free_long_range(rwa->os, drro->drr_object, 0, DMU_OBJECT_END); if (err != 0) return (SET_ERROR(EINVAL)); } + + /* + * The dmu does not currently support decreasing nlevels + * on an object. For non-raw sends, this does not matter + * and the new object can just use the previous one's nlevels. + * For raw sends, however, the structure of the received dnode + * (including nlevels) must match that of the send side. + * Therefore, instead of using dmu_object_reclaim(), we must + * free the object completely and call dmu_object_claim_dnsize() + * instead. 
+ */ + if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) || + drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) { + if (rwa->raw) { + err = dmu_free_long_object_raw(rwa->os, + drro->drr_object); + } else { + err = dmu_free_long_object(rwa->os, + drro->drr_object); + } + if (err != 0) + return (SET_ERROR(EINVAL)); + + txg_wait_synced(dmu_objset_pool(rwa->os), 0); + object = DMU_NEW_OBJECT; + } + } else if (err == EEXIST) { + /* + * The object requested is currently an interior slot of a + * multi-slot dnode. This will be resolved when the next txg + * is synced out, since the send stream will have told us + * to free this slot when we freed the associated dnode + * earlier in the stream. + */ + txg_wait_synced(dmu_objset_pool(rwa->os), 0); + object = drro->drr_object; + } else { + /* object is free and we are about to allocate a new one */ + object = DMU_NEW_OBJECT; + } + + /* + * If this is a multi-slot dnode there is a chance that this + * object will expand into a slot that is already used by + * another object from the previous snapshot. We must free + * these objects before we attempt to allocate the new dnode. 
+ */ + if (drro->drr_dn_slots > 1) { + for (uint64_t slot = drro->drr_object + 1; + slot < drro->drr_object + drro->drr_dn_slots; + slot++) { + dmu_object_info_t slot_doi; + + err = dmu_object_info(rwa->os, slot, &slot_doi); + if (err == ENOENT || err == EEXIST) + continue; + else if (err != 0) + return (err); + + if (rwa->raw) + err = dmu_free_long_object_raw(rwa->os, slot); + else + err = dmu_free_long_object(rwa->os, slot); + + if (err != 0) + return (err); + } + + txg_wait_synced(dmu_objset_pool(rwa->os), 0); } tx = dmu_tx_create(rwa->os); @@ -2538,6 +2617,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, drro->drr_blksz, drro->drr_indblkshift, tx)); VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object, drro->drr_nlevels, tx)); + VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object, + drro->drr_maxblkid, tx)); } if (data != NULL) { @@ -2839,9 +2920,13 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, dmu_tx_abort(tx); return (err); } - dmu_buf_will_dirty(db_spill, tx); - if (rwa->raw) + + if (rwa->raw) { VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx)); + dmu_buf_will_change_crypt_params(db_spill, tx); + } else { + dmu_buf_will_dirty(db_spill, tx); + } if (db_spill->db_size < drrs->drr_length) VERIFY(0 == dbuf_spill_set_blksz(db_spill, @@ -3186,7 +3271,7 @@ receive_read_record(struct receive_arg *ra) * See receive_read_prefetch for an explanation why we're * storing this object in the ignore_obj_list. 
*/ - if (err == ENOENT || + if (err == ENOENT || err == EEXIST || (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) { objlist_insert(&ra->ignore_objlist, drro->drr_object); err = 0; @@ -3772,7 +3857,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, int next_err = 0; while (next_err == 0) { - free_err = dmu_free_long_object(rwa->os, obj); + if (drc->drc_raw) { + free_err = dmu_free_long_object_raw(rwa->os, + obj); + } else { + free_err = dmu_free_long_object(rwa->os, obj); + } if (free_err != 0 && free_err != ENOENT) break; diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index c1fbf3c3b3f7..b4c131e98323 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -55,6 +55,7 @@ dnode_stats_t dnode_stats = { { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 }, { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 }, { "dnode_hold_free_txg", KSTAT_DATA_UINT64 }, + { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_allocate", KSTAT_DATA_UINT64 }, { "dnode_reallocate", KSTAT_DATA_UINT64 }, { "dnode_buf_evict", KSTAT_DATA_UINT64 }, @@ -134,6 +135,7 @@ dnode_cons(void *arg, void *unused, int kmflag) bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk)); bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); + bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { list_link_init(&dn->dn_dirty_link[i]); @@ -193,6 +195,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_bonuslen[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); } ASSERT0(dn->dn_allocated_txg); @@ -516,7 +519,8 @@ dnode_destroy(dnode_t *dn) mutex_exit(&os->os_lock); /* the dnode can no longer move, so we can release the handle */ - zrl_remove(&dn->dn_handle->dnh_zrlock); + if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock)) + zrl_remove(&dn->dn_handle->dnh_zrlock); dn->dn_allocated_txg = 0; 
dn->dn_free_txg = 0; @@ -602,6 +606,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); @@ -662,6 +667,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset)))); dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS; + + dnode_free_interior_slots(dn); DNODE_STAT_BUMP(dnode_reallocate); /* clean up any unreferenced dbufs */ @@ -767,6 +774,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) sizeof (odn->dn_next_bonuslen)); bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0], sizeof (odn->dn_next_blksz)); + bcopy(&odn->dn_next_maxblkid[0], &ndn->dn_next_maxblkid[0], + sizeof (odn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { list_move_tail(&ndn->dn_dirty_records[i], &odn->dn_dirty_records[i]); @@ -1062,19 +1071,73 @@ dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr) } static boolean_t -dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr) +dnode_check_slots_free(dnode_children_t *children, int idx, int slots) { ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); for (int i = idx; i < idx + slots; i++) { dnode_handle_t *dnh = &children->dnc_children[i]; - if (dnh->dnh_dnode != ptr) + dnode_t *dn = dnh->dnh_dnode; + + if (dn == DN_SLOT_FREE) { + continue; + } else if (DN_SLOT_IS_PTR(dn)) { + mutex_enter(&dn->dn_mtx); + dmu_object_type_t type = dn->dn_type; + mutex_exit(&dn->dn_mtx); + + if (type != DMU_OT_NONE) + return (B_FALSE); + + continue; + } else { return (B_FALSE); + } + + return (B_FALSE); } return (B_TRUE); } +static void +dnode_reclaim_slots(dnode_children_t *children, int idx, int slots) +{ + ASSERT3S(idx + slots, <=, 
DNODES_PER_BLOCK); + + for (int i = idx; i < idx + slots; i++) { + dnode_handle_t *dnh = &children->dnc_children[i]; + + ASSERT(zrl_is_locked(&dnh->dnh_zrlock)); + + if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) { + ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE); + dnode_destroy(dnh->dnh_dnode); + dnh->dnh_dnode = DN_SLOT_FREE; + } + } +} + +void +dnode_free_interior_slots(dnode_t *dn) +{ + dnode_children_t *children = dmu_buf_get_user(&dn->dn_dbuf->db); + int epb = dn->dn_dbuf->db.db_size >> DNODE_SHIFT; + int idx = (dn->dn_object & (epb - 1)) + 1; + int slots = dn->dn_num_slots - 1; + + if (slots == 0) + return; + + ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); + + while (!dnode_slots_tryenter(children, idx, slots)) + DNODE_STAT_BUMP(dnode_free_interior_lock_retry); + + dnode_set_slots(children, idx, slots, DN_SLOT_FREE); + dnode_slots_rele(children, idx, slots); +} + void dnode_special_close(dnode_handle_t *dnh) { @@ -1372,7 +1435,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, while (dn == DN_SLOT_UNINIT) { dnode_slots_hold(dnc, idx, slots); - if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) { + if (!dnode_check_slots_free(dnc, idx, slots)) { DNODE_STAT_BUMP(dnode_hold_free_misses); dnode_slots_rele(dnc, idx, slots); dbuf_rele(db, FTAG); @@ -1385,15 +1448,29 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, continue; } - if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) { + if (!dnode_check_slots_free(dnc, idx, slots)) { DNODE_STAT_BUMP(dnode_hold_free_lock_misses); dnode_slots_rele(dnc, idx, slots); dbuf_rele(db, FTAG); return (SET_ERROR(ENOSPC)); } + /* + * Allocated but otherwise free dnodes which would + * be in the interior of a multi-slot dnode need + * to be freed. Single slot dnodes can be safely + * re-purposed as a performance optimization.
+ */ + if (slots > 1) + dnode_reclaim_slots(dnc, idx + 1, slots - 1); + dnh = &dnc->dnc_children[idx]; - dn = dnode_create(os, dn_block + idx, db, object, dnh); + if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) { + dn = dnh->dnh_dnode; + } else { + dn = dnode_create(os, dn_block + idx, db, + object, dnh); + } } mutex_enter(&dn->dn_mtx); @@ -1751,6 +1828,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) goto out; dn->dn_maxblkid = blkid; + dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] = blkid; /* * Compute the number of levels necessary to support the new maxblkid. diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 2ec729a6f93a..7d3850a5f82a 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -519,6 +519,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; dn->dn_next_indblkshift[txgoff] = 0; dn->dn_next_blksz[txgoff] = 0; + dn->dn_next_maxblkid[txgoff] = 0; /* ASSERT(blkptrs are zero); */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); @@ -528,6 +529,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) if (dn->dn_allocated_txg != dn->dn_free_txg) dmu_buf_will_dirty(&dn->dn_dbuf->db, tx); bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots); + dnode_free_interior_slots(dn); mutex_enter(&dn->dn_mtx); dn->dn_type = DMU_OT_NONE; @@ -535,6 +537,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_have_spill = B_FALSE; + dn->dn_num_slots = 1; mutex_exit(&dn->dn_mtx); ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); @@ -718,6 +721,17 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; } + /* + * This must be done after dnode_sync_free_range() + * and dnode_increase_indirection(). 
+ */ + if (dn->dn_next_maxblkid[txgoff]) { + mutex_enter(&dn->dn_mtx); + dnp->dn_maxblkid = dn->dn_next_maxblkid[txgoff]; + dn->dn_next_maxblkid[txgoff] = 0; + mutex_exit(&dn->dn_mtx); + } + if (dn->dn_next_nblkptr[txgoff]) { /* this should only happen on a realloc */ ASSERT(dn->dn_allocated_txg == tx->tx_txg); diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 59562d194e73..6a63d54cadb3 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -347,7 +347,7 @@ spa_keystore_fini(spa_keystore_t *sk) rw_destroy(&sk->sk_dk_lock); } -int +static int dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) { if (dd->dd_crypto_obj == 0) @@ -357,6 +357,34 @@ dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj)); } +int +dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version) +{ + *version = 0; + + if (dd->dd_crypto_obj == 0) + return (SET_ERROR(ENOENT)); + + /* version 0 is implied by ENOENT */ + (void) zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_VERSION, 8, 1, version); + + return (0); +} + +boolean_t +dsl_dir_incompatible_encryption_version(dsl_dir_t *dd) +{ + int ret; + uint64_t version = 0; + + ret = dsl_dir_get_encryption_version(dd, &version); + if (ret != 0) + return (B_FALSE); + + return (version != ZIO_CRYPT_KEY_CURRENT_VERSION); +} + static int spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj, void *tag, dsl_wrapping_key_t **wkey_out) @@ -514,7 +542,7 @@ dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out) { int ret; - uint64_t crypt = 0, guid = 0; + uint64_t crypt = 0, guid = 0, version = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; @@ -556,12 +584,15 @@ dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, if (ret != 0) goto error; + /* the initial on-disk format for 
encryption did not have a version */ + (void) zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + /* * Unwrap the keys. If there is an error return EACCES to indicate * an authentication failure. */ - ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, guid, raw_keydata, - raw_hmac_keydata, iv, mac, &dck->dck_key); + ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid, + raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key); if (ret != 0) { ret = SET_ERROR(EACCES); goto error; @@ -1883,7 +1914,7 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) dsl_dataset_t *ds = NULL; uint8_t *buf = NULL; uint_t len; - uint64_t intval, guid, nlevels, blksz, ibs, nblkptr; + uint64_t intval, guid, nlevels, blksz, ibs, nblkptr, maxblkid, version; boolean_t is_passphrase = B_FALSE; ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds); @@ -1952,6 +1983,17 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) goto error; } + /* + * We don't support receiving old on-disk formats. The version 0 + * implementation protected several fields in an objset that were + * not always portable during a raw receive. As a result, we call + * the old version an on-disk errata #3. 
+ */ + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_VERSION, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { + ret = SET_ERROR(ENOTSUP); + goto error; + } ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len); if (ret != 0 || len != ZIO_OBJSET_MAC_LEN) { @@ -2028,6 +2070,12 @@ dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) goto error; } + ret = nvlist_lookup_uint64(nvl, "mdn_maxblkid", &maxblkid); + if (ret != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + ret = dmu_objset_from_ds(ds, &os); if (ret != 0) goto error; @@ -2078,8 +2126,9 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) uint8_t *keydata, *hmac_keydata, *iv, *mac, *portable_mac; uint_t len; uint64_t rddobj, one = 1; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; uint64_t crypt, guid, keyformat, iters, salt; - uint64_t compress, checksum, nlevels, blksz, ibs; + uint64_t compress, checksum, nlevels, blksz, ibs, maxblkid; char *keylocation = "prompt"; VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); @@ -2108,6 +2157,7 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels"); blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz"); ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift"); + maxblkid = fnvlist_lookup_uint64(nvl, "mdn_maxblkid"); /* if we haven't created an objset for the ds yet, do that now */ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); @@ -2127,11 +2177,16 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) arc_release(os->os_phys_buf, &os->os_phys_buf); bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN); bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN); - os->os_next_write_raw = B_TRUE; + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; /* set metadnode compression and checksum */ mdn->dn_compress = compress; mdn->dn_checksum = checksum; + + rw_enter(&mdn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(mdn, maxblkid, tx, B_FALSE); + 
rw_exit(&mdn->dn_struct_rwlock); + dsl_dataset_dirty(ds, tx); /* if this is a new dataset setup the DSL Crypto Key. */ @@ -2146,6 +2201,9 @@ dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION, + sizeof (uint64_t), 1, &version, tx)); dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE; @@ -2209,7 +2267,8 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) dsl_dir_t *rdd = NULL; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; - uint64_t crypt = 0, guid = 0, format = 0, iters = 0, salt = 0; + uint64_t crypt = 0, guid = 0, format = 0; + uint64_t iters = 0, salt = 0, version = 0; uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; uint8_t iv[WRAPPING_IV_LEN]; @@ -2254,6 +2313,17 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) if (ret != 0) goto error; + /* + * We don't support raw sends of legacy on-disk formats. See the + * comment in dsl_crypto_recv_key_check() for details. + */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { + dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + ret = SET_ERROR(ENOTSUP); + goto error; + } + /* * Lookup wrapping key properties. 
An early version of the code did * not correctly add these values to the wrapping key or the DSL @@ -2293,6 +2363,7 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, guid); + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_VERSION, version); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, raw_keydata, MASTER_KEY_MAX_LEN)); VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, @@ -2312,6 +2383,7 @@ dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz); fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift); fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr); + fnvlist_add_uint64(nvl, "mdn_maxblkid", mdn->dn_maxblkid); *nvl_out = nvl; return (0); @@ -2332,7 +2404,8 @@ dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) { dsl_crypto_key_t dck; - uint64_t one = 1; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; + uint64_t one = 1ULL; ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); @@ -2349,6 +2422,8 @@ dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dsl_crypto_key_sync(&dck, tx); VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, + DSL_CRYPTO_KEY_VERSION, sizeof (uint64_t), 1, &version, tx)); zio_crypt_key_destroy(&dck.dck_key); bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 36ceaf17583f..3c329f20783e 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -941,7 +941,7 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) bzero(&os->os_zil_header, sizeof (os->os_zil_header)); if (os->os_encrypted) - os->os_next_write_raw = B_TRUE; + 
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dsl_dataset_sync(ds, zio, tx); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 68791fe742f2..96e8dd62e1a6 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -187,6 +188,12 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, VERIFY0(zap_lookup(dp->dp_meta_objset, ddobj, DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj)); + + /* check for on-disk format errata */ + if (dsl_dir_incompatible_encryption_version(dd)) { + dp->dp_spa->spa_errata = + ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + } } mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 7286773d98b9..18b4ec3d6cf1 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1100,6 +1100,15 @@ static int zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) { int error; + boolean_t readonly = zfs_is_readonly(zfsvfs); + + /* + * Check for a bad on-disk format version now since we + * lied about owning the dataset readonly before. + */ + if (!readonly && + dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) + return (SET_ERROR(EROFS)); error = zfs_register_callbacks(zfsvfs->z_vfs); if (error) @@ -1113,13 +1122,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) * operations out since we closed the ZIL. */ if (mounting) { - boolean_t readonly; - /* * During replay we remove the read only flag to * allow replays to succeed. 
*/ - readonly = zfs_is_readonly(zfsvfs); if (readonly != 0) readonly_changed_cb(zfsvfs, B_FALSE); else diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 81bc6de412e0..2efe2f0d84fc 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -796,7 +796,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); if (os->os_encrypted) - os->os_next_write_raw = B_TRUE; + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; dsl_dataset_dirty(dmu_objset_ds(os), tx); dmu_objset_disown(os, B_FALSE, FTAG); return (0); @@ -819,6 +819,8 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1) zh->zh_flags |= ZIL_REPLAY_NEEDED; zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID; + if (os->os_encrypted) + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; dsl_dataset_dirty(dmu_objset_ds(os), tx); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 263c77e4a2d3..37259ad8ec28 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -402,6 +402,8 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) int ret; void *tmp; blkptr_t *bp = zio->io_bp; + spa_t *spa = zio->io_spa; + uint64_t dsobj = zio->io_bookmark.zb_objset; uint64_t lsize = BP_GET_LSIZE(bp); dmu_object_type_t ot = BP_GET_TYPE(bp); uint8_t salt[ZIO_DATA_SALT_LEN]; @@ -460,13 +462,12 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) */ if (BP_IS_AUTHENTICATED(bp)) { if (ot == DMU_OT_OBJSET) { - ret = spa_do_crypt_objset_mac_abd(B_FALSE, zio->io_spa, - zio->io_bookmark.zb_objset, zio->io_abd, size, - BP_SHOULD_BYTESWAP(bp)); + ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, + dsobj, zio->io_abd, size, BP_SHOULD_BYTESWAP(bp)); } else { zio_crypt_decode_mac_bp(bp, mac); - ret = spa_do_crypt_mac_abd(B_FALSE, zio->io_spa, - zio->io_bookmark.zb_objset, zio->io_abd, size, mac); + ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, + zio->io_abd, size, mac); } abd_copy(data, 
zio->io_abd, size); @@ -486,9 +487,8 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) zio_crypt_decode_mac_bp(bp, mac); } - ret = spa_do_crypt_abd(B_FALSE, zio->io_spa, zio->io_bookmark.zb_objset, - bp, bp->blk_birth, size, data, zio->io_abd, iv, mac, salt, - &no_crypt); + ret = spa_do_crypt_abd(B_FALSE, spa, dsobj, bp, bp->blk_birth, + size, data, zio->io_abd, iv, mac, salt, &no_crypt); if (no_crypt) abd_copy(data, zio->io_abd, size); @@ -509,7 +509,7 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) ret = SET_ERROR(EIO); if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, - zio->io_spa, NULL, &zio->io_bookmark, zio, 0, 0); + spa, NULL, &zio->io_bookmark, zio, 0, 0); } } else { zio->io_error = ret; @@ -3729,6 +3729,7 @@ zio_encrypt(zio_t *zio) spa_t *spa = zio->io_spa; blkptr_t *bp = zio->io_bp; uint64_t psize = BP_GET_PSIZE(bp); + uint64_t dsobj = zio->io_bookmark.zb_objset; dmu_object_type_t ot = BP_GET_TYPE(bp); void *enc_buf = NULL; abd_t *eabd = NULL; @@ -3752,10 +3753,27 @@ zio_encrypt(zio_t *zio) /* if we are doing raw encryption set the provided encryption params */ if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) { + ASSERT0(BP_GET_LEVEL(bp)); BP_SET_CRYPT(bp, B_TRUE); BP_SET_BYTEORDER(bp, zp->zp_byteorder); if (ot != DMU_OT_OBJSET) zio_crypt_encode_mac_bp(bp, zp->zp_mac); + + /* dnode blocks must be written out in the provided byteorder */ + if (zp->zp_byteorder != ZFS_HOST_BYTEORDER && + ot == DMU_OT_DNODE) { + void *bswap_buf = zio_buf_alloc(psize); + abd_t *babd = abd_get_from_buf(bswap_buf, psize); + + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + abd_copy_to_buf(bswap_buf, zio->io_abd, psize); + dmu_ot_byteswap[DMU_OT_BYTESWAP(ot)].ob_func(bswap_buf, + psize); + + abd_take_ownership_of_buf(babd, B_TRUE); + zio_push_transform(zio, babd, psize, psize, NULL); + } + if (DMU_OT_IS_ENCRYPTED(ot)) zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv); return (ZIO_PIPELINE_CONTINUE); @@ -3779,17 
+3797,16 @@ zio_encrypt(zio_t *zio) ASSERT0(DMU_OT_IS_ENCRYPTED(ot)); ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); BP_SET_CRYPT(bp, B_TRUE); - VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, - zio->io_bookmark.zb_objset, zio->io_abd, psize, - BP_SHOULD_BYTESWAP(bp))); + VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, BP_SHOULD_BYTESWAP(bp))); return (ZIO_PIPELINE_CONTINUE); } /* unencrypted object types are only authenticated with a MAC */ if (!DMU_OT_IS_ENCRYPTED(ot)) { BP_SET_CRYPT(bp, B_TRUE); - VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, - zio->io_bookmark.zb_objset, zio->io_abd, psize, mac)); + VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, mac)); zio_crypt_encode_mac_bp(bp, mac); return (ZIO_PIPELINE_CONTINUE); } @@ -3823,8 +3840,8 @@ zio_encrypt(zio_t *zio) } /* Perform the encryption. This should not fail */ - VERIFY0(spa_do_crypt_abd(B_TRUE, spa, zio->io_bookmark.zb_objset, bp, - zio->io_txg, psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt)); + VERIFY0(spa_do_crypt_abd(B_TRUE, spa, dsobj, bp, zio->io_txg, + psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt)); /* encode encryption metadata into the bp */ if (ot == DMU_OT_INTENT_LOG) { @@ -4154,7 +4171,6 @@ zio_done(zio_t *zio) if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(zio->io_bp) && zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { - ASSERT(!BP_SHOULD_BYTESWAP(zio->io_bp)); ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); ASSERT(BP_COUNT_GANG(zio->io_bp) == 0 || diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c index 5ffa1e8b0ce2..823e6b8d66ea 100644 --- a/module/zfs/zio_crypt.c +++ b/module/zfs/zio_crypt.c @@ -187,6 +187,12 @@ (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; +typedef struct blkptr_auth_buf { + uint64_t bab_prop; /* blk_prop - portable mask */ + uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from 
blk_cksum */ + uint64_t bab_pad; /* reserved for future use */ +} blkptr_auth_buf_t; + zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { {"", ZC_TYPE_NONE, 0, "inherit"}, {"", ZC_TYPE_NONE, 0, "on"}, @@ -275,6 +281,7 @@ zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; + key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; key->zk_salt_count = 0; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); @@ -472,10 +479,10 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, { int ret; uio_t puio, cuio; + uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint64_t crypt = key->zk_crypt; - uint64_t le_guid = LE_64(key->zk_guid); - uint_t enc_len, keydata_len; + uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); @@ -500,6 +507,22 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + /* + * Although we don't support writing to the old format, we do + * support rewrapping the key so that the user can move and + * quarantine datasets on the old format. 
+ */ + if (key->zk_version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(key->zk_guid); + } else { + ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(key->zk_guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(key->zk_version); + } + enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_iovcnt = 2; @@ -510,7 +533,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, /* encrypt the keys and store the resulting ciphertext and mac */ ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t)); + &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -521,16 +544,16 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, } int -zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, - uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, - zio_crypt_key_t *key) +zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key) { int ret; crypto_mechanism_t mech; uio_t puio, cuio; + uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; - uint_t enc_len, keydata_len; - uint64_t le_guid = LE_64(guid); + uint_t enc_len, keydata_len, aad_len; ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); @@ -550,6 +573,17 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, cipher_iovecs[2].iov_base = mac; cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + if (version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(guid); + } else { + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(version); + } + enc_len = keydata_len + 
SHA512_HMAC_KEYLEN; puio.uio_iov = plain_iovecs; puio.uio_segflg = UIO_SYSSPACE; @@ -560,7 +594,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, /* decrypt the keys and store the result in the output buffers */ ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t)); + &puio, &cuio, (uint8_t *)aad, aad_len); if (ret != 0) goto error; @@ -602,6 +636,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, key->zk_hmac_tmpl = NULL; key->zk_crypt = crypt; + key->zk_version = version; key->zk_guid = guid; key->zk_salt_count = 0; rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); @@ -700,19 +735,32 @@ zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, * byte strings, which normally means that these strings would not need to deal * with byteswapping at all. However, both blkptr_t and zil_header_t may be * byteswapped by lower layers and so we must "undo" that byteswap here upon - * decoding. + * decoding and encoding in a non-native byteorder. These functions require + * that the byteorder bit is correct before being called. 
*/ void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) { + uint64_t val64; uint32_t val32; ASSERT(BP_IS_ENCRYPTED(bp)); - bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); - bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); - bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); - BP_SET_IV2(bp, val32); + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); + bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, val32); + } else { + bcopy(salt, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); + + bcopy(iv, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); + + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, BSWAP_32(val32)); + } } void @@ -751,12 +799,22 @@ zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) { + uint64_t val64; + ASSERT(BP_USES_CRYPT(bp)); ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); - bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); - bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], - sizeof (uint64_t)); + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], + sizeof (uint64_t)); + } else { + bcopy(mac, &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[2] = BSWAP_64(val64); + + bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[3] = BSWAP_64(val64); + } } void @@ -841,55 +899,107 @@ zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) abd_return_buf(src_abd, src, datalen); } +/* + * This function decides what fields from blk_prop are included in + * the various on-disk MAC algorithms.
+ */ static void -zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp) +zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) { - BP_SET_DEDUP(bp, 0); - BP_SET_CHECKSUM(bp, 0); + /* + * Version 0 did not properly zero out all non-portable fields + * as it should have done. We maintain this code so that we can + * do read-only imports of pools on this version. + */ + if (version == 0) { + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + return; + } + + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + + /* + * The hole_birth feature might set these fields even if this bp + * is a hole. We zero them out here to guarantee that raw sends + * will function with or without the feature. + */ + if (BP_IS_HOLE(bp)) { + bp->blk_prop = 0ULL; + return; + } /* - * psize cannot be set to zero or it will trigger asserts, but the - * value doesn't really matter as long as it is constant. + * At L0 we want to verify these fields to ensure that data blocks + * cannot be reinterpreted. For instance, we do not want an attacker + * to trick us into returning raw lz4 compressed data to the user + * by modifying the compression bits. At higher levels, we cannot + * enforce this policy since raw sends do not convey any information + * about indirect blocks, so these values might be different on the + * receive side. Fortunately, this does not open any new attack + * vectors, since any alterations that can be made to a higher level + * bp must still verify the correct order of the layer below it. */ - BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + if (BP_GET_LEVEL(bp) != 0) { + BP_SET_BYTEORDER(bp, 0); + BP_SET_COMPRESS(bp, 0); + + /* + * psize cannot be set to zero or it will trigger + * asserts, but the value doesn't really matter as + * long as it is constant.
+ */ + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + } + + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); } -static int -zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, - blkptr_t *bp) +static void +zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, + blkptr_auth_buf_t *bab, uint_t *bab_len) { - int ret; - crypto_data_t cd; - uint64_t le_blkprop; blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - cd.cd_format = CRYPTO_DATA_RAW; - cd.cd_offset = 0; if (should_bswap) byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - le_blkprop = (ZFS_HOST_BYTEORDER) ? - tmpbp.blk_prop : BSWAP_64(tmpbp.blk_prop); + zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); - cd.cd_length = sizeof (uint64_t); - cd.cd_raw.iov_base = (char *)&le_blkprop; - cd.cd_raw.iov_len = cd.cd_length; + /* + * We always MAC blk_prop in LE to ensure portability. This + * must be done after decoding the mac, since the endianness + * will get zero'd out here. 
+ */ + zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); + bab->bab_prop = LE_64(tmpbp.blk_prop); + bab->bab_pad = 0ULL; + + /* version 0 did not include the padding */ + *bab_len = sizeof (blkptr_auth_buf_t); + if (version == 0) + *bab_len -= sizeof (uint64_t); +} - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } +static int +zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + int ret; + uint_t bab_len; + blkptr_auth_buf_t bab; + crypto_data_t cd; - zio_crypt_decode_mac_bp(&tmpbp, mac); - cd.cd_length = ZIO_DATA_MAC_LEN; - cd.cd_raw.iov_base = (char *)mac; + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + cd.cd_length = bab_len; + cd.cd_raw.iov_base = (char *)&bab; cd.cd_raw.iov_len = cd.cd_length; ret = crypto_mac_update(ctx, &cd, NULL); @@ -905,60 +1015,32 @@ zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, } static void -zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, boolean_t should_bswap, - blkptr_t *bp) +zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) { - blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + uint_t bab_len; + blkptr_auth_buf_t bab; - ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); - ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - zio_crypt_decode_mac_bp(&tmpbp, mac); - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); - - SHA2Update(ctx, &tmpbp.blk_prop, sizeof (uint64_t)); - SHA2Update(ctx, mac, ZIO_DATA_MAC_LEN); + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + SHA2Update(ctx, &bab, bab_len); } static void -zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, 
+zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version, boolean_t should_bswap, blkptr_t *bp) { - uint_t crypt_len; - blkptr_t tmpbp = *bp; - uint8_t mac[ZIO_DATA_MAC_LEN]; - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); - - ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); - ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp); - zio_crypt_decode_mac_bp(&tmpbp, mac); - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + uint_t bab_len; + blkptr_auth_buf_t bab; - crypt_len = sizeof (uint64_t); - bcopy(&tmpbp.blk_prop, *aadp, crypt_len); - *aadp += crypt_len; - *aad_len += crypt_len; - - crypt_len = ZIO_DATA_MAC_LEN; - bcopy(mac, *aadp, crypt_len); - *aadp += crypt_len; - *aad_len += crypt_len; + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + bcopy(&bab, *aadp, bab_len); + *aadp += bab_len; + *aad_len += bab_len; } static int -zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, - dnode_phys_t *dnp) +zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, dnode_phys_t *dnp) { int ret, i; dnode_phys_t *adnp; @@ -992,14 +1074,14 @@ zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap, } for (i = 0; i < dnp->dn_nblkptr; i++) { - ret = zio_crypt_bp_do_hmac_updates(ctx, + ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, &dnp->dn_blkptr[i]); if (ret != 0) goto error; } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { - ret = zio_crypt_bp_do_hmac_updates(ctx, + ret = zio_crypt_bp_do_hmac_updates(ctx, version, should_bswap, DN_SPILL_BLKPTR(dnp)); if (ret != 0) goto error; @@ -1095,8 +1177,8 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, } /* add in fields from the metadnode */ - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_meta_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, 
+ should_bswap, &osp->os_meta_dnode); if (ret) goto error; @@ -1149,13 +1231,13 @@ zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, } /* add in fields from the user accounting dnodes */ - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_userused_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_userused_dnode); if (ret) goto error; - ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap, - &osp->os_groupused_dnode); + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_groupused_dnode); if (ret) goto error; @@ -1194,9 +1276,9 @@ zio_crypt_destroy_uio(uio_t *uio) * checksum, and psize bits. For an explanation of the purpose of this, see * the comment block on object set authentication. */ -int -zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, - uint_t datalen, boolean_t byteswap, uint8_t *cksum) +static int +zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, + uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) { blkptr_t *bp; int i, epb = datalen >> SPA_BLKPTRSHIFT; @@ -1206,7 +1288,8 @@ zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, /* checksum all of the MACs from the layer below */ SHA2Init(SHA512, &ctx); for (i = 0, bp = buf; i < epb; i++, bp++) { - zio_crypt_bp_do_indrect_checksum_updates(&ctx, byteswap, bp); + zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, + byteswap, bp); } SHA2Final(digestbuf, &ctx); @@ -1222,10 +1305,34 @@ zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, } int -zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, +zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, uint_t datalen, boolean_t byteswap, uint8_t *cksum) { + int ret; + /* + * Unfortunately, callers of this function will not always have + * easy access to the on-disk format version. 
This info is + * normally found in the DSL Crypto Key, but the checksum-of-MACs + * is expected to be verifiable even when the key isn't loaded. + * Here, instead of doing a ZAP lookup for the version for each + * zio, we simply try both existing formats. + */ + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, + datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); + if (ret == ECKSUM) { + ASSERT(!generate); + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, + buf, datalen, 0, byteswap, cksum); + } + + return (ret); +} + +int +zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ int ret; void *buf; @@ -1439,10 +1546,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, * Special case handling routine for encrypting / decrypting dnode blocks. */ static int -zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, - uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, - boolean_t *no_crypt) +zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, + uint_t *auth_len, boolean_t *no_crypt) { int ret; uint_t nr_src, nr_dst, crypt_len; @@ -1544,12 +1651,12 @@ zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, for (j = 0; j < dnp->dn_nblkptr; j++) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - byteswap, &dnp->dn_blkptr[j]); + version, byteswap, &dnp->dn_blkptr[j]); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - byteswap, DN_SPILL_BLKPTR(dnp)); + version, byteswap, DN_SPILL_BLKPTR(dnp)); } /* @@ -1682,9 +1789,9 @@ zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, * data (AAD) for the encryption modes. 
*/ static int -zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uint8_t *mac, - uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, +zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; @@ -1700,9 +1807,9 @@ zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, no_crypt); break; case DMU_OT_DNODE: - ret = zio_crypt_init_uios_dnode(encrypt, plainbuf, cipherbuf, - datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, - no_crypt); + ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf, + cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, + auth_len, no_crypt); break; default: ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, @@ -1754,9 +1861,9 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, bzero(&cuio, sizeof (uio_t)); /* create uios for encryption */ - ret = zio_crypt_init_uios(encrypt, ot, plainbuf, cipherbuf, datalen, - byteswap, mac, &puio, &cuio, &enc_len, &authbuf, &auth_len, - no_crypt); + ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, + cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, + &authbuf, &auth_len, no_crypt); if (ret != 0) return (ret); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 6ea822467b61..572018d7524b 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1406,7 +1406,12 @@ zvol_open(struct block_device *bdev, fmode_t flag) goto out_mutex; } - if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) { + /* + * Check for a bad on-disk format version now since we + * lied about owning the dataset readonly before. 
+ */ + if ((flag & FMODE_WRITE) && ((zv->zv_flags & ZVOL_RDONLY) || + dmu_objset_incompatible_encryption_version(zv->zv_objset))) { error = -EROFS; goto out_open_count; } diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index af96e6a64eb9..ca8e0bf3195f 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -332,7 +332,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'zpool_import_features_003_pos','zpool_import_missing_001_pos', 'zpool_import_missing_002_pos', 'zpool_import_rename_001_pos', 'zpool_import_all_001_pos', - 'zpool_import_encrypted', 'zpool_import_encrypted_load'] + 'zpool_import_encrypted', 'zpool_import_encrypted_load', + 'zpool_import_errata3'] tags = ['functional', 'cli_root', 'zpool_import'] [tests/functional/cli_root/zpool_labelclear] @@ -650,7 +651,8 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', - 'send-c_recv_dedup', 'send_encrypted_heirarchy', 'send_freeobjects'] + 'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy', + 'send_freeobjects', 'send_realloc_dnode_size'] tags = ['functional', 'rsend'] [tests/functional/scrub_mirror] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am index fab6e7459ffe..687646f05e78 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am @@ -26,10 +26,12 @@ dist_pkgdata_SCRIPTS = \ zpool_import_missing_003_pos.ksh \ zpool_import_rename_001_pos.ksh \ zpool_import_encrypted.ksh \ - zpool_import_encrypted_load.ksh + zpool_import_encrypted_load.ksh \ + zpool_import_errata3.ksh BLOCKFILES = \ - unclean_export.dat.bz2 + unclean_export.dat.bz2 \ + 
cryptv0.dat.bz2 dist_pkgdata_DATA = $(BLOCKFILES) EXTRA_DIST = $(BLOCKFILES) diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 b/tests/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 new file mode 100644 index 000000000000..1c625c2c447b Binary files /dev/null and b/tests/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 differ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh new file mode 100755 index 000000000000..67e4caee8776 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zpool import' should import a pool with Errata #3 while preventing +# the user from performing read-write operations +# +# STRATEGY: +# 1. Import a pre-packaged pool with Errata #3 +# 2. Attempt to write to the affected datasets +# 3. Attempt to read from the affected datasets +# 4. Attempt to perform a raw send of the affected datasets +# 5. Perform a regular send of the datasets under a new encryption root +# 6. Verify the new datasets can be read from and written to +# 7. Destroy the old affected datasets +# 8. 
Reimport the pool and verify that the errata is no longer present +# + +verify_runnable "global" + +POOL_NAME=cryptv0 +POOL_FILE=cryptv0.dat + +function uncompress_pool +{ + log_note "Creating pool from $POOL_FILE" + log_must bzcat \ + $STF_SUITE/tests/functional/cli_root/zpool_import/$POOL_FILE.bz2 \ + > /$TESTPOOL/$POOL_FILE + return 0 +} + +function cleanup +{ + poolexists $POOL_NAME && log_must zpool destroy $POOL_NAME + [[ -e /$TESTPOOL/$POOL_FILE ]] && rm /$TESTPOOL/$POOL_FILE + return 0 +} +log_onexit cleanup + +log_assert "Verify that Errata 3 is properly handled" + +uncompress_pool +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_must eval "zpool status | grep -q Errata" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testfs" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testvol" + +log_mustnot zfs mount $POOL_NAME/testfs +log_must zfs mount -o ro $POOL_NAME/testfs + +old_mntpnt=$(get_prop mountpoint $POOL_NAME/testfs) +log_must eval "ls $old_mntpnt | grep -q testfile" +block_device_wait +log_mustnot dd if=/dev/zero of=/dev/zvol/$POOL_NAME/testvol bs=512 count=1 +log_must dd if=/dev/zvol/$POOL_NAME/testvol of=/dev/null bs=512 count=1 +log_must eval "echo 'password' | zfs create \ + -o encryption=on -o keyformat=passphrase -o keylocation=prompt \ + cryptv0/encroot" +log_mustnot eval "zfs send -w $POOL_NAME/testfs@snap1 | \ + zfs recv $POOL_NAME/encroot/testfs" +log_mustnot eval "zfs send -w $POOL_NAME/testvol@snap1 | \ + zfs recv $POOL_NAME/encroot/testvol" + +log_must eval "zfs send $POOL_NAME/testfs@snap1 | \ + zfs recv $POOL_NAME/encroot/testfs" +log_must eval "zfs send $POOL_NAME/testvol@snap1 | \ + zfs recv $POOL_NAME/encroot/testvol" +block_device_wait +log_must dd if=/dev/zero of=/dev/zvol/$POOL_NAME/encroot/testvol bs=512 count=1 +new_mntpnt=$(get_prop mountpoint $POOL_NAME/encroot/testfs) +log_must eval "ls $new_mntpnt | grep -q testfile" +log_must zfs destroy -r $POOL_NAME/testfs +log_must zfs destroy -r 
$POOL_NAME/testvol + +log_must zpool export $POOL_NAME +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_mustnot eval "zpool status | grep -q Errata" +log_pass "Errata 3 is properly handled" diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am index 8833d1d760fe..7a8b8a33a474 100644 --- a/tests/zfs-tests/tests/functional/rsend/Makefile.am +++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am @@ -23,6 +23,7 @@ dist_pkgdata_SCRIPTS = \ rsend_021_pos.ksh \ rsend_022_pos.ksh \ rsend_024_pos.ksh \ + send_encrypted_files.ksh \ send_encrypted_heirarchy.ksh \ send-cD.ksh \ send-c_embedded_blocks.ksh \ @@ -39,4 +40,5 @@ dist_pkgdata_SCRIPTS = \ send-c_volume.ksh \ send-c_zstreamdump.ksh \ send-cpL_varied_recsize.ksh \ - send_freeobjects.ksh + send_freeobjects.ksh \ + send_realloc_dnode_size.ksh diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh new file mode 100644 index 000000000000..20f3788d561a --- /dev/null +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh @@ -0,0 +1,101 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Datto Inc. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# DESCRIPTION: +# Verify 'zfs send -w' works with many different file layouts +# +# STRATEGY: +# 1. Create a new encrypted filesystem +# 2. Add an empty file to the filesystem +# 3. Add a small 512 byte file to the filesystem +# 4. Add a larger 32M file to the filesystem +# 5. 
Add a large sparse file to the filesystem +# 6. Add a file truncated to 4M to the filesystem +# 7. Add a sparse file with metadata compression disabled to the filesystem +# 8. Add and remove 1000 empty files to the filesystem +# 9. Snapshot the filesystem +# 10. Send and receive the filesystem, ensuring that it can be mounted +# + +verify_runnable "both" + +function set_metadata_compression_disabled # <0|1> +{ + echo $1 > /sys/module/zfs/parameters/zfs_mdcomp_disable +} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/recv && \ + log_must zfs destroy -r $TESTPOOL/recv + [[ -f $keyfile ]] && log_must rm $keyfile + [[ -f $sendfile ]] && log_must rm $sendfile +} +log_onexit cleanup + +log_assert "Verify 'zfs send -w' works with many different file layouts" + +typeset keyfile=/$TESTPOOL/pkey +typeset sendfile=/$TESTPOOL/sendfile + +log_must eval "echo 'password' > $keyfile" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$keyfile $TESTPOOL/$TESTFS2 + +log_must touch /$TESTPOOL/$TESTFS2/empty +log_must mkfile 512 /$TESTPOOL/$TESTFS2/small +log_must mkfile 32M /$TESTPOOL/$TESTFS2/full +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/sparse \ + bs=512 count=1 seek=10G >/dev/null 2>&1 +log_must mkfile 32M /$TESTPOOL/$TESTFS2/truncated +log_must truncate -s 4M /$TESTPOOL/$TESTFS2/truncated +sync + +log_must set_metadata_compression_disabled 1 +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/no_mdcomp \ + count=1 bs=512 seek=10G >/dev/null 2>&1 +sync +log_must set_metadata_compression_disabled 0 + +log_must mkdir -p /$TESTPOOL/$TESTFS2/dir +for i in {1..1000}; do + log_must mkfile 512 /$TESTPOOL/$TESTFS2/dir/file-$i +done +sync + +for i in {1..1000}; do + log_must rm /$TESTPOOL/$TESTFS2/dir/file-$i +done +sync + +log_must zfs snapshot $TESTPOOL/$TESTFS2@now +log_must eval "zfs send -wR $TESTPOOL/$TESTFS2@now > $sendfile" + +log_must eval "zfs 
recv -F $TESTPOOL/recv < $sendfile" +log_must zfs load-key $TESTPOOL/recv + +log_must zfs mount -a + +log_pass "Verified 'zfs send -w' works with many different file layouts" diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh new file mode 100755 index 000000000000..206763949e8d --- /dev/null +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# Description: +# Verify incremental receive properly handles objects with changed +# dnode slot count. +# +# Strategy: +# 1. Populate a dataset with 1k byte dnodes and snapshot +# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects +# get recycled numbers and formerly "interior" dnode slots get assigned +# to new objects +# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects +# overlap with recently recycled and formerly "normal" dnode slots get +# assigned to new objects +# 4. Generate initial and incremental streams +# 5. 
Verify initial and incremental streams can be received +# + +verify_runnable "both" + +log_assert "Verify incremental receive handles objects with changed dnode size" + +function cleanup +{ + rm -f $BACKDIR/fs-dn-legacy + rm -f $BACKDIR/fs-dn-1k + rm -f $BACKDIR/fs-dn-2k + + if datasetexists $POOL/fs ; then + log_must zfs destroy -rR $POOL/fs + fi + + if datasetexists $POOL/newfs ; then + log_must zfs destroy -rR $POOL/newfs + fi +} + +log_onexit cleanup + +# 1. Populate a dataset with 1k byte dnodes and snapshot +log_must zfs create -o dnodesize=1k $POOL/fs +log_must mk_files 200 262144 0 $POOL/fs +log_must zfs snapshot $POOL/fs@a + +# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects +# get recycled numbers and formerly "interior" dnode slots get assigned +# to new objects +rm /$POOL/fs/* + +log_must zfs unmount $POOL/fs +log_must zfs set dnodesize=legacy $POOL/fs +log_must zfs mount $POOL/fs + +log_must mk_files 200 262144 0 $POOL/fs +log_must zfs snapshot $POOL/fs@b + +# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects +# overlap with recently recycled and formerly "normal" dnode slots get +# assigned to new objects +rm /$POOL/fs/* + +log_must zfs unmount $POOL/fs +log_must zfs set dnodesize=2k $POOL/fs +log_must zfs mount $POOL/fs + +mk_files 200 262144 0 $POOL/fs +log_must zfs snapshot $POOL/fs@c + +# 4. Generate initial and incremental streams +log_must eval "zfs send $POOL/fs@a > $BACKDIR/fs-dn-1k" +log_must eval "zfs send -i $POOL/fs@a $POOL/fs@b > $BACKDIR/fs-dn-legacy" +log_must eval "zfs send -i $POOL/fs@b $POOL/fs@c > $BACKDIR/fs-dn-2k" + +# 5. Verify initial and incremental streams can be received +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-1k" +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-legacy" +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-2k" + +log_pass "Verify incremental receive handles objects with changed dnode size"