Skip to content

Commit

Permalink
Encryption Stability and On-Disk Format Fixes
Browse files Browse the repository at this point in the history
The on-disk format for encrypted datasets protects not only
the encrypted and authenticated blocks themselves, but also
the order and interpretation of these blocks. In order to
make this work while maintaining the ability to do raw
sends, the indirect bps maintain a secure checksum of all
the MACs in the block below it along with a few other
fields that determine how the data is interpreted.

Unfortunately, the current on-disk format erroneously
includes some fields which are not portable and thus cannot
support raw sends. It is not possible to easily work around
this issue due to a separate and much smaller bug which
causes indirect blocks for encrypted dnodes to not be
compressed, which conflicts with the previous bug. In
addition, the current code generates incompatible on-disk
formats on big endian and little endian systems due to an
issue with how block pointers are authenticated. Finally,
raw send streams do not currently include dn_maxblkid when
sending both the metadnode and normal dnodes which are
needed in order to ensure that we are correctly maintaining
the portable objset MAC.

This patch zero's out the offending fields when computing
the bp MAC and ensures that these MACs are always
calculated in little endian order (regardless of the host
system's byte order). This patch also registers an errata
for the old on-disk format, which we detect by adding a
"version" field to newly created DSL Crypto Keys. We allow
datasets without a version (version 0) to only be mounted
for read so that they can easily be migrated. We also now
include dn_maxblkid in raw send streams to ensure the MAC
can be maintained correctly.

This patch also contains minor bug fixes and cleanups.

Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #6845
Closes #6864
Closes #7052
  • Loading branch information
Tom Caputi authored and behlendorf committed Feb 2, 2018
1 parent 4c46b99 commit ae76f45
Show file tree
Hide file tree
Showing 31 changed files with 787 additions and 187 deletions.
4 changes: 2 additions & 2 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ get_usage(zfs_help_t idx)
return (gettext("\tunload-key [-r] "
"<-a | filesystem|volume>\n"));
case HELP_CHANGE_KEY:
return (gettext("\tchange-key [-l] [-o keyformat=<value>]"
"\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]"
return (gettext("\tchange-key [-l] [-o keyformat=<value>]\n"
"\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]\n"
"\t <filesystem|volume>\n"
"\tchange-key -i [-l] <filesystem|volume>\n"));
}
Expand Down
20 changes: 20 additions & 0 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2123,6 +2123,15 @@ show_import(nvlist_t *config)
"updating.\n"));
break;

case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION:
(void) printf(gettext(" action: Existing "
"encrypted datasets contain an on-disk "
"incompatibility, which\n\tneeds to be "
"corrected. Backup these datasets to new "
"encrypted datasets\n\tand destroy the "
"old ones.\n"));
break;

default:
/*
* All errata must contain an action message.
Expand Down Expand Up @@ -6501,6 +6510,17 @@ status_callback(zpool_handle_t *zhp, void *data)
"run 'zpool scrub'.\n"));
break;

case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION:
(void) printf(gettext("\tExisting encrypted datasets "
"contain an on-disk incompatibility\n\twhich "
"needs to be corrected.\n"));
(void) printf(gettext("action: To correct the issue "
"backup existing encrypted datasets to new\n\t"
"encrypted datasets and destroy the old ones. "
"'zfs mount -o ro' can\n\tbe used to temporarily "
"mount existing encrypted datasets readonly.\n"));
break;

default:
/*
* All errata which allow the pool to be imported
Expand Down
6 changes: 5 additions & 1 deletion cmd/zstreamdump/zstreamdump.c
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,8 @@ main(int argc, char *argv[])
drro->drr_raw_bonuslen =
BSWAP_32(drro->drr_raw_bonuslen);
drro->drr_toguid = BSWAP_64(drro->drr_toguid);
drro->drr_maxblkid =
BSWAP_64(drro->drr_maxblkid);
}

payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
Expand All @@ -451,7 +453,8 @@ main(int argc, char *argv[])
(void) printf("OBJECT object = %llu type = %u "
"bonustype = %u blksz = %u bonuslen = %u "
"dn_slots = %u raw_bonuslen = %u "
"flags = %u indblkshift = %u nlevels = %u "
"flags = %u maxblkid = %llu "
"indblkshift = %u nlevels = %u "
"nblkptr = %u\n",
(u_longlong_t)drro->drr_object,
drro->drr_type,
Expand All @@ -461,6 +464,7 @@ main(int argc, char *argv[])
drro->drr_dn_slots,
drro->drr_raw_bonuslen,
drro->drr_flags,
(u_longlong_t)drro->drr_maxblkid,
drro->drr_indblkshift,
drro->drr_nlevels,
drro->drr_nblkptr);
Expand Down
52 changes: 52 additions & 0 deletions contrib/dracut/90zfs/zfs-load-key.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash

# This script only gets executed on systemd systems, see mount-zfs.sh for non-systemd systems

# import the libs now that we know the pool imported
[ -f /lib/dracut-lib.sh ] && dracutlib=/lib/dracut-lib.sh
[ -f /usr/lib/dracut/modules.d/99base/dracut-lib.sh ] && dracutlib=/usr/lib/dracut/modules.d/99base/dracut-lib.sh
. "$dracutlib"

# load the kernel command line vars
[ -z "$root" ] && root=$(getarg root=)
# If root is not ZFS= or zfs: or rootfstype is not zfs then we are not supposed to handle it.
[ "${root##zfs:}" = "${root}" -a "${root##ZFS=}" = "${root}" -a "$rootfstype" != "zfs" ] && exit 0

# There is a race between the zpool import and the pre-mount hooks, so we wait for a pool to be imported
while true; do
zpool list -H | grep -q -v '^$' && break
[[ $(systemctl is-failed zfs-import-cache.service) == 'failed' ]] && exit 1
[[ $(systemctl is-failed zfs-import-scan.service) == 'failed' ]] && exit 1
sleep 0.1s
done

# run this after import as zfs-import-cache/scan service is confirmed good
if [[ "${root}" = "zfs:AUTO" ]] ; then
root=$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')
else
root="${root##zfs:}"
root="${root##ZFS=}"
fi

# if pool encryption is active and the zfs command understands '-o encryption'
if [[ $(zpool list -H -o feature@encryption $(echo "${root}" | awk -F\/ '{print $1}')) == 'active' ]]; then
# check if root dataset has encryption enabled
if $(zfs list -H -o encryption "${root}" | grep -q -v off); then
# figure out where the root dataset has its key, the keylocation should not be none
while true; do
if [[ $(zfs list -H -o keylocation "${root}") == 'none' ]]; then
root=$(echo -n "${root}" | awk 'BEGIN{FS=OFS="/"}{NF--; print}')
[[ "${root}" == '' ]] && exit 1
else
break
fi
done
# decrypt them
TRY_COUNT=5
while [ $TRY_COUNT != 0 ]; do
zfs load-key "$root" <<< $(systemd-ask-password "Encrypted ZFS password for ${root}: ")
[[ $? == 0 ]] && break
((TRY_COUNT-=1))
done
fi
fi
7 changes: 7 additions & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,13 @@ int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
int ibs, dmu_tx_t *tx);

/*
* Manually set the maxblkid on a dnode. This will adjust nlevels accordingly
* to accommodate the change.
*/
int dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
dmu_tx_t *tx);

/*
* Set the checksum property on a dnode. The new checksum algorithm will
* apply to all newly written blocks; existing blocks will not be affected.
Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ boolean_t dmu_objset_userobjused_enabled(objset_t *os);
boolean_t dmu_objset_userobjspace_upgradable(objset_t *os);
void dmu_objset_userobjspace_upgrade(objset_t *os);
boolean_t dmu_objset_userobjspace_present(objset_t *os);
boolean_t dmu_objset_incompatible_encryption_version(objset_t *os);

int dmu_fsname(const char *snapname, char *buf);

Expand Down
8 changes: 8 additions & 0 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,13 @@ typedef struct dnode_phys {
uint64_t dn_maxblkid; /* largest allocated block ID */
uint64_t dn_used; /* bytes (or sectors) of disk space */

/*
* Both dn_pad2 and dn_pad3 are protected by the block's MAC. This
* allows us to protect any fields that might be added here in the
* future. In either case, developers will want to check
* zio_crypt_init_uios_dnode() to ensure the new field is being
* protected properly.
*/
uint64_t dn_pad3[4];

/*
Expand Down Expand Up @@ -301,6 +308,7 @@ struct dnode {
uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */
uint16_t dn_next_bonuslen[TXG_SIZE];
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
uint64_t dn_next_maxblkid[TXG_SIZE]; /* next maxblkid in bytes */

/* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */
uint32_t dn_dbufs_count; /* count of dn_dbufs */
Expand Down
3 changes: 2 additions & 1 deletion include/sys/dsl_crypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1"
#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ"
#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT"

#define DSL_CRYPTO_KEY_VERSION "DSL_CRYPTO_VERSION"

/*
* In-memory representation of a wrapping key. One of these structs will exist
Expand Down Expand Up @@ -169,6 +169,7 @@ int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);
boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd);

void spa_keystore_init(spa_keystore_t *sk);
void spa_keystore_fini(spa_keystore_t *sk);
Expand Down
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,7 @@ typedef enum zpool_errata {
ZPOOL_ERRATA_NONE,
ZPOOL_ERRATA_ZOL_2094_SCRUB,
ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY,
ZPOOL_ERRATA_ZOL_6845_ENCRYPTION,
} zpool_errata_t;

/*
Expand Down
4 changes: 3 additions & 1 deletion include/sys/zfs_ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,12 @@ typedef struct dmu_replay_record {
uint8_t drr_flags;
uint32_t drr_raw_bonuslen;
uint64_t drr_toguid;
/* only nonzero for raw streams */
/* only (possibly) nonzero for raw streams */
uint8_t drr_indblkshift;
uint8_t drr_nlevels;
uint8_t drr_nblkptr;
uint8_t drr_pad[5];
uint64_t drr_maxblkid;
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
Expand Down
11 changes: 8 additions & 3 deletions include/sys/zio_crypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct zbookmark_phys;
#define MASTER_KEY_MAX_LEN 32
#define SHA512_HMAC_KEYLEN 64

#define ZIO_CRYPT_KEY_CURRENT_VERSION 1ULL

typedef enum zio_crypt_type {
ZC_TYPE_NONE = 0,
ZC_TYPE_CCM,
Expand Down Expand Up @@ -64,6 +66,9 @@ typedef struct zio_crypt_key {
/* encryption algorithm */
uint64_t zk_crypt;

/* on-disk format version */
uint64_t zk_version;

/* GUID for uniquely identifying this key. Not encrypted on disk. */
uint64_t zk_guid;

Expand Down Expand Up @@ -104,9 +109,9 @@ int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);

int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
zio_crypt_key_t *key);
int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
uint8_t *mac, zio_crypt_key_t *key);
int zio_crypt_generate_iv(uint8_t *ivbuf);
int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
uint_t datalen, uint8_t *ivbuf, uint8_t *salt);
Expand Down
19 changes: 11 additions & 8 deletions lib/libzfs/libzfs_crypto.c
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation)
}

try_again:
/* fetching and deriving the key are correctible errors. set the flag */
/* fetching and deriving the key are correctable errors. set the flag */
correctible = B_TRUE;

/* get key material from key format and location */
Expand Down Expand Up @@ -1110,22 +1110,25 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation)

error:
zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf);
if (key_material != NULL)
if (key_material != NULL) {
free(key_material);
if (key_data != NULL)
key_material = NULL;
}
if (key_data != NULL) {
free(key_data);
key_data = NULL;
}

/*
* Here we decide if it is ok to allow the user to retry entering their
* key. The can_retry flag will be set if the user is entering their
* key from an interactive prompt. The correctible flag will only be
* set if an error that occured could be corrected by retrying. Both
* key from an interactive prompt. The correctable flag will only be
* set if an error that occurred could be corrected by retrying. Both
* flags are needed to allow the user to attempt key entry again
*/
if (can_retry && correctible && attempts <= MAX_KEY_PROMPT_ATTEMPTS) {
attempts++;
attempts++;
if (can_retry && correctible && attempts < MAX_KEY_PROMPT_ATTEMPTS)
goto try_again;
}

return (ret);
}
Expand Down
18 changes: 9 additions & 9 deletions lib/libzfs/libzfs_status.c
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,15 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
if (find_vdev_problem(nvroot, vdev_removed))
return (ZPOOL_STATUS_REMOVED_DEV);

/*
* Informational errata available.
*/
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata);
if (errata) {
*erratap = errata;
return (ZPOOL_STATUS_ERRATA);
}

/*
* Outdated, but usable, version
*/
Expand Down Expand Up @@ -382,15 +391,6 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
}
}

/*
* Informational errata available.
*/
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata);
if (errata) {
*erratap = errata;
return (ZPOOL_STATUS_ERRATA);
}

return (ZPOOL_STATUS_OK);
}

Expand Down
15 changes: 12 additions & 3 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
arc_buf_hdr_t *hdr = vbuf;

bzero(hdr, HDR_FULL_SIZE);
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
refcount_create(&hdr->b_l1hdr.b_refcnt);
mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
Expand Down Expand Up @@ -3246,9 +3247,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata);
IMPLY(alloc_rdata, HDR_PROTECTED(hdr));

if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr))
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;

if (alloc_rdata) {
size = HDR_GET_PSIZE(hdr);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
Expand Down Expand Up @@ -6751,6 +6749,17 @@ arc_write_ready(zio_t *zio)
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
ASSERT(HDR_PROTECTED(hdr));

if (BP_SHOULD_BYTESWAP(bp)) {
if (BP_GET_LEVEL(bp) > 0) {
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64;
} else {
hdr->b_l1hdr.b_byteswap =
DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
}
} else {
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
}

hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
Expand Down
22 changes: 21 additions & 1 deletion module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2029,6 +2029,23 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
return (err);
}

int
dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
dmu_tx_t *tx)
{
dnode_t *dn;
int err;

err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
dnode_new_blkid(dn, maxblkid, tx, B_FALSE);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (0);
}

void
dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
dmu_tx_t *tx)
Expand Down Expand Up @@ -2214,8 +2231,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
dedup = B_FALSE;
}

if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)
if (level <= 0 &&
(type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) {
compress = ZIO_COMPRESS_EMPTY;
}
}

zp->zp_compress = compress;
Expand Down Expand Up @@ -2488,6 +2507,7 @@ EXPORT_SYMBOL(dmu_object_size_from_db);
EXPORT_SYMBOL(dmu_object_dnsize_from_db);
EXPORT_SYMBOL(dmu_object_set_nlevels);
EXPORT_SYMBOL(dmu_object_set_blocksize);
EXPORT_SYMBOL(dmu_object_set_maxblkid);
EXPORT_SYMBOL(dmu_object_set_checksum);
EXPORT_SYMBOL(dmu_object_set_compress);
EXPORT_SYMBOL(dmu_write_policy);
Expand Down
Loading

0 comments on commit ae76f45

Please sign in to comment.