Skip to content

Commit

Permalink
Encryption Stability and On-Disk Format Fixes
Browse files Browse the repository at this point in the history
The on-disk format for encrypted datasets protects not only
the encrypted and authenticated blocks themselves, but also
the order and interpretation of these blocks. In order to
make this work while maintaining the ability to do raw
sends, the indirect bps maintain a secure checksum of all
the MACs in the block below it along with a few other
fields that determine how the data is interpreted.

Unfortunately, the current on-disk format erroneously
includes some fields which are not portable and thus cannot
support raw sends. It is not possible to easily work around
this issue due to a separate and much smaller bug which
causes indirect blocks for encrypted dnodes to not be
compressed, which conflicts with the previous bug. In
addition, the current code generates incompatible on-disk
formats on big endian and little endian systems due to an
issue with how block pointers are authenticated. Finally,
raw send streams do not currently include dn_maxblkid when
sending both the metadnode and normal dnodes which are
needed in order to ensure that we are correctly maintaining
the portable objset MAC.

This patch zero's out the offending fields when computing
the bp MAC and ensures that these MACs are always
calculated in little endian order (regardless of the host
system's byte order). This patch also registers an errata
for the old on-disk format, which we detect by adding a
"version" field to newly created DSL Crypto Keys. We allow
datasets without a version (version 0) to only be mounted
for read so that they can easily be migrated. We also now
include dn_maxblkid in raw send streams to ensure the MAC
can be maintained correctly.

Fixes openzfs#6845

Signed-off-by: Tom Caputi <tcaputi@datto.com>
  • Loading branch information
Tom Caputi authored and prometheanfire committed Jan 11, 2018
1 parent fcbf46e commit 073e9d8
Show file tree
Hide file tree
Showing 28 changed files with 687 additions and 163 deletions.
4 changes: 2 additions & 2 deletions cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ get_usage(zfs_help_t idx)
return (gettext("\tunload-key [-r] "
"<-a | filesystem|volume>\n"));
case HELP_CHANGE_KEY:
return (gettext("\tchange-key [-l] [-o keyformat=<value>]"
"\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]"
return (gettext("\tchange-key [-l] [-o keyformat=<value>]\n"
"\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]\n"
"\t <filesystem|volume>\n"
"\tchange-key -i [-l] <filesystem|volume>\n"));
}
Expand Down
15 changes: 15 additions & 0 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2121,6 +2121,15 @@ show_import(nvlist_t *config)
"updating.\n"));
break;

case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION:
(void) printf(gettext(" action: Existing "
"encrypted datasets contain an on-disk "
"incompatibility, which\n\tneeds to be "
"corrected. Backup these datasets to new "
"encrypted datasets\n\tand destroy the "
"old ones.\n"));
break;

default:
/*
* All errata must contain an action message.
Expand Down Expand Up @@ -6499,6 +6508,12 @@ status_callback(zpool_handle_t *zhp, void *data)
"run 'zpool scrub'.\n"));
break;

case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION:
(void) printf(gettext("action: To correct the issue "
"backup existing encrypted datasets to new\n\t"
"encrypted datasets and destroy the old ones.\n"));
break;

default:
/*
* All errata which allow the pool to be imported
Expand Down
4 changes: 3 additions & 1 deletion cmd/zstreamdump/zstreamdump.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,8 @@ main(int argc, char *argv[])
(void) printf("OBJECT object = %llu type = %u "
"bonustype = %u blksz = %u bonuslen = %u "
"dn_slots = %u raw_bonuslen = %u "
"flags = %u indblkshift = %u nlevels = %u "
"flags = %u maxblkid = %llu "
"indblkshift = %u nlevels = %u "
"nblkptr = %u\n",
(u_longlong_t)drro->drr_object,
drro->drr_type,
Expand All @@ -461,6 +462,7 @@ main(int argc, char *argv[])
drro->drr_dn_slots,
drro->drr_raw_bonuslen,
drro->drr_flags,
(u_longlong_t)drro->drr_maxblkid,
drro->drr_indblkshift,
drro->drr_nlevels,
drro->drr_nblkptr);
Expand Down
7 changes: 7 additions & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,13 @@ int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
int ibs, dmu_tx_t *tx);

/*
* Manually set the maxblkid on a dnode. This will adjust nlevels accordingly
* to accommodate the change.
*/
int dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
dmu_tx_t *tx);

/*
* Set the checksum property on a dnode. The new checksum algorithm will
* apply to all newly written blocks; existing blocks will not be affected.
Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ boolean_t dmu_objset_userobjused_enabled(objset_t *os);
boolean_t dmu_objset_userobjspace_upgradable(objset_t *os);
void dmu_objset_userobjspace_upgrade(objset_t *os);
boolean_t dmu_objset_userobjspace_present(objset_t *os);
boolean_t dmu_objset_incompatible_encryption_version(objset_t *os);

int dmu_fsname(const char *snapname, char *buf);

Expand Down
8 changes: 8 additions & 0 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,13 @@ typedef struct dnode_phys {
uint64_t dn_maxblkid; /* largest allocated block ID */
uint64_t dn_used; /* bytes (or sectors) of disk space */

/*
* Both dn_pad2 and dn_pad3 are protected by the block's MAC. This
* allows us to protect any fields that might be added here in the
* future. In either case, developers will want to check
* zio_crypt_init_uios_dnode() to ensure the new field is being
* protected properly.
*/
uint64_t dn_pad3[4];

/*
Expand Down Expand Up @@ -301,6 +308,7 @@ struct dnode {
uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */
uint16_t dn_next_bonuslen[TXG_SIZE];
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
uint64_t dn_next_maxblkid[TXG_SIZE]; /* next maxblkid in bytes */

/* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */
uint32_t dn_dbufs_count; /* count of dn_dbufs */
Expand Down
3 changes: 2 additions & 1 deletion include/sys/dsl_crypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1"
#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ"
#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT"

#define DSL_CRYPTO_KEY_VERSION "DSL_CRYPTO_VERSION"

/*
* In-memory representation of a wrapping key. One of these structs will exist
Expand Down Expand Up @@ -169,6 +169,7 @@ int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);
boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd);

void spa_keystore_init(spa_keystore_t *sk);
void spa_keystore_fini(spa_keystore_t *sk);
Expand Down
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,7 @@ typedef enum zpool_errata {
ZPOOL_ERRATA_NONE,
ZPOOL_ERRATA_ZOL_2094_SCRUB,
ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY,
ZPOOL_ERRATA_ZOL_6845_ENCRYPTION,
} zpool_errata_t;

/*
Expand Down
4 changes: 3 additions & 1 deletion include/sys/zfs_ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,12 @@ typedef struct dmu_replay_record {
uint8_t drr_flags;
uint32_t drr_raw_bonuslen;
uint64_t drr_toguid;
/* only nonzero for raw streams */
/* only (possibly) nonzero for raw streams */
uint8_t drr_indblkshift;
uint8_t drr_nlevels;
uint8_t drr_nblkptr;
uint8_t drr_pad[5];
uint64_t drr_maxblkid;
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
Expand Down
11 changes: 8 additions & 3 deletions include/sys/zio_crypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct zbookmark_phys;
#define MASTER_KEY_MAX_LEN 32
#define SHA512_HMAC_KEYLEN 64

#define ZIO_CRYPT_KEY_CURRENT_VERSION 1ULL

typedef enum zio_crypt_type {
ZC_TYPE_NONE = 0,
ZC_TYPE_CCM,
Expand Down Expand Up @@ -64,6 +66,9 @@ typedef struct zio_crypt_key {
/* encryption algorithm */
uint64_t zk_crypt;

/* on-disk format version */
uint64_t zk_version;

/* GUID for uniquely identifying this key. Not encrypted on disk. */
uint64_t zk_guid;

Expand Down Expand Up @@ -104,9 +109,9 @@ int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);

int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
zio_crypt_key_t *key);
int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
uint8_t *mac, zio_crypt_key_t *key);
int zio_crypt_generate_iv(uint8_t *ivbuf);
int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
uint_t datalen, uint8_t *ivbuf, uint8_t *salt);
Expand Down
15 changes: 12 additions & 3 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
arc_buf_hdr_t *hdr = vbuf;

bzero(hdr, HDR_FULL_SIZE);
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
refcount_create(&hdr->b_l1hdr.b_refcnt);
mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
Expand Down Expand Up @@ -3246,9 +3247,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata);
IMPLY(alloc_rdata, HDR_PROTECTED(hdr));

if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr))
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;

if (alloc_rdata) {
size = HDR_GET_PSIZE(hdr);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
Expand Down Expand Up @@ -6751,6 +6749,17 @@ arc_write_ready(zio_t *zio)
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
ASSERT(HDR_PROTECTED(hdr));

if (BP_SHOULD_BYTESWAP(bp)) {
if (BP_GET_LEVEL(bp) > 0) {
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64;
} else {
hdr->b_l1hdr.b_byteswap =
DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
}
} else {
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
}

hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
Expand Down
22 changes: 21 additions & 1 deletion module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2029,6 +2029,23 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
return (err);
}

int
dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
dmu_tx_t *tx)
{
dnode_t *dn;
int err;

err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
dnode_new_blkid(dn, maxblkid, tx, B_FALSE);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (0);
}

void
dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
dmu_tx_t *tx)
Expand Down Expand Up @@ -2214,8 +2231,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
dedup = B_FALSE;
}

if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)
if (level <= 0 &&
(type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) {
compress = ZIO_COMPRESS_EMPTY;
}
}

zp->zp_compress = compress;
Expand Down Expand Up @@ -2488,6 +2507,7 @@ EXPORT_SYMBOL(dmu_object_size_from_db);
EXPORT_SYMBOL(dmu_object_dnsize_from_db);
EXPORT_SYMBOL(dmu_object_set_nlevels);
EXPORT_SYMBOL(dmu_object_set_blocksize);
EXPORT_SYMBOL(dmu_object_set_maxblkid);
EXPORT_SYMBOL(dmu_object_set_checksum);
EXPORT_SYMBOL(dmu_object_set_compress);
EXPORT_SYMBOL(dmu_write_policy);
Expand Down
10 changes: 10 additions & 0 deletions module/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,9 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
return (SET_ERROR(EINVAL));
} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
return (SET_ERROR(EROFS));
} else if (!readonly && decrypt &&
dsl_dir_incompatible_encryption_version(ds->ds_dir)) {
return (SET_ERROR(EROFS));
}

/* if we are decrypting, we can now check MACs in os->os_phys_buf */
Expand Down Expand Up @@ -2635,6 +2638,13 @@ dmu_objset_find(char *name, int func(const char *, void *), void *arg,
return (error);
}

boolean_t
dmu_objset_incompatible_encryption_version(objset_t *os)
{
return (dsl_dir_incompatible_encryption_version(
os->os_dsl_dataset->ds_dir));
}

void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
Expand Down
4 changes: 4 additions & 0 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
drro->drr_flags |= DRR_RAW_BYTESWAP;

/* needed for reconstructing dnp on recv side */
drro->drr_maxblkid = dnp->dn_maxblkid;
drro->drr_indblkshift = dnp->dn_indblkshift;
drro->drr_nlevels = dnp->dn_nlevels;
drro->drr_nblkptr = dnp->dn_nblkptr;
Expand Down Expand Up @@ -2294,6 +2295,7 @@ byteswap_record(dmu_replay_record_t *drr)
DO32(drr_object.drr_bonuslen);
DO32(drr_object.drr_raw_bonuslen);
DO64(drr_object.drr_toguid);
DO64(drr_object.drr_maxblkid);
break;
case DRR_FREEOBJECTS:
DO64(drr_freeobjects.drr_firstobj);
Expand Down Expand Up @@ -2538,6 +2540,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_blksz, drro->drr_indblkshift, tx));
VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
drro->drr_nlevels, tx));
VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object,
drro->drr_maxblkid, tx));
}

if (data != NULL) {
Expand Down
6 changes: 6 additions & 0 deletions module/zfs/dnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk));
bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen));
bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));
bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid));

for (i = 0; i < TXG_SIZE; i++) {
list_link_init(&dn->dn_dirty_link[i]);
Expand Down Expand Up @@ -193,6 +194,7 @@ dnode_dest(void *arg, void *unused)
ASSERT0(dn->dn_rm_spillblk[i]);
ASSERT0(dn->dn_next_bonuslen[i]);
ASSERT0(dn->dn_next_blksz[i]);
ASSERT0(dn->dn_next_maxblkid[i]);
}

ASSERT0(dn->dn_allocated_txg);
Expand Down Expand Up @@ -602,6 +604,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT0(dn->dn_next_bonustype[i]);
ASSERT0(dn->dn_rm_spillblk[i]);
ASSERT0(dn->dn_next_blksz[i]);
ASSERT0(dn->dn_next_maxblkid[i]);
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
Expand Down Expand Up @@ -767,6 +770,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
sizeof (odn->dn_next_bonuslen));
bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0],
sizeof (odn->dn_next_blksz));
bcopy(&odn->dn_next_maxblkid[0], &ndn->dn_next_maxblkid[0],
sizeof (odn->dn_next_maxblkid));
for (i = 0; i < TXG_SIZE; i++) {
list_move_tail(&ndn->dn_dirty_records[i],
&odn->dn_dirty_records[i]);
Expand Down Expand Up @@ -1751,6 +1756,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
goto out;

dn->dn_maxblkid = blkid;
dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] = blkid;

/*
* Compute the number of levels necessary to support the new maxblkid.
Expand Down
12 changes: 12 additions & 0 deletions module/zfs/dnode_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dn->dn_next_nlevels[txgoff] = 0;
dn->dn_next_indblkshift[txgoff] = 0;
dn->dn_next_blksz[txgoff] = 0;
dn->dn_next_maxblkid[txgoff] = 0;

/* ASSERT(blkptrs are zero); */
ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
Expand Down Expand Up @@ -718,6 +719,17 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dn->dn_next_nlevels[txgoff] = 0;
}

/*
* This must be done after dnode_sync_free_range()
* and dnode_increase_indirection().
*/
if (dn->dn_next_maxblkid[txgoff]) {
mutex_enter(&dn->dn_mtx);
dnp->dn_maxblkid = dn->dn_next_maxblkid[txgoff];
dn->dn_next_maxblkid[txgoff] = 0;
mutex_exit(&dn->dn_mtx);
}

if (dn->dn_next_nblkptr[txgoff]) {
/* this should only happen on a realloc */
ASSERT(dn->dn_allocated_txg == tx->tx_txg);
Expand Down
Loading

0 comments on commit 073e9d8

Please sign in to comment.