Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial version of time dependent geometry #9

Closed
Closed
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_NPARITY "nparity"
#define ZPOOL_CONFIG_RAIDZ_LOGICAL_WIDTH "raidz_logical_width"
#define ZPOOL_CONFIG_RAIDZ_EXPAND_OFFSET "raidz_expand_offset"
#define ZPOOL_CONFIG_RAIDZ_EXPAND_TXGS "raidz_expand_txgs"
#define ZPOOL_CONFIG_HOSTID "hostid"
#define ZPOOL_CONFIG_HOSTNAME "hostname"
#define ZPOOL_CONFIG_LOADED_TIME "initial_load_time"
Expand Down
7 changes: 7 additions & 0 deletions include/sys/vdev_raidz.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ extern "C" {
struct zio;
struct raidz_row;
struct raidz_map;
struct vdev_raidz;
#if !defined(_KERNEL)
struct kernel_param {};
#endif
Expand All @@ -47,6 +48,7 @@ struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
struct raidz_map *vdev_raidz_map_alloc_expanded(abd_t *, uint64_t, uint64_t,
uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
void vdev_raidz_map_free(struct raidz_map *);
void vdev_raidz_free(struct vdev_raidz *);
void vdev_raidz_generate_parity(struct raidz_map *);
void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);

Expand Down Expand Up @@ -84,6 +86,11 @@ typedef struct vdev_raidz_expand {

uint64_t vre_offset_pertxg[TXG_SIZE];

/*
* Last reflow txg per attached device.
*/
avl_tree_t vre_txgs;
ahrens marked this conversation as resolved.
Show resolved Hide resolved

dsl_scan_state_t vre_state;
time_t vre_start_time;
time_t vre_end_time;
Expand Down
7 changes: 3 additions & 4 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <sys/abd.h>
#include <sys/vdev_initialize.h>
#include <sys/vdev_trim.h>
#include <sys/vdev_raidz.h>
#include <sys/zvol.h>
#include <sys/zfs_ratelimit.h>

Expand Down Expand Up @@ -909,10 +910,8 @@ vdev_free(vdev_t *vd)
ASSERT(vd->vdev_child == NULL);
ASSERT(vd->vdev_guid_sum == vd->vdev_guid);

if (vd->vdev_ops == &vdev_raidz_ops) {
vdev_raidz_t *rz = vd->vdev_tsd;
kmem_free(rz, sizeof (*rz));
}
if (vd->vdev_ops == &vdev_raidz_ops)
vdev_raidz_free(vd->vdev_tsd);

/*
* Discard allocation state.
Expand Down
137 changes: 119 additions & 18 deletions module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,38 @@ vdev_raidz_map_free_vsd(zio_t *zio)
}
}

typedef struct reflow_node {
uint64_t re_txg;
uint64_t re_logical_width;
ahrens marked this conversation as resolved.
Show resolved Hide resolved
avl_node_t re_link;
} reflow_node_t;

/*
 * AVL comparison callback for reflow_node_t entries: orders nodes by
 * ascending re_txg.
 *
 * Returns -1 if x1 sorts before x2, 1 if it sorts after, and 0 if both
 * nodes carry the same re_txg.
 */
static int
vedv_raidz_reflow_compare(const void *x1, const void *x2)
{
	/* No cast needed from const void *, and constness is preserved. */
	const reflow_node_t *l = x1;
	const reflow_node_t *r = x2;

	if (l->re_txg < r->re_txg)
		return (-1);
	if (l->re_txg > r->re_txg)
		return (1);

	return (0);
}

/*
 * Release a vdev_raidz_t: free every reflow_node_t still linked into its
 * vre_txgs tree, destroy the tree itself, then free the structure.
 */
void
vdev_raidz_free(vdev_raidz_t *vdrz)
{
	avl_tree_t *txgs = &vdrz->vn_vre.vre_txgs;
	void *cookie = NULL;

	/* avl_destroy_nodes() hands back one node per call until empty. */
	for (;;) {
		reflow_node_t *node = avl_destroy_nodes(txgs, &cookie);

		if (node == NULL)
			break;
		kmem_free(node, sizeof (*node));
	}

	avl_destroy(txgs);
	kmem_free(vdrz, sizeof (*vdrz));
}

/*ARGSUSED*/
static void
vdev_raidz_cksum_free(void *arg, size_t ignored)
Expand Down Expand Up @@ -2010,6 +2042,20 @@ vdev_raidz_io_start_read(zio_t *zio, raidz_row_t *rr, boolean_t forceparity)
}
}

/*
 * Return the logical width recorded for a block born in txg blk_birth.
 *
 * The vre_txgs tree is searched for a record whose re_txg equals
 * blk_birth; if there is none, the nearest record with a smaller re_txg
 * is used instead.
 *
 * If no such record exists (for example, the tree is empty), fall back to
 * the vdev's vd_logical_width instead of dereferencing a NULL node.
 */
static uint64_t
vdev_raidz_get_width(vdev_raidz_t *vdrz, uint64_t blk_birth)
{
	avl_tree_t *txgs = &vdrz->vn_vre.vre_txgs;
	reflow_node_t lookup = { .re_txg = blk_birth };
	reflow_node_t *re;
	avl_index_t where;

	re = avl_find(txgs, &lookup, &where);
	if (re == NULL) {
		/* No exact match: take the record just before the gap. */
		re = avl_nearest(txgs, where, AVL_BEFORE);
	}

	/* avl_nearest() yields NULL when nothing precedes blk_birth. */
	if (re == NULL)
		return (vdrz->vd_logical_width);

	return (re->re_logical_width);
}

/*
* Start an IO operation on a RAIDZ VDev
*
Expand All @@ -2036,22 +2082,30 @@ vdev_raidz_io_start(zio_t *zio)
raidz_map_t *rm;

if (vdrz->vd_logical_width != vdrz->vd_physical_width) {
/* XXX rangelock not needed after expansion completes */
zfs_locked_range_t *lr =
zfs_rangelock_enter(&vdrz->vn_vre.vre_rangelock,
zio->io_offset, zio->io_size, RL_READER);

rm = vdev_raidz_map_alloc_expanded(zio->io_abd,
zio->io_size, zio->io_offset,
tvd->vdev_ashift, vdrz->vd_physical_width,
vdrz->vd_logical_width, vdrz->vd_nparity,
vdrz->vn_vre.vre_offset_phys);
rm->rm_lr = lr;
/*
* XXX If this is a write, will need to do additional
* writes to locations that are already copied, but
* not yet reflected in the on-disk format.
*/
uint64_t width = vdev_raidz_get_width(vdrz, zio->io_bp->blk_birth);
ahrens marked this conversation as resolved.
Show resolved Hide resolved
if (vdrz->vn_vre.vre_offset != UINT64_MAX ||
(zio->io_type == ZIO_TYPE_READ && width != vdrz->vd_physical_width)) {
ahrens marked this conversation as resolved.
Show resolved Hide resolved
/* XXX rangelock not needed after expansion completes */
zfs_locked_range_t *lr =
zfs_rangelock_enter(&vdrz->vn_vre.vre_rangelock,
zio->io_offset, zio->io_size, RL_READER);

rm = vdev_raidz_map_alloc_expanded(zio->io_abd,
zio->io_size, zio->io_offset,
tvd->vdev_ashift, vdrz->vd_physical_width,
width, vdrz->vd_nparity,
vdrz->vn_vre.vre_offset_phys);
rm->rm_lr = lr;
/*
* XXX If this is a write, will need to do additional
* writes to locations that are already copied, but
* not yet reflected in the on-disk format.
*/
} else {
rm = vdev_raidz_map_alloc(zio,
tvd->vdev_ashift, vdrz->vd_physical_width,
vdrz->vd_nparity);
}
} else {
rm = vdev_raidz_map_alloc(zio,
tvd->vdev_ashift, vdrz->vd_logical_width,
Expand Down Expand Up @@ -2977,12 +3031,19 @@ raidz_reflow_complete_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg;
vdev_raidz_expand_t *vre = spa->spa_raidz_expand;
vdev_t *raidvd = vdev_lookup_top(spa, vre->vre_vdev_id);
vdev_raidz_t *vdrz = raidvd->vdev_tsd;

for (int i = 0; i < TXG_SIZE; i++)
ASSERT0(vre->vre_offset_pertxg[i]);

vre->vre_offset_phys = UINT64_MAX;

reflow_node_t *re = kmem_zalloc(sizeof (*re), KM_SLEEP);
re->re_txg = tx->tx_txg;
re->re_logical_width = vdrz->vd_physical_width - 1;
avl_add(&vdrz->vn_vre.vre_txgs, re);

/*
* vre_offset_phys will be removed from the on-disk config by
* vdev_raidz_config_generate().
Expand Down Expand Up @@ -3337,6 +3398,13 @@ vdev_raidz_attach_sync(void *arg, dmu_tx_t *tx)
ASSERT3P(raidvd->vdev_child[raidvd->vdev_children - 1], ==,
new_child);

if (vdrz->vd_logical_width == vdrz->vd_physical_width) {
reflow_node_t *re = kmem_zalloc(sizeof (*re), KM_SLEEP);
re->re_txg = 0;
re->re_logical_width = vdrz->vd_logical_width;
avl_add(&vdrz->vn_vre.vre_txgs, re);
}

vdrz->vd_physical_width++;

vdrz->vn_vre.vre_vdev_id = raidvd->vdev_id;
Expand Down Expand Up @@ -3402,6 +3470,24 @@ vdev_raidz_config_generate(vdev_t *vd, nvlist_t *nv)
fnvlist_add_uint64(nv, ZPOOL_CONFIG_RAIDZ_EXPAND_OFFSET,
vdrz->vn_vre.vre_offset_phys);
}

if (!avl_is_empty(&vdrz->vn_vre.vre_txgs)) {
uint64_t i = 0;
uint64_t count = avl_numnodes(&vdrz->vn_vre.vre_txgs);
uint64_t *txgs = kmem_alloc(sizeof (uint64_t) * count,
KM_SLEEP);

for (reflow_node_t *re =
avl_first(&vdrz->vn_vre.vre_txgs); re;
re = AVL_NEXT(&vdrz->vn_vre.vre_txgs, re)) {
txgs[i++] = re->re_txg;
}

fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_RAIDZ_EXPAND_TXGS,
txgs, count);

kmem_free(txgs, sizeof (uint64_t) * count);
}
}

/*
Expand All @@ -3412,7 +3498,8 @@ vdev_raidz_config_generate(vdev_t *vd, nvlist_t *nv)
void *
vdev_raidz_get_tsd(spa_t *spa, nvlist_t *nv)
{
uint64_t nparity, lw;
uint64_t nparity, lw, *txgs;
uint_t txgs_size;
vdev_raidz_t *vdrz = kmem_zalloc(sizeof (*vdrz), KM_SLEEP);

vdrz->vn_vre.vre_vdev_id = -1;
Expand Down Expand Up @@ -3450,6 +3537,20 @@ vdev_raidz_get_tsd(spa_t *spa, nvlist_t *nv)
*/
}

avl_create(&vdrz->vn_vre.vre_txgs, vedv_raidz_reflow_compare,
sizeof (reflow_node_t), offsetof(reflow_node_t, re_link));

error = nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_RAIDZ_EXPAND_TXGS,
&txgs, &txgs_size);
if (error == 0) {
for (int i = 0; i < txgs_size; i++) {
reflow_node_t *re = kmem_zalloc(sizeof (*re), KM_SLEEP);
re->re_txg = txgs[i];
re->re_logical_width = vdrz->vd_logical_width + i;
avl_add(&vdrz->vn_vre.vre_txgs, re);
ahrens marked this conversation as resolved.
Show resolved Hide resolved
}
}

if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
&nparity) == 0) {
if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY)
Expand Down Expand Up @@ -3479,7 +3580,7 @@ vdev_raidz_get_tsd(spa_t *spa, nvlist_t *nv)
vdrz->vd_nparity = nparity;
return (vdrz);
out:
kmem_free(vdrz, sizeof (*vdrz));
vdev_raidz_free(vdrz);
return (NULL);
}

Expand Down