Skip to content

Commit

Permalink
ddt: cleanup the stats & histogram code
Browse files Browse the repository at this point in the history
Both the API and the code were kinda mangled and I was really struggling
to follow it. The worst offender was the old ddt_stat_add(); after
fixing it up the rest of the changes are mostly knock-on effects and
targets of opportunity.

Note that the old ddt_stat_add() was safe against overflows - it could
produce crazy numbers, but the compiler wouldn't do anything stupid. The
assertions in ddt_stat_sub() go a lot of the way to protecting against
this; getting in a position where overflows are a problem is definitely
a programming error.

Also expanding ddt_stat_add() and ddt_histogram_empty() produces less
efficient assembly. I'm not bothered about this right now though; these
should not be hot functions, and if they are we'll optimise them later.
If we have to go back to the old form, we'll comment it like crazy.

Finally, I've removed the assertion that the bucket will never be
negative, as it will soon be possible to have entries with zero
refcounts: an entry for a block that is no longer on the pool, but is on
the log waiting to be synced out. It might be better to have a separate
bucket for these, since they're still using real space on disk, but
ultimately these stats are driving UI, and for now I've chosen to keep
them matching how they've looked in the past, as well as match the
operators mental model - pool usage is managed elsewhere.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15895
  • Loading branch information
robn authored and behlendorf committed Aug 16, 2024
1 parent f4aeb23 commit 27e9cb5
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 51 deletions.
22 changes: 10 additions & 12 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -7357,29 +7357,27 @@ dump_simulated_ddt(spa_t *spa)
spa_config_exit(spa, SCL_CONFIG, FTAG);

while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
ddt_stat_t dds;
uint64_t refcnt = zdde->zdde_ref_blocks;
ASSERT(refcnt != 0);

dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
dds.dds_psize = zdde->zdde_ref_psize / refcnt;
dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
ddt_stat_t *dds = &ddh_total.ddh_stat[highbit64(refcnt) - 1];

dds.dds_ref_blocks = zdde->zdde_ref_blocks;
dds.dds_ref_lsize = zdde->zdde_ref_lsize;
dds.dds_ref_psize = zdde->zdde_ref_psize;
dds.dds_ref_dsize = zdde->zdde_ref_dsize;
dds->dds_blocks += zdde->zdde_ref_blocks / refcnt;
dds->dds_lsize += zdde->zdde_ref_lsize / refcnt;
dds->dds_psize += zdde->zdde_ref_psize / refcnt;
dds->dds_dsize += zdde->zdde_ref_dsize / refcnt;

ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
&dds, 0);
dds->dds_ref_blocks += zdde->zdde_ref_blocks;
dds->dds_ref_lsize += zdde->zdde_ref_lsize;
dds->dds_ref_psize += zdde->zdde_ref_psize;
dds->dds_ref_dsize += zdde->zdde_ref_dsize;

umem_free(zdde, sizeof (*zdde));
}

avl_destroy(&t);

ddt_histogram_stat(&dds_total, &ddh_total);
ddt_histogram_total(&dds_total, &ddh_total);

(void) printf("Simulated DDT histogram:\n");

Expand Down
8 changes: 7 additions & 1 deletion include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,9 +318,15 @@ extern uint64_t ddt_phys_birth(const ddt_univ_phys_t *ddp,
extern int ddt_phys_dva_count(const ddt_univ_phys_t *ddp, ddt_phys_variant_t v,
boolean_t encrypted);

extern void ddt_histogram_add_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe);
extern void ddt_histogram_sub_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe);

extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
extern void ddt_histogram_total(ddt_stat_t *dds, const ddt_histogram_t *ddh);
extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);

extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
extern uint64_t ddt_get_ddt_dsize(spa_t *spa);
extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
Expand Down
4 changes: 0 additions & 4 deletions include/sys/ddt_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ typedef struct {

extern const ddt_ops_t ddt_zap_ops;

extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);

/*
* These are only exposed so that zdb can access them. Try not to use them
* outside of the DDT implementation proper, and if you do, consider moving
Expand All @@ -95,8 +93,6 @@ extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt, const ddt_entry_t *dde);

extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);

extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);

extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
char *name);
extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
Expand Down
24 changes: 20 additions & 4 deletions module/zfs/ddt.c
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,18 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp)
/* Flag cleanup required */
dde->dde_flags |= DDE_FLAG_OVERQUOTA;
} else if (error == 0) {
ddt_stat_update(ddt, dde, -1ULL);
/*
* The histograms only track inactive (stored) blocks.
* We've just put an entry onto the live list, so we need to
* remove its counts. When its synced back, it'll be re-added
* to the right one.
*/
ddt_histogram_t *ddh =
&ddt->ddt_histogram[dde->dde_type][dde->dde_class];

ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
ddt_histogram_sub_entry(ddt, ddh, &ddlwe);
}

/* Entry loaded, everyone can proceed now */
Expand Down Expand Up @@ -1527,11 +1538,18 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
if (total_refcnt != 0) {
dde->dde_type = ntype;
dde->dde_class = nclass;
ddt_stat_update(ddt, dde, 0);

if (!ddt_object_exists(ddt, ntype, nclass))
ddt_object_create(ddt, ntype, nclass, tx);
VERIFY0(ddt_object_update(ddt, ntype, nclass, dde, tx));

ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);

ddt_histogram_t *ddh =
&ddt->ddt_histogram[ntype][nclass];
ddt_histogram_add_entry(ddt, ddh, &ddlwe);

/*
* If the class changes, the order that we scan this bp
* changes. If it decreases, we could miss it, so
Expand All @@ -1540,8 +1558,6 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
* traversing.)
*/
if (nclass < oclass) {
ddt_lightweight_entry_t ddlwe;
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
dsl_scan_ddt_entry(dp->dp_scan,
ddt->ddt_checksum, ddt, &ddlwe, tx);
}
Expand Down
107 changes: 77 additions & 30 deletions module/zfs/ddt_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,24 @@
#include <sys/ddt_impl.h>

static void
ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
ddt_stat_generate(ddt_t *ddt, const ddt_lightweight_entry_t *ddlwe,
ddt_stat_t *dds)
{
spa_t *spa = ddt->ddt_spa;
ddt_key_t *ddk = &dde->dde_key;
uint64_t lsize = DDK_GET_LSIZE(ddk);
uint64_t psize = DDK_GET_PSIZE(ddk);
uint64_t lsize = DDK_GET_LSIZE(&ddlwe->ddlwe_key);
uint64_t psize = DDK_GET_PSIZE(&ddlwe->ddlwe_key);

memset(dds, 0, sizeof (*dds));

for (int p = 0; p < DDT_NPHYS(ddt); p++) {
const ddt_univ_phys_t *ddp = dde->dde_phys;
for (int p = 0; p < ddlwe->ddlwe_nphys; p++) {
const ddt_univ_phys_t *ddp = &ddlwe->ddlwe_phys;
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);

if (ddt_phys_birth(ddp, v) == 0)
continue;

int ndvas = ddt_phys_dva_count(ddp, v,
DDK_GET_CRYPT(&dde->dde_key));
DDK_GET_CRYPT(&ddlwe->ddlwe_key));
const dva_t *dvas = (ddt->ddt_flags & DDT_FLAG_FLAT) ?
ddp->ddp_flat.ddp_dva : ddp->ddp_trad[p].ddp_dva;

Expand All @@ -72,61 +72,108 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
}
}

static void
ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src)
{
dst->dds_blocks += src->dds_blocks;
dst->dds_lsize += src->dds_lsize;
dst->dds_psize += src->dds_psize;
dst->dds_dsize += src->dds_dsize;
dst->dds_ref_blocks += src->dds_ref_blocks;
dst->dds_ref_lsize += src->dds_ref_lsize;
dst->dds_ref_psize += src->dds_ref_psize;
dst->dds_ref_dsize += src->dds_ref_dsize;
}

static void
ddt_stat_sub(ddt_stat_t *dst, const ddt_stat_t *src)
{
/* This caught more during development than you might expect... */
ASSERT3U(dst->dds_blocks, >=, src->dds_blocks);
ASSERT3U(dst->dds_lsize, >=, src->dds_lsize);
ASSERT3U(dst->dds_psize, >=, src->dds_psize);
ASSERT3U(dst->dds_dsize, >=, src->dds_dsize);
ASSERT3U(dst->dds_ref_blocks, >=, src->dds_ref_blocks);
ASSERT3U(dst->dds_ref_lsize, >=, src->dds_ref_lsize);
ASSERT3U(dst->dds_ref_psize, >=, src->dds_ref_psize);
ASSERT3U(dst->dds_ref_dsize, >=, src->dds_ref_dsize);

dst->dds_blocks -= src->dds_blocks;
dst->dds_lsize -= src->dds_lsize;
dst->dds_psize -= src->dds_psize;
dst->dds_dsize -= src->dds_dsize;
dst->dds_ref_blocks -= src->dds_ref_blocks;
dst->dds_ref_lsize -= src->dds_ref_lsize;
dst->dds_ref_psize -= src->dds_ref_psize;
dst->dds_ref_dsize -= src->dds_ref_dsize;
}

void
ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg)
ddt_histogram_add_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe)
{
const uint64_t *s = (const uint64_t *)src;
uint64_t *d = (uint64_t *)dst;
uint64_t *d_end = (uint64_t *)(dst + 1);
ddt_stat_t dds;
int bucket;

ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */
ddt_stat_generate(ddt, ddlwe, &dds);

bucket = highbit64(dds.dds_ref_blocks) - 1;
if (bucket < 0)
return;

for (int i = 0; i < d_end - d; i++)
d[i] += (s[i] ^ neg) - neg;
ddt_stat_add(&ddh->ddh_stat[bucket], &dds);
}

void
ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
ddt_histogram_sub_entry(ddt_t *ddt, ddt_histogram_t *ddh,
const ddt_lightweight_entry_t *ddlwe)
{
ddt_stat_t dds;
ddt_histogram_t *ddh;
int bucket;

ddt_stat_generate(ddt, dde, &dds);
ddt_stat_generate(ddt, ddlwe, &dds);

bucket = highbit64(dds.dds_ref_blocks) - 1;
ASSERT3U(bucket, >=, 0);
if (bucket < 0)
return;

ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];

ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg);
ddt_stat_sub(&ddh->ddh_stat[bucket], &dds);
}

void
ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
{
for (int h = 0; h < 64; h++)
ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0);
ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h]);
}

void
ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh)
ddt_histogram_total(ddt_stat_t *dds, const ddt_histogram_t *ddh)
{
memset(dds, 0, sizeof (*dds));

for (int h = 0; h < 64; h++)
ddt_stat_add(dds, &ddh->ddh_stat[h], 0);
ddt_stat_add(dds, &ddh->ddh_stat[h]);
}

boolean_t
ddt_histogram_empty(const ddt_histogram_t *ddh)
{
const uint64_t *s = (const uint64_t *)ddh;
const uint64_t *s_end = (const uint64_t *)(ddh + 1);
for (int h = 0; h < 64; h++) {
const ddt_stat_t *dds = &ddh->ddh_stat[h];

if (dds->dds_blocks == 0 &&
dds->dds_lsize == 0 &&
dds->dds_psize == 0 &&
dds->dds_dsize == 0 &&
dds->dds_ref_blocks == 0 &&
dds->dds_ref_lsize == 0 &&
dds->dds_ref_psize == 0 &&
dds->dds_ref_dsize == 0)
continue;

while (s < s_end)
if (*s++ != 0)
return (B_FALSE);
return (B_FALSE);
}

return (B_TRUE);
}
Expand Down Expand Up @@ -222,7 +269,7 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)

ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
ddt_get_dedup_histogram(spa, ddh_total);
ddt_histogram_stat(dds_total, ddh_total);
ddt_histogram_total(dds_total, ddh_total);
kmem_free(ddh_total, sizeof (ddt_histogram_t));
}

Expand Down

0 comments on commit 27e9cb5

Please sign in to comment.