DAOS-16866 bio: pre-allocate more DMA chunks on engine start (#15579)
Each VOS xstream used to pre-allocate 24 DMA chunks (192MB) on engine start; the
per-xstream DMA buffer was then expanded on demand until it hit the upper bound
(128 chunks, 1GB by default).

This PR bumps the pre-allocated size to 60% of the upper bound and makes the
pre-allocation percentage configurable via the DAOS_DMA_INIT_PCT environment variable.
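For reference, with the defaults implied by the figures above (128-chunk upper bound,
1GB total, i.e. 8MB chunks), the 60% setting pre-allocates 128 * 60 / 100 = 76 chunks,
roughly 608MB per xstream, versus the previous fixed 24 chunks (192MB). Values of
DAOS_DMA_INIT_PCT outside 1-99 fall back to the 60% default. A minimal standalone
sketch of that arithmetic follows; the variable names are local to the example and
only mirror the DAOS symbols, they are not the actual implementation:

#include <stdio.h>

int main(void)
{
	unsigned int chk_cnt_max = 128; /* default per-xstream upper bound (1GB) */
	unsigned int init_pct    = 60;  /* default, overridable via DAOS_DMA_INIT_PCT */
	unsigned int chk_sz_mb   = 8;   /* chunk size implied by 128 chunks == 1GB */
	unsigned int init_cnt    = chk_cnt_max * init_pct / 100;

	if (init_cnt == 0) /* always pre-allocate at least one chunk */
		init_cnt = 1;

	/* prints: pre-allocate 76 chunks (608MB) per xstream */
	printf("pre-allocate %u chunks (%uMB) per xstream\n",
	       init_cnt, init_cnt * chk_sz_mb);
	return 0;
}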

Signed-off-by: Niu Yawei <yawei.niu@hpe.com>
NiuYawei authored Dec 20, 2024
1 parent 1499fcc commit 14f2abd
Showing 3 changed files with 35 additions and 10 deletions.
5 changes: 4 additions & 1 deletion src/bio/bio_buffer.c
@@ -231,6 +231,7 @@ dma_buffer_create(unsigned int init_cnt, int tgt_id)

rc = dma_buffer_grow(buf, init_cnt);
if (rc != 0) {
D_ERROR("Failed to grow DMA buffer (%u chunks)\n", buf->bdb_tot_cnt);
dma_buffer_destroy(buf);
return NULL;
}
@@ -867,8 +868,10 @@ dma_map_one(struct bio_desc *biod, struct bio_iov *biov, void *arg)
*/
if (pg_cnt > bio_chk_sz) {
chk = dma_alloc_chunk(pg_cnt);
if (chk == NULL)
if (chk == NULL) {
D_ERROR("Failed to allocate %u pages DMA buffer\n", pg_cnt);
return -DER_NOMEM;
}

chk->bdc_type = biod->bd_chk_type;
rc = iod_add_chunk(biod, chk);
31 changes: 22 additions & 9 deletions src/bio/bio_xstream.c
@@ -31,8 +31,8 @@
/* SPDK blob parameters */
#define DAOS_BS_CLUSTER_SZ (1ULL << 25) /* 32MB */
/* DMA buffer parameters */
#define DAOS_DMA_CHUNK_CNT_INIT 24 /* Per-xstream init chunks, 192MB */
#define DAOS_DMA_CHUNK_CNT_MAX 128 /* Per-xstream max chunks, 1GB */
#define DAOS_DMA_CHUNK_INIT_PCT 60 /* Default per-xstream init chunks, in percentage */
#define DAOS_DMA_CHUNK_CNT_MAX 128 /* Default per-xstream max chunks, 1GB */
#define DAOS_DMA_CHUNK_CNT_MIN 32 /* Per-xstream min chunks, 256MB */

/* Max in-flight blob IOs per io channel */
@@ -48,8 +48,8 @@ unsigned int bio_chk_sz;
unsigned int bio_chk_cnt_max;
/* NUMA node affinity */
unsigned int bio_numa_node;
/* Per-xstream initial DMA buffer size (in chunk count) */
static unsigned int bio_chk_cnt_init;
/* Per-xstream initial DMA buffer size (in percentage) */
static unsigned int bio_chk_init_pct;
/* Direct RDMA over SCM */
bool bio_scm_rdma;
/* Whether SPDK inited */
@@ -203,6 +203,14 @@ bypass_health_collect()
return nvme_glb.bd_bypass_health_collect;
}

static inline unsigned int
init_chk_cnt()
{
unsigned init_cnt = (bio_chk_cnt_max * bio_chk_init_pct / 100);

return (init_cnt == 0) ? 1 : init_cnt;
}

int
bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size,
unsigned int hugepage_size, unsigned int tgt_nr, bool bypass_health_collect)
@@ -249,7 +257,7 @@ bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size,
*/
glb_criteria.fc_max_csum_errs = UINT32_MAX;

bio_chk_cnt_init = DAOS_DMA_CHUNK_CNT_INIT;
bio_chk_init_pct = DAOS_DMA_CHUNK_INIT_PCT;
bio_chk_cnt_max = DAOS_DMA_CHUNK_CNT_MAX;
bio_chk_sz = ((uint64_t)size_mb << 20) >> BIO_DMA_PAGE_SHIFT;

@@ -291,8 +299,13 @@ bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size,
mem_size, tgt_nr);
return -DER_INVAL;
}
D_INFO("Set per-xstream DMA buffer upper bound to %u %uMB chunks\n",
bio_chk_cnt_max, size_mb);

d_getenv_uint("DAOS_DMA_INIT_PCT", &bio_chk_init_pct);
if (bio_chk_init_pct == 0 || bio_chk_init_pct >= 100)
bio_chk_init_pct = DAOS_DMA_CHUNK_INIT_PCT;

D_INFO("Set per-xstream DMA buffer upper bound to %u %uMB chunks, prealloc %u chunks\n",
bio_chk_cnt_max, size_mb, init_chk_cnt());

spdk_bs_opts_init(&nvme_glb.bd_bs_opts, sizeof(nvme_glb.bd_bs_opts));
nvme_glb.bd_bs_opts.cluster_sz = DAOS_BS_CLUSTER_SZ;
@@ -1560,7 +1573,7 @@ bio_xsctxt_alloc(struct bio_xs_context **pctxt, int tgt_id, bool self_polling)

/* Skip NVMe context setup if the daos_nvme.conf isn't present */
if (!bio_nvme_configured(SMD_DEV_TYPE_MAX)) {
ctxt->bxc_dma_buf = dma_buffer_create(bio_chk_cnt_init, tgt_id);
ctxt->bxc_dma_buf = dma_buffer_create(init_chk_cnt(), tgt_id);
if (ctxt->bxc_dma_buf == NULL) {
D_FREE(ctxt);
*pctxt = NULL;
@@ -1673,7 +1686,7 @@ bio_xsctxt_alloc(struct bio_xs_context **pctxt, int tgt_id, bool self_polling)
D_ASSERT(d_bdev != NULL);
}

ctxt->bxc_dma_buf = dma_buffer_create(bio_chk_cnt_init, tgt_id);
ctxt->bxc_dma_buf = dma_buffer_create(init_chk_cnt(), tgt_id);
if (ctxt->bxc_dma_buf == NULL) {
D_ERROR("failed to initialize dma buffer\n");
rc = -DER_NOMEM;
9 changes: 9 additions & 0 deletions utils/test_memcheck.supp
@@ -418,3 +418,12 @@
...
fun:runtime.persistentalloc
}
{
DAOS-16866
Memcheck:Leak
match-leak-kinds: reachable
fun:malloc
fun:mem_map_get_map_1gb
fun:spdk_mem_map_set_translation
...
}
