From f9e119b4aabb7db12629ceb8d1c904b8f31a4c23 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 13 Jan 2025 20:15:00 +0000 Subject: [PATCH 1/2] DAOS-16931 container: fix the oid iv check for same request on retry the priv pointer is always freed/allocated, so use the sgl iov pointer instead. Signed-off-by: Mohamad Chaarawi --- src/container/oid_iv.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/container/oid_iv.c b/src/container/oid_iv.c index ae953a19b58..cc2dd2e0578 100644 --- a/src/container/oid_iv.c +++ b/src/container/oid_iv.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2017-2024 Intel Corporation. + * (C) Copyright 2017-2025 Intel Corporation. * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -130,17 +130,23 @@ oid_iv_ent_update(struct ds_iv_entry *ns_entry, struct ds_iv_key *iv_key, d_rank_t myrank = dss_self_rank(); int rc; + if (src == NULL) { + D_DEBUG(DB_MD, "%u: ON UPDATE delete entry iv_entry %p\n", myrank, ns_entry); + ns_entry->iv_to_delete = 1; + return 0; + } + D_ASSERT(priv != NULL); + oids = src->sg_iovs[0].iov_buf; entry = ns_entry->iv_value.sg_iovs[0].iov_buf; + rc = ABT_mutex_trylock(entry->lock); /** For retry requests, from _iv_op(), the lock may not be released in some cases. */ - if (rc == ABT_ERR_MUTEX_LOCKED && entry->current_req != priv) + if (rc == ABT_ERR_MUTEX_LOCKED && entry->current_req != oids) return -DER_BUSY; - entry->current_req = priv; - avail = &entry->rg; - - oids = src->sg_iovs[0].iov_buf; + entry->current_req = oids; + avail = &entry->rg; if (myrank == oids->req_rank) num_oids = oids->req_num_oids; From 50bc7b4edc2531f5580e54931c985e482efb1125 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 14 Jan 2025 16:07:35 +0000 Subject: [PATCH 2/2] DAOS-16931 container: just for debug Quick-Functional: true Test-tag: test_daos_dfs_unit Test-repeat: 10 Signed-off-by: Mohamad Chaarawi --- src/cart/crt_iv.c | 11 +++++------ src/container/oid_iv.c | 20 ++++++++++++++------ src/tests/suite/dfs_unit_test.c | 15 +++++++++++---- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/cart/crt_iv.c b/src/cart/crt_iv.c index c1d9c9e85cb..c07dcd9b2f3 100644 --- a/src/cart/crt_iv.c +++ b/src/cart/crt_iv.c @@ -3151,7 +3151,7 @@ crt_hdlr_iv_update(crt_rpc_t *rpc_req) struct crt_ivns_id ivns_id; struct crt_ivns_internal *ivns_internal = NULL; struct crt_iv_ops *iv_ops; - d_sg_list_t iv_value = {0}; + d_sg_list_t iv_value = {0}; struct crt_bulk_desc bulk_desc; crt_bulk_t local_bulk_handle; struct bulk_update_cb_info *cb_info; @@ -3284,8 +3284,8 @@ crt_hdlr_iv_update(crt_rpc_t *rpc_req) D_GOTO(exit, rc); } - rc = iv_ops->ivo_on_get(ivns_internal, &input->ivu_key, 0, - CRT_IV_PERM_WRITE, &iv_value, &user_priv); + rc = iv_ops->ivo_on_get(ivns_internal, &input->ivu_key, 0, CRT_IV_PERM_WRITE, &iv_value, + &user_priv); if (rc != 0) { D_ERROR("ivo_on_get(): "DF_RC"\n", DP_RC(rc)); D_GOTO(send_error, rc); @@ -3293,8 +3293,7 @@ crt_hdlr_iv_update(crt_rpc_t *rpc_req) put_needed = true; size = d_sgl_buf_size(&iv_value); - rc = crt_bulk_create(rpc_req->cr_ctx, &iv_value, CRT_BULK_RW, - &local_bulk_handle); + rc = crt_bulk_create(rpc_req->cr_ctx, &iv_value, CRT_BULK_RW, &local_bulk_handle); if (rc != 0) { D_ERROR("crt_bulk_create(): "DF_RC"\n", DP_RC(rc)); D_GOTO(send_error, rc); @@ -3320,7 +3319,7 @@ crt_hdlr_iv_update(crt_rpc_t *rpc_req) IVNS_ADDREF(ivns_internal); cb_info->buc_input = input; cb_info->buc_bulk_hdl = local_bulk_handle; - cb_info->buc_iv_value = iv_value; + cb_info->buc_iv_value = iv_value; cb_info->buc_user_priv = user_priv; rc = crt_bulk_transfer(&bulk_desc, bulk_update_transfer_done, diff --git a/src/container/oid_iv.c b/src/container/oid_iv.c index cc2dd2e0578..12cdbcc00d1 100644 --- a/src/container/oid_iv.c +++ b/src/container/oid_iv.c @@ -142,10 +142,19 @@ oid_iv_ent_update(struct ds_iv_entry *ns_entry, struct ds_iv_key *iv_key, rc = ABT_mutex_trylock(entry->lock); /** For retry requests, from _iv_op(), the lock may not be released in some cases. */ - if (rc == ABT_ERR_MUTEX_LOCKED && entry->current_req != oids) + if (rc == ABT_ERR_MUTEX_LOCKED) { + if (entry->current_req == src) + D_DEBUG(DB_MD, + "%u: ON UPDATE src %p; priv %p; oids %p; SAME req detected\n", + myrank, src, priv, oids); + } else { + D_DEBUG(DB_MD, "%u: ON UPDATE src %p; priv %p; oids %p; UNLOCKED Mutex\n", myrank, + src, priv, oids); + } + if (rc == ABT_ERR_MUTEX_LOCKED && entry->current_req != src) return -DER_BUSY; - entry->current_req = oids; + entry->current_req = src; avail = &entry->rg; if (myrank == oids->req_rank) @@ -290,13 +299,13 @@ oid_iv_alloc(struct ds_iv_entry *entry, struct ds_iv_key *key, rc = d_sgl_init(sgl, 1); if (rc) return rc; - D_ALLOC(sgl->sg_iovs[0].iov_buf, sizeof(struct oid_iv_range)); if (sgl->sg_iovs[0].iov_buf == NULL) D_GOTO(free, rc = -DER_NOMEM); sgl->sg_iovs[0].iov_buf_len = sizeof(struct oid_iv_range); sgl->sg_iovs[0].iov_len = sizeof(struct oid_iv_range); + D_DEBUG(DB_MD, "%u: IV ALLOC: oids = %p\n", dss_self_rank(), sgl->sg_iovs[0].iov_buf); free: if (rc) d_sgl_fini(sgl, true); @@ -321,7 +330,7 @@ oid_iv_reserve(void *ns, uuid_t po_uuid, uuid_t co_uuid, uint64_t num_oids, d_sg struct oid_iv_key *oid_key; struct ds_iv_key key; struct oid_iv_range *oids; - int rc; + int rc; D_DEBUG(DB_MD, "%d: OID alloc CUUID " DF_UUIDF "/" DF_UUIDF " num_oids %" PRIu64 "\n", dss_self_rank(), DP_UUID(po_uuid), DP_UUID(co_uuid), num_oids); @@ -338,8 +347,7 @@ oid_iv_reserve(void *ns, uuid_t po_uuid, uuid_t co_uuid, uint64_t num_oids, d_sg oids->req_rank = dss_self_rank(); oids->req_num_oids = num_oids; - rc = ds_iv_update(ns, &key, value, 0, CRT_IV_SYNC_NONE, - CRT_IV_SYNC_BIDIRECTIONAL, true /* retry */); + rc = ds_iv_update(ns, &key, value, 0, CRT_IV_SYNC_NONE, CRT_IV_SYNC_BIDIRECTIONAL, true); if (rc) D_ERROR("iv update failed "DF_RC"\n", DP_RC(rc)); diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index a5feb6c3ca5..be416d36b9a 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -662,8 +663,8 @@ dfs_test_rm(const char *name) assert_int_equal(rc, 0); } -int dfs_test_thread_nr = 8; -#define DFS_TEST_MAX_THREAD_NR (16) +int dfs_test_thread_nr = 16; +#define DFS_TEST_MAX_THREAD_NR (16) pthread_t dfs_test_tid[DFS_TEST_MAX_THREAD_NR]; struct dfs_test_thread_arg { @@ -3335,6 +3336,11 @@ dfs_test_pipeline_find(void **state) assert_int_equal(rc, 0); } +static const struct CMUnitTest dfs_unit_tests1[] = { + {"DFS_UNIT_TEST14: multi-threads connect to same container", dfs_test_mt_connect, async_disable, + test_case_teardown}, +}; + static const struct CMUnitTest dfs_unit_tests[] = { { "DFS_UNIT_TEST1: DFS mount / umount", dfs_test_mount, async_disable, test_case_teardown}, @@ -3456,8 +3462,9 @@ run_dfs_unit_test(int rank, int size) { int rc = 0; + printf("tmp %p\n", dfs_unit_tests); par_barrier(PAR_COMM_WORLD); - rc = cmocka_run_group_tests_name("DAOS_FileSystem_DFS_Unit", dfs_unit_tests, dfs_setup, + rc = cmocka_run_group_tests_name("DAOS_FileSystem_DFS_Unit", dfs_unit_tests1, dfs_setup, dfs_teardown); par_barrier(PAR_COMM_WORLD); @@ -3465,7 +3472,7 @@ run_dfs_unit_test(int rank, int size) d_setenv("DFS_USE_DTX", "1", 1); par_barrier(PAR_COMM_WORLD); - rc += cmocka_run_group_tests_name("DAOS_FileSystem_DFS_Unit_DTX", dfs_unit_tests, + rc += cmocka_run_group_tests_name("DAOS_FileSystem_DFS_Unit_DTX", dfs_unit_tests1, dfs_setup, dfs_teardown); par_barrier(PAR_COMM_WORLD); return rc;