From cb4f615e170c47cf09683e8fa88ca0fffec26e3f Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Tue, 18 Jul 2023 17:29:09 +0000 Subject: [PATCH 1/3] DAOS-13548 client: enable one NLT fault injection test for libpil4dfs Required-githooks: true Signed-off-by: Lei Huang --- src/client/dfs/dfs.c | 9 ++++- src/client/dfuse/pil4dfs/int_dfs.c | 54 ++++++++++++++++++------------ utils/node_local_test.py | 2 +- 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 0184287abc8..7f37639998d 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -1028,7 +1028,7 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz rc = daos_obj_query_max_epoch(dir_oh, th, &ep, NULL); if (rc) { - daos_obj_close(oh, NULL); + daos_obj_close(dir_oh, NULL); return daos_der2errno(rc); } @@ -1063,6 +1063,9 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz } else { daos_handle_t file_oh; + /* Added to avoid issues in daos_hhash_link_lookup due to uninitialized + * file_oh in NLT fault injection test */ + memset(&file_oh, 0, sizeof(daos_handle_t)); rc = daos_array_open_with_attr(dfs->coh, entry.oid, th, DAOS_OO_RO, 1, entry.chunk_size ? entry.chunk_size : dfs->attr.da_chunk_size, &file_oh, NULL); @@ -1078,6 +1081,10 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz } rc = daos_array_close(file_oh, NULL); + /* Try again in case of out of memory. Needed to avoid memory leak in + * NLT fault injection */ + if (rc == -DER_NOMEM) + rc = daos_array_close(file_oh, NULL); if (rc) return daos_der2errno(rc); } diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 9b91b429331..da430d1d178 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -274,9 +274,7 @@ rec_free(struct d_hash_table *htable, d_list_t *rlink) rc = dfs_release(hdl->oh); if (rc == ENOMEM) - rc = dfs_release(hdl->oh); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); + dfs_release(hdl->oh); D_FREE(hdl); } @@ -315,7 +313,7 @@ lookup_insert_dir(struct dfs_mt *mt, const char *name, size_t len, dfs_obj_t **o dfs_obj_t *oh; d_list_t *rlink; mode_t mode; - int rc; + int rc, rc2; /* TODO: Remove this after testing. */ D_ASSERT(strlen(name) == len); @@ -333,7 +331,9 @@ lookup_insert_dir(struct dfs_mt *mt, const char *name, size_t len, dfs_obj_t **o if (!S_ISDIR(mode)) { /* Not a directory, return ENOENT*/ - dfs_release(oh); + rc = dfs_release(oh); + if (rc == ENOMEM) + dfs_release(oh); return ENOTDIR; } @@ -357,7 +357,9 @@ lookup_insert_dir(struct dfs_mt *mt, const char *name, size_t len, dfs_obj_t **o return 0; out_release: - dfs_release(oh); + rc2 = dfs_release(oh); + if (rc2 == ENOMEM) + dfs_release(oh); D_FREE(hdl); return rc; } @@ -619,8 +621,10 @@ discover_daos_mount_with_env(void) D_FATAL("DAOS_CONTAINER is not set.\n"); D_GOTO(out, rc = EINVAL); } - D_STRNDUP(dfs_list[num_dfs].fs_root, fs_root, len_fs_root); + /* Added to avoid issues due to fault injection. Try again in case of ENOMEM */ + if (dfs_list[num_dfs].fs_root == NULL) + D_STRNDUP(dfs_list[num_dfs].fs_root, fs_root, len_fs_root); if (dfs_list[num_dfs].fs_root == NULL) D_GOTO(out, rc = ENOMEM); @@ -680,6 +684,8 @@ discover_dfuse_mounts(void) atomic_init(&pt_dfs_mt->inited, 0); pt_dfs_mt->pool = NULL; D_STRNDUP(pt_dfs_mt->fs_root, fs_entry->mnt_dir, pt_dfs_mt->len_fs_root); + if (pt_dfs_mt->fs_root == NULL) + D_STRNDUP(pt_dfs_mt->fs_root, fs_entry->mnt_dir, pt_dfs_mt->len_fs_root); if (pt_dfs_mt->fs_root == NULL) D_GOTO(out, rc = ENOMEM); num_dfs++; @@ -1381,6 +1387,8 @@ free_fd(int idx) if (saved_obj) { rc = dfs_release(saved_obj->file); + if (rc == ENOMEM) + dfs_release(saved_obj->file); /** This memset() is not necessary. It is left here intended. In case of duplicated * fd exists, multiple fd could point to same struct file_obj. struct file_obj is * supposed to be freed only when reference count reaches zero. With zeroing out @@ -1391,8 +1399,6 @@ free_fd(int idx) */ memset(saved_obj, 0, sizeof(struct file_obj)); D_FREE(saved_obj); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); } } @@ -1429,8 +1435,8 @@ free_dirfd(int idx) D_FREE(saved_obj->path); D_FREE(saved_obj->ents); rc = dfs_release(saved_obj->dir); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); + if (rc == ENOMEM) + dfs_release(saved_obj->dir); /** This memset() is not necessary. It is left here intended. In case of duplicated * fd exists, multiple fd could point to same struct dir_obj. struct dir_obj is * supposed to be freed only when reference count reaches zero. With zeroing out @@ -2641,19 +2647,19 @@ opendir(const char *path) D_ALLOC_ARRAY(dir_list[idx_dirfd]->ents, READ_DIR_BATCH_SIZE); if (dir_list[idx_dirfd]->ents == NULL) { free_dirfd(idx_dirfd); - D_GOTO(out_err, rc = ENOMEM); + D_GOTO(out_err_ret, rc = ENOMEM); } /* allocate memory for path[]. */ D_ASPRINTF(dir_list[idx_dirfd]->path, "%s%s", dfs_mt->fs_root, full_path); if (dir_list[idx_dirfd]->path == NULL) { free_dirfd(idx_dirfd); - D_GOTO(out_err, rc = ENOMEM); + D_GOTO(out_err_ret, rc = ENOMEM); } if (strnlen(dir_list[idx_dirfd]->path, DFS_MAX_PATH) >= DFS_MAX_PATH) { D_DEBUG(DB_ANY, "path is longer than DFS_MAX_PATH: %d (%s)\n", ENAMETOOLONG, strerror(ENAMETOOLONG)); free_dirfd(idx_dirfd); - D_GOTO(out_err, rc = ENAMETOOLONG); + D_GOTO(out_err_ret, rc = ENAMETOOLONG); } FREE(parent_dir); @@ -3197,6 +3203,8 @@ readlink(const char *path, char *buf, size_t size) if (rc) goto out_release; rc = dfs_release(obj); + if (rc == ENOMEM) + rc = dfs_release(obj); if (rc) goto out_err; /* The NULL at the end should not be included in the length */ @@ -3209,8 +3217,8 @@ readlink(const char *path, char *buf, size_t size) out_release: rc2 = dfs_release(obj); - if (rc2) - D_ERROR("dfs_release() failed: %d (%s)\n", rc2, strerror(rc2)); + if (rc2 == ENOMEM) + dfs_release(obj); out_err: FREE(parent_dir); @@ -3627,8 +3635,8 @@ rename(const char *old_name, const char *new_name) D_FREE(buff); errno_save = rc; rc = dfs_release(obj_old); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); + if (rc == ENOMEM) + dfs_release(obj_old); errno = errno_save; return (-1); @@ -3639,8 +3647,8 @@ rename(const char *old_name, const char *new_name) fclose(fIn); errno_save = rc; rc = dfs_release(obj_new); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); + if (rc == ENOMEM) + dfs_release(obj_new); errno = errno_save; return (-1); @@ -5492,13 +5500,17 @@ finalize_dfs(void) continue; } rc = daos_cont_close(dfs_list[i].coh, NULL); + if (rc == -DER_NOMEM) + rc = daos_cont_close(dfs_list[i].coh, NULL); if (rc != 0) { D_ERROR("error in daos_cont_close(%s): " DF_RC "\n", dfs_list[i].fs_root, DP_RC(rc)); continue; } rc = daos_pool_disconnect(dfs_list[i].poh, NULL); - if (rc != 0) { + if (rc == -DER_NOMEM) + rc = daos_pool_disconnect(dfs_list[i].poh, NULL); + if (rc != -DER_SUCCESS) { D_ERROR("error in daos_pool_disconnect(%s): " DF_RC "\n", dfs_list[i].fs_root, DP_RC(rc)); continue; diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 7ef0c56b664..076866c3aeb 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -5766,7 +5766,7 @@ def run(wf, args): fatal_errors.add_result(test_alloc_cont_create(server, conf, wf_client)) # Disabled for now because of errors - # fatal_errors.add_result(test_alloc_pil4dfs_ls(server, conf, wf_client)) + fatal_errors.add_result(test_alloc_pil4dfs_ls(server, conf, wf_client)) wf_client.close() From 806b59ac61f03bb6b84faa862817ae2d7faac24f Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Tue, 18 Jul 2023 17:31:48 +0000 Subject: [PATCH 2/3] setup githook after merging with master Required-githooks: true Signed-off-by: Lei Huang From f144333e4d668e1e724f48a198fd0daf0b05f231 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Fri, 21 Jul 2023 06:32:19 +0000 Subject: [PATCH 3/3] To address reviewer's comments initialization with an invalid handle in dc_obj_open() add "test_cmd.ignore_busy = True" in "test_alloc_pil4dfs_ls" remove retry daos_cont_close() and daos_pool_disconnect() Required-githooks: true Signed-off-by: Lei Huang --- src/client/dfs/dfs.c | 3 --- src/client/dfuse/pil4dfs/int_dfs.c | 8 ++------ src/object/cli_obj.c | 2 ++ utils/node_local_test.py | 1 + 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 710d0c964bb..ea6bf1b12f1 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -1063,9 +1063,6 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz } else { daos_handle_t file_oh; - /* Added to avoid issues in daos_hhash_link_lookup due to uninitialized - * file_oh in NLT fault injection test */ - memset(&file_oh, 0, sizeof(daos_handle_t)); rc = daos_array_open_with_attr(dfs->coh, entry.oid, th, DAOS_OO_RO, 1, entry.chunk_size ? entry.chunk_size : dfs->attr.da_chunk_size, &file_oh, NULL); diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index da430d1d178..6b450e55972 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -275,6 +275,8 @@ rec_free(struct d_hash_table *htable, d_list_t *rlink) rc = dfs_release(hdl->oh); if (rc == ENOMEM) dfs_release(hdl->oh); + if (rc) + D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); D_FREE(hdl); } @@ -684,8 +686,6 @@ discover_dfuse_mounts(void) atomic_init(&pt_dfs_mt->inited, 0); pt_dfs_mt->pool = NULL; D_STRNDUP(pt_dfs_mt->fs_root, fs_entry->mnt_dir, pt_dfs_mt->len_fs_root); - if (pt_dfs_mt->fs_root == NULL) - D_STRNDUP(pt_dfs_mt->fs_root, fs_entry->mnt_dir, pt_dfs_mt->len_fs_root); if (pt_dfs_mt->fs_root == NULL) D_GOTO(out, rc = ENOMEM); num_dfs++; @@ -5500,16 +5500,12 @@ finalize_dfs(void) continue; } rc = daos_cont_close(dfs_list[i].coh, NULL); - if (rc == -DER_NOMEM) - rc = daos_cont_close(dfs_list[i].coh, NULL); if (rc != 0) { D_ERROR("error in daos_cont_close(%s): " DF_RC "\n", dfs_list[i].fs_root, DP_RC(rc)); continue; } rc = daos_pool_disconnect(dfs_list[i].poh, NULL); - if (rc == -DER_NOMEM) - rc = daos_pool_disconnect(dfs_list[i].poh, NULL); if (rc != -DER_SUCCESS) { D_ERROR("error in daos_pool_disconnect(%s): " DF_RC "\n", dfs_list[i].fs_root, DP_RC(rc)); diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index 8bf5f39d102..df6e22f34f4 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -1414,6 +1414,8 @@ dc_obj_open(tse_task_t *task) args = dc_task_get_args(task); D_ASSERTF(args != NULL, "Task Argument OPC does not match DC OPC\n"); + /* initialize with an invalid handle */ + (*args->oh).cookie = 0; obj = obj_alloc(); if (obj == NULL) diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 64ea4084da0..63fc50a5479 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -5516,6 +5516,7 @@ def test_alloc_pil4dfs_ls(server, conf, wf): test_cmd.use_pil4dfs(container) test_cmd.check_daos_stderr = False test_cmd.check_post_stdout = False + test_cmd.ignore_busy = True return test_cmd.launch()