From a0b33e593d8d5a47ea54208acdc74a11ff6e5062 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 7 May 2020 14:28:27 +0000 Subject: [PATCH 1/3] DAOS-4763 dfs + object: Add new API for anchor split and DFS get_shard. To optimize find performance, we need to be able to execute a parallel readdir. To do that we need to be able to split an anchor among multiple clients. For this, we will split it depending on the number of shards a directory is shared over. We need a new API for the anchor split based on the shard number, and a new API for DFS to return the number of shards of a directory or file. Signed-off-by: Mohamad Chaarawi --- src/client/api/array.c | 6 ++++++ src/client/api/object.c | 5 +++++ src/client/array/dc_array.c | 23 +++++++++++++++++++++++ src/client/dfs/dfs.c | 28 ++++++++++++++++++++++++++++ src/include/daos/array.h | 1 + src/include/daos/object.h | 1 + src/include/daos_array.h | 16 ++++++++++++++++ src/include/daos_fs.h | 11 +++++++++++ src/include/daos_obj.h | 12 ++++++++++++ src/object/cli_obj.c | 11 ++++++++++- 10 files changed, 113 insertions(+), 1 deletion(-) diff --git a/src/client/api/array.c b/src/client/api/array.c index ec8fd2ce88d..d110d4b35cb 100644 --- a/src/client/api/array.c +++ b/src/client/api/array.c @@ -269,3 +269,9 @@ daos_array_set_size(daos_handle_t oh, daos_handle_t th, daos_size_t size, return dc_task_schedule(task, true); } /* end daos_array_set_size */ + +int +daos_array_get_num_shards(daos_handle_t oh, uint32_t *num_shards) +{ + return dc_array_get_num_shards(oh, num_shards); +} diff --git a/src/client/api/object.c b/src/client/api/object.c index af14c022ef3..6144e79980d 100644 --- a/src/client/api/object.c +++ b/src/client/api/object.c @@ -283,3 +283,8 @@ daos_obj_verify(daos_handle_t coh, daos_obj_id_t oid, daos_epoch_t epoch) daos_obj_close(oh, NULL); return rc; } + +int +daos_obj_anchor_split(uint32_t shard, daos_anchor_t *anchor) { + return dc_obj_anchor_split(shard, anchor); +} diff --git 
a/src/client/array/dc_array.c b/src/client/array/dc_array.c index 039cc960f61..cee2dbe7160 100644 --- a/src/client/array/dc_array.c +++ b/src/client/array/dc_array.c @@ -2244,3 +2244,26 @@ dc_array_set_size(tse_task_t *task) tse_task_complete(task, rc); return rc; } /* end daos_array_set_size */ + +int +dc_array_get_num_shards(daos_handle_t oh, uint32_t *num_shards) +{ + struct dc_array *array; + struct daos_obj_layout *layout; + int rc; + + if (num_shards == NULL) + return -DER_INVAL; + + array = array_hdl2ptr(oh); + if (array == NULL) + return -DER_NO_HDL; + + rc = dc_obj_layout_get(array->daos_oh, &layout); + if (rc) + return rc; + + *num_shards = layout->ol_nr; + daos_obj_layout_free(layout); + return 0; +} diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index 08f474de6f0..e3652417a0a 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "daos.h" @@ -3956,3 +3957,30 @@ dfs_umount_root_cont(dfs_t *dfs) rc = daos_cont_close(coh, NULL); return daos_der2errno(rc); } + +int +dfs_obj_get_num_shards(dfs_obj_t *obj, uint32_t *num_shards) +{ + struct daos_obj_layout *layout; + int rc; + + if (obj == NULL || num_shards == NULL) + return EINVAL; + + if (S_ISDIR(obj->mode)) { + rc = dc_obj_layout_get(obj->oh, &layout); + if (rc) + return rc; + + *num_shards = layout->ol_nr; + daos_obj_layout_free(layout); + return 0; + } + + if (S_ISREG(obj->mode)) { + rc = daos_array_get_num_shards(obj->oh, num_shards); + return daos_der2errno(rc); + } + + return EINVAL; +} diff --git a/src/include/daos/array.h b/src/include/daos/array.h index 220e21136eb..4bc1d7be452 100644 --- a/src/include/daos/array.h +++ b/src/include/daos/array.h @@ -42,5 +42,6 @@ int dc_array_set_size(tse_task_t *task); int dc_array_local2global(daos_handle_t oh, d_iov_t *glob); int dc_array_global2local(daos_handle_t coh, d_iov_t glob, unsigned int mode, daos_handle_t *oh); +int dc_array_get_num_shards(daos_handle_t oh, 
uint32_t *num_shards); #endif /* __DAOS_ARRAYX_H__ */ diff --git a/src/include/daos/object.h b/src/include/daos/object.h index eaf4be1f1e3..ba03dc6cfa2 100644 --- a/src/include/daos/object.h +++ b/src/include/daos/object.h @@ -348,6 +348,7 @@ int dc_obj_layout_get(daos_handle_t oh, struct daos_obj_layout **p_layout); int dc_obj_layout_refresh(daos_handle_t oh); int dc_obj_verify(daos_handle_t oh, daos_epoch_t *epochs, unsigned int nr); daos_handle_t dc_obj_hdl2cont_hdl(daos_handle_t oh); +int dc_obj_anchor_split(uint32_t shard, daos_anchor_t *anchor); /** Decode shard number from enumeration anchor */ static inline uint32_t diff --git a/src/include/daos_array.h b/src/include/daos_array.h index 4512e40a556..2563bc49f8e 100644 --- a/src/include/daos_array.h +++ b/src/include/daos_array.h @@ -436,6 +436,22 @@ int daos_array_get_attr(daos_handle_t oh, daos_size_t *chunk_size, daos_size_t *cell_size); +/** + * Retrieve array number of shards. + * + * \param[in] oh Array object open handle. + * \param[out] num_shards + * Number of shards. + * + * \return These values will be returned by \a ev::ev_error in + * non-blocking mode: + * 0 Success + * -DER_NO_HDL Invalid object open handle + * -DER_INVAL Invalid parameter + */ +int +daos_array_get_num_shards(daos_handle_t oh, uint32_t *num_shards); + #if defined(__cplusplus) } #endif diff --git a/src/include/daos_fs.h b/src/include/daos_fs.h index ab756b10a3b..a373566bfea 100644 --- a/src/include/daos_fs.h +++ b/src/include/daos_fs.h @@ -537,6 +537,17 @@ dfs_exchange(dfs_t *dfs, dfs_obj_t *parent1, char *name1, int dfs_get_mode(dfs_obj_t *obj, mode_t *mode); +/** + * Retrieve the number of shards of an open file or directory. + * + * \param[in] obj Open object to query. + * \param[out] num_shards Number of shards. + * + * \return 0 on success, errno code on failure. + */ +int +dfs_obj_get_num_shards(dfs_obj_t *obj, uint32_t *num_shards); + /** * Retrieve the DAOS open handle of a DFS file object. 
User should not close * this handle. This is used in cases like MPI-IO where 1 rank creates the file diff --git a/src/include/daos_obj.h b/src/include/daos_obj.h index 98931a3237c..8a9be69e387 100644 --- a/src/include/daos_obj.h +++ b/src/include/daos_obj.h @@ -841,6 +841,18 @@ daos_obj_query_key(daos_handle_t oh, daos_handle_t th, uint64_t flags, int daos_obj_verify(daos_handle_t coh, daos_obj_id_t oid, daos_epoch_t epoch); +/** + * Set an anchor for enumeration for one shard only. + * + * \param[in] shard Shard to restrict the enumeration to. + * \param[in,out] + * anchor Anchor modified for 1 shard only + * + * \return 0 Success + */ +int +daos_obj_anchor_split(uint32_t shard, daos_anchor_t *anchor); + #if defined(__cplusplus) } #endif diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index c112300196e..735e6752680 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -2152,7 +2152,8 @@ obj_list_dkey_cb(tse_task_t *task, struct obj_list_arg *arg, unsigned int opc) if (!daos_anchor_is_eof(anchor)) { D_DEBUG(DB_IO, "More keys in shard %d\n", shard); - } else if ((shard < obj->cob_shards_nr - grp_size)) { + } else if (!(daos_anchor_get_flags(anchor) & DIOF_TO_SPEC_SHARD) && + (shard < obj->cob_shards_nr - grp_size)) { shard += grp_size; D_DEBUG(DB_IO, "next shard %d grp %d nr %u\n", shard, grp_size, obj->cob_shards_nr); @@ -3489,3 +3490,11 @@ dc_obj_verify(daos_handle_t oh, daos_epoch_t *epochs, unsigned int nr) return rc; } + +int +dc_obj_anchor_split(uint32_t shard, daos_anchor_t *anchor) { + daos_anchor_set_zero(anchor); + dc_obj_shard2anchor(anchor, shard); + daos_anchor_set_flags(anchor, DIOF_TO_SPEC_SHARD); + return 0; +} From c50bf20daf357f95277cf94115da4d0979a7289c Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 13 May 2020 20:56:47 +0000 Subject: [PATCH 2/3] fix checkpatch errors Signed-off-by: Mohamad Chaarawi --- src/client/dfs/dfs.c | 6 +++--- src/include/daos_fs.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff
--git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index b56701a5c8f..b83d3b1fffc 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -3972,10 +3972,10 @@ dfs_obj_anchor_split(dfs_obj_t *obj, uint32_t *nr, daos_anchor_t *anchors) return rc; /** TBD - support more than per shard iteration */ - if (*nr !=0 && *nr != layout->ol_nr) { + if (*nr != 0 && *nr != layout->ol_nr) { D_ERROR("For now, num anchors should be the same as what is" - "reported as optimal\n"); - D_GOTO(out, rc = ENOSYS); + " reported as optimal\n"); + D_GOTO(out, rc = EINVAL); } *nr = layout->ol_nr; diff --git a/src/include/daos_fs.h b/src/include/daos_fs.h index d4cb44f1a8c..867433b5892 100644 --- a/src/include/daos_fs.h +++ b/src/include/daos_fs.h @@ -486,7 +486,7 @@ dfs_obj_anchor_split(dfs_obj_t *obj, uint32_t *nr, daos_anchor_t *anchors); /** * Set an anchor with an index based on split done with dfs_obj_anchor_split. * The anchor passed will be re-intialized and set to start and finish iteration - * based on the specified index. + * based on the specified index. * * \param[in] obj Dir object to split anchor for. * \param[in] index Index of set this anchor for iteration. @@ -497,7 +497,7 @@ dfs_obj_anchor_split(dfs_obj_t *obj, uint32_t *nr, daos_anchor_t *anchors); */ int dfs_obj_anchor_set(dfs_obj_t *obj, uint32_t index, daos_anchor_t *anchor); - + /** * Create a directory. * From d21832833eff64cad44594f8a54868ef47cc4c53 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 18 May 2020 15:10:40 +0000 Subject: [PATCH 3/3] add object level API instead of only DFS API to allow the split to be done for regular KV object. 
Signed-off-by: Mohamad Chaarawi --- src/client/api/object.c | 47 +++++++++++++++++++++++++++++++++++++++++ src/client/dfs/dfs.c | 39 +++++----------------------------- src/include/daos_obj.h | 44 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 34 deletions(-) diff --git a/src/client/api/object.c b/src/client/api/object.c index af14c022ef3..6887954a5f2 100644 --- a/src/client/api/object.c +++ b/src/client/api/object.c @@ -283,3 +283,50 @@ daos_obj_verify(daos_handle_t coh, daos_obj_id_t oid, daos_epoch_t epoch) daos_obj_close(oh, NULL); return rc; } + +int +daos_obj_anchor_split(daos_handle_t oh, uint32_t *nr, daos_anchor_t *anchors) +{ + struct daos_obj_layout *layout; + int rc; + + if (nr == NULL) + return -DER_INVAL; + + rc = dc_obj_layout_get(oh, &layout); + if (rc) + return rc; + + /** TBD - support more than per shard iteration */ + if (*nr != 0 && *nr != layout->ol_nr) { + D_ERROR("For now, num anchors should be the same as what is" + " reported as optimal\n"); + D_GOTO(out, rc = -DER_INVAL); + } + + *nr = layout->ol_nr; + + if (anchors) { + uint32_t i; + + for (i = 0; i < layout->ol_nr; i++) { + daos_anchor_set_zero(&anchors[i]); + dc_obj_shard2anchor(&anchors[i], i); + daos_anchor_set_flags(&anchors[i], DIOF_TO_SPEC_SHARD); + } + } +out: + daos_obj_layout_free(layout); + return rc; +} + +int +daos_obj_anchor_set(daos_handle_t oh, uint32_t index, daos_anchor_t *anchor) +{ + /** TBD - support more than per shard iteration */ + daos_anchor_set_zero(anchor); + dc_obj_shard2anchor(anchor, index); + daos_anchor_set_flags(anchor, DIOF_TO_SPEC_SHARD); + + return 0; +} diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index b83d3b1fffc..0263569aaae 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include "daos.h" @@ -3961,45 +3960,17 @@ dfs_umount_root_cont(dfs_t *dfs) int dfs_obj_anchor_split(dfs_obj_t *obj, uint32_t *nr, daos_anchor_t *anchors) { - struct 
daos_obj_layout *layout; - int rc; - if (obj == NULL || nr == NULL || !S_ISDIR(obj->mode)) return EINVAL; - rc = dc_obj_layout_get(obj->oh, &layout); - if (rc) - return rc; - - /** TBD - support more than per shard iteration */ - if (*nr != 0 && *nr != layout->ol_nr) { - D_ERROR("For now, num anchors should be the same as what is" - " reported as optimal\n"); - D_GOTO(out, rc = EINVAL); - } - - *nr = layout->ol_nr; - - if (anchors) { - uint32_t i; - - for (i = 0; i < layout->ol_nr; i++) { - daos_anchor_set_zero(&anchors[i]); - dc_obj_shard2anchor(&anchors[i], i); - daos_anchor_set_flags(&anchors[i], DIOF_TO_SPEC_SHARD); - } - } -out: - daos_obj_layout_free(layout); - return rc; + return daos_obj_anchor_split(obj->oh, nr, anchors); } int dfs_obj_anchor_set(dfs_obj_t *obj, uint32_t index, daos_anchor_t *anchor) { - /** TBD - support more than per shard iteration */ - daos_anchor_set_zero(anchor); - dc_obj_shard2anchor(anchor, index); - daos_anchor_set_flags(anchor, DIOF_TO_SPEC_SHARD); - return 0; + if (obj == NULL || !S_ISDIR(obj->mode)) + return EINVAL; + + return daos_obj_anchor_set(obj->oh, index, anchor); } diff --git a/src/include/daos_obj.h b/src/include/daos_obj.h index 98931a3237c..01a2bc75495 100644 --- a/src/include/daos_obj.h +++ b/src/include/daos_obj.h @@ -841,6 +841,50 @@ daos_obj_query_key(daos_handle_t oh, daos_handle_t th, uint64_t flags, int daos_obj_verify(daos_handle_t coh, daos_obj_id_t oid, daos_epoch_t epoch); +/** + * Provide a function for objects to split an anchor to be able to execute a + * parallel listing/enumeration. This routine suggests the optimal number of + * anchors to use instead of just 1 and optionally returns all those + * anchors. The user would allocate the array of anchors after querying the + * number of anchors needed. Alternatively, user does not provide an array and + * can call daos_obj_anchor_set() for every anchor to set. + * + * The user could suggest how many anchors to split the iteration over. 
This + * feature is not supported yet. + * + * \param[in] oh Open object handle. + * \param[in,out] + * nr [in]: Number of anchors requested and allocated in + * \a anchors. Pass 0 for DAOS to recommend split num. + * [out]: Number of anchors recommended if 0 is passed in. + * \param[out] anchors Optional array of anchors that are split. + * + * \return These values will be returned: + * 0 Success + * -DER_NO_HDL Invalid object open handle + * -DER_INVAL Invalid parameter + */ +int +daos_obj_anchor_split(daos_handle_t oh, uint32_t *nr, daos_anchor_t *anchors); + +/** + * Set an anchor with an index based on split done with daos_obj_anchor_split. + * The anchor passed will be re-initialized and set to start and finish + * iteration based on the specified index. + * + * \param[in] oh Open object handle. + * \param[in] index Index to set this anchor for iteration. + * \param[in,out] + * anchor Hash anchor to set. + * + * \return These values will be returned: + * 0 Success + * -DER_NO_HDL Invalid object open handle + * -DER_INVAL Invalid parameter + */ +int +daos_obj_anchor_set(daos_handle_t oh, uint32_t index, daos_anchor_t *anchor); + #if defined(__cplusplus) } #endif