diff --git a/docs/admin/troubleshooting.md b/docs/admin/troubleshooting.md index 35fa0dcc9e0..6438b46aff2 100644 --- a/docs/admin/troubleshooting.md +++ b/docs/admin/troubleshooting.md @@ -554,6 +554,21 @@ To resolve the issue: Alternately, the administrator may erase and re-format the DAOS system to start over fresh using the new addresses. +### Engines become unavailable + +Engines may become unavailable due to server power losses and reboots, network switch failures, etc. After staying unavailable for a certain period of time, these engines may become "excluded" or "errored" in `dmg system query` output. Once the states of all engines stabilize (see [`CRT_EVENT_DELAY`](env_variables.md)), each pool will check whether there is enough redundancy (see [Pool RF](pool_operations.md#pool-redundancy-factor)) to tolerate the unavailability of the "excluded" or "errored" engines. If there is enough redundancy, these engines will be excluded from the pool ("Disabled ranks" in `dmg pool query --health-only` output); otherwise, the pool will perform no exclusion ("Dead ranks" in `dmg pool query --health-only` output as described in [Querying a Pool](pool_operations.md#querying-a-pool)) and may become temporarily unavailable (as seen by timeouts of `dmg pool query`, `dmg pool list`, etc.). Similarly, when engines become available, whenever the states of all engines stabilize, each pool will perform the aforementioned check for any unavailable engines that remain. + +To restore availability as well as capacity and performance, try to start all "excluded" or "errored" engines. Starting all of them at the same time minimizes the chance of triggering rebuild jobs. In many cases, the following command suffices: +``` +$ dmg system start +``` +If some pools remain unavailable (e.g., `dmg pool list` keeps timing out) after the previous step, restart the whole system: +``` +$ dmg system stop --force +$ dmg system start +``` +If some engines have been excluded from certain pools, and they are available again, reintegrate them to the pools. + ## Diagnostic and Recovery Tools !!! WARNING : Please be careful and use this tool under supervision of DAOS support team. diff --git a/src/client/api/pool.c b/src/client/api/pool.c index 98ef86c6717..4a28a7484ac 100644 --- a/src/client/api/pool.c +++ b/src/client/api/pool.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2015-2023 Intel Corporation. + * (C) Copyright 2015-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -91,15 +91,54 @@ daos_pool_query(daos_handle_t poh, d_rank_list_t **ranks, daos_pool_info_t *info return -DER_INVAL; } + if (ranks != NULL && info != NULL && + (info->pi_bits & (DPI_ENGINES_ENABLED | DPI_ENGINES_DISABLED)) == + (DPI_ENGINES_ENABLED | DPI_ENGINES_DISABLED)) { + D_ERROR("enabled and disabled not supported in v1 query\n"); + return -DER_NOTSUPPORTED; + } + rc = dc_task_create(dc_pool_query, NULL, ev, &task); if (rc) return rc; args = dc_task_get_args(task); - args->poh = poh; - args->ranks = ranks; + args->poh = poh; args->info = info; args->prop = pool_prop; + if (info != NULL && (info->pi_bits & DPI_ENGINES_ENABLED) != 0) + args->enabled_ranks = ranks; + else + args->disabled_ranks = ranks; + + return dc_task_schedule(task, true); +} + +int +daos_pool_query_v2(daos_handle_t poh, d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks, + daos_pool_info_t *info, daos_prop_t *pool_prop, daos_event_t *ev) +{ + daos_pool_query_t *args; + tse_task_t *task; + int rc; + + DAOS_API_ARG_ASSERT(*args, POOL_QUERY); + + if (pool_prop != NULL && !daos_prop_valid(pool_prop, true, false)) { + D_ERROR("invalid pool_prop parameter.\n"); + return -DER_INVAL; + } + + rc = dc_task_create(dc_pool_query, NULL, ev, &task); + if (rc) + return rc; + + args = dc_task_get_args(task); + args->poh = poh; + args->enabled_ranks = enabled_ranks; + args->disabled_ranks = disabled_ranks; + args->info = info; + args->prop = pool_prop; return dc_task_schedule(task, true); } diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 0066f3959b7..4da7a2ee8a1 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -295,21 +295,21 @@ func convertPoolInfo(pinfo *C.daos_pool_info_t) (*daos.PoolInfo, error) { return poolInfo, nil } -func queryPoolRankLists(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { - var rlPtr **C.d_rank_list_t = nil - var rl *C.d_rank_list_t = nil +func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { + poolInfo := &daos.PoolInfo{} - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) || - queryMask.HasOption(daos.PoolQueryOptionDeadEngines) { - rlPtr = &rl - } + var enabledRanks *C.d_rank_list_t + var disabledRanks *C.d_rank_list_t cPoolInfo := C.daos_pool_info_t{ pi_bits: C.uint64_t(queryMask), } - rc := C.daos_pool_query(poolHdl, rlPtr, &cPoolInfo, nil, nil) - defer C.d_rank_list_free(rl) + rc := C.daos_pool_query_v2(poolHdl, &enabledRanks, &disabledRanks, &cPoolInfo, nil, nil) + defer func() { + C.d_rank_list_free(enabledRanks) + C.d_rank_list_free(disabledRanks) + }() if err := daosError(rc); err != nil { return nil, err } @@ -319,79 +319,19 @@ func queryPoolRankLists(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) ( return nil, err } - if rlPtr != nil { - rs, err := rankSetFromC(rl) + if enabledRanks != nil { + rs, err := rankSetFromC(enabledRanks) if err != nil { return nil, err } - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { - poolInfo.EnabledRanks = rs - } - if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - poolInfo.DisabledRanks = rs - } - if queryMask.HasOption(daos.PoolQueryOptionDeadEngines) { - poolInfo.DeadRanks = rs - } + poolInfo.EnabledRanks = rs } - - return poolInfo, nil -} -func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { - poolInfo := &daos.PoolInfo{} - originalMask := queryMask // Save the original queryMask - - // Function to handle the query and return a single RankList - queryAndUpdate := func(option string) error { - // Clear previous options and set new option - queryMask.ClearAll() - queryMask.SetOptions(option) - - poolInfo1, err := queryPoolRankLists(poolHdl, queryMask) + if disabledRanks != nil { + rs, err := rankSetFromC(disabledRanks) if err != nil { - return err - } - - switch option { - case daos.PoolQueryOptionEnabledEngines: - poolInfo.EnabledRanks = poolInfo1.EnabledRanks - case daos.PoolQueryOptionDisabledEngines: - poolInfo.DisabledRanks = poolInfo1.DisabledRanks - case daos.PoolQueryOptionDeadEngines: - poolInfo.DeadRanks = poolInfo1.DeadRanks - } - return nil - } - - // Preprocess queryMask, select one option for the first query - var firstOption string - if originalMask.HasOption(daos.PoolQueryOptionEnabledEngines) { - firstOption = daos.PoolQueryOptionEnabledEngines - } else if originalMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - firstOption = daos.PoolQueryOptionDisabledEngines - } else if originalMask.HasOption(daos.PoolQueryOptionDeadEngines) { - firstOption = daos.PoolQueryOptionDeadEngines - } - - // Perform the first query to get basic information - if err := queryAndUpdate(firstOption); err != nil { - return nil, err - } - - // Check the original query mask and update fields as needed - queryOptions := []string{ - daos.PoolQueryOptionEnabledEngines, - daos.PoolQueryOptionDisabledEngines, - daos.PoolQueryOptionDeadEngines, - } - - // Process each option sequentially - for _, opt := range queryOptions { - if originalMask.HasOption(opt) && opt != firstOption { - if err := queryAndUpdate(opt); err != nil { - return nil, err - } + return nil, err } + poolInfo.DisabledRanks = rs } return poolInfo, nil diff --git a/src/control/common/proto/logging.go b/src/control/common/proto/logging.go index 8e23cae1839..ee2e99e5f77 100644 --- a/src/control/common/proto/logging.go +++ b/src/control/common/proto/logging.go @@ -112,6 +112,22 @@ func Debug(msg proto.Message) string { for i, b := range m.TierBytes { fmt.Fprintf(&bld, "%d:%d ", i, b) } + case *mgmtpb.PoolQueryReq: + fmt.Fprintf(&bld, "%T id:%s qm:%s", m, m.Id, daos.PoolQueryMask(m.QueryMask)) + case *mgmtpb.PoolQueryResp: + fmt.Fprintf(&bld, "%T status:%s uuid:%s qm:%s map:%d tot(eng/tgts):%d/%d ver(p/u):%d/%d svc_ldr:%d ", + m, daos.Status(m.Status), m.Uuid, daos.PoolQueryMask(m.QueryMask), m.Version, + m.TotalEngines, m.TotalTargets, m.PoolLayoutVer, m.UpgradeLayoutVer, m.SvcLdr) + ranks := &ranklist.RankSet{} + for _, r := range m.SvcReps { + ranks.Add(ranklist.Rank(r)) + } + fmt.Fprintf(&bld, "svc_ranks:%s ", ranks.String()) + fmt.Fprintf(&bld, "ena_ranks:%s ", m.EnabledRanks) + fmt.Fprintf(&bld, "dis_ranks:%s ", m.DisabledRanks) + fmt.Fprintf(&bld, "dead_ranks:%s ", m.DeadRanks) + fmt.Fprintf(&bld, "rebuild:%+v ", m.Rebuild) + fmt.Fprintf(&bld, "tier_stats:%+v ", m.TierStats) case *mgmtpb.PoolEvictReq: fmt.Fprintf(&bld, "%T pool:%s", m, m.Id) if len(m.Handles) > 0 { diff --git a/src/include/daos_pool.h b/src/include/daos_pool.h index 24746a70ff7..aadc5badbc5 100644 --- a/src/include/daos_pool.h +++ b/src/include/daos_pool.h @@ -341,6 +341,52 @@ int daos_pool_query(daos_handle_t poh, d_rank_list_t **ranks, daos_pool_info_t *info, daos_prop_t *pool_prop, daos_event_t *ev); +/** + * Query pool information, including multiple rank lists (enabled, disabled). User + * should provide at least one of \a info or the \a *disabled_ranks list as output buffer. + * + * \param[in] poh Pool connection handle. + * \param[out] enabled_ranks + * Optional, returned enabled pool storage engine ranks. This list will + * be populated if #info is not NULL and #pi_bits has DPI_ENGINES_ENABLED set. + * The caller is responsible for freeing the list with d_rank_list_free(). + * \param[out] disabled_ranks + * Optional, returned disabled pool storage engine ranks. + * If #info is NULL, this list will be populated with the ranks of + * all engines with any targets disabled. If #info is not NULL, this + * list will be populated if #pi_bits has DPI_ENGINES_DISABLED set. + * The caller is responsible for freeing the list with d_rank_list_free(). + * \param[in,out] + * info Optional, returned pool information, + * see daos_pool_info_bit. + * \param[out] pool_prop + * Optional, returned pool properties. + * If it is NULL, then needs not query the properties. + * If pool_prop is non-NULL but its dpp_entries is NULL, + * will query all pool properties, DAOS internally + * allocates the needed buffers and assign pointer to + * dpp_entries. + * If pool_prop's dpp_nr > 0 and dpp_entries is non-NULL, + * will query the properties for specific dpe_type(s), DAOS + * internally allocates the needed buffer for dpe_str or + * dpe_val_ptr, if the dpe_type with immediate value then + * will directly assign it to dpe_val. + * User can free the associated buffer by calling + * daos_prop_free(). + * \param[in] ev Completion event, it is optional and can be NULL. + * The function will run in blocking mode if \a ev is NULL. + * + * \return These values will be returned by \a ev::ev_error in + * non-blocking mode: + * 0 Success + * -DER_INVAL Invalid parameter + * -DER_UNREACH Network is unreachable + * -DER_NO_HDL Invalid pool handle + */ +int +daos_pool_query_v2(daos_handle_t poh, d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ranks, + daos_pool_info_t *info, daos_prop_t *pool_prop, daos_event_t *ev); + /** * Query information of storage targets within a DAOS pool. * diff --git a/src/include/daos_task.h b/src/include/daos_task.h index 819e6edffab..500b07bd577 100644 --- a/src/include/daos_task.h +++ b/src/include/daos_task.h @@ -206,8 +206,10 @@ typedef struct { typedef struct { /** Pool open handle. */ daos_handle_t poh; - /** Optional, returned storage ranks in this pool. */ - d_rank_list_t **ranks; + /** Optional, returned enabled storage ranks in this pool. */ + d_rank_list_t **enabled_ranks; + /** Optional, returned disabled storage ranks in this pool. */ + d_rank_list_t **disabled_ranks; /** Optional, returned pool information. */ daos_pool_info_t *info; /** Optional, returned pool properties. */ diff --git a/src/pool/cli.c b/src/pool/cli.c index 6825f57568f..e6ee89d6e29 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -555,65 +555,22 @@ dc_pool_map_update(struct dc_pool *pool, struct pool_map *map, bool connect) return rc; } -static void -pool_print_range_list(struct dc_pool *pool, d_rank_range_list_t *list, bool enabled) -{ - const size_t MAXBYTES = 512; - char line[MAXBYTES]; - char *linepos = line; - int ret; - unsigned int written = 0; - unsigned int remaining = MAXBYTES; - int i; - - ret = snprintf(linepos, remaining, DF_UUID": %s ranks: ", DP_UUID(pool->dp_pool), - enabled ? "ENABLED" : "DISABLED"); - if ((ret < 0) || (ret >= remaining)) - goto err; - written += ret; - remaining -= ret; - linepos += ret; - - for (i = 0; i < list->rrl_nr; i++) { - uint32_t lo = list->rrl_ranges[i].lo; - uint32_t hi = list->rrl_ranges[i].hi; - bool lastrange = (i == (list->rrl_nr - 1)); - - if (lo == hi) - ret = snprintf(linepos, remaining, "%u%s", lo, lastrange ? "" : ","); - else - ret = snprintf(linepos, remaining, "%u-%u%s", lo, hi, lastrange ? "" : ","); - if ((ret < 0) || (ret >= remaining)) - goto err; - written += ret; - remaining -= ret; - linepos += ret; - } - D_DEBUG(DB_MD, "%s\n", line); - return; -err: - if (written > 0) - D_DEBUG(DB_MD, "%s%s\n", line, (ret >= remaining) ? " ...(TRUNCATED): " : ""); - D_ERROR(DF_UUID": snprintf failed, %d\n", DP_UUID(pool->dp_pool), ret); -} - /* * Using "map_buf", "map_version", and "mode", update "pool->dp_map" and fill * "ranks" and/or "info", "prop" if not NULL. */ static int -process_query_reply(struct dc_pool *pool, struct pool_buf *map_buf, - uint32_t map_version, uint32_t leader_rank, - struct daos_pool_space *ps, struct daos_rebuild_status *rs, - d_rank_list_t **ranks, daos_pool_info_t *info, - daos_prop_t *prop_req, daos_prop_t *prop_reply, - bool connect) +process_query_reply(struct dc_pool *pool, struct pool_buf *map_buf, uint32_t map_version, + uint32_t leader_rank, struct daos_pool_space *ps, + struct daos_rebuild_status *rs, d_rank_list_t **enabled_ranks, + d_rank_list_t **disabled_ranks, daos_pool_info_t *info, daos_prop_t *prop_req, + daos_prop_t *prop_reply, bool connect) { struct pool_map *map; int rc; - D_DEBUG(DB_MD, DF_UUID": info=%p (pi_bits="DF_X64"), ranks=%p\n", - DP_UUID(pool->dp_pool), info, info ? info->pi_bits : 0, ranks); + D_DEBUG(DB_MD, DF_UUID ": info=%p (pi_bits=" DF_X64 "), disabled_ranks=%p\n", + DP_UUID(pool->dp_pool), info, info ? info->pi_bits : 0, disabled_ranks); rc = pool_map_create(map_buf, map_version, &map); if (rc != 0) { @@ -636,20 +593,18 @@ process_query_reply(struct dc_pool *pool, struct pool_buf *map_buf, } /* Scan all targets for populating ranks */ - if (ranks != NULL) { - d_rank_range_list_t *range_list; - bool get_enabled = (info ? ((info->pi_bits & DPI_ENGINES_ENABLED) != 0) : false); - - rc = pool_map_get_ranks(pool->dp_pool, map, get_enabled, ranks); + if (disabled_ranks != NULL) { + /* v1 behavior: if info is NULL, populate the list with disabled ranks */ + if (info == NULL || (info->pi_bits & DPI_ENGINES_DISABLED) != 0) { + rc = pool_map_get_ranks(pool->dp_pool, map, false, disabled_ranks); + if (rc != 0) + goto out_unlock; + } + } + if (enabled_ranks != NULL && info != NULL && (info->pi_bits & DPI_ENGINES_ENABLED) != 0) { + rc = pool_map_get_ranks(pool->dp_pool, map, true, enabled_ranks); if (rc != 0) goto out_unlock; - - /* For debug logging - convert to rank ranges */ - range_list = d_rank_range_list_create_from_ranks(*ranks); - if (range_list) { - pool_print_range_list(pool, range_list, get_enabled); - d_rank_range_list_free(range_list); - } } out_unlock: @@ -893,7 +848,7 @@ pool_connect_cp(tse_task_t *task, void *data) rc = process_query_reply(tpriv->pool, map_buf, pco->pco_op.po_map_version, pco->pco_op.po_hint.sh_rank, &pco->pco_space, &pco->pco_rebuild_st, - NULL /* tgts */, info, NULL, NULL, true); + NULL, NULL /* tgts */, info, NULL, NULL, true); if (rc != 0) { if (rc == -DER_AGAIN) { rc = tse_task_reinit(task); @@ -1786,7 +1741,8 @@ dc_pool_exclude_out(tse_task_t *task) struct pool_query_arg { struct dc_pool *dqa_pool; - d_rank_list_t **dqa_ranks; + d_rank_list_t **dqa_enabled_ranks; + d_rank_list_t **dqa_disabled_ranks; daos_pool_info_t *dqa_info; daos_prop_t *dqa_prop; crt_bulk_t dqa_bulk; @@ -1800,8 +1756,6 @@ pool_query_cb(tse_task_t *task, void *data) struct pool_query_arg *arg = (struct pool_query_arg *)data; struct pool_buf *map_buf = arg->dqa_map_buf; struct pool_query_out *out_v5 = crt_reply_get(arg->rpc); - d_rank_list_t *ranks = NULL; - d_rank_list_t **ranks_arg; int rc = task->dt_result; rc = pool_rsvc_client_complete_rpc(arg->dqa_pool, &arg->rpc->cr_ep, rc, @@ -1837,14 +1791,10 @@ pool_query_cb(tse_task_t *task, void *data) D_GOTO(out, rc); } - ranks_arg = arg->dqa_ranks ? arg->dqa_ranks : &ranks; - - rc = process_query_reply(arg->dqa_pool, map_buf, - out_v5->pqo_op.po_map_version, - out_v5->pqo_op.po_hint.sh_rank, - &out_v5->pqo_space, &out_v5->pqo_rebuild_st, - ranks_arg, arg->dqa_info, - arg->dqa_prop, out_v5->pqo_prop, false); + rc = process_query_reply( + arg->dqa_pool, map_buf, out_v5->pqo_op.po_map_version, out_v5->pqo_op.po_hint.sh_rank, + &out_v5->pqo_space, &out_v5->pqo_rebuild_st, arg->dqa_enabled_ranks, + arg->dqa_disabled_ranks, arg->dqa_info, arg->dqa_prop, out_v5->pqo_prop, false); if (rc != 0) { if (rc == -DER_AGAIN) { rc = tse_task_reinit(task); @@ -1852,10 +1802,6 @@ pool_query_cb(tse_task_t *task, void *data) } D_GOTO(out, rc); } - D_DEBUG(DB_MD, DF_UUID": got ranklist with %u ranks\n", - DP_UUID(arg->dqa_pool->dp_pool), (*ranks_arg)->rl_nr); - if (ranks_arg == &ranks) - d_rank_list_free(ranks); out: crt_req_decref(arg->rpc); @@ -1887,9 +1833,9 @@ dc_pool_query(tse_task_t *task) if (pool == NULL) D_GOTO(out_task, rc = -DER_NO_HDL); - D_DEBUG(DB_MD, DF_UUID": querying: hdl="DF_UUID" ranks=%p info=%p\n", - DP_UUID(pool->dp_pool), DP_UUID(pool->dp_pool_hdl), - args->ranks, args->info); + D_DEBUG(DB_MD, DF_UUID ": querying: hdl=" DF_UUID " disabled_ranks=%p info=%p\n", + DP_UUID(pool->dp_pool), DP_UUID(pool->dp_pool_hdl), args->disabled_ranks, + args->info); ep.ep_grp = pool->dp_sys->sy_group; rc = dc_pool_choose_svc_rank(NULL /* label */, pool->dp_pool, @@ -1916,12 +1862,13 @@ dc_pool_query(tse_task_t *task) D_GOTO(out_rpc, rc); pool_query_in_set_data(rpc, query_args.dqa_bulk, pool_query_bits(args->info, args->prop)); - query_args.dqa_pool = pool; - query_args.dqa_ranks = args->ranks; - query_args.dqa_info = args->info; - query_args.dqa_prop = args->prop; - query_args.dqa_map_buf = map_buf; - query_args.rpc = rpc; + query_args.dqa_pool = pool; + query_args.dqa_enabled_ranks = args->enabled_ranks; + query_args.dqa_disabled_ranks = args->disabled_ranks; + query_args.dqa_info = args->info; + query_args.dqa_prop = args->prop; + query_args.dqa_map_buf = map_buf; + query_args.rpc = rpc; rc = tse_task_register_comp_cb(task, pool_query_cb, &query_args, sizeof(query_args)); diff --git a/src/pool/srv_cli.c b/src/pool/srv_cli.c index bbc228dd2b1..2af0b587ffa 100644 --- a/src/pool/srv_cli.c +++ b/src/pool/srv_cli.c @@ -491,6 +491,8 @@ process_query_result(d_rank_list_t **enabled_ranks, d_rank_list_t **disabled_ran DP_UUID(pool_uuid), dead_rank_list->rl_nr); } + /* remove dead ranks from enabled list */ + d_rank_list_filter(dead_rank_list, enabled_rank_list, true); pool_query_reply_to_info(pool_uuid, map_buf, map_version, leader_rank, ps, rs, info); info->pi_ndisabled = num_disabled; if (enabled_rank_list != NULL) diff --git a/src/tests/suite/daos_pool.c b/src/tests/suite/daos_pool.c index a378740b7aa..dce652968aa 100644 --- a/src/tests/suite/daos_pool.c +++ b/src/tests/suite/daos_pool.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -676,7 +676,7 @@ pool_op_retry(void **state) test_set_engine_fail_loc(arg, leader_rank, DAOS_POOL_QUERY_FAIL_CORPC | DAOS_FAIL_ONCE); print_message("querying pool info... "); memset(&info, 'D', sizeof(info)); - info.pi_bits = DPI_ALL; + info.pi_bits = DPI_ALL ^ (DPI_ENGINES_ENABLED | DPI_ENGINES_DEAD); rc = daos_pool_query(poh, &engine_ranks, &info, NULL, NULL /* ev */); assert_rc_equal(rc, 0); assert_int_equal(info.pi_ndisabled, 0);