Skip to content

Commit

Permalink
DAOS-6748 placement: Do not clear used_tgts in get_target (#4751) (#4829
Browse files Browse the repository at this point in the history
)

Do not clear used_tgts in get_target, since we do not want to
put multiple shards in a single VOS target yet, which could
happen when there are different VOS targets in each fault
domain.

Add dom_occupied to get_target to represent the domains whose
targets have all been used up for the object, so that those
domains are not chosen again.

There is no need to rehash within the same rank: if a target has
already been used, just walk through the targets to find an
available spot.

Signed-off-by: Di Wang <di.wang@intel.com>
  • Loading branch information
wangdi authored Mar 5, 2021
1 parent f6b136a commit f0ba094
Showing 1 changed file with 87 additions and 32 deletions.
119 changes: 87 additions & 32 deletions src/placement/jump_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,9 @@ get_num_domains(struct pool_domain *curr_dom, enum PL_OP_TYPE op_type)
* \param[in] dom_used This is a contiguous array that contains
* information on whether or not an internal node
* (non-target) in a domain has been used.
* \param[in] dom_occupied This is a contiguous array that contains
* information on whether or not an internal node
* (non-target) in a domain has been occupied.
* \param[in] used_targets A list of the targets that have been used. We
* iterate through this when selecting the next
* target in a placement to determine if that
Expand All @@ -331,22 +334,22 @@ get_num_domains(struct pool_domain *curr_dom, enum PL_OP_TYPE op_type)
* targets are allowed in the case that there
* are more shards than targets
*
* \return nothing; the selected target is handed back through \a target,
* which is set to NULL when every domain is already occupied
*/
#define MAX_STACK 5
static void
get_target(struct pool_domain *curr_dom, struct pool_target **target,
uint64_t obj_key, uint8_t *dom_used, uint8_t *tgts_used,
int shard_num, enum PL_OP_TYPE op_type)
uint64_t obj_key, uint8_t *dom_used, uint8_t *dom_occupied,
uint8_t *tgts_used, int shard_num, enum PL_OP_TYPE op_type)
{
int range_set;
uint8_t found_target = 0;
uint32_t selected_dom;
struct pool_domain *root_pos;
struct pool_domain *dom_stack[MAX_STACK] = { 0 };
int top = -1;

obj_key = crc(obj_key, shard_num);
root_pos = curr_dom;

do {
uint32_t num_doms;

Expand All @@ -355,7 +358,6 @@ get_target(struct pool_domain *curr_dom, struct pool_target **target,

/* If choosing target (lowest fault domain level) */
if (curr_dom->do_children == NULL) {

uint32_t fail_num = 0;
uint32_t dom_id;
uint32_t start_tgt;
Expand All @@ -365,30 +367,34 @@ get_target(struct pool_domain *curr_dom, struct pool_target **target,
end_tgt = start_tgt + (num_doms - 1);

range_set = isset_range(tgts_used, start_tgt, end_tgt);
if (range_set)
clrbit_range(tgts_used, start_tgt, end_tgt);
if (range_set) {
/* Used up all targets in this domain */
setbit(dom_occupied, curr_dom - root_pos);
D_ASSERT(top != -1);
curr_dom = dom_stack[top--]; /* try parent */
continue;
}

do {
/*
* Must crc key because jump consistent hash
* requires an even distribution or it will
* not work
*/
obj_key = crc(obj_key, fail_num++);
/*
* Must crc key because jump consistent hash
* requires an even distribution or it will
* not work
*/
obj_key = crc(obj_key, fail_num++);

/* Get target for shard */
selected_dom = jump_consistent_hash(obj_key,
num_doms);
/* Get target for shard */
selected_dom = jump_consistent_hash(obj_key, num_doms);

do {
selected_dom = selected_dom % num_doms;
/* Retrieve actual target using index */
*target = &curr_dom->do_targets[selected_dom];

/* Get target id to check if target used */
dom_id = (*target)->ta_comp.co_id;

selected_dom++;
} while (isset(tgts_used, dom_id));
setbit(tgts_used, dom_id);

setbit(tgts_used, dom_id);
/* Found target (which may be available or not) */
found_target = 1;
} else {
Expand All @@ -408,9 +414,40 @@ get_target(struct pool_domain *curr_dom, struct pool_target **target,
start_dom = (curr_dom->do_children) - root_pos;
end_dom = start_dom + (num_doms - 1);

range_set = isset_range(dom_occupied, start_dom,
end_dom);
if (range_set) {
if (top == -1) {
*target = NULL;
return;
}
setbit(dom_occupied, curr_dom - root_pos);
curr_dom = dom_stack[top--];
continue;
}

range_set = isset_range(dom_used, start_dom, end_dom);
if (range_set)
clrbit_range(dom_used, start_dom, end_dom);
if (range_set) {
int idx;

/* Skip the domain whose targets are used up */
for (idx = start_dom; idx <= end_dom; ++idx) {
if (isclr(dom_occupied, idx))
clrbit(dom_used, idx);
}
/* if all children of the current dom have been
* used, then let's go back its parent to check
* its siblings.
*/
if (curr_dom != root_pos) {
setbit(dom_used, curr_dom - root_pos);
D_ASSERT(top != -1);
curr_dom = dom_stack[top--];
} else {
curr_dom = root_pos;
}
continue;
}

/*
* Keep choosing new domains until one that has
Expand All @@ -425,6 +462,8 @@ get_target(struct pool_domain *curr_dom, struct pool_target **target,
/* Mark this domain as used */
setbit(dom_used, start_dom + selected_dom);

D_ASSERT(top < MAX_STACK - 1);
dom_stack[++top] = curr_dom;
curr_dom = &(curr_dom->do_children[selected_dom]);
obj_key = crc(obj_key, curr_dom->do_comp.co_id);
}
Expand Down Expand Up @@ -475,7 +514,7 @@ static int
obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md,
struct pl_obj_layout *layout, struct jm_obj_placement *jmop,
d_list_t *remap_list, enum PL_OP_TYPE op_type,
uint8_t *tgts_used, uint8_t *dom_used,
uint8_t *tgts_used, uint8_t *dom_used, uint8_t *dom_occupied,
uint32_t failed_in_layout, d_list_t *extend_list)
{
struct failed_shard *f_shard;
Expand Down Expand Up @@ -518,7 +557,9 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md,
if (spare_avail) {
rebuild_key = crc(key, f_shard->fs_shard_idx);
get_target(root, &spare_tgt, crc(key, rebuild_key),
dom_used, tgts_used, shard_id, op_type);
dom_used, dom_occupied, tgts_used,
shard_id, op_type);
D_ASSERT(spare_tgt != NULL);
D_DEBUG(DB_PL, "Trying new target: "DF_TARGET"\n",
DP_TARGET(spare_tgt));
spares_left--;
Expand Down Expand Up @@ -618,8 +659,9 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
struct pool_domain *root;
daos_obj_id_t oid;
d_list_t extend_list;
uint8_t *dom_used;
uint8_t *tgts_used;
uint8_t *dom_used = NULL;
uint8_t *dom_occupied = NULL;
uint8_t *tgts_used = NULL;
uint32_t dom_used_length;
uint64_t key;
uint32_t fail_tgt_cnt;
Expand Down Expand Up @@ -650,10 +692,11 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
dom_used_length = (struct pool_domain *)(root->do_targets) - (root) + 1;

D_ALLOC_ARRAY(dom_used, (dom_used_length / 8) + 1);
D_ALLOC_ARRAY(dom_occupied, (dom_used_length / 8) + 1);
D_ALLOC_ARRAY(tgts_used, (root->do_target_nr / 8) + 1);
D_INIT_LIST_HEAD(&extend_list);

if (dom_used == NULL || tgts_used == NULL)
if (dom_used == NULL || dom_occupied == NULL || tgts_used == NULL)
D_GOTO(out, rc = -DER_NOMEM);

/**
Expand Down Expand Up @@ -701,8 +744,17 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
uint32_t tgt_id;
uint32_t fseq;

get_target(root, &target, key, dom_used, tgts_used, k,
op_type);
get_target(root, &target, key, dom_used, dom_occupied,
tgts_used, k, op_type);

if (target == NULL) {
D_DEBUG(DB_PL, "no targets for %d/%d/%d\n",
i, j, k);
layout->ol_shards[k].po_target = -1;
layout->ol_shards[k].po_shard = -1;
layout->ol_shards[k].po_fseq = 0;
continue;
}

tgt_id = target->ta_comp.co_id;
fseq = target->ta_comp.co_fseq;
Expand Down Expand Up @@ -740,8 +792,9 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
D_DEBUG(DB_PL, "Fail tgt cnt: %d\n", fail_tgt_cnt);
if (fail_tgt_cnt > 0)
rc = obj_remap_shards(jmap, md, layout, jmop, remap_list,
op_type, tgts_used, dom_used, fail_tgt_cnt,
&extend_list);
op_type, tgts_used, dom_used,
dom_occupied, fail_tgt_cnt,
&extend_list);
out:
if (rc) {
D_ERROR("jump_map_obj_layout_fill failed, rc "DF_RC"\n",
Expand All @@ -750,6 +803,8 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
}
if (dom_used)
D_FREE(dom_used);
if (dom_occupied)
D_FREE(dom_occupied);
if (tgts_used)
D_FREE(tgts_used);

Expand Down

0 comments on commit f0ba094

Please sign in to comment.