Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-6632 obj: add support for hints on the oclass generate API #4831

Merged
merged 5 commits into from
Mar 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions src/include/daos_obj.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,20 +347,29 @@ daos_obj_generate_id(daos_obj_id_t *oid, daos_ofeat_t ofeats,
oid->hi |= hdr;
}

/*
 * Layout of the object class hints word: the redundancy (RDD) flags occupy
 * the low DAOS_OCH_RDD_BITS bits and the sharding (SHD) flags occupy the
 * DAOS_OCH_SHD_BITS bits directly above them.
 */
#define DAOS_OCH_RDD_BITS	4
#define DAOS_OCH_SHD_BITS	6
#define DAOS_OCH_RDD_SHIFT	0
#define DAOS_OCH_SHD_SHIFT	DAOS_OCH_RDD_BITS
#define DAOS_OCH_RDD_MAX_VAL	((1ULL << DAOS_OCH_RDD_BITS) - 1)
#define DAOS_OCH_SHD_MAX_VAL	((1ULL << DAOS_OCH_SHD_BITS) - 1)
#define DAOS_OCH_RDD_MASK	(DAOS_OCH_RDD_MAX_VAL << DAOS_OCH_RDD_SHIFT)
#define DAOS_OCH_SHD_MASK	(DAOS_OCH_SHD_MAX_VAL << DAOS_OCH_SHD_SHIFT)

/** Flags for oclass hints */
enum {
	/* Redundancy hints, selected by DAOS_OCH_RDD_MASK */
	DAOS_OCH_RDD_DEF  = 1 << (DAOS_OCH_RDD_SHIFT + 0), /**< Default - use RF prop */
	DAOS_OCH_RDD_NO   = 1 << (DAOS_OCH_RDD_SHIFT + 1), /**< No redundancy */
	DAOS_OCH_RDD_RP   = 1 << (DAOS_OCH_RDD_SHIFT + 2), /**< Replication */
	DAOS_OCH_RDD_EC   = 1 << (DAOS_OCH_RDD_SHIFT + 3), /**< Erasure Code */

	/* Sharding hints, selected by DAOS_OCH_SHD_MASK */
	DAOS_OCH_SHD_DEF  = 1 << (DAOS_OCH_SHD_SHIFT + 0), /**< Default - use 1 grp */
	DAOS_OCH_SHD_TINY = 1 << (DAOS_OCH_SHD_SHIFT + 1), /**< <= 4 grps */
	DAOS_OCH_SHD_REG  = 1 << (DAOS_OCH_SHD_SHIFT + 2), /**< max(128, 25%) */
	DAOS_OCH_SHD_HI   = 1 << (DAOS_OCH_SHD_SHIFT + 3), /**< max(256, 50%) */
	DAOS_OCH_SHD_EXT  = 1 << (DAOS_OCH_SHD_SHIFT + 4), /**< max(1024, 80%) */
	DAOS_OCH_SHD_MAX  = 1 << (DAOS_OCH_SHD_SHIFT + 5), /**< 100% */

	/*
	 * DAOS_OC_* spellings carrying the same values as the DAOS_OCH_*
	 * flags above.
	 * NOTE(review): presumably the pre-rename names - confirm whether
	 * they still need to be exported.
	 */
	DAOS_OC_RDD_DEF   = DAOS_OCH_RDD_DEF,
	DAOS_OC_RDD_NO    = DAOS_OCH_RDD_NO,
	DAOS_OC_RDD_RP    = DAOS_OCH_RDD_RP,
	DAOS_OC_RDD_EC    = DAOS_OCH_RDD_EC,
	DAOS_OC_SHD_DEF   = DAOS_OCH_SHD_DEF,
	DAOS_OC_SHD_TINY  = DAOS_OCH_SHD_TINY,
	DAOS_OC_SHD_REG   = DAOS_OCH_SHD_REG,
	DAOS_OC_SHD_HI    = DAOS_OCH_SHD_HI,
	DAOS_OC_SHD_EXT   = DAOS_OCH_SHD_EXT,
	DAOS_OC_SHD_MAX   = DAOS_OCH_SHD_MAX,
};

/**
Expand Down
13 changes: 7 additions & 6 deletions src/object/cli_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -5403,10 +5403,6 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid,
struct pl_map_attr attr;
int rc;

/** TODO - unsupported for now */
if (cid == OC_UNKNOWN)
return -DER_INVAL;

/** select the oclass */
poh = dc_cont_hdl2pool_hdl(coh);
if (daos_handle_is_inval(poh))
Expand All @@ -5422,8 +5418,13 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid,
D_DEBUG(DB_TRACE, "available domain=%d, targets=%d\n",
attr.pa_domain_nr, attr.pa_target_nr);

rc = daos_oclass_fit_max(cid, attr.pa_domain_nr, attr.pa_target_nr,
&cid);
/** no oclass specified - pick one from the container RF and the hints */
if (cid == OC_UNKNOWN)
rc = dc_set_oclass(coh, attr.pa_domain_nr, attr.pa_target_nr,
ofeats, hints, &cid);
else
rc = daos_oclass_fit_max(cid, attr.pa_domain_nr,
attr.pa_target_nr, &cid);
if (rc)
return rc;

Expand Down
106 changes: 106 additions & 0 deletions src/object/obj_class.c
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,112 @@ daos_oclass_fit_max(daos_oclass_id_t oc_id, int domain_nr, int target_nr,
return oc ? 0 : -DER_NONEXIST;
}

int
dc_set_oclass(daos_handle_t coh, int domain_nr, int target_nr,
daos_ofeat_t ofeats, daos_oclass_hints_t hints,
daos_oclass_id_t *oc_id_p)
{
uint64_t rf_factor;
daos_oclass_id_t cid = 0;
struct daos_obj_class *oc;
struct daos_oclass_attr ca;
uint16_t shd, rdd;
int grp_size;

rf_factor = dc_cont_hdl2redunfac(coh);
rdd = hints & DAOS_OCH_RDD_MASK;
shd = hints & DAOS_OCH_SHD_MASK;

/** first set a reasonable default based on RF & RDD hint (if set) */
switch (rf_factor) {
case DAOS_PROP_CO_REDUN_RF0:
if (rdd == DAOS_OCH_RDD_RP)
cid = OC_RP_2GX;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RF0 means there is no data protection, right? So shouldn't this be OC_SX even for DAOS_OCH_RDD_RP? And it seems that RF0 combined with rdd == DAOS_OCH_RDD_EC should be an invalid parameter?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RF0 means no data protection as the minimum, but one can still do data protection, right?
this gets set when cont prop is RF0, and user says he wants replication in the hint. i think this should be valid?
if the hint is 0, it gets set to SX.
why is rdd = DAOS_OCH_RDD_EC invalid here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, this one looks fine to me.

else if (rdd == DAOS_OCH_RDD_EC)
cid = OC_EC_2P1G1;
else
cid = OC_SX;
break;
case DAOS_PROP_CO_REDUN_RF1:
if (rdd == DAOS_OCH_RDD_RP)
cid = OC_RP_2GX;
else if (rdd == DAOS_OCH_RDD_EC || ofeats & DAOS_OF_ARRAY ||
ofeats & DAOS_OF_ARRAY_BYTE)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to confirm: does this mean that dc_array objects always map to EC?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

by default, yes if the container has RF > 0

/** TODO - this should be GX when supported */
cid = OC_EC_2P1G1;
else
cid = OC_RP_2GX;
break;
case DAOS_PROP_CO_REDUN_RF2:
if (rdd == DAOS_OCH_RDD_RP)
cid = OC_RP_3GX;
else if (rdd == DAOS_OCH_RDD_EC || ofeats & DAOS_OF_ARRAY ||
ofeats & DAOS_OF_ARRAY_BYTE)
/** TODO - this should be GX when supported */
cid = OC_EC_2P2G1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

must it be 2P1 or 2P2? is it possible to select 4P2, 8P2, 16P2 in future?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea, i got those from @wangdi1
there are some that are not available today, and this is what was suggested as a reasonable default if not set by user.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I mean P2 is the most popular. I thought 4P2G1 might be the most popular choice in general, but I have heard that ANL might mostly use 8P2 or 16P2 for performance reasons. Hmm, maybe domain_nr should be taken into consideration?
if domain_nr >= 10
cid = 8P2G1
else if domain_nr >= 6
cid = 4P2G1
else
cid = 2P2G1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed that we probably need to take both the domain nr and the target nr into account, also when setting the GX.
we can tackle this in a future PR when we add more EC object class?

Copy link
Contributor

@gnailzenh gnailzenh Mar 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added all classes (commit 806d1cf), I assume we are not going to land it to 1.2, so we can switch to new classes in follow-on patch

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we actually want to add to 1.2.
let me see what you added and adjust this PR i guess.

Copy link
Contributor Author

@mchaarawi mchaarawi Mar 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah wait... I thought you were talking about my PR. But if we are not landing your PR to 1.2, then I can't change it here, since we want to land this one to 1.2.
I can make that change in a future PR to master, so if you are OK with this, please land it.

else
cid = OC_RP_3GX;
break;
case DAOS_PROP_CO_REDUN_RF3:
case DAOS_PROP_CO_REDUN_RF4:
return -DER_INVAL;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RF3/RF4 are not supported for now, but may be supported later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is supported at the container level, but there is no oclass to support it today.. we need to add the oclass for it.
by default setting, this will not work today unless we create oclass to support it.

}

/*
* If there are no sharding hints, we can return.
* TODO - since all EC classes are only G1, no need to check the sharding
* hint for them.
*/
if (shd == 0 || cid == OC_EC_2P2G1 || cid == OC_EC_2P1G1) {
oc = oclass_fit_max(cid, domain_nr, target_nr);
if (oc)
*oc_id_p = oc->oc_id;

return oc ? 0 : -DER_NONEXIST;
}

oc = oclass_ident2cl(cid);
if (!oc)
return -DER_INVAL;

memcpy(&ca, &oc->oc_attr, sizeof(ca));
grp_size = daos_oclass_grp_size(&ca);

/** adjust the number of groups based on the sharding hint */
switch (shd) {
case DAOS_OCH_SHD_DEF:
case DAOS_OCH_SHD_MAX:
ca.ca_grp_nr = DAOS_OBJ_GRP_MAX;
break;
case DAOS_OCH_SHD_TINY:
ca.ca_grp_nr = 4;
break;
case DAOS_OCH_SHD_REG:
ca.ca_grp_nr = max(128, target_nr * 25 / 100);
break;
case DAOS_OCH_SHD_HI:
ca.ca_grp_nr = max(256, target_nr * 50 / 100);
break;
case DAOS_OCH_SHD_EXT:
ca.ca_grp_nr = max(1024, target_nr * 80 / 100);
break;
default:
D_ERROR("Invalid sharding hint\n");
return -DER_INVAL;
}

if (ca.ca_grp_nr == DAOS_OBJ_GRP_MAX ||
ca.ca_grp_nr * grp_size > target_nr) {
/* search for the highest scalability in the allowed range */
ca.ca_grp_nr = max(1, (target_nr / grp_size));
}
oc = oclass_scale2cl(&ca);
if (oc)
*oc_id_p = oc->oc_id;

return oc ? 0 : -DER_NONEXIST;
}

/** a structure to map EC object class to EC codec structure */
struct daos_oc_ec_codec {
/** object class id */
Expand Down
5 changes: 5 additions & 0 deletions src/object/obj_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,11 @@ struct dc_obj_verify_args {
struct dc_obj_verify_cursor cursor;
};

/**
 * Select an object class for a caller that did not name one.
 *
 * A starting class is chosen from the redundancy factor of the container, the
 * redundancy hint and the object feature bits; it is then scaled according to
 * the sharding hint and the available targets.
 *
 * \param[in]	coh		Container open handle; its redundancy factor
 *				is looked up.
 * \param[in]	domain_nr	Number of available fault domains.
 * \param[in]	target_nr	Number of available targets; bounds the number
 *				of groups of the selected class.
 * \param[in]	ofeats		Object feature bits; DAOS_OF_ARRAY and
 *				DAOS_OF_ARRAY_BYTE select an EC class when the
 *				redundancy factor is RF1 or RF2 and replication
 *				was not requested.
 * \param[in]	hints		Combination of one DAOS_OCH_RDD_* and one
 *				DAOS_OCH_SHD_* flag, or 0 for the defaults.
 * \param[out]	oc_id_p		Selected object class ID; written only on
 *				success.
 *
 * \return	0 on success,
 *		-DER_INVAL for a redundancy factor of RF3/RF4, an
 *		unrecognized sharding hint or an unknown starting class,
 *		-DER_NONEXIST if no matching object class is found.
 */
int
dc_set_oclass(daos_handle_t coh, int domain_nr, int target_nr,
	      daos_ofeat_t ofeats, daos_oclass_hints_t hints,
	      daos_oclass_id_t *oc_id_p);

int dc_obj_shard_open(struct dc_object *obj, daos_unit_oid_t id,
unsigned int mode, struct dc_obj_shard *shard);
void dc_obj_shard_close(struct dc_obj_shard *shard);
Expand Down
Loading