/*
 * Layout of the object class hint word: the redundancy hint occupies the low
 * DAOS_OCH_RDD_BITS bits, the sharding hint the DAOS_OCH_SHD_BITS bits right
 * above it. The *_MASK macros isolate each field.
 */
#define DAOS_OCH_RDD_BITS	4
#define DAOS_OCH_SHD_BITS	6
#define DAOS_OCH_RDD_SHIFT	0
#define DAOS_OCH_SHD_SHIFT	DAOS_OCH_RDD_BITS
#define DAOS_OCH_RDD_MAX_VAL	((1ULL << DAOS_OCH_RDD_BITS) - 1)
#define DAOS_OCH_SHD_MAX_VAL	((1ULL << DAOS_OCH_SHD_BITS) - 1)
#define DAOS_OCH_RDD_MASK	(DAOS_OCH_RDD_MAX_VAL << DAOS_OCH_RDD_SHIFT)
#define DAOS_OCH_SHD_MASK	(DAOS_OCH_SHD_MAX_VAL << DAOS_OCH_SHD_SHIFT)

/** Flags for oclass hints; each flag is one bit inside its field */
enum {
	/** Flags to control OC Redundancy (bits covered by DAOS_OCH_RDD_MASK) */
	/** Default - use RF prop */
	DAOS_OCH_RDD_DEF	= (1 << (DAOS_OCH_RDD_SHIFT + 0)),
	/** No redundancy */
	DAOS_OCH_RDD_NO		= (1 << (DAOS_OCH_RDD_SHIFT + 1)),
	/** Replication */
	DAOS_OCH_RDD_RP		= (1 << (DAOS_OCH_RDD_SHIFT + 2)),
	/** Erasure Code */
	DAOS_OCH_RDD_EC		= (1 << (DAOS_OCH_RDD_SHIFT + 3)),

	/** Flags to control OC Sharding (bits covered by DAOS_OCH_SHD_MASK) */
	/**
	 * Default - use 1 grp.
	 * NOTE(review): dc_set_oclass() currently handles this hint exactly
	 * like DAOS_OCH_SHD_MAX - confirm which of the two is intended.
	 */
	DAOS_OCH_SHD_DEF	= (1 << (DAOS_OCH_SHD_SHIFT + 0)),
	/** <= 4 grps */
	DAOS_OCH_SHD_TINY	= (1 << (DAOS_OCH_SHD_SHIFT + 1)),
	/** max(128, 25%) */
	DAOS_OCH_SHD_REG	= (1 << (DAOS_OCH_SHD_SHIFT + 2)),
	/** max(256, 50%) */
	DAOS_OCH_SHD_HI		= (1 << (DAOS_OCH_SHD_SHIFT + 3)),
	/** max(1024, 80%) */
	DAOS_OCH_SHD_EXT	= (1 << (DAOS_OCH_SHD_SHIFT + 4)),
	/** 100% */
	DAOS_OCH_SHD_MAX	= (1 << (DAOS_OCH_SHD_SHIFT + 5)),
};
daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid, struct pl_map_attr attr; int rc; - /** TODO - unsupported for now */ - if (cid == OC_UNKNOWN) - return -DER_INVAL; - /** select the oclass */ poh = dc_cont_hdl2pool_hdl(coh); if (daos_handle_is_inval(poh)) @@ -5422,8 +5418,13 @@ daos_obj_generate_oid(daos_handle_t coh, daos_obj_id_t *oid, D_DEBUG(DB_TRACE, "available domain=%d, targets=%d\n", attr.pa_domain_nr, attr.pa_target_nr); - rc = daos_oclass_fit_max(cid, attr.pa_domain_nr, attr.pa_target_nr, - &cid); + /** no class requested: derive one from the container RF, ofeats and hints */ + if (cid == OC_UNKNOWN) + rc = dc_set_oclass(coh, attr.pa_domain_nr, attr.pa_target_nr, + ofeats, hints, &cid); + else + rc = daos_oclass_fit_max(cid, attr.pa_domain_nr, + attr.pa_target_nr, &cid); if (rc) return rc; diff --git a/src/object/obj_class.c b/src/object/obj_class.c index 222a3436496..8b9f1443557 100644 --- a/src/object/obj_class.c +++ b/src/object/obj_class.c @@ -1020,6 +1020,112 @@ daos_oclass_fit_max(daos_oclass_id_t oc_id, int domain_nr, int target_nr, return oc ? 
0 : -DER_NONEXIST; } +int +dc_set_oclass(daos_handle_t coh, int domain_nr, int target_nr, + daos_ofeat_t ofeats, daos_oclass_hints_t hints, + daos_oclass_id_t *oc_id_p) +{ + uint64_t rf_factor; + daos_oclass_id_t cid = 0; + struct daos_obj_class *oc; + struct daos_oclass_attr ca; + uint16_t shd, rdd; + int grp_size; + + rf_factor = dc_cont_hdl2redunfac(coh); + rdd = hints & DAOS_OCH_RDD_MASK; + shd = hints & DAOS_OCH_SHD_MASK; + + /** first set a reasonable default based on RF & RDD hint (if set) */ + switch (rf_factor) { + case DAOS_PROP_CO_REDUN_RF0: + if (rdd == DAOS_OCH_RDD_RP) + cid = OC_RP_2GX; + else if (rdd == DAOS_OCH_RDD_EC) + cid = OC_EC_2P1G1; + else + cid = OC_SX; + break; + case DAOS_PROP_CO_REDUN_RF1: + if (rdd == DAOS_OCH_RDD_RP) + cid = OC_RP_2GX; + else if (rdd == DAOS_OCH_RDD_EC || ofeats & DAOS_OF_ARRAY || + ofeats & DAOS_OF_ARRAY_BYTE) + /** TODO - this should be GX when supported */ + cid = OC_EC_2P1G1; + else + cid = OC_RP_2GX; + break; + case DAOS_PROP_CO_REDUN_RF2: + if (rdd == DAOS_OCH_RDD_RP) + cid = OC_RP_3GX; + else if (rdd == DAOS_OCH_RDD_EC || ofeats & DAOS_OF_ARRAY || + ofeats & DAOS_OF_ARRAY_BYTE) + /** TODO - this should be GX when supported */ + cid = OC_EC_2P2G1; + else + cid = OC_RP_3GX; + break; + case DAOS_PROP_CO_REDUN_RF3: + case DAOS_PROP_CO_REDUN_RF4: + return -DER_INVAL; + } + + /* + * If there are no sharding hints, we can return. + * TODO - since all EC classes are only G1, no need to check sharding. + * hint for that. + */ + if (shd == 0 || cid == OC_EC_2P2G1 || cid == OC_EC_2P1G1) { + oc = oclass_fit_max(cid, domain_nr, target_nr); + if (oc) + *oc_id_p = oc->oc_id; + + return oc ? 
0 : -DER_NONEXIST; + } + + oc = oclass_ident2cl(cid); + if (!oc) + return -DER_INVAL; + + memcpy(&ca, &oc->oc_attr, sizeof(ca)); + grp_size = daos_oclass_grp_size(&ca); + + /** adjust the group size based on the sharding hint */ + switch (shd) { + case DAOS_OCH_SHD_DEF: + case DAOS_OCH_SHD_MAX: + ca.ca_grp_nr = DAOS_OBJ_GRP_MAX; + break; + case DAOS_OCH_SHD_TINY: + ca.ca_grp_nr = 4; + break; + case DAOS_OCH_SHD_REG: + ca.ca_grp_nr = max(128, target_nr * 25 / 100); + break; + case DAOS_OCH_SHD_HI: + ca.ca_grp_nr = max(256, target_nr * 50 / 100); + break; + case DAOS_OCH_SHD_EXT: + ca.ca_grp_nr = max(1024, target_nr * 80 / 100); + break; + default: + D_ERROR("Invalid sharding hint\n"); + return -DER_INVAL; + } + + if (ca.ca_grp_nr == DAOS_OBJ_GRP_MAX || + ca.ca_grp_nr * grp_size > target_nr) { + /* search for the highest scalability in the allowed range */ + ca.ca_grp_nr = max(1, (target_nr / grp_size)); + } + oc = oclass_scale2cl(&ca); + if (oc) + *oc_id_p = oc->oc_id; + + return oc ? 0 : -DER_NONEXIST; +} + /** a structure to map EC object class to EC codec structure */ struct daos_oc_ec_codec { /** object class id */ diff --git a/src/object/obj_internal.h b/src/object/obj_internal.h index e50c858af80..ae79fb26a01 100644 --- a/src/object/obj_internal.h +++ b/src/object/obj_internal.h @@ -441,6 +441,11 @@ struct dc_obj_verify_args { struct dc_obj_verify_cursor cursor; }; +int +dc_set_oclass(daos_handle_t coh, int domain_nr, int target_nr, + daos_ofeat_t ofeats, daos_oclass_hints_t hints, + daos_oclass_id_t *oc_id_); + int dc_obj_shard_open(struct dc_object *obj, daos_unit_oid_t id, unsigned int mode, struct dc_obj_shard *shard); void dc_obj_shard_close(struct dc_obj_shard *shard); diff --git a/src/tests/suite/daos_obj.c b/src/tests/suite/daos_obj.c index df65360a396..53f41ef0f1e 100644 --- a/src/tests/suite/daos_obj.c +++ b/src/tests/suite/daos_obj.c @@ -4118,6 +4118,212 @@ io_fetch_retry_another_replica(void **state) ioreq_fini(&req); } +static int 
+compare_oclass(daos_handle_t coh, daos_obj_id_t oid, daos_oclass_id_t ecid) +{ + daos_oclass_id_t cid; + int rc; + + /** get oclass of OID */ + cid = daos_obj_id2class(oid); + + /* + * get the expected oclass - this is needed to convert things with GX to + * fit them in current system. + */ + rc = daos_obj_generate_oid(coh, &oid, 0, ecid, 0, 0); + assert_rc_equal(rc, 0); + ecid = daos_obj_id2class(oid); + + if (cid == ecid) + return 0; + else + return 1; +} + +static int +check_oclass(daos_handle_t coh, daos_oclass_hints_t hints, daos_ofeat_t feats, + enum daos_obj_resil res, unsigned int nr, daos_oclass_id_t ecid) +{ + daos_obj_id_t oid; + daos_oclass_id_t cid; + struct daos_oclass_attr *attr; + char name[10]; + int rc; + + oid.hi = 1; + oid.lo = 1; + rc = daos_obj_generate_oid(coh, &oid, feats, 0, hints, 0); + assert_rc_equal(rc, 0); + + cid = daos_obj_id2class(oid); + attr = daos_oclass_attr_find(oid); + + daos_oclass_id2name(cid, name); + printf("%s\n", name); + assert_int_equal(attr->ca_resil, res); + if (res == DAOS_RES_REPL) { + assert_int_equal(attr->u.rp.r_num, nr); + } else if (res == DAOS_RES_EC) { + assert_int_equal(attr->u.ec.e_p, nr - 1); + assert_int_equal(attr->u.ec.e_k, 2); + } + + /** need an easier way to determine grp nr. 
for now use fit for GX */ + rc = compare_oclass(coh, oid, ecid); + if (rc) { + fail_msg("Mismatch oclass %d vs %d\n", cid, OC_RP_2GX); + rc = -DER_MISMATCH; + } + + return rc; +} + +/** i/o to variable idx offset */ +static void +oclass_auto_setting(void **state) +{ + test_arg_t *arg = *state; + uuid_t uuid; + daos_handle_t coh; + daos_pool_info_t info = {0}; + daos_prop_t *prop = NULL; + daos_ofeat_t feat_kv, feat_array, feat_byte_array; + int rc; + + rc = daos_pool_query(arg->pool.poh, NULL, &info, NULL, NULL); + assert_rc_equal(rc, 0); + + feat_array = DAOS_OF_DKEY_UINT64 | DAOS_OF_KV_FLAT | DAOS_OF_ARRAY; + feat_byte_array = DAOS_OF_DKEY_UINT64 | DAOS_OF_KV_FLAT | + DAOS_OF_ARRAY_BYTE; + feat_kv = DAOS_OF_KV_FLAT; + + prop = daos_prop_alloc(1); + assert_non_null(prop); + + /** create container with RF = 0 */ + print_message("OID settings with container RF0:\n"); + prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_FAC; + prop->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RF0; + uuid_generate(uuid); + rc = daos_cont_create(arg->pool.poh, uuid, prop, NULL); + assert_rc_equal(rc, 0); + rc = daos_cont_open(arg->pool.poh, uuid, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + /** ALL oids by default should use OC_SX fit to current DAOS system */ + print_message("DEFAULT oid class:\t"); + rc = check_oclass(coh, 0, 0, DAOS_RES_REPL, 1, OC_SX); + assert_rc_equal(rc, 0); + + print_message("KV oid class:\t"); + rc = check_oclass(coh, 0, feat_kv, DAOS_RES_REPL, 1, OC_SX); + assert_rc_equal(rc, 0); + + print_message("ARRAY oid class:\t"); + rc = check_oclass(coh, 0, feat_array, DAOS_RES_REPL, 1, OC_SX); + assert_rc_equal(rc, 0); + + print_message("BYTE ARRAY oid class:\t"); + rc = check_oclass(coh, 0, feat_byte_array, DAOS_RES_REPL, 1, OC_SX); + assert_rc_equal(rc, 0); + + /** RP hint should use RP_2GX fit to current DAOS system */ + print_message("oid with DAOS_OCH_RDD_RP hint:\t"); + rc = check_oclass(coh, DAOS_OCH_RDD_RP, 0, DAOS_RES_REPL, 2, OC_RP_2GX); + 
assert_rc_equal(rc, 0); + + /** EC hint should use OC_EC_2P1G1 */ + print_message("KC oid with DAOS_OCH_RDD_EC hint:\t"); + rc = check_oclass(coh, DAOS_OCH_RDD_EC, feat_kv, DAOS_RES_EC, 2, + OC_EC_2P1G1); + assert_rc_equal(rc, 0); + + /** RP hint with Tiny sharding should use RP_2G4 */ + print_message("oid with DAOS_OCH_RDD_RP | DAOS_OCH_SHD_TINY hint:\t"); + rc = check_oclass(coh, DAOS_OCH_RDD_RP | DAOS_OCH_SHD_TINY, + feat_byte_array, DAOS_RES_REPL, 2, OC_RP_2G4); + assert_rc_equal(rc, 0); + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + rc = daos_cont_destroy(arg->pool.poh, uuid, 0, NULL); + assert_rc_equal(rc, 0); + + print_message("OID settings with container RF1:\n"); + /** create container with rf = 1 */ + prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_FAC; + prop->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RF1; + uuid_generate(uuid); + rc = daos_cont_create(arg->pool.poh, uuid, prop, NULL); + assert_rc_equal(rc, 0); + rc = daos_cont_open(arg->pool.poh, uuid, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + /** default oid should be OC_RP_2GX fit to daos system*/ + print_message("DEFAULT oid class:\t"); + rc = check_oclass(coh, 0, 0, DAOS_RES_REPL, 2, OC_RP_2GX); + assert_rc_equal(rc, 0); + + /** KV oid should be OC_RP_2GX fit to daos system*/ + print_message("KV oid class:\t"); + rc = check_oclass(coh, 0, feat_kv, DAOS_RES_REPL, 2, OC_RP_2GX); + assert_rc_equal(rc, 0); + + /** ARRAY oid should be OC_EC_2P1G1 */ + print_message("ARRAY oid class:\t"); + rc = check_oclass(coh, 0, feat_array, DAOS_RES_EC, 2, OC_EC_2P1G1); + assert_rc_equal(rc, 0); + + /** Byte Array oid should be OC_EC_2P1G1 */ + print_message("BYTE ARRAY oid class:\t"); + rc = check_oclass(coh, 0, feat_byte_array, DAOS_RES_EC, 2, + OC_EC_2P1G1); + assert_rc_equal(rc, 0); + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + rc = daos_cont_destroy(arg->pool.poh, uuid, 0, NULL); + assert_rc_equal(rc, 0); + + print_message("OID settings with container 
RF2:\n"); + /** create container with rf = 1 */ + prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_FAC; + prop->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RF2; + uuid_generate(uuid); + rc = daos_cont_create(arg->pool.poh, uuid, prop, NULL); + assert_rc_equal(rc, 0); + rc = daos_cont_open(arg->pool.poh, uuid, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, 0); + + /** default oid should be OC_RP_3GX fit to daos system*/ + print_message("DEFAULT oid class:\t"); + rc = check_oclass(coh, 0, 0, DAOS_RES_REPL, 3, OC_RP_3GX); + assert_rc_equal(rc, 0); + + /** KV oid should be OC_RP_2GX fit to daos system*/ + print_message("KV oid class:\t"); + rc = check_oclass(coh, 0, feat_kv, DAOS_RES_REPL, 3, OC_RP_3GX); + assert_rc_equal(rc, 0); + + /** ARRAY oid should be OC_EC_2P1G1 */ + print_message("ARRAY oid class:\t"); + rc = check_oclass(coh, 0, feat_array, DAOS_RES_EC, 3, OC_EC_2P2G1); + assert_rc_equal(rc, 0); + + /** Byte Array oid should be OC_EC_2P1G1 */ + print_message("BYTE ARRAY oid class:\t"); + rc = check_oclass(coh, 0, feat_byte_array, DAOS_RES_EC, 3, + OC_EC_2P2G1); + assert_rc_equal(rc, 0); + + rc = daos_cont_close(coh, NULL); + assert_rc_equal(rc, 0); + rc = daos_cont_destroy(arg->pool.poh, uuid, 0, NULL); + assert_rc_equal(rc, 0); +} + static const struct CMUnitTest io_tests[] = { { "IO1: simple update/fetch/verify", io_simple, async_disable, test_case_teardown}, @@ -4205,6 +4411,8 @@ static const struct CMUnitTest io_tests[] = { { "IO42: IO fetch from an alternative node after first try failed", io_fetch_retry_another_replica, async_disable, test_case_teardown}, + { "IO43: Object class selection", + oclass_auto_setting, async_disable, test_case_teardown}, }; int