diff --git a/ompi/mca/coll/ucx/Makefile.am b/ompi/mca/coll/ucx/Makefile.am index e82d63f82b8..60bee91e955 100644 --- a/ompi/mca/coll/ucx/Makefile.am +++ b/ompi/mca/coll/ucx/Makefile.am @@ -18,10 +18,12 @@ AM_CPPFLAGS = $(coll_ucx_CPPFLAGS) -DCOLL_UCX_HOME=\"$(coll_ucx_HOME)\" $(coll_u coll_ucx_sources = \ coll_ucx.h \ coll_ucx_request.h \ + coll_ucx_datatype.h \ coll_ucx_freelist.h \ coll_ucx_op.c \ coll_ucx_module.c \ coll_ucx_request.c \ + coll_ucx_datatype.c \ coll_ucx_component.c # Make the output library in this directory, and name it either diff --git a/ompi/mca/coll/ucx/coll_ucx.h b/ompi/mca/coll/ucx/coll_ucx.h index 5bef95720cd..9e3bf616651 100644 --- a/ompi/mca/coll/ucx/coll_ucx.h +++ b/ompi/mca/coll/ucx/coll_ucx.h @@ -26,9 +26,9 @@ #include "ompi/communicator/communicator.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/attribute/attribute.h" -#include "ompi/op/op.h" #include "orte/runtime/orte_globals.h" +#include "ompi/datatype/ompi_datatype_internal.h" #include "opal/mca/common/ucx/common_ucx.h" #include "ucg/api/ucg_mpi.h" @@ -71,6 +71,13 @@ typedef struct mca_coll_ucx_component { mca_coll_ucx_freelist_t persistent_ops; ompi_request_t completed_send_req; size_t request_size; + + /* Datatypes */ + int datatype_attr_keyval; + ucp_datatype_t predefined_types[OMPI_DATATYPE_MPI_MAX_PREDEFINED]; + + /* Converters pool */ + mca_coll_ucx_freelist_t convs; } mca_coll_ucx_component_t; OMPI_MODULE_DECLSPEC extern mca_coll_ucx_component_t mca_coll_ucx_component; diff --git a/ompi/mca/coll/ucx/coll_ucx_component.c b/ompi/mca/coll/ucx/coll_ucx_component.c index 4be598320b5..ee73c8fee5f 100644 --- a/ompi/mca/coll/ucx/coll_ucx_component.c +++ b/ompi/mca/coll/ucx/coll_ucx_component.c @@ -21,6 +21,7 @@ #include "coll_ucx.h" #include "coll_ucx_request.h" +#include "coll_ucx_datatype.h" /* @@ -266,6 +267,12 @@ int mca_coll_ucx_open(void) goto out; } + int i; + mca_coll_ucx_component.datatype_attr_keyval = MPI_KEYVAL_INVALID; + for (i = 0; i < 
OMPI_DATATYPE_MAX_PREDEFINED; ++i) { + mca_coll_ucx_component.predefined_types[i] = COLL_UCX_DATATYPE_INVALID; + } + ucs_list_head_init(&mca_coll_ucx_component.group_head); return OMPI_SUCCESS; @@ -279,6 +286,14 @@ int mca_coll_ucx_close(void) { COLL_UCX_VERBOSE(1, "mca_coll_ucx_close"); + int i; + for (i = 0; i < OMPI_DATATYPE_MAX_PREDEFINED; ++i) { + if (mca_coll_ucx_component.predefined_types[i] != COLL_UCX_DATATYPE_INVALID) { + ucp_dt_destroy(mca_coll_ucx_component.predefined_types[i]); + mca_coll_ucx_component.predefined_types[i] = COLL_UCX_DATATYPE_INVALID; + } + } + if (mca_coll_ucx_component.ucg_worker != NULL) { mca_coll_ucx_cleanup(); mca_coll_ucx_component.ucg_worker = NULL; @@ -355,11 +370,10 @@ int mca_coll_ucx_init(void) } /* Initialize the free lists */ - OBJ_CONSTRUCT(&mca_coll_ucx_component.persistent_ops, mca_coll_ucx_freelist_t); - - /* Create a completed request to be returned from isend */ - OBJ_CONSTRUCT(&mca_coll_ucx_component.completed_send_req, ompi_request_t); - mca_coll_ucx_completed_request_init(&mca_coll_ucx_component.completed_send_req); + OBJ_CONSTRUCT(&mca_coll_ucx_component.convs, mca_coll_ucx_freelist_t); + COLL_UCX_FREELIST_INIT(&mca_coll_ucx_component.convs, + mca_coll_ucx_convertor_t, + 128, -1, 128); rc = opal_progress_register(mca_coll_ucx_progress); if (OPAL_SUCCESS != rc) { @@ -384,10 +398,7 @@ void mca_coll_ucx_cleanup(void) opal_progress_unregister(mca_coll_ucx_progress); - mca_coll_ucx_component.completed_send_req.req_state = OMPI_REQUEST_INVALID; - OMPI_REQUEST_FINI(&mca_coll_ucx_component.completed_send_req); - OBJ_DESTRUCT(&mca_coll_ucx_component.completed_send_req); - OBJ_DESTRUCT(&mca_coll_ucx_component.persistent_ops); + OBJ_DESTRUCT(&mca_coll_ucx_component.convs); if (mca_coll_ucx_component.ucg_worker) { ucg_worker_destroy(mca_coll_ucx_component.ucg_worker); diff --git a/ompi/mca/coll/ucx/coll_ucx_datatype.c b/ompi/mca/coll/ucx/coll_ucx_datatype.c new file mode 100644 index 00000000000..05eb985cf93 --- /dev/null 
+++ b/ompi/mca/coll/ucx/coll_ucx_datatype.c @@ -0,0 +1,271 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * + * Copyright (c) 2020 Huawei Technologies Co., Ltd. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "coll_ucx_datatype.h" +#include "coll_ucx_request.h" + +#include "ompi/runtime/mpiruntime.h" +#include "ompi/attribute/attribute.h" + +#include <inttypes.h> +#include <math.h> + +static void* coll_ucx_generic_datatype_start_pack(void *context, const void *buffer, + size_t count) +{ + ompi_datatype_t *datatype = context; + mca_coll_ucx_convertor_t *convertor; + + convertor = (mca_coll_ucx_convertor_t *)COLL_UCX_FREELIST_GET(&mca_coll_ucx_component.convs); + + OMPI_DATATYPE_RETAIN(datatype); + convertor->datatype = datatype; + opal_convertor_copy_and_prepare_for_send(ompi_proc_local_proc->super.proc_convertor, + &datatype->super, count, buffer, 0, + &convertor->opal_conv); + return convertor; +} + +static void* coll_ucx_generic_datatype_start_unpack(void *context, void *buffer, + size_t count) +{ + ompi_datatype_t *datatype = context; + mca_coll_ucx_convertor_t *convertor; + + convertor = (mca_coll_ucx_convertor_t *)COLL_UCX_FREELIST_GET(&mca_coll_ucx_component.convs); + + OMPI_DATATYPE_RETAIN(datatype); + convertor->datatype = datatype; + convertor->offset = 0; + opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor, + &datatype->super, count, buffer, 0, + &convertor->opal_conv); + return convertor; +} + +static size_t coll_ucx_generic_datatype_packed_size(void *state) +{ + mca_coll_ucx_convertor_t *convertor = state; + size_t size; + + opal_convertor_get_packed_size(&convertor->opal_conv, &size); + return 
size; +} + +static size_t coll_ucx_generic_datatype_pack(void *state, size_t offset, + void *dest, size_t max_length) +{ + mca_coll_ucx_convertor_t *convertor = state; + uint32_t iov_count; + struct iovec iov; + size_t length; + + iov_count = 1; + iov.iov_base = dest; + iov.iov_len = max_length; + + opal_convertor_set_position(&convertor->opal_conv, &offset); + length = max_length; + opal_convertor_pack(&convertor->opal_conv, &iov, &iov_count, &length); + return length; +} + +static ucs_status_t coll_ucx_generic_datatype_unpack(void *state, size_t offset, + const void *src, size_t length) +{ + mca_coll_ucx_convertor_t *convertor = state; + + uint32_t iov_count; + struct iovec iov; + opal_convertor_t conv; + + iov_count = 1; + iov.iov_base = (void*)src; + iov.iov_len = length; + + /* in case if unordered message arrived - create separate convertor to + * unpack data. */ + if (offset != convertor->offset) { + OBJ_CONSTRUCT(&conv, opal_convertor_t); + opal_convertor_copy_and_prepare_for_recv(ompi_proc_local_proc->super.proc_convertor, + &convertor->datatype->super, + convertor->opal_conv.count, + convertor->opal_conv.pBaseBuf, 0, + &conv); + opal_convertor_set_position(&conv, &offset); + opal_convertor_unpack(&conv, &iov, &iov_count, &length); + opal_convertor_cleanup(&conv); + OBJ_DESTRUCT(&conv); + /* permanently switch to un-ordered mode */ + convertor->offset = 0; + } else { + opal_convertor_unpack(&convertor->opal_conv, &iov, &iov_count, &length); + convertor->offset += length; + } + return UCS_OK; +} + +static void coll_ucx_generic_datatype_finish(void *state) +{ + mca_coll_ucx_convertor_t *convertor = state; + + opal_convertor_cleanup(&convertor->opal_conv); + OMPI_DATATYPE_RELEASE(convertor->datatype); + COLL_UCX_FREELIST_RETURN(&mca_coll_ucx_component.convs, &convertor->super); +} + +static ucp_generic_dt_ops_t coll_ucx_generic_datatype_ops = { + .start_pack = coll_ucx_generic_datatype_start_pack, + .start_unpack = coll_ucx_generic_datatype_start_unpack, + 
.packed_size = coll_ucx_generic_datatype_packed_size, + .pack = coll_ucx_generic_datatype_pack, + .unpack = coll_ucx_generic_datatype_unpack, + .finish = coll_ucx_generic_datatype_finish +}; + +int mca_coll_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval, + void *attr_val, void *extra) +{ + ucp_datatype_t ucp_datatype = (ucp_datatype_t)attr_val; + +#ifdef HAVE_UCP_REQUEST_PARAM_T + free((void*)datatype->pml_data); +#else + COLL_UCX_ASSERT((uint64_t)ucp_datatype == datatype->pml_data); +#endif + ucp_dt_destroy(ucp_datatype); + datatype->pml_data = COLL_UCX_DATATYPE_INVALID; + return OMPI_SUCCESS; +} + +__opal_attribute_always_inline__ +static inline int mca_coll_ucx_datatype_is_contig(ompi_datatype_t *datatype) +{ + ptrdiff_t lb; + + ompi_datatype_type_lb(datatype, &lb); + + return (datatype->super.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && + (datatype->super.flags & OPAL_DATATYPE_FLAG_NO_GAPS) && + (lb == 0); +} + +#ifdef HAVE_UCP_REQUEST_PARAM_T +__opal_attribute_always_inline__ static inline +coll_ucx_datatype_t *mca_coll_ucx_init_nbx_datatype(ompi_datatype_t *datatype, + ucp_datatype_t ucp_datatype, + size_t size) +{ + coll_ucx_datatype_t *pml_datatype; + int is_contig_pow2; + + pml_datatype = malloc(sizeof(*pml_datatype)); + if (pml_datatype == NULL) { + int err = MPI_ERR_INTERN; + COLL_UCX_ERROR("Failed to allocate datatype structure"); + /* TODO: this error should return to the caller and invoke an error + * handler from the MPI API call. + * For now, it is fatal. 
*/ + ompi_mpi_errors_are_fatal_comm_handler(NULL, &err, "Failed to allocate datatype structure"); + } + + pml_datatype->datatype = ucp_datatype; + + is_contig_pow2 = mca_coll_ucx_datatype_is_contig(datatype) && + (size && !(size & (size - 1))); /* is_pow2(size) */ + if (is_contig_pow2) { + pml_datatype->size_shift = (int)(log(size) / log(2.0)); /* log2(size) */ + } else { + pml_datatype->size_shift = 0; + } + + return pml_datatype; +} +#endif + +ucp_datatype_t mca_coll_ucx_init_datatype(ompi_datatype_t *datatype) +{ + size_t size = 0; /* init to suppress compiler warning */ + ucp_datatype_t ucp_datatype; + ucs_status_t status; + int ret; + + if (mca_coll_ucx_datatype_is_contig(datatype)) { + ompi_datatype_type_size(datatype, &size); + ucp_datatype = ucp_dt_make_contig(size); + goto out; + } + + status = ucp_dt_create_generic(&coll_ucx_generic_datatype_ops, + datatype, &ucp_datatype); + if (status != UCS_OK) { + int err = MPI_ERR_INTERN; + COLL_UCX_ERROR("Failed to create UCX datatype for %s", datatype->name); + /* TODO: this error should return to the caller and invoke an error + * handler from the MPI API call. + * For now, it is fatal. */ + ompi_mpi_errors_are_fatal_comm_handler(NULL, &err, "Failed to allocate datatype structure"); + } + + /* Add custom attribute, to clean up UCX resources when OMPI datatype is + * released. + */ + if (ompi_datatype_is_predefined(datatype)) { + COLL_UCX_ASSERT(datatype->id < OMPI_DATATYPE_MAX_PREDEFINED); + mca_coll_ucx_component.predefined_types[datatype->id] = ucp_datatype; + } else { + ret = ompi_attr_set_c(TYPE_ATTR, datatype, &datatype->d_keyhash, + mca_coll_ucx_component.datatype_attr_keyval, + (void*)ucp_datatype, false); + if (ret != OMPI_SUCCESS) { + int err = MPI_ERR_INTERN; + COLL_UCX_ERROR("Failed to add UCX datatype attribute for %s (%p): %d", + datatype->name, (void*)datatype, ret); + /* TODO: this error should return to the caller and invoke an error + * handler from the MPI API call. + * For now, it is fatal. 
*/ + ompi_mpi_errors_are_fatal_comm_handler(NULL, &err, "Failed to allocate datatype structure"); + } + } +out: + COLL_UCX_VERBOSE(7, "created generic UCX datatype 0x%"PRIx64, ucp_datatype); + +#ifdef HAVE_UCP_REQUEST_PARAM_T + UCS_STATIC_ASSERT(sizeof(datatype->pml_data) >= sizeof(coll_ucx_datatype_t*)); + datatype->pml_data = (uint64_t)mca_coll_ucx_init_nbx_datatype(datatype, + ucp_datatype, + size); +#else + datatype->pml_data = ucp_datatype; +#endif + + return ucp_datatype; +} + +static void mca_coll_ucx_convertor_construct(mca_coll_ucx_convertor_t *convertor) +{ + OBJ_CONSTRUCT(&convertor->opal_conv, opal_convertor_t); +} + +static void mca_coll_ucx_convertor_destruct(mca_coll_ucx_convertor_t *convertor) +{ + OBJ_DESTRUCT(&convertor->opal_conv); +} + +OBJ_CLASS_INSTANCE(mca_coll_ucx_convertor_t, + opal_free_list_item_t, + mca_coll_ucx_convertor_construct, + mca_coll_ucx_convertor_destruct); diff --git a/ompi/mca/coll/ucx/coll_ucx_datatype.h b/ompi/mca/coll/ucx/coll_ucx_datatype.h new file mode 100644 index 00000000000..1966cafea25 --- /dev/null +++ b/ompi/mca/coll/ucx/coll_ucx_datatype.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. + * Copyright (C) Huawei Technologies Co., Ltd. 2020. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef COLL_UCX_DATATYPE_H_ +#define COLL_UCX_DATATYPE_H_ + +#include "coll_ucx.h" + + +#define COLL_UCX_DATATYPE_INVALID 0 + +#ifdef HAVE_UCP_REQUEST_PARAM_T +typedef struct { + ucp_datatype_t datatype; + int size_shift; +} coll_ucx_datatype_t; +#endif + +typedef struct coll_ucx_convertor { + opal_free_list_item_t super; + ompi_datatype_t *datatype; + opal_convertor_t opal_conv; + size_t offset; +} mca_coll_ucx_convertor_t; + +ucp_datatype_t mca_coll_ucx_init_datatype(ompi_datatype_t *datatype); + +int mca_coll_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval, + void *attr_val, void *extra); + +OBJ_CLASS_DECLARATION(mca_coll_ucx_convertor_t); + + +__opal_attribute_always_inline__ +static inline ucp_datatype_t mca_coll_ucx_get_datatype(ompi_datatype_t *datatype) +{ +#ifdef HAVE_UCP_REQUEST_PARAM_T + coll_ucx_datatype_t *ucp_type = (coll_ucx_datatype_t*)datatype->pml_data; + + if (OPAL_LIKELY(ucp_type != COLL_UCX_DATATYPE_INVALID)) { + return ucp_type->datatype; + } +#else + ucp_datatype_t ucp_type = datatype->pml_data; + + if (OPAL_LIKELY(ucp_type != COLL_UCX_DATATYPE_INVALID)) { + return ucp_type; + } +#endif + + return mca_coll_ucx_init_datatype(datatype); +} + +#ifdef HAVE_UCP_REQUEST_PARAM_T +__opal_attribute_always_inline__ +static inline coll_ucx_datatype_t* +mca_coll_ucx_get_op_data(ompi_datatype_t *datatype) +{ + coll_ucx_datatype_t *ucp_type = (coll_ucx_datatype_t*)datatype->pml_data; + + if (OPAL_LIKELY(ucp_type != COLL_UCX_DATATYPE_INVALID)) { + return ucp_type; + } + + mca_coll_ucx_init_datatype(datatype); + return (coll_ucx_datatype_t*)datatype->pml_data; +} + +__opal_attribute_always_inline__ +static inline size_t mca_coll_ucx_get_data_size(coll_ucx_datatype_t *op_data, + size_t count) +{ + return count << op_data->size_shift; +} +#endif + +#endif /* COLL_UCX_DATATYPE_H_ */ diff --git a/ompi/mca/coll/ucx/coll_ucx_module.c b/ompi/mca/coll/ucx/coll_ucx_module.c index 
3cc84c474e6..0b1baaad8bd 100644 --- a/ompi/mca/coll/ucx/coll_ucx_module.c +++ b/ompi/mca/coll/ucx/coll_ucx_module.c @@ -18,6 +18,7 @@ #include "coll_ucx.h" #include "coll_ucx_request.h" +#include "coll_ucx_datatype.h" #include #include @@ -223,7 +224,7 @@ static int mca_coll_ucx_init_global_topo(mca_coll_ucx_module_t *module) goto end; } - /* Create a topo matrix. As it is Diagonal symmetry, only half of the matrix will be computed. */ + /* Create a topo matrix. As it is Diagonal symmetry, only half of the matrix will be computed. */ ret = mca_coll_ucx_create_topo_map(node_index, topo_info, LOC_SIZE, rank_cnt); if (ret != OMPI_SUCCESS) { status = OMPI_ERROR; @@ -281,7 +282,7 @@ static int mca_coll_ucx_create_comm_topo(ucg_group_params_t *args, struct ompi_c return OMPI_SUCCESS; } - /* Create a topo matrix. As it is Diagonal symmetry, only half of the matrix will be computed. */ + /* Create a topo matrix. As it is Diagonal symmetry, only half of the matrix will be computed. */ unsigned i; for (i = 0; i < args->member_count; i++) { /* Find the rank in the MPI_COMM_WORLD for rank i in the comm. 
*/ @@ -323,6 +324,13 @@ static void mca_coll_ucg_create_distance_array(struct ompi_communicator_t *comm, } } +static int mca_coll_ucg_datatype_convert(ompi_datatype_t *mpi_dt, + ucp_datatype_t *ucp_dt) +{ + *ucp_dt = mca_coll_ucx_get_datatype(mpi_dt); + return 0; +} + static void mca_coll_ucg_init_group_param(struct ompi_communicator_t *comm, ucg_group_params_t *args) { args->member_count = ompi_comm_size(comm); @@ -332,6 +340,7 @@ static void mca_coll_ucg_init_group_param(struct ompi_communicator_t *comm, ucg_ args->release_address_f = mca_coll_ucx_release_address; args->cb_group_obj = comm; args->op_is_commute_f = ompi_op_is_commute; + args->mpi_dt_convert = mca_coll_ucg_datatype_convert; } static void mca_coll_ucg_arg_free(struct ompi_communicator_t *comm, ucg_group_params_t *args) @@ -459,6 +468,26 @@ static int mca_coll_ucx_module_enable(mca_coll_base_module_t *module, mca_coll_ucx_module_t *ucx_module = (mca_coll_ucx_module_t*) module; int rc; + if (mca_coll_ucx_component.datatype_attr_keyval == MPI_KEYVAL_INVALID) { + /* Create a key for adding custom attributes to datatypes */ + ompi_attribute_fn_ptr_union_t copy_fn; + ompi_attribute_fn_ptr_union_t del_fn; + copy_fn.attr_datatype_copy_fn = + (MPI_Type_internal_copy_attr_function*)MPI_TYPE_NULL_COPY_FN; + del_fn.attr_datatype_delete_fn = mca_coll_ucx_datatype_attr_del_fn; + rc = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, + &mca_coll_ucx_component.datatype_attr_keyval, + NULL, 0, NULL); + if (rc != OMPI_SUCCESS) { + COLL_UCX_ERROR("Failed to create keyval for UCX datatypes: %d", rc); + return rc; + } + + COLL_UCX_FREELIST_INIT(&mca_coll_ucx_component.convs, + mca_coll_ucx_convertor_t, + 128, -1, 128); + } + /* prepare the placeholder for the array of request* */ module->base_data = OBJ_NEW(mca_coll_base_comm_t); if (NULL == module->base_data) { @@ -470,9 +499,6 @@ static int mca_coll_ucx_module_enable(mca_coll_base_module_t *module, return rc; } - 
COLL_UCX_FREELIST_INIT(&mca_coll_ucx_component.persistent_ops, mca_coll_ucx_persistent_op_t, - 128, -1, 128); - COLL_UCX_VERBOSE(1, "UCX Collectives Module initialized"); return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/ucx/coll_ucx_op.c b/ompi/mca/coll/ucx/coll_ucx_op.c index 0dfebe42464..d5f2f5e000d 100644 --- a/ompi/mca/coll/ucx/coll_ucx_op.c +++ b/ompi/mca/coll/ucx/coll_ucx_op.c @@ -17,11 +17,6 @@ #include "ompi/message/message.h" #include -static inline int mca_coll_ucx_is_datatype_supported(struct ompi_datatype_t *dtype, int count) -{ - return ompi_datatype_is_contiguous_memory_layout(dtype, count); -} - int mca_coll_ucx_start(size_t count, ompi_request_t** requests) { mca_coll_ucx_persistent_op_t *preq = NULL; @@ -96,10 +91,6 @@ int mca_coll_ucx_allreduce(const void *sbuf, void *rbuf, int count, { mca_coll_ucx_module_t *ucx_module = (mca_coll_ucx_module_t*)module; - if (ucs_unlikely(!mca_coll_ucx_is_datatype_supported(dtype, count))) { - COLL_UCX_ERROR("UCX component does not support discontinuous datatype. Please use other coll component."); - return OMPI_ERR_NOT_SUPPORTED; - } COLL_UCX_TRACE("%s", sbuf, rbuf, count, dtype, comm, "allreduce START"); ucs_status_ptr_t req = COLL_UCX_REQ_ALLOCA(ucx_module); @@ -418,10 +409,6 @@ int mca_coll_ucx_bcast(void *buff, int count, struct ompi_datatype_t *dtype, int { mca_coll_ucx_module_t *ucx_module = (mca_coll_ucx_module_t*)module; - if (ucs_unlikely(!mca_coll_ucx_is_datatype_supported(dtype, count))) { - COLL_UCX_ERROR("UCX component does not support discontinuous datatype. Please use other coll component."); - return OMPI_ERR_NOT_SUPPORTED; - } COLL_UCX_TRACE("%s", buff, buff, count, dtype, comm, "bcast"); ucs_status_ptr_t req = COLL_UCX_REQ_ALLOCA(ucx_module);