diff --git a/ompi/mca/mtl/ofi/help-mtl-ofi.txt b/ompi/mca/mtl/ofi/help-mtl-ofi.txt
index 8131766ae00..58f55a42daf 100644
--- a/ompi/mca/mtl/ofi/help-mtl-ofi.txt
+++ b/ompi/mca/mtl/ofi/help-mtl-ofi.txt
@@ -16,3 +16,6 @@ unusual; your job may behave unpredictably (and/or abort) after this.
   Local host: %s
   Location: %s:%d
   Error: %s (%zd)
+#
+[message too big]
+Message size %llu bigger than supported by selected transport. Max = %llu
diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h
index 8a6918cf78f..77261e4bc21 100644
--- a/ompi/mca/mtl/ofi/mtl_ofi.h
+++ b/ompi/mca/mtl/ofi/mtl_ofi.h
@@ -247,13 +247,23 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
     endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
 
     ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
-    if (OMPI_SUCCESS != ompi_ret) return ompi_ret;
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) {
+        return ompi_ret;
+    }
 
     ofi_req->buffer = (free_after) ? start : NULL;
     ofi_req->length = length;
     ofi_req->status.MPI_ERROR = OMPI_SUCCESS;
 
-    if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) {
+    /* Refuse sends larger than the module's max_msg_size.  The help text
+     * formats both sizes with %llu, so cast the arguments to match. */
+    if (OPAL_UNLIKELY(length > endpoint->mtl_ofi_module->max_msg_size)) {
+        opal_show_help("help-mtl-ofi.txt",
+                       "message too big", false,
+                       (unsigned long long) length,
+                       (unsigned long long) endpoint->mtl_ofi_module->max_msg_size);
+        return OMPI_ERROR;
+    } else if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) {
         ack_req = malloc(sizeof(ompi_mtl_ofi_request_t));
         assert(ack_req);
         ack_req->parent = ofi_req;
diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c
index b5d3959837b..56a68c13d08 100644
--- a/ompi/mca/mtl/ofi/mtl_ofi_component.c
+++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c
@@ -466,9 +466,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
     }
 
     /**
-     * Save the maximum inject size.
+     * Save the maximum sizes.
      */
     ompi_mtl_ofi.max_inject_size = prov->tx_attr->inject_size;
+    ompi_mtl_ofi.max_msg_size = prov->ep_attr->max_msg_size;
 
     /**
      * Create the objects that will be bound to the endpoint.
diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h
index e8c3f21b53e..f0b36feefae 100644
--- a/ompi/mca/mtl/ofi/mtl_ofi_types.h
+++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h
@@ -49,6 +49,9 @@ typedef struct mca_mtl_ofi_module_t {
     /** Maximum inject size */
     size_t max_inject_size;
 
+    /** Largest message that can be sent in a single send. */
+    size_t max_msg_size;
+
     /** Maximum number of CQ events to read in OFI Progress */
     int ofi_progress_event_count;
 