diff --git a/ompi/mca/coll/base/coll_base_bcast.c b/ompi/mca/coll/base/coll_base_bcast.c index 38210bab9df..aede9089353 100644 --- a/ompi/mca/coll/base/coll_base_bcast.c +++ b/ompi/mca/coll/base/coll_base_bcast.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -36,38 +36,30 @@ int ompi_coll_base_bcast_intra_generic( void* buffer, - int original_count, + int count, struct ompi_datatype_t* datatype, int root, struct ompi_communicator_t* comm, mca_coll_base_module_t *module, - uint32_t count_by_segment, + size_t segment_size, ompi_coll_tree_t* tree ) { - int err = 0, line, i, rank, segindex, req_index; - int num_segments; /* Number of segments */ - int sendcount; /* number of elements sent in this segment */ - size_t realsegsize, type_size; - char *tmpbuf; - ptrdiff_t extent, lb; + int err = 0, line, i = 0, rank; + size_t offset = 0; + size_t next_offset; + size_t size; + size_t remaining; + int sc_index = 0, rc_index = 0; ompi_request_t *recv_reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; ompi_request_t **send_reqs = NULL; + ompi_datatype_type_size(datatype, &remaining); + remaining *= count; #if OPAL_ENABLE_DEBUG - int size; - size = ompi_comm_size(comm); - assert( size > 1 ); + assert( ompi_comm_size(comm) > 1 ); #endif rank = ompi_comm_rank(comm); - ompi_datatype_get_extent (datatype, &lb, &extent); - ompi_datatype_type_size( datatype, &type_size ); - num_segments = (original_count + count_by_segment - 1) / count_by_segment; - realsegsize = (ptrdiff_t)count_by_segment * extent; - - /* Set the buffer pointers */ - tmpbuf = (char *) buffer; - if( tree->tree_nextsize != 0 ) { send_reqs = ompi_coll_base_comm_get_reqs(module->base_data, tree->tree_nextsize); if( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; } @@ -75,38 +67,93 @@ ompi_coll_base_bcast_intra_generic( void* buffer, /* Root code */ if( rank == root ) { - /* - For each segment: - - send segment to all children. - The last segment may have less elements than other segments. - */ - sendcount = count_by_segment; - for( segindex = 0; segindex < num_segments; segindex++ ) { - if( segindex == (num_segments - 1) ) { - sendcount = original_count - segindex * count_by_segment; + opal_convertor_t send_convertors[2]; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,tree->tree_next[0]); + OBJ_CONSTRUCT(&send_convertors[0], opal_convertor_t); + OBJ_CONSTRUCT(&send_convertors[1], opal_convertor_t); + send_convertors[0].stack_pos = -1; + send_convertors[1].stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. 
*/ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertors[0] ); + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertors[1] ); + opal_convertor_set_position(&send_convertors[0], &offset); + while (remaining) { + next_offset = offset + (segment_size < remaining ? segment_size : remaining); - } - for( i = 0; i < tree->tree_nextsize; i++ ) { - err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, - tree->tree_next[i], - MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm, - &send_reqs[i])); + size = next_offset - offset; + + err = MCA_PML_CALL(icsend(&send_convertors[sc_index], + &size, + tree->tree_next[0], + MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm, + &send_reqs[0])); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + for( i = 1; i < tree->tree_nextsize; i++ ) { + opal_convertor_t send_convertor; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,tree->tree_next[i]); + OBJ_CONSTRUCT(&send_convertor, opal_convertor_t); + send_convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertor); + opal_convertor_set_position(&send_convertor, &offset); + err = MCA_PML_CALL(icsend(&send_convertor, + &size, + tree->tree_next[i], + MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm, + &send_reqs[i])); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } - /* complete the sends before starting the next sends */ err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - /* update tmp buffer */ - tmpbuf += realsegsize; - + offset = next_offset; + remaining -= size; + sc_index ^= 1; } }
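The root branch above no longer computes a per-segment element count; it walks the packed byte stream, sending at most segment_size bytes per round and clamping the final round to whatever remains, while the two send_convertors let one segment be in flight for the first child as the next is prepared. A minimal standalone sketch of that offset walk (illustration only, not patch code; the example sizes are arbitrary):

    #include <stdio.h>

    int main(void)
    {
        size_t total = 10, segment_size = 4;   /* e.g. 10 packed bytes, 4-byte segments */
        size_t offset = 0, remaining = total;

        while (remaining) {
            /* advance by one full segment, or by whatever is left of the message */
            size_t next_offset = offset + (segment_size < remaining ? segment_size : remaining);
            size_t size = next_offset - offset;
            printf("send bytes [%zu, %zu), %zu bytes\n", offset, next_offset, size);
            offset = next_offset;
            remaining -= size;
        }
        return 0;
    }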
/* Intermediate nodes code */ else if( tree->tree_nextsize > 0 ) { + opal_convertor_t send_convertors[2], recv_convertors[2]; + OBJ_CONSTRUCT(&send_convertors[0], opal_convertor_t); + OBJ_CONSTRUCT(&send_convertors[1], opal_convertor_t); + OBJ_CONSTRUCT(&recv_convertors[0], opal_convertor_t); + OBJ_CONSTRUCT(&recv_convertors[1], opal_convertor_t); + send_convertors[0].stack_pos = -1; + send_convertors[1].stack_pos = -1; + recv_convertors[0].stack_pos = -1; + recv_convertors[1].stack_pos = -1; /* Create the pipeline. 1) Post the first receive @@ -118,30 +165,75 @@ ompi_coll_base_bcast_intra_generic( void* buffer, 4) Compute number of elements in last segment. 5) Send the last segment to children */ - req_index = 0; - err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype, - tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, - comm, &recv_reqs[req_index])); + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,tree->tree_prev); + size_t sizes[2], offsets[2]; + offsets[0] = 0; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertors[0] ); + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertors[1] ); + opal_convertor_set_position(&recv_convertors[0], &offsets[0]); + next_offset = offsets[0] + (segment_size < remaining ? segment_size : remaining); + sizes[0] = next_offset - offsets[0]; + err = MCA_PML_CALL(icrecv(&recv_convertors[0], + &sizes[0], + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + comm, &recv_reqs[rc_index])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - - for( segindex = 1; segindex < num_segments; segindex++ ) { - - req_index = req_index ^ 0x1; - - /* post new irecv */ - err = MCA_PML_CALL(irecv( tmpbuf + realsegsize, count_by_segment, - datatype, tree->tree_prev, - MCA_COLL_BASE_TAG_BCAST, - comm, &recv_reqs[req_index])); + offsets[1] = sizes[0]; + remaining -= sizes[0]; + while (remaining) { + rc_index ^= 1; + next_offset = offsets[rc_index] + (segment_size < remaining ? segment_size : remaining); + sizes[rc_index] = next_offset - offsets[rc_index]; + err = MCA_PML_CALL(icrecv(&recv_convertors[rc_index], + &sizes[rc_index], + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + comm, &recv_reqs[rc_index])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - - /* wait for and forward the previous segment to children */ - err = ompi_request_wait( &recv_reqs[req_index ^ 0x1], - MPI_STATUS_IGNORE ); + /* wait on the previous segment */ + err = ompi_request_wait( &recv_reqs[rc_index^1], MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } for( i = 0; i < tree->tree_nextsize; i++ ) { - err = MCA_PML_CALL(isend(tmpbuf, count_by_segment, datatype, + opal_convertor_t send_convertor; + OBJ_CONSTRUCT(&send_convertor, opal_convertor_t); + send_convertor.stack_pos = -1; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm, tree->tree_next[i]); + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertor); + opal_convertor_set_position(&send_convertor, &offsets[rc_index^1]); + err = MCA_PML_CALL(icsend(&send_convertor, &sizes[rc_index^1], tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm, @@ -153,18 +245,30 @@ ompi_coll_base_bcast_intra_generic( void* buffer, err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - - /* Update the receive buffer */ - tmpbuf += realsegsize; - + offsets[rc_index^1] = next_offset; + remaining -= sizes[rc_index]; } /* Process the last segment */ - err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUS_IGNORE ); + err = ompi_request_wait( &recv_reqs[rc_index], MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment; + for( i = 0; i < tree->tree_nextsize; i++ ) { - err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, + opal_convertor_t send_convertor; + OBJ_CONSTRUCT(&send_convertor, opal_convertor_t); + send_convertor.stack_pos = -1; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm, tree->tree_next[i]); + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertor); + opal_convertor_set_position(&send_convertor, &offsets[rc_index]); + err = MCA_PML_CALL(icsend(&send_convertor, &sizes[rc_index], tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm, @@ -172,6 +276,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer, if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } + /* complete the sends before starting the next iteration */ err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
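The intermediate branch keeps two receives in flight and toggles between the slots with rc_index ^= 1: the slot just posted is receiving segment N+1 while slot rc_index^1 (segment N) is forwarded to the children. A standalone sketch of the two-slot offset/size bookkeeping used above (illustration only, not patch code; example sizes are arbitrary):

    #include <stdio.h>

    int main(void)
    {
        size_t total = 10, segment_size = 4;
        size_t remaining = total, offsets[2], sizes[2];
        int rc_index = 0;

        offsets[0] = 0;
        sizes[0] = segment_size < remaining ? segment_size : remaining;   /* first post */
        offsets[1] = sizes[0];
        remaining -= sizes[0];
        while (remaining) {
            rc_index ^= 1;
            sizes[rc_index] = segment_size < remaining ? segment_size : remaining;
            printf("post slot %d at [%zu, %zu); forward slot %d\n", rc_index,
                   offsets[rc_index], offsets[rc_index] + sizes[rc_index], rc_index ^ 1);
            offsets[rc_index ^ 1] = offsets[rc_index] + sizes[rc_index];
            remaining -= sizes[rc_index];
        }
        printf("forward last slot %d\n", rc_index);
        return 0;
    }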
@@ -179,35 +284,56 @@ /* Leaf nodes */ else { - /* - Receive all segments from parent in a loop: - 1) post irecv for the first segment - 2) for segments 1 .. num_segments - - post irecv for the next segment - - wait on the previous segment to arrive - 3) wait for the last segment - */ - req_index = 0; - err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype, - tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, - comm, &recv_reqs[req_index])); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + opal_convertor_t recv_convertors[2]; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,tree->tree_prev); + OBJ_CONSTRUCT(&recv_convertors[0], opal_convertor_t); + OBJ_CONSTRUCT(&recv_convertors[1], opal_convertor_t); + recv_convertors[0].stack_pos = -1; + recv_convertors[1].stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertors[0] ); + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertors[1] ); + opal_convertor_set_position(&recv_convertors[0], &offset); + while (remaining) { + next_offset = offset + (segment_size < remaining ? segment_size : remaining); + size = next_offset - offset; - for( segindex = 1; segindex < num_segments; segindex++ ) { - req_index = req_index ^ 0x1; - /* post new irecv */ - err = MCA_PML_CALL(irecv( tmpbuf + realsegsize, count_by_segment, - datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, - comm, &recv_reqs[req_index])); + err = MCA_PML_CALL(icrecv(&recv_convertors[rc_index], + &size, + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + comm, &recv_reqs[rc_index])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + offset = next_offset; + remaining -= size; + rc_index ^= 1; /* wait on the previous segment */ - err = ompi_request_wait( &recv_reqs[req_index ^ 0x1], + err = ompi_request_wait( &recv_reqs[rc_index], MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } - err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUS_IGNORE ); + err = ompi_request_wait( &recv_reqs[rc_index^1], MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } @@ -234,23 +360,15 @@ ompi_coll_base_bcast_intra_bintree ( void* buffer, mca_coll_base_module_t *module, uint32_t segsize ) { - int segcount = count; - size_t typelng; mca_coll_base_comm_t *data = module->base_data; COLL_BASE_UPDATE_BINTREE( comm, module, root ); - /** - * Determine number of elements sent per operation.
- */ - ompi_datatype_type_size( datatype, &typelng ); - COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - - OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d", - ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binary rank %d ss %5d", + ompi_comm_rank(comm), segsize)); return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, - segcount, data->cached_bintree ); + segsize, data->cached_bintree ); } int @@ -262,23 +380,15 @@ ompi_coll_base_bcast_intra_pipeline( void* buffer, mca_coll_base_module_t *module, uint32_t segsize ) { - int segcount = count; - size_t typelng; mca_coll_base_comm_t *data = module->base_data; COLL_BASE_UPDATE_PIPELINE( comm, module, root ); - /** - * Determine number of elements sent per operation. - */ - ompi_datatype_type_size( datatype, &typelng ); - COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - - OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_pipeline rank %d ss %5d typelng %lu segcount %d", - ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_pipeline rank %d ss %5d", + ompi_comm_rank(comm), segsize)); return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, - segcount, data->cached_pipeline ); + segsize, data->cached_pipeline ); } int @@ -290,23 +400,15 @@ ompi_coll_base_bcast_intra_chain( void* buffer, mca_coll_base_module_t *module, uint32_t segsize, int32_t chains ) { - int segcount = count; - size_t typelng; mca_coll_base_comm_t *data = module->base_data; COLL_BASE_UPDATE_CHAIN( comm, module, root, chains ); - /** - * Determine number of elements sent per operation. - */ - ompi_datatype_type_size( datatype, &typelng ); - COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - - OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d", - ompi_comm_rank(comm), chains, segsize, (unsigned long)typelng, segcount)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_chain rank %d fo %d ss %5d", + ompi_comm_rank(comm), chains, segsize)); return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, - segcount, data->cached_chain ); + segsize, data->cached_chain ); } int @@ -318,23 +420,15 @@ ompi_coll_base_bcast_intra_binomial( void* buffer, mca_coll_base_module_t *module, uint32_t segsize ) { - int segcount = count; - size_t typelng; mca_coll_base_comm_t *data = module->base_data; COLL_BASE_UPDATE_BMTREE( comm, module, root ); - /** - * Determine number of elements sent per operation. 
- */ - ompi_datatype_type_size( datatype, &typelng ); - COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - - OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d", - ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binomial rank %d ss %5d", + ompi_comm_rank(comm), segsize)); return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, - segcount, data->cached_bmtree ); + segsize, data->cached_bmtree ); } int @@ -346,16 +440,13 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, mca_coll_base_module_t *module, uint32_t segsize ) { - int err=0, line, rank, size, segindex, i, lr, pair; - uint32_t counts[2]; - int segcount[2]; /* Number of elements sent with each segment */ - int num_segments[2]; /* Number of segmenets */ - int sendcount[2]; /* the same like segcount, except for the last segment */ - size_t realsegsize[2], type_size; - char *tmpbuf[2]; - ptrdiff_t type_extent, lb; - ompi_request_t *base_req, *new_req; + int err=0, line, rank, size, i, lr, pair; + size_t type_size; ompi_coll_tree_t *tree; + opal_convertor_t send_convertors[2], recv_convertors[2]; + size_t remainings[2], sizes[2]; + ompi_proc_t *proc; + int rc_index = 0; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -372,47 +463,79 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, err = ompi_datatype_type_size( datatype, &type_size ); - /* Determine number of segments and number of elements per segment */ - counts[0] = count/2; - if (count % 2 != 0) counts[0]++; - counts[1] = count - counts[0]; - if ( segsize > 0 ) { - /* Note that ompi_datatype_type_size() will never return a negative - value in typelng; it returns an int [vs. an unsigned type] - because of the MPI spec. */ - if (segsize < ((uint32_t) type_size)) { - segsize = type_size; /* push segsize up to hold one type */ + sizes[1] = type_size * count ; + sizes[0] = sizes[1] / 2; + + OBJ_CONSTRUCT(&send_convertors[0], opal_convertor_t); + OBJ_CONSTRUCT(&send_convertors[1], opal_convertor_t); + OBJ_CONSTRUCT(&recv_convertors[0], opal_convertor_t); + OBJ_CONSTRUCT(&recv_convertors[1], opal_convertor_t); + send_convertors[0].stack_pos = -1; + send_convertors[1].stack_pos = -1; + recv_convertors[0].stack_pos = -1; + recv_convertors[1].stack_pos = -1; + + if (rank == root || tree->tree_nextsize > 0) { + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. 
*/ + proc = ompi_comm_peer_lookup(comm,tree->tree_next[0]); + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertors[0] ); + if (tree->tree_nextsize > 1) { + proc = ompi_comm_peer_lookup(comm, tree->tree_next[1]); + } else { + proc = ompi_comm_peer_lookup(comm, (root+size-1)%size); } - segcount[0] = segcount[1] = segsize / type_size; - num_segments[0] = counts[0]/segcount[0]; - if ((counts[0] % segcount[0]) != 0) num_segments[0]++; - num_segments[1] = counts[1]/segcount[1]; - if ((counts[1] % segcount[1]) != 0) num_segments[1]++; - } else { - segcount[0] = counts[0]; - segcount[1] = counts[1]; - num_segments[0] = num_segments[1] = 1; + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertors[1] ); + opal_convertor_set_position(&send_convertors[1], &sizes[0]); + } + if (rank != root) { + /* Just consume segments as fast as possible */ + proc = ompi_comm_peer_lookup(comm, tree->tree_prev); + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertors[0]); + opal_convertor_set_position(&recv_convertors[0], &sizes[0]); + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertors[1]); } - + sizes[1] -= sizes[0]; + remainings[0] = sizes[0]; + remainings[1] = sizes[1]; + /* if the message is too small to be split into segments */ - if( (counts[0] == 0 || counts[1] == 0) || - (segsize > ((ptrdiff_t)counts[0] * type_size)) || - (segsize > ((ptrdiff_t)counts[1] * type_size)) ) { + if(0 == sizes[0] || 0 == sizes[1]) { /* call linear version here ! */ return (ompi_coll_base_bcast_intra_chain ( buffer, count, datatype, root, comm, module, segsize, 1 )); } - - err = ompi_datatype_get_extent (datatype, &lb, &type_extent); - - /* Determine real segment size */ - realsegsize[0] = (ptrdiff_t)segcount[0] * type_extent; - realsegsize[1] = (ptrdiff_t)segcount[1] * type_extent; - - /* set the buffer pointers */ - tmpbuf[0] = (char *) buffer; - tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent; + if (0 == segsize) { + // segsize = max(sizes); + segsize = sizes[1]; + } /* Step 1: Root splits the buffer in 2 and sends segmented message down the branches. 
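Note the changed split policy in the setup above: the old element-based code gave the left subtree the extra element (counts[0] was rounded up), while the byte-based code rounds sizes[0] down, so an odd byte lands in the right half. A standalone sketch of the arithmetic (illustration only, not patch code; example values are arbitrary):

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        size_t type_size = 3, count = 5;      /* arbitrary example values */
        size_t sizes[2];

        sizes[1] = type_size * count;         /* total packed bytes: 15 */
        sizes[0] = sizes[1] / 2;              /* left half:  7 (rounded down) */
        sizes[1] -= sizes[0];                 /* right half: 8 (gets the odd byte) */
        assert(sizes[0] + sizes[1] == type_size * count);
        printf("left %zu bytes, right %zu bytes\n", sizes[0], sizes[1]);
        return 0;
    }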
@@ -425,26 +548,34 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, /* root code */ if( rank == root ) { - /* determine segment count */ - sendcount[0] = segcount[0]; - sendcount[1] = segcount[1]; /* for each segment */ - for (segindex = 0; segindex < num_segments[0]; segindex++) { + while(0 != remainings[0] || (tree->tree_nextsize > 1 && 0 != remainings[1])) { /* for each child */ for( i = 0; i < tree->tree_nextsize && i < 2; i++ ) { - if (segindex >= num_segments[i]) { /* no more segments */ + size_t segment_size; + size_t offset, next_offset; + if (0 == remainings[i]) { /* no more data to send */ continue; } /* determine how many elements are being sent in this round */ - if(segindex == (num_segments[i] - 1)) - sendcount[i] = counts[i] - segindex*segcount[i]; + offset = sizes[i] - remainings[i]; + next_offset = offset + segsize; + if (next_offset > sizes[i]) { + next_offset = sizes[i]; + } + if (0 != i) { + offset += sizes[0]; + next_offset += sizes[0]; + } + opal_convertor_set_position(&send_convertors[i], &next_offset); + segment_size = next_offset - offset; + opal_convertor_set_position(&send_convertors[i], &offset); /* send data */ - MCA_PML_CALL(send(tmpbuf[i], sendcount[i], datatype, + MCA_PML_CALL(csend(&send_convertors[i], &segment_size, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - /* update tmp buffer */ - tmpbuf[i] += realsegsize[i]; + remainings[i] -= segment_size; } } } @@ -462,68 +593,169 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, * post the next receive and after that wait for the previous receive to complete * and we disseminating the data to all children. */ - sendcount[lr] = segcount[lr]; - err = MCA_PML_CALL(irecv(tmpbuf[lr], sendcount[lr], datatype, - tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, - comm, &base_req)); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + size_t offsets[2]; + size_t segment_sizes[2]; + ompi_request_t *reqs[2]; + /* determine how many elements are being sent in this round */ + offsets[0] = 0; + if (0 != lr) { + offsets[0] += sizes[0]; + } + opal_convertor_set_position(&recv_convertors[0], &offsets[0]); - for( segindex = 1; segindex < num_segments[lr]; segindex++ ) { - /* determine how many elements to expect in this round */ - if( segindex == (num_segments[lr] - 1)) - sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr]; - /* post new irecv */ - err = MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr], - datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, - comm, &new_req)); + offsets[0] += sizes[lr] - remainings[lr]; + offsets[1] = offsets[0] + segsize; + if (offsets[1] > sizes[lr] + (lr?sizes[0]:0)) { + offsets[1] = sizes[lr] + (lr?sizes[0]:0); + } + opal_convertor_set_position(&recv_convertors[1], &offsets[1]); + if (offsets[1] == offsets[0]) { + segment_sizes[0] = remainings[lr]; + } else { + segment_sizes[0] = offsets[1] - offsets[0]; + } + /* send recv */ + MCA_PML_CALL(icrecv(&recv_convertors[0], &segment_sizes[0], + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + comm, &reqs[0])); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + remainings[lr] -= segment_sizes[0]; + while (0 != remainings[lr]) { + size_t next_offset; + rc_index ^= 1; + /* determine how many elements are being sent in this round */ + offsets[rc_index] = sizes[lr] - remainings[lr]; + next_offset = offsets[rc_index] + segsize; + if (next_offset > sizes[lr]) { + next_offset = sizes[lr]; 
+ } + if (0 != lr) { + offsets[rc_index] += sizes[0]; + next_offset += sizes[0]; + } + opal_convertor_set_position(&recv_convertors[rc_index^1], &next_offset); + if (next_offset == offsets[rc_index]) { + segment_sizes[rc_index] = remainings[lr]; + } else { + segment_sizes[rc_index] = next_offset - offsets[rc_index]; + } + /* send recv */ + MCA_PML_CALL(icrecv(&recv_convertors[rc_index], &segment_sizes[rc_index], + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + comm, &reqs[rc_index])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - + remainings[lr] -= segment_sizes[rc_index]; /* wait for and forward the previous segment */ - err = ompi_request_wait( &base_req, MPI_STATUS_IGNORE ); + err = ompi_request_wait( &reqs[rc_index^1], MPI_STATUS_IGNORE ); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children (segcount[lr]) */ - err = MCA_PML_CALL(send( tmpbuf[lr], segcount[lr], datatype, - tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, + opal_convertor_t send_convertor; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,tree->tree_next[i]); + OBJ_CONSTRUCT(&send_convertor, opal_convertor_t); + send_convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertor); + opal_convertor_set_position(&send_convertor, &offsets[rc_index^1]); + err = MCA_PML_CALL(csend(&send_convertor, + &segment_sizes[rc_index^1], + tree->tree_next[i], + MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } /* end of for each child */ - - /* upate the base request */ - base_req = new_req; - /* go to the next buffer (ie. the one corresponding to the next recv) */ - tmpbuf[lr] += realsegsize[lr]; - } /* end of for segindex */ - - /* wait for the last segment and forward current segment */ - err = ompi_request_wait( &base_req, MPI_STATUS_IGNORE ); - for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ - err = MCA_PML_CALL(send(tmpbuf[lr], sendcount[lr], datatype, - tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm)); + offsets[rc_index^1] = next_offset; + } + err = ompi_request_wait( &reqs[rc_index], MPI_STATUS_IGNORE ); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children (segcount[lr]) */ + opal_convertor_t send_convertor; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,tree->tree_next[i]); + OBJ_CONSTRUCT(&send_convertor, opal_convertor_t); + send_convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. 
*/ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertor); + opal_convertor_set_position(&send_convertor, &offsets[rc_index]); + err = MCA_PML_CALL(csend(&send_convertor, + &segment_sizes[rc_index], + tree->tree_next[i], + MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } /* end of for each child */ } /* leaf nodes */ else { - /* Just consume segments as fast as possible */ - sendcount[lr] = segcount[lr]; - for (segindex = 0; segindex < num_segments[lr]; segindex++) { - /* determine how many elements to expect in this round */ - if (segindex == (num_segments[lr] - 1)) - sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr]; - /* receive segments */ - err = MCA_PML_CALL(recv(tmpbuf[lr], sendcount[lr], datatype, - tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, - comm, MPI_STATUS_IGNORE)); + size_t offset, next_offset; + size_t segment_size; + /* determine how many elements are being sent in this round */ + offset = lr?sizes[0]:0; + opal_convertor_set_position(&recv_convertors[0], &offset); + offset = sizes[lr] - remainings[lr]; + next_offset = offset + segsize; + if (next_offset > sizes[lr]) { + next_offset = sizes[lr]; + } + if (0 != lr) { + offset += sizes[0]; + next_offset += sizes[0]; + } + opal_convertor_set_position(&recv_convertors[1], &next_offset); + if (next_offset == offset) { + segment_size = remainings[lr]; + } else { + segment_size = next_offset - offset; + } + /* send recv */ + MCA_PML_CALL(crecv(&recv_convertors[0], &segment_size, + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + comm, MPI_STATUSES_IGNORE)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + remainings[lr] -= segment_size; + while (0 != remainings[lr]) { + rc_index ^= 1; + /* determine how many elements are being sent in this round */ + offset = sizes[lr] - remainings[lr]; + next_offset = offset + segsize; + if (next_offset > sizes[lr]) { + next_offset = sizes[lr]; + } + if (0 != lr) { + offset += sizes[0]; + next_offset += sizes[0]; + } + opal_convertor_set_position(&recv_convertors[rc_index^1], &next_offset); + if (next_offset == offset) { + segment_size = remainings[lr]; + } else { + segment_size = next_offset - offset; + } + /* send recv */ + MCA_PML_CALL(crecv(&recv_convertors[rc_index], &segment_size, + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + comm, MPI_STATUSES_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - /* update the initial pointer to the buffer */ - tmpbuf[lr] += realsegsize[lr]; + remainings[lr] -= segment_size; } } /* reset the buffer pointers */ - tmpbuf[0] = (char *) buffer; - tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent; + remainings[0] = sizes[0]; + remainings[1] = sizes[1]; /* Step 2: Find your immediate pair (identical node in opposite subtree) and SendRecv @@ -541,36 +773,123 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, } if ( (size%2) != 0 && rank != root) { - - err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype, - pair, MCA_COLL_BASE_TAG_BCAST, - tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, - pair, MCA_COLL_BASE_TAG_BCAST, - comm, MPI_STATUS_IGNORE, rank); + size_t offset; + opal_convertor_t send_convertor, recv_convertor; + ompi_request_t *req; + ompi_proc_t *proc; + OBJ_CONSTRUCT(&send_convertor, opal_convertor_t); + OBJ_CONSTRUCT(&recv_convertor, opal_convertor_t); + send_convertor.stack_pos = -1; + 
recv_convertor.stack_pos = -1; + proc = ompi_comm_peer_lookup(comm, pair); + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertor); + offset = lr?sizes[0]:0; + opal_convertor_set_position(&send_convertor, &offset); + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertor); + offset = ((lr+1)%2)?sizes[0]:0; + opal_convertor_set_position(&recv_convertor, &offset); + err = MCA_PML_CALL(icrecv(&recv_convertor, + &sizes[(lr+1)%2], + pair, MCA_COLL_BASE_TAG_BCAST, + comm, &req)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + MCA_PML_CALL(csend(&send_convertor, &sizes[lr], + pair, MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + err = ompi_request_wait( &req, MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } else if ( (size%2) == 0 ) { /* root sends right buffer to the last node */ if( rank == root ) { - err = MCA_PML_CALL(send(tmpbuf[1], counts[1], datatype, - (root+size-1)%size, MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm)); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - + if (0 != sizes[1]) { + /* determine how many elements are being sent in this round */ + opal_convertor_set_position(&send_convertors[1], &sizes[0]); + MCA_PML_CALL(csend(&send_convertors[1], &sizes[1], + (root+size-1)%size, MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + } } /* last node receives right buffer from the root */ else if (rank == (root+size-1)%size) { - err = MCA_PML_CALL(recv(tmpbuf[1], counts[1], datatype, - root, MCA_COLL_BASE_TAG_BCAST, - comm, MPI_STATUS_IGNORE)); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + opal_convertor_t recv_convertor; + proc = ompi_comm_peer_lookup(comm, root); + OBJ_CONSTRUCT(&recv_convertor, opal_convertor_t); + recv_convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertor); + if (0 != sizes[1]) { + /* determine how many elements are being sent in this round */ + opal_convertor_set_position(&recv_convertor, &sizes[0]); + MCA_PML_CALL(crecv(&recv_convertor, &sizes[1], + root, MCA_COLL_BASE_TAG_BCAST, + comm, MPI_STATUSES_IGNORE)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + } } /* everyone else exchanges buffers */ else { - err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype, - pair, MCA_COLL_BASE_TAG_BCAST, - tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, - pair, MCA_COLL_BASE_TAG_BCAST, - comm, MPI_STATUS_IGNORE, rank); + size_t offset; + opal_convertor_t send_convertor, recv_convertor; + ompi_request_t *req; + ompi_proc_t *proc; + OBJ_CONSTRUCT(&send_convertor, opal_convertor_t); + OBJ_CONSTRUCT(&recv_convertor, opal_convertor_t); + send_convertor.stack_pos = -1; + recv_convertor.stack_pos = -1; + proc = ompi_comm_peer_lookup(comm, pair); + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. 
*/ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &send_convertor); + offset = lr?sizes[0]:0; + opal_convertor_set_position(&send_convertor, &offset); + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(datatype->super), + count, + buffer, + 0, + &recv_convertor); + offset = ((lr+1)%2)?sizes[0]:0; + opal_convertor_set_position(&recv_convertor, &offset); + err = MCA_PML_CALL(icrecv(&recv_convertor, + &sizes[(lr+1)%2], + pair, MCA_COLL_BASE_TAG_BCAST, + comm, &req)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + MCA_PML_CALL(csend(&send_convertor, &sizes[lr], + pair, MCA_COLL_BASE_TAG_BCAST, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + err = ompi_request_wait( &req, MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } } diff --git a/ompi/mca/coll/base/coll_base_functions.h b/ompi/mca/coll/base/coll_base_functions.h index 9e81e2bd182..cd1cd30901a 100644 --- a/ompi/mca/coll/base/coll_base_functions.h +++ b/ompi/mca/coll/base/coll_base_functions.h @@ -14,7 +14,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. @@ -213,7 +213,7 @@ int ompi_coll_base_barrier_intra_tree(BARRIER_ARGS); int ompi_coll_base_barrier_intra_basic_linear(BARRIER_ARGS); /* Bcast */ -int ompi_coll_base_bcast_intra_generic(BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree); +int ompi_coll_base_bcast_intra_generic(BCAST_ARGS, size_t segment_size, ompi_coll_tree_t* tree); int ompi_coll_base_bcast_intra_basic_linear(BCAST_ARGS); int ompi_coll_base_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains); int ompi_coll_base_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize); diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c index 7d5e480095a..9b853970110 100644 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c @@ -9,6 +9,8 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -1497,7 +1499,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs( /**************** Send *****************/ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init( - void *buf, size_t count, + const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, @@ -1558,7 +1560,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init( * Update Message */ HOKE_CONTENT_REF_ALLOC(new_content); - new_content->buffer = buf; + new_content->buffer = (void *)buf; new_content->request = *request; new_content->done = false; new_content->active = false; @@ -1710,7 +1712,7 @@ static int ompi_crcp_bkmrk_request_complete_isend_init(struct ompi_request_t *re ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend( - void *buf, size_t count, + const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, @@ -1869,7 +1871,7 @@ static int ompi_crcp_bkmrk_request_complete_isend(struct ompi_request_t *request ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_send( - void *buf, size_t count, + const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, diff --git a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h index 11784213599..0841870aed0 100644 --- a/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h +++ b/ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h @@ -8,6 +8,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -69,21 +71,21 @@ BEGIN_C_DECLS ompi_crcp_base_pml_state_t* pml_state ); ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init - ( void *buf, size_t count, ompi_datatype_t *datatype, + ( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* pml_state ); ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend - ( void *buf, size_t count, ompi_datatype_t *datatype, + ( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* pml_state ); ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_send - ( void *buf, size_t count, ompi_datatype_t *datatype, + ( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* pml_state ); diff --git a/ompi/mca/crcp/crcp.h b/ompi/mca/crcp/crcp.h index ff43aa029c9..2dcdba0536e 100644 --- a/ompi/mca/crcp/crcp.h +++ b/ompi/mca/crcp/crcp.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -128,17 +130,17 @@ typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_probe_fn_t) ompi_status_public_t* status, ompi_crcp_base_pml_state_t* ); typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_isend_init_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, + ( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* ); typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_isend_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, + ( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* ); typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_send_fn_t) - ( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, + ( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* ); diff --git a/ompi/mca/pml/base/pml_base_recvreq.h b/ompi/mca/pml/base/pml_base_recvreq.h index d20663e12f2..57a92b7307f 100644 --- a/ompi/mca/pml/base/pml_base_recvreq.h +++ b/ompi/mca/pml/base/pml_base_recvreq.h @@ -13,6 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -74,6 +76,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t); (request)->req_bytes_packed = 0; \ (request)->req_base.req_addr = addr; \ (request)->req_base.req_count = count; \ + (request)->req_base.req_offset = 0; \ (request)->req_base.req_peer = src; \ (request)->req_base.req_tag = tag; \ (request)->req_base.req_comm = comm; \ diff --git a/ompi/mca/pml/base/pml_base_request.h b/ompi/mca/pml/base/pml_base_request.h index 90a86505e07..2749dfffd0a 100644 --- a/ompi/mca/pml/base/pml_base_request.h +++ b/ompi/mca/pml/base/pml_base_request.h @@ -13,7 +13,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -72,6 +72,7 @@ struct mca_pml_base_request_t { void *req_addr; /**< pointer to application buffer */ size_t req_count; /**< count of user datatype elements */ + size_t req_offset; /**< offset the request starts at */ int32_t req_peer; /**< peer process - rank w/in this communicator */ int32_t req_tag; /**< user defined tag */ struct ompi_proc_t* req_proc; /**< peer process */ diff --git a/ompi/mca/pml/base/pml_base_sendreq.h b/ompi/mca/pml/base/pml_base_sendreq.h index 3f6cce1e578..ad95482af40 100644 --- a/ompi/mca/pml/base/pml_base_sendreq.h +++ b/ompi/mca/pml/base/pml_base_sendreq.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. @@ -87,6 +87,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); (request)->req_send_mode = mode; \ (request)->req_base.req_addr = (void *)addr; \ (request)->req_base.req_count = count; \ + (request)->req_base.req_offset= 0; \ (request)->req_base.req_datatype = datatype; \ (request)->req_base.req_peer = (int32_t)peer; \ (request)->req_base.req_tag = (int32_t)tag; \ diff --git a/ompi/mca/pml/bfo/pml_bfo.c b/ompi/mca/pml/bfo/pml_bfo.c index e3a1beb447a..25afee4fd07 100644 --- a/ompi/mca/pml/bfo/pml_bfo.c +++ b/ompi/mca/pml/bfo/pml_bfo.c @@ -16,6 +16,8 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -64,9 +66,13 @@ mca_pml_bfo_t mca_pml_bfo = { mca_pml_bfo_irecv_init, mca_pml_bfo_irecv, mca_pml_bfo_recv, + NULL, /* icrecv */ + NULL, /* crecv */ mca_pml_bfo_isend_init, mca_pml_bfo_isend, mca_pml_bfo_send, + NULL, /* icsend */ + NULL, /* csend */ mca_pml_bfo_iprobe, mca_pml_bfo_probe, mca_pml_bfo_start, diff --git a/ompi/mca/pml/cm/pml_cm.c b/ompi/mca/pml/cm/pml_cm.c index a7322e4c331..5f9246be967 100644 --- a/ompi/mca/pml/cm/pml_cm.c +++ b/ompi/mca/pml/cm/pml_cm.c @@ -11,6 +11,8 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,9 +42,13 @@ ompi_pml_cm_t ompi_pml_cm = { mca_pml_cm_irecv_init, mca_pml_cm_irecv, mca_pml_cm_recv, + NULL, /* icrecv */ + NULL, /* crecv */ mca_pml_cm_isend_init, mca_pml_cm_isend, mca_pml_cm_send, + NULL, /* icsend */ + NULL, /* csend */ mca_pml_cm_iprobe, mca_pml_cm_probe, mca_pml_cm_start, diff --git a/ompi/mca/pml/crcpw/pml_crcpw.h b/ompi/mca/pml/crcpw/pml_crcpw.h index 478253c150f..6be9de78c74 100644 --- a/ompi/mca/pml/crcpw/pml_crcpw.h +++ b/ompi/mca/pml/crcpw/pml_crcpw.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -108,13 +110,13 @@ BEGIN_C_DECLS struct ompi_message_t **message, ompi_status_public_t* status ); - int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, + int mca_pml_crcpw_isend_init( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); - int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, + int mca_pml_crcpw_isend( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); - int mca_pml_crcpw_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, + int mca_pml_crcpw_send( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ); int mca_pml_crcpw_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, diff --git a/ompi/mca/pml/crcpw/pml_crcpw_module.c b/ompi/mca/pml/crcpw/pml_crcpw_module.c index c5982c55383..1fa578a4868 100644 --- a/ompi/mca/pml/crcpw/pml_crcpw_module.c +++ b/ompi/mca/pml/crcpw/pml_crcpw_module.c @@ -13,6 +13,8 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,6 +48,7 @@ mca_pml_crcpw_module_t mca_pml_crcpw_module = { mca_pml_crcpw_isend_init, mca_pml_crcpw_isend, mca_pml_crcpw_send, + NULL, /* csend */ mca_pml_crcpw_iprobe, mca_pml_crcpw_probe, mca_pml_crcpw_start, @@ -368,7 +371,7 @@ int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, omp return OMPI_SUCCESS; } -int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, +int mca_pml_crcpw_isend_init( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ) { int ret; @@ -407,7 +410,7 @@ int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype return OMPI_SUCCESS; } -int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, +int mca_pml_crcpw_isend( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ) { int ret; @@ -449,7 +452,7 @@ int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int return OMPI_SUCCESS; } -int mca_pml_crcpw_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, +int mca_pml_crcpw_send( const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ) { int ret; diff --git a/ompi/mca/pml/example/pml_example.c b/ompi/mca/pml/example/pml_example.c index 799e3abe459..e62d8c255f0 100644 --- a/ompi/mca/pml/example/pml_example.c +++ b/ompi/mca/pml/example/pml_example.c @@ -6,6 +6,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. 
+ * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,9 +32,13 @@ mca_pml_example_t mca_pml_example = { mca_pml_example_irecv_init, mca_pml_example_irecv, mca_pml_example_recv, + NULL, /* icrecv */ + NULL, /* crecv */ mca_pml_example_isend_init, mca_pml_example_isend, mca_pml_example_send, + NULL, /* icsend */ + NULL, /* csend */ mca_pml_example_iprobe, mca_pml_example_probe, mca_pml_example_start, diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index eaf3ab26e0f..68fccc0ab9e 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -18,6 +18,8 @@ * reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -64,9 +66,13 @@ mca_pml_ob1_t mca_pml_ob1 = { mca_pml_ob1_irecv_init, mca_pml_ob1_irecv, mca_pml_ob1_recv, + mca_pml_ob1_icrecv, + mca_pml_ob1_crecv, mca_pml_ob1_isend_init, mca_pml_ob1_isend, mca_pml_ob1_send, + mca_pml_ob1_icsend, + mca_pml_ob1_csend, mca_pml_ob1_iprobe, mca_pml_ob1_probe, mca_pml_ob1_start, diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h index 4826587564a..65e84cf94c4 100644 --- a/ompi/mca/pml/ob1/pml_ob1.h +++ b/ompi/mca/pml/ob1/pml_ob1.h @@ -14,7 +14,7 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -165,6 +165,21 @@ extern int mca_pml_ob1_send( const void *buf, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ); +extern int mca_pml_ob1_icsend( struct opal_convertor_t* convertor, + size_t *size, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request ); + +extern int mca_pml_ob1_csend( struct opal_convertor_t* convertor, + size_t *size, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm ); + extern int mca_pml_ob1_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, @@ -201,6 +216,20 @@ extern int mca_pml_ob1_mrecv( void *buf, struct ompi_message_t **message, ompi_status_public_t* status ); +extern int mca_pml_ob1_icrecv( opal_convertor_t* convertor, + size_t *size, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request ); + +extern int mca_pml_ob1_crecv( opal_convertor_t* convertor, + size_t *size, + int src, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ); + extern int mca_pml_ob1_dump( struct ompi_communicator_t* comm, int verbose ); diff --git a/ompi/mca/pml/ob1/pml_ob1_irecv.c b/ompi/mca/pml/ob1/pml_ob1_irecv.c index 37c0ce9e9e8..41413137c70 100644 --- a/ompi/mca/pml/ob1/pml_ob1_irecv.c +++ b/ompi/mca/pml/ob1/pml_ob1_irecv.c @@ -362,3 +362,110 @@ mca_pml_ob1_mrecv( void *buf, return rc; } + +int mca_pml_ob1_icrecv(opal_convertor_t *convertor, + size_t *size, + int src, + int tag, + struct ompi_communicator_t *comm, + struct ompi_request_t **request) +{ + mca_pml_ob1_recv_request_t *recvreq; + MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq); + if (NULL == recvreq) + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + + OBJ_RETAIN(convertor->pDesc); + MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, + convertor->pBaseBuf, + convertor->count, (ompi_datatype_t *)convertor->pDesc, src, tag, comm, false); + recvreq->req_recv.req_base.req_offset = convertor->bConverted; + // recvreq->req_recv.req_bytes_expected = *size; + + PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, + &((recvreq)->req_recv.req_base), + PERUSE_RECV); + + // MCA_PML_OB1_RECV_REQUEST_START(recvreq); + mca_pml_ob1_recv_req_start_with_convertor(recvreq, convertor, *size); + *request = (ompi_request_t *) recvreq; + return OMPI_SUCCESS; +} + + +int mca_pml_ob1_crecv(opal_convertor_t *convertor, + size_t *size, + int src, + int tag, + struct ompi_communicator_t *comm, + ompi_status_public_t * status) +{ + mca_pml_ob1_recv_request_t *recvreq = NULL; + int rc; + + if (OPAL_LIKELY(!ompi_mpi_thread_multiple)) { + recvreq = mca_pml_ob1_recvreq; + mca_pml_ob1_recvreq = NULL; + } + + if( OPAL_UNLIKELY(NULL == recvreq) ) { + MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq); + if (NULL == recvreq) + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + } + +#if 0 + MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, addr, count, datatype, + src, tag, comm, false); + + PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, + &(recvreq->req_recv.req_base), + PERUSE_RECV); + + MCA_PML_OB1_RECV_REQUEST_START(recvreq); + ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); + + if (NULL != status) { /* return status */ + *status = recvreq->req_recv.req_base.req_ompi.req_status; + } + + rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; + + if (OPAL_UNLIKELY(ompi_mpi_thread_multiple || NULL != mca_pml_ob1_recvreq)) { + MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq); + } else { + mca_pml_ob1_recv_request_fini (recvreq); + mca_pml_ob1_recvreq = recvreq; + } +#else + 
OBJ_RETAIN(convertor->pDesc); + MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, + convertor->pBaseBuf, + convertor->count, (ompi_datatype_t *)convertor->pDesc, src, tag, comm, false); + recvreq->req_recv.req_base.req_offset = convertor->bConverted; + // recvreq->req_recv.req_bytes_expected = *size; + + PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, + &((recvreq)->req_recv.req_base), + PERUSE_RECV); + + // MCA_PML_OB1_RECV_REQUEST_START(recvreq); + mca_pml_ob1_recv_req_start_with_convertor(recvreq, convertor, *size); + ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); + + if (NULL != status) { /* return status */ + *status = recvreq->req_recv.req_base.req_ompi.req_status; + } + + rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; + + if (OPAL_UNLIKELY(ompi_mpi_thread_multiple || NULL != mca_pml_ob1_recvreq)) { + MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq); + } else { + mca_pml_ob1_recv_request_fini (recvreq); + mca_pml_ob1_recvreq = recvreq; + } +#endif + + return rc; +} diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 3a5b0c2d7a0..df2a02d57bf 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -277,3 +277,139 @@ int mca_pml_ob1_send(const void *buf, return rc; } + +int mca_pml_ob1_icsend(opal_convertor_t* convertor, + size_t *size, + int dst, + int tag, + mca_pml_base_send_mode_t sendmode, + ompi_communicator_t * comm, + ompi_request_t ** request) +{ + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst); + mca_pml_ob1_send_request_t *sendreq = NULL; + ompi_proc_t *dst_proc = ob1_proc->ompi_proc; + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc); + int16_t seqn; + int rc; + + if (OPAL_UNLIKELY(NULL == endpoint)) { + return OMPI_ERR_UNREACH; + } + + seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + +#if 0 + if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { + rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, + endpoint, comm); + if (OPAL_LIKELY(0 <= rc)) { + /* NTH: it is legal to return ompi_request_empty since the only valid + * field in a send completion status is whether or not the send was + * cancelled (which it can't be at this point anyway). 
*/ + *request = &ompi_request_empty; + return OMPI_SUCCESS; + } + } +#endif + + MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq); + if (NULL == sendreq) + return OMPI_ERR_OUT_OF_RESOURCE; + + OBJ_RETAIN(convertor->pDesc); + MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, + convertor->pBaseBuf, + 0, + (ompi_datatype_t *)convertor->pDesc, + dst, tag, + comm, sendmode, false); + sendreq->req_send.req_base.req_offset = convertor->bConverted; + sendreq->req_send.req_base.req_count = convertor->count; + opal_convertor_clone(convertor, &sendreq->req_send.req_base.req_convertor, 1); + sendreq->req_send.req_bytes_packed = *size; + + PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, + &(sendreq)->req_send.req_base, + PERUSE_SEND); + + MCA_PML_OB1_SEND_REQUEST_START_W_SEQ(sendreq, endpoint, seqn, rc); + *request = (ompi_request_t *) sendreq; + return rc; +} + +int mca_pml_ob1_csend(struct opal_convertor_t* convertor, + size_t *size, + int dst, + int tag, + mca_pml_base_send_mode_t sendmode, + struct ompi_communicator_t* comm) + +{ + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst); + ompi_proc_t *dst_proc = ob1_proc->ompi_proc; + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc); + mca_pml_ob1_send_request_t *sendreq = NULL; + int16_t seqn; + int rc; + +#if 0 + assert(buf == convertor->pBaseBuf); + assert(count == convertor->count); + assert(&(datatype->super) == convertor->pDesc); +#endif + + if (OPAL_UNLIKELY(NULL == endpoint)) { + return OMPI_ERR_UNREACH; + } + + assert (MCA_PML_BASE_SEND_BUFFERED != sendmode); + + seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + + if (OPAL_LIKELY(!ompi_mpi_thread_multiple)) { + sendreq = mca_pml_ob1_sendreq; + mca_pml_ob1_sendreq = NULL; + } + + if( OPAL_UNLIKELY(NULL == sendreq) ) { + MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq); + if (NULL == sendreq) + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + } + + sendreq->req_send.req_base.req_proc = dst_proc; + sendreq->rdma_frag = NULL; + + OBJ_RETAIN(convertor->pDesc); + MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, + convertor->pBaseBuf, + 0, + (ompi_datatype_t *)convertor->pDesc, + dst, tag, + comm, sendmode, false); + sendreq->req_send.req_base.req_offset = convertor->bConverted; + sendreq->req_send.req_base.req_count = convertor->count; + opal_convertor_clone(convertor, &sendreq->req_send.req_base.req_convertor, 1); + sendreq->req_send.req_bytes_packed = *size; + + PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, + &sendreq->req_send.req_base, + PERUSE_SEND); + + MCA_PML_OB1_SEND_REQUEST_START_W_SEQ(sendreq, endpoint, seqn, rc); + if (OPAL_LIKELY(rc == OMPI_SUCCESS)) { + ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi); + + rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR; + } + + if (OPAL_UNLIKELY(ompi_mpi_thread_multiple || NULL != mca_pml_ob1_sendreq)) { + MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq); + } else { + mca_pml_ob1_send_request_fini (sendreq); + mca_pml_ob1_sendreq = sendreq; + } + + return rc; +} diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index bbc90e1e471..34c9ecaca11 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -496,7 +496,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq bytes_received = mca_pml_ob1_compute_segment_length_base (segments, num_segments, sizeof(mca_pml_ob1_frag_hdr_t)); - data_offset = hdr->hdr_frag.hdr_frag_offset; + data_offset = hdr->hdr_frag.hdr_frag_offset + 
recvreq->req_recv.req_base.req_offset; /* * Make user buffer accessible(defined) before unpacking. @@ -694,7 +694,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq #endif /* OPAL_CUDA_GDR_SUPPORT */ - offset = 0; + offset = recvreq->req_recv.req_base.req_offset; OPAL_THREAD_LOCK(&recvreq->lock); opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, &offset); @@ -731,7 +731,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq memcpy (frag->remote_handle, hdr + 1, btl->btl_registration_handle_size); /* update the read location */ - frag->remote_address = hdr->hdr_src_ptr + offset; + frag->remote_address = hdr->hdr_src_ptr + offset - recvreq->req_recv.req_base.req_offset; /* updating the write location */ OPAL_THREAD_LOCK(&recvreq->lock); @@ -779,7 +779,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq { size_t bytes_received = 0; size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_OB1_RECV_REQUEST_UNPACK */ - size_t data_offset = 0; + size_t data_offset = recvreq->req_recv.req_base.req_offset; mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval; bytes_received = mca_pml_ob1_compute_segment_length_base (segments, num_segments, @@ -845,7 +845,8 @@ void mca_pml_ob1_recv_request_progress_match( mca_pml_ob1_recv_request_t* recvre mca_btl_base_segment_t* segments, size_t num_segments ) { - size_t bytes_received, data_offset = 0; + // size_t bytes_received, data_offset = 0; + size_t bytes_received, data_offset = recvreq->req_recv.req_base.req_offset; size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_OB1_RECV_REQUEST_UNPACK */ mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval; @@ -998,9 +999,11 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq, /* take lock to protect convertor against concurrent access * from unpack */ OPAL_THREAD_LOCK(&recvreq->lock); + recvreq->req_rdma_offset += recvreq->req_recv.req_base.req_offset; opal_convertor_set_position (&recvreq->req_recv.req_base.req_convertor, &recvreq->req_rdma_offset); opal_convertor_get_current_pointer (&recvreq->req_recv.req_base.req_convertor, &data_ptr); + recvreq->req_rdma_offset -= recvreq->req_recv.req_base.req_offset; OPAL_THREAD_UNLOCK(&recvreq->lock); if (btl->btl_register_mem) { @@ -1271,3 +1274,128 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) } } } + +void mca_pml_ob1_recv_req_start_with_convertor(mca_pml_ob1_recv_request_t *req, opal_convertor_t *convertor, size_t size) +{ + ompi_communicator_t *comm = req->req_recv.req_base.req_comm; + mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm; + mca_pml_ob1_comm_proc_t* proc; + mca_pml_ob1_recv_frag_t* frag; + opal_list_t *queue; + mca_pml_ob1_hdr_t* hdr; + + /* init/re-init the request */ + req->req_lock = 0; + req->req_pipeline_depth = 0; + req->req_bytes_received = 0; + req->req_bytes_expected = 0; + /* What about req_rdma_cnt ? */ + req->req_rdma_idx = 0; + req->req_pending = false; + req->req_ack_sent = false; + + MCA_PML_BASE_RECV_START(&req->req_recv); + + OB1_MATCHING_LOCK(&ob1_comm->matching_lock); + /** + * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include + * the cost of the request lock. 
+     */
+    PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
+                            &(req->req_recv.req_base), PERUSE_RECV);
+
+    /* assign sequence number */
+    req->req_recv.req_base.req_sequence = ob1_comm->recv_sequence++;
+
+    /* attempt to match posted recv */
+    if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) {
+        frag = recv_req_match_wild(req, &proc);
+        queue = &ob1_comm->wild_receives;
+#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT
+        /* As we are in a homogeneous environment we know that all remote
+         * architectures are exactly the same as the local one. Therefore,
+         * we can safely construct the convertor based on the proc
+         * information of rank 0.
+         */
+        if( NULL == frag ) {
+            req->req_recv.req_base.req_proc = ompi_proc_local_proc;
+            prepare_recv_req_convertor(req, convertor, size);
+        }
+#endif  /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
+    } else {
+        proc = mca_pml_ob1_peer_lookup (comm, req->req_recv.req_base.req_peer);
+        req->req_recv.req_base.req_proc = proc->ompi_proc;
+        frag = recv_req_match_specific_proc(req, proc);
+        queue = &proc->specific_receives;
+        /* wild card recv will be prepared on match */
+        prepare_recv_req_convertor(req, convertor, size);
+    }
+
+    if(OPAL_UNLIKELY(NULL == frag)) {
+        PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END,
+                                &(req->req_recv.req_base), PERUSE_RECV);
+        /* We didn't find any matches.  Record this irecv so we can match
+           it when the message comes in. */
+        append_recv_req_to_queue(queue, req);
+        req->req_match_received = false;
+        OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock);
+    } else {
+        if(OPAL_LIKELY(!IS_PROB_REQ(req))) {
+            PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX,
+                                    &(req->req_recv.req_base), PERUSE_RECV);
+
+            hdr = (mca_pml_ob1_hdr_t*)frag->segments->seg_addr.pval;
+            PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q,
+                                   req->req_recv.req_base.req_comm,
+                                   hdr->hdr_match.hdr_src,
+                                   hdr->hdr_match.hdr_tag,
+                                   PERUSE_RECV);
+
+            PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END,
+                                    &(req->req_recv.req_base), PERUSE_RECV);
+
+            opal_list_remove_item(&proc->unexpected_frags,
+                                  (opal_list_item_t*)frag);
+            OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock);
+
+            switch(hdr->hdr_common.hdr_type) {
+            case MCA_PML_OB1_HDR_TYPE_MATCH:
+                mca_pml_ob1_recv_request_progress_match(req, frag->btl, frag->segments,
+                                                        frag->num_segments);
+                break;
+            case MCA_PML_OB1_HDR_TYPE_RNDV:
+                mca_pml_ob1_recv_request_progress_rndv(req, frag->btl, frag->segments,
+                                                       frag->num_segments);
+                break;
+            case MCA_PML_OB1_HDR_TYPE_RGET:
+                mca_pml_ob1_recv_request_progress_rget(req, frag->btl, frag->segments,
+                                                       frag->num_segments);
+                break;
+            default:
+                assert(0);
+            }
+
+            MCA_PML_OB1_RECV_FRAG_RETURN(frag);
+
+        } else if (OPAL_UNLIKELY(IS_MPROB_REQ(req))) {
+            /* Remove the fragment from the match list, as it's now
+               matched.  Stash it somewhere in the request (which,
+               yes, is a complete hack), where it will be plucked out
+               during the end of mprobe.
The request will then be + "recreated" as a receive request, and the frag will be + restarted with this request during mrecv */ + opal_list_remove_item(&proc->unexpected_frags, + (opal_list_item_t*)frag); + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); + + req->req_recv.req_base.req_addr = frag; + mca_pml_ob1_recv_request_matched_probe(req, frag->btl, + frag->segments, frag->num_segments); + + } else { + OB1_MATCHING_UNLOCK(&ob1_comm->matching_lock); + mca_pml_ob1_recv_request_matched_probe(req, frag->btl, + frag->segments, frag->num_segments); + } + } +} diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index 82c4767d834..f2d8c634e82 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -13,7 +13,7 @@ * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ @@ -219,6 +219,7 @@ recv_request_pml_complete_check(mca_pml_ob1_recv_request_t *recvreq) } extern void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req); +extern void mca_pml_ob1_recv_req_start_with_convertor(mca_pml_ob1_recv_request_t *req, opal_convertor_t *convertor, size_t size); #define MCA_PML_OB1_RECV_REQUEST_START(r) mca_pml_ob1_recv_req_start(r) static inline void prepare_recv_req_converter(mca_pml_ob1_recv_request_t *req) @@ -236,6 +237,14 @@ static inline void prepare_recv_req_converter(mca_pml_ob1_recv_request_t *req) } } +static inline void prepare_recv_req_convertor(mca_pml_ob1_recv_request_t *req, opal_convertor_t *convertor, size_t size) +{ + if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) { + opal_convertor_clone(convertor, &req->req_recv.req_base.req_convertor, 1); + req->req_bytes_expected = size; + } +} + #define MCA_PML_OB1_RECV_REQUEST_MATCHED(request, hdr) \ recv_req_matched(request, hdr) diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index f358d733dab..1870615b425 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -977,9 +977,10 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) } /* pack into a descriptor */ - offset = (size_t)range->range_send_offset; + offset = (size_t)range->range_send_offset + sendreq->req_send.req_base.req_offset; opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, &offset); + offset -= sendreq->req_send.req_base.req_offset; range->range_send_offset = (uint64_t)offset; data_remaining = size; @@ -1234,7 +1235,8 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq, /* Get the address of the current offset. Note: at this time ob1 CAN NOT handle * non-contiguous RDMA. If that changes this code will be wrong. */ opal_convertor_get_offset_pointer (&sendreq->req_send.req_base.req_convertor, - hdr->hdr_rdma_offset, &frag->local_address); + hdr->hdr_rdma_offset+sendreq->req_send.req_base.req_offset, + &frag->local_address); mca_pml_ob1_send_request_put_frag(frag); } diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 5cb21f6aba6..fc3401e60a9 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -14,6 +14,8 @@ * Copyright (c) 2011-2012 NVIDIA Corporation. 
All rights reserved.
 * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
 *                         reserved.
+ * Copyright (c) 2016      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
@@ -157,7 +159,12 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
 }

 #define MCA_PML_OB1_SEND_REQUEST_RESET(sendreq) \
-    MCA_PML_BASE_SEND_REQUEST_RESET(&(sendreq)->req_send)
+    do { if ((sendreq)->req_send.req_bytes_packed > 0) {                   \
+        size_t _position = sendreq->req_send.req_base.req_offset;          \
+        opal_convertor_set_position(&(sendreq)->req_send.req_base.req_convertor, \
+                                    &_position);                           \
+        assert( sendreq->req_send.req_base.req_offset == _position );      \
+    } } while (0)

 static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* sendreq)
 {
@@ -473,6 +480,34 @@ mca_pml_ob1_send_request_start_seq (mca_pml_ob1_send_request_t* sendreq, mca_bml
     return OMPI_SUCCESS;
 }

+static inline int
+mca_pml_ob1_send_request_start_seq_size (mca_pml_ob1_send_request_t* sendreq, mca_bml_base_endpoint_t* endpoint, int32_t seqn, size_t *size)
+{
+    sendreq->req_endpoint = endpoint;
+    sendreq->req_state = 0;
+    sendreq->req_lock = 0;
+    sendreq->req_pipeline_depth = 0;
+    sendreq->req_bytes_delivered = 0;
+    sendreq->req_pending = MCA_PML_OB1_SEND_PENDING_NONE;
+    sendreq->req_send.req_base.req_sequence = seqn;
+
+    MCA_PML_BASE_SEND_START( &sendreq->req_send );
+
+    for(size_t i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
+        mca_bml_base_btl_t* bml_btl;
+        int rc;
+
+        /* select a btl */
+        bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
+        rc = mca_pml_ob1_send_request_start_btl(sendreq, bml_btl);
+        if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
+            return rc;
+    }
+    add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_START, true);
+
+    return OMPI_SUCCESS;
+}
+
 static inline int
 mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
 {
diff --git a/ompi/mca/pml/pml.h b/ompi/mca/pml/pml.h
index 243b5993dda..7d938196480 100644
--- a/ompi/mca/pml/pml.h
+++ b/ompi/mca/pml/pml.h
@@ -13,7 +13,7 @@
 * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2011      Sandia National Laboratories. All rights reserved.
- * Copyright (c) 2015      Research Organization for Information Science
+ * Copyright (c) 2015-2016 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
@@ -70,6 +70,7 @@
 #include "mpi.h" /* needed for MPI_ANY_TAG */
 #include "ompi/mca/pml/pml_constants.h"
 #include "ompi/request/request.h"
+#include "opal/datatype/opal_convertor.h"

 BEGIN_C_DECLS

@@ -276,6 +277,45 @@ typedef int (*mca_pml_base_module_mrecv_fn_t)(
     ompi_status_public_t* status
 );

+/**
+ * Post a convertor based receive request.
+ *
+ * @param convertor (INOUT) Convertor.
+ * @param size (INOUT) Max size to be received.
+ * @param src (IN) Source rank w/in communicator.
+ * @param tag (IN) User defined tag.
+ * @param comm (IN) Communicator.
+ * @param request (OUT) Request handle.
+ * @return OMPI_SUCCESS or failure status.
+ */
+typedef int (*mca_pml_base_module_icrecv_fn_t)(
+    opal_convertor_t *convertor,
+    size_t *size,
+    int src,
+    int tag,
+    struct ompi_communicator_t* comm,
+    struct ompi_request_t **request
+);
+
+/**
+ * Post a convertor based receive and wait for completion.
+ *
+ * @param convertor (INOUT) Convertor.
+ * @param size (INOUT) Max size to be received.
+ * @param src (IN) Source rank w/in communicator + * @param tag (IN) User defined tag + * @param comm (IN) Communicator + * @param status (OUT) Completion status + * @return OMPI_SUCCESS or failure status. + */ +typedef int (*mca_pml_base_module_crecv_fn_t)( + opal_convertor_t *convertor, + size_t *size, + int src, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status +); /** * Initialize a persistent send request. * @@ -348,6 +388,49 @@ typedef int (*mca_pml_base_module_send_fn_t)( struct ompi_communicator_t* comm ); +/** + * Post a convertor based send request. + * + * @param convertor (INOUT)Convertor. + * @param size (INOUT) Max size to be sent. + * @param dst (IN) Peer rank w/in communicator. + * @param tag (IN) User defined tag. + * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) + * @param comm (IN) Communicator. + * @param request (OUT) Request handle. + * @return OMPI_SUCCESS or failure status. + */ +typedef int (*mca_pml_base_module_icsend_fn_t)( + opal_convertor_t *convertor, + size_t *size, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request +); + + +/** + * Post a convertor based send request and wait for completion. + * + * @param convertor (INOUT)Convertor. + * @param size (INOUT) Max size to be sent. + * @param dst (IN) Peer rank w/in communicator. + * @param tag (IN) User defined tag. + * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) + * @param comm (IN) Communicator. + * @return OMPI_SUCCESS or failure status. + */ +typedef int (*mca_pml_base_module_csend_fn_t)( + opal_convertor_t *convertor, + size_t *size, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm +); + /** * Initiate one or more persistent requests. * @@ -502,9 +585,13 @@ struct mca_pml_base_module_1_0_1_t { mca_pml_base_module_irecv_init_fn_t pml_irecv_init; mca_pml_base_module_irecv_fn_t pml_irecv; mca_pml_base_module_recv_fn_t pml_recv; + mca_pml_base_module_icrecv_fn_t pml_icrecv; + mca_pml_base_module_crecv_fn_t pml_crecv; mca_pml_base_module_isend_init_fn_t pml_isend_init; mca_pml_base_module_isend_fn_t pml_isend; mca_pml_base_module_send_fn_t pml_send; + mca_pml_base_module_icsend_fn_t pml_icsend; + mca_pml_base_module_csend_fn_t pml_csend; mca_pml_base_module_iprobe_fn_t pml_iprobe; mca_pml_base_module_probe_fn_t pml_probe; mca_pml_base_module_start_fn_t pml_start; diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 26700ef0758..d522ce22ef6 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -3,6 +3,8 @@ * Copyright (c) 2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,9 +58,13 @@ mca_pml_ucx_module_t ompi_pml_ucx = { mca_pml_ucx_irecv_init, mca_pml_ucx_irecv, mca_pml_ucx_recv, + NULL, /* icrecv */ + NULL, /* crecv */ mca_pml_ucx_isend_init, mca_pml_ucx_isend, mca_pml_ucx_send, + NULL, /* icsend */ + NULL, /* csend */ mca_pml_ucx_iprobe, mca_pml_ucx_probe, mca_pml_ucx_start, diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index 4494ca1022d..f2f0b6eedd9 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. 
- * Copyright (c) 2015      Research Organization for Information Science
+ * Copyright (c) 2015-2016 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
@@ -36,9 +36,13 @@ mca_pml_yalla_module_t ompi_pml_yalla = {
     mca_pml_yalla_irecv_init,
     mca_pml_yalla_irecv,
     mca_pml_yalla_recv,
+    NULL, /* icrecv */
+    NULL, /* crecv */
     mca_pml_yalla_isend_init,
     mca_pml_yalla_isend,
     mca_pml_yalla_send,
+    NULL, /* icsend */
+    NULL, /* csend */
     mca_pml_yalla_iprobe,
     mca_pml_yalla_probe,
     mca_pml_yalla_start,
diff --git a/ompi/mpiext/split/Makefile.am b/ompi/mpiext/split/Makefile.am
new file mode 100644
index 00000000000..c0c7c8a5c5f
--- /dev/null
+++ b/ompi/mpiext/split/Makefile.am
@@ -0,0 +1,25 @@
+#
+# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2010-2012 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2016      Research Organization for Information Science
+#                         and Technology (RIST). All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# This Makefile is not traversed during a normal "make all" in an OMPI
+# build.  It *is* traversed during "make dist", however.  So you can
+# put EXTRA_DIST targets in here.
+#
+# You can also use this as a convenience for building this MPI
+# extension (i.e., "make all" in this directory to invoke "make all"
+# in all the subdirectories).
+
+SUBDIRS = c
+
+EXTRA_DIST = README.txt
diff --git a/ompi/mpiext/split/README.txt b/ompi/mpiext/split/README.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/ompi/mpiext/split/c/Makefile.am b/ompi/mpiext/split/c/Makefile.am
new file mode 100644
index 00000000000..0bb06319dd7
--- /dev/null
+++ b/ompi/mpiext/split/c/Makefile.am
@@ -0,0 +1,65 @@
+#
+# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2010-2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2016      Research Organization for Information Science
+#                         and Technology (RIST). All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+SUBDIRS = profile
+
+# We must set these #defines so that the inner OMPI MPI prototype
+# header files do the Right Thing.
+AM_CPPFLAGS = -DOMPI_PROFILE_LAYER=0 -DOMPI_COMPILING_FORTRAN_WRAPPERS=1
+
+# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols
+# to be replaced by PMPI_*.
+# In this directory, we need it to be 0.
+
+AM_CPPFLAGS += -DOMPI_BUILD_MPI_PROFILING=0
+
+# This file builds the C bindings for MPI extensions.  It must be
+# present in all MPI extensions.
+
+include $(top_srcdir)/Makefile.ompi-rules
+
+# Convenience libtool library that will be slurped up into libmpi.la.
+noinst_LTLIBRARIES = libmpiext_split_c.la
+
+# This is where the top-level header file (that is included in
+# <mpi-ext.h>) must be installed.
+ompidir = $(ompiincludedir)/ompi/mpiext/split/c
+
+# This is the header file that is installed.
+ompi_HEADERS = mpiext_split_c.h
+
+# Sources for the convenience libtool library.  Other than the one
+# header file, all source files in the extension have no file naming
+# conventions.
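+# The sources below implement the blocking and non-blocking split
+# send/receive entry points declared in mpiext_split_c.h.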
+libmpiext_split_c_la_SOURCES =
+if BUILD_MPI_BINDINGS_LAYER
+libmpiext_split_c_la_SOURCES += \
+        $(ompi_HEADERS) \
+        mpiext_isplit_recv.c \
+        mpiext_isplit_send.c \
+        mpiext_split_recv.c \
+        mpiext_split_send.c
+endif
+
+libmpiext_split_c_la_LDFLAGS = -module -avoid-version
+libmpiext_split_c_la_LIBADD = profile/libpmpiext_split_c.la
+
+# Man page installation
+nodist_man_MANS = OMPI_Split_send.3
+
+# Man page sources
+EXTRA_DIST = $(nodist_man_MANS:.3=.3in)
+
+distclean-local:
+	rm -f $(nodist_man_MANS)
diff --git a/ompi/mpiext/split/c/OMPI_Split_send.3in b/ompi/mpiext/split/c/OMPI_Split_send.3in
new file mode 100644
index 00000000000..79b4320c039
--- /dev/null
+++ b/ompi/mpiext/split/c/OMPI_Split_send.3in
@@ -0,0 +1,67 @@
+.\" -*- nroff -*-
+.\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved.
+.\" Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+.\" Copyright 2007-2008 Sun Microsystems, Inc.
+.\" Copyright (c) 1996 Thinking Machines Corporation
+.\" Copyright (c) 2016      Research Organization for Information Science
+.\"                         and Technology (RIST). All rights reserved.
+.\" $COPYRIGHT$
+.TH OMPI_Split_send 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+\fBOMPI_Split_send\fP \- Performs a standard-mode blocking send.
+
+.SH SYNTAX
+.ft R
+.SH C Syntax
+.nf
+#include <mpi-ext.h>
+int OMPI_Split_send(const void *\fIbuf\fP, int\fI count\fP, MPI_Datatype\fI datatype\fP, int\fI dest\fP,
+    int\fI tag\fP, MPI_Comm\fI comm\fP)
+
+.fi
+.SH INPUT PARAMETERS
+.ft R
+.TP 1i
+buf
+Initial address of send buffer (choice).
+.TP 1i
+count
+Number of elements to send (nonnegative integer).
+.TP 1i
+datatype
+Datatype of each send buffer element (handle).
+.TP 1i
+dest
+Rank of destination (integer).
+.TP 1i
+tag
+Message tag (integer).
+.TP 1i
+comm
+Communicator (handle).
+
+.SH OUTPUT PARAMETER
+.ft R
+.TP 1i
+IERROR
+Fortran only: Error status (integer).
+
+.SH DESCRIPTION
+.ft R
+OMPI_Split_send performs a standard-mode, blocking send.
+
+.SH NOTE
+.ft R
+This routine will block until the message is sent to the destination. For an in-depth explanation of the semantics of the standard-mode send, refer to the MPI-1 Standard.
+
+.SH ERRORS
+Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object.
+.sp
+Before the error value is returned, the current MPI error handler is
+called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error.
+
+.SH SEE ALSO
+.ft R
+.nf
+OMPI_Isplit_send
+
diff --git a/ompi/mpiext/split/c/mpiext_isplit_recv.c b/ompi/mpiext/split/c/mpiext_isplit_recv.c
new file mode 100644
index 00000000000..9182e5452b5
--- /dev/null
+++ b/ompi/mpiext/split/c/mpiext_isplit_recv.c
@@ -0,0 +1,103 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2016      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/pml/base/pml_base_sendreq.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/memchecker.h" + +#include "ompi/mpiext/split/c/mpiext_split_c.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak OMPI_Isplit_recv = POMPI_Isplit_recv +#endif +#define OMPI_Isplit_recv POMPI_Isplit_recv +#endif + +static const char FUNC_NAME[] = "OMPI_Isplit_recv"; + + +int OMPI_Isplit_recv(void *buf, int count, MPI_Datatype type, int source, + int tag, MPI_Comm comm, MPI_Request *request) +{ + int rc = MPI_SUCCESS; + opal_convertor_t convertor; + size_t offset; + size_t size; + + MEMCHECKER( + memchecker_datatype(type); + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_CHECK_DATATYPE_FOR_RECV(rc, type, count); + OMPI_CHECK_USER_BUFFER(rc, buf, type, count); + + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); + } else if (((tag < 0) && (tag != MPI_ANY_TAG)) || (tag > mca_pml.pml_max_tag)) { + rc = MPI_ERR_TAG; + } else if ((MPI_ANY_SOURCE != source) && + (MPI_PROC_NULL != source) && + ompi_comm_peer_invalid(comm, source)) { + rc = MPI_ERR_RANK; + } else if (NULL == request) { + rc = MPI_ERR_REQUEST; + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + if (source == MPI_PROC_NULL) { + *request = &ompi_request_empty; + return MPI_SUCCESS; + } + + assert(count > 0); + + if (count > 0) { + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,source); + OBJ_CONSTRUCT(&convertor, opal_convertor_t); + convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(type->super), + count, + buf, + 0, + &convertor ); + opal_convertor_get_unpacked_size( &convertor, &size ); + } + size = size / 2; + offset = 0; +#if 0 + opal_convertor_set_position(&convertor, &offset); + OPAL_CR_ENTER_LIBRARY(); + rc = MCA_PML_CALL(icrecv(&convertor, &size, source, tag, comm, request)); + if (OMPI_SUCCESS != rc) { + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); + } +#endif + offset += size; + opal_convertor_set_position(&convertor, &offset); + rc = MCA_PML_CALL(icrecv(&convertor, &size, source, tag, comm, request+1)); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); +} diff --git a/ompi/mpiext/split/c/mpiext_isplit_send.c b/ompi/mpiext/split/c/mpiext_isplit_send.c new file mode 100644 index 00000000000..63cd9c05bcf --- /dev/null +++ b/ompi/mpiext/split/c/mpiext_isplit_send.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/pml/base/pml_base_sendreq.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/memchecker.h" + +#include "ompi/mpiext/split/c/mpiext_split_c.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak OMPI_Isplit_send = POMPI_Isplit_send +#endif +#define OMPI_Isplit_send POMPI_Isplit_send +#endif + +static const char FUNC_NAME[] = "OMPI_Isplit_send"; + + +int OMPI_Isplit_send(const void *buf, int count, MPI_Datatype type, int dest, + int tag, MPI_Comm comm, MPI_Request *request) +{ + int rc = MPI_SUCCESS; + opal_convertor_t convertor; + size_t offset; + size_t size; + + MEMCHECKER( + memchecker_datatype(type); + memchecker_call(&opal_memchecker_base_isdefined, buf, count, type); + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); + } else if (count < 0) { + rc = MPI_ERR_COUNT; + } else if (tag < 0 || tag > mca_pml.pml_max_tag) { + rc = MPI_ERR_TAG; + } else if (ompi_comm_peer_invalid(comm, dest) && + (MPI_PROC_NULL != dest)) { + rc = MPI_ERR_RANK; + } else { + OMPI_CHECK_DATATYPE_FOR_SEND(rc, type, count); + OMPI_CHECK_USER_BUFFER(rc, buf, type, count); + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + if (MPI_PROC_NULL == dest) { + return MPI_SUCCESS; + } + + assert(count > 0); + if (count > 0) { + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,dest); + OBJ_CONSTRUCT(&convertor, opal_convertor_t); + convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. */ + opal_convertor_copy_and_prepare_for_send( + proc->super.proc_convertor, + &(type->super), + count, + buf, + 0, + &convertor ); + opal_convertor_get_packed_size( &convertor, &size ); + } + size = size / 2; + offset = 0; + opal_convertor_set_position(&convertor, &offset); + OPAL_CR_ENTER_LIBRARY(); + rc = MCA_PML_CALL(icsend(&convertor, &size, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request)); + if (OMPI_SUCCESS != rc) { + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); + } + offset += size; + opal_convertor_set_position(&convertor, &offset); + rc = MCA_PML_CALL(icsend(&convertor, &size, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request+1)); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); +} diff --git a/ompi/mpiext/split/c/mpiext_split_c.h b/ompi/mpiext/split/c/mpiext_split_c.h new file mode 100644 index 00000000000..cb78966b184 --- /dev/null +++ b/ompi/mpiext/split/c/mpiext_split_c.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +OMPI_DECLSPEC int OMPI_Isplit_recv(void *buf, int count, MPI_Datatype type, int source, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int OMPI_Split_send(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm); +OMPI_DECLSPEC int OMPI_Split_recv(void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC int OMPI_Isplit_send(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Request *req); + +OMPI_DECLSPEC int POMPI_Isplit_recv(void *buf, int count, MPI_Datatype type, int source, + int tag, MPI_Comm comm, MPI_Request *request); +OMPI_DECLSPEC int POMPI_Isplit_send(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Request *req); +OMPI_DECLSPEC int POMPI_Split_recv(void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC int POMPI_Split_send(const void *buf, int count, MPI_Datatype datatype, int dest, + int tag, MPI_Comm comm); diff --git a/ompi/mpiext/split/c/mpiext_split_recv.c b/ompi/mpiext/split/c/mpiext_split_recv.c new file mode 100644 index 00000000000..c79040b7798 --- /dev/null +++ b/ompi/mpiext/split/c/mpiext_split_recv.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/pml/base/pml_base_sendreq.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/memchecker.h" + +#include "ompi/mpiext/split/c/mpiext_split_c.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak OMPI_Split_recv = POMPI_Split_recv +#endif +#define OMPI_Split_recv POMPI_Split_recv +#endif + +static const char FUNC_NAME[] = "OMPI_Split_recv"; + + +int OMPI_Split_recv(void *buf, int count, MPI_Datatype type, int source, + int tag, MPI_Comm comm, MPI_Status *statuses) +{ + int rc = MPI_SUCCESS; + opal_convertor_t convertor; + size_t offset; + size_t size; + + MEMCHECKER( + memchecker_datatype(type); + memchecker_call(&opal_memchecker_base_isaddressable, buf, count, type); + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + OMPI_CHECK_DATATYPE_FOR_RECV(rc, type, count); + OMPI_CHECK_USER_BUFFER(rc, buf, type, count); + + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); + } else if (((tag < 0) && (tag != MPI_ANY_TAG)) || (tag > mca_pml.pml_max_tag)) { + rc = MPI_ERR_TAG; + } else if ((source != MPI_ANY_SOURCE) && + (MPI_PROC_NULL != source) && + ompi_comm_peer_invalid(comm, source)) { + rc = MPI_ERR_RANK; + } + + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + if (MPI_PROC_NULL == source) { + return MPI_SUCCESS; + } + assert(count > 0); + + if (count > 0) { + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,source); + OBJ_CONSTRUCT(&convertor, opal_convertor_t); + convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. 
*/ + opal_convertor_copy_and_prepare_for_recv( + proc->super.proc_convertor, + &(type->super), + count, + buf, + 0, + &convertor ); + opal_convertor_get_unpacked_size( &convertor, &size ); + } + size = size / 2; + offset = 0; + opal_convertor_set_position(&convertor, &offset); + OPAL_CR_ENTER_LIBRARY(); + rc = MCA_PML_CALL(crecv(&convertor, &size, source, tag, comm, (MPI_STATUSES_IGNORE==statuses)?MPI_STATUS_IGNORE:statuses)); + if (OMPI_SUCCESS != rc) { + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); + } + offset += size; + opal_convertor_set_position(&convertor, &offset); + rc = MCA_PML_CALL(crecv(&convertor, &size, source, tag, comm, (MPI_STATUSES_IGNORE==statuses)?MPI_STATUS_IGNORE:statuses+1)); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); +} diff --git a/ompi/mpiext/split/c/mpiext_split_send.c b/ompi/mpiext/split/c/mpiext_split_send.c new file mode 100644 index 00000000000..8750f5d1122 --- /dev/null +++ b/ompi/mpiext/split/c/mpiext_split_send.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/pml/base/pml_base_sendreq.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/memchecker.h" + +#include "ompi/mpiext/split/c/mpiext_split_c.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak OMPI_Split_send = POMPI_Split_send +#endif +#define OMPI_Split_send POMPI_Split_send +#endif + +static const char FUNC_NAME[] = "OMPI_Split_send"; + + +int OMPI_Split_send(const void *buf, int count, MPI_Datatype type, int dest, + int tag, MPI_Comm comm) +{ + int rc = MPI_SUCCESS; + opal_convertor_t convertor; + size_t offset; + size_t size; + + MEMCHECKER( + memchecker_datatype(type); + memchecker_call(&opal_memchecker_base_isdefined, buf, count, type); + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); + } else if (count < 0) { + rc = MPI_ERR_COUNT; + } else if (tag < 0 || tag > mca_pml.pml_max_tag) { + rc = MPI_ERR_TAG; + } else if (ompi_comm_peer_invalid(comm, dest) && + (MPI_PROC_NULL != dest)) { + rc = MPI_ERR_RANK; + } else { + OMPI_CHECK_DATATYPE_FOR_SEND(rc, type, count); + OMPI_CHECK_USER_BUFFER(rc, buf, type, count); + } + OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME); + } + + if (MPI_PROC_NULL == dest) { + return MPI_SUCCESS; + } + + assert(count > 0); + if (count > 0) { + ompi_proc_t* proc = ompi_comm_peer_lookup(comm,dest); + OBJ_CONSTRUCT(&convertor, opal_convertor_t); + convertor.stack_pos = -1; + /* We will create a convertor specialized for the */ + /* remote architecture and prepared with the type. 
*/
+        opal_convertor_copy_and_prepare_for_send(
+            proc->super.proc_convertor,
+            &(type->super),
+            count,
+            buf,
+            0,
+            &convertor );
+        opal_convertor_get_packed_size( &convertor, &size );
+    }
+    size = size / 2;
+    offset = 0;
+    opal_convertor_set_position(&convertor, &offset);
+    OPAL_CR_ENTER_LIBRARY();
+    rc = MCA_PML_CALL(csend(&convertor, &size, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm));
+    if (OMPI_SUCCESS != rc) {
+        OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
+    }
+    offset += size;
+    opal_convertor_set_position(&convertor, &offset);
+    rc = MCA_PML_CALL(csend(&convertor, &size, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm));
+    OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
+}
diff --git a/ompi/mpiext/split/c/profile/Makefile.am b/ompi/mpiext/split/c/profile/Makefile.am
new file mode 100644
index 00000000000..9eb8224f2f6
--- /dev/null
+++ b/ompi/mpiext/split/c/profile/Makefile.am
@@ -0,0 +1,72 @@
+# -*- makefile -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2013 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2009-2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2011      Sandia National Laboratories. All rights reserved.
+# Copyright (c) 2012      Oak Ridge National Laboratory. All rights reserved.
+# Copyright (c) 2012-2013 Inria.  All rights reserved.
+# Copyright (c) 2013      Los Alamos National Security, LLC. All rights
+#                         reserved.
+# Copyright (c) 2015-2016 Research Organization for Information Science
+#                         and Technology (RIST). All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+include $(top_srcdir)/Makefile.ompi-rules
+
+# OMPI_BUILD_MPI_PROFILING is enabled when we want our generated MPI_* symbols
+# to be replaced by PMPI_*.
+# In this directory, we definitely need it to be 1.
+
+AM_CPPFLAGS = -DOMPI_BUILD_MPI_PROFILING=1
+
+#
+# This build needs to go through only if profiling is required.
+# Further, this build HAS to go through if profiling is required.
+#
+
+noinst_LTLIBRARIES = libpmpiext_split_c.la
+
+# This is where the top-level header file (that is included in
+# <mpi-ext.h>) must be installed.
+ompidir = $(ompiincludedir)/ompi/mpiext/split/c
+
+# This is the header file that is installed.
+
+nodist_libpmpiext_split_c_la_SOURCES = \
+        pmpiext_isplit_recv.c \
+        pmpiext_isplit_send.c \
+        pmpiext_split_recv.c \
+        pmpiext_split_send.c
+
+#
+# Sym link in the sources from the real MPI directory
+#
+$(nodist_libpmpiext_split_c_la_SOURCES):
+	$(OMPI_V_LN_S) if test ! -r $@ ; then \
+		pname=`echo $@ | cut -b '2-'` ; \
+		$(LN_S) $(top_srcdir)/ompi/mpiext/split/c/$$pname $@ ; \
+	fi
+
+MAINTAINERCLEANFILES = $(nodist_libpmpiext_split_c_la_SOURCES)
+
+# Don't want these targets in here
+
+tags-recursive:
+tags:
+TAGS:
+GTAGS:
+ID:
diff --git a/ompi/mpiext/split/configure.m4 b/ompi/mpiext/split/configure.m4
new file mode 100644
index 00000000000..5c8ec4c34ca
--- /dev/null
+++ b/ompi/mpiext/split/configure.m4
@@ -0,0 +1,28 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2009 The Trustees of Indiana University.
+#                         All rights reserved.
+# Copyright (c) 2012-2015 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2016 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_MPIEXT_split_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([OMPI_MPIEXT_split_CONFIG], [ + AC_CONFIG_FILES([ompi/mpiext/split/Makefile]) + AC_CONFIG_FILES([ompi/mpiext/split/c/Makefile]) + AC_CONFIG_FILES([ompi/mpiext/split/c/profile/Makefile]) + + # This example can always build, so we just execute $1 if it was + # requested. + AS_IF([test "$ENABLE_split" = "1" || \ + test "$ENABLE_EXT_ALL" = "1"], + [$1], + [$2]) +])
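
For reference, a minimal caller of the new extension might look like the sketch below. It is illustrative only, not part of the patch: it assumes the blocking OMPI_Split_send/OMPI_Split_recv pair is matched end to end (each call transfers the buffer as two half-sized, convertor-based PML messages on the same tag, matched in order), and that the prototypes are picked up through <mpi-ext.h> as installed by the Makefiles above.

    #include <mpi.h>
    #include <mpi-ext.h>        /* prototypes from mpiext_split_c.h */

    int main(int argc, char **argv)
    {
        int rank, buf[1024];
        MPI_Status statuses[2]; /* one status per half-message */

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        if (0 == rank) {
            for (int i = 0; i < 1024; i++) buf[i] = i;
            /* Internally issues two csend's, each covering half the
             * packed size, on the same tag. */
            OMPI_Split_send(buf, 1024, MPI_INT, 1, 42, MPI_COMM_WORLD);
        } else if (1 == rank) {
            /* Internally issues two crecv's matching the two halves. */
            OMPI_Split_recv(buf, 1024, MPI_INT, 0, 42, MPI_COMM_WORLD, statuses);
        }

        MPI_Finalize();
        return 0;
    }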