Skip to content

Commit f5fcd84

Browse files
committed
mpi: retain operation and datatype in non blocking collectives
The MPI standard states that a user MPI_Op and/or user MPI_Datatype can be freed after a call to a non-blocking collective and before the non-blocking collective completes. Retain user (only) MPI_Op and MPI_Datatype when the non-blocking call is invoked, and set a request callback so they are released when the MPI_Request completes. Thanks Thomas Ponweiser for reporting this. Fixes #2151 Fixes #1304 Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
1 parent 59b9602 commit f5fcd84

23 files changed

+281
-33
lines changed

ompi/mca/coll/base/coll_base_util.c

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,27 @@
2929
#include "ompi/mca/pml/pml.h"
3030
#include "coll_base_util.h"
3131

32+
/* Bookkeeping record attached to a request by ompi_coll_base_retain_op();
 * consumed and freed by release_op_callback(). */
struct retain_op_data {
33+
/* completion callback found on the request when the record was installed */
ompi_request_complete_fn_t req_complete_cb;
34+
/* callback data found on the request when the record was installed */
void *req_complete_cb_data;
35+
/* retained non-intrinsic op, or NULL when the op was not retained */
ompi_op_t *op;
36+
/* retained non-predefined datatype, or NULL when it was not retained */
ompi_datatype_t *datatype;
37+
};
38+
39+
/* Bookkeeping record attached to a request by ompi_coll_base_retain_datatypes();
 * consumed and freed by release_datatypes_callback(). */
struct retain_datatypes_data {
40+
/* completion callback found on the request when the record was installed */
ompi_request_complete_fn_t req_complete_cb;
41+
/* callback data found on the request when the record was installed */
void *req_complete_cb_data;
42+
/* retained non-predefined send datatype, or NULL when it was not retained */
ompi_datatype_t *stype;
43+
/* retained non-predefined receive datatype, or NULL when it was not retained */
ompi_datatype_t *rtype;
44+
};
45+
46+
/* Bookkeeping record attached to a request by ompi_coll_base_retain_datatypes_w();
 * consumed and freed by release_datatypes_w_callback(). */
struct retain_datatypes_w_data {
47+
/* completion callback found on the request when the record was installed */
ompi_request_complete_fn_t req_complete_cb;
48+
/* callback data found on the request when the record was installed */
void *req_complete_cb_data;
49+
/* number of leading entries of types[] that release_datatypes_w_callback() releases */
int count;
50+
/* flexible array member: the retained non-predefined datatypes */
ompi_datatype_t *types[];
51+
};
52+
3253
int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount,
3354
ompi_datatype_t* sdatatype,
3455
int dest, int stag,
@@ -78,3 +99,142 @@ int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount,
7899
return (err);
79100
}
80101

102+
static int release_op_callback(struct ompi_request_t *request) {
103+
struct retain_op_data * p = (struct retain_op_data *)request->req_complete_cb_data;
104+
int rc = OMPI_SUCCESS;
105+
assert (NULL != p);
106+
if (NULL != p->req_complete_cb) {
107+
request->req_complete_cb = p->req_complete_cb;
108+
request->req_complete_cb_data = p->req_complete_cb_data;
109+
rc = request->req_complete_cb(request);
110+
}
111+
if (NULL != p->op) {
112+
OBJ_RELEASE(p->op);
113+
}
114+
if (NULL != p->datatype) {
115+
OBJ_RELEASE(p->datatype);
116+
}
117+
free(p);
118+
return rc;
119+
}
120+
121+
int ompi_coll_base_retain_op( ompi_request_t *request, ompi_op_t *op,
122+
ompi_datatype_t *type) {
123+
bool retain = !ompi_op_is_intrinsic(op);
124+
retain |= !ompi_datatype_is_predefined(type);
125+
if (OPAL_UNLIKELY(retain)) {
126+
struct retain_op_data *p = (struct retain_op_data *)calloc(1, sizeof(struct retain_op_data));
127+
if (OPAL_UNLIKELY(NULL == p)) {
128+
return OMPI_ERR_OUT_OF_RESOURCE;
129+
}
130+
if (!ompi_op_is_intrinsic(op)) {
131+
OBJ_RETAIN(op);
132+
p->op = op;
133+
}
134+
if (!ompi_datatype_is_predefined(type)) {
135+
OBJ_RETAIN(type);
136+
p->datatype = type;
137+
}
138+
p->req_complete_cb = request->req_complete_cb;
139+
p->req_complete_cb_data = request->req_complete_cb_data;
140+
request->req_complete_cb = release_op_callback;
141+
request->req_complete_cb_data = p;
142+
}
143+
return OMPI_SUCCESS;
144+
}
145+
146+
static int release_datatypes_callback(struct ompi_request_t *request) {
147+
struct retain_datatypes_data * p = (struct retain_datatypes_data *)request->req_complete_cb_data;
148+
int rc = OMPI_SUCCESS;
149+
assert (NULL != p);
150+
if (NULL != p->req_complete_cb) {
151+
request->req_complete_cb = p->req_complete_cb;
152+
request->req_complete_cb_data = p->req_complete_cb_data;
153+
rc = request->req_complete_cb(request);
154+
}
155+
if (NULL != p->stype) {
156+
OBJ_RELEASE(p->stype);
157+
}
158+
if (NULL != p->rtype) {
159+
OBJ_RELEASE(p->rtype);
160+
}
161+
free(p);
162+
return rc;
163+
}
164+
165+
/*
 * Retain non-predefined send/receive datatypes for the lifetime of a request.
 *
 * Either datatype may be NULL, in which case it is ignored. When at least
 * one datatype is non-NULL and not predefined, a bookkeeping record is
 * allocated, the request's current completion callback and callback data are
 * saved in it, and release_datatypes_callback() is installed on the request
 * in their place. Otherwise the request is left untouched.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the record cannot be
 * allocated.
 */
int ompi_coll_base_retain_datatypes( ompi_request_t *request, ompi_datatype_t *stype,
                                     ompi_datatype_t *rtype) {
    const bool keep_stype = (NULL != stype) && !ompi_datatype_is_predefined(stype);
    const bool keep_rtype = (NULL != rtype) && !ompi_datatype_is_predefined(rtype);

    /* common case: only predefined (or absent) datatypes, nothing to do */
    if (!OPAL_UNLIKELY(keep_stype || keep_rtype)) {
        return OMPI_SUCCESS;
    }

    /* zero-filled, so fields for datatypes that are not retained stay NULL */
    struct retain_datatypes_data *ctx = calloc(1, sizeof(*ctx));
    if (OPAL_UNLIKELY(NULL == ctx)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (keep_stype) {
        OBJ_RETAIN(stype);
        ctx->stype = stype;
    }
    if (keep_rtype) {
        OBJ_RETAIN(rtype);
        ctx->rtype = rtype;
    }

    /* save whatever callback is already there so it can be chained later */
    ctx->req_complete_cb = request->req_complete_cb;
    ctx->req_complete_cb_data = request->req_complete_cb_data;

    request->req_complete_cb = release_datatypes_callback;
    request->req_complete_cb_data = ctx;

    return OMPI_SUCCESS;
}
189+
190+
static int release_datatypes_w_callback(struct ompi_request_t *request) {
191+
struct retain_datatypes_w_data * p = (struct retain_datatypes_w_data *)request->req_complete_cb_data;
192+
int rc = OMPI_SUCCESS;
193+
assert (NULL != p);
194+
if (NULL != p->req_complete_cb) {
195+
request->req_complete_cb = p->req_complete_cb;
196+
request->req_complete_cb_data = p->req_complete_cb_data;
197+
rc = request->req_complete_cb(request);
198+
}
199+
for (int i=0; i<p->count; i++) {
200+
OBJ_RELEASE(p->types[i]);
201+
}
202+
free(p);
203+
return rc;
204+
}
205+
206+
/*
 * Retain every non-predefined datatype found in two per-peer datatype arrays
 * for the lifetime of a request.
 *
 * stypes[] and rtypes[] are each scanned over indices [0, count). NULL
 * entries and predefined datatypes are skipped. When at least one datatype
 * qualifies, a bookkeeping record with a trailing array of the retained
 * datatypes is allocated, the request's current completion callback and
 * callback data are saved in it, and release_datatypes_w_callback() is
 * installed on the request in their place. Otherwise the request is left
 * untouched.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the record cannot be
 * allocated.
 *
 * NOTE(review): presumably the hook never fires if the request has already
 * completed by the time this is called -- confirm against the request
 * completion path.
 */
int ompi_coll_base_retain_datatypes_w( ompi_request_t *request, int count,
                                       ompi_datatype_t *const stypes[], ompi_datatype_t *const rtypes[]) {
    int datatypes = 0;

    /* first pass: count the datatypes that have to be retained */
    for (int i=0; i<count; i++) {
        if (NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) {
            datatypes++;
        }
        if (NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) {
            datatypes++;
        }
    }

    if (OPAL_UNLIKELY(0 < datatypes)) {
        /* header of the record itself plus one slot per retained datatype
         * in the flexible array member */
        struct retain_datatypes_w_data *p =
            calloc(1, sizeof(*p) + datatypes * sizeof(ompi_datatype_t *));
        if (OPAL_UNLIKELY(NULL == p)) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* second pass: take the references and remember them */
        datatypes = 0;
        for (int i=0; i<count; i++) {
            if (NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) {
                p->types[datatypes++] = stypes[i];
                OBJ_RETAIN(stypes[i]);
            }
            if (NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) {
                p->types[datatypes++] = rtypes[i];
                OBJ_RETAIN(rtypes[i]);
            }
        }

        /* the release callback iterates over p->count entries; without this
         * assignment it would see 0 and leak every retained datatype */
        p->count = datatypes;

        /* save whatever callback is already there so it can be chained later */
        p->req_complete_cb = request->req_complete_cb;
        p->req_complete_cb_data = request->req_complete_cb_data;
        request->req_complete_cb = release_datatypes_w_callback;
        request->req_complete_cb_data = p;
    }
    return OMPI_SUCCESS;
}
240+

ompi/mca/coll/base/coll_base_util.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "ompi/mca/mca.h"
2828
#include "ompi/datatype/ompi_datatype.h"
2929
#include "ompi/request/request.h"
30+
#include "ompi/op/op.h"
3031
#include "ompi/mca/pml/pml.h"
3132

3233
BEGIN_C_DECLS
@@ -70,5 +71,14 @@ ompi_coll_base_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatyp
7071
source, rtag, comm, status);
7172
}
7273

74+
/* Retain a non-intrinsic op and/or non-predefined datatype until the request's
 * completion callback runs. Returns OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE. */
int ompi_coll_base_retain_op( ompi_request_t *request, ompi_op_t *op,
75+
ompi_datatype_t *type);
76+
77+
/* Retain non-predefined send/receive datatypes (either may be NULL) until the
 * request's completion callback runs. Returns OMPI_SUCCESS or
 * OMPI_ERR_OUT_OF_RESOURCE. */
int ompi_coll_base_retain_datatypes( ompi_request_t *request, ompi_datatype_t *stype,
78+
ompi_datatype_t *rtype);
79+
80+
/* Same as ompi_coll_base_retain_datatypes() for arrays of `count` send and
 * `count` receive datatypes; NULL entries are skipped. Returns OMPI_SUCCESS or
 * OMPI_ERR_OUT_OF_RESOURCE. */
int ompi_coll_base_retain_datatypes_w( ompi_request_t *request, int count,
81+
ompi_datatype_t *const stypes[],
82+
ompi_datatype_t *const rtypes[]);
7383
END_C_DECLS
7484
#endif /* MCA_COLL_BASE_UTIL_EXPORT_H */

ompi/mpi/c/iallgather.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved.
1515
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
1616
* reserved.
17-
* Copyright (c) 2015 Research Organization for Information Science
17+
* Copyright (c) 2015-2017 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
1919
* $COPYRIGHT$
2020
*
@@ -31,6 +31,7 @@
3131
#include "ompi/communicator/communicator.h"
3232
#include "ompi/errhandler/errhandler.h"
3333
#include "ompi/datatype/ompi_datatype.h"
34+
#include "ompi/mca/coll/base/coll_base_util.h"
3435
#include "ompi/memchecker.h"
3536

3637
#if OMPI_BUILD_MPI_PROFILING
@@ -99,6 +100,9 @@ int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
99100
err = comm->c_coll->coll_iallgather(sendbuf, sendcount, sendtype,
100101
recvbuf, recvcount, recvtype, comm,
101102
request, comm->c_coll->coll_iallgather_module);
103+
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
104+
ompi_coll_base_retain_datatypes(*request, sendtype, recvtype);
105+
}
102106

103107
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
104108
}

ompi/mpi/c/iallgatherv.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
1616
* reserved.
17-
* Copyright (c) 2015 Research Organization for Information Science
17+
* Copyright (c) 2015-2017 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
1919
* $COPYRIGHT$
2020
*
@@ -31,6 +31,7 @@
3131
#include "ompi/communicator/communicator.h"
3232
#include "ompi/errhandler/errhandler.h"
3333
#include "ompi/datatype/ompi_datatype.h"
34+
#include "ompi/mca/coll/base/coll_base_util.h"
3435
#include "ompi/memchecker.h"
3536

3637
#if OMPI_BUILD_MPI_PROFILING
@@ -123,6 +124,9 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
123124
recvbuf, recvcounts, displs,
124125
recvtype, comm, request,
125126
comm->c_coll->coll_iallgatherv_module);
127+
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
128+
ompi_coll_base_retain_datatypes(*request, sendtype, recvtype);
129+
}
126130
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
127131
}
128132

ompi/mpi/c/iallreduce.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2015 Research Organization for Information Science
15+
* Copyright (c) 2015-2017 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
1717
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1818
* $COPYRIGHT$
@@ -31,6 +31,7 @@
3131
#include "ompi/errhandler/errhandler.h"
3232
#include "ompi/datatype/ompi_datatype.h"
3333
#include "ompi/op/op.h"
34+
#include "ompi/mca/coll/base/coll_base_util.h"
3435
#include "ompi/memchecker.h"
3536

3637
#if OMPI_BUILD_MPI_PROFILING
@@ -109,10 +110,11 @@ int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count,
109110

110111
/* Invoke the coll component to perform the back-end operation */
111112

112-
OBJ_RETAIN(op);
113113
err = comm->c_coll->coll_iallreduce(sendbuf, recvbuf, count, datatype,
114114
op, comm, request, comm->c_coll->coll_iallreduce_module);
115-
OBJ_RELEASE(op);
115+
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
116+
ompi_coll_base_retain_op(*request, op, datatype);
117+
}
116118
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
117119
}
118120

ompi/mpi/c/ialltoall.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved.
1515
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
1616
* reserved.
17-
* Copyright (c) 2014-2016 Research Organization for Information Science
17+
* Copyright (c) 2014-2017 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
1919
* $COPYRIGHT$
2020
*
@@ -31,6 +31,7 @@
3131
#include "ompi/communicator/communicator.h"
3232
#include "ompi/errhandler/errhandler.h"
3333
#include "ompi/datatype/ompi_datatype.h"
34+
#include "ompi/mca/coll/base/coll_base_util.h"
3435
#include "ompi/memchecker.h"
3536

3637
#if OMPI_BUILD_MPI_PROFILING
@@ -98,5 +99,8 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
9899
err = comm->c_coll->coll_ialltoall(sendbuf, sendcount, sendtype,
99100
recvbuf, recvcount, recvtype, comm,
100101
request, comm->c_coll->coll_ialltoall_module);
102+
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
103+
ompi_coll_base_retain_datatypes(*request, sendtype, recvtype);
104+
}
101105
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
102106
}

ompi/mpi/c/ialltoallv.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
1515
* reserved.
16-
* Copyright (c) 2014-2016 Research Organization for Information Science
16+
* Copyright (c) 2014-2017 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
1919
*
@@ -30,6 +30,7 @@
3030
#include "ompi/communicator/communicator.h"
3131
#include "ompi/errhandler/errhandler.h"
3232
#include "ompi/datatype/ompi_datatype.h"
33+
#include "ompi/mca/coll/base/coll_base_util.h"
3334
#include "ompi/memchecker.h"
3435

3536
#if OMPI_BUILD_MPI_PROFILING
@@ -127,6 +128,9 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl
127128
err = comm->c_coll->coll_ialltoallv(sendbuf, sendcounts, sdispls,
128129
sendtype, recvbuf, recvcounts, rdispls,
129130
recvtype, comm, request, comm->c_coll->coll_ialltoallv_module);
131+
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
132+
ompi_coll_base_retain_datatypes(*request, sendtype, recvtype);
133+
}
130134
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
131135
}
132136

ompi/mpi/c/ialltoallw.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
1515
* reserved.
16-
* Copyright (c) 2014-2016 Research Organization for Information Science
16+
* Copyright (c) 2014-2017 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
1919
*
@@ -30,6 +30,7 @@
3030
#include "ompi/communicator/communicator.h"
3131
#include "ompi/errhandler/errhandler.h"
3232
#include "ompi/datatype/ompi_datatype.h"
33+
#include "ompi/mca/coll/base/coll_base_util.h"
3334
#include "ompi/memchecker.h"
3435

3536
#if OMPI_BUILD_MPI_PROFILING
@@ -124,6 +125,12 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl
124125
sendtypes, recvbuf, recvcounts,
125126
rdispls, recvtypes, comm, request,
126127
comm->c_coll->coll_ialltoallw_module);
128+
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
129+
ompi_coll_base_retain_datatypes_w(*request,
130+
OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm),
131+
sendtypes,
132+
recvtypes);
133+
}
127134
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
128135
}
129136

ompi/mpi/c/ibcast.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved.
3-
* Copyright (c) 2015 Research Organization for Information Science
3+
* Copyright (c) 2015-2017 Research Organization for Information Science
44
* and Technology (RIST). All rights reserved.
55
* Copyright (c) 2017 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
@@ -19,6 +19,7 @@
1919
#include "ompi/communicator/communicator.h"
2020
#include "ompi/errhandler/errhandler.h"
2121
#include "ompi/datatype/ompi_datatype.h"
22+
#include "ompi/mca/coll/base/coll_base_util.h"
2223
#include "ompi/memchecker.h"
2324

2425
#if OMPI_BUILD_MPI_PROFILING
@@ -83,5 +84,8 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype,
8384
err = comm->c_coll->coll_ibcast(buffer, count, datatype, root, comm,
8485
request,
8586
comm->c_coll->coll_ibcast_module);
87+
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
88+
ompi_coll_base_retain_datatypes(*request, datatype, NULL);
89+
}
8690
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
8791
}

0 commit comments

Comments
 (0)