From baad166493a314cb803ac671741f2977edebb186 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 14 Feb 2020 20:14:24 -0600 Subject: [PATCH] ch4/am: hold a reference to comm in long_req_target_msg_cb By the time we match the request, the comm may already have been freed by the user. Hold on to a reference so we are still able to use it to send additional active messages. This would not be necessary if the am transport used global ranks for sending. --- src/mpid/ch4/src/ch4r_callbacks.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mpid/ch4/src/ch4r_callbacks.c b/src/mpid/ch4/src/ch4r_callbacks.c index a971215fc4b..2c7cdd8c4a9 100644 --- a/src/mpid/ch4/src/ch4r_callbacks.c +++ b/src/mpid/ch4/src/ch4r_callbacks.c @@ -553,8 +553,12 @@ int MPIDIG_send_long_req_target_msg_cb(int handler_id, void *am_hdr, void **data } MPID_THREAD_CS_EXIT(VCI, MPIDIU_THREAD_MPIDIG_GLOBAL_MUTEX); } else { - /* Matching receive was posted, tell the netmod */ - MPIR_Comm_release(root_comm); /* -1 for posted_list */ + /* Matching receive was posted */ + rreq->comm = root_comm; + /* NOTE: we are skipping MPIR_Comm_release for taking off posted_list since we are holding + * the reference to root_comm in the rreq. We need to hold on to this reference so the comm + * may remain valid by the time we send ack (using the comm). + */ MPIDIG_REQUEST(rreq, req->status) |= MPIDIG_REQ_LONG_RTS; MPIDIG_REQUEST(rreq, req->rreq.peer_req_ptr) = lreq_hdr->sreq_ptr; MPIDIG_REQUEST(rreq, rank) = hdr->src_rank;