Skip to content

Commit

Permalink
ch4/am: check is_local in matching posted_list
Browse files Browse the repository at this point in the history
This is necessary when both shmmod and netmod are using active messages
and an anysource recv pair is posted in the same posted_list. Without
the is_local check, a netmod send will get matched to a shmmod recv,
resulting in errors.
  • Loading branch information
hzhou committed Feb 25, 2020
1 parent baad166 commit 21870d1
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/mpid/ch4/src/ch4r_callbacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ int MPIDIG_send_target_msg_cb(int handler_id, void *am_hdr, void **data, size_t
/* MPIDI_CS_ENTER(); */
while (TRUE) {
rreq = MPIDIG_dequeue_posted(hdr->src_rank, hdr->tag, hdr->context_id,
&MPIDIG_COMM(root_comm, posted_list));
is_local, &MPIDIG_COMM(root_comm, posted_list));
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (rreq) {
int is_cancelled;
Expand Down Expand Up @@ -488,7 +488,7 @@ int MPIDIG_send_long_req_target_msg_cb(int handler_id, void *am_hdr, void **data
/* MPIDI_CS_ENTER(); */
while (TRUE) {
rreq = MPIDIG_dequeue_posted(hdr->src_rank, hdr->tag, hdr->context_id,
&MPIDIG_COMM(root_comm, posted_list));
is_local, &MPIDIG_COMM(root_comm, posted_list));
#ifndef MPIDI_CH4_DIRECT_NETMOD
if (rreq) {
int is_cancelled;
Expand Down
15 changes: 10 additions & 5 deletions src/mpid/ch4/src/ch4r_recvq.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIDIG_find_unexp(int rank, int tag,

MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIDIG_dequeue_posted(int rank, int tag,
MPIR_Context_id_t context_id,
MPIDIG_rreq_t ** list)
int is_local, MPIDIG_rreq_t ** list)
{
MPIR_Request *req = NULL;
MPIDIG_rreq_t *curr, *tmp;
Expand All @@ -158,13 +158,18 @@ MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIDIG_dequeue_posted(int rank, int tag,
MPIR_T_PVAR_TIMER_START(RECVQ, time_failed_matching_postedq);
DL_FOREACH_SAFE(*list, curr, tmp) {
MPIR_T_PVAR_COUNTER_INC(RECVQ, posted_recvq_match_attempts, 1);
req = curr->request;
if (MPIDIG_match_posted(rank, tag, context_id, req)) {
#ifndef MPIDI_CH4_DIRECT_NETMOD
/* NOTE: extra negation to force logical comparisons */
if (!MPIDI_REQUEST(curr->request, is_local) != !is_local) {
continue;
}
#endif
if (MPIDIG_match_posted(rank, tag, context_id, curr->request)) {
req = curr->request;
DL_DELETE(*list, curr);
MPIR_T_PVAR_LEVEL_DEC(RECVQ, posted_recvq_length, 1);
break;
}
req = NULL;
}
if (!req)
MPIR_T_PVAR_TIMER_END(RECVQ, time_failed_matching_postedq);
Expand Down Expand Up @@ -305,7 +310,7 @@ MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIDIG_find_unexp(int rank, int tag,

MPL_STATIC_INLINE_PREFIX MPIR_Request *MPIDIG_dequeue_posted(int rank, int tag,
MPIR_Context_id_t context_id,
MPIDIG_rreq_t ** list)
int is_local, MPIDIG_rreq_t ** list)
{
MPIR_Request *req = NULL;
MPIDIG_rreq_t *curr, *tmp;
Expand Down

0 comments on commit 21870d1

Please sign in to comment.