Finalize changes in SST RDMA DP #2522

Closed · wants to merge 3 commits
85 changes: 62 additions & 23 deletions source/adios2/toolkit/sst/dp/rdma_dp.c
@@ -270,7 +270,7 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
if (!fabric->info)
{
Svcs->verbose(CP_Stream, DPCriticalVerbose,
"copying the fabric failed.\n");
"copying the fabric info failed.\n");
return;
}

@@ -365,22 +365,56 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
fi_freeinfo(originfo);
}

static void fini_fabric(struct fabric_state *fabric)
static void fini_fabric(struct fabric_state *fabric, CP_Services Svcs,
void *CP_Stream)
{

int status;
int res;

do
{
status = fi_close((struct fid *)fabric->cq_signal);
} while (status == FI_EBUSY);
res = fi_close((struct fid *)fabric->signal);
} while (res == -FI_EBUSY);

fi_close((struct fid *)fabric->domain);
fi_close((struct fid *)fabric->fabric);
if (res != FI_SUCCESS)
{
Svcs->verbose(CP_Stream, DPCriticalVerbose,
"could not close ep, failed with %d (%s).\n", res,
fi_strerror(res));
return;
}

if (status)
res = fi_close((struct fid *)fabric->cq_signal);
if (res != FI_SUCCESS)
{
// TODO: error handling
Svcs->verbose(CP_Stream, DPCriticalVerbose,
"could not close cq, failed with %d (%s).\n", res,
fi_strerror(res));
}

res = fi_close((struct fid *)fabric->av);
if (res != FI_SUCCESS)
{
Svcs->verbose(CP_Stream, DPCriticalVerbose,
"could not close av, failed with %d (%s).\n", res,
fi_strerror(res));
}
res = fi_close((struct fid *)fabric->domain);
if (res != FI_SUCCESS)
{
Svcs->verbose(CP_Stream, DPCriticalVerbose,
"could not close domain, failed with %d (%s).\n", res,
fi_strerror(res));
return;
}

res = fi_close((struct fid *)fabric->fabric);
if (res != FI_SUCCESS)
{
Svcs->verbose(CP_Stream, DPCriticalVerbose,
"could not close fabric, failed with %d (%s).\n", res,
fi_strerror(res));
return;
}

fi_freeinfo(fabric->info);
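
Review note: the reworked fini_fabric closes the libfabric objects in reverse order of creation (endpoint, then completion queue, address vector, domain, and fabric), retries the endpoint close while it returns -FI_EBUSY, and only frees the fi_info copy once everything is closed. Below is a minimal standalone sketch of that pattern using only public libfabric calls; the close_fid and teardown helpers and their parameter names are illustrative, not code from this PR.

#include <stdio.h>
#include <rdma/fabric.h>      /* fi_close, fi_freeinfo, struct fid_fabric */
#include <rdma/fi_errno.h>    /* FI_SUCCESS, FI_EBUSY, fi_strerror */
#include <rdma/fi_domain.h>   /* struct fid_cq, struct fid_av, struct fid_domain */
#include <rdma/fi_endpoint.h> /* struct fid_ep */

/* Hypothetical helper (not part of the DP): close one libfabric object,
 * retrying while the provider still reports it as busy.  fi_close returns
 * negative error codes, hence the comparison against -FI_EBUSY. */
int close_fid(struct fid *f, const char *what)
{
    int rc;
    do
    {
        rc = fi_close(f);
    } while (rc == -FI_EBUSY);
    if (rc != FI_SUCCESS)
        fprintf(stderr, "could not close %s: %d (%s)\n", what, rc, fi_strerror(-rc));
    return rc;
}

/* Teardown mirrors creation in reverse: the endpoint before the CQ/AV it is
 * bound to, those before the domain that owns them, the domain before the
 * fabric, and the fi_info copy last.  Parameter names are illustrative only. */
void teardown(struct fid_ep *ep, struct fid_cq *cq, struct fid_av *av,
              struct fid_domain *domain, struct fid_fabric *fabric,
              struct fi_info *info)
{
    if (close_fid(&ep->fid, "ep") != FI_SUCCESS)
        return;                        /* mirrors the early return above */
    close_fid(&cq->fid, "cq");         /* cq/av failures are only reported */
    close_fid(&av->fid, "av");
    if (close_fid(&domain->fid, "domain") != FI_SUCCESS)
        return;
    if (close_fid(&fabric->fid, "fabric") != FI_SUCCESS)
        return;
    fi_freeinfo(info);
}
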
@@ -1496,7 +1530,7 @@ static void RdmaDestroyReader(CP_Services Svcs, DP_RS_Stream RS_Stream_v)
"Tearing down RDMA state on reader.\n");
if (RS_Stream->Fabric)
{
fini_fabric(RS_Stream->Fabric);
fini_fabric(RS_Stream->Fabric, Svcs, RS_Stream->CP_Stream);
}

while (StepLog)
@@ -1597,24 +1631,15 @@ static void RdmaDestroyWriter(CP_Services Svcs, DP_WS_Stream WS_Stream_v)
#endif /* SST_HAVE_CRAY_DRC */

Svcs->verbose(WS_Stream->CP_Stream, DPTraceVerbose,
"Tearing down RDMA state on writer.\n");
if (WS_Stream->Fabric)
{
fini_fabric(WS_Stream->Fabric);
}

#ifdef SST_HAVE_CRAY_DRC
if (WS_Stream->Rank == 0)
{
drc_release(Credential, 0);
}
#endif /* SST_HAVE_CRAY_DRC */

"Releasing reader-specific state for remaining readers.\n");
while (WS_Stream->ReaderCount > 0)
{
RdmaDestroyWriterPerReader(Svcs, WS_Stream->Readers[0]);
}

Svcs->verbose(WS_Stream->CP_Stream, DPTraceVerbose,
"Releasing remaining timesteps.\n");

pthread_mutex_lock(&ts_mutex);
while (WS_Stream->Timesteps)
{
@@ -1625,6 +1650,20 @@ static void RdmaDestroyWriter(CP_Services Svcs, DP_WS_Stream WS_Stream_v)
}
pthread_mutex_unlock(&ts_mutex);

Svcs->verbose(WS_Stream->CP_Stream, DPTraceVerbose,
"Tearing down RDMA state on writer.\n");
if (WS_Stream->Fabric)
{
fini_fabric(WS_Stream->Fabric, Svcs, WS_Stream->CP_Stream);
}

#ifdef SST_HAVE_CRAY_DRC
if (WS_Stream->Rank == 0)
{
drc_release(Credential, 0);
}
#endif /* SST_HAVE_CRAY_DRC */

free(WS_Stream->Fabric);
free(WS_Stream);
}
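
Review note: the writer-side teardown is reordered so that reader-specific state and the queued timesteps are released before fini_fabric runs and the DRC credential is dropped. Libfabric will not close a domain while child objects such as registered memory regions are still open against it, so releasing the timesteps first lets the domain close cleanly; the sketch below illustrates that constraint under that assumption. It is not ADIOS2 code: provider selection and error handling are simplified, and the expected -FI_EBUSY on the first close comes from the documented fi_close semantics.

#include <stdio.h>
#include <rdma/fabric.h>
#include <rdma/fi_domain.h>
#include <rdma/fi_errno.h>

int main(void)
{
    struct fi_info *hints = fi_allocinfo(), *info = NULL;
    struct fid_fabric *fabric = NULL;
    struct fid_domain *domain = NULL;
    struct fid_mr *mr = NULL;
    static char buffer[4096];

    /* Bring up a fabric, a domain, and one registered memory region. */
    hints->caps = FI_MSG | FI_RMA;
    if (fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, hints, &info) ||
        fi_fabric(info->fabric_attr, &fabric, NULL) ||
        fi_domain(fabric, info, &domain, NULL) ||
        fi_mr_reg(domain, buffer, sizeof(buffer), FI_REMOTE_READ, 0, 0, 0, &mr, NULL))
    {
        fprintf(stderr, "setup failed (provider-dependent)\n");
        return 1;
    }

    int rc = fi_close(&domain->fid); /* MR still open: expect -FI_EBUSY */
    printf("closing domain with open MR: %d (%s)\n", rc, fi_strerror(-rc));

    fi_close(&mr->fid);              /* release the child object first... */
    rc = fi_close(&domain->fid);     /* ...and the domain can now be closed */
    printf("closing domain after MR: %d\n", rc);

    fi_close(&fabric->fid);
    fi_freeinfo(info);
    fi_freeinfo(hints);
    return 0;
}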