Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove host/device mirroring for a few common fields #336

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/cds/cds.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ struct cds_t

int compute[NSCALAR_MAX];

dfloat* U, * S;
dfloat* rkNS;
// dfloat *rhsS;
dfloat* rkS;
Expand Down Expand Up @@ -72,11 +71,10 @@ struct cds_t

int Nsubsteps;
dfloat sdt;
dfloat* Ue;
occa::memory o_Ue;

int var_coeff;
dfloat* prop, * ellipticCoeff;
dfloat * ellipticCoeff;
occa::memory o_prop, o_ellipticCoeff;
occa::memory o_rho, o_diff;

Expand Down
6 changes: 1 addition & 5 deletions src/core/nrs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,6 @@ struct nrs_t
int isOutputStep;
int outputForceStep;

dfloat* U, * P;
dfloat* BF, * FU;

//RK Subcycle Data
int nRK;
dfloat* coeffsfRK, * weightsRK, * nodesRK;
Expand All @@ -84,7 +81,6 @@ struct nrs_t
dfloat* Ue, sdt;
occa::memory o_Ue;

dfloat* div;
occa::memory o_div;

dfloat rho, mue;
Expand Down Expand Up @@ -130,7 +126,7 @@ struct nrs_t
occa::memory o_FU;

int var_coeff;
dfloat* prop, * ellipticCoeff;
dfloat * ellipticCoeff;
occa::memory o_prop, o_ellipticCoeff;

//EXTBDF data
Expand Down
61 changes: 23 additions & 38 deletions src/core/setup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,7 @@ void nrsSetup(MPI_Comm comm, setupAide &options, nrs_t *nrs)
mesh->o_invLMM.copyFrom(platform->o_mempool.slice0, mesh->Nlocal * sizeof(dfloat));

const int nAB = std::max(nrs->nEXT, mesh->nAB);
mesh->U = (dfloat*) calloc(nrs->NVfields * nrs->fieldOffset * nAB, sizeof(dfloat));
mesh->o_U = platform->device.malloc(nrs->NVfields * nrs->fieldOffset * nAB * sizeof(dfloat), mesh->U);
mesh->o_U = platform->device.malloc(nrs->NVfields * nrs->fieldOffset * nAB * sizeof(dfloat));
if(nrs->Nsubsteps)
mesh->o_divU = platform->device.malloc(nrs->fieldOffset * nAB, sizeof(dfloat));
}
Expand All @@ -198,35 +197,31 @@ void nrsSetup(MPI_Comm comm, setupAide &options, nrs_t *nrs)
nrs->o_Urst = platform->device.malloc(Nstates * nrs->NVfields * offset, sizeof(dfloat));
}

nrs->U = (dfloat*) calloc(nrs->NVfields * std::max(nrs->nBDF, nrs->nEXT) * nrs->fieldOffset,sizeof(dfloat));
nrs->Ue = (dfloat*) calloc(nrs->NVfields * nrs->fieldOffset,sizeof(dfloat));
nrs->P = (dfloat*) calloc(nrs->fieldOffset,sizeof(dfloat));
nrs->BF = (dfloat*) calloc(nrs->NVfields * nrs->fieldOffset,sizeof(dfloat));
nrs->FU = (dfloat*) calloc(nrs->NVfields * nrs->nEXT * nrs->fieldOffset,sizeof(dfloat));

nrs->o_U = platform->device.malloc(nrs->NVfields * std::max(nrs->nBDF,nrs->nEXT) * nrs->fieldOffset * sizeof(dfloat), nrs->U);
nrs->o_Ue = platform->device.malloc(nrs->NVfields * nrs->fieldOffset * sizeof(dfloat), nrs->Ue);
nrs->o_P = platform->device.malloc(nrs->fieldOffset * sizeof(dfloat), nrs->P);
nrs->o_BF = platform->device.malloc(nrs->NVfields * nrs->fieldOffset * sizeof(dfloat), nrs->BF);
nrs->o_FU = platform->device.malloc(nrs->NVfields * nrs->nEXT * nrs->fieldOffset * sizeof(dfloat), nrs->FU);
nrs->o_U = platform->device.malloc(nrs->NVfields * std::max(nrs->nBDF,nrs->nEXT) * nrs->fieldOffset * sizeof(dfloat));
nrs->o_Ue = platform->device.malloc(nrs->NVfields * nrs->fieldOffset * sizeof(dfloat));
nrs->o_P = platform->device.malloc(nrs->fieldOffset * sizeof(dfloat));
nrs->o_BF = platform->device.malloc(nrs->NVfields * nrs->fieldOffset * sizeof(dfloat));
nrs->o_FU = platform->device.malloc(nrs->NVfields * nrs->nEXT * nrs->fieldOffset * sizeof(dfloat));

nrs->var_coeff = 1; // use always var coeff elliptic
nrs->ellipticCoeff = (dfloat*) calloc(2 * nrs->fieldOffset,sizeof(dfloat));
nrs->o_ellipticCoeff = device.malloc(2 * nrs->fieldOffset * sizeof(dfloat),
nrs->ellipticCoeff);

nrs->prop = (dfloat*) calloc(2 * nrs->fieldOffset,sizeof(dfloat));
dfloat* prop = (dfloat*) calloc(2 * nrs->fieldOffset,sizeof(dfloat));
for (int e = 0; e < mesh->Nelements; e++)
for (int n = 0; n < mesh->Np; n++) {
nrs->prop[0 * nrs->fieldOffset + e * mesh->Np + n] = mue;
nrs->prop[1 * nrs->fieldOffset + e * mesh->Np + n] = rho;
prop[0 * nrs->fieldOffset + e * mesh->Np + n] = mue;
prop[1 * nrs->fieldOffset + e * mesh->Np + n] = rho;
}
nrs->o_prop = device.malloc(2 * nrs->fieldOffset * sizeof(dfloat), nrs->prop);
nrs->o_prop = device.malloc(2 * nrs->fieldOffset * sizeof(dfloat), prop);

free(prop);

nrs->o_mue = nrs->o_prop.slice(0 * nrs->fieldOffset * sizeof(dfloat));
nrs->o_rho = nrs->o_prop.slice(1 * nrs->fieldOffset * sizeof(dfloat));

nrs->div = (dfloat*) calloc(nrs->fieldOffset,sizeof(dfloat));
nrs->o_div = device.malloc(nrs->fieldOffset * sizeof(dfloat), nrs->div);
nrs->o_div = device.malloc(nrs->fieldOffset * sizeof(dfloat));

nrs->o_coeffEXT = platform->device.malloc(nrs->nEXT * sizeof(dfloat), nrs->coeffEXT);
nrs->o_coeffBDF = platform->device.malloc(nrs->nBDF * sizeof(dfloat), nrs->coeffBDF);
Expand Down Expand Up @@ -565,7 +560,7 @@ void nrsSetup(MPI_Comm comm, setupAide &options, nrs_t *nrs)
if(!buildOnly) {
// get IC + t0 from nek
double startTime;
nek::copyFromNek(startTime);
nek::ocopyFromNek(startTime);
platform->options.setArgs("START TIME", to_string_f(startTime));

if(platform->comm.mpiRank == 0) printf("calling udf_setup ... "); fflush(stdout);
Expand Down Expand Up @@ -957,14 +952,6 @@ cds_t* cdsSetup(nrs_t* nrs, setupAide options, occa::properties& kernelInfoBC)
cds->gshT = cds->gsh;
}

// Solution storage at interpolation nodes
cds->U = nrs->U; // Point to INS side Velocity
cds->S =
(dfloat*) calloc(std::max(cds->nBDF, cds->nEXT) * cds->fieldOffsetSum,sizeof(dfloat));
cds->BF = (dfloat*) calloc(cds->fieldOffsetSum,sizeof(dfloat));
cds->FS =
(dfloat*) calloc(cds->nBDF * cds->fieldOffsetSum,sizeof(dfloat));

cds->Nsubsteps = nrs->Nsubsteps;
if(cds->Nsubsteps) {
cds->nRK = nrs->nRK;
Expand All @@ -978,10 +965,8 @@ cds_t* cdsSetup(nrs_t* nrs, setupAide options, occa::properties& kernelInfoBC)
cds->dt = nrs->dt;
cds->sdt = nrs->sdt;

cds->prop = (dfloat*) calloc(2 * cds->fieldOffsetSum,sizeof(dfloat));



dfloat * prop = (dfloat*) malloc(2 * cds->fieldOffsetSum * sizeof(dfloat));
for(int is = 0; is < cds->NSfields; is++) {
std::stringstream ss;
ss << std::setfill('0') << std::setw(2) << is;
Expand All @@ -997,12 +982,13 @@ cds_t* cdsSetup(nrs_t* nrs, setupAide options, occa::properties& kernelInfoBC)
const dlong off = cds->fieldOffsetSum;
for (int e = 0; e < mesh->Nelements; e++)
for (int n = 0; n < mesh->Np; n++) {
cds->prop[0 * off + cds->fieldOffsetScan[is] + e * mesh->Np + n] = diff;
cds->prop[1 * off + cds->fieldOffsetScan[is] + e * mesh->Np + n] = rho;
prop[0 * off + cds->fieldOffsetScan[is] + e * mesh->Np + n] = diff;
prop[1 * off + cds->fieldOffsetScan[is] + e * mesh->Np + n] = rho;
}
}
cds->o_prop =
device.malloc(2 * cds->fieldOffsetSum * sizeof(dfloat), cds->prop);
device.malloc(2 * cds->fieldOffsetSum * sizeof(dfloat), prop);
free(prop);
cds->o_diff = cds->o_prop.slice(0 * cds->fieldOffsetSum * sizeof(dfloat));
cds->o_rho = cds->o_prop.slice(1 * cds->fieldOffsetSum * sizeof(dfloat));

Expand All @@ -1013,13 +999,12 @@ cds_t* cdsSetup(nrs_t* nrs, setupAide options, occa::properties& kernelInfoBC)
cds->o_U = nrs->o_U;
cds->o_Ue = nrs->o_Ue;
cds->o_S =
platform->device.malloc(std::max(cds->nBDF, cds->nEXT) * cds->fieldOffsetSum * sizeof(dfloat), cds->S);
platform->device.malloc(std::max(cds->nBDF, cds->nEXT) * cds->fieldOffsetSum * sizeof(dfloat));
cds->o_Se =
platform->device.malloc(cds->fieldOffsetSum , sizeof(dfloat));
cds->o_BF = platform->device.malloc(cds->fieldOffsetSum * sizeof(dfloat), cds->BF);
cds->o_BF = platform->device.malloc(cds->fieldOffsetSum * sizeof(dfloat));
cds->o_FS =
platform->device.malloc(cds->nEXT * cds->fieldOffsetSum * sizeof(dfloat),
cds->FS);
platform->device.malloc(cds->nEXT * cds->fieldOffsetSum * sizeof(dfloat));

cds->o_relUrst = nrs->o_relUrst;
cds->o_Urst = nrs->o_Urst;
Expand Down
10 changes: 0 additions & 10 deletions src/lib/nekrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,17 +130,7 @@ void setup(MPI_Comm comm_in, int buildOnly, int commSizeTarget,

nrsSetup(comm, options, nrs);

nrs->o_U.copyFrom(nrs->U);
nrs->o_P.copyFrom(nrs->P);
nrs->o_prop.copyFrom(nrs->prop);
if(nrs->Nscalar) {
nrs->cds->o_S.copyFrom(nrs->cds->S);
nrs->cds->o_prop.copyFrom(nrs->cds->prop);
}

evaluateProperties(nrs, startTime());
nrs->o_prop.copyTo(nrs->prop);
if(nrs->Nscalar) nrs->cds->o_prop.copyTo(nrs->cds->prop);

nek::ocopyToNek(startTime(), 0);

Expand Down
1 change: 0 additions & 1 deletion src/mesh/mesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ struct mesh_t

// mesh velocity
occa::memory o_U;
dfloat* U; // host shadow of mesh velocity

occa::memory o_D;
occa::memory o_DPfloat;
Expand Down
105 changes: 26 additions & 79 deletions src/nekInterface/nekInterfaceAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,10 +706,6 @@ void copyToNek(dfloat time)
mesh_t* mesh = nrs->meshV;
dlong Nlocal = mesh->Nelements * mesh->Np;

dfloat* vx = nrs->U + 0 * nrs->fieldOffset;
dfloat* vy = nrs->U + 1 * nrs->fieldOffset;
dfloat* vz = nrs->U + 2 * nrs->fieldOffset;

*(nekData.time) = time;
*(nekData.p0th) = nrs->p0th[0];
*(nekData.dp0thdt) = nrs->dp0thdt;
Expand All @@ -718,71 +714,41 @@ void copyToNek(dfloat time)
mesh_t *mesh = nrs->meshV;
if(nrs->cht) mesh = nrs->cds->mesh[0];
const dlong Nlocal = mesh->Nelements * mesh->Np;
dfloat* wx = mesh->U + 0 * nrs->fieldOffset;
dfloat* wy = mesh->U + 1 * nrs->fieldOffset;
dfloat* wz = mesh->U + 2 * nrs->fieldOffset;
memcpy(nekData.wx, wx, sizeof(dfloat) * Nlocal);
memcpy(nekData.wy, wy, sizeof(dfloat) * Nlocal);
memcpy(nekData.wz, wz, sizeof(dfloat) * Nlocal);
memcpy(nekData.xm1, mesh->x, sizeof(dfloat) * Nlocal);
memcpy(nekData.ym1, mesh->y, sizeof(dfloat) * Nlocal);
memcpy(nekData.zm1, mesh->z, sizeof(dfloat) * Nlocal);
mesh->o_U.copyTo(nekData.wx, sizeof(dfloat) * Nlocal, 0 * nrs->fieldOffset * sizeof(dfloat));
mesh->o_U.copyTo(nekData.wy, sizeof(dfloat) * Nlocal, 1 * nrs->fieldOffset * sizeof(dfloat));
mesh->o_U.copyTo(nekData.wz, sizeof(dfloat) * Nlocal, 2 * nrs->fieldOffset * sizeof(dfloat));

mesh->o_x.copyTo(nekData.xm1, sizeof(dfloat) * Nlocal);
mesh->o_y.copyTo(nekData.ym1, sizeof(dfloat) * Nlocal);
mesh->o_z.copyTo(nekData.zm1, sizeof(dfloat) * Nlocal);
recomputeGeometry();
}

memcpy(nekData.vx, vx, sizeof(dfloat) * Nlocal);
memcpy(nekData.vy, vy, sizeof(dfloat) * Nlocal);
memcpy(nekData.vz, vz, sizeof(dfloat) * Nlocal);
memcpy(nekData.pr, nrs->P, sizeof(dfloat) * Nlocal);
nrs->o_U.copyTo(nekData.vx, sizeof(dfloat) * Nlocal, 0 * nrs->fieldOffset * sizeof(dfloat));
nrs->o_U.copyTo(nekData.vy, sizeof(dfloat) * Nlocal, 1 * nrs->fieldOffset * sizeof(dfloat));
nrs->o_U.copyTo(nekData.vz, sizeof(dfloat) * Nlocal, 2 * nrs->fieldOffset * sizeof(dfloat));
nrs->o_P.copyTo(nekData.pr, sizeof(dfloat) * Nlocal);

if(nrs->Nscalar) {
if(platform->options.compareArgs("LOWMACH", "TRUE")) memcpy(nekData.qtl, nrs->div, sizeof(dfloat) * Nlocal);
if(platform->options.compareArgs("LOWMACH", "TRUE")) nrs->o_div.copyTo(nekData.qtl, sizeof(dfloat) * Nlocal);
const dlong nekFieldOffset = nekData.lelt * mesh->Np;
for(int is = 0; is < nrs->Nscalar; is++) {
mesh_t* mesh;
(is) ? mesh = nrs->cds->meshV : mesh = nrs->cds->mesh[0];
const dlong Nlocal = mesh->Nelements * mesh->Np;
dfloat* Ti = nekData.t + is * nekFieldOffset;
dfloat* Si = nrs->cds->S + nrs->cds->fieldOffsetScan[is];
memcpy(Ti, Si, Nlocal * sizeof(dfloat));
nrs->cds->o_S.copyTo(Ti, Nlocal * sizeof(dfloat), nrs->cds->fieldOffsetScan[is] * sizeof(dfloat));
}
}
}

// Push the current solution to Nek5000 with the time stamp fixed at 0.0.
//
// No device-to-host staging is done here: the host mirrors this wrapper used
// to refresh (nrs->U, nrs->P, nrs->div, cds->S, mesh->U) are removed from the
// structs, and copyToNek(dfloat) transfers from the device buffers
// (o_U, o_P, o_div, o_S and the mesh o_U/o_x/o_y/o_z) straight into nekData.
void ocopyToNek(void)
{
  copyToNek(0.0);
}

// Push the current solution to Nek5000 for the given time and step index.
//
// time  - simulation time forwarded unchanged to copyToNek(time, tstep)
// tstep - time-step index forwarded unchanged to copyToNek(time, tstep)
//
// The former device-to-host refresh of nrs->U, nrs->P, nrs->div, cds->S and
// mesh->U is dropped because those host mirrors no longer exist.
// NOTE(review): copyToNek(dfloat, int) is not visible here; it is presumed to
// delegate to copyToNek(dfloat), which reads the device buffers directly.
void ocopyToNek(dfloat time, int tstep)
{
  copyToNek(time, tstep);
}

Expand All @@ -795,19 +761,6 @@ void copyToNek(dfloat time, int tstep)
// Pull the solution from Nek5000 into the nekRS device buffers.
//
// time - output; set by copyFromNek(time)
//
// copyFromNek() uploads nekData.vx/vy/vz, nekData.pr, the scalars and (for
// moving meshes) nekData.wx/wy/wz directly into o_U, o_P, o_S and mesh->o_U.
// The host-to-device pushes that used to follow went through the removed host
// mirrors (nrs->U, nrs->P, cds->S, mesh->U), so they are dropped.
void ocopyFromNek(dfloat &time)
{
  copyFromNek(time);
}

void copyFromNek(dfloat &time)
Expand All @@ -824,38 +777,32 @@ void copyFromNek(dfloat &time)
nrs->p0th[0] = *(nekData.p0th);
nrs->dp0thdt = *(nekData.dp0thdt);

dfloat* vx = nrs->U + 0 * nrs->fieldOffset;
dfloat* vy = nrs->U + 1 * nrs->fieldOffset;
dfloat* vz = nrs->U + 2 * nrs->fieldOffset;
nrs->o_U.copyFrom(nekData.vx, sizeof(dfloat) * Nlocal, 0 * nrs->fieldOffset * sizeof(dfloat));
nrs->o_U.copyFrom(nekData.vy, sizeof(dfloat) * Nlocal, 1 * nrs->fieldOffset * sizeof(dfloat));
nrs->o_U.copyFrom(nekData.vz, sizeof(dfloat) * Nlocal, 2 * nrs->fieldOffset * sizeof(dfloat));

memcpy(vx, nekData.vx, sizeof(dfloat) * Nlocal);
memcpy(vy, nekData.vy, sizeof(dfloat) * Nlocal);
memcpy(vz, nekData.vz, sizeof(dfloat) * Nlocal);
if(platform->options.compareArgs("MOVING MESH", "TRUE")){
mesh_t *mesh = nrs->meshV;
if(nrs->cht) mesh = nrs->cds->mesh[0];
const dlong Nlocal = mesh->Nelements * mesh->Np;
dfloat* wx = mesh->U + 0 * nrs->fieldOffset;
dfloat* wy = mesh->U + 1 * nrs->fieldOffset;
dfloat* wz = mesh->U + 2 * nrs->fieldOffset;
memcpy(wx, nekData.wx, sizeof(dfloat) * Nlocal);
memcpy(wy, nekData.wy, sizeof(dfloat) * Nlocal);
memcpy(wz, nekData.wz, sizeof(dfloat) * Nlocal);
memcpy(nekData.xm1, mesh->x, sizeof(dfloat) * Nlocal);
memcpy(nekData.ym1, mesh->y, sizeof(dfloat) * Nlocal);
memcpy(nekData.zm1, mesh->z, sizeof(dfloat) * Nlocal);
mesh->o_U.copyFrom(nekData.wx, sizeof(dfloat) * Nlocal, 0 * nrs->fieldOffset * sizeof(dfloat));
mesh->o_U.copyFrom(nekData.wy, sizeof(dfloat) * Nlocal, 1 * nrs->fieldOffset * sizeof(dfloat));
mesh->o_U.copyFrom(nekData.wz, sizeof(dfloat) * Nlocal, 2 * nrs->fieldOffset * sizeof(dfloat));

mesh->o_x.copyTo(nekData.xm1, sizeof(dfloat) * Nlocal);
mesh->o_y.copyTo(nekData.ym1, sizeof(dfloat) * Nlocal);
mesh->o_z.copyTo(nekData.zm1, sizeof(dfloat) * Nlocal);
recomputeGeometry();
}
memcpy(nrs->P, nekData.pr, sizeof(dfloat) * Nlocal);
nrs->o_P.copyFrom(nekData.pr, sizeof(dfloat) * Nlocal);
if(nrs->Nscalar) {
const dlong nekFieldOffset = nekData.lelt * mesh->Np;
for(int is = 0; is < nrs->Nscalar; is++) {
mesh_t* mesh;
(is) ? mesh = nrs->cds->meshV : mesh = nrs->cds->mesh[0];
const dlong Nlocal = mesh->Nelements * mesh->Np;
dfloat* Ti = nekData.t + is * nekFieldOffset;
dfloat* Si = nrs->cds->S + nrs->cds->fieldOffsetScan[is];
memcpy(Si, Ti, Nlocal * sizeof(dfloat));
nrs->cds->o_S.copyFrom(Ti, Nlocal * sizeof(dfloat), nrs->cds->fieldOffsetScan[is] * sizeof(dfloat));
}
}
}
Expand Down