Skip to content

Commit

Permalink
Revert "Enable residual projection for block solver (#189)"
Browse files Browse the repository at this point in the history
This reverts commit 2eb77a5.
  • Loading branch information
stgeke authored Dec 20, 2020
1 parent 2eb77a5 commit f8a5afe
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 116 deletions.
6 changes: 0 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ jobs:
- stage: test
name: "ethier subcycle"
script: cd $NEKRS_EXAMPLES/ethier && nrsmpi ethier 2 2
- stage: test
name: "ethier velocity and pressure projection"
script: cd $NEKRS_EXAMPLES/ethier && nrsmpi ethier 2 3
- stage: test
name: "ethier (block) velocity and pressure projection with subcycling"
script: cd $NEKRS_EXAMPLES/ethier && nrsmpi ethier 2 4
- stage: test
name: "lowMach default"
script: cd $NEKRS_EXAMPLES/lowMach && nrsmpi lowMach 2 1
Expand Down
38 changes: 5 additions & 33 deletions examples/ethier/ci.inc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void ciSetup(MPI_Comm comm, setupAide &options)
options.setArgs("SCALAR00 DENSITY", string("1"));
options.setArgs("SCALAR01 DIFFUSIVITY", string("0.01"));
options.setArgs("SCALAR01 DENSITY", string("1"));
options.setArgs("END TIME", string("0.06"));
options.setArgs("END TIME", string("0.2"));
options.setArgs("DT", string("2e-3"));
options.setArgs("SUBCYCLING STEPS", string("0"));
options.setArgs("PRESSURE RESIDUAL PROJECTION", "FALSE");
Expand All @@ -33,17 +33,6 @@ void ciSetup(MPI_Comm comm, setupAide &options)
options.setArgs("SUBCYCLING STEPS", string("1"));
options.setArgs("PRESSURE RESIDUAL PROJECTION", "TRUE");
}
if (ciMode == 3) {
options.setArgs("PRESSURE RESIDUAL PROJECTION", "TRUE");
options.setArgs("VELOCITY RESIDUAL PROJECTION", "TRUE");
}
if (ciMode == 4) {
options.setArgs("END TIME", string("0.2"));
options.setArgs("VELOCITY BLOCK SOLVER", "TRUE");
options.setArgs("SUBCYCLING STEPS", string("1"));
options.setArgs("PRESSURE RESIDUAL PROJECTION", "TRUE");
options.setArgs("VELOCITY RESIDUAL PROJECTION", "TRUE");
}
options.setArgs("TIME INTEGRATOR", "TOMBO3");
options.setArgs("ADVECTION TYPE", "CONVECTIVE+CUBATURE");
options.setArgs("VELOCITY SOLVER TOLERANCE", string("1e-12"));
Expand All @@ -64,40 +53,23 @@ void ciTestErrors(nrs_t *nrs, dfloat time, int tstep)

double *err = (double *) nek_scPtr(1);

double vxErr, prErr;
const double vxErr = abs((err[0] - 3.65E-10)/err[0]);
const double prErr = abs((err[1] - 6.71E-10)/err[1]);
double s1Err, s2Err;

int pIterErr;
int velIterErr;

switch (ciMode) {
case 1 : velIterErr = abs(nrs->NiterU - 10);
s1Err = abs((err[2] - 5.25E-12)/err[2]);
s2Err = abs((err[3] - 6.09E-12)/err[3]);
s1Err = abs((err[2] - 1.00E-11)/err[2]);
s2Err = abs((err[3] - 1.31E-11)/err[3]);
pIterErr = abs(nrs->NiterP - 4);
vxErr = abs((err[0] - 2.78E-10)/err[0]);
prErr = abs((err[1] - 7.17E-10)/err[1]);
break;
case 2 : velIterErr = abs(nrs->NiterU - 10);
s1Err = abs((err[2] - 6.49E-12)/err[2]);
s2Err = abs((err[3] - 7.28E-12)/err[3]);
pIterErr = abs(nrs->NiterP - 1);
vxErr = abs((err[0] - 2.78E-10)/err[0]);
prErr = abs((err[1] - 8.38E-10)/err[1]);
break;
case 3 : velIterErr = abs(nrs->NiterU - 4);
s1Err = abs((err[2] - 5.25E-12)/err[2]);
s2Err = abs((err[3] - 6.09E-12)/err[3]);
pIterErr = abs(nrs->NiterP - 1);
vxErr = abs((err[0] - 2.78E-10)/err[0]);
prErr = abs((err[1] - 8.34E-10)/err[1]);
break;
case 4 : velIterErr = abs(nrs->NiterU - 1);
s1Err = abs((err[2] - 1.71E-11)/err[2]);
s2Err = abs((err[3] - 2.00E-11)/err[3]);
pIterErr = abs(nrs->NiterP - 1);
vxErr = abs((err[0] - 3.65E-10)/err[0]);
prErr = abs((err[1] - 6.71E-10)/err[1]);
break;
}

Expand Down
37 changes: 10 additions & 27 deletions okl/elliptic/ellipticResidualProjection.okl
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,13 @@
*/

@kernel void scalarMultiply(const dlong N,
const dlong fieldOffset,
const dlong offset,
const dfloat alpha,
@restrict dfloat* x)
{
for(dlong n = 0; n < N; ++n; @tile(p_threadBlockSize,@outer,@inner))
if(n < N){
#pragma unroll p_Nfields
for(dlong field = 0 ; field < p_Nfields; ++field){
x[n + offset + field * fieldOffset] = alpha * x[n + offset + field * fieldOffset];
}
}
if(n < N)
x[n + offset] = alpha * x[n + offset];
}

@kernel void multiScaledAddwOffset(const dlong N,
Expand All @@ -48,11 +43,9 @@
@restrict dfloat* x)
{
for(dlong n = 0; n < N; ++n; @tile(p_threadBlockSize,@outer,@inner))
if(n < N){
if(n < N)
for(dlong k = 0; k < m - 1; ++k)
for(dlong fld = 0; fld < p_Nfields; ++fld)
x[n + destOffset + fld * fieldOffset] = -alphas[k] * x[n + p_Nfields * k * fieldOffset + fld * fieldOffset] + beta * x[n + destOffset + fld * fieldOffset];
}
x[n + destOffset] = -alphas[k] * x[n + k * fieldOffset] + beta * x[n + destOffset];
}

@kernel void accumulate(const dlong N,
Expand All @@ -65,12 +58,10 @@
for(dlong n = 0; n < N; ++n; @tile(p_threadBlockSize,@outer,@inner))
if(n < N) {
// y = alpha[0] * o_xx[:,0]
for(dlong fld = 0 ; fld < p_Nfields; ++fld)
y[n + fld * fieldOffset] = alpha[0] * x[n + fld * fieldOffset];
y[n] = alpha[0] * x[n];
for(dlong k = 1; k < m; ++k)
for(dlong fld = 0 ; fld < p_Nfields; ++fld)
// y += alpha[k] * o_xx[:,k]
y[n + fld * fieldOffset] += alpha[k] * x[n + p_Nfields * k * fieldOffset + fld * fieldOffset];
// y += alpha[k] * o_xx[:,k]
y[n] += alpha[k] * x[n + k * fieldOffset];
}
}

Expand All @@ -87,24 +78,16 @@
for(dlong b = 0; b < (N + p_threadBlockSize - 1) / p_threadBlockSize; ++b; @outer(0)) {
@shared volatile dfloat s_wxy[p_threadBlockSize];
@exclusive dfloat w_loc;
@exclusive dfloat y_loc[p_Nfields];
@exclusive dfloat y_loc;

for(int v = 0; v < m; ++v) {
for(int t = 0; t < p_threadBlockSize; ++t; @inner(0)) {
const dlong id = t + p_threadBlockSize * b;
if(v == 0 && id < N) {
w_loc = w[id];
for(dlong fld = 0; fld < p_Nfields; ++fld){
y_loc[fld] = y[id + offset + fld * fieldOffset];
}
}
dfloat res = 0.f;
if(id < N){
for(dlong fld = 0; fld < p_Nfields; ++fld){
res += w_loc * x[id + p_Nfields * fieldOffset * v + fld * fieldOffset] * y_loc[fld];
}
y_loc = y[id + offset];
}
s_wxy[t] = res;
s_wxy[t] = (id < N) ? w_loc * x[id + fieldOffset * v] * y_loc : 0.f;
}

@barrier("local");
Expand Down
2 changes: 2 additions & 0 deletions src/core/parReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,8 @@ setupAide parRead(std::string &setupFile, MPI_Comm comm)
options.setArgs("VELOCITY BLOCK SOLVER", "FALSE");
if(std::strstr(vsolver.c_str(), "block")) {
options.setArgs("VELOCITY BLOCK SOLVER", "TRUE");
if(options.compareArgs("VELOCITY RESIDUAL PROJECTION","TRUE"))
exit("Residual projection is not enabled for the velocity block solver!", EXIT_FAILURE);
}
}

Expand Down
83 changes: 36 additions & 47 deletions src/elliptic/ellipticResidualProjection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,30 +33,31 @@ void ResidualProjection::matvec(occa::memory& o_Ax,
occa::memory& o_x,
const dlong x_offset)
{
occa::memory o_xtmp = o_x + Nfields * fieldOffset * x_offset * sizeof(dfloat);
occa::memory o_Axtmp = o_Ax + Nfields * fieldOffset * Ax_offset * sizeof(dfloat);
occa::memory o_xtmp = o_x + fieldOffset * x_offset * sizeof(dfloat);
occa::memory o_Axtmp = o_Ax + fieldOffset * Ax_offset * sizeof(dfloat);
matvecOperator(o_xtmp, o_Axtmp);
}

void ResidualProjection::updateProjectionSpace()
{
if(numVecsProjection <= 0) return;
dlong m = numVecsProjection;
if(m <= 0) return;

multiWeightedInnerProduct(o_xx, o_bb, numVecsProjection - 1);
const dfloat norm_orig = alpha[numVecsProjection - 1];
multiWeightedInnerProduct(o_xx, m, o_bb, m - 1);
const dfloat norm_orig = alpha[m - 1];
dfloat norm_new = norm_orig;
const dfloat one = 1.0;
multiScaledAddwOffsetKernel(Nlocal, numVecsProjection, Nfields * (numVecsProjection - 1) * fieldOffset, fieldOffset, o_alpha, one, o_xx);
multiScaledAddwOffsetKernel(Nlocal, numVecsProjection, Nfields * (numVecsProjection - 1) * fieldOffset, fieldOffset, o_alpha, one, o_bb);
for(int k = 0; k < numVecsProjection - 1; ++k)
multiScaledAddwOffsetKernel(Nlocal, m, (m - 1) * fieldOffset, fieldOffset, o_alpha, one, o_xx);
multiScaledAddwOffsetKernel(Nlocal, m, (m - 1) * fieldOffset, fieldOffset, o_alpha, one, o_bb);
for(int k = 0; k < m - 1; ++k)
norm_new = norm_new - alpha[k] * alpha[k];
norm_new = sqrt(norm_new);
dfloat tol = 1e-7;
const dfloat test = norm_new / norm_orig;
if(test > tol) {
const dfloat scale = 1.0 / norm_new;
scalarMultiplyKernel(Nlocal, fieldOffset, Nfields * (numVecsProjection - 1) * fieldOffset, scale, o_xx);
scalarMultiplyKernel(Nlocal, fieldOffset, Nfields * (numVecsProjection - 1) * fieldOffset, scale, o_bb);
scalarMultiplyKernel(Nlocal, (m - 1) * fieldOffset, scale, o_xx);
scalarMultiplyKernel(Nlocal, (m - 1) * fieldOffset, scale, o_bb);
} else {
if(verbose && rank == 0) {
std::cout << "Detected rank deficiency: " << test << ".\n";
Expand All @@ -71,16 +72,13 @@ void ResidualProjection::computePreProjection(occa::memory& o_r)
dfloat one = 1.0;
dfloat zero = 0.0;
dfloat mone = -1.0;
if(numVecsProjection <= 0) return;
multiWeightedInnerProduct(o_xx,o_r,0);
const int m = numVecsProjection;
if(m <= 0) return;
multiWeightedInnerProduct(o_xx,m,o_r,0);

accumulateKernel(Nlocal, numVecsProjection, fieldOffset, o_alpha, o_xx, o_xbar);
accumulateKernel(Nlocal, numVecsProjection, fieldOffset, o_alpha, o_bb, o_rtmp);
if(blockSolver){
scaledAddKernel(Nlocal, fieldOffset, mone, o_rtmp, one, o_r);
} else {
scaledAddKernel(Nlocal, mone, o_rtmp, one, o_r);
}
accumulateKernel(Nlocal, m, fieldOffset, o_alpha, o_xx, o_xbar);
accumulateKernel(Nlocal, m, fieldOffset, o_alpha, o_bb, o_rtmp);
scaledAddKernel(Nlocal, mone, o_rtmp, one, o_r);
}

void ResidualProjection::computePostProjection(occa::memory & o_x)
Expand All @@ -91,33 +89,25 @@ void ResidualProjection::computePostProjection(occa::memory & o_x)
if(numVecsProjection == 0) {
// reset bases
numVecsProjection = 1;
o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat));
o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat));
} else if(numVecsProjection == maxNumVecsProjection) {
numVecsProjection = 1;
if(blockSolver){
scaledAddKernel(Nlocal, fieldOffset, one, o_xbar, one, o_x);
} else {
scaledAddKernel(Nlocal, one, o_xbar, one, o_x);
}
o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat));
scaledAddKernel(Nlocal, one, o_xbar, one, o_x);
o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat));
} else {
numVecsProjection++;
// xx[m-1] = x
o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat), Nfields * (numVecsProjection - 1) * fieldOffset * sizeof(dfloat), 0);
o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat), (numVecsProjection - 1) * fieldOffset * sizeof(dfloat), 0);
// x = x + xbar
if(blockSolver){
scaledAddKernel(Nlocal, fieldOffset, one, o_xbar, one, o_x);
} else {
scaledAddKernel(Nlocal, one, o_xbar, one, o_x);
}
scaledAddKernel(Nlocal, one, o_xbar, one, o_x);
}
const dlong previousNumVecsProjection = numVecsProjection;
const dlong m_save = numVecsProjection;
matvec(o_bb,numVecsProjection - 1,o_xx,numVecsProjection - 1);

updateProjectionSpace();
if (numVecsProjection < previousNumVecsProjection) { // Last vector was linearly dependent, reset space
if (numVecsProjection < m_save) { // Last vector was linearly dependent, reset space
numVecsProjection = 1;
o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat)); // writes first n words of o_xx, first approximation vector
o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat)); // writes first n words of o_xx, first approximation vector
matvec(o_bb,0,o_xx,0);
updateProjectionSpace();
}
Expand All @@ -131,14 +121,12 @@ ResidualProjection::ResidualProjection(elliptic_t& elliptic,
numTimeSteps(_numTimeSteps),
Nlocal(elliptic.mesh->Np * elliptic.mesh->Nelements),
fieldOffset(elliptic.Ntotal),
Nfields(elliptic.Nfields),
Nblock(elliptic.Nblock),
Nblock2(elliptic.Nblock2),
resNormFactor(elliptic.resNormFactor),
rank(elliptic.mesh->rank),
size(elliptic.mesh->size),
comm(elliptic.mesh->comm),
blockSolver(elliptic.blockSolver),
o_tmp(elliptic.o_tmp),
o_tmp2(elliptic.o_tmp2),
o_wrk(elliptic.o_wrk),
Expand All @@ -155,9 +143,9 @@ ResidualProjection::ResidualProjection(elliptic_t& elliptic,
work = (dfloat*) calloc(maxNumVecsProjection, sizeof(dfloat));
multiwork = (dfloat*) calloc(Nblock * maxNumVecsProjection, sizeof(dfloat));
o_alpha = elliptic.mesh->device.malloc(maxNumVecsProjection * sizeof(dfloat));
o_xbar = elliptic.mesh->device.malloc(Nfields * fieldOffset * sizeof(dfloat));
o_xx = elliptic.mesh->device.malloc(Nfields * fieldOffset * maxNumVecsProjection * sizeof(dfloat));
o_bb = elliptic.mesh->device.malloc(Nfields * fieldOffset * maxNumVecsProjection * sizeof(dfloat));
o_xbar = elliptic.mesh->device.malloc(Nlocal * sizeof(dfloat));
o_xx = elliptic.mesh->device.malloc(fieldOffset * maxNumVecsProjection * sizeof(dfloat));
o_bb = elliptic.mesh->device.malloc(fieldOffset * maxNumVecsProjection * sizeof(dfloat));

string install_dir;
install_dir.assign(getenv("NEKRS_INSTALL_DIR"));
Expand All @@ -172,7 +160,6 @@ ResidualProjection::ResidualProjection(elliptic_t& elliptic,
properties["defines/p_blockSize"] = BLOCKSIZE;
properties["defines/dfloat"] = dfloatString;
properties["defines/dlong"] = dlongString;
properties["defines/p_Nfields"] = Nfields;

filename = oklpath + "ellipticResidualProjection.okl";
scalarMultiplyKernel = elliptic.mesh->device.buildKernel(filename.c_str(),
Expand Down Expand Up @@ -206,7 +193,8 @@ void ResidualProjection::pre(occa::memory& o_r)
if(timestep < numTimeSteps)
return;

if(numVecsProjection <= 0) return;
const int m = numVecsProjection;
if(m <= 0) return;
dfloat priorResidualNorm = 0.0;
if(verbose) priorResidualNorm =
sqrt(weightedNorm(o_r) * resNormFactor);
Expand Down Expand Up @@ -234,23 +222,24 @@ void ResidualProjection::post(occa::memory& o_x)

void ResidualProjection::multiWeightedInnerProduct(
occa::memory &o_a,
const dlong m,
occa::memory &o_b,
const dlong offset)
{
#ifdef ELLIPTIC_ENABLE_TIMER
timer::tic("dotp",1);
#endif
multiWeightedInnerProduct2Kernel(Nlocal, fieldOffset, Nblock, numVecsProjection, Nfields * offset * fieldOffset, o_invDegree, o_a, o_b, o_wrk);
multiWeightedInnerProduct2Kernel(Nlocal, fieldOffset, Nblock, m, offset * fieldOffset, o_invDegree, o_a, o_b, o_wrk);

o_wrk.copyTo(multiwork, sizeof(dfloat) * numVecsProjection * Nblock);
for(dlong k = 0; k < numVecsProjection; ++k) {
o_wrk.copyTo(multiwork, sizeof(dfloat) * m * Nblock);
for(dlong k = 0; k < m; ++k) {
dfloat accum = 0.0;
for(dlong n = 0; n < Nblock; ++n)
accum += multiwork[n + k * Nblock];
alpha[k] = accum;
}
MPI_Allreduce(MPI_IN_PLACE, alpha, numVecsProjection, MPI_DFLOAT, MPI_SUM, comm);
o_alpha.copyFrom(alpha,sizeof(dfloat) * numVecsProjection);
MPI_Allreduce(MPI_IN_PLACE, alpha, m, MPI_DFLOAT, MPI_SUM, comm);
o_alpha.copyFrom(alpha,sizeof(dfloat) * m);
#ifdef ELLIPTIC_ENABLE_TIMER
timer::toc("dotp");
#endif
Expand Down
3 changes: 1 addition & 2 deletions src/elliptic/ellipticResidualProjection.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ResidualProjection final
void matvec(occa::memory& o_Ax, const dlong Ax_offset, occa::memory& o_x, const dlong x_offset);
void multiWeightedInnerProduct(
occa::memory& o_a,
const dlong m,
occa::memory& o_b,
const dlong offset);
const dlong maxNumVecsProjection;
Expand Down Expand Up @@ -80,13 +81,11 @@ class ResidualProjection final
dlong numVecsProjection;
const dlong Nlocal; // vector size
const dlong fieldOffset; // offset
const dlong Nfields;
const dlong Nblock;
const dlong Nblock2;
const dfloat resNormFactor;
const int rank;
const int size;
const int blockSolver;
MPI_Comm comm;

std::function<void(occa::memory&,occa::memory&)> matvecOperator;
Expand Down
2 changes: 1 addition & 1 deletion src/elliptic/ellipticSolve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ int ellipticSolve(elliptic_t* elliptic,
ellipticUpdateJacobi(elliptic);

if(options.compareArgs("RESIDUAL PROJECTION","TRUE"))
elliptic->o_x0.copyFrom(o_x, elliptic->Nfields * elliptic->Ntotal * sizeof(dfloat));
elliptic->o_x0.copyFrom(o_x, elliptic->Ntotal * sizeof(dfloat));

// compute initial residual
ellipticOperator(elliptic, o_x, elliptic->o_Ap, dfloatString);
Expand Down

0 comments on commit f8a5afe

Please sign in to comment.