From f8a5afe1f252665e5ca9ee99d343d57fe58e6aa0 Mon Sep 17 00:00:00 2001 From: stgeke Date: Sun, 20 Dec 2020 11:36:19 +0100 Subject: [PATCH] Revert "Enable residual projection for block solver (#189)" This reverts commit 2eb77a5e17ada55de827a8833c658cb91f25219a. --- .travis.yml | 6 -- examples/ethier/ci.inc | 38 ++-------- okl/elliptic/ellipticResidualProjection.okl | 37 +++------ src/core/parReader.cpp | 2 + src/elliptic/ellipticResidualProjection.cpp | 83 +++++++++------------ src/elliptic/ellipticResidualProjection.h | 3 +- src/elliptic/ellipticSolve.cpp | 2 +- 7 files changed, 55 insertions(+), 116 deletions(-) diff --git a/.travis.yml b/.travis.yml index 862706fc8..e6ba2eb7b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,12 +37,6 @@ jobs: - stage: test name: "ethier subcycle" script: cd $NEKRS_EXAMPLES/ethier && nrsmpi ethier 2 2 - - stage: test - name: "ethier velocity and pressure projection" - script: cd $NEKRS_EXAMPLES/ethier && nrsmpi ethier 2 3 - - stage: test - name: "ethier (block) velocity and pressure projection with subcycling" - script: cd $NEKRS_EXAMPLES/ethier && nrsmpi ethier 2 4 - stage: test name: "lowMach default" script: cd $NEKRS_EXAMPLES/lowMach && nrsmpi lowMach 2 1 diff --git a/examples/ethier/ci.inc b/examples/ethier/ci.inc index c89a4861b..9ec77e509 100644 --- a/examples/ethier/ci.inc +++ b/examples/ethier/ci.inc @@ -19,7 +19,7 @@ void ciSetup(MPI_Comm comm, setupAide &options) options.setArgs("SCALAR00 DENSITY", string("1")); options.setArgs("SCALAR01 DIFFUSIVITY", string("0.01")); options.setArgs("SCALAR01 DENSITY", string("1")); - options.setArgs("END TIME", string("0.06")); + options.setArgs("END TIME", string("0.2")); options.setArgs("DT", string("2e-3")); options.setArgs("SUBCYCLING STEPS", string("0")); options.setArgs("PRESSURE RESIDUAL PROJECTION", "FALSE"); @@ -33,17 +33,6 @@ void ciSetup(MPI_Comm comm, setupAide &options) options.setArgs("SUBCYCLING STEPS", string("1")); options.setArgs("PRESSURE RESIDUAL PROJECTION", "TRUE"); } - if (ciMode == 3) { - options.setArgs("PRESSURE RESIDUAL PROJECTION", "TRUE"); - options.setArgs("VELOCITY RESIDUAL PROJECTION", "TRUE"); - } - if (ciMode == 4) { - options.setArgs("END TIME", string("0.2")); - options.setArgs("VELOCITY BLOCK SOLVER", "TRUE"); - options.setArgs("SUBCYCLING STEPS", string("1")); - options.setArgs("PRESSURE RESIDUAL PROJECTION", "TRUE"); - options.setArgs("VELOCITY RESIDUAL PROJECTION", "TRUE"); - } options.setArgs("TIME INTEGRATOR", "TOMBO3"); options.setArgs("ADVECTION TYPE", "CONVECTIVE+CUBATURE"); options.setArgs("VELOCITY SOLVER TOLERANCE", string("1e-12")); @@ -64,7 +53,8 @@ void ciTestErrors(nrs_t *nrs, dfloat time, int tstep) double *err = (double *) nek_scPtr(1); - double vxErr, prErr; + const double vxErr = abs((err[0] - 3.65E-10)/err[0]); + const double prErr = abs((err[1] - 6.71E-10)/err[1]); double s1Err, s2Err; int pIterErr; @@ -72,32 +62,14 @@ void ciTestErrors(nrs_t *nrs, dfloat time, int tstep) switch (ciMode) { case 1 : velIterErr = abs(nrs->NiterU - 10); - s1Err = abs((err[2] - 5.25E-12)/err[2]); - s2Err = abs((err[3] - 6.09E-12)/err[3]); + s1Err = abs((err[2] - 1.00E-11)/err[2]); + s2Err = abs((err[3] - 1.31E-11)/err[3]); pIterErr = abs(nrs->NiterP - 4); - vxErr = abs((err[0] - 2.78E-10)/err[0]); - prErr = abs((err[1] - 7.17E-10)/err[1]); break; case 2 : velIterErr = abs(nrs->NiterU - 10); - s1Err = abs((err[2] - 6.49E-12)/err[2]); - s2Err = abs((err[3] - 7.28E-12)/err[3]); - pIterErr = abs(nrs->NiterP - 1); - vxErr = abs((err[0] - 2.78E-10)/err[0]); - prErr = abs((err[1] - 8.38E-10)/err[1]); - break; - case 3 : velIterErr = abs(nrs->NiterU - 4); - s1Err = abs((err[2] - 5.25E-12)/err[2]); - s2Err = abs((err[3] - 6.09E-12)/err[3]); - pIterErr = abs(nrs->NiterP - 1); - vxErr = abs((err[0] - 2.78E-10)/err[0]); - prErr = abs((err[1] - 8.34E-10)/err[1]); - break; - case 4 : velIterErr = abs(nrs->NiterU - 1); s1Err = abs((err[2] - 1.71E-11)/err[2]); s2Err = abs((err[3] - 2.00E-11)/err[3]); pIterErr = abs(nrs->NiterP - 1); - vxErr = abs((err[0] - 3.65E-10)/err[0]); - prErr = abs((err[1] - 6.71E-10)/err[1]); break; } diff --git a/okl/elliptic/ellipticResidualProjection.okl b/okl/elliptic/ellipticResidualProjection.okl index 99545e9f6..b54e39962 100644 --- a/okl/elliptic/ellipticResidualProjection.okl +++ b/okl/elliptic/ellipticResidualProjection.okl @@ -25,18 +25,13 @@ */ @kernel void scalarMultiply(const dlong N, - const dlong fieldOffset, const dlong offset, const dfloat alpha, @restrict dfloat* x) { for(dlong n = 0; n < N; ++n; @tile(p_threadBlockSize,@outer,@inner)) - if(n < N){ - #pragma unroll p_Nfields - for(dlong field = 0 ; field < p_Nfields; ++field){ - x[n + offset + field * fieldOffset] = alpha * x[n + offset + field * fieldOffset]; - } - } + if(n < N) + x[n + offset] = alpha * x[n + offset]; } @kernel void multiScaledAddwOffset(const dlong N, @@ -48,11 +43,9 @@ @restrict dfloat* x) { for(dlong n = 0; n < N; ++n; @tile(p_threadBlockSize,@outer,@inner)) - if(n < N){ + if(n < N) for(dlong k = 0; k < m - 1; ++k) - for(dlong fld = 0; fld < p_Nfields; ++fld) - x[n + destOffset + fld * fieldOffset] = -alphas[k] * x[n + p_Nfields * k * fieldOffset + fld * fieldOffset] + beta * x[n + destOffset + fld * fieldOffset]; - } + x[n + destOffset] = -alphas[k] * x[n + k * fieldOffset] + beta * x[n + destOffset]; } @kernel void accumulate(const dlong N, @@ -65,12 +58,10 @@ for(dlong n = 0; n < N; ++n; @tile(p_threadBlockSize,@outer,@inner)) if(n < N) { // y = alpha[0] * o_xx[:,0] - for(dlong fld = 0 ; fld < p_Nfields; ++fld) - y[n + fld * fieldOffset] = alpha[0] * x[n + fld * fieldOffset]; + y[n] = alpha[0] * x[n]; for(dlong k = 1; k < m; ++k) - for(dlong fld = 0 ; fld < p_Nfields; ++fld) - // y += alpha[k] * o_xx[:,k] - y[n + fld * fieldOffset] += alpha[k] * x[n + p_Nfields * k * fieldOffset + fld * fieldOffset]; + // y += alpha[k] * o_xx[:,k] + y[n] += alpha[k] * x[n + k * fieldOffset]; } } @@ -87,24 +78,16 @@ for(dlong b = 0; b < (N + p_threadBlockSize - 1) / p_threadBlockSize; ++b; @outer(0)) { @shared volatile dfloat s_wxy[p_threadBlockSize]; @exclusive dfloat w_loc; - @exclusive dfloat y_loc[p_Nfields]; + @exclusive dfloat y_loc; for(int v = 0; v < m; ++v) { for(int t = 0; t < p_threadBlockSize; ++t; @inner(0)) { const dlong id = t + p_threadBlockSize * b; if(v == 0 && id < N) { w_loc = w[id]; - for(dlong fld = 0; fld < p_Nfields; ++fld){ - y_loc[fld] = y[id + offset + fld * fieldOffset]; - } - } - dfloat res = 0.f; - if(id < N){ - for(dlong fld = 0; fld < p_Nfields; ++fld){ - res += w_loc * x[id + p_Nfields * fieldOffset * v + fld * fieldOffset] * y_loc[fld]; - } + y_loc = y[id + offset]; } - s_wxy[t] = res; + s_wxy[t] = (id < N) ? w_loc * x[id + fieldOffset * v] * y_loc : 0.f; } @barrier("local"); diff --git a/src/core/parReader.cpp b/src/core/parReader.cpp index 7539d381d..dad1ac845 100644 --- a/src/core/parReader.cpp +++ b/src/core/parReader.cpp @@ -503,6 +503,8 @@ setupAide parRead(std::string &setupFile, MPI_Comm comm) options.setArgs("VELOCITY BLOCK SOLVER", "FALSE"); if(std::strstr(vsolver.c_str(), "block")) { options.setArgs("VELOCITY BLOCK SOLVER", "TRUE"); + if(options.compareArgs("VELOCITY RESIDUAL PROJECTION","TRUE")) + exit("Residual projection is not enabled for the velocity block solver!", EXIT_FAILURE); } } diff --git a/src/elliptic/ellipticResidualProjection.cpp b/src/elliptic/ellipticResidualProjection.cpp index 641eab748..97dbdafd8 100644 --- a/src/elliptic/ellipticResidualProjection.cpp +++ b/src/elliptic/ellipticResidualProjection.cpp @@ -33,30 +33,31 @@ void ResidualProjection::matvec(occa::memory& o_Ax, occa::memory& o_x, const dlong x_offset) { - occa::memory o_xtmp = o_x + Nfields * fieldOffset * x_offset * sizeof(dfloat); - occa::memory o_Axtmp = o_Ax + Nfields * fieldOffset * Ax_offset * sizeof(dfloat); + occa::memory o_xtmp = o_x + fieldOffset * x_offset * sizeof(dfloat); + occa::memory o_Axtmp = o_Ax + fieldOffset * Ax_offset * sizeof(dfloat); matvecOperator(o_xtmp, o_Axtmp); } void ResidualProjection::updateProjectionSpace() { - if(numVecsProjection <= 0) return; + dlong m = numVecsProjection; + if(m <= 0) return; - multiWeightedInnerProduct(o_xx, o_bb, numVecsProjection - 1); - const dfloat norm_orig = alpha[numVecsProjection - 1]; + multiWeightedInnerProduct(o_xx, m, o_bb, m - 1); + const dfloat norm_orig = alpha[m - 1]; dfloat norm_new = norm_orig; const dfloat one = 1.0; - multiScaledAddwOffsetKernel(Nlocal, numVecsProjection, Nfields * (numVecsProjection - 1) * fieldOffset, fieldOffset, o_alpha, one, o_xx); - multiScaledAddwOffsetKernel(Nlocal, numVecsProjection, Nfields * (numVecsProjection - 1) * fieldOffset, fieldOffset, o_alpha, one, o_bb); - for(int k = 0; k < numVecsProjection - 1; ++k) + multiScaledAddwOffsetKernel(Nlocal, m, (m - 1) * fieldOffset, fieldOffset, o_alpha, one, o_xx); + multiScaledAddwOffsetKernel(Nlocal, m, (m - 1) * fieldOffset, fieldOffset, o_alpha, one, o_bb); + for(int k = 0; k < m - 1; ++k) norm_new = norm_new - alpha[k] * alpha[k]; norm_new = sqrt(norm_new); dfloat tol = 1e-7; const dfloat test = norm_new / norm_orig; if(test > tol) { const dfloat scale = 1.0 / norm_new; - scalarMultiplyKernel(Nlocal, fieldOffset, Nfields * (numVecsProjection - 1) * fieldOffset, scale, o_xx); - scalarMultiplyKernel(Nlocal, fieldOffset, Nfields * (numVecsProjection - 1) * fieldOffset, scale, o_bb); + scalarMultiplyKernel(Nlocal, (m - 1) * fieldOffset, scale, o_xx); + scalarMultiplyKernel(Nlocal, (m - 1) * fieldOffset, scale, o_bb); } else { if(verbose && rank == 0) { std::cout << "Detected rank deficiency: " << test << ".\n"; @@ -71,16 +72,13 @@ void ResidualProjection::computePreProjection(occa::memory& o_r) dfloat one = 1.0; dfloat zero = 0.0; dfloat mone = -1.0; - if(numVecsProjection <= 0) return; - multiWeightedInnerProduct(o_xx,o_r,0); + const int m = numVecsProjection; + if(m <= 0) return; + multiWeightedInnerProduct(o_xx,m,o_r,0); - accumulateKernel(Nlocal, numVecsProjection, fieldOffset, o_alpha, o_xx, o_xbar); - accumulateKernel(Nlocal, numVecsProjection, fieldOffset, o_alpha, o_bb, o_rtmp); - if(blockSolver){ - scaledAddKernel(Nlocal, fieldOffset, mone, o_rtmp, one, o_r); - } else { - scaledAddKernel(Nlocal, mone, o_rtmp, one, o_r); - } + accumulateKernel(Nlocal, m, fieldOffset, o_alpha, o_xx, o_xbar); + accumulateKernel(Nlocal, m, fieldOffset, o_alpha, o_bb, o_rtmp); + scaledAddKernel(Nlocal, mone, o_rtmp, one, o_r); } void ResidualProjection::computePostProjection(occa::memory & o_x) @@ -91,33 +89,25 @@ void ResidualProjection::computePostProjection(occa::memory & o_x) if(numVecsProjection == 0) { // reset bases numVecsProjection = 1; - o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat)); + o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat)); } else if(numVecsProjection == maxNumVecsProjection) { numVecsProjection = 1; - if(blockSolver){ - scaledAddKernel(Nlocal, fieldOffset, one, o_xbar, one, o_x); - } else { - scaledAddKernel(Nlocal, one, o_xbar, one, o_x); - } - o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat)); + scaledAddKernel(Nlocal, one, o_xbar, one, o_x); + o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat)); } else { numVecsProjection++; // xx[m-1] = x - o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat), Nfields * (numVecsProjection - 1) * fieldOffset * sizeof(dfloat), 0); + o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat), (numVecsProjection - 1) * fieldOffset * sizeof(dfloat), 0); // x = x + xbar - if(blockSolver){ - scaledAddKernel(Nlocal, fieldOffset, one, o_xbar, one, o_x); - } else { - scaledAddKernel(Nlocal, one, o_xbar, one, o_x); - } + scaledAddKernel(Nlocal, one, o_xbar, one, o_x); } - const dlong previousNumVecsProjection = numVecsProjection; + const dlong m_save = numVecsProjection; matvec(o_bb,numVecsProjection - 1,o_xx,numVecsProjection - 1); updateProjectionSpace(); - if (numVecsProjection < previousNumVecsProjection) { // Last vector was linearly dependent, reset space + if (numVecsProjection < m_save) { // Last vector was linearly dependent, reset space numVecsProjection = 1; - o_xx.copyFrom(o_x, Nfields * fieldOffset * sizeof(dfloat)); // writes first n words of o_xx, first approximation vector + o_xx.copyFrom(o_x, Nlocal * sizeof(dfloat)); // writes first n words of o_xx, first approximation vector matvec(o_bb,0,o_xx,0); updateProjectionSpace(); } @@ -131,14 +121,12 @@ ResidualProjection::ResidualProjection(elliptic_t& elliptic, numTimeSteps(_numTimeSteps), Nlocal(elliptic.mesh->Np * elliptic.mesh->Nelements), fieldOffset(elliptic.Ntotal), - Nfields(elliptic.Nfields), Nblock(elliptic.Nblock), Nblock2(elliptic.Nblock2), resNormFactor(elliptic.resNormFactor), rank(elliptic.mesh->rank), size(elliptic.mesh->size), comm(elliptic.mesh->comm), - blockSolver(elliptic.blockSolver), o_tmp(elliptic.o_tmp), o_tmp2(elliptic.o_tmp2), o_wrk(elliptic.o_wrk), @@ -155,9 +143,9 @@ ResidualProjection::ResidualProjection(elliptic_t& elliptic, work = (dfloat*) calloc(maxNumVecsProjection, sizeof(dfloat)); multiwork = (dfloat*) calloc(Nblock * maxNumVecsProjection, sizeof(dfloat)); o_alpha = elliptic.mesh->device.malloc(maxNumVecsProjection * sizeof(dfloat)); - o_xbar = elliptic.mesh->device.malloc(Nfields * fieldOffset * sizeof(dfloat)); - o_xx = elliptic.mesh->device.malloc(Nfields * fieldOffset * maxNumVecsProjection * sizeof(dfloat)); - o_bb = elliptic.mesh->device.malloc(Nfields * fieldOffset * maxNumVecsProjection * sizeof(dfloat)); + o_xbar = elliptic.mesh->device.malloc(Nlocal * sizeof(dfloat)); + o_xx = elliptic.mesh->device.malloc(fieldOffset * maxNumVecsProjection * sizeof(dfloat)); + o_bb = elliptic.mesh->device.malloc(fieldOffset * maxNumVecsProjection * sizeof(dfloat)); string install_dir; install_dir.assign(getenv("NEKRS_INSTALL_DIR")); @@ -172,7 +160,6 @@ ResidualProjection::ResidualProjection(elliptic_t& elliptic, properties["defines/p_blockSize"] = BLOCKSIZE; properties["defines/dfloat"] = dfloatString; properties["defines/dlong"] = dlongString; - properties["defines/p_Nfields"] = Nfields; filename = oklpath + "ellipticResidualProjection.okl"; scalarMultiplyKernel = elliptic.mesh->device.buildKernel(filename.c_str(), @@ -206,7 +193,8 @@ void ResidualProjection::pre(occa::memory& o_r) if(timestep < numTimeSteps) return; - if(numVecsProjection <= 0) return; + const int m = numVecsProjection; + if(m <= 0) return; dfloat priorResidualNorm = 0.0; if(verbose) priorResidualNorm = sqrt(weightedNorm(o_r) * resNormFactor); @@ -234,23 +222,24 @@ void ResidualProjection::post(occa::memory& o_x) void ResidualProjection::multiWeightedInnerProduct( occa::memory &o_a, + const dlong m, occa::memory &o_b, const dlong offset) { #ifdef ELLIPTIC_ENABLE_TIMER timer::tic("dotp",1); #endif - multiWeightedInnerProduct2Kernel(Nlocal, fieldOffset, Nblock, numVecsProjection, Nfields * offset * fieldOffset, o_invDegree, o_a, o_b, o_wrk); + multiWeightedInnerProduct2Kernel(Nlocal, fieldOffset, Nblock, m, offset * fieldOffset, o_invDegree, o_a, o_b, o_wrk); - o_wrk.copyTo(multiwork, sizeof(dfloat) * numVecsProjection * Nblock); - for(dlong k = 0; k < numVecsProjection; ++k) { + o_wrk.copyTo(multiwork, sizeof(dfloat) * m * Nblock); + for(dlong k = 0; k < m; ++k) { dfloat accum = 0.0; for(dlong n = 0; n < Nblock; ++n) accum += multiwork[n + k * Nblock]; alpha[k] = accum; } - MPI_Allreduce(MPI_IN_PLACE, alpha, numVecsProjection, MPI_DFLOAT, MPI_SUM, comm); - o_alpha.copyFrom(alpha,sizeof(dfloat) * numVecsProjection); + MPI_Allreduce(MPI_IN_PLACE, alpha, m, MPI_DFLOAT, MPI_SUM, comm); + o_alpha.copyFrom(alpha,sizeof(dfloat) * m); #ifdef ELLIPTIC_ENABLE_TIMER timer::toc("dotp"); #endif diff --git a/src/elliptic/ellipticResidualProjection.h b/src/elliptic/ellipticResidualProjection.h index 2e414d848..625419e94 100644 --- a/src/elliptic/ellipticResidualProjection.h +++ b/src/elliptic/ellipticResidualProjection.h @@ -46,6 +46,7 @@ class ResidualProjection final void matvec(occa::memory& o_Ax, const dlong Ax_offset, occa::memory& o_x, const dlong x_offset); void multiWeightedInnerProduct( occa::memory& o_a, + const dlong m, occa::memory& o_b, const dlong offset); const dlong maxNumVecsProjection; @@ -80,13 +81,11 @@ class ResidualProjection final dlong numVecsProjection; const dlong Nlocal; // vector size const dlong fieldOffset; // offset - const dlong Nfields; const dlong Nblock; const dlong Nblock2; const dfloat resNormFactor; const int rank; const int size; - const int blockSolver; MPI_Comm comm; std::function matvecOperator; diff --git a/src/elliptic/ellipticSolve.cpp b/src/elliptic/ellipticSolve.cpp index 9dac1beac..1cb888870 100644 --- a/src/elliptic/ellipticSolve.cpp +++ b/src/elliptic/ellipticSolve.cpp @@ -44,7 +44,7 @@ int ellipticSolve(elliptic_t* elliptic, ellipticUpdateJacobi(elliptic); if(options.compareArgs("RESIDUAL PROJECTION","TRUE")) - elliptic->o_x0.copyFrom(o_x, elliptic->Nfields * elliptic->Ntotal * sizeof(dfloat)); + elliptic->o_x0.copyFrom(o_x, elliptic->Ntotal * sizeof(dfloat)); // compute initial residual ellipticOperator(elliptic, o_x, elliptic->o_Ap, dfloatString);