diff --git a/examples/ethier/ci.inc b/examples/ethier/ci.inc index cf36d8092..d49e1319c 100644 --- a/examples/ethier/ci.inc +++ b/examples/ethier/ci.inc @@ -125,7 +125,7 @@ void ciTestErrors(nrs_t *nrs, dfloat time, int tstep) const int rank = platform->comm.mpiRank; if(tstep == 1){ int NiterP = nrs->pSolver->Niter; - const int expectedNiterP = 8; + const int expectedNiterP = 6; const int pIterErr = abs(NiterP - expectedNiterP); if(pIterErr >= 2) { if(rank==0){ diff --git a/okl/elliptic/chebyshev.okl b/okl/elliptic/chebyshev.okl index d7c6282b8..52c6b9f90 100644 --- a/okl/elliptic/chebyshev.okl +++ b/okl/elliptic/chebyshev.okl @@ -55,4 +55,27 @@ SOFTWARE. x[n] = x[n] + d_value; } } +} +@kernel void updateIntermediateSolutionVec(const dlong N, + const pfloat rhoDivDelta, + const pfloat rho_n, + const pfloat rho_np1, + @restrict const pfloat * SAd, + @restrict pfloat * r, + @restrict pfloat * d, + @restrict pfloat * x){ + + //r_k+1 = r_k - SAd_k + //d_k+1 = rho_k+1*rho_k*d_k + 2*rho_k+1*r_k+1/delta + //x_k+1 = x_k + d_k + for(dlong n=0;nupdateChebyshevSolutionVecKernel(Nrows, rhoDivDelta, rho_np1, rho_n, o_Ad, o_res, o_d, o_x); } +void MGLevel::smoothChebyshevTwoIteration (occa::memory &o_r, occa::memory &o_x, bool xIsZero) +{ + const pfloat theta = 0.5 * (lambda1 + lambda0); + const pfloat delta = 0.5 * (lambda1 - lambda0); + const pfloat invTheta = 1.0 / theta; + const pfloat sigma = theta / delta; + pfloat rho_n = 1. / sigma; + pfloat rho_np1; + + pfloat one = 1., mone = -1., zero = 0.0; + + occa::memory o_res = o_smootherResidual; + occa::memory o_Ad = o_smootherResidual2; + occa::memory o_d = o_smootherUpdate; + + if(xIsZero) { //skip the Ax if x is zero + //res = Sr + this->smoother(o_r, o_res, xIsZero); + + elliptic->updateSmoothedSolutionVecKernel(Nrows, invTheta, o_res, one, o_d, zero, o_x); + } else { + //res = S(r-Ax) + this->Ax(o_x,o_res); + elliptic->scaledAddPfloatKernel(Nrows, one, o_r, mone, o_res); + this->smoother(o_res, o_res, xIsZero); + + elliptic->updateSmoothedSolutionVecKernel(Nrows, invTheta, o_res, one, o_d, one, o_x); + } + + + //r_k+1 = r_k - SAd_k + this->Ax(o_d,o_Ad); + this->smoother(o_Ad, o_Ad, xIsZero); + rho_np1 = 1.0 / (2. * sigma - rho_n); + pfloat rhoDivDelta = 2.0 * rho_np1 / delta; + + elliptic->updateIntermediateSolutionVecKernel(Nrows, rhoDivDelta, rho_n, rho_np1, o_Ad, o_res, o_d, o_x); + + rho_n = rho_np1; + //r_k+1 = r_k - SAd_k + this->Ax(o_d,o_Ad); + this->smoother(o_Ad, o_Ad, xIsZero); + rho_np1 = 1.0 / (2. * sigma - rho_n); + rhoDivDelta = 2.0 * rho_np1 / delta; + + elliptic->updateIntermediateSolutionVecKernel(Nrows, rhoDivDelta, rho_n, rho_np1, o_Ad, o_res, o_d, o_x); + +} void MGLevel::smoothChebyshev (occa::memory &o_r, occa::memory &o_x, bool xIsZero) { if(ChebyshevIterations == 1) { smoothChebyshevOneIteration(o_r,o_x,xIsZero); return; + } else if (ChebyshevIterations == 2) { + smoothChebyshevTwoIteration(o_r,o_x,xIsZero); + return; } const pfloat theta = 0.5 * (lambda1 + lambda0); const pfloat delta = 0.5 * (lambda1 - lambda0); diff --git a/src/elliptic/ellipticSolveSetup.cpp b/src/elliptic/ellipticSolveSetup.cpp index eef68d86e..1e28a5509 100644 --- a/src/elliptic/ellipticSolveSetup.cpp +++ b/src/elliptic/ellipticSolveSetup.cpp @@ -303,6 +303,11 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo) "updateChebyshevSolutionVec", kernelInfo); + elliptic->updateIntermediateSolutionVecKernel = + platform->device.buildKernel(filename, + "updateIntermediateSolutionVec", + kernelInfo); + } // add custom defines