Nek5000 · stgeke · Apr 1, 2021 · Mar 31, 2021
diff --git a/examples/ethier/ci.inc b/examples/ethier/ci.inc
@@ -125,7 +125,7 @@ void ciTestErrors(nrs_t *nrs, dfloat time, int tstep)
   const int rank = platform->comm.mpiRank;
   if(tstep == 1){
     int NiterP = nrs->pSolver->Niter;
-    const int expectedNiterP = 8;
+    const int expectedNiterP = 6;
     const int pIterErr = abs(NiterP - expectedNiterP);
     if(pIterErr >= 2) {
       if(rank==0){

diff --git a/okl/elliptic/chebyshev.okl b/okl/elliptic/chebyshev.okl
@@ -55,4 +55,27 @@ SOFTWARE.
       x[n] = x[n] + d_value;
     }
   }
+}
+@kernel void updateIntermediateSolutionVec(const dlong N,
+                      const pfloat rhoDivDelta,
+                      const pfloat rho_n,
+                      const pfloat rho_np1,
+                      @restrict const pfloat * SAd,
+                      @restrict pfloat *  r,
+                      @restrict pfloat *  d,
+                      @restrict pfloat * x){
+  // Fused entry-wise update of r, d and x (in that order) for entries 0..N-1; SAd is read-only.
+  //r_k+1 = r_k - SAd_k
+  //d_k+1 = rho_k+1*rho_k*d_k  + rhoDivDelta*r_k+1   (formula assumes the caller passes rhoDivDelta = 2*rho_k+1/delta)
+  //x    <- x + d_k+1   (adds the freshly updated direction d_k+1, not the old d_k)
+  for(dlong n=0;n<N;++n;@tile(p_blockSize,@outer,@inner)){
+    if(n<N){
+      const pfloat rkp1 = r[n] - SAd[n]; // new residual entry
+      const pfloat dk = d[n]; // old direction, read before d[n] is overwritten below
+      r[n] = rkp1;
+      const pfloat dkp1 = rho_np1 * rho_n * dk + rhoDivDelta * rkp1; // uses the NEW residual rkp1
+      d[n] = dkp1;
+      x[n] = x[n] + dkp1;
+    }
+  }
 }
diff --git a/src/elliptic/elliptic.h b/src/elliptic/elliptic.h
@@ -129,9 +129,14 @@ struct elliptic_t
   occa::kernel copyDfloatToPfloatKernel;
   occa::kernel fusedCopyDfloatToPfloatKernel;
   occa::kernel copyPfloatToDPfloatKernel;
+
+  // special kernels for single Chebyshev iteration
   occa::kernel updateSmoothedSolutionVecKernel;
   occa::kernel updateChebyshevSolutionVecKernel;
 
+  // special kernel for two Chebyshev iterations
+  occa::kernel updateIntermediateSolutionVecKernel;
+
   dfloat resNormFactor;
 
   // combined PCG update step

diff --git a/src/elliptic/ellipticMultiGrid.h b/src/elliptic/ellipticMultiGrid.h
@@ -146,6 +146,7 @@ class MGLevel : public parAlmond::multigridLevel
   void buildCoarsenerQuadHex(mesh_t** meshLevels, int Nf, int Nc);
 private:
   void smoothChebyshevOneIteration (occa::memory &o_r, occa::memory &o_x, bool xIsZero);
+  void smoothChebyshevTwoIteration (occa::memory &o_r, occa::memory &o_x, bool xIsZero);
 };
 
 void MGLevelAllocateStorage(MGLevel* level, int k, parAlmond::CycleType ctype);

diff --git a/src/elliptic/ellipticMultiGridLevel.cpp b/src/elliptic/ellipticMultiGridLevel.cpp
@@ -139,12 +139,63 @@ void MGLevel::smoothChebyshevOneIteration (occa::memory &o_r, occa::memory &o_x,
   pfloat rhoDivDelta = 2.0 * rho_np1 / delta;
   elliptic->updateChebyshevSolutionVecKernel(Nrows, rhoDivDelta, rho_np1, rho_n, o_Ad, o_res, o_d, o_x);
 }
+void MGLevel::smoothChebyshevTwoIteration (occa::memory &o_r, occa::memory &o_x, bool xIsZero)
+{ // Chebyshev smoothing unrolled for exactly two iterations; smoothChebyshev dispatches here when ChebyshevIterations == 2
+  const pfloat theta = 0.5 * (lambda1 + lambda0); // midpoint of [lambda0, lambda1] (presumably eigenvalue bounds - confirm)
+  const pfloat delta = 0.5 * (lambda1 - lambda0); // half-width of [lambda0, lambda1]
+  const pfloat invTheta = 1.0 / theta;
+  const pfloat sigma = theta / delta;
+  pfloat rho_n = 1. / sigma; // starting value of the rho recurrence rho_np1 = 1/(2*sigma - rho_n)
+  pfloat rho_np1;
+  // scalar weights handed to the vector-update kernels below
+  pfloat one = 1., mone = -1., zero = 0.0;
+  // local handles to the member scratch buffers used as residual, S*A*d and direction
+  occa::memory o_res = o_smootherResidual;
+  occa::memory o_Ad  = o_smootherResidual2;
+  occa::memory o_d   = o_smootherUpdate;
+
+  if(xIsZero) { //skip the Ax if x is zero
+    //res = Sr
+    this->smoother(o_r, o_res, xIsZero);
+    // last scalar is zero here, so the old contents of o_x are presumably discarded (kernel body not visible - confirm)
+    elliptic->updateSmoothedSolutionVecKernel(Nrows, invTheta, o_res, one, o_d, zero, o_x);
+  } else {
+    //res = S(r-Ax)
+    this->Ax(o_x,o_res);
+    elliptic->scaledAddPfloatKernel(Nrows, one, o_r, mone, o_res); // presumably res <- r - res, matching the formula above
+    this->smoother(o_res, o_res, xIsZero); // smoother applied in place on o_res
+    // same call as the xIsZero branch but with scalar one for o_x, presumably accumulating into the existing x - confirm
+    elliptic->updateSmoothedSolutionVecKernel(Nrows, invTheta, o_res, one, o_d, one, o_x);
+  }
+
+  // ---- first of the two explicitly unrolled update steps ----
+  //o_Ad = S*A*d_k; the subtraction r_k+1 = r_k - SAd_k itself is done inside updateIntermediateSolutionVecKernel
+  this->Ax(o_d,o_Ad);
+  this->smoother(o_Ad, o_Ad, xIsZero); // in place on o_Ad
+  rho_np1 = 1.0 / (2. * sigma - rho_n);
+  pfloat rhoDivDelta = 2.0 * rho_np1 / delta;
+  // fused kernel: updates o_res, o_d and o_x in a single launch
+  elliptic->updateIntermediateSolutionVecKernel(Nrows, rhoDivDelta, rho_n, rho_np1, o_Ad, o_res, o_d, o_x);
+  // ---- second step: same sequence with the rho recurrence advanced ----
+  rho_n = rho_np1;
+  //o_Ad = S*A*d_k; the subtraction r_k+1 = r_k - SAd_k itself is done inside updateIntermediateSolutionVecKernel
+  this->Ax(o_d,o_Ad);
+  this->smoother(o_Ad, o_Ad, xIsZero); // in place on o_Ad
+  rho_np1 = 1.0 / (2. * sigma - rho_n);
+  rhoDivDelta = 2.0 * rho_np1 / delta;
+
+  elliptic->updateIntermediateSolutionVecKernel(Nrows, rhoDivDelta, rho_n, rho_np1, o_Ad, o_res, o_d, o_x);
+
+}
 
 void MGLevel::smoothChebyshev (occa::memory &o_r, occa::memory &o_x, bool xIsZero)
 {
   if(ChebyshevIterations == 1) {
     smoothChebyshevOneIteration(o_r,o_x,xIsZero);
     return;
+  } else if (ChebyshevIterations == 2) {
+    smoothChebyshevTwoIteration(o_r,o_x,xIsZero);
+    return;
   }
   const pfloat theta = 0.5 * (lambda1 + lambda0);
   const pfloat delta = 0.5 * (lambda1 - lambda0);

diff --git a/src/elliptic/ellipticSolveSetup.cpp b/src/elliptic/ellipticSolveSetup.cpp
@@ -303,6 +303,11 @@ void ellipticSolveSetup(elliptic_t* elliptic, occa::properties kernelInfo)
                                    "updateChebyshevSolutionVec",
                                    kernelInfo);
 
+        elliptic->updateIntermediateSolutionVecKernel =
+          platform->device.buildKernel(filename,
+                                   "updateIntermediateSolutionVec",
+                                   kernelInfo);
+
   }
 
   // add custom defines