From 2e9a1594ace1e62e227d763ecc6d597308d05c9a Mon Sep 17 00:00:00 2001
From: "Aaron M. Lattanzi" <103702284+AMLattanzi@users.noreply.github.com>
Date: Mon, 29 Jul 2024 10:42:31 -0700
Subject: [PATCH] This needs further testing on BOMEX. (#1707)

---
 Exec/DevTests/Bomex/input_Kessler            | 30 ++++----
 Source/Advection/Advection.H                 |  7 +-
 Source/Advection/AdvectionSrcForState.cpp    | 76 ++++++++++++++++++--
 Source/DataStructs/DataStruct.H              |  6 ++
 Source/TimeIntegration/ERF_slow_rhs_post.cpp | 42 +++++++++--
 Source/TimeIntegration/ERF_slow_rhs_pre.cpp  | 39 ++++++++--
 6 files changed, 168 insertions(+), 32 deletions(-)

diff --git a/Exec/DevTests/Bomex/input_Kessler b/Exec/DevTests/Bomex/input_Kessler
index cc018cbdf..a2861a7e0 100644
--- a/Exec/DevTests/Bomex/input_Kessler
+++ b/Exec/DevTests/Bomex/input_Kessler
@@ -1,9 +1,8 @@
 # ------------------ INPUTS TO MAIN PROGRAM -------------------
-#stop_time = 21600 # 6 hours
 #stop_time = 3600 # 1 hour
+#stop_time = 4800 # 80 minutes
 #stop_time = 7200 # 2 hours
-
-stop_time = 4800 # 80 minutes
+stop_time = 21600 # 6 hours
 
 amrex.fpe_trap_invalid = 1
 
@@ -29,26 +28,27 @@ zhi.type = "SlipWall"
 #zhi.theta_grad = 0.00365
 
 # TIME STEP CONTROL
-erf.no_substepping = 1
-erf.fixed_dt = 0.075 # fixed time step depending on grid resolution
-
+erf.fixed_dt = 0.3 # fixed time step depending on grid resolution
+erf.fixed_mri_dt_ratio = 4
+erf.use_mono_adv = true
+
 # DIAGNOSTICS & VERBOSITY
 erf.sum_interval = 1 # timesteps between computing mass
-erf.v = 0 # verbosity in ERF.cpp
+erf.v = 1 # verbosity in ERF.cpp
 amr.v = 1 # verbosity in Amr.cpp
-erf.data_log = "surf" "mean" "flux" # "subgrid"
-erf.profile_int = 800 # (every minute with dt = 0.075)
+erf.data_log = "surf" "mean" "flux" "subgrid"
+erf.profile_int = 200 # (every minute with dt = 0.3)
 
 # REFINEMENT / REGRIDDING
 amr.max_level = 0 # maximum level number allowed
 
 # CHECKPOINT FILES
 erf.check_file = chk # root name of checkpoint file
-erf.check_int = 8000 # number of timesteps between checkpoints
+erf.check_int = 2000 # number of timesteps between checkpoints
 
 # PLOTFILES
 erf.plot_file_1 = plt # prefix of plotfile name
-erf.plot_int_1 = 8000 # number of timesteps between plotfiles
+erf.plot_int_1 = 2000 # number of timesteps between plotfiles
 erf.plot_vars_1 = density rhotheta x_velocity y_velocity z_velocity pressure temp theta qt qp qv qc qsat
 
 # SOLVER CHOICE
@@ -58,10 +58,10 @@ erf.use_gravity = true
 erf.dycore_horiz_adv_type = Upwind_3rd
 erf.dycore_vert_adv_type = Upwind_3rd
 
-erf.dryscal_horiz_adv_type = WENOZ5
-erf.dryscal_vert_adv_type = WENOZ5
-erf.moistscal_horiz_adv_type = WENOZ5
-erf.moistscal_vert_adv_type = WENOZ5
+erf.dryscal_horiz_adv_type = Upwind_3rd
+erf.dryscal_vert_adv_type = Upwind_3rd
+erf.moistscal_horiz_adv_type = WENO5
+erf.moistscal_vert_adv_type = WENO5
 
 erf.moisture_model = "Kessler_NoRain"
 erf.buoyancy_type = 1
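The erf.use_mono_adv entry above is the new runtime switch introduced by this patch; it is read into SolverChoice through amrex::ParmParse in the DataStruct.H hunk further down. A minimal standalone sketch of that query pattern, with an illustrative function name and the same default of false:

    #include <AMReX_ParmParse.H>

    // Returns the value of "erf.use_mono_adv" from the inputs file, defaulting to false.
    bool query_use_mono_adv ()
    {
        bool use_mono_adv = false;                // default when the key is absent
        amrex::ParmParse pp("erf");               // keys below are looked up with the "erf." prefix
        pp.query("use_mono_adv", use_mono_adv);   // query() leaves the default untouched if not found
        return use_mono_adv;
    }
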
diff --git a/Source/Advection/Advection.H b/Source/Advection/Advection.H
index 9ac750cd1..6228e7672 100644
--- a/Source/Advection/Advection.H
+++ b/Source/Advection/Advection.H
@@ -31,13 +31,18 @@ void AdvectionSrcForRho (const amrex::Box& bx,
                          const bool const_rho);
 /** Compute advection tendency for all scalars other than density and potential temperature */
-void AdvectionSrcForScalars (const amrex::Box& bx,
+void AdvectionSrcForScalars (const amrex::Real& dt,
+                             const amrex::Box& bx,
                              const int icomp,
                              const int ncomp,
                              const amrex::Array4<const amrex::Real>& avg_xmom,
                              const amrex::Array4<const amrex::Real>& avg_ymom,
                              const amrex::Array4<const amrex::Real>& avg_zmom,
+                             const amrex::Array4<const amrex::Real>& cur_cons,
                              const amrex::Array4<const amrex::Real>& cell_prim,
                              const amrex::Array4<amrex::Real>& src,
+                             const bool& use_mono_adv,
+                             amrex::Real* max_s_ptr,
+                             amrex::Real* min_s_ptr,
                              const amrex::Array4<const amrex::Real>& vf_arr,
                              const amrex::GpuArray<amrex::Real, AMREX_SPACEDIM>& cellSizeInv,
                              const amrex::Array4<const amrex::Real>& mf_m,
diff --git a/Source/Advection/AdvectionSrcForState.cpp b/Source/Advection/AdvectionSrcForState.cpp
index a7379af6e..02248d631 100644
--- a/Source/Advection/AdvectionSrcForState.cpp
+++ b/Source/Advection/AdvectionSrcForState.cpp
@@ -44,8 +44,7 @@ AdvectionSrcForRho (const Box& bx,
                     const Array4<const Real>& mf_u,
                     const Array4<const Real>& mf_v,
                     const GpuArray<const Array4<Real>, AMREX_SPACEDIM>& flx_arr,
-                    const bool const_rho
-                    )
+                    const bool const_rho)
 {
     BL_PROFILE_VAR("AdvectionSrcForRho", AdvectionSrcForRho);
     auto dxInv = cellSizeInv[0], dyInv = cellSizeInv[1], dzInv = cellSizeInv[2];
@@ -68,7 +67,7 @@ AdvectionSrcForRho (const Box& bx,
     {
         Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
         (flx_arr[2])(i,j,k,0) = az_arr(i,j,k) * Omega(i,j,k) / mfsq;
-        avg_zmom(i,j,k ) = (flx_arr[2])(i,j,k,0);
+        avg_zmom(i,j,k) = (flx_arr[2])(i,j,k,0);
     });
 
     if (const_rho) {
@@ -117,12 +116,19 @@ AdvectionSrcForRho (const Box& bx,
 */
 
 void
-AdvectionSrcForScalars (const Box& bx, const int icomp, const int ncomp,
+AdvectionSrcForScalars (const Real& dt,
+                        const Box& bx,
+                        const int icomp,
+                        const int ncomp,
                         const Array4<const Real>& avg_xmom,
                         const Array4<const Real>& avg_ymom,
                         const Array4<const Real>& avg_zmom,
+                        const Array4<const Real>& cur_cons,
                         const Array4<const Real>& cell_prim,
                         const Array4<Real>& advectionSrc,
+                        const bool& use_mono_adv,
+                        Real* max_s_ptr,
+                        Real* min_s_ptr,
                         const Array4<const Real>& detJ,
                         const GpuArray<Real, AMREX_SPACEDIM>& cellSizeInv,
                         const Array4<const Real>& mf_m,
@@ -248,6 +254,68 @@ AdvectionSrcForScalars (const Box& bx, const int icomp, const int ncomp,
         }
     }
 
+    // Monotonicity preserving order reduction for SLOW SCALARS (0-th upwind)
+    if (use_mono_adv) {
+        ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            const int cons_index = icomp + n;
+            const int prim_index = cons_index - 1;
+
+            Real max_val = max_s_ptr[cons_index];
+            Real min_val = min_s_ptr[cons_index];
+
+            Real invdetJ = (detJ(i,j,k) > 0.) ? 1. / detJ(i,j,k) : 1.;
+            Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
+
+            Real RHS = - invdetJ * mfsq * (
+                         ( (flx_arr[0])(i+1,j  ,k  ,cons_index) - (flx_arr[0])(i,j,k,cons_index) ) * dxInv +
+                         ( (flx_arr[1])(i  ,j+1,k  ,cons_index) - (flx_arr[1])(i,j,k,cons_index) ) * dyInv +
+                         ( (flx_arr[2])(i  ,j  ,k+1,cons_index) - (flx_arr[2])(i,j,k,cons_index) ) * dzInv );
+
+            // NOTE: This forward prediction uses the `cur_cons` as opposed to `old_cons` since
+            //       source terms may cause increase/decrease in a variable each RK stage. We
+            //       want to ensure the advection operator does not induce over/under-shoot
+            //       from the current state. If the `old_cons` is used and significant forcing is
+            //       present, we could trip an order reduction just due to the source terms.
+            Real tmp_upd = cur_cons(i,j,k,cons_index) + RHS*dt;
+            if (tmp_upd<min_val || tmp_upd>max_val) {
+                // HI
+                if (avg_xmom(i+1,j,k)>0.0) {
+                    (flx_arr[0])(i+1,j,k,cons_index) = avg_xmom(i+1,j,k) * cell_prim(i  ,j,k,prim_index);
+                } else {
+                    (flx_arr[0])(i+1,j,k,cons_index) = avg_xmom(i+1,j,k) * cell_prim(i+1,j,k,prim_index);
+                }
+                if (avg_ymom(i,j+1,k)>0.0) {
+                    (flx_arr[1])(i,j+1,k,cons_index) = avg_ymom(i,j+1,k) * cell_prim(i,j  ,k,prim_index);
+                } else {
+                    (flx_arr[1])(i,j+1,k,cons_index) = avg_ymom(i,j+1,k) * cell_prim(i,j+1,k,prim_index);
+                }
+                if (avg_zmom(i,j,k+1)>0.0) {
+                    (flx_arr[2])(i,j,k+1,cons_index) = avg_zmom(i,j,k+1) * cell_prim(i,j,k  ,prim_index);
+                } else {
+                    (flx_arr[2])(i,j,k+1,cons_index) = avg_zmom(i,j,k+1) * cell_prim(i,j,k+1,prim_index);
+                }
+
+                // LO
+                if (avg_xmom(i,j,k)>0.0) {
+                    (flx_arr[0])(i,j,k,cons_index) = avg_xmom(i,j,k) * cell_prim(i-1,j,k,prim_index);
+                } else {
+                    (flx_arr[0])(i,j,k,cons_index) = avg_xmom(i,j,k) * cell_prim(i  ,j,k,prim_index);
+                }
+                if (avg_ymom(i,j,k)>0.0) {
+                    (flx_arr[1])(i,j,k,cons_index) = avg_ymom(i,j,k) * cell_prim(i,j-1,k,prim_index);
+                } else {
+                    (flx_arr[1])(i,j,k,cons_index) = avg_ymom(i,j,k) * cell_prim(i,j  ,k,prim_index);
+                }
+                if (avg_zmom(i,j,k)>0.0) {
+                    (flx_arr[2])(i,j,k,cons_index) = avg_zmom(i,j,k) * cell_prim(i,j,k-1,prim_index);
+                } else {
+                    (flx_arr[2])(i,j,k,cons_index) = avg_zmom(i,j,k) * cell_prim(i,j,k  ,prim_index);
+                }
+            }
+        });
+    }
+
     ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
     {
         Real invdetJ = (detJ(i,j,k) > 0.) ? 1. / detJ(i,j,k) : 1.;
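The order-reduction block added above has a simple structure once the AMReX indexing is stripped away: each cell forward-predicts its advective update with the current high-order fluxes, and only if that prediction would leave the precomputed [min, max] range of the scalar are the faces of that cell rewritten with first-order (donor-cell) upwind fluxes. A one-dimensional sketch of the same idea, with hypothetical names, plain std::vector storage in place of Array4, and the metric terms (detJ, map factors) dropped:

    #include <algorithm>
    #include <vector>

    // 1-D sketch: wherever the forward-predicted update would leave [min_val, max_val],
    // replace the high-order face fluxes of that cell with first-order upwind fluxes.
    void limit_fluxes_1d (const std::vector<double>& q,    // cell-centered scalar values
                          const std::vector<double>& mom,  // face momenta, size q.size()+1
                          std::vector<double>& flux,       // high-order face fluxes, size q.size()+1
                          double min_val, double max_val,
                          double dt, double dxInv)
    {
        const int nc = static_cast<int>(q.size());
        for (int i = 0; i < nc; ++i) {
            // Forward prediction of the advective update with the current fluxes
            double rhs = -(flux[i+1] - flux[i]) * dxInv;
            double tmp = q[i] + rhs * dt;
            if (tmp < min_val || tmp > max_val) {
                // HI face: donor-cell upwinding chosen by the sign of the face momentum
                flux[i+1] = (mom[i+1] > 0.0) ? mom[i+1] * q[i]
                                             : mom[i+1] * q[std::min(i+1, nc-1)];
                // LO face
                flux[i  ] = (mom[i  ] > 0.0) ? mom[i  ] * q[std::max(i-1, 0)]
                                             : mom[i  ] * q[i];
            }
        }
    }

In the actual hunk the prediction uses the conserved variable (cur_cons) while the rebuilt fluxes use the primitive one (cell_prim), and the bounds come from the per-component max_s_ptr/min_s_ptr arrays filled in ERF_slow_rhs_post.cpp and ERF_slow_rhs_pre.cpp below; the boundary clamping with std::min/std::max here is only to keep the sketch self-contained.
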
diff --git a/Source/DataStructs/DataStruct.H b/Source/DataStructs/DataStruct.H
index 0caeb5fa4..e2ee13a6e 100644
--- a/Source/DataStructs/DataStruct.H
+++ b/Source/DataStructs/DataStruct.H
@@ -294,6 +294,9 @@ struct SolverChoice {
             NumDiffCoeff *= std::pow(2.0,-6);
         }
 
+        // Use monotonic advection?
+        pp.query("use_mono_adv",use_mono_adv);
+
         advChoice.init_params();
         diffChoice.init_params();
         spongeChoice.init_params();
@@ -523,6 +526,9 @@ struct SolverChoice {
     bool use_NumDiff{false};
     amrex::Real NumDiffCoeff{0.};
 
+    // Monotonic advection limiter
+    bool use_mono_adv{false};
+
     CouplingType coupling_type;
     TerrainType terrain_type;
     MoistureType moisture_type;
diff --git a/Source/TimeIntegration/ERF_slow_rhs_post.cpp b/Source/TimeIntegration/ERF_slow_rhs_post.cpp
index 9e3360319..fe30fe6a5 100644
--- a/Source/TimeIntegration/ERF_slow_rhs_post.cpp
+++ b/Source/TimeIntegration/ERF_slow_rhs_post.cpp
@@ -61,7 +61,8 @@ void erf_slow_rhs_post (int level, int finest_level,
                         const MultiFab& source,
                         const MultiFab* SmnSmn,
                         const MultiFab* eddyDiffs,
-                        MultiFab* Hfx3, MultiFab* Diss,
+                        MultiFab* Hfx3,
+                        MultiFab* Diss,
                         const Geometry geom,
                         const SolverChoice& solverChoice,
                         std::unique_ptr<ABLMost>& most,
@@ -111,6 +112,7 @@ void erf_slow_rhs_post (int level, int finest_level,
     const bool l_moving_terrain = (solverChoice.terrain_type == TerrainType::Moving);
     if (l_moving_terrain) AMREX_ALWAYS_ASSERT(l_use_terrain);
 
+    const bool l_use_mono_adv = solverChoice.use_mono_adv;
     const bool l_use_QKE = tc.use_QKE && tc.advect_QKE;
     const bool l_use_deardorff = (tc.les_type == LESType::Deardorff);
     const bool l_use_diff = ((dc.molec_diff_type != MolecDiffType::None) ||
@@ -163,6 +165,32 @@ void erf_slow_rhs_post (int level, int finest_level,
         is_valid_slow_var[RhoQ1_comp] = 1;
     }
 
+    // *****************************************************************************
+    // Monotonic advection for scalars
+    // *****************************************************************************
+    int nvar = S_new[IntVars::cons].nComp();
+    Vector<Real> max_scal(nvar, 1.0e34); Gpu::DeviceVector<Real> max_scal_d(nvar);
+    Vector<Real> min_scal(nvar,-1.0e34); Gpu::DeviceVector<Real> min_scal_d(nvar);
+    if (l_use_mono_adv) {
+        auto const& ma_s_arr = S_new[IntVars::cons].const_arrays();
+        for (int ivar(RhoKE_comp); ivar<nvar; ++ivar) {
+            GpuTuple<Real,Real> mm = ParReduce(TypeList<ReduceOpMax,ReduceOpMin>{},
+                                               TypeList<Real,Real>{},
+                                               S_new[IntVars::cons], IntVect(0),
+            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                -> GpuTuple<Real,Real>
+            {
+                return { ma_s_arr[box_no](i,j,k,ivar), ma_s_arr[box_no](i,j,k,ivar) };
+            });
+            max_scal[ivar] = get<0>(mm);
+            min_scal[ivar] = get<1>(mm);
+        }
+    }
+    Gpu::copy(Gpu::hostToDevice, max_scal.begin(), max_scal.end(), max_scal_d.begin());
+    Gpu::copy(Gpu::hostToDevice, min_scal.begin(), min_scal.end(), min_scal_d.begin());
+    Real* max_s_ptr = max_scal_d.data();
+    Real* min_s_ptr = min_scal_d.data();
+
     // *************************************************************************
     // Calculate cell-centered eddy viscosity & diffusivities
     //
@@ -204,7 +232,7 @@ void erf_slow_rhs_post (int level, int finest_level,
             // *************************************************************************
             // Define Array4's
             // *************************************************************************
-            const Array4< Real> & old_cons = S_old[IntVars::cons].array(mfi);
+            const Array4<const Real> & old_cons = S_old[IntVars::cons].array(mfi);
             const Array4< Real> & cell_rhs = S_rhs[IntVars::cons].array(mfi);
             const Array4< Real> & new_cons = S_new[IntVars::cons].array(mfi);
 
@@ -340,10 +368,10 @@ void erf_slow_rhs_post (int level, int finest_level,
                         num_comp = 1;
                     }
 
-                    AdvectionSrcForScalars(tbx, start_comp, num_comp, avg_xmom, avg_ymom, avg_zmom,
-                                           cur_prim, cell_rhs,
-                                           detJ_arr,
-                                           dxInv, mf_m,
+                    AdvectionSrcForScalars(dt, tbx, start_comp, num_comp, avg_xmom, avg_ymom, avg_zmom,
+                                           cur_cons, cur_prim, cell_rhs,
+                                           l_use_mono_adv, max_s_ptr, min_s_ptr,
+                                           detJ_arr, dxInv, mf_m,
                                            horiz_adv_type, vert_adv_type,
                                            horiz_upw_frac, vert_upw_frac,
                                            flx_arr, domain, bc_ptr_h);
@@ -429,6 +457,8 @@ void erf_slow_rhs_post (int level, int finest_level,
                         cur_cons(i,j,k,n) = amrex::max(cur_cons(i,j,k,n), eps);
                     } else if (ivar == RhoQKE_comp) {
                         cur_cons(i,j,k,n) = amrex::max(cur_cons(i,j,k,n), 1e-12);
+                    } else if (ivar >= RhoQ1_comp) {
+                        cur_cons(i,j,k,n) = amrex::max(cur_cons(i,j,k,n), 0.0);
                     }
                 });
 
diff --git a/Source/TimeIntegration/ERF_slow_rhs_pre.cpp b/Source/TimeIntegration/ERF_slow_rhs_pre.cpp
index 059923b56..2a0b6d50d 100644
--- a/Source/TimeIntegration/ERF_slow_rhs_pre.cpp
+++ b/Source/TimeIntegration/ERF_slow_rhs_pre.cpp
@@ -85,7 +85,8 @@ void erf_slow_rhs_pre (int level, int finest_level,
                        MultiFab* Tau23, MultiFab* Tau31, MultiFab* Tau32,
                        MultiFab* SmnSmn,
                        MultiFab* eddyDiffs,
-                       MultiFab* Hfx3, MultiFab* Diss,
+                       MultiFab* Hfx3,
+                       MultiFab* Diss,
                        const Geometry geom,
                        const SolverChoice& solverChoice,
                        std::unique_ptr<ABLMost>& most,
@@ -136,6 +137,7 @@ void erf_slow_rhs_pre (int level, int finest_level,
     const bool l_moving_terrain = (solverChoice.terrain_type == TerrainType::Moving);
     if (l_moving_terrain) AMREX_ALWAYS_ASSERT (l_use_terrain);
 
+    const bool l_use_mono_adv = solverChoice.use_mono_adv;
     const bool l_reflux = (solverChoice.coupling_type == CouplingType::TwoWay);
 
     const bool l_use_diff = ( (dc.molec_diff_type != MolecDiffType::None) ||
@@ -199,9 +201,34 @@ void erf_slow_rhs_pre (int level, int finest_level,
     } // l_use_diff
 
     // *****************************************************************************
-    // Define updates and fluxes in the current RK stage
+    // Monotonic advection for scalars
     // *****************************************************************************
+    int nvar = S_data[IntVars::cons].nComp();
+    Vector<Real> max_scal(nvar, 1.0e34); Gpu::DeviceVector<Real> max_scal_d(nvar);
+    Vector<Real> min_scal(nvar,-1.0e34); Gpu::DeviceVector<Real> min_scal_d(nvar);
+    if (l_use_mono_adv) {
+        auto const& ma_s_arr = S_data[IntVars::cons].const_arrays();
+        for (int ivar(RhoTheta_comp); ivar<nvar; ++ivar) {
+            GpuTuple<Real,Real> mm = ParReduce(TypeList<ReduceOpMax,ReduceOpMin>{},
+                                               TypeList<Real,Real>{},
+                                               S_data[IntVars::cons], IntVect(0),
+            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
+                -> GpuTuple<Real,Real>
+            {
+                return { ma_s_arr[box_no](i,j,k,ivar), ma_s_arr[box_no](i,j,k,ivar) };
+            });
+            max_scal[ivar] = get<0>(mm);
+            min_scal[ivar] = get<1>(mm);
+        }
+    }
+    Gpu::copy(Gpu::hostToDevice, max_scal.begin(), max_scal.end(), max_scal_d.begin());
+    Gpu::copy(Gpu::hostToDevice, min_scal.begin(), min_scal.end(), min_scal_d.begin());
+    Real* max_s_ptr = max_scal_d.data();
+    Real* min_s_ptr = min_scal_d.data();
+
+    // *****************************************************************************
+    // Define updates and fluxes in the current RK stage
+    // *****************************************************************************
 #ifdef _OPENMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
@@ -276,7 +303,6 @@ void erf_slow_rhs_pre (int level, int finest_level,
         // Base state
         const Array4<const Real>& p0_arr = p0->const_array(mfi);
 
-        // *****************************************************************************
         // *****************************************************************************
         // Define flux arrays for use in advection
         // *****************************************************************************
@@ -414,10 +440,11 @@ void erf_slow_rhs_pre (int level, int finest_level,
                                flx_arr, l_const_rho);
 
             int icomp = RhoTheta_comp; int ncomp = 1;
-            AdvectionSrcForScalars(bx, icomp, ncomp,
+            AdvectionSrcForScalars(dt, bx, icomp, ncomp,
                                    avg_xmom, avg_ymom, avg_zmom,
-                                   cell_prim, cell_rhs, detJ_arr,
-                                   dxInv, mf_m,
+                                   cell_data, cell_prim, cell_rhs,
+                                   l_use_mono_adv, max_s_ptr, min_s_ptr,
+                                   detJ_arr, dxInv, mf_m,
                                    l_horiz_adv_type, l_vert_adv_type,
                                    l_horiz_upw_frac, l_vert_upw_frac,
                                    flx_arr, domain, bc_ptr_h);
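The max_s_ptr/min_s_ptr bounds passed into AdvectionSrcForScalars in both RHS routines come from the amrex::ParReduce calls added above, which return the maximum and minimum of a single conserved component. A minimal standalone sketch of that reduction pattern, with an illustrative function name (the MultiFab and component index stand in for S_new[IntVars::cons] / S_data[IntVars::cons] and ivar):

    #include <AMReX_MultiFab.H>
    #include <AMReX_ParReduce.H>

    // Max and min of component icomp of a cell-centered MultiFab (no ghost cells).
    amrex::GpuTuple<amrex::Real, amrex::Real>
    component_max_min (const amrex::MultiFab& mf, int icomp)
    {
        using namespace amrex;
        auto const& ma = mf.const_arrays();
        return ParReduce(TypeList<ReduceOpMax, ReduceOpMin>{},
                         TypeList<Real, Real>{},
                         mf, IntVect(0),
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
            -> GpuTuple<Real, Real>
        {
            // The same cell value feeds both reduction operators.
            return { ma[box_no](i,j,k,icomp), ma[box_no](i,j,k,icomp) };
        });
    }

amrex::get<0> and amrex::get<1> of the returned tuple give the max and min, matching the max_scal[ivar]/min_scal[ivar] assignments in the hunks above; as with the other AMReX reduction helpers, the result is local to the calling MPI rank unless it is followed by a ParallelDescriptor reduction.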