diff --git a/doc/ChangeLog b/doc/ChangeLog
index 9822c0361f..fb46dbae7a 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,5 +1,64 @@
 ===============================================================
 
+Tag name: cam6_4_052
+Originator(s): huebleruwm, nusbaume
+Date: 6 Jan 2025
+One-line Summary: clubb_intr GPUization
+Github PR URL: https://github.com/ESCOMP/CAM/pull/1175
+
+Purpose of changes (include the issue number and title text for each relevant GitHub issue):
+
+Adds OpenACC directives to clubb_intr.F90 to enable GPU offloading of CLUBB.
+BFB on CPUs, answer changing (but passes ECT) on GPUs.
+
+Describe any changes made to build system: N/A
+
+Describe any changes made to the namelist: N/A
+
+List any changes to the defaults for the boundary datasets: N/A
+
+Describe any substantial timing or memory changes: N/A
+
+Code reviewed by: nusbaume, katetc, cacraigucar
+
+List all files eliminated: N/A
+
+List all files added and what they do: N/A
+
+List all existing files that have been modified, and describe the changes:
+M       src/physics/cam/clubb_intr.F90
+ - add OpenACC directives for GPU offloading
+ - add timer calls to evaulate performance
+
+If there were any failures reported from running test_driver.sh on any test
+platform, and checkin with these failures has been OK'd by the gatekeeper,
+then copy the lines from the td.*.status files for the failed tests to the
+appropriate machine below.  All failed tests must be justified.
+
+derecho/intel/aux_cam:
+
+  DIFF SMS_Lh12.f09_f09_mg17.FCSD_HCO.derecho_intel.cam-outfrq3h
+  FAIL ERP_Ln9.f09_f09_mg17.FCSD_HCO.derecho_intel.cam-outfrq9s
+ - pre-existing failure due to HEMCO not having reproducible results issues #1018 and #856
+
+  PEND SMS_D_Ln9.f19_f19_mg17.FXHIST.derecho_intel.cam-outfrq9s_amie
+  FAIL SMS_D_Ln9_P1280x1.ne0CONUSne30x8_ne0CONUSne30x8_mt12.FCHIST.derecho_intel.cam-outfrq9s
+ - pre-existing failures due to build-namelist error requiring CLM/CTSM external update.
+
+derecho/nvhpc/aux_cam:
+
+ERS_Ln9.ne30pg3_ne30pg3_mg17.F2000dev.derecho_nvhpc.cam-outfrq9s_gpu_default (Overall: FAIL)
+ - Expected baseline failures due to non-BFB answer changes with new CLUBB GPU-offloading.
+
+izumi/nag/aux_cam:
+  FAIL DAE.f45_f45_mg37.FHS94.izumi_nag.cam-dae
+ - pre-existing failure -- issue #670
+
+izumi/gnu/aux_cam: ALL PASS
+
+===============================================================
+===============================================================
+
 Tag name: cam6_4_051
 Originator(s): fvitt
 Date: 2 Jan 2025
diff --git a/src/physics/cam/clubb_intr.F90 b/src/physics/cam/clubb_intr.F90
index 41580cc2c7..a9f25f0256 100644
--- a/src/physics/cam/clubb_intr.F90
+++ b/src/physics/cam/clubb_intr.F90
@@ -31,7 +31,7 @@ module clubb_intr
 
 #ifdef CLUBB_SGS
   use clubb_api_module,    only: pdf_parameter, implicit_coefs_terms
-  use clubb_api_module,    only: clubb_config_flags_type, grid, stats, & 
+  use clubb_api_module,    only: clubb_config_flags_type, grid, stats, &
                                  nu_vertical_res_dep, stats_metadata_type, &
                                  hm_metadata_type, sclr_idx_type
 
@@ -52,12 +52,16 @@ module clubb_intr
                                 stats_sfc(pcols)        ! stats_sfc
   type (hm_metadata_type) :: &
     hm_metadata
-                                
+
   type (stats_metadata_type) :: &
     stats_metadata
 
   type (sclr_idx_type) :: &
     sclr_idx
+
+  integer :: &
+    nzm_clubb, &  !Number of vertical levels used by CLUBB momentum variables
+    nzt_clubb     !Number of vertical levels used by CLUBB thermodynamic variables
 #endif
 
   private
@@ -95,7 +99,7 @@ module clubb_intr
 
   ! These are zero by default, but will be set by SILHS before they are used by subcolumns
   integer :: &
-      hydromet_dim = 0, & 
+      hydromet_dim = 0, &
       pdf_dim      = 0
 
 
@@ -117,7 +121,7 @@ module clubb_intr
     hm_metadata
 #endif
 #endif
-      
+
   ! ------------ !
   ! Private data !
   ! ------------ !
@@ -340,7 +344,7 @@ module clubb_intr
     clubb_l_mono_flux_lim_um,           & ! Flag to turn on monotonic flux limiter for um
     clubb_l_mono_flux_lim_vm,           & ! Flag to turn on monotonic flux limiter for vm
     clubb_l_mono_flux_lim_spikefix,     & ! Flag to implement monotonic flux limiter code that
-                                          ! eliminates spurious drying tendencies at model top  
+                                          ! eliminates spurious drying tendencies at model top
     clubb_l_host_applies_sfc_fluxes       ! Whether the host model applies the surface fluxes
 
   logical :: &
@@ -349,7 +353,7 @@ module clubb_intr
 !  Constant parameters
   logical, parameter, private :: &
     l_implemented    = .true.            ! Implemented in a host model (always true)
-    
+
   logical, parameter, private :: &
     apply_to_heat    = .false.           ! Apply WACCM energy fixer to heat or not (.true. = yes (duh))
 
@@ -1477,7 +1481,7 @@ subroutine clubb_ini_cam(pbuf2d)
     logical, parameter :: l_input_fields = .false. ! Always false for CAM-CLUBB.
     logical, parameter :: l_update_pressure = .false. ! Always false for CAM-CLUBB.
 
-    integer :: nlev, ierr=0
+    integer :: ierr=0
 
     real(r8) :: &
       C1, C1b, C1c, C2rt, C2thl, C2rtthl, &
@@ -1505,12 +1509,15 @@ subroutine clubb_ini_cam(pbuf2d)
 
     !----- Begin Code -----
 
-    nlev = pver + 1 - top_lev
-
     if (core_rknd /= r8) then
       call endrun('clubb_ini_cam:  CLUBB library core_rknd must match CAM r8 and it does not')
     end if
 
+    ! Determine number of vertical levels used in clubb, thermo variables are nzt_clubb
+    ! and momentum variables are nzm_clubb
+    nzt_clubb = pver  + 1 - top_lev
+    nzm_clubb = pverp + 1 - top_lev
+
     ! Allocate PDF parameters across columns and chunks
     allocate( &
        pdf_params_chnk(begchunk:endchunk),   &
@@ -1733,7 +1740,7 @@ subroutine clubb_ini_cam(pbuf2d)
     clubb_params_single_col(iwpxp_Ri_exp) = clubb_wpxp_Ri_exp
     clubb_params_single_col(iz_displace) = clubb_z_displace
 
-    ! Override clubb default 
+    ! Override clubb default
     if ( trim(subcol_scheme) == 'SILHS' ) then
       clubb_config_flags%saturation_formula = saturation_flatau
     else
@@ -1742,17 +1749,17 @@ subroutine clubb_ini_cam(pbuf2d)
 
     ! Define model constant parameters
     call setup_parameters_model_api( theta0, ts_nudge, clubb_params_single_col(iSkw_max_mag) )
-   
+
     !  Set up CLUBB core.  Note that some of these inputs are overwritten
     !  when clubb_tend_cam is called.  The reason is that heights can change
     !  at each time step, which is why dummy arrays are read in here for heights
     !  as they are immediately overwrote.
 !$OMP PARALLEL
-    call check_clubb_settings_api( nlev+1, clubb_params_single_col,  & ! Intent(in)
-                                   l_implemented,                    & ! Intent(in)
-                                   l_input_fields,                   & ! Intent(in)
-                                   clubb_config_flags,               & ! intent(in)
-                                   err_code )                          ! Intent(out)
+    call check_clubb_settings_api( nzm_clubb, clubb_params_single_col,  & ! Intent(in)
+                                   l_implemented,                       & ! Intent(in)
+                                   l_input_fields,                      & ! Intent(in)
+                                   clubb_config_flags,                  & ! intent(in)
+                                   err_code )                             ! Intent(out)
 
     if ( err_code == clubb_fatal_error ) then
        call endrun('clubb_ini_cam:  FATAL ERROR CALLING SETUP_CLUBB_CORE')
@@ -1885,9 +1892,9 @@ subroutine clubb_ini_cam(pbuf2d)
     dum3 = 300._r8
 
     if (stats_metadata%l_stats) then
-      
+
       call stats_init_clubb( .true., dum1, dum2, &
-                             nlev+1, nlev+1, nlev+1, dum3, &
+                             nzm_clubb, nzm_clubb, nzm_clubb, dum3, &
                              stats_zt(:), stats_zm(:), stats_sfc(:), &
                              stats_rad_zt(:), stats_rad_zm(:))
 
@@ -2111,6 +2118,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     use cam_logfile,    only: iulog
     use tropopause,     only: tropopause_findChemTrop
     use time_manager,   only: get_nstep, is_first_restart_step
+    use perf_mod,       only: t_startf, t_stopf
 
 #ifdef CLUBB_SGS
     use hb_diff,                   only: pblintd
@@ -2146,8 +2154,6 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     use macrop_driver,             only: liquid_macro_tend
     use clubb_mf,                  only: integrate_mf
 
-    use perf_mod
-
 #endif
 
     implicit none
@@ -2185,12 +2191,14 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     !                   Local Variables                    !
     ! ---------------------------------------------------- !
 
+    integer :: i !Must be delcared outside "CLUBB_SGS" ifdef for det_s and det_ice zero-ing loops
+
 #ifdef CLUBB_SGS
 
     type(physics_state) :: state1                ! Local copy of state variable
     type(physics_ptend) :: ptend_loc             ! Local tendency from processes, added up to return as ptend_all
 
-    integer :: i, j,  k, t, ixind, nadv
+    integer :: j, k, t, ixind, nadv
     integer :: ixcldice, ixcldliq, ixnumliq, ixnumice, ixq
     integer :: itim_old
     integer :: ncol, lchnk                       ! # of columns, and chunk identifier
@@ -2239,7 +2247,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     ! Local CLUBB variables dimensioned as NCOL (only useful columns) to be sent into the clubb run api
     ! NOTE: THESE VARIABLS SHOULD NOT BE USED IN PBUF OR OUTFLD (HISTORY) SUBROUTINES
-    real(r8), dimension(state%ncol,pverp+1-top_lev) :: &
+    real(r8), dimension(state%ncol,nzm_clubb) :: &
       thlm_forcing,             & ! theta_l forcing (thermodynamic levels)      [K/s]
       rtm_forcing,              & ! r_t forcing (thermodynamic levels)          [(kg/kg)/s]
       um_forcing,               & ! u wind forcing (thermodynamic levels)     	[m/s/s]
@@ -2339,25 +2347,23 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     ! Local CLUBB variables dimensioned as NCOL (only useful columns) to be sent into the clubb run api
     ! NOTE: THESE VARIABLS SHOULD NOT BE USED IN PBUF OR OUTFLD (HISTORY) SUBROUTINES
-    real(r8), dimension(state%ncol,pverp+1-top_lev,sclr_dim) :: &
+    real(r8), dimension(state%ncol,nzm_clubb,sclr_dim) :: &
       sclrm_forcing,  & ! Passive scalar forcing              [{units vary}/s]
       sclrm,          & ! Passive scalar mean (thermo. levels)          [units vary]
       sclrp2,         & ! sclr'^2 (momentum levels)                     [{units vary}^2]
       sclrp3,         & ! sclr'^3 (thermo. levels)                      [{units vary}^3]
       sclrprtp,       & ! sclr'rt' (momentum levels)          [{units vary} (kg/kg)]
       sclrpthlp,      & ! sclr'thlp' (momentum levels)        [{units vary} (K)]
-      wpsclrp           ! w'sclr' (momentum levels)                     [{units vary} m/s]
-
-    real(r8), dimension(state%ncol,pverp,sclr_dim) :: &
-      sclrpthvp_inout  ! sclr'th_v' (momentum levels)                  [{units vary} (K)]
+      wpsclrp,        & ! w'sclr' (momentum levels)                     [{units vary} m/s]
+      sclrpthvp_inout   ! sclr'th_v' (momentum levels)                  [{units vary} (K)]
 
-    real(r8), dimension(state%ncol,pverp+1-top_lev,edsclr_dim) :: &
+    real(r8), dimension(state%ncol,nzm_clubb,edsclr_dim) :: &
       edsclrm_forcing,  & ! Eddy passive scalar forcing         [{units vary}/s]
       edsclr_in           ! Scalars to be diffused through CLUBB 		[units vary]
 
     ! Local CLUBB variables dimensioned as NCOL (only useful columns) to be sent into the clubb run api
     ! NOTE: THESE VARIABLS SHOULD NOT BE USED IN PBUF OR OUTFLD (HISTORY) SUBROUTINES
-    real(r8), dimension(state%ncol,pverp+1-top_lev,hydromet_dim) :: &
+    real(r8), dimension(state%ncol,nzm_clubb,hydromet_dim) :: &
       hydromet,     &
       wphydrometp,  &
       wp2hmp,       &
@@ -2366,10 +2372,9 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     ! Variables below are needed to compute energy integrals for conservation
     ! NOTE: Arrays of size PCOLS (all possible columns) can be used to access State, PBuf and History Subroutines
-    real(r8) :: ke_a(pcols), ke_b(pcols), te_a(pcols), te_b(pcols)
-    real(r8) :: wv_a(pcols), wv_b(pcols), wl_b(pcols), wl_a(pcols)
-    real(r8) :: se_dis(pcols), se_a(pcols), se_b(pcols), clubb_s(pcols,pver)
-    real(r8) :: eleak(pcols)
+    real(r8) :: te_a, se_a, ke_a, wv_a, wl_a
+    real(r8) :: te_b, se_b, ke_b, wv_b, wl_b
+    real(r8) :: se_dis(pcols), clubb_s(pcols,pver), eleak(pcols)
 
     real(r8) :: inv_exner_clubb(pcols,pverp)     ! Inverse exner function consistent with CLUBB  [-]
     real(r8) :: inv_exner_clubb_surf(pcols)      ! Inverse exner function at the surface
@@ -2409,7 +2414,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     real(r8) :: rrho(pcols)                      ! Inverse of air density                        [1/kg/m^3]
     real(r8) :: kinwat(pcols)                    ! Kinematic water vapor flux                    [m/s]
     real(r8) :: latsub
-    real(r8) :: thlp2_rad_out(pcols,pverp+1-top_lev)
+    real(r8) :: thlp2_rad_out(pcols,nzm_clubb)
     real(r8) :: apply_const, rtm_test
     real(r8) :: dl_rad, di_rad, dt_low
 
@@ -2576,7 +2581,21 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     real(r8) :: temp2d(pcols,pver), temp2dp(pcols,pverp)  ! temporary array for holding scaled outputs
 
-    integer :: nlev
+    real(r8), dimension(pcols,pver) :: &
+      rvmtend_clubb, &
+      rcmtend_clubb, &
+      rimtend_clubb, &
+      stend_clubb, &
+      utend_clubb, &
+      vtend_clubb, &
+      dpdlfliq, &
+      dpdlfice, &
+      dpdlft, &
+      detnliquid
+
+    real(r8), dimension(pcols,pverp) :: &
+      wprcp_clubb, &
+      wpthvp_clubb
 
     intrinsic :: max
 
@@ -2591,43 +2610,39 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     real(r8), dimension(state%ncol,nparams) :: &
       clubb_params    ! Adjustable CLUBB parameters (C1, C2 ...)
- 
-#endif
-    det_s(:)   = 0.0_r8
-    det_ice(:) = 0.0_r8
-
-#ifdef CLUBB_SGS
 
-    !-----------------------------------------------------------------------------------!
-    !                           MAIN COMPUTATION BEGINS HERE                            !
-    !-----------------------------------------------------------------------------------!
+    integer :: &
+      sclr, &
+      edsclr, &
+      n
 
-    call t_startf("clubb_tend_cam")
+#endif
 
-    nlev = pver + 1 - top_lev
+  call t_startf('clubb_tend_cam')
 
-    rtp2_zt_out = 0._r8
-    thl2_zt_out = 0._r8
-    wp2_zt_out  = 0._r8
-    pdfp_rtp2   = 0._r8
-    wm_zt_out   = 0._r8
+  do i = 1, pcols
+    det_s(i)   = 0.0_r8
+    det_ice(i) = 0.0_r8
+  end do
 
-    temp2d      = 0._r8
-    temp2dp     = 0._r8
+#ifdef CLUBB_SGS
 
-    dl_rad = clubb_detliq_rad
-    di_rad = clubb_detice_rad
-    dt_low = clubb_detphase_lowtemp
+#ifdef _OPENACC
+    ! These options have not been GPUized
+    if ( do_clubb_mf ) call endrun(subr//': do_clubb_mf=.true. not available when compiling with OpenACC')
+    if ( do_rainturb ) call endrun(subr//': do_rainturb=.true. not available when compiling with OpenACC')
+    if ( do_cldcool )  call endrun(subr//': do_cldcool=.true. not available when compiling with OpenACC')
+    if ( clubb_do_icesuper )  call endrun(subr//': clubb_do_icesuper=.true. not available when compiling with OpenACC')
+    if ( single_column .and. .not. scm_cambfb_mode )  then
+      call endrun(subr//': (single_column && !scm_cambfb_mode)=.true. not available when compiling with OpenACC')
+    end if
+#endif
 
-    frac_limit = 0.01_r8
-    ic_limit   = 1.e-12_r8
-    inv_rh2o = 1._r8/rh2o
+    !-----------------------------------------------------------------------------------!
+    !                           MAIN COMPUTATION BEGINS HERE                            !
+    !-----------------------------------------------------------------------------------!
 
-    if (clubb_do_adv) then
-      apply_const = 1._r8  ! Initialize to one, only if CLUBB's moments are advected
-    else
-      apply_const = 0._r8  ! Never want this if CLUBB's moments are not advected
-    endif
+    call t_startf('clubb_tend_cam:NAR')
 
     !  Get indicees for cloud and ice mass and cloud and ice number
     call cnst_get_ind('Q',ixq)
@@ -2636,28 +2651,6 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     call cnst_get_ind('NUMLIQ',ixnumliq)
     call cnst_get_ind('NUMICE',ixnumice)
 
-    if (clubb_do_icesuper) then
-      call pbuf_get_field(pbuf, naai_idx, naai)
-    end if
-
-    !  Initialize physics tendency arrays, copy the state to state1 array to use in this routine
-    call physics_ptend_init(ptend_all, state%psetcols, 'clubb')
-
-    ! Copy the state to state1 array to use in this routine
-    call physics_state_copy(state, state1)
-
-    ! Constituents are all treated as dry mmr by clubb.  Convert the water species to
-    ! a dry basis.
-    call set_wet_to_dry(state1, convert_cnst_type='wet')
-
-    if (clubb_do_liqsupersat) then
-      call pbuf_get_field(pbuf, npccn_idx, npccn)
-    endif
-
-    !  Determine number of columns and which chunk computation is to be performed on
-    ncol = state%ncol
-    lchnk = state%lchnk
-
     !  Determine time step of physics buffer
     itim_old = pbuf_old_tim_idx()
 
@@ -2754,80 +2747,53 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     call pbuf_get_field(pbuf, vpwp_clubb_gw_mc_idx,   vpwp_clubb_gw_mc )
     call pbuf_get_field(pbuf, wpthlp_clubb_gw_mc_idx, wpthlp_clubb_gw_mc )
 
-
-    ! Allocate pdf_params only if they aren't allocated already.
-    if ( .not. allocated(pdf_params_chnk(lchnk)%mixt_frac) ) then
-      call init_pdf_params_api( pverp+1-top_lev, ncol, pdf_params_chnk(lchnk) )
-      call init_pdf_params_api( pverp+1-top_lev, ncol, pdf_params_zm_chnk(lchnk) )
+    if (clubb_do_icesuper) then
+      call pbuf_get_field(pbuf, naai_idx, naai)
     end if
 
-     if ( .not. allocated(pdf_implicit_coefs_terms_chnk(lchnk)%coef_wp4_implicit) ) then
-       call init_pdf_implicit_coefs_terms_api( pverp+1-top_lev, ncol, sclr_dim, &
-                                               pdf_implicit_coefs_terms_chnk(lchnk) )
-     end if
+    !  Initialize physics tendency arrays
+    call physics_ptend_init(ptend_all, state%psetcols, 'clubb')
 
-    ! Initialize the apply_const variable (note special logic is due to eularian backstepping)
-    if (clubb_do_adv .and. (is_first_step() .or. all(wpthlp(1:ncol,1:pver)  ==  0._r8))) then
-      apply_const = 0._r8  ! On first time through do not remove constant
-                           !  from moments since it has not been added yet
-    endif
+    ! Copy the state to state1 array to use in this routine
+    call physics_state_copy(state, state1)
 
-    !  Set the ztodt timestep in pbuf for SILHS
-    ztodtptr(:) = 1.0_r8*hdtime
+    ! Constituents are all treated as dry mmr by clubb.  Convert the water species to
+    ! a dry basis.
+    call set_wet_to_dry(state1, convert_cnst_type='wet')
+
+    if (clubb_do_liqsupersat) then
+      call pbuf_get_field(pbuf, npccn_idx, npccn)
+    endif
 
     ! Define the grid box size.  CLUBB needs this information to determine what
     !  the maximum length scale should be.  This depends on the column for
     !  variable mesh grids and lat-lon grids
-
     call grid_size(state1, grid_dx, grid_dy)
 
-    if (clubb_do_icesuper) then
-
-      ! -------------------------------------- !
-      ! Ice Saturation Adjustment Computation  !
-      ! -------------------------------------- !
-
-      lq2(:)  = .FALSE.
-      lq2(1)  = .TRUE.
-      lq2(ixcldice) = .TRUE.
-      lq2(ixnumice) = .TRUE.
-
-      latsub = latvap + latice
-
-      call physics_ptend_init(ptend_loc, state%psetcols, 'iceadj', ls=.true., lq=lq2 )
-
-      stend(:ncol,:)=0._r8
-      qvtend(:ncol,:)=0._r8
-      qitend(:ncol,:)=0._r8
-      initend(:ncol,:)=0._r8
-
-      call ice_macro_tend(naai(1:ncol,top_lev:pver), state1%t(1:ncol,top_lev:pver),                       &
-                          state1%pmid(1:ncol,top_lev:pver), state1%q(1:ncol,top_lev:pver,1),              &
-                          state1%q(1:ncol,top_lev:pver,ixcldice), state1%q(1:ncol,top_lev:pver,ixnumice), &
-                          latsub, hdtime, stend(1:ncol,top_lev:pver), qvtend(1:ncol,top_lev:pver),        &
-                          qitend(1:ncol,top_lev:pver), initend(1:ncol,top_lev:pver), ncol*(pver-top_lev+1))
-
-      ! update local copy of state with the tendencies
-      ptend_loc%q(:ncol,top_lev:pver,1)=qvtend(:ncol,top_lev:pver)
-      ptend_loc%q(:ncol,top_lev:pver,ixcldice)=qitend(:ncol,top_lev:pver)
-      ptend_loc%q(:ncol,top_lev:pver,ixnumice)=initend(:ncol,top_lev:pver)
-      ptend_loc%s(:ncol,top_lev:pver)=stend(:ncol,top_lev:pver)
+    ! Determine number of columns and which chunk computation is to be performed on
+    ncol = state%ncol
+    lchnk = state%lchnk
 
-      ! Add the ice tendency to the output tendency
-      call physics_ptend_sum(ptend_loc, ptend_all, ncol)
+    ! Allocate pdf_params only if they aren't allocated already.
+    if ( .not. allocated(pdf_params_chnk(lchnk)%mixt_frac) ) then
+      call init_pdf_params_api( nzm_clubb, ncol, pdf_params_chnk(lchnk) )
+      call init_pdf_params_api( nzm_clubb, ncol, pdf_params_zm_chnk(lchnk) )
+    end if
 
-      ! ptend_loc is reset to zero by this call
-      call physics_update(state1, ptend_loc, hdtime)
+    if ( .not. allocated(pdf_implicit_coefs_terms_chnk(lchnk)%coef_wp4_implicit) ) then
+      call init_pdf_implicit_coefs_terms_api( nzm_clubb, ncol, sclr_dim, &
+                                              pdf_implicit_coefs_terms_chnk(lchnk) )
+    end if
 
-      !Write output for tendencies:
-      temp2d(:ncol,:pver) =  stend(:ncol,:pver)/cpairv(:ncol,:pver,lchnk)
-      call outfld( 'TTENDICE',  temp2d, pcols, lchnk )
-      call outfld( 'QVTENDICE', qvtend, pcols, lchnk )
-      call outfld( 'QITENDICE', qitend, pcols, lchnk )
-      call outfld( 'NITENDICE', initend, pcols, lchnk )
+    !--------------------- Scalar Setting --------------------
 
-    endif
+    dl_rad = clubb_detliq_rad
+    di_rad = clubb_detice_rad
+    dt_low = clubb_detphase_lowtemp
 
+    frac_limit = 0.01_r8
+    ic_limit   = 1.e-12_r8
+    inv_rh2o = 1._r8/rh2o
 
     !  Determine CLUBB time step and make it sub-step friendly
     !  For now we want CLUBB time step to be 5 min since that is
@@ -2867,214 +2833,520 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     !  host time step divided by CLUBB time step
     nadv = max(hdtime/dtime,1._r8)
 
-    !  Initialize forcings for transported scalars to zero
-    sclrm_forcing(:,:,:)   = 0._r8
-    edsclrm_forcing(:,:,:) = 0._r8
-    sclrm(:,:,:)           = 0._r8
 
-    !  Compute inverse exner function consistent with CLUBB's definition, which uses a constant
-    !  surface pressure.  CAM's exner (in state) does not.  Therefore, for consistent
-    !  treatment with CLUBB code, anytime exner is needed to treat CLUBB variables
-    !  (such as thlm), use "inv_exner_clubb" otherwise use the exner in state
-    do k=1,pver
-      do i=1,ncol
-        inv_exner_clubb(i,k) = 1._r8/((state1%pmid(i,k)/p0_clubb)**(rairv(i,k,lchnk)/cpairv(i,k,lchnk)))
-      enddo
-    enddo
-
-    !  Compute exner at the surface for converting the sensible heat fluxes
-    !  to a flux of potential temperature for use as clubb's boundary conditions
-    do i=1,ncol
-       inv_exner_clubb_surf(i) = 1._r8/((state1%pmid(i,pver)/p0_clubb)**(rairv(i,pver,lchnk)/cpairv(i,pver,lchnk)))
-    enddo
-
-    !  At each CLUBB call, initialize mean momentum  and thermo CLUBB state
-    !  from the CAM state
-    do k=1,pver   ! loop over levels
-      do i=1,ncol ! loop over columns
-
-        rtm(i,k)  = state1%q(i,k,ixq)+state1%q(i,k,ixcldliq)
-        rvm(i,k)  = state1%q(i,k,ixq)
-        um(i,k)   = state1%u(i,k)
-        vm(i,k)   = state1%v(i,k)
-        thlm(i,k) = ( state1%t(i,k) &
-                      - (latvap/cpairv(i,k,lchnk))*state1%q(i,k,ixcldliq) ) &
-                    * inv_exner_clubb(i,k)
-
-        if (clubb_do_adv) then
-          if (macmic_it  ==  1) then
+    !  Set stats output and increment equal to CLUBB and host dt
+    stats_metadata%stats_tsamp = dtime
+    stats_metadata%stats_tout  = hdtime
 
-            !  Note that some of the moments below can be positive or negative.
-            !    Remove a constant that was added to prevent dynamics from clipping
-            !    them to prevent dynamics from making them positive.
-            thlp2(i,k)   = state1%q(i,k,ixthlp2)
-            rtp2(i,k)    = state1%q(i,k,ixrtp2)
-            rtpthlp(i,k) = state1%q(i,k,ixrtpthlp) - (rtpthlp_const*apply_const)
-            wpthlp(i,k)  = state1%q(i,k,ixwpthlp) - (wpthlp_const*apply_const)
-            wprtp(i,k)   = state1%q(i,k,ixwprtp) - (wprtp_const*apply_const)
-            wp2(i,k)     = state1%q(i,k,ixwp2)
-            wp3(i,k)     = state1%q(i,k,ixwp3) - (wp3_const*apply_const)
-            up2(i,k)     = state1%q(i,k,ixup2)
-            vp2(i,k)     = state1%q(i,k,ixvp2)
-          endif
-        endif
+    stats_nsamp = nint(stats_metadata%stats_tsamp/dtime)
+    stats_nout = nint(stats_metadata%stats_tout/dtime)
 
-      enddo
-    enddo
 
     if (clubb_do_adv) then
-      ! If not last step of macmic loop then set apply_const back to
-      !   zero to prevent output from being corrupted.
-      if (macmic_it  ==  cld_macmic_num_steps) then
-        apply_const = 1._r8
-      else
-        apply_const = 0._r8
-      endif
+      apply_const = 1._r8  ! Initialize to one, only if CLUBB's moments are advected
+    else
+      apply_const = 0._r8  ! Never want this if CLUBB's moments are not advected
     endif
 
-    rtm(1:ncol,pverp)  = rtm(1:ncol,pver)
-    um(1:ncol,pverp)   = state1%u(1:ncol,pver)
-    vm(1:ncol,pverp)   = state1%v(1:ncol,pver)
-    thlm(1:ncol,pverp) = thlm(1:ncol,pver)
-
-    if (clubb_do_adv) then
-      thlp2(1:ncol,pverp)   = thlp2(1:ncol,pver)
-      rtp2(1:ncol,pverp)    = rtp2(1:ncol,pver)
-      rtpthlp(1:ncol,pverp) = rtpthlp(1:ncol,pver)
-      wpthlp(1:ncol,pverp)  = wpthlp(1:ncol,pver)
-      wprtp(1:ncol,pverp)   = wprtp(1:ncol,pver)
-      wp2(1:ncol,pverp)     = wp2(1:ncol,pver)
-      wp3(1:ncol,pverp)     = wp3(1:ncol,pver)
-      up2(1:ncol,pverp)     = up2(1:ncol,pver)
-      vp2(1:ncol,pverp)     = vp2(1:ncol,pver)
+    ! Initialize the apply_const variable (note special logic is due to eulerian backstepping)
+    if (clubb_do_adv .and. (is_first_step() .or. all(wpthlp(1:ncol,1:pver)  ==  0._r8))) then
+      apply_const = 0._r8  ! On first time through do not remove constant
+                           !  from moments since it has not been added yet
     endif
 
-    !  Compute virtual potential temperature, which is needed for CLUBB
-    do k=1,pver
-      do i=1,ncol
-        thv(i,k) = state1%t(i,k)*inv_exner_clubb(i,k)*(1._r8+zvir*state1%q(i,k,ixq)&
-                   -state1%q(i,k,ixcldliq))
-      enddo
-    enddo
-
-    call physics_ptend_init(ptend_loc,state%psetcols, 'clubb', ls=.true., lu=.true., lv=.true., lq=lq)
-
-    !REMOVECAM - no longer need this when CAM is retired and pcols no longer exists
-    troplev(:) = 0
-    !REMOVECAM_END
-    call tropopause_findChemTrop(state, troplev)
-
-    ! Initialize EDMF outputs
-    if (do_clubb_mf) then
-      mf_dry_a_output(:,:)     = 0._r8
-      mf_moist_a_output(:,:)   = 0._r8
-      mf_dry_w_output(:,:)     = 0._r8
-      mf_moist_w_output(:,:)   = 0._r8
-      mf_dry_qt_output(:,:)    = 0._r8
-      mf_moist_qt_output(:,:)  = 0._r8
-      mf_dry_thl_output(:,:)   = 0._r8
-      mf_moist_thl_output(:,:) = 0._r8
-      mf_dry_u_output(:,:)     = 0._r8
-      mf_moist_u_output(:,:)   = 0._r8
-      mf_dry_v_output(:,:)     = 0._r8
-      mf_moist_v_output(:,:)   = 0._r8
-      mf_moist_qc_output(:,:)  = 0._r8
-      s_ae_output(:,:)         = 0._r8
-      s_aw_output(:,:)         = 0._r8
-      s_awthl_output(:,:)      = 0._r8
-      s_awqt_output(:,:)       = 0._r8
-      s_awql_output(:,:)       = 0._r8
-      s_awqi_output(:,:)       = 0._r8
-      s_awu_output(:,:)        = 0._r8
-      s_awv_output(:,:)        = 0._r8
-      mf_thlflx_output(:,:)    = 0._r8
-      mf_qtflx_output(:,:)     = 0._r8
-    end if
-
-    call t_startf("clubb_tend_cam_i_loop")
+    !--------------------- Initializations --------------------
 
-    ! Determine Coriolis force at given latitude. This is never used
-    ! when CLUBB is implemented in a host model, therefore just set
-    ! to zero.
-    fcor(:) = 0._r8
+    !  Set the ztodt timestep in pbuf for SILHS
+    ztodtptr(:) = 1.0_r8*hdtime
 
-    ! Define the CLUBB momentum grid (in height, units of m)
-    do k=1, nlev+1
-      do i=1, ncol
-        zi_g(i,k) = state1%zi(i,pverp-k+1)-state1%zi(i,pver+1)
+    call t_stopf('clubb_tend_cam:NAR')
+    call t_startf('clubb_tend_cam:acc_copyin')
+    !$acc data copyin( sclr_idx, clubb_params_single_col, grid_dx, grid_dy, rairv, cpairv, radf_clubb, qrl, &
+    !$acc              pdf_params_chnk(lchnk), pdf_params_zm_chnk(lchnk), &
+    !$acc              state1, state1%q, state1%u, state1%v, state1%t, state1%pmid, state1%s, state1%pint, &
+    !$acc              state1%zm, state1%zi, state1%pdeldry, state1%pdel, state1%omega, state1%phis, &
+    !$acc              cam_in, cam_in%shf, cam_in%wsx, cam_in%wsy, cam_in%cflx, &
+    !$acc              rrho, prer_evap, rtp2_mc_zt, thlp2_mc_zt, wprtp_mc_zt, wpthlp_mc_zt, rtpthlp_mc_zt ) &
+    !$acc        copy( um, vm, upwp, vpwp, wpthvp, wp2thvp, rtpthvp, thlpthvp, up2, vp2, up3, vp3, &
+    !$acc              wp2, wp3, rtp2, thlp2, rtp3, thlp3, thlm, rtm, rvm, wprtp, wpthlp, rtpthlp, &
+    !$acc              cloud_frac, wp2rtp, wp2thlp, uprcp, vprcp, rc_coef, wp4, wpup2, wpvp2, &
+    !$acc              ttend_clubb_mc, upwp_clubb_gw_mc, vpwp_clubb_gw_mc, thlp2_clubb_gw_mc, wpthlp_clubb_gw_mc, &
+    !$acc              ttend_clubb, upwp_clubb_gw, vpwp_clubb_gw, thlp2_clubb_gw, wpthlp_clubb_gw, &
+    !$acc              wp2up2, wp2vp2, ice_supersat_frac ) &
+    !$acc     copyout( temp2d, temp2dp, rtp2_zt_out, thl2_zt_out, wp2_zt_out, pdfp_rtp2, wm_zt_out, inv_exner_clubb, &
+    !$acc              rcm, wprcp, rcm_in_layer, cloud_cover, zt_out, zi_out, khzm, qclvar, thv, dz_g, &
+    !$acc              clubbtop, se_dis, eleak, clubb_s, wpthvp_clubb, wprcp_clubb ) &
+    !$acc      create( upwp_sfc_pert, vpwp_sfc_pert, khzt_out, khzm_out, &
+    !$acc              fcor, um_in, vm_in, upwp_in, vpwp_in, wpthvp_in, wp2thvp_in, rtpthvp_in, thlpthvp_in, &
+    !$acc              up2_in, vp2_in, up3_in, vp3_in, wp2_in, wp3_in, rtp2_in, thlp2_in, rtp3_in, &
+    !$acc              thlp3_in, thlm_in, rtm_in, rvm_in, wprtp_in, wpthlp_in, rtpthlp_in, cloud_frac_inout, &
+    !$acc              rcm_inout, wp2rtp_inout, wp2thlp_inout, uprcp_inout, vprcp_inout, &
+    !$acc              rc_coef_inout, wp4_inout, wpup2_inout, wpvp2_inout, wp2up2_inout, wp2vp2_inout, &
+    !$acc              ice_supersat_frac_inout, pre_in, kappa_zt, qc_zt, invrs_exner_zt, kappa_zm, p_in_Pa_zm, &
+    !$acc              invrs_exner_zm, cloud_cover_out, rcm_in_layer_out, wprcp_out, &
+    !$acc              qclvar_out, rtp2_zt, thl2_zt, wp2_zt, w_up_in_cloud_out, cloudy_downdraft_frac_out, &
+    !$acc              w_down_in_cloud_out, invrs_tau_zm_out, vm_pert_inout, upwp_pert_inout, vpwp_pert_inout, &
+    !$acc              thlm_forcing, rtm_forcing, um_forcing, vm_forcing, &
+    !$acc              wprtp_forcing, wpthlp_forcing, rtp2_forcing, thlp2_forcing, &
+    !$acc              rtpthlp_forcing, wm_zm, wm_zt, rho_zm, rho_zt, rho_ds_zm, rho_ds_zt, &
+    !$acc              invrs_rho_ds_zm, invrs_rho_ds_zt, thv_ds_zm, thv_ds_zt, rfrzm, &
+    !$acc              radf, wpthlp_sfc, clubb_params, sfc_elevation, wprtp_sfc, upwp_sfc, vpwp_sfc, &
+    !$acc              rtm_ref, thlm_ref, um_ref, vm_ref, ug, vg, p_in_Pa, exner, um_pert_inout, &
+    !$acc              inv_exner_clubb_surf, thlprcp_out, zi_g, zt_g, qrl_clubb, &
+    !$acc              pdf_zm_w_1, pdf_zm_w_2, pdf_zm_varnce_w_1, pdf_zm_varnce_w_2, pdf_zm_mixt_frac, &
+    !$acc              pdf_params_chnk(lchnk)%w_1, pdf_params_chnk(lchnk)%w_2, &
+    !$acc              pdf_params_chnk(lchnk)%varnce_w_1, pdf_params_chnk(lchnk)%varnce_w_2, &
+    !$acc              pdf_params_chnk(lchnk)%rt_1, pdf_params_chnk(lchnk)%rt_2, &
+    !$acc              pdf_params_chnk(lchnk)%varnce_rt_1, pdf_params_chnk(lchnk)%varnce_rt_2,  &
+    !$acc              pdf_params_chnk(lchnk)%thl_1, pdf_params_chnk(lchnk)%thl_2, &
+    !$acc              pdf_params_chnk(lchnk)%varnce_thl_1, pdf_params_chnk(lchnk)%varnce_thl_2, &
+    !$acc              pdf_params_chnk(lchnk)%corr_w_rt_1, pdf_params_chnk(lchnk)%corr_w_rt_2,  &
+    !$acc              pdf_params_chnk(lchnk)%corr_w_thl_1, pdf_params_chnk(lchnk)%corr_w_thl_2, &
+    !$acc              pdf_params_chnk(lchnk)%corr_rt_thl_1, pdf_params_chnk(lchnk)%corr_rt_thl_2,&
+    !$acc              pdf_params_chnk(lchnk)%alpha_thl, pdf_params_chnk(lchnk)%alpha_rt, &
+    !$acc              pdf_params_chnk(lchnk)%crt_1, pdf_params_chnk(lchnk)%crt_2, pdf_params_chnk(lchnk)%cthl_1, &
+    !$acc              pdf_params_chnk(lchnk)%cthl_2, pdf_params_chnk(lchnk)%chi_1, &
+    !$acc              pdf_params_chnk(lchnk)%chi_2, pdf_params_chnk(lchnk)%stdev_chi_1, &
+    !$acc              pdf_params_chnk(lchnk)%stdev_chi_2, pdf_params_chnk(lchnk)%stdev_eta_1, &
+    !$acc              pdf_params_chnk(lchnk)%stdev_eta_2, pdf_params_chnk(lchnk)%covar_chi_eta_1, &
+    !$acc              pdf_params_chnk(lchnk)%covar_chi_eta_2, pdf_params_chnk(lchnk)%corr_w_chi_1, &
+    !$acc              pdf_params_chnk(lchnk)%corr_w_chi_2, pdf_params_chnk(lchnk)%corr_w_eta_1, &
+    !$acc              pdf_params_chnk(lchnk)%corr_w_eta_2, pdf_params_chnk(lchnk)%corr_chi_eta_1, &
+    !$acc              pdf_params_chnk(lchnk)%corr_chi_eta_2, pdf_params_chnk(lchnk)%rsatl_1, &
+    !$acc              pdf_params_chnk(lchnk)%rsatl_2, pdf_params_chnk(lchnk)%rc_1, pdf_params_chnk(lchnk)%rc_2, &
+    !$acc              pdf_params_chnk(lchnk)%cloud_frac_1, pdf_params_chnk(lchnk)%cloud_frac_2,  &
+    !$acc              pdf_params_chnk(lchnk)%mixt_frac, pdf_params_chnk(lchnk)%ice_supersat_frac_1, &
+    !$acc              pdf_params_chnk(lchnk)%ice_supersat_frac_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%w_1, pdf_params_zm_chnk(lchnk)%w_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%varnce_w_1, pdf_params_zm_chnk(lchnk)%varnce_w_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%rt_1, pdf_params_zm_chnk(lchnk)%rt_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%varnce_rt_1, pdf_params_zm_chnk(lchnk)%varnce_rt_2,  &
+    !$acc              pdf_params_zm_chnk(lchnk)%thl_1, pdf_params_zm_chnk(lchnk)%thl_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%varnce_thl_1, pdf_params_zm_chnk(lchnk)%varnce_thl_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%corr_w_rt_1, pdf_params_zm_chnk(lchnk)%corr_w_rt_2,  &
+    !$acc              pdf_params_zm_chnk(lchnk)%corr_w_thl_1, pdf_params_zm_chnk(lchnk)%corr_w_thl_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%corr_rt_thl_1, pdf_params_zm_chnk(lchnk)%corr_rt_thl_2,&
+    !$acc              pdf_params_zm_chnk(lchnk)%alpha_thl, pdf_params_zm_chnk(lchnk)%alpha_rt, &
+    !$acc              pdf_params_zm_chnk(lchnk)%crt_1, pdf_params_zm_chnk(lchnk)%crt_2, pdf_params_zm_chnk(lchnk)%cthl_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%cthl_2, pdf_params_zm_chnk(lchnk)%chi_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%chi_2, pdf_params_zm_chnk(lchnk)%stdev_chi_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%stdev_chi_2, pdf_params_zm_chnk(lchnk)%stdev_eta_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%stdev_eta_2, pdf_params_zm_chnk(lchnk)%covar_chi_eta_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%covar_chi_eta_2, pdf_params_zm_chnk(lchnk)%corr_w_chi_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%corr_w_chi_2, pdf_params_zm_chnk(lchnk)%corr_w_eta_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%corr_w_eta_2, pdf_params_zm_chnk(lchnk)%corr_chi_eta_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%corr_chi_eta_2, pdf_params_zm_chnk(lchnk)%rsatl_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%rsatl_2, pdf_params_zm_chnk(lchnk)%rc_1, pdf_params_zm_chnk(lchnk)%rc_2, &
+    !$acc              pdf_params_zm_chnk(lchnk)%cloud_frac_1, pdf_params_zm_chnk(lchnk)%cloud_frac_2,  &
+    !$acc              pdf_params_zm_chnk(lchnk)%mixt_frac, pdf_params_zm_chnk(lchnk)%ice_supersat_frac_1, &
+    !$acc              pdf_params_zm_chnk(lchnk)%ice_supersat_frac_2 )
+
+    !$acc data if( sclr_dim > 0 ) &
+    !$acc      create( wpsclrp_sfc, sclrm_forcing, sclrm, wpsclrp, sclrp2, sclrp3, sclrprtp, sclrpthlp, sclrpthvp_inout) &
+    !$acc      copyin( sclr_tol )
+
+    !$acc data if( edsclr_dim > 0 ) &
+    !$acc      create( wpedsclrp_sfc, edsclrm_forcing, edsclr_in ) &
+    !$acc     copyout( edsclr_out )
+
+    !$acc data if( hydromet_dim > 0 ) &
+    !$acc      create( hydromet, wphydrometp, wp2hmp, rtphmp_zt, thlphmp_zt ) &
+    !$acc      copyin( hm_metadata, hm_metadata%l_mix_rat_hm )
+    call t_stopf('clubb_tend_cam:acc_copyin')
+    call t_startf('clubb_tend_cam:ACCR')
+
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, pverp
+      do i = 1, pcols
+        rtp2_zt_out(i,k) = 0._r8
+        thl2_zt_out(i,k) = 0._r8
+        wp2_zt_out(i,k)  = 0._r8
+        pdfp_rtp2(i,k)   = 0._r8
+        wm_zt_out(i,k)   = 0._r8
+        temp2dp(i,k)     = 0._r8
       end do
     end do
 
-    ! Define the CLUBB thermodynamic grid (in units of m)
-    do k=1, nlev
-      do i=1, ncol
-        zt_g(i,k+1) = state1%zm(i,pver-k+1)-state1%zi(i,pver+1)
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, pver
+      do i = 1, pcols
+        temp2d(i,k) = 0._r8
       end do
     end do
 
-    do k=1, pver
-      do i=1, ncol
-        dz_g(i,k) = state1%zi(i,k)-state1%zi(i,k+1)  ! compute thickness
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, nzm_clubb
+      do i = 1, ncol
+
+        !  Define forcings from CAM to CLUBB as zero for momentum and thermo,
+        !  forcings already applied through CAM
+        thlm_forcing(i,k) = 0._r8
+        rtm_forcing(i,k) = 0._r8
+        um_forcing(i,k) = 0._r8
+        vm_forcing(i,k) = 0._r8
+
+        rtm_ref(i,k) = 0.0_r8
+        thlm_ref(i,k) = 0.0_r8
+        um_ref(i,k) = 0.0_r8
+        vm_ref(i,k) = 0.0_r8
+        ug(i,k) = 0.0_r8
+        vg(i,k) = 0.0_r8
       end do
     end do
 
-    !  Thermodynamic ghost point is below surface
-    do i=1, ncol
-      zt_g(i,1) = -1._r8*zt_g(i,2)
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, nzm_clubb
+      do i = 1, ncol
+        ! Perturbed winds are not used in CAM
+        um_pert_inout(i,k)   = 0.0_r8
+        vm_pert_inout(i,k)   = 0.0_r8
+        upwp_pert_inout(i,k) = 0.0_r8
+        vpwp_pert_inout(i,k) = 0.0_r8
+
+        !  Initialize these to prevent crashing behavior
+        wprcp_out(i,k)        = 0._r8
+        rcm_in_layer_out(i,k) = 0._r8
+        cloud_cover_out(i,k)  = 0._r8
+        khzm_out(i,k)         = 0._r8
+        khzt_out(i,k)         = 0._r8
+      end do
     end do
 
-    do i=1, ncol
-      !  Set the elevation of the surface
-      sfc_elevation(i) = state1%zi(i,pver+1)
+    !$acc parallel loop gang vector default(present)
+    do i = 1, ncol
+      ! Perturbed winds are not used in CAM
+      upwp_sfc_pert(i) = 0.0_r8
+      vpwp_sfc_pert(i) = 0.0_r8
+
+      ! Determine Coriolis force at given latitude. This is never used
+      ! when CLUBB is implemented in a host model, therefore just set
+      ! to zero.
+      fcor(i) = 0._r8
     end do
 
-    !  Compute thermodynamic stuff needed for CLUBB on thermo levels.
-    !  Inputs for the momentum levels are set below setup_clubb core
-    do k=1,nlev
-      do i=1, ncol
-        ! base state (dry) variables
-        rho_ds_zt(i,k+1)       = rga*(state1%pdeldry(i,pver-k+1)/dz_g(i,pver-k+1))
-        invrs_rho_ds_zt(i,k+1) = 1._r8/(rho_ds_zt(i,k+1))
+    if ( sclr_dim > 0 ) then
+      !  higher order scalar stuff, put to zero
+      !$acc parallel loop gang vector collapse(3) default(present)
+      do sclr = 1, sclr_dim
+        do k = 1, nzm_clubb
+          do i=1, ncol
+            sclrm(i,k,sclr)           = 0._r8
+            wpsclrp(i,k,sclr)         = 0._r8
+            sclrp2(i,k,sclr)          = 0._r8
+            sclrp3(i,k,sclr)          = 0._r8
+            sclrprtp(i,k,sclr)        = 0._r8
+            sclrpthlp(i,k,sclr)       = 0._r8
+            sclrpthvp_inout(i,k,sclr) = 0._r8
+            sclrm_forcing(i,k,sclr)   = 0._r8
+          end do
+        end do
+      end do
 
-        ! full state (moist) variables
-        p_in_Pa(i,k+1)         = state1%pmid(i,pver-k+1)
-        exner(i,k+1)           = 1._r8/inv_exner_clubb(i,pver-k+1)
-        thv(i,k+1)             = state1%t(i,pver-k+1)*inv_exner_clubb(i,pver-k+1)*(1._r8+zvir*state1%q(i,pver-k+1,ixq) &
-                                   -state1%q(i,pver-k+1,ixcldliq))
-        rho_zt(i,k+1)          = rga*state1%pdel(i,pver-k+1)/dz_g(i,pver-k+1)
+      !$acc parallel loop gang vector collapse(2) default(present)
+      do sclr = 1, sclr_dim
+        do i=1, ncol
+          wpsclrp_sfc(i,sclr) = 0._r8
+        end do
+      end do
+    end if
 
-        ! exception - setting this to moist thv
-        thv_ds_zt(i,k+1)       = thv(i,k+1)
+    if ( hydromet_dim > 0 ) then
+      !$acc parallel loop gang vector collapse(3) default(present)
+      do ixind=1, hydromet_dim
+        do k=1, nzm_clubb
+          do i=1, ncol
+            hydromet(i,k,ixind)    = 0._r8
+            wphydrometp(i,k,ixind) = 0._r8
+            wp2hmp(i,k,ixind)      = 0._r8
+            rtphmp_zt(i,k,ixind)   = 0._r8
+            thlphmp_zt(i,k,ixind)  = 0._r8
+          end do
+        end do
+      end do
+    end if
 
-        rfrzm(i,k+1)           = state1%q(i,pver-k+1,ixcldice)
-        radf(i,k+1)            = radf_clubb(i,pver-k+1)
-        qrl_clubb(i,k+1)       = qrl(i,pver-k+1)/(cpairv(i,k,lchnk)*state1%pdeldry(i,pver-k+1))
+    if ( edsclr_dim > 0 ) then
+      !$acc parallel loop gang vector collapse(3) default(present)
+      do edsclr = 1, edsclr_dim
+        do i = 1, ncol
+          do k = 1, nzm_clubb
+            edsclrm_forcing(i,k,edsclr) = 0._r8
+            edsclr_in(i,k,edsclr)       = 0._r8
+          end do
+        end do
       end do
-    end do
 
-    !  Compute mean w wind on thermo grid, convert from omega to w
-    do k=1,nlev
-      do i=1,ncol
-        wm_zt(i,k+1) = -1._r8*(state1%omega(i,pver-k+1)-state1%omega(i,pver))/(rho_zt(i,k+1)*gravit)
+      !  Define surface sources for transported variables for diffusion, will
+      !  be zero as these tendencies are done in vertical_diffusion
+      !$acc parallel loop gang vector collapse(2) default(present)
+      do edsclr = 1, edsclr_dim
+        do i = 1, ncol
+          wpedsclrp_sfc(i,edsclr) = 0._r8
+        end do
       end do
-    end do
+    end if
 
-    !  Below computes the same stuff for the ghost point.  May or may
-    !  not be needed, just to be safe to avoid NaN's
-    do i=1, ncol
-      thv_ds_zt(i,1)       = thv_ds_zt(i,2)
-      rho_ds_zt(i,1)       = rho_ds_zt(i,2)
-      invrs_rho_ds_zt(i,1) = invrs_rho_ds_zt(i,2)
-      p_in_Pa(i,1)         = p_in_Pa(i,2)
-      exner(i,1)           = exner(i,2)
-      thv(i,1)             = thv(i,2)
-      rho_zt(i,1)          = rho_zt(i,2)
-      rfrzm(i,1)           = rfrzm(i,2)
-      radf(i,1)            = radf(i,2)
-      qrl_clubb(i,1)       = qrl_clubb(i,2)
-      wm_zt(i,1)           = wm_zt(i,2)
-    end do
+    ! need to initialize macmic coupling to zero
+    if ( macmic_it == 1 ) then
+      !$acc parallel loop gang vector collapse(2) default(present)
+      do i = 1, ncol
+        do k = 1, pverp
+          ttend_clubb_mc(i,k) = 0._r8
+          upwp_clubb_gw_mc(i,k) = 0._r8
+          vpwp_clubb_gw_mc(i,k) = 0._r8
+          thlp2_clubb_gw_mc(i,k) = 0._r8
+          wpthlp_clubb_gw_mc(i,k) = 0._r8
+        end do
+      end do
+    end if
 
+    ! Initialize EDMF outputs
+    if (do_clubb_mf) then
+      do k = 1, pverp
+        do i = 1, pcols
+          mf_dry_a_output(i,k)     = 0._r8
+          mf_moist_a_output(i,k)   = 0._r8
+          mf_dry_w_output(i,k)     = 0._r8
+          mf_moist_w_output(i,k)   = 0._r8
+          mf_dry_qt_output(i,k)    = 0._r8
+          mf_moist_qt_output(i,k)  = 0._r8
+          mf_dry_thl_output(i,k)   = 0._r8
+          mf_moist_thl_output(i,k) = 0._r8
+          mf_dry_u_output(i,k)     = 0._r8
+          mf_moist_u_output(i,k)   = 0._r8
+          mf_dry_v_output(i,k)     = 0._r8
+          mf_moist_v_output(i,k)   = 0._r8
+          mf_moist_qc_output(i,k)  = 0._r8
+          s_ae_output(i,k)         = 0._r8
+          s_aw_output(i,k)         = 0._r8
+          s_awthl_output(i,k)      = 0._r8
+          s_awqt_output(i,k)       = 0._r8
+          s_awql_output(i,k)       = 0._r8
+          s_awqi_output(i,k)       = 0._r8
+          s_awu_output(i,k)        = 0._r8
+          s_awv_output(i,k)        = 0._r8
+          mf_thlflx_output(i,k)    = 0._r8
+          mf_qtflx_output(i,k)     = 0._r8
+        end do
+      end do
+    end if
+
+    if (clubb_do_icesuper) then
+
+      ! -------------------------------------- !
+      ! Ice Saturation Adjustment Computation  !
+      ! -------------------------------------- !
+
+      lq2(:)  = .FALSE.
+      lq2(1)  = .TRUE.
+      lq2(ixcldice) = .TRUE.
+      lq2(ixnumice) = .TRUE.
+
+      latsub = latvap + latice
+
+      call physics_ptend_init(ptend_loc, state%psetcols, 'iceadj', ls=.true., lq=lq2 )
+
+      do i = 1, ncol
+        do k = 1, pver
+          stend(i,k)    = 0._r8
+          qvtend(i,k)   = 0._r8
+          qitend(i,k)   = 0._r8
+          initend(i,k)  = 0._r8
+        end do
+      end do
+
+      call t_startf('clubb_tend_cam:ice_macro_tend')
+      call ice_macro_tend(naai(1:ncol,top_lev:pver), state1%t(1:ncol,top_lev:pver),                       &
+                          state1%pmid(1:ncol,top_lev:pver), state1%q(1:ncol,top_lev:pver,1),              &
+                          state1%q(1:ncol,top_lev:pver,ixcldice), state1%q(1:ncol,top_lev:pver,ixnumice), &
+                          latsub, hdtime, stend(1:ncol,top_lev:pver), qvtend(1:ncol,top_lev:pver),        &
+                          qitend(1:ncol,top_lev:pver), initend(1:ncol,top_lev:pver), ncol*(pver-top_lev+1))
+      call t_stopf('clubb_tend_cam:ice_macro_tend')
+
+      ! update local copy of state with the tendencies
+      do i = 1, ncol
+        do k = top_lev, pver
+          ptend_loc%q(i,k,1)         = qvtend(i,k)
+          ptend_loc%q(i,k,ixcldice)  = qitend(i,k)
+          ptend_loc%q(i,k,ixnumice)  = initend(i,k)
+          ptend_loc%s(i,k)           = stend(i,k)
+        end do
+      end do
+
+      ! Add the ice tendency to the output tendency
+      call physics_ptend_sum(ptend_loc, ptend_all, ncol)
+
+      ! ptend_loc is reset to zero by this call
+      call physics_update(state1, ptend_loc, hdtime)
+
+      ! Write output for tendencies:
+      do i = 1, ncol
+        do k = 1, pver
+          temp2d(i,k) =  stend(i,k) / cpairv(i,k,lchnk)
+        end do
+      end do
+
+      call outfld( 'TTENDICE',  temp2d, pcols, lchnk )
+      call outfld( 'QVTENDICE', qvtend, pcols, lchnk )
+      call outfld( 'QITENDICE', qitend, pcols, lchnk )
+      call outfld( 'NITENDICE', initend, pcols, lchnk )
+
+    endif
+
+    if (clubb_do_adv) then
+
+      if (macmic_it  ==  1) then
+
+        !  Note that some of the moments below can be positive or negative.
+        !    Remove a constant that was added to prevent dynamics from clipping
+        !    them to prevent dynamics from making them positive.
+        do k = 1, pver
+          do i = 1, ncol
+            thlp2(i,k)   = state1%q(i,k,ixthlp2)
+            rtp2(i,k)    = state1%q(i,k,ixrtp2)
+            rtpthlp(i,k) = state1%q(i,k,ixrtpthlp) - ( rtpthlp_const * apply_const )
+            wpthlp(i,k)  = state1%q(i,k,ixwpthlp) - ( wpthlp_const * apply_const )
+            wprtp(i,k)   = state1%q(i,k,ixwprtp) - ( wprtp_const * apply_const )
+            wp2(i,k)     = state1%q(i,k,ixwp2)
+            wp3(i,k)     = state1%q(i,k,ixwp3) - ( wp3_const * apply_const )
+            up2(i,k)     = state1%q(i,k,ixup2)
+            vp2(i,k)     = state1%q(i,k,ixvp2)
+          enddo
+        enddo
+
+      endif
+
+      ! If not last step of macmic loop then set apply_const back to
+      !   zero to prevent output from being corrupted.
+      if (macmic_it  ==  cld_macmic_num_steps) then
+        apply_const = 1._r8
+      else
+        apply_const = 0._r8
+      endif
+
+      do i = 1, ncol
+        thlp2(i,pverp)   = thlp2(i,pver)
+        rtp2(i,pverp)    = rtp2(i,pver)
+        rtpthlp(i,pverp) = rtpthlp(i,pver)
+        wpthlp(i,pverp)  = wpthlp(i,pver)
+        wprtp(i,pverp)   = wprtp(i,pver)
+        wp2(i,pverp)     = wp2(i,pver)
+        wp3(i,pverp)     = wp3(i,pver)
+        up2(i,pverp)     = up2(i,pver)
+        vp2(i,pverp)     = vp2(i,pver)
+      end do
+
+    endif
+
+    ! Define the CLUBB momentum grid (in height, units of m)
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k=1, nzm_clubb
+      do i=1, ncol
+        zi_g(i,k) = state1%zi(i,pverp-k+1) - state1%zi(i,pver+1)
+      end do
+    end do
+
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k=1, pver
+      do i=1, ncol
+
+        !  Compute inverse exner function consistent with CLUBB's definition, which uses a constant
+        !  surface pressure.  CAM's exner (in state) does not.  Therefore, for consistent
+        !  treatment with CLUBB code, anytime exner is needed to treat CLUBB variables
+        !  (such as thlm), use "inv_exner_clubb" otherwise use the exner in state
+        inv_exner_clubb(i,k) = 1._r8 / ( ( state1%pmid(i,k) / p0_clubb )**( rairv(i,k,lchnk) / cpairv(i,k,lchnk) ) )
+
+        !  Compute virtual potential temperature, which is needed for CLUBB
+        thv(i,k) = state1%t(i,k) * inv_exner_clubb(i,k) &
+                   * ( 1._r8 + zvir * state1%q(i,k,ixq) - state1%q(i,k,ixcldliq) )
+
+        dz_g(i,k) = state1%zi(i,k) - state1%zi(i,k+1)  ! compute thickness
+
+        !  At each CLUBB call, initialize mean momentum  and thermo CLUBB state
+        !  from the CAM state
+        rtm(i,k)  = state1%q(i,k,ixq) + state1%q(i,k,ixcldliq)
+        rvm(i,k)  = state1%q(i,k,ixq)
+        um(i,k)   = state1%u(i,k)
+        vm(i,k)   = state1%v(i,k)
+        thlm(i,k) = ( state1%t(i,k) - ( latvap / cpairv(i,k,lchnk) ) * state1%q(i,k,ixcldliq) ) &
+                    * inv_exner_clubb(i,k)
+
+      enddo
+    enddo
+
+    !$acc parallel loop gang vector default(present)
+    do i = 1, ncol
+      rtm(i,pverp)  = rtm(i,pver)
+      um(i,pverp)   = state1%u(i,pver)
+      vm(i,pverp)   = state1%v(i,pver)
+      thlm(i,pverp) = thlm(i,pver)
+
+      !  Compute exner at the surface for converting the sensible heat fluxes
+      !  to a flux of potential temperature for use as clubb's boundary conditions
+      inv_exner_clubb_surf(i) = inv_exner_clubb(i,pver)
+    end do
+
+    !  Compute thermodynamic stuff needed for CLUBB on thermo levels.
+    !  Inputs for the momentum levels are set below setup_clubb core
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, nzt_clubb
+      do i = 1, ncol
+
+        ! Define the CLUBB thermodynamic grid (in units of m)
+        zt_g(i,k+1) = state1%zm(i,pver-k+1) - state1%zi(i,pver+1)
+
+        ! base state (dry) variables
+        rho_ds_zt(i,k+1)       = rga * ( state1%pdeldry(i,pver-k+1) / dz_g(i,pver-k+1) )
+        invrs_rho_ds_zt(i,k+1) = 1._r8 / rho_ds_zt(i,k+1)
+
+        ! full state (moist) variables
+        p_in_Pa(i,k+1)         = state1%pmid(i,pver-k+1)
+        exner(i,k+1)           = 1._r8 / inv_exner_clubb(i,pver-k+1)
+        thv(i,k+1)             = state1%t(i,pver-k+1) * inv_exner_clubb(i,pver-k+1) &
+                                 * ( 1._r8 + zvir * state1%q(i,pver-k+1,ixq) - state1%q(i,pver-k+1,ixcldliq) )
+        rho_zt(i,k+1)          = rga * state1%pdel(i,pver-k+1) / dz_g(i,pver-k+1)
+
+        ! exception - setting this to moist thv
+        thv_ds_zt(i,k+1)       = thv(i,k+1)
+
+        rfrzm(i,k+1)           = state1%q(i,pver-k+1,ixcldice)
+        radf(i,k+1)            = radf_clubb(i,pver-k+1)
+        qrl_clubb(i,k+1)       = qrl(i,pver-k+1) / ( cpairv(i,k,lchnk) * state1%pdeldry(i,pver-k+1) )
+
+        !  Compute mean w wind on thermo grid, convert from omega to w
+        wm_zt(i,k+1) = -1._r8 * ( state1%omega(i,pver-k+1) - state1%omega(i,pver) ) &
+                                / ( rho_zt(i,k+1) * gravit )
+      end do
+    end do
+
+    !  Below computes the same stuff for the ghost point.  May or may
+    !  not be needed, just to be safe to avoid NaN's
+    !$acc parallel loop gang vector default(present)
+    do i = 1, ncol
+      zt_g(i,1)            = -1._r8 * zt_g(i,2)
+      rho_ds_zt(i,1)       = rho_ds_zt(i,2)
+      invrs_rho_ds_zt(i,1) = invrs_rho_ds_zt(i,2)
+      p_in_Pa(i,1)         = p_in_Pa(i,2)
+      exner(i,1)           = exner(i,2)
+      thv(i,1)             = thv(i,2)
+      rho_zt(i,1)          = rho_zt(i,2)
+      thv_ds_zt(i,1)       = thv_ds_zt(i,2)
+      rfrzm(i,1)           = rfrzm(i,2)
+      radf(i,1)            = radf(i,2)
+      qrl_clubb(i,1)       = qrl_clubb(i,2)
+      wm_zt(i,1)           = wm_zt(i,2)
+
+      !  Set the elevation of the surface
+      sfc_elevation(i) = state1%zi(i,pverp)
+    end do
+
+
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do i = 1, ncol
+      do n = 1, nparams
+        clubb_params(i,n) = clubb_params_single_col(n)
+      end do
+    end do
 
     ! ------------------------------------------------- !
     ! Begin case specific code for SCAM cases.          !
@@ -3126,20 +3398,6 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     end if
 
-    !  Define surface sources for transported variables for diffusion, will
-    !  be zero as these tendencies are done in vertical_diffusion
-    do ixind=1,edsclr_dim
-      do i=1,ncol
-        wpedsclrp_sfc(i,ixind) = 0._r8
-      end do
-    end do
-
-    !  Set stats output and increment equal to CLUBB and host dt
-    stats_metadata%stats_tsamp = dtime
-    stats_metadata%stats_tout  = hdtime
-
-    stats_nsamp = nint(stats_metadata%stats_tsamp/dtime)
-    stats_nout = nint(stats_metadata%stats_tout/dtime)
 
     !  Heights need to be set at each timestep.  Therefore, recall
     !  setup_grid and setup_parameters for this.
@@ -3148,62 +3406,74 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     !  Important note:  do not make any calls that use CLUBB grid-height
     !                   operators (such as zt2zm_api, etc.) until AFTER the
     !                   call to setup_grid_heights_api.
-    call setup_grid_api( nlev+1, ncol, sfc_elevation, l_implemented,      & ! intent(in)
-                         grid_type, zi_g(:,2), zi_g(:,1), zi_g(:,nlev+1), & ! intent(in)
+
+    call t_stopf('clubb_tend_cam:ACCR')
+    call t_startf('clubb_tend_cam:NAR')
+    !$acc update host( zi_g, zt_g, clubb_params, sfc_elevation )
+
+    call setup_grid_api( nzm_clubb, ncol, sfc_elevation, l_implemented,      & ! intent(in)
+                         grid_type, zi_g(:,2), zi_g(:,1), zi_g(:,nzm_clubb), & ! intent(in)
                          zi_g, zt_g,                                      & ! intent(in)
                          gr )                                               ! intent(out)
 
-    do i = 1, ncol
-      clubb_params(i,:) = clubb_params_single_col(:)
-    end do
 
     call setup_parameters_api( zi_g(:,2), clubb_params, gr, ncol, grid_type,  & ! intent(in)
                                clubb_config_flags%l_prescribed_avg_deltaz,    & ! intent(in)
                                lmin, nu_vert_res_dep, err_code )                ! intent(out)
+
     if ( err_code == clubb_fatal_error ) then
-       call endrun(subr//':  Fatal error in CLUBB setup_parameters')
+      call endrun(subr//':  Fatal error in CLUBB setup_parameters')
     end if
 
-
-    !  Define forcings from CAM to CLUBB as zero for momentum and thermo,
-    !  forcings already applied through CAM
-    thlm_forcing(:,:) = 0._r8
-    rtm_forcing(:,:) = 0._r8
-    um_forcing(:,:) = 0._r8
-    vm_forcing(:,:) = 0._r8
-
-
-    rtm_ref(:,:) = 0.0_r8
-    thlm_ref(:,:) = 0.0_r8
-    um_ref(:,:) = 0.0_r8
-    vm_ref(:,:) = 0.0_r8
-    ug(:,:) = 0.0_r8
-    vg(:,:) = 0.0_r8
+    call t_stopf('clubb_tend_cam:NAR')
+    call t_startf('clubb_tend_cam:acc_copyin')
+    !$acc data copyin( gr, gr%zm, gr%zt, gr%dzm, gr%dzt, gr%invrs_dzt, gr%invrs_dzm, &
+    !$acc              gr%weights_zt2zm, gr%weights_zm2zt, &
+    !$acc              nu_vert_res_dep, nu_vert_res_dep%nu2, nu_vert_res_dep%nu9, &
+    !$acc              nu_vert_res_dep%nu1, nu_vert_res_dep%nu8, nu_vert_res_dep%nu10, &
+    !$acc              nu_vert_res_dep%nu6)
+    call t_stopf('clubb_tend_cam:acc_copyin')
+    call t_startf('clubb_tend_cam:ACCR')
+
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, nzm_clubb
+      do i = 1, ncol
+        rtp2_forcing(i,k)    = rtp2_mc_zt(i,k)
+        thlp2_forcing(i,k)   = thlp2_mc_zt(i,k)
+        wprtp_forcing(i,k)   = wprtp_mc_zt(i,k)
+        wpthlp_forcing(i,k)  = wpthlp_mc_zt(i,k)
+        rtpthlp_forcing(i,k) = rtpthlp_mc_zt(i,k)
+      end do
+    end do
 
     ! Add forcings for SILHS covariance contributions
-    rtp2_forcing    = zt2zm_api( pverp+1-top_lev, ncol, gr, rtp2_mc_zt(1:ncol,:) )
-    thlp2_forcing   = zt2zm_api( pverp+1-top_lev, ncol, gr, thlp2_mc_zt(1:ncol,:) )
-    wprtp_forcing   = zt2zm_api( pverp+1-top_lev, ncol, gr, wprtp_mc_zt(1:ncol,:) )
-    wpthlp_forcing  = zt2zm_api( pverp+1-top_lev, ncol, gr, wpthlp_mc_zt(1:ncol,:) )
-    rtpthlp_forcing = zt2zm_api( pverp+1-top_lev, ncol, gr, rtpthlp_mc_zt(1:ncol,:) )
+    rtp2_forcing    = zt2zm_api( nzm_clubb, ncol, gr, rtp2_forcing )
+    thlp2_forcing   = zt2zm_api( nzm_clubb, ncol, gr, thlp2_forcing )
+    wprtp_forcing   = zt2zm_api( nzm_clubb, ncol, gr, wprtp_forcing )
+    wpthlp_forcing  = zt2zm_api( nzm_clubb, ncol, gr, wpthlp_forcing )
+    rtpthlp_forcing = zt2zm_api( nzm_clubb, ncol, gr, rtpthlp_forcing )
 
     ! Zero out SILHS covariance contribution terms
-    rtp2_mc_zt(:,:) = 0.0_r8
-    thlp2_mc_zt(:,:) = 0.0_r8
-    wprtp_mc_zt(:,:) = 0.0_r8
-    wpthlp_mc_zt(:,:) = 0.0_r8
-    rtpthlp_mc_zt(:,:) = 0.0_r8
-
-
-    !  Compute some inputs from the thermodynamic grid
-    !  to the momentum grid
-    rho_ds_zm       = zt2zm_api( pverp+1-top_lev, ncol, gr, rho_ds_zt )
-    rho_zm          = zt2zm_api( pverp+1-top_lev, ncol, gr, rho_zt )
-    invrs_rho_ds_zm = zt2zm_api( pverp+1-top_lev, ncol, gr, invrs_rho_ds_zt )
-    thv_ds_zm       = zt2zm_api( pverp+1-top_lev, ncol, gr, thv_ds_zt )
-    wm_zm           = zt2zm_api( pverp+1-top_lev, ncol, gr, wm_zt )
-
-    !  Surface fluxes provided by host model
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, pverp
+      do i = 1, pcols
+        rtp2_mc_zt(i,k)     = 0.0_r8
+        thlp2_mc_zt(i,k)    = 0.0_r8
+        wprtp_mc_zt(i,k)    = 0.0_r8
+        wpthlp_mc_zt(i,k)   = 0.0_r8
+        rtpthlp_mc_zt(i,k)  = 0.0_r8
+      end do
+    end do
+
+    ! Compute some inputs from the thermodynamic grid to the momentum grid
+    rho_ds_zm       = zt2zm_api( nzm_clubb, ncol, gr, rho_ds_zt )
+    rho_zm          = zt2zm_api( nzm_clubb, ncol, gr, rho_zt )
+    invrs_rho_ds_zm = zt2zm_api( nzm_clubb, ncol, gr, invrs_rho_ds_zt )
+    thv_ds_zm       = zt2zm_api( nzm_clubb, ncol, gr, thv_ds_zt )
+    wm_zm           = zt2zm_api( nzm_clubb, ncol, gr, wm_zt )
+
+    ! Surface fluxes provided by host model
+    !$acc parallel loop gang vector default(present)
     do i=1,ncol
       wpthlp_sfc(i) = cam_in%shf(i)/(cpairv(i,pver,lchnk)*rho_ds_zm(i,1)) ! Sensible heat flux
       wpthlp_sfc(i) = wpthlp_sfc(i)*inv_exner_clubb_surf(i)   ! Potential temperature flux
@@ -3213,31 +3483,44 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     ! Implementation after Thomas Toniazzo (NorESM) and Colin Zarzycki (PSU)
     !  Other Surface fluxes provided by host model
     if( (cld_macmic_num_steps > 1) .and. clubb_l_intr_sfc_flux_smooth ) then
-       ! Adjust surface stresses using winds from the prior macmic iteration
-       do i=1,ncol
-          ubar = sqrt(state1%u(i,pver)**2+state1%v(i,pver)**2)
-          if (ubar <  0.25_r8) ubar = 0.25_r8
 
-          call calc_ustar( state1%t(i,pver), state1%pmid(i,pver), cam_in%wsx(i), cam_in%wsy(i), &
-               rrho(i), ustar )
+      call t_stopf('clubb_tend_cam:ACCR')
+      call t_startf('clubb_tend_cam:NAR')
+      !$acc update host( state1%u, state1%v, state1%t, state1%pmid, cam_in%wsx, cam_in%wsy, rrho )
+
+      ! Adjust surface stresses using winds from the prior macmic iteration
+      do i=1,ncol
+        ubar = sqrt(state1%u(i,pver)**2+state1%v(i,pver)**2)
+        if (ubar <  0.25_r8) ubar = 0.25_r8
+
+        call calc_ustar( state1%t(i,pver), state1%pmid(i,pver), cam_in%wsx(i), cam_in%wsy(i), &
+              rrho(i), ustar )
+
+        upwp_sfc(i) = -state1%u(i,pver)*ustar**2/ubar
+        vpwp_sfc(i) = -state1%v(i,pver)*ustar**2/ubar
+      end do
+
+      !$acc update device( upwp_sfc, vpwp_sfc )
+      call t_stopf('clubb_tend_cam:NAR')
+      call t_startf('clubb_tend_cam:ACCR')
 
-          upwp_sfc(i) = -state1%u(i,pver)*ustar**2/ubar
-          vpwp_sfc(i) = -state1%v(i,pver)*ustar**2/ubar
-       end do
     else
-       do i=1,ncol
-          upwp_sfc(i)   = cam_in%wsx(i)/rho_ds_zm(i,1)               ! Surface meridional momentum flux
-          vpwp_sfc(i)   = cam_in%wsy(i)/rho_ds_zm(i,1)               ! Surface zonal momentum flux
-       end do
+
+      !$acc parallel loop gang vector default(present)
+      do i=1,ncol
+        upwp_sfc(i)   = cam_in%wsx(i)/rho_ds_zm(i,1)               ! Surface meridional momentum flux
+        vpwp_sfc(i)   = cam_in%wsy(i)/rho_ds_zm(i,1)               ! Surface zonal momentum flux
+      end do
+
     endif
 
-    ! Perturbed winds are not used in CAM
-    upwp_sfc_pert = 0.0_r8
-    vpwp_sfc_pert = 0.0_r8
+    call t_startf('clubb_tend_cam:flip-index')
 
     !  Need to flip arrays around for CLUBB core
-    do k=1,nlev+1
-      do i=1,ncol
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k = 1, nzm_clubb
+      do i = 1, ncol
+
         um_in(i,k)      = um(i,pverp-k+1)
         vm_in(i,k)      = vm(i,pverp-k+1)
         upwp_in(i,k)    = upwp(i,pverp-k+1)
@@ -3267,18 +3550,6 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
           rcm_inout(i,k) = state1%q(i,pverp-k+1,ixcldliq)
         end if
 
-        ! We only need to copy pdf_params from pbuf if this is a restart and
-        ! we're calling pdf_closure at the end of advance_clubb_core
-        if ( is_first_restart_step() &
-             .and. clubb_config_flags%ipdf_call_placement .eq. ipdf_post_advance_fields ) then
-          pdf_params_zm_chnk(lchnk)%w_1(i,k)        = pdf_zm_w_1(i,pverp-k+1)
-          pdf_params_zm_chnk(lchnk)%w_2(i,k)        = pdf_zm_w_2(i,pverp-k+1)
-          pdf_params_zm_chnk(lchnk)%varnce_w_1(i,k) = pdf_zm_varnce_w_1(i,pverp-k+1)
-          pdf_params_zm_chnk(lchnk)%varnce_w_2(i,k) = pdf_zm_varnce_w_2(i,pverp-k+1)
-          pdf_params_zm_chnk(lchnk)%mixt_frac(i,k)  = pdf_zm_mixt_frac(i,pverp-k+1)
-        end if
-
-        sclrpthvp_inout(i,k,:) = 0._r8
         wp2rtp_inout(i,k)  = wp2rtp(i,pverp-k+1)
         wp2thlp_inout(i,k) = wp2thlp(i,pverp-k+1)
         uprcp_inout(i,k)   = uprcp(i,pverp-k+1)
@@ -3293,63 +3564,33 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       end do
     end do
 
-    ! Perturbed winds are not used in CAM
-    um_pert_inout   = 0.0_r8
-    vm_pert_inout   = 0.0_r8
-    upwp_pert_inout = 0.0_r8
-    vpwp_pert_inout = 0.0_r8
+    ! We only need to copy pdf_params from pbuf if this is a restart and
+    ! we're calling pdf_closure at the end of advance_clubb_core
+    if ( is_first_restart_step() &
+         .and. clubb_config_flags%ipdf_call_placement .eq. ipdf_post_advance_fields ) then
+      !$acc parallel loop gang vector collapse(2) default(present)
+      do k = 1, nzm_clubb
+        do i = 1, ncol
+          pdf_params_zm_chnk(lchnk)%w_1(i,k)        = pdf_zm_w_1(i,pverp-k+1)
+          pdf_params_zm_chnk(lchnk)%w_2(i,k)        = pdf_zm_w_2(i,pverp-k+1)
+          pdf_params_zm_chnk(lchnk)%varnce_w_1(i,k) = pdf_zm_varnce_w_1(i,pverp-k+1)
+          pdf_params_zm_chnk(lchnk)%varnce_w_2(i,k) = pdf_zm_varnce_w_2(i,pverp-k+1)
+          pdf_params_zm_chnk(lchnk)%mixt_frac(i,k)  = pdf_zm_mixt_frac(i,pverp-k+1)
+        end do
+      end do
+    end if
 
-    do k=2,nlev+1
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k=2, nzm_clubb
       do i=1,ncol
         pre_in(i,k) = prer_evap(i,pverp-k+1)
       end do
     end do
 
+    !$acc parallel loop gang vector default(present)
     do i=1,ncol
-      pre_in(i,1) = pre_in(i,2)
-    end do
-
-    do i=1,ncol
-      rcm_inout(i,1) = rcm_inout(i,2)
-    end do
-
-    !  Initialize these to prevent crashing behavior
-    do k=1,nlev+1
-      do i=1,ncol
-        wprcp_out(i,k)        = 0._r8
-        rcm_in_layer_out(i,k) = 0._r8
-        cloud_cover_out(i,k)  = 0._r8
-        edsclr_in(i,k,:)      = 0._r8
-        khzm_out(i,k)         = 0._r8
-        khzt_out(i,k)         = 0._r8
-      end do
-    end do
-
-    !  higher order scalar stuff, put to zero
-    do ixind=1, sclr_dim
-      do k=1, nlev+1
-        do i=1, ncol
-          sclrm(i,k,ixind)     = 0._r8
-          wpsclrp(i,k,ixind)   = 0._r8
-          sclrp2(i,k,ixind)    = 0._r8
-          sclrp3(i,k,ixind)    = 0._r8
-          sclrprtp(i,k,ixind)  = 0._r8
-          sclrpthlp(i,k,ixind) = 0._r8
-          wpsclrp_sfc(i,ixind) = 0._r8
-        end do
-      end do
-    end do
-
-    do ixind=1, hydromet_dim
-      do k=1, nlev+1
-        do i=1, ncol
-          hydromet(i,k,ixind)    = 0._r8
-          wphydrometp(i,k,ixind) = 0._r8
-          wp2hmp(i,k,ixind)      = 0._r8
-          rtphmp_zt(i,k,ixind)   = 0._r8
-          thlphmp_zt(i,k,ixind)  = 0._r8
-        end do
-      end do
+      pre_in(i,1)     = pre_in(i,2)
+      rcm_inout(i,1)  = rcm_inout(i,2)
     end do
 
     ! pressure,exner on momentum grid needed for mass flux calc.
@@ -3369,7 +3610,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
         invrs_exner_zt(i,1) = invrs_exner_zt(i,2)
       end do
 
-      kappa_zm(1:ncol,:) = zt2zm_api(pverp+1-top_lev, ncol, gr, kappa_zt(1:ncol,:))
+      kappa_zm(1:ncol,:) = zt2zm_api(nzm_clubb, ncol, gr, kappa_zt(1:ncol,:))
 
       do k=1,pverp
         do i=1,ncol
@@ -3380,21 +3621,20 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     end if
 
-
     if (clubb_do_adv) then
       if (macmic_it  ==  1) then
 
-        wp2_in     = zt2zm_api(pverp+1-top_lev, ncol, gr, wp2_in )
-        wpthlp_in  = zt2zm_api(pverp+1-top_lev, ncol, gr, wpthlp_in )
-        wprtp_in   = zt2zm_api(pverp+1-top_lev, ncol, gr, wprtp_in )
-        up2_in     = zt2zm_api(pverp+1-top_lev, ncol, gr, up2_in )
-        vp2_in     = zt2zm_api(pverp+1-top_lev, ncol, gr, vp2_in )
-        thlp2_in   = zt2zm_api(pverp+1-top_lev, ncol, gr, thlp2_in )
-        rtp2_in    = zt2zm_api(pverp+1-top_lev, ncol, gr, rtp2_in )
-        rtpthlp_in = zt2zm_api(pverp+1-top_lev, ncol, gr, rtpthlp_in )
+        wp2_in     = zt2zm_api(nzm_clubb, ncol, gr, wp2_in )
+        wpthlp_in  = zt2zm_api(nzm_clubb, ncol, gr, wpthlp_in )
+        wprtp_in   = zt2zm_api(nzm_clubb, ncol, gr, wprtp_in )
+        up2_in     = zt2zm_api(nzm_clubb, ncol, gr, up2_in )
+        vp2_in     = zt2zm_api(nzm_clubb, ncol, gr, vp2_in )
+        thlp2_in   = zt2zm_api(nzm_clubb, ncol, gr, thlp2_in )
+        rtp2_in    = zt2zm_api(nzm_clubb, ncol, gr, rtp2_in )
+        rtpthlp_in = zt2zm_api(nzm_clubb, ncol, gr, rtpthlp_in )
 
-        do k=1,nlev+1
-          do i=1,ncol
+        do k = 1, nzm_clubb
+          do i = 1, ncol
             thlp2_in(i,k) = max(thl_tol**2,thlp2_in(i,k))
             rtp2_in(i,k)  = max(rt_tol**2,rtp2_in(i,k))
             wp2_in(i,k)   = max(w_tol_sqd,wp2_in(i,k))
@@ -3413,12 +3653,14 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
         icnt = icnt+1
 
-        do k=1,nlev
+        !$acc parallel loop gang vector collapse(2) default(present)
+        do k=1,nzt_clubb
           do i=1,ncol
             edsclr_in(i,k+1,icnt) = state1%q(i,pver-k+1,ixind)
           end do
         end do
 
+        !$acc parallel loop gang vector default(present)
         do i=1,ncol
           edsclr_in(i,1,icnt) = edsclr_in(i,2,icnt)
         end do
@@ -3426,15 +3668,17 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       end if
     end do
 
-
     if (clubb_l_do_expldiff_rtm_thlm) then
-      do k=1,nlev
+
+      !$acc parallel loop gang vector collapse(2) default(present)
+      do k=1,nzt_clubb
         do i=1, ncol
           edsclr_in(i,k+1,icnt+1) = thlm(i,pver-k+1)
           edsclr_in(i,k+1,icnt+2) = rtm(i,pver-k+1)
         end do
       end do
 
+      !$acc parallel loop gang vector default(present)
       do i=1, ncol
         edsclr_in(i,1,icnt+1) = edsclr_in(i,2,icnt+1)
         edsclr_in(i,1,icnt+2) = edsclr_in(i,2,icnt+2)
@@ -3442,12 +3686,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     endif
 
-    ! need to initialize macmic coupling to zero
-    if (macmic_it==1) ttend_clubb_mc(:ncol,:) = 0._r8
-    if (macmic_it==1) upwp_clubb_gw_mc(:ncol,:) = 0._r8
-    if (macmic_it==1) vpwp_clubb_gw_mc(:ncol,:) = 0._r8
-    if (macmic_it==1) thlp2_clubb_gw_mc(:ncol,:) = 0._r8
-    if (macmic_it==1) wpthlp_clubb_gw_mc(:ncol,:) = 0._r8
+    call t_stopf('clubb_tend_cam:flip-index')
 
     do t=1,nadv    ! do needed number of "sub" timesteps for each CAM step
 
@@ -3461,6 +3700,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       !###################### CALL MF DIAGNOSTIC PLUMES ######################
       !#######################################################################
       if (do_clubb_mf) then
+        call t_startf('clubb_tend_cam:do_clubb_mf')
 
         do k=2,pverp
           do i=1, ncol
@@ -3473,8 +3713,8 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
           invrs_dzt(i,:) = 1._r8/dzt(i,:)
         end do
 
-        rtm_zm_in(1:ncol,:)  = zt2zm_api( pverp+1-top_lev, ncol, gr, rtm_in(1:ncol,:) )
-        thlm_zm_in(1:ncol,:) = zt2zm_api( pverp+1-top_lev, ncol, gr, thlm_in(1:ncol,:) )
+        rtm_zm_in(1:ncol,:)  = zt2zm_api( nzm_clubb, ncol, gr, rtm_in(1:ncol,:) )
+        thlm_zm_in(1:ncol,:) = zt2zm_api( nzm_clubb, ncol, gr, thlm_in(1:ncol,:) )
 
         do i=1, ncol
           call integrate_mf( pverp, dzt(i,:), zi_g(i,:), p_in_Pa_zm(i,:), invrs_exner_zm(i,:), & ! input
@@ -3511,11 +3751,13 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
                                ((rho_ds_zm(i,k) * mf_thlflx(i,k)) - (rho_ds_zm(i,k-1) * mf_thlflx(i,k-1)))
           end do
         end do
+        call t_stopf('clubb_tend_cam:do_clubb_mf')
 
       end if
 
       !  Advance CLUBB CORE one timestep in the future
-      call advance_clubb_core_api( gr, pverp+1-top_lev, ncol, &
+      call t_startf('clubb_tend_cam:advance_clubb_core_api')
+      call advance_clubb_core_api( gr, nzm_clubb, ncol, &
           l_implemented, dtime, fcor, sfc_elevation, &
           hydromet_dim, &
           sclr_dim, sclr_tol, edsclr_dim, sclr_idx, &
@@ -3559,6 +3801,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
           wprcp_out, w_up_in_cloud_out, w_down_in_cloud_out,  &
           cloudy_updraft_frac_out, cloudy_downdraft_frac_out, &
           rcm_in_layer_out, cloud_cover_out, invrs_tau_zm_out )
+      call t_stopf('clubb_tend_cam:advance_clubb_core_api')
 
       ! Note that CLUBB does not produce an error code specific to any column, and
       ! one value only for the entire chunk
@@ -3574,22 +3817,23 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
         call endrun(subr//':  Fatal error in CLUBB library')
       end if
 
-      if (do_rainturb) then
+      if ( do_rainturb ) then
+        call t_startf('clubb_tend_cam:do_rainturb')
 
-        do k=1,nlev+1
+        do k=1,nzm_clubb
           do i=1,ncol
             rvm_in(i,k) = rtm_in(i,k) - rcm_inout(i,k)
           end do
         end do
 
-        call update_xp2_mc_api( gr, nlev+1, ncol, dtime, cloud_frac_inout, &
+        call update_xp2_mc_api( gr, nzm_clubb, ncol, dtime, cloud_frac_inout, &
           rcm_inout, rvm_in, thlm_in, wm_zt, &
           exner, pre_in, pdf_params_chnk(lchnk), &
           rtp2_mc_out, thlp2_mc_out, &
           wprtp_mc_out, wpthlp_mc_out, &
           rtpthlp_mc_out)
 
-        do k=1,nlev+1
+        do k=1,nzm_clubb
           do i=1,ncol
             dum1 = (1._r8 - cam_in%landfrac(i))
 
@@ -3601,17 +3845,18 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
           end do
         end do
 
+        call t_stopf('clubb_tend_cam:do_rainturb')
       end if
 
-
       if (do_cldcool) then
+        call t_startf('clubb_tend_cam:do_cldcool')
 
-        rcm_out_zm = zt2zm_api(pverp+1-top_lev, ncol, gr, rcm_inout )
-        qrl_zm     = zt2zm_api(pverp+1-top_lev, ncol, gr, qrl_clubb )
+        rcm_out_zm = zt2zm_api(nzm_clubb, ncol, gr, rcm_inout )
+        qrl_zm     = zt2zm_api(nzm_clubb, ncol, gr, qrl_clubb )
         thlp2_rad_out(:,:) = 0._r8
 
         do i=1, ncol
-          call calculate_thlp2_rad_api(nlev+1, rcm_out_zm(i,:), thlprcp_out(i,:), qrl_zm(i,:), clubb_params(i,:), &
+          call calculate_thlp2_rad_api(nzm_clubb, rcm_out_zm(i,:), thlprcp_out(i,:), qrl_zm(i,:), clubb_params(i,:), &
                                        thlp2_rad_out(i,:))
         end do
 
@@ -3619,16 +3864,19 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
           thlp2_in(i,:) = thlp2_in(i,:) + thlp2_rad_out(i,:) * dtime
           thlp2_in(i,:) = max(thl_tol**2,thlp2_in(i,:))
         end do
+        call t_stopf('clubb_tend_cam:do_cldcool')
 
       end if
 
       !  Check to see if stats should be output, here stats are read into
       !  output arrays to make them conformable to CAM output
       if (stats_metadata%l_stats) then
+        call t_startf('clubb_tend_cam:stats_end_timestep_clubb')
         do i=1, ncol
           call stats_end_timestep_clubb(i, stats_zt(i), stats_zm(i), stats_rad_zt(i), stats_rad_zm(i), stats_sfc(i), &
                                         out_zt, out_zm, out_radzt, out_radzm, out_sfc)
         end do
+        call t_stopf('clubb_tend_cam:stats_end_timestep_clubb')
       end if
 
     enddo  ! end time loop
@@ -3636,16 +3884,16 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     if (clubb_do_adv) then
       if (macmic_it  ==  cld_macmic_num_steps) then
 
-        wp2_in     = zm2zt_api( pverp+1-top_lev, ncol, gr, wp2_in )
-        wpthlp_in  = zm2zt_api( pverp+1-top_lev, ncol, gr, wpthlp_in )
-        wprtp_in   = zm2zt_api( pverp+1-top_lev, ncol, gr, wprtp_in )
-        up2_in     = zm2zt_api( pverp+1-top_lev, ncol, gr, up2_in )
-        vp2_in     = zm2zt_api( pverp+1-top_lev, ncol, gr, vp2_in )
-        thlp2_in   = zm2zt_api( pverp+1-top_lev, ncol, gr, thlp2_in )
-        rtp2_in    = zm2zt_api( pverp+1-top_lev, ncol, gr, rtp2_in )
-        rtpthlp_in = zm2zt_api( pverp+1-top_lev, ncol, gr, rtpthlp_in )
+        wp2_in     = zm2zt_api( nzm_clubb, ncol, gr, wp2_in )
+        wpthlp_in  = zm2zt_api( nzm_clubb, ncol, gr, wpthlp_in )
+        wprtp_in   = zm2zt_api( nzm_clubb, ncol, gr, wprtp_in )
+        up2_in     = zm2zt_api( nzm_clubb, ncol, gr, up2_in )
+        vp2_in     = zm2zt_api( nzm_clubb, ncol, gr, vp2_in )
+        thlp2_in   = zm2zt_api( nzm_clubb, ncol, gr, thlp2_in )
+        rtp2_in    = zm2zt_api( nzm_clubb, ncol, gr, rtp2_in )
+        rtpthlp_in = zm2zt_api( nzm_clubb, ncol, gr, rtpthlp_in )
 
-        do k=1,nlev+1
+        do k=1,nzm_clubb
           do i=1, ncol
             thlp2_in(i,k) = max(thl_tol**2, thlp2_in(i,k))
             rtp2_in(i,k)  = max(rt_tol**2, rtp2_in(i,k))
@@ -3659,12 +3907,15 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     end if
 
     ! Convert RTP2 and THLP2 to thermo grid for output
-    rtp2_zt = zm2zt_api( pverp+1-top_lev, ncol, gr, rtp2_in )
-    thl2_zt = zm2zt_api( pverp+1-top_lev, ncol, gr, thlp2_in )
-    wp2_zt  = zm2zt_api( pverp+1-top_lev, ncol, gr, wp2_in )
+    rtp2_zt = zm2zt_api( nzm_clubb, ncol, gr, rtp2_in )
+    thl2_zt = zm2zt_api( nzm_clubb, ncol, gr, thlp2_in )
+    wp2_zt  = zm2zt_api( nzm_clubb, ncol, gr, wp2_in )
+
+    call t_startf('clubb_tend_cam:flip-index')
 
     !  Arrays need to be "flipped" to CAM grid
-    do k=1, nlev+1
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k=1, nzm_clubb
       do i=1, ncol
         um(i,pverp-k+1)           = um_in(i,k)
         vm(i,pverp-k+1)           = vm_in(i,k)
@@ -3723,47 +3974,19 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       end do
     end do
 
-    !  Accumulate vars through macmic subcycle
-    upwp_clubb_gw_mc(:ncol,:)   = upwp_clubb_gw_mc(:ncol,:)   + upwp(:ncol,:)
-    vpwp_clubb_gw_mc(:ncol,:)   = vpwp_clubb_gw_mc(:ncol,:)   + vpwp(:ncol,:)
-    thlp2_clubb_gw_mc(:ncol,:)  = thlp2_clubb_gw_mc(:ncol,:)  + thlp2(:ncol,:)
-    wpthlp_clubb_gw_mc(:ncol,:) = wpthlp_clubb_gw_mc(:ncol,:) + wpthlp(:ncol,:)
-
-    ! And average at last macmic step
-    if (macmic_it == cld_macmic_num_steps) then
-       upwp_clubb_gw(:ncol,:)   = upwp_clubb_gw_mc(:ncol,:)/REAL(cld_macmic_num_steps,r8)
-       vpwp_clubb_gw(:ncol,:)   = vpwp_clubb_gw_mc(:ncol,:)/REAL(cld_macmic_num_steps,r8)
-       thlp2_clubb_gw(:ncol,:)  = thlp2_clubb_gw_mc(:ncol,:)/REAL(cld_macmic_num_steps,r8)
-       wpthlp_clubb_gw(:ncol,:) = wpthlp_clubb_gw_mc(:ncol,:)/REAL(cld_macmic_num_steps,r8)
-    end if
-
-    do k=1, nlev+1
-      do i=1, ncol
-
-        mean_rt = pdf_params_chnk(lchnk)%mixt_frac(i,k) &
-                  * pdf_params_chnk(lchnk)%rt_1(i,k) &
-                  + ( 1.0_r8 - pdf_params_chnk(lchnk)%mixt_frac(i,k) ) &
-                    * pdf_params_chnk(lchnk)%rt_2(i,k)
-
-        pdfp_rtp2(i,pverp-k+1) = pdf_params_chnk(lchnk)%mixt_frac(i,k) &
-                                 * ( ( pdf_params_chnk(lchnk)%rt_1(i,k) - mean_rt )**2 &
-                                     + pdf_params_chnk(lchnk)%varnce_rt_1(i,k) ) &
-                                 + ( 1.0_r8 - pdf_params_chnk(lchnk)%mixt_frac(i,k) ) &
-                                   * ( ( pdf_params_chnk(lchnk)%rt_2(i,k) - mean_rt )**2 &
-                                       + pdf_params_chnk(lchnk)%varnce_rt_2(i,k) )
-      end do
-    end do
-
-    do ixind=1,edsclr_dim
-      do k=1, nlev+1
-        do i=1, ncol
-          edsclr_out(i,pverp-k+1,ixind) = edsclr_in(i,k,ixind)
+    if ( edsclr_dim > 0 ) then
+      !$acc parallel loop gang vector collapse(3) default(present)
+      do ixind=1,edsclr_dim
+        do k=1, nzm_clubb
+          do i=1, ncol
+            edsclr_out(i,pverp-k+1,ixind) = edsclr_in(i,k,ixind)
+          end do
         end do
       end do
-    end do
+    end if
 
     if (do_clubb_mf) then
-      do k=1, nlev+1
+      do k=1, nzm_clubb
         do i=1, ncol
           mf_dry_a_output(i,pverp-k+1)     = mf_dry_a(i,k)
           mf_moist_a_output(i,pverp-k+1)   = mf_moist_a(i,k)
@@ -3794,9 +4017,51 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       end do
     end if
 
-      ! Values to use above top_lev, for variables that have not already been
-      ! set up there. These are mostly fill values that should not actually be
-      ! used in the run, but may end up in diagnostic output.
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k=1, nzm_clubb
+      do i=1, ncol
+
+        mean_rt = pdf_params_chnk(lchnk)%mixt_frac(i,k) &
+                  * pdf_params_chnk(lchnk)%rt_1(i,k) &
+                  + ( 1.0_r8 - pdf_params_chnk(lchnk)%mixt_frac(i,k) ) &
+                    * pdf_params_chnk(lchnk)%rt_2(i,k)
+
+        pdfp_rtp2(i,pverp-k+1) = pdf_params_chnk(lchnk)%mixt_frac(i,k) &
+                                 * ( ( pdf_params_chnk(lchnk)%rt_1(i,k) - mean_rt )**2 &
+                                     + pdf_params_chnk(lchnk)%varnce_rt_1(i,k) ) &
+                                 + ( 1.0_r8 - pdf_params_chnk(lchnk)%mixt_frac(i,k) ) &
+                                   * ( ( pdf_params_chnk(lchnk)%rt_2(i,k) - mean_rt )**2 &
+                                       + pdf_params_chnk(lchnk)%varnce_rt_2(i,k) )
+      end do
+    end do
+
+    call t_stopf('clubb_tend_cam:flip-index')
+
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k=1, pverp
+      do i=1, ncol
+
+        !  Accumulate vars through macmic subcycle
+        upwp_clubb_gw_mc(i,k)   = upwp_clubb_gw_mc(i,k)   + upwp(i,k)
+        vpwp_clubb_gw_mc(i,k)   = vpwp_clubb_gw_mc(i,k)   + vpwp(i,k)
+        thlp2_clubb_gw_mc(i,k)  = thlp2_clubb_gw_mc(i,k)  + thlp2(i,k)
+        wpthlp_clubb_gw_mc(i,k) = wpthlp_clubb_gw_mc(i,k) + wpthlp(i,k)
+
+        ! And average at last macmic step
+        if (macmic_it == cld_macmic_num_steps) then
+           upwp_clubb_gw(i,k)   = upwp_clubb_gw_mc(i,k)/REAL(cld_macmic_num_steps,r8)
+           vpwp_clubb_gw(i,k)   = vpwp_clubb_gw_mc(i,k)/REAL(cld_macmic_num_steps,r8)
+           thlp2_clubb_gw(i,k)  = thlp2_clubb_gw_mc(i,k)/REAL(cld_macmic_num_steps,r8)
+           wpthlp_clubb_gw(i,k) = wpthlp_clubb_gw_mc(i,k)/REAL(cld_macmic_num_steps,r8)
+        end if
+
+      end do
+    end do
+
+    ! Values to use above top_lev, for variables that have not already been
+    ! set up there. These are mostly fill values that should not actually be
+    ! used in the run, but may end up in diagnostic output.
+    !$acc parallel loop gang vector collapse(2) default(present)
     do k=1, top_lev-1
       do i=1, ncol
         upwp(i,k)         = 0._r8
@@ -3812,24 +4077,31 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       end do
     end do
 
+    !  Fill up arrays needed for McICA.  Note we do not want the ghost point,
+    !   thus why the second loop is needed.
+    !$acc parallel loop gang vector default(present)
+    do i=1, pcols
+      zi_out(i,1) = 0._r8
+    end do
+
     ! enforce zero tracer tendencies above the top_lev level -- no change
     icnt=0
     do ixind=1,pcnst
       if (lq(ixind)) then
         icnt=icnt+1
 
-        do i=1, ncol
-          edsclr_out(i,:top_lev-1,icnt) = state1%q(i,:top_lev-1,ixind)
+        !$acc parallel loop gang vector collapse(2) default(present)
+        do k=1, top_lev-1
+          do i=1, ncol
+            edsclr_out(i,k,icnt) = state1%q(i,k,ixind)
+          end do
         end do
 
       end if
     end do
 
-    !  Fill up arrays needed for McICA.  Note we do not want the ghost point,
-    !   thus why the second loop is needed.
-    zi_out(:,1) = 0._r8
-
     ! Compute static energy using CLUBB's variables
+    !$acc parallel loop gang vector collapse(2) default(present)
     do k=1,pver
       do i=1, ncol
         clubb_s(i,k) = cpairv(i,k,lchnk) * thlm(i,k) / inv_exner_clubb(i,k) &
@@ -3843,63 +4115,59 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     ! Initialize clubbtop to top_lev, for finding the highlest level CLUBB is
     !  active for informing where to apply the energy fixer.
+    !$acc parallel loop gang vector default(present)
     do i=1, ncol
       clubbtop(i) = top_lev
       do while ((rtp2(i,clubbtop(i)) <= 1.e-15_r8 .and. rcm(i,clubbtop(i))  ==  0._r8) .and. clubbtop(i) <  pver)
         clubbtop(i) = clubbtop(i) + 1
       end do
     end do
-    !
-    ! set pbuf field so that HB scheme is only applied above CLUBB top
-    !
-    if (do_hb_above_clubb) then
-      call pbuf_set_field(pbuf, clubbtop_idx, clubbtop)
-    endif
 
+    !$acc parallel loop gang vector default(present)
+    do i=1, ncol
 
-    ! Compute integrals for static energy, kinetic energy, water vapor, and liquid water
-    ! after CLUBB is called.  This is for energy conservation purposes.
-    se_a(:) = 0._r8
-    ke_a(:) = 0._r8
-    wv_a(:) = 0._r8
-    wl_a(:) = 0._r8
+      se_a = 0._r8
+      ke_a = 0._r8
+      wv_a = 0._r8
+      wl_a = 0._r8
 
-    do k=1,pver
-      do i=1, ncol
-        se_a(i) = se_a(i) + clubb_s(i,k)*state1%pdel(i,k)*rga
-        ke_a(i) = ke_a(i) + 0.5_r8*(um(i,k)**2+vm(i,k)**2)*state1%pdel(i,k)*rga
-        wv_a(i) = wv_a(i) + (rtm(i,k)-rcm(i,k))*state1%pdeldry(i,k)*rga
-        wl_a(i) = wl_a(i) + (rcm(i,k))*state1%pdeldry(i,k)*rga
+      se_b = 0._r8
+      ke_b = 0._r8
+      wv_b = 0._r8
+      wl_b = 0._r8
+
+      do k=1,pver
+        ! Compute integrals for static energy, kinetic energy, water vapor, and liquid water
+        ! after CLUBB is called.  This is for energy conservation purposes.
+        se_a = se_a + clubb_s(i,k)*state1%pdel(i,k)*rga
+        ke_a = ke_a + 0.5_r8*(um(i,k)**2+vm(i,k)**2)*state1%pdel(i,k)*rga
+        wv_a = wv_a + (rtm(i,k)-rcm(i,k))*state1%pdeldry(i,k)*rga
+        wl_a = wl_a + (rcm(i,k))*state1%pdeldry(i,k)*rga
       end do
-    end do
 
-    ! Do the same as above, but for before CLUBB was called.
-    se_b(:) = 0._r8
-    ke_b(:) = 0._r8
-    wv_b(:) = 0._r8
-    wl_b(:) = 0._r8
+      ! Based on these integrals, compute the total energy after CLUBB call
+      te_a = se_a + ke_a + (latvap+latice) * wv_a + latice * wl_a
 
-    do k=1, pver
-      do i=1, ncol
-        se_b(i) = se_b(i) + state1%s(i,k)*state1%pdel(i,k)*rga
-        ke_b(i) = ke_b(i) + 0.5_r8*(state1%u(i,k)**2+state1%v(i,k)**2)*state1%pdel(i,k)*rga
-        wv_b(i) = wv_b(i) + state1%q(i,k,ixq)*state1%pdeldry(i,k)*rga
-        wl_b(i) = wl_b(i) + state1%q(i,k,ixcldliq)*state1%pdeldry(i,k)*rga
+      do k=1, pver
+        ! Do the same as above, but for before CLUBB was called.
+        se_b = se_b + state1%s(i,k)*state1%pdel(i,k)*rga
+        ke_b = ke_b + 0.5_r8*(state1%u(i,k)**2+state1%v(i,k)**2)*state1%pdel(i,k)*rga
+        wv_b = wv_b + state1%q(i,k,ixq)*state1%pdeldry(i,k)*rga
+        wl_b = wl_b + state1%q(i,k,ixcldliq)*state1%pdeldry(i,k)*rga
       end do
-    end do
 
-
-    do i=1, ncol
-      ! Based on these integrals, compute the total energy before and after CLUBB call
-      te_a(i) = se_a(i) + ke_a(i) + (latvap+latice) * wv_a(i) + latice * wl_a(i)
-      te_b(i) = se_b(i) + ke_b(i) + (latvap+latice) * wv_b(i) + latice * wl_b(i)
+      ! Based on these integrals, compute the total energy before CLUBB call
+      te_b = se_b + ke_b + (latvap+latice) * wv_b + latice * wl_b
 
       ! Take into account the surface fluxes of heat and moisture
       !  Use correct qflux from cam_in, not lhf/latvap as was done previously
-      te_b(i) = te_b(i) + (cam_in%shf(i)+cam_in%cflx(i,1)*(latvap+latice)) * hdtime
+      te_b = te_b + (cam_in%shf(i)+cam_in%cflx(i,1)*(latvap+latice)) * hdtime
 
       ! Compute the disbalance of total energy, over depth where CLUBB is active
-      se_dis(i) = (te_a(i) - te_b(i))/(state1%pint(i,pverp)-state1%pint(i,clubbtop(i)))
+      se_dis(i) = ( te_a - te_b ) / ( state1%pint(i,pverp) - state1%pint(i,clubbtop(i)) )
+
+      eleak(i) = ( te_a - te_b ) / hdtime
+
     end do
 
     ! Fix the total energy coming out of CLUBB so it achieves energy conservation.
@@ -3910,46 +4178,80 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     ! when using specified dynamics, so allow this to be turned off via a namelist
     ! variable.
     if (clubb_do_energyfix) then
+
+      !$acc parallel loop gang vector default(present)
       do i=1, ncol
+
         do k=clubbtop(i),pver
           clubb_s(i,k) = clubb_s(i,k) - se_dis(i)*gravit
         end do
         ! convert to units of +ve [K]
         se_dis(i) = -1._r8*se_dis(i)*gravit/cpairv(i,pver,lchnk)
+
       end do
+
     endif
 
+    !$acc parallel loop gang vector collapse(2) default(present)
+    do k=1, pverp
+      do i=1, ncol
+        wpthvp_clubb(i,k) = wpthvp(i,k) * cpair
+        wprcp_clubb(i,k)  = wprcp(i,k) * latvap
+      end do
+    end do
+
+    call t_stopf('clubb_tend_cam:ACCR')
+
+    call t_startf('clubb_tend_cam:acc_copyout')
+    !$acc end data
+    !$acc end data
+    !$acc end data
+    !$acc end data
+    !$acc end data
+    call t_stopf('clubb_tend_cam:acc_copyout')
+
+    call t_startf('clubb_tend_cam:NAR')
+
+
+    call physics_ptend_init( ptend_loc, state%psetcols, 'clubb', ls=.true., lu=.true., lv=.true., lq=lq )
 
     !  Now compute the tendencies of CLUBB to CAM, note that pverp is the ghost point
     !  for all variables and therefore is never called in this loop
-    rtm_integral_vtend(:) = 0._r8
-    rtm_integral_ltend(:) = 0._r8
+    do i=1, ncol
 
-    do k=1, pver
-       do i=1, ncol
+      rtm_integral_vtend(i) = 0._r8
+      rtm_integral_ltend(i) = 0._r8
 
-          ptend_loc%u(i,k)          = (um(i,k) - state1%u(i,k))               / hdtime ! east-west wind
-          ptend_loc%v(i,k)          = (vm(i,k) - state1%v(i,k))               / hdtime ! north-south wind
-          ptend_loc%q(i,k,ixq)      = (rtm(i,k) - rcm(i,k)-state1%q(i,k,ixq)) / hdtime ! water vapor
-          ptend_loc%q(i,k,ixcldliq) = (rcm(i,k) - state1%q(i,k,ixcldliq))     / hdtime ! Tendency of liquid water
-          ptend_loc%s(i,k)          = (clubb_s(i,k) - state1%s(i,k))          / hdtime ! Tendency of static energy
+      do k=1, pver
 
-          rtm_integral_ltend(i) = rtm_integral_ltend(i) + ptend_loc%q(i,k,ixcldliq)*state1%pdel(i,k)
-          rtm_integral_vtend(i) = rtm_integral_vtend(i) + ptend_loc%q(i,k,ixq)*state1%pdel(i,k)
+        ptend_loc%u(i,k)          = (um(i,k) - state1%u(i,k))               / hdtime ! east-west wind
+        ptend_loc%v(i,k)          = (vm(i,k) - state1%v(i,k))               / hdtime ! north-south wind
+        ptend_loc%q(i,k,ixq)      = (rtm(i,k) - rcm(i,k)-state1%q(i,k,ixq)) / hdtime ! water vapor
+        ptend_loc%q(i,k,ixcldliq) = (rcm(i,k) - state1%q(i,k,ixcldliq))     / hdtime ! Tendency of liquid water
+        ptend_loc%s(i,k)          = (clubb_s(i,k) - state1%s(i,k))          / hdtime ! Tendency of static energy
 
-       end do
-    end do
+        rtm_integral_ltend(i) = rtm_integral_ltend(i) + ptend_loc%q(i,k,ixcldliq)*state1%pdel(i,k)
+        rtm_integral_vtend(i) = rtm_integral_vtend(i) + ptend_loc%q(i,k,ixq)*state1%pdel(i,k)
+
+      end do
 
-    rtm_integral_ltend(:) = rtm_integral_ltend(:)/gravit
-    rtm_integral_vtend(:) = rtm_integral_vtend(:)/gravit
+      rtm_integral_ltend(i) = rtm_integral_ltend(i)/gravit
+      rtm_integral_vtend(i) = rtm_integral_vtend(i)/gravit
+
+    end do
 
     ! Accumulate Air Temperature Tendency (TTEND) for Gravity Wave parameterization
-    ttend_clubb_mc(:ncol,:pver) = ttend_clubb_mc(:ncol,:pver) + ptend_loc%s(:ncol,:pver)/cpair
+    do k=1, pver
+      do i=1, ncol
+        ttend_clubb_mc(i,k) = ttend_clubb_mc(i,k) + ptend_loc%s(i,k)/cpair
 
-    ! Average at last macmic step
-    if (macmic_it == cld_macmic_num_steps) then
-       ttend_clubb(:ncol,:)  = ttend_clubb_mc(:ncol,:pver)/REAL(cld_macmic_num_steps,r8)
-    end if
+        ! Average at last macmic step
+        if (macmic_it == cld_macmic_num_steps) then
+           ttend_clubb(i,k)  = ttend_clubb_mc(i,k) / REAL(cld_macmic_num_steps,r8)
+        end if
+
+      end do
+    end do
 
     if (clubb_do_adv) then
       if (macmic_it == cld_macmic_num_steps) then
@@ -3978,6 +4280,12 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
           end do
         end do
 
+        ! Add constant to ghost point so that output is not corrupted
+        wp3(:,pverp)     = wp3(:,pverp)     + wp3_const
+        rtpthlp(:,pverp) = rtpthlp(:,pverp) + rtpthlp_const
+        wpthlp(:,pverp)  = wpthlp(:,pverp)  + wpthlp_const
+        wprtp(:,pverp)   = wprtp(:,pverp)   + wprtp_const
+
       else
 
         do k=1, pver
@@ -4021,46 +4329,26 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       end if
     end do
 
-    call t_stopf("clubb_tend_cam_i_loop")
-
-    call outfld('KVH_CLUBB', khzm, pcols, lchnk)
-
-    eleak(:ncol) = (te_a(:ncol) - te_b(:ncol))/hdtime
-    call outfld('ELEAK_CLUBB', eleak, pcols, lchnk)
-    call outfld('TFIX_CLUBB', se_dis, pcols, lchnk)
+    rvmtend_clubb(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
+    rcmtend_clubb(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldliq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
+    rimtend_clubb(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldice)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
+    stend_clubb(:ncol,:pver) = ptend_loc%s(:ncol,:pver)
+    utend_clubb(:ncol,:pver) = ptend_loc%u(:ncol,:pver)
+    vtend_clubb(:ncol,:pver) = ptend_loc%v(:ncol,:pver)
+    cmeliq(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldliq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
 
-    ! Add constant to ghost point so that output is not corrupted
-    if (clubb_do_adv) then
-      if (macmic_it == cld_macmic_num_steps) then
-        wp3(:,pverp)     = wp3(:,pverp)     + wp3_const
-        rtpthlp(:,pverp) = rtpthlp(:,pverp) + rtpthlp_const
-        wpthlp(:,pverp)  = wpthlp(:,pverp)  + wpthlp_const
-        wprtp(:,pverp)   = wprtp(:,pverp)   + wprtp_const
-      end if
-    end if
+    !
+    ! set pbuf field so that HB scheme is only applied above CLUBB top
+    !
+    if (do_hb_above_clubb) then
+      call pbuf_set_field(pbuf, clubbtop_idx, clubbtop)
+    endif
 
     ! ------------------------------------------------- !
     ! End column computation of CLUBB, begin to apply   !
     ! and compute output, etc                           !
     ! ------------------------------------------------- !
 
-    !  Output CLUBB tendencies (convert dry basis to wet for consistency with  history variable definition)
-    temp2d(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
-    call outfld( 'RVMTEND_CLUBB', temp2d, pcols, lchnk)
-
-    temp2d(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldliq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
-    call outfld( 'RCMTEND_CLUBB', temp2d, pcols, lchnk)
-
-    temp2d(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldice)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
-    call outfld( 'RIMTEND_CLUBB', temp2d, pcols, lchnk)
-
-    call outfld( 'STEND_CLUBB', ptend_loc%s,pcols, lchnk)
-    call outfld( 'UTEND_CLUBB', ptend_loc%u,pcols, lchnk)
-    call outfld( 'VTEND_CLUBB', ptend_loc%v,pcols, lchnk)
-
-    cmeliq(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldliq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
-    call outfld( 'CMELIQ', cmeliq, pcols, lchnk)
-
     call physics_ptend_sum(ptend_loc,ptend_all,ncol)
     call physics_update(state1,ptend_loc,hdtime)
 
@@ -4071,6 +4359,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     if (clubb_do_liqsupersat) then
 
+      call t_startf('clubb_cam_tend:do_liqsupersat')
       ! -------------------------------------- !
       ! Ice Saturation Adjustment Computation  !
       ! -------------------------------------- !
@@ -4122,6 +4411,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       end where
 
       call outfld( 'FQTENDICE', fqtend, pcols, lchnk )
+      call t_stopf('clubb_cam_tend:do_liqsupersat')
     end if
 
     ! ------------------------------------------------------------ !
@@ -4149,6 +4439,7 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
 
     do k=1,pver
       do i=1,ncol
+
         if( state1%t(i,k) > meltpt_temp ) then
           dum1 = 0.0_r8
         elseif ( state1%t(i,k) < dt_low ) then
@@ -4185,19 +4476,10 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     enddo
 
     det_ice(:ncol) = det_ice(:ncol)/1000._r8  ! divide by density of water
-
-    ! output moist basis to be consistent with history variable definition
-    temp2d(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldliq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
-    call outfld( 'DPDLFLIQ', temp2d, pcols, lchnk)
-
-    ! output moist basis to be consistent with history variable definition
-    temp2d(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldice)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
-    call outfld( 'DPDLFICE', temp2d, pcols, lchnk)
-
-    temp2d(:ncol,:pver) = ptend_loc%s(:ncol,:pver)/cpairv(:ncol,:pver, lchnk)
-    call outfld( 'DPDLFT',   temp2d, pcols, lchnk)
-
-    call outfld( 'DETNLIQTND', ptend_loc%q(:,:,ixnumliq),pcols, lchnk )
+    dpdlfliq(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldliq)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
+    dpdlfice(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixcldice)*state1%pdeldry(:ncol,:pver)/state1%pdel(:ncol,:pver)
+    dpdlft(:ncol,:pver) = ptend_loc%s(:ncol,:pver)/cpairv(:ncol,:pver, lchnk)
+    detnliquid(:ncol,:pver) = ptend_loc%q(:ncol,:pver,ixnumliq)
 
     call physics_ptend_sum(ptend_loc,ptend_all,ncol)
     call physics_update(state1,ptend_loc,hdtime)
@@ -4224,11 +4506,20 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       relvarmax = 10.0_r8
     endif
 
-    relvar(:,:) = relvarmax  ! default
+    do i = 1, ncol
+      do k = 1, pver
+        relvar(i,k) = relvarmax  ! default
+      end do
+    end do
 
     if (deep_scheme .ne. 'CLUBB_SGS') then
-      where (rcm(:ncol,:pver) /= 0 .and. qclvar(:ncol,:pver) /= 0) &
-          relvar(:ncol,:pver) = min(relvarmax,max(0.001_r8,rcm(:ncol,:pver)**2/qclvar(:ncol,:pver)))
+      do i = 1, ncol
+        do k = 1, pver
+          if ( rcm(i,k) /= 0 .and. qclvar(i,k) /= 0 ) then
+            relvar(i,k) = min( relvarmax, max(0.001_r8, rcm(i,k)**2 / qclvar(i,k) ) )
+          end if
+        end do
+      end do
     endif
 
     ! ------------------------------------------------- !
@@ -4341,6 +4632,11 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     !  use the aist_vector function to compute the ice cloud fraction                   !
     ! --------------------------------------------------------------------------------- !
 
+    !REMOVECAM - no longer need this when CAM is retired and pcols no longer exists
+    troplev(:) = 0
+    !REMOVECAM_END
+    call tropopause_findChemTrop( state, troplev )
+
     aist(:,:top_lev-1) = 0._r8
     qsatfac(:, :) = 0._r8 ! Zero out entire profile in case qsatfac is left undefined in aist_vector below
 
@@ -4430,8 +4726,6 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
                 ustar2, obklen, kbfs, pblh, dummy2, &
                 state1%zi, cloud_frac(:,1:pver), 1._r8-cam_in%landfrac, dummy3)
 
-    !  Output the PBL depth
-    call outfld('PBLH', pblh, pcols, lchnk)
 
     ! Assign the first pver levels of cloud_frac back to cld
     cld(:,1:pver) = cloud_frac(:,1:pver)
@@ -4440,6 +4734,30 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     !  END CLOUD FRACTION DIAGNOSIS, begin to store variables back into buffer          !
     ! --------------------------------------------------------------------------------- !
 
+    call outfld( 'DETNLIQTND', detnliquid,pcols, lchnk )
+
+    !  Output CLUBB tendencies (convert dry basis to wet for consistency with  history variable definition)
+    call outfld( 'RVMTEND_CLUBB', rvmtend_clubb, pcols, lchnk)
+    call outfld( 'RCMTEND_CLUBB', rcmtend_clubb, pcols, lchnk)
+    call outfld( 'RIMTEND_CLUBB', rimtend_clubb, pcols, lchnk)
+    call outfld( 'STEND_CLUBB', stend_clubb, pcols, lchnk)
+    call outfld( 'UTEND_CLUBB', utend_clubb, pcols, lchnk)
+    call outfld( 'VTEND_CLUBB', vtend_clubb, pcols, lchnk)
+
+    call outfld( 'CMELIQ', cmeliq, pcols, lchnk)
+
+    ! output moist basis to be consistent with history variable definition
+    call outfld( 'DPDLFLIQ', dpdlfliq, pcols, lchnk)
+    call outfld( 'DPDLFICE', dpdlfice, pcols, lchnk)
+    call outfld( 'DPDLFT',   dpdlft, pcols, lchnk)
+
+    !  Output the PBL depth
+    call outfld('PBLH', pblh, pcols, lchnk)
+
+    call outfld('KVH_CLUBB', khzm, pcols, lchnk)
+    call outfld('ELEAK_CLUBB', eleak, pcols, lchnk)
+    call outfld('TFIX_CLUBB', se_dis, pcols, lchnk)
+
     !  Output calls of variables goes here
     call outfld( 'RELVAR',           relvar,                pcols, lchnk )
     call outfld( 'RHO_CLUBB',        rho(:,1:pver),         pcols, lchnk )
@@ -4456,13 +4774,8 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
     call outfld( 'RCM_CLUBB',        rcm(:,1:pver),         pcols, lchnk )
     call outfld( 'RTM_CLUBB',        rtm(:,1:pver),         pcols, lchnk )
     call outfld( 'THLM_CLUBB',       thlm(:,1:pver),        pcols, lchnk )
-
-    temp2dp(:ncol,:) = wprcp(:ncol,:) * latvap
-    call outfld( 'WPRCP_CLUBB',      temp2dp,                 pcols, lchnk )
-
-    temp2dp(:ncol,:) = wpthvp(:ncol,:) * cpair
-    call outfld( 'WPTHVP_CLUBB',     temp2dp,                 pcols, lchnk )
-
+    call outfld( 'WPRCP_CLUBB',      wprcp_clubb,           pcols, lchnk )
+    call outfld( 'WPTHVP_CLUBB',     wpthvp_clubb,          pcols, lchnk )
     call outfld( 'RTP2_ZT_CLUBB',    rtp2_zt_out(:,1:pver),   pcols, lchnk )
     call outfld( 'THLP2_ZT_CLUBB',   thl2_zt_out(:,1:pver),   pcols, lchnk )
     call outfld( 'WP2_ZT_CLUBB',     wp2_zt_out(:,1:pver),    pcols, lchnk )
@@ -4547,11 +4860,13 @@ subroutine clubb_tend_cam( state,   ptend_all,   pbuf,     hdtime, &
       enddo
 
     endif
+    call t_stopf('clubb_tend_cam:NAR')
+#endif
 
-    call t_stopf("clubb_tend_cam")
+    call t_stopf('clubb_tend_cam')
 
     return
-#endif
+
   end subroutine clubb_tend_cam
 
   subroutine clubb_emissions_cam (state, cam_in, ptend)
@@ -4833,7 +5148,7 @@ subroutine stats_init_clubb( l_stats_in, stats_tsamp_in, stats_tout_in, &
 
     !  Set stats_variables variables with inputs from calling subroutine
     stats_metadata%l_stats = l_stats_in
-    
+
     stats_metadata%stats_tsamp = stats_tsamp_in
     stats_metadata%stats_tout  = stats_tout_in