diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index e03822a909cb..3b670469fa65 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -60,7 +60,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix: 
-        testscript: ['tutorials.py', 'parallel_regression.py', 'parallel_regression_AD.py', 'serial_regression.py', 'serial_regression_AD.py', 'hybrid_regression.py']
+        testscript: ['tutorials.py', 'parallel_regression.py', 'parallel_regression_AD.py', 'serial_regression.py', 'serial_regression_AD.py', 'hybrid_regression.py', 'hybrid_regression_AD.py']
         include:
           - testscript: 'tutorials.py'
             tag: MPI
@@ -74,6 +74,8 @@ jobs:
             tag: NoMPI
           - testscript: 'hybrid_regression.py'
             tag: OMP
+          - testscript: 'hybrid_regression_AD.py'
+            tag: OMP
     steps:
       - name: Download All artifact
         uses: actions/download-artifact@v2
diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp
index 18c430c7b2f7..d39478835e9c 100644
--- a/Common/include/basic_types/ad_structure.hpp
+++ b/Common/include/basic_types/ad_structure.hpp
@@ -252,7 +252,7 @@ namespace AD{
 
   /*!
    * \brief Start a passive region, i.e. stop recording.
-   * \return True is tape was active.
+   * \return True if tape was active.
    */
   inline bool BeginPassive() { return false; }
 
@@ -262,6 +262,28 @@ namespace AD{
    */
   inline void EndPassive(bool wasActive) {}
 
+  /*!
+   * \brief Pause the use of preaccumulation, to be paired with ResumePreaccumulation.
+   * \return True if preaccumulation was active (this stub always returns false).
+   */
+  inline bool PausePreaccumulation() { return false; }
+
+  /*!
+   * \brief Resume the use of preaccumulation (this stub does nothing).
+   * \param[in] wasActive - Value returned by the matching call to PausePreaccumulation.
+   */
+  inline void ResumePreaccumulation(bool wasActive) {}
+
+  /*!
+   * \brief Begin the "no shared reading" hybrid parallel adjoint evaluation mode, which assumes an inherently safe reverse path.
+   */
+  inline void StartNoSharedReading() {}
+
+  /*!
+   * \brief End the "no shared reading" adjoint evaluation mode, to be paired with StartNoSharedReading.
+   */
+  inline void EndNoSharedReading() {}
+
 #else
   using CheckpointHandler = codi::DataStore;
 
@@ -271,9 +293,10 @@ namespace AD{
 
   extern ExtFuncHelper* FuncHelper;
 
-  extern bool Status;
-
   extern bool PreaccActive;
+#ifdef HAVE_OPDI
+  SU2_OMP(threadprivate(PreaccActive))
+#endif
 
   extern bool PreaccEnabled;
 
@@ -290,6 +313,9 @@ namespace AD{
   extern std::vector<TapePosition> TapePositions;
 
   extern codi::PreaccumulationHelper<su2double> PreaccHelper;
+#ifdef HAVE_OPDI
+  SU2_OMP(threadprivate(PreaccHelper))
+#endif
 
   /*--- Reference to the tape. ---*/
 
@@ -446,6 +472,7 @@ namespace AD{
   FORCEINLINE void EndPreacc(){
     if (PreaccActive) {
       PreaccHelper.finish(false);
+      PreaccActive = false; // reset the flag, calling EndPreacc again before it is set anew does nothing
     }
   }
 
@@ -522,6 +549,39 @@ namespace AD{
 
   FORCEINLINE void EndPassive(bool wasActive) { if(wasActive) StartRecording(); }
 
+  FORCEINLINE bool PausePreaccumulation() { // disables PreaccEnabled, returns whether it was enabled
+    const auto current = PreaccEnabled; // state on entry, read before the first barrier below
+    if (!current) return false; // not enabled: nothing to pause, ResumePreaccumulation(false) does nothing
+    SU2_OMP_BARRIER
+    SU2_OMP_MASTER
+    PreaccEnabled = false; // flag is not declared threadprivate, written inside the master section only
+    END_SU2_OMP_MASTER
+    SU2_OMP_BARRIER
+    return true; // was enabled: ResumePreaccumulation(true) turns it back on
+  }
+
+  FORCEINLINE void ResumePreaccumulation(bool wasActive) { // wasActive: result of PausePreaccumulation
+    if (!wasActive) return; // nothing to restore when PausePreaccumulation returned false
+    SU2_OMP_BARRIER
+    SU2_OMP_MASTER
+    PreaccEnabled = true; // undo PausePreaccumulation with the same barrier, master, barrier sequence
+    END_SU2_OMP_MASTER
+    SU2_OMP_BARRIER
+  }
+
+  FORCEINLINE void StartNoSharedReading() { // does nothing unless HAVE_OPDI is defined
+#ifdef HAVE_OPDI
+    opdi::logic->setAdjointAccessMode(opdi::LogicInterface::AdjointAccessMode::Classical); // EndNoSharedReading sets Atomic
+    opdi::logic->addReverseBarrier();
+#endif
+  }
+
+  FORCEINLINE void EndNoSharedReading() { // does nothing unless HAVE_OPDI is defined
+#ifdef HAVE_OPDI
+    opdi::logic->setAdjointAccessMode(opdi::LogicInterface::AdjointAccessMode::Atomic); // StartNoSharedReading sets Classical
+    opdi::logic->addReverseBarrier();
+#endif
+  }
 #endif // CODI_REVERSE_TYPE
 
 } // namespace AD
diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
index c1600b310f42..3bb734a7696c 100644
--- a/Common/include/code_config.hpp
+++ b/Common/include/code_config.hpp
@@ -79,25 +79,15 @@ using su2conditional_t = typename su2conditional<B,T,F>::type;
 #include "codi.hpp"
 #include "codi/tools/dataStore.hpp"
 
-#ifndef CODI_INDEX_TAPE
-#define CODI_INDEX_TAPE 0
-#endif
-#ifndef CODI_PRIMAL_TAPE
-#define CODI_PRIMAL_TAPE 0
-#endif
-#ifndef CODI_PRIMAL_INDEX_TAPE
-#define CODI_PRIMAL_INDEX_TAPE 0
-#endif
-
 #if defined(HAVE_OMP)
 using su2double = codi::RealReverseIndexParallel;
 #else
-#if CODI_INDEX_TAPE
+#if defined(CODI_INDEX_TAPE)
 using su2double = codi::RealReverseIndex;
-#elif CODI_PRIMAL_TAPE
-using su2double = codi::RealReversePrimal;
-#elif CODI_PRIMAL_INDEX_TAPE
-using su2double = codi::RealReversePrimalIndex;
+//#elif defined(CODI_PRIMAL_TAPE)
+//using su2double = codi::RealReversePrimal;
+//#elif defined(CODI_PRIMAL_INDEX_TAPE)
+//using su2double = codi::RealReversePrimalIndex;
 #else
 using su2double = codi::RealReverse;
 #endif
diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp
index c69643cefe14..06bd75bc349a 100644
--- a/Common/include/linear_algebra/CSysSolve.hpp
+++ b/Common/include/linear_algebra/CSysSolve.hpp
@@ -256,6 +256,7 @@ class CSysSolve {
   void HandleTemporariesOut(CSysVector<OtherType>& LinSysSol) {
 
     /*--- Reset the pointers. ---*/
+    SU2_OMP_BARRIER
     SU2_OMP_MASTER {
       LinSysRes_ptr = nullptr;
       LinSysSol_ptr = nullptr;
@@ -276,6 +277,7 @@ class CSysSolve {
     LinSysSol.PassiveCopy(LinSysSol_tmp);
 
     /*--- Reset the pointers. ---*/
+    SU2_OMP_BARRIER
     SU2_OMP_MASTER {
       LinSysRes_ptr = nullptr;
       LinSysSol_ptr = nullptr;
diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp
index 9dba7b4d9559..410a9bef4b5c 100644
--- a/Common/include/toolboxes/graph_toolbox.hpp
+++ b/Common/include/toolboxes/graph_toolbox.hpp
@@ -527,7 +527,7 @@ T createNaturalColoring(Index_t numInnerIndexes)
  * \param[out] indexColor - Optional, vector with colors given to the outer indices.
  * \return Coloring in the same type of the input pattern.
  */
-template<class T, typename Color_t = char, size_t MaxColors = 32, size_t MaxMB = 128>
+template<class T, typename Color_t = char, size_t MaxColors = 64, size_t MaxMB = 128>
 T colorSparsePattern(const T& pattern, size_t groupSize = 1, bool balanceColors = false,
                      std::vector<Color_t>* indexColor = nullptr)
 {
diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp
index e271a37315ef..edafa7b838d2 100644
--- a/Common/src/CConfig.cpp
+++ b/Common/src/CConfig.cpp
@@ -4473,11 +4473,7 @@ void CConfig::SetPostprocessing(SU2_COMPONENT val_software, unsigned short val_i
 #if defined CODI_REVERSE_TYPE
   AD_Mode = YES;
 
-#if defined HAVE_OMP
-  AD::PreaccEnabled = false;
-#else
   AD::PreaccEnabled = AD_Preaccumulation;
-#endif
 
 #else
   if (AD_Mode == YES) {
diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp
index f6defb624350..4925466c0fad 100644
--- a/Common/src/basic_types/ad_structure.cpp
+++ b/Common/src/basic_types/ad_structure.cpp
@@ -35,9 +35,16 @@ namespace AD {
   std::vector<TapePosition> TapePositions;
 
   bool PreaccActive = false;
+#ifdef HAVE_OPDI
+  SU2_OMP(threadprivate(PreaccActive))
+#endif
+
   bool PreaccEnabled = true;
 
   codi::PreaccumulationHelper<su2double> PreaccHelper;
+#ifdef HAVE_OPDI
+  SU2_OMP(threadprivate(PreaccHelper))
+#endif
 
   ExtFuncHelper* FuncHelper;
 
diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp
index 0ac704389c11..b770f6b95db4 100644
--- a/Common/src/geometry/CPhysicalGeometry.cpp
+++ b/Common/src/geometry/CPhysicalGeometry.cpp
@@ -7701,7 +7701,8 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
 
       const auto nNodes = bound[iMarker][iElem]->GetnNodes();
 
-      AD::StartPreacc();
+      /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/
+      if (omp_get_num_threads() == 1) AD::StartPreacc();
 
       /*--- Get pointers to the coordinates of all the element nodes ---*/
       array<const su2double*, N_POINTS_MAXIMUM> Coord;
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index 14752136f9d9..ef60b2464f14 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -185,10 +185,17 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   /*--- This is akin to the row_ptr. ---*/
   omp_partitions = new unsigned long [omp_num_parts+1];
 
-  /// TODO: Use a work estimate to produce more balanced partitions.
-  auto pts_per_part = roundUpDiv(nPointDomain, omp_num_parts);
-  for(auto part = 0ul; part < omp_num_parts; ++part)
-    omp_partitions[part] = part * pts_per_part;
+  /*--- Work estimate based on non-zeros to produce balanced partitions. ---*/
+  const auto row_ptr_prec = ilu_needed? row_ptr_ilu : row_ptr;
+  const auto nnz_prec = row_ptr_prec[nPointDomain];
+  const auto nnz_per_part = roundUpDiv(nnz_prec, omp_num_parts);
+
+  /*--- Default all boundaries to nPointDomain, the scan below may not reach every partition
+   *    (fewer rows than partitions, very dense rows), those are then empty, not uninitialized. ---*/
+  for (auto part = 0ul; part < omp_num_parts; ++part) omp_partitions[part] = nPointDomain;
+  for (auto iPoint = 0ul, part = 0ul; iPoint < nPointDomain && part < omp_num_parts; ++iPoint) {
+    if (row_ptr_prec[iPoint] >= part*nnz_per_part) omp_partitions[part++] = iPoint;
+  }
   omp_partitions[omp_num_parts] = nPointDomain;
 
   /*--- Generate MKL Kernels ---*/
diff --git a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
index 38934f8a2d9d..120efce7be56 100644
--- a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
+++ b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
@@ -76,7 +76,8 @@ void computeGradientsGreenGauss(CSolver* solver,
   {
     auto nodes = geometry.nodes;
 
-    AD::StartPreacc();
+    /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/
+    if (omp_get_num_threads() == 1) AD::StartPreacc();
     AD::SetPreaccIn(nodes->GetVolume(iPoint));
     AD::SetPreaccIn(nodes->GetPeriodicVolume(iPoint));
 
diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
index dcd923901dcb..b960503deb6b 100644
--- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
+++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
@@ -203,7 +203,8 @@ void computeGradientsLeastSquares(CSolver* solver,
     auto nodes = geometry.nodes;
     const auto coord_i = nodes->GetCoord(iPoint);
 
-    AD::StartPreacc();
+    /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/
+    if (omp_get_num_threads() == 1) AD::StartPreacc();
     AD::SetPreaccIn(coord_i, nDim);
 
     for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
diff --git a/SU2_CFD/include/limiters/computeLimiters_impl.hpp b/SU2_CFD/include/limiters/computeLimiters_impl.hpp
index 2876c889f66b..b9613621b11d 100644
--- a/SU2_CFD/include/limiters/computeLimiters_impl.hpp
+++ b/SU2_CFD/include/limiters/computeLimiters_impl.hpp
@@ -132,7 +132,8 @@ void computeLimiters_impl(CSolver* solver,
     auto nodes = geometry.nodes;
     const auto coord_i = nodes->GetCoord(iPoint);
 
-    AD::StartPreacc();
+    /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/
+    if (omp_get_num_threads() == 1) AD::StartPreacc();
     AD::SetPreaccIn(coord_i, nDim);
 
     for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
index 874d57b0c457..5c57b9540f6e 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
@@ -319,7 +319,11 @@ void CFVMFlowSolverBase<V, R>::HybridParallelInitialization(const CConfig& confi
       cout << "WARNING: On " << numRanksUsingReducer << " MPI ranks the coloring efficiency was less than "
            << COLORING_EFF_THRESH << " (min value was " << minEff << ").\n"
            << "         Those ranks will now use a fallback strategy, better performance may be possible\n"
-           << "         with a different value of config option EDGE_COLORING_GROUP_SIZE (default 512)." << endl;
+           << "         with a different value of config option EDGE_COLORING_GROUP_SIZE (default 512)."
+#ifdef HAVE_OPDI
+           << "\n         The memory usage of the discrete adjoint solver is higher when using the fallback."
+#endif
+           << endl;
     }
 
     if (config.GetUseVectorization() && (omp_get_max_threads() > 1) &&
@@ -1531,6 +1535,12 @@ void CFVMFlowSolverBase<V, R>::EdgeFluxResidual(const CGeometry *geometry,
     InstantiateEdgeNumerics(solvers, config);
   }
 
+  /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of
+  * variables, otherwise switch to the faster adjoint evaluation mode. ---*/
+  bool pausePreacc = false;
+  if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation();
+  else AD::StartNoSharedReading();
+
   /*--- Loop over edge colors. ---*/
   for (auto color : EdgeColoring) {
     /*--- Chunk size is at least OMP_MIN_SIZE and a multiple of the color group size. ---*/
@@ -1553,6 +1563,10 @@ void CFVMFlowSolverBase<V, R>::EdgeFluxResidual(const CGeometry *geometry,
     END_SU2_OMP_FOR
   }
 
+  /*--- Restore preaccumulation and adjoint evaluation state. ---*/
+  AD::ResumePreaccumulation(pausePreacc);
+  if (!ReducerStrategy) AD::EndNoSharedReading();
+
   if (ReducerStrategy) {
     SumEdgeFluxes(geometry);
     if (config->GetKind_TimeIntScheme() == EULER_IMPLICIT) {
@@ -1607,6 +1621,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
 
     /*--- Loop over all nodes (excluding halos) ---*/
 
+    AD::StartNoSharedReading();
+
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -1642,6 +1658,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
     }
     END_SU2_OMP_FOR
 
+    AD::EndNoSharedReading();
+
   }
 
   else {
@@ -1719,6 +1737,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
     /*--- Loop over all nodes (excluding halos) to compute the remainder
      of the dual time-stepping source term. ---*/
 
+    AD::StartNoSharedReading();
+
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -1756,6 +1776,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
       }
     }
     END_SU2_OMP_FOR
+
+    AD::EndNoSharedReading();
   }
 
 }
diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp
index 99f7003d048c..37f2cdabc421 100644
--- a/SU2_CFD/src/SU2_CFD.cpp
+++ b/SU2_CFD/src/SU2_CFD.cpp
@@ -73,11 +73,6 @@ int main(int argc, char *argv[]) {
 #endif
   SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm();
 
-  /*--- AD initialization ---*/
-#ifdef HAVE_OPDI
-  AD::getGlobalTape().initialize();
-#endif
-
   /*--- Uncomment the following line if runtime NaN catching is desired. ---*/
   // feenableexcept(FE_INVALID | FE_OVERFLOW | FE_DIVBYZERO );
 
diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
index c52d5afe0e05..e9d15ee6809b 100644
--- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
+++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
@@ -880,6 +880,12 @@ void CDiscAdjMultizoneDriver::SetAdj_ObjFunction() {
 
 void CDiscAdjMultizoneDriver::ComputeAdjoints(unsigned short iZone, bool eval_transfer) {
 
+#if defined(CODI_INDEX_TAPE) || defined(HAVE_OPDI)
+  if (nZone > 1 && rank == MASTER_NODE) {
+    std::cout << "WARNING: Index AD types do not support multiple zones." << std::endl;
+  }
+#endif
+
   AD::ClearAdjoints();
 
   /*--- Initialize the adjoints in iZone ---*/
diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp
index 010ae194cefd..5228561df82b 100644
--- a/SU2_CFD/src/integration/CIntegration.cpp
+++ b/SU2_CFD/src/integration/CIntegration.cpp
@@ -76,6 +76,10 @@ void CIntegration::Space_Integration(CGeometry *geometry,
   CNumerics* conv_bound_numerics = numerics[CONV_BOUND_TERM + omp_get_thread_num()*MAX_TERMS];
   CNumerics* visc_bound_numerics = numerics[VISC_BOUND_TERM + omp_get_thread_num()*MAX_TERMS];
 
+  /*--- Pause preaccumulation in boundary conditions for hybrid parallel AD. ---*/
+  /// TODO: Check if this is really needed.
+  //const auto pausePreacc = (omp_get_num_threads() > 1) && AD::PausePreaccumulation();
+
   /*--- Boundary conditions that depend on other boundaries (they require MPI sincronization)---*/
 
   solver_container[MainSolver]->BC_Fluid_Interface(geometry, solver_container, conv_bound_numerics, visc_bound_numerics, config);
@@ -181,6 +185,8 @@ void CIntegration::Space_Integration(CGeometry *geometry,
     solver_container[MainSolver]->BC_Periodic(geometry, solver_container, conv_bound_numerics, config);
   }
 
+  //AD::ResumePreaccumulation(pausePreacc);
+
 }
 
 void CIntegration::Time_Integration(CGeometry *geometry, CSolver **solver_container, CConfig *config,
diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
index d361e5e49721..a29e1a0e9872 100644
--- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp
+++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
@@ -547,10 +547,12 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
 
 void CMultiGridIntegration::SetResidual_Term(CGeometry *geometry, CSolver *solver) {
 
+  AD::StartNoSharedReading(); // each iteration below is indexed by its own iPoint only
   SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++)
     solver->LinSysRes.AddBlock(iPoint, solver->GetNodes()->GetResTruncError(iPoint));
   END_SU2_OMP_FOR
+  AD::EndNoSharedReading(); // pairs with the StartNoSharedReading before the loop
 
 }
 
diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp
index 1b9269be9971..2233bfc548cd 100644
--- a/SU2_CFD/src/solvers/CEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CEulerSolver.cpp
@@ -1886,6 +1886,8 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c
    *    further reduction if function is called in parallel ---*/
   unsigned long nonPhysicalPoints = 0;
 
+  AD::StartNoSharedReading();
+
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) {
 
@@ -1900,6 +1902,8 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
+
   return nonPhysicalPoints;
 }
 
@@ -1992,6 +1996,12 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
   su2double Primitive_i[MAXNVAR] = {0.0}, Primitive_j[MAXNVAR] = {0.0};
   su2double Secondary_i[MAXNVAR] = {0.0}, Secondary_j[MAXNVAR] = {0.0};
 
+  /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of
+  * variables, otherwise switch to the faster adjoint evaluation mode. ---*/
+  bool pausePreacc = false;
+  if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation();
+  else AD::StartNoSharedReading();
+
   /*--- Loop over edge colors. ---*/
   for (auto color : EdgeColoring)
   {
@@ -2176,6 +2186,10 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
   END_SU2_OMP_FOR
   } // end color loop
 
+  /*--- Restore preaccumulation and adjoint evaluation state. ---*/
+  AD::ResumePreaccumulation(pausePreacc);
+  if (!ReducerStrategy) AD::EndNoSharedReading();
+
   if (ReducerStrategy) {
     SumEdgeFluxes(geometry);
     if (implicit)
@@ -2284,6 +2298,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
   if (body_force) {
 
     /*--- Loop over all points ---*/
+    AD::StartNoSharedReading();
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -2302,6 +2317,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
 
     }
     END_SU2_OMP_FOR
+    AD::EndNoSharedReading();
   }
 
   if (rotating_frame) {
@@ -2312,6 +2328,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
     SetRotatingFrame_GCL(geometry, config);
 
     /*--- Loop over all points ---*/
+    AD::StartNoSharedReading();
     SU2_OMP_FOR_DYN(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -2333,6 +2350,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
 
     }
     END_SU2_OMP_FOR
+    AD::EndNoSharedReading();
   }
 
   if (axisymmetric) {
@@ -2343,6 +2361,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
     }
 
     /*--- loop over points ---*/
+    AD::StartNoSharedReading();
     SU2_OMP_FOR_DYN(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -2387,8 +2406,12 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
         Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
     }
     END_SU2_OMP_FOR
+
+    AD::EndNoSharedReading();
   }
 
+  AD::StartNoSharedReading();
+
   if (gravity) {
 
     /*--- loop over points ---*/
@@ -2492,6 +2515,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
     }
   }
 
+  AD::EndNoSharedReading();
 }
 
 void CEulerSolver::Source_Template(CGeometry *geometry, CSolver **solver_container, CNumerics *numerics,
diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
index 5b16a24c00af..d50b53aabdbc 100644
--- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
@@ -938,6 +938,8 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container
 
   unsigned long iPoint, nonPhysicalPoints = 0;
 
+  AD::StartNoSharedReading();
+
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (iPoint = 0; iPoint < nPoint; iPoint ++) {
 
@@ -951,6 +953,8 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
+
   return nonPhysicalPoints;
 }
 
@@ -1018,6 +1022,12 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co
   bool implicit    = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT);
   bool jst_scheme  = ((config->GetKind_Centered_Flow() == JST) && (iMesh == MESH_0));
 
+  /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of
+  * variables, otherwise switch to the faster adjoint evaluation mode. ---*/
+  bool pausePreacc = false;
+  if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation();
+  else AD::StartNoSharedReading();
+
   /*--- Loop over edge colors. ---*/
   for (auto color : EdgeColoring)
   {
@@ -1082,6 +1092,10 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co
   END_SU2_OMP_FOR
   } // end color loop
 
+  /*--- Restore preaccumulation and adjoint evaluation state. ---*/
+  AD::ResumePreaccumulation(pausePreacc);
+  if (!ReducerStrategy) AD::EndNoSharedReading();
+
   if (ReducerStrategy) {
     SumEdgeFluxes(geometry);
     if (implicit)
@@ -1110,6 +1124,12 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
   const bool limiter    = (config->GetKind_SlopeLimit_Flow() != NO_LIMITER);
   const bool van_albada = (config->GetKind_SlopeLimit_Flow() == VAN_ALBADA_EDGE);
 
+  /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of
+  * variables, otherwise switch to the faster adjoint evaluation mode. ---*/
+  bool pausePreacc = false;
+  if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation();
+  else AD::StartNoSharedReading();
+
   /*--- Loop over edge colors. ---*/
   for (auto color : EdgeColoring)
   {
@@ -1250,6 +1270,10 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
   END_SU2_OMP_FOR
   } // end color loop
 
+  /*--- Restore preaccumulation and adjoint evaluation state. ---*/
+  AD::ResumePreaccumulation(pausePreacc);
+  if (!ReducerStrategy) AD::EndNoSharedReading();
+
   if (ReducerStrategy) {
     SumEdgeFluxes(geometry);
     if (implicit)
@@ -1298,6 +1322,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
   const bool streamwise_periodic             = (config->GetKind_Streamwise_Periodic() != ENUM_STREAMWISE_PERIODIC::NONE);
   const bool streamwise_periodic_temperature = config->GetStreamwise_Periodic_Temperature();
 
+  AD::StartNoSharedReading();
+
   if (body_force) {
 
     /*--- Loop over all points ---*/
@@ -1399,12 +1425,16 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
     END_SU2_OMP_FOR
   }
 
+  AD::EndNoSharedReading();
+
   if (axisymmetric) {
 
     /*--- For viscous problems, we need an additional gradient. ---*/
 
     if (viscous) {
 
+      AD::StartNoSharedReading();
+
       SU2_OMP_FOR_STAT(omp_chunk_size)
       for (iPoint = 0; iPoint < nPoint; iPoint++) {
 
@@ -1423,6 +1453,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       }
       END_SU2_OMP_FOR
 
+      AD::EndNoSharedReading();
+
       /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/
 
       if (config->GetKind_Gradient_Method() == GREEN_GAUSS) {
@@ -1436,6 +1468,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
 
     /*--- loop over points ---*/
 
+    AD::StartNoSharedReading();
+
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -1486,10 +1520,14 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
 
     }
     END_SU2_OMP_FOR
+
+    AD::EndNoSharedReading();
   }
 
   if (radiation) {
 
+    AD::StartNoSharedReading();
+
     CNumerics* second_numerics = numerics_container[SOURCE_SECOND_TERM + omp_get_thread_num()*MAX_TERMS];
 
     SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -1530,6 +1568,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
     }
     END_SU2_OMP_FOR
 
+    AD::EndNoSharedReading();
   }
 
   if (streamwise_periodic) {
@@ -1537,6 +1576,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
     /*--- For turbulent streamwise periodic problems w/ energy eq, we need an additional gradient of Eddy viscosity. ---*/
     if (streamwise_periodic_temperature && turbulent) {
 
+      AD::StartNoSharedReading();
+
       SU2_OMP_FOR_STAT(omp_chunk_size)
       for (iPoint = 0; iPoint < nPoint; iPoint++) {
         /*--- Set the auxiliary variable, Eddy viscosity mu_t, for this node. ---*/
@@ -1544,6 +1585,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       }
       END_SU2_OMP_FOR
 
+      AD::EndNoSharedReading();
+
       /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/
       if (config->GetKind_Gradient_Method() == GREEN_GAUSS) {
         SetAuxVar_Gradient_GG(geometry, config);
@@ -1557,6 +1600,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
     /*--- Set delta_p, m_dot, inlet_T, integrated_heat ---*/
     numerics->SetStreamwisePeriodicValues(SPvals);
 
+    AD::StartNoSharedReading();
+
     /*--- Loop over all points ---*/
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
@@ -1584,6 +1629,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
     } // for iPoint
     END_SU2_OMP_FOR
 
+    AD::EndNoSharedReading();
+
     if(!streamwise_periodic_temperature && energy) {
 
       CNumerics* second_numerics = numerics_container[SOURCE_SECOND_TERM + omp_get_thread_num()*MAX_TERMS];
@@ -1625,7 +1672,6 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
           END_SU2_OMP_FOR
         }// if periodic inlet boundary
       }// for iMarker
-
     }// if !streamwise_periodic_temperature
   }// if streamwise_periodic
 
@@ -1638,6 +1684,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       su2double time = 0.0;
       if (config->GetTime_Marching() != TIME_MARCHING::STEADY) time = config->GetPhysicalTime();
 
+      AD::StartNoSharedReading();
+
       /*--- Loop over points ---*/
       SU2_OMP_FOR_STAT(omp_chunk_size)
       for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
@@ -1659,6 +1707,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
 
       }
       END_SU2_OMP_FOR
+
+      AD::EndNoSharedReading();
     }
   }
 
@@ -2535,6 +2585,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
 
     /*--- Loop over all nodes (excluding halos) ---*/
 
+    AD::StartNoSharedReading();
+
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -2587,6 +2639,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
       }
     }
     END_SU2_OMP_FOR
+
+    AD::EndNoSharedReading();
   }
 
   else {
@@ -2674,6 +2728,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
     /*--- Loop over all nodes (excluding halos) to compute the remainder
      of the dual time-stepping source term. ---*/
 
+    AD::StartNoSharedReading();
+
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -2729,6 +2785,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
       }
     }
     END_SU2_OMP_FOR
+
+    AD::EndNoSharedReading();
   }
 
 }
diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp
index 6591c503ea10..befbe73bf65d 100644
--- a/SU2_CFD/src/solvers/CIncNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp
@@ -327,6 +327,8 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c
 
   bool tkeNeeded = ((turb_model == SST) || (turb_model == SST_SUST));
 
+  AD::StartNoSharedReading();
+
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (iPoint = 0; iPoint < nPoint; iPoint++) {
 
@@ -356,6 +358,8 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
+
   return nonPhysicalPoints;
 
 }
diff --git a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
index 73651b8e74db..4dd24469de4d 100644
--- a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
@@ -793,6 +793,8 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con
     ComputeAxisymmetricAuxGradients(geometry,config);
   }
 
+  AD::StartNoSharedReading();
+
   /*--- loop over interior points ---*/
   SU2_OMP_FOR_DYN(omp_chunk_size)
   for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
@@ -918,6 +920,8 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
+
   /*--- Checking for NaN ---*/
   unsigned long eAxi_global = eAxi_local;
   unsigned long eChm_global = eChm_local;
diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp
index 056e0c478241..b07f6798be33 100644
--- a/SU2_CFD/src/solvers/CNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CNSSolver.cpp
@@ -135,6 +135,8 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons
   const unsigned short turb_model = config->GetKind_Turb_Model();
   const bool tkeNeeded = (turb_model == SST) || (turb_model == SST_SUST);
 
+  AD::StartNoSharedReading();
+
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) {
 
@@ -164,6 +166,8 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
+
   return nonPhysicalPoints;
 }
 
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index 467ba4d519c6..4e360ff799db 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -4213,7 +4213,7 @@ void CSolver::BasicLoadRestart(CGeometry *geometry, const CConfig *config, const
 }
 
 void CSolver::SavelibROM(CGeometry *geometry, CConfig *config, bool converged) {
-  
+
 #if defined(HAVE_LIBROM) && !defined(CODI_FORWARD_TYPE) && !defined(CODI_REVERSE_TYPE)
   const bool unsteady            = config->GetTime_Domain();
   const string filename          = config->GetlibROMbase_FileName();
@@ -4225,38 +4225,38 @@ void CSolver::SavelibROM(CGeometry *geometry, CConfig *config, bool converged) {
   bool incremental = false;
 
   if (!u_basis_generator) {
-    
+
     /*--- Define SVD basis generator ---*/
     auto timesteps = static_cast<int>(nTimeIter - TimeIter);
     CAROM::Options svd_options = CAROM::Options(dim, timesteps, -1,
                                                 false, true).setMaxBasisDimension(int(maxBasisDim));
-    
+
     if (config->GetKind_PODBasis() == POD_KIND::STATIC) {
       if (rank == MASTER_NODE) std::cout << "Creating static basis generator." << std::endl;
-      
+
       if (unsteady) {
         if (rank == MASTER_NODE) std::cout << "Incremental basis generator recommended for unsteady simulations." << std::endl;
       }
     }
     else {
       if (rank == MASTER_NODE) std::cout << "Creating incremental basis generator." << std::endl;
-      
+
       svd_options.setIncrementalSVD(1.0e-3, config->GetDelta_UnstTime(),
                                     1.0e-2, config->GetDelta_UnstTime()*nTimeIter, true).setDebugMode(false);
       incremental = true;
     }
-    
+
     u_basis_generator.reset(new CAROM::BasisGenerator(
       svd_options, incremental,
       filename));
-    
+
     // Save mesh ordering
     std::ofstream f;
     f.open(filename + "_mesh_" + to_string(rank) + ".csv");
       for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
         unsigned long globalPoint = geometry->nodes->GetGlobalIndex(iPoint);
         auto Coord = geometry->nodes->GetCoord(iPoint);
-        
+
         for (unsigned long iDim; iDim < nDim; iDim++) {
           f << Coord[iDim] << ", ";
         }
@@ -4271,31 +4271,31 @@ void CSolver::SavelibROM(CGeometry *geometry, CConfig *config, bool converged) {
     su2double t =  config->GetCurrent_UnstTime();
     u_basis_generator->takeSample(const_cast<su2double*>(base_nodes->GetSolution().data()), t, dt);
   }
-   
+
   /*--- End collection of data and save POD ---*/
-  
+
   if (converged) {
-  
+
     if (!unsteady) {
        // dt is different for each node, so just use a placeholder dt
        su2double dt = base_nodes->GetDelta_Time(0);
        su2double t = dt*TimeIter;
        u_basis_generator->takeSample(const_cast<su2double*>(base_nodes->GetSolution().data()), t, dt);
     }
-    
+
     if (config->GetKind_PODBasis() == POD_KIND::STATIC) {
       u_basis_generator->writeSnapshot();
     }
-    
+
     if (rank == MASTER_NODE) std::cout << "Computing SVD" << std::endl;
     int rom_dim = u_basis_generator->getSpatialBasis()->numColumns();
-    
+
     if (rank == MASTER_NODE) std::cout << "Basis dimension: " << rom_dim << std::endl;
     u_basis_generator->endSamples();
-    
+
     if (rank == MASTER_NODE) std::cout << "ROM Sampling ended" << std::endl;
   }
-  
+
 #else
   SU2_MPI::Error("SU2 was not compiled with libROM support.", CURRENT_FUNCTION);
 #endif
diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp
index 0cef0e5ecb90..b5ffb229fd34 100644
--- a/SU2_CFD/src/solvers/CTurbSASolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp
@@ -255,6 +255,8 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain
 
   /*--- Compute eddy viscosity ---*/
 
+  AD::StartNoSharedReading();
+
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) {
 
@@ -284,6 +286,7 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
 }
 
 
@@ -297,10 +300,11 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai
 
   CVariable* flowNodes = solver_container[FLOW_SOL]->GetNodes();
 
-
   /*--- Pick one numerics object per thread. ---*/
   CNumerics* numerics = numerics_container[SOURCE_FIRST_TERM + omp_get_thread_num()*MAX_TERMS];
 
+  AD::StartNoSharedReading();
+
   /*--- Loop over all points. ---*/
 
   SU2_OMP_FOR_DYN(omp_chunk_size)
@@ -400,6 +404,8 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai
     END_SU2_OMP_FOR
   }
 
+  AD::EndNoSharedReading();
+
 }
 
 void CTurbSASolver::Source_Template(CGeometry *geometry, CSolver **solver_container, CNumerics *numerics,
diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
index 819a57c0a49f..f5a6fe8f87b9 100644
--- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
@@ -244,6 +244,8 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai
     SetSolution_Gradient_LS(geometry, config);
   }
 
+  AD::StartNoSharedReading();
+
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) {
 
@@ -275,6 +277,7 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
 }
 
 void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_container,
@@ -291,6 +294,8 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta
 
   /*--- Loop over all points. ---*/
 
+  AD::StartNoSharedReading();
+
   SU2_OMP_FOR_DYN(omp_chunk_size)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -350,6 +355,8 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta
   }
   END_SU2_OMP_FOR
 
+  AD::EndNoSharedReading();
+
 }
 
 void CTurbSSTSolver::Source_Template(CGeometry *geometry, CSolver **solver_container, CNumerics *numerics,
diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp
index 64ea6dcf5259..b3bce445df5b 100644
--- a/SU2_CFD/src/solvers/CTurbSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSolver.cpp
@@ -106,6 +106,12 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe
   su2double solution_i[MAXNVAR] = {0.0}, flowPrimVar_i[MAXNVARFLOW] = {0.0};
   su2double solution_j[MAXNVAR] = {0.0}, flowPrimVar_j[MAXNVARFLOW] = {0.0};
 
+  /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of
+  * variables, otherwise switch to the faster adjoint evaluation mode. ---*/
+  bool pausePreacc = false;
+  if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation();
+  else AD::StartNoSharedReading();
+
   /*--- Loop over edge colors. ---*/
   for (auto color : EdgeColoring)
   {
@@ -232,6 +238,10 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe
   END_SU2_OMP_FOR
   } // end color loop
 
+  /*--- Restore preaccumulation and adjoint evaluation state. ---*/
+  AD::ResumePreaccumulation(pausePreacc);
+  if (!ReducerStrategy) AD::EndNoSharedReading();
+
   if (ReducerStrategy) {
     SumEdgeFluxes(geometry);
     if (implicit) Jacobian.SetDiagonalAsColumnSum();
@@ -779,6 +789,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
 
     /*--- Loop over all nodes (excluding halos) ---*/
 
+    AD::StartNoSharedReading();
+
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -845,6 +857,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
     }
     END_SU2_OMP_FOR
 
+    AD::EndNoSharedReading();
+
   } else {
 
     /*--- For unsteady flows on dynamic meshes (rigidly transforming or
@@ -945,6 +959,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
     /*--- Loop over all nodes (excluding halos) to compute the remainder
      of the dual time-stepping source term. ---*/
 
+    AD::StartNoSharedReading();
+
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
@@ -1013,6 +1029,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
     }
     END_SU2_OMP_FOR
 
+    AD::EndNoSharedReading();
+
   } // end dynamic grid
 
 }
diff --git a/TestCases/hybrid_regression.py b/TestCases/hybrid_regression.py
index d5b5beea57d0..17d336d2efc3 100644
--- a/TestCases/hybrid_regression.py
+++ b/TestCases/hybrid_regression.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-## \file parallel_regression.py
+## \file hybrid_regression.py
 #  \brief Python script for automated regression testing of SU2 examples
 #  \author A. Aranake, A. Campos, T. Economon, T. Lukaczyk, S. Padron
 #  \version 7.1.1 "Blackbird"
@@ -79,7 +79,7 @@ def main():
     fixedCL_naca0012.cfg_dir   = "fixed_cl/naca0012"
     fixedCL_naca0012.cfg_file  = "inv_NACA0012.cfg"
     fixedCL_naca0012.test_iter = 10
-    fixedCL_naca0012.test_vals = [-7.374790, -1.872333, 0.300000, 0.019471]
+    fixedCL_naca0012.test_vals = [-7.374806, -1.872330, 0.300000, 0.019471]
     test_list.append(fixedCL_naca0012)
 
     # HYPERSONIC FLOW PAST BLUNT BODY
@@ -107,7 +107,7 @@ def main():
     cylinder.cfg_dir   = "navierstokes/cylinder"
     cylinder.cfg_file  = "lam_cylinder.cfg"
     cylinder.test_iter = 25
-    cylinder.test_vals = [-6.765432, -1.297428, 0.019596, 0.310240]
+    cylinder.test_vals = [-6.765429, -1.297425, 0.019571, 0.310232]
     test_list.append(cylinder)
 
     # Laminar cylinder (low Mach correction)
@@ -115,7 +115,7 @@ def main():
     cylinder_lowmach.cfg_dir   = "navierstokes/cylinder"
     cylinder_lowmach.cfg_file  = "cylinder_lowmach.cfg"
     cylinder_lowmach.test_iter = 25
-    cylinder_lowmach.test_vals = [-6.850130, -1.388096, -0.056203, 108.140819]
+    cylinder_lowmach.test_vals = [-6.850130, -1.388096, -0.056036, 108.140811]
     test_list.append(cylinder_lowmach)
 
     # 2D Poiseuille flow (body force driven with periodic inlet / outlet)
@@ -131,7 +131,7 @@ def main():
     poiseuille_profile.cfg_dir   = "navierstokes/poiseuille"
     poiseuille_profile.cfg_file  = "profile_poiseuille.cfg"
     poiseuille_profile.test_iter = 10
-    poiseuille_profile.test_vals = [-12.494721, -7.712408, -0.000000, 2.085796]
+    poiseuille_profile.test_vals = [-12.494752, -7.712204, -0.000000, 2.085796]
     test_list.append(poiseuille_profile)
 
     ##########################
@@ -151,7 +151,7 @@ def main():
     rae2822_sst.cfg_dir   = "rans/rae2822"
     rae2822_sst.cfg_file  = "turb_SST_RAE2822.cfg"
     rae2822_sst.test_iter = 20
-    rae2822_sst.test_vals = [-0.510633, 4.871233, 0.811923, 0.061627]
+    rae2822_sst.test_vals = [-0.510635, 4.871104, 0.811904, 0.061614]
     test_list.append(rae2822_sst)
 
     # RAE2822 SST_SUST
@@ -159,7 +159,7 @@ def main():
     rae2822_sst_sust.cfg_dir   = "rans/rae2822"
     rae2822_sst_sust.cfg_file  = "turb_SST_SUST_RAE2822.cfg"
     rae2822_sst_sust.test_iter = 20
-    rae2822_sst_sust.test_vals = [-2.430689, 4.871233, 0.811923, 0.061627]
+    rae2822_sst_sust.test_vals = [-2.430589, 4.871104, 0.811903, 0.061614]
     test_list.append(rae2822_sst_sust)
 
     # Flat plate
@@ -175,7 +175,7 @@ def main():
     turb_oneram6.cfg_dir   = "rans/oneram6"
     turb_oneram6.cfg_file  = "turb_ONERAM6.cfg"
     turb_oneram6.test_iter = 10
-    turb_oneram6.test_vals = [-2.388851, -6.689340, 0.230320, 0.157649]
+    turb_oneram6.test_vals = [-2.388836, -6.689414, 0.230320, 0.157640]
     test_list.append(turb_oneram6)
 
     # NACA0012 (SA, FUN3D finest grid results: CL=1.0983, CD=0.01242)
@@ -183,7 +183,7 @@ def main():
     turb_naca0012_sa.cfg_dir   = "rans/naca0012"
     turb_naca0012_sa.cfg_file  = "turb_NACA0012_sa.cfg"
     turb_naca0012_sa.test_iter = 10
-    turb_naca0012_sa.test_vals = [-11.531286, -14.899968, 1.064330, 0.019756]
+    turb_naca0012_sa.test_vals = [-11.531271, -14.899968, 1.064330, 0.019756]
     test_list.append(turb_naca0012_sa)
 
     # NACA0012 (SST, FUN3D finest grid results: CL=1.0840, CD=0.01253)
@@ -191,7 +191,7 @@ def main():
     turb_naca0012_sst.cfg_dir   = "rans/naca0012"
     turb_naca0012_sst.cfg_file  = "turb_NACA0012_sst.cfg"
     turb_naca0012_sst.test_iter = 10
-    turb_naca0012_sst.test_vals = [ -11.450482, -12.797872, -5.863656, 1.049989, 0.019163, -1.856223]
+    turb_naca0012_sst.test_vals = [-11.450475, -12.797872, -5.863655, 1.049989, 0.019163, -1.856263]
     test_list.append(turb_naca0012_sst)
 
     # NACA0012 (SST_SUST, FUN3D finest grid results: CL=1.0840, CD=0.01253)
@@ -199,7 +199,7 @@ def main():
     turb_naca0012_sst_sust.cfg_dir   = "rans/naca0012"
     turb_naca0012_sst_sust.cfg_file  = "turb_NACA0012_sst_sust.cfg"
     turb_naca0012_sst_sust.test_iter = 10
-    turb_naca0012_sst_sust.test_vals = [-11.367055, -12.640670, -5.746919, 1.005233, 0.019017, -1.913885]
+    turb_naca0012_sst_sust.test_vals = [-11.367051, -12.640670, -5.746919, 1.005233, 0.019017, -1.913905]
     test_list.append(turb_naca0012_sst_sust)
 
     # NACA0012 (SST, fixed values for turbulence quantities)
@@ -207,7 +207,7 @@ def main():
     turb_naca0012_sst_fixedvalues.cfg_dir   = "rans/naca0012"
     turb_naca0012_sst_fixedvalues.cfg_file  = "turb_NACA0012_sst_fixedvalues.cfg"
     turb_naca0012_sst_fixedvalues.test_iter = 10
-    turb_naca0012_sst_fixedvalues.test_vals = [-5.192492, -9.575904, -1.568271, 1.022569, 0.040527, -2.384883]
+    turb_naca0012_sst_fixedvalues.test_vals = [-5.192502, -9.575898, -1.568269, 1.022571, 0.040527, -2.384329]
     test_list.append(turb_naca0012_sst_fixedvalues)
 
     # PROPELLER
@@ -215,7 +215,7 @@ def main():
     propeller.cfg_dir   = "rans/propeller"
     propeller.cfg_file  = "propeller.cfg"
     propeller.test_iter = 10
-    propeller.test_vals = [-3.389576, -8.409529, 0.000048, 0.056329]
+    propeller.test_vals = [-3.389575, -8.409529, 0.000048, 0.056329]
     test_list.append(propeller)
 
     #######################################
@@ -240,7 +240,7 @@ def main():
     turb_naca0012_sst_restart_mg.cfg_file  = "turb_NACA0012_sst_multigrid_restart.cfg"
     turb_naca0012_sst_restart_mg.test_iter = 20
     turb_naca0012_sst_restart_mg.ntest_vals = 5
-    turb_naca0012_sst_restart_mg.test_vals = [-7.652983, -7.729472, -1.981061, -0.000015, 0.079061]
+    turb_naca0012_sst_restart_mg.test_vals = [-7.652987, -7.729472, -1.981061, -0.000015, 0.079061]
     test_list.append(turb_naca0012_sst_restart_mg)
 
     #############################
@@ -252,7 +252,7 @@ def main():
     turb_naca0012_1c.cfg_dir   = "rans_uq/naca0012"
     turb_naca0012_1c.cfg_file  = "turb_NACA0012_uq_1c.cfg"
     turb_naca0012_1c.test_iter = 10
-    turb_naca0012_1c.test_vals = [-4.980749, 1.139261, 0.244629, -0.112860]
+    turb_naca0012_1c.test_vals = [-4.980749, 1.139261, 0.244644, -0.112857]
     test_list.append(turb_naca0012_1c)
 
     # NACA0012 2c
@@ -260,7 +260,7 @@ def main():
     turb_naca0012_2c.cfg_dir   = "rans_uq/naca0012"
     turb_naca0012_2c.cfg_file  = "turb_NACA0012_uq_2c.cfg"
     turb_naca0012_2c.test_iter = 10
-    turb_naca0012_2c.test_vals = [-5.483337, 0.968887, 0.212022, -0.120321]
+    turb_naca0012_2c.test_vals = [-5.483337, 0.968887, 0.212057, -0.120310]
     test_list.append(turb_naca0012_2c)
 
     # NACA0012 3c
@@ -268,7 +268,7 @@ def main():
     turb_naca0012_3c.cfg_dir   = "rans_uq/naca0012"
     turb_naca0012_3c.cfg_file  = "turb_NACA0012_uq_3c.cfg"
     turb_naca0012_3c.test_iter = 10
-    turb_naca0012_3c.test_vals = [-5.584300, 0.931383, 0.205075, -0.120905]
+    turb_naca0012_3c.test_vals = [-5.584300, 0.931383, 0.205113, -0.120892]
     test_list.append(turb_naca0012_3c)
 
     # NACA0012 p1c1
@@ -276,7 +276,7 @@ def main():
     turb_naca0012_p1c1.cfg_dir   = "rans_uq/naca0012"
     turb_naca0012_p1c1.cfg_file  = "turb_NACA0012_uq_p1c1.cfg"
     turb_naca0012_p1c1.test_iter = 10
-    turb_naca0012_p1c1.test_vals = [-5.133237, 1.075365, 0.337532, -0.077873]
+    turb_naca0012_p1c1.test_vals = [-5.133233, 1.075372, 0.337556, -0.077868]
     test_list.append(turb_naca0012_p1c1)
 
     # NACA0012 p1c2
@@ -284,7 +284,7 @@ def main():
     turb_naca0012_p1c2.cfg_dir   = "rans_uq/naca0012"
     turb_naca0012_p1c2.cfg_file  = "turb_NACA0012_uq_p1c2.cfg"
     turb_naca0012_p1c2.test_iter = 10
-    turb_naca0012_p1c2.test_vals = [-5.554623, 0.943691, 0.226361, -0.116560]
+    turb_naca0012_p1c2.test_vals = [-5.554619, 0.943693, 0.226386, -0.116553]
     test_list.append(turb_naca0012_p1c2)
 
     ######################################
@@ -305,7 +305,7 @@ def main():
     hb_rans_preconditioning.cfg_dir   = "harmonic_balance/hb_rans_preconditioning"
     hb_rans_preconditioning.cfg_file  = "davis.cfg"
     hb_rans_preconditioning.test_iter = 25
-    hb_rans_preconditioning.test_vals = [-1.902111, -5.949291, 0.007768, 0.128060]
+    hb_rans_preconditioning.test_vals = [-1.902111, -5.949288, 0.007768, 0.128060]
     hb_rans_preconditioning.new_output = False
     test_list.append(hb_rans_preconditioning)
 
@@ -327,7 +327,7 @@ def main():
     inc_nozzle.cfg_dir   = "incomp_euler/nozzle"
     inc_nozzle.cfg_file  = "inv_nozzle.cfg"
     inc_nozzle.test_iter = 20
-    inc_nozzle.test_vals = [-5.973103, -4.911802, -0.000195, 0.121643]
+    inc_nozzle.test_vals = [-5.971249, -4.910844, -0.000196, 0.121635]
     inc_nozzle.new_output = True
     test_list.append(inc_nozzle)
 
@@ -340,7 +340,7 @@ def main():
     inc_lam_cylinder.cfg_dir   = "incomp_navierstokes/cylinder"
     inc_lam_cylinder.cfg_file  = "incomp_cylinder.cfg"
     inc_lam_cylinder.test_iter = 10
-    inc_lam_cylinder.test_vals = [-4.004277, -3.227956, 0.003852, 7.626578]
+    inc_lam_cylinder.test_vals = [-4.004277, -3.227956, 0.003851, 7.626583]
     inc_lam_cylinder.new_output  = True
     test_list.append(inc_lam_cylinder)
 
@@ -358,7 +358,7 @@ def main():
     inc_poly_cylinder.cfg_dir   = "incomp_navierstokes/cylinder"
     inc_poly_cylinder.cfg_file  = "poly_cylinder.cfg"
     inc_poly_cylinder.test_iter = 20
-    inc_poly_cylinder.test_vals = [-7.849071, -2.092548, 0.029423, 1.922053]
+    inc_poly_cylinder.test_vals = [-7.851512, -2.093420, 0.029974, 1.921595]
     inc_poly_cylinder.new_output  = True
     test_list.append(inc_poly_cylinder)
 
@@ -367,7 +367,7 @@ def main():
     inc_lam_bend.cfg_dir   = "incomp_navierstokes/bend"
     inc_lam_bend.cfg_file  = "lam_bend.cfg"
     inc_lam_bend.test_iter = 10
-    inc_lam_bend.test_vals = [-3.438863, -3.102176, -0.017532, -0.193429]
+    inc_lam_bend.test_vals = [-3.436191, -3.098014, -0.017338, -0.193981]
     test_list.append(inc_lam_bend)
 
     ############################
@@ -379,7 +379,7 @@ def main():
     inc_turb_naca0012.cfg_dir   = "incomp_rans/naca0012"
     inc_turb_naca0012.cfg_file  = "naca0012.cfg"
     inc_turb_naca0012.test_iter = 20
-    inc_turb_naca0012.test_vals = [-4.788495, -11.040511, 0.000023, 0.309503]
+    inc_turb_naca0012.test_vals = [-4.788405, -11.040493, 0.000008, 0.309506]
     inc_turb_naca0012.new_output  = True
     test_list.append(inc_turb_naca0012)
 
@@ -388,7 +388,7 @@ def main():
     inc_turb_naca0012_sst_sust.cfg_dir   = "incomp_rans/naca0012"
     inc_turb_naca0012_sst_sust.cfg_file  = "naca0012_SST_SUST.cfg"
     inc_turb_naca0012_sst_sust.test_iter = 20
-    inc_turb_naca0012_sst_sust.test_vals = [-7.276424, 0.145861, 0.000003, 0.312011]
+    inc_turb_naca0012_sst_sust.test_vals = [-7.276424, 0.145860, 0.000003, 0.312011]
     test_list.append(inc_turb_naca0012_sst_sust)
 
     ######################################
@@ -400,7 +400,7 @@ def main():
     cavity.cfg_dir   = "moving_wall/cavity"
     cavity.cfg_file  = "lam_cavity.cfg"
     cavity.test_iter = 25
-    cavity.test_vals = [-5.627934, -0.164469, 0.051998, 2.547065]
+    cavity.test_vals = [-5.627934, -0.164469, 0.052000, 2.547063]
     test_list.append(cavity)
 
     # Spinning cylinder
@@ -408,7 +408,7 @@ def main():
     spinning_cylinder.cfg_dir   = "moving_wall/spinning_cylinder"
     spinning_cylinder.cfg_file  = "spinning_cylinder.cfg"
     spinning_cylinder.test_iter = 25
-    spinning_cylinder.test_vals = [-7.996313, -2.601764, 1.510692, 1.493876]
+    spinning_cylinder.test_vals = [-8.001289, -2.607956, 1.501322, 1.488559]
     test_list.append(spinning_cylinder)
 
     ######################################
@@ -420,7 +420,7 @@ def main():
     square_cylinder.cfg_dir   = "unsteady/square_cylinder"
     square_cylinder.cfg_file  = "turb_square.cfg"
     square_cylinder.test_iter = 3
-    square_cylinder.test_vals = [-1.162572, 0.066371, 1.399790, 2.220393]
+    square_cylinder.test_vals = [-1.162564, 0.066401, 1.399788, 2.220402]
     square_cylinder.unsteady  = True
     test_list.append(square_cylinder)
 
@@ -429,7 +429,7 @@ def main():
     sine_gust.cfg_dir   = "gust"
     sine_gust.cfg_file  = "inv_gust_NACA0012.cfg"
     sine_gust.test_iter = 5
-    sine_gust.test_vals = [-1.977520, 3.481804, -0.012403, -0.007453]
+    sine_gust.test_vals = [-1.977520, 3.481804, -0.012402, -0.007454]
     sine_gust.unsteady  = True
     test_list.append(sine_gust)
 
@@ -438,7 +438,7 @@ def main():
     aeroelastic.cfg_dir   = "aeroelastic"
     aeroelastic.cfg_file  = "aeroelastic_NACA64A010.cfg"
     aeroelastic.test_iter = 2
-    aeroelastic.test_vals = [0.074447, 0.033116, -0.001649, -0.000127]
+    aeroelastic.test_vals = [0.074433, 0.033108, -0.001650, -0.000127]
     aeroelastic.unsteady  = True
     test_list.append(aeroelastic)
 
@@ -465,7 +465,7 @@ def main():
     unst_deforming_naca0012.cfg_dir   = "disc_adj_euler/naca0012_pitching_def"
     unst_deforming_naca0012.cfg_file  = "inv_NACA0012_pitching_deform.cfg"
     unst_deforming_naca0012.test_iter = 5
-    unst_deforming_naca0012.test_vals = [-3.665128, -3.793593, -3.716506, -3.148308]
+    unst_deforming_naca0012.test_vals = [-3.665120, -3.793643, -3.716518, -3.148310]
     unst_deforming_naca0012.unsteady  = True
     test_list.append(unst_deforming_naca0012)
 
@@ -478,7 +478,7 @@ def main():
     edge_VW.cfg_dir   = "nicf/edge"
     edge_VW.cfg_file  = "edge_VW.cfg"
     edge_VW.test_iter = 100
-    edge_VW.test_vals = [-5.040283, 1.124491, -0.000009, 0.000000]
+    edge_VW.test_vals = [-5.040287, 1.124488, -0.000009, 0.000000]
     test_list.append(edge_VW)
 
     # Rarefaction shock wave edge_PPR
@@ -486,7 +486,7 @@ def main():
     edge_PPR.cfg_dir   = "nicf/edge"
     edge_PPR.cfg_file  = "edge_PPR.cfg"
     edge_PPR.test_iter = 100
-    edge_PPR.test_vals = [-5.401640, 0.738165, -0.000035, 0.000000]
+    edge_PPR.test_vals = [-5.401601, 0.738205, -0.000035, 0.000000]
     test_list.append(edge_PPR)
 
     ######################################
@@ -498,7 +498,7 @@ def main():
     Jones_tc.cfg_dir   = "turbomachinery/APU_turbocharger"
     Jones_tc.cfg_file  = "Jones.cfg"
     Jones_tc.test_iter = 5
-    Jones_tc.test_vals = [-5.279930, 0.379651, 72.212090, 1.277440]
+    Jones_tc.test_vals = [-5.279930, 0.379651, 72.212100, 1.277439]
     Jones_tc.new_output = False
     test_list.append(Jones_tc)
 
@@ -507,7 +507,7 @@ def main():
     Jones_tc_rst.cfg_dir   = "turbomachinery/APU_turbocharger"
     Jones_tc_rst.cfg_file  = "Jones_rst.cfg"
     Jones_tc_rst.test_iter = 5
-    Jones_tc_rst.test_vals = [-4.625248, -1.568821, 33.995140, 10.181940]
+    Jones_tc_rst.test_vals = [-4.625251, -1.568824, 33.995140, 10.181940]
     Jones_tc_rst.new_output = False
     test_list.append(Jones_tc_rst)
 
@@ -516,7 +516,7 @@ def main():
     axial_stage2D.cfg_dir   = "turbomachinery/axial_stage_2D"
     axial_stage2D.cfg_file  = "Axial_stage2D.cfg"
     axial_stage2D.test_iter = 20
-    axial_stage2D.test_vals = [-1.933200, 5.379973, 73.357900, 0.925878]
+    axial_stage2D.test_vals = [-1.933139, 5.380376, 73.357910, 0.925874]
     axial_stage2D.new_output = False
     test_list.append(axial_stage2D)
 
@@ -525,7 +525,7 @@ def main():
     transonic_stator.cfg_dir   = "turbomachinery/transonic_stator_2D"
     transonic_stator.cfg_file  = "transonic_stator.cfg"
     transonic_stator.test_iter = 20
-    transonic_stator.test_vals = [-0.562430, 5.828446, 96.436050, 0.062506]
+    transonic_stator.test_vals = [-0.565608, 5.833408, 96.476150, 0.062517]
     transonic_stator.new_output = False
     test_list.append(transonic_stator)
 
@@ -534,7 +534,7 @@ def main():
     transonic_stator_rst.cfg_dir   = "turbomachinery/transonic_stator_2D"
     transonic_stator_rst.cfg_file  = "transonic_stator_rst.cfg"
     transonic_stator_rst.test_iter = 20
-    transonic_stator_rst.test_vals = [-6.621626, -0.614366, 5.002986, 0.002951]
+    transonic_stator_rst.test_vals = [-6.619122, -0.615716, 5.002986, 0.002951]
     transonic_stator_rst.new_output = False
     test_list.append(transonic_stator_rst)
 
@@ -547,7 +547,7 @@ def main():
     uniform_flow.cfg_dir   = "sliding_interface/uniform_flow"
     uniform_flow.cfg_file  = "uniform_NN.cfg"
     uniform_flow.test_iter = 5
-    uniform_flow.test_vals = [5.000000, 0.000000, -0.188748, -10.631530]
+    uniform_flow.test_vals = [5.000000, 0.000000, -0.188748, -10.631524]
     uniform_flow.unsteady  = True
     uniform_flow.multizone = True
     test_list.append(uniform_flow)
@@ -557,7 +557,7 @@ def main():
     channel_2D.cfg_dir   = "sliding_interface/channel_2D"
     channel_2D.cfg_file  = "channel_2D_WA.cfg"
     channel_2D.test_iter = 2
-    channel_2D.test_vals = [2.000000, 0.000000, 0.398089, 0.352762, 0.405397]
+    channel_2D.test_vals = [2.000000, 0.000000, 0.397972, 0.352756, 0.405398]
     channel_2D.unsteady  = True
     channel_2D.multizone = True
     test_list.append(channel_2D)
@@ -567,7 +567,7 @@ def main():
     channel_3D.cfg_dir   = "sliding_interface/channel_3D"
     channel_3D.cfg_file  = "channel_3D_WA.cfg"
     channel_3D.test_iter = 2
-    channel_3D.test_vals = [2.000000, 0.000000, 0.620151, 0.505156, 0.415292]
+    channel_3D.test_vals = [2.000000, 0.000000, 0.620149, 0.505190, 0.415133]
     channel_3D.unsteady  = True
     channel_3D.multizone = True
     test_list.append(channel_3D)
@@ -577,7 +577,7 @@ def main():
     pipe.cfg_dir   = "sliding_interface/pipe"
     pipe.cfg_file  = "pipe_NN.cfg"
     pipe.test_iter = 2
-    pipe.test_vals = [0.150024, 0.491949, 0.677757, 0.963990, 1.006944]
+    pipe.test_vals = [0.150024, 0.491949, 0.677759, 0.963991, 1.006947]
     pipe.unsteady  = True
     pipe.multizone = True
     test_list.append(pipe)
@@ -587,7 +587,7 @@ def main():
     rotating_cylinders.cfg_dir   = "sliding_interface/rotating_cylinders"
     rotating_cylinders.cfg_file  = "rot_cylinders_WA.cfg"
     rotating_cylinders.test_iter = 3
-    rotating_cylinders.test_vals = [3.000000, 0.000000, 0.777567, 1.134807, 1.224136]
+    rotating_cylinders.test_vals = [3.000000, 0.000000, 0.777568, 1.134807, 1.224137]
     rotating_cylinders.unsteady  = True
     rotating_cylinders.multizone  = True
     test_list.append(rotating_cylinders)
@@ -597,7 +597,7 @@ def main():
     supersonic_vortex_shedding.cfg_dir   = "sliding_interface/supersonic_vortex_shedding"
     supersonic_vortex_shedding.cfg_file  = "sup_vor_shed_WA.cfg"
     supersonic_vortex_shedding.test_iter = 5
-    supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.216554, 1.639121]
+    supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.216554, 1.639119]
     supersonic_vortex_shedding.unsteady  = True
     supersonic_vortex_shedding.multizone  = True
     test_list.append(supersonic_vortex_shedding)
@@ -607,7 +607,7 @@ def main():
     bars_SST_2D.cfg_dir   = "sliding_interface/bars_SST_2D"
     bars_SST_2D.cfg_file  = "bars.cfg"
     bars_SST_2D.test_iter = 13
-    bars_SST_2D.test_vals = [13.000000, -0.619686, -1.564594]
+    bars_SST_2D.test_vals = [13.000000, -0.619686, -1.564595]
     bars_SST_2D.multizone = True
     test_list.append(bars_SST_2D)
 
@@ -616,7 +616,7 @@ def main():
     slinc_steady.cfg_dir   = "sliding_interface/incompressible_steady"
     slinc_steady.cfg_file  = "config.cfg"
     slinc_steady.test_iter = 19
-    slinc_steady.test_vals = [19.000000, -1.800461, -2.115195] #last 3 columns
+    slinc_steady.test_vals = [19.000000, -1.800401, -2.114687]
     slinc_steady.multizone = True
     test_list.append(slinc_steady)
 
@@ -646,7 +646,7 @@ def main():
     fsi2d.cfg_dir   = "fea_fsi/WallChannel_2d"
     fsi2d.cfg_file  = "configFSI.cfg"
     fsi2d.test_iter = 4
-    fsi2d.test_vals = [4, 0, -3.743230, -4.133462]
+    fsi2d.test_vals = [4.000000, 0.000000, -3.743227, -4.133479]
     fsi2d.multizone= True
     fsi2d.unsteady = True
     test_list.append(fsi2d)
@@ -656,7 +656,7 @@ def main():
     stat_fsi.cfg_dir   = "fea_fsi/stat_fsi"
     stat_fsi.cfg_file  = "config.cfg"
     stat_fsi.test_iter = 7
-    stat_fsi.test_vals = [-3.242851, -4.866383, 0.000000, 11]
+    stat_fsi.test_vals = [-5.403596, -5.722583, 0.000000, 10.000000]
     stat_fsi.multizone = True
     test_list.append(stat_fsi)
 
@@ -665,7 +665,7 @@ def main():
     dyn_fsi.cfg_dir   = "fea_fsi/dyn_fsi"
     dyn_fsi.cfg_file  = "config.cfg"
     dyn_fsi.test_iter = 4
-    dyn_fsi.test_vals = [-4.355806, -4.060581, 5.3837e-08, 100]
+    dyn_fsi.test_vals = [-4.355806, -4.060582, 0.000000, 102.000000]
     dyn_fsi.multizone = True
     dyn_fsi.unsteady  = True
     test_list.append(dyn_fsi)
@@ -675,7 +675,7 @@ def main():
     stat_fsi_restart.cfg_dir   = "fea_fsi/stat_fsi"
     stat_fsi_restart.cfg_file  = "config_restart.cfg"
     stat_fsi_restart.test_iter = 1
-    stat_fsi_restart.test_vals = [-3.474239, -4.250710, 0.000000, 36.000000]
+    stat_fsi_restart.test_vals = [-3.474082, -4.242343, 0.000000, 37.000000]
     stat_fsi_restart.multizone = True
     test_list.append(stat_fsi_restart)
 
diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py
new file mode 100644
index 000000000000..c40ac8700460
--- /dev/null
+++ b/TestCases/hybrid_regression_AD.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python
+
+## \file hybrid_regression_AD.py
+#  \brief Python script for automated regression testing of the SU2 hybrid parallel AD (discrete adjoint) examples
+#  \author A. Aranake, A. Campos, T. Economon, T. Lukaczyk, S. Padron
+#  \version 7.1.1 "Blackbird"
+#
+# SU2 Project Website: https://su2code.github.io
+# 
+# The SU2 Project is maintained by the SU2 Foundation 
+# (http://su2foundation.org)
+#
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
+#
+# SU2 is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# SU2 is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with SU2. If not, see <http://www.gnu.org/licenses/>.
+
+# make print(*args) function available in PY2.6+, doesn't work on PY < 2.6
+from __future__ import print_function
+
+import sys
+from TestCase import TestCase    
+
+def main():
+    '''Run the hybrid parallel AD regression cases and check their output against reference values.
+       Every case is executed with "SU2_CFD_AD -t 2"; the process exits with code 0 when all
+       cases pass and 1 otherwise, so the script can be used for checks on code pushed to GitHub.'''
+
+    test_list = []
+
+    #####################################
+    ### Disc. adj. compressible Euler ###
+    #####################################
+
+    # Inviscid NACA0012 (note: the discrete adjoint config is read from the cont_adj_euler directory)
+    discadj_naca0012           = TestCase('discadj_naca0012')
+    discadj_naca0012.cfg_dir   = "cont_adj_euler/naca0012"
+    discadj_naca0012.cfg_file  = "inv_NACA0012_discadj.cfg"
+    discadj_naca0012.test_iter = 100
+    discadj_naca0012.test_vals = [-3.561506, -8.926634, -0.000000, 0.005587]
+    test_list.append(discadj_naca0012)
+   
+    # Inviscid Cylinder 3D (multiple markers)
+    discadj_cylinder3D           = TestCase('discadj_cylinder3D')
+    discadj_cylinder3D.cfg_dir   = "disc_adj_euler/cylinder3D"
+    discadj_cylinder3D.cfg_file  = "inv_cylinder3D.cfg"
+    discadj_cylinder3D.test_iter = 5
+    discadj_cylinder3D.test_vals = [-3.730673, -3.832084, -0.000000, 0.000000]
+    test_list.append(discadj_cylinder3D)
+
+    # Arina nozzle 2D
+    discadj_arina2k              = TestCase('discadj_arina2k')
+    discadj_arina2k.cfg_dir      = "disc_adj_euler/arina2k"
+    discadj_arina2k.cfg_file     = "Arina2KRS.cfg"
+    discadj_arina2k.test_iter    = 20
+    discadj_arina2k.test_vals    = [-3.087876, -3.481506, 0.068878, 0.000000]
+    test_list.append(discadj_arina2k)
+    
+    ####################################
+    ### Disc. adj. compressible RANS ###
+    ####################################
+
+    # Adjoint turbulent NACA0012 SA
+    discadj_rans_naca0012_sa           = TestCase('discadj_rans_naca0012_sa')
+    discadj_rans_naca0012_sa.cfg_dir   = "disc_adj_rans/naca0012"
+    discadj_rans_naca0012_sa.cfg_file  = "turb_NACA0012_sa.cfg"
+    discadj_rans_naca0012_sa.test_iter = 10
+    discadj_rans_naca0012_sa.test_vals = [-2.230632, 0.696530, 0.177890, -0.000016]
+    test_list.append(discadj_rans_naca0012_sa)
+
+    # Adjoint turbulent NACA0012 SST
+    discadj_rans_naca0012_sst           = TestCase('discadj_rans_naca0012_sst')
+    discadj_rans_naca0012_sst.cfg_dir   = "disc_adj_rans/naca0012"
+    discadj_rans_naca0012_sst.cfg_file  = "turb_NACA0012_sst.cfg"
+    discadj_rans_naca0012_sst.test_iter = 10
+    discadj_rans_naca0012_sst.test_vals = [-2.221793, -0.491367, 0.182000, -0.000018]
+    test_list.append(discadj_rans_naca0012_sst)
+
+    #######################################
+    ### Disc. adj. incompressible Euler ###
+    #######################################
+
+    # Adjoint Incompressible Inviscid NACA0012
+    discadj_incomp_NACA0012           = TestCase('discadj_incomp_NACA0012')
+    discadj_incomp_NACA0012.cfg_dir   = "disc_adj_incomp_euler/naca0012"
+    discadj_incomp_NACA0012.cfg_file  = "incomp_NACA0012_disc.cfg"
+    discadj_incomp_NACA0012.test_iter = 20
+    discadj_incomp_NACA0012.test_vals = [20.000000, -4.092007, -2.652751, 0.000000]
+    test_list.append(discadj_incomp_NACA0012)
+
+    #####################################
+    ### Disc. adj. incompressible N-S ###
+    #####################################
+
+    # Adjoint Incompressible Viscous Cylinder (Heated)
+    discadj_incomp_cylinder           = TestCase('discadj_incomp_cylinder')
+    discadj_incomp_cylinder.cfg_dir   = "disc_adj_incomp_navierstokes/cylinder"
+    discadj_incomp_cylinder.cfg_file  = "heated_cylinder.cfg"
+    discadj_incomp_cylinder.test_iter = 20
+    discadj_incomp_cylinder.test_vals = [20.000000, -2.705921, -2.837904, 0.000000]
+    test_list.append(discadj_incomp_cylinder)
+
+    ######################################
+    ### Disc. adj. incompressible RANS ###
+    ######################################
+
+    # Adjoint Incompressible Turbulent NACA 0012 SA
+    discadj_incomp_turb_NACA0012_sa           = TestCase('discadj_incomp_turb_NACA0012_sa')
+    discadj_incomp_turb_NACA0012_sa.cfg_dir   = "disc_adj_incomp_rans/naca0012"
+    discadj_incomp_turb_NACA0012_sa.cfg_file  = "turb_naca0012_sa.cfg"
+    discadj_incomp_turb_NACA0012_sa.test_iter = 10
+    discadj_incomp_turb_NACA0012_sa.test_vals = [10.000000, -3.845995, -1.031097, 0.000000]
+    test_list.append(discadj_incomp_turb_NACA0012_sa)
+
+    # Adjoint Incompressible Turbulent NACA 0012 SST
+    discadj_incomp_turb_NACA0012_sst           = TestCase('discadj_incomp_turb_NACA0012_sst')
+    discadj_incomp_turb_NACA0012_sst.cfg_dir   = "disc_adj_incomp_rans/naca0012"
+    discadj_incomp_turb_NACA0012_sst.cfg_file  = "turb_naca0012_sst.cfg"
+    discadj_incomp_turb_NACA0012_sst.test_iter = 10
+    discadj_incomp_turb_NACA0012_sst.test_vals = [-3.845593, -2.414026, -8.420194, 0.000000]
+    test_list.append(discadj_incomp_turb_NACA0012_sst)
+
+    #######################################################
+    ### Unsteady Disc. adj. compressible RANS           ###
+    #######################################################
+   
+    # Turbulent Cylinder
+    discadj_cylinder           = TestCase('unsteady_cylinder')
+    discadj_cylinder.cfg_dir   = "disc_adj_rans/cylinder"
+    discadj_cylinder.cfg_file  = "cylinder.cfg" 
+    discadj_cylinder.test_iter = 9
+    discadj_cylinder.test_vals = [3.746907, -1.544882, -0.008321, 0.000014]
+    discadj_cylinder.unsteady  = True
+    test_list.append(discadj_cylinder)
+    
+    ##############################################################
+    ### Unsteady Disc. adj. compressible RANS Windowed Average ###
+    ##############################################################
+
+    # Turbulent Cylinder, windowed average (rebinds discadj_cylinder; the previous case is already in test_list)
+    discadj_cylinder           = TestCase('unsteady_cylinder_windowed_average_AD')
+    discadj_cylinder.cfg_dir   = "disc_adj_rans/cylinder"
+    discadj_cylinder.cfg_file  = "cylinder_Windowing_AD.cfg" 
+    discadj_cylinder.test_iter = 9
+    discadj_cylinder.test_vals = [3.004402]
+    discadj_cylinder.unsteady  = True
+    test_list.append(discadj_cylinder)
+    
+    ##########################################################################
+    ### Unsteady Disc. adj. compressible RANS DualTimeStepping 1st order   ###
+    ##########################################################################
+
+    # Turbulent Cylinder, first-order dual time stepping
+    discadj_DT_1ST_cylinder           = TestCase('unsteady_cylinder_DT_1ST')
+    discadj_DT_1ST_cylinder.cfg_dir   = "disc_adj_rans/cylinder_DT_1ST"
+    discadj_DT_1ST_cylinder.cfg_file  = "cylinder.cfg"
+    discadj_DT_1ST_cylinder.test_iter = 9
+    discadj_DT_1ST_cylinder.test_vals = [3.698167, -1.607051, -0.002159, 0.000028]
+    discadj_DT_1ST_cylinder.unsteady  = True
+    test_list.append(discadj_DT_1ST_cylinder)
+
+    ######################################################
+    ### Unsteady Disc. adj. compressible pitching NACA ###
+    ######################################################
+
+    # compressible pitching NACA0012
+    discadj_pitchingNACA0012           = TestCase('pitchingNACA0012')
+    discadj_pitchingNACA0012.cfg_dir   = "disc_adj_euler/naca0012_pitching"
+    discadj_pitchingNACA0012.cfg_file  = "inv_NACA0012_pitching.cfg"
+    discadj_pitchingNACA0012.test_iter = 4
+    discadj_pitchingNACA0012.test_vals = [-1.219713, -1.645717, -0.007513, 0.000013]
+    discadj_pitchingNACA0012.unsteady  = True
+    test_list.append(discadj_pitchingNACA0012)
+
+    #######################################################
+    ### Disc. adj. turbomachinery                       ###
+    #######################################################
+    
+    # Transonic Stator 2D
+    discadj_trans_stator           = TestCase('transonic_stator')
+    discadj_trans_stator.cfg_dir   = "disc_adj_turbomachinery/transonic_stator_2D"
+    discadj_trans_stator.cfg_file  = "transonic_stator.cfg" 
+    discadj_trans_stator.test_iter = 79
+    discadj_trans_stator.test_vals = [79.000000, -1.938806, -1.995540]
+    test_list.append(discadj_trans_stator)
+    
+    ###################################
+    ### Structural Adjoint          ###
+    ###################################
+   
+    # Structural model
+    discadj_fea           = TestCase('discadj_fea')
+    discadj_fea.cfg_dir   = "disc_adj_fea"
+    discadj_fea.cfg_file  = "configAD_fem.cfg" 
+    discadj_fea.test_iter = 4
+    discadj_fea.test_vals = [1.774569, 1.928023, -0.000364, -8.690300]
+    test_list.append(discadj_fea) 
+
+    ######################################
+    ### RUN TESTS                      ###
+    ######################################
+
+    for test in test_list:
+        test.su2_exec = "SU2_CFD_AD -t 2"  # presumably "-t 2" requests two threads per run - confirm against SU2_CFD options
+        test.timeout = 600  # NOTE(review): units are defined by TestCase (likely seconds) - confirm
+        test.tol = 1e-4  # tolerance for comparing against test_vals; exact semantics are defined by TestCase
+    # end of settings shared by all cases
+
+    pass_list = [ test.run_test() for test in test_list ]  # one pass/fail entry per case, in test_list order
+
+    # Tests summary
+    print('==================================================================')
+    print('Summary of the hybrid parallel AD tests')
+    print('python version:', sys.version)
+    for i, test in enumerate(test_list):
+        if (pass_list[i]):
+            print('  passed - %s'%test.tag)
+        else:
+            print('* FAILED - %s'%test.tag)
+
+    if all(pass_list):
+        sys.exit(0)
+    else:
+        sys.exit(1)
+    # not reached: both branches above leave via sys.exit()
+
+if __name__ == '__main__':
+    main()  # main() ends the process via sys.exit(0) on success or sys.exit(1) on any failure
diff --git a/externals/codi b/externals/codi
index 6a67202a3887..3c3211fef2e2 160000
--- a/externals/codi
+++ b/externals/codi
@@ -1 +1 @@
-Subproject commit 6a67202a3887c8da490fdfde82bc46507de68692
+Subproject commit 3c3211fef2e225ab89680a4063b62bb3bb38a7e4
diff --git a/externals/opdi b/externals/opdi
index e56f79cada20..2735b503f601 160000
--- a/externals/opdi
+++ b/externals/opdi
@@ -1 +1 @@
-Subproject commit e56f79cada202d21e7425f5d5cfd5b1153f2465e
+Subproject commit 2735b503f60163e8d64e1ac56cce46173a9fd4a9
diff --git a/meson.build b/meson.build
index 15cdf6a57024..d710813fe390 100644
--- a/meson.build
+++ b/meson.build
@@ -53,8 +53,18 @@ endif
 
 if get_option('enable-autodiff') or get_option('enable-directdiff')
   codi_dep = [declare_dependency(include_directories: 'externals/codi/include')]
-  codi_rev_args = '-DCODI_REVERSE_TYPE'
-  codi_for_args = '-DCODI_FORWARD_TYPE'
+  codi_rev_args = ['-DCODI_REVERSE_TYPE']
+  codi_for_args = ['-DCODI_FORWARD_TYPE']
+endif
+
+if get_option('enable-autodiff')
+  if get_option('codi-tape') == 'JacobianIndex'
+    codi_rev_args += '-DCODI_INDEX_TAPE'
+  #elif get_option('codi-tape') == 'PrimalLinear'
+  #  codi_rev_args += '-DCODI_PRIMAL_TAPE'
+  #elif get_option('codi-tape') == 'PrimalIndex'
+  #  codi_rev_args += '-DCODI_PRIMAL_INDEX_TAPE'
+  endif
 endif
 
 # add cgns library
@@ -114,6 +124,10 @@ if omp
     elif get_option('opdi-backend') == 'ompt'
       su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND'
     endif
+
+    if get_option('opdi-shared-read-opt') == false
+      su2_cpp_args += '-DOPDI_VARIABLE_ADJOINT_ACCESS_MODE=0'
+    endif
   endif
 endif
 
diff --git a/meson_options.txt b/meson_options.txt
index 7fce30fd444e..05a0b2c64fbc 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -19,5 +19,7 @@ option('enable-mixedprec', type : 'boolean', value : false, description: 'use si
 option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation')
 option('enable-mpp',  type : 'boolean', value : false, description: 'enable Mutation++ support')
 option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice')
+option('codi-tape', type : 'combo', choices : ['JacobianLinear', 'JacobianIndex'], value : 'JacobianLinear', description: 'CoDiPack tape choice')
+option('opdi-shared-read-opt', type : 'boolean', value : true, description : 'OpDiLib shared reading optimization')
 option('librom_root', type : 'string', value : '', description: 'libROM base directory')
 option('enable-librom', type : 'boolean', value : false, description: 'enable LLNL libROM support')
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index 498f230d10be..c2fcd132697b 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -44,11 +44,11 @@ def init_submodules(method = 'auto'):
 
   # This information of the modules is used if projects was not cloned using git
   # The sha tag must be maintained manually to point to the correct commit
-  sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692'
+  sha_version_codi = '3c3211fef2e225ab89680a4063b62bb3bb38a7e4'
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
-  sha_version_opdi = 'e56f79cada202d21e7425f5d5cfd5b1153f2465e'
+  sha_version_opdi = '2735b503f60163e8d64e1ac56cce46173a9fd4a9'
   github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib'
   sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2'
   github_repo_meson = 'https://github.com/mesonbuild/meson'
diff --git a/preconfigure.py b/preconfigure.py
index 110781d2e559..eea0e8880623 100755
--- a/preconfigure.py
+++ b/preconfigure.py
@@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False):
     
     # This information of the modules is used if projects was not cloned using git
     # The sha tag must be maintained manually to point to the correct commit
-    sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692'
+    sha_version_codi = '3c3211fef2e225ab89680a4063b62bb3bb38a7e4'
     github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
     sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
     github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'