su2code · pcarruscag · Feb 26, 2020 · Jan 22, 2020 · Jan 23, 2020 · Jan 23, 2020
diff --git a/Common/include/CConfig.hpp b/Common/include/CConfig.hpp
@@ -1125,6 +1125,8 @@ class CConfig {
 
   string caseName;                 /*!< \brief Name of the current case */
 
+  unsigned long edgeColorGroupSize; /*!< \brief Size of the edge groups colored for OpenMP parallelization of edge loops. */
+
   /*!
    * \brief Set the default values of config options not set in the config file using another config object.
    * \param config - Config object to use the default values from.
@@ -4247,7 +4249,7 @@ class CConfig {
    * \brief Get whether to "Use Accurate Jacobians" for AUSM+up(2) and SLAU(2).
    * \return yes/no.
    */
-  bool GetUse_Accurate_Jacobians(void) { return Use_Accurate_Jacobians; }
+  bool GetUse_Accurate_Jacobians(void) const { return Use_Accurate_Jacobians; }
 
   /*!
    * \brief Get the kind of integration scheme (explicit or implicit)
@@ -4450,7 +4452,7 @@ class CConfig {
    * \brief Factor by which to multiply the dissipation contribution to Jacobians of central schemes.
    * \return The factor.
    */
-  su2double GetCent_Jac_Fix_Factor(void) { return Cent_Jac_Fix_Factor; }
+  su2double GetCent_Jac_Fix_Factor(void) const { return Cent_Jac_Fix_Factor; }
 
   /*!
    * \brief Get the kind of integration scheme (explicit or implicit)
@@ -5829,7 +5831,7 @@ class CConfig {
    * \brief Get a pointer to the body force vector.
    * \return A pointer to the body force vector.
    */
-  su2double* GetBody_Force_Vector(void) { return Body_Force_Vector; }
+  const su2double* GetBody_Force_Vector(void) const { return Body_Force_Vector; }
 
   /*!
    * \brief Get information about the rotational frame.
@@ -8487,7 +8489,7 @@ class CConfig {
    * \param[in] val_coeff - Index of the coefficient.
    * \return Alpha coefficient for the Runge-Kutta integration scheme.
    */
-  su2double* Get_Electric_Field_Dir(void) { return Electric_Field_Dir; }
+  const su2double* Get_Electric_Field_Dir(void) const { return Electric_Field_Dir; }
 
   /*!
    * \brief Check if the user wants to apply the load as a ramp.
@@ -9201,4 +9203,9 @@ class CConfig {
    */
   unsigned long GetLinear_Solver_Prec_Threads(void) const { return Linear_Solver_Prec_Threads; }
 
+  /*!
+   * \brief Get the size of the edge groups colored for OpenMP parallelization of edge loops.
+   * \return The stored group size (value of edgeColorGroupSize). */
+  unsigned long GetEdgeColoringGroupSize(void) const { return edgeColorGroupSize; }
+
 };
diff --git a/Common/include/interpolation_structure.hpp b/Common/include/interpolation_structure.hpp
@@ -1,7 +1,7 @@
 /*!
  * \file interpolation_structure.hpp
- * \brief Headers of the main subroutines used by SU2_FSI.
- *        The subroutines and functions are in the <i>interpolation_structure.cpp</i> file.
+ * \brief Headers of classes used for multiphysics interpolation.
+ *        The implementation is in the <i>interpolation_structure.cpp</i> file.
  * \author H. Kline
  * \version 7.0.1 "Blackbird"
  *
@@ -244,7 +244,7 @@ class CIsoparametric : public CInterpolator {
    * \param[in] nDim - the dimension of the coordinates.
    * \param[in] iZone_1 - zone index of the element to use for interpolation (the DONOR zone)
    * \param[in] donor_elem - element index of the element to use for interpolation (or global index of a point in 2D)
-   * \param[in[ nDonorPoints - number of donor points in the element.
+   * \param[in] nDonorPoints - number of donor points in the element.
    * \param[in] xj - point projected onto the plane of the donor element.
    * \param[out] isoparams - isoparametric coefficients. Must be allocated to size nNodes ahead of time. (size> nDonors)
    *

diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp
@@ -95,6 +95,7 @@ class CSysMatrix {
   const unsigned long *row_ptr;     /*!< \brief Pointers to the first element in each row. */
   const unsigned long *dia_ptr;     /*!< \brief Pointers to the diagonal element in each row. */
   const unsigned long *col_ind;     /*!< \brief Column index for each of the elements in val(). */
+  vector<const ScalarType*> col_ptr;/*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */
 
   ScalarType *ILU_matrix;           /*!< \brief Entries of the ILU sparse matrix. */
   unsigned long nnz_ilu;            /*!< \brief Number of possible nonzero entries in the matrix (ILU). */
@@ -440,7 +441,7 @@ class CSysMatrix {
    * \param[in] val_block - Block to set to A(i, j).
    */
   template<class OtherType>
-  inline void SetBlock(unsigned long block_i, unsigned long block_j, OtherType **val_block) {
+  inline void SetBlock(unsigned long block_i, unsigned long block_j, const OtherType* const* val_block) {
 
     unsigned long iVar, jVar, index;
 
@@ -503,7 +504,7 @@ class CSysMatrix {
    * \param[in] val_block - Block to add to A(i, j).
    */
   template<class OtherType>
-  inline void AddBlock(unsigned long block_i, unsigned long block_j, OtherType **val_block) {
+  inline void AddBlock(unsigned long block_i, unsigned long block_j, const OtherType* const* val_block) {
 
     unsigned long iVar, jVar, index;
 
@@ -524,7 +525,7 @@ class CSysMatrix {
    * \param[in] val_block - Block to subtract to A(i, j).
    */
   template<class OtherType>
-  inline void SubtractBlock(unsigned long block_i, unsigned long block_j, OtherType **val_block) {
+  inline void SubtractBlock(unsigned long block_i, unsigned long block_j, const OtherType* const* val_block) {
 
     unsigned long iVar, jVar, index;
 
@@ -550,7 +551,7 @@ class CSysMatrix {
    */
   template<class OtherType, int Sign = 1>
   inline void UpdateBlocks(unsigned long iEdge, unsigned long iPoint, unsigned long jPoint,
-                           OtherType **block_i, OtherType **block_j) {
+                           const OtherType* const* block_i, const OtherType* const* block_j) {
 
     ScalarType *bii = &matrix[dia_ptr[iPoint]*nVar*nEqn];
     ScalarType *bjj = &matrix[dia_ptr[jPoint]*nVar*nEqn];
@@ -570,6 +571,84 @@ class CSysMatrix {
     }
   }
 
+  /*!
+   * \brief Short-hand for the "subtractive" version (sub from i*, add to j*) of UpdateBlocks; all arguments are forwarded unchanged.
+   */
+  template<class OtherType>
+  inline void UpdateBlocksSub(unsigned long iEdge, unsigned long iPoint, unsigned long jPoint,
+                              const OtherType* const* block_i, const OtherType* const* block_j) {
+    UpdateBlocks<OtherType,-1>(iEdge, iPoint, jPoint, block_i, block_j); // Sign = -1 flips the sign of every update.
+  }
+
+  /*!
+   * \brief Update 2 blocks ij and ji (add to i* sub from j*).
+   * \note The template parameter Sign can be used to create a "subtractive"
+   *       update i.e. subtract from row i and add to row j instead.
+   * \param[in] iEdge - Index of the edge that connects points i and j.
+   * \param[in] block_i - Block subtracted from ji (scaled by Sign).
+   * \param[in] block_j - Block added to ij (scaled by Sign).
+   */
+  template<class OtherType, int Sign = 1>
+  inline void UpdateBlocks(unsigned long iEdge, const OtherType* const* block_i, const OtherType* const* block_j) {
+
+    ScalarType *bij = &matrix[edge_ptr(iEdge,0)*nVar*nEqn]; // Block ij; edge_ptr is defined outside this hunk.
+    ScalarType *bji = &matrix[edge_ptr(iEdge,1)*nVar*nEqn]; // Block ji; each block spans nVar*nEqn entries.
+
+    unsigned long iVar, jVar, offset = 0; // offset = iVar*nEqn + jVar, advanced once per inner iteration.
+
+    for (iVar = 0; iVar < nVar; iVar++) {
+      for (jVar = 0; jVar < nEqn; jVar++) {
+        bij[offset] += PassiveAssign<ScalarType,OtherType>(block_j[iVar][jVar]) * Sign; // ij += Sign * block_j
+        bji[offset] -= PassiveAssign<ScalarType,OtherType>(block_i[iVar][jVar]) * Sign; // ji -= Sign * block_i
+        ++offset;
+      }
+    }
+  }
+
+  /*!
+   * \brief Short-hand for the "subtractive" version (sub from i*, add to j*) of the edge-only UpdateBlocks; arguments are forwarded unchanged.
+   */
+  template<class OtherType>
+  inline void UpdateBlocksSub(unsigned long iEdge, const OtherType* const* block_i, const OtherType* const* block_j) {
+    UpdateBlocks<OtherType,-1>(iEdge, block_i, block_j); // Sign = -1: ij -= block_j, ji += block_i.
+  }
+
+  /*!
+   * \brief Adds the specified block to the (i, i) subblock of the matrix-by-blocks structure.
+   * \param[in] block_i - Index of the row whose diagonal block is updated.
+   * \param[in] val_block - Block, read as val_block[iVar][jVar], added entry by entry to the diagonal block.
+   */
+  template<class OtherType>
+  inline void AddBlock2Diag(unsigned long block_i, const OtherType* const* val_block) {
+
+    ScalarType *bii = &matrix[dia_ptr[block_i]*nVar*nEqn]; // First entry of the diagonal block of row block_i.
+
+    unsigned long iVar, jVar, offset = 0; // offset runs over the nVar*nEqn block entries in loop order.
+
+    for (iVar = 0; iVar < nVar; iVar++)
+      for (jVar = 0; jVar < nEqn; jVar++)
+        bii[offset++] += PassiveAssign<ScalarType,OtherType>(val_block[iVar][jVar]);
+
+  }
+
+  /*!
+   * \brief Subtracts the specified block from the (i, i) subblock of the matrix-by-blocks structure.
+   * \param[in] block_i - Index of the row whose diagonal block is updated.
+   * \param[in] val_block - Block, read as val_block[iVar][jVar], subtracted entry by entry from the diagonal block.
+   */
+  template<class OtherType>
+  inline void SubtractBlock2Diag(unsigned long block_i, const OtherType* const* val_block) {
+
+    ScalarType *bii = &matrix[dia_ptr[block_i]*nVar*nEqn]; // First entry of the diagonal block of row block_i.
+
+    unsigned long iVar, jVar, offset = 0; // offset runs over the nVar*nEqn block entries in loop order.
+
+    for (iVar = 0; iVar < nVar; iVar++)
+      for (jVar = 0; jVar < nEqn; jVar++)
+        bii[offset++] -= PassiveAssign<ScalarType,OtherType>(val_block[iVar][jVar]);
+
+  }
+
   /*!
    * \brief Adds the specified value to the diagonal of the (i, i) subblock
    *        of the matrix-by-blocks structure.
@@ -616,6 +695,11 @@ class CSysMatrix {
   template<class OtherType>
   void EnforceSolutionAtNode(const unsigned long node_i, const OtherType *x_i, CSysVector<OtherType> & b);
 
+  /*!
+   * \brief Sets the diagonal entries of the matrix as the sum of the blocks in the corresponding column.
+   */
+  void SetDiagonalAsColumnSum();
+
   /*!
    * \brief Add a scaled sparse matrix to "this" (axpy-type operation, A = A+alpha*B).
    * \note Matrices must have the same sparse pattern.

diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp
@@ -57,6 +57,10 @@ using namespace std;
  * creating CSysSolve objects we can more easily assign different
  * matrix-vector products and preconditioners to different problems
  * that may arise in a hierarchical solver (i.e. multigrid).
+ *
+ * The methods of this class are designed to be called by multiple OpenMP threads.
+ * Beware of writes to class member variables; for example, "Residual" should only
+ * be modified by one thread.
  */
 template<class ScalarType>
 class CSysSolve {

diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp
@@ -59,7 +59,7 @@ class CSysVector {
    * \brief Generic initialization from a scalar or array.
    * \note If val==nullptr vec_val is not initialized, only allocated.
    * \param[in] numBlk - number of blocks locally
-   * \param[in] numBlkDomain - number of blocks locally (without g cells)
+   * \param[in] numBlkDomain - number of blocks locally (without ghost cells)
    * \param[in] numVar - number of variables in each block
    * \param[in] val - default value for elements
    * \param[in] valIsArray - if true val is treated as array
@@ -360,7 +360,11 @@ class CSysVector {
    * \param[in] val_var - inde of the residual to be set.
    * \return Value of the residual.
    */
-  inline ScalarType GetBlock(unsigned long val_ipoint, unsigned long val_var) const {
+  inline const ScalarType& operator() (unsigned long val_ipoint, unsigned long val_var) const {
+    return vec_val[val_ipoint*nVar+val_var];
+  }
+  inline ScalarType& operator() (unsigned long val_ipoint, unsigned long val_var) {
     return vec_val[val_ipoint*nVar+val_var];
   }
+
 };
diff --git a/Common/include/omp_structure.hpp b/Common/include/omp_structure.hpp
@@ -38,6 +38,8 @@
 
 #pragma once
 
+#include <type_traits>
+
 #if defined(_MSC_VER)
 #define PRAGMIZE(X) __pragma(X)
 #else
@@ -46,7 +48,8 @@
 
 /*--- Detect compilation with OpenMP support, protect agaisnt
  *    using OpenMP with AD (not supported yet). ---*/
-#if defined(_OPENMP) && !defined(CODI_REVERSE_TYPE) && !defined(CODI_FORWARD_TYPE)
+// NOTE(review): AD guard disabled, contradicting the comment above - confirm. Was: #if defined(_OPENMP) && !defined(CODI_REVERSE_TYPE) && !defined(CODI_FORWARD_TYPE)
+#if defined(_OPENMP)
 #define HAVE_OMP
 #include <omp.h>
 
@@ -84,7 +87,9 @@ inline constexpr int omp_get_thread_num(void) {return 0;}
 #define SU2_OMP_SIMD SU2_OMP(simd)
 
 #define SU2_OMP_MASTER SU2_OMP(master)
+#define SU2_OMP_ATOMIC SU2_OMP(atomic)
 #define SU2_OMP_BARRIER SU2_OMP(barrier)
+#define SU2_OMP_CRITICAL SU2_OMP(critical)
 
 #define SU2_OMP_PARALLEL SU2_OMP(parallel)
 #define SU2_OMP_PARALLEL_(ARGS) SU2_OMP(parallel ARGS)
@@ -122,3 +127,49 @@ inline size_t computeStaticChunkSize(size_t totalWork,
   return roundUpDiv(workPerThread, chunksPerThread);
 }
 
+/*!
+ * \brief Copy data from one array-like object to another in parallel.
+ * \param[in] size - Number of elements.
+ * \param[in] src - Source array, read at indices [0, size).
+ * \param[out] dst - Destination array, written at indices [0, size).
+ */
+template<class T, class U>
+void parallelCopy(size_t size, const T* src, U* dst)
+{
+  SU2_OMP_FOR_STAT(4196) // NOTE(review): macro defined outside this hunk, presumably a static "omp for"; confirm 4196 (not 4096) is the intended chunk size.
+  for(size_t i=0; i<size; ++i) dst[i] = src[i];
+}
+
+/*!
+ * \brief Set the entries of an array-like object to a constant value in parallel.
+ * \param[in] size - Number of elements.
+ * \param[in] val - Value to set (taken by value and assigned to every entry).
+ * \param[out] dst - Destination array, written at indices [0, size).
+ */
+template<class T, class U>
+void parallelSet(size_t size, T val, U* dst)
+{
+  SU2_OMP_FOR_STAT(4196) // NOTE(review): macro defined outside this hunk, presumably a static "omp for"; confirm 4196 (not 4096) is the intended chunk size.
+  for(size_t i=0; i<size; ++i) dst[i] = val;
+}
+
+/*!
+ * \brief Atomically update a (shared) lhs value with a (local) rhs value.
+ * \note For types without atomic support (non-arithmetic) this is done via critical.
+ * \param[in] rhs - Local variable being added to the shared one (passed by value).
+ * \param[in,out] lhs - Shared variable being updated.
+ */
+template<class T,
+         typename std::enable_if<!std::is_arithmetic<T>::value,bool>::type = 0> // Selected when T is not an arithmetic type.
+inline void atomicAdd(T rhs, T& lhs)
+{
+  SU2_OMP_CRITICAL // Unnamed critical section (SU2_OMP(critical)); it guards the statement that follows.
+  lhs += rhs;
+}
+template<class T,
+         typename std::enable_if<std::is_arithmetic<T>::value,bool>::type = 0> // Selected when T is an arithmetic type.
+inline void atomicAdd(T rhs, T& lhs)
+{
+  SU2_OMP_ATOMIC // SU2_OMP(atomic) applied to the single update statement that follows.
+  lhs += rhs;
+}