Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
b1f667a
Merge branch 'small_blas_docs_update' into feature_hybrid_parallel_an…
pcarruscag Feb 27, 2020
3611c10
Merge remote-tracking branch 'upstream/develop' into feature_hybrid_p…
pcarruscag Feb 28, 2020
e428ef0
Merge branch 'small_blas_docs_update' into feature_hybrid_parallel_an…
pcarruscag Feb 28, 2020
66f4ac5
Merge remote-tracking branch 'upstream/develop' into feature_hybrid_p…
pcarruscag Feb 28, 2020
148d948
prevent restarted FGMRES from going into infinite loop when RHS is zero
pcarruscag Mar 2, 2020
4dddf6d
fix for "old compiler" compatibility and legacy build system
pcarruscag Mar 2, 2020
a4c1cf7
Merge remote-tracking branch 'upstream/develop' into feature_hybrid_p…
pcarruscag Mar 2, 2020
9344f88
update old build system
pcarruscag Mar 2, 2020
f4e8185
unnecessary initialization of stiffness matrix in CMeshSolver
pcarruscag Mar 2, 2020
101b52d
fix OpenMP bug in SetMesh_Stiffness, allow upper bound on element sti…
pcarruscag Mar 2, 2020
aba265f
potential fix for potential cause of observed deadlock
pcarruscag Mar 3, 2020
998777d
add dummy locks and functions to omp_structure
pcarruscag Mar 4, 2020
7e25a93
bad coloring fallback strategy for CFEASolver
pcarruscag Mar 4, 2020
32ef194
use the "reducer strategy" as a fallback for when grid coloring is ba…
pcarruscag Mar 4, 2020
14ed268
build diagonal and transpose map in parallel
pcarruscag Mar 4, 2020
9cd52b6
Merge remote-tracking branch 'upstream/feature_update_elasticity_outp…
pcarruscag Mar 4, 2020
d7bb841
use a more expressive function to round up to next multiple
pcarruscag Mar 4, 2020
2fc6f30
use reducer strategy for turbulence solvers
pcarruscag Mar 4, 2020
c88ea7a
polish up the reducer strategy, avoid unnecessary resets of CSysMatrix
pcarruscag Mar 4, 2020
12c12b1
Merge remote-tracking branch 'upstream/develop' into feature_hybrid_p…
pcarruscag Mar 4, 2020
bcf6af4
small regression changes
pcarruscag Mar 4, 2020
19f2fe5
write Undivided Laplacian as point loop
pcarruscag Mar 4, 2020
13e9572
write Centered_Dissipation_Sensor as a point loop
pcarruscag Mar 4, 2020
733aef1
allow forcing of "reducer strategy" without warnings, fix some indent…
pcarruscag Mar 4, 2020
debb952
update fixedcl regression
pcarruscag Mar 5, 2020
b406fa3
methods to set natural colorings
pcarruscag Mar 5, 2020
fb500f2
fix FEA solver lock strategy
pcarruscag Mar 5, 2020
db8d771
make overhead of reducer strategy due to bad coloring same as when co…
pcarruscag Mar 5, 2020
f4dd41a
fuse "JST dissipation" loops, cleanup Euler/NS preprocessing
pcarruscag Mar 5, 2020
775980e
fix virtual bug in SetPrimitiveVariables
pcarruscag Mar 5, 2020
45e3cbc
fix some indentation in CDriver
pcarruscag Mar 5, 2020
82e2d6d
Merge remote-tracking branch 'upstream/develop' into feature_hybrid_p…
pcarruscag Mar 11, 2020
7fb78f8
add hybrid options to config_template
pcarruscag Mar 11, 2020
2aaf89f
convert moving mesh part of turb solver's dual time residual to point…
pcarruscag Mar 11, 2020
fea625e
revise logic for flow variable reconstruction with MUSCL turbulence
pcarruscag Mar 24, 2020
cdf6b2c
Merge remote-tracking branch 'upstream/develop' into hybrid_parallel_…
pcarruscag Mar 25, 2020
b6361f1
update some testcases
pcarruscag Mar 25, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions Common/include/geometry/CGeometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1621,7 +1621,7 @@ class CGeometry {
* \param[in] fillLvl - Level of fill of the pattern.
* \return Reference to the sparse pattern.
*/
const CCompressedSparsePatternUL& GetSparsePattern(ConnectivityType type, unsigned long fillLvl);
const CCompressedSparsePatternUL& GetSparsePattern(ConnectivityType type, unsigned long fillLvl = 0);

/*!
* \brief Get the edge to sparse pattern map.
Expand All @@ -1630,12 +1630,25 @@ class CGeometry {
*/
const CEdgeToNonZeroMapUL& GetEdgeToSparsePatternMap(void);

/*!
* \brief Get the transpose of the (main, i.e 0 fill) sparse pattern (e.g. CSR becomes CSC).
* \param[in] type - Finite volume or finite element.
* \return Reference to the map.
*/
const su2vector<unsigned long>& GetTransposeSparsePatternMap(ConnectivityType type);

/*!
* \brief Get the edge coloring.
* \note This method computes the coloring if that has not been done yet.
* \param[out] efficiency - optional output of the coloring efficiency.
* \return Reference to the coloring.
*/
const CCompressedSparsePatternUL& GetEdgeColoring(void);
const CCompressedSparsePatternUL& GetEdgeColoring(su2double* efficiency = nullptr);

/*!
* \brief Force the natural (sequential) edge coloring.
*/
void SetNaturalEdgeColoring();

/*!
* \brief Get the group size used in edge coloring.
Expand All @@ -1646,9 +1659,15 @@ class CGeometry {
/*!
* \brief Get the element coloring.
* \note This method computes the coloring if that has not been done yet.
* \param[out] efficiency - optional output of the coloring efficiency.
* \return Reference to the coloring.
*/
const CCompressedSparsePatternUL& GetElementColoring(void);
const CCompressedSparsePatternUL& GetElementColoring(su2double* efficiency = nullptr);

/*!
* \brief Force the natural (sequential) element coloring.
*/
void SetNaturalElementColoring();

/*!
* \brief Get the group size used in element coloring.
Expand Down
22 changes: 17 additions & 5 deletions Common/include/linear_algebra/CSysMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class CSysMatrix {
const unsigned long *row_ptr; /*!< \brief Pointers to the first element in each row. */
const unsigned long *dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */
const unsigned long *col_ind; /*!< \brief Column index for each of the elements in val(). */
vector<const ScalarType*> col_ptr;/*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */
const unsigned long *col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */

ScalarType *ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */
unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */
Expand Down Expand Up @@ -349,10 +349,12 @@ class CSysMatrix {
* \param[in] neqn - Number of equations.
* \param[in] geometry - Geometrical definition of the problem.
* \param[in] config - Definition of the particular problem.
* \param[in] needTranspPtr - If "col_ptr" should be created.
*/
void Initialize(unsigned long npoint, unsigned long npointdomain,
unsigned short nvar, unsigned short neqn,
bool EdgeConnect, CGeometry *geometry, CConfig *config);
bool EdgeConnect, CGeometry *geometry,
CConfig *config, bool needTranspPtr = false);

/*!
* \brief Sets to zero all the entries of the sparse matrix.
Expand Down Expand Up @@ -584,11 +586,13 @@ class CSysMatrix {
* \brief Update 2 blocks ij and ji (add to i* sub from j*).
* \note The template parameter Sign, can be used create a "subtractive"
* update i.e. subtract from row i and add to row j instead.
* The parameter Overwrite allows completely writing over the
* current values held by the matrix.
* \param[in] edge - Index of edge that connects iPoint and jPoint.
* \param[in] block_i - Subs from ji.
* \param[in] block_j - Adds to ij.
*/
template<class OtherType, int Sign = 1>
template<class OtherType, int Sign = 1, bool Overwrite = false>
inline void UpdateBlocks(unsigned long iEdge, const OtherType* const* block_i, const OtherType* const* block_j) {

ScalarType *bij = &matrix[edge_ptr(iEdge,0)*nVar*nEqn];
Expand All @@ -598,8 +602,8 @@ class CSysMatrix {

for (iVar = 0; iVar < nVar; iVar++) {
for (jVar = 0; jVar < nEqn; jVar++) {
bij[offset] += PassiveAssign<ScalarType,OtherType>(block_j[iVar][jVar]) * Sign;
bji[offset] -= PassiveAssign<ScalarType,OtherType>(block_i[iVar][jVar]) * Sign;
bij[offset] = (Overwrite? ScalarType(0) : bij[offset]) + PassiveAssign<ScalarType,OtherType>(block_j[iVar][jVar]) * Sign;
bji[offset] = (Overwrite? ScalarType(0) : bji[offset]) - PassiveAssign<ScalarType,OtherType>(block_i[iVar][jVar]) * Sign;
++offset;
}
}
Expand All @@ -613,6 +617,14 @@ class CSysMatrix {
UpdateBlocks<OtherType,-1>(iEdge, block_i, block_j);
}

/*!
* \brief Short-hand for the "additive overwrite" version of UpdateBlocks.
*/
template<class OtherType>
inline void SetBlocks(unsigned long iEdge, const OtherType* const* block_i, const OtherType* const* block_j) {
UpdateBlocks<OtherType,1,true>(iEdge, block_i, block_j);
}

/*!
* \brief Adds the specified block to the (i, i) subblock of the matrix-by-blocks structure.
* \param[in] block_i - Diagonal index.
Expand Down
21 changes: 21 additions & 0 deletions Common/include/omp_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ inline void omp_set_num_threads(int) { }
*/
inline constexpr int omp_get_thread_num(void) {return 0;}

/*!
* \brief Dummy lock type and associated functions.
*/
struct omp_lock_t {};
struct DummyVectorOfLocks {
omp_lock_t l;
inline omp_lock_t& operator[](int) {return l;}
};
inline void omp_init_lock(omp_lock_t*){}
inline void omp_set_lock(omp_lock_t*){}
inline void omp_unset_lock(omp_lock_t*){}
inline void omp_destroy_lock(omp_lock_t*){}
Comment on lines +83 to +94
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As usual we define "do-nothing" types and functions when compiling without hybrid parallel support to make compilation compatible without having to throw #ifdefs everywhere.


#endif

/*--- Convenience macros (do not use excessive nesting of macros). ---*/
Expand Down Expand Up @@ -108,6 +121,14 @@ inline constexpr size_t roundUpDiv(size_t numerator, size_t denominator)
return (numerator+denominator-1)/denominator;
}

/*!
* \brief Round up to next multiple.
*/
inline constexpr size_t nextMultiple(size_t argument, size_t multiple)
{
return roundUpDiv(argument, multiple) * multiple;
}

/*!
* \brief Compute a chunk size based on totalWork and number of threads such that
* all threads get the same number of chunks (with limited size).
Expand Down
2 changes: 2 additions & 0 deletions Common/include/option_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ const int SU2_CONN_SIZE = 10; /*!< \brief Size of the connectivity array that
that we read from a mesh file in the format [[globalID vtkType n0 n1 n2 n3 n4 n5 n6 n7 n8]. */
const int SU2_CONN_SKIP = 2; /*!< \brief Offset to skip the globalID and VTK type at the start of the element connectivity list for each CGNS element. */

const su2double COLORING_EFF_THRESH = 0.875; /*!< \brief Below this value fallback strategies are used instead. */

/*!
* \brief Boolean answers
*/
Expand Down
10 changes: 0 additions & 10 deletions Common/include/toolboxes/C1DInterpolation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,6 @@ class CAkimaInterpolation final: public C1DInterpolation{
SetSpline(X,Data);
}

/*!
* \brief Destructor of the CAkimaInterpolation class.
*/
~CAkimaInterpolation(){}

Comment on lines -87 to -91
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was removed for old compiler compatibility.

/*!
* \brief for setting the cofficients for the Akima spline.
* \param[in] X - the x values.
Expand Down Expand Up @@ -119,11 +114,6 @@ class CLinearInterpolation final: public C1DInterpolation{
SetSpline(X,Data);
}

/*!
* \brief Destructor of the CInletInterpolation class.
*/
~CLinearInterpolation(){}

/*!
* \brief for setting the cofficients for Linear 'spline'.
* \param[in] X - the x values.
Expand Down
108 changes: 82 additions & 26 deletions Common/include/toolboxes/graph_toolbox.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#pragma once

#include "C2DContainer.hpp"
#include "../omp_structure.hpp"

#include <set>
#include <vector>
Expand Down Expand Up @@ -59,6 +60,7 @@ class CCompressedSparsePattern {
su2vector<Index_t> m_outerPtr; /*!< \brief Start positions of the inner indices for each outer index. */
su2vector<Index_t> m_innerIdx; /*!< \brief Inner indices of the non zero entries. */
su2vector<Index_t> m_diagPtr; /*!< \brief Position of the diagonal entry. */
su2vector<Index_t> m_innerIdxTransp; /*!< \brief Position of the transpose non zero entries, requires symmetry. */

public:
using IndexType = Index_t;
Expand Down Expand Up @@ -107,10 +109,30 @@ class CCompressedSparsePattern {
if(!m_diagPtr.empty()) return;

m_diagPtr.resize(getOuterSize());

SU2_OMP_PARALLEL_(for schedule(static,roundUpDiv(getOuterSize(),omp_get_max_threads())))
for(Index_t k = 0; k < getOuterSize(); ++k)
m_diagPtr(k) = findInnerIdx(k,k);
}

/*!
* \brief Build a list of pointers to the transpose entries of the pattern, requires symmetry.
*/
void buildTransposePtr() {
if(!m_innerIdxTransp.empty()) return;

m_innerIdxTransp.resize(getNumNonZeros());

SU2_OMP_PARALLEL_(for schedule(static,roundUpDiv(getOuterSize(),omp_get_max_threads())))
for(Index_t i = 0; i < getOuterSize(); ++i) {
for(Index_t k = m_outerPtr(i); k < m_outerPtr(i+1); ++k) {
auto j = m_innerIdx(k);
m_innerIdxTransp(k) = findInnerIdx(j,i);
assert(m_innerIdxTransp(k) != m_innerIdx.size() && "The pattern is not symmetric.");
}
}
}

/*!
* \return True if the pattern is empty, i.e. has not been built yet.
*/
Expand Down Expand Up @@ -224,6 +246,14 @@ class CCompressedSparsePattern {
return m_diagPtr.data();
}

/*!
* \return Raw pointer to the transpose pointer vector.
*/
inline const su2vector<Index_t>& transposePtr() const {
assert(!m_innerIdxTransp.empty() && "Transpose map has not been built.");
return m_innerIdxTransp;
}

/*!
* \return The minimum inner index.
*/
Expand Down Expand Up @@ -384,6 +414,30 @@ CEdgeToNonZeroMap<Index_t> mapEdgesToSparsePattern(Geometry_t& geometry,
}


/*!
* \brief Create the natural coloring (equivalent to the normal sequential loop
* order) for a given number of inner indexes.
* \note This is to reduce overhead in "OpenMP-ready" code when only 1 thread is used.
* \param[in] numInnerIndexes - Number of indexes that are to be colored.
* \return Natural (sequential) coloring of the inner indices.
*/
template<class T = CCompressedSparsePatternUL,
class Index_t = typename T::IndexType>
T createNaturalColoring(Index_t numInnerIndexes)
{
/*--- One color. ---*/
su2vector<Index_t> outerPtr(2);
outerPtr(0) = 0;
outerPtr(1) = numInnerIndexes;

/*--- Containing all indexes in ascending order. ---*/
su2vector<Index_t> innerIdx(numInnerIndexes);
std::iota(innerIdx.data(), innerIdx.data()+numInnerIndexes, 0);

return T(std::move(outerPtr), std::move(innerIdx));
}


/*!
* \brief Color contiguous groups of outer indices of a sparse pattern such that
* within each color, any two groups do not have inner indices in common.
Expand All @@ -404,7 +458,7 @@ CEdgeToNonZeroMap<Index_t> mapEdgesToSparsePattern(Geometry_t& geometry,
* \param[out] indexColor - Optional, vector with colors given to the outer indices.
* \return Coloring in the same type of the input pattern.
*/
template<class T, typename Color_t = char, size_t MaxColors = 64, size_t MaxMB = 128>
template<class T, typename Color_t = char, size_t MaxColors = 32, size_t MaxMB = 128>
T colorSparsePattern(const T& pattern, size_t groupSize = 1, bool balanceColors = false,
std::vector<Color_t>* indexColor = nullptr)
{
Expand All @@ -415,6 +469,10 @@ T colorSparsePattern(const T& pattern, size_t groupSize = 1, bool balanceColors

const Index_t grpSz = groupSize;
const Index_t nOuter = pattern.getOuterSize();

/*--- Trivial case. ---*/
if(groupSize >= nOuter) return createNaturalColoring(nOuter);

const Index_t minIdx = pattern.getMinInnerIdx();
const Index_t nInner = pattern.getMaxInnerIdx()+1-minIdx;

Expand Down Expand Up @@ -520,30 +578,6 @@ T colorSparsePattern(const T& pattern, size_t groupSize = 1, bool balanceColors
}


/*!
* \brief Create the natural coloring (equivalent to the normal sequential loop
* order) for a given number of inner indexes.
* \note This is to reduce overhead in "OpenMP-ready" code when only 1 thread is used.
* \param[in] numInnerIndexes - Number of indexes that are to be colored.
* \return Natural (sequential) coloring of the inner indices.
*/
template<class T = CCompressedSparsePatternUL,
class Index_t = typename T::IndexType>
T createNaturalColoring(Index_t numInnerIndexes)
{
/*--- One color. ---*/
su2vector<Index_t> outerPtr(2);
outerPtr(0) = 0;
outerPtr(1) = numInnerIndexes;

/*--- Containing all indexes in ascending order. ---*/
su2vector<Index_t> innerIdx(numInnerIndexes);
std::iota(innerIdx.data(), innerIdx.data()+numInnerIndexes, 0);

return T(std::move(outerPtr), std::move(innerIdx));
}


/*!
* \brief A way to represent one grid color that allows range-for syntax.
*/
Expand All @@ -553,9 +587,11 @@ struct GridColor
static_assert(std::is_integral<T>::value,"");

const T size;
T groupSize;
const T* const indices;

GridColor(const T* idx = nullptr, T sz = 0) : size(sz), indices(idx) { }
GridColor(const T* idx = nullptr, T sz = 0, T grp = 0) :
size(sz), groupSize(grp), indices(idx) { }

inline const T* begin() const {return indices;}
inline const T* end() const {return indices+size;}
Expand Down Expand Up @@ -592,3 +628,23 @@ struct DummyGridColor
inline IteratorLikeInt begin() const {return IteratorLikeInt(0);}
inline IteratorLikeInt end() const {return IteratorLikeInt(size);}
};


/*!
* \brief Computes the efficiency of a grid coloring for given number of threads and chunk size.
*/
template<class SparsePattern>
su2double coloringEfficiency(const SparsePattern& coloring, int numThreads, int chunkSize)
{
using Index_t = typename SparsePattern::IndexType;

/*--- Ideally compute time is proportional to total work over number of threads. ---*/
su2double ideal = coloring.getNumNonZeros() / su2double(numThreads);

/*--- In practice the total work is quantized first by colors and then by chunks. ---*/
Index_t real = 0;
for(Index_t color = 0; color < coloring.getOuterSize(); ++color)
real += chunkSize * roundUpDiv(roundUpDiv(coloring.getNumNonZeros(color), chunkSize), numThreads);
Comment on lines +641 to +647
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The computation of coloring efficiency is described here, it is just a simple heuristic.


return ideal / real;
}
Loading