Merged
51 commits
153b6ef
Define SU2_OMP_SINGLE
maxaehle May 5, 2022
cf77f4e
Fix concurrent zero-initialization of SetBeta_Parameter::MaxVel2
maxaehle May 5, 2022
436f8cc
Fix concurrent calls to CConfig::SetGlobalParam in Preprocessing
maxaehle May 5, 2022
f3959c6
Add omp atomic/critical pragmas in CNSSolver::SetTau_Wall_WF
maxaehle May 5, 2022
6a0fb5c
omp single around SetGlobalParam for some other Preprocessing's
maxaehle May 5, 2022
4934504
Missing end macro.
jblueh May 5, 2022
a67538e
Fix race on residual in Solve_b.
jblueh May 5, 2022
afd748f
Explicit resizing of adjoint vector.
jblueh May 6, 2022
cb9234e
ResizeAdjoints() for non-AD builds.
maxaehle May 6, 2022
78ffdc0
Fix race condition in CMultiGridIntegration::MultiGrid_Cycle
maxaehle May 6, 2022
3e1fa38
Add barrier before the assignment in ompMasterAssignBarrier.
maxaehle May 6, 2022
f879d56
SU2_OMP_CRITICAL access of CVariable::Non_Physical.
maxaehle May 7, 2022
526e455
SU2_OMP_CRITICAL access of Non_Physical for incompressible Euler
maxaehle May 7, 2022
ac8ea30
OpenMP worksharing in PreprocessBC_Giles
maxaehle May 7, 2022
47dae26
Remove nowait clause.
maxaehle May 8, 2022
7d3bf0f
omp single in ComputeSurfaceAreaCfgFile
maxaehle May 8, 2022
1859c19
omp single -> omp master
maxaehle May 8, 2022
3723ede
fix
maxaehle May 8, 2022
4577b30
Undo incorrect loop splitting.
jblueh May 30, 2022
54cd32e
Safeguard Kind_TimeIntScheme.
jblueh May 30, 2022
dfe691b
Add an option for disabling preacc to tests.
jblueh May 30, 2022
8d79a2b
Consistent comment formatting.
jblueh May 30, 2022
d2c65f9
CoDiPack update.
jblueh May 30, 2022
4da85aa
OpDiLib update.
jblueh May 30, 2022
cdc87cf
Update commit hashes.
jblueh May 31, 2022
6fab68a
Fix hardcoded AD types.
jblueh Jun 2, 2022
f5f491d
OpDiLib update.
jblueh Jun 10, 2022
75aaa51
CoDiPack update.
jblueh Jun 10, 2022
d0b3138
Update test values.
jblueh Jun 14, 2022
dac909a
Explicit resizing no longer needed.
jblueh Jun 14, 2022
f397b5e
Atomics not needed.
jblueh Jun 15, 2022
f89bcfd
Merge branch 'develop' into fix_some_raceconditions
jblueh Jun 15, 2022
db77809
Use length 1 SIMD arrays for AD Types.
jblueh Jun 22, 2022
46d341a
Additional barriers.
jblueh Jun 22, 2022
e4afd06
CoDiPack update.
jblueh Jun 22, 2022
a236b5a
Update Common/src/linear_algebra/CSysSolve.cpp
jblueh Jun 28, 2022
7cffa8e
Fix leftover tsan findings in non-AD code.
jblueh Jun 29, 2022
8ed32fc
Add SU2_OMP_SAFE_GLOBAL_ACCESS variants.
jblueh Jun 29, 2022
0b021a7
Revert "Add an option for disabling preacc to tests."
jblueh Jun 29, 2022
7bd3512
Update Common/src/linear_algebra/CSysMatrix.cpp
jblueh Jun 29, 2022
7a2eb43
Further applications of SU2_OMP_SAFE_GLOBAL_ACCESS.
jblueh Jun 29, 2022
85ebbe0
Merge branch 'fix_race_conditions' of github:su2code/SU2 into fix_rac…
jblueh Jun 29, 2022
2378913
Merge branch 'develop' into fix_race_conditions
pcarruscag Jul 2, 2022
c523946
edge-based strategy for reconstructions
pcarruscag Jul 2, 2022
deda9e3
GetNode for non-contiguous edges.
jblueh Jul 6, 2022
a419f8d
Use hybrid parallel AD test values from CI pipeline.
jblueh Jul 6, 2022
a501b19
use the gather ctor of simd array
pcarruscag Jul 7, 2022
cf1a98d
pad nodes consistently
pcarruscag Jul 7, 2022
c3ac389
Update Common/include/linear_algebra/CSysVector.hpp
pcarruscag Jul 7, 2022
a68cc74
update testcases, streamwise pin converges well
pcarruscag Jul 7, 2022
9673bb9
Merge branch 'fix_race_conditions' of https://github.com/su2code/SU2 …
pcarruscag Jul 7, 2022
12 changes: 2 additions & 10 deletions Common/include/basic_types/ad_structure.hpp
@@ -559,21 +559,13 @@ namespace AD{
FORCEINLINE bool PausePreaccumulation() {
const auto current = PreaccEnabled;
if (!current) return false;
- SU2_OMP_BARRIER
- SU2_OMP_MASTER
- PreaccEnabled = false;
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ SU2_OMP_SAFE_GLOBAL_ACCESS(PreaccEnabled = false;)
return true;
}

FORCEINLINE void ResumePreaccumulation(bool wasActive) {
if (!wasActive) return;
- SU2_OMP_BARRIER
- SU2_OMP_MASTER
- PreaccEnabled = true;
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ SU2_OMP_SAFE_GLOBAL_ACCESS(PreaccEnabled = true;)
}
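For context, the intended call pattern (an illustrative sketch, not part of this diff) pairs the two functions so that preaccumulation is only resumed if it was active before the pause:

/*--- Illustrative sketch: bracket a region that must run without preaccumulation,
      then restore the previous state. All threads call both functions. ---*/
const bool wasActive = AD::PausePreaccumulation();
/* ... code that must not be preaccumulated ... */
AD::ResumePreaccumulation(wasActive);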

FORCEINLINE void StartNoSharedReading() {
17 changes: 16 additions & 1 deletion Common/include/geometry/dual_grid/CEdge.hpp
@@ -43,6 +43,7 @@ class CEdge {
using NodeArray = C2DContainer<Index, Index, StorageType::ColumnMajor, 64, DynamicSize, 2>;
NodeArray Nodes; /*!< \brief Vector to store the node indices of the edge. */
su2activematrix Normal; /*!< \brief Normal (area) of the edge. */
+ const Index nEdge, nEdgeSIMD;

friend class CPhysicalGeometry;

@@ -70,13 +71,27 @@ class CEdge {
inline unsigned long GetNode(unsigned long iEdge, unsigned long iNode) const { return Nodes(iEdge,iNode); }

/*!
- * \brief SIMD version of GetNode, iNode returned for multiple contiguous iEdges
+ * \brief SIMD version of GetNode, iNode returned for contiguous iEdges.
*/
template<class T, size_t N>
FORCEINLINE simd::Array<T,N> GetNode(simd::Array<T,N> iEdge, unsigned long iNode) const {
return simd::Array<T,N>(&Nodes(iEdge[0],iNode));
}

+ /*!
+ * \brief Sets the tail of "Nodes" to repeat one of the last edges.
+ * \note This is needed when using the SIMD version of GetNode and
+ * the number of edges is not a multiple of the simd width.
+ */
+ void SetPaddingNodes() {
+ for (auto iEdge = nEdge; iEdge < nEdgeSIMD; ++iEdge) {
+ /*--- Pad nodes by repeating the first edge in the last SIMD group. ---*/
+ const auto iEdge0 = nEdgeSIMD - simd::preferredLen<su2double>();
+ Nodes(iEdge, LEFT) = Nodes(iEdge0, LEFT);
+ Nodes(iEdge, RIGHT) = Nodes(iEdge0, RIGHT);
+ }
+ }
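To illustrate why the tail is padded, here is a standalone sketch (plain C++, not SU2 code; the edge count, SIMD width, and node values are made up) of the same round-up-and-repeat idea:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  constexpr std::size_t simdLen = 4;   // assumed SIMD group size, e.g. preferredLen<su2double>()
  const std::size_t nEdge = 10;        // actual number of edges
  const std::size_t nEdgeSIMD = ((nEdge + simdLen - 1) / simdLen) * simdLen;  // rounded up to 12

  std::vector<std::size_t> leftNode(nEdgeSIMD);
  for (std::size_t i = 0; i < nEdge; ++i) leftNode[i] = 100 + i;  // dummy node indices

  /*--- Same idea as SetPaddingNodes: the padded tail repeats the first edge of the
        last SIMD group, so a full-width gather over edges {8,9,10,11} stays in bounds. ---*/
  const std::size_t iEdge0 = nEdgeSIMD - simdLen;
  for (std::size_t i = nEdge; i < nEdgeSIMD; ++i) leftNode[i] = leftNode[iEdge0];

  for (std::size_t i = 0; i < nEdgeSIMD; ++i) std::cout << leftNode[i] << ' ';  // ends "108 109 108 108"
}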

/*!
* \brief Set the node indices of an edge.
* \param[in] iEdge - Edge index.
22 changes: 8 additions & 14 deletions Common/include/linear_algebra/CSysSolve.hpp
@@ -219,12 +219,11 @@ class CSysSolve {
void HandleTemporariesIn(const CSysVector<OtherType>& LinSysRes, CSysVector<OtherType>& LinSysSol) {

/*--- Set the pointers. ---*/
- SU2_OMP_MASTER {
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
LinSysRes_ptr = &LinSysRes;
LinSysSol_ptr = &LinSysSol;
}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS
}

/*!
@@ -241,12 +240,11 @@
LinSysSol_tmp.PassiveCopy(LinSysSol);

/*--- Set the pointers. ---*/
- SU2_OMP_MASTER {
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
LinSysRes_ptr = &LinSysRes_tmp;
LinSysSol_ptr = &LinSysSol_tmp;
}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS
}

/*!
@@ -258,13 +256,11 @@
void HandleTemporariesOut(CSysVector<OtherType>& LinSysSol) {

/*--- Reset the pointers. ---*/
- SU2_OMP_BARRIER
- SU2_OMP_MASTER {
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
LinSysRes_ptr = nullptr;
LinSysSol_ptr = nullptr;
}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS
}

/*!
@@ -279,13 +275,11 @@
LinSysSol.PassiveCopy(LinSysSol_tmp);

/*--- Reset the pointers. ---*/
- SU2_OMP_BARRIER
- SU2_OMP_MASTER {
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
LinSysRes_ptr = nullptr;
LinSysSol_ptr = nullptr;
}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS
}

public:
4 changes: 2 additions & 2 deletions Common/include/linear_algebra/CSysSolve_b.hpp
@@ -32,8 +32,8 @@
#ifdef CODI_REVERSE_TYPE
template<class ScalarType>
struct CSysSolve_b {
- static void Solve_b(const codi::RealReverse::Real* x, codi::RealReverse::Real* x_b, size_t m,
- const codi::RealReverse::Real* y, const codi::RealReverse::Real* y_b, size_t n,
+ static void Solve_b(const su2double::Real* x, su2double::Real* x_b, size_t m,
+ const su2double::Real* y, const su2double::Real* y_b, size_t n,
codi::DataStore* d);
};
#endif
19 changes: 6 additions & 13 deletions Common/include/linear_algebra/CSysVector.hpp
@@ -186,10 +186,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
/*--- check if self-assignment, otherwise perform deep copy ---*/
if ((const void*)this == (const void*)&other) return;

- SU2_OMP_MASTER
- Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ SU2_OMP_SAFE_GLOBAL_ACCESS(Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);)

CSYSVEC_PARFOR
for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]);
@@ -297,11 +294,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
ScalarType dot(const VecExpr::CVecExpr<T, ScalarType>& expr) const {
static ScalarType dotRes;
/*--- All threads get the same "view" of the vectors and shared variable. ---*/
- SU2_OMP_BARRIER
- SU2_OMP_MASTER
- dotRes = 0.0;
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ SU2_OMP_SAFE_GLOBAL_ACCESS(dotRes = 0.0;)

/*--- Local dot product for each thread. ---*/
ScalarType sum = 0.0;
@@ -317,16 +310,16 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>

#ifdef HAVE_MPI
/*--- Reduce across all mpi ranks, only master thread communicates. ---*/
- SU2_OMP_BARRIER
- SU2_OMP_MASTER {
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
sum = dotRes;
const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE;
SelectMPIWrapper<ScalarType>::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm());
}
- END_SU2_OMP_MASTER
- #endif
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS
+ #else
/*--- Make view of result consistent across threads. ---*/
SU2_OMP_BARRIER
+ #endif

return dotRes;
}
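The dot product above uses the usual hybrid reduction pattern: a shared accumulator reset between barriers, thread-local partial sums combined atomically, and a final barrier before the (MPI) reduction. A self-contained OpenMP sketch of that pattern (plain C++, not SU2 code, MPI part omitted):

#include <cstdio>
#include <vector>

int main() {
  std::vector<double> v(1000, 1.0);
  static double dotRes;                    // shared accumulator, like CSysVector::dot
  #pragma omp parallel
  {
    #pragma omp barrier
    #pragma omp master
    dotRes = 0.0;
    #pragma omp barrier                    // all threads see the zeroed accumulator

    double sum = 0.0;                      // thread-local partial sum
    #pragma omp for nowait
    for (size_t i = 0; i < v.size(); ++i) sum += v[i] * v[i];

    #pragma omp atomic
    dotRes += sum;
    #pragma omp barrier                    // all threads see the final value
  }
  std::printf("%f\n", dotRes);             // prints 1000.000000
}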
19 changes: 19 additions & 0 deletions Common/include/parallelization/omp_structure.hpp
@@ -185,6 +185,25 @@ void omp_finalize();

#endif

+ /* The SU2_OMP_SAFE_GLOBAL_ACCESS constructs are used to safeguard code that should only be executed by the master
+  * thread, with all threads and memory views synchronized both beforehand and afterwards.
+  */

+ #define BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS \
+ SU2_OMP_BARRIER \
+ SU2_OMP_MASTER

+ #define END_SU2_OMP_SAFE_GLOBAL_ACCESS \
+ END_SU2_OMP_MASTER \
+ SU2_OMP_BARRIER

+ #define SU2_OMP_SAFE_GLOBAL_ACCESS(...) \
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS \
+ { \
+ __VA_ARGS__ \
+ } \
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS
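For reference, a usage sketch of the new constructs (hypothetical helper, not part of this diff; assumes omp_structure.hpp is included and the call happens inside a parallel region):

/*--- Hypothetical helper: every thread of the team calls it, only the master thread
      performs the writes, and the surrounding barriers make the result visible to
      all threads before anyone proceeds. ---*/
inline void ExampleResetShared(su2double& sharedValue, const su2double*& sharedPtr, const su2double* newPtr) {
  /*--- Single-statement form. ---*/
  SU2_OMP_SAFE_GLOBAL_ACCESS(sharedValue = 0.0;)

  /*--- Block form, for several statements that must be applied together. ---*/
  BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
    sharedValue = 0.0;
    sharedPtr = newPtr;
  }
  END_SU2_OMP_SAFE_GLOBAL_ACCESS
}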

/*--- Convenience functions (e.g. to compute chunk sizes). ---*/

/*!
1 change: 1 addition & 0 deletions Common/include/parallelization/vectorization.hpp
@@ -56,6 +56,7 @@ constexpr size_t PREFERRED_SIZE = 8;
*/
template<class T>
constexpr size_t preferredLen() { return PREFERRED_SIZE / sizeof(T); }

template<>
constexpr size_t preferredLen<su2double>() { return PREFERRED_SIZE / sizeof(passivedouble); }

81 changes: 36 additions & 45 deletions Common/src/geometry/CGeometry.cpp
@@ -357,8 +357,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {

if (countPerPoint <= maxCountPerPoint) return;

- SU2_OMP_BARRIER
- SU2_OMP_MASTER {
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {

/*--- Store the larger packet size to the class data. ---*/

@@ -379,8 +378,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {
bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] ();

}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS

}

@@ -763,10 +761,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
/*--- For efficiency, recv the messages dynamically based on
the order they arrive. ---*/

- SU2_OMP_MASTER
- SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);)

/*--- Once we have recv'd a message, get the source rank. ---*/

@@ -831,12 +826,8 @@ void CGeometry::CompleteComms(CGeometry *geometry,
data in the loop above at this point. ---*/

#ifdef HAVE_MPI
- SU2_OMP_MASTER
- SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);
- END_SU2_OMP_MASTER
+ SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);)
#endif
- SU2_OMP_BARRIER
}

void CGeometry::PreprocessPeriodicComms(CGeometry *geometry,
@@ -1186,8 +1177,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {

if (countPerPeriodicPoint <= maxCountPerPeriodicPoint) return;

- SU2_OMP_BARRIER
- SU2_OMP_MASTER {
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {

/*--- Store the larger packet size to the class data. ---*/

Expand All @@ -1213,8 +1203,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {
bufS_PeriodicRecv = new unsigned short[nRecv] ();

}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS
}

void CGeometry::PostPeriodicRecvs(CGeometry *geometry,
@@ -1409,6 +1398,7 @@ void CGeometry::SetEdges(void) {
}
}
}
+ edges->SetPaddingNodes();
}

void CGeometry::SetFaces(void) {
@@ -2506,38 +2496,41 @@ void CGeometry::UpdateCustomBoundaryConditions(CGeometry **geometry_container, C
}

void CGeometry::ComputeSurfaceAreaCfgFile(const CConfig *config) {
- const auto nMarker_Global = config->GetnMarker_CfgFile();
- SurfaceAreaCfgFile.resize(nMarker_Global);
- vector<su2double> LocalSurfaceArea(nMarker_Global, 0.0);
+ SU2_OMP_MASTER
+ {
+ const auto nMarker_Global = config->GetnMarker_CfgFile();
+ SurfaceAreaCfgFile.resize(nMarker_Global);
+ vector<su2double> LocalSurfaceArea(nMarker_Global, 0.0);

- /*--- Loop over all local markers ---*/
- for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) {
+ /*--- Loop over all local markers ---*/
+ for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) {

- const auto Local_TagBound = config->GetMarker_All_TagBound(iMarker);
+ const auto Local_TagBound = config->GetMarker_All_TagBound(iMarker);

- /*--- Loop over all global markers, and find the local-global pair via
- matching unique string tags. ---*/
- for (unsigned short iMarker_Global = 0; iMarker_Global < nMarker_Global; iMarker_Global++) {
+ /*--- Loop over all global markers, and find the local-global pair via
+ matching unique string tags. ---*/
+ for (unsigned short iMarker_Global = 0; iMarker_Global < nMarker_Global; iMarker_Global++) {

- const auto Global_TagBound = config->GetMarker_CfgFile_TagBound(iMarker_Global);
- if (Local_TagBound == Global_TagBound) {
+ const auto Global_TagBound = config->GetMarker_CfgFile_TagBound(iMarker_Global);
+ if (Local_TagBound == Global_TagBound) {

- for(auto iVertex = 0ul; iVertex < nVertex[iMarker]; iVertex++ ) {
+ for(auto iVertex = 0ul; iVertex < nVertex[iMarker]; iVertex++ ) {

- const auto iPoint = vertex[iMarker][iVertex]->GetNode();
+ const auto iPoint = vertex[iMarker][iVertex]->GetNode();

- if(!nodes->GetDomain(iPoint)) continue;
+ if(!nodes->GetDomain(iPoint)) continue;

- const auto AreaNormal = vertex[iMarker][iVertex]->GetNormal();
- const auto Area = GeometryToolbox::Norm(nDim, AreaNormal);
+ const auto AreaNormal = vertex[iMarker][iVertex]->GetNormal();
+ const auto Area = GeometryToolbox::Norm(nDim, AreaNormal);

- LocalSurfaceArea[iMarker_Global] += Area;
- }// for iVertex
- }//if Local == Global
- }//for iMarker_Global
- }//for iMarker
+ LocalSurfaceArea[iMarker_Global] += Area;
+ }// for iVertex
+ }//if Local == Global
+ }//for iMarker_Global
+ }//for iMarker

- SU2_MPI::Allreduce(LocalSurfaceArea.data(), SurfaceAreaCfgFile.data(), SurfaceAreaCfgFile.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
+ SU2_MPI::Allreduce(LocalSurfaceArea.data(), SurfaceAreaCfgFile.data(), SurfaceAreaCfgFile.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
+ } END_SU2_OMP_MASTER
}

su2double CGeometry::GetSurfaceArea(const CConfig *config, unsigned short val_marker) const {
@@ -3133,7 +3126,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
END_SU2_OMP_FOR

/*--- Share with all processors ---*/
- SU2_OMP_MASTER
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
{
su2double* dbl_buffer = new su2double [Global_nElemDomain*nDim];
SU2_MPI::Allreduce(cg_elem,dbl_buffer,Global_nElemDomain*nDim,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
@@ -3147,8 +3140,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm());
halo_detect.swap(char_buffer);
}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS

SU2_OMP_FOR_STAT(256)
for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem) {
@@ -3187,14 +3179,13 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,

#ifdef HAVE_MPI
/*--- Share with all processors ---*/
- SU2_OMP_MASTER
+ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS
{
su2double *buffer = new su2double [Global_nElemDomain];
SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
swap(buffer, work_values); delete [] buffer;
}
- END_SU2_OMP_MASTER
- SU2_OMP_BARRIER
+ END_SU2_OMP_SAFE_GLOBAL_ACCESS

/*--- Account for duplication ---*/
SU2_OMP_FOR_STAT(256)