diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 3a8b23b1d43c..0c8658e97360 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -559,21 +559,13 @@ namespace AD{ FORCEINLINE bool PausePreaccumulation() { const auto current = PreaccEnabled; if (!current) return false; - SU2_OMP_BARRIER - SU2_OMP_MASTER - PreaccEnabled = false; - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(PreaccEnabled = false;) return true; } FORCEINLINE void ResumePreaccumulation(bool wasActive) { if (!wasActive) return; - SU2_OMP_BARRIER - SU2_OMP_MASTER - PreaccEnabled = true; - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(PreaccEnabled = true;) } FORCEINLINE void StartNoSharedReading() { diff --git a/Common/include/geometry/dual_grid/CEdge.hpp b/Common/include/geometry/dual_grid/CEdge.hpp index 57016ea822f7..4f50967d3f4a 100644 --- a/Common/include/geometry/dual_grid/CEdge.hpp +++ b/Common/include/geometry/dual_grid/CEdge.hpp @@ -43,6 +43,7 @@ class CEdge { using NodeArray = C2DContainer; NodeArray Nodes; /*!< \brief Vector to store the node indices of the edge. */ su2activematrix Normal; /*!< \brief Normal (area) of the edge. */ + const Index nEdge, nEdgeSIMD; friend class CPhysicalGeometry; @@ -70,13 +71,27 @@ class CEdge { inline unsigned long GetNode(unsigned long iEdge, unsigned long iNode) const { return Nodes(iEdge,iNode); } /*! - * \brief SIMD version of GetNode, iNode returned for multiple contiguous iEdges + * \brief SIMD version of GetNode, iNode returned for contiguous iEdges. */ template FORCEINLINE simd::Array GetNode(simd::Array iEdge, unsigned long iNode) const { return simd::Array(&Nodes(iEdge[0],iNode)); } + /*! + * \brief Sets the tail of "Nodes" to repeat one of the last edges. + * \note This is needed when using the SIMD version of GetNode and + * the number of edges is not a multiple of the simd width. + */ + void SetPaddingNodes() { + for (auto iEdge = nEdge; iEdge < nEdgeSIMD; ++iEdge) { + /*--- Pad nodes by repeating the first edge in the last SIMD group. ---*/ + const auto iEdge0 = nEdgeSIMD - simd::preferredLen(); + Nodes(iEdge, LEFT) = Nodes(iEdge0, LEFT); + Nodes(iEdge, RIGHT) = Nodes(iEdge0, RIGHT); + } + } + /*! * \brief Set the node indices of an edge. * \param[in] iEdge - Edge index. diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index 8e8139d31cfc..6c3b81eae3df 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -219,12 +219,11 @@ class CSysSolve { void HandleTemporariesIn(const CSysVector& LinSysRes, CSysVector& LinSysSol) { /*--- Set the pointers. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { LinSysRes_ptr = &LinSysRes; LinSysSol_ptr = &LinSysSol; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*! @@ -241,12 +240,11 @@ class CSysSolve { LinSysSol_tmp.PassiveCopy(LinSysSol); /*--- Set the pointers. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { LinSysRes_ptr = &LinSysRes_tmp; LinSysSol_ptr = &LinSysSol_tmp; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*! @@ -258,13 +256,11 @@ class CSysSolve { void HandleTemporariesOut(CSysVector& LinSysSol) { /*--- Reset the pointers. ---*/ - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*! @@ -279,13 +275,11 @@ class CSysSolve { LinSysSol.PassiveCopy(LinSysSol_tmp); /*--- Reset the pointers. ---*/ - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } public: diff --git a/Common/include/linear_algebra/CSysSolve_b.hpp b/Common/include/linear_algebra/CSysSolve_b.hpp index 2b0f8ee00de2..ac90c9fe362d 100644 --- a/Common/include/linear_algebra/CSysSolve_b.hpp +++ b/Common/include/linear_algebra/CSysSolve_b.hpp @@ -32,8 +32,8 @@ #ifdef CODI_REVERSE_TYPE template struct CSysSolve_b { - static void Solve_b(const codi::RealReverse::Real* x, codi::RealReverse::Real* x_b, size_t m, - const codi::RealReverse::Real* y, const codi::RealReverse::Real* y_b, size_t n, + static void Solve_b(const su2double::Real* x, su2double::Real* x_b, size_t m, + const su2double::Real* y, const su2double::Real* y_b, size_t n, codi::DataStore* d); }; #endif diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index 216bb2fabfa2..d8832b12f25b 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -186,10 +186,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> /*--- check if self-assignment, otherwise perform deep copy ---*/ if ((const void*)this == (const void*)&other) return; - SU2_OMP_MASTER - Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);) CSYSVEC_PARFOR for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]); @@ -297,11 +294,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> ScalarType dot(const VecExpr::CVecExpr& expr) const { static ScalarType dotRes; /*--- All threads get the same "view" of the vectors and shared variable. ---*/ - SU2_OMP_BARRIER - SU2_OMP_MASTER - dotRes = 0.0; - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(dotRes = 0.0;) /*--- Local dot product for each thread. ---*/ ScalarType sum = 0.0; @@ -317,16 +310,16 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> #ifdef HAVE_MPI /*--- Reduce across all mpi ranks, only master thread communicates. ---*/ - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { sum = dotRes; const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE; SelectMPIWrapper::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm()); } - END_SU2_OMP_MASTER -#endif + END_SU2_OMP_SAFE_GLOBAL_ACCESS +#else /*--- Make view of result consistent across threads. ---*/ SU2_OMP_BARRIER +#endif return dotRes; } diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index 37b68f673b4b..e32d5e2484e6 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -185,6 +185,25 @@ void omp_finalize(); #endif +/* The SU2_OMP_SAFE_GLOBAL_ACCESS constructs are used to safeguard code that should only be executed by the master + * thread, with all threads and memory views synchronized both beforehand and afterwards. + */ + +#define BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS \ + SU2_OMP_BARRIER \ + SU2_OMP_MASTER + +#define END_SU2_OMP_SAFE_GLOBAL_ACCESS \ + END_SU2_OMP_MASTER \ + SU2_OMP_BARRIER + +#define SU2_OMP_SAFE_GLOBAL_ACCESS(...) \ + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS \ + { \ + __VA_ARGS__ \ + } \ + END_SU2_OMP_SAFE_GLOBAL_ACCESS + /*--- Convenience functions (e.g. to compute chunk sizes). ---*/ /*! diff --git a/Common/include/parallelization/vectorization.hpp b/Common/include/parallelization/vectorization.hpp index 00468baf9259..5cce7d47edf3 100644 --- a/Common/include/parallelization/vectorization.hpp +++ b/Common/include/parallelization/vectorization.hpp @@ -56,6 +56,7 @@ constexpr size_t PREFERRED_SIZE = 8; */ template constexpr size_t preferredLen() { return PREFERRED_SIZE / sizeof(T); } + template<> constexpr size_t preferredLen() { return PREFERRED_SIZE / sizeof(passivedouble); } diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp index 6eb2ff4fe6de..f61cab295d73 100644 --- a/Common/src/geometry/CGeometry.cpp +++ b/Common/src/geometry/CGeometry.cpp @@ -357,8 +357,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) { if (countPerPoint <= maxCountPerPoint) return; - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Store the larger packet size to the class data. ---*/ @@ -379,8 +378,7 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) { bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] (); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } @@ -763,10 +761,7 @@ void CGeometry::CompleteComms(CGeometry *geometry, /*--- For efficiency, recv the messages dynamically based on the order they arrive. ---*/ - SU2_OMP_MASTER - SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);) /*--- Once we have recv'd a message, get the source rank. ---*/ @@ -831,12 +826,8 @@ void CGeometry::CompleteComms(CGeometry *geometry, data in the loop above at this point. ---*/ #ifdef HAVE_MPI - SU2_OMP_MASTER - SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE); - END_SU2_OMP_MASTER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);) #endif - SU2_OMP_BARRIER - } void CGeometry::PreprocessPeriodicComms(CGeometry *geometry, @@ -1186,8 +1177,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) { if (countPerPeriodicPoint <= maxCountPerPeriodicPoint) return; - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Store the larger packet size to the class data. ---*/ @@ -1213,8 +1203,7 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) { bufS_PeriodicRecv = new unsigned short[nRecv] (); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CGeometry::PostPeriodicRecvs(CGeometry *geometry, @@ -1409,6 +1398,7 @@ void CGeometry::SetEdges(void) { } } } + edges->SetPaddingNodes(); } void CGeometry::SetFaces(void) { @@ -2506,38 +2496,41 @@ void CGeometry::UpdateCustomBoundaryConditions(CGeometry **geometry_container, C } void CGeometry::ComputeSurfaceAreaCfgFile(const CConfig *config) { - const auto nMarker_Global = config->GetnMarker_CfgFile(); - SurfaceAreaCfgFile.resize(nMarker_Global); - vector LocalSurfaceArea(nMarker_Global, 0.0); + SU2_OMP_MASTER + { + const auto nMarker_Global = config->GetnMarker_CfgFile(); + SurfaceAreaCfgFile.resize(nMarker_Global); + vector LocalSurfaceArea(nMarker_Global, 0.0); - /*--- Loop over all local markers ---*/ - for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) { + /*--- Loop over all local markers ---*/ + for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) { - const auto Local_TagBound = config->GetMarker_All_TagBound(iMarker); + const auto Local_TagBound = config->GetMarker_All_TagBound(iMarker); - /*--- Loop over all global markers, and find the local-global pair via - matching unique string tags. ---*/ - for (unsigned short iMarker_Global = 0; iMarker_Global < nMarker_Global; iMarker_Global++) { + /*--- Loop over all global markers, and find the local-global pair via + matching unique string tags. ---*/ + for (unsigned short iMarker_Global = 0; iMarker_Global < nMarker_Global; iMarker_Global++) { - const auto Global_TagBound = config->GetMarker_CfgFile_TagBound(iMarker_Global); - if (Local_TagBound == Global_TagBound) { + const auto Global_TagBound = config->GetMarker_CfgFile_TagBound(iMarker_Global); + if (Local_TagBound == Global_TagBound) { - for(auto iVertex = 0ul; iVertex < nVertex[iMarker]; iVertex++ ) { + for(auto iVertex = 0ul; iVertex < nVertex[iMarker]; iVertex++ ) { - const auto iPoint = vertex[iMarker][iVertex]->GetNode(); + const auto iPoint = vertex[iMarker][iVertex]->GetNode(); - if(!nodes->GetDomain(iPoint)) continue; + if(!nodes->GetDomain(iPoint)) continue; - const auto AreaNormal = vertex[iMarker][iVertex]->GetNormal(); - const auto Area = GeometryToolbox::Norm(nDim, AreaNormal); + const auto AreaNormal = vertex[iMarker][iVertex]->GetNormal(); + const auto Area = GeometryToolbox::Norm(nDim, AreaNormal); - LocalSurfaceArea[iMarker_Global] += Area; - }// for iVertex - }//if Local == Global - }//for iMarker_Global - }//for iMarker + LocalSurfaceArea[iMarker_Global] += Area; + }// for iVertex + }//if Local == Global + }//for iMarker_Global + }//for iMarker - SU2_MPI::Allreduce(LocalSurfaceArea.data(), SurfaceAreaCfgFile.data(), SurfaceAreaCfgFile.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + SU2_MPI::Allreduce(LocalSurfaceArea.data(), SurfaceAreaCfgFile.data(), SurfaceAreaCfgFile.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + } END_SU2_OMP_MASTER } su2double CGeometry::GetSurfaceArea(const CConfig *config, unsigned short val_marker) const { @@ -3133,7 +3126,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, END_SU2_OMP_FOR /*--- Share with all processors ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { su2double* dbl_buffer = new su2double [Global_nElemDomain*nDim]; SU2_MPI::Allreduce(cg_elem,dbl_buffer,Global_nElemDomain*nDim,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); @@ -3147,8 +3140,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm()); halo_detect.swap(char_buffer); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElem &filter_radius, #ifdef HAVE_MPI /*--- Share with all processors ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { su2double *buffer = new su2double [Global_nElemDomain]; SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); swap(buffer, work_values); delete [] buffer; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Account for duplication ---*/ SU2_OMP_FOR_STAT(256) diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp index c95275d07344..1065b624aac2 100644 --- a/Common/src/geometry/CMultiGridGeometry.cpp +++ b/Common/src/geometry/CMultiGridGeometry.cpp @@ -920,7 +920,7 @@ void CMultiGridGeometry::MatchPeriodic(const CConfig *config, unsigned short val void CMultiGridGeometry::SetControlVolume(const CGeometry *fine_grid, unsigned short action) { - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { unsigned long iFinePoint, iCoarsePoint, iEdge, iParent; long FineEdge, CoarseEdge; @@ -983,13 +983,12 @@ void CMultiGridGeometry::SetControlVolume(const CGeometry *fine_grid, unsigned s } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CMultiGridGeometry::SetBoundControlVolume(const CGeometry *fine_grid, unsigned short action) { - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { unsigned long iCoarsePoint, iFinePoint, FineVertex, iVertex; unsigned short iMarker, iChildren, iDim; @@ -1027,8 +1026,7 @@ void CMultiGridGeometry::SetBoundControlVolume(const CGeometry *fine_grid, unsig } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CMultiGridGeometry::SetCoord(const CGeometry *fine_grid) { diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index d8a399a6ea38..306d9bab8cf0 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -4706,7 +4706,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() { unsigned long jElem, Point_Neighbor, iPoint, iElem; /*--- Loop over all the elements ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { vector > elems(nPoint); @@ -4720,8 +4720,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() { } nodes->SetElems(elems); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Loop over all the points ---*/ @@ -7378,7 +7377,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) END_SU2_OMP_FOR } - SU2_OMP_MASTER { /*--- The following is difficult to parallelize with threads. ---*/ + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- The following is difficult to parallelize with threads. ---*/ su2double my_DomainVolume = 0.0; for (auto iElem = 0ul; iElem < nElem; iElem++) { @@ -7511,8 +7510,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Check if there is a normal with null area ---*/ SU2_OMP_FOR_STAT(1024) diff --git a/Common/src/geometry/dual_grid/CEdge.cpp b/Common/src/geometry/dual_grid/CEdge.cpp index ec483b31cfce..c50ef1b0d750 100644 --- a/Common/src/geometry/dual_grid/CEdge.cpp +++ b/Common/src/geometry/dual_grid/CEdge.cpp @@ -31,9 +31,10 @@ using namespace GeometryToolbox; -CEdge::CEdge(unsigned long nEdge, unsigned long nDim) { +CEdge::CEdge(unsigned long nEdge_, unsigned long nDim) + : nEdge(nEdge_), + nEdgeSIMD(nextMultiple(nEdge_, simd::preferredLen())) { /*--- Allocate with padding. ---*/ - const auto nEdgeSIMD = nextMultiple(nEdge, simd::preferredLen()); Nodes.resize(nEdgeSIMD,2) = 0; Normal.resize(nEdgeSIMD,nDim) = su2double(0.0); } diff --git a/Common/src/interface_interpolation/CMirror.cpp b/Common/src/interface_interpolation/CMirror.cpp index 20421b567395..5eb35007c22b 100644 --- a/Common/src/interface_interpolation/CMirror.cpp +++ b/Common/src/interface_interpolation/CMirror.cpp @@ -192,45 +192,48 @@ void CMirror::SetTransferCoeff(const CConfig* const* config) { /*--- Loop over the vertices on the target marker, define one row of the transpose matrix. ---*/ - SU2_OMP_PARALLEL_(for schedule(dynamic,roundUpDiv(nVertexTarget, 2*omp_get_max_threads()))) - for (auto iVertex = 0ul; iVertex < nVertexTarget; ++iVertex) { - - auto& target_vertex = targetVertices[markTarget][iVertex]; - const auto iPoint = target_geometry->vertex[markTarget][iVertex]->GetNode(); - - if (!target_geometry->nodes->GetDomain(iPoint)) continue; - - /*--- Any point of the donor geometry, that has this target point as a donor, becomes a donor. ---*/ - const long targetGlobalIndex = target_geometry->nodes->GetGlobalIndex(iPoint); - - /*--- Count donors and safe the binary search results (this is why we sorted the matrix). ---*/ - auto nDonor = 0ul; - vector > ranges(nSend); - for (int iSend = 0; iSend < nSend; ++iSend) { - const auto iProcessor = iSendProcessor[iSend]; - const auto numCoeff = allNumNodeDonor[iProcessor]; - auto p = equal_range(DonorIndex[iSend], DonorIndex[iSend]+numCoeff, targetGlobalIndex); - nDonor += (p.second - p.first); - ranges[iSend] = p; - } + SU2_OMP_PARALLEL + { + SU2_OMP_FOR_DYN(roundUpDiv(nVertexTarget, 2*omp_get_max_threads())) + for (auto iVertex = 0ul; iVertex < nVertexTarget; ++iVertex) { + + auto& target_vertex = targetVertices[markTarget][iVertex]; + const auto iPoint = target_geometry->vertex[markTarget][iVertex]->GetNode(); + + if (!target_geometry->nodes->GetDomain(iPoint)) continue; + + /*--- Any point of the donor geometry, that has this target point as a donor, becomes a donor. ---*/ + const long targetGlobalIndex = target_geometry->nodes->GetGlobalIndex(iPoint); + + /*--- Count donors and safe the binary search results (this is why we sorted the matrix). ---*/ + auto nDonor = 0ul; + vector > ranges(nSend); + for (int iSend = 0; iSend < nSend; ++iSend) { + const auto iProcessor = iSendProcessor[iSend]; + const auto numCoeff = allNumNodeDonor[iProcessor]; + auto p = equal_range(DonorIndex[iSend], DonorIndex[iSend]+numCoeff, targetGlobalIndex); + nDonor += (p.second - p.first); + ranges[iSend] = p; + } - target_vertex.resize(nDonor); + target_vertex.resize(nDonor); - /*--- Use the search results to set the interpolation coefficients. ---*/ - for (int iSend = 0, iDonor = 0; iSend < nSend; ++iSend) { - const auto iProcessor = iSendProcessor[iSend]; + /*--- Use the search results to set the interpolation coefficients. ---*/ + for (int iSend = 0, iDonor = 0; iSend < nSend; ++iSend) { + const auto iProcessor = iSendProcessor[iSend]; - const auto first = ranges[iSend].first - DonorIndex[iSend]; - const auto last = ranges[iSend].second - DonorIndex[iSend]; + const auto first = ranges[iSend].first - DonorIndex[iSend]; + const auto last = ranges[iSend].second - DonorIndex[iSend]; - for (auto iCoeff = first; iCoeff < last; ++iCoeff) { - target_vertex.processor[iDonor] = iProcessor; - target_vertex.coefficient[iDonor] = DonorCoeff[iSend][iCoeff]; - target_vertex.globalPoint[iDonor] = GlobalIndex[iSend][iCoeff]; - ++iDonor; + for (auto iCoeff = first; iCoeff < last; ++iCoeff) { + target_vertex.processor[iDonor] = iProcessor; + target_vertex.coefficient[iDonor] = DonorCoeff[iSend][iCoeff]; + target_vertex.globalPoint[iDonor] = GlobalIndex[iSend][iCoeff]; + ++iDonor; + } } } - + END_SU2_OMP_FOR } END_SU2_OMP_PARALLEL diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 4c6344db7d10..2f6b9a060237 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -378,10 +378,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, /*--- For efficiency, recv the messages dynamically based on the order they arrive. ---*/ - SU2_OMP_MASTER - SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);) /*--- Once we have recv'd a message, get the source rank. ---*/ @@ -475,12 +472,8 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, data in the loop above at this point. ---*/ #ifdef HAVE_MPI - SU2_OMP_MASTER - SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE); - END_SU2_OMP_MASTER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);) #endif - SU2_OMP_BARRIER - } template @@ -641,7 +634,7 @@ template void CSysMatrix::BuildJacobiPreconditioner() { /*--- Build Jacobi preconditioner (M = D), compute and store the inverses of the diagonal blocks. ---*/ - SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT) + SU2_OMP_FOR_DYN(omp_heavy_size) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar])); END_SU2_OMP_FOR @@ -1392,13 +1385,12 @@ void CSysMatrix::BuildPastixPreconditioner(CGeometry *geometry, cons unsigned short kind_fact) { #ifdef HAVE_PASTIX /*--- Pastix will launch nested threads. ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix); pastix_wrapper.Factorize(geometry, config, kind_fact); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS #else SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION); #endif @@ -1408,11 +1400,7 @@ template void CSysMatrix::ComputePastixPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, const CConfig *config) const { #ifdef HAVE_PASTIX - SU2_OMP_BARRIER - SU2_OMP_MASTER - pastix_wrapper.Solve(vec,prod); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(pastix_wrapper.Solve(vec,prod);) CSysMatrixComms::Initiate(prod, geometry, config); CSysMatrixComms::Complete(prod, geometry, config); diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 95445cccd8fb..06e79259fb97 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -214,8 +214,8 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & * do this since the working vectors are shared. ---*/ if (!cg_ready) { - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { auto nVar = b.GetNVar(); auto nBlk = b.GetNBlk(); auto nBlkDomain = b.GetNBlkDomain(); @@ -227,8 +227,7 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & cg_ready = true; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/ @@ -358,8 +357,8 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector::FGMRES_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVector::Solve(CSysMatrix & Jacobian, co TapeActive = AD::getGlobalTape().isActive(); - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { AD::StartExtFunc(false, false); AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS AD::StopRecording(); #endif @@ -972,9 +967,9 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co /*--- Start recording if it was stopped for the linear solver ---*/ #ifdef CODI_REVERSE_TYPE AD::StartRecording(); - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { AD::SetExtFuncOut(&LinSysSol[0], LinSysSol.GetLocSize()); AD::FuncHelper->addUserData(&LinSysRes); AD::FuncHelper->addUserData(&LinSysSol); @@ -983,15 +978,11 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co AD::FuncHelper->addUserData(config); AD::FuncHelper->addUserData(this); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS AD::FuncHelper->addToTape(CSysSolve_b::Solve_b); - SU2_OMP_BARRIER - SU2_OMP_MASTER - AD::EndExtFunc(); - END_SU2_OMP_MASTER + SU2_OMP_SAFE_GLOBAL_ACCESS(AD::EndExtFunc();) #endif } @@ -1057,29 +1048,32 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, /*--- Solve the system ---*/ + /*--- Local variable to prevent all threads from writing to a shared location (this->Residual). ---*/ + ScalarType residual = 0.0; + HandleTemporariesIn(LinSysRes, LinSysSol); switch(KindSolver) { case FGMRES: - IterLinSol = FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, Residual, ScreenOutput, config); + IterLinSol = FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, residual, ScreenOutput, config); break; case RESTARTED_FGMRES: - IterLinSol = RFGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, Residual, ScreenOutput, config); + IterLinSol = RFGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, residual, ScreenOutput, config); break; case BCGSTAB: - IterLinSol = BCGSTAB_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, Residual, ScreenOutput, config); + IterLinSol = BCGSTAB_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, residual, ScreenOutput, config); break; case CONJUGATE_GRADIENT: - IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, Residual, ScreenOutput, config); + IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual, ScreenOutput, config); break; case SMOOTHER: - IterLinSol = Smoother_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, Residual, ScreenOutput, config); + IterLinSol = Smoother_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual, ScreenOutput, config); break; case PASTIX_LDLT : case PASTIX_LU: if (directCall) Jacobian.BuildPastixPreconditioner(geometry, config, KindSolver); Jacobian.ComputePastixPreconditioner(*LinSysRes_ptr, *LinSysSol_ptr, geometry, config); IterLinSol = 1; - Residual = 1e-20; + residual = 1e-20; break; default: SU2_MPI::Error("Unknown type of linear solver.",CURRENT_FUNCTION); @@ -1091,8 +1085,10 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, delete precond; SU2_OMP_MASTER - Iterations = IterLinSol; - END_SU2_OMP_MASTER + { + Residual = residual; + Iterations = IterLinSol; + } END_SU2_OMP_MASTER return IterLinSol; diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp index a5dafc080752..1fabb5571a4c 100644 --- a/Common/src/linear_algebra/CSysSolve_b.cpp +++ b/Common/src/linear_algebra/CSysSolve_b.cpp @@ -32,8 +32,8 @@ #ifdef CODI_REVERSE_TYPE template -void CSysSolve_b::Solve_b(const codi::RealReverse::Real* x, codi::RealReverse::Real* x_b, size_t m, - const codi::RealReverse::Real* y, const codi::RealReverse::Real* y_b, size_t n, +void CSysSolve_b::Solve_b(const su2double::Real* x, su2double::Real* x_b, size_t m, + const su2double::Real* y, const su2double::Real* y_b, size_t n, codi::DataStore* d) { CSysVector* LinSysRes_b = nullptr; diff --git a/SU2_CFD/include/limiters/CLimiterDetails.hpp b/SU2_CFD/include/limiters/CLimiterDetails.hpp index 5f534ed94b75..9cbb3cadc7f1 100644 --- a/SU2_CFD/include/limiters/CLimiterDetails.hpp +++ b/SU2_CFD/include/limiters/CLimiterDetails.hpp @@ -178,13 +178,12 @@ struct CLimiterDetails /*--- Allocate the static members (shared between threads) to * perform the reduction across all threads in the rank. ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { sharedMin.resize(varEnd) = largeNum; sharedMax.resize(varEnd) =-largeNum; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Per thread reduction. ---*/ @@ -212,11 +211,10 @@ struct CLimiterDetails sharedMax(iVar) = max(sharedMax(iVar), localMax(iVar)); } END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER /*--- Global reduction. ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { localMin = sharedMin; SU2_MPI::Allreduce(localMin.data(), sharedMin.data(), varEnd, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); @@ -224,8 +222,7 @@ struct CLimiterDetails localMax = sharedMax; SU2_MPI::Allreduce(localMax.data(), sharedMax.data(), varEnd, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Compute eps^2 (each thread has its own copy of it). ---*/ diff --git a/SU2_CFD/include/solvers/CEulerSolver.hpp b/SU2_CFD/include/solvers/CEulerSolver.hpp index acea7815891f..dcaa0efb897b 100644 --- a/SU2_CFD/include/solvers/CEulerSolver.hpp +++ b/SU2_CFD/include/solvers/CEulerSolver.hpp @@ -43,6 +43,8 @@ class CEulerSolver : public CFVMFlowSolverBase NonPhysicalEdgeCounter; /*!< \brief Non-physical reconstruction counter for each edge. */ + su2double AllBound_CEquivArea_Inv=0.0; /*!< \brief equivalent area coefficient (inviscid contribution) for all the boundaries. */ vector CEquivArea_Mnt; /*!< \brief Equivalent area (inviscid contribution) for each boundary. */ vector CEquivArea_Inv; /*!< \brief Equivalent area (inviscid contribution) for each boundary. */ diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp index 25c4bca37873..43a0fbc5489e 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp @@ -61,10 +61,7 @@ class CFVMFlowSolverBase : public CSolver { */ template static void ompMasterAssignBarrier(Ts&&... lhsRhsPairs) { - SU2_OMP_MASTER - recursiveAssign(lhsRhsPairs...); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(recursiveAssign(lhsRhsPairs...);) } su2double Mach_Inf = 0.0; /*!< \brief Mach number at the infinity. */ @@ -493,22 +490,20 @@ class CFVMFlowSolverBase : public CSolver { Global_Delta_Time = Min_Delta_Time; } END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER } /*--- Compute the min/max dt (in parallel, now over mpi ranks). ---*/ - SU2_OMP_MASTER - if (config->GetComm_Level() == COMM_FULL) { - su2double rbuf_time; - SU2_MPI::Allreduce(&Min_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); - Min_Delta_Time = rbuf_time; + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { + if (config->GetComm_Level() == COMM_FULL) { + su2double rbuf_time; + SU2_MPI::Allreduce(&Min_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); + Min_Delta_Time = rbuf_time; - SU2_MPI::Allreduce(&Max_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); - Max_Delta_Time = rbuf_time; - } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_MPI::Allreduce(&Max_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); + Max_Delta_Time = rbuf_time; + } + } END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- For exact time solution use the minimum delta time of the whole mesh. ---*/ if (time_stepping) { @@ -516,7 +511,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- If the unsteady CFL is set to zero, it uses the defined unsteady time step, * otherwise it computes the time step based on the unsteady CFL. ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { if (config->GetUnst_CFL() == 0.0) { Global_Delta_Time = config->GetDelta_UnstTime(); @@ -528,8 +523,7 @@ class CFVMFlowSolverBase : public CSolver { config->SetDelta_UnstTimeND(Global_Delta_Time); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Sets the regular CFL equal to the unsteady CFL. ---*/ @@ -558,16 +552,15 @@ class CFVMFlowSolverBase : public CSolver { SU2_OMP_CRITICAL Global_Delta_UnstTimeND = min(Global_Delta_UnstTimeND, glbDtND); END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { SU2_MPI::Allreduce(&Global_Delta_UnstTimeND, &glbDtND, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); Global_Delta_UnstTimeND = glbDtND; config->SetDelta_UnstTimeND(Global_Delta_UnstTimeND); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*--- The pseudo local time (explicit integration) cannot be greater than the physical time ---*/ diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index bf589df95a79..cb1cd99223bb 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -481,7 +481,7 @@ void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CCo (config->GetInnerIter() == 1)); if (!write_heads) return; - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Check if there actually is an exact solution for this verification case, if computed at all. ---*/ @@ -524,8 +524,7 @@ void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CCo } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } template @@ -581,12 +580,11 @@ void CFVMFlowSolverBase::ImplicitEuler_Iteration(CGeometry *geometry, CSol auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config); - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS CompleteImplicitIteration(geometry, nullptr, config); } @@ -665,16 +663,15 @@ void CFVMFlowSolverBase::ComputeVorticityAndStrainMag(const CConfig& confi } END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { su2double MyOmega_Max = Omega_Max; su2double MyStrainMag_Max = StrainMag_Max; SU2_MPI::Allreduce(&MyStrainMag_Max, &StrainMag_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&MyOmega_Max, &Omega_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } } @@ -789,7 +786,7 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** const bool static_fsi = ((config->GetTime_Marching() == TIME_MARCHING::STEADY) && config->GetFSI_Simulation()); /*--- To make this routine safe to call in parallel most of it can only be executed by one thread. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { if (nVar_Restart == 0) nVar_Restart = nVar; @@ -894,8 +891,7 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** string("This can be caused by empty lines at the end of the file."), CURRENT_FUNCTION); } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Update the geometry for flows on deforming meshes. ---*/ @@ -972,7 +968,7 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** } /*--- Go back to single threaded execution. ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Delete the class memory that is used to load the restart. ---*/ @@ -981,8 +977,7 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** delete [] Restart_Data; Restart_Data = nullptr; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } template diff --git a/SU2_CFD/include/solvers/CIncEulerSolver.hpp b/SU2_CFD/include/solvers/CIncEulerSolver.hpp index 9d4b55d601c5..b48d38f8680c 100644 --- a/SU2_CFD/include/solvers/CIncEulerSolver.hpp +++ b/SU2_CFD/include/solvers/CIncEulerSolver.hpp @@ -41,6 +41,8 @@ class CIncEulerSolver : public CFVMFlowSolverBase FluidModel; /*!< \brief fluid model used in the solver. */ StreamwisePeriodicValues SPvals, SPvalsUpdated; + su2vector NonPhysicalEdgeCounter; /*!< \brief Non-physical reconstruction counter for each edge. */ + /*! * \brief Preprocessing actions common to the Euler and NS solvers. * \param[in] geometry - Geometrical definition of the problem. diff --git a/SU2_CFD/include/solvers/CScalarSolver.inl b/SU2_CFD/include/solvers/CScalarSolver.inl index 6aa0ecfbb376..d063693afc08 100644 --- a/SU2_CFD/include/solvers/CScalarSolver.inl +++ b/SU2_CFD/include/solvers/CScalarSolver.inl @@ -423,12 +423,11 @@ void CScalarSolver::ImplicitEuler_Iteration(CGeometry* geometry, C auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config); - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS CompleteImplicitIteration(geometry, solver_container, config); } diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp index 6e0e720101d7..464e3796f446 100644 --- a/SU2_CFD/include/solvers/CSolver.hpp +++ b/SU2_CFD/include/solvers/CSolver.hpp @@ -4367,13 +4367,12 @@ class CSolver { * \brief Set the RMS and MAX residual to zero. */ inline void SetResToZero() { - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { for (auto& r : Residual_RMS) r = 0; for (auto& r : Residual_Max) r = 0; for (auto& p : Point_Max) p = 0; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*! diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp index af3b2c296486..44b19a6527dd 100644 --- a/SU2_CFD/src/integration/CIntegration.cpp +++ b/SU2_CFD/src/integration/CIntegration.cpp @@ -238,10 +238,7 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver solver->GetNodes()->Set_Solution_time_n1(); solver->GetNodes()->Set_Solution_time_n(); - SU2_OMP_MASTER - solver->ResetCFLAdapt(); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(solver->ResetCFLAdapt();) SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPoint(), omp_get_num_threads())) for (auto iPoint = 0ul; iPoint < geometry->GetnPoint(); iPoint++) { diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp index cbd42575f33e..2731eccd91de 100644 --- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp +++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp @@ -92,10 +92,7 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry, geometry[iZone][iInst][FinestMesh], config[iZone]); - SU2_OMP_MASTER - config[iZone]->SubtractFinestMesh(); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(config[iZone]->SubtractFinestMesh();) } /*--- Set the current finest grid (full multigrid strategy) ---*/ @@ -226,7 +223,9 @@ void CMultiGridIntegration::MultiGrid_Cycle(CGeometry ****geometry, /*--- Temporarily disable implicit integration, for what follows we do not need the Jacobian. ---*/ - if (implicit) config->SetKind_TimeIntScheme(EULER_EXPLICIT); + if (implicit) { + SU2_OMP_SAFE_GLOBAL_ACCESS(config->SetKind_TimeIntScheme(EULER_EXPLICIT);) + } /*--- Compute $r_k = P_k + F_k(u_k)$ ---*/ @@ -250,7 +249,9 @@ void CMultiGridIntegration::MultiGrid_Cycle(CGeometry ****geometry, /*--- Restore the time integration settings. ---*/ - if (implicit) config->SetKind_TimeIntScheme(EULER_IMPLICIT); + if (implicit) { + SU2_OMP_SAFE_GLOBAL_ACCESS(config->SetKind_TimeIntScheme(EULER_IMPLICIT);) + } /*--- Recursive call to MultiGrid_Cycle (this routine). ---*/ @@ -682,7 +683,7 @@ void CMultiGridIntegration::NonDimensional_Parameters(CGeometry **geometry, CSol CNumerics ****numerics_container, CConfig *config, unsigned short FinestMesh, unsigned short RunTime_EqSystem, su2double *monitor) { - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS switch (RunTime_EqSystem) { case RUNTIME_FLOW_SYS: @@ -712,9 +713,7 @@ void CMultiGridIntegration::NonDimensional_Parameters(CGeometry **geometry, CSol numerics_container[FinestMesh][ADJFLOW_SOL][CONV_BOUND_TERM], config); break; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CMultiGridIntegration::Adjoint_Setup(CGeometry ****geometry, CSolver *****solver_container, CConfig **config, @@ -732,15 +731,14 @@ void CMultiGridIntegration::Adjoint_Setup(CGeometry ****geometry, CSolver *****s /*--- Set the force coefficients ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CD(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CD()); solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CL(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CL()); solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CT(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CT()); solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CQ(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CQ()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Restrict solution and gradients to the coarse levels ---*/ diff --git a/SU2_CFD/src/integration/CNewtonIntegration.cpp b/SU2_CFD/src/integration/CNewtonIntegration.cpp index dd2be69424c2..922b3651872e 100644 --- a/SU2_CFD/src/integration/CNewtonIntegration.cpp +++ b/SU2_CFD/src/integration/CNewtonIntegration.cpp @@ -121,10 +121,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) { /*--- Save the default integration scheme, and force to explicit if required. ---*/ auto TimeIntScheme = config->GetKind_TimeIntScheme(); if (type == ResEvalType::EXPLICIT) { - SU2_OMP_MASTER - config->SetKind_TimeIntScheme(EULER_EXPLICIT); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(config->SetKind_TimeIntScheme(EULER_EXPLICIT);) } solvers[FLOW_SOL]->Preprocessing(geometry, solvers, config, MESH_0, NO_RK_ITER, RUNTIME_FLOW_SYS, false); @@ -137,10 +134,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) { /*--- Restore default. ---*/ if (type == ResEvalType::EXPLICIT) { - SU2_OMP_MASTER - config->SetKind_TimeIntScheme(TimeIntScheme); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(config->SetKind_TimeIntScheme(TimeIntScheme);) } } @@ -162,15 +156,13 @@ void CNewtonIntegration::ComputeFinDiffStep() { atomicAdd(rmsSol_loc, rmsSol); - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { su2double t = rmsSol; SU2_MPI::Allreduce(&t, &rmsSol, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); finDiffStep = finDiffStepND * max(1.0, sqrt(SU2_TYPE::GetValue(rmsSol) / geometry->GetGlobal_nPointDomain())); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver *****solvers_, CNumerics ******numerics_, @@ -213,12 +205,11 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** bool endStartup = false; if (startupPeriod) { - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { firstResidual = max(firstResidual, residual); if (startupIters) startupIters -= 1; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS endStartup = (startupIters == 0) && (residual - firstResidual < startupResidual); } @@ -227,10 +218,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** Scalar toleranceFactor = 1.0; if (!startupPeriod && tolRelaxFactor > 1 && fullTolResidual < 0.0) { - SU2_OMP_MASTER - firstResidual = max(firstResidual, residual); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(firstResidual = max(firstResidual, residual);) su2double x = (residual - firstResidual) / fullTolResidual; toleranceFactor = 1.0 + (tolRelaxFactor-1)*max(0.0, 1.0-SU2_TYPE::GetValue(x)); } @@ -256,12 +244,11 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** } SetSolutionResult(solvers[FLOW_SOL]->LinSysSol); - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { solvers[FLOW_SOL]->SetIterLinSolver(iter); solvers[FLOW_SOL]->SetResLinSolver(eps); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /// TODO: Clever back-tracking and CFL adaptation based on residual reduction. diff --git a/SU2_CFD/src/integration/CSingleGridIntegration.cpp b/SU2_CFD/src/integration/CSingleGridIntegration.cpp index 1a069b79a2f3..950012f2e66e 100644 --- a/SU2_CFD/src/integration/CSingleGridIntegration.cpp +++ b/SU2_CFD/src/integration/CSingleGridIntegration.cpp @@ -77,10 +77,7 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve solvers_fine[Solver_Position]->Postprocessing(geometry_fine, solvers_fine, config[iZone], FinestMesh); if (RunTime_EqSystem == RUNTIME_HEAT_SYS) { - SU2_OMP_MASTER - solvers_fine[HEAT_SOL]->Heat_Fluxes(geometry_fine, solvers_fine, config[iZone]); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(solvers_fine[HEAT_SOL]->Heat_Fluxes(geometry_fine, solvers_fine, config[iZone]);) } /*--- If turbulence model, copy the turbulence variables to the coarse levels ---*/ diff --git a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp index 9dfadb9279f3..beb246923d3b 100644 --- a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp @@ -110,11 +110,7 @@ void CDiscAdjMeshSolver::RegisterVariables(CGeometry *geometry, CConfig *config, if (config->GetFSI_Simulation()) return; - SU2_OMP_MASTER { - direct_solver->GetNodes()->Register_BoundDisp(); - } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(direct_solver->GetNodes()->Register_BoundDisp();) } void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config, bool CrossTerm){ diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index 6115389a7b26..b1aef3d7a29e 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -178,7 +178,7 @@ void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) { void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset) { - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Register farfield values as input ---*/ @@ -295,8 +295,7 @@ void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, boo * extracted in the ExtractAdjointVariables routine. ---*/ } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CDiscAdjSolver::RegisterOutput(CGeometry *geometry, CConfig *config) { @@ -385,7 +384,7 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) { - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Extract the adjoint values of the farfield values ---*/ @@ -445,8 +444,7 @@ void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *conf /*--- Extract here the adjoint values of everything else that is registered as input in RegisterInput. ---*/ } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) { @@ -566,8 +564,8 @@ void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config } } - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { auto local = Sens_Geo; SU2_MPI::Allreduce(local.data(), Sens_Geo.data(), Sens_Geo.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); @@ -577,14 +575,14 @@ void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config Total_Sens_Geo += x; } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { + SU2_OMP_MASTER config->SetGlobalParam(config->GetKind_Solver(), RunTime_EqSystem); + END_SU2_OMP_MASTER const bool dual_time = (config->GetTime_Marching() == TIME_MARCHING::DT_STEPPING_1ST) || (config->GetTime_Marching() == TIME_MARCHING::DT_STEPPING_2ND); diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index 26e1508b2aaa..ca03e026b659 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -148,6 +148,8 @@ CEulerSolver::CEulerSolver(CGeometry *geometry, CConfig *config, Allocate(*config); + NonPhysicalEdgeCounter.resize(geometry->GetnEdge()) = 0; + /*--- MPI + OpenMP initialization. ---*/ HybridParallelInitialization(*config, *geometry); @@ -343,8 +345,8 @@ CEulerSolver::~CEulerSolver(void) { void CEulerSolver::InstantiateEdgeNumerics(const CSolver* const* solver_container, const CConfig* config) { - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { if (config->Low_Mach_Correction()) SU2_MPI::Error("Low-Mach correction is not supported with vectorization.", CURRENT_FUNCTION); @@ -359,8 +361,7 @@ void CEulerSolver::InstantiateEdgeNumerics(const CSolver* const* solver_containe "support vectorization.", CURRENT_FUNCTION); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CEulerSolver::InitTurboContainers(CGeometry *geometry, CConfig *config){ @@ -1499,9 +1500,9 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con SU2_OMP_ATOMIC ErrorCounter += SetPrimitive_Variables(solver_container, config); - SU2_OMP_BARRIER - SU2_OMP_MASTER { /*--- Ops that are not OpenMP parallel go in this block. ---*/ + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { /*--- Ops that are not OpenMP parallel go in this block. ---*/ if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) { unsigned long tmp = ErrorCounter; @@ -1528,8 +1529,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Artificial dissipation ---*/ @@ -1831,22 +1831,19 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain } su2double RoeEnthalpy = (R * Primitive_j[prim_idx.Enthalpy()] + Primitive_i[prim_idx.Enthalpy()]) / (R+1); - bool neg_sound_speed = ((Gamma-1)*(RoeEnthalpy-0.5*sq_vel) < 0.0); - - bool bad_i = neg_sound_speed || neg_pres_or_rho_i; - bool bad_j = neg_sound_speed || neg_pres_or_rho_j; - - nodes->SetNon_Physical(iPoint, bad_i); - nodes->SetNon_Physical(jPoint, bad_j); - - /*--- Get updated state, in case the point recovered after the set. ---*/ - bad_i = nodes->GetNon_Physical(iPoint); - bad_j = nodes->GetNon_Physical(jPoint); - - counter_local += bad_i+bad_j; + const bool neg_sound_speed = ((Gamma-1)*(RoeEnthalpy-0.5*sq_vel) < 0.0); + bool bad_recon = neg_sound_speed || neg_pres_or_rho_i || neg_pres_or_rho_j; + if (bad_recon) { + /*--- Force 1st order for this edge for at least 20 iterations. ---*/ + NonPhysicalEdgeCounter[iEdge] = 20; + } else if (NonPhysicalEdgeCounter[iEdge] > 0) { + --NonPhysicalEdgeCounter[iEdge]; + bad_recon = true; + } + counter_local += bad_recon; - numerics->SetPrimitive(bad_i? V_i : Primitive_i, bad_j? V_j : Primitive_j); - numerics->SetSecondary(bad_i? S_i : Secondary_i, bad_j? S_j : Secondary_j); + numerics->SetPrimitive(bad_recon? V_i : Primitive_i, bad_recon? V_j : Primitive_j); + numerics->SetSecondary(bad_recon? S_i : Secondary_i, bad_recon? S_j : Secondary_j); } @@ -1917,16 +1914,15 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain /*--- Add counter results for all threads. ---*/ SU2_OMP_ATOMIC ErrorCounter += counter_local; - SU2_OMP_BARRIER /*--- Add counter results for all ranks. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { counter_local = ErrorCounter; SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } } @@ -5566,24 +5562,26 @@ void CEulerSolver::PreprocessBC_Giles(CGeometry *geometry, CConfig *config, CNum Velocity_i = new su2double[nDim]; deltaprim = new su2double[nVar]; cj = new su2double[nVar]; - complex I, cktemp_inf,cktemp_out1, cktemp_out2, expArg; + complex I, expArg; + static complex cktemp_inf, cktemp_out1, cktemp_out2; I = complex(0.0,1.0); -#ifdef HAVE_MPI - su2double MyIm_inf, MyRe_inf, Im_inf, Re_inf, MyIm_out1, MyRe_out1, Im_out1, Re_out1, MyIm_out2, MyRe_out2, Im_out2, Re_out2; -#endif - kend_max = geometry->GetnFreqSpanMax(marker_flag); for (iSpan= 0; iSpan < nSpanWiseSections ; iSpan++){ for(k=0; k < 2*kend_max+1; k++){ freq = k - kend_max; - cktemp_inf = complex(0.0,0.0); - cktemp_out1 = complex(0.0,0.0); - cktemp_out2 = complex(0.0,0.0); + SU2_OMP_MASTER + { + cktemp_inf = complex(0.0,0.0); + cktemp_out1 = complex(0.0,0.0); + cktemp_out2 = complex(0.0,0.0); + } END_SU2_OMP_MASTER for (iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++){ for (iMarkerTP=1; iMarkerTP < config->GetnMarker_Turbomachinery()+1; iMarkerTP++){ if (config->GetMarker_All_Turbomachinery(iMarker) == iMarkerTP){ if (config->GetMarker_All_TurbomachineryFlag(iMarker) == marker_flag){ + complex cktemp_inf_local{0.,0.},cktemp_out1_local{0.,0.}, cktemp_out2_local{0.,0.}; + SU2_OMP_FOR_DYN(roundUpDiv(geometry->GetnVertexSpan(iMarker,iSpan), 2*omp_get_max_threads())) for (iVertex = 0; iVertex < geometry->GetnVertexSpan(iMarker,iSpan); iVertex++) { /*--- find the node related to the vertex ---*/ @@ -5638,64 +5636,72 @@ void CEulerSolver::PreprocessBC_Giles(CGeometry *geometry, CConfig *config, CNum expArg = complex(cos(TwoPiThetaFreq_Pitch)) - I*complex(sin(TwoPiThetaFreq_Pitch)); if (freq != 0){ - cktemp_out1 += cj_out1*expArg*deltaTheta/pitch; - cktemp_out2 += cj_out2*expArg*deltaTheta/pitch; - cktemp_inf += cj_inf*expArg*deltaTheta/pitch; + cktemp_out1_local += cj_out1*expArg*deltaTheta/pitch; + cktemp_out2_local += cj_out2*expArg*deltaTheta/pitch; + cktemp_inf_local += cj_inf*expArg*deltaTheta/pitch; } else{ - cktemp_inf += complex(0.0,0.0); - cktemp_out1 += complex(0.0,0.0); - cktemp_out2 += complex(0.0,0.0); + cktemp_inf_local += complex(0.0,0.0); + cktemp_out1_local += complex(0.0,0.0); + cktemp_out2_local += complex(0.0,0.0); } } - + END_SU2_OMP_FOR + SU2_OMP_CRITICAL + { + cktemp_inf += cktemp_inf_local; + cktemp_out1 += cktemp_out1_local; + cktemp_out2 += cktemp_out2_local; + } END_SU2_OMP_CRITICAL } } } } + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { #ifdef HAVE_MPI - MyRe_inf = cktemp_inf.real(); Re_inf = 0.0; - MyIm_inf = cktemp_inf.imag(); Im_inf = 0.0; - cktemp_inf = complex(0.0,0.0); + su2double MyRe_inf = cktemp_inf.real(); su2double Re_inf = 0.0; + su2double MyIm_inf = cktemp_inf.imag(); su2double Im_inf = 0.0; + cktemp_inf = complex(0.0,0.0); - MyRe_out1 = cktemp_out1.real(); Re_out1 = 0.0; - MyIm_out1 = cktemp_out1.imag(); Im_out1 = 0.0; - cktemp_out1 = complex(0.0,0.0); + su2double MyRe_out1 = cktemp_out1.real(); su2double Re_out1 = 0.0; + su2double MyIm_out1 = cktemp_out1.imag(); su2double Im_out1 = 0.0; + cktemp_out1 = complex(0.0,0.0); - MyRe_out2 = cktemp_out2.real(); Re_out2 = 0.0; - MyIm_out2 = cktemp_out2.imag(); Im_out2 = 0.0; - cktemp_out2 = complex(0.0,0.0); + su2double MyRe_out2 = cktemp_out2.real(); su2double Re_out2 = 0.0; + su2double MyIm_out2 = cktemp_out2.imag(); su2double Im_out2 = 0.0; + cktemp_out2 = complex(0.0,0.0); - SU2_MPI::Allreduce(&MyRe_inf, &Re_inf, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(&MyIm_inf, &Im_inf, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(&MyRe_out1, &Re_out1, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(&MyIm_out1, &Im_out1, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(&MyRe_out2, &Re_out2, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(&MyIm_out2, &Im_out2, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - - cktemp_inf = complex(Re_inf,Im_inf); - cktemp_out1 = complex(Re_out1,Im_out1); - cktemp_out2 = complex(Re_out2,Im_out2); + SU2_MPI::Allreduce(&MyRe_inf, &Re_inf, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + SU2_MPI::Allreduce(&MyIm_inf, &Im_inf, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + SU2_MPI::Allreduce(&MyRe_out1, &Re_out1, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + SU2_MPI::Allreduce(&MyIm_out1, &Im_out1, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + SU2_MPI::Allreduce(&MyRe_out2, &Re_out2, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + SU2_MPI::Allreduce(&MyIm_out2, &Im_out2, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + cktemp_inf = complex(Re_inf,Im_inf); + cktemp_out1 = complex(Re_out1,Im_out1); + cktemp_out2 = complex(Re_out2,Im_out2); #endif - for (iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++){ - for (iMarkerTP=1; iMarkerTP < config->GetnMarker_Turbomachinery()+1; iMarkerTP++){ - if (config->GetMarker_All_Turbomachinery(iMarker) == iMarkerTP){ - if (config->GetMarker_All_TurbomachineryFlag(iMarker) == marker_flag){ - /*-----this is only valid 2D ----*/ - if (marker_flag == INFLOW){ - CkInflow[iMarker][iSpan][k]= cktemp_inf; - }else{ - CkOutflow1[iMarker][iSpan][k]=cktemp_out1; - CkOutflow2[iMarker][iSpan][k]=cktemp_out2; + for (iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++){ + for (iMarkerTP=1; iMarkerTP < config->GetnMarker_Turbomachinery()+1; iMarkerTP++){ + if (config->GetMarker_All_Turbomachinery(iMarker) == iMarkerTP){ + if (config->GetMarker_All_TurbomachineryFlag(iMarker) == marker_flag){ + /*-----this is only valid 2D ----*/ + if (marker_flag == INFLOW){ + CkInflow[iMarker][iSpan][k]= cktemp_inf; + }else{ + CkOutflow1[iMarker][iSpan][k]=cktemp_out1; + CkOutflow2[iMarker][iSpan][k]=cktemp_out2; + } } } } } - } + } END_SU2_OMP_SAFE_GLOBAL_ACCESS } } diff --git a/SU2_CFD/src/solvers/CHeatSolver.cpp b/SU2_CFD/src/solvers/CHeatSolver.cpp index fff01582ec45..2e447a989614 100644 --- a/SU2_CFD/src/solvers/CHeatSolver.cpp +++ b/SU2_CFD/src/solvers/CHeatSolver.cpp @@ -187,7 +187,9 @@ CHeatSolver::~CHeatSolver(void) { } void CHeatSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { + SU2_OMP_MASTER config->SetGlobalParam(config->GetKind_Solver(), RunTime_EqSystem); + END_SU2_OMP_MASTER if (config->GetKind_ConvNumScheme_Heat() == SPACE_CENTERED) { SetUndivided_Laplacian(geometry, config); diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index f25de61df84f..6d71f7377b0a 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -149,6 +149,8 @@ CIncEulerSolver::CIncEulerSolver(CGeometry *geometry, CConfig *config, unsigned Allocate(*config); + NonPhysicalEdgeCounter.resize(geometry->GetnEdge()) = 0; + /*--- MPI + OpenMP initialization. ---*/ HybridParallelInitialization(*config, *geometry); @@ -471,7 +473,7 @@ void CIncEulerSolver::SetNondimensionalization(CConfig *config, unsigned short i case INC_IDEAL_GAS: fluidModel = new CIncIdealGas(Specific_Heat_CpND, Gas_ConstantND, Pressure_ThermodynamicND); - fluidModel->SetTDState_T(Temperature_FreeStreamND); + fluidModel->SetTDState_T(Temperature_FreeStreamND); break; case FLUID_MIXTURE: @@ -490,7 +492,7 @@ void CIncEulerSolver::SetNondimensionalization(CConfig *config, unsigned short i } fluidModel->SetTDState_T(Temperature_FreeStreamND); break; - + } if (viscous) { @@ -907,15 +909,13 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ ErrorCounter += SetPrimitive_Variables(solver_container, config); if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) { - SU2_OMP_BARRIER - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { unsigned long tmp = ErrorCounter; SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); config->SetNonphysical_Points(ErrorCounter); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*--- Artificial dissipation ---*/ @@ -935,10 +935,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ /*--- Compute properties needed for mass flow BCs. ---*/ if (outlet) { - SU2_OMP_MASTER - GetOutlet_Properties(geometry, config, iMesh, Output); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(GetOutlet_Properties(geometry, config, iMesh, Output);) } /*--- Initialize the Jacobian matrix and residual, not needed for the reducer strategy @@ -1259,26 +1256,27 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont checked. Pressure is the dynamic pressure (can be negative). ---*/ if (config->GetEnergy_Equation()) { - bool neg_temperature_i = (Primitive_i[prim_idx.Temperature()] < 0.0); - bool neg_temperature_j = (Primitive_j[prim_idx.Temperature()] < 0.0); - - bool neg_density_i = (Primitive_i[prim_idx.Density()] < 0.0); - bool neg_density_j = (Primitive_j[prim_idx.Density()] < 0.0); - - nodes->SetNon_Physical(iPoint, neg_density_i || neg_temperature_i); - nodes->SetNon_Physical(jPoint, neg_density_j || neg_temperature_j); - - /* Lastly, check for existing first-order points still active from previous iterations. */ + const bool neg_temperature_i = (Primitive_i[prim_idx.Temperature()] < 0.0); + const bool neg_temperature_j = (Primitive_j[prim_idx.Temperature()] < 0.0); + + const bool neg_density_i = (Primitive_i[prim_idx.Density()] < 0.0); + const bool neg_density_j = (Primitive_j[prim_idx.Density()] < 0.0); + + bool bad_recon = neg_temperature_i || neg_temperature_j || neg_density_i || neg_density_j; + if (bad_recon) { + /*--- Force 1st order for this edge for at least 20 iterations. ---*/ + NonPhysicalEdgeCounter[iEdge] = 20; + } else if (NonPhysicalEdgeCounter[iEdge] > 0) { + --NonPhysicalEdgeCounter[iEdge]; + bad_recon = true; + } + counter_local += bad_recon; - if (nodes->GetNon_Physical(iPoint)) { - counter_local++; - for (iVar = 0; iVar < nPrimVar; iVar++) + if (bad_recon) { + for (iVar = 0; iVar < nPrimVar; iVar++) { Primitive_i[iVar] = V_i[iVar]; - } - if (nodes->GetNon_Physical(jPoint)) { - counter_local++; - for (iVar = 0; iVar < nPrimVar; iVar++) Primitive_j[iVar] = V_j[iVar]; + } } } @@ -1336,16 +1334,15 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont /*--- Add counter results for all threads. ---*/ SU2_OMP_ATOMIC ErrorCounter += counter_local; - SU2_OMP_BARRIER /*--- Add counter results for all ranks. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { counter_local = ErrorCounter; SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } } @@ -1912,7 +1909,9 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co /*--- For now, only the finest mesh level stores the Beta for all levels. ---*/ if (iMesh == MESH_0) { + SU2_OMP_MASTER MaxVel2 = 0.0; + END_SU2_OMP_MASTER su2double maxVel2 = 0.0; SU2_OMP_FOR_STAT(omp_chunk_size) @@ -1924,16 +1923,14 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co MaxVel2 = max(MaxVel2, maxVel2); END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER - - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { maxVel2 = MaxVel2; SU2_MPI::Allreduce(&maxVel2, &MaxVel2, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); config->SetMax_Vel2(max(1e-10, MaxVel2)); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*--- Allow an override if user supplies a large epsilon^2. ---*/ @@ -2231,7 +2228,7 @@ void CIncEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, Flow_Dir = Inlet_FlowDir[val_marker][iVertex]; Flow_Dir_Mag = GeometryToolbox::Norm(nDim, Flow_Dir); - /*--- Store the unit flow direction vector. + /*--- Store the unit flow direction vector. If requested, use the local boundary normal (negative), instead of the prescribed flow direction in the config. ---*/ @@ -2348,7 +2345,7 @@ void CIncEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, V_inlet[iDim+prim_idx.Velocity()] = nodes->GetVelocity(iPoint,iDim); /* pressure obtained from interior */ V_inlet[prim_idx.Pressure()] = nodes->GetPressure(iPoint); - } + } /*--- Access density at the node. This is either constant by construction, or will be set fixed implicitly by the temperature diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp index 43c5d160a400..dcaa0655424b 100644 --- a/SU2_CFD/src/solvers/CIncNSSolver.cpp +++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp @@ -107,12 +107,9 @@ void CIncNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container /*--- Compute the TauWall from the wall functions ---*/ if (wall_functions) { - SU2_OMP_MASTER - SetTau_Wall_WF(geometry, solver_container, config); - END_SU2_OMP_MASTER + SU2_OMP_SAFE_GLOBAL_ACCESS(SetTau_Wall_WF(geometry, solver_container, config);) // nijso: we have to set this as well?? // seteddyviscfirstpoint - SU2_OMP_BARRIER } /*--- Compute recovered pressure and temperature for streamwise periodic flow ---*/ @@ -273,10 +270,7 @@ void CIncNSSolver::Compute_Streamwise_Periodic_Recovered_Values(CConfig *config, END_SU2_OMP_FOR /*--- Compute the integrated Heatflux Q into the domain, and massflow over periodic markers ---*/ - SU2_OMP_MASTER - GetStreamwise_Periodic_Properties(geometry, config, iMesh); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(GetStreamwise_Periodic_Properties(geometry, config, iMesh);) } void CIncNSSolver::Viscous_Residual(unsigned long iEdge, CGeometry *geometry, CSolver **solver_container, @@ -850,8 +844,7 @@ void CIncNSSolver::SetTau_Wall_WF(CGeometry *geometry, CSolver **solver_containe SU2_OMP_ATOMIC globalCounter2 += smallYPlusCounter; - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { SU2_MPI::Allreduce(&globalCounter1, ¬ConvergedCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&globalCounter2, &smallYPlusCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); @@ -865,7 +858,7 @@ void CIncNSSolver::SetTau_Wall_WF(CGeometry *geometry, CSolver **solver_containe << " points, for which the wall model is not active." << endl; } } - END_SU2_OMP_MASTER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } } diff --git a/SU2_CFD/src/solvers/CMeshSolver.cpp b/SU2_CFD/src/solvers/CMeshSolver.cpp index b232c8fc2ac2..de14a6d5080a 100644 --- a/SU2_CFD/src/solvers/CMeshSolver.cpp +++ b/SU2_CFD/src/solvers/CMeshSolver.cpp @@ -173,12 +173,11 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd const bool wasActive = AD::BeginPassive(); /*--- Initialize shared reduction variables. ---*/ - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { MaxVolume = -1E22; MinVolume = 1E22; ElemCounter = 0; } - END_SU2_OMP_MASTER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Local min/max, final reduction outside loop. ---*/ su2double maxVol = -1E22, minVol = 1E22; @@ -238,17 +237,15 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd ElemCounter += elCount; } END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { elCount = ElemCounter; maxVol = MaxVolume; minVol = MinVolume; SU2_MPI::Allreduce(&elCount, &ElemCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&maxVol, &MaxVolume, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&minVol, &MinVolume, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Volume from 0 to 1 ---*/ @@ -266,7 +263,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd END_SU2_OMP_FOR /*--- Store the maximum and minimum volume. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { if (updated) { MaxVolume_Curr = MaxVolume; MinVolume_Curr = MinVolume; @@ -280,8 +277,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd cout <<"There are " << ElemCounter << " elements with negative volume.\n" << endl; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS AD::EndPassive(wasActive); @@ -375,17 +371,15 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { MinDistance = min(MinDistance, MinDistance_Local); } END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { MaxDistance_Local = MaxDistance; MinDistance_Local = MinDistance; SU2_MPI::Allreduce(&MaxDistance_Local, &MaxDistance, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&MinDistance_Local, &MinDistance, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } /*--- Normalize distance from 0 to 1 ---*/ diff --git a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp index 7b45c9479561..7ac8810baca9 100644 --- a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp @@ -255,9 +255,9 @@ void CNEMOEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver ompMasterAssignBarrier(ErrorCounter,0); SU2_OMP_ATOMIC ErrorCounter += SetPrimitive_Variables(solver_container, config, Output); - SU2_OMP_BARRIER - SU2_OMP_MASTER { /*--- Ops that are not OpenMP parallel go in this block. ---*/ + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS + { /*--- Ops that are not OpenMP parallel go in this block. ---*/ if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) { unsigned long tmp = ErrorCounter; @@ -268,8 +268,7 @@ void CNEMOEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver cout << "Warning. The initial solution contains "<< ErrorCounter << " points that are not physical." << endl; } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- Artificial dissipation ---*/ @@ -634,17 +633,15 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con /*--- Add counter results for all threads. ---*/ SU2_OMP_ATOMIC ErrorCounter += counter_local; - SU2_OMP_BARRIER /*--- Add counter results for all ranks. ---*/ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { counter_local = ErrorCounter; SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } } diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp index 2b4ceba51623..09bf5e884b9d 100644 --- a/SU2_CFD/src/solvers/CNSSolver.cpp +++ b/SU2_CFD/src/solvers/CNSSolver.cpp @@ -955,7 +955,11 @@ void CNSSolver::SetTau_Wall_WF(CGeometry *geometry, CSolver **solver_container, nodes->SetTemperature(iPoint,T_Wall); } else { - cout << "Warning: T_Wall < 0 " << endl; + SU2_OMP_CRITICAL + { + cout << "Warning: T_Wall < 0 " << endl; + } + END_SU2_OMP_CRITICAL } } @@ -1033,8 +1037,7 @@ void CNSSolver::SetTau_Wall_WF(CGeometry *geometry, CSolver **solver_container, SU2_OMP_ATOMIC globalCounter2 += smallYPlusCounter; - SU2_OMP_BARRIER - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { SU2_MPI::Allreduce(&globalCounter1, ¬ConvergedCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&globalCounter2, &smallYPlusCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); @@ -1048,7 +1051,7 @@ void CNSSolver::SetTau_Wall_WF(CGeometry *geometry, CSolver **solver_container, << " points, for which the wall model is not active." << endl; } } - END_SU2_OMP_MASTER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } } diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index f0a2742270a6..934b30482388 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -1055,12 +1055,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, #ifdef HAVE_MPI /*--- Once we have recv'd a message, get the source rank. ---*/ int ind; - SU2_OMP_MASTER - SU2_MPI::Waitany(geometry->nPeriodicRecv, - geometry->req_PeriodicRecv, - &ind, &status); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(geometry->nPeriodicRecv, geometry->req_PeriodicRecv, &ind, &status);) source = status.MPI_SOURCE; #else /*--- For serial calculations, we know the rank. ---*/ @@ -1314,13 +1309,8 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, data in the loop above at this point. ---*/ #ifdef HAVE_MPI - SU2_OMP_MASTER - SU2_MPI::Waitall(geometry->nPeriodicSend, - geometry->req_PeriodicSend, - MPI_STATUS_IGNORE); - END_SU2_OMP_MASTER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(geometry->nPeriodicSend, geometry->req_PeriodicSend, MPI_STATUS_IGNORE);) #endif - SU2_OMP_BARRIER } delete [] Diff; @@ -1596,10 +1586,7 @@ void CSolver::CompleteComms(CGeometry *geometry, /*--- For efficiency, recv the messages dynamically based on the order they arrive. ---*/ - SU2_OMP_MASTER - SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);) /*--- Once we have recv'd a message, get the source rank. ---*/ @@ -1704,11 +1691,8 @@ void CSolver::CompleteComms(CGeometry *geometry, data in the loop above at this point. ---*/ #ifdef HAVE_MPI - SU2_OMP_MASTER - SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE); - END_SU2_OMP_MASTER + SU2_OMP_SAFE_GLOBAL_ACCESS(SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);) #endif - SU2_OMP_BARRIER } } @@ -1773,7 +1757,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, over time so that we do not get stuck in limit cycles, this is done on the fine grid and applied to all others. */ - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /* Only the master thread updates the shared variables. */ /* Check if we should decrease or if we can increase, the 20% is to avoid flip-flopping. */ @@ -1832,9 +1816,8 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, } } } - } /* End SU2_OMP_MASTER, now all threads update the CFL number. */ - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + } /* End safe global access, now all threads update the CFL number. */ + END_SU2_OMP_SAFE_GLOBAL_ACCESS /* Loop over all points on this grid and apply CFL adaption. */ @@ -1927,9 +1910,8 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, Avg_CFL_Local += myCFLSum; } END_SU2_OMP_CRITICAL - SU2_OMP_BARRIER - SU2_OMP_MASTER + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /* MPI reduction. */ myCFLMin = Min_CFL_Local; myCFLMax = Max_CFL_Local; myCFLSum = Avg_CFL_Local; SU2_MPI::Allreduce(&myCFLMin, &Min_CFL_Local, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); @@ -1937,8 +1919,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, SU2_MPI::Allreduce(&myCFLSum, &Avg_CFL_Local, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); Avg_CFL_Local /= su2double(geometry[iMesh]->GetGlobal_nPointDomain()); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } } @@ -1949,7 +1930,7 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) if (geometry->GetMGLevel() != MESH_0) return; - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Set the L2 Norm residual in all the processors. ---*/ @@ -2003,15 +1984,14 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) { if (geometry->GetMGLevel() != MESH_0) return; - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Set the L2 Norm residual in all the processors. ---*/ @@ -2046,8 +2026,7 @@ void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { @@ -2593,7 +2572,7 @@ void CSolver::SolveTypicalSectionWingModel(CGeometry *geometry, su2double Cl, su void CSolver::Restart_OldGeometry(CGeometry *geometry, CConfig *config) { - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- This function is intended for dual time simulations ---*/ @@ -2741,8 +2720,7 @@ void CSolver::Restart_OldGeometry(CGeometry *geometry, CConfig *config) { } } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- It's necessary to communicate this information ---*/ diff --git a/SU2_CFD/src/solvers/CSpeciesSolver.cpp b/SU2_CFD/src/solvers/CSpeciesSolver.cpp index ba1a298fb1fc..07e4fe49cb92 100644 --- a/SU2_CFD/src/solvers/CSpeciesSolver.cpp +++ b/SU2_CFD/src/solvers/CSpeciesSolver.cpp @@ -175,7 +175,7 @@ void CSpeciesSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfi const string restart_filename = config->GetFilename(config->GetSolution_FileName(), "", val_iter); /*--- To make this routine safe to call in parallel most of it can only be executed by one thread. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/ if (config->GetRead_Binary_Restart()) { @@ -230,9 +230,8 @@ void CSpeciesSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfi CURRENT_FUNCTION); } - } // end SU2_OMP_MASTER, pre and postprocessing are thread-safe. - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + } // end safe global access, pre and postprocessing are thread-safe. + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- MPI solution and compute the eddy viscosity ---*/ @@ -286,7 +285,7 @@ void CSpeciesSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfi } /*--- Go back to single threaded execution. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Delete the class memory that is used to load the restart. ---*/ delete[] Restart_Vars; @@ -294,8 +293,7 @@ void CSpeciesSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfi delete[] Restart_Data; Restart_Data = nullptr; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CSpeciesSolver::Preprocessing(CGeometry* geometry, CSolver** solver_container, CConfig* config, diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp index 42c8f32bed49..f48341574a0a 100644 --- a/SU2_CFD/src/solvers/CTurbSASolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp @@ -181,7 +181,7 @@ CTurbSASolver::CTurbSASolver(CGeometry *geometry, CConfig *config, unsigned shor void CTurbSASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { - config->SetGlobalParam(config->GetKind_Solver(), RunTime_EqSystem); + SU2_OMP_SAFE_GLOBAL_ACCESS(config->SetGlobalParam(config->GetKind_Solver(), RunTime_EqSystem);) const auto kind_hybridRANSLES = config->GetKind_HybridRANSLES(); @@ -388,10 +388,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta /*--- Evaluate nu tilde at the closest point to the surface using the wall functions. ---*/ if (config->GetWall_Functions()) { - SU2_OMP_MASTER - SetTurbVars_WF(geometry, solver_container, config, val_marker); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(SetTurbVars_WF(geometry, solver_container, config, val_marker);) return; } diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp index 6650234da0f9..d273f5c22578 100644 --- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp @@ -193,7 +193,7 @@ CTurbSSTSolver::CTurbSSTSolver(CGeometry *geometry, CConfig *config, unsigned sh void CTurbSSTSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { - config->SetGlobalParam(config->GetKind_Solver(), RunTime_EqSystem); + SU2_OMP_SAFE_GLOBAL_ACCESS(config->SetGlobalParam(config->GetKind_Solver(), RunTime_EqSystem);) /*--- Upwind second order reconstruction and gradients ---*/ CommonPreprocessing(geometry, config, Output); @@ -360,10 +360,7 @@ void CTurbSSTSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_cont /*--- Evaluate nu tilde at the closest point to the surface using the wall functions. ---*/ if (config->GetWall_Functions()) { - SU2_OMP_MASTER - SetTurbVars_WF(geometry, solver_container, config, val_marker); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + SU2_OMP_SAFE_GLOBAL_ACCESS(SetTurbVars_WF(geometry, solver_container, config, val_marker);) return; } diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 839f7c5db2ba..f3c6d2e3f9a2 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -216,7 +216,7 @@ void CTurbSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfig* const string restart_filename = config->GetFilename(config->GetSolution_FileName(), "", val_iter); /*--- To make this routine safe to call in parallel most of it can only be executed by one thread. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/ if (config->GetRead_Binary_Restart()) { @@ -270,9 +270,8 @@ void CTurbSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfig* CURRENT_FUNCTION); } - } // end SU2_OMP_MASTER, pre and postprocessing are thread-safe. - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + } // end safe global access, pre and postprocessing are thread-safe. + END_SU2_OMP_SAFE_GLOBAL_ACCESS /*--- MPI solution and compute the eddy viscosity ---*/ @@ -319,7 +318,7 @@ void CTurbSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfig* } /*--- Go back to single threaded execution. ---*/ - SU2_OMP_MASTER { + BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { /*--- Delete the class memory that is used to load the restart. ---*/ delete[] Restart_Vars; @@ -327,8 +326,7 @@ void CTurbSolver::LoadRestart(CGeometry** geometry, CSolver*** solver, CConfig* delete[] Restart_Data; Restart_Data = nullptr; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_SAFE_GLOBAL_ACCESS } void CTurbSolver::Impose_Fixed_Values(const CGeometry *geometry, const CConfig *config){ diff --git a/TestCases/TestCase.py b/TestCases/TestCase.py index 5f1030855f93..83d14aa292c9 100644 --- a/TestCases/TestCase.py +++ b/TestCases/TestCase.py @@ -729,7 +729,7 @@ def adjust_iter(self): # Rewrite the file with a .autotest extension self.cfg_file = "%s.autotest"%self.cfg_file file_out = open(self.cfg_file,'w') - file_out.write('%% This file automatically generated by the regression script\n') + file_out.write('% This file automatically generated by the regression script\n') file_out.write('%% Number of iterations changed to %d\n'%(self.test_iter+1)) if (self.multizone or self.new_output) and self.unsteady: adjust_string = "TIME_ITER" @@ -759,7 +759,7 @@ def adjust_opt_iter(self): # Rewrite the file with a .autotest extension self.cfg_file = "%s.autotest"%self.cfg_file file_out = open(self.cfg_file,'w') - file_out.write('%% This file automatically generated by the regression script\n') + file_out.write('% This file automatically generated by the regression script\n') file_out.write('%% Number of optimizer iterations changed to %d\n'%(self.test_iter)) for line in lines: if not line.startswith("OPT_ITERATIONS"): @@ -783,7 +783,7 @@ def disable_restart(self): # Rewrite the file with a .autotest extension self.cfg_file = "%s.autotest"%self.cfg_file file_out = open(self.cfg_file,'w') - file_out.write('%% This file automatically generated by the regression script\n') + file_out.write('% This file automatically generated by the regression script\n') file_out.write('%% Number of optimizer iterations changed to %d\n'%(self.test_iter)) for line in lines: if not line.startswith("RESTART_SOL"): diff --git a/TestCases/hybrid_regression.py b/TestCases/hybrid_regression.py index 6f254e1183f9..89e89ba6822a 100644 --- a/TestCases/hybrid_regression.py +++ b/TestCases/hybrid_regression.py @@ -641,7 +641,7 @@ def main(): supersonic_vortex_shedding.cfg_dir = "sliding_interface/supersonic_vortex_shedding" supersonic_vortex_shedding.cfg_file = "sup_vor_shed_WA.cfg" supersonic_vortex_shedding.test_iter = 5 - supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.216554, 1.639119] + supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.214350, 1.663911] supersonic_vortex_shedding.unsteady = True supersonic_vortex_shedding.multizone = True test_list.append(supersonic_vortex_shedding) diff --git a/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff.csv.ref b/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff.csv.ref index 1ffd112a83e8..c8d39589b77a 100644 --- a/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff.csv.ref +++ b/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff.csv.ref @@ -1,2 +1,2 @@ "VARIABLE" , "AVG_DENSITY[0]", "AVG_ENTHALPY[0]", "AVG_NORMALVEL[0]", "DRAG[0]" , "EFFICIENCY[0]" , "FORCE_X[0]" , "FORCE_Y[0]" , "FORCE_Z[0]" , "LIFT[0]" , "MOMENT_X[0]" , "MOMENT_Y[0]" , "MOMENT_Z[0]" , "SIDEFORCE[0]" , "SURFACE_MACH[0]", "SURFACE_MASSFLOW[0]", "SURFACE_MOM_DISTORTION[0]", "SURFACE_PRESSURE_DROP[0]", "SURFACE_SECONDARY[0]", "SURFACE_SECOND_OVER_UNIFORM[0]", "SURFACE_STATIC_PRESSURE[0]", "SURFACE_STATIC_TEMPERATURE[0]", "SURFACE_TOTAL_PRESSURE[0]", "SURFACE_TOTAL_TEMPERATURE[0]", "SURFACE_UNIFORMITY[0]", "AVG_TEMPERATURE[1]", "MAXIMUM_HEATFLUX[1]", "TOTAL_HEATFLUX[1]", "FINDIFF_STEP" -0 , 0.0 , -100000.01639127731, -1.1102999999850092e-08, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , -0.04999999997368221, -2.2209999996988307e-08, -2.0599999983605954 , 0.0 , 2.12000000054946 , 3.709999998879887 , 330.00000030369847 , -39.999997625272954 , 315.0000011942211 , -39.999997625272954 , -1.400000004814217 , -129.99999512430804, 0.0 , -509.99999530176865, 1e-08 +0 , 0.0 , -199999.9862164259, -2.2204999999731917e-08, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , -0.04999999997368221, -2.2200000003768e-08, -2.0599999983605954 , 0.0 , 2.1199999991616814 , 3.6999999980524834 , 330.00000030369847 , -30.00000106112566 , 314.000000400938 , -30.00000106112566 , -1.3999999826097564 , -139.99999737279722, 0.0 , -509.99999530176865, 1e-08 diff --git a/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff_aarch64.csv.ref b/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff_aarch64.csv.ref index 9730785b8993..c8d39589b77a 100644 --- a/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff_aarch64.csv.ref +++ b/TestCases/incomp_navierstokes/streamwise_periodic/chtPinArray_2d/of_grad_findiff_aarch64.csv.ref @@ -1,2 +1,2 @@ "VARIABLE" , "AVG_DENSITY[0]", "AVG_ENTHALPY[0]", "AVG_NORMALVEL[0]", "DRAG[0]" , "EFFICIENCY[0]" , "FORCE_X[0]" , "FORCE_Y[0]" , "FORCE_Z[0]" , "LIFT[0]" , "MOMENT_X[0]" , "MOMENT_Y[0]" , "MOMENT_Z[0]" , "SIDEFORCE[0]" , "SURFACE_MACH[0]", "SURFACE_MASSFLOW[0]", "SURFACE_MOM_DISTORTION[0]", "SURFACE_PRESSURE_DROP[0]", "SURFACE_SECONDARY[0]", "SURFACE_SECOND_OVER_UNIFORM[0]", "SURFACE_STATIC_PRESSURE[0]", "SURFACE_STATIC_TEMPERATURE[0]", "SURFACE_TOTAL_PRESSURE[0]", "SURFACE_TOTAL_TEMPERATURE[0]", "SURFACE_UNIFORMITY[0]", "AVG_TEMPERATURE[1]", "MAXIMUM_HEATFLUX[1]", "TOTAL_HEATFLUX[1]", "FINDIFF_STEP" -0 , 0.0 , -100000.01639127731, 4.440899999949557e-08, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , -0.04999999997368221, -4.4410000000756306e-08, -2.0599999983605954 , 0.0 , 2.12000000054946 , 3.709999998879887 , 330.00000030369847 , -39.999997625272954 , 315.0000011942211 , -39.999997625272954 , -1.400000004814217 , -129.99999512430804, 0.0 , -509.99999530176865, 1e-08 +0 , 0.0 , -199999.9862164259, -2.2204999999731917e-08, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , -0.04999999997368221, -2.2200000003768e-08, -2.0599999983605954 , 0.0 , 2.1199999991616814 , 3.6999999980524834 , 330.00000030369847 , -30.00000106112566 , 314.000000400938 , -30.00000106112566 , -1.3999999826097564 , -139.99999737279722, 0.0 , -509.99999530176865, 1e-08 diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index a5e6629ef7a5..0021fad82056 100644 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -1277,7 +1277,7 @@ def main(): supersonic_vortex_shedding.cfg_dir = "sliding_interface/supersonic_vortex_shedding" supersonic_vortex_shedding.cfg_file = "sup_vor_shed_WA.cfg" supersonic_vortex_shedding.test_iter = 5 - supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.227386, 1.638722] #last 4 columns + supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.214356, 1.663914] #last 4 columns supersonic_vortex_shedding.su2_exec = "parallel_computation.py -f" supersonic_vortex_shedding.timeout = 1600 supersonic_vortex_shedding.tol = 0.00001 @@ -1465,7 +1465,7 @@ def main(): sp_pinArray_cht_2d_dp_hf.cfg_dir = "incomp_navierstokes/streamwise_periodic/chtPinArray_2d" sp_pinArray_cht_2d_dp_hf.cfg_file = "configMaster.cfg" sp_pinArray_cht_2d_dp_hf.test_iter = 100 - sp_pinArray_cht_2d_dp_hf.test_vals = [0.246959, -0.811849, -0.962120, -0.753320, 208.023676, 349.990000, -8.9660e-10, -7.5332e-01, 7.5332e-01] + sp_pinArray_cht_2d_dp_hf.test_vals = [0.246951, -0.811811, -0.962123, -0.753322, 208.023676, 350.000000, -0.000000, -0.753320, 0.753320] sp_pinArray_cht_2d_dp_hf.su2_exec = "mpirun -n 2 SU2_CFD" sp_pinArray_cht_2d_dp_hf.timeout = 1600 sp_pinArray_cht_2d_dp_hf.tol = 0.00001 diff --git a/TestCases/serial_regression.py b/TestCases/serial_regression.py index 89294a7dc345..89674ee3d17e 100644 --- a/TestCases/serial_regression.py +++ b/TestCases/serial_regression.py @@ -1290,7 +1290,7 @@ def main(): supersonic_vortex_shedding.cfg_dir = "sliding_interface/supersonic_vortex_shedding" supersonic_vortex_shedding.cfg_file = "sup_vor_shed_WA.cfg" supersonic_vortex_shedding.test_iter = 5 - supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.227921, 1.638901] #last 4 columns + supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.214364, 1.663910] #last 4 columns supersonic_vortex_shedding.su2_exec = "SU2_CFD" supersonic_vortex_shedding.timeout = 1600 supersonic_vortex_shedding.tol = 0.00001 diff --git a/externals/codi b/externals/codi index 3c3211fef2e2..b576224d5ea6 160000 --- a/externals/codi +++ b/externals/codi @@ -1 +1 @@ -Subproject commit 3c3211fef2e225ab89680a4063b62bb3bb38a7e4 +Subproject commit b576224d5ea686fb8385b7ba6b5df0743d2be8aa diff --git a/externals/opdi b/externals/opdi index 6fb2691b8e4a..1aabf5bd1ed7 160000 --- a/externals/opdi +++ b/externals/opdi @@ -1 +1 @@ -Subproject commit 6fb2691b8e4a8f00f47d2a27740fa890df0b5405 +Subproject commit 1aabf5bd1ed77611742eb655002f2ac7a3dddef9 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index 6250e4a59d84..fef4e1c51171 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -44,11 +44,11 @@ def init_submodules(method = 'auto'): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '3c3211fef2e225ab89680a4063b62bb3bb38a7e4' + sha_version_codi = 'b576224d5ea686fb8385b7ba6b5df0743d2be8aa' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' - sha_version_opdi = '6fb2691b8e4a8f00f47d2a27740fa890df0b5405' + sha_version_opdi = '1aabf5bd1ed77611742eb655002f2ac7a3dddef9' github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '41c650a040d50e0912d268af7a903a9ce1456dfa' github_repo_meson = 'https://github.com/mesonbuild/meson' diff --git a/su2omp.syntax.json b/su2omp.syntax.json index e646afc08470..fe49599ee824 100644 --- a/su2omp.syntax.json +++ b/su2omp.syntax.json @@ -37,6 +37,7 @@ "SU2_OMP_FOR_DYN": "END_SU2_OMP_FOR", "SU2_OMP_FOR_STAT": "END_SU2_OMP_FOR", "CSYSVEC_PARFOR": "END_CSYSVEC_PARFOR", - "CNEWTON_PARFOR": "END_CNEWTON_PARFOR" + "CNEWTON_PARFOR": "END_CNEWTON_PARFOR", + "BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS": "END_SU2_OMP_SAFE_GLOBAL_ACCESS" } }