diff --git a/Common/include/fem_geometry_structure.hpp b/Common/include/fem_geometry_structure.hpp index 221d04a66b7e..5fbde9b9266b 100644 --- a/Common/include/fem_geometry_structure.hpp +++ b/Common/include/fem_geometry_structure.hpp @@ -1038,18 +1038,6 @@ class CMeshFEM_DG: public CMeshFEM { */ void SetSendReceive(CConfig *config); - /*! - * \brief Set the value of the total number of points globally in the simulation. - * \param[in] val_global_npoint - Global number of points in the mesh (excluding halos). - */ - void SetGlobal_nPointDomain(unsigned long val_global_npoint); - - /*! - * \brief Retrieve total number of nodes in a simulation across all processors (excluding halos). - * \return Total number of nodes in a simulation across all processors (excluding halos). - */ - unsigned long GetGlobal_nPointDomain() const override; - /*! * \brief Set the local index that correspond with the global numbering index. */ @@ -1063,8 +1051,7 @@ class CMeshFEM_DG: public CMeshFEM { long GetGlobal_to_Local_Point(unsigned long val_ipoint) const override; /*! - * \brief Function, which carries out the preprocessing tasks - when wall functions are used. + * \brief Function, which carries out the preprocessing tasks when wall functions are used. * \param[in] config - Definition of the particular problem. 
*/ void WallFunctionPreprocessing(CConfig *config); diff --git a/Common/include/fem_geometry_structure.inl b/Common/include/fem_geometry_structure.inl index b00d06e66bd9..f3daa02859f7 100644 --- a/Common/include/fem_geometry_structure.inl +++ b/Common/include/fem_geometry_structure.inl @@ -144,10 +144,6 @@ inline CMeshFEM_DG::CMeshFEM_DG(void) : CMeshFEM() { } inline CMeshFEM_DG::~CMeshFEM_DG(void) { } -inline void CMeshFEM_DG::SetGlobal_nPointDomain(unsigned long val_global_npoint) { Global_nPointDomain = val_global_npoint; } - -inline unsigned long CMeshFEM_DG::GetGlobal_nPointDomain(void) const { return Global_nPointDomain; } - inline void CMeshFEM_DG::SetGlobal_to_Local_Point(void) { Global_to_Local_Point.clear(); unsigned long ii = 0; diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp index 0131d2e6fbac..147ecd6fb490 100644 --- a/Common/include/geometry/CGeometry.hpp +++ b/Common/include/geometry/CGeometry.hpp @@ -53,6 +53,7 @@ extern "C" { #include "../dual_grid_structure.hpp" #include "../config_structure.hpp" #include "../geometry_structure_fem_part.hpp" +#include "../toolboxes/graph_toolbox.hpp" using namespace std; @@ -99,30 +100,42 @@ class CGeometry { nelem_quad_bound, /*!< \brief Number of quads on the mesh boundaries. */ Global_nelem_quad_bound; /*!< \brief Total number of quads on the mesh boundaries across all processors. */ - unsigned short nDim, /*!< \brief Number of dimension of the problem. */ - nZone, /*!< \brief Number of zones in the problem. */ - nMarker; /*!< \brief Number of different markers of the mesh. */ + unsigned short nDim; /*!< \brief Number of dimension of the problem. */ + unsigned short nZone; /*!< \brief Number of zones in the problem. */ + unsigned short nMarker; /*!< \brief Number of different markers of the mesh. */ + unsigned short nCommLevel; /*!< \brief Number of non-blocking communication levels. 
*/ - unsigned short MGLevel; /*!< \brief The mesh level index for the current geometry container. */ - unsigned long Max_GlobalPoint; /*!< \brief Greater global point in the domain local structure. */ + unsigned short MGLevel; /*!< \brief The mesh level index for the current geometry container. */ + unsigned long Max_GlobalPoint; /*!< \brief Greater global point in the domain local structure. */ - /* --- Custom boundary variables --- */ + /*--- Boundary information. ---*/ + + short *Marker_All_SendRecv; /*!< \brief MPI Marker. */ su2double **CustomBoundaryTemperature; su2double **CustomBoundaryHeatFlux; -public: - unsigned long *nElem_Bound; /*!< \brief Number of elements of the boundary. */ - string *Tag_to_Marker; /*!< \brief If you know the index of the boundary (depend of the grid definition), - it gives you the maker (where the boundary is stored from 0 to boundaries). */ - CPrimalGrid** elem; /*!< \brief Element vector (primal grid information). */ - CPrimalGrid** face; /*!< \brief Face vector (primal grid information). */ - CPrimalGrid*** bound; /*!< \brief Boundary vector (primal grid information). */ - CPoint** node; /*!< \brief Node vector (dual grid information). */ - CEdge** edge; /*!< \brief Edge vector (dual grid information). */ - CVertex*** vertex; /*!< \brief Boundary Vertex vector (dual grid information). */ - CTurboVertex**** turbovertex; /*!< \brief Boundary Vertex vector ordered for turbomachinery calculation(dual grid information). */ - unsigned long *nVertex; /*!< \brief Number of vertex for each marker. */ - vector bound_is_straight; /*!< \brief Bool if boundary-marker is straight(2D)/plane(3D) for each local marker. 
*/ + /*--- Create vectors and distribute the values among the different planes queues ---*/ + + vector > Xcoord_plane; /*!< \brief Vector containing x coordinates of new points appearing on a single plane */ + vector > Ycoord_plane; /*!< \brief Vector containing y coordinates of new points appearing on a single plane */ + vector > Zcoord_plane; /*!< \brief Vector containing z coordinates of new points appearing on a single plane */ + vector > FaceArea_plane; /*!< \brief Vector containing area/volume associated with new points appearing on a single plane */ + vector > Plane_points; /*!< \brief Vector containing points appearing on a single plane */ + + vector XCoordList; /*!< \brief Vector containing points appearing on a single plane */ + CPrimalGrid*** newBound; /*!< \brief Boundary vector for new periodic elements (primal grid information). */ + unsigned long *nNewElem_Bound; /*!< \brief Number of new periodic elements of the boundary. */ + +#ifdef HAVE_MPI +#ifdef HAVE_PARMETIS + vector > adj_nodes; /*!< \brief Vector of vectors holding each node's adjacency during preparation for ParMETIS. */ + idx_t *adjacency; /*!< \brief Local adjacency array to be input into ParMETIS for partitioning (idx_t is a ParMETIS type defined in their headers). */ + idx_t *xadj; /*!< \brief Index array that points to the start of each node's adjacency in CSR format (needed to interpret the adjacency array). */ +#endif +#endif + + /*--- Turbomachinery variables ---*/ + unsigned short *nSpanWiseSections; /*!< \brief Number of Span wise section for each turbo marker, indexed by inflow/outflow */ unsigned short *nSpanSectionsByMarker; /*!< \brief Number of Span wise section for each turbo marker, indexed by marker. Needed for deallocation.*/ unsigned short nTurboPerf; /*!< \brief Number of Span wise section for each turbo marker. 
*/ @@ -139,89 +152,102 @@ class CGeometry { su2double **MinAngularCoord; /*!< \brief Max angular pitch at each span wise section for each marker.*/ su2double **MinRelAngularCoord; /*!< \brief Min relative angular coord at each span wise section for each marker.*/ su2double **TurboRadius; /*!< \brief Radius at each span wise section for each marker.*/ - su2double **TangGridVelIn, - **TangGridVelOut; /*!< \brief Average tangential rotational speed at each span wise section for each turbomachinery marker.*/ - su2double **SpanAreaIn, - **SpanAreaOut; /*!< \brief Area at each span wise section for each turbomachinery marker.*/ - su2double **TurboRadiusIn, - **TurboRadiusOut; /*!< \brief Radius at each span wise section for each turbomachinery marker*/ + su2double **TangGridVelIn; + su2double **TangGridVelOut; /*!< \brief Average tangential rotational speed at each span wise section for each turbomachinery marker.*/ + su2double **SpanAreaIn; + su2double **SpanAreaOut; /*!< \brief Area at each span wise section for each turbomachinery marker.*/ + su2double **TurboRadiusIn; + su2double **TurboRadiusOut; /*!< \brief Radius at each span wise section for each turbomachinery marker*/ - unsigned short nCommLevel; /*!< \brief Number of non-blocking communication levels. */ + /*--- Sparsity patterns associated with the geometry. ---*/ - short *Marker_All_SendRecv; /*!< \brief MPI Marker. */ + CCompressedSparsePatternUL + finiteVolumeCSRFill0, /*!< \brief 0-fill FVM sparsity. */ + finiteVolumeCSRFillN, /*!< \brief N-fill FVM sparsity (e.g. for ILUn preconditioner). */ + finiteElementCSRFill0, /*!< \brief 0-fill FEM sparsity. */ + finiteElementCSRFillN; /*!< \brief N-fill FEM sparsity (e.g. for ILUn preconditioner). 
*/ - /*--- Create vectors and distribute the values among the different planes queues ---*/ - vector > Xcoord_plane; /*!< \brief Vector containing x coordinates of new points appearing on a single plane */ - vector > Ycoord_plane; /*!< \brief Vector containing y coordinates of new points appearing on a single plane */ - vector > Zcoord_plane; /*!< \brief Vector containing z coordinates of new points appearing on a single plane */ - vector > FaceArea_plane; /*!< \brief Vector containing area/volume associated with new points appearing on a single plane */ - vector > Plane_points; /*!< \brief Vector containing points appearing on a single plane */ + CEdgeToNonZeroMapUL edgeToCSRMap; /*!< \brief Map edges to CSR entries referenced by them (i,j) and (j,i). */ - vector XCoordList; /*!< \brief Vector containing points appearing on a single plane */ - CPrimalGrid*** newBound; /*!< \brief Boundary vector for new periodic elements (primal grid information). */ - unsigned long *nNewElem_Bound; /*!< \brief Number of new periodic elements of the boundary. */ + /*--- Edge and element colorings. ---*/ - /*--- Partitioning-specific variables ---*/ + CCompressedSparsePatternUL + edgeColoring, /*!< \brief Edge coloring structure for thread-based parallelization. */ + elemColoring; /*!< \brief Element coloring structure for thread-based parallelization. */ + unsigned long edgeColorGroupSize = 1; /*!< \brief Size of the edge groups within each color. */ + unsigned long elemColorGroupSize = 1; /*!< \brief Size of the element groups within each color. */ - map Global_to_Local_Elem; /*!< \brief Mapping of global to local index for elements. */ - unsigned long *beg_node; /*!< \brief Array containing the first node on each rank due to a linear partitioning by global index. */ - unsigned long *end_node; /*!< \brief Array containing the last node on each rank due to a linear partitioning by global index. 
*/ - unsigned long *nPointLinear; /*!< \brief Array containing the total number of nodes on each rank due to a linear partioning by global index. */ - unsigned long *nPointCumulative; /*!< \brief Cumulative storage array containing the total number of points on all prior ranks in the linear partitioning. */ +public: + /*--- Main geometric elements of the grid. ---*/ -#ifdef HAVE_MPI -#ifdef HAVE_PARMETIS - vector< vector > adj_nodes; /*!< \brief Vector of vectors holding each node's adjacency during preparation for ParMETIS. */ - idx_t *adjacency; /*!< \brief Local adjacency array to be input into ParMETIS for partitioning (idx_t is a ParMETIS type defined in their headers). */ - idx_t *xadj; /*!< \brief Index array that points to the start of each node's adjacency in CSR format (needed to interpret the adjacency array). */ -#endif -#endif + CPrimalGrid** elem; /*!< \brief Element vector (primal grid information). */ + CPrimalGrid** face; /*!< \brief Face vector (primal grid information). */ + CPrimalGrid*** bound; /*!< \brief Boundary vector (primal grid information). */ + CPoint** node; /*!< \brief Node vector (dual grid information). */ + CEdge** edge; /*!< \brief Edge vector (dual grid information). */ + CVertex*** vertex; /*!< \brief Boundary Vertex vector (dual grid information). */ + CTurboVertex**** turbovertex; /*!< \brief Boundary Vertex vector ordered for turbomachinery calculation(dual grid information). */ + unsigned long *nVertex; /*!< \brief Number of vertex for each marker. */ + unsigned long *nElem_Bound; /*!< \brief Number of elements of the boundary. */ + string *Tag_to_Marker; /*!< \brief Names of boundary markers. */ + vector bound_is_straight; /*!< \brief Bool if boundary-marker is straight(2D)/plane(3D) for each local marker. */ + + /*--- Partitioning-specific variables ---*/ + + map Global_to_Local_Elem; /*!< \brief Mapping of global to local index for elements. 
*/ + unsigned long *beg_node; /*!< \brief Array containing the first node on each rank due to a linear partitioning by global index. */ + unsigned long *end_node; /*!< \brief Array containing the last node on each rank due to a linear partitioning by global index. */ + unsigned long *nPointLinear; /*!< \brief Array containing the total number of nodes on each rank due to a linear partioning by global index. */ + unsigned long *nPointCumulative; /*!< \brief Cumulative storage array containing the total number of points on all prior ranks in the linear partitioning. */ /*--- Data structures for point-to-point MPI communications. ---*/ - int countPerPoint; /*!< \brief Maximum number of pieces of data sent per vertex in point-to-point comms. */ - int nP2PSend; /*!< \brief Number of sends during point-to-point comms. */ - int nP2PRecv; /*!< \brief Number of receives during point-to-point comms. */ - int *nPoint_P2PSend; /*!< \brief Data structure holding number of vertices for each send in point-to-point comms. */ - int *nPoint_P2PRecv; /*!< \brief Data structure holding number of vertices for each recv in point-to-point comms. */ - int *Neighbors_P2PSend; /*!< \brief Data structure holding the ranks of the neighbors for point-to-point send comms. */ - int *Neighbors_P2PRecv; /*!< \brief Data structure holding the ranks of the neighbors for point-to-point recv comms. */ - map P2PSend2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for point-to-point send comms. */ - map P2PRecv2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for point-to-point recv comms. */ - unsigned long *Local_Point_P2PSend; /*!< \brief Data structure holding the local index of all vertices to be sent in point-to-point comms. */ - unsigned long *Local_Point_P2PRecv; /*!< \brief Data structure holding the local index of all vertices to be received in point-to-point comms. 
*/ - su2double *bufD_P2PRecv; /*!< \brief Data structure for su2double point-to-point receive. */ - su2double *bufD_P2PSend; /*!< \brief Data structure for su2double point-to-point send. */ - unsigned short *bufS_P2PRecv; /*!< \brief Data structure for unsigned long point-to-point receive. */ - unsigned short *bufS_P2PSend; /*!< \brief Data structure for unsigned long point-to-point send. */ - SU2_MPI::Request *req_P2PSend; /*!< \brief Data structure for point-to-point send requests. */ - SU2_MPI::Request *req_P2PRecv; /*!< \brief Data structure for point-to-point recv requests. */ + int countPerPoint; /*!< \brief Maximum number of pieces of data sent per vertex in point-to-point comms. */ + int nP2PSend; /*!< \brief Number of sends during point-to-point comms. */ + int nP2PRecv; /*!< \brief Number of receives during point-to-point comms. */ + int *nPoint_P2PSend; /*!< \brief Data structure holding number of vertices for each send in point-to-point comms. */ + int *nPoint_P2PRecv; /*!< \brief Data structure holding number of vertices for each recv in point-to-point comms. */ + int *Neighbors_P2PSend; /*!< \brief Data structure holding the ranks of the neighbors for point-to-point send comms. */ + int *Neighbors_P2PRecv; /*!< \brief Data structure holding the ranks of the neighbors for point-to-point recv comms. */ + map P2PSend2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for point-to-point send comms. */ + map P2PRecv2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for point-to-point recv comms. */ + unsigned long *Local_Point_P2PSend; /*!< \brief Data structure holding the local index of all vertices to be sent in point-to-point comms. */ + unsigned long *Local_Point_P2PRecv; /*!< \brief Data structure holding the local index of all vertices to be received in point-to-point comms. 
*/ + su2double *bufD_P2PRecv; /*!< \brief Data structure for su2double point-to-point receive. */ + su2double *bufD_P2PSend; /*!< \brief Data structure for su2double point-to-point send. */ + unsigned short *bufS_P2PRecv; /*!< \brief Data structure for unsigned long point-to-point receive. */ + unsigned short *bufS_P2PSend; /*!< \brief Data structure for unsigned long point-to-point send. */ + SU2_MPI::Request *req_P2PSend; /*!< \brief Data structure for point-to-point send requests. */ + SU2_MPI::Request *req_P2PRecv; /*!< \brief Data structure for point-to-point recv requests. */ /*--- Data structures for periodic communications. ---*/ - int countPerPeriodicPoint; /*!< \brief Maximum number of pieces of data sent per vertex in periodic comms. */ - int nPeriodicSend; /*!< \brief Number of sends during periodic comms. */ - int nPeriodicRecv; /*!< \brief Number of receives during periodic comms. */ - int *nPoint_PeriodicSend; /*!< \brief Data structure holding number of vertices for each send in periodic comms. */ - int *nPoint_PeriodicRecv; /*!< \brief Data structure holding number of vertices for each recv in periodic comms. */ - int *Neighbors_PeriodicSend; /*!< \brief Data structure holding the ranks of the neighbors for periodic send comms. */ - int *Neighbors_PeriodicRecv; /*!< \brief Data structure holding the ranks of the neighbors for periodic recv comms. */ - map PeriodicSend2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for periodic send comms. */ - map PeriodicRecv2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for periodic recv comms. */ - unsigned long *Local_Point_PeriodicSend; /*!< \brief Data structure holding the local index of all vertices to be sent in periodic comms. */ - unsigned long *Local_Point_PeriodicRecv; /*!< \brief Data structure holding the local index of all vertices to be received in periodic comms. 
*/ - unsigned long *Local_Marker_PeriodicSend; /*!< \brief Data structure holding the local index of the periodic marker for a particular vertex to be sent in periodic comms. */ - unsigned long *Local_Marker_PeriodicRecv; /*!< \brief Data structure holding the local index of the periodic marker for a particular vertex to be received in periodic comms. */ - su2double *bufD_PeriodicRecv; /*!< \brief Data structure for su2double periodic receive. */ - su2double *bufD_PeriodicSend; /*!< \brief Data structure for su2double periodic send. */ - unsigned short *bufS_PeriodicRecv; /*!< \brief Data structure for unsigned long periodic receive. */ - unsigned short *bufS_PeriodicSend; /*!< \brief Data structure for unsigned long periodic send. */ - SU2_MPI::Request *req_PeriodicSend; /*!< \brief Data structure for periodic send requests. */ - SU2_MPI::Request *req_PeriodicRecv; /*!< \brief Data structure for periodic recv requests. */ - - vector Orthogonality; /*!< \brief Measure of dual CV orthogonality angle (0 to 90 deg., 90 being best). */ - vector Aspect_Ratio; /*!< \brief Measure of dual CV aspect ratio (max face area / min face area). */ - vector Volume_Ratio; /*!< \brief Measure of dual CV volume ratio (max sub-element volume / min sub-element volume). */ + int countPerPeriodicPoint; /*!< \brief Maximum number of pieces of data sent per vertex in periodic comms. */ + int nPeriodicSend; /*!< \brief Number of sends during periodic comms. */ + int nPeriodicRecv; /*!< \brief Number of receives during periodic comms. */ + int *nPoint_PeriodicSend; /*!< \brief Data structure holding number of vertices for each send in periodic comms. */ + int *nPoint_PeriodicRecv; /*!< \brief Data structure holding number of vertices for each recv in periodic comms. */ + int *Neighbors_PeriodicSend; /*!< \brief Data structure holding the ranks of the neighbors for periodic send comms. 
*/ + int *Neighbors_PeriodicRecv; /*!< \brief Data structure holding the ranks of the neighbors for periodic recv comms. */ + map PeriodicSend2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for periodic send comms. */ + map PeriodicRecv2Neighbor; /*!< \brief Data structure holding the reverse mapping of the ranks of the neighbors for periodic recv comms. */ + unsigned long + *Local_Point_PeriodicSend, /*!< \brief Data structure holding the local index of all vertices to be sent in periodic comms. */ + *Local_Point_PeriodicRecv, /*!< \brief Data structure holding the local index of all vertices to be received in periodic comms. */ + *Local_Marker_PeriodicSend, /*!< \brief Data structure holding the local index of the periodic marker for a particular vertex to be sent in periodic comms. */ + *Local_Marker_PeriodicRecv; /*!< \brief Data structure holding the local index of the periodic marker for a particular vertex to be received in periodic comms. */ + su2double *bufD_PeriodicRecv; /*!< \brief Data structure for su2double periodic receive. */ + su2double *bufD_PeriodicSend; /*!< \brief Data structure for su2double periodic send. */ + unsigned short *bufS_PeriodicRecv; /*!< \brief Data structure for unsigned long periodic receive. */ + unsigned short *bufS_PeriodicSend; /*!< \brief Data structure for unsigned long periodic send. */ + SU2_MPI::Request *req_PeriodicSend; /*!< \brief Data structure for periodic send requests. */ + SU2_MPI::Request *req_PeriodicRecv; /*!< \brief Data structure for periodic recv requests. */ + + /*--- Mesh quality metrics. ---*/ + + vector Orthogonality; /*!< \brief Measure of dual CV orthogonality angle (0 to 90 deg., 90 being best). */ + vector Aspect_Ratio; /*!< \brief Measure of dual CV aspect ratio (max face area / min face area). */ + vector Volume_Ratio; /*!< \brief Measure of dual CV volume ratio (max sub-element volume / min sub-element volume). */ /*! 
* \brief Constructor of the class. @@ -348,10 +374,16 @@ class CGeometry { inline unsigned long GetnPointDomain(void) const {return nPointDomain;} /*! - * \brief Get number of elements. - * \return Number of elements. + * \brief Retrieve total number of nodes in a simulation across all processors (including halos). + * \return Total number of nodes in a simulation across all processors (including halos). + */ + inline unsigned long GetGlobal_nPoint(void) const { return Global_nPoint; } + + /*! + * \brief Retrieve total number of nodes in a simulation across all processors (excluding halos). + * \return Total number of nodes in a simulation across all processors (excluding halos). */ - unsigned long GetnLine(void); + inline unsigned long GetGlobal_nPointDomain(void) const { return Global_nPointDomain; } /*! * \brief Get number of elements. @@ -505,6 +537,12 @@ class CGeometry { */ inline void SetnPointDomain(unsigned long val_npoint) { nPointDomain = val_npoint; } + /*! + * \brief Set the value of the total number of points globally in the simulation. + * \param[in] val_global_npoint - Global number of points in the mesh (excluding halos). + */ + void SetGlobal_nPointDomain(unsigned long val_global_npoint) { Global_nPointDomain = val_global_npoint; } + /*! * \brief Set the number of grid elements. * \param[in] val_nelem - Number of grid elements. @@ -1025,118 +1063,100 @@ class CGeometry { inline virtual unsigned short GetGlobal_to_Local_Marker(unsigned short val_imarker) const { return 0; } /*! - * \brief A virtual member. - * \return Total number of nodes in a simulation across all processors (including halos). - */ - inline virtual unsigned long GetGlobal_nPoint() const { return 0; } - - /*! - * \brief A virtual member. - * \return Total number of nodes in a simulation across all processors (excluding halos). - */ - inline virtual unsigned long GetGlobal_nPointDomain() const { return 0; } - - /*! - * \brief A virtual member. 
- * \param[in] val_global_npoint - Global number of points in the mesh (excluding halos). - */ - inline virtual void SetGlobal_nPointDomain(unsigned long val_global_npoint) {} - - /*! - * \brief A virtual member. + * \brief Retrieve total number of elements in a simulation across all processors. * \return Total number of elements in a simulation across all processors. */ - inline virtual unsigned long GetGlobal_nElem() const { return 0; } + inline unsigned long GetGlobal_nElem(void) const { return Global_nElem; } /*! - * \brief A virtual member. + * \brief Retrieve total number of elements in a simulation across all processors (excluding halos). * \return Total number of elements in a simulation across all processors (excluding halos). */ - inline virtual unsigned long GetGlobal_nElemDomain() const { return 0; } + inline unsigned long GetGlobal_nElemDomain(void) const { return Global_nElemDomain; } /*! - * \brief A virtual member. + * \brief Retrieve total number of line elements in a simulation across all processors. * \return Total number of line elements in a simulation across all processors. */ - inline virtual unsigned long GetGlobal_nElemLine() const { return 0; } + inline unsigned long GetGlobal_nElemLine(void) const { return Global_nelem_edge; } /*! - * \brief A virtual member. + * \brief Retrieve total number of triangular elements in a simulation across all processors. * \return Total number of triangular elements in a simulation across all processors. */ - inline virtual unsigned long GetGlobal_nElemTria() const { return 0; } + inline unsigned long GetGlobal_nElemTria(void) const { return Global_nelem_triangle; } /*! - * \brief A virtual member. + * \brief Retrieve total number of quadrilateral elements in a simulation across all processors. * \return Total number of quadrilateral elements in a simulation across all processors. 
*/ - inline virtual unsigned long GetGlobal_nElemQuad() const { return 0; } + inline unsigned long GetGlobal_nElemQuad(void) const { return Global_nelem_quad; } /*! - * \brief A virtual member. + * \brief Retrieve total number of tetrahedral elements in a simulation across all processors. * \return Total number of tetrahedral elements in a simulation across all processors. */ - inline virtual unsigned long GetGlobal_nElemTetr() const { return 0; } + inline unsigned long GetGlobal_nElemTetr(void) const { return Global_nelem_tetra; } /*! - * \brief A virtual member. + * \brief Retrieve total number of hexahedral elements in a simulation across all processors. * \return Total number of hexahedral elements in a simulation across all processors. */ - inline virtual unsigned long GetGlobal_nElemHexa() const { return 0; } + inline unsigned long GetGlobal_nElemHexa(void) const { return Global_nelem_hexa; } /*! - * \brief A virtual member. + * \brief Retrieve total number of prism elements in a simulation across all processors. * \return Total number of prism elements in a simulation across all processors. */ - inline virtual unsigned long GetGlobal_nElemPris() const { return 0; } + inline unsigned long GetGlobal_nElemPris(void) const { return Global_nelem_prism; } /*! - * \brief A virtual member. + * \brief Retrieve total number of pyramid elements in a simulation across all processors. * \return Total number of pyramid elements in a simulation across all processors. */ - inline virtual unsigned long GetGlobal_nElemPyra() const { return 0; } + inline unsigned long GetGlobal_nElemPyra(void) const { return Global_nelem_pyramid; } /*! - * \brief A virtual member. + * \brief Get number of line elements. * \return Number of line elements. */ - inline virtual unsigned long GetnElemLine() const { return 0; } + inline unsigned long GetnElemLine(void) const { return nelem_edge; } /*! - * \brief A virtual member. + * \brief Get number of triangular elements. 
* \return Number of triangular elements. */ - inline virtual unsigned long GetnElemTria() const { return 0; } + inline unsigned long GetnElemTria(void) const { return nelem_triangle; } /*! - * \brief A virtual member. + * \brief Get number of quadrilateral elements. * \return Number of quadrilateral elements. */ - inline virtual unsigned long GetnElemQuad() const { return 0; } + inline unsigned long GetnElemQuad(void) const { return nelem_quad; } /*! - * \brief A virtual member. + * \brief Get number of tetrahedral elements. * \return Number of tetrahedral elements. */ - inline virtual unsigned long GetnElemTetr() const { return 0; } + inline unsigned long GetnElemTetr(void) const { return nelem_tetra; } /*! - * \brief A virtual member. + * \brief Get number of hexahedral elements. * \return Number of hexahedral elements. */ - inline virtual unsigned long GetnElemHexa() const { return 0; } + inline unsigned long GetnElemHexa(void) const { return nelem_hexa; } /*! - * \brief A virtual member. + * \brief Get number of prism elements. * \return Number of prism elements. */ - inline virtual unsigned long GetnElemPris() const { return 0; } + inline unsigned long GetnElemPris(void) const { return nelem_prism; } /*! - * \brief A virtual member. + * \brief Get number of pyramid elements. * \return Number of pyramid elements. */ - inline virtual unsigned long GetnElemPyra() const { return 0; } + inline unsigned long GetnElemPyra(void) const { return nelem_pyramid; } /*! * \brief Indentify geometrical planes in the mesh @@ -1272,164 +1292,213 @@ class CGeometry { inline virtual void SetSensitivity(unsigned long iPoint, unsigned short iDim, su2double val) {} /*! - * \brief A virtual member. + * \brief Get the average normal at a specific span for a given marker in the turbomachinery frame of reference. * \param[in] val_marker - marker value. * \param[in] val_span - span value. + * \return The span-wise averaged turbo normal. 
*/ - inline virtual const su2double* GetAverageTurboNormal(unsigned short val_marker, unsigned short val_span) const { return nullptr; } + inline const su2double* GetAverageTurboNormal(unsigned short val_marker, unsigned short val_span) const { + return AverageTurboNormal[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the average normal at a specific span for a given marker. * \param[in] val_marker - marker value. * \param[in] val_span - span value. + * \return The span-wise averaged normal. */ - inline virtual const su2double* GetAverageNormal(unsigned short val_marker, unsigned short val_span) const { return nullptr; } + inline const su2double* GetAverageNormal(unsigned short val_marker, unsigned short val_span) const { + return AverageNormal[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the total area for each span. * \param[in] val_marker - marker value. * \param[in] val_span - span value. + * \return The span-wise area. */ - inline virtual su2double GetSpanArea(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetSpanArea(unsigned short val_marker, unsigned short val_span) const { + return SpanArea[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the radius for each span. * \param[in] val_marker - marker value. * \param[in] val_span - span value. + * \return The span-wise radius. */ - inline virtual su2double GetTurboRadius(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetTurboRadius(unsigned short val_marker, unsigned short val_span) const { + return TurboRadius[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the average tangential rotational velocity for each span. * \param[in] val_marker - marker value. * \param[in] val_span - span value. + * \return The span-wise averaged tangential velocity. 
*/ - inline virtual su2double GetAverageTangGridVel(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetAverageTangGridVel(unsigned short val_marker, unsigned short val_span) const { + return AverageTangGridVel[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the inflow tangential velocity at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. * \return The span-wise inflow tangential velocity. */ - inline virtual su2double GetTangGridVelIn(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetTangGridVelIn(unsigned short val_marker, unsigned short val_span) const { + return TangGridVelIn[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the outflow tangential velocity at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. * \return The span-wise outflow tangential velocity. */ - inline virtual su2double GetTangGridVelOut(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetTangGridVelOut(unsigned short val_marker, unsigned short val_span) const { + return TangGridVelOut[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the inflow area at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. * \return The span-wise inflow area. */ - inline virtual su2double GetSpanAreaIn(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetSpanAreaIn(unsigned short val_marker, unsigned short val_span) const { + return SpanAreaIn[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the outflow area at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. 
* \return The span-wise outflow area. */ - inline virtual su2double GetSpanAreaOut(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetSpanAreaOut(unsigned short val_marker, unsigned short val_span) const { + return SpanAreaOut[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the inflow radius at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. * \return The span-wise inflow radius. */ - inline virtual su2double GetTurboRadiusIn(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetTurboRadiusIn(unsigned short val_marker, unsigned short val_span) const { + return TurboRadiusIn[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the value of the outflow radius at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. * \return The span-wise outflow radius. */ - inline virtual su2double GetTurboRadiusOut(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetTurboRadiusOut(unsigned short val_marker, unsigned short val_span) const { + return TurboRadiusOut[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Set the value of the inflow tangential velocity at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. */ - inline virtual void SetTangGridVelIn(su2double value, unsigned short val_marker, unsigned short val_span) {} + inline void SetTangGridVelIn(su2double value, unsigned short val_marker, unsigned short val_span) { + TangGridVelIn[val_marker][val_span] = value; + } /*! - * \brief A virtual member. + * \brief Set the value of the outflow tangential velocity at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. 
*/ - inline virtual void SetTangGridVelOut(su2double value, unsigned short val_marker, unsigned short val_span) {} + inline void SetTangGridVelOut(su2double value, unsigned short val_marker, unsigned short val_span) { + TangGridVelOut[val_marker][val_span] = value; + } /*! - * \brief A virtual member. + * \brief Set the value of the inflow area at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. */ - inline virtual void SetSpanAreaIn(su2double value, unsigned short val_marker, unsigned short val_span) {} + inline void SetSpanAreaIn(su2double value, unsigned short val_marker, unsigned short val_span) { + SpanAreaIn[val_marker][val_span] = value; + } /*! - * \brief A virtual member. + * \brief Set the value of the outflow area at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. */ - inline virtual void SetSpanAreaOut(su2double value, unsigned short val_marker, unsigned short val_span) {} + inline void SetSpanAreaOut(su2double value, unsigned short val_marker, unsigned short val_span) { + SpanAreaOut[val_marker][val_span] = value; + } /*! - * \brief A virtual member. + * \brief Set the value of the inflow radius at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. */ - inline virtual void SetTurboRadiusIn(su2double value, unsigned short val_marker, unsigned short val_span) {} + inline void SetTurboRadiusIn(su2double value, unsigned short val_marker, unsigned short val_span) { + TurboRadiusIn[val_marker][val_span] = value; + } /*! - * \brief A virtual member. + * \brief Set the value of the outflow radius at each span. * \param[in] val_marker - marker turbo-performance value. * \param[in] val_span - span value. 
*/ - inline virtual void SetTurboRadiusOut(su2double value, unsigned short val_marker, unsigned short val_span) {} + inline void SetTurboRadiusOut(su2double value, unsigned short val_marker, unsigned short val_span) { + TurboRadiusOut[val_marker][val_span] = value; + } /*! - * \brief A virtual member. + * \brief Total number of vertices independently of the MPI partitions. * \param[in] val_marker - marker value. * \param[in] val_span - span value. */ - inline virtual unsigned long GetnTotVertexSpan(unsigned short val_marker, unsigned short val_span) const {return 0;} + inline unsigned long GetnTotVertexSpan(unsigned short val_marker, unsigned short val_span) const { + return nTotVertexSpan[val_marker][val_span]; + } /*! - * \brief A virtual member. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - */ - inline virtual su2double GetMinAngularCoord(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + * \brief min angular pitch independently of the MPI partitions. + * \param[in] val_marker - marker value. + * \param[in] val_span - span value. + */ + inline su2double GetMinAngularCoord(unsigned short val_marker, unsigned short val_span) const { + return MinAngularCoord[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief max angular pitch independently of the MPI partitions. * \param[in] val_marker - marker value. * \param[in] val_span - span value. */ - inline virtual su2double GetMaxAngularCoord(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetMaxAngularCoord(unsigned short val_marker, unsigned short val_span) const { + return MaxAngularCoord[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief min relative angular coord independently of the MPI partitions. * \param[in] val_marker - marker value. * \param[in] val_span - span value. 
*/ - inline virtual su2double GetMinRelAngularCoord(unsigned short val_marker, unsigned short val_span) const { return 0.0; } + inline su2double GetMinRelAngularCoord(unsigned short val_marker, unsigned short val_span) const { + return MinRelAngularCoord[val_marker][val_span]; + } /*! - * \brief A virtual member. + * \brief Get the average grid velocity at a specific span for a given marker. * \param[in] val_marker - marker value. * \param[in] val_span - span value. */ - inline virtual const su2double* GetAverageGridVel(unsigned short val_marker, unsigned short val_span) const {return nullptr;} + inline const su2double* GetAverageGridVel(unsigned short val_marker, unsigned short val_span) const { + return AverageGridVel[val_marker][val_span]; + } /*! * \brief A virtual member. @@ -1541,5 +1610,35 @@ class CGeometry { */ inline virtual void ComputeMeshQualityStatistics(CConfig *config) {} + /*! + * \brief Get the sparse pattern of "type" with given level of fill. + * \note This method builds the pattern if that has not been done yet. + * \param[in] type - Finite volume or finite element. + * \param[in] fillLvl - Level of fill of the pattern. + * \return Reference to the sparse pattern. + */ + const CCompressedSparsePatternUL& GetSparsePattern(ConnectivityType type, unsigned long fillLvl); + + /*! + * \brief Get the edge to sparse pattern map. + * \note This method builds the map and required pattern (0-fill FVM) if that has not been done yet. + * \return Reference to the map. + */ + const CEdgeToNonZeroMapUL& GetEdgeToSparsePatternMap(void); + + /*! + * \brief Get the edge coloring. + * \note This method computes the coloring if that has not been done yet. + * \return Reference to the coloring. + */ + const CCompressedSparsePatternUL& GetEdgeColoring(void); + + /*! + * \brief Get the element coloring. + * \note This method computes the coloring if that has not been done yet. + * \return Reference to the coloring. 
+ */ + const CCompressedSparsePatternUL& GetElementColoring(void); + }; diff --git a/Common/include/geometry/CPhysicalGeometry.hpp b/Common/include/geometry/CPhysicalGeometry.hpp index b2428e1b97d3..8db4b2d17025 100644 --- a/Common/include/geometry/CPhysicalGeometry.hpp +++ b/Common/include/geometry/CPhysicalGeometry.hpp @@ -29,6 +29,7 @@ #include "CGeometry.hpp" #include "../CMeshReaderFVM.hpp" +#include "../toolboxes/C2DContainer.hpp" /*! * \class CPhysicalGeometry @@ -43,7 +44,7 @@ class CPhysicalGeometry final : public CGeometry { unsigned short *Global_to_Local_Marker; /*!< \brief Global to Local marker. */ unsigned long *adj_counter; /*!< \brief Adjacency counter. */ unsigned long **adjacent_elem; /*!< \brief Adjacency element list. */ - su2double* Sensitivity; /*!< \brief Vector holding the sensitivities at each point. */ + su2activematrix Sensitivity; /*!< \brief Matrix holding the sensitivities at each point. */ vector > Neighbors; map Color_List; @@ -618,114 +619,6 @@ class CPhysicalGeometry final : public CGeometry { */ void FindNormal_Neighbor(CConfig *config) override; - /*! - * \brief Retrieve total number of nodes in a simulation across all processors (including halos). - * \return Total number of nodes in a simulation across all processors (including halos). - */ - inline unsigned long GetGlobal_nPoint(void) const override { return Global_nPoint; } - - /*! - * \brief Retrieve total number of nodes in a simulation across all processors (excluding halos). - * \return Total number of nodes in a simulation across all processors (excluding halos). - */ - inline unsigned long GetGlobal_nPointDomain(void) const override { return Global_nPointDomain; } - - /*! - * \brief Retrieve total number of elements in a simulation across all processors. - * \return Total number of elements in a simulation across all processors. - */ - inline unsigned long GetGlobal_nElem(void) const override { return Global_nElem; } - - /*! 
- * \brief Retrieve total number of elements in a simulation across all processors (excluding halos). - * \return Total number of elements in a simulation across all processors (excluding halos). - */ - inline unsigned long GetGlobal_nElemDomain(void) const override { return Global_nElemDomain; } - - /*! - * \brief Retrieve total number of triangular elements in a simulation across all processors. - * \return Total number of line elements in a simulation across all processors. - */ - inline unsigned long GetGlobal_nElemLine(void) const override { return Global_nelem_edge; } - - /*! - * \brief Retrieve total number of triangular elements in a simulation across all processors. - * \return Total number of triangular elements in a simulation across all processors. - */ - inline unsigned long GetGlobal_nElemTria(void) const override { return Global_nelem_triangle; } - - /*! - * \brief Retrieve total number of quadrilateral elements in a simulation across all processors. - * \return Total number of quadrilateral elements in a simulation across all processors. - */ - inline unsigned long GetGlobal_nElemQuad(void) const override { return Global_nelem_quad; } - - /*! - * \brief Retrieve total number of tetrahedral elements in a simulation across all processors. - * \return Total number of tetrahedral elements in a simulation across all processors. - */ - inline unsigned long GetGlobal_nElemTetr(void) const override { return Global_nelem_tetra; } - - /*! - * \brief Retrieve total number of hexahedral elements in a simulation across all processors. - * \return Total number of hexahedral elements in a simulation across all processors. - */ - inline unsigned long GetGlobal_nElemHexa(void) const override { return Global_nelem_hexa; } - - /*! - * \brief Retrieve total number of prism elements in a simulation across all processors. - * \return Total number of prism elements in a simulation across all processors. 
- */ - inline unsigned long GetGlobal_nElemPris(void) const override { return Global_nelem_prism; } - - /*! - * \brief Retrieve total number of pyramid elements in a simulation across all processors. - * \return Total number of pyramid elements in a simulation across all processors. - */ - inline unsigned long GetGlobal_nElemPyra(void) const override { return Global_nelem_pyramid; } - - /*! - * \brief Get number of triangular elements. - * \return Number of line elements. - */ - inline unsigned long GetnElemLine(void) const override { return nelem_edge; } - - /*! - * \brief Get number of triangular elements. - * \return Number of triangular elements. - */ - inline unsigned long GetnElemTria(void) const override { return nelem_triangle; } - - /*! - * \brief Get number of quadrilateral elements. - * \return Number of quadrilateral elements. - */ - inline unsigned long GetnElemQuad(void) const override { return nelem_quad; } - - /*! - * \brief Get number of tetrahedral elements. - * \return Number of tetrahedral elements. - */ - inline unsigned long GetnElemTetr(void) const override { return nelem_tetra; } - - /*! - * \brief Get number of hexahedral elements. - * \return Number of hexahedral elements. - */ - inline unsigned long GetnElemHexa(void) const override { return nelem_hexa; } - - /*! - * \brief Get number of prism elements. - * \return Number of prism elements. - */ - inline unsigned long GetnElemPris(void) const override { return nelem_prism; } - - /*! - * \brief Get number of pyramid elements. - * \return Number of pyramid elements. - */ - inline unsigned long GetnElemPyra(void) const override { return nelem_pyramid; } - /*! * \brief Read the sensitivity from an input file. * \param[in] config - Definition of the particular problem. @@ -878,7 +771,7 @@ class CPhysicalGeometry final : public CGeometry { * \param[in] iDim - The component of the dim. vector. * \return The sensitivity at point iPoint and dim. iDim. 
*/ - inline su2double GetSensitivity(unsigned long iPoint, unsigned short iDim) const override { return Sensitivity[iPoint*nDim+iDim]; } + inline su2double GetSensitivity(unsigned long iPoint, unsigned short iDim) const override { return Sensitivity(iPoint,iDim); } /*! * \brief Set the Sensitivity at a specific point. @@ -886,7 +779,7 @@ class CPhysicalGeometry final : public CGeometry { * \param[in] iDim - The component of the dim. vector. * \param[in] val - Value of the sensitivity. */ - inline void SetSensitivity(unsigned long iPoint, unsigned short iDim, su2double val) override {Sensitivity[iPoint*nDim+iDim] = val;} + inline void SetSensitivity(unsigned long iPoint, unsigned short iDim, su2double val) override { Sensitivity(iPoint,iDim) = val; } /*! * \brief Check the mesh for periodicity and deactivate multigrid if periodicity is found. @@ -894,214 +787,4 @@ class CPhysicalGeometry final : public CGeometry { */ void Check_Periodicity(CConfig *config) override; - /*! - * \brief Get the average normal at a specific span for a given marker in the turbomachinery reference of frame. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - * \return The span-wise averaged turbo normal. - */ - inline const su2double* GetAverageTurboNormal(unsigned short val_marker, unsigned short val_span) const override { - return AverageTurboNormal[val_marker][val_span]; - } - - /*! - * \brief Get the average normal at a specific span for a given marker. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - * \return The span-wise averaged normal. - */ - inline const su2double* GetAverageNormal(unsigned short val_marker, unsigned short val_span) const override { - return AverageNormal[val_marker][val_span]; - } - - /*! - * \brief Get the value of the total area for each span. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - * \return The span-wise area. 
- */ - inline su2double GetSpanArea(unsigned short val_marker, unsigned short val_span) const override { - return SpanArea[val_marker][val_span]; - } - - /*! - * \brief Get the value of the total area for each span. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - * \return The span-wise averaged turbo normal. - */ - inline su2double GetTurboRadius(unsigned short val_marker, unsigned short val_span) const override { - return TurboRadius[val_marker][val_span]; - } - - /*! - * \brief Get the value of the average tangential rotational velocity for each span. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - * \return The span-wise averaged tangential velocity. - */ - inline su2double GetAverageTangGridVel(unsigned short val_marker, unsigned short val_span) const override { - return AverageTangGridVel[val_marker][val_span]; - } - - /*! - * \brief Get the value of the inflow tangential velocity at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - * \return The span-wise inflow tangential velocity. - */ - inline su2double GetTangGridVelIn(unsigned short val_marker, unsigned short val_span) const override { - return TangGridVelIn[val_marker][val_span]; - } - - /*! - * \brief Get the value of the outflow tangential velocity at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - * \return The span-wise outflow tangential velocity. - */ - inline su2double GetTangGridVelOut(unsigned short val_marker, unsigned short val_span) const override { - return TangGridVelOut[val_marker][val_span]; - } - - /*! - * \brief Get the value of the inflow area at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - * \return The span-wise inflow area. 
- */ - inline su2double GetSpanAreaIn(unsigned short val_marker, unsigned short val_span) const override { - return SpanAreaIn[val_marker][val_span]; - } - - /*! - * \brief Get the value of the outflow area at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - * \return The span-wise outflow area. - */ - inline su2double GetSpanAreaOut(unsigned short val_marker, unsigned short val_span) const override { - return SpanAreaOut[val_marker][val_span]; - } - - /*! - * \brief Get the value of the inflow radius at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - * \return The span-wise inflow radius. - */ - inline su2double GetTurboRadiusIn(unsigned short val_marker, unsigned short val_span) const override { - return TurboRadiusIn[val_marker][val_span]; - } - - /*! - * \brief Get the value of the outflow radius at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - * \return The span-wise outflow radius. - */ - inline su2double GetTurboRadiusOut(unsigned short val_marker, unsigned short val_span) const override { - return TurboRadiusOut[val_marker][val_span]; - } - - /*! - * \brief Set the value of the inflow tangential velocity at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - */ - inline void SetTangGridVelIn(su2double value, unsigned short val_marker, unsigned short val_span) override { - TangGridVelIn[val_marker][val_span] = value; - } - - /*! - * \brief Set the value of the outflow tangential velocity at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - */ - inline void SetTangGridVelOut(su2double value, unsigned short val_marker, unsigned short val_span) override { - TangGridVelOut[val_marker][val_span] = value; - } - - /*! 
- * \brief Get the value of the inflow area at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - */ - inline void SetSpanAreaIn(su2double value, unsigned short val_marker, unsigned short val_span) override { - SpanAreaIn[val_marker][val_span] = value; - } - - /*! - * \brief Set the value of the outflow area at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - */ - inline void SetSpanAreaOut(su2double value, unsigned short val_marker, unsigned short val_span) override { - SpanAreaOut[val_marker][val_span] = value; - } - - /*! - * \brief Set the value of the inflow radius at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - */ - inline void SetTurboRadiusIn(su2double value, unsigned short val_marker, unsigned short val_span) override { - TurboRadiusIn[val_marker][val_span] = value; - } - - /*! - * \brief Set the value of the outflow radius at each span. - * \param[in] val_marker - marker turbo-performance value. - * \param[in] val_span - span value. - */ - inline void SetTurboRadiusOut(su2double value, unsigned short val_marker, unsigned short val_span) override { - TurboRadiusOut[val_marker][val_span] = value; - } - - /*! - * \brief A total number of vertex independently from the MPI partions. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - */ - inline unsigned long GetnTotVertexSpan(unsigned short val_marker, unsigned short val_span) const override { - return nTotVertexSpan[val_marker][val_span]; - } - - /*! - * \brief min angular pitch independently from the MPI partions. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - */ - inline su2double GetMinAngularCoord(unsigned short val_marker, unsigned short val_span) const override { - return MinAngularCoord[val_marker][val_span]; - } - - /*! 
- * \brief max angular pitch independently from the MPI partions. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - */ - inline su2double GetMaxAngularCoord(unsigned short val_marker, unsigned short val_span) const override { - return MaxAngularCoord[val_marker][val_span]; - } - - /*! - * \brief min Relatice angular coord independently from the MPI partions. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - */ - inline su2double GetMinRelAngularCoord(unsigned short val_marker, unsigned short val_span) const override { - return MinRelAngularCoord[val_marker][val_span]; - } - - /*! - * \brief Get the average grid velocity at a specific span for a given marker. - * \param[in] val_marker - marker value. - * \param[in] val_span - span value. - */ - inline const su2double* GetAverageGridVel(unsigned short val_marker, unsigned short val_span) const override { - return AverageGridVel[val_marker][val_span]; - } - }; - diff --git a/Common/include/linear_algebra/CMatrixVectorProduct.hpp b/Common/include/linear_algebra/CMatrixVectorProduct.hpp index 1458f4c01c59..712ca6f10747 100644 --- a/Common/include/linear_algebra/CMatrixVectorProduct.hpp +++ b/Common/include/linear_algebra/CMatrixVectorProduct.hpp @@ -7,7 +7,7 @@ * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) @@ -25,7 +25,7 @@ * You should have received a copy of the GNU Lesser General Public * License along with SU2. If not, see . 
*/ - + #pragma once @@ -54,9 +54,8 @@ template class CMatrixVectorProduct { public: - virtual ~CMatrixVectorProduct() = 0; ///< class destructor - virtual void operator()(const CSysVector & u, CSysVector & v) - const = 0; ///< matrix-vector product operation + virtual ~CMatrixVectorProduct() = 0; + virtual void operator()(const CSysVector & u, CSysVector & v) const = 0; }; template CMatrixVectorProduct::~CMatrixVectorProduct() {} @@ -67,45 +66,37 @@ CMatrixVectorProduct::~CMatrixVectorProduct() {} * \brief Specialization of matrix-vector product that uses CSysMatrix class */ template -class CSysMatrixVectorProduct : public CMatrixVectorProduct { +class CSysMatrixVectorProduct final : public CMatrixVectorProduct { private: - CSysMatrix* sparse_matrix; /*!< \brief pointer to matrix that defines the product. */ - CGeometry* geometry; /*!< \brief pointer to matrix that defines the geometry. */ - CConfig* config; /*!< \brief pointer to matrix that defines the config. */ - - /*! - * \brief Default constructor of the class - * \note This class cannot be default constructed as that would leave us with invalid pointers. - */ - CSysMatrixVectorProduct(); + const CSysMatrix& matrix; /*!< \brief pointer to matrix that defines the product. */ + CGeometry* geometry; /*!< \brief geometry associated with the matrix. */ + CConfig* config; /*!< \brief config of the problem. */ public: - /*! 
* \brief constructor of the class * \param[in] matrix_ref - matrix reference that will be used to define the products * \param[in] geometry_ref - geometry associated with the problem * \param[in] config_ref - config of the problem */ - inline CSysMatrixVectorProduct(CSysMatrix & matrix_ref, - CGeometry *geometry_ref, CConfig *config_ref) { - sparse_matrix = &matrix_ref; - geometry = geometry_ref; - config = config_ref; - } + inline CSysMatrixVectorProduct(const CSysMatrix & matrix_ref, + CGeometry *geometry_ref, CConfig *config_ref) : + matrix(matrix_ref), + geometry(geometry_ref), + config(config_ref) {} /*! - * \brief destructor of the class + * \note This class cannot be default constructed as that would leave us with invalid pointers. */ - ~CSysMatrixVectorProduct() {} + CSysMatrixVectorProduct() = delete; /*! * \brief operator that defines the CSysMatrix-CSysVector product * \param[in] u - CSysVector that is being multiplied by the sparse matrix * \param[out] v - CSysVector that is the result of the product */ - inline void operator()(const CSysVector & u, CSysVector & v) const { - sparse_matrix->MatrixVectorProduct(u, v, geometry, config); + inline void operator()(const CSysVector & u, CSysVector & v) const override { + matrix.MatrixVectorProduct(u, v, geometry, config); } }; @@ -115,44 +106,36 @@ class CSysMatrixVectorProduct : public CMatrixVectorProduct { * \brief Specialization of matrix-vector product that uses CSysMatrix class for transposed products */ template -class CSysMatrixVectorProductTransposed : public CMatrixVectorProduct { +class CSysMatrixVectorProductTransposed final : public CMatrixVectorProduct { private: - CSysMatrix* sparse_matrix; /*!< \brief pointer to matrix that defines the product. */ - CGeometry* geometry; /*!< \brief pointer to matrix that defines the geometry. */ - CConfig* config; /*!< \brief pointer to matrix that defines the config. */ - - /*! 
- * \brief Default constructor of the class - * \note This class cannot be default constructed as that would leave us with invalid pointers. - */ - CSysMatrixVectorProductTransposed(); + const CSysMatrix& matrix; /*!< \brief pointer to matrix that defines the product. */ + CGeometry* geometry; /*!< \brief geometry associated with the matrix. */ + CConfig* config; /*!< \brief config of the problem. */ public: - /*! * \brief constructor of the class * \param[in] matrix_ref - matrix reference that will be used to define the products * \param[in] geometry_ref - geometry associated with the problem * \param[in] config_ref - config of the problem */ - inline CSysMatrixVectorProductTransposed(CSysMatrix & matrix_ref, - CGeometry *geometry_ref, CConfig *config_ref) { - sparse_matrix = &matrix_ref; - geometry = geometry_ref; - config = config_ref; - } + inline CSysMatrixVectorProductTransposed(const CSysMatrix & matrix_ref, + CGeometry *geometry_ref, CConfig *config_ref) : + matrix(matrix_ref), + geometry(geometry_ref), + config(config_ref) {} /*! - * \brief destructor of the class + * \note This class cannot be default constructed as that would leave us with invalid pointers. */ - ~CSysMatrixVectorProductTransposed() {} + CSysMatrixVectorProductTransposed() = delete; /*! 
* \brief operator that defines the CSysMatrix-CSysVector product * \param[in] u - CSysVector that is being multiplied by the sparse matrix * \param[out] v - CSysVector that is the result of the product */ - inline void operator()(const CSysVector & u, CSysVector & v) const { - sparse_matrix->MatrixVectorProductTransposed(u, v, geometry, config); + inline void operator()(const CSysVector & u, CSysVector & v) const override { + matrix.MatrixVectorProductTransposed(u, v, geometry, config); } }; diff --git a/Common/include/linear_algebra/CPastixWrapper.hpp b/Common/include/linear_algebra/CPastixWrapper.hpp index d683e244403b..b9914ef1f63d 100644 --- a/Common/include/linear_algebra/CPastixWrapper.hpp +++ b/Common/include/linear_algebra/CPastixWrapper.hpp @@ -7,7 +7,7 @@ * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) @@ -30,14 +30,17 @@ #ifdef HAVE_PASTIX -#include "../config_structure.hpp" -#include "../geometry/CGeometry.hpp" - namespace PaStiX { extern "C" { #include } } +#include + +using namespace std; + +class CConfig; +class CGeometry; /*! * \class CPastixWrapper diff --git a/Common/include/linear_algebra/CPreconditioner.hpp b/Common/include/linear_algebra/CPreconditioner.hpp index 7a55a6f662ce..dcc0a961dd61 100644 --- a/Common/include/linear_algebra/CPreconditioner.hpp +++ b/Common/include/linear_algebra/CPreconditioner.hpp @@ -1,13 +1,13 @@ /*! * \file CPreconditioner.hpp - * \brief Headers for the classes related to linear preconditioner wrappers. + * \brief Classes related to linear preconditioner wrappers. * The actual operations are currently implemented mostly by CSysMatrix. * \author F. Palacios, J. Hicken, T. 
Economon * \version 7.0.0 "Blackbird" * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) @@ -36,7 +36,7 @@ /*! * \class CPreconditioner - * \brief abstract base class for defining preconditioning operation + * \brief Abstract base class for defining a preconditioning operation. * \author J. Hicken. * * See the remarks regarding the CMatrixVectorProduct class. The same @@ -45,9 +45,20 @@ template class CPreconditioner { public: - virtual ~CPreconditioner() = 0; ///< class destructor - virtual void operator()(const CSysVector & u, CSysVector & v) - const = 0; ///< preconditioning operation + /*! + * \brief Destructor of the class + */ + virtual ~CPreconditioner() = 0; + + /*! + * \brief Overload of operator (), applies the preconditioner to "u" storing the result in "v". + */ + virtual void operator()(const CSysVector & u, CSysVector & v) const = 0; + + /*! + * \brief Generic "preprocessing" hook derived classes may implement to build the preconditioner. + */ + virtual void Build() {} }; template CPreconditioner::~CPreconditioner() {} @@ -55,238 +66,260 @@ CPreconditioner::~CPreconditioner() {} /*! * \class CJacobiPreconditioner - * \brief specialization of preconditioner that uses CSysMatrix class + * \brief Specialization of preconditioner that uses CSysMatrix class. */ template -class CJacobiPreconditioner : public CPreconditioner { +class CJacobiPreconditioner final : public CPreconditioner { private: - CSysMatrix* sparse_matrix; /*!< \brief pointer to matrix that defines the preconditioner. */ - CGeometry* geometry; /*!< \brief pointer to matrix that defines the geometry. */ - CConfig* config; /*!< \brief pointer to matrix that defines the config. */ - - /*! 
- * \brief Default constructor of the class - * \note This class cannot be default constructed as that would leave us with invalid pointers. - */ - CJacobiPreconditioner(); + CSysMatrix& sparse_matrix; /*!< \brief Pointer to matrix that defines the preconditioner. */ + CGeometry* geometry; /*!< \brief Pointer to geometry associated with the matrix. */ + CConfig* config; /*!< \brief Pointer to problem configuration. */ + bool transp; /*!< \brief If the transpose version of the preconditioner is required. */ public: - /*! - * \brief constructor of the class - * \param[in] matrix_ref - matrix reference that will be used to define the preconditioner - * \param[in] geometry_ref - geometry associated with the problem - * \param[in] config_ref - config of the problem + * \brief Constructor of the class. + * \param[in] matrix_ref - Matrix reference that will be used to define the preconditioner. + * \param[in] geometry_ref - Geometry associated with the problem. + * \param[in] config_ref - Config of the problem. + * \param[in] transposed - If the transpose version of the preconditioner is required. */ inline CJacobiPreconditioner(CSysMatrix & matrix_ref, - CGeometry *geometry_ref, CConfig *config_ref) { - sparse_matrix = &matrix_ref; + CGeometry *geometry_ref, CConfig *config_ref, bool transposed) : + sparse_matrix(matrix_ref) + { + if((geometry_ref == nullptr) || (config_ref == nullptr)) + SU2_MPI::Error("Preconditioner needs to be built with valid references.", CURRENT_FUNCTION); geometry = geometry_ref; config = config_ref; + transp = transposed; } /*! - * \brief destructor of the class + * \note This class cannot be default constructed as that would leave us with invalid Pointers. */ - ~CJacobiPreconditioner() {} + CJacobiPreconditioner() = delete; /*! 
* \brief operator that defines the preconditioner operation * \param[in] u - CSysVector that is being preconditioned * \param[out] v - CSysVector that is the result of the preconditioning */ - inline void operator()(const CSysVector & u, CSysVector & v) const { - sparse_matrix->ComputeJacobiPreconditioner(u, v, geometry, config); + inline void operator()(const CSysVector & u, CSysVector & v) const override { + sparse_matrix.ComputeJacobiPreconditioner(u, v, geometry, config); + } + + /*! + * \note Request the associated matrix to build the preconditioner. + */ + inline void Build() override { + sparse_matrix.BuildJacobiPreconditioner(transp); } }; /*! * \class CILUPreconditioner - * \brief specialization of preconditioner that uses CSysMatrix class + * \brief Specialization of preconditioner that uses CSysMatrix class */ template -class CILUPreconditioner : public CPreconditioner { +class CILUPreconditioner final : public CPreconditioner { private: - CSysMatrix* sparse_matrix; /*!< \brief pointer to matrix that defines the preconditioner. */ - CGeometry* geometry; /*!< \brief pointer to matrix that defines the geometry. */ - CConfig* config; /*!< \brief pointer to matrix that defines the config. */ - - /*! - * \brief Default constructor of the class - * \note This class cannot be default constructed as that would leave us with invalid pointers. - */ - CILUPreconditioner(); + CSysMatrix& sparse_matrix; /*!< \brief Pointer to matrix that defines the preconditioner. */ + CGeometry* geometry; /*!< \brief Pointer to geometry associated with the matrix. */ + CConfig* config; /*!< \brief Pointer to problem configuration. */ + bool transp; /*!< \brief If the transpose version of the preconditioner is required. */ public: - /*! 
- * \brief constructor of the class - * \param[in] matrix_ref - matrix reference that will be used to define the preconditioner - * \param[in] geometry_ref - geometry associated with the problem - * \param[in] config_ref - config of the problem + * \brief Constructor of the class. + * \param[in] matrix_ref - Matrix reference that will be used to define the preconditioner. + * \param[in] geometry_ref - Geometry associated with the problem. + * \param[in] config_ref - Config of the problem. + * \param[in] transposed - If the transpose version of the preconditioner is required. */ inline CILUPreconditioner(CSysMatrix & matrix_ref, - CGeometry *geometry_ref, CConfig *config_ref) { - sparse_matrix = &matrix_ref; + CGeometry *geometry_ref, CConfig *config_ref, bool transposed) : + sparse_matrix(matrix_ref) + { + if((geometry_ref == nullptr) || (config_ref == nullptr)) + SU2_MPI::Error("Preconditioner needs to be built with valid references.", CURRENT_FUNCTION); geometry = geometry_ref; config = config_ref; + transp = transposed; } /*! - * \brief destructor of the class + * \note This class cannot be default constructed as that would leave us with invalid Pointers. */ - ~CILUPreconditioner() {} + CILUPreconditioner() = delete; /*! - * \brief operator that defines the preconditioner operation - * \param[in] u - CSysVector that is being preconditioned - * \param[out] v - CSysVector that is the result of the preconditioning + * \brief Operator that defines the preconditioner operation. + * \param[in] u - CSysVector that is being preconditioned. + * \param[out] v - CSysVector that is the result of the preconditioning. */ - inline void operator()(const CSysVector & u, CSysVector & v) const { - sparse_matrix->ComputeILUPreconditioner(u, v, geometry, config); + inline void operator()(const CSysVector & u, CSysVector & v) const override { + sparse_matrix.ComputeILUPreconditioner(u, v, geometry, config); + } + + /*! 
+ * \note Request the associated matrix to build the preconditioner. + */ + inline void Build() override { + sparse_matrix.BuildILUPreconditioner(transp); } }; /*! * \class CLU_SGSPreconditioner - * \brief specialization of preconditioner that uses CSysMatrix class + * \brief Specialization of preconditioner that uses CSysMatrix class. */ template -class CLU_SGSPreconditioner : public CPreconditioner { +class CLU_SGSPreconditioner final : public CPreconditioner { private: - CSysMatrix* sparse_matrix; /*!< \brief pointer to matrix that defines the preconditioner. */ - CGeometry* geometry; /*!< \brief pointer to matrix that defines the geometry. */ - CConfig* config; /*!< \brief pointer to matrix that defines the config. */ - - /*! - * \brief Default constructor of the class - * \note This class cannot be default constructed as that would leave us with invalid pointers. - */ - CLU_SGSPreconditioner(); + CSysMatrix& sparse_matrix; /*!< \brief Pointer to matrix that defines the preconditioner. */ + CGeometry* geometry; /*!< \brief Pointer to geometry associated with the matrix. */ + CConfig* config; /*!< \brief Pointer to problem configuration. */ public: /*! - * \brief constructor of the class - * \param[in] matrix_ref - matrix reference that will be used to define the preconditioner - * \param[in] geometry_ref - geometry associated with the problem - * \param[in] config_ref - config of the problem + * \brief Constructor of the class. + * \param[in] matrix_ref - Matrix reference that will be used to define the preconditioner. + * \param[in] geometry_ref - Geometry associated with the problem. + * \param[in] config_ref - Config of the problem. 
*/ inline CLU_SGSPreconditioner(CSysMatrix & matrix_ref, - CGeometry *geometry_ref, CConfig *config_ref) { - sparse_matrix = &matrix_ref; + CGeometry *geometry_ref, CConfig *config_ref) : + sparse_matrix(matrix_ref) + { + if((geometry_ref == nullptr) || (config_ref == nullptr)) + SU2_MPI::Error("Preconditioner needs to be built with valid references.", CURRENT_FUNCTION); geometry = geometry_ref; config = config_ref; } /*! - * \brief destructor of the class + * \note This class cannot be default constructed as that would leave us with invalid Pointers. */ - ~CLU_SGSPreconditioner() {} + CLU_SGSPreconditioner() = delete; /*! - * \brief operator that defines the preconditioner operation - * \param[in] u - CSysVector that is being preconditioned - * \param[out] v - CSysVector that is the result of the preconditioning + * \brief operator that defines the preconditioner operation. + * \param[in] u - CSysVector that is being preconditioned. + * \param[out] v - CSysVector that is the result of the preconditioning. */ - inline void operator()(const CSysVector & u, CSysVector & v) const { - sparse_matrix->ComputeLU_SGSPreconditioner(u, v, geometry, config); + inline void operator()(const CSysVector & u, CSysVector & v) const override { + sparse_matrix.ComputeLU_SGSPreconditioner(u, v, geometry, config); } }; /*! * \class CLineletPreconditioner - * \brief specialization of preconditioner that uses CSysMatrix class + * \brief Specialization of preconditioner that uses CSysMatrix class. */ template -class CLineletPreconditioner : public CPreconditioner { +class CLineletPreconditioner final : public CPreconditioner { private: - CSysMatrix* sparse_matrix; /*!< \brief pointer to matrix that defines the preconditioner. */ - CGeometry* geometry; /*!< \brief pointer to matrix that defines the geometry. */ - CConfig* config; /*!< \brief pointer to matrix that defines the config. */ - - /*! 
- * \brief Default constructor of the class - * \note This class cannot be default constructed as that would leave us with invalid pointers. - */ - CLineletPreconditioner(); + CSysMatrix& sparse_matrix; /*!< \brief Pointer to matrix that defines the preconditioner. */ + CGeometry* geometry; /*!< \brief Pointer to geometry associated with the matrix. */ + CConfig* config; /*!< \brief Pointer to problem configuration. */ public: - /*! - * \brief constructor of the class - * \param[in] matrix_ref - matrix reference that will be used to define the preconditioner - * \param[in] geometry_ref - geometry associated with the problem - * \param[in] config_ref - config of the problem + * \brief Constructor of the class. + * \param[in] matrix_ref - Matrix reference that will be used to define the preconditioner. + * \param[in] geometry_ref - Geometry associated with the problem. + * \param[in] config_ref - Config of the problem. */ inline CLineletPreconditioner(CSysMatrix & matrix_ref, - CGeometry *geometry_ref, CConfig *config_ref) { - sparse_matrix = &matrix_ref; + CGeometry *geometry_ref, CConfig *config_ref) : + sparse_matrix(matrix_ref) + { + if((geometry_ref == nullptr) || (config_ref == nullptr)) + SU2_MPI::Error("Preconditioner needs to be built with valid references.", CURRENT_FUNCTION); geometry = geometry_ref; config = config_ref; } /*! - * \brief destructor of the class + * \note This class cannot be default constructed as that would leave us with invalid Pointers. */ - ~CLineletPreconditioner() {} + CLineletPreconditioner() = delete; /*! - * \brief operator that defines the preconditioner operation - * \param[in] u - CSysVector that is being preconditioned - * \param[out] v - CSysVector that is the result of the preconditioning + * \brief Operator that defines the preconditioner operation. + * \param[in] u - CSysVector that is being preconditioned. + * \param[out] v - CSysVector that is the result of the preconditioning. 
*/ - inline void operator()(const CSysVector & u, CSysVector & v) const { - sparse_matrix->ComputeLineletPreconditioner(u, v, geometry, config); + inline void operator()(const CSysVector & u, CSysVector & v) const override { + sparse_matrix.ComputeLineletPreconditioner(u, v, geometry, config); + } + + /*! + * \note Request the associated matrix to build the preconditioner. + */ + inline void Build() override { + sparse_matrix.BuildJacobiPreconditioner(false); } }; /*! * \class CPastixPreconditioner - * \brief Specialization of preconditioner that uses PaStiX to factorize a CSysMatrix + * \brief Specialization of preconditioner that uses PaStiX to factorize a CSysMatrix. */ template -class CPastixPreconditioner : public CPreconditioner { +class CPastixPreconditioner final : public CPreconditioner { private: - CSysMatrix* sparse_matrix; /*!< \brief Pointer to the matrix. */ + CSysMatrix& sparse_matrix; /*!< \brief Pointer to the matrix. */ CGeometry* geometry; /*!< \brief Geometry associated with the problem. */ CConfig* config; /*!< \brief Configuration of the problem. */ + unsigned short kind_fact; /*!< \brief The type of factorization desired. */ + bool transp; /*!< \brief If the transpose version of the preconditioner is required. */ public: - /*! * \brief Constructor of the class - * \param[in] matrix_ref - Matrix reference that will be used to define the preconditioner - * \param[in] geometry_ref - Associated geometry - * \param[in] config_ref - Problem configuration + * \param[in] matrix_ref - Matrix reference that will be used to define the preconditioner. + * \param[in] geometry_ref - Associated geometry. + * \param[in] config_ref - Problem configuration. + * \param[in] kind_factorization - Type of factorization required. + * \param[in] transposed - If the transpose version of the preconditioner is required. 
*/ - inline CPastixPreconditioner(CSysMatrix & matrix_ref, - CGeometry *geometry_ref, CConfig *config_ref) { - sparse_matrix = &matrix_ref; + inline CPastixPreconditioner(CSysMatrix & matrix_ref, CGeometry *geometry_ref, + CConfig *config_ref, unsigned short kind_factorization, bool transposed) : + sparse_matrix(matrix_ref) + { + if((geometry_ref == nullptr) || (config_ref == nullptr)) + SU2_MPI::Error("Preconditioner needs to be built with valid references.", CURRENT_FUNCTION); geometry = geometry_ref; config = config_ref; + kind_fact = kind_factorization; + transp = transposed; } /*! - * \brief Destructor of the class + * \note This class cannot be default constructed as that would leave us with invalid Pointers. */ - ~CPastixPreconditioner() {} + CPastixPreconditioner() = delete; /*! - * \brief Operator that defines the preconditioner operation - * \param[in] u - CSysVector that is being preconditioned - * \param[out] v - CSysVector that is the result of the preconditioning + * \brief Operator that defines the preconditioner operation. + * \param[in] u - CSysVector that is being preconditioned. + * \param[out] v - CSysVector that is the result of the preconditioning. + */ + inline void operator()(const CSysVector & u, CSysVector & v) const override { + sparse_matrix.ComputePastixPreconditioner(u, v, geometry, config); + } + + /*! + * \note Request the associated matrix to build the preconditioner. */ - inline void operator()(const CSysVector & u, CSysVector & v) const { - if (sparse_matrix == NULL) { - cerr << "CPastixPreconditioner::operator()(const CSysVector &, CSysVector &): " << endl; - cerr << "pointer to sparse matrix is NULL." 
<< endl; - throw(-1); - } - sparse_matrix->ComputePastixPreconditioner(u, v, geometry, config); + inline void Build() override { + sparse_matrix.BuildPastixPreconditioner(geometry, config, kind_fact, transp); } }; diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp index cebdc1f32293..0cd91f75ed18 100644 --- a/Common/include/linear_algebra/CSysMatrix.hpp +++ b/Common/include/linear_algebra/CSysMatrix.hpp @@ -1,7 +1,7 @@ /*! - * \file matrix_structure.hpp - * \brief Headers of the main subroutines for creating the sparse matrices-by-blocks. - * The subroutines and functions are in the matrix_structure.cpp file. + * \file CSysMatrix.hpp + * \brief Declaration of the block-sparse matrix class. + * The implementation is in CSysMatrix.cpp. * \author F. Palacios, A. Bueno, T. Economon * \version 7.0.0 "Blackbird" * @@ -28,23 +28,22 @@ #pragma once -#include "../mpi_structure.hpp" -#include -#include -#include -#include - -#include "../config_structure.hpp" -#include "../geometry/CGeometry.hpp" +#include "../../include/mpi_structure.hpp" #include "CSysVector.hpp" #include "CPastixWrapper.hpp" +#include +#include + +using namespace std; + +/*--- In forward mode the matrix is not of a built-in type. ---*/ #if defined(HAVE_MKL) && !defined(CODI_FORWARD_TYPE) #include "mkl.h" #ifndef __INTEL_MKL__ #error Could not determine the MKL version #endif -/*--- JIT is only available since 2019 ---*/ +/*--- JIT is only available since 2019. ---*/ #if __INTEL_MKL__ >= 2019 #define USE_MKL /*--- @@ -59,50 +58,61 @@ #endif #endif -using namespace std; - -const su2double eps = numeric_limits::epsilon(); /*!< \brief machine epsilon */ - +class CConfig; +class CGeometry; /*! * \class CSysMatrix - * \brief Main class for defining sparse matrices-by-blocks - with compressed row format. + * \brief Main class for defining block-compressed-row-storage sparse matrices. * \author A. Bueno, F. 
Palacios */ template class CSysMatrix { private: + /*--- We are friends with all other possible CSysMatrices. ---*/ + template friend class CSysMatrix; + int rank; /*!< \brief MPI Rank. */ int size; /*!< \brief MPI Size. */ - unsigned long nPoint, /*!< \brief Number of points in the grid. */ - nPointDomain, /*!< \brief Number of points in the grid. */ - nVar, /*!< \brief Number of variables. */ - nEqn; /*!< \brief Number of equations. */ - ScalarType *matrix; /*!< \brief Entries of the sparse matrix. */ - ScalarType *ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */ - unsigned long nnz; /*!< \brief Number of possible nonzero entries in the matrix. */ - unsigned long *row_ptr; /*!< \brief Pointers to the first element in each row. */ - unsigned long *col_ind; /*!< \brief Column index for each of the elements in val(). */ - unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */ - unsigned long *row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */ - unsigned long *col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */ - unsigned short ilu_fill_in; /*!< \brief Fill in level for the ILU preconditioner. */ - - ScalarType *block; /*!< \brief Internal array to store a subblock of the matrix. */ - ScalarType *block_inverse; /*!< \brief Internal array to store a subblock of the matrix. */ - ScalarType *block_weight; /*!< \brief Internal array to store a subblock of the matrix. */ - ScalarType *prod_row_vector; /*!< \brief Internal array to store the product of a matrix-by-blocks "row" with a vector. */ - ScalarType *aux_vector; /*!< \brief Auxiliary array to store intermediate results. */ - ScalarType *sum_vector; /*!< \brief Auxiliary array to store intermediate results. */ - ScalarType *invM; /*!< \brief Inverse of (Jacobi) preconditioner, or diagonal of ILU. */ - - unsigned long nLinelet; /*!< \brief Number of Linelets in the system. 
*/ - vector LineletBool; /*!< \brief Identify if a point belong to a Linelet. */ - vector > LineletPoint; /*!< \brief Linelet structure. */ - vector LineletUpper; /*!< \brief Pointers to the upper blocks of the tri-diag system. */ - vector LineletInvDiag; /*!< \brief Inverse of the diagonal blocks of the tri-diag system. */ - vector LineletVector; /*!< \brief Solution and RHS of the tri-diag system. */ + + enum : size_t { MAXNVAR = 8 }; /*!< \brief Maximum number of variables the matrix can handle. The static + size is needed for fast, per-thread, static memory allocation. */ + + enum { OMP_MAX_SIZE_L = 8192 }; /*!< \brief Max. chunk size used in light parallel for loops. */ + enum { OMP_MAX_SIZE_H = 512 }; /*!< \brief Max. chunk size used in heavy parallel for loops. */ + unsigned long omp_light_size; /*!< \brief Actual chunk size used in light loops (e.g. over non zeros). */ + unsigned long omp_heavy_size; /*!< \brief Actual chunk size used in heavy loops (e.g. over rows). */ + unsigned long omp_num_parts; /*!< \brief Number of threads used in thread-parallel LU_SGS and ILU. */ + unsigned long *omp_partitions; /*!< \brief Point indexes of LU_SGS and ILU thread-parallel sub partitioning. */ + + unsigned long nPoint; /*!< \brief Number of points in the grid. */ + unsigned long nPointDomain; /*!< \brief Number of points in the grid (excluding halos). */ + unsigned long nVar; /*!< \brief Number of variables. */ + unsigned long nEqn; /*!< \brief Number of equations. */ + + ScalarType *matrix; /*!< \brief Entries of the sparse matrix. */ + unsigned long nnz; /*!< \brief Number of possible nonzero entries in the matrix. */ + const unsigned long *row_ptr; /*!< \brief Pointers to the first element in each row. */ + const unsigned long *dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */ + const unsigned long *col_ind; /*!< \brief Column index for each of the elements in val(). */ + + ScalarType *ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. 
*/ + unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */ + const unsigned long *row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */ + const unsigned long *dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */ + const unsigned long *col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */ + unsigned short ilu_fill_in; /*!< \brief Fill in level for the ILU preconditioner. */ + + ScalarType *invM; /*!< \brief Inverse of (Jacobi) preconditioner, or diagonal of ILU. */ + + unsigned long nLinelet; /*!< \brief Number of Linelets in the system. */ + vector LineletBool; /*!< \brief Identify if a point belong to a Linelet. */ + vector > LineletPoint; /*!< \brief Linelet structure. */ + + /*--- Temporary (hence mutable) working memory used in the Linelet preconditioner, outer vector is for threads ---*/ + mutable vector > LineletUpper; /*!< \brief Pointers to the upper blocks of the tri-diag system (working memory). */ + mutable vector > LineletInvDiag; /*!< \brief Inverse of the diagonal blocks of the tri-diag system (working memory). */ + mutable vector > LineletVector; /*!< \brief Solution and RHS of the tri-diag system (working memory). */ #ifdef USE_MKL void * MatrixMatrixProductJitter; /*!< \brief Jitter handle for MKL JIT based GEMM. */ @@ -115,13 +125,26 @@ class CSysMatrix { dgemm_jit_kernel_t MatrixVectorProductKernelAlphaMinusOne; /*!< \brief MKL JIT based GEMV kernel with ALPHA=-1.0 and BETA=1.0. */ void * MatrixVectorProductTranspJitterBetaOne; /*!< \brief Jitter handle for MKL JIT based GEMV (transposed) with BETA=1.0. */ dgemm_jit_kernel_t MatrixVectorProductTranspKernelBetaOne; /*!< \brief MKL JIT based GEMV (transposed) kernel with BETA=1.0. */ - lapack_int * mkl_ipiv; #endif #ifdef HAVE_PASTIX - CPastixWrapper pastix_wrapper; + mutable CPastixWrapper pastix_wrapper; #endif + /*! 
+ * \brief Auxiliary object to wrap the edge map pointer used in fast block updates, i.e. without linear searches. + */ + struct { + const unsigned long *ptr = nullptr; + + inline unsigned long operator() (unsigned long edge, unsigned long node) const { + return ptr[2*edge+node]; + } + inline unsigned long ij(unsigned long edge) const { return ptr[2*edge]; } + inline unsigned long ji(unsigned long edge) const { return ptr[2*edge+1]; } + + } edge_ptr; + /*! * \brief Handle type conversion for when we Set, Add, etc. blocks, preserving derivative information (if supported by types). * \note See specializations for discrete adjoint right outside this class's declaration. @@ -141,36 +164,13 @@ class CSysMatrix { #endif } - /*! - * \brief Assigns values to the sparse-matrix structure (used in Initialize). - * \param[in] val_nPoint - Number of points in the nPoint x nPoint block structure - * \param[in] val_nVar - Number of nVar x nVar variables in each subblock of the matrix-by-block structure. - * \param[in] val_nEq - Number of nEqn x nVar variables in each subblock of the matrix-by-block structure. - * \param[in] val_row_ptr - Pointers to the first element in each row. - * \param[in] val_col_ind - Column index for each of the elements in val(). - * \param[in] val_nnz - Number of possible nonzero entries in the matrix. - * \param[in] config - Definition of the particular problem. - */ - void SetIndexes(unsigned long val_nPoint, unsigned long val_nPointDomain, unsigned short val_nVar, unsigned short val_nEq, unsigned long* val_row_ptr, unsigned long* val_col_ind, unsigned long val_nnz, CConfig *config); - - /*! - * \brief Assigns values to the sparse-matrix structure (used in Initialize). - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] iPoint - Base point to compute neighbours. - * \param[in] deep_level - Deep level for the recursive algorithm. - * \param[in] fill_level - ILU fill in level. 
- * \param[in] EdgeConnect - There is (or not) an edge structure). - * \param[in] vneighs - Storage the neighbours points to iPoint. - */ - void SetNeighbours(CGeometry *geometry, unsigned long iPoint, unsigned short deep_level, unsigned short fill_level, bool EdgeConnect, vector & vneighs); - /*! * \brief Calculates the matrix-vector product: product = matrix*vector * \param[in] matrix * \param[in] vector * \param[out] product */ - inline void MatrixVectorProduct(const ScalarType *matrix, const ScalarType *vector, ScalarType *product); + inline void MatrixVectorProduct(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const; /*! * \brief Calculates the matrix-vector product: product += matrix*vector @@ -178,7 +178,7 @@ class CSysMatrix { * \param[in] vector * \param[in,out] product */ - inline void MatrixVectorProductAdd(const ScalarType *matrix, const ScalarType *vector, ScalarType *product); + inline void MatrixVectorProductAdd(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const; /*! * \brief Calculates the matrix-vector product: product -= matrix*vector @@ -186,7 +186,7 @@ class CSysMatrix { * \param[in] vector * \param[in,out] product */ - inline void MatrixVectorProductSub(const ScalarType *matrix, const ScalarType *vector, ScalarType *product); + inline void MatrixVectorProductSub(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const; /*! * \brief Calculates the matrix-vector product: product += matrix^T * vector @@ -194,20 +194,17 @@ class CSysMatrix { * \param[in] vector * \param[in,out] product */ - inline void MatrixVectorProductTransp(const ScalarType *matrix, const ScalarType *vector, ScalarType *product); + inline void MatrixVectorProductTransp(const ScalarType *matrix, const ScalarType *vector, ScalarType *product) const; /*! 
* \brief Calculates the matrix-matrix product - * \param[in] matrix_a - * \param[in] matrix_b - * \param[out] product */ - inline void MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product); + inline void MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product) const; /*! * \brief Subtract b from a and store the result in c. */ - inline void VectorSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) { + inline void VectorSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) const { for(unsigned long iVar = 0; iVar < nVar; iVar++) c[iVar] = a[iVar] - b[iVar]; } @@ -215,47 +212,62 @@ class CSysMatrix { /*! * \brief Subtract b from a and store the result in c. */ - inline void MatrixSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) { + inline void MatrixSubtraction(const ScalarType *a, const ScalarType *b, ScalarType *c) const { for(unsigned long iVar = 0; iVar < nVar*nEqn; iVar++) c[iVar] = a[iVar] - b[iVar]; } + /*! + * \brief Copy matrix src into dst, transposing if required. + */ + inline void MatrixCopy(const ScalarType *src, ScalarType *dst, bool transposed = false) const { + if (!transposed) { + for(auto iVar = 0ul; iVar < nVar*nVar; ++iVar) + dst[iVar] = src[iVar]; + } + else { + for (auto iVar = 0ul; iVar < nVar; ++iVar) + for (auto jVar = 0ul; jVar < nVar; ++jVar) + dst[iVar*nVar+jVar] = src[jVar*nVar+iVar]; + } + } + /*! * \brief Solve a small (nVar x nVar) linear system using Gaussian elimination. * \param[in,out] matrix - On entry the system matrix, on exit the factorized matrix. * \param[in,out] vec - On entry the rhs, on exit the solution. */ - inline void Gauss_Elimination(ScalarType* matrix, ScalarType* vec); + void Gauss_Elimination(ScalarType* matrix, ScalarType* vec) const; /*! * \brief Invert a small dense matrix. - * \param[in] matrix - the matrix. 
+ * \param[in,out] matrix - On entry the system matrix, on exit the factorized matrix. * \param[out] inverse - the matrix inverse. */ - inline void MatrixInverse(const ScalarType *matrix, ScalarType *inverse); + void MatrixInverse(ScalarType *matrix, ScalarType *inverse) const; /*! - * \brief Performs the Gauss Elimination algorithm to solve the linear subsystem of the (i, i) subblock and rhs. - * \param[in] block_i - Index of the (i, i) subblock in the matrix-by-blocks structure. + * \brief Performs the Gauss Elimination algorithm to solve the linear subsystem of the (i,i) subblock and rhs. + * \param[in] block_i - Index of the (i,i) diagonal block. * \param[in] rhs - Right-hand-side of the linear system. * \param[in] transposed - If true the transposed of the block is used (default = false). * \return Solution of the linear system (overwritten on rhs). */ - inline void Gauss_Elimination(unsigned long block_i, ScalarType* rhs, bool transposed = false); + inline void Gauss_Elimination(unsigned long block_i, ScalarType* rhs, bool transposed = false) const; /*! * \brief Inverse diagonal block. * \param[in] block_i - Indexes of the block in the matrix-by-blocks structure. * \param[out] invBlock - Inverse block. */ - inline void InverseDiagonalBlock(unsigned long block_i, ScalarType *invBlock, bool transpose = false); + inline void InverseDiagonalBlock(unsigned long block_i, ScalarType *invBlock, bool transposed = false) const; /*! * \brief Inverse diagonal block. * \param[in] block_i - Indexes of the block in the matrix-by-blocks structure. * \param[out] invBlock - Inverse block. */ - inline void InverseDiagonalBlock_ILUMatrix(unsigned long block_i, ScalarType *invBlock); + inline void InverseDiagonalBlock_ILUMatrix(unsigned long block_i, ScalarType *invBlock) const; /*! * \brief Copies the block (i, j) of the matrix-by-blocks structure in the internal variable *block. 
@@ -280,29 +292,25 @@ class CSysMatrix { */ inline void SetBlockTransposed_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType *val_block); - /*! - * \brief Subtracts the specified block to the sparse matrix. - * \param[in] block_i - Indexes of the block in the matrix-by-blocks structure. - * \param[in] block_j - Indexes of the block in the matrix-by-blocks structure. - * \param[in] **val_block - Block to subtract to A(i, j). - */ - inline void SubtractBlock_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType *val_block); - /*! * \brief Performs the product of i-th row of the upper part of a sparse matrix by a vector. * \param[in] vec - Vector to be multiplied by the upper part of the sparse matrix A. * \param[in] row_i - Row of the matrix to be multiplied by vector vec. - * \return prod Result of the product U(A)*vec (stored at *prod_row_vector). + * \param[in] col_ub - Exclusive upper bound for column indices considered in multiplication. + * \param[out] prod - Result of the product U(A)*vec. */ - void UpperProduct(const CSysVector & vec, unsigned long row_i); + inline void UpperProduct(const CSysVector & vec, unsigned long row_i, + unsigned long col_ub, ScalarType *prod) const; /*! * \brief Performs the product of i-th row of the lower part of a sparse matrix by a vector. * \param[in] vec - Vector to be multiplied by the lower part of the sparse matrix A. * \param[in] row_i - Row of the matrix to be multiplied by vector vec. - * \return prod Result of the product L(A)*vec (stored at *prod_row_vector). + * \param[in] col_lb - Inclusive lower bound for column indices considered in multiplication. + * \param[out] prod - Result of the product L(A)*vec. */ - void LowerProduct(const CSysVector & vec, unsigned long row_i); + inline void LowerProduct(const CSysVector & vec, unsigned long row_i, + unsigned long col_lb, ScalarType *prod) const; /*! * \brief Performs the product of i-th row of the diagonal part of a sparse matrix by a vector. 
@@ -310,7 +318,7 @@ class CSysMatrix { * \param[in] row_i - Row of the matrix to be multiplied by vector vec. * \return prod Result of the product D(A)*vec (stored at *prod_row_vector). */ - void DiagonalProduct(const CSysVector & vec, unsigned long row_i); + inline void DiagonalProduct(const CSysVector & vec, unsigned long row_i, ScalarType *prod) const; /*! * \brief Performs the product of i-th row of a sparse matrix by a vector. @@ -318,7 +326,7 @@ class CSysMatrix { * \param[in] row_i - Row of the matrix to be multiplied by vector vec. * \return Result of the product (stored at *prod_row_vector). */ - void RowProduct(const CSysVector & vec, unsigned long row_i); + void RowProduct(const CSysVector & vec, unsigned long row_i, ScalarType *prod) const; public: @@ -334,38 +342,44 @@ class CSysMatrix { /*! * \brief Initializes sparse matrix system. - * \param[in] nVar - Number of variables. - * \param[in] nEqn - Number of equations. + * \param[in] npoint - Number of points including halos. + * \param[in] npointdomain - Number of points excluding halos. + * \param[in] nvar - Number of variables. + * \param[in] neqn - Number of equations. * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. */ - void Initialize(unsigned long nPoint, unsigned long nPointDomain, unsigned short nVar, unsigned short nEqn, + void Initialize(unsigned long npoint, unsigned long npointdomain, + unsigned short nvar, unsigned short neqn, bool EdgeConnect, CGeometry *geometry, CConfig *config); /*! * \brief Sets to zero all the entries of the sparse matrix. */ - inline void SetValZero(void) { - if(matrix != NULL) - for (unsigned long index = 0; index < nnz*nVar*nEqn; index++) - matrix[index] = 0.0; - } + void SetValZero(void); + + /*! + * \brief Sets to zero all the block diagonal entries of the sparse matrix. + */ + void SetValDiagonalZero(void); /*! 
- * \brief Routine to load a vector quantity into the data structures for MPI point-to-point communication and to launch non-blocking sends and recvs. + * \brief Routine to load a vector quantity into the data structures for MPI point-to-point + * communication and to launch non-blocking sends and recvs. * \param[in] x - CSysVector holding the array of data. * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. * \param[in] commType - Enumerated type for the quantity to be communicated. */ template - void InitiateComms(CSysVector & x, + void InitiateComms(const CSysVector & x, CGeometry *geometry, CConfig *config, - unsigned short commType); + unsigned short commType) const; /*! - * \brief Routine to complete the set of non-blocking communications launched by InitiateComms() and unpacking of the data in the vector. + * \brief Routine to complete the set of non-blocking communications launched by + * InitiateComms() and unpacking of the data in the vector. * \param[in] x - CSysVector holding the array of data. * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. @@ -375,7 +389,7 @@ class CSysMatrix { void CompleteComms(CSysVector & x, CGeometry *geometry, CConfig *config, - unsigned short commType); + unsigned short commType) const; /*! * \brief Get a pointer to the start of block "ij" @@ -388,8 +402,18 @@ class CSysMatrix { for (unsigned long index = row_ptr[block_i]; index < row_ptr[block_i+1]; index++) if (col_ind[index] == block_j) return &(matrix[index*nVar*nEqn]); + return nullptr; + } + + /*! 
+ * \brief Get a pointer to the start of block "ij", const version + */ + inline const ScalarType *GetBlock(unsigned long block_i, unsigned long block_j) const { - return NULL; + for (unsigned long index = row_ptr[block_i]; index < row_ptr[block_i+1]; index++) + if (col_ind[index] == block_j) + return &(matrix[index*nVar*nEqn]); + return nullptr; } /*! @@ -401,12 +425,11 @@ class CSysMatrix { * \return Value of the block entry. */ inline ScalarType GetBlock(unsigned long block_i, unsigned long block_j, - unsigned short iVar, unsigned short jVar) { + unsigned short iVar, unsigned short jVar) const { for (unsigned long index = row_ptr[block_i]; index < row_ptr[block_i+1]; index++) if (col_ind[index] == block_j) return matrix[index*nVar*nEqn+iVar*nEqn+jVar]; - return 0.0; } @@ -414,7 +437,7 @@ class CSysMatrix { * \brief Set the value of a block in the sparse matrix. * \param[in] block_i - Row index. * \param[in] block_j - Column index. - * \param[in] **val_block - Block to set to A(i, j). + * \param[in] val_block - Block to set to A(i, j). */ template inline void SetBlock(unsigned long block_i, unsigned long block_j, OtherType **val_block) { @@ -435,7 +458,7 @@ class CSysMatrix { * \brief Set the value of a block in the sparse matrix. * \param[in] block_i - Row index. * \param[in] block_j - Column index. - * \param[in] *val_block - Block to set to A(i, j). + * \param[in] val_block - Block to set to A(i, j). */ template inline void SetBlock(unsigned long block_i, unsigned long block_j, OtherType *val_block) { @@ -455,7 +478,7 @@ class CSysMatrix { * \brief Adds the specified block to the sparse matrix. * \param[in] block_i - Row index. * \param[in] block_j - Column index. - * \param[in] **val_block - Block to add to A(i, j). + * \param[in] val_block - Block to add to A(i, j). 
*/ template inline void AddBlock(unsigned long block_i, unsigned long block_j, OtherType **val_block) { @@ -476,7 +499,7 @@ class CSysMatrix { * \brief Subtracts the specified block to the sparse matrix. * \param[in] block_i - Row index. * \param[in] block_j - Column index. - * \param[in] **val_block - Block to subtract to A(i, j). + * \param[in] val_block - Block to subtract to A(i, j). */ template inline void SubtractBlock(unsigned long block_i, unsigned long block_j, OtherType **val_block) { @@ -493,6 +516,38 @@ class CSysMatrix { } } + /*! + * \brief Update 4 blocks ii, ij, ji, jj (add to i* sub from j*). + * \note The template parameter Sign can be used to create a "subtractive" + * update i.e. subtract from row i and add to row j instead. + * \param[in] iEdge - Index of edge that connects iPoint and jPoint. + * \param[in] iPoint - Row to which we add the blocks. + * \param[in] jPoint - Row from which we subtract the blocks. + * \param[in] block_i - Adds to ii, subs from ji. + * \param[in] block_j - Adds to ij, subs from jj. + */ + template + inline void UpdateBlocks(unsigned long iEdge, unsigned long iPoint, unsigned long jPoint, + OtherType **block_i, OtherType **block_j) { + + ScalarType *bii = &matrix[dia_ptr[iPoint]*nVar*nEqn]; + ScalarType *bjj = &matrix[dia_ptr[jPoint]*nVar*nEqn]; + ScalarType *bij = &matrix[edge_ptr(iEdge,0)*nVar*nEqn]; + ScalarType *bji = &matrix[edge_ptr(iEdge,1)*nVar*nEqn]; + + unsigned long iVar, jVar, offset = 0; + + for (iVar = 0; iVar < nVar; iVar++) { + for (jVar = 0; jVar < nEqn; jVar++) { + bii[offset] += PassiveAssign(block_i[iVar][jVar]) * Sign; + bij[offset] += PassiveAssign(block_j[iVar][jVar]) * Sign; + bji[offset] -= PassiveAssign(block_i[iVar][jVar]) * Sign; + bjj[offset] -= PassiveAssign(block_j[iVar][jVar]) * Sign; + ++offset; + } + } + } + /*! * \brief Adds the specified value to the diagonal of the (i, i) subblock * of the matrix-by-blocks structure.
@@ -554,6 +609,15 @@ class CSysMatrix { template void EnforceSolutionAtNode(const unsigned long node_i, const OtherType *x_i, CSysVector & b); + /*! + * \brief Add a scaled sparse matrix to "this" (axpy-type operation, A = A+alpha*B). + * \note Matrices must have the same sparse pattern. + * \param[in] alpha - The scaling constant. + * \param[in] B - Matrix being added (after scaling by alpha). + */ + template + void MatrixMatrixAddition(OtherType alpha, const CSysMatrix& B); + /*! * \brief Performs the product of a sparse matrix by a CSysVector. * \param[in] vec - CSysVector to be multiplied by the sparse matrix A. @@ -561,7 +625,8 @@ class CSysMatrix { * \param[in] config - Definition of the particular problem. * \param[out] prod - Result of the product. */ - void MatrixVectorProduct(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config); + void MatrixVectorProduct(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const; /*! * \brief Performs the product of a sparse matrix by a CSysVector. @@ -570,7 +635,8 @@ class CSysMatrix { * \param[in] config - Definition of the particular problem. * \param[out] prod - Result of the product. */ - void MatrixVectorProductTransposed(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config); + void MatrixVectorProductTransposed(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const; /*! * \brief Build the Jacobi preconditioner. @@ -584,7 +650,8 @@ class CSysMatrix { * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. */ - void ComputeJacobiPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config); + void ComputeJacobiPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const; /*! * \brief Build the ILU preconditioner.
@@ -599,36 +666,41 @@ class CSysMatrix { * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. */ - void ComputeILUPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config); + void ComputeILUPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const; /*! * \brief Multiply CSysVector by the preconditioner * \param[in] vec - CSysVector to be multiplied by the preconditioner. * \param[out] prod - Result of the product A*vec. */ - void ComputeLU_SGSPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config); + void ComputeLU_SGSPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const; /*! * \brief Build the Linelet preconditioner. * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. + * \return Average number of points per linelet. */ - unsigned short BuildLineletPreconditioner(CGeometry *geometry, CConfig *config); + unsigned long BuildLineletPreconditioner(CGeometry *geometry, CConfig *config); /*! * \brief Multiply CSysVector by the preconditioner * \param[in] vec - CSysVector to be multiplied by the preconditioner. * \param[out] prod - Result of the product A*vec. */ - void ComputeLineletPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config); + void ComputeLineletPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const; /*! - * \brief Compute the residual Ax-b - * \param[in] sol - CSysVector to be multiplied by the preconditioner. - * \param[in] f - Result of the product A*vec. - * \param[out] res - Result of the product A*vec. + * \brief Compute the linear residual. + * \param[in] sol - Solution (x). + * \param[in] f - Right hand side (b). + * \param[out] res - Residual (Ax-b). 
*/ - void ComputeResidual(const CSysVector & sol, const CSysVector & f, CSysVector & res); + void ComputeResidual(const CSysVector & sol, const CSysVector & f, + CSysVector & res) const; /*! * \brief Factorize matrix using PaStiX. @@ -646,7 +718,8 @@ class CSysMatrix { * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. */ - void ComputePastixPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config); + void ComputePastixPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const; }; diff --git a/Common/include/linear_algebra/CSysMatrix.inl b/Common/include/linear_algebra/CSysMatrix.inl index 0eb1e8374e7d..de11c89e55f0 100644 --- a/Common/include/linear_algebra/CSysMatrix.inl +++ b/Common/include/linear_algebra/CSysMatrix.inl @@ -1,29 +1,18 @@ /*! - * \file matrix_structure.inl - * \brief In-Line subroutines of the matrix_structure.hpp file. - * \note These are the "private" inlines, they are not needed outside of - * the .cpp file and so they are hidden to avoid triggering recompilation - * of other units when changes are made here. - * + * \file CSysMatrix.inl + * \brief Inline subroutines of the CSysMatrix.hpp file. + * \note These are the "private" inlines, they are not needed outside + * of the .cpp file and so they are hidden to avoid triggering + * recompilation of other units when changes are made here. * \author F. Palacios, A. Bueno, T. Economon * \version 7.0.0 "Blackbird" * - * The current SU2 release has been coordinated by the - * SU2 International Developers Society - * with selected contributions from the open-source community. + * SU2 Project Website: https://su2code.github.io * - * The main research teams contributing to the current release are: - * - Prof. Juan J. Alonso's group at Stanford University. - * - Prof. Piero Colonna's group at Delft University of Technology. - * - Prof. Nicolas R. 
Gauger's group at Kaiserslautern University of Technology. - * - Prof. Alberto Guardone's group at Polytechnic University of Milan. - * - Prof. Rafael Palacios' group at Imperial College London. - * - Prof. Vincent Terrapon's group at the University of Liege. - * - Prof. Edwin van der Weide's group at the University of Twente. - * - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics. + * The SU2 Project is maintained by the SU2 Foundation + * (http://su2foundation.org) * - * Copyright 2012-2019, Francisco D. Palacios, Thomas D. Economon, - * Tim Albring, and the SU2 contributors. + * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) * * SU2 is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -43,18 +32,25 @@ #include "CSysMatrix.hpp" +#if defined(_MSC_VER) + #define FORCEINLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) + #define FORCEINLINE inline __attribute__((always_inline)) +#else + #define FORCEINLINE inline +#endif + template -inline ScalarType *CSysMatrix::GetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j) { +FORCEINLINE ScalarType *CSysMatrix::GetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j) { for (unsigned long index = row_ptr_ilu[block_i]; index < row_ptr_ilu[block_i+1]; index++) if (col_ind_ilu[index] == block_j) - return &(ILU_matrix[index*nVar*nEqn]); - - return NULL; + return &ILU_matrix[index*nVar*nEqn]; + return nullptr; } template -inline void CSysMatrix::SetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType *val_block) { +FORCEINLINE void CSysMatrix::SetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType *val_block) { unsigned long iVar, index; @@ -65,11 +61,10 @@ inline void CSysMatrix::SetBlock_ILUMatrix(unsigned long block_i, un break; } } - } template -inline void CSysMatrix::SetBlockTransposed_ILUMatrix(unsigned long block_i, unsigned long 
block_j, ScalarType *val_block) { +FORCEINLINE void CSysMatrix::SetBlockTransposed_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType *val_block) { unsigned long iVar, jVar, index; @@ -81,23 +76,10 @@ inline void CSysMatrix::SetBlockTransposed_ILUMatrix(unsigned long b break; } } - -} - -template -inline void CSysMatrix::SubtractBlock_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType *val_block) { - - for (unsigned long index = row_ptr_ilu[block_i]; index < row_ptr_ilu[block_i+1]; index++) { - if (col_ind_ilu[index] == block_j) { - MatrixSubtraction(&ILU_matrix[index*nVar*nEqn], val_block, &ILU_matrix[index*nVar*nEqn]); - break; - } - } - } template -inline void gemv_impl(const unsigned long n, const T *a, const T *b, T *c) { +FORCEINLINE void gemv_impl(const unsigned long n, const T *a, const T *b, T *c) { /*--- This is a templated version of GEMV with the constants as boolean template parameters so that they can be optimized away at compilation. @@ -114,7 +96,7 @@ inline void gemv_impl(const unsigned long n, const T *a, const T *b, T *c) { } template -inline void gemm_impl(const unsigned long n, const T *a, const T *b, T *c) { +FORCEINLINE void gemm_impl(const unsigned long n, const T *a, const T *b, T *c) { /*--- Same deal as for GEMV but here only the type is templated. 
---*/ unsigned long i, j, k; for (i = 0; i < n; i++) { @@ -127,7 +109,7 @@ inline void gemm_impl(const unsigned long n, const T *a, const T *b, T *c) { } #define __MATVECPROD_SIGNATURE__(TYPE,NAME) \ -inline void CSysMatrix::NAME(const TYPE *matrix, const TYPE *vector, TYPE *product) +FORCEINLINE void CSysMatrix::NAME(const TYPE *matrix, const TYPE *vector, TYPE *product) const #define MATVECPROD_SIGNATURE(NAME) template __MATVECPROD_SIGNATURE__(ScalarType,NAME) @@ -153,7 +135,7 @@ MATVECPROD_SIGNATURE( MatrixVectorProductTransp ) { } template -inline void CSysMatrix::MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product) { +FORCEINLINE void CSysMatrix::MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product) const { gemm_impl(nVar, matrix_a, matrix_b, product); } #else @@ -179,7 +161,7 @@ MATVECPROD_SIGNATURE( MatrixVectorProductTransp ) { } template -inline void CSysMatrix::MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product) { +FORCEINLINE void CSysMatrix::MatrixMatrixProduct(const ScalarType *matrix_a, const ScalarType *matrix_b, ScalarType *product) const { MatrixMatrixProductKernel(MatrixMatrixProductJitter, const_cast(matrix_a), const_cast(matrix_b), product ); } @@ -203,7 +185,7 @@ MATVECPROD_SPECIALIZATION( MatrixVectorProductTransp ) { } template<> -inline void CSysMatrix::MatrixMatrixProduct(const su2double *matrix_a, const su2double *matrix_b, su2double *product) { +FORCEINLINE void CSysMatrix::MatrixMatrixProduct(const su2double *matrix_a, const su2double *matrix_b, su2double *product) const { gemm_impl(nVar, matrix_a, matrix_b, product); } #undef MATVECPROD_SPECIALIZATION @@ -214,146 +196,66 @@ inline void CSysMatrix::MatrixMatrixProduct(const su2double *matrix_a #undef __MATVECPROD_SIGNATURE__ template -inline void CSysMatrix::Gauss_Elimination(ScalarType* matrix, ScalarType* vec) { +FORCEINLINE void 
CSysMatrix::Gauss_Elimination(unsigned long block_i, ScalarType* rhs, bool transposed) const { - /*--- - This is a relatively large method to inline but maybe better - code will be generated for the special case nVar=1 this way. - ---*/ - - if (nVar==1) {vec[0] /= matrix[0]; return;} - -#ifdef USE_MKL_LAPACK - // With MKL_DIRECT_CALL enabled, this is significantly faster than native code on Intel Architectures. - LAPACKE_dgetrf( LAPACK_ROW_MAJOR, nVar, nVar, matrix, nVar, mkl_ipiv ); - LAPACKE_dgetrs( LAPACK_ROW_MAJOR, 'N', nVar, 1, matrix, nVar, mkl_ipiv, vec, 1 ); -#else - int iVar, jVar, kVar, nvar = int(nVar); - ScalarType weight; - - /*--- Transform system in Upper Matrix ---*/ - for (iVar = 1; iVar < nvar; iVar++) { - for (jVar = 0; jVar < iVar; jVar++) { - weight = matrix[iVar*nvar+jVar] / matrix[jVar*nvar+jVar]; - for (kVar = jVar; kVar < nvar; kVar++) - matrix[iVar*nvar+kVar] -= weight*matrix[jVar*nvar+kVar]; - vec[iVar] -= weight*vec[jVar]; - } - } + /*--- Copy block, as the algorithm modifies the matrix ---*/ + ScalarType block[MAXNVAR*MAXNVAR]; + MatrixCopy(&matrix[dia_ptr[block_i]*nVar*nVar], block, transposed); - /*--- Backwards substitution ---*/ - for (iVar = nvar-1; iVar >= 0; iVar--) { - for (jVar = iVar+1; jVar < nvar; jVar++) - vec[iVar] -= matrix[iVar*nvar+jVar]*vec[jVar]; - vec[iVar] /= matrix[iVar*nvar+iVar]; - } -#endif + Gauss_Elimination(block, rhs); } template -inline void CSysMatrix::MatrixInverse(const ScalarType *matrix, ScalarType *inverse) { +FORCEINLINE void CSysMatrix::InverseDiagonalBlock(unsigned long block_i, ScalarType *invBlock, bool transposed) const { - /*--- - This is a generalization of Gaussian elimination for multiple rhs' (the basis vectors). - We could call "Gauss_Elimination" multiple times or fully generalize it for multiple rhs, - the performance of both routines would suffer in both cases without the use of exotic templating. - And so it feels reasonable to have some duplication here. 
- ---*/ - - if (nVar==1) {inverse[0] = 1.0/matrix[0]; return;} - - int iVar, jVar, nvar = int(nVar); + /*--- Copy block, as the algorithm modifies the matrix ---*/ + ScalarType block[MAXNVAR*MAXNVAR]; + MatrixCopy(&matrix[dia_ptr[block_i]*nVar*nVar], block, transposed); - /*--- Initialize the inverse and make a copy of the matrix ---*/ - for (iVar = 0; iVar < nvar; iVar++) { - for (jVar = 0; jVar < nvar; jVar++) { - block[iVar*nvar+jVar] = matrix[iVar*nvar+jVar]; - inverse[iVar*nvar+jVar] = ScalarType(iVar==jVar); // identity - } - } + MatrixInverse(block, invBlock); +} - /*--- Inversion ---*/ -#ifdef USE_MKL_LAPACK - // With MKL_DIRECT_CALL enabled, this is significantly faster than native code on Intel Architectures. - LAPACKE_dgetrf( LAPACK_ROW_MAJOR, nVar, nVar, block, nVar, mkl_ipiv ); - LAPACKE_dgetrs( LAPACK_ROW_MAJOR, 'N', nVar, nVar, block, nVar, mkl_ipiv, inverse, nVar ); -#else - int kVar; - ScalarType weight; +template +FORCEINLINE void CSysMatrix::InverseDiagonalBlock_ILUMatrix(unsigned long block_i, ScalarType *invBlock) const { - /*--- Transform system in Upper Matrix ---*/ - for (iVar = 1; iVar < nvar; iVar++) { - for (jVar = 0; jVar < iVar; jVar++) - { - weight = block[iVar*nvar+jVar] / block[jVar*nvar+jVar]; + /*--- Copy block, as the algorithm modifies the matrix ---*/ + ScalarType block[MAXNVAR*MAXNVAR]; + MatrixCopy(&ILU_matrix[dia_ptr_ilu[block_i]*nVar*nVar], block, false); - for (kVar = jVar; kVar < nvar; kVar++) - block[iVar*nvar+kVar] -= weight*block[jVar*nvar+kVar]; + MatrixInverse(block, invBlock); +} - /*--- at this stage "inverse" is lower triangular so not all cols need updating ---*/ - for (kVar = 0; kVar <= jVar; kVar++) - inverse[iVar*nvar+kVar] -= weight*inverse[jVar*nvar+kVar]; - } - } +template +FORCEINLINE void CSysMatrix::UpperProduct(const CSysVector & vec, unsigned long row_i, + unsigned long col_ub, ScalarType *prod) const { + unsigned long iVar, index, col_j; - /*--- Backwards substitution ---*/ - for (iVar = nvar-1; iVar 
>= 0; iVar--) - { - for (jVar = iVar+1; jVar < nvar; jVar++) - for (kVar = 0; kVar < nvar; kVar++) - inverse[iVar*nvar+kVar] -= block[iVar*nvar+jVar] * inverse[jVar*nvar+kVar]; + for (iVar = 0; iVar < nVar; iVar++) prod[iVar] = 0.0; - for (kVar = 0; kVar < nvar; kVar++) - inverse[iVar*nvar+kVar] /= block[iVar*nvar+iVar]; + for (index = dia_ptr[row_i]+1; index < row_ptr[row_i+1]; index++) { + col_j = col_ind[index]; + if (col_j < col_ub) + MatrixVectorProductAdd(&matrix[index*nVar*nVar], &vec[col_j*nVar], prod); } -#endif } template -inline void CSysMatrix::Gauss_Elimination(unsigned long block_i, ScalarType* rhs, bool transposed) { +FORCEINLINE void CSysMatrix::LowerProduct(const CSysVector & vec, unsigned long row_i, + unsigned long col_lb, ScalarType *prod) const { + unsigned long iVar, index, col_j; - unsigned long iVar, jVar; - ScalarType *Block = GetBlock(block_i, block_i); - - /*--- Copy block, as the algorithm modifies the matrix ---*/ + for (iVar = 0; iVar < nVar; iVar++) prod[iVar] = 0.0; - if (!transposed) { - // If source and dest overlap higher level problems occur, so memcpy is safe. And it is faster. 
- memcpy( block, Block, nVar*nVar*sizeof(ScalarType) ); - -// for (iVar = 0; iVar < nVar*nVar; iVar++) -// block[iVar] = Block[iVar]; - - } else { - for (iVar = 0; iVar < nVar; iVar++) - for (jVar = 0; jVar < nVar; jVar++) - block[iVar*nVar+jVar] = Block[jVar*nVar+iVar]; + for (index = row_ptr[row_i]; index < dia_ptr[row_i]; index++) { + col_j = col_ind[index]; + if (col_j >= col_lb) + MatrixVectorProductAdd(&matrix[index*nVar*nVar], &vec[col_j*nVar], prod); } - - /*--- Solve system ---*/ - - Gauss_Elimination(block, rhs); - -} - -template -inline void CSysMatrix::InverseDiagonalBlock(unsigned long block_i, ScalarType *invBlock, bool transpose) { - - const ScalarType* mat = GetBlock(block_i, block_i); - MatrixInverse(mat, invBlock); - - if (transpose) // swap off-diag - for (unsigned long iVar = 0; iVar < nVar-1; ++iVar) - for (unsigned long jVar = iVar+1; jVar < nVar; ++jVar) { - ScalarType tmp = invBlock[iVar*nVar+jVar]; - invBlock[iVar*nVar+jVar] = invBlock[jVar*nVar+iVar]; - invBlock[jVar*nVar+iVar] = tmp; - } } template -inline void CSysMatrix::InverseDiagonalBlock_ILUMatrix(unsigned long block_i, ScalarType *invBlock) { +FORCEINLINE void CSysMatrix::DiagonalProduct(const CSysVector & vec, + unsigned long row_i, ScalarType *prod) const { - const ScalarType* mat = GetBlock_ILUMatrix(block_i, block_i); - MatrixInverse(mat, invBlock); + MatrixVectorProduct(&matrix[dia_ptr[row_i]*nVar*nVar], &vec[row_i*nVar], prod); } diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index 661972f05421..a2e8f1b67845 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -7,7 +7,7 @@ * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. 
AUTHORS.md) @@ -31,8 +31,6 @@ #include "../mpi_structure.hpp" -#include -#include #include #include #include @@ -40,13 +38,13 @@ #include #include -#include "../option_structure.hpp" -#include "../config_structure.hpp" -#include "../geometry/CGeometry.hpp" #include "CSysVector.hpp" -#include "CSysMatrix.hpp" -#include "CMatrixVectorProduct.hpp" -#include "CPreconditioner.hpp" + +class CConfig; +class CGeometry; +template class CSysMatrix; +template class CMatrixVectorProduct; +template class CPreconditioner; using namespace std; @@ -75,26 +73,26 @@ class CSysSolve { bool mesh_deform; /*!< \brief Operate in mesh deformation mode, changes the source of solver options. */ ScalarType Residual; /*!< \brief Residual at the end of a call to Solve. */ - bool cg_ready; /*!< \brief Indicate if memory used by CG is allocated. */ - bool bcg_ready; /*!< \brief Indicate if memory used by BCGSTAB is allocated. */ - bool gmres_ready; /*!< \brief Indicate if memory used by FGMRES is allocated. */ - bool smooth_ready; /*!< \brief Indicate if memory used by SMOOTHER is allocated. */ + mutable bool cg_ready; /*!< \brief Indicate if memory used by CG is allocated. */ + mutable bool bcg_ready; /*!< \brief Indicate if memory used by BCGSTAB is allocated. */ + mutable bool gmres_ready; /*!< \brief Indicate if memory used by FGMRES is allocated. */ + mutable bool smooth_ready; /*!< \brief Indicate if memory used by SMOOTHER is allocated. */ - VectorType r; /*!< \brief Residual in CG and BCGSTAB. */ - VectorType A_x; /*!< \brief Result of matrix-vector product in CG and BCGSTAB. */ - VectorType p; /*!< \brief Direction in CG and BCGSTAB. */ - VectorType z; /*!< \brief Preconditioned residual/direction in CG/BCGSTAB. */ + mutable VectorType r; /*!< \brief Residual in CG and BCGSTAB. */ + mutable VectorType A_x; /*!< \brief Result of matrix-vector product in CG and BCGSTAB. */ + mutable VectorType p; /*!< \brief Direction in CG and BCGSTAB. 
*/ + mutable VectorType z; /*!< \brief Preconditioned residual/direction in CG/BCGSTAB. */ - VectorType r_0; /*!< \brief The "arbitrary" vector in BCGSTAB. */ - VectorType v; /*!< \brief BCGSTAB "v" vector (v = A * M^-1 * p). */ + mutable VectorType r_0; /*!< \brief The "arbitrary" vector in BCGSTAB. */ + mutable VectorType v; /*!< \brief BCGSTAB "v" vector (v = A * M^-1 * p). */ - vector W; /*!< \brief Large matrix used by FGMRES, w^i+1 = A * z^i. */ - vector Z; /*!< \brief Large matrix used by FGMRES, preconditioned W. */ + mutable vector W; /*!< \brief Large matrix used by FGMRES, w^i+1 = A * z^i. */ + mutable vector Z; /*!< \brief Large matrix used by FGMRES, preconditioned W. */ - VectorType LinSysRes_tmp; /*!< \brief Temporary used when it is necessary to interface between active and passive types. */ - VectorType LinSysSol_tmp; /*!< \brief Temporary used when it is necessary to interface between active and passive types. */ - VectorType* LinSysRes_ptr; /*!< \brief Pointer to appropriate LinSysRes (set to original or temporary in call to Solve). */ - VectorType* LinSysSol_ptr; /*!< \brief Pointer to appropriate LinSysSol (set to original or temporary in call to Solve). */ + VectorType LinSysSol_tmp; /*!< \brief Temporary used when it is necessary to interface between active and passive types. */ + VectorType LinSysRes_tmp; /*!< \brief Temporary used when it is necessary to interface between active and passive types. */ + VectorType* LinSysSol_ptr; /*!< \brief Pointer to appropriate LinSysSol (set to original or temporary in call to Solve). */ + const VectorType* LinSysRes_ptr; /*!< \brief Pointer to appropriate LinSysRes (set to original or temporary in call to Solve). */ /*! 
* \brief sign transfer function @@ -105,7 +103,7 @@ class CSysSolve { * so, feel free to delete this and replace it as needed with the * appropriate global function */ - inline ScalarType Sign(const ScalarType & x, const ScalarType & y) const { + static inline ScalarType Sign(ScalarType x, ScalarType y) { if (y == 0.0) return 0.0; return fabs(x) * (y < 0.0 ? -1.0 : 1.0); } @@ -117,7 +115,7 @@ class CSysSolve { * \param[in,out] h1 - first element of 2x1 vector being transformed * \param[in,out] h2 - second element of 2x1 vector being transformed */ - void ApplyGivens(const ScalarType & s, const ScalarType & c, ScalarType & h1, ScalarType & h2); + void ApplyGivens(ScalarType s, ScalarType c, ScalarType & h1, ScalarType & h2) const; /*! * \brief generates the Givens rotation matrix for a given 2-vector @@ -129,7 +127,7 @@ class CSysSolve { * Based on givens() of SPARSKIT, which is based on p.202 of * "Matrix Computations" by Golub and van Loan. */ - void GenerateGivens(ScalarType & dx, ScalarType & dy, ScalarType & s, ScalarType & c); + void GenerateGivens(ScalarType & dx, ScalarType & dy, ScalarType & s, ScalarType & c) const; /*! * \brief finds the solution of the upper triangular system Hsbg*x = rhs @@ -142,17 +140,16 @@ class CSysSolve { * \pre the upper Hessenberg matrix has been transformed into a * triangular matrix. */ - void SolveReduced(const int & n, const vector > & Hsbg, - const vector & rhs, vector & x); + void SolveReduced(int n, const vector > & Hsbg, + const vector & rhs, vector & x) const; /*! 
* \brief Modified Gram-Schmidt orthogonalization * \author Based on Kesheng John Wu's mgsro subroutine in Saad's SPARSKIT * - * \tparam Vec - a generic vector class * \param[in] i - index indicating which vector in w is being orthogonalized - * \param[in, out] Hsbg - the upper Hessenberg begin updated - * \param[in, out] w - the (i+1)th vector of w is orthogonalized against the + * \param[in,out] Hsbg - the upper Hessenberg begin updated + * \param[in,out] w - the (i+1)th vector of w is orthogonalized against the * previous vectors in w * * \pre the vectors w[0:i] are orthonormal @@ -163,7 +160,7 @@ class CSysSolve { * vector is kept in nrm0 and updated after operating with each vector * */ - void ModGramSchmidt(int i, vector > & Hsbg, vector & w); + void ModGramSchmidt(int i, vector > & Hsbg, vector & w) const; /*! * \brief writes header information for a CSysSolve residual history @@ -173,24 +170,39 @@ class CSysSolve { * * \pre the ostream object os should be open */ - void WriteHeader(const string & solver, const ScalarType & restol, const ScalarType & resinit); + void WriteHeader(string solver, ScalarType restol, ScalarType resinit) const; /*! * \brief writes residual convergence data for one iteration to a stream * \param[in] iter - current iteration - * \param[in] res - the (absolute) residual norm value - * \param[in] resinit - the initial residual norm + * \param[in] res - the residual norm to display * * \pre the ostream object os should be open */ - void WriteHistory(const int & iter, const ScalarType & res, const ScalarType & resinit); + void WriteHistory(unsigned long iter, ScalarType res) const; + + /*! + * \brief writes final residual convergence information + * \param[in] solver - string describing the solver + * \param[in] iter - current iteration + * \param[in] res - the residual norm + */ + void WriteFinalResidual(string solver, unsigned long iter, ScalarType res) const; + + /*! 
+ * \brief writes the convergence warning + * \param[in] res_calc - the residual norm computed iteratively + * \param[in] res_true - the recomputed residual norm + * \param[in] tol - the residual norm tolerance + */ + void WriteWarning(ScalarType res_calc, ScalarType res_true, ScalarType tol) const; /*! * \brief Used by Solve for compatibility between passive and active CSysVector, see specializations. * \param[in] LinSysRes - Linear system residual * \param[in,out] LinSysSol - Linear system solution */ - void HandleTemporariesIn(CSysVector & LinSysRes, CSysVector & LinSysSol); + void HandleTemporariesIn(const CSysVector & LinSysRes, CSysVector & LinSysSol); /*! * \brief Used by Solve for compatibility between passive and active CSysVector, see specializations. @@ -213,12 +225,13 @@ class CSysSolve { * \param[in] precond - object that defines preconditioner * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum size of the search subspace + * \param[out] residual - final normalized residual * \param[in] monitoring - turn on priting residuals from solver to screen. * \param[in] config - Definition of the particular problem. */ - unsigned long CG_LinSolver(const VectorType & b, VectorType & x, ProductType & mat_vec, - PrecondType & precond, ScalarType tol, unsigned long m, - ScalarType *residual, bool monitoring, CConfig *config); + unsigned long CG_LinSolver(const VectorType & b, VectorType & x, const ProductType & mat_vec, + const PrecondType & precond, ScalarType tol, unsigned long m, + ScalarType & residual, bool monitoring, CConfig *config) const; /*! 
* \brief Flexible Generalized Minimal Residual method @@ -228,13 +241,13 @@ class CSysSolve { * \param[in] precond - object that defines preconditioner * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum size of the search subspace - * \param[in] residual - norm of final residual + * \param[out] residual - final normalized residual * \param[in] monitoring - turn on priting residuals from solver to screen. * \param[in] config - Definition of the particular problem. */ - unsigned long FGMRES_LinSolver(const VectorType & b, VectorType & x, ProductType & mat_vec, - PrecondType & precond, ScalarType tol, unsigned long m, - ScalarType *residual, bool monitoring, CConfig *config); + unsigned long FGMRES_LinSolver(const VectorType & b, VectorType & x, const ProductType & mat_vec, + const PrecondType & precond, ScalarType tol, unsigned long m, + ScalarType & residual, bool monitoring, CConfig *config) const; /*! * \brief Biconjugate Gradient Stabilized Method (BCGSTAB) @@ -244,13 +257,13 @@ class CSysSolve { * \param[in] precond - object that defines preconditioner * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum size of the search subspace - * \param[in] residual - norm of final residual + * \param[out] residual - final normalized residual * \param[in] monitoring - turn on priting residuals from solver to screen. * \param[in] config - Definition of the particular problem. */ - unsigned long BCGSTAB_LinSolver(const VectorType & b, VectorType & x, ProductType & mat_vec, - PrecondType & precond, ScalarType tol, unsigned long m, - ScalarType *residual, bool monitoring, CConfig *config); + unsigned long BCGSTAB_LinSolver(const VectorType & b, VectorType & x, const ProductType & mat_vec, + const PrecondType & precond, ScalarType tol, unsigned long m, + ScalarType & residual, bool monitoring, CConfig *config) const; /*! 
* \brief Generic smoother (modified Richardson iteration with preconditioner) @@ -260,34 +273,34 @@ class CSysSolve { * \param[in] precond - object that defines preconditioner * \param[in] tol - tolerance with which to solve the system * \param[in] m - maximum number of iterations - * \param[in] residual - norm of final residual + * \param[out] residual - final normalized residual * \param[in] monitoring - turn on priting residuals from solver to screen. * \param[in] config - Definition of the particular problem. */ - unsigned long Smoother_LinSolver(const VectorType & b, VectorType & x, ProductType & mat_vec, - PrecondType & precond, ScalarType tol, unsigned long m, - ScalarType *residual, bool monitoring, CConfig *config); + unsigned long Smoother_LinSolver(const VectorType & b, VectorType & x, const ProductType & mat_vec, + const PrecondType & precond, ScalarType tol, unsigned long m, + ScalarType & residual, bool monitoring, CConfig *config) const; /*! * \brief Solve the linear system using a Krylov subspace method * \param[in] Jacobian - Jacobian Matrix for the linear system * \param[in] LinSysRes - Linear system residual - * \param[in] LinSysSol - Linear system solution + * \param[in,out] LinSysSol - Linear system solution * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. */ - unsigned long Solve(MatrixType & Jacobian, CSysVector & LinSysRes, CSysVector & LinSysSol, + unsigned long Solve(MatrixType & Jacobian, const CSysVector & LinSysRes, CSysVector & LinSysSol, CGeometry *geometry, CConfig *config); /*! * \brief Solve the adjoint linear system using a Krylov subspace method * \param[in] Jacobian - Jacobian Matrix for the linear system * \param[in] LinSysRes - Linear system residual - * \param[in] LinSysSol - Linear system solution + * \param[in,out] LinSysSol - Linear system solution * \param[in] geometry - Geometrical definition of the problem. 
* \param[in] config - Definition of the particular problem. */ - unsigned long Solve_b(MatrixType & Jacobian, CSysVector & LinSysRes, CSysVector & LinSysSol, + unsigned long Solve_b(MatrixType & Jacobian, const CSysVector & LinSysRes, CSysVector & LinSysSol, CGeometry *geometry, CConfig *config); /*! diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index d58566d3342d..cc7f73e9d709 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -1,13 +1,13 @@ /*! - * \file vector_structure.hpp - * \brief Headers for the classes related to linear solvers (CG, FGMRES, etc) - * The subroutines and functions are in the linear_solvers_structure.cpp file. + * \file CSysVector.hpp + * \brief Declaration of the vector class used in the solution of + * large, distributed, sparse linear systems. * \author F. Palacios, J. Hicken, T. Economon * \version 7.0.0 "Blackbird" * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) @@ -26,24 +26,11 @@ * License along with SU2. If not, see . */ - #pragma once -#include "../mpi_structure.hpp" - -#include #include -#include -#include -#include -#include #include -using namespace std; - -/*--- Forward declaration of template friend functions. ---*/ -template class CSysVector; -template T dotProd(const CSysVector & u, const CSysVector & v); /*! * \class CSysVector @@ -52,22 +39,33 @@ template T dotProd(const CSysVector & u, const CSysVector & v); * * We could use the STL vector as a base class here, but this gives us * more flexibility with the underlying data (e.g. we may decide to - * use a block storage scheme rather than a continuous storage - * scheme). + * use a block storage scheme rather than a continuous storage scheme). 
*/ template class CSysVector { private: - unsigned long nElm; /*!< \brief total number of elements (or number elements on this processor) */ - unsigned long nElmDomain; /*!< \brief total number of elements (or number elements on this processor without Ghost cells) */ -#ifdef HAVE_MPI - unsigned long nElmGlobal; /*!< \brief total number of elements over all processors */ -#endif - unsigned short nVar; /*!< \brief number of elements in a block */ - unsigned long nBlk; /*!< \brief number of blocks (or number of blocks on this processor) */ - unsigned long nBlkDomain; /*!< \brief number of blocks (or number of blocks on this processor without Ghost cells) */ - ScalarType* vec_val; /*!< \brief storage for the element values */ + enum { OMP_MAX_SIZE = 4096 }; /*!< \brief Maximum chunk size used in parallel for loops. */ + + unsigned long omp_chunk_size; /*!< \brief Static chunk size used in loop, determined at initialization. */ + ScalarType* vec_val; /*!< \brief storage for the element values, 64 byte aligned (do not use normal new/delete) */ + unsigned long nElm; /*!< \brief total number of elements (or number elements on this processor) */ + unsigned long nElmDomain; /*!< \brief total number of elements (or number elements on this processor without Ghost cells) */ + unsigned long nVar; /*!< \brief number of elements in a block */ + mutable ScalarType dotRes; /*!< \brief result of dot product. to perform a reduction with OpenMP the + variable needs to be declared outside the parallel region */ + + /*! + * \brief Generic initialization from a scalar or array. + * \note If val==nullptr vec_val is not initialized, only allocated. 
+ * \param[in] numBlk - number of blocks locally + * \param[in] numBlkDomain - number of blocks locally (without g cells) + * \param[in] numVar - number of variables in each block + * \param[in] val - default value for elements + * \param[in] valIsArray - if true val is treated as array + */ + void Initialize(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, + const ScalarType* val, bool valIsArray); public: @@ -81,29 +79,21 @@ class CSysVector { * \param[in] size - number of elements locally * \param[in] val - default value for elements */ - CSysVector(const unsigned long & size, const ScalarType & val = 0.0); + CSysVector(unsigned long size, ScalarType val = 0.0) { + nElm = 0; vec_val = nullptr; + Initialize(size, size, 1, &val, false); + } /*! * \brief constructor of the class. * \param[in] numBlk - number of blocks locally - * \param[in] numBlkDomain + * \param[in] numBlkDomain - number of blocks locally (without g cells) * \param[in] numVar - number of variables in each block * \param[in] val - default value for elements */ - CSysVector(const unsigned long & numBlk, const unsigned long & numBlkDomain, const unsigned short & numVar, const ScalarType & val = 0.0); - - /*! - * \brief copy constructor of the class. - * \param[in] u - CSysVector that is being copied - */ - CSysVector(const CSysVector & u); - - /*! - * \brief Sets to zero all the entries of the vector. - */ - inline void SetValZero(void) { - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] = 0.0; + CSysVector(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, ScalarType val = 0.0) { + nElm = 0; vec_val = nullptr; + Initialize(numBlk, numBlkDomain, numVar, &val, false); } /*! 
@@ -111,7 +101,10 @@ class CSysVector { * \param[in] size - number of elements locally * \param[in] u_array - vector stored as array being copied */ - explicit CSysVector(const unsigned long & size, const ScalarType* u_array); + explicit CSysVector(unsigned long size, const ScalarType* u_array) { + nElm = 0; vec_val = nullptr; + Initialize(size, size, 1, u_array, true); + } /*! * \brief constructor from array @@ -120,22 +113,54 @@ class CSysVector { * \param[in] numVar - number of variables in each block * \param[in] u_array - vector stored as array being copied */ - explicit CSysVector(const unsigned long & numBlk, const unsigned long & numBlkDomain, const unsigned short & numVar, - const ScalarType* u_array); + explicit CSysVector(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, const ScalarType* u_array) { + nElm = 0; vec_val = nullptr; + Initialize(numBlk, numBlkDomain, numVar, u_array, true); + } + + /*! + * \brief copy constructor of the class. + * \param[in] u - CSysVector that is being copied + */ + CSysVector(const CSysVector & u) { + nElm = 0; vec_val = nullptr; + Initialize(u.GetNBlk(), u.GetNBlkDomain(), u.nVar, u.vec_val, true); + } + + /*! + * \brief Set our values (resizing if required) by copying from other, the derivative information is lost. + * \param[in] other - source CSysVector + */ + template + void PassiveCopy(const CSysVector& other); /*! * \brief class destructor */ - virtual ~CSysVector(); + ~CSysVector(); /*! - * \brief Initialize the class. + * \brief Initialize the class with a scalar. 
* \param[in] numBlk - number of blocks locally - * \param[in] numBlkDomain + * \param[in] numBlkDomain - number of blocks locally (without g cells) * \param[in] numVar - number of variables in each block * \param[in] val - default value for elements */ - void Initialize(const unsigned long & numBlk, const unsigned long & numBlkDomain, const unsigned short & numVar, const ScalarType & val = 0.0); + void Initialize(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, ScalarType val = 0.0) { + Initialize(numBlk, numBlkDomain, numVar, &val, false); + } + + /*! + * \brief Initialize the class with an array. + * \note If ptr==nullptr no copy occurs. + * \param[in] numBlk - number of blocks locally + * \param[in] numBlkDomain - number of blocks locally (without g cells) + * \param[in] numVar - number of variables in each block + * \param[in] ptr - pointer to data with which to initialize the vector + */ + void Initialize(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, const ScalarType* ptr) { + Initialize(numBlk, numBlkDomain, numVar, ptr, true); + } /*! * \brief return the number of local elements in the CSysVector @@ -147,45 +172,34 @@ class CSysVector { */ inline unsigned long GetNElmDomain() const { return nElmDomain; } - /*! - * \brief return the size of the CSysVector (over all processors) - */ - inline unsigned long GetSize() const { -#ifdef HAVE_MPI - return nElmGlobal; -#else - return (unsigned long)nElm; -#endif - } - /*! * \brief return the number of variables at each block (typically number per node) */ - inline unsigned short GetNVar() const { return nVar; } + inline unsigned long GetNVar() const { return nVar; } /*! * \brief return the number of blocks (typically number of nodes locally) */ - inline unsigned long GetNBlk() const { return nBlk; } + inline unsigned long GetNBlk() const { return nElm/nVar; } /*! 
* \brief return the number of blocks (typically number of nodes locally) */ - inline unsigned long GetNBlkDomain() const { return nBlkDomain; } + inline unsigned long GetNBlkDomain() const { return nElmDomain/nVar; } /*! * \brief set calling CSysVector to scaling of another CSysVector * \param[in] a - scalar factor for x * \param[in] x - CSysVector that is being scaled */ - void Equals_AX(const ScalarType & a, CSysVector & x); + void Equals_AX(ScalarType a, const CSysVector & x); /*! * \brief adds a scaled CSysVector to calling CSysVector * \param[in] a - scalar factor for x * \param[in] x - CSysVector that is being scaled */ - void Plus_AX(const ScalarType & a, CSysVector & x); + void Plus_AX(ScalarType a, const CSysVector & x); /*! * \brief general linear combination of two CSysVectors @@ -194,7 +208,7 @@ class CSysVector { * \param[in] b - scalar factor for y * \param[in] y - second CSysVector in linear combination */ - void Equals_AX_Plus_BY(const ScalarType & a, CSysVector & x, const ScalarType & b, CSysVector & y); + void Equals_AX_Plus_BY(ScalarType a, const CSysVector & x, ScalarType b, const CSysVector & y); /*! * \brief assignment operator with deep copy @@ -206,13 +220,12 @@ class CSysVector { * \brief CSysVector=su2double assignment operator * \param[in] val - value assigned to each element of CSysVector */ - CSysVector & operator=(const ScalarType & val); + CSysVector & operator=(ScalarType val); /*! - * \brief addition operator - * \param[in] u - CSysVector being added to *this + * \brief Sets to zero all the entries of the vector. */ - CSysVector operator+(const CSysVector & u) const; + inline void SetValZero(void) { *this = ScalarType(0.0); } /*! * \brief compound addition-assignment operator @@ -220,12 +233,6 @@ class CSysVector { */ CSysVector & operator+=(const CSysVector & u); - /*! - * \brief subtraction operator - * \param[in] u - CSysVector being subtracted from *this - */ - CSysVector operator-(const CSysVector & u) const; - /*! 
* \brief compound subtraction-assignment operator * \param[in] u - CSysVector being subtracted from calling object @@ -233,28 +240,35 @@ class CSysVector { CSysVector & operator-=(const CSysVector & u); /*! - * \brief vector * scalar multiplication operator - * \param[in] val - value to multiply *this by + * \brief compound scalar multiplication-assignment operator + * \param[in] val - value to multiply calling object by + */ + CSysVector & operator*=(ScalarType val); + + /*! + * \brief compound scalar division-assignment operator + * \param[in] val - value to divide elements of calling object by */ - CSysVector operator*(const ScalarType & val) const; + CSysVector & operator/=(ScalarType val); /*! - * \brief compound scalar multiplication-assignment operator - * \param[in] val - value to multiply calling object by + * \brief Dot product between "this" and another vector + * \param[in] u - Another vector. + * \return result of dot product */ - CSysVector & operator*=(const ScalarType & val); + ScalarType dot(const CSysVector & u) const; /*! - * \brief vector-scalar division operator (no scalar/vector operator) - * \param[in] val - value to divide elements of *this by + * \brief squared L2 norm of the vector (via dot with self) + * \return squared L2 norm */ - CSysVector operator/(const ScalarType & val) const; + inline ScalarType squaredNorm() const { return dot(*this); } /*! - * \brief compound scalar division-assignment operator - * \param[in] val - value to divide elements of calling object by + * \brief L2 norm of the vector + * \return L2 norm */ - CSysVector & operator/=(const ScalarType & val); + inline ScalarType norm() const { return sqrt(squaredNorm()); } /*! * \brief indexing operator with assignment permitted @@ -268,32 +282,32 @@ class CSysVector { */ inline const ScalarType & operator[](const unsigned long & i) const { return vec_val[i]; } - /*! - * \brief the L2 norm of the CSysVector - * \result the L2 norm - */ - ScalarType norm() const; - /*! 
* \brief copies the contents of the calling CSysVector into an array * \param[out] u_array - array into which information is being copied * \pre u_array must be allocated and have the same size as CSysVector */ - void CopyToArray(ScalarType* u_array); + void CopyToArray(ScalarType* u_array) const; /*! * \brief Subtract val_residual to the residual. * \param[in] val_ipoint - index of the point where subtract the residual. * \param[in] val_residual - Value to subtract to the residual. */ - void SubtractBlock(unsigned long val_ipoint, ScalarType *val_residual); + inline void SubtractBlock(unsigned long val_ipoint, const ScalarType *val_residual) { + for (auto iVar = 0ul; iVar < nVar; iVar++) + vec_val[val_ipoint*nVar+iVar] -= val_residual[iVar]; + } /*! * \brief Add val_residual to the residual. * \param[in] val_ipoint - index of the point where add the residual. * \param[in] val_residual - Value to add to the residual. */ - void AddBlock(unsigned long val_ipoint, ScalarType *val_residual); + inline void AddBlock(unsigned long val_ipoint, const ScalarType *val_residual) { + for (auto iVar = 0ul; iVar < nVar; iVar++) + vec_val[val_ipoint*nVar+iVar] += val_residual[iVar]; + } /*! * \brief Set val_residual to the residual. @@ -301,34 +315,44 @@ class CSysVector { * \param[in] val_var - inde of the residual to be set. * \param[in] val_residual - Value to set to the residual. */ - void SetBlock(unsigned long val_ipoint, unsigned short val_var, ScalarType val_residual); + inline void SetBlock(unsigned long val_ipoint, unsigned long val_var, ScalarType val_residual) { + vec_val[val_ipoint*nVar+val_var] = val_residual; + } /*! * \brief Set val_residual to the residual. * \param[in] val_ipoint - index of the point where set the residual. * \param[in] val_residual - Value to set to the residual. 
*/ - void SetBlock(unsigned long val_ipoint, ScalarType *val_residual); + inline void SetBlock(unsigned long val_ipoint, const ScalarType *val_residual) { + for (auto iVar = 0ul; iVar < nVar; iVar++) + vec_val[val_ipoint*nVar+iVar] = val_residual[iVar]; + } /*! * \brief Set the residual to zero. * \param[in] val_ipoint - index of the point where set the residual. */ - void SetBlock_Zero(unsigned long val_ipoint); + inline void SetBlock_Zero(unsigned long val_ipoint) { + for (auto iVar = 0ul; iVar < nVar; iVar++) + vec_val[val_ipoint*nVar+iVar] = 0.0; + } /*! * \brief Set the velocity residual to zero. * \param[in] val_ipoint - index of the point where set the residual. * \param[in] val_var - inde of the residual to be set. */ - void SetBlock_Zero(unsigned long val_ipoint, unsigned short val_var); + inline void SetBlock_Zero(unsigned long val_ipoint, unsigned long val_var) { + vec_val[val_ipoint*nVar+val_var] = 0.0; + } /*! * \brief Get the value of the residual. * \param[in] val_ipoint - index of the point where set the residual. * \return Pointer to the residual. */ - ScalarType *GetBlock(unsigned long val_ipoint); + inline ScalarType *GetBlock(unsigned long val_ipoint) { return &vec_val[val_ipoint*nVar]; } /*! * \brief Get the value of the residual. @@ -336,27 +360,7 @@ class CSysVector { * \param[in] val_var - inde of the residual to be set. * \return Value of the residual. */ - ScalarType GetBlock(unsigned long val_ipoint, unsigned short val_var); - - /*! - * \brief dot-product between two CSysVectors - * \param[in] u - first CSysVector in dot product - * \param[in] v - second CSysVector in dot product - */ - friend ScalarType dotProd(const CSysVector & u, const CSysVector & v); - - /*! - * \brief Set our values (resizing if required) by copying from other, the derivative information is lost. 
- * \param[in] other - source CSysVector - */ - template - void PassiveCopy(const CSysVector& other); + inline ScalarType GetBlock(unsigned long val_ipoint, unsigned long val_var) const { + return vec_val[val_ipoint*nVar+val_var]; + } }; - -/*! - * \brief scalar * vector multiplication operator - * \param[in] val - scalar value to multiply by - * \param[in] u - CSysVector having its elements scaled - */ -template -CSysVector operator*(const ScalarType & val, const CSysVector & u); diff --git a/Common/include/mpi_structure.hpp b/Common/include/mpi_structure.hpp index e242a8f9ff91..20f2113ecfca 100644 --- a/Common/include/mpi_structure.hpp +++ b/Common/include/mpi_structure.hpp @@ -118,7 +118,9 @@ class CBaseMPIWrapper { static void Error(std::string ErrorMsg, std::string FunctionName); static void Init(int *argc, char***argv); - + + static void Init_thread(int *argc, char***argv, int required, int* provided); + static void Buffer_attach(void *buffer, int size); static void Buffer_detach(void *buffer, int *size); @@ -226,6 +228,8 @@ class CMediMPIWrapper: public CBaseMPIWrapper { static void Init(int *argc, char***argv); + static void Init_thread(int *argc, char***argv, int required, int* provided); + static void Init_AMPI(void); static void Buffer_attach(void *buffer, int size); @@ -356,6 +360,8 @@ class CBaseMPIWrapper { static void Error(std::string ErrorMsg, std::string FunctionName); static void Init(int *argc, char***argv); + + static void Init_thread(int *argc, char***argv, int required, int* provided); static void Buffer_attach(void *buffer, int size); diff --git a/Common/include/mpi_structure.inl b/Common/include/mpi_structure.inl index 9ff12d749052..eab1cbe2ab50 100644 --- a/Common/include/mpi_structure.inl +++ b/Common/include/mpi_structure.inl @@ -131,6 +131,17 @@ inline void CBaseMPIWrapper::Init(int *argc, char ***argv) { winMinRankErrorInUse = true; } +inline void CBaseMPIWrapper::Init_thread(int *argc, char ***argv, int required, int* provided) { + 
MPI_Init_thread(argc,argv,required,provided); + MPI_Comm_rank(currentComm, &Rank); + MPI_Comm_size(currentComm, &Size); + + MinRankError = Size; + MPI_Win_create(&MinRankError, sizeof(int), sizeof(int), MPI_INFO_NULL, + currentComm, &winMinRankError); + winMinRankErrorInUse = true; +} + inline void CBaseMPIWrapper::Buffer_attach(void *buffer, int size){ MPI_Buffer_attach(buffer, size); } @@ -266,7 +277,7 @@ inline void CBaseMPIWrapper::Waitany(int nrequests, Request *request, int *index, Status *status) { MPI_Waitany(nrequests, request, index, status); } - + #if defined CODI_REVERSE_TYPE || defined CODI_FORWARD_TYPE @@ -282,6 +293,18 @@ inline void CMediMPIWrapper::Init(int *argc, char ***argv) { winMinRankErrorInUse = true; } +inline void CMediMPIWrapper::Init_thread(int *argc, char ***argv, int required, int* provided) { + AMPI_Init_thread(argc,argv,required,provided); + MediTool::init(); + AMPI_Comm_rank(convertComm(currentComm), &Rank); + AMPI_Comm_size(convertComm(currentComm), &Size); + + MinRankError = Size; + MPI_Win_create(&MinRankError, sizeof(int), sizeof(int), MPI_INFO_NULL, + currentComm, &winMinRankError); + winMinRankErrorInUse = true; +} + inline void CMediMPIWrapper::Init_AMPI(void) { AMPI_Init_common(); MediTool::init(); @@ -517,6 +540,8 @@ inline CBaseMPIWrapper::Comm CBaseMPIWrapper::GetComm(){ inline void CBaseMPIWrapper::Init(int *argc, char ***argv) {} +inline void CBaseMPIWrapper::Init_thread(int *argc, char***argv, int required, int* provided) {*provided = required;} + inline void CBaseMPIWrapper::Buffer_attach(void *buffer, int size) {} inline void CBaseMPIWrapper::Buffer_detach(void *buffer, int *size) {} diff --git a/Common/include/omp_structure.hpp b/Common/include/omp_structure.hpp new file mode 100644 index 000000000000..25b8e49d2a02 --- /dev/null +++ b/Common/include/omp_structure.hpp @@ -0,0 +1,113 @@ +/*! 
+ * \file omp_structure.hpp + * \brief OpenMP interface header, provides compatibility functions + * if the code is built without OpenMP support. + * Parallel pragmas are defined here so that they can be + * completely "disabled" when compiling without OpenMP. + * \note Do not include omp.h explicitly anywhere, use this header instead. + * \note If you use an omp_*** function define a compatibility version here, + * if that is not practical use define "HAVE_OMP" to guard that function. + * \note Always use the macro "SU2_OMP" to create OpenMP constructs, this is so + * we can disable pragmas. Other convenient pragmas are also defined here + * e.g. SU2_OMP_PARALLEL. Exotic pragmas of limited portability should be + * defined here with suitable fallback versions to limit the spread of + * compiler tricks in other areas of the code. + * \author P. Gomes + * \version 7.0.0 "Blackbird" + * + * SU2 Project Website: https://su2code.github.io + * + * The SU2 Project is maintained by the SU2 Foundation + * (http://su2foundation.org) + * + * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) + * + * SU2 is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * SU2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with SU2. If not, see . + */ + +#pragma once + +#if defined(_MSC_VER) +#define PRAGMIZE(X) __pragma(X) +#else +#define PRAGMIZE(X) _Pragma(#X) +#endif + +/*--- Detect compilation with OpenMP support, protect against + * using OpenMP with AD (not supported yet). 
---*/ +#if defined(_OPENMP) && !defined(CODI_REVERSE_TYPE) && !defined(CODI_FORWARD_TYPE) +#define HAVE_OMP +#include + +/*--- The generic start of OpenMP constructs. ---*/ +#define SU2_OMP(ARGS) PRAGMIZE(omp ARGS) + +#else // Compile without OpenMP + +/*--- Disable pragmas to quiet compilation warnings. ---*/ +#define SU2_OMP(ARGS) + +/*! + * \brief Maximum number of threads available. + */ +inline constexpr int omp_get_max_threads(void) {return 1;} + +/*! + * \brief Index of current thread, akin to MPI rank. + */ +inline constexpr int omp_get_thread_num(void) {return 0;} + +#endif + +/*--- Convenience macros (do not use excessive nesting of macros). ---*/ +#define SU2_OMP_SIMD SU2_OMP(simd) + +#define SU2_OMP_MASTER SU2_OMP(master) +#define SU2_OMP_BARRIER SU2_OMP(barrier) + +#define SU2_OMP_PARALLEL SU2_OMP(parallel) +#define SU2_OMP_PARALLEL_(ARGS) SU2_OMP(parallel ARGS) +#define SU2_OMP_PARALLEL_ON(NTHREADS) SU2_OMP(parallel num_threads(NTHREADS)) + +#define SU2_OMP_FOR_DYN(CHUNK) SU2_OMP(for schedule(dynamic,CHUNK)) +#define SU2_OMP_FOR_STAT(CHUNK) SU2_OMP(for schedule(static,CHUNK)) + + +/*--- Convenience functions (e.g. to compute chunk sizes). ---*/ + +/*! + * \brief Integer division rounding up. + */ +inline constexpr size_t roundUpDiv(size_t numerator, size_t denominator) +{ + return (numerator+denominator-1)/denominator; +} + +/*! + * \brief Compute a chunk size based on totalWork and number of threads such that + * all threads get the same number of chunks (with limited size). + * \param[in] totalWork - e.g. total number of loop iterations. + * \param[in] numThreads - Number of threads that will share the work. + * \param[in] maxChunkSize - Upper bound for chunk size. + * \return The chunkSize. 
+ */ +inline size_t computeStaticChunkSize(size_t totalWork, + size_t numThreads, + size_t maxChunkSize) +{ + size_t workPerThread = roundUpDiv(totalWork, numThreads); + size_t chunksPerThread = roundUpDiv(workPerThread, maxChunkSize); + return roundUpDiv(workPerThread, chunksPerThread); +} + diff --git a/Common/include/toolboxes/C2DContainer.hpp b/Common/include/toolboxes/C2DContainer.hpp index 028ff7469af7..508993a48f6d 100644 --- a/Common/include/toolboxes/C2DContainer.hpp +++ b/Common/include/toolboxes/C2DContainer.hpp @@ -31,6 +31,7 @@ #include "../datatype_structure.hpp" #include +#include /*! * \enum StorageType @@ -360,6 +361,8 @@ template { + static_assert(std::is_integral::value,""); + private: using Base = container_helpers::AccessorImpl; using Base::m_data; @@ -401,12 +404,9 @@ class C2DContainer : free(m_data); } - /*--- round up size to a multiple of the alignment specification if necessary ---*/ - size_t bytes = reqSize*sizeof(Scalar_t); - size_t allocSize = (AlignSize==0)? bytes : ((bytes+AlignSize-1)/AlignSize)*AlignSize; - /*--- request actual allocation to base class as it needs specialization ---*/ - m_allocate(allocSize,rows,cols); + size_t bytes = reqSize*sizeof(Scalar_t); + m_allocate(bytes,rows,cols); return reqSize; } diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp index fe334f253372..2a089b323bc4 100644 --- a/Common/include/toolboxes/allocation_toolbox.hpp +++ b/Common/include/toolboxes/allocation_toolbox.hpp @@ -9,7 +9,7 @@ * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. 
AUTHORS.md) @@ -33,7 +33,7 @@ #if defined(_WIN32) #include #else -#include +#include #endif #include @@ -46,6 +46,11 @@ inline constexpr bool is_power_of_two(size_t x) return x && !(x & (x-1)); } +inline constexpr size_t round_up(size_t multiple, size_t x) +{ + return ((x+multiple-1)/multiple)*multiple; +} + /*! * \brief Aligned memory allocation compatible across platforms. * \param[in] alignment, in bytes, of the memory being allocated. @@ -59,6 +64,8 @@ inline T* aligned_alloc(size_t alignment, size_t size) noexcept if(alignment < alignof(void*)) alignment = alignof(void*); + size = round_up(alignment, size); + void* ptr = nullptr; #if defined(__APPLE__) diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp new file mode 100644 index 000000000000..b2bbdbd74da5 --- /dev/null +++ b/Common/include/toolboxes/graph_toolbox.hpp @@ -0,0 +1,489 @@ +/*! + * \file graph_toolbox.hpp + * \brief Functions and classes to build/represent sparse graphs or sparse patterns. + * \author P. Gomes + * \version 7.0.0 "Blackbird" + * + * SU2 Project Website: https://su2code.github.io + * + * The SU2 Project is maintained by the SU2 Foundation + * (http://su2foundation.org) + * + * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) + * + * SU2 is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * SU2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with SU2. If not, see . 
+ */ + +#pragma once + +#include "C2DContainer.hpp" + +#include +#include +#include +#include +#include + +/*! + * \enum ConnectivityType + * \brief In FVM points are connected by the edges (faces) of the grid. + * In FEM, two points are connected if they have an element in common. + */ +enum class ConnectivityType {FiniteVolume=0, FiniteElement=1}; + + +/*! + * \class CCompressedSparsePattern + * \brief A simple class to store adjacency information in a + * compressed format suitable for sparse matrix operations. + * If built for row-major storage the inner indices are column indices + * and the pattern should be used as (row,icol), otherwise as (col,irow). + */ +template +class CCompressedSparsePattern { + static_assert(std::is_integral::value,""); + +private: + su2vector m_outerPtr; /*!< \brief Start positions of the inner indices for each outer index. */ + su2vector m_innerIdx; /*!< \brief Inner indices of the non zero entries. */ + su2vector m_diagPtr; /*!< \brief Position of the diagonal entry. */ + +public: + using IndexType = Index_t; + + CCompressedSparsePattern() = default; + + /*! + * \brief Construct from rvalue refs. + * \note This is the most efficient constructor as no data copy occurs. + * \param[in] outerPtr - Outer index pointers. + * \param[in] innerIdx - Inner indices. + */ + CCompressedSparsePattern(su2vector&& outerPtr, + su2vector&& innerIdx) : + m_outerPtr(outerPtr), m_innerIdx(innerIdx) + { + /*--- perform a basic sanity check ---*/ + assert(m_innerIdx.size() == m_outerPtr(m_outerPtr.size()-1)); + } + + /*! + * \brief Construct from vector-like objects of any type with + * methods "size()" and "data()" (returning a pointer). + * \param[in] outerPtr - Outer index pointers. + * \param[in] innerIdx - Inner indices. 
+ */ + template + CCompressedSparsePattern(const T& outerPtr, const T& innerIdx) + { + m_outerPtr.resize(outerPtr.size()); + for(Index_t i=0; i= 0 && iNonZero < getNumNonZeros(iOuterIdx)); + return m_innerIdx(m_outerPtr(iOuterIdx) + iNonZero); + } + + /*! + * \param[in] iOuterIdx - Outer index (row/col). + * \param[in] iInnerIdx - Inner index (col/row). + * \return Absolute position of non zero entry (iOuterIdx,iInnerIdx), + * or NNZ if position does not belong to the pattern. + */ + inline Index_t findInnerIdx(Index_t iOuterIdx, Index_t iInnerIdx) const { + for(Index_t k = m_outerPtr(iOuterIdx); k < m_outerPtr(iOuterIdx+1); ++k) + if(m_innerIdx(k) == iInnerIdx) return k; + return m_innerIdx.size(); + } + + /*! + * \param[in] iOuterIdx - Outer index (row/col). + * \param[in] iInnerIdx - Inner index (col/row). + * \return True if (iOuterIdx,iInnerIdx) exists, i.e. is non zero. + */ + inline bool isNonZero(Index_t iOuterIdx, Index_t iInnerIdx) const { + return findInnerIdx(iOuterIdx, iInnerIdx) < m_innerIdx.size(); + } + + /*! + * \param[in] iOuterIdx - Outer index (row/col). + * \param[in] iInnerIdx - Inner index (col/row). + * \return Absolute position of non zero entry (iOuterIdx,iInnerIdx). + * \note This method is only safe if the entry exists. + */ + inline Index_t quickFindInnerIdx(Index_t iOuterIdx, Index_t iInnerIdx) const { + assert(isNonZero(iOuterIdx, iInnerIdx) && "Error, j does not belong to NZ(i)."); + Index_t k = m_outerPtr(iOuterIdx); + while(m_innerIdx(k) != iInnerIdx) ++k; + return k; + } + + /*! + * \param[in] iDiagIdx - Diagonal index (row == col). + * \return Absolute position of the diagonal entry. + */ + inline Index_t getDiagPtr(Index_t iDiagIdx) const { + return m_diagPtr(iDiagIdx); + } + + /*! + * \return Raw pointer to the outer pointer vector. + */ + inline const Index_t* outerPtr() const { + assert(!empty() && "Sparse pattern has not been built."); + return m_outerPtr.data(); + } + + /*! + * \return Raw pointer to the inner index vector. 
+ */ + inline const Index_t* innerIdx() const { + assert(!empty() && "Sparse pattern has not been built."); + return m_innerIdx.data(); + } + + /*! + * \return Raw pointer to the diagonal pointer vector. + */ + inline const Index_t* diagPtr() const { + assert(!m_diagPtr.empty() && "Diagonal map has not been built."); + return m_diagPtr.data(); + } + + /*! + * \return The minimum inner index. + */ + Index_t getMinInnerIdx() const { + Index_t idx = std::numeric_limits::max(); + for(Index_t k=0; k::min(); + for(Index_t k=0; k +using CEdgeToNonZeroMap = C2DContainer; + + +using CCompressedSparsePatternUL = CCompressedSparsePattern; +using CEdgeToNonZeroMapUL = CEdgeToNonZeroMap; + + +/*! + * \brief Build a sparse pattern from geometry information, of type FVM or FEM, + * for a given fill-level. At fill-level N, the immediate neighbors of the + * points in level N-1 are also considered neighbors of the base point. + * The resulting pattern is that of A^{N+1} where A is the sparse matrix + * of immediate neighbors. + * \note Algorithm is equivalent to the implementation by F. Palacios, + * A. Bueno, and T. Economon from CSysMatrix. + * \param[in] geometry - Definition of the grid. + * \param[in] type - Of connectivity. + * \param[in] fillLvl - Target degree of neighborhood (immediate neighbors always added). + * \return Compressed-Storage-Row sparse pattern. + */ +template +CCompressedSparsePattern buildCSRPattern(Geometry_t& geometry, + ConnectivityType type, + Index_t fillLvl) +{ + Index_t nPoint = geometry.GetnPoint(); + + std::vector outerPtr(nPoint+1); + std::vector innerIdx; + innerIdx.reserve(nPoint); // at least this much space is needed + + for(Index_t iPoint = 0; iPoint < nPoint; ++iPoint) + { + /*--- Inner indices for iPoint start here. ---*/ + outerPtr[iPoint] = innerIdx.size(); + + /*--- Use a set to avoid duplication and keep ascending order. ---*/ + std::set neighbors; + + /*--- Insert base point. 
---*/ + neighbors.insert(iPoint); + + /*--- Neighbors added in previous level. ---*/ + std::set addedNeighbors(neighbors); + + for(Index_t iLevel = 0; ; ++iLevel) + { + /*--- New points added in this level. ---*/ + std::set newNeighbors; + + /*--- For each point previously added, add its level 0 + * neighbors, not duplicating any existing neighbor. ---*/ + for(auto jPoint : addedNeighbors) + { + auto point = geometry.node[jPoint]; + + if(type == ConnectivityType::FiniteVolume) + { + /*--- For FVM we know the neighbors of point j directly. ---*/ + for(unsigned short iNeigh = 0; iNeigh < point->GetnPoint(); ++iNeigh) + { + Index_t kPoint = point->GetPoint(iNeigh); + + if(neighbors.count(kPoint) == 0) // no duplication + newNeighbors.insert(kPoint); + } + } + else // FiniteElement + { + /*--- For FEM we need the nodes of all elements that contain point j. ---*/ + for(unsigned short iNeigh = 0; iNeigh < point->GetnElem(); ++iNeigh) + { + auto elem = geometry.elem[point->GetElem(iNeigh)]; + + for(unsigned short iNode = 0; iNode < elem->GetnNodes(); ++iNode) + { + Index_t kPoint = elem->GetNode(iNode); + + if(neighbors.count(kPoint) == 0) // no duplication + newNeighbors.insert(kPoint); + } + } + } + } + + neighbors.insert(newNeighbors.begin(), newNeighbors.end()); + + if(iLevel >= fillLvl) break; + + /*--- For the next level we get the neighbours of the new points. ---*/ + addedNeighbors = newNeighbors; + } + + /*--- Store final sparse pattern for iPoint. ---*/ + innerIdx.insert(innerIdx.end(), neighbors.begin(), neighbors.end()); + } + outerPtr.back() = innerIdx.size(); + + /*--- Return pattern as CCompressedSparsePattern object. ---*/ + return CCompressedSparsePattern(outerPtr, innerIdx); +} + + +/*! + * \brief Build a lookup table of the absolute positions of the non zero entries + * of a compressed sparse pattern, accessed when visiting the FVM edges + * of a grid. 
The table can then be used for fast access (avoids searches) + * to the non zero entries of a sparse matrix associated with the pattern. + * \param[in] geometry - Definition of the grid. + * \param[in] pattern - Sparse pattern. + * \return nEdge by 2 matrix. + */ +template +CEdgeToNonZeroMap mapEdgesToSparsePattern(Geometry_t& geometry, + const CCompressedSparsePattern& pattern) +{ + assert(!pattern.empty()); + + CEdgeToNonZeroMap edgeMap(geometry.GetnEdge(),2); + + for(Index_t iEdge = 0; iEdge < geometry.GetnEdge(); ++iEdge) + { + Index_t iPoint = geometry.edge[iEdge]->GetNode(0); + Index_t jPoint = geometry.edge[iEdge]->GetNode(1); + + edgeMap(iEdge,0) = pattern.quickFindInnerIdx(iPoint,jPoint); + edgeMap(iEdge,1) = pattern.quickFindInnerIdx(jPoint,iPoint); + } + + return edgeMap; +} + + +/*! + * \brief Color contiguous groups of outer indices of a sparse pattern such that + * within each color, any two groups do not have inner indices in common. + * \note Within a group, two outer indices will generally have common inner indices. + * The coloring is returned as a compressed sparse pattern where the colors + * are outer indices, and the outer indices of the input pattern are the + * inner indices of the coloring. A simple greedy algorithm is used. + * Using a sparse pattern as input allows "anything" to be colored e.g. + * FVM edges, FEM elements, the rows/columns of a sparse matrix, etc. + * \note The worst that can happen in this method is needing an unreasonable number + * of colors, or too much memory due to a large range of the inner indices. + * The last two template parameters limit both, in case of failure an empty + * pattern is returned. + * \param[in] pattern - Sparse pattern to be colored. + * \param[in] groupSize - Size of the outer index groups, default 1. + * \param[out] indexColor - Optional, vector with colors given to the outer indices. + * \return Coloring in the same type of the input pattern. 
+ */ +template +T colorSparsePattern(const T& pattern, size_t groupSize = 1, + std::vector* indexColor = nullptr) +{ + static_assert(std::is_integral::value,""); + static_assert(std::numeric_limits::max() >= MaxColors,""); + + using Index_t = typename T::IndexType; + + const Index_t grpSz = groupSize; + const Index_t nOuter = pattern.getOuterSize(); + const Index_t minIdx = pattern.getMinInnerIdx(); + const Index_t nInner = pattern.getMaxInnerIdx()+1-minIdx; + + /*--- Check the max memory condition (<< 23 is to count bits). ---*/ + if(size_t(nInner) > (MaxMB << 23)) return T(); + + /*--- Vector with the color given to each outer index. ---*/ + std::vector idxColor(nOuter); + + /*--- Start with one color, with no indices assigned. ---*/ + std::vector colorSize(1,0); + Color_t color, nColor = 1; + + { + /*--- For each color keep track of the inner indices that are in it. ---*/ + std::vector > innerInColor; + innerInColor.emplace_back(nInner, false); + + auto outerPtr = pattern.outerPtr(); + auto innerIdx = pattern.innerIdx(); + + for(Index_t iOuter = 0; iOuter < nOuter; iOuter += grpSz) + { + Index_t grpEnd = std::min(iOuter+grpSz, nOuter); + + for(color = 0; color < nColor; ++color) + { + bool free = true; + /*--- Traverse entire group as a large outer index. ---*/ + for(Index_t k = outerPtr[iOuter]; k < outerPtr[grpEnd] && free; ++k) + { + free = !innerInColor[color][innerIdx[k]-minIdx]; + } + /*--- If none of the inner indices in the group appears in + * this color yet, it is assigned to the group. ---*/ + if(free) break; + } + + /*--- No color was free, make space for a new one. ---*/ + if(color == nColor) + { + ++nColor; + if(nColor == MaxColors) return T(); + colorSize.push_back(0); + innerInColor.emplace_back(nInner, false); + } + + /*--- Assign color to group. ---*/ + for(Index_t k = iOuter; k < grpEnd; ++k) idxColor[k] = color; + + /*--- Mark the inner indices of the group as belonging to the color. 
---*/ + for(Index_t k = outerPtr[iOuter]; k < outerPtr[grpEnd]; ++k) + { + innerInColor[color][innerIdx[k]-minIdx] = true; + } + + /*--- Update count for the assigned color. ---*/ + colorSize[color] += grpEnd - iOuter; + } + } // matrix of bools goes out of scope + + + /*--- Compress the coloring information. ---*/ + + su2vector colorPtr(nColor+1); colorPtr(0) = 0; + su2vector outerIdx(nOuter); + + Index_t k = 0; + for(color = 0; color < nColor; ++color) + { + colorPtr(color+1) = colorPtr(color)+colorSize[color]; + + for(Index_t iOuter = 0; iOuter < nOuter; ++iOuter) + if(idxColor[iOuter] == color) + outerIdx(k++) = iOuter; + } + + /*--- Optional return of the direct color information. ---*/ + if(indexColor) *indexColor = std::move(idxColor); + + /*--- Move compressed coloring into result pattern instance. ---*/ + return T(std::move(colorPtr), std::move(outerIdx)); +} diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp index f5dc732d581d..6cd8bfee5862 100644 --- a/Common/src/geometry/CGeometry.cpp +++ b/Common/src/geometry/CGeometry.cpp @@ -3955,3 +3955,87 @@ void CGeometry::SetGridVelocity(CConfig *config, unsigned long iter) { } } + +const CCompressedSparsePatternUL& CGeometry::GetSparsePattern(ConnectivityType type, unsigned long fillLvl) +{ + bool fvm = (type == ConnectivityType::FiniteVolume); + + CCompressedSparsePatternUL* pattern = nullptr; + + if (fillLvl == 0) + pattern = fvm? &finiteVolumeCSRFill0 : &finiteElementCSRFill0; + else + pattern = fvm? 
&finiteVolumeCSRFillN : &finiteElementCSRFillN; + + if (pattern->empty()) { + *pattern = buildCSRPattern(*this, type, fillLvl); + pattern->buildDiagPtr(); + } + + return *pattern; +} + +const CEdgeToNonZeroMapUL& CGeometry::GetEdgeToSparsePatternMap(void) +{ + if (edgeToCSRMap.empty()) { + if (finiteVolumeCSRFill0.empty()) { + finiteVolumeCSRFill0 = buildCSRPattern(*this, ConnectivityType::FiniteVolume, 0ul); + } + edgeToCSRMap = mapEdgesToSparsePattern(*this, finiteVolumeCSRFill0); + } + return edgeToCSRMap; +} + +const CCompressedSparsePatternUL& CGeometry::GetEdgeColoring(void) +{ + if (edgeColoring.empty()) { + /*--- Create a temporary sparse pattern from the edges. ---*/ + /// TODO: Try to avoid temporary once grid information is made contiguous. + su2vector outerPtr(nEdge+1); + su2vector innerIdx(nEdge*2); + + for(unsigned long iEdge = 0; iEdge < nEdge; ++iEdge) { + outerPtr(iEdge) = 2*iEdge; + innerIdx(iEdge*2+0) = edge[iEdge]->GetNode(0); + innerIdx(iEdge*2+1) = edge[iEdge]->GetNode(1); + } + outerPtr(nEdge) = 2*nEdge; + + CCompressedSparsePatternUL pattern(move(outerPtr), move(innerIdx)); + + /*--- Color the edges. ---*/ + edgeColoring = colorSparsePattern(pattern, edgeColorGroupSize); + + if(edgeColoring.empty()) + SU2_MPI::Error("Edge coloring failed.", CURRENT_FUNCTION); + } + return edgeColoring; +} + +const CCompressedSparsePatternUL& CGeometry::GetElementColoring(void) +{ + if (elemColoring.empty()) { + /*--- Create a temporary sparse pattern from the elements. ---*/ + /// TODO: Try to avoid temporary once grid information is made contiguous. 
+ vector outerPtr(nElem+1); + vector innerIdx; innerIdx.reserve(nElem); + + for(unsigned long iElem = 0; iElem < nElem; ++iElem) { + outerPtr[iElem] = innerIdx.size(); + + for(unsigned short iNode = 0; iNode < elem[iElem]->GetnNodes(); ++iNode) { + innerIdx.push_back(elem[iElem]->GetNode(iNode)); + } + } + outerPtr[nElem] = innerIdx.size(); + + CCompressedSparsePatternUL pattern(outerPtr, innerIdx); + + /*--- Color the elements. ---*/ + elemColoring = colorSparsePattern(pattern, elemColorGroupSize); + + if(elemColoring.empty()) + SU2_MPI::Error("Element coloring failed.", CURRENT_FUNCTION); + } + return elemColoring; +} diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 1bf475065233..93f63d4c121c 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -9660,12 +9660,12 @@ void CPhysicalGeometry::SetSensitivity(CConfig *config) { string filename = config->GetSolution_AdjFileName(); su2double AoASens; - unsigned short nTimeIter, iDim; - unsigned long iPoint, index; + unsigned short nTimeIter; + unsigned long index; string::size_type position; int counter = 0; - Sensitivity = new su2double[nPoint*nDim]; + Sensitivity.resize(nPoint,nDim) = su2double(0.0); if (config->GetTime_Domain()) { nTimeIter = config->GetnTime_Iter(); @@ -9679,13 +9679,6 @@ void CPhysicalGeometry::SetSensitivity(CConfig *config) { /*--- Read all lines in the restart file ---*/ long iPoint_Local; unsigned long iPoint_Global = 0; string text_line; - - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (iDim = 0; iDim < nDim; iDim++) { - Sensitivity[iPoint*nDim+iDim] = 0.0; - } - } - iPoint_Global = 0; filename = config->GetSolution_AdjFileName(); @@ -9999,13 +9992,13 @@ void CPhysicalGeometry::SetSensitivity(CConfig *config) { offset in the buffer of data from the restart file and load it. 
---*/ index = counter*nFields + sens_x_idx - 1; - Sensitivity[iPoint_Local*nDim+0] = Restart_Data[index]; + Sensitivity(iPoint_Local,0) = Restart_Data[index]; index = counter*nFields + sens_y_idx - 1; - Sensitivity[iPoint_Local*nDim+1] = Restart_Data[index]; + Sensitivity(iPoint_Local,1) = Restart_Data[index]; if (nDim == 3){ index = counter*nFields + sens_z_idx - 1; - Sensitivity[iPoint_Local*nDim+2] = Restart_Data[index]; + Sensitivity(iPoint_Local,2) = Restart_Data[index]; } /*--- Increment the overall counter for how many points have been loaded. ---*/ counter++; @@ -10150,11 +10143,10 @@ void CPhysicalGeometry::SetSensitivity(CConfig *config) { iPoint_Local = GetGlobal_to_Local_Point(iPoint_Global); if (iPoint_Local > -1) { - Sensitivity[iPoint_Local*nDim+0] = PrintingToolbox::stod(point_line[sens_x_idx]); - Sensitivity[iPoint_Local*nDim+1] = PrintingToolbox::stod(point_line[sens_y_idx]); + Sensitivity(iPoint_Local,0) = PrintingToolbox::stod(point_line[sens_x_idx]); + Sensitivity(iPoint_Local,1) = PrintingToolbox::stod(point_line[sens_y_idx]); if (nDim == 3) - Sensitivity[iPoint_Local*nDim+2] = PrintingToolbox::stod(point_line[sens_z_idx]); - + Sensitivity(iPoint_Local,2) = PrintingToolbox::stod(point_line[sens_z_idx]); } } @@ -10209,12 +10201,7 @@ void CPhysicalGeometry::ReadUnorderedSensitivity(CConfig *config) { /*--- Allocate space for the sensitivity and initialize. ---*/ - Sensitivity = new su2double[nPoint*nDim]; - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (iDim = 0; iDim < nDim; iDim++) { - Sensitivity[iPoint*nDim+iDim] = 0.0; - } - } + Sensitivity.resize(nPoint,nDim) = su2double(0.0); /*--- Get the filename for the unordered ASCII sensitivity file input. ---*/ @@ -10287,7 +10274,7 @@ void CPhysicalGeometry::ReadUnorderedSensitivity(CConfig *config) { /*--- Store the sensitivities at the matched local node. 
---*/ for (iDim = 0; iDim < nDim; iDim++) - Sensitivity[pointID*nDim+iDim] = Sens_External[iDim]; + Sensitivity(pointID,iDim) = Sens_External[iDim]; /*--- Keep track of how many points we match. ---*/ diff --git a/Common/src/grid_movement_structure.cpp b/Common/src/grid_movement_structure.cpp index 8aafaea4708b..097844954f1e 100644 --- a/Common/src/grid_movement_structure.cpp +++ b/Common/src/grid_movement_structure.cpp @@ -29,6 +29,9 @@ #include "../include/adt_structure.hpp" #include +#include "../include/linear_algebra/CMatrixVectorProduct.hpp" +#include "../include/linear_algebra/CPreconditioner.hpp" + using namespace std; CGridMovement::CGridMovement(void) { } @@ -205,13 +208,13 @@ void CVolumetricMovement::SetVolume_Deformation(CGeometry *geometry, CConfig *co if ((rank == MASTER_NODE) && Screen_Output) cout << "\n# ILU preconditioner." << endl; StiffMatrix.BuildILUPreconditioner(); mat_vec = new CSysMatrixVectorProduct(StiffMatrix, geometry, config); - precond = new CILUPreconditioner(StiffMatrix, geometry, config); + precond = new CILUPreconditioner(StiffMatrix, geometry, config, false); } if (config->GetKind_Deform_Linear_Solver_Prec() == JACOBI) { if ((rank == MASTER_NODE) && Screen_Output) cout << "\n# Jacobi preconditioner." << endl; StiffMatrix.BuildJacobiPreconditioner(); mat_vec = new CSysMatrixVectorProduct(StiffMatrix, geometry, config); - precond = new CJacobiPreconditioner(StiffMatrix, geometry, config); + precond = new CJacobiPreconditioner(StiffMatrix, geometry, config, false); } } else if (Derivative && (config->GetKind_SU2() == SU2_DOT)) { @@ -223,13 +226,13 @@ void CVolumetricMovement::SetVolume_Deformation(CGeometry *geometry, CConfig *co if ((rank == MASTER_NODE) && Screen_Output) cout << "\n# ILU preconditioner." 
<< endl; StiffMatrix.BuildILUPreconditioner(true); mat_vec = new CSysMatrixVectorProductTransposed(StiffMatrix, geometry, config); - precond = new CILUPreconditioner(StiffMatrix, geometry, config); + precond = new CILUPreconditioner(StiffMatrix, geometry, config, true); } if (config->GetKind_Deform_Linear_Solver_Prec() == JACOBI) { if ((rank == MASTER_NODE) && Screen_Output) cout << "\n# Jacobi preconditioner." << endl; StiffMatrix.BuildJacobiPreconditioner(true); mat_vec = new CSysMatrixVectorProductTransposed(StiffMatrix, geometry, config); - precond = new CJacobiPreconditioner(StiffMatrix, geometry, config); + precond = new CJacobiPreconditioner(StiffMatrix, geometry, config, true); } } @@ -243,7 +246,7 @@ void CVolumetricMovement::SetVolume_Deformation(CGeometry *geometry, CConfig *co Tot_Iter = 0; MaxIter = RestartIter; - System.FGMRES_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, 1, &Residual_Init, false, config); + System.FGMRES_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, 1, Residual_Init, false, config); if ((rank == MASTER_NODE) && Screen_Output) { cout << "\n# FGMRES (with restart) residual history" << endl; @@ -258,7 +261,7 @@ void CVolumetricMovement::SetVolume_Deformation(CGeometry *geometry, CConfig *co if (IterLinSol + RestartIter > Smoothing_Iter) MaxIter = Smoothing_Iter - IterLinSol; - IterLinSol = System.FGMRES_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, MaxIter, &Residual, false, config); + IterLinSol = System.FGMRES_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, MaxIter, Residual, false, config); Tot_Iter += IterLinSol; if ((rank == MASTER_NODE) && Screen_Output) { cout << " " << Tot_Iter << " " << Residual/Residual_Init << endl; } @@ -278,7 +281,7 @@ void CVolumetricMovement::SetVolume_Deformation(CGeometry *geometry, CConfig *co case FGMRES: - Tot_Iter = System.FGMRES_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, Smoothing_Iter, &Residual, Screen_Output, 
config); + Tot_Iter = System.FGMRES_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, Smoothing_Iter, Residual, Screen_Output, config); break; @@ -286,14 +289,14 @@ void CVolumetricMovement::SetVolume_Deformation(CGeometry *geometry, CConfig *co case BCGSTAB: - Tot_Iter = System.BCGSTAB_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, Smoothing_Iter, &Residual, Screen_Output, config); + Tot_Iter = System.BCGSTAB_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, Smoothing_Iter, Residual, Screen_Output, config); break; case CONJUGATE_GRADIENT: - Tot_Iter = System.CG_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, Smoothing_Iter, &Residual, Screen_Output, config); + Tot_Iter = System.CG_LinSolver(LinSysRes, LinSysSol, *mat_vec, *precond, NumError, Smoothing_Iter, Residual, Screen_Output, config); break; diff --git a/Common/src/linear_algebra/CPastixWrapper.cpp b/Common/src/linear_algebra/CPastixWrapper.cpp index a8ce422f81c9..d632ecb4ea46 100644 --- a/Common/src/linear_algebra/CPastixWrapper.cpp +++ b/Common/src/linear_algebra/CPastixWrapper.cpp @@ -7,7 +7,7 @@ * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. 
AUTHORS.md) @@ -28,7 +28,12 @@ #ifdef HAVE_PASTIX +#include "../../include/mpi_structure.hpp" +#include "../../include/omp_structure.hpp" +#include "../../include/config_structure.hpp" +#include "../../include/geometry/CGeometry.hpp" #include "../../include/linear_algebra/CPastixWrapper.hpp" + #include void CPastixWrapper::Initialize(CGeometry *geometry, CConfig *config) { @@ -84,6 +89,8 @@ void CPastixWrapper::Initialize(CGeometry *geometry, CConfig *config) { iparm[IPARM_ORDERING] = API_ORDER_PTSCOTCH; iparm[IPARM_INCOMPLETE] = incomplete; iparm[IPARM_LEVEL_OF_FILL] = pastix_int_t(config->GetPastixFillLvl()); + iparm[IPARM_THREAD_COMM_MODE] = API_THREAD_FUNNELED; + iparm[IPARM_THREAD_NBR] = omp_get_max_threads(); /*--- Prepare sparsity structure ---*/ diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 4d114f5799ca..7d3e5d3287d7 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -1,12 +1,12 @@ /*! - * \file matrix_structure.cpp - * \brief Main subroutines for doing the sparse structures + * \file CSysMatrix.cpp + * \brief Implementation of the sparse matrix class. * \author F. Palacios, A. Bueno, T. Economon * \version 7.0.0 "Blackbird" * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. 
AUTHORS.md) @@ -27,37 +27,44 @@ #include "../../include/linear_algebra/CSysMatrix.inl" +#include "../../include/geometry/CGeometry.hpp" +#include "../../include/config_structure.hpp" +#include "../../include/omp_structure.hpp" +#include "../../include/toolboxes/allocation_toolbox.hpp" + +#include + template CSysMatrix::CSysMatrix(void) { size = SU2_MPI::GetSize(); rank = SU2_MPI::GetRank(); - ilu_fill_in = 0; + nPoint = nPointDomain = nVar = nEqn = 0; + nnz = nnz_ilu = 0; + ilu_fill_in = 0; + nLinelet = 0; + + omp_partitions = nullptr; - /*--- Array initialization ---*/ + matrix = nullptr; + row_ptr = nullptr; + dia_ptr = nullptr; + col_ind = nullptr; - matrix = NULL; - ILU_matrix = NULL; - row_ptr = NULL; - col_ind = NULL; - row_ptr_ilu = NULL; - col_ind_ilu = NULL; - block = NULL; - prod_row_vector = NULL; - aux_vector = NULL; - sum_vector = NULL; - invM = NULL; - block_weight = NULL; - block_inverse = NULL; + ILU_matrix = nullptr; + row_ptr_ilu = nullptr; + dia_ptr_ilu = nullptr; + col_ind_ilu = nullptr; + + invM = nullptr; #ifdef USE_MKL - MatrixMatrixProductJitter = NULL; - MatrixVectorProductJitterBetaOne = NULL; - MatrixVectorProductJitterBetaZero = NULL; - MatrixVectorProductJitterAlphaMinusOne = NULL; - MatrixVectorProductTranspJitterBetaOne = NULL; - mkl_ipiv = NULL; + MatrixMatrixProductJitter = nullptr; + MatrixVectorProductJitterBetaOne = nullptr; + MatrixVectorProductJitterBetaZero = nullptr; + MatrixVectorProductJitterAlphaMinusOne = nullptr; + MatrixVectorProductTranspJitterBetaOne = nullptr; #endif } @@ -65,124 +72,122 @@ CSysMatrix::CSysMatrix(void) { template CSysMatrix::~CSysMatrix(void) { - /*--- Memory deallocation ---*/ - - if (matrix != NULL) delete [] matrix; - if (ILU_matrix != NULL) delete [] ILU_matrix; - if (row_ptr != NULL) delete [] row_ptr; - if (col_ind != NULL) delete [] col_ind; - - if (ilu_fill_in != 0) { - if (row_ptr_ilu != NULL) delete [] row_ptr_ilu; - if (col_ind_ilu != NULL) delete [] col_ind_ilu; - } - - if (block != NULL) 
delete [] block; - if (block_weight != NULL) delete [] block_weight; - if (block_inverse != NULL) delete [] block_inverse; - - if (prod_row_vector != NULL) delete [] prod_row_vector; - if (aux_vector != NULL) delete [] aux_vector; - if (sum_vector != NULL) delete [] sum_vector; - if (invM != NULL) delete [] invM; + if (omp_partitions != nullptr) delete [] omp_partitions; + if (ILU_matrix != nullptr) MemoryAllocation::aligned_free(ILU_matrix); + if (matrix != nullptr) MemoryAllocation::aligned_free(matrix); + if (invM != nullptr) MemoryAllocation::aligned_free(invM); #ifdef USE_MKL - if ( MatrixMatrixProductJitter != NULL ) mkl_jit_destroy( MatrixMatrixProductJitter ); - if ( MatrixVectorProductJitterBetaZero != NULL ) mkl_jit_destroy( MatrixVectorProductJitterBetaZero ); - if ( MatrixVectorProductJitterBetaOne != NULL ) mkl_jit_destroy( MatrixVectorProductJitterBetaOne ); - if ( MatrixVectorProductJitterAlphaMinusOne != NULL ) mkl_jit_destroy( MatrixVectorProductJitterAlphaMinusOne ); - if ( MatrixVectorProductTranspJitterBetaOne != NULL ) mkl_jit_destroy( MatrixVectorProductTranspJitterBetaOne ); - if ( mkl_ipiv != NULL ) delete [] mkl_ipiv; + if ( MatrixMatrixProductJitter != nullptr ) mkl_jit_destroy( MatrixMatrixProductJitter ); + if ( MatrixVectorProductJitterBetaZero != nullptr ) mkl_jit_destroy( MatrixVectorProductJitterBetaZero ); + if ( MatrixVectorProductJitterBetaOne != nullptr ) mkl_jit_destroy( MatrixVectorProductJitterBetaOne ); + if ( MatrixVectorProductJitterAlphaMinusOne != nullptr ) mkl_jit_destroy( MatrixVectorProductJitterAlphaMinusOne ); + if ( MatrixVectorProductTranspJitterBetaOne != nullptr ) mkl_jit_destroy( MatrixVectorProductTranspJitterBetaOne ); #endif } template -void CSysMatrix::Initialize(unsigned long nPoint, unsigned long nPointDomain, - unsigned short nVar, unsigned short nEqn, +void CSysMatrix::Initialize(unsigned long npoint, unsigned long npointdomain, + unsigned short nvar, unsigned short neqn, bool EdgeConnect, CGeometry 
*geometry, CConfig *config) { - /*--- Don't delete *row_ptr, *col_ind because they are - asigned to the Jacobian structure. ---*/ + assert(omp_get_thread_num()==0 && "Only the master thread is allowed to initialize the matrix."); - unsigned long iPoint, *row_ptr, *col_ind, index, nnz, Elem, iVar; - unsigned short iNeigh, iElem, iNode, *nNeigh, *nNeigh_ilu; - vector::iterator it; - vector vneighs, vneighs_ilu; + if(matrix != nullptr) { + SU2_OMP_MASTER + SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION); + } - /*--- Set the ILU fill in level --*/ + if(nvar > MAXNVAR) { + SU2_OMP_MASTER + SU2_MPI::Error("nVar larger than expected, increase MAXNVAR.", CURRENT_FUNCTION); + } - ilu_fill_in = config->GetLinear_Solver_ILU_n(); + /*--- Application of this matrix, FVM or FEM. ---*/ + auto type = EdgeConnect? ConnectivityType::FiniteVolume : ConnectivityType::FiniteElement; - /*--- Compute the number of neighbors ---*/ + /*--- Types of preconditioner the matrix will be asked to build. ---*/ + unsigned short sol_prec = config->GetKind_Linear_Solver_Prec(); + unsigned short def_prec = config->GetKind_Deform_Linear_Solver_Prec(); + unsigned short adj_prec = config->GetKind_DiscAdj_Linear_Prec(); + bool adjoint = config->GetDiscrete_Adjoint(); - nNeigh = new unsigned short [nPoint]; - for (iPoint = 0; iPoint < nPoint; iPoint++) { + bool ilu_needed = (sol_prec==ILU) || (def_prec==ILU) || (adjoint && (adj_prec==ILU)); - if (EdgeConnect) { - nNeigh[iPoint] = (geometry->node[iPoint]->GetnPoint()+1); // +1 -> to include diagonal element - } - else { - vneighs.clear(); - for (iElem = 0; iElem < geometry->node[iPoint]->GetnElem(); iElem++) { - Elem = geometry->node[iPoint]->GetElem(iElem); - for (iNode = 0; iNode < geometry->elem[Elem]->GetnNodes(); iNode++) - vneighs.push_back(geometry->elem[Elem]->GetNode(iNode)); - } - vneighs.push_back(iPoint); + /*--- Basic dimensions. 
---*/ + nVar = nvar; + nEqn = neqn; + nPoint = npoint; + nPointDomain = npointdomain; - sort(vneighs.begin(), vneighs.end()); - it = unique(vneighs.begin(), vneighs.end()); - vneighs.resize(it - vneighs.begin()); - nNeigh[iPoint] = vneighs.size(); - } + /*--- Get sparse structure pointers from geometry, + * the data is managed by CGeometry to allow re-use. ---*/ - } + const auto& csr = geometry->GetSparsePattern(type,0); - /*--- Create row_ptr structure, using the number of neighbors ---*/ + row_ptr = csr.outerPtr(); + col_ind = csr.innerIdx(); + dia_ptr = csr.diagPtr(); + nnz = csr.getNumNonZeros(); - row_ptr = new unsigned long [nPoint+1]; - row_ptr[0] = 0; - for (iPoint = 0; iPoint < nPoint; iPoint++) - row_ptr[iPoint+1] = row_ptr[iPoint] + nNeigh[iPoint]; - nnz = row_ptr[nPoint]; + if (type == ConnectivityType::FiniteVolume) + edge_ptr.ptr = geometry->GetEdgeToSparsePatternMap().data(); - /*--- Create col_ind structure ---*/ + /*--- Get ILU sparse pattern, if fill is 0 no new data is allocated. 
--*/ - col_ind = new unsigned long [nnz]; - for (iPoint = 0; iPoint < nPoint; iPoint++) { + if(ilu_needed) + { + ilu_fill_in = config->GetLinear_Solver_ILU_n(); - vneighs.clear(); + const auto& csr_ilu = geometry->GetSparsePattern(type, ilu_fill_in); - if (EdgeConnect) { - for (iNeigh = 0; iNeigh < geometry->node[iPoint]->GetnPoint(); iNeigh++) - vneighs.push_back(geometry->node[iPoint]->GetPoint(iNeigh)); - vneighs.push_back(iPoint); - } - else { - for (iElem = 0; iElem < geometry->node[iPoint]->GetnElem(); iElem++) { - Elem = geometry->node[iPoint]->GetElem(iElem); - for (iNode = 0; iNode < geometry->elem[Elem]->GetnNodes(); iNode++) - vneighs.push_back(geometry->elem[Elem]->GetNode(iNode)); - } - vneighs.push_back(iPoint); - } + row_ptr_ilu = csr_ilu.outerPtr(); + col_ind_ilu = csr_ilu.innerIdx(); + dia_ptr_ilu = csr_ilu.diagPtr(); + nnz_ilu = csr_ilu.getNumNonZeros(); + } - sort(vneighs.begin(), vneighs.end()); - it = unique(vneighs.begin(), vneighs.end()); - vneighs.resize( it - vneighs.begin() ); + /*--- Allocate data. 
---*/ +#define ALLOC_AND_INIT(ptr,num) {\ + ptr = MemoryAllocation::aligned_alloc(64,num*sizeof(ScalarType));\ + for(size_t k=0; k::Initialize(unsigned long nPoint, unsigned long nPoi mkl_jit_create_dgemm( &MatrixVectorProductTranspJitterBetaOne, MKL_COL_MAJOR, MKL_NOTRANS, MKL_NOTRANS, nVar, 1, nVar, 1.0, nVar, nVar, 1.0, nVar ); MatrixVectorProductTranspKernelBetaOne = mkl_jit_get_dgemm_ptr( MatrixVectorProductTranspJitterBetaOne ); - - mkl_ipiv = new lapack_int [ nVar ]; #endif - /*--- Initialization matrix to zero ---*/ - - SetValZero(); - - delete [] nNeigh; - - /*--- ILU(n) preconditioner with a specific sparse structure ---*/ - - if (ilu_fill_in != 0) { - - nNeigh_ilu = new unsigned short [nPoint]; - for (iPoint = 0; iPoint < nPoint; iPoint++) { - - vneighs_ilu.clear(); - SetNeighbours(geometry, iPoint, 0, ilu_fill_in, EdgeConnect, vneighs_ilu); - sort(vneighs_ilu.begin(), vneighs_ilu.end()); - it = unique(vneighs_ilu.begin(), vneighs_ilu.end()); - vneighs_ilu.resize(it - vneighs_ilu.begin()); - nNeigh_ilu[iPoint] = vneighs_ilu.size(); - - } - - row_ptr_ilu = new unsigned long [nPoint+1]; - row_ptr_ilu[0] = 0; - for (iPoint = 0; iPoint < nPoint; iPoint++) - row_ptr_ilu[iPoint+1] = row_ptr_ilu[iPoint] + nNeigh_ilu[iPoint]; - nnz_ilu = row_ptr_ilu[nPoint]; - - /*--- Create col_ind structure ---*/ - - col_ind_ilu = new unsigned long [nnz_ilu]; - for (iPoint = 0; iPoint < nPoint; iPoint++) { - - vneighs_ilu.clear(); - SetNeighbours(geometry, iPoint, 0, ilu_fill_in, EdgeConnect, vneighs_ilu); - sort(vneighs_ilu.begin(), vneighs_ilu.end()); - it = unique(vneighs_ilu.begin(), vneighs_ilu.end()); - vneighs_ilu.resize( it - vneighs_ilu.begin() ); - - index = row_ptr_ilu[iPoint]; - for (iNeigh = 0; iNeigh < vneighs_ilu.size(); iNeigh++) { - col_ind_ilu[index] = vneighs_ilu[iNeigh]; - index++; - } - - } - - ILU_matrix = new ScalarType [nnz_ilu*nVar*nEqn]; - for (iVar = 0; iVar < nnz_ilu*nVar*nEqn; iVar++) ILU_matrix[iVar] = 0.0; - - invM = new ScalarType 
[nPointDomain*nVar*nEqn]; - for (iVar = 0; iVar < nPointDomain*nVar*nEqn; iVar++) invM[iVar] = 0.0; - - delete [] nNeigh_ilu; - - } - -} - -template -void CSysMatrix::SetNeighbours(CGeometry *geometry, unsigned long iPoint, unsigned short deep_level, unsigned short fill_level, - bool EdgeConnect, vector & vneighs) { - unsigned long Point, iElem, Elem; - unsigned short iNode; - - - if (EdgeConnect) { - vneighs.push_back(iPoint); - for (iNode = 0; iNode < geometry->node[iPoint]->GetnPoint(); iNode++) { - Point = geometry->node[iPoint]->GetPoint(iNode); - vneighs.push_back(Point); - if (deep_level < fill_level) SetNeighbours(geometry, Point, deep_level+1, fill_level, EdgeConnect, vneighs); - } - } - else { - for (iElem = 0; iElem < geometry->node[iPoint]->GetnElem(); iElem++) { - Elem = geometry->node[iPoint]->GetElem(iElem); - for (iNode = 0; iNode < geometry->elem[Elem]->GetnNodes(); iNode++) { - Point = geometry->elem[Elem]->GetNode(iNode); - vneighs.push_back(Point); - if (deep_level < fill_level) SetNeighbours(geometry, Point, deep_level+1, fill_level, EdgeConnect, vneighs); - } - } - } - -} - -template -void CSysMatrix::SetIndexes(unsigned long val_nPoint, unsigned long val_nPointDomain, unsigned short val_nVar, unsigned short val_nEq, unsigned long* val_row_ptr, unsigned long* val_col_ind, unsigned long val_nnz, CConfig *config) { - - unsigned long iVar; - - nPoint = val_nPoint; // Assign number of points in the mesh - nPointDomain = val_nPointDomain; // Assign number of points in the mesh - nVar = val_nVar; // Assign number of vars in each block system - nEqn = val_nEq; // Assign number of eqns in each block system - - row_ptr = val_row_ptr; // Assign row values in the spare system structure (Jacobian structure) - col_ind = val_col_ind; // Assign colums values in the spare system structure (Jacobian structure) - nnz = val_nnz; // Assign number of possible non zero blocks in the spare system structure (Jacobian structure) - - if (ilu_fill_in == 0) { - 
row_ptr_ilu = val_row_ptr; // Assign row values in the spare system structure (ILU structure) - col_ind_ilu = val_col_ind; // Assign colums values in the spare system structure (ILU structure) - nnz_ilu = val_nnz; // Assign number of possible non zero blocks in the spare system structure (ILU structure) - } - - matrix = new ScalarType [nnz*nVar*nEqn]; // Reserve memory for the values of the matrix - block = new ScalarType [nVar*nEqn]; - block_weight = new ScalarType [nVar*nEqn]; - block_inverse = new ScalarType [nVar*nEqn]; - - prod_row_vector = new ScalarType [nVar]; - aux_vector = new ScalarType [nVar]; - sum_vector = new ScalarType [nVar]; - - /*--- Memory initialization ---*/ - - for (iVar = 0; iVar < nnz*nVar*nEqn; iVar++) matrix[iVar] = 0.0; - for (iVar = 0; iVar < nVar*nEqn; iVar++) block[iVar] = 0.0; - for (iVar = 0; iVar < nVar*nEqn; iVar++) block_weight[iVar] = 0.0; - for (iVar = 0; iVar < nVar*nEqn; iVar++) block_inverse[iVar] = 0.0; - - for (iVar = 0; iVar < nVar; iVar++) prod_row_vector[iVar] = 0.0; - for (iVar = 0; iVar < nVar; iVar++) aux_vector[iVar] = 0.0; - for (iVar = 0; iVar < nVar; iVar++) sum_vector[iVar] = 0.0; - - if (ilu_fill_in == 0) { - - /*--- Set specific preconditioner matrices (ILU) ---*/ - - if ((config->GetKind_Linear_Solver_Prec() == ILU) || - ((config->GetKind_SU2() == SU2_DEF) && (config->GetKind_Deform_Linear_Solver_Prec() == ILU)) || - ((config->GetKind_SU2() == SU2_DOT) && (config->GetKind_Deform_Linear_Solver_Prec() == ILU)) || - (config->GetKind_Deform_Linear_Solver_Prec() == ILU) || - (config->GetDiscrete_Adjoint() && config->GetKind_DiscAdj_Linear_Prec() == ILU)) { - - /*--- Reserve memory for the ILU matrix. 
---*/ - - ILU_matrix = new ScalarType [nnz_ilu*nVar*nEqn]; - for (iVar = 0; iVar < nnz_ilu*nVar*nEqn; iVar++) ILU_matrix[iVar] = 0.0; - - invM = new ScalarType [nPointDomain*nVar*nEqn]; - for (iVar = 0; iVar < nPointDomain*nVar*nEqn; iVar++) invM[iVar] = 0.0; - - } - - } - - /*--- Set specific preconditioner matrices (Jacobi and Linelet) ---*/ - - if ((config->GetKind_Linear_Solver_Prec() == JACOBI) || - (config->GetKind_Linear_Solver_Prec() == LINELET) || - ((config->GetKind_SU2() == SU2_DEF) && (config->GetKind_Deform_Linear_Solver_Prec() == JACOBI)) || - ((config->GetKind_SU2() == SU2_DOT) && (config->GetKind_Deform_Linear_Solver_Prec() == JACOBI)) || - (config->GetDiscrete_Adjoint() && config->GetKind_DiscAdj_Linear_Solver() == JACOBI) || - (config->GetFSI_Simulation() && config->GetKind_Deform_Linear_Solver_Prec() == JACOBI)) { - - /*--- Reserve memory for the values of the inverse of the preconditioner. ---*/ - - invM = new ScalarType [nPointDomain*nVar*nEqn]; - for (iVar = 0; iVar < nPointDomain*nVar*nEqn; iVar++) invM[iVar] = 0.0; - - } - } template template -void CSysMatrix::InitiateComms(CSysVector & x, +void CSysMatrix::InitiateComms(const CSysVector & x, CGeometry *geometry, CConfig *config, - unsigned short commType) { + unsigned short commType) const { /*--- Local variables ---*/ @@ -527,7 +365,7 @@ template void CSysMatrix::CompleteComms(CSysVector & x, CGeometry *geometry, CConfig *config, - unsigned short commType) { + unsigned short commType) const { /*--- Local variables ---*/ @@ -539,7 +377,7 @@ void CSysMatrix::CompleteComms(CSysVector & x, /*--- Set some local pointers to make access simpler. ---*/ - su2double *bufDRecv = geometry->bufD_P2PRecv; + const su2double *bufDRecv = geometry->bufD_P2PRecv; /*--- Store the data that was communicated into the appropriate location within the local class data structures. 
---*/ @@ -650,129 +488,201 @@ void CSysMatrix::CompleteComms(CSysVector & x, } template -void CSysMatrix::DeleteValsRowi(unsigned long i) { - - unsigned long block_i = i/nVar; - unsigned long row = i - block_i*nVar; - unsigned long index, iVar; - - for (index = row_ptr[block_i]; index < row_ptr[block_i+1]; index++) { - for (iVar = 0; iVar < nVar; iVar++) - matrix[index*nVar*nVar+row*nVar+iVar] = 0.0; // Delete row values in the block - if (col_ind[index] == block_i) - matrix[index*nVar*nVar+row*nVar+row] = 1.0; // Set 1 to the diagonal element - } - +void CSysMatrix::SetValZero() { + SU2_OMP_FOR_STAT(omp_light_size) + for (auto index = 0ul; index < nnz*nVar*nEqn; index++) + matrix[index] = 0.0; } template -void CSysMatrix::UpperProduct(const CSysVector & vec, unsigned long row_i) { +void CSysMatrix::SetValDiagonalZero() { + SU2_OMP_FOR_STAT(omp_heavy_size) + for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint) + for (auto index = 0ul; index < nVar*nEqn; ++index) + matrix[dia_ptr[iPoint]*nVar*nEqn + index] = 0.0; +} - unsigned long iVar, index, col_j; +template +void CSysMatrix::Gauss_Elimination(ScalarType* matrix, ScalarType* vec) const { - for (iVar = 0; iVar < nVar; iVar++) - prod_row_vector[iVar] = 0; +#ifdef USE_MKL_LAPACK + // With MKL_DIRECT_CALL enabled, this is significantly faster than native code on Intel Architectures. 
+ lapack_int ipiv[MAXNVAR]; + LAPACKE_dgetrf( LAPACK_ROW_MAJOR, nVar, nVar, matrix, nVar, ipiv); + LAPACKE_dgetrs( LAPACK_ROW_MAJOR, 'N', nVar, 1, matrix, nVar, ipiv, vec, 1 ); +#else +#define A(I,J) matrix[(I)*nVar+(J)] - for (index = row_ptr[row_i]; index < row_ptr[row_i+1]; index++) { - col_j = col_ind[index]; - if (col_j > row_i) { - MatrixVectorProductAdd(&matrix[index*nVar*nVar], &vec[col_j*nVar], prod_row_vector); + /*--- Transform system in Upper Matrix ---*/ + for (auto iVar = 1ul; iVar < nVar; iVar++) { + for (auto jVar = 0ul; jVar < iVar; jVar++) { + ScalarType weight = A(iVar,jVar) / A(jVar,jVar); + for (auto kVar = jVar; kVar < nVar; kVar++) + A(iVar,kVar) -= weight * A(jVar,kVar); + vec[iVar] -= weight * vec[jVar]; } } + /*--- Backwards substitution ---*/ + for (auto iVar = nVar; iVar > 0ul;) { + iVar--; // unsigned type + for (auto jVar = iVar+1; jVar < nVar; jVar++) + vec[iVar] -= A(iVar,jVar) * vec[jVar]; + vec[iVar] /= A(iVar,iVar); + } +#undef A +#endif } template -void CSysMatrix::LowerProduct(const CSysVector & vec, unsigned long row_i) { +void CSysMatrix::MatrixInverse(ScalarType *matrix, ScalarType *inverse) const { - unsigned long iVar, index, col_j; + /*--- This is a generalization of Gaussian elimination for multiple rhs' (the basis vectors). + We could call "Gauss_Elimination" multiple times or fully generalize it for multiple rhs, + the performance of both routines would suffer in both cases without the use of exotic templating. + And so it feels reasonable to have some duplication here. ---*/ - for (iVar = 0; iVar < nVar; iVar++) - prod_row_vector[iVar] = 0; + assert((matrix != inverse) && "Output cannot be the same as the input."); - for (index = row_ptr[row_i]; index < row_ptr[row_i+1]; index++) { - col_j = col_ind[index]; - if (col_j < row_i) { - MatrixVectorProductAdd(&matrix[index*nVar*nVar], &vec[col_j*nVar], prod_row_vector); +#define M(I,J) inverse[(I)*nVar+(J)] + + /*--- Initialize the inverse with the identity. 
---*/ + for (auto iVar = 0ul; iVar < nVar; iVar++) + for (auto jVar = 0ul; jVar < nVar; jVar++) + M(iVar,jVar) = ScalarType(iVar==jVar); + + /*--- Inversion ---*/ +#ifdef USE_MKL_LAPACK + // With MKL_DIRECT_CALL enabled, this is significantly faster than native code on Intel Architectures. + lapack_int ipiv[MAXNVAR]; + LAPACKE_dgetrf( LAPACK_ROW_MAJOR, nVar, nVar, matrix, nVar, ipiv ); + LAPACKE_dgetrs( LAPACK_ROW_MAJOR, 'N', nVar, nVar, matrix, nVar, ipiv, inverse, nVar ); +#else +#define A(I,J) matrix[(I)*nVar+(J)] + + /*--- Transform system in Upper Matrix ---*/ + for (auto iVar = 1ul; iVar < nVar; iVar++) { + for (auto jVar = 0ul; jVar < iVar; jVar++) + { + ScalarType weight = A(iVar,jVar) / A(jVar,jVar); + + for (auto kVar = jVar; kVar < nVar; kVar++) + A(iVar,kVar) -= weight * A(jVar,kVar); + + /*--- at this stage M is lower triangular so not all cols need updating ---*/ + for (auto kVar = 0ul; kVar <= jVar; kVar++) + M(iVar,kVar) -= weight * M(jVar,kVar); } } + /*--- Backwards substitution ---*/ + for (auto iVar = nVar; iVar > 0ul;) { + iVar--; // unsigned type + for (auto jVar = iVar+1; jVar < nVar; jVar++) + for (auto kVar = 0ul; kVar < nVar; kVar++) + M(iVar,kVar) -= A(iVar,jVar) * M(jVar,kVar); + + for (auto kVar = 0ul; kVar < nVar; kVar++) + M(iVar,kVar) /= A(iVar,iVar); + } +#undef A +#endif +#undef M } template -void CSysMatrix::DiagonalProduct(const CSysVector & vec, unsigned long row_i) { +void CSysMatrix::DeleteValsRowi(unsigned long i) { - for (unsigned long index = row_ptr[row_i]; index < row_ptr[row_i+1]; index++) { - if (col_ind[index] == row_i) { - MatrixVectorProduct(&matrix[index*nVar*nVar], &vec[row_i*nVar], prod_row_vector); - break; - } + unsigned long block_i = i/nVar; + unsigned long row = i - block_i*nVar; + unsigned long index, iVar; + + for (index = row_ptr[block_i]; index < row_ptr[block_i+1]; index++) { + for (iVar = 0; iVar < nVar; iVar++) + matrix[index*nVar*nVar+row*nVar+iVar] = 0.0; // Delete row values in the block + if 
(col_ind[index] == block_i) + matrix[index*nVar*nVar+row*nVar+row] = 1.0; // Set 1 to the diagonal element } } template -void CSysMatrix::RowProduct(const CSysVector & vec, unsigned long row_i) { - +void CSysMatrix::RowProduct(const CSysVector & vec, + unsigned long row_i, ScalarType *prod) const { unsigned long iVar, index, col_j; - for (iVar = 0; iVar < nVar; iVar++) - prod_row_vector[iVar] = 0; + for (iVar = 0; iVar < nVar; iVar++) prod[iVar] = 0.0; for (index = row_ptr[row_i]; index < row_ptr[row_i+1]; index++) { col_j = col_ind[index]; - MatrixVectorProductAdd(&matrix[index*nVar*nVar], &vec[col_j*nVar], prod_row_vector); + MatrixVectorProductAdd(&matrix[index*nVar*nVar], &vec[col_j*nVar], prod); } } template -void CSysMatrix::MatrixVectorProduct(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config) { - - unsigned long prod_begin, vec_begin, mat_begin, index, row_i; +void CSysMatrix::MatrixVectorProduct(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const { /*--- Some checks for consistency between CSysMatrix and the CSysVectors ---*/ +#ifndef NDEBUG if ( (nVar != vec.GetNVar()) || (nVar != prod.GetNVar()) ) { - cerr << "CSysMatrix::MatrixVectorProduct(const CSysVector&, CSysVector): " - << "nVar values incompatible." << endl; - throw(-1); + SU2_OMP_MASTER + SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION); } if ( (nPoint != vec.GetNBlk()) || (nPoint != prod.GetNBlk()) ) { - cerr << "CSysMatrix::MatrixVectorProduct(const CSysVector&, CSysVector): " - << "nPoint and nBlk values incompatible." 
<< endl; - throw(-1); + SU2_OMP_MASTER + SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION); } +#endif - prod = ScalarType(0.0); // set all entries of prod to zero - for (row_i = 0; row_i < nPointDomain; row_i++) { - prod_begin = row_i*nVar; // offset to beginning of block row_i - for (index = row_ptr[row_i]; index < row_ptr[row_i+1]; index++) { - vec_begin = col_ind[index]*nVar; // offset to beginning of block col_ind[index] - mat_begin = (index*nVar*nVar); // offset to beginning of matrix block[row_i][col_ind[indx]] + /*--- OpenMP parallelization. First need to make view of vectors + * consistent, a barrier is implicit at the end of FOR section + * (and it is required before master thread communicates). ---*/ + + SU2_OMP_BARRIER + + SU2_OMP_FOR_DYN(omp_heavy_size) + for (auto row_i = 0ul; row_i < nPointDomain; row_i++) { + auto prod_begin = row_i*nVar; // offset to beginning of block row_i + for(auto iVar = 0ul; iVar < nVar; iVar++) + prod[prod_begin+iVar] = 0.0; + for (auto index = row_ptr[row_i]; index < row_ptr[row_i+1]; index++) { + auto vec_begin = col_ind[index]*nVar; // offset to beginning of block col_ind[index] + auto mat_begin = index*nVar*nVar; // offset to beginning of matrix block[row_i][col_ind[indx]] MatrixVectorProductAdd(&matrix[mat_begin], &vec[vec_begin], &prod[prod_begin]); } } - /*--- MPI Parallelization ---*/ - - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + /*--- MPI Parallelization by master thread. 
---*/ + SU2_OMP_MASTER + { + InitiateComms(prod, geometry, config, SOLUTION_MATRIX); + CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + } + SU2_OMP_BARRIER } template -void CSysMatrix::MatrixVectorProductTransposed(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config) { +void CSysMatrix::MatrixVectorProductTransposed(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const { unsigned long prod_begin, vec_begin, mat_begin, index, row_i; /*--- Some checks for consistency between CSysMatrix and the CSysVectors ---*/ +#ifndef NDEBUG if ( (nVar != vec.GetNVar()) || (nVar != prod.GetNVar()) ) { + SU2_OMP_MASTER SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION); } if ( (nPoint != vec.GetNBlk()) || (nPoint != prod.GetNBlk()) ) { + SU2_OMP_MASTER SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION); } +#endif + /// TODO: The transpose product requires a different thread-parallel strategy. prod = ScalarType(0.0); // set all entries of prod to zero for (row_i = 0; row_i < nPointDomain; row_i++) { vec_begin = row_i*nVar; // offset to beginning of block col_ind[index] @@ -794,208 +704,282 @@ template void CSysMatrix::BuildJacobiPreconditioner(bool transpose) { /*--- Build Jacobi preconditioner (M = D), compute and store the inverses of the diagonal blocks. ---*/ + SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar]), transpose); } template -void CSysMatrix::ComputeJacobiPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config) { +void CSysMatrix::ComputeJacobiPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const { /*--- Apply Jacobi preconditioner, y = D^{-1} * x, the inverse of the diagonal is already known. 
---*/ + SU2_OMP_BARRIER + SU2_OMP_FOR_DYN(omp_heavy_size) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) MatrixVectorProduct(&(invM[iPoint*nVar*nVar]), &vec[iPoint*nVar], &prod[iPoint*nVar]); /*--- MPI Parallelization ---*/ - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); - + SU2_OMP_MASTER + { + InitiateComms(prod, geometry, config, SOLUTION_MATRIX); + CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + } + SU2_OMP_BARRIER } template void CSysMatrix::BuildILUPreconditioner(bool transposed) { - unsigned long index, index_, iVar; - ScalarType *Block_ij; - const ScalarType *Block_jk; - long iPoint, jPoint, kPoint; - - /*--- Copy block matrix, note that the original matrix - is modified by the algorithm, so that we have the factorization stored - in the ILUMatrix at the end of this preprocessing. ---*/ - - for (iVar = 0; iVar < nnz_ilu*nVar*nEqn; iVar++) ILU_matrix[iVar] = 0.0; - - for (iPoint = 0; iPoint < (long)nPointDomain; iPoint++) { - for (index = row_ptr[iPoint]; index < row_ptr[iPoint+1]; index++) { - jPoint = col_ind[index]; - if (transposed) { - Block_ij = GetBlock(jPoint, iPoint); - SetBlockTransposed_ILUMatrix(iPoint, jPoint, Block_ij); - } else { - Block_ij = GetBlock(iPoint, jPoint); - SetBlock_ILUMatrix(iPoint, jPoint, Block_ij); + /*--- Copy block matrix to compute factorization in-place. ---*/ + + if ((ilu_fill_in == 0) && !transposed) { + /*--- ILU0, direct copy. ---*/ + SU2_OMP_FOR_STAT(omp_light_size) + for (auto iVar = 0ul; iVar < nnz*nVar*nVar; ++iVar) + ILU_matrix[iVar] = matrix[iVar]; + } + else { + /*--- ILUn clear the ILU matrix first, for ILU0^T + * the copy takes care of the clearing. ---*/ + if (ilu_fill_in > 0) { + SU2_OMP_FOR_STAT(omp_light_size) + for (auto iVar = 0ul; iVar < nnz_ilu*nVar*nVar; iVar++) + ILU_matrix[iVar] = 0.0; + } + + /*--- Transposed or ILUn, traverse matrix to access its blocks + * sequentially and set them in the ILU matrix. 
---*/ + SU2_OMP_FOR_DYN(omp_heavy_size) + for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { + for (auto index = row_ptr[iPoint]; index < row_ptr[iPoint+1]; index++) { + auto jPoint = col_ind[index]; + if (transposed) { + SetBlockTransposed_ILUMatrix(jPoint, iPoint, &matrix[index*nVar*nVar]); + } else { + SetBlock_ILUMatrix(iPoint, jPoint, &matrix[index*nVar*nVar]); + } } } } /*--- Transform system in Upper Matrix ---*/ - for (iPoint = 1; iPoint < (long)nPointDomain; iPoint++) { + /*--- OpenMP Parallelization, a loop construct is used to ensure + * the preconditioner is computed correctly even if called + * outside of a parallel section. ---*/ + + SU2_OMP_FOR_STAT(1) + for(unsigned long thread = 0; thread < omp_num_parts; ++thread) + { + const auto begin = omp_partitions[thread]; + const auto end = omp_partitions[thread+1]; + + /*--- Each thread will work on the submatrix defined from row/col "begin" + * to row/col "end-1" (i.e. the range [begin,end[). Which is exactly + * what the MPI-only implementation does. ---*/ + + ScalarType weight[MAXNVAR*MAXNVAR], aux_block[MAXNVAR*MAXNVAR]; + + for (auto iPoint = begin+1; iPoint < end; iPoint++) { - /*--- Invert and store the previous diagonal block to later compute the weight. ---*/ + /*--- Invert and store the previous diagonal block to later compute the weight. ---*/ - InverseDiagonalBlock_ILUMatrix(iPoint-1, &invM[(iPoint-1)*nVar*nVar]); + InverseDiagonalBlock_ILUMatrix(iPoint-1, &invM[(iPoint-1)*nVar*nVar]); - /*--- For each row (unknown), loop over all entries in A on this row - row_ptr_ilu[iPoint+1] will have the index for the first entry on the next - row. ---*/ + /*--- For this row (unknown), loop over its lower diagonal entries. 
---*/ - for (index = row_ptr_ilu[iPoint]; index < row_ptr_ilu[iPoint+1]; index++) { + for (auto index = row_ptr_ilu[iPoint]; index < dia_ptr_ilu[iPoint]; index++) { - /*--- jPoint here is the column for each entry on this row ---*/ + /*--- jPoint is the column index (jPoint < iPoint). ---*/ - jPoint = col_ind_ilu[index]; + auto jPoint = col_ind_ilu[index]; - /*--- Check that this column is in the lower triangular portion ---*/ + /*--- We only care about the sub matrix within "begin" and "end-1". ---*/ - if (jPoint < iPoint) { + if (jPoint < begin) continue; - /*--- If we're in the lower triangle, multiply the block by - the inverse of the corresponding diagonal block. ---*/ + /*--- Multiply the block by the inverse of the corresponding diagonal block. ---*/ - Block_ij = &ILU_matrix[index*nVar*nEqn]; - MatrixMatrixProduct(Block_ij, &invM[jPoint*nVar*nVar], block_weight); + auto Block_ij = &ILU_matrix[index*nVar*nVar]; + MatrixMatrixProduct(Block_ij, &invM[jPoint*nVar*nVar], weight); - /*--- block_weight holds Aij*inv(Ajj). Jump to the row for jPoint ---*/ + /*--- "weight" holds Aij*inv(Ajj). Jump to the upper part of the jPoint row. ---*/ - for (index_ = row_ptr_ilu[jPoint]; index_ < row_ptr_ilu[jPoint+1]; index_++) { + for (auto index_ = dia_ptr_ilu[jPoint]+1; index_ < row_ptr_ilu[jPoint+1]; index_++) { - /*--- Get the column of the entry ---*/ + /*--- Get the column index (kPoint > jPoint). ---*/ - kPoint = col_ind_ilu[index_]; + auto kPoint = col_ind_ilu[index_]; - /*--- If the column is greater than or equal to jPoint, i.e., the - upper triangular part, then multiply and modify the matrix. - Here, Aik' = Aik - Aij*inv(Ajj)*Ajk. 
---*/ + if (kPoint >= end) break; - if (kPoint > jPoint) { + /*--- If Aik exists, update it: Aik -= Aij*inv(Ajj)*Ajk ---*/ - Block_jk = &ILU_matrix[index_*nVar*nEqn]; - MatrixMatrixProduct(block_weight, Block_jk, block); - SubtractBlock_ILUMatrix(iPoint, kPoint, block); + auto Block_ik = GetBlock_ILUMatrix(iPoint, kPoint); + if (Block_ik != nullptr) { + auto Block_jk = &ILU_matrix[index_*nVar*nVar]; + MatrixMatrixProduct(weight, Block_jk, aux_block); + MatrixSubtraction(Block_ik, aux_block, Block_ik); } } - /*--- Lastly, store block_weight in the lower triangular part, which + /*--- Lastly, store "weight" in the lower triangular part, which will be reused during the forward solve in the precon/smoother. ---*/ - for (iVar = 0; iVar < nVar*nEqn; ++iVar) - Block_ij[iVar] = block_weight[iVar]; - + for (auto iVar = 0ul; iVar < nVar*nVar; ++iVar) + Block_ij[iVar] = weight[iVar]; } } - } + InverseDiagonalBlock_ILUMatrix(end-1, &invM[(end-1)*nVar*nVar]); - InverseDiagonalBlock_ILUMatrix(nPointDomain-1, &invM[(nPointDomain-1)*nVar*nVar]); + } // end parallel } template -void CSysMatrix::ComputeILUPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config) { - - unsigned long index, iVar; - const ScalarType *Block_ij; - long iPoint, jPoint; - - /*--- Copy vector to then work on prod in place ---*/ - - for (iPoint = 0; iPoint < long(nPointDomain*nVar); iPoint++) - prod[iPoint] = vec[iPoint]; - - /*--- Forward solve the system using the lower matrix entries that - were computed and stored during the ILU preprocessing. Note - that we are overwriting the residual vector as we go. 
---*/ - - for (iPoint = 1; iPoint < (long)nPointDomain; iPoint++) { - for (index = row_ptr_ilu[iPoint]; index < row_ptr_ilu[iPoint+1]; index++) { - jPoint = col_ind_ilu[index]; - if (jPoint < iPoint) { - Block_ij = &ILU_matrix[index*nVar*nEqn]; +void CSysMatrix::ComputeILUPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const { + /*--- Coherent view of vectors. ---*/ + SU2_OMP_BARRIER + + /*--- OpenMP Parallelization ---*/ + SU2_OMP_FOR_STAT(1) + for(unsigned long thread = 0; thread < omp_num_parts; ++thread) + { + const auto begin = omp_partitions[thread]; + const auto end = omp_partitions[thread+1]; + + ScalarType aux_vec[MAXNVAR]; + + /*--- Copy vector to then work on prod in place ---*/ + + for (auto iVar = begin*nVar; iVar < end*nVar; iVar++) + prod[iVar] = vec[iVar]; + + /*--- Forward solve the system using the lower matrix entries that + were computed and stored during the ILU preprocessing. Note + that we are overwriting the residual vector as we go. 
---*/ + + for (auto iPoint = begin+1; iPoint < end; iPoint++) { + for (auto index = row_ptr_ilu[iPoint]; index < dia_ptr_ilu[iPoint]; index++) { + auto jPoint = col_ind_ilu[index]; + if (jPoint < begin) continue; + auto Block_ij = &ILU_matrix[index*nVar*nVar]; MatrixVectorProductSub(Block_ij, &prod[jPoint*nVar], &prod[iPoint*nVar]); } } - } - /*--- Backwards substitution (starts at the last row) ---*/ + /*--- Backwards substitution (starts at the last row) ---*/ - for (iPoint = nPointDomain-1; iPoint >= 0; iPoint--) { - - for (iVar = 0; iVar < nVar; iVar++) - sum_vector[iVar] = prod[iPoint*nVar+iVar]; + for (auto iPoint = end; iPoint > begin;) { + iPoint--; // unsigned type + for (auto iVar = 0ul; iVar < nVar; iVar++) + aux_vec[iVar] = prod[iPoint*nVar+iVar]; - for (index = row_ptr_ilu[iPoint]; index < row_ptr_ilu[iPoint+1]; index++) { - jPoint = col_ind_ilu[index]; - if ((jPoint >= iPoint+1) && (jPoint < (long)nPointDomain)) { - Block_ij = &ILU_matrix[index*nVar*nEqn]; - MatrixVectorProductSub(Block_ij, &prod[jPoint*nVar], sum_vector); + for (auto index = dia_ptr_ilu[iPoint]+1; index < row_ptr_ilu[iPoint+1]; index++) { + auto jPoint = col_ind_ilu[index]; + if (jPoint >= end) break; + auto Block_ij = &ILU_matrix[index*nVar*nVar]; + MatrixVectorProductSub(Block_ij, &prod[jPoint*nVar], aux_vec); } - } - MatrixVectorProduct(&invM[iPoint*nVar*nVar], sum_vector, &prod[iPoint*nVar]); - } + MatrixVectorProduct(&invM[iPoint*nVar*nVar], aux_vec, &prod[iPoint*nVar]); + } + } // end parallel /*--- MPI Parallelization ---*/ - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); - + SU2_OMP_MASTER + { + InitiateComms(prod, geometry, config, SOLUTION_MATRIX); + CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + } + SU2_OMP_BARRIER } template -void CSysMatrix::ComputeLU_SGSPreconditioner(const CSysVector & vec, CSysVector & prod, CGeometry *geometry, CConfig *config) { - unsigned long iPoint, iVar; +void 
CSysMatrix::ComputeLU_SGSPreconditioner(const CSysVector & vec, CSysVector & prod, + CGeometry *geometry, CConfig *config) const { /*--- First part of the symmetric iteration: (D+L).x* = b ---*/ - for (iPoint = 0; iPoint < nPointDomain; iPoint++) { - LowerProduct(prod, iPoint); // Compute L.x* - for (iVar = 0; iVar < nVar; iVar++) - prod[iPoint*nVar+iVar] = vec[iPoint*nVar+iVar] - prod_row_vector[iVar]; // Compute aux_vector = b - L.x* - Gauss_Elimination(iPoint, &prod[iPoint*nVar]); // Solve D.x* = aux_vector - } + /*--- Coherent view of vectors. ---*/ + SU2_OMP_BARRIER - /*--- MPI Parallelization ---*/ + /*--- OpenMP Parallelization ---*/ + SU2_OMP_FOR_STAT(1) + for(unsigned long thread = 0; thread < omp_num_parts; ++thread) + { + const auto begin = omp_partitions[thread]; + const auto end = omp_partitions[thread+1]; - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + /*--- Each thread will work on the submatrix defined from row/col "begin" + * to row/col "end-1", except the last thread that also considers halos. + * This is NOT exactly equivalent to the MPI implementation on the same + * number of domains, for that we would need to define "thread-halos". 
---*/ - /*--- Second part of the symmetric iteration: (D+U).x_(1) = D.x* ---*/ + ScalarType low_prod[MAXNVAR]; - for (iPoint = nPointDomain-1; (int)iPoint >= 0; iPoint--) { - DiagonalProduct(prod, iPoint); // Compute D.x* - for (iVar = 0; iVar < nVar; iVar++) - aux_vector[iVar] = prod_row_vector[iVar]; // Compute aux_vector = D.x* - UpperProduct(prod, iPoint); // Compute U.x_(n+1) - for (iVar = 0; iVar < nVar; iVar++) - prod[iPoint*nVar+iVar] = aux_vector[iVar] - prod_row_vector[iVar]; // Compute aux_vector = D.x*-U.x_(n+1) - Gauss_Elimination(iPoint, &prod[iPoint*nVar]); // Solve D.x* = aux_vector - } + for (auto iPoint = begin; iPoint < end; ++iPoint) { + auto idx = iPoint*nVar; + LowerProduct(prod, iPoint, begin, low_prod); // Compute L.x* + VectorSubtraction(&vec[idx], low_prod, &prod[idx]); // Compute y = b - L.x* + Gauss_Elimination(iPoint, &prod[idx]); // Solve D.x* = y + } + } // end parallel /*--- MPI Parallelization ---*/ + SU2_OMP_MASTER + { + InitiateComms(prod, geometry, config, SOLUTION_MATRIX); + CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + } + SU2_OMP_BARRIER - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + /*--- Second part of the symmetric iteration: (D+U).x_(1) = D.x* ---*/ + + /*--- OpenMP Parallelization ---*/ + SU2_OMP_FOR_STAT(1) + for(unsigned long thread = 0; thread < omp_num_parts; ++thread) + { + const auto begin = omp_partitions[thread]; + const auto row_end = omp_partitions[thread+1]; + /*--- On the last thread partition the upper + * product should consider halo columns. ---*/ + const auto col_end = (row_end==nPointDomain)? 
nPoint : row_end; + + ScalarType up_prod[MAXNVAR], dia_prod[MAXNVAR]; + + for (auto iPoint = row_end; iPoint > begin;) { + iPoint--; // because of unsigned type + auto idx = iPoint*nVar; + DiagonalProduct(prod, iPoint, dia_prod); // Compute D.x* + UpperProduct(prod, iPoint, col_end, up_prod); // Compute U.x_(n+1) + VectorSubtraction(dia_prod, up_prod, &prod[idx]); // Compute y = D.x*-U.x_(n+1) + Gauss_Elimination(iPoint, &prod[idx]); // Solve D.x* = y + } + } // end parallel + /*--- MPI Parallelization ---*/ + SU2_OMP_MASTER + { + InitiateComms(prod, geometry, config, SOLUTION_MATRIX); + CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + } + SU2_OMP_BARRIER } template -unsigned short CSysMatrix::BuildLineletPreconditioner(CGeometry *geometry, CConfig *config) { +unsigned long CSysMatrix::BuildLineletPreconditioner(CGeometry *geometry, CConfig *config) { + + assert(omp_get_thread_num()==0 && "Linelet preconditioner cannot be built by multiple threads."); bool add_point; unsigned long iEdge, iPoint, jPoint, index_Point, iLinelet, iVertex, next_Point, counter, iElem; - unsigned short iMarker, iNode, MeanPoints; + unsigned short iMarker, iNode; su2double alpha = 0.9, weight, max_weight, *normal, area, volume_iPoint, volume_jPoint; unsigned long Local_nPoints, Local_nLineLets, Global_nPoints, Global_nLineLets, max_nElem; @@ -1003,6 +987,7 @@ unsigned short CSysMatrix::BuildLineletPreconditioner(CGeometry *geo vector check_Point(nPoint,true); + LineletBool.clear(); LineletBool.resize(nPoint,false); nLinelet = 0; @@ -1142,141 +1127,139 @@ unsigned short CSysMatrix::BuildLineletPreconditioner(CGeometry *geo } Local_nLineLets = nLinelet; -#ifndef HAVE_MPI - Global_nPoints = Local_nPoints; - Global_nLineLets = Local_nLineLets; -#else SU2_MPI::Allreduce(&Local_nPoints, &Global_nPoints, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); SU2_MPI::Allreduce(&Local_nLineLets, &Global_nLineLets, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); -#endif - - MeanPoints = 
SU2_TYPE::Int(ScalarType(Global_nPoints)/ScalarType(Global_nLineLets)); /*--- Memory allocation --*/ - LineletUpper.resize(max_nElem,NULL); - LineletInvDiag.resize(max_nElem*nVar*nVar,0.0); - LineletVector.resize(max_nElem*nVar,0.0); + LineletUpper.resize(omp_get_max_threads(), vector(max_nElem,nullptr)); + LineletVector.resize(omp_get_max_threads(), vector(max_nElem*nVar,0.0)); + LineletInvDiag.resize(omp_get_max_threads(), vector(max_nElem*nVar*nVar,0.0)); - return MeanPoints; + return (unsigned long)(passivedouble(Global_nPoints) / Global_nLineLets); } template void CSysMatrix::ComputeLineletPreconditioner(const CSysVector & vec, CSysVector & prod, - CGeometry *geometry, CConfig *config) { - - unsigned long iVar, iElem, nElem, iLinelet, iPoint, im1Point; - /*--- Pointers to lower, upper, and diagonal blocks ---*/ - const ScalarType *l = NULL, *u = NULL, *d = NULL; - /*--- Inverse of d_{i-1}, modified d_i, modified b_i (rhs) ---*/ - ScalarType *inv_dm1 = NULL, *d_prime = NULL, *b_prime = NULL; - -// if (size != SINGLE_NODE) -// SU2_MPI::Error("Linelet not implemented in parallel.", CURRENT_FUNCTION); + CGeometry *geometry, CConfig *config) const { + /*--- Coherent view of vectors. 
---*/ + SU2_OMP_BARRIER /*--- Jacobi preconditioning where there is no linelet ---*/ - for (iPoint = 0; iPoint < nPointDomain; iPoint++) + SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait) + for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) if (!LineletBool[iPoint]) MatrixVectorProduct(&(invM[iPoint*nVar*nVar]), &vec[iPoint*nVar], &prod[iPoint*nVar]); - /*--- MPI Parallelization ---*/ - - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + /*--- Solve each linelet using the Thomas algorithm ---*/ - /*--- Solve linelet using the Thomas algorithm ---*/ + SU2_OMP_FOR_DYN(1) + for (auto iLinelet = 0ul; iLinelet < nLinelet; iLinelet++) { - for (iLinelet = 0; iLinelet < nLinelet; iLinelet++) { + /*--- Get references to the working vectors allocated for this thread. ---*/ - nElem = LineletPoint[iLinelet].size(); + int thread = omp_get_thread_num(); + vector& lineletUpper = LineletUpper[thread]; + vector& lineletInvDiag = LineletInvDiag[thread]; + vector& lineletVector = LineletVector[thread]; /*--- Initialize the solution vector with the rhs ---*/ - for (iElem = 0; iElem < nElem; iElem++) { - iPoint = LineletPoint[iLinelet][iElem]; - for (iVar = 0; iVar < nVar; iVar++) - LineletVector[iElem*nVar+iVar] = vec[iPoint*nVar+iVar]; + auto nElem = LineletPoint[iLinelet].size(); + + for (auto iElem = 0ul; iElem < nElem; iElem++) { + auto iPoint = LineletPoint[iLinelet][iElem]; + for (auto iVar = 0ul; iVar < nVar; iVar++) + lineletVector[iElem*nVar+iVar] = vec[iPoint*nVar+iVar]; } - /*--- Forward pass, eliminate lower entries, modify diagonal and rhs ---*/ + /*--- Forward pass, eliminate lower entries, modify diagonal and rhs. ---*/ + + /*--- Small temporaries. 
---*/ + ScalarType aux_block[MAXNVAR*MAXNVAR], aux_vector[MAXNVAR]; - iPoint = LineletPoint[iLinelet][0]; - d = GetBlock(iPoint, iPoint); - for (iVar = 0; iVar < nVar*nVar; ++iVar) - LineletInvDiag[iVar] = d[iVar]; + /*--- Copy diagonal block for first point in this linelet. ---*/ + MatrixCopy(&matrix[dia_ptr[LineletPoint[iLinelet][0]]*nVar*nVar], + lineletInvDiag.data()); - for (iElem = 1; iElem < nElem; iElem++) { + for (auto iElem = 1ul; iElem < nElem; iElem++) { /*--- Setup pointers to required matrices and vectors ---*/ - im1Point = LineletPoint[iLinelet][iElem-1]; - iPoint = LineletPoint[iLinelet][iElem]; + auto im1Point = LineletPoint[iLinelet][iElem-1]; + auto iPoint = LineletPoint[iLinelet][iElem]; - d = GetBlock(iPoint, iPoint); - l = GetBlock(iPoint, im1Point); - u = GetBlock(im1Point, iPoint); + auto d = &matrix[dia_ptr[iPoint]*nVar*nVar]; + auto l = GetBlock(iPoint, im1Point); + auto u = GetBlock(im1Point, iPoint); - inv_dm1 = &LineletInvDiag[(iElem-1)*nVar*nVar]; - d_prime = &LineletInvDiag[iElem*nVar*nVar]; - b_prime = &LineletVector[iElem*nVar]; + auto inv_dm1 = &lineletInvDiag[(iElem-1)*nVar*nVar]; + auto d_prime = &lineletInvDiag[iElem*nVar*nVar]; + auto b_prime = &lineletVector[iElem*nVar]; /*--- Invert previous modified diagonal ---*/ - MatrixInverse(inv_dm1, inv_dm1); + MatrixCopy(inv_dm1, aux_block); + MatrixInverse(aux_block, inv_dm1); /*--- Left-multiply by lower block to obtain the weight ---*/ - MatrixMatrixProduct(l, inv_dm1, block_weight); + MatrixMatrixProduct(l, inv_dm1, aux_block); /*--- Multiply weight by upper block to modify current diagonal ---*/ - MatrixMatrixProduct(block_weight, u, d_prime); + MatrixMatrixProduct(aux_block, u, d_prime); MatrixSubtraction(d, d_prime, d_prime); /*--- Update the rhs ---*/ - MatrixVectorProduct(block_weight, &LineletVector[(iElem-1)*nVar], aux_vector); + MatrixVectorProduct(aux_block, &lineletVector[(iElem-1)*nVar], aux_vector); VectorSubtraction(b_prime, aux_vector, b_prime); /*--- Cache upper 
block pointer for the backward substitution phase ---*/ - LineletUpper[iElem-1] = u; + lineletUpper[iElem-1] = u; } /*--- Backwards substitution, LineletVector becomes the solution ---*/ /*--- x_n = d_n^{-1} * b_n ---*/ - Gauss_Elimination(&LineletInvDiag[(nElem-1)*nVar*nVar], &LineletVector[(nElem-1)*nVar]); + Gauss_Elimination(&lineletInvDiag[(nElem-1)*nVar*nVar], &lineletVector[(nElem-1)*nVar]); /*--- x_i = d_i^{-1}*(b_i - u_i*x_{i+1}) ---*/ - for (iElem = nElem-1; iElem > 0; --iElem) { - inv_dm1 = &LineletInvDiag[(iElem-1)*nVar*nVar]; - MatrixVectorProduct(LineletUpper[iElem-1], &LineletVector[iElem*nVar], aux_vector); - VectorSubtraction(&LineletVector[(iElem-1)*nVar], aux_vector, aux_vector); - MatrixVectorProduct(inv_dm1, aux_vector, &LineletVector[(iElem-1)*nVar]); + for (auto iElem = nElem-1; iElem > 0; --iElem) { + auto inv_dm1 = &lineletInvDiag[(iElem-1)*nVar*nVar]; + MatrixVectorProduct(lineletUpper[iElem-1], &lineletVector[iElem*nVar], aux_vector); + VectorSubtraction(&lineletVector[(iElem-1)*nVar], aux_vector, aux_vector); + MatrixVectorProduct(inv_dm1, aux_vector, &lineletVector[(iElem-1)*nVar]); } /*--- Copy results to product vector ---*/ - for (iElem = 0; iElem < nElem; iElem++) { - iPoint = LineletPoint[iLinelet][iElem]; - for (iVar = 0; iVar < nVar; iVar++) - prod[iPoint*nVar+iVar] = LineletVector[iElem*nVar+iVar]; + for (auto iElem = 0ul; iElem < nElem; iElem++) { + auto iPoint = LineletPoint[iLinelet][iElem]; + for (auto iVar = 0ul; iVar < nVar; iVar++) + prod[iPoint*nVar+iVar] = lineletVector[iElem*nVar+iVar]; } } /*--- MPI Parallelization ---*/ - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + SU2_OMP_MASTER + { + InitiateComms(prod, geometry, config, SOLUTION_MATRIX); + CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + } + SU2_OMP_BARRIER } template -void CSysMatrix::ComputeResidual(const CSysVector & sol, const CSysVector & f, CSysVector & res) { - +void 
CSysMatrix::ComputeResidual(const CSysVector & sol, const CSysVector & f, + CSysVector & res) const { + SU2_OMP_FOR_DYN(omp_heavy_size) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { - RowProduct(sol, iPoint); - VectorSubtraction(prod_row_vector, &f[iPoint*nVar], &res[iPoint*nVar]); + ScalarType aux_vec[MAXNVAR]; + RowProduct(sol, iPoint, aux_vec); + VectorSubtraction(aux_vec, &f[iPoint*nVar], &res[iPoint*nVar]); } - } template @@ -1317,25 +1300,57 @@ void CSysMatrix::EnforceSolutionAtNode(const unsigned long node_i, c } +template +template +void CSysMatrix::MatrixMatrixAddition(OtherType alpha, const CSysMatrix& B) { + + /*--- Check the sparse structure is shared between the two matrices, + * comparing pointers is ok as they are obtained from CGeometry. ---*/ + bool ok = (row_ptr == B.row_ptr) && (col_ind == B.col_ind) && + (nVar == B.nVar) && (nEqn == B.nEqn) && (nnz == B.nnz); + + if (!ok) { + SU2_OMP_MASTER + SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); + } + + SU2_OMP_FOR_STAT(omp_light_size) + for (auto i = 0ul; i < nnz*nVar*nEqn; ++i) + matrix[i] += PassiveAssign(alpha*B.matrix[i]); + +} + template void CSysMatrix::BuildPastixPreconditioner(CGeometry *geometry, CConfig *config, unsigned short kind_fact, bool transposed) { #ifdef HAVE_PASTIX - pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix); - pastix_wrapper.Factorize(geometry, config, kind_fact, transposed); + /*--- Pastix will launch nested threads. 
---*/ + SU2_OMP_MASTER + { + pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix); + pastix_wrapper.Factorize(geometry, config, kind_fact, transposed); + } + SU2_OMP_BARRIER #else + SU2_OMP_MASTER SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION); #endif } template void CSysMatrix::ComputePastixPreconditioner(const CSysVector & vec, CSysVector & prod, - CGeometry *geometry, CConfig *config) { + CGeometry *geometry, CConfig *config) const { #ifdef HAVE_PASTIX - pastix_wrapper.Solve(vec,prod); - InitiateComms(prod, geometry, config, SOLUTION_MATRIX); - CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + SU2_OMP_BARRIER + SU2_OMP_MASTER + { + pastix_wrapper.Solve(vec,prod); + InitiateComms(prod, geometry, config, SOLUTION_MATRIX); + CompleteComms(prod, geometry, config, SOLUTION_MATRIX); + } + SU2_OMP_BARRIER #else + SU2_OMP_MASTER SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION); #endif } @@ -1344,27 +1359,32 @@ void CSysMatrix::ComputePastixPreconditioner(const CSysVector void CSysMatrix::BuildPastixPreconditioner(CGeometry *geometry, CConfig *config, unsigned short kind_fact, bool transposed) { + SU2_OMP_MASTER SU2_MPI::Error("The PaStiX preconditioner is only available in CSysMatrix", CURRENT_FUNCTION); } template<> void CSysMatrix::ComputePastixPreconditioner(const CSysVector & vec, CSysVector & prod, - CGeometry *geometry, CConfig *config) { + CGeometry *geometry, CConfig *config) const { + SU2_OMP_MASTER SU2_MPI::Error("The PaStiX preconditioner is only available in CSysMatrix", CURRENT_FUNCTION); } #endif /*--- Explicit instantiations ---*/ template class CSysMatrix; -template void CSysMatrix::InitiateComms(CSysVector&, CGeometry*, CConfig*, unsigned short); -template void CSysMatrix::CompleteComms(CSysVector&, CGeometry*, CConfig*, unsigned short); +template void CSysMatrix::InitiateComms(const CSysVector&, CGeometry*, CConfig*, unsigned short) const; +template void 
CSysMatrix::CompleteComms(CSysVector&, CGeometry*, CConfig*, unsigned short) const; template void CSysMatrix::EnforceSolutionAtNode(unsigned long, const su2double*, CSysVector&); +template void CSysMatrix::MatrixMatrixAddition(su2double, const CSysMatrix&); #ifdef CODI_REVERSE_TYPE template class CSysMatrix; -template void CSysMatrix::InitiateComms(CSysVector&, CGeometry*, CConfig*, unsigned short); -template void CSysMatrix::InitiateComms(CSysVector&, CGeometry*, CConfig*, unsigned short); -template void CSysMatrix::CompleteComms(CSysVector&, CGeometry*, CConfig*, unsigned short); -template void CSysMatrix::CompleteComms(CSysVector&, CGeometry*, CConfig*, unsigned short); +template void CSysMatrix::InitiateComms(const CSysVector&, CGeometry*, CConfig*, unsigned short) const; +template void CSysMatrix::InitiateComms(const CSysVector&, CGeometry*, CConfig*, unsigned short) const; +template void CSysMatrix::CompleteComms(CSysVector&, CGeometry*, CConfig*, unsigned short) const; +template void CSysMatrix::CompleteComms(CSysVector&, CGeometry*, CConfig*, unsigned short) const; template void CSysMatrix::EnforceSolutionAtNode(unsigned long, const passivedouble*, CSysVector&); template void CSysMatrix::EnforceSolutionAtNode(unsigned long, const su2double*, CSysVector&); +template void CSysMatrix::MatrixMatrixAddition(passivedouble, const CSysMatrix&); +template void CSysMatrix::MatrixMatrixAddition(su2double, const CSysMatrix&); #endif diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index f1fbbc9a4803..b61135586ba3 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -6,7 +6,7 @@ * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. 
AUTHORS.md) @@ -27,18 +27,29 @@ #include "../../include/linear_algebra/CSysSolve.hpp" #include "../../include/linear_algebra/CSysSolve_b.hpp" +#include "../../include/omp_structure.hpp" +#include "../../include/option_structure.hpp" +#include "../../include/config_structure.hpp" +#include "../../include/geometry/CGeometry.hpp" +#include "../../include/linear_algebra/CSysMatrix.hpp" +#include "../../include/linear_algebra/CMatrixVectorProduct.hpp" +#include "../../include/linear_algebra/CPreconditioner.hpp" + +#include + +const su2double eps = numeric_limits::epsilon(); /*!< \brief machine epsilon */ template CSysSolve::CSysSolve(const bool mesh_deform_mode) : cg_ready(false), bcg_ready(false), gmres_ready(false), smooth_ready(false) { mesh_deform = mesh_deform_mode; - LinSysRes_ptr = NULL; - LinSysSol_ptr = NULL; + LinSysRes_ptr = nullptr; + LinSysSol_ptr = nullptr; Residual = 0.0; } template -void CSysSolve::ApplyGivens(const ScalarType & s, const ScalarType & c, ScalarType & h1, ScalarType & h2) { +void CSysSolve::ApplyGivens(ScalarType s, ScalarType c, ScalarType & h1, ScalarType & h2) const { ScalarType temp = c*h1 + s*h2; h2 = c*h2 - s*h1; @@ -46,7 +57,7 @@ void CSysSolve::ApplyGivens(const ScalarType & s, const ScalarType & } template -void CSysSolve::GenerateGivens(ScalarType & dx, ScalarType & dy, ScalarType & s, ScalarType & c) { +void CSysSolve::GenerateGivens(ScalarType & dx, ScalarType & dy, ScalarType & s, ScalarType & c) const { if ( (dx == 0.0) && (dy == 0.0) ) { c = 1.0; @@ -76,8 +87,8 @@ void CSysSolve::GenerateGivens(ScalarType & dx, ScalarType & dy, Sca } template -void CSysSolve::SolveReduced(const int & n, const vector > & Hsbg, - const vector & rhs, vector & x) { +void CSysSolve::SolveReduced(int n, const vector > & Hsbg, + const vector & rhs, vector & x) const { // initialize... 
for (int i = 0; i < n; i++) x[i] = rhs[i]; @@ -91,73 +102,38 @@ void CSysSolve::SolveReduced(const int & n, const vector -void CSysSolve::ModGramSchmidt(int i, vector > & Hsbg, vector > & w) { - - bool Convergence = true; +void CSysSolve::ModGramSchmidt(int i, vector > & Hsbg, + vector > & w) const { /*--- Parameter for reorthonormalization ---*/ - static const ScalarType reorth = 0.98; + const ScalarType reorth = 0.98; /*--- Get the norm of the vector being orthogonalized, and find the threshold for re-orthogonalization ---*/ - ScalarType nrm = dotProd(w[i+1], w[i+1]); + ScalarType nrm = w[i+1].squaredNorm(); ScalarType thr = nrm*reorth; /*--- The norm of w[i+1] < 0.0 or w[i+1] = NaN ---*/ - if ((nrm <= 0.0) || (nrm != nrm)) Convergence = false; - - /*--- Synchronization point to check the convergence of the solver ---*/ - -#ifdef HAVE_MPI - - int rank = SU2_MPI::GetRank(); - int size = SU2_MPI::GetSize(); - - unsigned short *sbuf_conv = NULL, *rbuf_conv = NULL; - sbuf_conv = new unsigned short[1]; sbuf_conv[0] = 0; - rbuf_conv = new unsigned short[1]; rbuf_conv[0] = 0; - - /*--- Convergence criteria ---*/ - - sbuf_conv[0] = Convergence; - SU2_MPI::Reduce(sbuf_conv, rbuf_conv, 1, MPI_UNSIGNED_SHORT, MPI_SUM, MASTER_NODE, MPI_COMM_WORLD); - - /*-- Compute global convergence criteria in the master node --*/ - - sbuf_conv[0] = 0; - if (rank == MASTER_NODE) { - if (rbuf_conv[0] == size) sbuf_conv[0] = 1; - else sbuf_conv[0] = 0; - } - - SU2_MPI::Bcast(sbuf_conv, 1, MPI_UNSIGNED_SHORT, MASTER_NODE, MPI_COMM_WORLD); - - if (sbuf_conv[0] == 1) Convergence = true; - else Convergence = false; - - delete [] sbuf_conv; - delete [] rbuf_conv; - -#endif - - if (!Convergence) { - SU2_MPI::Error("SU2 has diverged.", CURRENT_FUNCTION); + if ((nrm <= 0.0) || (nrm != nrm)) { + /*--- nrm is the result of a dot product, communications are implicitly handled. 
---*/ + SU2_OMP_MASTER + SU2_MPI::Error("FGMRES orthogonalization failed, linear solver diverged.", CURRENT_FUNCTION); } /*--- Begin main Gram-Schmidt loop ---*/ for (int k = 0; k < i+1; k++) { - ScalarType prod = dotProd(w[i+1], w[k]); + ScalarType prod = w[i+1].dot(w[k]); Hsbg[k][i] = prod; w[i+1].Plus_AX(-prod, w[k]); /*--- Check if reorthogonalization is necessary ---*/ if (prod*prod > thr) { - prod = dotProd(w[i+1], w[k]); + prod = w[i+1].dot(w[k]); Hsbg[k][i] += prod; w[i+1].Plus_AX(-prod, w[k]); } @@ -181,44 +157,69 @@ void CSysSolve::ModGramSchmidt(int i, vector > & } template -void CSysSolve::WriteHeader(const string & solver, const ScalarType & restol, const ScalarType & resinit) { +void CSysSolve::WriteHeader(string solver, ScalarType restol, ScalarType resinit) const { - cout << "\n# " << solver << " residual history" << endl; - cout << "# Residual tolerance target = " << restol << endl; + cout << "\n# " << solver << " residual history\n"; + cout << "# Residual tolerance target = " << restol << "\n"; cout << "# Initial residual norm = " << resinit << endl; +} + +template +void CSysSolve::WriteHistory(unsigned long iter, ScalarType res) const { + cout << " " << iter << " " << res << endl; } template -void CSysSolve::WriteHistory(const int & iter, const ScalarType & res, const ScalarType & resinit) { +void CSysSolve::WriteFinalResidual(string solver, unsigned long iter, ScalarType res) const { - cout << " " << iter << " " << res/resinit << endl; + cout << "# " << solver << " final (true) residual:\n"; + cout << "# Iteration = " << iter << ": |res|/|res0| = " << res << ".\n" << endl; +} + +template +void CSysSolve::WriteWarning(ScalarType res_calc, ScalarType res_true, ScalarType tol) const { + cout << "# WARNING:\n"; + cout << "# true residual norm and calculated residual norm do not agree.\n"; + cout << "# true_res = " << res_true << ", calc_res = " << res_calc << ", tol = " << tol*10 << ".\n"; + cout << "# true_res - calc_res = " << res_true - res_calc 
<< endl; } template unsigned long CSysSolve::CG_LinSolver(const CSysVector & b, CSysVector & x, - CMatrixVectorProduct & mat_vec, CPreconditioner & precond, - ScalarType tol, unsigned long m, ScalarType *residual, bool monitoring, CConfig *config) { + const CMatrixVectorProduct & mat_vec, const CPreconditioner & precond, + ScalarType tol, unsigned long m, ScalarType & residual, bool monitoring, CConfig *config) const { - int rank = SU2_MPI::GetRank(); + const bool master = (SU2_MPI::GetRank() == MASTER_NODE) && (omp_get_thread_num() == 0); ScalarType norm_r = 0.0, norm0 = 0.0; - int i = 0; + unsigned long i = 0; /*--- Check the subspace size ---*/ if (m < 1) { - char buf[100]; - SPRINTF(buf, "Illegal value for subspace size, m = %lu", m ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); + SU2_OMP_MASTER + SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); } - /*--- Allocate if not allocated yet ---*/ + /*--- Allocate if not allocated yet, only one thread can + * do this since the working vectors are shared. ---*/ if (!cg_ready) { - A_x = b; - z = b; - cg_ready = true; + SU2_OMP_MASTER + { + auto nVar = b.GetNVar(); + auto nBlk = b.GetNBlk(); + auto nBlkDomain = b.GetNBlkDomain(); + + A_x.Initialize(nBlk, nBlkDomain, nVar, nullptr); + r.Initialize(nBlk, nBlkDomain, nVar, nullptr); + z.Initialize(nBlk, nBlkDomain, nVar, nullptr); + p.Initialize(nBlk, nBlkDomain, nVar, nullptr); + + cg_ready = true; + } + SU2_OMP_BARRIER } /*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/ @@ -233,7 +234,7 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & norm_r = r.norm(); norm0 = b.norm(); if ((norm_r < tol*norm0) || (norm_r < eps)) { - if (rank == MASTER_NODE) cout << "CSysSolve::ConjugateGradient(): system solved by initial guess." << endl; + if (master) cout << "CSysSolve::ConjugateGradient(): system solved by initial guess." 
<< endl; return 0; } @@ -243,20 +244,21 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & /*--- Output header information including initial residual ---*/ - if ((monitoring) && (rank == MASTER_NODE)) { + if ((monitoring) && (master)) { WriteHeader("CG", tol, norm_r); - WriteHistory(i, norm_r, norm0); + WriteHistory(i, norm_r/norm0); } } - ScalarType alpha, beta, r_dot_z; + ScalarType alpha, beta, r_dot_z, r_dot_z_old; precond(r, z); p = z; + r_dot_z = r.dot(z); /*--- Loop over all search directions ---*/ - for (i = 0; i < (int)m; i++) { + for (i = 0; i < m; i++) { /*--- Apply matrix to p to build Krylov subspace ---*/ @@ -264,9 +266,7 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & /*--- Calculate step-length alpha ---*/ - r_dot_z = dotProd(r, z); - alpha = dotProd(A_x, p); - alpha = r_dot_z / alpha; + alpha = r_dot_z / A_x.dot(p); /*--- Update solution and residual: ---*/ @@ -281,7 +281,8 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & norm_r = r.norm(); if (norm_r < tol*norm0) break; - if (((monitoring) && (rank == MASTER_NODE)) && ((i+1) % 10 == 0)) WriteHistory(i+1, norm_r, norm0); + if (((monitoring) && (master)) && ((i+1) % 10 == 0)) + WriteHistory(i+1, norm_r/norm0); } @@ -290,9 +291,9 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & /*--- Calculate Gram-Schmidt coefficient beta, beta = dotProd(r_{i+1}, z_{i+1}) / dotProd(r_{i}, z_{i}) ---*/ - beta = 1.0 / r_dot_z; - r_dot_z = dotProd(r, z); - beta *= r_dot_z; + r_dot_z_old = r_dot_z; + r_dot_z = r.dot(z); + beta = r_dot_z / r_dot_z_old; /*--- Gram-Schmidt orthogonalization; p = beta *p + z ---*/ @@ -304,52 +305,42 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & if ((monitoring) && (config->GetComm_Level() == COMM_FULL)) { - if (rank == MASTER_NODE) { - cout << "# Conjugate Gradient final (true) residual:" << endl; - cout << "# Iteration = " << i << ": |res|/|res0| = " << norm_r/norm0 << ".\n" << endl; - } + if (master) WriteFinalResidual("CG", i, 
norm_r/norm0); mat_vec(x, A_x); r = b; r -= A_x; ScalarType true_res = r.norm(); if (fabs(true_res - norm_r) > tol*10.0) { - if (rank == MASTER_NODE) { - cout << "# WARNING in CSysSolve::CG_LinSolver(): " << endl; - cout << "# true residual norm and calculated residual norm do not agree." << endl; - cout << "# true_res = " << true_res <<", calc_res = " << norm_r <<", tol = " << tol*10 <<"."<< endl; - cout << "# true_res - calc_res = " << true_res - norm_r << endl; + if (master) { + WriteWarning(norm_r, true_res, tol); } } } - (*residual) = norm_r/norm0; - return (unsigned long) i; + residual = norm_r/norm0; + return i; } template unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector & b, CSysVector & x, - CMatrixVectorProduct & mat_vec, CPreconditioner & precond, - ScalarType tol, unsigned long m, ScalarType *residual, bool monitoring, CConfig *config) { + const CMatrixVectorProduct & mat_vec, const CPreconditioner & precond, + ScalarType tol, unsigned long m, ScalarType & residual, bool monitoring, CConfig *config) const { - int rank = SU2_MPI::GetRank(); + const bool master = (SU2_MPI::GetRank() == MASTER_NODE) && (omp_get_thread_num() == 0); /*--- Check the subspace size ---*/ if (m < 1) { - char buf[100]; - SPRINTF(buf, "Illegal value for subspace size, m = %lu", m ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); + SU2_OMP_MASTER + SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); } - /*--- Check the subspace size ---*/ - if (m > 5000) { - char buf[100]; - SPRINTF(buf, "Illegal value for subspace size (too high), m = %lu", m ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); + SU2_OMP_MASTER + SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION); } /*--- Allocate if not allocated yet @@ -357,12 +348,18 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector g(m+1, 0.0); vector sn(m+1, 0.0); @@ -370,11 +367,11 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector y(m, 0.0); vector 
> H(m+1, vector(m, 0.0)); - /*--- Calculate the norm of the rhs vector ---*/ + /*--- Calculate the norm of the rhs vector. ---*/ ScalarType norm0 = b.norm(); - /*--- Calculate the initial residual (actually the negative residual) and compute its norm ---*/ + /*--- Calculate the initial residual (actually the negative residual) and compute its norm. ---*/ mat_vec(x, W[0]); W[0] -= b; @@ -383,19 +380,19 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector::FGMRES_LinSolver(const CSysVector::FGMRES_LinSolver(const CSysVector::FGMRES_LinSolver(const CSysVector::FGMRES_LinSolver(const CSysVectorGetComm_Level() == COMM_FULL)) { - if (rank == MASTER_NODE) { - cout << "# FGMRES final (true) residual:" << endl; - cout << "# Iteration = " << i << ": |res|/|res0| = " << beta/norm0 << ".\n" << endl; - } + if (master) WriteFinalResidual("FGMRES", i, beta/norm0); mat_vec(x, W[0]); W[0] -= b; ScalarType res = W[0].norm(); if (fabs(res - beta) > tol*10) { - if (rank == MASTER_NODE) { - cout << "# WARNING in CSysSolve::FGMRES_LinSolver(): " << endl; - cout << "# true residual norm and calculated residual norm do not agree." 
<< endl; - cout << "# res = " << res <<", beta = " << beta <<", tol = " << tol*10 <<"."<< endl; - cout << "# res - beta = " << res - beta << endl << endl; + if (master) { + WriteWarning(beta, res, tol); } } } - (*residual) = beta/norm0; - return (unsigned long) i; + residual = beta/norm0; + return i; } template unsigned long CSysSolve::BCGSTAB_LinSolver(const CSysVector & b, CSysVector & x, - CMatrixVectorProduct & mat_vec, CPreconditioner & precond, - ScalarType tol, unsigned long m, ScalarType *residual, bool monitoring, CConfig *config) { + const CMatrixVectorProduct & mat_vec, const CPreconditioner & precond, + ScalarType tol, unsigned long m, ScalarType & residual, bool monitoring, CConfig *config) const { - int rank = SU2_MPI::GetRank(); + const bool master = (SU2_MPI::GetRank() == MASTER_NODE) && (omp_get_thread_num() == 0); ScalarType norm_r = 0.0, norm0 = 0.0; - int i = 0; + unsigned long i = 0; /*--- Check the subspace size ---*/ if (m < 1) { - char buf[100]; - SPRINTF(buf, "Illegal value for subspace size, m = %lu", m ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); + SU2_OMP_MASTER + SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); } /*--- Allocate if not allocated yet ---*/ if (!bcg_ready) { - A_x = b; - p = b; - z = b; - v = b; - bcg_ready = true; + SU2_OMP_MASTER + { + auto nVar = b.GetNVar(); + auto nBlk = b.GetNBlk(); + auto nBlkDomain = b.GetNBlkDomain(); + + A_x.Initialize(nBlk, nBlkDomain, nVar, nullptr); + r_0.Initialize(nBlk, nBlkDomain, nVar, nullptr); + r.Initialize(nBlk, nBlkDomain, nVar, nullptr); + p.Initialize(nBlk, nBlkDomain, nVar, nullptr); + v.Initialize(nBlk, nBlkDomain, nVar, nullptr); + z.Initialize(nBlk, nBlkDomain, nVar, nullptr); + + bcg_ready = true; + } + SU2_OMP_BARRIER } /*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/ @@ -524,7 +525,7 @@ unsigned long CSysSolve::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const 
CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVectorGetComm_Level() == COMM_FULL)) { - if (rank == MASTER_NODE) { - cout << "# BCGSTAB final (true) residual:" << endl; - cout << "# Iteration = " << i << ": |res|/|res0| = " << norm_r/norm0 << ".\n" << endl; - } + if (master) WriteFinalResidual("BCGSTAB", i, norm_r/norm0); mat_vec(x, A_x); r = b; r -= A_x; ScalarType true_res = r.norm(); - if ((fabs(true_res - norm_r) > tol*10.0) && (rank == MASTER_NODE)) { - cout << "# WARNING in CSysSolve::BCGSTAB_LinSolver(): " << endl; - cout << "# true residual norm and calculated residual norm do not agree." << endl; - cout << "# true_res = " << true_res <<", calc_res = " << norm_r <<", tol = " << tol*10 <<"."<< endl; - cout << "# true_res - calc_res = " << true_res <<" "<< norm_r << endl; + if ((fabs(true_res - norm_r) > tol*10.0) && (master)) { + WriteWarning(norm_r, true_res, tol); } } - (*residual) = norm_r/norm0; - return (unsigned long) i; + residual = norm_r/norm0; + return i; } template unsigned long CSysSolve::Smoother_LinSolver(const CSysVector & b, CSysVector & x, - CMatrixVectorProduct & mat_vec, CPreconditioner & precond, - ScalarType tol, unsigned long m, ScalarType *residual, bool monitoring, CConfig *config) { + const CMatrixVectorProduct & mat_vec, const CPreconditioner & precond, + ScalarType tol, unsigned long m, ScalarType & residual, bool monitoring, CConfig *config) const { - int rank = SU2_MPI::GetRank(); + const bool master = (SU2_MPI::GetRank() == MASTER_NODE) && (omp_get_thread_num() == 0); ScalarType norm_r = 0.0, norm0 = 0.0; unsigned long i = 0; @@ -654,18 +650,27 @@ unsigned long CSysSolve::Smoother_LinSolver(const CSysVectorGetLinear_Solver_Smoother_Relaxation()); if (m < 1) { - char buf[100]; - SPRINTF(buf, "Illegal value for smoothing 
iterations, m = %lu", m ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); + SU2_OMP_MASTER + SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); } /*--- Allocate vectors for residual (r), solution increment (z), and matrix-vector product (A_x), for the latter two this is done only on the first call to the method. ---*/ if (!smooth_ready) { - z = b; - A_x = b; - smooth_ready = true; + SU2_OMP_MASTER + { + auto nVar = b.GetNVar(); + auto nBlk = b.GetNBlk(); + auto nBlkDomain = b.GetNBlkDomain(); + + A_x.Initialize(nBlk, nBlkDomain, nVar, nullptr); + r.Initialize(nBlk, nBlkDomain, nVar, nullptr); + z.Initialize(nBlk, nBlkDomain, nVar, nullptr); + + smooth_ready = true; + } + SU2_OMP_BARRIER } /*--- Compute the initial residual and check if the system is already solved (if in COMM_FULL mode). ---*/ @@ -680,7 +685,7 @@ unsigned long CSysSolve::Smoother_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVectorGetComm_Level() == COMM_FULL) { norm_r = r.norm(); if (norm_r < tol*norm0) break; - if (((monitoring) && (rank == MASTER_NODE)) && ((i+1) % 5 == 0)) - cout << " " << i << " " << norm_r/norm0 << endl; + if (((monitoring) && (master)) && ((i+1) % 5 == 0)) + WriteHistory(i+1, norm_r/norm0); } } - if ((monitoring) && (rank == MASTER_NODE) && (config->GetComm_Level() == COMM_FULL)) { - cout << "# Smoother final (true) residual:" << endl; - cout << "# Iteration = " << i << ": |res|/|res0| = " << norm_r/norm0 << ".\n" << endl; + if ((monitoring) && (master) && (config->GetComm_Level() == COMM_FULL)) { + WriteFinalResidual("Smoother", i, norm_r/norm0); } - (*residual) = norm_r/norm0; + residual = norm_r/norm0; return i; } template<> -void CSysSolve::HandleTemporariesIn(CSysVector & LinSysRes, CSysVector & LinSysSol) { +void CSysSolve::HandleTemporariesIn(const CSysVector & LinSysRes, CSysVector & LinSysSol) { /*--- When the type is the same the temporaties are not required ---*/ /*--- 
Set the pointers ---*/ @@ -758,13 +760,13 @@ void CSysSolve::HandleTemporariesOut(CSysVector & LinSysSo /*--- When the type is the same the temporaties are not required ---*/ /*--- Reset the pointers ---*/ - LinSysRes_ptr = NULL; - LinSysSol_ptr = NULL; + LinSysRes_ptr = nullptr; + LinSysSol_ptr = nullptr; } #ifdef CODI_REVERSE_TYPE template<> -void CSysSolve::HandleTemporariesIn(CSysVector & LinSysRes, CSysVector & LinSysSol) { +void CSysSolve::HandleTemporariesIn(const CSysVector & LinSysRes, CSysVector & LinSysSol) { /*--- When the type is different we need to copy data to the temporaries ---*/ /*--- Copy data, the solution is also copied because it serves as initial conditions ---*/ @@ -784,13 +786,13 @@ void CSysSolve::HandleTemporariesOut(CSysVector & LinS LinSysSol.PassiveCopy(LinSysSol_tmp); /*--- Reset the pointers ---*/ - LinSysRes_ptr = NULL; - LinSysSol_ptr = NULL; + LinSysRes_ptr = nullptr; + LinSysSol_ptr = nullptr; } #endif template -unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, CSysVector & LinSysRes, +unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, const CSysVector & LinSysRes, CSysVector & LinSysSol, CGeometry *geometry, CConfig *config) { /*--- A word about the templated types. 
It is assumed that the residual and solution vectors are always of su2doubles, @@ -803,8 +805,8 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, CS ---*/ unsigned short KindSolver, KindPrecond; - unsigned long MaxIter, RestartIter, IterLinSol = 0; - ScalarType SolverTol, Norm0 = 0.0; + unsigned long MaxIter, RestartIter; + ScalarType SolverTol; bool ScreenOutput; /*--- Normal mode ---*/ @@ -852,68 +854,88 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, CS HandleTemporariesIn(LinSysRes, LinSysSol); - CMatrixVectorProduct* mat_vec = new CSysMatrixVectorProduct(Jacobian, geometry, config); - CPreconditioner* precond = NULL; + auto mat_vec = CSysMatrixVectorProduct(Jacobian, geometry, config); + CPreconditioner* precond = nullptr; switch (KindPrecond) { case JACOBI: - Jacobian.BuildJacobiPreconditioner(); - precond = new CJacobiPreconditioner(Jacobian, geometry, config); + precond = new CJacobiPreconditioner(Jacobian, geometry, config, false); break; case ILU: - Jacobian.BuildILUPreconditioner(); - precond = new CILUPreconditioner(Jacobian, geometry, config); + precond = new CILUPreconditioner(Jacobian, geometry, config, false); break; case LU_SGS: precond = new CLU_SGSPreconditioner(Jacobian, geometry, config); break; case LINELET: - Jacobian.BuildJacobiPreconditioner(); precond = new CLineletPreconditioner(Jacobian, geometry, config); break; case PASTIX_ILU: case PASTIX_LU_P: case PASTIX_LDLT_P: - Jacobian.BuildPastixPreconditioner(geometry, config, KindPrecond); - precond = new CPastixPreconditioner(Jacobian, geometry, config); + precond = new CPastixPreconditioner(Jacobian, geometry, config, KindPrecond, false); break; default: - Jacobian.BuildJacobiPreconditioner(); - precond = new CJacobiPreconditioner(Jacobian, geometry, config); + precond = new CJacobiPreconditioner(Jacobian, geometry, config, false); break; } - switch (KindSolver) { - case BCGSTAB: - IterLinSol = BCGSTAB_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol, 
MaxIter, &Residual, ScreenOutput, config); - break; - case FGMRES: - IterLinSol = FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol, MaxIter, &Residual, ScreenOutput, config); - break; - case CONJUGATE_GRADIENT: - IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol, MaxIter, &Residual, ScreenOutput, config); - break; - case RESTARTED_FGMRES: - IterLinSol = 0; - Norm0 = LinSysRes_ptr->norm(); - while (IterLinSol < MaxIter) { - /*--- Enforce a hard limit on total number of iterations ---*/ - unsigned long IterLimit = min(RestartIter, MaxIter-IterLinSol); - IterLinSol += FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol, IterLimit, &Residual, ScreenOutput, config); - if ( Residual < SolverTol*Norm0 ) break; - } - break; - case SMOOTHER: - IterLinSol = Smoother_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol, MaxIter, &Residual, ScreenOutput, config); - break; - case PASTIX_LDLT : case PASTIX_LU: - Jacobian.BuildPastixPreconditioner(geometry, config, KindSolver); - Jacobian.ComputePastixPreconditioner(*LinSysRes_ptr, *LinSysSol_ptr, geometry, config); - IterLinSol = 1; - break; - default: - SU2_MPI::Error("Unknown type of linear solver.",CURRENT_FUNCTION); - } + /*--- Start a thread-parallel section covering the preparation of the + * preconditioner and the solution of the linear solver. + * Beware of shared variables, i.e. defined outside the section or + * members of ANY class used therein, they should be treated as + * read-only or explicitly synchronized if written to. ---*/ + + unsigned long IterLinSol = 0; + + SU2_OMP_PARALLEL + { + /*--- Build preconditioner in parallel. ---*/ + precond->Build(); + + /*--- Thread-local variables. 
---*/ + unsigned long iter = 0; + ScalarType residual = 0.0, norm0 = 0.0; + + switch (KindSolver) { + case BCGSTAB: + iter = BCGSTAB_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual, ScreenOutput, config); + break; + case FGMRES: + iter = FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual, ScreenOutput, config); + break; + case CONJUGATE_GRADIENT: + iter = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual, ScreenOutput, config); + break; + case RESTARTED_FGMRES: + norm0 = LinSysRes_ptr->norm(); + while (iter < MaxIter) { + /*--- Enforce a hard limit on total number of iterations ---*/ + unsigned long IterLimit = min(RestartIter, MaxIter-iter); + iter += FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, IterLimit, residual, ScreenOutput, config); + if ( residual < SolverTol*norm0 ) break; + } + break; + case SMOOTHER: + iter = Smoother_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, residual, ScreenOutput, config); + break; + case PASTIX_LDLT : case PASTIX_LU: + Jacobian.BuildPastixPreconditioner(geometry, config, KindSolver); + Jacobian.ComputePastixPreconditioner(*LinSysRes_ptr, *LinSysSol_ptr, geometry, config); + iter = 1; + break; + default: + SU2_MPI::Error("Unknown type of linear solver.",CURRENT_FUNCTION); + } + + /*--- Only one thread modifies shared variables, synchronization + * is not required as we are exiting the parallel section. 
---*/ + SU2_OMP_MASTER + { + IterLinSol = iter; + Residual = residual; + } + + } // end SU2_OMP_PARALLEL - delete mat_vec; delete precond; HandleTemporariesOut(LinSysSol); @@ -964,7 +986,7 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, CS } template -unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, CSysVector & LinSysRes, +unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, const CSysVector & LinSysRes, CSysVector & LinSysSol, CGeometry *geometry, CConfig *config) { #ifdef CODI_REVERSE_TYPE @@ -999,21 +1021,21 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, /*--- Set up preconditioner and matrix-vector product ---*/ - CPreconditioner* precond = NULL; + CPreconditioner* precond = nullptr; switch(KindPrecond) { case ILU: - precond = new CILUPreconditioner(Jacobian, geometry, config); + precond = new CILUPreconditioner(Jacobian, geometry, config, RequiresTranspose); break; case JACOBI: - precond = new CJacobiPreconditioner(Jacobian, geometry, config); + precond = new CJacobiPreconditioner(Jacobian, geometry, config, RequiresTranspose); break; case PASTIX_ILU: case PASTIX_LU_P: case PASTIX_LDLT_P: - precond = new CPastixPreconditioner(Jacobian, geometry, config); + precond = new CPastixPreconditioner(Jacobian, geometry, config, KindPrecond, RequiresTranspose); break; } - CMatrixVectorProduct* mat_vec = new CSysMatrixVectorProductTransposed(Jacobian, geometry, config); + auto mat_vec = CSysMatrixVectorProductTransposed(Jacobian, geometry, config); /*--- Solve the system ---*/ @@ -1021,13 +1043,13 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, switch(KindSolver) { case FGMRES: - IterLinSol = FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol , MaxIter, &Residual, ScreenOutput, config); + IterLinSol = FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, Residual, ScreenOutput, config); break; case BCGSTAB: - IterLinSol = BCGSTAB_LinSolver(*LinSysRes_ptr, 
*LinSysSol_ptr, *mat_vec, *precond, SolverTol , MaxIter, &Residual, ScreenOutput, config); + IterLinSol = BCGSTAB_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , MaxIter, Residual, ScreenOutput, config); break; case CONJUGATE_GRADIENT: - IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol, MaxIter, &Residual, ScreenOutput, config); + IterLinSol = CG_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol, MaxIter, Residual, ScreenOutput, config); break; case RESTARTED_FGMRES: IterLinSol = 0; @@ -1035,7 +1057,7 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, while (IterLinSol < MaxIter) { /*--- Enforce a hard limit on total number of iterations ---*/ unsigned long IterLimit = min(RestartIter, MaxIter-IterLinSol); - IterLinSol += FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, *mat_vec, *precond, SolverTol , IterLimit, &Residual, ScreenOutput, config); + IterLinSol += FGMRES_LinSolver(*LinSysRes_ptr, *LinSysSol_ptr, mat_vec, *precond, SolverTol , IterLimit, Residual, ScreenOutput, config); if ( Residual < SolverTol*Norm0 ) break; } break; @@ -1051,7 +1073,6 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, HandleTemporariesOut(LinSysSol); - delete mat_vec; delete precond; return IterLinSol; diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp index fb44e57f6b12..86c68e4448af 100644 --- a/Common/src/linear_algebra/CSysVector.cpp +++ b/Common/src/linear_algebra/CSysVector.cpp @@ -1,12 +1,12 @@ /*! - * \file vector_structure.cpp + * \file CSysVector.cpp * \brief Main classes required for solving linear systems of equations * \author F. Palacios, J. Hicken * \version 7.0.0 "Blackbird" * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. 
AUTHORS.md) @@ -26,472 +26,243 @@ */ #include "../../include/linear_algebra/CSysVector.hpp" +#include "../../include/mpi_structure.hpp" +#include "../../include/omp_structure.hpp" +#include "../../include/toolboxes/allocation_toolbox.hpp" + +/*! + * \brief OpenMP worksharing construct used in CSysVector for loops. + * \note The loop will only run in parallel if methods are called from a + * parallel region (if not the results will still be correct). + * Static schedule to reduce overhead, chunk size determined at initialization. + * "nowait" clause is safe when calling CSysVector methods after each other + * as the loop size is the same. Methods of other classes that operate on a + * CSysVector and do not have the same work scheduling must use a + * SU2_OMP_BARRIER before using the vector. + */ +#define PARALLEL_FOR SU2_OMP(for schedule(static,omp_chunk_size) nowait) template CSysVector::CSysVector(void) { - nElm = 0; nElmDomain = 0; - nBlk = 0; nBlkDomain = 0; - nVar = 0; - - vec_val = NULL; + vec_val = nullptr; nElm = 0; nElmDomain = 0; nVar = 0; - nBlk = 0; - nBlkDomain = 0; - + omp_chunk_size = OMP_MAX_SIZE; + dotRes = 0.0; } template -CSysVector::CSysVector(const unsigned long & size, const ScalarType & val) { +void CSysVector::Initialize(unsigned long numBlk, unsigned long numBlkDomain, + unsigned long numVar, const ScalarType* val, bool valIsArray) { - nElm = size; nElmDomain = size; - nBlk = nElm; nBlkDomain = nElmDomain; - nVar = 1; + /*--- Assert that this method is only called by one thread. 
---*/ + assert(omp_get_thread_num()==0 && "Only the master thread is allowed to initialize the vector."); - /*--- Check for invalid size, then allocate memory and initialize values ---*/ - if ( (nElm >= ULONG_MAX) ) { - char buf[100]; - SPRINTF(buf, "Invalid input: size = %lu", size ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); + if ((nElm != numBlk*numVar) && (vec_val != nullptr)) { + MemoryAllocation::aligned_free(vec_val); + vec_val = nullptr; } - vec_val = new ScalarType[nElm]; - for (unsigned int i = 0; i < nElm; i++) - vec_val[i] = val; - -#ifdef HAVE_MPI - unsigned long nElmLocal = (unsigned long)nElm; - SU2_MPI::Allreduce(&nElmLocal, &nElmGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); -#endif - -} - -template -CSysVector::CSysVector(const unsigned long & numBlk, const unsigned long & numBlkDomain, const unsigned short & numVar, - const ScalarType & val) { - - nElm = numBlk*numVar; nElmDomain = numBlkDomain*numVar; - nBlk = numBlk; nBlkDomain = numBlkDomain; + nElm = numBlk*numVar; + nElmDomain = numBlkDomain*numVar; nVar = numVar; - /*--- Check for invalid input, then allocate memory and initialize values ---*/ - if ( nElm >= ULONG_MAX ) { - char buf[100]; - SPRINTF(buf, "invalid input: numBlk, numVar = %lu, %u", numBlk, numVar ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); - } - - vec_val = new ScalarType[nElm]; - for (unsigned int i = 0; i < nElm; i++) - vec_val[i] = val; - -#ifdef HAVE_MPI - unsigned long nElmLocal = (unsigned long)nElm; - SU2_MPI::Allreduce(&nElmLocal, &nElmGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); -#endif - -} - -template -CSysVector::CSysVector(const CSysVector & u) { - - /*--- Copy size information, allocate memory, and initialize values ---*/ - nElm = u.nElm; nElmDomain = u.nElmDomain; - nBlk = u.nBlk; nBlkDomain = u.nBlkDomain; - nVar = u.nVar; - - vec_val = new ScalarType[nElm]; - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] = u.vec_val[i]; - -#ifdef HAVE_MPI - nElmGlobal = u.nElmGlobal; 
-#endif - -} + omp_chunk_size = computeStaticChunkSize(nElm, omp_get_max_threads(), OMP_MAX_SIZE); -template -CSysVector::CSysVector(const unsigned long & size, const ScalarType* u_array) { + if (vec_val == nullptr) + vec_val = MemoryAllocation::aligned_alloc(64, nElm*sizeof(ScalarType)); - nElm = size; nElmDomain = size; - nBlk = nElm; nBlkDomain = nElmDomain; - nVar = 1; - - /*--- Check for invalid size, then allocate memory and initialize values ---*/ - if ( nElm >= ULONG_MAX ) { - char buf[100]; - SPRINTF(buf, "Invalid input: size = %lu", size ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); + if(val != nullptr) { + if(!valIsArray) { + for(auto i=0ul; i -CSysVector::CSysVector(const unsigned long & numBlk, const unsigned long & numBlkDomain, const unsigned short & numVar, - const ScalarType* u_array) { - - nElm = numBlk*numVar; nElmDomain = numBlkDomain*numVar; - nBlk = numBlk; nBlkDomain = numBlkDomain; - nVar = numVar; +template +void CSysVector::PassiveCopy(const CSysVector& other) { - /*--- check for invalid input, then allocate memory and initialize values ---*/ - if ( nElm >= ULONG_MAX ) { - char buf[100]; - SPRINTF(buf, "invalid input: numBlk, numVar = %lu, %u", numBlk, numVar ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); - } + /*--- This is a method and not the overload of an operator to make sure who + calls it knows the consequence to the derivative information (lost) ---*/ - vec_val = new ScalarType[nElm]; - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] = u_array[i]; + /*--- check if self-assignment, otherwise perform deep copy ---*/ + if ((const void*)this == (const void*)&other) return; -#ifdef HAVE_MPI - unsigned long nElmLocal = (unsigned long)nElm; - SU2_MPI::Allreduce(&nElmLocal, &nElmGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); -#endif + SU2_OMP_MASTER + Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true); + SU2_OMP_BARRIER + PARALLEL_FOR + for(auto i=0ul; i CSysVector::~CSysVector() { 
- delete [] vec_val; - - nElm = 0; nElmDomain = 0; - nBlk = 0; nBlkDomain = 0; - nVar = 0; + if (vec_val != nullptr) + MemoryAllocation::aligned_free(vec_val); } template -void CSysVector::Initialize(const unsigned long & numBlk, const unsigned long & numBlkDomain, const unsigned short & numVar, const ScalarType & val) { - - nElm = numBlk*numVar; nElmDomain = numBlkDomain*numVar; - nBlk = numBlk; nBlkDomain = numBlkDomain; - nVar = numVar; - - /*--- Check for invalid input, then allocate memory and initialize values ---*/ - if ( nElm >= ULONG_MAX ) { - char buf[100]; - SPRINTF(buf, "invalid input: numBlk, numVar = %lu, %u", numBlk, numVar ); - SU2_MPI::Error(string(buf), CURRENT_FUNCTION); - } +void CSysVector::Equals_AX(ScalarType a, const CSysVector & x) { - vec_val = new ScalarType[nElm]; - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] = val; - -#ifdef HAVE_MPI - unsigned long nElmLocal = (unsigned long)nElm; - SU2_MPI::Allreduce(&nElmLocal, &nElmGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); -#endif + assert(nElm == x.nElm && "Sizes do not match"); + PARALLEL_FOR + for(auto i=0ul; i -void CSysVector::Equals_AX(const ScalarType & a, CSysVector & x) { - /*--- check that *this and x are compatible ---*/ - if (nElm != x.nElm) { - cerr << "CSysVector::Equals_AX(): " << "sizes do not match"; - throw(-1); - } - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] = a * x.vec_val[i]; -} +void CSysVector::Plus_AX(ScalarType a, const CSysVector & x) { -template -void CSysVector::Plus_AX(const ScalarType & a, CSysVector & x) { - /*--- check that *this and x are compatible ---*/ - if (nElm != x.nElm) { - SU2_MPI::Error("Sizes do not match", CURRENT_FUNCTION); - } - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] += a * x.vec_val[i]; + assert(nElm == x.nElm && "Sizes do not match"); + + PARALLEL_FOR + for(auto i=0ul; i -void CSysVector::Equals_AX_Plus_BY(const ScalarType & a, CSysVector & x, const ScalarType & b, CSysVector & y) { - /*--- check that 
*this, x and y are compatible ---*/ - if ((nElm != x.nElm) || (nElm != y.nElm)) { - SU2_MPI::Error("Sizes do not match", CURRENT_FUNCTION); - } - for (unsigned long i = 0; i < nElm; i++) +void CSysVector::Equals_AX_Plus_BY(ScalarType a, const CSysVector & x, + ScalarType b, const CSysVector & y) { + assert(nElm == x.nElm && nElm == y.nElm && "Sizes do not match"); + + PARALLEL_FOR + for(auto i=0ul; i CSysVector & CSysVector::operator=(const CSysVector & u) { - /*--- check if self-assignment, otherwise perform deep copy ---*/ - if (this == &u) return *this; - - /*--- determine if (re-)allocation is needed ---*/ - if (nElm != u.nElm && vec_val != NULL) {delete [] vec_val; vec_val = NULL;} - if (vec_val == NULL) vec_val = new ScalarType[u.nElm]; - - /*--- copy ---*/ - nElm = u.nElm; - nElmDomain = u.nElmDomain; - nBlk = u.nBlk; - nBlkDomain = u.nBlkDomain; - nVar = u.nVar; - - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] = u.vec_val[i]; + assert(nElm == u.nElm && "Sizes do not match"); -#ifdef HAVE_MPI - nElmGlobal = u.nElmGlobal; -#endif - - return *this; -} + PARALLEL_FOR + for(auto i=0ul; i -CSysVector & CSysVector::operator=(const ScalarType & val) { - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] = val; return *this; } template -CSysVector CSysVector::operator+(const CSysVector & u) const { +CSysVector & CSysVector::operator=(ScalarType val) { - /*--- Use copy constructor and compound addition-assignment ---*/ - CSysVector sum(*this); - sum += u; - return sum; -} + PARALLEL_FOR + for(auto i=0ul; i -CSysVector & CSysVector::operator+=(const CSysVector & u) { - - /*--- Check for consistent sizes, then add elements ---*/ - if (nElm != u.nElm) { - SU2_MPI::Error("Sizes do not match", CURRENT_FUNCTION); - } - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] += u.vec_val[i]; return *this; } template -CSysVector CSysVector::operator-(const CSysVector & u) const { +CSysVector & CSysVector::operator+=(const CSysVector & u) { - /*--- Use copy 
constructor and compound subtraction-assignment ---*/ - CSysVector diff(*this); - diff -= u; - return diff; -} + assert(nElm == u.nElm && "Sizes do not match"); -template -CSysVector & CSysVector::operator-=(const CSysVector & u) { + PARALLEL_FOR + for(auto i=0ul; i -CSysVector CSysVector::operator*(const ScalarType & val) const { - - /*--- use copy constructor and compound scalar - multiplication-assignment ---*/ - CSysVector prod(*this); - prod *= val; - return prod; -} - -template -CSysVector operator*(const ScalarType & val, const CSysVector & u) { +CSysVector & CSysVector::operator-=(const CSysVector & u) { - /*--- use copy constructor and compound scalar - multiplication-assignment ---*/ - CSysVector prod(u); - prod *= val; - return prod; -} + assert(nElm == u.nElm && "Sizes do not match"); -template -CSysVector & CSysVector::operator*=(const ScalarType & val) { + PARALLEL_FOR + for(auto i=0ul; i -CSysVector CSysVector::operator/(const ScalarType & val) const { +CSysVector & CSysVector::operator*=(ScalarType val) { - /*--- use copy constructor and compound scalar - division-assignment ---*/ - CSysVector quotient(*this); - quotient /= val; - return quotient; -} + PARALLEL_FOR + for(auto i=0ul; i -CSysVector & CSysVector::operator/=(const ScalarType & val) { - - for (unsigned long i = 0; i < nElm; i++) - vec_val[i] /= val; return *this; } template -ScalarType CSysVector::norm() const { - - /*--- just call dotProd on this*, then sqrt ---*/ - ScalarType val = dotProd(*this, *this); - if (val < 0.0) { - SU2_MPI::Error("Inner product of CSysVector is negative", CURRENT_FUNCTION); - } - return sqrt(val); -} - -template -void CSysVector::CopyToArray(ScalarType* u_array) { - - for (unsigned long i = 0; i < nElm; i++) - u_array[i] = vec_val[i]; -} - -template -void CSysVector::AddBlock(unsigned long val_ipoint, ScalarType *val_residual) { - unsigned short iVar; - - for (iVar = 0; iVar < nVar; iVar++) - vec_val[val_ipoint*nVar+iVar] += val_residual[iVar]; -} - -template 
-void CSysVector::SubtractBlock(unsigned long val_ipoint, ScalarType *val_residual) { - unsigned short iVar; - - for (iVar = 0; iVar < nVar; iVar++) - vec_val[val_ipoint*nVar+iVar] -= val_residual[iVar]; -} - -template -void CSysVector::SetBlock(unsigned long val_ipoint, ScalarType *val_residual) { - unsigned short iVar; - - for (iVar = 0; iVar < nVar; iVar++) - vec_val[val_ipoint*nVar+iVar] = val_residual[iVar]; -} - -template -void CSysVector::SetBlock(unsigned long val_ipoint, unsigned short val_var, ScalarType val_residual) { - - vec_val[val_ipoint*nVar+val_var] = val_residual; -} - -template -void CSysVector::SetBlock_Zero(unsigned long val_ipoint) { - unsigned short iVar; +CSysVector & CSysVector::operator/=(ScalarType val) { - for (iVar = 0; iVar < nVar; iVar++) - vec_val[val_ipoint*nVar+iVar] = 0.0; -} + PARALLEL_FOR + for(auto i=0ul; i -void CSysVector::SetBlock_Zero(unsigned long val_ipoint, unsigned short val_var) { - vec_val[val_ipoint*nVar+val_var] = 0.0; + return *this; } template -ScalarType CSysVector::GetBlock(unsigned long val_ipoint, unsigned short val_var) { - return vec_val[val_ipoint*nVar + val_var]; -} +void CSysVector::CopyToArray(ScalarType* u_array) const { -template -ScalarType *CSysVector::GetBlock(unsigned long val_ipoint) { - return &vec_val[val_ipoint*nVar]; + PARALLEL_FOR + for(auto i=0ul; i -template -void CSysVector::PassiveCopy(const CSysVector& other) { +ScalarType CSysVector::dot(const CSysVector & u) const { +#if !defined(CODI_FORWARD_TYPE) && !defined(CODI_REVERSE_TYPE) - /*--- This is a method and not the overload of an operator to make sure who - calls it knows the consequence to the derivative information (lost) ---*/ + /*--- All threads get the same "view" of the vectors and shared variable. 
---*/ + SU2_OMP_BARRIER + dotRes = 0.0; + SU2_OMP_BARRIER - /*--- check if self-assignment, otherwise perform deep copy ---*/ - if ((const void*)this == (const void*)&other) return; + /*--- Reduction over all threads in this mpi rank using the shared variable. ---*/ + ScalarType sum = 0.0; - /*--- determine if (re-)allocation is needed ---*/ - if (nElm != other.GetLocSize() && vec_val != NULL) { - delete [] vec_val; - vec_val = NULL; - } + PARALLEL_FOR + for(auto i=0ul; i -ScalarType dotProd(const CSysVector & u, const CSysVector & v) { - - /*--- check for consistent sizes ---*/ - if (u.nElm != v.nElm) { - SU2_MPI::Error("Sizes do not match", CURRENT_FUNCTION); + /*--- Reduce across all mpi ranks, only master thread communicates. ---*/ + SU2_OMP_MASTER + { + sum = dotRes; + SelectMPIWrapper::W::Allreduce(&sum, &dotRes, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } - - /*--- find local inner product and, if a parallel run, sum over all - processors (we use nElemDomain instead of nElem) ---*/ - ScalarType loc_prod = 0.0; - for (unsigned long i = 0; i < u.nElmDomain; i++) - loc_prod += u.vec_val[i]*v.vec_val[i]; - ScalarType prod = 0.0; - + /*--- Make view of result consistent across threads. ---*/ + SU2_OMP_BARRIER +#endif // MPI +#else // CODI_TYPE + /*--- Compatible version, no OMP reductions, no atomics, master does everything. ---*/ + SU2_OMP_BARRIER + SU2_OMP_MASTER + { + ScalarType sum = 0.0; + for(auto i=0ul; i::W::Allreduce(&loc_prod, &prod, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + /*--- Reduce across all mpi ranks. 
---*/ + SelectMPIWrapper::W::Allreduce(&sum, &dotRes, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else - prod = loc_prod; -#endif - - return prod; + dotRes = sum; +#endif // MPI + } + SU2_OMP_BARRIER +#endif // CODI + return dotRes; } /*--- Explicit instantiations ---*/ template class CSysVector; -template CSysVector operator*(const su2double&, const CSysVector&); template void CSysVector::PassiveCopy(const CSysVector&); -template su2double dotProd(const CSysVector & u, const CSysVector & v); - -template class CSysVector; #ifdef CODI_REVERSE_TYPE template class CSysVector; -template CSysVector operator*(const passivedouble&, const CSysVector&); template void CSysVector::PassiveCopy(const CSysVector&); template void CSysVector::PassiveCopy(const CSysVector&); -template passivedouble dotProd(const CSysVector & u, const CSysVector & v); #endif diff --git a/SU2_CFD/include/SU2_CFD.hpp b/SU2_CFD/include/SU2_CFD.hpp index 142649fb187b..57ce99b0d8db 100644 --- a/SU2_CFD/include/SU2_CFD.hpp +++ b/SU2_CFD/include/SU2_CFD.hpp @@ -7,7 +7,7 @@ * * SU2 Project Website: https://su2code.github.io * - * The SU2 Project is maintained by the SU2 Foundation + * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * * Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md) @@ -28,6 +28,7 @@ #pragma once #include "../../Common/include/mpi_structure.hpp" +#include "../../Common/include/omp_structure.hpp" #include "CLI11.hpp" #include diff --git a/SU2_CFD/include/solver_structure.hpp b/SU2_CFD/include/solver_structure.hpp index f57c8e048498..233628fd6fa4 100644 --- a/SU2_CFD/include/solver_structure.hpp +++ b/SU2_CFD/include/solver_structure.hpp @@ -3734,15 +3734,6 @@ class CSolver { */ virtual su2double Get_val_I(void); - /*! - * \brief A virtual member. - * \param[in] iPoint - Point i of the Mass Matrix. - * \param[in] jPoint - Point j of the Mass Matrix. - * \param[in] iVar - Variable i of the Mass Matrix submatrix. 
- * \param[in] iVar - Variable j of the Mass Matrix submatrix. - */ - virtual su2double Get_MassMatrix(unsigned long iPoint, unsigned long jPoint, unsigned short iVar, unsigned short jVar); - /*! * \brief Gauss method for solving a linear system. * \param[in] A - Matrix Ax = b. @@ -12222,15 +12213,6 @@ class CFEASolver : public CSolver { */ unsigned short Get_iElem_iDe(unsigned long iElem); - /*! - * \brief Retrieve the Mass Matrix term (to add to the Jacobian of the adjoint problem) - * \param[in] iPoint - Point i of the Mass Matrix. - * \param[in] jPoint - Point j of the Mass Matrix. - * \param[in] iVar - Variable i of the Mass Matrix submatrix. - * \param[in] iVar - Variable j of the Mass Matrix submatrix. - */ - su2double Get_MassMatrix(unsigned long iPoint, unsigned long jPoint, unsigned short iVar, unsigned short jVar); - /*! * \brief Load a solution from a restart file. * \param[in] geometry - Geometrical definition of the problem. diff --git a/SU2_CFD/include/solver_structure.inl b/SU2_CFD/include/solver_structure.inl index 9a8fc6ddd939..763826b86832 100644 --- a/SU2_CFD/include/solver_structure.inl +++ b/SU2_CFD/include/solver_structure.inl @@ -1961,8 +1961,6 @@ inline su2double CSolver::Get_DV_Val(unsigned short i_DV){ return 0.0; } inline su2double CSolver::Get_val_I(void){ return 0.0; } -inline su2double CSolver::Get_MassMatrix(unsigned long iPoint, unsigned long jPoint, unsigned short iVar, unsigned short jVar){ return 0.0; } - inline su2double CIncEulerSolver::GetDensity_Inf(void) { return Density_Inf; } inline su2double CIncEulerSolver::GetModVelocity_Inf(void) { @@ -2282,9 +2280,6 @@ inline su2double CHeatSolverFVM::GetConjugateHeatVariable(unsigned short val_mar inline void CHeatSolverFVM::SetConjugateHeatVariable(unsigned short val_marker, unsigned long val_vertex, unsigned short pos_var, su2double relaxation_factor, su2double val_var) { ConjugateVar[val_marker][val_vertex][pos_var] = relaxation_factor*val_var + 
(1.0-relaxation_factor)*ConjugateVar[val_marker][val_vertex][pos_var]; } -inline su2double CFEASolver::Get_MassMatrix(unsigned long iPoint, unsigned long jPoint, unsigned short iVar, unsigned short jVar){ - return MassMatrix.GetBlock(iPoint, jPoint, iVar, jVar); } - inline unsigned short CFEASolver::Get_iElem_iDe(unsigned long iElem){ return iElem_iDe[iElem]; } inline su2double CFEASolver::GetRes_FEM(unsigned short val_var) { return Conv_Check[val_var]; } diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp index cfee50ffb5cb..dcefc788e553 100644 --- a/SU2_CFD/src/SU2_CFD.cpp +++ b/SU2_CFD/src/SU2_CFD.cpp @@ -60,7 +60,12 @@ int main(int argc, char *argv[]) { #ifdef HAVE_MPI int buffsize; char *buffptr; +#ifdef HAVE_OMP + int provided; + SU2_MPI::Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); +#else SU2_MPI::Init(&argc, &argv); +#endif SU2_MPI::Buffer_attach( malloc(BUFSIZE), BUFSIZE ); SU2_Comm MPICommunicator(MPI_COMM_WORLD); #else diff --git a/SU2_CFD/src/output/CElasticityOutput.cpp b/SU2_CFD/src/output/CElasticityOutput.cpp index 98080abbc820..d8c5db7b9956 100644 --- a/SU2_CFD/src/output/CElasticityOutput.cpp +++ b/SU2_CFD/src/output/CElasticityOutput.cpp @@ -117,7 +117,7 @@ void CElasticityOutput::LoadHistoryData(CConfig *config, CGeometry *geometry, CS } else if (nonlinear_analysis){ SetHistoryOutputValue("RMS_UTOL", log10(fea_solver->LinSysSol.norm())); SetHistoryOutputValue("RMS_RTOL", log10(fea_solver->LinSysRes.norm())); - SetHistoryOutputValue("RMS_ETOL", log10(dotProd(fea_solver->LinSysSol, fea_solver->LinSysRes))); + SetHistoryOutputValue("RMS_ETOL", log10(fea_solver->LinSysSol.dot(fea_solver->LinSysRes))); } diff --git a/SU2_CFD/src/solver_direct_elasticity.cpp b/SU2_CFD/src/solver_direct_elasticity.cpp index 9cff906dcf9b..3274549b534b 100644 --- a/SU2_CFD/src/solver_direct_elasticity.cpp +++ b/SU2_CFD/src/solver_direct_elasticity.cpp @@ -2139,7 +2139,7 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CSolver 
**solver_container, Conv_Check[0] = LinSysSol.norm(); // Norm of the delta-solution vector Conv_Check[1] = LinSysRes.norm(); // Norm of the residual - Conv_Check[2] = dotProd(LinSysSol, LinSysRes); // Position for the energy tolerance + Conv_Check[2] = LinSysSol.dot(LinSysRes); // Position for the energy tolerance /*--- MPI solution ---*/ @@ -3024,8 +3024,8 @@ void CFEASolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_c void CFEASolver::ImplicitNewmark_Iteration(CGeometry *geometry, CSolver **solver_container, CConfig *config) { - unsigned long iPoint, jPoint; - unsigned short iVar, jVar; + unsigned long iPoint; + unsigned short iVar; bool first_iter = (config->GetInnerIter() == 0); bool dynamic = (config->GetTime_Domain()); // Dynamic simulations. @@ -3099,16 +3099,7 @@ void CFEASolver::ImplicitNewmark_Iteration(CGeometry *geometry, CSolver **solver * */ if ((nonlinear_analysis && (newton_raphson || first_iter)) || linear_analysis) { - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (jPoint = 0; jPoint < nPoint; jPoint++) { - for(iVar = 0; iVar < nVar; iVar++) { - for (jVar = 0; jVar < nVar; jVar++) { - Jacobian_ij[iVar][jVar] = a_dt[0] * MassMatrix.GetBlock(iPoint, jPoint, iVar, jVar); - } - } - Jacobian.AddBlock(iPoint, jPoint, Jacobian_ij); - } - } + Jacobian.MatrixMatrixAddition(a_dt[0], MassMatrix); } @@ -3330,8 +3321,8 @@ void CFEASolver::ImplicitNewmark_Relaxation(CGeometry *geometry, CSolver **solve void CFEASolver::GeneralizedAlpha_Iteration(CGeometry *geometry, CSolver **solver_container, CConfig *config) { - unsigned long iPoint, jPoint; - unsigned short iVar, jVar; + unsigned long iPoint; + unsigned short iVar; bool first_iter = (config->GetInnerIter() == 0); bool dynamic = (config->GetTime_Domain()); // Dynamic simulations. 
@@ -3398,16 +3389,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(CGeometry *geometry, CSolver **solve * */ if ((nonlinear_analysis && (newton_raphson || first_iter)) || linear_analysis) { - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (jPoint = 0; jPoint < nPoint; jPoint++) { - for(iVar = 0; iVar < nVar; iVar++) { - for (jVar = 0; jVar < nVar; jVar++) { - Jacobian_ij[iVar][jVar] = a_dt[0] * MassMatrix.GetBlock(iPoint, jPoint, iVar, jVar); - } - } - Jacobian.AddBlock(iPoint, jPoint, Jacobian_ij); - } - } + Jacobian.MatrixMatrixAddition(a_dt[0], MassMatrix); } diff --git a/SU2_CFD/src/solver_direct_heat.cpp b/SU2_CFD/src/solver_direct_heat.cpp index 550dec6e477d..c11890658dba 100644 --- a/SU2_CFD/src/solver_direct_heat.cpp +++ b/SU2_CFD/src/solver_direct_heat.cpp @@ -520,38 +520,35 @@ void CHeatSolverFVM::Centered_Residual(CGeometry *geometry, CSolver **solver_con nVarFlow = solver_container[FLOW_SOL]->GetnVar(); - for (iEdge = 0; iEdge < geometry->GetnEdge(); iEdge++) { + for (iEdge = 0; iEdge < geometry->GetnEdge(); iEdge++) { - /*--- Points in edge ---*/ - iPoint = geometry->edge[iEdge]->GetNode(0); - jPoint = geometry->edge[iEdge]->GetNode(1); - numerics->SetNormal(geometry->edge[iEdge]->GetNormal()); + /*--- Points in edge ---*/ + iPoint = geometry->edge[iEdge]->GetNode(0); + jPoint = geometry->edge[iEdge]->GetNode(1); + numerics->SetNormal(geometry->edge[iEdge]->GetNormal()); - /*--- Primitive variables w/o reconstruction ---*/ - V_i = solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(iPoint); - V_j = solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(jPoint); + /*--- Primitive variables w/o reconstruction ---*/ + V_i = solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(iPoint); + V_j = solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(jPoint); - Temp_i = nodes->GetSolution(iPoint,0); - Temp_j = nodes->GetSolution(jPoint,0); + Temp_i = nodes->GetSolution(iPoint,0); + Temp_j = nodes->GetSolution(jPoint,0); - 
numerics->SetUndivided_Laplacian(nodes->GetUndivided_Laplacian(iPoint), nodes->GetUndivided_Laplacian(jPoint)); - numerics->SetNeighbor(geometry->node[iPoint]->GetnNeighbor(), geometry->node[jPoint]->GetnNeighbor()); + numerics->SetUndivided_Laplacian(nodes->GetUndivided_Laplacian(iPoint), nodes->GetUndivided_Laplacian(jPoint)); + numerics->SetNeighbor(geometry->node[iPoint]->GetnNeighbor(), geometry->node[jPoint]->GetnNeighbor()); - numerics->SetPrimitive(V_i, V_j); - numerics->SetTemperature(Temp_i, Temp_j); + numerics->SetPrimitive(V_i, V_j); + numerics->SetTemperature(Temp_i, Temp_j); - numerics->ComputeResidual(Residual, Jacobian_i, Jacobian_j, config); + numerics->ComputeResidual(Residual, Jacobian_i, Jacobian_j, config); - LinSysRes.AddBlock(iPoint, Residual); - LinSysRes.SubtractBlock(jPoint, Residual); + LinSysRes.AddBlock(iPoint, Residual); + LinSysRes.SubtractBlock(jPoint, Residual); - /*--- Implicit part ---*/ + /*--- Implicit part ---*/ - Jacobian.AddBlock(iPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(iPoint, jPoint, Jacobian_j); - Jacobian.SubtractBlock(jPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(jPoint, jPoint, Jacobian_j); - } + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); + } } } @@ -571,83 +568,80 @@ void CHeatSolverFVM::Upwind_Residual(CGeometry *geometry, CSolver **solver_conta nVarFlow = solver_container[FLOW_SOL]->GetnVar(); - for (iEdge = 0; iEdge < geometry->GetnEdge(); iEdge++) { + for (iEdge = 0; iEdge < geometry->GetnEdge(); iEdge++) { - /*--- Points in edge ---*/ - iPoint = geometry->edge[iEdge]->GetNode(0); - jPoint = geometry->edge[iEdge]->GetNode(1); - numerics->SetNormal(geometry->edge[iEdge]->GetNormal()); + /*--- Points in edge ---*/ + iPoint = geometry->edge[iEdge]->GetNode(0); + jPoint = geometry->edge[iEdge]->GetNode(1); + numerics->SetNormal(geometry->edge[iEdge]->GetNormal()); - /*--- Primitive variables w/o reconstruction ---*/ - V_i = 
solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(iPoint); - V_j = solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(jPoint); + /*--- Primitive variables w/o reconstruction ---*/ + V_i = solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(iPoint); + V_j = solver_container[FLOW_SOL]->GetNodes()->GetPrimitive(jPoint); - Temp_i_Grad = nodes->GetGradient(iPoint); - Temp_j_Grad = nodes->GetGradient(jPoint); - numerics->SetConsVarGradient(Temp_i_Grad, Temp_j_Grad); + Temp_i_Grad = nodes->GetGradient(iPoint); + Temp_j_Grad = nodes->GetGradient(jPoint); + numerics->SetConsVarGradient(Temp_i_Grad, Temp_j_Grad); - Temp_i = nodes->GetSolution(iPoint,0); - Temp_j = nodes->GetSolution(jPoint,0); + Temp_i = nodes->GetSolution(iPoint,0); + Temp_j = nodes->GetSolution(jPoint,0); - /* Second order reconstruction */ - if (muscl) { + /* Second order reconstruction */ + if (muscl) { - for (iDim = 0; iDim < nDim; iDim++) { - Vector_i[iDim] = 0.5*(geometry->node[jPoint]->GetCoord(iDim) - geometry->node[iPoint]->GetCoord(iDim)); - Vector_j[iDim] = 0.5*(geometry->node[iPoint]->GetCoord(iDim) - geometry->node[jPoint]->GetCoord(iDim)); - } + for (iDim = 0; iDim < nDim; iDim++) { + Vector_i[iDim] = 0.5*(geometry->node[jPoint]->GetCoord(iDim) - geometry->node[iPoint]->GetCoord(iDim)); + Vector_j[iDim] = 0.5*(geometry->node[iPoint]->GetCoord(iDim) - geometry->node[jPoint]->GetCoord(iDim)); + } - Gradient_i = solver_container[FLOW_SOL]->GetNodes()->GetGradient_Reconstruction(iPoint); - Gradient_j = solver_container[FLOW_SOL]->GetNodes()->GetGradient_Reconstruction(jPoint); - Temp_i_Grad = nodes->GetGradient_Reconstruction(iPoint); - Temp_j_Grad = nodes->GetGradient_Reconstruction(jPoint); + Gradient_i = solver_container[FLOW_SOL]->GetNodes()->GetGradient_Reconstruction(iPoint); + Gradient_j = solver_container[FLOW_SOL]->GetNodes()->GetGradient_Reconstruction(jPoint); + Temp_i_Grad = nodes->GetGradient_Reconstruction(iPoint); + Temp_j_Grad = nodes->GetGradient_Reconstruction(jPoint); - 
/*Loop to correct the flow variables*/ - for (iVar = 0; iVar < nVarFlow; iVar++) { + /*Loop to correct the flow variables*/ + for (iVar = 0; iVar < nVarFlow; iVar++) { - /*Apply the Gradient to get the right temperature value on the edge */ - Project_Grad_i = 0.0; Project_Grad_j = 0.0; - for (iDim = 0; iDim < nDim; iDim++) { - Project_Grad_i += Vector_i[iDim]*Gradient_i[iVar][iDim]; - Project_Grad_j += Vector_j[iDim]*Gradient_j[iVar][iDim]; - } + /*Apply the Gradient to get the right temperature value on the edge */ + Project_Grad_i = 0.0; Project_Grad_j = 0.0; + for (iDim = 0; iDim < nDim; iDim++) { + Project_Grad_i += Vector_i[iDim]*Gradient_i[iVar][iDim]; + Project_Grad_j += Vector_j[iDim]*Gradient_j[iVar][iDim]; + } - Primitive_Flow_i[iVar] = V_i[iVar] + Project_Grad_i; - Primitive_Flow_j[iVar] = V_j[iVar] + Project_Grad_j; - } + Primitive_Flow_i[iVar] = V_i[iVar] + Project_Grad_i; + Primitive_Flow_j[iVar] = V_j[iVar] + Project_Grad_j; + } - /* Correct the temperature variables */ - Project_Temp_i_Grad = 0.0; Project_Temp_j_Grad = 0.0; - for (iDim = 0; iDim < nDim; iDim++) { - Project_Temp_i_Grad += Vector_i[iDim]*Temp_i_Grad[0][iDim]; - Project_Temp_j_Grad += Vector_j[iDim]*Temp_j_Grad[0][iDim]; - } + /* Correct the temperature variables */ + Project_Temp_i_Grad = 0.0; Project_Temp_j_Grad = 0.0; + for (iDim = 0; iDim < nDim; iDim++) { + Project_Temp_i_Grad += Vector_i[iDim]*Temp_i_Grad[0][iDim]; + Project_Temp_j_Grad += Vector_j[iDim]*Temp_j_Grad[0][iDim]; + } - Temp_i_Corrected = Temp_i + Project_Temp_i_Grad; - Temp_j_Corrected = Temp_j + Project_Temp_j_Grad; + Temp_i_Corrected = Temp_i + Project_Temp_i_Grad; + Temp_j_Corrected = Temp_j + Project_Temp_j_Grad; - numerics->SetPrimitive(Primitive_Flow_i, Primitive_Flow_j); - numerics->SetTemperature(Temp_i_Corrected, Temp_j_Corrected); - } + numerics->SetPrimitive(Primitive_Flow_i, Primitive_Flow_j); + numerics->SetTemperature(Temp_i_Corrected, Temp_j_Corrected); + } - else { + else { - 
numerics->SetPrimitive(V_i, V_j); - numerics->SetTemperature(Temp_i, Temp_j); - } + numerics->SetPrimitive(V_i, V_j); + numerics->SetTemperature(Temp_i, Temp_j); + } - numerics->ComputeResidual(Residual, Jacobian_i, Jacobian_j, config); + numerics->ComputeResidual(Residual, Jacobian_i, Jacobian_j, config); - LinSysRes.AddBlock(iPoint, Residual); - LinSysRes.SubtractBlock(jPoint, Residual); + LinSysRes.AddBlock(iPoint, Residual); + LinSysRes.SubtractBlock(jPoint, Residual); - /*--- Implicit part ---*/ + /*--- Implicit part ---*/ - Jacobian.AddBlock(iPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(iPoint, jPoint, Jacobian_j); - Jacobian.SubtractBlock(jPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(jPoint, jPoint, Jacobian_j); - } + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); + } } } diff --git a/SU2_CFD/src/solver_direct_mean.cpp b/SU2_CFD/src/solver_direct_mean.cpp index 0a236239dfe1..9ca16fd27b36 100644 --- a/SU2_CFD/src/solver_direct_mean.cpp +++ b/SU2_CFD/src/solver_direct_mean.cpp @@ -3333,13 +3333,10 @@ void CEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_conta /*--- Set implicit computation ---*/ if (implicit) { - Jacobian.AddBlock(iPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(iPoint, jPoint, Jacobian_j); - Jacobian.SubtractBlock(jPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); } } - + } void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_container, CNumerics *numerics, @@ -3579,10 +3576,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain /*--- Set implicit Jacobians ---*/ if (implicit) { - Jacobian.AddBlock(iPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(iPoint, jPoint, Jacobian_j); - Jacobian.SubtractBlock(jPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, 
Jacobian_i, Jacobian_j); } /*--- Set the final value of the Roe dissipation coefficient ---*/ @@ -8948,8 +8942,7 @@ void CEulerSolver::BC_Riemann(CGeometry *geometry, CSolver **solver_container, void CEulerSolver::BC_TurboRiemann(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics, CNumerics *visc_numerics, CConfig *config, unsigned short val_marker) { unsigned short iDim, iVar, jVar, kVar, iSpan; - unsigned long iPoint, Point_Normal, oldVertex; - long iVertex; + unsigned long iPoint, Point_Normal, oldVertex, iVertex; su2double P_Total, T_Total, *Flow_Dir; su2double *Velocity_b, Velocity2_b, Enthalpy_b, Energy_b, StaticEnergy_b, Density_b, Kappa_b, Chi_b, Pressure_b, Temperature_b; su2double *Velocity_e, Velocity2_e, Enthalpy_e, Entropy_e, Energy_e = 0.0, StaticEnthalpy_e, StaticEnergy_e, Density_e = 0.0, Pressure_e; @@ -8995,7 +8988,7 @@ void CEulerSolver::BC_TurboRiemann(CGeometry *geometry, CSolver **solver_contain /*--- Loop over all the vertices on this boundary marker ---*/ for (iSpan= 0; iSpan < nSpanWiseSections; iSpan++){ - for (iVertex = 0; iVertex < geometry->nVertexSpan[val_marker][iSpan]; iVertex++) { + for (iVertex = 0; iVertex < geometry->GetnVertexSpan(val_marker,iSpan); iVertex++) { /*--- using the other vertex information for retrieving some information ---*/ oldVertex = geometry->turbovertex[val_marker][iSpan][iVertex]->GetOldVertex(); @@ -9454,8 +9447,8 @@ void CEulerSolver::PreprocessBC_Giles(CGeometry *geometry, CConfig *config, CNum su2double cj_inf,cj_out1, cj_out2, Density_i, Pressure_i, *turboNormal, *turboVelocity, *Velocity_i, AverageSoundSpeed; su2double *deltaprim, *cj, TwoPiThetaFreq_Pitch, pitch, theta, deltaTheta; unsigned short iMarker, iSpan, iMarkerTP, iDim; - unsigned long iPoint, kend_max, k; - long iVertex, freq; + unsigned long iPoint, kend_max, k, iVertex; + long freq; unsigned short iZone = config->GetiZone(); unsigned short nSpanWiseSections = geometry->GetnSpanWiseSections(marker_flag); turboNormal = 
new su2double[nDim]; @@ -9481,7 +9474,7 @@ void CEulerSolver::PreprocessBC_Giles(CGeometry *geometry, CConfig *config, CNum for (iMarkerTP=1; iMarkerTP < config->GetnMarker_Turbomachinery()+1; iMarkerTP++){ if (config->GetMarker_All_Turbomachinery(iMarker) == iMarkerTP){ if (config->GetMarker_All_TurbomachineryFlag(iMarker) == marker_flag){ - for (iVertex = 0; iVertex < geometry->nVertexSpan[iMarker][iSpan]; iVertex++) { + for (iVertex = 0; iVertex < geometry->GetnVertexSpan(iMarker,iSpan); iVertex++) { /*--- find the node related to the vertex ---*/ iPoint = geometry->turbovertex[iMarker][iSpan][iVertex]->GetNode(); @@ -9607,8 +9600,7 @@ void CEulerSolver::PreprocessBC_Giles(CGeometry *geometry, CConfig *config, CNum void CEulerSolver::BC_Giles(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics, CNumerics *visc_numerics, CConfig *config, unsigned short val_marker) { unsigned short iDim, iVar, jVar, iSpan; - unsigned long iPoint, Point_Normal, oldVertex, k, kend, kend_max; - long iVertex; + unsigned long iPoint, Point_Normal, oldVertex, k, kend, kend_max, iVertex; su2double *UnitNormal, *turboVelocity, *turboNormal; su2double *Velocity_b, Velocity2_b, Enthalpy_b, Energy_b, Density_b, Pressure_b, Temperature_b; @@ -9915,7 +9907,7 @@ void CEulerSolver::BC_Giles(CGeometry *geometry, CSolver **solver_container, /*--- Loop over all the vertices on this boundary marker ---*/ - for (iVertex = 0; iVertex < geometry->nVertexSpan[val_marker][iSpan]; iVertex++) { + for (iVertex = 0; iVertex < geometry->GetnVertexSpan(val_marker,iSpan); iVertex++) { /*--- using the other vertex information for retrieving some information ---*/ oldVertex = geometry->turbovertex[val_marker][iSpan][iVertex]->GetOldVertex(); @@ -15359,10 +15351,7 @@ void CNSSolver::Viscous_Residual(CGeometry *geometry, CSolver **solver_container /*--- Implicit part ---*/ if (implicit) { - Jacobian.SubtractBlock(iPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(iPoint, jPoint, 
Jacobian_j); - Jacobian.AddBlock(jPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); } } diff --git a/SU2_CFD/src/solver_direct_mean_inc.cpp b/SU2_CFD/src/solver_direct_mean_inc.cpp index 42f73a564b5e..12ff1babeee6 100644 --- a/SU2_CFD/src/solver_direct_mean_inc.cpp +++ b/SU2_CFD/src/solver_direct_mean_inc.cpp @@ -1859,10 +1859,7 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co /*--- Store implicit contributions from the residual calculation. ---*/ if (implicit) { - Jacobian.AddBlock(iPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(iPoint, jPoint, Jacobian_j); - Jacobian.SubtractBlock(jPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); } } @@ -2008,10 +2005,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont /*--- Set implicit Jacobians ---*/ if (implicit) { - Jacobian.AddBlock(iPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(iPoint, jPoint, Jacobian_j); - Jacobian.SubtractBlock(jPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); } } @@ -7718,10 +7712,7 @@ void CIncNSSolver::Viscous_Residual(CGeometry *geometry, CSolver **solver_contai /*--- Implicit part ---*/ if (implicit) { - Jacobian.SubtractBlock(iPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(iPoint, jPoint, Jacobian_j); - Jacobian.AddBlock(jPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); } } diff --git a/SU2_CFD/src/solver_direct_turbulent.cpp b/SU2_CFD/src/solver_direct_turbulent.cpp index 2f4d33d7c634..92e797b764a8 100644 --- a/SU2_CFD/src/solver_direct_turbulent.cpp +++ b/SU2_CFD/src/solver_direct_turbulent.cpp @@ -193,10 +193,7 @@ 
void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe /*--- Implicit part ---*/ - Jacobian.AddBlock(iPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(iPoint, jPoint, Jacobian_j); - Jacobian.SubtractBlock(jPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); } @@ -242,10 +239,7 @@ void CTurbSolver::Viscous_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes.SubtractBlock(iPoint, Residual); LinSysRes.AddBlock(jPoint, Residual); - Jacobian.SubtractBlock(iPoint, iPoint, Jacobian_i); - Jacobian.SubtractBlock(iPoint, jPoint, Jacobian_j); - Jacobian.AddBlock(jPoint, iPoint, Jacobian_i); - Jacobian.AddBlock(jPoint, jPoint, Jacobian_j); + Jacobian.UpdateBlocks(iEdge, iPoint, jPoint, Jacobian_i, Jacobian_j); } @@ -2057,8 +2051,7 @@ void CTurbSASolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container, void CTurbSASolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics, CNumerics *visc_numerics, CConfig *config, unsigned short val_marker) { unsigned short iDim, iSpan; - unsigned long oldVertex, iPoint, Point_Normal; - long iVertex; + unsigned long oldVertex, iPoint, Point_Normal, iVertex; su2double *V_inlet, *V_domain, *Normal; su2double extAverageNu; Normal = new su2double[nDim]; @@ -2072,7 +2065,7 @@ void CTurbSASolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_c /*--- Loop over all the vertices on this boundary marker ---*/ - for (iVertex = 0; iVertex < geometry->nVertexSpan[val_marker][iSpan]; iVertex++) { + for (iVertex = 0; iVertex < geometry->GetnVertexSpan(val_marker,iSpan); iVertex++) { /*--- find the node related to the vertex ---*/ iPoint = geometry->turbovertex[val_marker][iSpan][iVertex]->GetNode(); @@ -2163,8 +2156,7 @@ void CTurbSASolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_c void CTurbSASolver::BC_Inlet_Turbo(CGeometry 
*geometry, CSolver **solver_container, CNumerics *conv_numerics, CNumerics *visc_numerics, CConfig *config, unsigned short val_marker) { unsigned short iDim, iSpan; - unsigned long oldVertex, iPoint, Point_Normal; - long iVertex; + unsigned long oldVertex, iPoint, Point_Normal, iVertex; su2double *V_inlet, *V_domain, *Normal; su2double rho, pressure, muLam, Factor_nu_Inf, nu_tilde; @@ -2189,7 +2181,7 @@ void CTurbSASolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contain /*--- Loop over all the vertices on this boundary marker ---*/ - for (iVertex = 0; iVertex < geometry->nVertexSpan[val_marker][iSpan]; iVertex++) { + for (iVertex = 0; iVertex < geometry->GetnVertexSpan(val_marker,iSpan); iVertex++) { /*--- find the node related to the vertex ---*/ iPoint = geometry->turbovertex[val_marker][iSpan][iVertex]->GetNode(); @@ -4039,8 +4031,7 @@ void CTurbSSTSolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_ unsigned short val_marker) { unsigned short iVar, iSpan, iDim; - unsigned long oldVertex, iPoint, Point_Normal; - long iVertex; + unsigned long oldVertex, iPoint, Point_Normal, iVertex; su2double *V_inlet, *V_domain, *Normal; su2double extAverageKine, extAverageOmega; unsigned short nSpanWiseSections = config->GetnSpanWiseSections(); @@ -4057,7 +4048,7 @@ void CTurbSSTSolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_ /*--- Loop over all the vertices on this boundary marker ---*/ - for (iVertex = 0; iVertex < geometry->nVertexSpan[val_marker][iSpan]; iVertex++) { + for (iVertex = 0; iVertex < geometry->GetnVertexSpan(val_marker,iSpan); iVertex++) { /*--- find the node related to the vertex ---*/ iPoint = geometry->turbovertex[val_marker][iSpan][iVertex]->GetNode(); @@ -4141,8 +4132,7 @@ void CTurbSSTSolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contai unsigned short val_marker) { unsigned short iVar, iSpan, iDim; - unsigned long oldVertex, iPoint, Point_Normal; - long iVertex; + unsigned long oldVertex, 
iPoint, Point_Normal, iVertex; su2double *V_inlet, *V_domain, *Normal; unsigned short nSpanWiseSections = config->GetnSpanWiseSections(); @@ -4182,7 +4172,7 @@ void CTurbSSTSolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contai omega_b = rho*kine/(muLam*viscRatio); /*--- Loop over all the vertices on this boundary marker ---*/ - for (iVertex = 0; iVertex < geometry->nVertexSpan[val_marker][iSpan]; iVertex++) { + for (iVertex = 0; iVertex < geometry->GetnVertexSpan(val_marker,iSpan); iVertex++) { /*--- find the node related to the vertex ---*/ iPoint = geometry->turbovertex[val_marker][iSpan][iVertex]->GetNode(); diff --git a/SU2_DEF/include/SU2_DEF.hpp b/SU2_DEF/include/SU2_DEF.hpp index 596655e00c92..ff0da4a73ad2 100644 --- a/SU2_DEF/include/SU2_DEF.hpp +++ b/SU2_DEF/include/SU2_DEF.hpp @@ -30,6 +30,7 @@ #pragma once #include "../../Common/include/mpi_structure.hpp" +#include "../../Common/include/omp_structure.hpp" #include #include diff --git a/SU2_DEF/src/SU2_DEF.cpp b/SU2_DEF/src/SU2_DEF.cpp index 5922cb812f99..8d2b0e2fe15b 100644 --- a/SU2_DEF/src/SU2_DEF.cpp +++ b/SU2_DEF/src/SU2_DEF.cpp @@ -40,7 +40,12 @@ int main(int argc, char *argv[]) { /*--- MPI initialization ---*/ #ifdef HAVE_MPI - SU2_MPI::Init(&argc,&argv); +#ifdef HAVE_OMP + int provided; + SU2_MPI::Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); +#else + SU2_MPI::Init(&argc, &argv); +#endif SU2_MPI::Comm MPICommunicator(MPI_COMM_WORLD); #else SU2_Comm MPICommunicator(0); diff --git a/SU2_DOT/include/SU2_DOT.hpp b/SU2_DOT/include/SU2_DOT.hpp index 09dfe1ab2647..b2c2e4e28479 100644 --- a/SU2_DOT/include/SU2_DOT.hpp +++ b/SU2_DOT/include/SU2_DOT.hpp @@ -30,6 +30,7 @@ #pragma once #include "../../Common/include/mpi_structure.hpp" +#include "../../Common/include/omp_structure.hpp" #include #include diff --git a/SU2_DOT/src/SU2_DOT.cpp b/SU2_DOT/src/SU2_DOT.cpp index 12d5b0eb5971..66e251d84861 100644 --- a/SU2_DOT/src/SU2_DOT.cpp +++ b/SU2_DOT/src/SU2_DOT.cpp @@ -45,7 +45,12 @@ 
int main(int argc, char *argv[]) { /*--- MPI initialization, and buffer setting ---*/ #ifdef HAVE_MPI - SU2_MPI::Init(&argc,&argv); +#ifdef HAVE_OMP + int provided; + SU2_MPI::Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); +#else + SU2_MPI::Init(&argc, &argv); +#endif SU2_MPI::Comm MPICommunicator(MPI_COMM_WORLD); #else SU2_Comm MPICommunicator(0); diff --git a/TestCases/parallel_regression_AD.py b/TestCases/parallel_regression_AD.py index 1564515e38f2..b23394be7ac2 100644 --- a/TestCases/parallel_regression_AD.py +++ b/TestCases/parallel_regression_AD.py @@ -229,7 +229,7 @@ def main(): discadj_fea.cfg_dir = "disc_adj_fea" discadj_fea.cfg_file = "configAD_fem.cfg" discadj_fea.test_iter = 9 - discadj_fea.test_vals = [-6.492475, -6.401201, -0.000364, -8.708700] #last 4 columns + discadj_fea.test_vals = [-6.070230, -6.262517, -0.000364, -8.708700] #last 4 columns discadj_fea.su2_exec = "parallel_computation.py -f" discadj_fea.timeout = 1600 discadj_fea.tol = 0.00001 diff --git a/TestCases/pastix_support/readme.txt b/TestCases/pastix_support/readme.txt index a2af3ff3a4d9..04fe6f19b349 100644 --- a/TestCases/pastix_support/readme.txt +++ b/TestCases/pastix_support/readme.txt @@ -2,7 +2,7 @@ % SU2 configuration file % % PaStiX support build instructions. 
% % Institution: Imperial College London % -% File Version 7.0.0 "Blackbird" % +% File Version 7.0.0 "Blackbird" % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % 1 - Download diff --git a/TestCases/serial_regression_AD.py b/TestCases/serial_regression_AD.py index 86f241a6838b..ab2d9f613812 100644 --- a/TestCases/serial_regression_AD.py +++ b/TestCases/serial_regression_AD.py @@ -214,7 +214,7 @@ def main(): discadj_fea.cfg_dir = "disc_adj_fea" discadj_fea.cfg_file = "configAD_fem.cfg" discadj_fea.test_iter = 9 - discadj_fea.test_vals = [-6.319841, -6.375512, -0.000364, -8.708700] #last 4 columns + discadj_fea.test_vals = [-6.352150, -6.402687, -0.000364, -8.708700] #last 4 columns discadj_fea.su2_exec = "SU2_CFD_AD" discadj_fea.timeout = 1600 discadj_fea.tol = 0.00001