Merged
Changes from all 25 commits:
b79d3d9
start generic GG gradient function
pcarruscag Dec 6, 2019
2b9b02e
finish replacing all GG uses by generic function
pcarruscag Dec 7, 2019
f790bc3
generic function for LS gradients
pcarruscag Dec 7, 2019
0863570
delete unused "gradient manipulation" methods
pcarruscag Dec 7, 2019
93d4404
start generic limiter framework
pcarruscag Dec 9, 2019
60ca7e8
finish Venkatakrishnan-Wang
pcarruscag Dec 9, 2019
527b3f1
wrapper function to call limiter computation, compilation in only one…
pcarruscag Dec 9, 2019
658b186
update periodic comm strategy for min/max computation
pcarruscag Dec 9, 2019
174aafc
fix parameters being passed by value
pcarruscag Dec 9, 2019
0f29089
Merge branch 'hybrid_parallel_linear_algebra' into feature_hybrid_par…
pcarruscag Dec 9, 2019
36aca3a
update build systems
pcarruscag Dec 9, 2019
eeda2e4
fix namespace problems for AD
pcarruscag Dec 9, 2019
dc97616
fix AD build, return matrices by ref instead of by pointer
pcarruscag Dec 10, 2019
83fa741
improve consistency wrt edge-loop algorithm
pcarruscag Dec 10, 2019
eac1602
Merge branch 'hybrid_parallel_linear_algebra' into hybrid_parallel_gr…
pcarruscag Dec 21, 2019
8db56f5
Merge branch 'hybrid_parallel_linear_algebra' into hybrid_parallel_gr…
pcarruscag Jan 13, 2020
bf3aea4
Merge branch 'develop' into hybrid_parallel_gradients_limiters
pcarruscag Jan 13, 2020
364b060
update testcases with small changes
pcarruscag Jan 14, 2020
3bedcb2
Merge branch 'develop' into hybrid_parallel_gradients_limiters
pcarruscag Jan 15, 2020
a62519f
small testcase updates after merge with develop
pcarruscag Jan 16, 2020
2332aa6
update transonic stator restart residuals
pcarruscag Jan 16, 2020
49d2c8a
update cont_adj_euler_naca0012 case
pcarruscag Jan 16, 2020
b045156
try to update testcases branch
pcarruscag Jan 16, 2020
f80e9cc
revert changes to docker
pcarruscag Jan 16, 2020
d904867
clearer variable name, change one case to cover Venkat-Wang limiter
pcarruscag Jan 18, 2020
5 changes: 5 additions & 0 deletions Common/include/omp_structure.hpp
@@ -63,6 +63,11 @@
*/
inline constexpr int omp_get_max_threads(void) {return 1;}

/*!
* \brief Number of threads in current team.
*/
inline constexpr int omp_get_num_threads(void) {return 1;}

/*!
* \brief Index of current thread, akin to MPI rank.
*/
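A hedged sketch of the fallback pattern this stub joins; the HAVE_OMP guard name is taken from the gradient code later in this PR, everything else is illustrative rather than the actual header content:

// Illustration only -- the real header is Common/include/omp_structure.hpp.
#ifdef HAVE_OMP
  #include <omp.h>  // real OpenMP runtime when the build enables it
#else
  // Serial stubs so code written against the OpenMP API still compiles,
  // behaving as a team of one thread in a build without OpenMP.
  inline constexpr int omp_get_max_threads(void) {return 1;}
  inline constexpr int omp_get_num_threads(void) {return 1;}
  inline constexpr int omp_get_thread_num(void)  {return 0;}
#endif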
5 changes: 3 additions & 2 deletions Common/include/option_structure.hpp
@@ -1933,6 +1933,7 @@ static const map<string, ENUM_INPUT_REF> Input_Ref_Map = CCreateMap<string, ENUM
* \brief Vertex-based quantities exchanged during periodic marker communications.
*/
enum PERIODIC_QUANTITIES {
PERIODIC_NONE = 99, /*!< \brief No periodic communication required. */
PERIODIC_VOLUME = 1, /*!< \brief Volume communication for summing total CV (periodic only). */
Comment on lines +1936 to 1937
pcarruscag (Member, Author):
@economon the AuxVar did not have periodic gradient communications, so I had to add a do-nothing mode.

PERIODIC_NEIGHBORS = 2, /*!< \brief Communication of the number of neighbors for centered schemes (periodic only). */
PERIODIC_RESIDUAL = 3, /*!< \brief Residual and Jacobian communication (periodic only). */
@@ -1947,9 +1948,9 @@ enum PERIODIC_QUANTITIES {
PERIODIC_LIM_SOL_2 = 12, /*!< \brief Solution limiter communication phase 2 of 2 (periodic only). */
PERIODIC_LIM_PRIM_1 = 13, /*!< \brief Primitive limiter communication phase 1 of 2 (periodic only). */
PERIODIC_LIM_PRIM_2 = 14, /*!< \brief Primitive limiter communication phase 2 of 2 (periodic only). */
PERIODIC_IMPLICIT = 15, /*!< \brief Implicit update communication to ensure consistency across periodic boundaries. */
PERIODIC_IMPLICIT = 15, /*!< \brief Implicit update communication to ensure consistency across periodic boundaries. */
PERIODIC_SOL_ULS = 16, /*!< \brief Solution gradient communication for unwieghted Least Squares (periodic only). */
PERIODIC_PRIM_ULS = 17 /*!< \brief Primitive gradient communication for unweighted Least Squares (periodic only). */
PERIODIC_PRIM_ULS = 17 /*!< \brief Primitive gradient communication for unweighted Least Squares (periodic only). */
};

/*!
66 changes: 66 additions & 0 deletions Common/include/toolboxes/C2DContainer.hpp
@@ -369,6 +369,8 @@ class C2DContainer :
using Base::m_allocate;
public:
using Base::size;
using Index = Index_t;
using Scalar = Scalar_t;

private:
/*!
@@ -486,6 +488,70 @@
};


/*!
* \class C2DDummyLastView
* \brief Helper class, adds dummy trailing dimension to a reference of a
* vector object making it a dummy matrix.
* \note The constness of the object is derived from the template type, but
* we always keep a reference, never a copy of the associated vector.
*/
template<class T>
struct C2DDummyLastView
{
using Index = typename T::Index;
using Scalar = typename T::Scalar;

T& data;

C2DDummyLastView() = delete;

C2DDummyLastView(T& ref) : data(ref) {}

template<class U = T,
typename std::enable_if<!std::is_const<U>::value, bool>::type = 0>
Scalar& operator() (Index i, Index) noexcept
{
return data(i);
}

const Scalar& operator() (Index i, Index) const noexcept
{
return data(i);
}
};

/*!
* \class C3DDummyMiddleView
* \brief Helper class, adds dummy middle dimension to a reference of a
* matrix object making it a dummy 3D array.
* \note The constness of the object is derived from the template type, but
* we always keep a reference, never a copy of the associated matrix.
*/
template<class T>
struct C3DDummyMiddleView
{
using Index = typename T::Index;
using Scalar = typename T::Scalar;

T& data;

C3DDummyMiddleView() = delete;

C3DDummyMiddleView(T& ref) : data(ref) {}

template<class U = T,
typename std::enable_if<!std::is_const<U>::value, bool>::type = 0>
Scalar& operator() (Index i, Index, Index k) noexcept
{
return data(i,k);
}

const Scalar& operator() (Index i, Index, Index k) const noexcept
{
return data(i,k);
}
};

/*!
* \brief Useful typedefs with default template parameters
*/
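A hedged usage sketch of the new views; MinimalVector below is invented for the example, only C2DDummyLastView comes from this file:

// Sketch only: expose a 1D field through the (iPoint, iVar) interface that
// the generic gradient/limiter code expects, without copying any data.
#include <cstddef>
#include <vector>

struct MinimalVector {                        // stand-in for an SU2 vector container
  using Index  = size_t;
  using Scalar = double;
  std::vector<double> v;
  Scalar& operator() (Index i)             { return v[i]; }
  const Scalar& operator() (Index i) const { return v[i]; }
};

void dummyViewExample() {
  MinimalVector aux{std::vector<double>(8, 0.0)};
  C2DDummyLastView<MinimalVector> view(aux);  // keeps a reference, never a copy
  view(3, 0) = 1.0;                           // the trailing "variable" index is ignored,
  bool same = (view(3, 7) == aux.v[3]);       // so both calls refer to aux(3)
  (void)same;
}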
13 changes: 7 additions & 6 deletions Common/src/meson.build
@@ -35,8 +35,9 @@ if get_option('enable-normal')
common = static_library('SU2Common',
common_src,
install : false,
dependencies : su2_deps,
cpp_args: [default_warning_flags, su2_cpp_args])
dependencies : su2_deps,
cpp_args: [default_warning_flags, su2_cpp_args])

common_dep = declare_dependency(link_with: common,
include_directories : common_include)
endif
@@ -46,8 +47,8 @@ if get_option('enable-autodiff')
commonAD = static_library('SU2CommonAD',
common_src,
install : false,
dependencies : [su2_deps, codi_dep],
cpp_args: [default_warning_flags, su2_cpp_args, codi_rev_args])
dependencies : [su2_deps, codi_dep],
cpp_args: [default_warning_flags, su2_cpp_args, codi_rev_args])

commonAD_dep = declare_dependency(link_with: commonAD,
include_directories : common_include)
@@ -59,8 +60,8 @@ if get_option('enable-directdiff')
commonDD = static_library('SU2CommonDD',
common_src,
install : false,
dependencies : [su2_deps, codi_dep],
cpp_args: [default_warning_flags, su2_cpp_args, codi_for_args])
dependencies : [su2_deps, codi_dep],
cpp_args: [default_warning_flags, su2_cpp_args, codi_for_args])

commonDD_dep = declare_dependency(link_with: commonDD,
include_directories : common_include)
189 changes: 189 additions & 0 deletions SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
@@ -0,0 +1,189 @@
/*!
* \file computeGradientsGreenGauss.hpp
* \brief Generic implementation of Green-Gauss gradient computation.
* \note This allows the same implementation to be used for conservative
* and primitive variables of any solver.
* \author P. Gomes
* \version 7.0.0 "Blackbird"
*
* SU2 Project Website: https://su2code.github.io
*
* The SU2 Project is maintained by the SU2 Foundation
* (http://su2foundation.org)
*
* Copyright 2012-2019, SU2 Contributors (cf. AUTHORS.md)
*
* SU2 is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* SU2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with SU2. If not, see <http://www.gnu.org/licenses/>.
*/

#include "../../../Common/include/omp_structure.hpp"


/*!
* \brief Compute the gradient of a field using the Green-Gauss theorem.
* \note Gradients can be computed only for a contiguous range of variables, defined
* by [varBegin, varEnd[ (e.g. 0,1 computes the gradient of the 1st variable).
* This can be used, for example, to compute only velocity gradients.
* \note The function uses an optional solver object to perform communications, if
* none (nullptr) is provided the function does not fail (the objective of
* this is to improve test-ability).
* \param[in] solver - Optional, solver associated with the field (used only for MPI).
* \param[in] kindMpiComm - Type of MPI communication required.
* \param[in] kindPeriodicComm - Type of periodic communication required.
* \param[in] geometry - Geometric grid properties.
* \param[in] config - Configuration of the problem, used to identify types of boundaries.
* \param[in] field - Generic object implementing operator (iPoint, iVar).
* \param[in] varBegin - Index of first variable for which to compute the gradient.
* \param[in] varEnd - Index of last variable for which to compute the gradient.
* \param[out] gradient - Generic object implementing operator (iPoint, iVar, iDim).
*/
template<class FieldType, class GradientType>
void computeGradientsGreenGauss(CSolver* solver,
MPI_QUANTITIES kindMpiComm,
PERIODIC_QUANTITIES kindPeriodicComm,
CGeometry& geometry,
CConfig& config,
const FieldType& field,
size_t varBegin,
size_t varEnd,
GradientType& gradient)
Comment on lines +52 to +60
pcarruscag (Member, Author):
The signature for these functions is a bit long... But by making the solver and MPI arguments optional we can more easily devise unit tests, or use the function for some other purpose.

I have also made these functions able to compute gradients for only a subset of the variables; the idea is to use that to recompute only the velocity gradients before computing the turbulence source terms.

Contributor:
I like the ability to choose variables to compute gradients for. Should trim some unnecessary computations.
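A hedged sketch of what this thread describes; the wrapper name, the velocity index range and the container types are assumptions, only computeGradientsGreenGauss and PERIODIC_NONE come from this PR:

// Sketch only -- recompute just the velocity gradients, with no MPI or
// periodic communication (possible because the solver argument is optional).
template<class FieldType, class GradientType>
void velocityGradientsLocalOnly(MPI_QUANTITIES kindMpiComm,
                                CGeometry& geometry, CConfig& config,
                                const FieldType& primitives, GradientType& gradient)
{
  // Assumed layout: velocity components occupy primitive variables [1, 1+nDim).
  const size_t velBegin = 1;
  const size_t velEnd   = 1 + geometry.GetnDim();

  // nullptr solver + PERIODIC_NONE: the function computes the local gradients
  // and returns before any communication, which also makes it easy to unit test.
  computeGradientsGreenGauss(nullptr, kindMpiComm, PERIODIC_NONE,
                             geometry, config, primitives, velBegin, velEnd, gradient);
}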

{
size_t nPointDomain = geometry.GetnPointDomain();
size_t nDim = geometry.GetnDim();

#ifdef HAVE_OMP
constexpr size_t OMP_MAX_CHUNK = 512;

size_t chunkSize = computeStaticChunkSize(nPointDomain,
omp_get_max_threads(), OMP_MAX_CHUNK);
#endif

/*--- Start OpenMP parallel section. ---*/

SU2_OMP_PARALLEL
pcarruscag (Member, Author):
This starts a parallel section; the code within the {} that follows is executed simultaneously by all threads.

{
/*--- For each (non-halo) volume integrate over its faces (edges). ---*/

SU2_OMP_FOR_DYN(chunkSize)
for (size_t iPoint = 0; iPoint < nPointDomain; ++iPoint)
Comment on lines +78 to +79
pcarruscag (Member, Author):
This divides the loop over however many threads arrive here; without the worksharing construct the entire loop would be performed by each thread.
For reasons of spatial locality it is beneficial to divide the loop into chunks instead of individual iterations. In this case those chunks are assigned to threads dynamically, which helps with load balancing but has some overhead. That is why you also see static work division for light loops (e.g. setting an array to 0).

Contributor:
What is the chunkSize dependent on? Is it hardware? The number of variables?

Does simply adding SU2_OMP_FOR_DYN(chunkSize) before the for loop automatically run the loop in chunks? (assuming you add relevant includes and calls beforehand)

Does anything new need to be done in how the code is compiled or run to ensure that multiple threads are used?

pcarruscag (Member, Author):
The size is usually based on the amount of work per loop iteration: if the loop is heavy a small size can be used to improve load balancing, otherwise a relatively large size is needed to amortize the parallelization overheads. Currently most sizes are determined such that each thread gets on average the same number of chunks, but each chunk is no larger than a maximum size; there is a utility function in omp_structure.hpp for this.

Yes, it distributes the work over the threads that arrive at that location; if only one arrives the code still executes properly.

The build system was updated in #843, since that PR introduces the first solver that is completely hybrid parallel; there is no advantage in using the feature for fluid solvers yet.
Basically you add -Dwith-omp=true to meson. The number of threads per rank can be specified with the -t option when calling SU2_CFD, e.g. mpirun -n 2 --bind-to numa SU2_CFD -t 8 config.cfg; if -t is omitted, the number of threads is whatever is defined for the environment.
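A plain-OpenMP sketch of the scheme discussed in this thread; illustrative only, the chunk heuristic mimics the description of computeStaticChunkSize and the raw pragmas stand in for SU2_OMP_PARALLEL / SU2_OMP_FOR_DYN:

#include <omp.h>
#include <algorithm>
#include <cstddef>
#include <vector>

// Roughly an even split of the work over the threads, capped at maxChunk so
// that dynamic scheduling still has enough chunks left to balance the load.
static size_t chunkSizeSketch(size_t workSize, size_t numThreads, size_t maxChunk) {
  const size_t perThread = workSize / std::max<size_t>(1, numThreads);
  return std::max<size_t>(1, std::min(perThread, maxChunk));
}

void scaleInParallel(std::vector<double>& x, double factor) {
  const size_t n = x.size();
  const size_t chunk = chunkSizeSketch(n, omp_get_max_threads(), 512);

  #pragma omp parallel                        // every thread executes this block
  {
    // Iterations are handed out in chunks of "chunk" to whichever thread is
    // free next: better load balancing than static, at some scheduling cost.
    #pragma omp for schedule(dynamic, chunk)
    for (size_t i = 0; i < n; ++i)
      x[i] *= factor;
  }
}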

{
auto node = geometry.node[iPoint];

AD::StartPreacc();
AD::SetPreaccIn(node->GetVolume());
AD::SetPreaccIn(node->GetPeriodicVolume());

for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
AD::SetPreaccIn(field(iPoint,iVar));

/*--- Clear the gradient. --*/

for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
for (size_t iDim = 0; iDim < nDim; ++iDim)
gradient(iPoint, iVar, iDim) = 0.0;

/*--- Handle averaging and division by volume in one constant. ---*/

su2double halfOnVol = 0.5 / (node->GetVolume()+node->GetPeriodicVolume());

/*--- Add a contribution due to each neighbor. ---*/

for (size_t iNeigh = 0; iNeigh < node->GetnPoint(); ++iNeigh)
{
size_t iEdge = node->GetEdge(iNeigh);
size_t jPoint = node->GetPoint(iNeigh);

/*--- Determine if edge points inwards or outwards of iPoint.
* If inwards we need to flip the area vector. ---*/

su2double dir = (iPoint == geometry.edge[iEdge]->GetNode(0))? 1.0 : -1.0;
su2double weight = dir * halfOnVol;

const su2double* area = geometry.edge[iEdge]->GetNormal();
AD::SetPreaccIn(area, nDim);

for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
{
AD::SetPreaccIn(field(jPoint,iVar));

su2double flux = weight * (field(iPoint,iVar) + field(jPoint,iVar));

for (size_t iDim = 0; iDim < nDim; ++iDim)
gradient(iPoint, iVar, iDim) += flux * area[iDim];
}

}

for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
for (size_t iDim = 0; iDim < nDim; ++iDim)
AD::SetPreaccOut(gradient(iPoint,iVar,iDim));

AD::EndPreacc();
}

/*--- Add boundary fluxes. ---*/

for (size_t iMarker = 0; iMarker < geometry.GetnMarker(); ++iMarker)
{
if ((config.GetMarker_All_KindBC(iMarker) != INTERNAL_BOUNDARY) &&
(config.GetMarker_All_KindBC(iMarker) != PERIODIC_BOUNDARY))
{
/*--- Work is shared in inner loop as two markers
* may try to update the same point. ---*/

SU2_OMP_FOR_STAT(OMP_MAX_CHUNK)
for (size_t iVertex = 0; iVertex < geometry.GetnVertex(iMarker); ++iVertex)
{
size_t iPoint = geometry.vertex[iMarker][iVertex]->GetNode();
auto node = geometry.node[iPoint];

/*--- Halo points do not need to be considered. ---*/

if (!node->GetDomain()) continue;

su2double volume = node->GetVolume() + node->GetPeriodicVolume();

const su2double* area = geometry.vertex[iMarker][iVertex]->GetNormal();

for (size_t iVar = varBegin; iVar < varEnd; iVar++)
{
su2double flux = field(iPoint,iVar) / volume;

for (size_t iDim = 0; iDim < nDim; iDim++)
gradient(iPoint, iVar, iDim) -= flux * area[iDim];
}
}
}
}

} // end SU2_OMP_PARALLEL

/*--- If no solver was provided we do not communicate ---*/

if (solver == nullptr) return;

/*--- Account for periodic contributions. ---*/

for (size_t iPeriodic = 1; iPeriodic <= config.GetnMarker_Periodic()/2; ++iPeriodic)
{
solver->InitiatePeriodicComms(&geometry, &config, iPeriodic, kindPeriodicComm);
solver->CompletePeriodicComms(&geometry, &config, iPeriodic, kindPeriodicComm);
}

/*--- Obtain the gradients at halo points from the MPI ranks that own them. ---*/

solver->InitiateComms(&geometry, &config, kindMpiComm);
solver->CompleteComms(&geometry, &config, kindMpiComm);

}
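For reference, the discrete approximation accumulated by the loops above is the standard Green-Gauss gradient over the dual control volume of each node,

\nabla \phi_i \approx \frac{1}{V_i} \sum_{f \in \partial \Omega_i} \overline{\phi}_f \, \vec{S}_f ,

where \overline{\phi}_f is (\phi_i + \phi_j)/2 on interior dual faces and \phi_i on boundary faces, \vec{S}_f is the outward face area vector, and V_i includes the periodic volume. The 1/(2 V_i) factor is precomputed as halfOnVol, and the dir flip plus the subtraction on boundary markers account for the stored orientation of the normals.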