-
Notifications
You must be signed in to change notification settings - Fork 922
Hybrid Parallel AD (Part 3/?) #1294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bb27a0b
183c3ca
f501dc1
3b0ebd3
6f3c86a
d5f8ac9
53bd274
0fe1e67
5017a90
b7b3dd7
967704c
0a72b67
c38bf14
b61684b
781092a
77aa7d0
742118d
9b09003
1d2c206
a573f9a
bc90f74
cba486d
8e7a9c6
e03f11b
15d3666
e10abcc
2726ca6
f8fe252
ab91794
d8656aa
ac18c09
3c84ad1
c8ff857
5901d8b
fcc39ce
028d1e0
7acc44f
7586e7c
2830dea
25ba4e3
3b4a018
a9466bb
1ce5115
e81a8ff
3f81059
d64d620
597c637
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -319,7 +319,11 @@ void CFVMFlowSolverBase<V, R>::HybridParallelInitialization(const CConfig& confi | |
| cout << "WARNING: On " << numRanksUsingReducer << " MPI ranks the coloring efficiency was less than " | ||
| << COLORING_EFF_THRESH << " (min value was " << minEff << ").\n" | ||
| << " Those ranks will now use a fallback strategy, better performance may be possible\n" | ||
| << " with a different value of config option EDGE_COLORING_GROUP_SIZE (default 512)." << endl; | ||
| << " with a different value of config option EDGE_COLORING_GROUP_SIZE (default 512)." | ||
| #ifdef HAVE_OPDI | ||
| << "\n The memory usage of the discrete adjoint solver is higher when using the fallback." | ||
| #endif | ||
| << endl; | ||
| } | ||
|
|
||
| if (config.GetUseVectorization() && (omp_get_max_threads() > 1) && | ||
|
|
@@ -1531,6 +1535,12 @@ void CFVMFlowSolverBase<V, R>::EdgeFluxResidual(const CGeometry *geometry, | |
| InstantiateEdgeNumerics(solvers, config); | ||
| } | ||
|
|
||
| /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of | ||
| * variables, otherwise switch to the faster adjoint evaluation mode. ---*/ | ||
| bool pausePreacc = false; | ||
| if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation(); | ||
| else AD::StartNoSharedReading(); | ||
|
|
||
| /*--- Loop over edge colors. ---*/ | ||
| for (auto color : EdgeColoring) { | ||
| /*--- Chunk size is at least OMP_MIN_SIZE and a multiple of the color group size. ---*/ | ||
|
|
@@ -1553,6 +1563,10 @@ void CFVMFlowSolverBase<V, R>::EdgeFluxResidual(const CGeometry *geometry, | |
| END_SU2_OMP_FOR | ||
| } | ||
|
|
||
| /*--- Restore preaccumulation and adjoint evaluation state. ---*/ | ||
| AD::ResumePreaccumulation(pausePreacc); | ||
| if (!ReducerStrategy) AD::EndNoSharedReading(); | ||
|
|
||
| if (ReducerStrategy) { | ||
| SumEdgeFluxes(geometry); | ||
| if (config->GetKind_TimeIntScheme() == EULER_IMPLICIT) { | ||
|
|
@@ -1607,6 +1621,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry | |
|
|
||
| /*--- Loop over all nodes (excluding halos) ---*/ | ||
|
|
||
| AD::StartNoSharedReading(); | ||
|
|
||
| SU2_OMP_FOR_STAT(omp_chunk_size) | ||
| for (iPoint = 0; iPoint < nPointDomain; iPoint++) { | ||
|
|
||
|
|
@@ -1642,6 +1658,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry | |
| } | ||
| END_SU2_OMP_FOR | ||
|
|
||
| AD::EndNoSharedReading(); | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some of these shared reading optimizations depend on
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a fair-enough assumption. I added some more over the Primitive loops and removed some over smaller loops where the performance benefit might not justify the increased maintenance. |
||
| } | ||
|
|
||
| else { | ||
|
|
@@ -1719,6 +1737,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry | |
| /*--- Loop over all nodes (excluding halos) to compute the remainder | ||
| of the dual time-stepping source term. ---*/ | ||
|
|
||
| AD::StartNoSharedReading(); | ||
|
|
||
| SU2_OMP_FOR_STAT(omp_chunk_size) | ||
| for (iPoint = 0; iPoint < nPointDomain; iPoint++) { | ||
|
|
||
|
|
@@ -1756,6 +1776,8 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry | |
| } | ||
| } | ||
| END_SU2_OMP_FOR | ||
|
|
||
| AD::EndNoSharedReading(); | ||
| } | ||
|
|
||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.