From bb27a0be3a46342f7a7f374e11e25058fac39ec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 1 Jun 2021 14:25:19 +0200 Subject: [PATCH 01/39] CoDiPack update. --- externals/codi | 2 +- meson_scripts/init.py | 2 +- preconfigure.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/externals/codi b/externals/codi index 6a67202a3887..ee2d80cc362f 160000 --- a/externals/codi +++ b/externals/codi @@ -1 +1 @@ -Subproject commit 6a67202a3887c8da490fdfde82bc46507de68692 +Subproject commit ee2d80cc362f26879deead881c79523c113e9e6c diff --git a/meson_scripts/init.py b/meson_scripts/init.py index 4d9a4e35ac3e..c488ab134aa6 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -44,7 +44,7 @@ def init_submodules(method = 'auto'): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692' + sha_version_codi = 'ee2d80cc362f26879deead881c79523c113e9e6c' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' diff --git a/preconfigure.py b/preconfigure.py index 16cd5f307c2f..fda2d95de3f8 100755 --- a/preconfigure.py +++ b/preconfigure.py @@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692' + sha_version_codi = 'ee2d80cc362f26879deead881c79523c113e9e6c' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' From 183c3ca7a3235c8293cf2242c66c82d35a3922c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Fri, 11 Jun 2021 13:21:38 +0200 Subject: [PATCH 02/39] CoDiPack tape choice via build options. --- Common/include/code_config.hpp | 20 +++++--------------- meson.build | 14 ++++++++++++-- meson_options.txt | 1 + 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index c1600b310f42..3bb734a7696c 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -79,25 +79,15 @@ using su2conditional_t = typename su2conditional::type; #include "codi.hpp" #include "codi/tools/dataStore.hpp" -#ifndef CODI_INDEX_TAPE -#define CODI_INDEX_TAPE 0 -#endif -#ifndef CODI_PRIMAL_TAPE -#define CODI_PRIMAL_TAPE 0 -#endif -#ifndef CODI_PRIMAL_INDEX_TAPE -#define CODI_PRIMAL_INDEX_TAPE 0 -#endif - #if defined(HAVE_OMP) using su2double = codi::RealReverseIndexParallel; #else -#if CODI_INDEX_TAPE +#if defined(CODI_INDEX_TAPE) using su2double = codi::RealReverseIndex; -#elif CODI_PRIMAL_TAPE -using su2double = codi::RealReversePrimal; -#elif CODI_PRIMAL_INDEX_TAPE -using su2double = codi::RealReversePrimalIndex; +//#elif defined(CODI_PRIMAL_TAPE) +//using su2double = codi::RealReversePrimal; +//#elif defined(CODI_PRIMAL_INDEX_TAPE) +//using su2double = codi::RealReversePrimalIndex; #else using su2double = codi::RealReverse; #endif diff --git a/meson.build b/meson.build index 500ebd87fad1..813f7bcff1b3 100644 --- a/meson.build +++ b/meson.build @@ -53,8 +53,18 @@ endif if get_option('enable-autodiff') or get_option('enable-directdiff') codi_dep = [declare_dependency(include_directories: 'externals/codi/include')] - codi_rev_args = '-DCODI_REVERSE_TYPE' - codi_for_args = '-DCODI_FORWARD_TYPE' + codi_rev_args = ['-DCODI_REVERSE_TYPE'] + codi_for_args = ['-DCODI_FORWARD_TYPE'] +endif + +if get_option('enable-autodiff') + if get_option('codi-tape') == 'JacobianIndex' + codi_rev_args += '-DCODI_INDEX_TAPE' + #elif get_option('codi-tape') == 'PrimalLinear' + # codi_rev_args += '-DCODI_PRIMAL_TAPE' + #elif get_option('codi-tape') == 'PrimalIndex' + # codi_rev_args += '-DCODI_PRIMAL_INDEX_TAPE' + endif endif # add cgns library diff --git a/meson_options.txt b/meson_options.txt index b5d9ccdddc8e..da999c5d29c6 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -19,3 +19,4 @@ option('enable-mixedprec', type : 'boolean', value : false, description: 'use si option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation') option('enable-mpp', type : 'boolean', value : false, description: 'enable Mutation++ support') option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice') +option('codi-tape', type : 'combo', choices : ['JacobianLinear', 'JacobianIndex'], value : 'JacobianLinear', description: 'CoDiPack tape choice') From f501dc168cd7ef4a4d1f81140a0959a9d4382128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Fri, 11 Jun 2021 14:39:52 +0200 Subject: [PATCH 03/39] Fix for the disc_adj_fsi problem. --- SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index c52d5afe0e05..4af5f24f48cc 100644 --- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -898,8 +898,12 @@ void CDiscAdjMultizoneDriver::ComputeAdjoints(unsigned short iZone, bool eval_tr * on the last inner iteration. Structural problems have some minor issue and we * need to evaluate this section on every iteration. ---*/ +#if defined(CODI_INDEX_TAPE) + AD::ComputeAdjoint(TRANSFER, OBJECTIVE_FUNCTION); +#else if (eval_transfer || config_container[iZone]->GetStructuralProblem()) AD::ComputeAdjoint(TRANSFER, OBJECTIVE_FUNCTION); +#endif /*--- Adjoints of dependencies, needed if derivatives of variables * are extracted (e.g. AoA, Mach, etc.) ---*/ From 6f3c86a38a960646f9612cd070a6bd8c1b78a460 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Thu, 24 Jun 2021 17:59:34 +0100 Subject: [PATCH 04/39] work estimate for OpenMP scheduling of preconditioners based on num non zeros --- Common/src/linear_algebra/CSysMatrix.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 14752136f9d9..92a5bb204b1b 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -185,10 +185,17 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi /*--- This is akin to the row_ptr. ---*/ omp_partitions = new unsigned long [omp_num_parts+1]; - /// TODO: Use a work estimate to produce more balanced partitions. - auto pts_per_part = roundUpDiv(nPointDomain, omp_num_parts); - for(auto part = 0ul; part < omp_num_parts; ++part) - omp_partitions[part] = part * pts_per_part; + /*--- Work estimate based on non-zeros to produce balanced partitions. ---*/ + + const auto row_ptr_prec = ilu_needed? row_ptr_ilu : row_ptr; + const auto nnz_prec = row_ptr_prec[nPointDomain]; + + const auto nnz_per_part = roundUpDiv(nnz_prec, omp_num_parts); + + for (auto iPoint = 1ul, part = 0ul; iPoint < nPointDomain; ++iPoint) { + if (row_ptr_prec[iPoint] >= part*nnz_per_part) + omp_partitions[part++] = iPoint; + } omp_partitions[omp_num_parts] = nPointDomain; /*--- Generate MKL Kernels ---*/ From d5f8ac99b9b907df1a5610079ce4bdf22d5af73e Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Thu, 24 Jun 2021 18:13:21 +0100 Subject: [PATCH 05/39] small fix --- Common/src/linear_algebra/CSysMatrix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 92a5bb204b1b..ef60b2464f14 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -192,7 +192,7 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi const auto nnz_per_part = roundUpDiv(nnz_prec, omp_num_parts); - for (auto iPoint = 1ul, part = 0ul; iPoint < nPointDomain; ++iPoint) { + for (auto iPoint = 0ul, part = 0ul; iPoint < nPointDomain; ++iPoint) { if (row_ptr_prec[iPoint] >= part*nnz_per_part) omp_partitions[part++] = iPoint; } From 53bd2742b83e4293575921ae38439f0865c1c648 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Thu, 24 Jun 2021 23:45:33 +0100 Subject: [PATCH 06/39] update tests --- TestCases/hybrid_regression.py | 102 ++++++++++++++++----------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/TestCases/hybrid_regression.py b/TestCases/hybrid_regression.py index af6011b0eaf1..f26712365922 100644 --- a/TestCases/hybrid_regression.py +++ b/TestCases/hybrid_regression.py @@ -79,7 +79,7 @@ def main(): fixedCL_naca0012.cfg_dir = "fixed_cl/naca0012" fixedCL_naca0012.cfg_file = "inv_NACA0012.cfg" fixedCL_naca0012.test_iter = 10 - fixedCL_naca0012.test_vals = [-7.374790, -1.872333, 0.300000, 0.019471] + fixedCL_naca0012.test_vals = [-7.374806, -1.872330, 0.300000, 0.019471] test_list.append(fixedCL_naca0012) # HYPERSONIC FLOW PAST BLUNT BODY @@ -107,7 +107,7 @@ def main(): cylinder.cfg_dir = "navierstokes/cylinder" cylinder.cfg_file = "lam_cylinder.cfg" cylinder.test_iter = 25 - cylinder.test_vals = [-6.765432, -1.297428, 0.019596, 0.310240] + cylinder.test_vals = [-6.765429, -1.297425, 0.019571, 0.310232] test_list.append(cylinder) # Laminar cylinder (low Mach correction) @@ -115,7 +115,7 @@ def main(): cylinder_lowmach.cfg_dir = "navierstokes/cylinder" cylinder_lowmach.cfg_file = "cylinder_lowmach.cfg" cylinder_lowmach.test_iter = 25 - cylinder_lowmach.test_vals = [-6.850130, -1.388096, -0.056203, 108.140819] + cylinder_lowmach.test_vals = [-6.850130, -1.388096, -0.056036, 108.140811] test_list.append(cylinder_lowmach) # 2D Poiseuille flow (body force driven with periodic inlet / outlet) @@ -131,7 +131,7 @@ def main(): poiseuille_profile.cfg_dir = "navierstokes/poiseuille" poiseuille_profile.cfg_file = "profile_poiseuille.cfg" poiseuille_profile.test_iter = 10 - poiseuille_profile.test_vals = [-12.494721, -7.712408, -0.000000, 2.085796] + poiseuille_profile.test_vals = [-12.494752, -7.712204, -0.000000, 2.085796] test_list.append(poiseuille_profile) ########################## @@ -151,7 +151,7 @@ def main(): rae2822_sst.cfg_dir = "rans/rae2822" rae2822_sst.cfg_file = "turb_SST_RAE2822.cfg" rae2822_sst.test_iter = 20 - rae2822_sst.test_vals = [-0.510633, 4.871233, 0.811923, 0.061627] + rae2822_sst.test_vals = [-0.510635, 4.871104, 0.811904, 0.061614] test_list.append(rae2822_sst) # RAE2822 SST_SUST @@ -159,7 +159,7 @@ def main(): rae2822_sst_sust.cfg_dir = "rans/rae2822" rae2822_sst_sust.cfg_file = "turb_SST_SUST_RAE2822.cfg" rae2822_sst_sust.test_iter = 20 - rae2822_sst_sust.test_vals = [-2.430689, 4.871233, 0.811923, 0.061627] + rae2822_sst_sust.test_vals = [-2.430589, 4.871104, 0.811903, 0.061614] test_list.append(rae2822_sst_sust) # Flat plate @@ -175,7 +175,7 @@ def main(): turb_oneram6.cfg_dir = "rans/oneram6" turb_oneram6.cfg_file = "turb_ONERAM6.cfg" turb_oneram6.test_iter = 10 - turb_oneram6.test_vals = [-2.388851, -6.689340, 0.230320, 0.157649] + turb_oneram6.test_vals = [-2.388836, -6.689414, 0.230320, 0.157640] test_list.append(turb_oneram6) # NACA0012 (SA, FUN3D finest grid results: CL=1.0983, CD=0.01242) @@ -183,7 +183,7 @@ def main(): turb_naca0012_sa.cfg_dir = "rans/naca0012" turb_naca0012_sa.cfg_file = "turb_NACA0012_sa.cfg" turb_naca0012_sa.test_iter = 10 - turb_naca0012_sa.test_vals = [-11.531286, -14.899968, 1.064330, 0.019756] + turb_naca0012_sa.test_vals = [-11.531271, -14.899968, 1.064330, 0.019756] test_list.append(turb_naca0012_sa) # NACA0012 (SST, FUN3D finest grid results: CL=1.0840, CD=0.01253) @@ -191,7 +191,7 @@ def main(): turb_naca0012_sst.cfg_dir = "rans/naca0012" turb_naca0012_sst.cfg_file = "turb_NACA0012_sst.cfg" turb_naca0012_sst.test_iter = 10 - turb_naca0012_sst.test_vals = [ -11.450482, -12.797872, -5.863656, 1.049989, 0.019163, -1.856223] + turb_naca0012_sst.test_vals = [-11.450475, -12.797872, -5.863655, 1.049989, 0.019163, -1.856263] test_list.append(turb_naca0012_sst) # NACA0012 (SST_SUST, FUN3D finest grid results: CL=1.0840, CD=0.01253) @@ -199,7 +199,7 @@ def main(): turb_naca0012_sst_sust.cfg_dir = "rans/naca0012" turb_naca0012_sst_sust.cfg_file = "turb_NACA0012_sst_sust.cfg" turb_naca0012_sst_sust.test_iter = 10 - turb_naca0012_sst_sust.test_vals = [-11.367055, -12.640670, -5.746919, 1.005233, 0.019017, -1.913885] + turb_naca0012_sst_sust.test_vals = [-11.367051, -12.640670, -5.746919, 1.005233, 0.019017, -1.913905] test_list.append(turb_naca0012_sst_sust) # NACA0012 (SST, fixed values for turbulence quantities) @@ -207,7 +207,7 @@ def main(): turb_naca0012_sst_fixedvalues.cfg_dir = "rans/naca0012" turb_naca0012_sst_fixedvalues.cfg_file = "turb_NACA0012_sst_fixedvalues.cfg" turb_naca0012_sst_fixedvalues.test_iter = 10 - turb_naca0012_sst_fixedvalues.test_vals = [-5.192492, -9.575904, -1.568271, 1.022569, 0.040527, -2.384883] + turb_naca0012_sst_fixedvalues.test_vals = [-5.192502, -9.575898, -1.568269, 1.022571, 0.040527, -2.384329] test_list.append(turb_naca0012_sst_fixedvalues) # PROPELLER @@ -215,7 +215,7 @@ def main(): propeller.cfg_dir = "rans/propeller" propeller.cfg_file = "propeller.cfg" propeller.test_iter = 10 - propeller.test_vals = [-3.389576, -8.409529, 0.000048, 0.056329] + propeller.test_vals = [-3.389575, -8.409529, 0.000048, 0.056329] test_list.append(propeller) ####################################### @@ -227,7 +227,7 @@ def main(): axi_rans_air_nozzle.cfg_dir = "axisymmetric_rans/air_nozzle" axi_rans_air_nozzle.cfg_file = "air_nozzle.cfg" axi_rans_air_nozzle.test_iter = 10 - axi_rans_air_nozzle.test_vals = [-12.094937, -6.622043, -8.814412, -2.393288] + axi_rans_air_nozzle.test_vals = [-12.094946, -6.622056, -8.814417, -2.393288] test_list.append(axi_rans_air_nozzle) ################################# @@ -240,7 +240,7 @@ def main(): turb_naca0012_sst_restart_mg.cfg_file = "turb_NACA0012_sst_multigrid_restart.cfg" turb_naca0012_sst_restart_mg.test_iter = 20 turb_naca0012_sst_restart_mg.ntest_vals = 5 - turb_naca0012_sst_restart_mg.test_vals = [-7.652983, -7.729472, -1.981061, -0.000015, 0.079061] + turb_naca0012_sst_restart_mg.test_vals = [-7.652987, -7.729472, -1.981061, -0.000015, 0.079061] test_list.append(turb_naca0012_sst_restart_mg) ############################# @@ -252,7 +252,7 @@ def main(): turb_naca0012_1c.cfg_dir = "rans_uq/naca0012" turb_naca0012_1c.cfg_file = "turb_NACA0012_uq_1c.cfg" turb_naca0012_1c.test_iter = 10 - turb_naca0012_1c.test_vals = [-4.980749, 1.139261, 0.244629, -0.112860] + turb_naca0012_1c.test_vals = [-4.980749, 1.139261, 0.244644, -0.112857] test_list.append(turb_naca0012_1c) # NACA0012 2c @@ -260,7 +260,7 @@ def main(): turb_naca0012_2c.cfg_dir = "rans_uq/naca0012" turb_naca0012_2c.cfg_file = "turb_NACA0012_uq_2c.cfg" turb_naca0012_2c.test_iter = 10 - turb_naca0012_2c.test_vals = [-5.483337, 0.968887, 0.212022, -0.120321] + turb_naca0012_2c.test_vals = [-5.483337, 0.968887, 0.212057, -0.120310] test_list.append(turb_naca0012_2c) # NACA0012 3c @@ -268,7 +268,7 @@ def main(): turb_naca0012_3c.cfg_dir = "rans_uq/naca0012" turb_naca0012_3c.cfg_file = "turb_NACA0012_uq_3c.cfg" turb_naca0012_3c.test_iter = 10 - turb_naca0012_3c.test_vals = [-5.584300, 0.931383, 0.205075, -0.120905] + turb_naca0012_3c.test_vals = [-5.584300, 0.931383, 0.205113, -0.120892] test_list.append(turb_naca0012_3c) # NACA0012 p1c1 @@ -276,7 +276,7 @@ def main(): turb_naca0012_p1c1.cfg_dir = "rans_uq/naca0012" turb_naca0012_p1c1.cfg_file = "turb_NACA0012_uq_p1c1.cfg" turb_naca0012_p1c1.test_iter = 10 - turb_naca0012_p1c1.test_vals = [-5.133237, 1.075365, 0.337532, -0.077873] + turb_naca0012_p1c1.test_vals = [-5.133233, 1.075372, 0.337556, -0.077868] test_list.append(turb_naca0012_p1c1) # NACA0012 p1c2 @@ -284,7 +284,7 @@ def main(): turb_naca0012_p1c2.cfg_dir = "rans_uq/naca0012" turb_naca0012_p1c2.cfg_file = "turb_NACA0012_uq_p1c2.cfg" turb_naca0012_p1c2.test_iter = 10 - turb_naca0012_p1c2.test_vals = [-5.554623, 0.943691, 0.226361, -0.116560] + turb_naca0012_p1c2.test_vals = [-5.554619, 0.943693, 0.226386, -0.116553] test_list.append(turb_naca0012_p1c2) ###################################### @@ -305,7 +305,7 @@ def main(): hb_rans_preconditioning.cfg_dir = "harmonic_balance/hb_rans_preconditioning" hb_rans_preconditioning.cfg_file = "davis.cfg" hb_rans_preconditioning.test_iter = 25 - hb_rans_preconditioning.test_vals = [-1.902111, -5.949291, 0.007768, 0.128060] + hb_rans_preconditioning.test_vals = [-1.902111, -5.949288, 0.007768, 0.128060] hb_rans_preconditioning.new_output = False test_list.append(hb_rans_preconditioning) @@ -327,7 +327,7 @@ def main(): inc_nozzle.cfg_dir = "incomp_euler/nozzle" inc_nozzle.cfg_file = "inv_nozzle.cfg" inc_nozzle.test_iter = 20 - inc_nozzle.test_vals = [-5.973103, -4.911802, -0.000195, 0.121643] + inc_nozzle.test_vals = [-5.971249, -4.910844, -0.000196, 0.121635] inc_nozzle.new_output = True test_list.append(inc_nozzle) @@ -340,7 +340,7 @@ def main(): inc_lam_cylinder.cfg_dir = "incomp_navierstokes/cylinder" inc_lam_cylinder.cfg_file = "incomp_cylinder.cfg" inc_lam_cylinder.test_iter = 10 - inc_lam_cylinder.test_vals = [-4.004277, -3.227956, 0.003852, 7.626578] + inc_lam_cylinder.test_vals = [-4.004277, -3.227956, 0.003851, 7.626583] inc_lam_cylinder.new_output = True test_list.append(inc_lam_cylinder) @@ -358,7 +358,7 @@ def main(): inc_poly_cylinder.cfg_dir = "incomp_navierstokes/cylinder" inc_poly_cylinder.cfg_file = "poly_cylinder.cfg" inc_poly_cylinder.test_iter = 20 - inc_poly_cylinder.test_vals = [-7.849071, -2.092548, 0.029423, 1.922053] + inc_poly_cylinder.test_vals = [-7.851512, -2.093420, 0.029974, 1.921595] inc_poly_cylinder.new_output = True test_list.append(inc_poly_cylinder) @@ -367,7 +367,7 @@ def main(): inc_lam_bend.cfg_dir = "incomp_navierstokes/bend" inc_lam_bend.cfg_file = "lam_bend.cfg" inc_lam_bend.test_iter = 10 - inc_lam_bend.test_vals = [-3.438863, -3.102176, -0.017532, -0.193429] + inc_lam_bend.test_vals = [-3.436191, -3.098014, -0.017338, -0.193981] test_list.append(inc_lam_bend) ############################ @@ -379,7 +379,7 @@ def main(): inc_turb_naca0012.cfg_dir = "incomp_rans/naca0012" inc_turb_naca0012.cfg_file = "naca0012.cfg" inc_turb_naca0012.test_iter = 20 - inc_turb_naca0012.test_vals = [-4.788495, -11.040511, 0.000023, 0.309503] + inc_turb_naca0012.test_vals = [-4.788405, -11.040493, 0.000008, 0.309506] inc_turb_naca0012.new_output = True test_list.append(inc_turb_naca0012) @@ -388,7 +388,7 @@ def main(): inc_turb_naca0012_sst_sust.cfg_dir = "incomp_rans/naca0012" inc_turb_naca0012_sst_sust.cfg_file = "naca0012_SST_SUST.cfg" inc_turb_naca0012_sst_sust.test_iter = 20 - inc_turb_naca0012_sst_sust.test_vals = [-7.276424, 0.145861, 0.000003, 0.312011] + inc_turb_naca0012_sst_sust.test_vals = [-7.276424, 0.145860, 0.000003, 0.312011] test_list.append(inc_turb_naca0012_sst_sust) ###################################### @@ -400,7 +400,7 @@ def main(): cavity.cfg_dir = "moving_wall/cavity" cavity.cfg_file = "lam_cavity.cfg" cavity.test_iter = 25 - cavity.test_vals = [-5.627934, -0.164469, 0.051998, 2.547065] + cavity.test_vals = [-5.627934, -0.164469, 0.052000, 2.547063] test_list.append(cavity) # Spinning cylinder @@ -408,7 +408,7 @@ def main(): spinning_cylinder.cfg_dir = "moving_wall/spinning_cylinder" spinning_cylinder.cfg_file = "spinning_cylinder.cfg" spinning_cylinder.test_iter = 25 - spinning_cylinder.test_vals = [-7.996313, -2.601764, 1.510692, 1.493876] + spinning_cylinder.test_vals = [-8.001289, -2.607956, 1.501322, 1.488559] test_list.append(spinning_cylinder) ###################################### @@ -420,7 +420,7 @@ def main(): square_cylinder.cfg_dir = "unsteady/square_cylinder" square_cylinder.cfg_file = "turb_square.cfg" square_cylinder.test_iter = 3 - square_cylinder.test_vals = [-1.162572, 0.066371, 1.399790, 2.220393] + square_cylinder.test_vals = [-1.162564, 0.066401, 1.399788, 2.220402] square_cylinder.unsteady = True test_list.append(square_cylinder) @@ -429,7 +429,7 @@ def main(): sine_gust.cfg_dir = "gust" sine_gust.cfg_file = "inv_gust_NACA0012.cfg" sine_gust.test_iter = 5 - sine_gust.test_vals = [-1.977520, 3.481804, -0.012403, -0.007453] + sine_gust.test_vals = [-1.977520, 3.481804, -0.012402, -0.007454] sine_gust.unsteady = True test_list.append(sine_gust) @@ -438,7 +438,7 @@ def main(): aeroelastic.cfg_dir = "aeroelastic" aeroelastic.cfg_file = "aeroelastic_NACA64A010.cfg" aeroelastic.test_iter = 2 - aeroelastic.test_vals = [0.074447, 0.033116, -0.001649, -0.000127] + aeroelastic.test_vals = [0.074433, 0.033108, -0.001650, -0.000127] aeroelastic.unsteady = True test_list.append(aeroelastic) @@ -465,7 +465,7 @@ def main(): unst_deforming_naca0012.cfg_dir = "disc_adj_euler/naca0012_pitching_def" unst_deforming_naca0012.cfg_file = "inv_NACA0012_pitching_deform.cfg" unst_deforming_naca0012.test_iter = 5 - unst_deforming_naca0012.test_vals = [-3.665128, -3.793593, -3.716506, -3.148308] + unst_deforming_naca0012.test_vals = [-3.665120, -3.793643, -3.716518, -3.148310] unst_deforming_naca0012.unsteady = True test_list.append(unst_deforming_naca0012) @@ -478,7 +478,7 @@ def main(): edge_VW.cfg_dir = "nicf/edge" edge_VW.cfg_file = "edge_VW.cfg" edge_VW.test_iter = 100 - edge_VW.test_vals = [-5.040283, 1.124491, -0.000009, 0.000000] + edge_VW.test_vals = [-5.040287, 1.124488, -0.000009, 0.000000] test_list.append(edge_VW) # Rarefaction shock wave edge_PPR @@ -486,7 +486,7 @@ def main(): edge_PPR.cfg_dir = "nicf/edge" edge_PPR.cfg_file = "edge_PPR.cfg" edge_PPR.test_iter = 100 - edge_PPR.test_vals = [-5.401640, 0.738165, -0.000035, 0.000000] + edge_PPR.test_vals = [-5.401601, 0.738205, -0.000035, 0.000000] test_list.append(edge_PPR) ###################################### @@ -498,7 +498,7 @@ def main(): Jones_tc.cfg_dir = "turbomachinery/APU_turbocharger" Jones_tc.cfg_file = "Jones.cfg" Jones_tc.test_iter = 5 - Jones_tc.test_vals = [-5.279930, 0.379651, 72.212090, 1.277440] + Jones_tc.test_vals = [-5.279930, 0.379651, 72.212100, 1.277439] Jones_tc.new_output = False test_list.append(Jones_tc) @@ -507,7 +507,7 @@ def main(): Jones_tc_rst.cfg_dir = "turbomachinery/APU_turbocharger" Jones_tc_rst.cfg_file = "Jones_rst.cfg" Jones_tc_rst.test_iter = 5 - Jones_tc_rst.test_vals = [-4.625248, -1.568821, 33.995140, 10.181940] + Jones_tc_rst.test_vals = [-4.625251, -1.568824, 33.995140, 10.181940] Jones_tc_rst.new_output = False test_list.append(Jones_tc_rst) @@ -516,7 +516,7 @@ def main(): axial_stage2D.cfg_dir = "turbomachinery/axial_stage_2D" axial_stage2D.cfg_file = "Axial_stage2D.cfg" axial_stage2D.test_iter = 20 - axial_stage2D.test_vals = [-1.933200, 5.379973, 73.357900, 0.925878] + axial_stage2D.test_vals = [-1.933139, 5.380376, 73.357910, 0.925874] axial_stage2D.new_output = False test_list.append(axial_stage2D) @@ -525,7 +525,7 @@ def main(): transonic_stator.cfg_dir = "turbomachinery/transonic_stator_2D" transonic_stator.cfg_file = "transonic_stator.cfg" transonic_stator.test_iter = 20 - transonic_stator.test_vals = [-0.562430, 5.828446, 96.436050, 0.062506] + transonic_stator.test_vals = [-0.565608, 5.833408, 96.476150, 0.062517] transonic_stator.new_output = False test_list.append(transonic_stator) @@ -534,7 +534,7 @@ def main(): transonic_stator_rst.cfg_dir = "turbomachinery/transonic_stator_2D" transonic_stator_rst.cfg_file = "transonic_stator_rst.cfg" transonic_stator_rst.test_iter = 20 - transonic_stator_rst.test_vals = [-6.621626, -0.614366, 5.002986, 0.002951] + transonic_stator_rst.test_vals = [-6.619122, -0.615716, 5.002986, 0.002951] transonic_stator_rst.new_output = False test_list.append(transonic_stator_rst) @@ -547,7 +547,7 @@ def main(): uniform_flow.cfg_dir = "sliding_interface/uniform_flow" uniform_flow.cfg_file = "uniform_NN.cfg" uniform_flow.test_iter = 5 - uniform_flow.test_vals = [5.000000, 0.000000, -0.188748, -10.631530] + uniform_flow.test_vals = [5.000000, 0.000000, -0.188748, -10.631524] uniform_flow.unsteady = True uniform_flow.multizone = True test_list.append(uniform_flow) @@ -557,7 +557,7 @@ def main(): channel_2D.cfg_dir = "sliding_interface/channel_2D" channel_2D.cfg_file = "channel_2D_WA.cfg" channel_2D.test_iter = 2 - channel_2D.test_vals = [2.000000, 0.000000, 0.398089, 0.352762, 0.405397] + channel_2D.test_vals = [2.000000, 0.000000, 0.397972, 0.352756, 0.405398] channel_2D.unsteady = True channel_2D.multizone = True test_list.append(channel_2D) @@ -567,7 +567,7 @@ def main(): channel_3D.cfg_dir = "sliding_interface/channel_3D" channel_3D.cfg_file = "channel_3D_WA.cfg" channel_3D.test_iter = 2 - channel_3D.test_vals = [2.000000, 0.000000, 0.620151, 0.505156, 0.415292] + channel_3D.test_vals = [2.000000, 0.000000, 0.620149, 0.505190, 0.415133] channel_3D.unsteady = True channel_3D.multizone = True test_list.append(channel_3D) @@ -577,7 +577,7 @@ def main(): pipe.cfg_dir = "sliding_interface/pipe" pipe.cfg_file = "pipe_NN.cfg" pipe.test_iter = 2 - pipe.test_vals = [0.150024, 0.491949, 0.677757, 0.963990, 1.006944] + pipe.test_vals = [0.150024, 0.491949, 0.677759, 0.963991, 1.006947] pipe.unsteady = True pipe.multizone = True test_list.append(pipe) @@ -587,7 +587,7 @@ def main(): rotating_cylinders.cfg_dir = "sliding_interface/rotating_cylinders" rotating_cylinders.cfg_file = "rot_cylinders_WA.cfg" rotating_cylinders.test_iter = 3 - rotating_cylinders.test_vals = [3.000000, 0.000000, 0.777567, 1.134807, 1.224136] + rotating_cylinders.test_vals = [3.000000, 0.000000, 0.777568, 1.134807, 1.224137] rotating_cylinders.unsteady = True rotating_cylinders.multizone = True test_list.append(rotating_cylinders) @@ -597,7 +597,7 @@ def main(): supersonic_vortex_shedding.cfg_dir = "sliding_interface/supersonic_vortex_shedding" supersonic_vortex_shedding.cfg_file = "sup_vor_shed_WA.cfg" supersonic_vortex_shedding.test_iter = 5 - supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.216554, 1.639121] + supersonic_vortex_shedding.test_vals = [5.000000, 0.000000, 1.216554, 1.639119] supersonic_vortex_shedding.unsteady = True supersonic_vortex_shedding.multizone = True test_list.append(supersonic_vortex_shedding) @@ -607,7 +607,7 @@ def main(): bars_SST_2D.cfg_dir = "sliding_interface/bars_SST_2D" bars_SST_2D.cfg_file = "bars.cfg" bars_SST_2D.test_iter = 13 - bars_SST_2D.test_vals = [13.000000, -0.619686, -1.564594] + bars_SST_2D.test_vals = [13.000000, -0.619686, -1.564595] bars_SST_2D.multizone = True test_list.append(bars_SST_2D) @@ -616,7 +616,7 @@ def main(): slinc_steady.cfg_dir = "sliding_interface/incompressible_steady" slinc_steady.cfg_file = "config.cfg" slinc_steady.test_iter = 19 - slinc_steady.test_vals = [19.000000, -1.800461, -2.115195] #last 3 columns + slinc_steady.test_vals = [19.000000, -1.800401, -2.114687] slinc_steady.multizone = True test_list.append(slinc_steady) @@ -646,7 +646,7 @@ def main(): fsi2d.cfg_dir = "fea_fsi/WallChannel_2d" fsi2d.cfg_file = "configFSI.cfg" fsi2d.test_iter = 4 - fsi2d.test_vals = [4, 0, -3.743230, -4.133462] + fsi2d.test_vals = [4.000000, 0.000000, -3.743227, -4.133479] fsi2d.multizone= True fsi2d.unsteady = True test_list.append(fsi2d) @@ -656,7 +656,7 @@ def main(): stat_fsi.cfg_dir = "fea_fsi/stat_fsi" stat_fsi.cfg_file = "config.cfg" stat_fsi.test_iter = 7 - stat_fsi.test_vals = [-3.242851, -4.866383, 0.000000, 11] + stat_fsi.test_vals = [-5.403596, -5.722583, 0.000000, 10.000000] stat_fsi.multizone = True test_list.append(stat_fsi) @@ -665,7 +665,7 @@ def main(): dyn_fsi.cfg_dir = "fea_fsi/dyn_fsi" dyn_fsi.cfg_file = "config.cfg" dyn_fsi.test_iter = 4 - dyn_fsi.test_vals = [-4.355806, -4.060581, 5.3837e-08, 100] + dyn_fsi.test_vals = [-4.355806, -4.060582, 0.000000, 102.000000] dyn_fsi.multizone = True dyn_fsi.unsteady = True test_list.append(dyn_fsi) @@ -675,7 +675,7 @@ def main(): stat_fsi_restart.cfg_dir = "fea_fsi/stat_fsi" stat_fsi_restart.cfg_file = "config_restart.cfg" stat_fsi_restart.test_iter = 1 - stat_fsi_restart.test_vals = [-3.474239, -4.250710, 0.000000, 36.000000] + stat_fsi_restart.test_vals = [-3.474082, -4.242343, 0.000000, 37.000000] stat_fsi_restart.multizone = True test_list.append(stat_fsi_restart) From 0fe1e679fba14c3ce6efc021212ab7b0fb2c79b6 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 25 Jun 2021 00:08:30 +0100 Subject: [PATCH 07/39] add hybrid AD regressions --- .github/workflows/regression.yml | 4 +- TestCases/hybrid_regression.py | 2 +- TestCases/hybrid_regression_AD.py | 250 ++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+), 2 deletions(-) create mode 100644 TestCases/hybrid_regression_AD.py diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 3d6c73084312..287eb8a8eb27 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -60,7 +60,7 @@ jobs: strategy: fail-fast: false matrix: - testscript: ['tutorials.py', 'parallel_regression.py', 'parallel_regression_AD.py', 'serial_regression.py', 'serial_regression_AD.py', 'hybrid_regression.py'] + testscript: ['tutorials.py', 'parallel_regression.py', 'parallel_regression_AD.py', 'serial_regression.py', 'serial_regression_AD.py', 'hybrid_regression.py', 'hybrid_regression_AD.py'] include: - testscript: 'tutorials.py' tag: MPI @@ -74,6 +74,8 @@ jobs: tag: NoMPI - testscript: 'hybrid_regression.py' tag: OMP + - testscript: 'hybrid_regression_AD.py' + tag: OMP steps: - name: Download All artifact uses: actions/download-artifact@v2 diff --git a/TestCases/hybrid_regression.py b/TestCases/hybrid_regression.py index f26712365922..63c661f67516 100644 --- a/TestCases/hybrid_regression.py +++ b/TestCases/hybrid_regression.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -## \file parallel_regression.py +## \file hybrid_regression.py # \brief Python script for automated regression testing of SU2 examples # \author A. Aranake, A. Campos, T. Economon, T. Lukaczyk, S. Padron # \version 7.1.1 "Blackbird" diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py new file mode 100644 index 000000000000..218ebad3827b --- /dev/null +++ b/TestCases/hybrid_regression_AD.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python + +## \file hybrid_regression_AD.py +# \brief Python script for automated regression testing of SU2 examples +# \author A. Aranake, A. Campos, T. Economon, T. Lukaczyk, S. Padron +# \version 7.1.1 "Blackbird" +# +# SU2 Project Website: https://su2code.github.io +# +# The SU2 Project is maintained by the SU2 Foundation +# (http://su2foundation.org) +# +# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) +# +# SU2 is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# SU2 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with SU2. If not, see . + +# make print(*args) function available in PY2.6+, does'nt work on PY < 2.6 +from __future__ import print_function + +import sys +from TestCase import TestCase + +def main(): + '''This program runs SU2 and ensures that the output matches specified values. + This will be used to do checks when code is pushed to github + to make sure nothing is broken. ''' + + test_list = [] + + ##################################### + ### Disc. adj. compressible Euler ### + ##################################### + + # Inviscid NACA0012 + discadj_naca0012 = TestCase('discadj_naca0012') + discadj_naca0012.cfg_dir = "cont_adj_euler/naca0012" + discadj_naca0012.cfg_file = "inv_NACA0012_discadj.cfg" + discadj_naca0012.test_iter = 100 + discadj_naca0012.test_vals = [-3.561506, -8.926634, -0.000000, 0.005587] + test_list.append(discadj_naca0012) + + # Inviscid Cylinder 3D (multiple markers) + discadj_cylinder3D = TestCase('discadj_cylinder3D') + discadj_cylinder3D.cfg_dir = "disc_adj_euler/cylinder3D" + discadj_cylinder3D.cfg_file = "inv_cylinder3D.cfg" + discadj_cylinder3D.test_iter = 5 + discadj_cylinder3D.test_vals = [-3.734502, -3.839637, 0.000000, 0.000000] + test_list.append(discadj_cylinder3D) + + # Arina nozzle 2D + discadj_arina2k = TestCase('discadj_arina2k') + discadj_arina2k.cfg_dir = "disc_adj_euler/arina2k" + discadj_arina2k.cfg_file = "Arina2KRS.cfg" + discadj_arina2k.test_iter = 20 + discadj_arina2k.test_vals = [2.189902, 1.635938, 47258.000000, 0.000000] + test_list.append(discadj_arina2k) + + #################################### + ### Disc. adj. compressible RANS ### + #################################### + + # Adjoint turbulent NACA0012 SA + discadj_rans_naca0012_sa = TestCase('discadj_rans_naca0012_sa') + discadj_rans_naca0012_sa.cfg_dir = "disc_adj_rans/naca0012" + discadj_rans_naca0012_sa.cfg_file = "turb_NACA0012_sa.cfg" + discadj_rans_naca0012_sa.test_iter = 10 + discadj_rans_naca0012_sa.test_vals = [-2.230578, 0.696567, 0.181590, -0.000018] + test_list.append(discadj_rans_naca0012_sa) + + # Adjoint turbulent NACA0012 SST + discadj_rans_naca0012_sst = TestCase('discadj_rans_naca0012_sst') + discadj_rans_naca0012_sst.cfg_dir = "disc_adj_rans/naca0012" + discadj_rans_naca0012_sst.cfg_file = "turb_NACA0012_sst.cfg" + discadj_rans_naca0012_sst.test_iter = 10 + discadj_rans_naca0012_sst.test_vals = [-2.221792, -0.491538, 0.182010, -0.000018] + test_list.append(discadj_rans_naca0012_sst) + + ####################################### + ### Disc. adj. incompressible Euler ### + ####################################### + + # Adjoint Incompressible Inviscid NACA0012 + discadj_incomp_NACA0012 = TestCase('discadj_incomp_NACA0012') + discadj_incomp_NACA0012.cfg_dir = "disc_adj_incomp_euler/naca0012" + discadj_incomp_NACA0012.cfg_file = "incomp_NACA0012_disc.cfg" + discadj_incomp_NACA0012.test_iter = 20 + discadj_incomp_NACA0012.test_vals = [20.000000, -4.095412, -2.690483, 0.000000] + test_list.append(discadj_incomp_NACA0012) + + ##################################### + ### Disc. adj. incompressible N-S ### + ##################################### + + # Adjoint Incompressible Viscous Cylinder (Heated) + discadj_incomp_cylinder = TestCase('discadj_incomp_cylinder') + discadj_incomp_cylinder.cfg_dir = "disc_adj_incomp_navierstokes/cylinder" + discadj_incomp_cylinder.cfg_file = "heated_cylinder.cfg" + discadj_incomp_cylinder.test_iter = 20 + discadj_incomp_cylinder.test_vals = [20.000000, -2.195581, -2.162081, 0.000000] + test_list.append(discadj_incomp_cylinder) + + ###################################### + ### Disc. adj. incompressible RANS ### + ###################################### + + # Adjoint Incompressible Turbulent NACA 0012 SA + discadj_incomp_turb_NACA0012_sa = TestCase('discadj_incomp_turb_NACA0012_sa') + discadj_incomp_turb_NACA0012_sa.cfg_dir = "disc_adj_incomp_rans/naca0012" + discadj_incomp_turb_NACA0012_sa.cfg_file = "turb_naca0012_sa.cfg" + discadj_incomp_turb_NACA0012_sa.test_iter = 10 + discadj_incomp_turb_NACA0012_sa.test_vals = [10.000000, -3.846018, -1.031079, 0.000000] + test_list.append(discadj_incomp_turb_NACA0012_sa) + + # Adjoint Incompressible Turbulent NACA 0012 SST + discadj_incomp_turb_NACA0012_sst = TestCase('discadj_incomp_turb_NACA0012_sst') + discadj_incomp_turb_NACA0012_sst.cfg_dir = "disc_adj_incomp_rans/naca0012" + discadj_incomp_turb_NACA0012_sst.cfg_file = "turb_naca0012_sst.cfg" + discadj_incomp_turb_NACA0012_sst.test_iter = 10 + discadj_incomp_turb_NACA0012_sst.test_vals = [-3.845593, -2.413098, -8.419991, 0.000000] + test_list.append(discadj_incomp_turb_NACA0012_sst) + + ####################################################### + ### Unsteady Disc. adj. compressible RANS ### + ####################################################### + + # Turbulent Cylinder + discadj_cylinder = TestCase('unsteady_cylinder') + discadj_cylinder.cfg_dir = "disc_adj_rans/cylinder" + discadj_cylinder.cfg_file = "cylinder.cfg" + discadj_cylinder.test_iter = 9 + discadj_cylinder.test_vals = [3.746909, -1.544883, -0.008321, 0.000014] #last 4 columns + discadj_cylinder.unsteady = True + test_list.append(discadj_cylinder) + + ############################################################## + ### Unsteady Disc. adj. compressible RANS Windowed Average ### + ############################################################## + + # Turbulent Cylinder + discadj_cylinder = TestCase('unsteady_cylinder_windowed_average_AD') + discadj_cylinder.cfg_dir = "disc_adj_rans/cylinder" + discadj_cylinder.cfg_file = "cylinder_Windowing_AD.cfg" + discadj_cylinder.test_iter = 9 + discadj_cylinder.test_vals = [3.004406] #last column + discadj_cylinder.unsteady = True + test_list.append(discadj_cylinder) + + ########################################################################## + ### Unsteady Disc. adj. compressible RANS DualTimeStepping 1st order ### + ########################################################################## + + # Turbulent Cylinder + discadj_DT_1ST_cylinder = TestCase('unsteady_cylinder_DT_1ST') + discadj_DT_1ST_cylinder.cfg_dir = "disc_adj_rans/cylinder_DT_1ST" + discadj_DT_1ST_cylinder.cfg_file = "cylinder.cfg" + discadj_DT_1ST_cylinder.test_iter = 9 + discadj_DT_1ST_cylinder.test_vals = [3.698168, -1.607050, -0.002159, 0.000028] #last 4 columns + discadj_DT_1ST_cylinder.unsteady = True + test_list.append(discadj_DT_1ST_cylinder) + + ###################################################### + ### Unsteady Disc. adj. compressible pitching NACA ### + ###################################################### + + # compressible pitching NACA0012 + discadj_pitchingNACA0012 = TestCase('pitchingNACA0012') + discadj_pitchingNACA0012.cfg_dir = "disc_adj_euler/naca0012_pitching" + discadj_pitchingNACA0012.cfg_file = "inv_NACA0012_pitching.cfg" + discadj_pitchingNACA0012.test_iter = 4 + discadj_pitchingNACA0012.test_vals = [-1.223480, -1.639387, -0.007591, 0.000013] + discadj_pitchingNACA0012.unsteady = True + test_list.append(discadj_pitchingNACA0012) + + ####################################################### + ### Disc. adj. turbomachinery ### + ####################################################### + + # Transonic Stator 2D + discadj_trans_stator = TestCase('transonic_stator') + discadj_trans_stator.cfg_dir = "disc_adj_turbomachinery/transonic_stator_2D" + discadj_trans_stator.cfg_file = "transonic_stator.cfg" + discadj_trans_stator.test_iter = 79 + discadj_trans_stator.test_vals = [79.000000, -1.941681, -1.998327] + test_list.append(discadj_trans_stator) + + ################################### + ### Structural Adjoint ### + ################################### + + # Structural model + discadj_fea = TestCase('discadj_fea') + discadj_fea.cfg_dir = "disc_adj_fea" + discadj_fea.cfg_file = "configAD_fem.cfg" + discadj_fea.test_iter = 4 + discadj_fea.test_vals = [-2.849774, -3.238669, -0.000364, -8.708700] #last 4 columns + test_list.append(discadj_fea) + + ################################### + ### Disc. adj. heat ### + ################################### + + # Discrete adjoint for heated cylinder + discadj_heat = TestCase('discadj_heat') + discadj_heat.cfg_dir = "disc_adj_heat" + discadj_heat.cfg_file = "disc_adj_heat.cfg" + discadj_heat.test_iter = 10 + discadj_heat.test_vals = [-2.280433, 0.714828, -0.743730, -6.767300] + test_list.append(discadj_heat) + + ###################################### + ### RUN TESTS ### + ###################################### + + for test in test_list: + test.su2_exec = "SU2_CFD_AD -t 2" + test.timeout = 600 + test.tol = 1e-4 + #end + + pass_list = [ test.run_test() for test in test_list ] + + # Tests summary + print('==================================================================') + print('Summary of the hybrid parallel AD tests') + print('python version:', sys.version) + for i, test in enumerate(test_list): + if (pass_list[i]): + print(' passed - %s'%test.tag) + else: + print('* FAILED - %s'%test.tag) + + if all(pass_list): + sys.exit(0) + else: + sys.exit(1) + # done + +if __name__ == '__main__': + main() From 5017a905872d1b3b341746f0c42032d586110ab3 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 25 Jun 2021 10:15:07 +0100 Subject: [PATCH 08/39] set reference residuals --- TestCases/hybrid_regression_AD.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py index 218ebad3827b..8559e4a524ba 100644 --- a/TestCases/hybrid_regression_AD.py +++ b/TestCases/hybrid_regression_AD.py @@ -55,7 +55,7 @@ def main(): discadj_cylinder3D.cfg_dir = "disc_adj_euler/cylinder3D" discadj_cylinder3D.cfg_file = "inv_cylinder3D.cfg" discadj_cylinder3D.test_iter = 5 - discadj_cylinder3D.test_vals = [-3.734502, -3.839637, 0.000000, 0.000000] + discadj_cylinder3D.test_vals = [-3.730673, -3.832084, -0.000000, 0.000000] test_list.append(discadj_cylinder3D) # Arina nozzle 2D @@ -63,7 +63,7 @@ def main(): discadj_arina2k.cfg_dir = "disc_adj_euler/arina2k" discadj_arina2k.cfg_file = "Arina2KRS.cfg" discadj_arina2k.test_iter = 20 - discadj_arina2k.test_vals = [2.189902, 1.635938, 47258.000000, 0.000000] + discadj_arina2k.test_vals = [2.107806, 1.574254, 47250.000000, 0.000000] test_list.append(discadj_arina2k) #################################### @@ -75,7 +75,7 @@ def main(): discadj_rans_naca0012_sa.cfg_dir = "disc_adj_rans/naca0012" discadj_rans_naca0012_sa.cfg_file = "turb_NACA0012_sa.cfg" discadj_rans_naca0012_sa.test_iter = 10 - discadj_rans_naca0012_sa.test_vals = [-2.230578, 0.696567, 0.181590, -0.000018] + discadj_rans_naca0012_sa.test_vals = [-2.230632, 0.696530, 0.177890, -0.000016] test_list.append(discadj_rans_naca0012_sa) # Adjoint turbulent NACA0012 SST @@ -83,7 +83,7 @@ def main(): discadj_rans_naca0012_sst.cfg_dir = "disc_adj_rans/naca0012" discadj_rans_naca0012_sst.cfg_file = "turb_NACA0012_sst.cfg" discadj_rans_naca0012_sst.test_iter = 10 - discadj_rans_naca0012_sst.test_vals = [-2.221792, -0.491538, 0.182010, -0.000018] + discadj_rans_naca0012_sst.test_vals = [-2.221793, -0.491367, 0.182000, -0.000018] test_list.append(discadj_rans_naca0012_sst) ####################################### @@ -95,7 +95,7 @@ def main(): discadj_incomp_NACA0012.cfg_dir = "disc_adj_incomp_euler/naca0012" discadj_incomp_NACA0012.cfg_file = "incomp_NACA0012_disc.cfg" discadj_incomp_NACA0012.test_iter = 20 - discadj_incomp_NACA0012.test_vals = [20.000000, -4.095412, -2.690483, 0.000000] + discadj_incomp_NACA0012.test_vals = [20.000000, -4.092007, -2.652751, 0.000000] test_list.append(discadj_incomp_NACA0012) ##################################### @@ -107,7 +107,7 @@ def main(): discadj_incomp_cylinder.cfg_dir = "disc_adj_incomp_navierstokes/cylinder" discadj_incomp_cylinder.cfg_file = "heated_cylinder.cfg" discadj_incomp_cylinder.test_iter = 20 - discadj_incomp_cylinder.test_vals = [20.000000, -2.195581, -2.162081, 0.000000] + discadj_incomp_cylinder.test_vals = [20.000000, -2.705921, -2.837904, 0.000000] test_list.append(discadj_incomp_cylinder) ###################################### @@ -119,7 +119,7 @@ def main(): discadj_incomp_turb_NACA0012_sa.cfg_dir = "disc_adj_incomp_rans/naca0012" discadj_incomp_turb_NACA0012_sa.cfg_file = "turb_naca0012_sa.cfg" discadj_incomp_turb_NACA0012_sa.test_iter = 10 - discadj_incomp_turb_NACA0012_sa.test_vals = [10.000000, -3.846018, -1.031079, 0.000000] + discadj_incomp_turb_NACA0012_sa.test_vals = [10.000000, -3.845995, -1.031097, 0.000000] test_list.append(discadj_incomp_turb_NACA0012_sa) # Adjoint Incompressible Turbulent NACA 0012 SST @@ -127,7 +127,7 @@ def main(): discadj_incomp_turb_NACA0012_sst.cfg_dir = "disc_adj_incomp_rans/naca0012" discadj_incomp_turb_NACA0012_sst.cfg_file = "turb_naca0012_sst.cfg" discadj_incomp_turb_NACA0012_sst.test_iter = 10 - discadj_incomp_turb_NACA0012_sst.test_vals = [-3.845593, -2.413098, -8.419991, 0.000000] + discadj_incomp_turb_NACA0012_sst.test_vals = [-3.845593, -2.414026, -8.420194, 0.000000] test_list.append(discadj_incomp_turb_NACA0012_sst) ####################################################### @@ -139,7 +139,7 @@ def main(): discadj_cylinder.cfg_dir = "disc_adj_rans/cylinder" discadj_cylinder.cfg_file = "cylinder.cfg" discadj_cylinder.test_iter = 9 - discadj_cylinder.test_vals = [3.746909, -1.544883, -0.008321, 0.000014] #last 4 columns + discadj_cylinder.test_vals = [3.746907, -1.544882, -0.008321, 0.000014] discadj_cylinder.unsteady = True test_list.append(discadj_cylinder) @@ -152,7 +152,7 @@ def main(): discadj_cylinder.cfg_dir = "disc_adj_rans/cylinder" discadj_cylinder.cfg_file = "cylinder_Windowing_AD.cfg" discadj_cylinder.test_iter = 9 - discadj_cylinder.test_vals = [3.004406] #last column + discadj_cylinder.test_vals = [3.004402] discadj_cylinder.unsteady = True test_list.append(discadj_cylinder) @@ -165,7 +165,7 @@ def main(): discadj_DT_1ST_cylinder.cfg_dir = "disc_adj_rans/cylinder_DT_1ST" discadj_DT_1ST_cylinder.cfg_file = "cylinder.cfg" discadj_DT_1ST_cylinder.test_iter = 9 - discadj_DT_1ST_cylinder.test_vals = [3.698168, -1.607050, -0.002159, 0.000028] #last 4 columns + discadj_DT_1ST_cylinder.test_vals = [3.698167, -1.607051, -0.002159, 0.000028] discadj_DT_1ST_cylinder.unsteady = True test_list.append(discadj_DT_1ST_cylinder) @@ -178,7 +178,7 @@ def main(): discadj_pitchingNACA0012.cfg_dir = "disc_adj_euler/naca0012_pitching" discadj_pitchingNACA0012.cfg_file = "inv_NACA0012_pitching.cfg" discadj_pitchingNACA0012.test_iter = 4 - discadj_pitchingNACA0012.test_vals = [-1.223480, -1.639387, -0.007591, 0.000013] + discadj_pitchingNACA0012.test_vals = [-1.219713, -1.645717, -0.007513, 0.000013] discadj_pitchingNACA0012.unsteady = True test_list.append(discadj_pitchingNACA0012) @@ -191,7 +191,7 @@ def main(): discadj_trans_stator.cfg_dir = "disc_adj_turbomachinery/transonic_stator_2D" discadj_trans_stator.cfg_file = "transonic_stator.cfg" discadj_trans_stator.test_iter = 79 - discadj_trans_stator.test_vals = [79.000000, -1.941681, -1.998327] + discadj_trans_stator.test_vals = [79.000000, -1.938806, -1.995540] test_list.append(discadj_trans_stator) ################################### @@ -203,7 +203,7 @@ def main(): discadj_fea.cfg_dir = "disc_adj_fea" discadj_fea.cfg_file = "configAD_fem.cfg" discadj_fea.test_iter = 4 - discadj_fea.test_vals = [-2.849774, -3.238669, -0.000364, -8.708700] #last 4 columns + discadj_fea.test_vals = [2.183540, 2.071459, -0.000363, -8.655000] test_list.append(discadj_fea) ################################### @@ -215,7 +215,7 @@ def main(): discadj_heat.cfg_dir = "disc_adj_heat" discadj_heat.cfg_file = "disc_adj_heat.cfg" discadj_heat.test_iter = 10 - discadj_heat.test_vals = [-2.280433, 0.714828, -0.743730, -6.767300] + discadj_heat.test_vals = [-2.264225, 0.711581, -0.722340, -6.653200] test_list.append(discadj_heat) ###################################### From b7b3dd7c7d9e9f6993afd55d14bc3f96fa7a4103 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 25 Jun 2021 12:53:31 +0100 Subject: [PATCH 09/39] remove heat case because heat solver does not have openmp --- TestCases/hybrid_regression_AD.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py index 8559e4a524ba..6e42e45e4c71 100644 --- a/TestCases/hybrid_regression_AD.py +++ b/TestCases/hybrid_regression_AD.py @@ -206,18 +206,6 @@ def main(): discadj_fea.test_vals = [2.183540, 2.071459, -0.000363, -8.655000] test_list.append(discadj_fea) - ################################### - ### Disc. adj. heat ### - ################################### - - # Discrete adjoint for heated cylinder - discadj_heat = TestCase('discadj_heat') - discadj_heat.cfg_dir = "disc_adj_heat" - discadj_heat.cfg_file = "disc_adj_heat.cfg" - discadj_heat.test_iter = 10 - discadj_heat.test_vals = [-2.264225, 0.711581, -0.722340, -6.653200] - test_list.append(discadj_heat) - ###################################### ### RUN TESTS ### ###################################### From 967704c3d134347b6b7b6756d29727bfeaa826fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 28 Jun 2021 23:41:47 +0200 Subject: [PATCH 10/39] Make preaccumulation threadprivate. --- Common/include/basic_types/ad_structure.hpp | 6 ++++++ Common/src/basic_types/ad_structure.cpp | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 18c430c7b2f7..56085c66452e 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -274,6 +274,9 @@ namespace AD{ extern bool Status; extern bool PreaccActive; +#ifdef HAVE_OPDI + #pragma omp threadprivate(PreaccActive) +#endif extern bool PreaccEnabled; @@ -290,6 +293,9 @@ namespace AD{ extern std::vector TapePositions; extern codi::PreaccumulationHelper PreaccHelper; +#ifdef HAVE_OPDI + #pragma omp threadprivate(PreaccHelper) +#endif /*--- Reference to the tape. ---*/ diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp index f6defb624350..9a95aff7316c 100644 --- a/Common/src/basic_types/ad_structure.cpp +++ b/Common/src/basic_types/ad_structure.cpp @@ -35,9 +35,16 @@ namespace AD { std::vector TapePositions; bool PreaccActive = false; +#ifdef HAVE_OPDI + #pragma omp threadprivate(PreaccActive) +#endif + bool PreaccEnabled = true; codi::PreaccumulationHelper PreaccHelper; +#ifdef HAVE_OPDI + #pragma omp threadprivate(PreaccHelper) +#endif ExtFuncHelper* FuncHelper; From 0a72b6777d8914c2dfefb0d5143644a0c29841fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 28 Jun 2021 23:42:13 +0200 Subject: [PATCH 11/39] Re-enable parallel preaccumulation. --- Common/src/CConfig.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp index a14351100253..1c468370c34e 100644 --- a/Common/src/CConfig.cpp +++ b/Common/src/CConfig.cpp @@ -4461,11 +4461,7 @@ void CConfig::SetPostprocessing(SU2_COMPONENT val_software, unsigned short val_i #if defined CODI_REVERSE_TYPE AD_Mode = YES; -#if defined HAVE_OMP - AD::PreaccEnabled = false; -#else AD::PreaccEnabled = AD_Preaccumulation; -#endif #else if (AD_Mode == YES) { From c38bf14b1788414f04771c124dd72a47ad1df729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 28 Jun 2021 23:42:26 +0200 Subject: [PATCH 12/39] Remove unused variable. --- Common/include/basic_types/ad_structure.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 56085c66452e..cace6d530ec8 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -271,8 +271,6 @@ namespace AD{ extern ExtFuncHelper* FuncHelper; - extern bool Status; - extern bool PreaccActive; #ifdef HAVE_OPDI #pragma omp threadprivate(PreaccActive) From b61684b8d7a6fe57007402a3d08f6ac0eea4f03d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 28 Jun 2021 23:42:40 +0200 Subject: [PATCH 13/39] PreaccActive was never reset. --- Common/include/basic_types/ad_structure.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index cace6d530ec8..abf28604929a 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -450,6 +450,7 @@ namespace AD{ FORCEINLINE void EndPreacc(){ if (PreaccActive) { PreaccHelper.finish(false); + PreaccActive = false; } } From 781092a3533caf36589426e99293c2b20d332726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 30 Jun 2021 00:31:37 +0200 Subject: [PATCH 14/39] Identify some faulty preaccumulation regions. --- Common/src/geometry/CPhysicalGeometry.cpp | 2 ++ SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp | 2 ++ SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp | 2 ++ SU2_CFD/include/limiters/computeLimiters_impl.hpp | 2 ++ 4 files changed, 8 insertions(+) diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 0ac704389c11..47d0412a3d6d 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -7701,7 +7701,9 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh const auto nNodes = bound[iMarker][iElem]->GetnNodes(); +#ifndef HAVE_OPDI AD::StartPreacc(); +#endif /*--- Get pointers to the coordinates of all the element nodes ---*/ array Coord; diff --git a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp index 38934f8a2d9d..aef20cb3ff80 100644 --- a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp +++ b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp @@ -76,7 +76,9 @@ void computeGradientsGreenGauss(CSolver* solver, { auto nodes = geometry.nodes; +#ifndef HAVE_OPDI AD::StartPreacc(); +#endif AD::SetPreaccIn(nodes->GetVolume(iPoint)); AD::SetPreaccIn(nodes->GetPeriodicVolume(iPoint)); diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp index dcd923901dcb..bad782071d51 100644 --- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp +++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp @@ -203,7 +203,9 @@ void computeGradientsLeastSquares(CSolver* solver, auto nodes = geometry.nodes; const auto coord_i = nodes->GetCoord(iPoint); +#ifndef HAVE_OPDI AD::StartPreacc(); +#endif AD::SetPreaccIn(coord_i, nDim); for (size_t iVar = varBegin; iVar < varEnd; ++iVar) diff --git a/SU2_CFD/include/limiters/computeLimiters_impl.hpp b/SU2_CFD/include/limiters/computeLimiters_impl.hpp index 2876c889f66b..189fd7ada642 100644 --- a/SU2_CFD/include/limiters/computeLimiters_impl.hpp +++ b/SU2_CFD/include/limiters/computeLimiters_impl.hpp @@ -132,7 +132,9 @@ void computeLimiters_impl(CSolver* solver, auto nodes = geometry.nodes; const auto coord_i = nodes->GetCoord(iPoint); +#ifndef HAVE_OPDI AD::StartPreacc(); +#endif AD::SetPreaccIn(coord_i, nDim); for (size_t iVar = varBegin; iVar < varEnd; ++iVar) From 77aa7d0b83ccf58caca44872a0c4df9603c229f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 30 Jun 2021 17:26:15 +0200 Subject: [PATCH 15/39] Disable preaccumulation for parallel boundary numerics. --- SU2_CFD/src/integration/CIntegration.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp index 853172b48bbd..62e7733e12f9 100644 --- a/SU2_CFD/src/integration/CIntegration.cpp +++ b/SU2_CFD/src/integration/CIntegration.cpp @@ -76,6 +76,12 @@ void CIntegration::Space_Integration(CGeometry *geometry, CNumerics* conv_bound_numerics = numerics[CONV_BOUND_TERM + omp_get_thread_num()*MAX_TERMS]; CNumerics* visc_bound_numerics = numerics[VISC_BOUND_TERM + omp_get_thread_num()*MAX_TERMS]; +#ifdef HAVE_OPDI + /* disable preaccumulation for parallel boundary numerics */ + bool preaccEnabled = AD::PreaccEnabled; + AD::PreaccEnabled = false; +#endif + /*--- Boundary conditions that depend on other boundaries (they require MPI sincronization)---*/ solver_container[MainSolver]->BC_Fluid_Interface(geometry, solver_container, conv_bound_numerics, visc_bound_numerics, config); @@ -178,6 +184,10 @@ void CIntegration::Space_Integration(CGeometry *geometry, solver_container[MainSolver]->BC_Periodic(geometry, solver_container, conv_bound_numerics, config); } +#ifdef HAVE_OPDI + AD::PreaccEnabled = preaccEnabled; +#endif + } void CIntegration::Time_Integration(CGeometry *geometry, CSolver **solver_container, CConfig *config, From 742118d255db4d3e17d77242a46368f98f3192f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 30 Jun 2021 17:50:44 +0200 Subject: [PATCH 16/39] Add assert. --- SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index 4af5f24f48cc..d68f794bdeeb 100644 --- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -880,6 +880,10 @@ void CDiscAdjMultizoneDriver::SetAdj_ObjFunction() { void CDiscAdjMultizoneDriver::ComputeAdjoints(unsigned short iZone, bool eval_transfer) { +#if defined(CODI_INDEX_TAPE) || defined(HAVE_OPDI) + assert(nZone <= 1 && "index AD types do not support multiple zones"); +#endif + AD::ClearAdjoints(); /*--- Initialize the adjoints in iZone ---*/ @@ -898,12 +902,8 @@ void CDiscAdjMultizoneDriver::ComputeAdjoints(unsigned short iZone, bool eval_tr * on the last inner iteration. Structural problems have some minor issue and we * need to evaluate this section on every iteration. ---*/ -#if defined(CODI_INDEX_TAPE) - AD::ComputeAdjoint(TRANSFER, OBJECTIVE_FUNCTION); -#else if (eval_transfer || config_container[iZone]->GetStructuralProblem()) AD::ComputeAdjoint(TRANSFER, OBJECTIVE_FUNCTION); -#endif /*--- Adjoints of dependencies, needed if derivatives of variables * are extracted (e.g. AoA, Mach, etc.) ---*/ From 1d2c20619210328a5f63fb9e2676e0405694bc9a Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 5 Jul 2021 20:23:39 +0100 Subject: [PATCH 17/39] disable preacc when coloring fails --- Common/include/basic_types/ad_structure.hpp | 38 +++++++++++++++++-- Common/include/toolboxes/graph_toolbox.hpp | 2 +- Common/src/basic_types/ad_structure.cpp | 4 +- .../include/solvers/CFVMFlowSolverBase.inl | 14 ++++++- SU2_CFD/src/integration/CIntegration.cpp | 6 +-- SU2_CFD/src/solvers/CEulerSolver.cpp | 8 ++++ SU2_CFD/src/solvers/CIncEulerSolver.cpp | 16 ++++++++ SU2_CFD/src/solvers/CTurbSolver.cpp | 8 ++++ 8 files changed, 85 insertions(+), 11 deletions(-) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index abf28604929a..99c3a9225730 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -252,7 +252,7 @@ namespace AD{ /*! * \brief Start a passive region, i.e. stop recording. - * \return True is tape was active. + * \return True if tape was active. */ inline bool BeginPassive() { return false; } @@ -262,6 +262,18 @@ namespace AD{ */ inline void EndPassive(bool wasActive) {} + /*! + * \brief Pause the use of preaccumulation. + * \return True if preaccumulation was active. + */ + inline bool PausePreaccumulation() { return false; } + + /*! + * \brief Resume the use of preaccumulation. + * \param[in] wasActive - Whether preaccumulation was active before pausing. + */ + inline void ResumePreaccumulation(bool wasActive) {} + #else using CheckpointHandler = codi::DataStore; @@ -273,7 +285,7 @@ namespace AD{ extern bool PreaccActive; #ifdef HAVE_OPDI - #pragma omp threadprivate(PreaccActive) + SU2_OMP(threadprivate(PreaccActive)) #endif extern bool PreaccEnabled; @@ -292,7 +304,7 @@ namespace AD{ extern codi::PreaccumulationHelper PreaccHelper; #ifdef HAVE_OPDI - #pragma omp threadprivate(PreaccHelper) + SU2_OMP(threadprivate(PreaccHelper)) #endif /*--- Reference to the tape. ---*/ @@ -527,6 +539,26 @@ namespace AD{ FORCEINLINE void EndPassive(bool wasActive) { if(wasActive) StartRecording(); } + FORCEINLINE bool PausePreaccumulation() { + const auto current = PreaccEnabled; + if (!current) return false; + SU2_OMP_BARRIER + SU2_OMP_MASTER + PreaccEnabled = false; + END_SU2_OMP_MASTER + SU2_OMP_BARRIER + return true; + } + + FORCEINLINE void ResumePreaccumulation(bool wasActive) { + if (!wasActive) return; + SU2_OMP_BARRIER + SU2_OMP_MASTER + PreaccEnabled = true; + END_SU2_OMP_MASTER + SU2_OMP_BARRIER + } + #endif // CODI_REVERSE_TYPE } // namespace AD diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp index 9dba7b4d9559..410a9bef4b5c 100644 --- a/Common/include/toolboxes/graph_toolbox.hpp +++ b/Common/include/toolboxes/graph_toolbox.hpp @@ -527,7 +527,7 @@ T createNaturalColoring(Index_t numInnerIndexes) * \param[out] indexColor - Optional, vector with colors given to the outer indices. * \return Coloring in the same type of the input pattern. */ -template +template T colorSparsePattern(const T& pattern, size_t groupSize = 1, bool balanceColors = false, std::vector* indexColor = nullptr) { diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp index 9a95aff7316c..4925466c0fad 100644 --- a/Common/src/basic_types/ad_structure.cpp +++ b/Common/src/basic_types/ad_structure.cpp @@ -36,14 +36,14 @@ namespace AD { bool PreaccActive = false; #ifdef HAVE_OPDI - #pragma omp threadprivate(PreaccActive) + SU2_OMP(threadprivate(PreaccActive)) #endif bool PreaccEnabled = true; codi::PreaccumulationHelper PreaccHelper; #ifdef HAVE_OPDI - #pragma omp threadprivate(PreaccHelper) + SU2_OMP(threadprivate(PreaccHelper)) #endif ExtFuncHelper* FuncHelper; diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index b4dc537102dc..61e1d0638ed6 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -319,7 +319,11 @@ void CFVMFlowSolverBase::HybridParallelInitialization(const CConfig& confi cout << "WARNING: On " << numRanksUsingReducer << " MPI ranks the coloring efficiency was less than " << COLORING_EFF_THRESH << " (min value was " << minEff << ").\n" << " Those ranks will now use a fallback strategy, better performance may be possible\n" - << " with a different value of config option EDGE_COLORING_GROUP_SIZE (default 512)." << endl; + << " with a different value of config option EDGE_COLORING_GROUP_SIZE (default 512)." +#ifdef HAVE_OPDI + << "\n The memory usage of the discrete adjoint solver is higher when using the fallback."; +#endif + << endl; } if (config.GetUseVectorization() && (omp_get_max_threads() > 1) && @@ -1535,6 +1539,10 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, InstantiateEdgeNumerics(solvers, config); } +#ifdef HAVE_OPDI + const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); +#endif + /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) { /*--- Chunk size is at least OMP_MIN_SIZE and a multiple of the color group size. ---*/ @@ -1557,6 +1565,10 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, END_SU2_OMP_FOR } +#ifdef HAVE_OPDI + AD::ResumePreaccumulation(preaccEnabled); +#endif + if (ReducerStrategy) { SumEdgeFluxes(geometry); if (config->GetKind_TimeIntScheme() == EULER_IMPLICIT) { diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp index 62e7733e12f9..ae96feaaac82 100644 --- a/SU2_CFD/src/integration/CIntegration.cpp +++ b/SU2_CFD/src/integration/CIntegration.cpp @@ -77,9 +77,7 @@ void CIntegration::Space_Integration(CGeometry *geometry, CNumerics* visc_bound_numerics = numerics[VISC_BOUND_TERM + omp_get_thread_num()*MAX_TERMS]; #ifdef HAVE_OPDI - /* disable preaccumulation for parallel boundary numerics */ - bool preaccEnabled = AD::PreaccEnabled; - AD::PreaccEnabled = false; + const auto preaccEnabled = AD::PausePreaccumulation(); #endif /*--- Boundary conditions that depend on other boundaries (they require MPI sincronization)---*/ @@ -185,7 +183,7 @@ void CIntegration::Space_Integration(CGeometry *geometry, } #ifdef HAVE_OPDI - AD::PreaccEnabled = preaccEnabled; + AD::ResumePreaccumulation(preaccEnabled); #endif } diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index 28a274634186..71c89d7f39c0 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -1992,6 +1992,10 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain su2double Primitive_i[MAXNVAR] = {0.0}, Primitive_j[MAXNVAR] = {0.0}; su2double Secondary_i[MAXNVAR] = {0.0}, Secondary_j[MAXNVAR] = {0.0}; +#ifdef HAVE_OPDI + const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); +#endif + /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) { @@ -2176,6 +2180,10 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain END_SU2_OMP_FOR } // end color loop +#ifdef HAVE_OPDI + AD::ResumePreaccumulation(preaccEnabled); +#endif + if (ReducerStrategy) { SumEdgeFluxes(geometry); if (implicit) diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index 3bfad9f52856..a05861122885 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -1018,6 +1018,10 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co bool implicit = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT); bool jst_scheme = ((config->GetKind_Centered_Flow() == JST) && (iMesh == MESH_0)); +#ifdef HAVE_OPDI + const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); +#endif + /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) { @@ -1082,6 +1086,10 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co END_SU2_OMP_FOR } // end color loop +#ifdef HAVE_OPDI + AD::ResumePreaccumulation(preaccEnabled); +#endif + if (ReducerStrategy) { SumEdgeFluxes(geometry); if (implicit) @@ -1110,6 +1118,10 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont const bool limiter = (config->GetKind_SlopeLimit_Flow() != NO_LIMITER); const bool van_albada = (config->GetKind_SlopeLimit_Flow() == VAN_ALBADA_EDGE); +#ifdef HAVE_OPDI + const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); +#endif + /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) { @@ -1250,6 +1262,10 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont END_SU2_OMP_FOR } // end color loop +#ifdef HAVE_OPDI + AD::ResumePreaccumulation(preaccEnabled); +#endif + if (ReducerStrategy) { SumEdgeFluxes(geometry); if (implicit) diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 64ea6dcf5259..30ddc19cc80b 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -106,6 +106,10 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe su2double solution_i[MAXNVAR] = {0.0}, flowPrimVar_i[MAXNVARFLOW] = {0.0}; su2double solution_j[MAXNVAR] = {0.0}, flowPrimVar_j[MAXNVARFLOW] = {0.0}; +#ifdef HAVE_OPDI + const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); +#endif + /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) { @@ -232,6 +236,10 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe END_SU2_OMP_FOR } // end color loop +#ifdef HAVE_OPDI + AD::ResumePreaccumulation(preaccEnabled); +#endif + if (ReducerStrategy) { SumEdgeFluxes(geometry); if (implicit) Jacobian.SetDiagonalAsColumnSum(); From a573f9a5d04e0b6983569dc4692a307b7abdc90f Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 5 Jul 2021 21:15:21 +0100 Subject: [PATCH 18/39] small fix --- SU2_CFD/include/solvers/CFVMFlowSolverBase.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index 61e1d0638ed6..689027a55f8f 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -321,7 +321,7 @@ void CFVMFlowSolverBase::HybridParallelInitialization(const CConfig& confi << " Those ranks will now use a fallback strategy, better performance may be possible\n" << " with a different value of config option EDGE_COLORING_GROUP_SIZE (default 512)." #ifdef HAVE_OPDI - << "\n The memory usage of the discrete adjoint solver is higher when using the fallback."; + << "\n The memory usage of the discrete adjoint solver is higher when using the fallback." #endif << endl; } From bc90f74fbecd8c82327f15b9d1f271930f84c137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 6 Jul 2021 02:51:08 +0200 Subject: [PATCH 19/39] Add shared reading switches. --- Common/include/basic_types/ad_structure.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 99c3a9225730..72798f5b2119 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -559,6 +559,19 @@ namespace AD{ SU2_OMP_BARRIER } + FORCEINLINE void StartNoSharedReading() { +#ifdef HAVE_OPDI + opdi::logic->setAdjointAccessMode(opdi::LogicInterface::AdjointAccessMode::Classical); + opdi::logic->addReverseBarrier(); +#endif + } + + FORCEINLINE void EndNoSharedReading() { +#ifdef HAVE_OPDI + opdi::logic->setAdjointAccessMode(opdi::LogicInterface::AdjointAccessMode::Atomic); + opdi::logic->addReverseBarrier(); +#endif + } #endif // CODI_REVERSE_TYPE } // namespace AD From cba486d6208e82d1a648f24f17278993bea09cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 6 Jul 2021 02:53:21 +0200 Subject: [PATCH 20/39] Apply some shared reading optimizations. --- SU2_CFD/include/solvers/CFVMFlowSolverBase.inl | 4 ++++ SU2_CFD/src/solvers/CEulerSolver.cpp | 4 ++++ SU2_CFD/src/solvers/CIncEulerSolver.cpp | 8 ++++++++ SU2_CFD/src/solvers/CTurbSolver.cpp | 4 ++++ 4 files changed, 20 insertions(+) diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index 689027a55f8f..560101dde56f 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -1541,6 +1541,8 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, #ifdef HAVE_OPDI const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); + if (!ReducerStrategy) + AD::StartNoSharedReading(); #endif /*--- Loop over edge colors. ---*/ @@ -1567,6 +1569,8 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, #ifdef HAVE_OPDI AD::ResumePreaccumulation(preaccEnabled); + if (!ReducerStrategy) + AD::EndNoSharedReading(); #endif if (ReducerStrategy) { diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index 71c89d7f39c0..906358736553 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -1994,6 +1994,8 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain #ifdef HAVE_OPDI const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); + if (!ReducerStrategy) + AD::StartNoSharedReading(); #endif /*--- Loop over edge colors. ---*/ @@ -2182,6 +2184,8 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain #ifdef HAVE_OPDI AD::ResumePreaccumulation(preaccEnabled); + if (!ReducerStrategy) + AD::EndNoSharedReading(); #endif if (ReducerStrategy) { diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index a05861122885..d676ce39d593 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -1020,6 +1020,8 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co #ifdef HAVE_OPDI const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); + if (!ReducerStrategy) + AD::StartNoSharedReading(); #endif /*--- Loop over edge colors. ---*/ @@ -1088,6 +1090,8 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co #ifdef HAVE_OPDI AD::ResumePreaccumulation(preaccEnabled); + if (!ReducerStrategy) + AD::EndNoSharedReading(); #endif if (ReducerStrategy) { @@ -1120,6 +1124,8 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont #ifdef HAVE_OPDI const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); + if (!ReducerStrategy) + AD::StartNoSharedReading(); #endif /*--- Loop over edge colors. ---*/ @@ -1264,6 +1270,8 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont #ifdef HAVE_OPDI AD::ResumePreaccumulation(preaccEnabled); + if (!ReducerStrategy) + AD::EndNoSharedReading(); #endif if (ReducerStrategy) { diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 30ddc19cc80b..9d88477e3959 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -108,6 +108,8 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe #ifdef HAVE_OPDI const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); + if (!ReducerStrategy) + AD::StartNoSharedReading(); #endif /*--- Loop over edge colors. ---*/ @@ -238,6 +240,8 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe #ifdef HAVE_OPDI AD::ResumePreaccumulation(preaccEnabled); + if (!ReducerStrategy) + AD::EndNoSharedReading(); #endif if (ReducerStrategy) { From 8e7a9c6fd1a7d385b055b58690d172e96a91c588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= <55186095+jblueh@users.noreply.github.com> Date: Tue, 6 Jul 2021 13:05:25 +0200 Subject: [PATCH 21/39] Apply suggestions. Co-authored-by: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> --- Common/include/basic_types/ad_structure.hpp | 10 ++++++ Common/src/geometry/CPhysicalGeometry.cpp | 5 ++- .../gradients/computeGradientsGreenGauss.hpp | 5 ++- .../computeGradientsLeastSquares.hpp | 5 ++- .../include/limiters/computeLimiters_impl.hpp | 5 ++- .../include/solvers/CFVMFlowSolverBase.inl | 18 +++++----- SU2_CFD/src/integration/CIntegration.cpp | 10 +++--- SU2_CFD/src/solvers/CEulerSolver.cpp | 18 +++++----- SU2_CFD/src/solvers/CIncEulerSolver.cpp | 36 +++++++++---------- SU2_CFD/src/solvers/CTurbSolver.cpp | 18 +++++----- 10 files changed, 62 insertions(+), 68 deletions(-) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 72798f5b2119..d39478835e9c 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -274,6 +274,16 @@ namespace AD{ */ inline void ResumePreaccumulation(bool wasActive) {} + /*! + * \brief Begin a hybrid parallel adjoint evaluation mode that assumes an inherently safe reverse path. + */ + inline void StartNoSharedReading() {} + + /*! + * \brief End the "no shared reading" adjoint evaluation mode. + */ + inline void EndNoSharedReading() {} + #else using CheckpointHandler = codi::DataStore; diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 47d0412a3d6d..b770f6b95db4 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -7701,9 +7701,8 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh const auto nNodes = bound[iMarker][iElem]->GetnNodes(); -#ifndef HAVE_OPDI - AD::StartPreacc(); -#endif + /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/ + if (omp_get_num_threads() == 1) AD::StartPreacc(); /*--- Get pointers to the coordinates of all the element nodes ---*/ array Coord; diff --git a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp index aef20cb3ff80..120efce7be56 100644 --- a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp +++ b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp @@ -76,9 +76,8 @@ void computeGradientsGreenGauss(CSolver* solver, { auto nodes = geometry.nodes; -#ifndef HAVE_OPDI - AD::StartPreacc(); -#endif + /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/ + if (omp_get_num_threads() == 1) AD::StartPreacc(); AD::SetPreaccIn(nodes->GetVolume(iPoint)); AD::SetPreaccIn(nodes->GetPeriodicVolume(iPoint)); diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp index bad782071d51..b960503deb6b 100644 --- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp +++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp @@ -203,9 +203,8 @@ void computeGradientsLeastSquares(CSolver* solver, auto nodes = geometry.nodes; const auto coord_i = nodes->GetCoord(iPoint); -#ifndef HAVE_OPDI - AD::StartPreacc(); -#endif + /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/ + if (omp_get_num_threads() == 1) AD::StartPreacc(); AD::SetPreaccIn(coord_i, nDim); for (size_t iVar = varBegin; iVar < varEnd; ++iVar) diff --git a/SU2_CFD/include/limiters/computeLimiters_impl.hpp b/SU2_CFD/include/limiters/computeLimiters_impl.hpp index 189fd7ada642..b9613621b11d 100644 --- a/SU2_CFD/include/limiters/computeLimiters_impl.hpp +++ b/SU2_CFD/include/limiters/computeLimiters_impl.hpp @@ -132,9 +132,8 @@ void computeLimiters_impl(CSolver* solver, auto nodes = geometry.nodes; const auto coord_i = nodes->GetCoord(iPoint); -#ifndef HAVE_OPDI - AD::StartPreacc(); -#endif + /*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/ + if (omp_get_num_threads() == 1) AD::StartPreacc(); AD::SetPreaccIn(coord_i, nDim); for (size_t iVar = varBegin; iVar < varEnd; ++iVar) diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index 560101dde56f..5434a53588ba 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -1539,11 +1539,11 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, InstantiateEdgeNumerics(solvers, config); } -#ifdef HAVE_OPDI - const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); - if (!ReducerStrategy) - AD::StartNoSharedReading(); -#endif + /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of + * variables, otherwise switch to the faster adjoint evaluation mode. ---*/ + bool pausePreacc = false; + if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation(); + else AD::StartNoSharedReading(); /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) { @@ -1567,11 +1567,9 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, END_SU2_OMP_FOR } -#ifdef HAVE_OPDI - AD::ResumePreaccumulation(preaccEnabled); - if (!ReducerStrategy) - AD::EndNoSharedReading(); -#endif + /*--- Restore preaccumulation and adjoint evaluation state. ---*/ + AD::ResumePreaccumulation(pausePreacc); + if (!ReducerStrategy) AD::EndNoSharedReading(); if (ReducerStrategy) { SumEdgeFluxes(geometry); diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp index ae96feaaac82..dd57a372f2c4 100644 --- a/SU2_CFD/src/integration/CIntegration.cpp +++ b/SU2_CFD/src/integration/CIntegration.cpp @@ -76,9 +76,9 @@ void CIntegration::Space_Integration(CGeometry *geometry, CNumerics* conv_bound_numerics = numerics[CONV_BOUND_TERM + omp_get_thread_num()*MAX_TERMS]; CNumerics* visc_bound_numerics = numerics[VISC_BOUND_TERM + omp_get_thread_num()*MAX_TERMS]; -#ifdef HAVE_OPDI - const auto preaccEnabled = AD::PausePreaccumulation(); -#endif + /*--- Pause preaccumulation in boundary conditions for hybrid parallel AD. ---*/ + /// TODO: Check if this is really needed. + const auto pausePreacc = (omp_get_num_threads() > 1) && AD::PausePreaccumulation(); /*--- Boundary conditions that depend on other boundaries (they require MPI sincronization)---*/ @@ -182,9 +182,7 @@ void CIntegration::Space_Integration(CGeometry *geometry, solver_container[MainSolver]->BC_Periodic(geometry, solver_container, conv_bound_numerics, config); } -#ifdef HAVE_OPDI - AD::ResumePreaccumulation(preaccEnabled); -#endif + AD::ResumePreaccumulation(pausePreacc); } diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index 906358736553..dac3a53c6df2 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -1992,11 +1992,11 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain su2double Primitive_i[MAXNVAR] = {0.0}, Primitive_j[MAXNVAR] = {0.0}; su2double Secondary_i[MAXNVAR] = {0.0}, Secondary_j[MAXNVAR] = {0.0}; -#ifdef HAVE_OPDI - const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); - if (!ReducerStrategy) - AD::StartNoSharedReading(); -#endif + /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of + * variables, otherwise switch to the faster adjoint evaluation mode. ---*/ + bool pausePreacc = false; + if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation(); + else AD::StartNoSharedReading(); /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) @@ -2182,11 +2182,9 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain END_SU2_OMP_FOR } // end color loop -#ifdef HAVE_OPDI - AD::ResumePreaccumulation(preaccEnabled); - if (!ReducerStrategy) - AD::EndNoSharedReading(); -#endif + /*--- Restore preaccumulation and adjoint evaluation state. ---*/ + AD::ResumePreaccumulation(pausePreacc); + if (!ReducerStrategy) AD::EndNoSharedReading(); if (ReducerStrategy) { SumEdgeFluxes(geometry); diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index d676ce39d593..9c662be50ffe 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -1018,11 +1018,11 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co bool implicit = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT); bool jst_scheme = ((config->GetKind_Centered_Flow() == JST) && (iMesh == MESH_0)); -#ifdef HAVE_OPDI - const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); - if (!ReducerStrategy) - AD::StartNoSharedReading(); -#endif + /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of + * variables, otherwise switch to the faster adjoint evaluation mode. ---*/ + bool pausePreacc = false; + if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation(); + else AD::StartNoSharedReading(); /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) @@ -1088,11 +1088,9 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co END_SU2_OMP_FOR } // end color loop -#ifdef HAVE_OPDI - AD::ResumePreaccumulation(preaccEnabled); - if (!ReducerStrategy) - AD::EndNoSharedReading(); -#endif + /*--- Restore preaccumulation and adjoint evaluation state. ---*/ + AD::ResumePreaccumulation(pausePreacc); + if (!ReducerStrategy) AD::EndNoSharedReading(); if (ReducerStrategy) { SumEdgeFluxes(geometry); @@ -1122,11 +1120,11 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont const bool limiter = (config->GetKind_SlopeLimit_Flow() != NO_LIMITER); const bool van_albada = (config->GetKind_SlopeLimit_Flow() == VAN_ALBADA_EDGE); -#ifdef HAVE_OPDI - const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); - if (!ReducerStrategy) - AD::StartNoSharedReading(); -#endif + /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of + * variables, otherwise switch to the faster adjoint evaluation mode. ---*/ + bool pausePreacc = false; + if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation(); + else AD::StartNoSharedReading(); /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) @@ -1268,11 +1266,9 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont END_SU2_OMP_FOR } // end color loop -#ifdef HAVE_OPDI - AD::ResumePreaccumulation(preaccEnabled); - if (!ReducerStrategy) - AD::EndNoSharedReading(); -#endif + /*--- Restore preaccumulation and adjoint evaluation state. ---*/ + AD::ResumePreaccumulation(pausePreacc); + if (!ReducerStrategy) AD::EndNoSharedReading(); if (ReducerStrategy) { SumEdgeFluxes(geometry); diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 9d88477e3959..63beb0d6b5b3 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -106,11 +106,11 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe su2double solution_i[MAXNVAR] = {0.0}, flowPrimVar_i[MAXNVARFLOW] = {0.0}; su2double solution_j[MAXNVAR] = {0.0}, flowPrimVar_j[MAXNVARFLOW] = {0.0}; -#ifdef HAVE_OPDI - const auto preaccEnabled = ReducerStrategy && AD::PausePreaccumulation(); - if (!ReducerStrategy) - AD::StartNoSharedReading(); -#endif + /*--- For hybrid parallel AD, pause preaccumulation if there is shared reading of + * variables, otherwise switch to the faster adjoint evaluation mode. ---*/ + bool pausePreacc = false; + if (ReducerStrategy) pausePreacc = AD::PausePreaccumulation(); + else AD::StartNoSharedReading(); /*--- Loop over edge colors. ---*/ for (auto color : EdgeColoring) @@ -238,11 +238,9 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe END_SU2_OMP_FOR } // end color loop -#ifdef HAVE_OPDI - AD::ResumePreaccumulation(preaccEnabled); - if (!ReducerStrategy) - AD::EndNoSharedReading(); -#endif + /*--- Restore preaccumulation and adjoint evaluation state. ---*/ + AD::ResumePreaccumulation(pausePreacc); + if (!ReducerStrategy) AD::EndNoSharedReading(); if (ReducerStrategy) { SumEdgeFluxes(geometry); From e03f11bca50f21fc2e77e0c4f3dc93177356ad5f Mon Sep 17 00:00:00 2001 From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> Date: Tue, 6 Jul 2021 15:47:18 +0100 Subject: [PATCH 22/39] Update Arina2K regression --- TestCases/hybrid_regression_AD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py index 6e42e45e4c71..82a03106c847 100644 --- a/TestCases/hybrid_regression_AD.py +++ b/TestCases/hybrid_regression_AD.py @@ -63,7 +63,7 @@ def main(): discadj_arina2k.cfg_dir = "disc_adj_euler/arina2k" discadj_arina2k.cfg_file = "Arina2KRS.cfg" discadj_arina2k.test_iter = 20 - discadj_arina2k.test_vals = [2.107806, 1.574254, 47250.000000, 0.000000] + discadj_arina2k.test_vals = [-3.087876, -3.481506, 0.068878, 0.000000] test_list.append(discadj_arina2k) #################################### From 15d366680f929438b37d345a2548c4995556d606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 7 Jul 2021 20:51:14 +0200 Subject: [PATCH 23/39] Remove redundant init. --- SU2_CFD/src/SU2_CFD.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp index 99f7003d048c..37f2cdabc421 100644 --- a/SU2_CFD/src/SU2_CFD.cpp +++ b/SU2_CFD/src/SU2_CFD.cpp @@ -73,11 +73,6 @@ int main(int argc, char *argv[]) { #endif SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm(); - /*--- AD initialization ---*/ -#ifdef HAVE_OPDI - AD::getGlobalTape().initialize(); -#endif - /*--- Uncomment the following line if runtime NaN catching is desired. ---*/ // feenableexcept(FE_INVALID | FE_OVERFLOW | FE_DIVBYZERO ); From e10abccfd8b5537e4d26dda942f678ab17537fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 13 Jul 2021 23:33:39 +0200 Subject: [PATCH 24/39] OpDiLib update. --- externals/opdi | 2 +- meson_scripts/init.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/opdi b/externals/opdi index e56f79cada20..1709a8c31c06 160000 --- a/externals/opdi +++ b/externals/opdi @@ -1 +1 @@ -Subproject commit e56f79cada202d21e7425f5d5cfd5b1153f2465e +Subproject commit 1709a8c31c0610556199e8050aa53bd3c21500c5 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index c488ab134aa6..49eec2e4f60e 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -48,7 +48,7 @@ def init_submodules(method = 'auto'): github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' - sha_version_opdi = 'e56f79cada202d21e7425f5d5cfd5b1153f2465e' + sha_version_opdi = '1709a8c31c0610556199e8050aa53bd3c21500c5' github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2' github_repo_meson = 'https://github.com/mesonbuild/meson' From 2726ca612b51cac5daac31c13a0e8b270f72350f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 13 Jul 2021 23:38:05 +0200 Subject: [PATCH 25/39] Add build option for shared reading optimization. --- meson.build | 4 ++++ meson_options.txt | 1 + 2 files changed, 5 insertions(+) diff --git a/meson.build b/meson.build index 813f7bcff1b3..b6b3ab26d2a5 100644 --- a/meson.build +++ b/meson.build @@ -124,6 +124,10 @@ if omp elif get_option('opdi-backend') == 'ompt' su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND' endif + + if get_option('opdi-shared-read-opt') + su2_cpp_args += '-DOPDI_VARIABLE_ADJOINT_ACCESS_MODE=0' + endif endif endif diff --git a/meson_options.txt b/meson_options.txt index da999c5d29c6..948267d51eaf 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -20,3 +20,4 @@ option('extra-deps', type : 'string', value : '', description: 'comma-separated option('enable-mpp', type : 'boolean', value : false, description: 'enable Mutation++ support') option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice') option('codi-tape', type : 'combo', choices : ['JacobianLinear', 'JacobianIndex'], value : 'JacobianLinear', description: 'CoDiPack tape choice') +option('opdi-shared-read-opt', type : 'boolean', value : true, description : 'OpDiLib shared reading optimization') From f8fe252bc16292918e491e8fe804c3760507d9d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 14 Jul 2021 01:58:19 +0200 Subject: [PATCH 26/39] Fix. --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index b6b3ab26d2a5..e93ec6c26209 100644 --- a/meson.build +++ b/meson.build @@ -125,7 +125,7 @@ if omp su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND' endif - if get_option('opdi-shared-read-opt') + if get_option('opdi-shared-read-opt') == false su2_cpp_args += '-DOPDI_VARIABLE_ADJOINT_ACCESS_MODE=0' endif endif From d8656aaf2f48e3581827b9774eb72d4318e34ad0 Mon Sep 17 00:00:00 2001 From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> Date: Thu, 15 Jul 2021 11:15:33 +0100 Subject: [PATCH 27/39] update discadj_fea (hybrid AD) --- TestCases/hybrid_regression_AD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py index 82a03106c847..c40ac8700460 100644 --- a/TestCases/hybrid_regression_AD.py +++ b/TestCases/hybrid_regression_AD.py @@ -203,7 +203,7 @@ def main(): discadj_fea.cfg_dir = "disc_adj_fea" discadj_fea.cfg_file = "configAD_fem.cfg" discadj_fea.test_iter = 4 - discadj_fea.test_vals = [2.183540, 2.071459, -0.000363, -8.655000] + discadj_fea.test_vals = [1.774569, 1.928023, -0.000364, -8.690300] test_list.append(discadj_fea) ###################################### From 3c84ad16ac0db62abc1ae0c97651f32fa6125bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 20 Jul 2021 17:23:43 +0200 Subject: [PATCH 28/39] Missing barrier. --- Common/src/linear_algebra/CSysSolve.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 45e54033ce59..3917190d45f0 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -923,6 +923,7 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co Iterations = IterLinSol; } END_SU2_OMP_MASTER + SU2_OMP_BARRIER HandleTemporariesOut(LinSysSol); From c8ff857faa594b2b194767b5586bd579d0624c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 20 Jul 2021 17:24:59 +0200 Subject: [PATCH 29/39] CoDiPack update. --- externals/codi | 2 +- meson_scripts/init.py | 2 +- preconfigure.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/externals/codi b/externals/codi index ee2d80cc362f..3c3211fef2e2 160000 --- a/externals/codi +++ b/externals/codi @@ -1 +1 @@ -Subproject commit ee2d80cc362f26879deead881c79523c113e9e6c +Subproject commit 3c3211fef2e225ab89680a4063b62bb3bb38a7e4 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index 49eec2e4f60e..f3be4c67f034 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -44,7 +44,7 @@ def init_submodules(method = 'auto'): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = 'ee2d80cc362f26879deead881c79523c113e9e6c' + sha_version_codi = '3c3211fef2e225ab89680a4063b62bb3bb38a7e4' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' diff --git a/preconfigure.py b/preconfigure.py index fda2d95de3f8..34f73f9c89d7 100755 --- a/preconfigure.py +++ b/preconfigure.py @@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = 'ee2d80cc362f26879deead881c79523c113e9e6c' + sha_version_codi = '3c3211fef2e225ab89680a4063b62bb3bb38a7e4' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' From fcc39ce28bd66586a9854e97af43a65fdb6d2c82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 22 Jul 2021 13:56:54 +0200 Subject: [PATCH 30/39] Move barrier inside HandleTemporariesOut. --- Common/include/linear_algebra/CSysSolve.hpp | 2 ++ Common/src/linear_algebra/CSysSolve.cpp | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index c69643cefe14..06bd75bc349a 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -256,6 +256,7 @@ class CSysSolve { void HandleTemporariesOut(CSysVector& LinSysSol) { /*--- Reset the pointers. ---*/ + SU2_OMP_BARRIER SU2_OMP_MASTER { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; @@ -276,6 +277,7 @@ class CSysSolve { LinSysSol.PassiveCopy(LinSysSol_tmp); /*--- Reset the pointers. ---*/ + SU2_OMP_BARRIER SU2_OMP_MASTER { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 3917190d45f0..45e54033ce59 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -923,7 +923,6 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co Iterations = IterLinSol; } END_SU2_OMP_MASTER - SU2_OMP_BARRIER HandleTemporariesOut(LinSysSol); From 028d1e09a5cf7dc42c9bd93c83c1940ea1a8659c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 28 Jul 2021 17:21:33 +0200 Subject: [PATCH 31/39] Further shared reading optimizations. --- SU2_CFD/include/solvers/CFVMFlowSolverBase.inl | 8 ++++++++ SU2_CFD/src/integration/CMultiGridIntegration.cpp | 2 ++ SU2_CFD/src/solvers/CIncEulerSolver.cpp | 8 ++++++++ SU2_CFD/src/solvers/CSolver.cpp | 12 ++++++++++++ SU2_CFD/src/solvers/CTurbSolver.cpp | 8 ++++++++ 5 files changed, 38 insertions(+) diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index a6f4841c4cfd..5bffac8ada4d 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -1621,6 +1621,8 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry /*--- Loop over all nodes (excluding halos) ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -1656,6 +1658,8 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + } else { @@ -1733,6 +1737,8 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry /*--- Loop over all nodes (excluding halos) to compute the remainder of the dual time-stepping source term. ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -1770,6 +1776,8 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry } } END_SU2_OMP_FOR + + AD::EndNoSharedReading(); } } diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp index d361e5e49721..a29e1a0e9872 100644 --- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp +++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp @@ -547,10 +547,12 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar void CMultiGridIntegration::SetResidual_Term(CGeometry *geometry, CSolver *solver) { + AD::StartNoSharedReading(); SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++) solver->LinSysRes.AddBlock(iPoint, solver->GetNodes()->GetResTruncError(iPoint)); END_SU2_OMP_FOR + AD::EndNoSharedReading(); } diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index 9c662be50ffe..e31a438fb470 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -2555,6 +2555,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver /*--- Loop over all nodes (excluding halos) ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2607,6 +2609,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver } } END_SU2_OMP_FOR + + AD::EndNoSharedReading(); } else { @@ -2694,6 +2698,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver /*--- Loop over all nodes (excluding halos) to compute the remainder of the dual time-stepping source term. ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2749,6 +2755,8 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver } } END_SU2_OMP_FOR + + AD::EndNoSharedReading(); } } diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index 467ba4d519c6..8ca0025bd282 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -1929,6 +1929,8 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) if (geometry->GetMGLevel() != MESH_0) return; + AD::StartNoSharedReading(); + SU2_OMP_MASTER { /*--- Set the L2 Norm residual in all the processors. ---*/ @@ -1985,12 +1987,16 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) } END_SU2_OMP_MASTER SU2_OMP_BARRIER + + AD::EndNoSharedReading(); } void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) { if (geometry->GetMGLevel() != MESH_0) return; + AD::StartNoSharedReading(); + SU2_OMP_MASTER { /*--- Set the L2 Norm residual in all the processors. ---*/ @@ -2028,6 +2034,8 @@ void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) } END_SU2_OMP_MASTER SU2_OMP_BARRIER + + AD::EndNoSharedReading(); } void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { @@ -4111,6 +4119,8 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig SU2_OMP_PARALLEL { + AD::StartNoSharedReading(); + /*--- Set Residuals to zero ---*/ SU2_OMP_MASTER for (unsigned short iVar = 0; iVar < nVar; iVar++){ @@ -4152,6 +4162,8 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig END_SU2_OMP_CRITICAL SU2_OMP_BARRIER + AD::EndNoSharedReading(); + SetResidual_BGS(geometry, config); } diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 63beb0d6b5b3..b3bce445df5b 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -789,6 +789,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con /*--- Loop over all nodes (excluding halos) ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -855,6 +857,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + } else { /*--- For unsteady flows on dynamic meshes (rigidly transforming or @@ -955,6 +959,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con /*--- Loop over all nodes (excluding halos) to compute the remainder of the dual time-stepping source term. ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -1023,6 +1029,8 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + } // end dynamic grid } From 7acc44f19cfb186092a056c5447fa76cad97a43f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 28 Jul 2021 19:17:25 +0200 Subject: [PATCH 32/39] Test without boundary treatment. --- SU2_CFD/src/integration/CIntegration.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp index dd57a372f2c4..327b47fb86fe 100644 --- a/SU2_CFD/src/integration/CIntegration.cpp +++ b/SU2_CFD/src/integration/CIntegration.cpp @@ -78,7 +78,7 @@ void CIntegration::Space_Integration(CGeometry *geometry, /*--- Pause preaccumulation in boundary conditions for hybrid parallel AD. ---*/ /// TODO: Check if this is really needed. - const auto pausePreacc = (omp_get_num_threads() > 1) && AD::PausePreaccumulation(); + //const auto pausePreacc = (omp_get_num_threads() > 1) && AD::PausePreaccumulation(); /*--- Boundary conditions that depend on other boundaries (they require MPI sincronization)---*/ @@ -182,7 +182,7 @@ void CIntegration::Space_Integration(CGeometry *geometry, solver_container[MainSolver]->BC_Periodic(geometry, solver_container, conv_bound_numerics, config); } - AD::ResumePreaccumulation(pausePreacc); + //AD::ResumePreaccumulation(pausePreacc); } From 2830dea1c0da1cc6d6a53bf0a3403723de5d2ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 29 Jul 2021 00:53:21 +0200 Subject: [PATCH 33/39] Source_Residual shared reading optimizations. --- SU2_CFD/src/solvers/CEulerSolver.cpp | 16 ++++++++++++ SU2_CFD/src/solvers/CIncEulerSolver.cpp | 31 ++++++++++++++++++++++++ SU2_CFD/src/solvers/CNEMOEulerSolver.cpp | 4 +++ SU2_CFD/src/solvers/CTurbSASolver.cpp | 4 +++ SU2_CFD/src/solvers/CTurbSSTSolver.cpp | 4 +++ 5 files changed, 59 insertions(+) diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index a4947327aa5e..317cd9780f41 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -2291,6 +2291,8 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain unsigned short iVar; unsigned long iPoint; + AD::StartNoSharedReading(); + if (body_force) { /*--- Loop over all points ---*/ @@ -2345,6 +2347,8 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain END_SU2_OMP_FOR } + AD::EndNoSharedReading(); + if (axisymmetric) { /*--- For viscous problems, we need an additional gradient. ---*/ @@ -2352,6 +2356,8 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain ComputeAxisymmetricAuxGradients(geometry, config); } + AD::StartNoSharedReading(); + /*--- loop over points ---*/ SU2_OMP_FOR_DYN(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2397,8 +2403,12 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } END_SU2_OMP_FOR + + AD::EndNoSharedReading(); } + AD::StartNoSharedReading(); + if (gravity) { /*--- loop over points ---*/ @@ -2470,6 +2480,8 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain END_SU2_OMP_FOR } + AD::EndNoSharedReading(); + /*--- Check if a verification solution is to be computed. ---*/ if ( VerificationSolution ) { @@ -2479,6 +2491,8 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain su2double time = 0.0; if (config->GetTime_Marching() != TIME_MARCHING::STEADY) time = config->GetPhysicalTime(); + AD::StartNoSharedReading(); + /*--- Loop over points ---*/ SU2_OMP_FOR_DYN(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2499,6 +2513,8 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain } } END_SU2_OMP_FOR + + AD::EndNoSharedReading(); } } diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index e31a438fb470..f94e1cedf572 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -1318,6 +1318,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont const bool streamwise_periodic = (config->GetKind_Streamwise_Periodic() != ENUM_STREAMWISE_PERIODIC::NONE); const bool streamwise_periodic_temperature = config->GetStreamwise_Periodic_Temperature(); + AD::StartNoSharedReading(); + if (body_force) { /*--- Loop over all points ---*/ @@ -1419,12 +1421,16 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont END_SU2_OMP_FOR } + AD::EndNoSharedReading(); + if (axisymmetric) { /*--- For viscous problems, we need an additional gradient. ---*/ if (viscous) { + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { @@ -1443,6 +1449,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/ if (config->GetKind_Gradient_Method() == GREEN_GAUSS) { @@ -1456,6 +1464,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont /*--- loop over points ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -1506,10 +1516,14 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } END_SU2_OMP_FOR + + AD::EndNoSharedReading(); } if (radiation) { + AD::StartNoSharedReading(); + CNumerics* second_numerics = numerics_container[SOURCE_SECOND_TERM + omp_get_thread_num()*MAX_TERMS]; SU2_OMP_FOR_STAT(omp_chunk_size) @@ -1550,6 +1564,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } END_SU2_OMP_FOR + AD::EndNoSharedReading(); } if (streamwise_periodic) { @@ -1557,6 +1572,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont /*--- For turbulent streamwise periodic problems w/ energy eq, we need an additional gradient of Eddy viscosity. ---*/ if (streamwise_periodic_temperature && turbulent) { + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { /*--- Set the auxiliary variable, Eddy viscosity mu_t, for this node. ---*/ @@ -1564,6 +1581,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/ if (config->GetKind_Gradient_Method() == GREEN_GAUSS) { SetAuxVar_Gradient_GG(geometry, config); @@ -1577,6 +1596,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont /*--- Set delta_p, m_dot, inlet_T, integrated_heat ---*/ numerics->SetStreamwisePeriodicValues(SPvals); + AD::StartNoSharedReading(); + /*--- Loop over all points ---*/ SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -1604,6 +1625,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } // for iPoint END_SU2_OMP_FOR + AD::EndNoSharedReading(); + if(!streamwise_periodic_temperature && energy) { CNumerics* second_numerics = numerics_container[SOURCE_SECOND_TERM + omp_get_thread_num()*MAX_TERMS]; @@ -1611,6 +1634,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont /*--- Set delta_p, m_dot, inlet_T, integrated_heat ---*/ second_numerics->SetStreamwisePeriodicValues(SPvals); + AD::StartNoSharedReading(); + /*--- This bit acts as a boundary condition rather than a source term. But logically it fits better here. ---*/ for (auto iMarker = 0ul; iMarker < config->GetnMarker_All(); iMarker++) { @@ -1646,6 +1671,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont }// if periodic inlet boundary }// for iMarker + AD::EndNoSharedReading(); + }// if !streamwise_periodic_temperature }// if streamwise_periodic @@ -1658,6 +1685,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont su2double time = 0.0; if (config->GetTime_Marching() != TIME_MARCHING::STEADY) time = config->GetPhysicalTime(); + AD::StartNoSharedReading(); + /*--- Loop over points ---*/ SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -1679,6 +1708,8 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } END_SU2_OMP_FOR + + AD::EndNoSharedReading(); } } diff --git a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp index 4f88dee8d14e..3c620d41860d 100644 --- a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp @@ -840,6 +840,8 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con ComputeAxisymmetricAuxGradients(geometry,config); } + AD::StartNoSharedReading(); + /*--- loop over interior points ---*/ SU2_OMP_FOR_DYN(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -965,6 +967,8 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + /*--- Checking for NaN ---*/ unsigned long eAxi_global = eAxi_local; unsigned long eChm_global = eChm_local; diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp index 0cef0e5ecb90..7963cf999332 100644 --- a/SU2_CFD/src/solvers/CTurbSASolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp @@ -301,6 +301,8 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai /*--- Pick one numerics object per thread. ---*/ CNumerics* numerics = numerics_container[SOURCE_FIRST_TERM + omp_get_thread_num()*MAX_TERMS]; + AD::StartNoSharedReading(); + /*--- Loop over all points. ---*/ SU2_OMP_FOR_DYN(omp_chunk_size) @@ -400,6 +402,8 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai END_SU2_OMP_FOR } + AD::EndNoSharedReading(); + } void CTurbSASolver::Source_Template(CGeometry *geometry, CSolver **solver_container, CNumerics *numerics, diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp index 819a57c0a49f..1da3da2cf5da 100644 --- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp @@ -291,6 +291,8 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta /*--- Loop over all points. ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_DYN(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -350,6 +352,8 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + } void CTurbSSTSolver::Source_Template(CGeometry *geometry, CSolver **solver_container, CNumerics *numerics, From 25ba4e36d26c3584e926cfc6ad7d35ae6c4ff101 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Sun, 1 Aug 2021 01:01:32 +0100 Subject: [PATCH 34/39] revise some shared readings and add others --- SU2_CFD/src/solvers/CEulerSolver.cpp | 22 ++++++------- SU2_CFD/src/solvers/CIncEulerSolver.cpp | 9 +++-- SU2_CFD/src/solvers/CIncNSSolver.cpp | 4 +++ SU2_CFD/src/solvers/CNSSolver.cpp | 4 +++ SU2_CFD/src/solvers/CSolver.cpp | 44 +++++++++---------------- SU2_CFD/src/solvers/CTurbSASolver.cpp | 4 ++- SU2_CFD/src/solvers/CTurbSSTSolver.cpp | 3 ++ 7 files changed, 44 insertions(+), 46 deletions(-) diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index 317cd9780f41..3a815ed7511a 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -1886,6 +1886,8 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c * further reduction if function is called in parallel ---*/ unsigned long nonPhysicalPoints = 0; + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) { @@ -1900,6 +1902,8 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + return nonPhysicalPoints; } @@ -2291,11 +2295,10 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain unsigned short iVar; unsigned long iPoint; - AD::StartNoSharedReading(); - if (body_force) { /*--- Loop over all points ---*/ + AD::StartNoSharedReading(); SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2314,6 +2317,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain } END_SU2_OMP_FOR + AD::EndNoSharedReading(); } if (rotating_frame) { @@ -2324,6 +2328,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain SetRotatingFrame_GCL(geometry, config); /*--- Loop over all points ---*/ + AD::StartNoSharedReading(); SU2_OMP_FOR_DYN(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2345,10 +2350,9 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain } END_SU2_OMP_FOR + AD::EndNoSharedReading(); } - AD::EndNoSharedReading(); - if (axisymmetric) { /*--- For viscous problems, we need an additional gradient. ---*/ @@ -2356,9 +2360,8 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain ComputeAxisymmetricAuxGradients(geometry, config); } - AD::StartNoSharedReading(); - /*--- loop over points ---*/ + AD::StartNoSharedReading(); SU2_OMP_FOR_DYN(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2480,8 +2483,6 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain END_SU2_OMP_FOR } - AD::EndNoSharedReading(); - /*--- Check if a verification solution is to be computed. ---*/ if ( VerificationSolution ) { @@ -2491,8 +2492,6 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain su2double time = 0.0; if (config->GetTime_Marching() != TIME_MARCHING::STEADY) time = config->GetPhysicalTime(); - AD::StartNoSharedReading(); - /*--- Loop over points ---*/ SU2_OMP_FOR_DYN(omp_chunk_size) for (iPoint = 0; iPoint < nPointDomain; iPoint++) { @@ -2513,11 +2512,10 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain } } END_SU2_OMP_FOR - - AD::EndNoSharedReading(); } } + AD::EndNoSharedReading(); } void CEulerSolver::Source_Template(CGeometry *geometry, CSolver **solver_container, CNumerics *numerics, diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index f94e1cedf572..e6396f25c8bd 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -938,6 +938,8 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container unsigned long iPoint, nonPhysicalPoints = 0; + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint ++) { @@ -951,6 +953,8 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + return nonPhysicalPoints; } @@ -1634,8 +1638,6 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont /*--- Set delta_p, m_dot, inlet_T, integrated_heat ---*/ second_numerics->SetStreamwisePeriodicValues(SPvals); - AD::StartNoSharedReading(); - /*--- This bit acts as a boundary condition rather than a source term. But logically it fits better here. ---*/ for (auto iMarker = 0ul; iMarker < config->GetnMarker_All(); iMarker++) { @@ -1670,9 +1672,6 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont END_SU2_OMP_FOR }// if periodic inlet boundary }// for iMarker - - AD::EndNoSharedReading(); - }// if !streamwise_periodic_temperature }// if streamwise_periodic diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp index 5a749ddfea84..cc206cf4f1a5 100644 --- a/SU2_CFD/src/solvers/CIncNSSolver.cpp +++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp @@ -327,6 +327,8 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c bool tkeNeeded = ((turb_model == SST) || (turb_model == SST_SUST)); + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { @@ -356,6 +358,8 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + return nonPhysicalPoints; } diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp index f3b57266d66c..daf3e9d54a60 100644 --- a/SU2_CFD/src/solvers/CNSSolver.cpp +++ b/SU2_CFD/src/solvers/CNSSolver.cpp @@ -135,6 +135,8 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons const unsigned short turb_model = config->GetKind_Turb_Model(); const bool tkeNeeded = (turb_model == SST) || (turb_model == SST_SUST); + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) { @@ -164,6 +166,8 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons } END_SU2_OMP_FOR + AD::EndNoSharedReading(); + return nonPhysicalPoints; } diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index 8ca0025bd282..4e360ff799db 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -1929,8 +1929,6 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) if (geometry->GetMGLevel() != MESH_0) return; - AD::StartNoSharedReading(); - SU2_OMP_MASTER { /*--- Set the L2 Norm residual in all the processors. ---*/ @@ -1987,16 +1985,12 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) } END_SU2_OMP_MASTER SU2_OMP_BARRIER - - AD::EndNoSharedReading(); } void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) { if (geometry->GetMGLevel() != MESH_0) return; - AD::StartNoSharedReading(); - SU2_OMP_MASTER { /*--- Set the L2 Norm residual in all the processors. ---*/ @@ -2034,8 +2028,6 @@ void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) } END_SU2_OMP_MASTER SU2_OMP_BARRIER - - AD::EndNoSharedReading(); } void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { @@ -4119,8 +4111,6 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig SU2_OMP_PARALLEL { - AD::StartNoSharedReading(); - /*--- Set Residuals to zero ---*/ SU2_OMP_MASTER for (unsigned short iVar = 0; iVar < nVar; iVar++){ @@ -4162,8 +4152,6 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig END_SU2_OMP_CRITICAL SU2_OMP_BARRIER - AD::EndNoSharedReading(); - SetResidual_BGS(geometry, config); } @@ -4225,7 +4213,7 @@ void CSolver::BasicLoadRestart(CGeometry *geometry, const CConfig *config, const } void CSolver::SavelibROM(CGeometry *geometry, CConfig *config, bool converged) { - + #if defined(HAVE_LIBROM) && !defined(CODI_FORWARD_TYPE) && !defined(CODI_REVERSE_TYPE) const bool unsteady = config->GetTime_Domain(); const string filename = config->GetlibROMbase_FileName(); @@ -4237,38 +4225,38 @@ void CSolver::SavelibROM(CGeometry *geometry, CConfig *config, bool converged) { bool incremental = false; if (!u_basis_generator) { - + /*--- Define SVD basis generator ---*/ auto timesteps = static_cast(nTimeIter - TimeIter); CAROM::Options svd_options = CAROM::Options(dim, timesteps, -1, false, true).setMaxBasisDimension(int(maxBasisDim)); - + if (config->GetKind_PODBasis() == POD_KIND::STATIC) { if (rank == MASTER_NODE) std::cout << "Creating static basis generator." << std::endl; - + if (unsteady) { if (rank == MASTER_NODE) std::cout << "Incremental basis generator recommended for unsteady simulations." << std::endl; } } else { if (rank == MASTER_NODE) std::cout << "Creating incremental basis generator." << std::endl; - + svd_options.setIncrementalSVD(1.0e-3, config->GetDelta_UnstTime(), 1.0e-2, config->GetDelta_UnstTime()*nTimeIter, true).setDebugMode(false); incremental = true; } - + u_basis_generator.reset(new CAROM::BasisGenerator( svd_options, incremental, filename)); - + // Save mesh ordering std::ofstream f; f.open(filename + "_mesh_" + to_string(rank) + ".csv"); for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { unsigned long globalPoint = geometry->nodes->GetGlobalIndex(iPoint); auto Coord = geometry->nodes->GetCoord(iPoint); - + for (unsigned long iDim; iDim < nDim; iDim++) { f << Coord[iDim] << ", "; } @@ -4283,31 +4271,31 @@ void CSolver::SavelibROM(CGeometry *geometry, CConfig *config, bool converged) { su2double t = config->GetCurrent_UnstTime(); u_basis_generator->takeSample(const_cast(base_nodes->GetSolution().data()), t, dt); } - + /*--- End collection of data and save POD ---*/ - + if (converged) { - + if (!unsteady) { // dt is different for each node, so just use a placeholder dt su2double dt = base_nodes->GetDelta_Time(0); su2double t = dt*TimeIter; u_basis_generator->takeSample(const_cast(base_nodes->GetSolution().data()), t, dt); } - + if (config->GetKind_PODBasis() == POD_KIND::STATIC) { u_basis_generator->writeSnapshot(); } - + if (rank == MASTER_NODE) std::cout << "Computing SVD" << std::endl; int rom_dim = u_basis_generator->getSpatialBasis()->numColumns(); - + if (rank == MASTER_NODE) std::cout << "Basis dimension: " << rom_dim << std::endl; u_basis_generator->endSamples(); - + if (rank == MASTER_NODE) std::cout << "ROM Sampling ended" << std::endl; } - + #else SU2_MPI::Error("SU2 was not compiled with libROM support.", CURRENT_FUNCTION); #endif diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp index 7963cf999332..b5ffb229fd34 100644 --- a/SU2_CFD/src/solvers/CTurbSASolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp @@ -255,6 +255,8 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain /*--- Compute eddy viscosity ---*/ + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) { @@ -284,6 +286,7 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain } END_SU2_OMP_FOR + AD::EndNoSharedReading(); } @@ -297,7 +300,6 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai CVariable* flowNodes = solver_container[FLOW_SOL]->GetNodes(); - /*--- Pick one numerics object per thread. ---*/ CNumerics* numerics = numerics_container[SOURCE_FIRST_TERM + omp_get_thread_num()*MAX_TERMS]; diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp index 1da3da2cf5da..f5a6fe8f87b9 100644 --- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp @@ -244,6 +244,8 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai SetSolution_Gradient_LS(geometry, config); } + AD::StartNoSharedReading(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint ++) { @@ -275,6 +277,7 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai } END_SU2_OMP_FOR + AD::EndNoSharedReading(); } void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_container, From a9466bb158ca8675352bf3b4b887cbf82b7f28ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 2 Aug 2021 20:57:23 +0200 Subject: [PATCH 35/39] Suggestion for an option that disabled OpDiLib. --- Common/include/code_config.hpp | 2 +- meson.build | 24 ++++++++++++++---------- meson_options.txt | 2 +- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index 3bb734a7696c..594fab595bd5 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -112,7 +112,7 @@ using su2mixedfloat = passivedouble; #endif /*--- Detect if OpDiLib has to be used. ---*/ -#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) +#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) && !defined(FORCE_OPDI_OFF) #ifndef __INTEL_COMPILER #define HAVE_OPDI #else diff --git a/meson.build b/meson.build index d710813fe390..8ecb1c9ca7fb 100644 --- a/meson.build +++ b/meson.build @@ -117,16 +117,20 @@ if omp # add opdi dependency if get_option('enable-autodiff') - codi_dep += declare_dependency(include_directories: 'externals/opdi/include') - - if get_option('opdi-backend') == 'macro' - su2_cpp_args += '-DFORCE_OPDI_MACRO_BACKEND' - elif get_option('opdi-backend') == 'ompt' - su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND' - endif - - if get_option('opdi-shared-read-opt') == false - su2_cpp_args += '-DOPDI_VARIABLE_ADJOINT_ACCESS_MODE=0' + if get_option('opdi-backend') == 'none' + su2_cpp_args += '-DFORCE_OPDI_OFF' + else + codi_dep += declare_dependency(include_directories: 'externals/opdi/include') + + if get_option('opdi-backend') == 'macro' + su2_cpp_args += '-DFORCE_OPDI_MACRO_BACKEND' + elif get_option('opdi-backend') == 'ompt' + su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND' + endif + + if get_option('opdi-shared-read-opt') == false + su2_cpp_args += '-DOPDI_VARIABLE_ADJOINT_ACCESS_MODE=0' + endif endif endif endif diff --git a/meson_options.txt b/meson_options.txt index 05a0b2c64fbc..fa67a78ca3e2 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -18,7 +18,7 @@ option('enable-tests', type : 'boolean', value : false, description: 'compile U option('enable-mixedprec', type : 'boolean', value : false, description: 'use single precision floating point arithmetic for sparse algebra') option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation') option('enable-mpp', type : 'boolean', value : false, description: 'enable Mutation++ support') -option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice') +option('opdi-backend', type : 'combo', choices : ['none', 'auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice') option('codi-tape', type : 'combo', choices : ['JacobianLinear', 'JacobianIndex'], value : 'JacobianLinear', description: 'CoDiPack tape choice') option('opdi-shared-read-opt', type : 'boolean', value : true, description : 'OpDiLib shared reading optimization') option('librom_root', type : 'string', value : '', description: 'libROM base directory') From 1ce51159110761627af4ea1c59ff89c01f84c1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 4 Aug 2021 15:46:43 +0200 Subject: [PATCH 36/39] OpDiLib update. --- externals/opdi | 2 +- meson_scripts/init.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/opdi b/externals/opdi index 1709a8c31c06..2735b503f601 160000 --- a/externals/opdi +++ b/externals/opdi @@ -1 +1 @@ -Subproject commit 1709a8c31c0610556199e8050aa53bd3c21500c5 +Subproject commit 2735b503f60163e8d64e1ac56cce46173a9fd4a9 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index 13ae423daef2..c2fcd132697b 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -48,7 +48,7 @@ def init_submodules(method = 'auto'): github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' - sha_version_opdi = '1709a8c31c0610556199e8050aa53bd3c21500c5' + sha_version_opdi = '2735b503f60163e8d64e1ac56cce46173a9fd4a9' github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2' github_repo_meson = 'https://github.com/mesonbuild/meson' From e81a8ff40b6df61198ffa7404f2f2e1e93ae132b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 4 Aug 2021 18:10:29 +0200 Subject: [PATCH 37/39] Revert "Suggestion for an option that disabled OpDiLib." This reverts commit a9466bb158ca8675352bf3b4b887cbf82b7f28ff. --- Common/include/code_config.hpp | 2 +- meson.build | 24 ++++++++++-------------- meson_options.txt | 2 +- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index 594fab595bd5..3bb734a7696c 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -112,7 +112,7 @@ using su2mixedfloat = passivedouble; #endif /*--- Detect if OpDiLib has to be used. ---*/ -#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) && !defined(FORCE_OPDI_OFF) +#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) #ifndef __INTEL_COMPILER #define HAVE_OPDI #else diff --git a/meson.build b/meson.build index 8ecb1c9ca7fb..d710813fe390 100644 --- a/meson.build +++ b/meson.build @@ -117,20 +117,16 @@ if omp # add opdi dependency if get_option('enable-autodiff') - if get_option('opdi-backend') == 'none' - su2_cpp_args += '-DFORCE_OPDI_OFF' - else - codi_dep += declare_dependency(include_directories: 'externals/opdi/include') - - if get_option('opdi-backend') == 'macro' - su2_cpp_args += '-DFORCE_OPDI_MACRO_BACKEND' - elif get_option('opdi-backend') == 'ompt' - su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND' - endif - - if get_option('opdi-shared-read-opt') == false - su2_cpp_args += '-DOPDI_VARIABLE_ADJOINT_ACCESS_MODE=0' - endif + codi_dep += declare_dependency(include_directories: 'externals/opdi/include') + + if get_option('opdi-backend') == 'macro' + su2_cpp_args += '-DFORCE_OPDI_MACRO_BACKEND' + elif get_option('opdi-backend') == 'ompt' + su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND' + endif + + if get_option('opdi-shared-read-opt') == false + su2_cpp_args += '-DOPDI_VARIABLE_ADJOINT_ACCESS_MODE=0' endif endif endif diff --git a/meson_options.txt b/meson_options.txt index fa67a78ca3e2..05a0b2c64fbc 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -18,7 +18,7 @@ option('enable-tests', type : 'boolean', value : false, description: 'compile U option('enable-mixedprec', type : 'boolean', value : false, description: 'use single precision floating point arithmetic for sparse algebra') option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation') option('enable-mpp', type : 'boolean', value : false, description: 'enable Mutation++ support') -option('opdi-backend', type : 'combo', choices : ['none', 'auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice') +option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice') option('codi-tape', type : 'combo', choices : ['JacobianLinear', 'JacobianIndex'], value : 'JacobianLinear', description: 'CoDiPack tape choice') option('opdi-shared-read-opt', type : 'boolean', value : true, description : 'OpDiLib shared reading optimization') option('librom_root', type : 'string', value : '', description: 'libROM base directory') From 3f8105932abc1f1aad248fdd39a25c7eda771a9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 4 Aug 2021 18:35:05 +0200 Subject: [PATCH 38/39] Replace assert by warning. --- SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index d68f794bdeeb..07690f259301 100644 --- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -881,7 +881,9 @@ void CDiscAdjMultizoneDriver::SetAdj_ObjFunction() { void CDiscAdjMultizoneDriver::ComputeAdjoints(unsigned short iZone, bool eval_transfer) { #if defined(CODI_INDEX_TAPE) || defined(HAVE_OPDI) - assert(nZone <= 1 && "index AD types do not support multiple zones"); + if (nZone > 1) { + std::cout << "WARNING: Index AD types do not support multiple zones." << std::endl; + } #endif AD::ClearAdjoints(); From d64d6206d973a7e446985629882886383b155aa9 Mon Sep 17 00:00:00 2001 From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> Date: Thu, 5 Aug 2021 17:49:01 +0100 Subject: [PATCH 39/39] Update SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp --- SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index 07690f259301..e9d15ee6809b 100644 --- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -881,7 +881,7 @@ void CDiscAdjMultizoneDriver::SetAdj_ObjFunction() { void CDiscAdjMultizoneDriver::ComputeAdjoints(unsigned short iZone, bool eval_transfer) { #if defined(CODI_INDEX_TAPE) || defined(HAVE_OPDI) - if (nZone > 1) { + if (nZone > 1 && rank == MASTER_NODE) { std::cout << "WARNING: Index AD types do not support multiple zones." << std::endl; } #endif