From 94524aea134909feecf450ffdbb974cefc3ea6d3 Mon Sep 17 00:00:00 2001 From: Shreyas Ananthan Date: Fri, 6 Jul 2018 17:09:21 -0600 Subject: [PATCH] Fix Nalu timers - Fix timerMisc_ for LowMach and Momentum EQS, which were counting certain execution regions twice. timerMisc_ was accumulating time while computing projected_nodal_gradient, which in turn was either incrementing timerMisc_ or accumulating time in PNGEQS. The current fix is to keep calls to compute_projected_nodal_gradient out of timed blocks. - Low Mach timerMisc_ was accumulating time around project_nodal_velocity, which calls the ContinuityEQS projected_nodal_gradient, where there is additional time accumulation in continuityEQS->timerMisc_. - "No output time" was reporting CPU time instead of wall-clock time. This commit changes that to wall-clock time so that it is consistent with what is reported for "STKPERF: Total Time". - The "Timing for connectivity" summary was removed because this is already reported as "init" for each equation system. With these changes the sum of all the timers in the timing table should be less than the total time reported for "main()" as well as "STKPERF: Total Time" in the summary. 
--- nalu.C | 4 ++-- src/LowMachEquationSystem.C | 32 +++++++++++++++++++------------- src/Realm.C | 4 ---- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/nalu.C b/nalu.C index c2a1fb7295..872848692b 100644 --- a/nalu.C +++ b/nalu.C @@ -223,9 +223,9 @@ int main( int argc, char ** argv ) //output timings consistent w/ rest of Sierra stk::diag::Timer & sierra_timer = sierra::nalu::Simulation::rootTimer(); - const double elapsed_time = sierra_timer.getMetric<stk::diag::CPUTime>().getAccumulatedLap(false); + const double elapsed_time = sierra_timer.getMetric<stk::diag::WallTime>().getAccumulatedLap(false); stk::diag::Timer & mesh_output_timer = sierra::nalu::Simulation::outputTimer(); - double mesh_output_time = mesh_output_timer.getMetric<stk::diag::CPUTime>().getAccumulatedLap(false); + double mesh_output_time = mesh_output_timer.getMetric<stk::diag::WallTime>().getAccumulatedLap(false); double time_without_output = elapsed_time-mesh_output_time; stk::parallel_print_time_without_output_and_hwm(naluEnv.parallel_comm(), time_without_output, naluEnv.naluOutputP0()); diff --git a/src/LowMachEquationSystem.C b/src/LowMachEquationSystem.C index 5765a2e275..afaa83bd6e 100644 --- a/src/LowMachEquationSystem.C +++ b/src/LowMachEquationSystem.C @@ -630,8 +630,8 @@ LowMachEquationSystem::solve_and_update() // wrap timing double timeA, timeB; if ( isInit_ ) { - timeA = NaluEnv::self().nalu_time(); continuityEqSys_->compute_projected_nodal_gradient(); + timeA = NaluEnv::self().nalu_time(); continuityEqSys_->computeMdotAlgDriver_->execute(); timeB = NaluEnv::self().nalu_time(); @@ -697,10 +697,7 @@ LowMachEquationSystem::solve_and_update() continuityEqSys_->timerMisc_ += (timeB-timeA); // project nodal velocity - timeA = NaluEnv::self().nalu_time(); project_nodal_velocity(); - timeB = NaluEnv::self().nalu_time(); - timerMisc_ += (timeB-timeA); // compute velocity relative to mesh with new velocity realm_.compute_vrtm(); @@ -710,8 +707,8 @@ LowMachEquationSystem::solve_and_update() // we use this approach to avoid two evals per // solve/update 
since dudx is required for tke // production - timeA = NaluEnv::self().nalu_time(); momentumEqSys_->compute_projected_nodal_gradient(); + timeA = NaluEnv::self().nalu_time(); momentumEqSys_->compute_wall_function_params(); timeB = NaluEnv::self().nalu_time(); momentumEqSys_->timerMisc_ += (timeB-timeA); @@ -951,16 +948,25 @@ MomentumEquationSystem::initial_work() EquationSystem::initial_work(); // proceed with a bunch of initial work; wrap in timer - const double timeA = NaluEnv::self().nalu_time(); - realm_.compute_vrtm(); + { + const double timeA = NaluEnv::self().nalu_time(); + realm_.compute_vrtm(); + const double timeB = NaluEnv::self().nalu_time(); + timerMisc_ += (timeB-timeA); + } + compute_projected_nodal_gradient(); - compute_wall_function_params(); - tviscAlgDriver_->execute(); - diffFluxCoeffAlgDriver_->execute(); - cflReyAlgDriver_->execute(); - const double timeB = NaluEnv::self().nalu_time(); - timerMisc_ += (timeB-timeA); + { + const double timeA = NaluEnv::self().nalu_time(); + compute_wall_function_params(); + tviscAlgDriver_->execute(); + diffFluxCoeffAlgDriver_->execute(); + cflReyAlgDriver_->execute(); + + const double timeB = NaluEnv::self().nalu_time(); + timerMisc_ += (timeB-timeA); + } } //-------------------------------------------------------------------------- diff --git a/src/Realm.C b/src/Realm.C index 1e61ecc7a7..b80f587f53 100644 --- a/src/Realm.C +++ b/src/Realm.C @@ -3976,10 +3976,6 @@ Realm::dump_simulation_time() << " \tmin: " << g_min_time[4] << " \tmax: " << g_max_time[4] << std::endl; NaluEnv::self().naluOutputP0() << " io populate fd -- " << " \tavg: " << g_total_time[5]/double(nprocs) << " \tmin: " << g_min_time[5] << " \tmax: " << g_max_time[5] << std::endl; - NaluEnv::self().naluOutputP0() << "Timing for connectivity/finalize lysys: " << std::endl; - NaluEnv::self().naluOutputP0() << " eqs init -- " << " \tavg: " << g_total_time[2]/double(nprocs) - << " \tmin: " << g_min_time[2] << " \tmax: " << g_max_time[2] << std::endl; 
- NaluEnv::self().naluOutputP0() << "Timing for property evaluation: " << std::endl; NaluEnv::self().naluOutputP0() << " props -- " << " \tavg: " << g_total_time[3]/double(nprocs) << " \tmin: " << g_min_time[3] << " \tmax: " << g_max_time[3] << std::endl;