Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(DO NOT MERGE) - Tpetra: performance improvements to CrsMatrix::copyAndPermute - release candidate #13648

Closed
wants to merge 26 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
35785c2
treat copy and permute for the special case that both the source and …
tjfulle Sep 20, 2024
b7ce02c
draft: testing access to device pointers for column indices and values
skennon10 Nov 7, 2024
53ae2cc
Address performance issues in CrsMatrix copyAndPermute:
skennon10 Nov 13, 2024
e4a5255
initial push; see branch srkenno/copy-and-permute-improvements
skennon10 Nov 13, 2024
7c7a518
allow repeat runs from the same executable; improve main timers output
skennon10 Nov 14, 2024
081bd32
force a commit
skennon10 Nov 15, 2024
0f6b860
force a commit
skennon10 Nov 15, 2024
1280c63
sort timers, print nicely
skennon10 Nov 19, 2024
f608765
tweaks to timers
skennon10 Nov 19, 2024
9935787
improve timer title to reflect the average is ave(repeat runs), not a…
skennon10 Nov 20, 2024
4c03a4f
add some timers around graph ops, including compute/apply padding; ad…
skennon10 Nov 27, 2024
28c647a
merge working branch into PR branch
skennon10 Dec 3, 2024
cc22d9f
fix to allow use outside panzer
skennon10 Dec 3, 2024
db2b14c
protect new code
skennon10 Dec 3, 2024
b4d7b69
merge develop
skennon10 Dec 5, 2024
17b8e68
merge working branch
skennon10 Dec 5, 2024
fc61b03
protect timers
skennon10 Dec 6, 2024
1bab469
fix compile errors (complex data type)
skennon10 Dec 6, 2024
56795a1
towards kokkos version of copyAndPermute
skennon10 Dec 12, 2024
1c8d1f3
more kokkos work
skennon10 Dec 12, 2024
c64ff95
more kokkos: compiles but segfaults
skennon10 Dec 12, 2024
8387e30
no longer segfaulting
skennon10 Dec 13, 2024
0d52dec
CrsGraph::copyAndPermute is now running partially on device, nice spe…
skennon10 Dec 13, 2024
393684e
cleanup; implement permute phase for isFillComplete
skennon10 Dec 16, 2024
067a50b
paste temp source files into CrsGraph; remove temp source files
skennon10 Dec 16, 2024
7561e1f
merge working branch
skennon10 Dec 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions packages/panzer/disc-fe/src/Panzer_AssemblyEngine_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,16 @@
#include "Panzer_FieldManagerBuilder.hpp"
#include "Panzer_AssemblyEngine_InArgs.hpp"
#include "Panzer_GlobalEvaluationDataContainer.hpp"
#include "Teuchos_Time.hpp"
#include <sstream>
#include <unordered_map>
#include <vector>

// Compile-time switch for the experimental timing instrumentation in this file.
// It is defined to 0 here, so every `#if EXP_INCLUDED_FROM_PANXER_MINI_EM`
// region (including the declarations below) is compiled out.
#define EXP_INCLUDED_FROM_PANXER_MINI_EM 0
#if EXP_INCLUDED_FROM_PANXER_MINI_EM
// Both symbols are only declared here; their definitions must come from
// another translation unit (presumably the MiniEM driver, going by the macro
// name -- TODO confirm).
//
// Timers: keyed by timer name; this file adds wall-time differences to the
// `.first` member of the mapped pair. The `.second` vector is not touched here.
extern std::unordered_map<std::string, std::pair<double, std::vector<double>> >& Timers;
// in_eval_J: gate checked before each Timers update in evaluate(); timings are
// accumulated only while it is true.
extern bool in_eval_J;
#endif

//===========================================================================
//===========================================================================
Expand Down Expand Up @@ -87,18 +96,30 @@ evaluate(const panzer::AssemblyEngineInArgs& in, const EvaluationFlags flags)
}

if ( flags.getValue() & EvaluationFlags::Scatter) {
[[maybe_unused]] double time = Teuchos::Time::wallTime();
PANZER_FUNC_TIME_MONITOR_DIFF("panzer::AssemblyEngine::evaluate_scatter("+PHX::print<EvalT>()+")",eval_scatter);
{
[[maybe_unused]] double time1 = Teuchos::Time::wallTime();
PANZER_FUNC_TIME_MONITOR_DIFF("panzer::AssemblyEngine::lof->ghostToGlobalContainer("+PHX::print<EvalT>()+")",lof_gtgc);
m_lin_obj_factory->ghostToGlobalContainer(*in.ghostedContainer_,*in.container_,LOC::F | LOC::Mat);
#if EXP_INCLUDED_FROM_PANXER_MINI_EM
if (in_eval_J) Timers["lof-g2gc"].first += -time1 + Teuchos::Time::wallTime();
#endif
}
{
[[maybe_unused]] double time1 = Teuchos::Time::wallTime();
PANZER_FUNC_TIME_MONITOR_DIFF("panzer::AssemblyEngine::gedc.ghostToGlobal("+PHX::print<EvalT>()+")",gedc_gtg);
m_lin_obj_factory->beginFill(*in.container_);
gedc.ghostToGlobal(LOC::F | LOC::Mat);
m_lin_obj_factory->endFill(*in.container_);
#if EXP_INCLUDED_FROM_PANXER_MINI_EM
if (in_eval_J) Timers["gedc-g2g"].first += -time1 + Teuchos::Time::wallTime();
#endif
}
m_lin_obj_factory->endFill(*in.ghostedContainer_);
#if EXP_INCLUDED_FROM_PANXER_MINI_EM
if (in_eval_J) Timers["eval_scatter"].first += -time + Teuchos::Time::wallTime();
#endif
}

return;
Expand Down
19 changes: 18 additions & 1 deletion packages/panzer/disc-fe/src/Panzer_ModelEvaluator_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@
#include "Thyra_TpetraLinearOp.hpp"
#include "Tpetra_CrsMatrix.hpp"


// Compile-time switch for the experimental timing instrumentation in this file.
// It is defined to 0 here, so every `#if EXP_INCLUDED_FROM_PANXER_MINI_EM`
// region (including the declarations below) is compiled out.
#define EXP_INCLUDED_FROM_PANXER_MINI_EM 0
#if EXP_INCLUDED_FROM_PANXER_MINI_EM
// All symbols below are only declared here; their definitions must come from
// another translation unit (presumably the MiniEM driver, going by the macro
// name -- TODO confirm).
//
// use_eval_J: read after the Jacobian evaluation to reset in_eval_J
// (in_eval_J = !use_eval_J).
extern bool use_eval_J;
// in_eval_J: set to true immediately before the Jacobian-only evaluation.
extern bool in_eval_J;
// Timers: keyed by timer name; the Jacobian branch adds its wall-time
// difference to Timers["evalJ"].first.
extern std::unordered_map<std::string, std::pair<double, std::vector<double>> >& Timers;
// NOTE(review): timer_evalJ and timer_capsg are declared but not referenced in
// the visible part of this file -- confirm they are still needed.
extern double timer_evalJ;
extern double timer_capsg;
#endif

// Constructors/Initializers/Accessors

template<typename Scalar>
Expand Down Expand Up @@ -1569,7 +1579,10 @@ evalModelImpl_basic(const Thyra::ModelEvaluatorBase::InArgs<Scalar> &inArgs,
else if(Teuchos::is_null(f_out) && !Teuchos::is_null(W_out)) {

PANZER_FUNC_TIME_MONITOR("panzer::ModelEvaluator::evalModel(J)");

#if EXP_INCLUDED_FROM_PANXER_MINI_EM
double time_ = Teuchos::Time::wallTime();
in_eval_J = true;
#endif
// only add auxiliary global data if Jacobian is being formed
ae_inargs.addGlobalEvaluationData(nonParamGlobalEvaluationData_);

Expand All @@ -1582,6 +1595,10 @@ evalModelImpl_basic(const Thyra::ModelEvaluatorBase::InArgs<Scalar> &inArgs,
thGhostedContainer->initializeMatrix(0.0);

ae_tm_.template getAsObject<panzer::Traits::Jacobian>()->evaluate(ae_inargs);
#if EXP_INCLUDED_FROM_PANXER_MINI_EM
in_eval_J = !use_eval_J;
Timers["evalJ"].first += -time_ + Teuchos::Time::wallTime();
#endif
}

// HACK: set A to null before calling responses to avoid touching the
Expand Down
Loading
Loading