From f01b71189b7c082b721fcc0cd5226db661ec1f09 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 29 Jan 2020 17:24:02 -0700 Subject: [PATCH 1/8] Tpetra::Crs{Graph,Matrix}: Add verbose output for memory use --- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 17 +- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 219 +++++++++++++++--- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 12 +- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 191 +++++++++++---- 4 files changed, 357 insertions(+), 82 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 5bcd134b22be..98d88d678dff 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1163,17 +1163,21 @@ namespace Tpetra { buffer_device_type>& permuteFromLIDs) override; void - applyCrsPadding (const Kokkos::UnorderedMap& padding); + applyCrsPadding( + const Kokkos::UnorderedMap& padding, + const bool verbose); Kokkos::UnorderedMap computeCrsPadding (const RowGraph& source, const size_t numSameIDs, const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs) const; + const Kokkos::DualView& permuteFromLIDs, + const bool verbose) const; Kokkos::UnorderedMap computeCrsPadding (const Kokkos::DualView& importLIDs, - Kokkos::DualView numPacketsPerLID) const; + Kokkos::DualView numPacketsPerLID, + const bool verbose) const; void computeCrsPaddingForSameIDs (Kokkos::UnorderedMap& padding, @@ -1480,6 +1484,9 @@ namespace Tpetra { }; private: + std::unique_ptr + createPrefix(const char methodName[]) const; + // Friend declaration for nonmember function. template friend Teuchos::RCP @@ -1693,7 +1700,9 @@ namespace Tpetra { }; bool indicesAreAllocated () const; - void allocateIndices (const ELocalGlobal lg); + + void + allocateIndices(const ELocalGlobal lg, const bool verbose=false); //! 
\name Methods governing changes between global and local indices //@{ diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 9414d19de707..c9c231ec9dcf 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -1212,11 +1212,13 @@ namespace Tpetra { template void CrsGraph:: - allocateIndices (const ELocalGlobal lg) + allocateIndices (const ELocalGlobal lg, const bool verbose) { + using ::Tpetra::Details::ProfilingRegion; using Teuchos::arcp; using Teuchos::Array; using Teuchos::ArrayRCP; + using std::endl; typedef Teuchos::ArrayRCP::size_type size_type; typedef typename local_graph_type::row_map_type::non_const_type non_const_row_map_type; @@ -1227,6 +1229,17 @@ namespace Tpetra { device_type> gbl_col_inds_type; const char tfecfFuncName[] = "allocateIndices: "; const char suffix[] = " Please report this bug to the Tpetra developers."; + ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices"); + + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("allocateIndices"); + std::ostringstream os; + os << *prefix << "{lg=" + << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices") + << ", numRows: " << this->getNodeNumRows() << "}" << endl; + std::cerr << os.str(); + } // This is a protected function, only callable by us. If it was // called incorrectly, it is our fault. 
That's why the tests @@ -1248,6 +1261,11 @@ namespace Tpetra { // // STATIC ALLOCATION PROFILE // + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl; + std::cerr << os.str(); + } non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1); if (this->k_numAllocPerRow_.extent (0) != 0) { @@ -1294,9 +1312,21 @@ namespace Tpetra { const size_type numInds = ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, numRows); // const size_type numInds = static_cast (this->k_rowPtrs_(numRows)); if (lg == LocalIndices) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate local column indices " + "k_lclInds1D_: " << numInds << endl; + std::cerr << os.str(); + } k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds); } else { + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate global column indices " + "k_gblInds1D_: " << numInds << endl; + std::cerr << os.str(); + } k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds); } storageStatus_ = ::Tpetra::Details::STORAGE_1D_UNPACKED; @@ -1308,7 +1338,12 @@ namespace Tpetra { using Kokkos::ViewAllocateWithoutInitializing; typedef decltype (k_numRowEntries_) row_ent_type; const char label[] = "Tpetra::CrsGraph::numRowEntries"; - + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate k_numRowEntries_: " << numRows + << endl; + std::cerr << os.str(); + } row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows); Kokkos::deep_copy (numRowEnt, static_cast (0)); // fill w/ 0s this->k_numRowEntries_ = numRowEnt; // "commit" our allocation @@ -2669,7 +2704,11 @@ namespace Tpetra { "Local row index " << localRow << " is not in the row Map " "on the calling process."); if (! indicesAreAllocated ()) { - allocateIndices (LocalIndices); + // Allocating indices takes a while and only needs to be done + // once per MPI process, so it's OK to query TPETRA_VERBOSE. 
+ using ::Tpetra::Details::Behavior; + const bool verbose = Behavior::verbose("CrsGraph"); + allocateIndices (LocalIndices, verbose); } #ifdef HAVE_TPETRA_DEBUG @@ -2763,7 +2802,11 @@ namespace Tpetra { "If fillComplete has been called, you must first call resumeFill " "before you may insert indices."); if (! this->indicesAreAllocated ()) { - this->allocateIndices (GlobalIndices); + // Allocating indices takes a while and only needs to be done + // once per MPI process, so it's OK to query TPETRA_VERBOSE. + using ::Tpetra::Details::Behavior; + const bool verbose = Behavior::verbose("CrsGraph"); + this->allocateIndices (GlobalIndices, verbose); } const LO lclRow = this->rowMap_->getLocalElement (gblRow); if (lclRow != Tpetra::Details::OrdinalTraits::invalid ()) { @@ -2852,7 +2895,11 @@ namespace Tpetra { "If fillComplete has been called, you must first call resumeFill " "before you may insert indices."); if (! this->indicesAreAllocated ()) { - this->allocateIndices (GlobalIndices); + // Allocating indices takes a while and only needs to be done + // once per MPI process, so it's OK to query TPETRA_VERBOSE. + using ::Tpetra::Details::Behavior; + const bool verbose = Behavior::verbose("CrsGraph"); + this->allocateIndices (GlobalIndices, verbose); } Teuchos::ArrayView gblColInds_av (gblColInds, numGblColInds); @@ -2928,7 +2975,11 @@ namespace Tpetra { ! rowMap_->isNodeLocalElement (lrow), std::runtime_error, "Local row " << lrow << " is not in the row Map on the calling process."); if (! indicesAreAllocated ()) { - allocateIndices (LocalIndices); + // Allocating indices takes a while and only needs to be done + // once per MPI process, so it's OK to query TPETRA_VERBOSE. 
+ using ::Tpetra::Details::Behavior; + const bool verbose = Behavior::verbose("CrsGraph"); + allocateIndices (LocalIndices, verbose); } // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on @@ -3430,8 +3481,10 @@ namespace Tpetra { const Teuchos::RCP& rangeMap, const Teuchos::RCP& params) { + using ::Tpetra::Details::Behavior; const char tfecfFuncName[] = "fillComplete: "; - const bool debug = ::Tpetra::Details::Behavior::debug (); + const bool debug = Behavior::debug("CrsGraph"); + const bool verbose = Behavior::verbose("CrsGraph"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (! isFillActive () || isFillComplete (), std::runtime_error, @@ -3473,10 +3526,10 @@ namespace Tpetra { if (! indicesAreAllocated ()) { if (hasColMap ()) { // We have a column Map, so use local indices. - allocateIndices (LocalIndices); + allocateIndices (LocalIndices, verbose); } else { // We don't have a column Map, so use global indices. - allocateIndices (GlobalIndices); + allocateIndices (GlobalIndices, verbose); } } @@ -4899,6 +4952,7 @@ namespace Tpetra { using row_graph_type = RowGraph; const char tfecfFuncName[] = "copyAndPermute: "; const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph"); + const bool verbose = ::Tpetra::Details::Behavior::verbose ("CrsGraph"); std::unique_ptr prefix; if (debug) { @@ -4926,9 +4980,9 @@ namespace Tpetra { os << *prefix << "Target is StaticProfile; do CRS padding" << endl; std::cerr << os.str (); } - auto padding = - computeCrsPadding (srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs); - this->applyCrsPadding(padding); + auto padding = computeCrsPadding (srcRowGraph, numSameIDs, + permuteToLIDs, permuteFromLIDs, verbose); + this->applyCrsPadding(padding, verbose); // If the source object is actually a CrsGraph, we can use view // mode instead of copy mode to access the entries in each row, @@ -5013,31 +5067,56 @@ namespace Tpetra { template void CrsGraph:: - applyCrsPadding(const Kokkos::UnorderedMap& padding) + applyCrsPadding( 
+ const Kokkos::UnorderedMap& padding, + const bool verbose) { - // const char tfecfFuncName[] = "applyCrsPadding"; + using ::Tpetra::Details::ProfilingRegion; + using Tpetra::Details::padCrsArrays; + using std::endl; using execution_space = typename device_type::execution_space; using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type; using indices_type = t_GlobalOrdinal_1D; using local_indices_type = typename local_graph_type::entries_type::non_const_type; using range_policy = Kokkos::RangePolicy>; - using Tpetra::Details::padCrsArrays; + ProfilingRegion regionCAP ("Tpetra::CrsGraph::applyCrsPadding"); - if (padding.size() == 0) + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("applyCrsPadding"); + std::ostringstream os; + os << *prefix << "padding.size(): " << padding.size() + << ", indicesAreAllocated: " + << (indicesAreAllocated() ? "true" : "false") << endl; + std::cerr << os.str (); + } + if (padding.size() == 0) { return; + } // Assume global indexing we don't have any indices yet if (! this->indicesAreAllocated()) { - allocateIndices(GlobalIndices); + allocateIndices(GlobalIndices, verbose); } // Making copies here because k_rowPtrs_ has a const type. Otherwise, we // would use it directly. + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate row_ptrs_beg: " + << k_rowPtrs_.extent(0) << endl; + std::cerr << os.str(); + } row_ptrs_type row_ptrs_beg("row_ptrs_beg", this->k_rowPtrs_.extent(0)); Kokkos::deep_copy(row_ptrs_beg, this->k_rowPtrs_); const size_t N = (row_ptrs_beg.extent(0) == 0 ? 
0 : row_ptrs_beg.extent(0) - 1); + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate row_ptrs_end: " << N << endl; + std::cerr << os.str(); + } row_ptrs_type row_ptrs_end("row_ptrs_end", N); bool refill_num_row_entries = false; @@ -5063,17 +5142,41 @@ namespace Tpetra { } if(this->isGloballyIndexed()) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate (copy of) global column indices: " + << k_gblInds1D_.extent(0) << endl; + std::cerr << os.str(); + } indices_type indices("indices", this->k_gblInds1D_.extent(0)); Kokkos::deep_copy(indices, this->k_gblInds1D_); using padding_type = Kokkos::UnorderedMap; padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding); + if (verbose) { + std::ostringstream os; + os << *prefix << "Reassign k_gblInds1D_; old size: " + << k_gblInds1D_.extent(0) << endl; + std::cerr << os.str(); + } this->k_gblInds1D_ = indices; } else { + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate (copy of) local column indices: " + << k_lclInds1D_.extent(0) << endl; + std::cerr << os.str(); + } local_indices_type indices("indices", this->k_lclInds1D_.extent(0)); Kokkos::deep_copy(indices, this->k_lclInds1D_); using padding_type = Kokkos::UnorderedMap; padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding); + if (verbose) { + std::ostringstream os; + os << *prefix << "Reassign k_lclInds1D_; old size: " + << k_lclInds1D_.extent(0) << endl; + std::cerr << os.str(); + } this->k_lclInds1D_ = indices; } @@ -5085,19 +5188,58 @@ namespace Tpetra { } ); } + if (verbose) { + std::ostringstream os; + os << *prefix << "Reassign k_rowPtrs_; old size: " + << k_rowPtrs_.extent(0) << endl; + std::cerr << os.str(); + } this->k_rowPtrs_ = row_ptrs_beg; } + template + std::unique_ptr + CrsGraph:: + createPrefix(const char methodName[]) const + { + int myRank = -1; + auto map = this->getMap(); + if (! map.is_null()) { + auto comm = map->getComm(); + if (! 
comm.is_null()) { + myRank = comm->getRank(); + } + } + std::ostringstream pfxStrm; + pfxStrm << "Proc " << myRank << ": Tpetra::CrsGraph::" + << methodName << ": "; + return std::unique_ptr( + new std::string(pfxStrm.str())); + } + template Kokkos::UnorderedMap CrsGraph:: computeCrsPadding (const RowGraph& source, const size_t numSameIDs, const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs) const + const Kokkos::DualView& permuteFromLIDs, + const bool verbose) const { + using std::endl; using LO = LocalOrdinal; using padding_type = Kokkos::UnorderedMap; + + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("computeCrsPadding(same & permute)"); + std::ostringstream os; + os << *prefix << "{numSameIDs: " << numSameIDs + << ", numPermutes: " << permuteFromLIDs.extent(0) << "}" + << endl; + std::cerr << os.str(); + } + padding_type padding (numSameIDs + permuteFromLIDs.extent (0)); computeCrsPaddingForSameIDs(padding, source, numSameIDs, false); @@ -5244,10 +5386,24 @@ namespace Tpetra { Kokkos::UnorderedMap CrsGraph:: computeCrsPadding (const Kokkos::DualView& importLIDs, - Kokkos::DualView numPacketsPerLID) const + Kokkos::DualView numPacketsPerLID, + const bool verbose) const { + using std::endl; const char tfecfFuncName[] = "computeCrsPadding: "; + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("computeCrsPadding(imports)"); + std::ostringstream os; + os << *prefix << "{importLIDs.extent(0): " + << importLIDs.extent(0) + << ", numPacketsPerLID.extent(0): " + << numPacketsPerLID.extent(0) << "}" + << endl; + std::cerr << os.str(); + } + // Creating padding for each new incoming index Kokkos::fence (); // Make sure device sees changes made by host auto numEnt = static_cast (importLIDs.extent (0)); @@ -5925,29 +6081,24 @@ namespace Tpetra { Distributor& /* distor */, const CombineMode /* combineMode */ ) { + using ::Tpetra::Details::ProfilingRegion; using std::endl; using LO = local_ordinal_type; using GO 
= global_ordinal_type; const char tfecfFuncName[] = "unpackAndCombine: "; - const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph"); - std::unique_ptr prefix; - if (debug) { - std::ostringstream os; - const int myRank = this->getMap ()->getComm ()->getRank (); - os << "Proc " << myRank << ": Tpetra::CrsGraph::unpackAndCombine: "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << endl; - std::cerr << os.str (); - } + ProfilingRegion regionCGC ("Tpetra::CrsGraph::unpackAndCombine"); + const bool verbose = ::Tpetra::Details::Behavior::verbose ("CrsGraph"); - if (debug) { + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("unpackAndCombine"); std::ostringstream os; - os << *prefix << "Target is StaticProfile; do CRS padding" << endl; + os << *prefix << endl; std::cerr << os.str (); } - auto padding = computeCrsPadding (importLIDs, numPacketsPerLID); - applyCrsPadding(padding); + auto padding = computeCrsPadding (importLIDs, numPacketsPerLID, verbose); + applyCrsPadding(padding, verbose); // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly // reasonable meaning, whether or not the matrix is fill complete. 
@@ -6029,7 +6180,7 @@ namespace Tpetra { } - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "Done" << endl; std::cerr << os.str (); diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index b12f275ef23e..3677a474bc36 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -3434,9 +3434,14 @@ namespace Tpetra { checkSizes (const SrcDistObject& source) override; void - applyCrsPadding(const Kokkos::UnorderedMap& padding); + applyCrsPadding( + const Kokkos::UnorderedMap& padding, + const bool verbose); private: + std::unique_ptr + createPrefix(const char methodName[]) const; + void copyAndPermuteImpl (const RowMatrix& source, const size_t numSameIDs, @@ -4150,6 +4155,8 @@ namespace Tpetra { /// \param lg [in] Argument passed into \c /// myGraph_->allocateIndices(), if applicable. /// + /// \param verbose [in] Whether to print verbose debugging output. + /// /// \pre If the graph (that is, staticGraph_) indices are /// already allocated, then gas must be GraphAlreadyAllocated. /// Otherwise, gas must be GraphNotYetAllocated. We only @@ -4157,7 +4164,8 @@ namespace Tpetra { /// /// \pre If the graph indices are not already allocated, then /// the graph must be owned by the matrix. - void allocateValues (ELocalGlobal lg, GraphAllocationStatus gas); + void allocateValues (ELocalGlobal lg, GraphAllocationStatus gas, + const bool verbose); /// \brief Merge duplicate row indices in the given row, along /// with their corresponding values. 
diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 72be5fc1f150..4e36b65d4497 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -1077,20 +1077,30 @@ namespace Tpetra { template void CrsMatrix:: - allocateValues (ELocalGlobal lg, GraphAllocationStatus gas) + allocateValues (ELocalGlobal lg, GraphAllocationStatus gas, + const bool verbose) { + using ::Tpetra::Details::Behavior; + using ::Tpetra::Details::ProfilingRegion; + using std::endl; const char tfecfFuncName[] = "allocateValues: "; const char suffix[] = " Please report this bug to the Tpetra developers."; + ProfilingRegion region("Tpetra::CrsMatrix::allocateValues"); - ::Tpetra::Details::ProfilingRegion regionAllocateValues - ("Tpetra::CrsMatrix::allocateValues"); -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("allocateValues"); + std::ostringstream os; + os << *prefix << "{lg: " + << (lg == LocalIndices ? "Local" : "Global") << "Indices" + << ", gas: Graph" + << (gas == GraphAlreadyAllocated ? "Already" : "NotYet") + << "Allocated" << endl; + std::cerr << os.str (); + } + const bool debug = Behavior::debug("CrsMatrix"); if (debug) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (this->staticGraph_.is_null (), std::logic_error, @@ -1135,7 +1145,7 @@ namespace Tpetra { "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix); } try { - this->myGraph_->allocateIndices (lg); + this->myGraph_->allocateIndices (lg, verbose); } catch (std::exception& e) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -1177,6 +1187,13 @@ namespace Tpetra { // Allocate array of (packed???) matrix values. 
using values_type = typename local_matrix_type::values_type; + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate k_values1D_: Pre " + << k_values1D_.extent(0) << ", post " + << lclTotalNumEntries << endl; + std::cerr << os.str(); + } this->k_values1D_ = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries); } @@ -1740,7 +1757,11 @@ namespace Tpetra { "Local row index " << lclRow << " does not belong to this process."); if (! graph.indicesAreAllocated ()) { - this->allocateValues (LocalIndices, GraphNotYetAllocated); + // We only allocate values at most once per process, so it's OK + // to check TPETRA_VERBOSE here. + using ::Tpetra::Details::Behavior; + const bool verbose = Behavior::verbose("CrsMatrix"); + this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose); } #ifdef HAVE_TPETRA_DEBUG @@ -1814,7 +1835,11 @@ namespace Tpetra { #endif // HAVE_TPETRA_DEBUG if (! graph.indicesAreAllocated ()) { - this->allocateValues (GlobalIndices, GraphNotYetAllocated); + // We only allocate values at most once per process, so it's OK + // to check TPETRA_VERBOSE here. + using ::Tpetra::Details::Behavior; + const bool verbose = Behavior::verbose("CrsMatrix"); + this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose); // mfh 23 Jul 2017: allocateValues invalidates existing // getRowInfo results. Once we get rid of lazy graph // allocation, we'll be able to move the getRowInfo call outside @@ -4472,12 +4497,13 @@ namespace Tpetra { } if (! this->getCrsGraphRef ().indicesAreAllocated ()) { - if (this->hasColMap ()) { - // We have a column Map, so use local indices. - this->allocateValues (LocalIndices, GraphNotYetAllocated); - } else { - // We don't have a column Map, so use global indices. 
- this->allocateValues (GlobalIndices, GraphNotYetAllocated); + using ::Tpetra::Details::Behavior; + const bool verbose = Behavior::verbose("CrsMatrix"); + if (this->hasColMap ()) { // use local indices + allocateValues(LocalIndices, GraphNotYetAllocated, verbose); + } + else { // no column Map, so use global indices + allocateValues(GlobalIndices, GraphNotYetAllocated, verbose); } } // Global assemble, if we need to. This call only costs a single @@ -6165,21 +6191,53 @@ namespace Tpetra { return (srcRowMat != NULL); } + template + std::unique_ptr + CrsMatrix:: + createPrefix(const char methodName[]) const + { + int myRank = -1; + auto map = this->getMap(); + if (! map.is_null()) { + auto comm = map->getComm(); + if (! comm.is_null()) { + myRank = comm->getRank(); + } + } + std::ostringstream pfxStrm; + pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::" + << methodName << ": "; + return std::unique_ptr( + new std::string(pfxStrm.str())); + } + template void CrsMatrix:: - applyCrsPadding(const Kokkos::UnorderedMap& padding) + applyCrsPadding( + const Kokkos::UnorderedMap& padding, + const bool verbose) { - // const char tfecfFuncName[] = "applyCrsPadding"; + using ::Tpetra::Details::ProfilingRegion; + using Tpetra::Details::padCrsArrays; + using std::endl; using execution_space = typename device_type::execution_space; using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type; using range_policy = Kokkos::RangePolicy>; - using Tpetra::Details::padCrsArrays; - const char tfecfFuncName[] = "applyCrsPadding: "; + ProfilingRegion regionCAP ("Tpetra::CrsMatrix::applyCrsPadding"); + + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("applyCrsPadding"); + std::ostringstream os; + os << *prefix << "padding.size(): " << padding.size() << endl; + std::cerr << os.str (); + } + // NOTE (mfh 29 Jan 2020) This allocates the values array. if (! 
myGraph_->indicesAreAllocated ()) { - this->allocateValues (GlobalIndices, GraphNotYetAllocated); + this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose); } if (padding.size() == 0) @@ -6188,10 +6246,21 @@ namespace Tpetra { // Making copies here because k_rowPtrs_ has a const type. Otherwise, we // would use it directly. + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate row_ptrs_beg: " + << myGraph_->k_rowPtrs_.extent(0) << endl; + std::cerr << os.str(); + } row_ptrs_type row_ptr_beg("row_ptr_beg", myGraph_->k_rowPtrs_.extent(0)); Kokkos::deep_copy(row_ptr_beg, myGraph_->k_rowPtrs_); const size_t N = (row_ptr_beg.extent(0) == 0 ? 0 : row_ptr_beg.extent(0) - 1); + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate row_ptrs_end: " << N << endl; + std::cerr << os.str(); + } row_ptrs_type row_ptr_end("row_ptr_end", N); bool refill_num_row_entries = false; @@ -6217,15 +6286,32 @@ namespace Tpetra { } using values_type = typename local_matrix_type::values_type; + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate (copy of) values: " << k_values1D_.size() << endl; + std::cerr << os.str(); + } values_type values("values", k_values1D_.size()); Kokkos::deep_copy(values, k_values1D_); if(myGraph_->isGloballyIndexed()) { using indices_type = typename crs_graph_type::t_GlobalOrdinal_1D; + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate (copy of) global column indices: " + << myGraph_->k_gblInds1D_.extent(0) << endl; + std::cerr << os.str(); + } indices_type indices("indices", myGraph_->k_gblInds1D_.extent(0)); Kokkos::deep_copy(indices, myGraph_->k_gblInds1D_); using padding_type = Kokkos::UnorderedMap; padCrsArrays(row_ptr_beg, row_ptr_end, indices, values, padding); + if (verbose) { + std::ostringstream os; + os << *prefix << "Free old myGraph_->k_gblInds1D_: " + << myGraph_->k_gblInds1D_.extent(0) << endl; + std::cerr << os.str(); + } myGraph_->k_gblInds1D_ = indices; 
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( values.size() != indices.size(), @@ -6234,10 +6320,22 @@ namespace Tpetra { } else { using indices_type = typename local_graph_type::entries_type::non_const_type; + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate (copy of) local column indices: " + << myGraph_->k_lclInds1D_.extent(0) << endl; + std::cerr << os.str(); + } indices_type indices("indices", myGraph_->k_lclInds1D_.extent(0)); Kokkos::deep_copy(indices, myGraph_->k_lclInds1D_); using padding_type = Kokkos::UnorderedMap; padCrsArrays(row_ptr_beg, row_ptr_end, indices, values, padding); + if (verbose) { + std::ostringstream os; + os << *prefix << "Free old myGraph_->k_lclInds1D_: " + << myGraph_->k_lclInds1D_.extent(0) << endl; + std::cerr << os.str(); + } myGraph_->k_lclInds1D_ = indices; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( values.size() != indices.size(), @@ -6253,7 +6351,20 @@ namespace Tpetra { } ); } + + if (verbose) { + std::ostringstream os; + os << *prefix << "Free old myGraph_->k_rowPtrs_: " + << myGraph_->k_rowPtrs_.extent(0) << endl; + std::cerr << os.str(); + } myGraph_->k_rowPtrs_ = row_ptr_beg; + if (verbose) { + std::ostringstream os; + os << *prefix << "Free old k_values1D_: " + << k_values1D_.extent(0) << endl; + std::cerr << os.str(); + } k_values1D_ = values; } @@ -6423,25 +6534,21 @@ namespace Tpetra { const bool verbose = ::Tpetra::Details::Behavior::verbose (); std::unique_ptr prefix; if (verbose) { - int myRank = -1; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! 
comm.is_null ()) { - myRank = comm->getRank (); - } - } - prefix = [myRank] () { - std::ostringstream pfxStrm; - pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::copyAndPermute: "; - return std::unique_ptr (new std::string (pfxStrm.str ())); - } (); + prefix = createPrefix("copyAndPermute"); std::ostringstream os; os << *prefix << endl + << *prefix << " numSameIDs: " << numSameIDs << endl + << *prefix << " numPermute: " << permuteToLIDs.extent(0) + << endl << *prefix << " " - << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl + << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") + << endl + << *prefix << " " + << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") + << endl << *prefix << " " - << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl; + << "isStaticGraph: " << (isStaticGraph() ? "true" : "false") + << endl; std::cerr << os.str (); } @@ -6464,12 +6571,12 @@ namespace Tpetra { if (!this->isStaticGraph ()) { auto padding = - this->myGraph_->computeCrsPadding(*srcMat.getGraph(), numSameIDs, permuteToLIDs, permuteFromLIDs); + this->myGraph_->computeCrsPadding(*srcMat.getGraph(), + numSameIDs, permuteToLIDs, permuteFromLIDs, verbose); if (padding.size() > 0) - this->applyCrsPadding(padding); + this->applyCrsPadding(padding, verbose); } - if (verbose) { std::ostringstream os; os << *prefix << "Call copyAndPermuteImpl" << endl; @@ -7359,9 +7466,9 @@ namespace Tpetra { } if (!this->isStaticGraph()) { - auto padding = myGraph_->computeCrsPadding(importLIDs, numPacketsPerLID); + auto padding = myGraph_->computeCrsPadding(importLIDs, numPacketsPerLID, verbose); if (padding.size() > 0) - this->applyCrsPadding(padding); + this->applyCrsPadding(padding, verbose); } if (debug) { From ecb86f35b214f932907276bf6cbfc2b9ca410a4b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 29 Jan 2020 18:13:46 -0700 Subject: [PATCH 2/8] Tpetra: Add verbose debugging output to padCrsArrays --- 
.../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 18 +++++- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 17 +++++- .../core/src/Tpetra_Details_crsUtils.hpp | 61 ++++++++++++++++--- ...a_Details_unpackCrsGraphAndCombine_def.hpp | 25 ++++++-- .../core/test/Utils/TpetraUtils_crsUtils.cpp | 9 ++- 5 files changed, 111 insertions(+), 19 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index c9c231ec9dcf..2f951b3028e2 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5090,6 +5090,18 @@ namespace Tpetra { << (indicesAreAllocated() ? "true" : "false") << endl; std::cerr << os.str (); } + const int myRank = ! verbose ? -1 : [&] () { + auto map = this->getMap(); + if (map.is_null()) { + return -1; + } + auto comm = map->getComm(); + if (comm.is_null()) { + return -1; + } + return comm->getRank(); + } (); + if (padding.size() == 0) { return; } @@ -5151,7 +5163,8 @@ namespace Tpetra { indices_type indices("indices", this->k_gblInds1D_.extent(0)); Kokkos::deep_copy(indices, this->k_gblInds1D_); using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding); + padCrsArrays(row_ptrs_beg, + row_ptrs_end, indices, padding, myRank, verbose); if (verbose) { std::ostringstream os; os << *prefix << "Reassign k_gblInds1D_; old size: " @@ -5170,7 +5183,8 @@ namespace Tpetra { local_indices_type indices("indices", this->k_lclInds1D_.extent(0)); Kokkos::deep_copy(indices, this->k_lclInds1D_); using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding); + padCrsArrays(row_ptrs_beg, + row_ptrs_end, indices, padding, myRank, verbose); if (verbose) { std::ostringstream os; os << *prefix << "Reassign k_lclInds1D_; old size: " diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 
4e36b65d4497..063f0ede6558 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6234,6 +6234,17 @@ namespace Tpetra { os << *prefix << "padding.size(): " << padding.size() << endl; std::cerr << os.str (); } + const int myRank = ! verbose ? -1 : [&] () { + auto map = this->getMap(); + if (map.is_null()) { + return -1; + } + auto comm = map->getComm(); + if (comm.is_null()) { + return -1; + } + return comm->getRank(); + } (); // NOTE (mfh 29 Jan 2020) This allocates the values array. if (! myGraph_->indicesAreAllocated ()) { @@ -6305,7 +6316,8 @@ namespace Tpetra { indices_type indices("indices", myGraph_->k_gblInds1D_.extent(0)); Kokkos::deep_copy(indices, myGraph_->k_gblInds1D_); using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptr_beg, row_ptr_end, indices, values, padding); + padCrsArrays(row_ptr_beg, + row_ptr_end, indices, values, padding, myRank, verbose); if (verbose) { std::ostringstream os; os << *prefix << "Free old myGraph_->k_gblInds1D_: " @@ -6329,7 +6341,8 @@ namespace Tpetra { indices_type indices("indices", myGraph_->k_lclInds1D_.extent(0)); Kokkos::deep_copy(indices, myGraph_->k_lclInds1D_); using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptr_beg, row_ptr_end, indices, values, padding); + padCrsArrays(row_ptr_beg, + row_ptr_end, indices, values, padding, myRank, verbose); if (verbose) { std::ostringstream os; os << *prefix << "Free old myGraph_->k_lclInds1D_: " diff --git a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp index 73fb2d9697b5..51f006f0a5a7 100644 --- a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -47,6 +45,8 @@ #include "TpetraCore_config.h" #include "Kokkos_Core.hpp" #include "Kokkos_UnorderedMap.hpp" +#include +#include /// \file Tpetra_Details_crsUtils.hpp /// \brief Functions for manipulating CRS arrays @@ -74,8 +74,19 @@ pad_crs_arrays( const RowPtr& row_ptr_end, Indices& indices, Values& values, - const Padding& padding) + const Padding& padding, + const int my_rank, + const bool verbose) { + using std::endl; + std::unique_ptr prefix; + if (verbose) { + std::ostringstream os; + os << "Proc " << my_rank << ": Tpetra::...::pad_crs_arrays: "; + prefix = std::unique_ptr(new std::string(os.str())); + os << "padding.size()=" << padding.size() << endl; + std::cerr << os.str(); + } if (padding.size() == 0 || row_ptr_beg.size() == 0) { // Nothing to do @@ -87,6 +98,11 @@ pad_crs_arrays( // Determine if the indices array is large enough auto num_row = row_ptr_beg.size() - 1; auto policy = Kokkos::RangePolicy(0, num_row); + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate entries_this_row: " << num_row << endl; + std::cerr << os.str(); + } RowPtr entries_this_row("entries_this_row", num_row); Kokkos::deep_copy(entries_this_row, 0); size_t additional_size_needed = 0; @@ -116,7 +132,20 @@ pad_crs_arrays( using vals_value_type = typename Values::non_const_value_type; // The indices array must be resized and the row_ptr arrays shuffled + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate indices_new: " + << (indices.size() + additional_size_needed) << endl; + std::cerr << os.str(); + } auto indices_new = uninitialized_view("ind new", indices.size()+additional_size_needed); + if (verbose) { + const size_t new_size = pad_values ? 
+ size_t(values.size()) + additional_size_needed : size_t(0); + std::ostringstream os; + os << *prefix << "Allocate values_new: " << new_size << endl; + std::cerr << os.str(); + } auto values_new = uninitialized_view("val new", pad_values ? values.size()+additional_size_needed : 0); Kokkos::deep_copy(values_new, vals_value_type(0.0)); @@ -174,7 +203,19 @@ pad_crs_arrays( } } + if (verbose) { + std::ostringstream os; + os << *prefix << "Assign to indices: old=" << indices.size() + << ", new=" << indices_new.size() << endl; + std::cerr << os.str(); + } indices = indices_new; + if (verbose) { + std::ostringstream os; + os << *prefix << "Assign to values: old=" << values.size() + << ", new=" << values_new.size() << endl; + std::cerr << os.str(); + } values = values_new; } @@ -303,12 +344,15 @@ padCrsArrays( const RowPtr& rowPtrBeg, const RowPtr& rowPtrEnd, Indices& indices, - const Padding& padding) + const Padding& padding, + const int my_rank, + const bool verbose) { using impl::pad_crs_arrays; // send empty values array Indices values; - pad_crs_arrays(rowPtrBeg, rowPtrEnd, indices, values, padding); + pad_crs_arrays(rowPtrBeg, + rowPtrEnd, indices, values, padding, my_rank, verbose); } template @@ -318,10 +362,13 @@ padCrsArrays( const RowPtr& rowPtrEnd, Indices& indices, Values& values, - const Padding& padding) + const Padding& padding, + const int my_rank, + const bool verbose) { using impl::pad_crs_arrays; - pad_crs_arrays(rowPtrBeg, rowPtrEnd, indices, values, padding); + pad_crs_arrays(rowPtrBeg, + rowPtrEnd, indices, values, padding, my_rank, verbose); } /// \brief Insert new indices in to current list of indices diff --git a/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp b/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp index 78da877aaaa3..c7b151ff40d3 100644 --- a/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp +++ 
b/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -337,9 +335,10 @@ unpackAndCombine const Kokkos::View& imports, const Kokkos::View& num_packets_per_lid, const Kokkos::View& import_lids, - const bool unpack_pids) + const bool unpack_pids, + const int myRank, + const bool verbose) { - using ImportLidsView = Kokkos::View; using NumPacketsView = @@ -366,7 +365,8 @@ unpackAndCombine auto padding = computeCrsPadding (num_packets_per_lid, import_lids, unpack_pids); - padCrsArrays (row_ptrs_beg, row_ptrs_end, indices, padding); + padCrsArrays (row_ptrs_beg, + row_ptrs_end, indices, padding, myRank, verbose); // Get the offsets Kokkos::View offsets("offsets", num_import_lids+1); @@ -949,9 +949,22 @@ unpackCrsGraphAndCombine( } // Now do the actual unpack! + const bool verbose = ::Tpetra::Details::Behavior::verbose("CrsGraph"); + const int myRank = ! verbose ? -1 : [&] () { + auto map = graph.getMap(); + if (map.is_null()) { + return -1; + } + auto comm = map->getComm(); + if (comm.is_null()) { + return -1; + } + return comm->getRank(); + } (); + unpackAndCombine (row_ptrs_beg, row_ptrs_end, indices, imports_d, - num_packets_per_lid_d, import_lids_d, false); + num_packets_per_lid_d, import_lids_d, false, myRank, verbose); // mfh Later, permit graph to be locally indexed, and check whether // incoming column indices are in the column Map. If not, error. 
diff --git a/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp b/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp index 6b60258dd9e2..d3c43d0f84a6 100644 --- a/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp +++ b/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp @@ -117,7 +117,9 @@ TEUCHOS_UNIT_TEST(CrsGraph, ResizeRowPointersAndIndices_1) execution_space().fence(); TEST_ASSERT(!padding.failed_insert()); - padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding); + const int myRank = 0; + const bool verbose = false; + padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding, myRank, verbose); TEST_ASSERT(indices.size() == static_cast(num_indices + num_extra)); { @@ -190,7 +192,10 @@ TEUCHOS_UNIT_TEST(CrsGraph, ResizeRowPointersAndIndices_2) } execution_space().fence(); TEST_ASSERT(!padding.failed_insert()); - padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding); + + const int myRank = 0; + const bool verbose = false; + padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding, myRank, verbose); // Check row offsets TEST_ASSERT(row_ptrs_beg(0) == 0); From 08d6a854ca2e459981e7f23d64c7b008b66a5653 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 29 Jan 2020 22:05:01 -0700 Subject: [PATCH 3/8] Tpetra::CrsMatrix: Add more verbose output on allocation --- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 063f0ede6558..953c9938bb0a 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -314,9 +314,20 @@ namespace Tpetra { fillComplete_ (false), frobNorm_ (-STM::one ()) { + using std::endl; typedef typename local_matrix_type::values_type values_type; const char tfecfFuncName[] = "CrsMatrix(RCP[, " "RCP]): "; + const bool verbose = Details::Behavior::verbose("CrsMatrix"); + + std::unique_ptr 
prefix; + if (verbose) { + prefix = createPrefix("CrsMatrix(CrsGraph,params)"); + std::ostringstream os; + os << *prefix << endl; + std::cerr << os.str (); + } + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (graph.is_null (), std::runtime_error, "Input graph is null."); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -336,6 +347,11 @@ namespace Tpetra { const size_t numCols = graph->getColMap ()->getNodeNumElements (); auto lclGraph = graph->getLocalGraph (); const size_t numEnt = lclGraph.entries.extent (0); + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate values: " << numEnt << endl; + std::cerr << os.str (); + } values_type val ("Tpetra::CrsMatrix::val", numEnt); auto lclMat = std::make_shared @@ -345,6 +361,13 @@ namespace Tpetra { // FIXME (22 Jun 2016) I would very much like to get rid of // k_values1D_ at some point. I find it confusing to have all // these extra references lying around. + if (verbose) { + std::ostringstream os; + os << *prefix << "Assign k_values1D_: old=" + << k_values1D_.extent(0) << ", new=" + << lclMat->values.extent(0) << endl; + std::cerr << os.str (); + } k_values1D_ = lclMat->values; checkInternalState (); From f6d2b371fd5d7a31a857d1fcd15e1cc1cbf88e2d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 29 Jan 2020 22:17:25 -0700 Subject: [PATCH 4/8] Tpetra::CrsMatrix::fillLocalGraphAndMatrix: Add verbose output Add verbose output for Kokkos::View allocation & assignment. 
--- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 84 ++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 953c9938bb0a..87432cc6357a 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -1275,6 +1275,7 @@ namespace Tpetra { using Teuchos::null; using Teuchos::RCP; using Teuchos::rcp; + using std::endl; using row_map_type = typename local_matrix_type::row_map_type; using lclinds_1d_type = typename Graph::local_graph_type::entries_type::non_const_type; using values_type = typename local_matrix_type::values_type; @@ -1285,7 +1286,16 @@ namespace Tpetra { "fillComplete or expertStaticFillComplete): "; const char suffix[] = " Please report this bug to the Tpetra developers."; - const bool debug = Details::Behavior::debug ("CrsGraph"); + const bool debug = Details::Behavior::debug("CrsMatrix"); + const bool verbose = Details::Behavior::verbose("CrsMatrix"); + + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("fillLocalGraphAndMatrix"); + std::ostringstream os; + os << *prefix << endl; + std::cerr << os.str (); + } if (debug) { // fillComplete() only calls fillLocalGraphAndMatrix() if the @@ -1347,7 +1357,16 @@ namespace Tpetra { << numOffsets << ") = " << valToCheck << "."); } - if (myGraph_->getNodeNumEntries () != myGraph_->getNodeAllocationSize ()) { + if (myGraph_->getNodeNumEntries() != + myGraph_->getNodeAllocationSize()) { + if (verbose) { + std::ostringstream os; + const auto numEnt = myGraph_->getNodeNumEntries(); + const auto allocSize = myGraph_->getNodeAllocationSize(); + os << *prefix << "Unpacked 1-D storage: numEnt=" << numEnt + << ", allocSize=" << allocSize << endl; + std::cerr << os.str (); + } // The matrix's current 1-D storage is "unpacked." This means // the row offsets may differ from what the final row offsets // should be. 
This could happen, for example, if the user @@ -1388,6 +1407,12 @@ namespace Tpetra { // Allocate the packed row offsets array. We use a nonconst // temporary (packedRowOffsets) here, because k_ptrs is // const. We will assign packedRowOffsets to k_ptrs below. + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate packed row offsets: " + << (lclNumRows+1) << endl; + std::cerr << os.str (); + } typename row_map_type::non_const_type packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows + 1); typename row_entries_type::const_type numRowEnt_h = @@ -1419,7 +1444,19 @@ namespace Tpetra { } // Allocate the arrays of packed column indices and values. + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate packed local column indices: " + << lclTotalNumEntries << endl; + std::cerr << os.str (); + } k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries); + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate packed values: " + << lclTotalNumEntries << endl; + std::cerr << os.str (); + } k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries); // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_, @@ -1476,6 +1513,14 @@ namespace Tpetra { } } else { // We don't have to pack, so just set the pointers. + if (verbose) { + std::ostringstream os; + os << *prefix << "Storage already packed: k_rowPtrs_: " + << myGraph_->k_rowPtrs_.extent(0) << ", k_lclInds1D_: " + << myGraph_->k_lclInds1D_.extent(0) << ", k_values1D_: " + << k_values1D_.extent(0) << endl; + std::cerr << os.str(); + } k_ptrs_const = myGraph_->k_rowPtrs_; k_inds = myGraph_->k_lclInds1D_; k_vals = this->k_values1D_; @@ -1551,11 +1596,38 @@ namespace Tpetra { // Free graph data structures that are only needed for // unpacked 1-D storage. 
+ if (verbose) { + std::ostringstream os; + os << *prefix << "Optimizing storage: free k_numRowEntries_: " + << myGraph_->k_numRowEntries_.extent(0) << endl; + std::cerr << os.str(); + } myGraph_->k_numRowEntries_ = row_entries_type (); // Keep the new 1-D packed allocations. + if (verbose) { + std::ostringstream os; + os << *prefix << "Assign k_rowPtrs_: old=" + << myGraph_->k_rowPtrs_.extent(0) << ", new=" + << k_ptrs_const.extent(0) << endl; + std::cerr << os.str(); + } myGraph_->k_rowPtrs_ = k_ptrs_const; + if (verbose) { + std::ostringstream os; + os << *prefix << "Assign k_lclInds1D_: old=" + << myGraph_->k_lclInds1D_.extent(0) << ", new=" + << k_inds.extent(0) << endl; + std::cerr << os.str(); + } myGraph_->k_lclInds1D_ = k_inds; + if (verbose) { + std::ostringstream os; + os << *prefix << "Assign k_values1D_: old=" + << k_values1D_.extent(0) << ", new=" + << k_vals.extent(0) << endl; + std::cerr << os.str(); + } this->k_values1D_ = k_vals; // Whatever graph was before, it's StaticProfile now. @@ -1563,6 +1635,14 @@ namespace Tpetra { myGraph_->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED; this->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED; } + else { + if (verbose) { + std::ostringstream os; + os << *prefix << "User requested NOT to optimize storage" + << endl; + std::cerr << os.str(); + } + } // Make the local graph, using the arrays of row offsets and // column indices that we built above.
The local graph should be From 36aa03c7cd4c506e1427491fb5ff8b7128f88e6f Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 29 Jan 2020 22:24:20 -0700 Subject: [PATCH 5/8] Tpetra::CrsMatrix::fillLocalMatrix: Add verbose output --- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 47 ++++++++++++++++--- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 87432cc6357a..1b2f6beffef7 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -1674,6 +1674,7 @@ namespace Tpetra { using Teuchos::null; using Teuchos::RCP; using Teuchos::rcp; + using std::endl; using row_map_type = typename Graph::local_graph_type::row_map_type; using non_const_row_map_type = typename row_map_type::non_const_type; using values_type = typename local_matrix_type::values_type; @@ -1681,9 +1682,17 @@ namespace Tpetra { // const char tfecfFuncName[] = "fillLocalMatrix (called from fillComplete): "; #endif // HAVE_TPETRA_DEBUG ProfilingRegion regionFLM ("Tpetra::CrsMatrix::fillLocalMatrix"); - const size_t lclNumRows = getNodeNumRows(); + const bool verbose = Details::Behavior::verbose("CrsMatrix"); + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("fillLocalMatrix"); + std::ostringstream os; + os << *prefix << "lclNumRows: " << lclNumRows << endl; + std::cerr << os.str (); + } + // The goals of this routine are first, to allocate and fill // packed 1-D storage (see below for an explanation) in the vals // array, and second, to give vals to the local matrix and @@ -1705,8 +1714,9 @@ namespace Tpetra { // optimized storage by default. bool requestOptimizedStorage = true; const bool default_OptimizeStorage = - ! isStaticGraph () || staticGraph_->isStorageOptimized (); - if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) { + ! 
isStaticGraph() || staticGraph_->isStorageOptimized(); + if (! params.is_null() && + ! params->get("Optimize Storage", default_OptimizeStorage)) { requestOptimizedStorage = false; } // If we're not allowed to change a static graph, then we can't @@ -1714,7 +1724,8 @@ namespace Tpetra { // the graph's storage isn't already optimized, we can't optimize // the matrix's storage either. Check and give warning, as // appropriate. - if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) { + if (! staticGraph_->isStorageOptimized () && + requestOptimizedStorage) { TPETRA_ABUSE_WARNING (true, std::runtime_error, "You requested optimized storage " "by setting the \"Optimize Storage\" flag to \"true\" in " @@ -1745,8 +1756,21 @@ namespace Tpetra { // structure of the sparse matrix does not change between linear // solves. if (nodeNumEntries != nodeNumAllocated) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Unpacked 1-D storage: numEnt=" + << nodeNumEntries << ", allocSize=" << nodeNumAllocated + << endl; + std::cerr << os.str(); + } // We have to pack the 1-D storage, since the user didn't fill // up all requested storage. + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate packed row offsets: " + << (lclNumRows+1) << endl; + std::cerr << os.str(); + } non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr", lclNumRows+1); // Total number of entries in the matrix on the calling @@ -1764,6 +1788,12 @@ namespace Tpetra { // Allocate the "packed" values array. // It has exactly the right number of entries. + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate packed values: " + << lclTotalNumEntries << endl; + std::cerr << os.str (); + } k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries); // Pack k_values1D_ into k_vals. We will replace k_values1D_ below. 
@@ -1776,6 +1806,12 @@ namespace Tpetra { range_type (0, lclNumRows), valsPacker); } else { // We don't have to pack, so just set the pointer. + if (verbose) { + std::ostringstream os; + os << *prefix << "Storage already packed: " + << "k_values1D_: " << k_values1D_.extent(0) << endl; + std::cerr << os.str(); + } k_vals = k_values1D_; } @@ -1862,8 +1898,7 @@ namespace Tpetra { if (! graph.indicesAreAllocated ()) { // We only allocate values at most once per process, so it's OK // to check TPETRA_VERBOSE here. - using ::Tpetra::Details::Behavior; - const bool verbose = Behavior::verbose("CrsMatrix"); + const bool verbose = Details::Behavior::verbose("CrsMatrix"); this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose); } From 7194ccd6109cf7ab0aaf1d158d642aa13d39fd6a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 29 Jan 2020 22:39:14 -0700 Subject: [PATCH 6/8] Tpetra::CrsMatrix::globalAssemble: Add verbose debugging output @trilinos/tpetra This is useful for determining whether users are doing nonlocal inserts. 
--- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 1b2f6beffef7..82bdeb835a6a 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -4326,6 +4326,7 @@ namespace Tpetra { using Teuchos::REDUCE_MAX; using Teuchos::REDUCE_MIN; using Teuchos::reduceAll; + using std::endl; typedef CrsMatrix crs_matrix_type; //typedef LocalOrdinal LO; typedef GlobalOrdinal GO; @@ -4333,6 +4334,15 @@ namespace Tpetra { const char tfecfFuncName[] = "globalAssemble: "; // for exception macro ProfilingRegion regionGlobalAssemble ("Tpetra::CrsMatrix::globalAssemble"); + const bool verbose = Details::Behavior::verbose("CrsMatrix"); + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("globalAssemble"); + std::ostringstream os; + os << *prefix << "nonlocals_.size()=" << nonlocals_.size() + << endl; + std::cerr << os.str(); + } RCP > comm = getComm (); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -4423,6 +4433,11 @@ namespace Tpetra { // to nonlocal rows. We may use StaticProfile, since we have // exact counts of the number of entries in each nonlocal row. + if (verbose) { + std::ostringstream os; + os << *prefix << "Create nonlocal matrix" << endl; + std::cerr << os.str(); + } RCP nonlocalMatrix = rcp (new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow (), StaticProfile)); @@ -4453,14 +4468,29 @@ namespace Tpetra { int isLocallyComplete = 1; // true by default if (origRowMapIsOneToOne) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Original row Map is 1-to-1" << endl; + std::cerr << os.str(); + } export_type exportToOrig (nonlocalRowMap, origRowMap); if (! 
exportToOrig.isLocallyComplete ()) { isLocallyComplete = 0; } + if (verbose) { + std::ostringstream os; + os << *prefix << "doExport from nonlocalMatrix" << endl; + std::cerr << os.str(); + } this->doExport (*nonlocalMatrix, exportToOrig, Tpetra::ADD); // We're done at this point! } else { + if (verbose) { + std::ostringstream os; + os << *prefix << "Original row Map is NOT 1-to-1" << endl; + std::cerr << os.str(); + } // If you ask a Map whether it is one to one, it does some // communication and stashes intermediate results for later use // by createOneToOne. Thus, calling createOneToOne doesn't cost @@ -4475,15 +4505,32 @@ namespace Tpetra { // // TODO (mfh 09 Sep 2016, 12 Sep 2016) Estimate # entries in // each row, to avoid reallocation during the Export operation. + if (verbose) { + std::ostringstream os; + os << *prefix << "Create & doExport into 1-to-1 matrix" + << endl; + std::cerr << os.str(); + } crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0); // Export from matrix of nonlocals into the temp one-to-one matrix. - oneToOneMatrix.doExport (*nonlocalMatrix, exportToOneToOne, Tpetra::ADD); + oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne, + Tpetra::ADD); // We don't need the matrix of nonlocals anymore, so get rid of // it, to keep the memory high-water mark down. + if (verbose) { + std::ostringstream os; + os << *prefix << "Free nonlocalMatrix" << endl; + std::cerr << os.str(); + } nonlocalMatrix = Teuchos::null; // Import from the one-to-one matrix to the original matrix. + if (verbose) { + std::ostringstream os; + os << *prefix << "doImport from 1-to-1 matrix" << endl; + std::cerr << os.str(); + } import_type importToOrig (oneToOneRowMap, origRowMap); this->doImport (oneToOneMatrix, importToOrig, Tpetra::ADD); } @@ -4492,6 +4539,11 @@ namespace Tpetra { // committed side effects to *this. The standard idiom for // clearing a Container like std::map, is to swap it with an empty // Container and let the swapped Container fall out of scope. 
+ if (verbose) { + std::ostringstream os; + os << *prefix << "Free nonlocals_ (std::map)" << endl; + std::cerr << os.str(); + } decltype (nonlocals_) newNonlocals; std::swap (nonlocals_, newNonlocals); From efb9a23cb0b564c74cb7cbd1db9ce356b27daaa9 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 29 Jan 2020 22:51:10 -0700 Subject: [PATCH 7/8] Tpetra::CrsGraph::makeIndicesLocal: Add verbose output Also add verbose output to Tpetra::CrsMatrix::fillComplete. --- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 7 +++- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 32 ++++++++++++++++--- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 16 +++++++--- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 98d88d678dff..348b0fb9405a 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1723,6 +1723,10 @@ namespace Tpetra { /// \pre The graph has a column Map. /// \post The graph is locally indexed. /// + /// \param verbose [in] Whether to print verbose debugging output. + /// This exists because CrsMatrix may want to control output + /// independently of the CrsGraph that it owns. + /// /// \return Error code and error string. See below. /// /// First return value is the number of column indices on this @@ -1734,7 +1738,8 @@ namespace Tpetra { /// /// Second return value is a human-readable error string. If the /// first return value is zero, then the string may be empty. - std::pair makeIndicesLocal (); + std::pair + makeIndicesLocal(const bool verbose=false); /// \brief Make the Import and Export objects, if needed. 
/// diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 2f951b3028e2..d25243d64b57 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -3568,7 +3568,7 @@ namespace Tpetra { // Make indices local, if they aren't already. // The method doesn't do any work if the indices are already local. const std::pair makeIndicesLocalResult = - this->makeIndicesLocal (); + this->makeIndicesLocal(verbose); if (debug) { // In debug mode, print error output on all processes using ::Tpetra::Details::gathervPrint; using Teuchos::RCP; @@ -4469,7 +4469,7 @@ namespace Tpetra { template std::pair CrsGraph:: - makeIndicesLocal () + makeIndicesLocal (const bool verbose) { using ::Tpetra::Details::ProfilingRegion; using Teuchos::arcp; @@ -4488,6 +4488,14 @@ namespace Tpetra { const char tfecfFuncName[] = "makeIndicesLocal: "; ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal"); + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("makeIndicesLocal"); + std::ostringstream os; + os << *prefix << "lclNumRows: " << getNodeNumRows() << endl; + std::cerr << os.str(); + } + // These are somewhat global properties, so it's safe to have // exception checks for them, rather than returning an error code. TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -4557,6 +4565,12 @@ namespace Tpetra { // large allocation typically, so the overhead of creating // an std::string is minor. 
const std::string label ("Tpetra::CrsGraph::lclind"); + if (verbose) { + std::ostringstream os; + os << *prefix << "(Re)allocate k_lclInds1D_: old=" + << k_lclInds1D_.extent(0) << ", new=" << numEnt << endl; + std::cerr << os.str(); + } k_lclInds1D_ = lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt); } @@ -4569,6 +4583,12 @@ namespace Tpetra { // following Kokkos issue: // // https://github.com/kokkos/kokkos/issues/442 + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate device mirror k_numRowEnt: " + << h_numRowEnt.extent(0) << endl; + std::cerr << os.str(); + } auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt); using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal; @@ -4600,6 +4620,12 @@ namespace Tpetra { // We've converted column indices from global to local, so we // can deallocate the global column indices (which we know are // in 1-D storage, because the graph has static profile). + if (verbose) { + std::ostringstream os; + os << *prefix << "Free k_gblInds1D_: " + << k_gblInds1D_.extent(0) << endl; + std::cerr << os.str(); + } k_gblInds1D_ = gbl_col_inds_type (); } // globallyIndexed() && lclNumRows > 0 @@ -4611,7 +4637,6 @@ namespace Tpetra { return std::make_pair (lclNumErrs, errStrm.str ()); } - template void CrsGraph:: @@ -4712,7 +4737,6 @@ namespace Tpetra { } } - template void CrsGraph:: diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 82bdeb835a6a..5b5a614cc97e 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -4648,8 +4648,18 @@ namespace Tpetra { using Teuchos::ArrayRCP; using Teuchos::RCP; using Teuchos::rcp; + using std::endl; const char tfecfFuncName[] = "fillComplete: "; - ProfilingRegion regionFillComplete ("Tpetra::CrsMatrix::fillComplete"); + ProfilingRegion regionFillComplete + ("Tpetra::CrsMatrix::fillComplete"); + const bool 
verbose = Details::Behavior::verbose("CrsMatrix"); + std::unique_ptr prefix; + if (verbose) { + prefix = createPrefix("fillComplete(dom,ran,p)"); + std::ostringstream os; + os << *prefix << endl; + std::cerr << os.str (); + } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (! this->isFillActive () || this->isFillComplete (), std::runtime_error, @@ -4687,8 +4697,6 @@ namespace Tpetra { } if (! this->getCrsGraphRef ().indicesAreAllocated ()) { - using ::Tpetra::Details::Behavior; - const bool verbose = Behavior::verbose("CrsMatrix"); if (this->hasColMap ()) { // use local indices allocateValues(LocalIndices, GraphNotYetAllocated, verbose); } @@ -4775,7 +4783,7 @@ namespace Tpetra { // Make indices local, if necessary. The method won't do // anything if the graph is already locally indexed. const std::pair makeIndicesLocalResult = - this->myGraph_->makeIndicesLocal (); + this->myGraph_->makeIndicesLocal(verbose); // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along // the error state to makeImportExport or // computeGlobalConstants, which may do all-reduces and thus may From a5dc4f4a500f98d46b5cb5f694061c34bb7e1edc Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 30 Jan 2020 09:33:39 -0700 Subject: [PATCH 8/8] Tpetra::CrsGraph: Fix build errors relating to unique_ptr --- packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 348b0fb9405a..95e5eb4e2a42 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -66,6 +66,7 @@ #include "Teuchos_ParameterListAcceptorDefaultBase.hpp" #include <functional> // std::function +#include <memory> namespace Tpetra {