From f8ba0e401862b480ab4984814ec5181667e38d83 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Mon, 24 Jun 2024 13:36:38 -0400 Subject: [PATCH 1/4] Update the `volume(DistArray)` function to support sparse arrays as well as arrays with tensor-of-tensor tiles. --- src/TiledArray/dist_array.h | 78 +++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index c6d6cddb79..c0c9aac78e 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -869,9 +869,10 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// first minimally contains the same number of elements as /// the tile. /// \throw TiledArray::Exception if the tile is already initialized. - template )&&detail:: - is_input_iterator::value>> + template < + typename Integer, typename InIter, + typename = std::enable_if_t<(std::is_integral_v) && + detail::is_input_iterator::value>> typename std::enable_if::value>::type set( const std::initializer_list& i, InIter first) { set>(i, first); @@ -964,10 +965,9 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// \throw TiledArray::Exception if index \c i has the wrong rank. Strong /// throw guarantee. /// \throw TiledArray::Exception if tile \c i is already set. - template < - typename Index, typename Value, - typename = std::enable_if_t< - (std::is_integral_v)&&is_value_or_future_to_value_v>> + template ) && + is_value_or_future_to_value_v>> void set(const std::initializer_list& i, Value&& v) { set>(i, std::forward(v)); } @@ -1459,7 +1459,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { shape() & typeid(pmap().get()).hash_code(); int64_t count = 0; for (auto it = begin(); it != end(); ++it) ++count; - ar& count; + ar & count; for (auto it = begin(); it != end(); ++it) ar & it->get(); } @@ -1476,14 +1476,14 @@ class DistArray : public madness::archive::ParallelSerializableObject { auto& world = TiledArray::get_default_world(); std::size_t typeid_hash = 0l; - ar& typeid_hash; + ar & typeid_hash; if (typeid_hash != typeid(*this).hash_code()) TA_EXCEPTION( "DistArray::serialize: source DistArray type != this DistArray type"); ProcessID world_size = -1; ProcessID world_rank = -1; - ar& world_size& world_rank; + ar & world_size & world_rank; if (world_size != world.size() || world_rank != world.rank()) TA_EXCEPTION( "DistArray::serialize: source DistArray world != this DistArray " @@ -1491,13 +1491,13 @@ class DistArray : public madness::archive::ParallelSerializableObject { trange_type trange; shape_type shape; - ar& trange& shape; + ar & trange & shape; // use default pmap, ensure it's the same pmap used to serialize auto volume = trange.tiles_range().volume(); auto pmap = detail::policy_t::default_pmap(world, volume); size_t pmap_hash_code = 0; - ar& pmap_hash_code; + ar & pmap_hash_code; if (pmap_hash_code != typeid(pmap.get()).hash_code()) TA_EXCEPTION( "DistArray::serialize: source DistArray pmap != this DistArray pmap"); @@ -1505,10 +1505,10 @@ class DistArray : public madness::archive::ParallelSerializableObject { new impl_type(world, std::move(trange), std::move(shape), pmap)); int64_t count = 0; - ar& count; + ar & count; for (auto it = begin(); it != end(); ++it, --count) { Tile tile; - ar& tile; + ar & tile; this->set(it.ordinal(), std::move(tile)); } if (count != 0) @@ -1541,27 +1541,27 @@ class DistArray : public madness::archive::ParallelSerializableObject { // make sure source data matches the expected type // TODO would be nice to be able to convert the data upon reading std::size_t typeid_hash = 0l; - localar& typeid_hash; + localar & typeid_hash; if (typeid_hash != typeid(*this).hash_code()) TA_EXCEPTION( "DistArray::load: source DistArray type != this DistArray type"); // make sure same number of clients for every I/O node int num_io_clients = 0; - localar& num_io_clients; + localar & num_io_clients; if (num_io_clients != ar.num_io_clients()) TA_EXCEPTION("DistArray::load: invalid parallel archive"); trange_type trange; shape_type shape; - localar& trange& shape; + localar & trange & shape; // send trange and shape to every client for (ProcessID p = 0; p < world.size(); ++p) { if (p != me && ar.io_node(p) == me) { world.mpi.Send(int(1), p, tag); // Tell client to expect the data madness::archive::MPIOutputArchive dest(world, p); - dest& trange& shape; + dest & trange & shape; dest.flush(); } } @@ -1573,13 +1573,13 @@ class DistArray : public madness::archive::ParallelSerializableObject { new impl_type(world, std::move(trange), std::move(shape), pmap)); int64_t count = 0; - localar& count; + localar & count; for (size_t ord = 0; ord != volume; ++ord) { if (!is_zero(ord)) { auto owner_rank = pmap->owner(ord); if (ar.io_node(owner_rank) == me) { Tile tile; - localar& tile; + localar & tile; this->set(ord, std::move(tile)); --count; } @@ -1598,7 +1598,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { world.mpi.Recv(flag, p, tag); TA_ASSERT(flag == 1); madness::archive::MPIInputArchive source(world, p); - source& trange& shape; + source & trange & shape; // use default pmap auto volume = trange.tiles_range().volume(); @@ -1643,7 +1643,7 @@ class DistArray : public madness::archive::ParallelSerializableObject { } } } - localar& count; + localar & count; for (size_t ord = 0; ord != volume; ++ord) { if (!is_zero(ord)) { auto owner_rank = pmap()->owner(ord); @@ -1857,12 +1857,32 @@ auto rank(const DistArray& a) { return a.trange().tiles_range().rank(); } +/// +/// \brief Get the total elements in the non-zero tiles of an array. +/// For tensor-of-tensor tiles, the total is the sum of the elements +/// of the inner tensors in non-zero tiles. +/// template -size_t volume(const DistArray& a) { - // this is the number of tiles - if (a.size() > 0) // assuming dense shape - return a.trange().elements_range().volume(); - return 0; +size_t volume(const DistArray& array) { + std::atomic vol = 0; + + auto local_vol = [&vol](Tile const& in_tile) { + if constexpr (detail::is_tensor_of_tensor_v) { + in_tile.unary([&vol](auto const& el) { vol += el.total_size(); }); + } else + vol += in_tile.total_size(); + }; + + for (auto&& tix : array.tiles_range()) + if (!array.is_zero(tix) && array.is_local(tix)) + array.world().taskq.add(std::move(local_vol), array.find_local(tix).get()); + + array.world().gop.fence(); + + size_t vol_ = vol; + array.world().gop.sum(&vol_, 1); + + return vol_; } template @@ -2002,13 +2022,13 @@ template void save(const TiledArray::DistArray& x, const std::string name) { archive::ParallelOutputArchive<> ar2(x.world(), name.c_str(), 1); - ar2& x; + ar2 & x; } template void load(TiledArray::DistArray& x, const std::string name) { archive::ParallelInputArchive<> ar2(x.world(), name.c_str(), 1); - ar2& x; + ar2 & x; } } // namespace madness From 9433749fec3fb3602f2f0e752b4c2bc6afb73cbb Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Mon, 24 Jun 2024 14:04:07 -0400 Subject: [PATCH 2/4] typo. --- src/TiledArray/dist_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index c0c9aac78e..2c1640cf5f 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -1875,7 +1875,7 @@ size_t volume(const DistArray& array) { for (auto&& tix : array.tiles_range()) if (!array.is_zero(tix) && array.is_local(tix)) - array.world().taskq.add(std::move(local_vol), array.find_local(tix).get()); + array.world().taskq.add(local_vol, array.find_local(tix).get()); array.world().gop.fence(); From a197d31a54bc0fc7798513cb5b696c3c7944fa24 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 25 Jun 2024 15:49:08 -0400 Subject: [PATCH 3/4] Add test and bug fix `TA::volume` function. --- src/TiledArray/dist_array.h | 4 ++- tests/dist_array.cpp | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 2c1640cf5f..65e2b83dce 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -1868,7 +1868,9 @@ size_t volume(const DistArray& array) { auto local_vol = [&vol](Tile const& in_tile) { if constexpr (detail::is_tensor_of_tensor_v) { - in_tile.unary([&vol](auto const& el) { vol += el.total_size(); }); + vol += std::accumulate( + in_tile.data(), in_tile.data() + in_tile.total_size(), size_t{0}, + [](auto t, auto const& inner) { return t + inner.total_size(); }); } else vol += in_tile.total_size(); }; diff --git a/tests/dist_array.cpp b/tests/dist_array.cpp index c2ac8262d0..288deabd20 100644 --- a/tests/dist_array.cpp +++ b/tests/dist_array.cpp @@ -830,4 +830,57 @@ BOOST_AUTO_TEST_CASE(rebind) { std::is_same_v, SpArrayTZ>); } +BOOST_AUTO_TEST_CASE(volume) { + using T = Tensor; + using ToT = Tensor; + using Policy = SparsePolicy; + using ArrayToT = DistArray; + + size_t constexpr nrows = 3; + size_t constexpr ncols = 4; + TiledRange const trange({{0, 2, 5, 7}, {0, 5, 7, 10, 12}}); + TA_ASSERT(trange.tiles_range().extent().at(0) == nrows && + trange.tiles_range().extent().at(1) == ncols, + "Following code depends on this condition."); + + // this Range is used to construct all inner tensors of the tile with + // tile index @c tix. + auto inner_dims = [nrows, ncols](Range::index_type const& tix) -> Range { + static std::array const rows{7, 8, 9}; + static std::array const cols{7, 8, 9, 10}; + + TA_ASSERT(tix.size() == 2, "Only rank-2 tensor expected."); + return Range({rows[tix.at(0) % nrows], cols[tix.at(1) % ncols]}); + }; + + // let's make all 'diagonal' tiles zero + auto zero_tile = [](Range::index_type const& tix) -> bool { + return tix.at(0) == tix.at(1); + }; + + auto make_tile = [inner_dims, zero_tile, &trange](auto& tile, + auto const& rng) { + auto&& tix = trange.element_to_tile(rng.lobound()); + if (zero_tile(tix)) + return 0.; + else { + tile = ToT(rng, [inner_rng = inner_dims(tix)](auto&&) { + return T(inner_rng, 0.1); + }); + return tile.norm(); + } + }; + + auto& world = get_default_world(); + auto array = make_array(world, trange, make_tile); + + // manually compute the volume of array + size_t vol = 0; + for (auto&& tix : trange.tiles_range()) + if (!zero_tile(tix)) + vol += trange.tile(tix).volume() * inner_dims(tix).volume(); + + BOOST_REQUIRE(vol == TA::volume(array)); +} + BOOST_AUTO_TEST_SUITE_END() From 9a0492b76367d8ca5c2d7a5623e0569cd74f1acb Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 25 Jun 2024 17:15:53 -0400 Subject: [PATCH 4/4] Cleanup. --- src/TiledArray/dist_array.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 65e2b83dce..7059b77333 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -1859,8 +1859,8 @@ auto rank(const DistArray& a) { /// /// \brief Get the total elements in the non-zero tiles of an array. -/// For tensor-of-tensor tiles, the total is the sum of the elements -/// of the inner tensors in non-zero tiles. +/// For tensor-of-tensor tiles, the total is the sum of the number of +/// elements in the inner tensors of non-zero tiles. /// template size_t volume(const DistArray& array) { @@ -1868,16 +1868,19 @@ size_t volume(const DistArray& array) { auto local_vol = [&vol](Tile const& in_tile) { if constexpr (detail::is_tensor_of_tensor_v) { - vol += std::accumulate( - in_tile.data(), in_tile.data() + in_tile.total_size(), size_t{0}, - [](auto t, auto const& inner) { return t + inner.total_size(); }); + auto reduce_op = [](size_t& MADNESS_RESTRICT result, auto&& arg) { + result += arg->total_size(); + }; + auto join_op = [](auto& MADNESS_RESTRICT result, size_t count) { + result += count; + }; + vol += in_tile.reduce(reduce_op, join_op, size_t{0}); } else vol += in_tile.total_size(); }; - for (auto&& tix : array.tiles_range()) - if (!array.is_zero(tix) && array.is_local(tix)) - array.world().taskq.add(local_vol, array.find_local(tix).get()); + for (auto&& local_tile_future : array) + array.world().taskq.add(local_vol, local_tile_future.get()); array.world().gop.fence();