From 154d42703990c796613d60c8de1a0479e95a31e1 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 16 Apr 2024 11:26:22 -0400 Subject: [PATCH 01/27] typo. --- src/TiledArray/tensor/kernels.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TiledArray/tensor/kernels.h b/src/TiledArray/tensor/kernels.h index 876ed00feb..699496d77e 100644 --- a/src/TiledArray/tensor/kernels.h +++ b/src/TiledArray/tensor/kernels.h @@ -1280,7 +1280,6 @@ auto tensor_hadamard(TensorA const& A, Annot const& aA, TensorB const& B, } else { auto pA = A.permute(perm.AC); return pA.mult_to(B.permute(perm.BC)); - return pA; } } From 2bfd5aa63d446e0a2b1fc65113b988d1bc0f85e4 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 17 Apr 2024 17:58:04 -0400 Subject: [PATCH 02/27] More corner cases of ToT evaluations supported. --- src/TiledArray/einsum/tiledarray.h | 117 +++++++++++++++++++++++++++++ tests/einsum.cpp | 20 +++++ 2 files changed, 137 insertions(+) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index d2d8b2c9ba..c0fdbd3e66 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -167,6 +167,101 @@ auto replicate_array(Array from, TiledRange const &prepend_trng) { return result; } +template +auto reduce_modes(Tensor const &orig, size_t drank) { + TA_ASSERT(orig.nbatch() == 1); + auto const orig_rng = orig.range(); + TA_ASSERT(orig_rng.rank() > drank); + + auto const result_rng = [orig_rng, drank]() { + container::vector r1s; + for (auto i = 0; i < orig_rng.rank() - drank; ++i) + r1s.emplace_back(orig_rng.dim(i)); + return TA::Range(r1s); + }(); + + auto const delta_rng = [orig_rng, drank]() { + container::vector r1s; + for (auto i = orig_rng.rank() - drank; i < orig_rng.rank(); ++i) + r1s.emplace_back(orig_rng.dim(i)); + return TA::Range(r1s); + }(); + + auto const delta_vol = delta_rng.volume(); + + auto reducer = [orig, delta_vol, delta_rng](auto const &ix) { + auto orig_ix = ix; + std::copy(delta_rng.lobound().begin(), // + delta_rng.lobound().end(), // + std::back_inserter(orig_ix)); + + auto beg = orig.data() + orig.range().ordinal(orig_ix); + auto end = beg + delta_vol; + + // cannot get it done this way: return std::reduce(beg, end); + + typename std::iterator_traits::value_type sum{}; + for (; beg != end; ++beg) sum += *beg; + return sum; + }; + + return Tensor(result_rng, reducer); +} + +/// +/// \param orig Input DistArray. +/// \param dmodes Reduce this many modes from the end as implied in the +/// tiled range of the input array. +/// \return Array with reduced rank. 
+/// +template +auto reduce_modes(TA::DistArray orig, size_t drank) { + TA_ASSERT(orig.trange().rank() > drank); + + auto const result_trange = [orig, drank]() { + container::svector tr1s; + for (auto i = 0; i < (orig.trange().rank() - drank); ++i) + tr1s.emplace_back(orig.trange().at(i)); + return TiledRange(tr1s); + }(); + + auto const delta_trange = [orig, drank]() { + container::svector tr1s; + for (auto i = orig.trange().rank() - drank; i < orig.trange().rank(); ++i) + tr1s.emplace_back(orig.trange().at(i)); + return TiledRange(tr1s); + }(); + + orig.make_replicated(); + orig.world().gop.fence(); + + auto make_tile = [orig, delta_trange, drank](auto &tile, auto const &rng) { + using tile_type = std::remove_reference_t; + + tile_type res(rng, typename tile_type::value_type{}); + + for (auto &&r : delta_trange.tiles_range()) { + container::svector ix1s = rng.lobound(); + + { + auto dlo = delta_trange.make_tile_range(r).lobound(); + std::copy(dlo.begin(), dlo.end(), std::back_inserter(ix1s)); + } + + auto tix = orig.trange().element_to_tile(ix1s); + auto got = orig.find_local(tix).get(false); + + res += reduce_modes(got, drank); + } + + tile = res; + return res.norm(); + }; + + return make_array>(orig.world(), result_trange, + make_tile); +} + template TiledRange make_trange(RangeMap const &map, Ixs const &ixs) { container::svector tr1s; @@ -320,6 +415,28 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, return C; } + // + // special Hadamard + contraction + // when ToT times T implied and T's indices are contraction AND Hadamard + // BUT not externals + // + if constexpr (!AreArraySame && + DeNestFlag == DeNest::False) { + auto hi_size = h.size() + i.size(); + if (hi_size != h.size() && hi_size != i.size() && + ((hi_size == a.size() && IsArrayT) || + (hi_size == b.size() && IsArrayT))) { + auto annot_c = std::string(h + e + i) + inner.c; + auto temp1 = einsum(A, B, idx(annot_c), world); + auto temp2 = reduce_modes(temp1, i.size()); + + auto annot_c_ = std::string(h + e) + inner.c; + decltype(temp2) result; + result(std::string(c) + inner.c) = temp2(annot_c_); + return result; + } + } + using ::Einsum::index::permutation; using TiledArray::Permutation; diff --git a/tests/einsum.cpp b/tests/einsum.cpp index cfae7b5925..8e76fc08a4 100644 --- a/tests/einsum.cpp +++ b/tests/einsum.cpp @@ -404,6 +404,26 @@ BOOST_AUTO_TEST_CASE(corner_cases) { {{0, 3, 5}, {0, 3, 8}}, // {{0, 3, 8}, {0, 3, 5}, {0, 2}}, // {3, 9}))); + + BOOST_REQUIRE(check_manual_eval("bi,bi->i", // + {{0, 2}, {0, 4}}, // + {{0, 2}, {0, 4}})); + + BOOST_REQUIRE(check_manual_eval("bi;a,bi;a->i;a", // + {{0, 2}, {0, 4}}, // + {{0, 2}, {0, 4}}, // + {3}, {3})); + + BOOST_REQUIRE( + (check_manual_eval("jk;a,ijk->i;a", // + {{0, 2}, {0, 4}}, // + {{0, 3}, {0, 2}, {0, 4}}, // + {5}))); + + BOOST_REQUIRE((check_manual_eval("bi;a,bi->i;a", // + {{0, 4, 8}, {0, 4}}, // + {{0, 4, 8}, {0, 4}}, // + {8}))); } BOOST_AUTO_TEST_SUITE_END() From 92e416e3bfb83b13493a31d13a2aad88f70a9ab3 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 1 May 2024 11:54:10 -0400 Subject: [PATCH 03/27] Fix `ToT times ToT into T` kind of evaluations so that varying inner tensor extents are supported. 
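
An illustrative case (indices and extents chosen for exposition only): an
evaluation annotated as "ij;ab,jk;ab->ik", i.e.

    C(i,k) = Σ_j Σ_a Σ_b A(i,j;a,b) * B(j,k;a,b)

where both arguments are tensors-of-tensors, the result is a plain tensor,
and the extents of the inner modes 'a' and 'b' may differ from one outer
element (i,j) to the next.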
--- src/TiledArray/einsum/tiledarray.h | 148 ++++++++++++++++++++++++++--- 1 file changed, 133 insertions(+), 15 deletions(-) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index c0fdbd3e66..4953141fbd 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -26,6 +26,102 @@ using ::Einsum::index::IndexMap; using ::Einsum::index::Permutation; using ::Einsum::index::permutation; +/// +/// \tparam T A type that parameterizes ::Einsum::Index. +/// +/// This class makes it easier to work with indices involved in a binary +/// tensor multiplication. Also defines a canonical order of the indices. +/// +/// Consider an arbitrary binary tensor multiplication annotated as: +/// A(a_1,...,a_m) * B(b_1,...,b_n) -> C(c_1,...,c_l) +/// Note that {c_1,...,c_l} is subset of ({a_1,...,a_m} union {b_1,...,b_n}). +/// +/// We define following index types. +/// * Hadamard index: An index that annotates A, B, and C. +/// * Contracted index: An index that annotates A and B but not C. +/// * External index of A: An index that annotates A and C but not B. +/// * External index of B: An index that annotates B and C but not A. +/// +/// Defining canonical index ordering. +/// * Hadamard indices are canonically ordered if they appear in the same +/// order in A's annotation. +/// * Contracted indices are canonically ordered if they appear in the same +/// order in A's annotation. +/// * External indices of A are canonically ordered if they appear in the +/// same order in A's annotation. +/// * External indices of B are canonically ordered if they appear in the +/// same order in B's annotation. +/// * Tensor A's indices are canonically ordered if Hadamard, external +/// indices of A, and contracted indices appear in that order and all +/// three index groups are themselves canonically ordered. +/// * Tensor B's indices are canonically ordered if Hadamard, external +/// indices of B, and contracted indices appear in that order and all +/// three index groups are themselves canonically ordered. +/// * Tensor C's indices are canonically ordered if Hadamard, external +/// indices of A and external indices of B appear in that order and all +/// three index groups are themselves canonically ordered. +/// +/// Example: Consider the evaluation: A(i,j,p,a,b) * B(j,i,q,b,a) -> C(i,p,j,q). +/// - Hadamard indices: {i,j} +/// - External indices of A: {p} +/// - External indices of B: {q} +/// - Contracted indices: {a, b} +/// All index groups above are canonically ordered. +/// Writing C's indices in canonical order would give: {i,j,p,q}. 
+/// +template +class TensorOpIndices { + public: + using index_t = ::Einsum::Index; + + TensorOpIndices(index_t const &ixA, index_t const &ixB, index_t const &ixC) + : orig_indices_({ixA, ixB, ixC}) { + hadamard_ = ixA & ixB & ixC; + contracted_ = (ixA & ixB) - ixC; + external_A_ = (ixA - ixB) & ixC; + external_B_ = (ixB - ixA) & ixC; + } + + [[nodiscard]] index_t const &ix_A() const { return orig_indices_[A]; } + [[nodiscard]] index_t const &ix_B() const { return orig_indices_[B]; } + [[nodiscard]] index_t const &ix_C() const { return orig_indices_[C]; } + + [[nodiscard]] index_t ix_A_canon() const { + return hadamard() + external_A() + contracted(); + } + + [[nodiscard]] index_t ix_B_canon() const { + return hadamard() + external_B() + contracted(); + } + + [[nodiscard]] index_t ix_C_canon() const { + return hadamard() + external_A() + external_B(); + } + + [[nodiscard]] index_t const &hadamard() const { return hadamard_; } + [[nodiscard]] index_t const &contracted() const { return contracted_; } + [[nodiscard]] index_t const &external_A() const { return external_A_; } + [[nodiscard]] index_t const &external_B() const { return external_B_; } + + [[nodiscard]] Permutation to_canon_A() const { + return ::Einsum::index::permutation(ix_A(), ix_A_canon()); + } + + [[nodiscard]] Permutation to_canon_B() const { + return ::Einsum::index::permutation(ix_B(), ix_B_canon()); + } + + [[nodiscard]] Permutation to_canon_C() const { + return ::Einsum::index::permutation(ix_C(), ix_C_canon()); + } + + private: + enum { A, B, C, ABC }; + std::array orig_indices_; + + index_t hadamard_, contracted_, external_A_, external_B_; +}; + /// converts the annotation of an expression to an Index template auto idx(const std::string &s) { @@ -334,20 +430,22 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, "Nested-rank-reduction only supported when the inner tensor " "ranks match on the arguments"); - // Step I: A * B -> C' - // Step II: C' -> C // - // At "Step I", a general product (without reduction) in outer indices, - // and pure Hadamard product in inner indices is carried out. - // Then at "Step II", the inner tensors are reduced with a unary function. - // The reducing function is determined by looking at the contracting and - // non-contracting outer indices. + // Illustration of steps by an example. // - // eg. A(i,j,k;a,b) * B(k,j;a,b) -> C(i,j) involves following two steps: - // Step I: A(i,j,k;a,b) * B(k,j;a,b) -> C'(i,j;a,b) - // Step II: C'(i,j;a,b) -> C(i,j) - - auto Cp = einsum(A, B, std::string(c) + ";" + std::string(inner.i)); + // Consider the evaluation: A(ijpab;xy) * B(jiqba;yx) -> C(ipjq). + // + // Note for the outer indices: + // - Hadamard: 'ij' + // - External A: 'p' + // - External B: 'q' + // - Contracted: 'ab' + // + // Now C is evaluated in the following steps. 
+ // Step I: A(ijpab;xy) * B(jiqba;yx) -> C0(ijpqab;xy) + // Step II: C0(ijpqab;xy) -> C1(ijpqab) + // Step III: C1(ijpqab) -> C2(ijpq) + // Step IV: C2(ijpq) -> C(ipjq) auto sum_tot_2_tos = [](auto const &tot) { typename std::remove_reference_t::value_type result( @@ -355,12 +453,32 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, return result; }; - auto result = TA::foreach( - Cp, [sum_tot_2_tos](auto &out_tile, auto const &in_tile) { + auto const oixs = TensorOpIndices(a, b, c); + + struct { + std::string C0, C1, C2; + } const Cn_annot{ + std::string(oixs.ix_C_canon() + oixs.contracted()) + inner.a, + {oixs.ix_C_canon() + oixs.contracted()}, + {oixs.ix_C_canon()}}; + + // Step I: A(ijpab;xy) * B(jiqba;yx) -> C0(ijpqab;xy) + auto C0 = einsum(A, B, Cn_annot.C0); + + // Step II: C0(ijpqab;xy) -> C1(ijpqab) + auto C1 = TA::foreach( + C0, [sum_tot_2_tos](auto &out_tile, auto const &in_tile) { out_tile = sum_tot_2_tos(in_tile); }); - return result; + // Step III: C1(ijpqab) -> C2(ijpq) + auto C2 = reduce_modes(C1, oixs.contracted().size()); + + // Step IV: C2(ijpq) -> C(ipjq) + ArrayC C; + C(c) = C2(Cn_annot.C2); + return C; + } else { // these are "Hadamard" (fused) indices auto h = a & b & c; From 93f6986f15d5a514d1db1f14c9cb23cd837fe505 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 1 May 2024 13:48:01 -0400 Subject: [PATCH 04/27] Bug fix. --- src/TiledArray/einsum/tiledarray.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 4953141fbd..cff0e2cd7b 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -265,6 +265,7 @@ auto replicate_array(Array from, TiledRange const &prepend_trng) { template auto reduce_modes(Tensor const &orig, size_t drank) { + if (drank == 0) return orig; TA_ASSERT(orig.nbatch() == 1); auto const orig_rng = orig.range(); TA_ASSERT(orig_rng.rank() > drank); @@ -313,6 +314,7 @@ auto reduce_modes(Tensor const &orig, size_t drank) { template auto reduce_modes(TA::DistArray orig, size_t drank) { TA_ASSERT(orig.trange().rank() > drank); + if (drank == 0) return orig; auto const result_trange = [orig, drank]() { container::svector tr1s; From 2e67af6243476f41cf137cc9bede3ee18aa0905f Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 7 May 2024 17:00:04 -0400 Subject: [PATCH 05/27] Simplify `(H+E,H)->H+E` logic. --- src/TiledArray/einsum/tiledarray.h | 31 ++++++++++-------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index cff0e2cd7b..3da230ca19 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -504,34 +504,23 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, auto range_map = (RangeMap(a, A.array().trange()) | RangeMap(b, B.array().trange())); - auto perm_and_rank_replicate = [delta_trng = make_trange(range_map, e)]( - auto pre, // - std::string const &pre_annot, // - std::string const &permed_annot) { - decltype(pre) permed; - permed(permed_annot) = pre(pre_annot); - return replicate_array(permed, delta_trng); - }; - // special Hadamard if (h.size() == a.size() || h.size() == b.size()) { TA_ASSERT(!i && e); - bool small_a = h.size() == a.size(); - std::string const eh_annot = (e | h); - std::string const permed_annot = - std::string(h) + (small_a ? 
inner.a : inner.b); - std::string const C_annot = std::string(c) + inner.c; - std::string const temp_annot = std::string(e) + "," + permed_annot; + bool const small_a = h.size() == a.size(); + auto const delta_trng = make_trange(range_map, e); + std::string target_layout = std::string(c) + inner.c; ArrayC C; if (small_a) { - auto temp = - perm_and_rank_replicate(A.array(), A.annotation(), permed_annot); - C(C_annot) = temp(temp_annot) * B; + auto temp = replicate_array(A.array(), delta_trng); + std::string temp_layout = std::string(e) + "," + A.annotation(); + C(target_layout) = temp(temp_layout) * B; } else { - auto temp = - perm_and_rank_replicate(B.array(), B.annotation(), permed_annot); - C(C_annot) = A * temp(temp_annot); + auto temp = replicate_array(B.array(), delta_trng); + std::string temp_layout = std::string(e) + "," + B.annotation(); + C(target_layout) = A * temp(temp_layout); } + return C; } From 1e07eb065e0c4795d41c7e75a20e0ab680f084b6 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 8 May 2024 12:58:52 -0400 Subject: [PATCH 06/27] [skip ci] Add corner case of outer Hadamard and inner outer-product kind of ToT eval. --- tests/einsum.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/einsum.cpp b/tests/einsum.cpp index 8e76fc08a4..6ca0a611ff 100644 --- a/tests/einsum.cpp +++ b/tests/einsum.cpp @@ -232,6 +232,10 @@ BOOST_AUTO_TEST_CASE(equal_nested_ranks) { {3}, // {2})); // H+C;H+C not supported + + // H;C(op) + BOOST_REQUIRE(check_manual_eval( + "ijk;bc,j;d->kji;dcb", {{0, 1}, {0, 1}, {0, 1}}, {{0, 1}}, {2, 3}, {4})); } BOOST_AUTO_TEST_CASE(different_nested_ranks) { From 3a68f0c54e97f68e21f50e942472a18cbf5e636e Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 8 May 2024 14:37:39 -0400 Subject: [PATCH 07/27] bug fix. --- src/TiledArray/expressions/cont_engine.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/TiledArray/expressions/cont_engine.h b/src/TiledArray/expressions/cont_engine.h index d40e9c88fc..58d7b9ad57 100644 --- a/src/TiledArray/expressions/cont_engine.h +++ b/src/TiledArray/expressions/cont_engine.h @@ -513,6 +513,7 @@ class ContEngine : public BinaryEngine { const left_tile_element_type& left, const right_tile_element_type& right) { contrreduce_op(result, left, right); + result = contrreduce_op(result); // permutations of result are applied as "postprocessing" }; } // ToT x ToT } else if (inner_prod == TensorProduct::Hadamard) { From 694212027c13080ad0d3c89b812312d079cf9d25 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Fri, 10 May 2024 14:54:53 -0400 Subject: [PATCH 08/27] Outer tensor contraction logic update. When the contraction occurs in the outer tensor (between two ToTs or between T or ToT), the contraction step cannot be passed to the expression layer. Example: `J = TA::einsum(I("i,j,k,l;eik,fjl"), S_1("i,j,k;aij,eik"), "i,j,l;aij,fjl");` --- src/TiledArray/einsum/tiledarray.h | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 3da230ca19..911591bd13 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -525,25 +525,18 @@ auto einsum(expressions::TsrExpr A, expressions::TsrExpr B, } // - // special Hadamard + contraction - // when ToT times T implied and T's indices are contraction AND Hadamard - // BUT not externals + // when contraction happens in the outer tensor + // need to evaluate specially.. 
// - if constexpr (!AreArraySame && - DeNestFlag == DeNest::False) { - auto hi_size = h.size() + i.size(); - if (hi_size != h.size() && hi_size != i.size() && - ((hi_size == a.size() && IsArrayT) || - (hi_size == b.size() && IsArrayT))) { - auto annot_c = std::string(h + e + i) + inner.c; - auto temp1 = einsum(A, B, idx(annot_c), world); - auto temp2 = reduce_modes(temp1, i.size()); - - auto annot_c_ = std::string(h + e) + inner.c; - decltype(temp2) result; - result(std::string(c) + inner.c) = temp2(annot_c_); - return result; - } + if (IsArrayToT && i.size() > 0) { + auto annot_c = std::string(h + e + i) + inner.c; + auto temp1 = einsum(A, B, idx(annot_c), world); + auto temp2 = reduce_modes(temp1, i.size()); + + auto annot_c_ = std::string(h + e) + inner.c; + decltype(temp2) result; + result(std::string(c) + inner.c) = temp2(annot_c_); + return result; } using ::Einsum::index::permutation; From 95bdee5f0add895dd3315f6281ed56b0a2447e03 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 15 May 2024 14:25:21 -0400 Subject: [PATCH 09/27] Try using latest-stable xcode in CI. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7753a3436d..35acea8182 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - uses: maxim-lobanov/setup-xcode@v1 with: - xcode-version: '<14' + xcode-version: 'latest-stable' - name: Host system info shell: bash From 1607ab4d15c4cc3389284f29d06d008588d294e2 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 15 May 2024 14:36:35 -0400 Subject: [PATCH 10/27] use gcc@11 in CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 35acea8182..6b899e6b31 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: cxx : [ clang++, /usr/local/bin/g++-10 ] build_type : [ Release, Debug ] task_backend: [ Pthreads, PaRSEC ] - prerequisites : [ gcc@10 boost eigen open-mpi bison scalapack ] + prerequisites : [ gcc@11 boost eigen open-mpi bison scalapack ] name: "${{ matrix.os }}: ${{ matrix.cxx }} ${{ matrix.build_type }} ${{ matrix.task_backend }}" runs-on: ${{ matrix.os }} From ecd4caf365387ed6e869468952a40fbe11b81256 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 15 May 2024 14:43:03 -0400 Subject: [PATCH 11/27] use gcc@11 in CI [ammend] --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b899e6b31..b10b6f1ada 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: os : [ macos-latest ] - cxx : [ clang++, /usr/local/bin/g++-10 ] + cxx : [ clang++, /usr/local/bin/g++-11 ] build_type : [ Release, Debug ] task_backend: [ Pthreads, PaRSEC ] prerequisites : [ gcc@11 boost eigen open-mpi bison scalapack ] From ec51ab0f7e8948a5edd9fda0c38a402674752a57 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 15 May 2024 15:24:14 -0400 Subject: [PATCH 12/27] Try setting GNU/gcc compiler from `/opt/homebrew/bin`. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b10b6f1ada..2339070e54 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: os : [ macos-latest ] - cxx : [ clang++, /usr/local/bin/g++-11 ] + cxx : [ clang++, /opt/homebrew/bin/g++-11 ] build_type : [ Release, Debug ] task_backend: [ Pthreads, PaRSEC ] prerequisites : [ gcc@11 boost eigen open-mpi bison scalapack ] From 40705d7c26ce7a00cd620f0cceac25010aaccfc9 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Thu, 16 May 2024 11:18:33 -0400 Subject: [PATCH 13/27] Avoid using deprecated `TA::Array` typedef. --- tests/einsum.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/einsum.cpp b/tests/einsum.cpp index 6ca0a611ff..d1afaf74e6 100644 --- a/tests/einsum.cpp +++ b/tests/einsum.cpp @@ -117,7 +117,7 @@ bool check_manual_eval(std::string const& annot, il_trange trangeA, } BOOST_AUTO_TEST_CASE(contract) { - using Array = TA::Array; + using Array = TA::TArrayI; BOOST_REQUIRE(check_manual_eval("ij,j->i", {{0, 2, 4}, {0, 4, 8}}, // A's trange @@ -136,7 +136,7 @@ BOOST_AUTO_TEST_CASE(contract) { } BOOST_AUTO_TEST_CASE(hadamard) { - using Array = TA::Array; + using Array = TA::TArrayI; BOOST_REQUIRE(check_manual_eval("i,i->i", // {{0, 1}}, // {{0, 1}} // @@ -153,13 +153,11 @@ BOOST_AUTO_TEST_CASE(hadamard) { } BOOST_AUTO_TEST_CASE(general) { - using Array = TA::Array; + using Array = TA::TArrayI; BOOST_REQUIRE(check_manual_eval("ijk,kil->ijl", // {{0, 2}, {0, 3, 5}, {0, 2, 4}}, // {{0, 2, 4}, {0, 2}, {0, 1}} // )); - - using Array = TA::Array; using Tensor = typename Array::value_type; using namespace std::string_literals; From 6e864ebddac3c7dd45d3888e9a0cb9441c0bed12 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Thu, 16 May 2024 14:57:38 -0400 Subject: [PATCH 14/27] `reduce_modes` function impl. amended to handle sparse dist-arrays. --- src/TiledArray/einsum/tiledarray.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 911591bd13..6c1e52c5fc 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -338,6 +338,7 @@ auto reduce_modes(TA::DistArray orig, size_t drank) { tile_type res(rng, typename tile_type::value_type{}); + bool all_summed_tiles_zeros{true}; for (auto &&r : delta_trange.tiles_range()) { container::svector ix1s = rng.lobound(); @@ -347,11 +348,18 @@ auto reduce_modes(TA::DistArray orig, size_t drank) { } auto tix = orig.trange().element_to_tile(ix1s); + if constexpr (std::is_same_v::policy_type, + SparsePolicy>) + if (orig.is_zero(tix)) continue; auto got = orig.find_local(tix).get(false); res += reduce_modes(got, drank); + all_summed_tiles_zeros = false; } + if (all_summed_tiles_zeros) + return typename std::remove_reference_t::scalar_type{0}; + tile = res; return res.norm(); }; From 02c9ba90e8ea74fe5a55aff4bb89b0573b82c9a9 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Mon, 20 May 2024 12:18:32 -0400 Subject: [PATCH 15/27] Refactor OuterInnerIndices to OuterInnerSetup. 
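
For example, the setup for the nested product A("ik;ac") * B("kj;cb") ->
C("ij;ab") can now be constructed either from a single einsum-style string
or from three per-argument annotations (mirroring the two methods described
in the class documentation below):

    OuterInnerSetup s1("ik;ac,kj;cb->ij;ab");
    OuterInnerSetup s2("i,k;a,c", "k,j;c,b", "i,j;a,b");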
--- tests/tot_array_fixture.h | 129 ++++++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 54 deletions(-) diff --git a/tests/tot_array_fixture.h b/tests/tot_array_fixture.h index 94f57b0930..4710ab79d7 100644 --- a/tests/tot_array_fixture.h +++ b/tests/tot_array_fixture.h @@ -229,50 +229,6 @@ void apply_partial_perm(T& to, T const& from, PartialPerm const& p) { } } -/// -/// Example: To represent A("ik;ac") * B("kj;cb") -> C("ij;ab"), -/// construct with std::string("ij;ac,kj;cb->ij;ab"); -/// outer_indices;inner_indices annotates a single object (DistArray, Tensor -/// etc.) A_indices,B_indices annotates first(A) and second(B) object -/// '->' separates argument objects' annotation from the result's annotation -/// -class OuterInnerIndices { - // array[0] annotes A - // array[1] annotes B - // array[2] annotes C - std::array outer_, inner_; - - public: - OuterInnerIndices(std::string const& annot) { - using ::Einsum::string::split2; - - constexpr size_t A = 0; - constexpr size_t B = 1; - constexpr size_t C = 2; - - auto [ab, aC] = split2(annot, "->"); - std::tie(outer_[C], inner_[C]) = split2(aC, ";"); - - auto [aA, aB] = split2(ab, ","); - - std::tie(outer_[A], inner_[A]) = split2(aA, ";"); - std::tie(outer_[B], inner_[B]) = split2(aB, ";"); - } - - template - OuterInnerIndices(const char (&s)[N]) : OuterInnerIndices{std::string(s)} {} - - [[nodiscard]] auto const& outer() const noexcept { return outer_; } - [[nodiscard]] auto const& inner() const noexcept { return inner_; } - - [[nodiscard]] auto const& outerA() const noexcept { return outer_[0]; } - [[nodiscard]] auto const& outerB() const noexcept { return outer_[1]; } - [[nodiscard]] auto const& outerC() const noexcept { return outer_[2]; } - [[nodiscard]] auto const& innerA() const noexcept { return inner_[0]; } - [[nodiscard]] auto const& innerB() const noexcept { return inner_[1]; } - [[nodiscard]] auto const& innerC() const noexcept { return inner_[2]; } -}; - enum struct TensorProduct { General, Dot, Invalid }; struct ProductSetup { @@ -294,7 +250,7 @@ struct ProductSetup { rank_E, // rank_I; - // ProductSetup() = default; + ProductSetup() = default; template >> @@ -342,6 +298,71 @@ struct ProductSetup { } }; +/// +/// Example: To represent A("ik;ac") * B("kj;cb") -> C("ij;ab") +/// +/// Method 1: +/// --- +/// construct with a single argument std::string("ij;ac,kj;cb->ij;ab"); +/// - the substring ";" +/// annotates a single object (DistArray, Tensor etc.) +/// - "," implies two distinct annotations (for A and B) +/// separated by a comma +/// - the right hand side of '->' annotates the result. +/// - Note: the only use of comma is to separate A's and B's annotations. +/// +/// Method 2: +/// --- +/// construct with three arguments: +/// std::string("i,k;a,c"), std::string("k,j;c,b"), std::string("i,j;a,b") +/// - Note the use of comma. 
+/// +class OuterInnerSetup { + ProductSetup outer_; + ProductSetup inner_; + + public: + OuterInnerSetup(std::string const& annot) { + using ::Einsum::string::split2; + using Ix = ::Einsum::index::Index; + + enum { A, B, C }; + std::array O; + std::array I; + + auto [ab, aC] = split2(annot, "->"); + std::tie(O[C], I[C]) = split2(aC, ";"); + + auto [aA, aB] = split2(ab, ","); + std::tie(O[A], I[A]) = split2(aA, ";"); + std::tie(O[B], I[B]) = split2(aB, ";"); + outer_ = ProductSetup(Ix(O[A]), Ix(O[B]), Ix(O[C])); + inner_ = ProductSetup(Ix(I[A]), Ix(I[B]), Ix(I[C])); + } + + template + OuterInnerSetup(const char (&s)[N]) : OuterInnerSetup{std::string(s)} {} + + OuterInnerSetup(std::string const& annotA, std::string const& annotB, + std::string const& annotC) { + using ::Einsum::string::split2; + using Ix = ::Einsum::index::Index; + + enum { A, B, C }; + std::array O; + std::array I; + std::tie(O[A], I[A]) = split2(annotA, ";"); + std::tie(O[B], I[B]) = split2(annotB, ";"); + std::tie(O[C], I[C]) = split2(annotC, ";"); + outer_ = ProductSetup(Ix(O[A]), Ix(O[B]), Ix(O[C])); + inner_ = ProductSetup(Ix(I[A]), Ix(I[B]), Ix(I[C])); + } + + [[nodiscard]] auto const& outer() const noexcept { return outer_; } + + [[nodiscard]] auto const& inner() const noexcept { return inner_; } +}; + namespace { auto make_perm(PartialPerm const& pp) { @@ -569,28 +590,28 @@ auto general_product(TA::DistArray A, template >> -auto manual_eval(OuterInnerIndices const& oixs, ArrayA A, ArrayB B) { +auto manual_eval(OuterInnerSetup const& setups, ArrayA A, ArrayB B) { constexpr auto mnr = TA::detail::max_nested_rank; static_assert(mnr == 1 || mnr == 2); - auto const outer_setup = ProductSetup(oixs.outer()); + auto const& outer = setups.outer(); + auto const& inner = setups.inner(); - TA_ASSERT(outer_setup.valid()); + TA_ASSERT(outer.valid()); if constexpr (mnr == 2) { - auto const inner_setup = ProductSetup(oixs.inner()); - TA_ASSERT(inner_setup.valid()); + TA_ASSERT(inner.valid()); if constexpr (DeNestFlag == DeNest::True) { // reduced nested rank in result using TA::detail::nested_rank; static_assert(nested_rank == nested_rank); - TA_ASSERT(inner_setup.rank_C == 0); + TA_ASSERT(inner.rank_C == 0); using TileC = typename ArrayA::value_type::value_type; - return general_product(A, B, outer_setup, inner_setup); + return general_product(A, B, outer, inner); } else - return general_product(A, B, outer_setup, inner_setup); + return general_product(A, B, outer, inner); } else { - return general_product(A, B, outer_setup); + return general_product(A, B, outer); } } From f00c2dc55a84c6722765bebb501e0f4d62da9f3a Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Mon, 20 May 2024 13:02:43 -0400 Subject: [PATCH 16/27] Make dense_array.set(..) more generic. --- src/TiledArray/conversions/sparse_to_dense.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TiledArray/conversions/sparse_to_dense.h b/src/TiledArray/conversions/sparse_to_dense.h index c5bdd812c5..89c45a2cb6 100644 --- a/src/TiledArray/conversions/sparse_to_dense.h +++ b/src/TiledArray/conversions/sparse_to_dense.h @@ -53,7 +53,7 @@ to_dense(DistArray const& sparse_array) { dense_array.set(ord, tile); } else { // see DistArray::set(ordinal, element_type) - dense_array.set(ord, 0); + dense_array.set(ord, typename ArrayType::value_type{}); } } From 4855438d810b24fbb3d3af09c4be04ac3ec20214 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 21 May 2024 08:45:31 -0400 Subject: [PATCH 17/27] Amend make dense_array.set(..) more generic. 
--- src/TiledArray/conversions/sparse_to_dense.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/conversions/sparse_to_dense.h b/src/TiledArray/conversions/sparse_to_dense.h index 89c45a2cb6..4c8728bc94 100644 --- a/src/TiledArray/conversions/sparse_to_dense.h +++ b/src/TiledArray/conversions/sparse_to_dense.h @@ -52,8 +52,13 @@ to_dense(DistArray const& sparse_array) { Tile tile(sparse_array.find(ord).get().clone()); dense_array.set(ord, tile); } else { - // see DistArray::set(ordinal, element_type) - dense_array.set(ord, typename ArrayType::value_type{}); + if constexpr (detail::is_tensor_of_tensor_v) { + // `zero' tiles that satisfy detail::is_tensor_of_tensor_v + // will be left uninitialized + } else { + // see DistArray::set(ordinal, element_type) + dense_array.set(ord, 0); + } } } From 00663f80e39beafcf2a316d56d6c6a90d0ec9711 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 21 May 2024 09:07:45 -0400 Subject: [PATCH 18/27] Amend make dense_array.set(..) more generic. --- src/TiledArray/conversions/sparse_to_dense.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/TiledArray/conversions/sparse_to_dense.h b/src/TiledArray/conversions/sparse_to_dense.h index 4c8728bc94..7ee6e92049 100644 --- a/src/TiledArray/conversions/sparse_to_dense.h +++ b/src/TiledArray/conversions/sparse_to_dense.h @@ -52,13 +52,7 @@ to_dense(DistArray const& sparse_array) { Tile tile(sparse_array.find(ord).get().clone()); dense_array.set(ord, tile); } else { - if constexpr (detail::is_tensor_of_tensor_v) { - // `zero' tiles that satisfy detail::is_tensor_of_tensor_v - // will be left uninitialized - } else { - // see DistArray::set(ordinal, element_type) - dense_array.set(ord, 0); - } + dense_array.set(ord, typename Tile::value_type{}); } } From 4b3f39f6fd77c2f6f43926aea2f7139434e5e421 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 21 May 2024 11:45:48 -0400 Subject: [PATCH 19/27] Remove use of deprecated typedefs.. (moar) --- tests/einsum.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/einsum.cpp b/tests/einsum.cpp index d1afaf74e6..8c9e0ae057 100644 --- a/tests/einsum.cpp +++ b/tests/einsum.cpp @@ -117,7 +117,7 @@ bool check_manual_eval(std::string const& annot, il_trange trangeA, } BOOST_AUTO_TEST_CASE(contract) { - using Array = TA::TArrayI; + using Array = TA::DistArray>; BOOST_REQUIRE(check_manual_eval("ij,j->i", {{0, 2, 4}, {0, 4, 8}}, // A's trange @@ -136,7 +136,7 @@ BOOST_AUTO_TEST_CASE(contract) { } BOOST_AUTO_TEST_CASE(hadamard) { - using Array = TA::TArrayI; + using Array = TA::DistArray>; BOOST_REQUIRE(check_manual_eval("i,i->i", // {{0, 1}}, // {{0, 1}} // @@ -153,7 +153,7 @@ BOOST_AUTO_TEST_CASE(hadamard) { } BOOST_AUTO_TEST_CASE(general) { - using Array = TA::TArrayI; + using Array = TA::DistArray>; BOOST_REQUIRE(check_manual_eval("ijk,kil->ijl", // {{0, 2}, {0, 3, 5}, {0, 2, 4}}, // {{0, 2, 4}, {0, 2}, {0, 1}} // From c94488edbf930456ccd489ff5dc8823530a134ed Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 21 May 2024 12:57:25 -0400 Subject: [PATCH 20/27] Use of `ArrayIterator::ordinal()` changed to `ArrayIterator::index()` to allow rank-1 array handling. 
--- src/TiledArray/conversions/dense_to_sparse.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/TiledArray/conversions/dense_to_sparse.h b/src/TiledArray/conversions/dense_to_sparse.h index e5c23cf5ba..6147c01a56 100644 --- a/src/TiledArray/conversions/dense_to_sparse.h +++ b/src/TiledArray/conversions/dense_to_sparse.h @@ -27,7 +27,7 @@ to_sparse(DistArray const &dense_array) { const auto begin = dense_array.begin(); for (auto it = begin; it != end; ++it) { // write the norm of each local tile to the tensor - norm(it->get(), tile_norms[it.ordinal()]); + norm(it->get(), tile_norms[it.index()]); } // Construct a sparse shape the constructor will handle communicating the @@ -40,9 +40,9 @@ to_sparse(DistArray const &dense_array) { // sparse_array set the sparse array tile with a clone so as not to hold // a pointer to the original tile. for (auto it = begin; it != end; ++it) { - const auto ord = it.ordinal(); - if (!sparse_array.is_zero(ord)) { - sparse_array.set(ord, it->get().clone()); + const auto ix = it.index(); + if (!sparse_array.is_zero(ix)) { + sparse_array.set(ix, it->get().clone()); } } From 1dea3466d80101edbc75630c1992810673cabac4 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 22 May 2024 13:36:06 -0400 Subject: [PATCH 21/27] Bug fix. --- tests/einsum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/einsum.cpp b/tests/einsum.cpp index 8c9e0ae057..e58e17b9e8 100644 --- a/tests/einsum.cpp +++ b/tests/einsum.cpp @@ -64,7 +64,7 @@ template bool check_manual_eval(std::string const& annot, il_trange trangeA, il_trange trangeB) { - return check_manual_eval(annot, trangeA, + return check_manual_eval(annot, trangeA, trangeB); } From 44f8ec37d2eda4097eb43da0281e6578b87fdfe7 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Tue, 28 May 2024 10:09:33 -0400 Subject: [PATCH 22/27] Compare shape by default on Sparse policy distarrays. 
--- tests/einsum.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/einsum.cpp b/tests/einsum.cpp index e58e17b9e8..a5980c7446 100644 --- a/tests/einsum.cpp +++ b/tests/einsum.cpp @@ -33,12 +33,18 @@ using il_extent = std::initializer_list; } // namespace template >> bool check_manual_eval(std::string const& annot, ArrayA A, ArrayB B) { auto out = TA::einsum(annot, A, B); auto ref = manual_eval(annot, A, B); + + using Policy = typename decltype(out)::policy_type; + if constexpr (ShapeCompFlag == ShapeComp::True && + std::is_same_v) { + out.truncate(); + } return ToTArrayFixture::are_equal(ref, out); } @@ -50,7 +56,7 @@ bool check_manual_eval(std::string const& annot, ArrayA A, ArrayB B) { } template + ShapeComp ShapeCompFlag = ShapeComp::True> bool check_manual_eval(std::string const& annot, il_trange trangeA, il_trange trangeB) { static_assert(detail::is_array_v && @@ -69,7 +75,7 @@ bool check_manual_eval(std::string const& annot, il_trange trangeA, } template + ShapeComp ShapeCompFlag = ShapeComp::True> bool check_manual_eval(std::string const& annot, il_trange trangeA, il_trange trangeB, il_extent inner_extents) { static_assert(detail::is_array_v); @@ -96,7 +102,7 @@ bool check_manual_eval(std::string const& annot, il_trange trangeA, } template + ShapeComp ShapeCompFlag = ShapeComp::True> bool check_manual_eval(std::string const& annot, il_trange trangeA, il_trange trangeB, il_extent inner_extentsA, il_extent inner_extentsB) { From c9cc69ffa0958d5d188c704d7941730c7d58cd85 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 29 May 2024 07:57:14 -0400 Subject: [PATCH 23/27] Return zero norm for uninitialized tensor-of-tensors tile. --- src/TiledArray/tensor/tensor.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index cd0e7e97f1..38f0e65ff9 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -2373,6 +2373,18 @@ class Tensor { /// \return The vector norm of this tensor scalar_type squared_norm() const { + + if constexpr (detail::is_tensor_v) { + // If uninitialized tensor of tensor return zero. + // All elements of this->data() are empty tensors in this case, + // however, we only look at the first element. + // Because + // - It is expensive to look at all elements. + // - The state of the array having only some empty elements + // is ill-defined and should never happen. + if (detail::empty(*data())) return 0; + } + auto square_op = [](scalar_type& MADNESS_RESTRICT res, const numeric_type arg) { res += TiledArray::detail::squared_norm(arg); From fa242b421464904712261c309847c4194d0b44d6 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 29 May 2024 08:31:46 -0400 Subject: [PATCH 24/27] Recompute shape for specially handled sparse arrays in einsum. --- src/TiledArray/einsum/tiledarray.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 6c1e52c5fc..6d1acd784f 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -260,6 +260,10 @@ auto replicate_array(Array from, TiledRange const &prepend_trng) { tile = repped; return tile.norm(); }); + + if constexpr (std::is_same_v) + result.truncate(); + return result; } @@ -311,8 +315,8 @@ auto reduce_modes(Tensor const &orig, size_t drank) { /// tiled range of the input array. /// \return Array with reduced rank. 
/// -template -auto reduce_modes(TA::DistArray orig, size_t drank) { +template +auto reduce_modes(TA::DistArray orig, size_t drank) { TA_ASSERT(orig.trange().rank() > drank); if (drank == 0) return orig; @@ -348,8 +352,7 @@ auto reduce_modes(TA::DistArray orig, size_t drank) { } auto tix = orig.trange().element_to_tile(ix1s); - if constexpr (std::is_same_v::policy_type, - SparsePolicy>) + if constexpr (std::is_same_v) if (orig.is_zero(tix)) continue; auto got = orig.find_local(tix).get(false); @@ -364,8 +367,11 @@ auto reduce_modes(TA::DistArray orig, size_t drank) { return res.norm(); }; - return make_array>(orig.world(), result_trange, - make_tile); + auto result = + make_array>(orig.world(), result_trange, make_tile); + if constexpr (std::is_same_v) result.truncate(); + + return result; } template From 64dbcb14c71b53e30953089a6e3366301136da93 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 29 May 2024 10:28:21 -0400 Subject: [PATCH 25/27] Bug fix. --- src/TiledArray/einsum/tiledarray.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 6d1acd784f..6a45c1e891 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -256,6 +256,7 @@ auto replicate_array(Array from, TiledRange const &prepend_trng) { auto res_coord_ix = res_tr.element_to_tile(res_rng.lobound()); auto from_coord_ix = decltype(res_coord_ix)( next(begin(res_coord_ix), delta_rank), end(res_coord_ix)); + if (from.is_zero(from_coord_ix)) return typename Array::scalar_type{0}; replicate_tensor(repped, from.find_local(from_coord_ix).get(false)); tile = repped; return tile.norm(); From 179e1d39f0b8635f37d72be6d3c91f315a99eaea Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Mon, 3 Jun 2024 13:58:46 -0400 Subject: [PATCH 26/27] Sparse array evaluation support in manual eval from tot_array_fixture. --- tests/tot_array_fixture.h | 307 ++++++++++++++++++++++++++------------ 1 file changed, 209 insertions(+), 98 deletions(-) diff --git a/tests/tot_array_fixture.h b/tests/tot_array_fixture.h index 4710ab79d7..5d0a0ce4dd 100644 --- a/tests/tot_array_fixture.h +++ b/tests/tot_array_fixture.h @@ -93,102 +93,203 @@ using output_archive_type = madness::archive::BinaryFstreamOutputArchive; enum class ShapeComp { True, False }; -template , bool> = true> -auto random_tensor(TA::Range const& rng) { - using NumericT = typename TensorT::numeric_type; - TensorT result{rng}; - - std::generate(/*std::execution::par, */ - result.begin(), result.end(), - TA::detail::MakeRandom::generate_value); - return result; +namespace fixture { +namespace { + +template +constexpr bool maps_index_to_range_v{}; + +template +constexpr bool maps_index_to_range_v< + Invocable, + std::enable_if_t>>>{ + true}; + +using il_range = std::initializer_list; +using il_trange = std::initializer_list; + +} // namespace + +/// +/// \tparam T Non cv-qualified TA::Tensor type. +/// \tparam Rng TA::Range should be constructible from Rng type. +/// Eg. TA::Range, std::initializer_list. +/// \param rng The range of the result tensor. +/// TA::Range(rng) will be called explicitly. +/// \return A TA::Tensor of a numeric type with random elements. 
+/// +template >, + typename = std::enable_if_t>> +auto random_tensor(Rng rng) { + using numeric_type = typename T::numeric_type; + + auto gen = [](auto&&) { + return detail::MakeRandom::generate_value(); + }; + + return T(TA::Range(rng), gen); } -template -auto random_tensor(std::initializer_list const& extents) { - auto lobounds = TA::container::svector(extents.size(), 0); - return random_tensor(TA::Range{lobounds, extents}); +/// +/// \tparam T Non cv-qualified +/// TA::Tensor,...> type. +/// \tparam RngO TA::Range should be constructible from RngO type. +/// Eg. TA::Range, std::initializer_list. +/// \tparam RngI TA::Range should be constructible from RngI type. +/// Eg. TA::Range, std::initializer_list. +/// \param rngo The range of the result tensor (ie the outer tensor). +/// TA::Range(rngo) will be called explicitly. +/// \param rngi The range of the inner tensors. Note that ALL inner tensors +/// will have an EQUAL range. TA::Range(rngi) will be +/// called explicitly. +/// \return A TA::Tensor of TA::Tensor with random +/// numeric_type elements. +/// +template < + typename T, typename RngO, typename RngI, // + typename = std::enable_if_t>, // + typename = std::enable_if_t>, // + typename = std::enable_if_t>> +auto random_tensor(RngO rngo, RngI rngi) { + using numeric_type = typename T::numeric_type; + using Inner = typename T::value_type; + + auto gen_inner = [](auto&&) { + return detail::MakeRandom::generate_value(); + }; + + auto gen_outer = [gen_inner, rngi](auto&&) { + return Inner(TA::Range(rngi), gen_inner); + }; + + return T(TA::Range(rngo), gen_outer); } -// -// note: all the inner tensors (elements of the outer tensor) -// have the same @c inner_rng -// +/// +/// \tparam T Non cv-qualified +/// TA::Tensor,...> type. +/// \tparam RngO TA::Range should be constructible from RngO type. +/// Eg. TA::Range, std::initializer_list. +/// \tparam IxMap An invocable type that maps the index of an element in the +/// outer tensor to a value, allowing the construction of +/// TA::Range from that value. +/// \param rngo The range of the result tensor (ie the outer tensor). +/// TA::Range(rngo) will be called explicitly. +/// \param ixmap An invocable that maps the index of an element in the +/// outer tensor to a value, allowing the construction of +/// TA::Range from that value. +/// \return A TA::Tensor of TA::Tensor with random +/// numeric_type elements. 
+/// template < - typename TensorT, - std::enable_if_t, bool> = true> -auto random_tensor(TA::Range const& outer_rng, TA::Range const& inner_rng) { - using InnerTensorT = typename TensorT::value_type; - TensorT result{outer_rng}; + typename T, typename RngO, typename IxMap, // + typename = std::enable_if_t>, // + typename = std::enable_if_t>, // + std::enable_if_t, bool> = true> +auto random_tensor(RngO rngo, IxMap ixmap) { + using numeric_type = typename T::numeric_type; + + auto gen_inner = [](auto&&) { + return TA::detail::MakeRandom::generate_value(); + }; - std::generate(/*std::execution::par,*/ - result.begin(), result.end(), [inner_rng]() { - return random_tensor(inner_rng); - }); + auto gen_outer = [gen_inner, ixmap](auto const& oix) { + auto inner_rng = TA::Range(ixmap(oix)); + return typename T::value_type(inner_rng, gen_inner); + }; - return result; + return T(TA::Range(rngo), gen_outer); } -template -auto random_tensor(TA::Range const& outer_rng, - std::initializer_list const& inner_extents) { - TA::container::svector lobounds(inner_extents.size(), 0); - return random_tensor(outer_rng, TA::Range(lobounds, inner_extents)); +/// +/// \tparam Array Non cv-qualified TA::DistArray type that has non-nested +/// tile type. Eg. TA::DistArray> +/// \tparam Rng TA::TiledRange should be constructible from Rng type. +/// \param rng The TA::TiledRange of the result TA::DistArray. +/// \return A TA::DistArray of non-nested tile type with random elements. +/// +template < + typename Array, typename Rng = il_trange, + typename = std::enable_if_t == 1>, + typename = std::enable_if_t>> +auto random_array(Rng rng) { + using T = typename Array::value_type; + + auto make_tile = [](auto& tile, auto const& rng) { + tile = random_tensor(rng); + return tile.norm(); + }; + + return TA::make_array(TA::get_default_world(), TA::TiledRange(rng), + make_tile); } /// -/// \tparam Array The type of DistArray to be generated. Cannot be cv-qualified -/// or reference type. -/// \tparam Args TA::Range type for inner tensor if the tile type of the result -/// is a tensor-of-tensor. -/// \param trange The TiledRange of the result DistArray. -/// \param args Either exactly one TA::Range type when the tile type of Array is -/// tensor-of-tensor or nothing. -/// \return Returns a DistArray of type Array whose elements are randomly -/// generated. -/// @note: -/// - Although DistArrays with Sparse policy can be generated all of their -/// tiles are initialized with random values -- technically the returned value -/// is dense. -/// - In case of arrays with tensor-of-tensor tiles, all the inner tensors have -/// the same rank and the same extent of corresponding modes. +/// \tparam Array Non cv-qualified TA::DistArray type that has a nested +/// tile type. +/// Eg. TA::DistArray>> +/// \tparam RngO TA::TiledRange should be constructible form RngO type. +/// \tparam RngI TA::Range should be constructible from RngI type. +/// \param rngo The TA::TiledRange of the result TA::DistArray. +/// \param rngi The range of the inner tensors. Note that ALL inner tensors +/// will have an EQUAL range. TA::Range(rngi) will be +/// called explicitly. +/// \return A TA::DistArray of nested tile type with random elements. /// template < - typename Array, typename... Args, - typename = - std::void_t, - std::enable_if_t, - bool> = true> -auto random_array(TA::TiledRange const& trange, Args const&... 
args) { - static_assert( - (sizeof...(Args) == 0 && - TA::detail::is_tensor_v) || - (sizeof...(Args) == 1) && - (TA::detail::is_tensor_of_tensor_v)); - - using TensorT = typename Array::value_type; - using PolicyT = typename Array::policy_type; - - auto make_tile_meta = [](auto&&... args) { - return [=](TensorT& tile, TA::Range const& rng) { - tile = random_tensor(rng, args...); - if constexpr (std::is_same_v) - return tile.norm(); - }; + typename Array, typename RngO = il_trange, typename RngI = il_range, + typename = std::enable_if_t == 2>, + typename = std::enable_if_t>, + typename = std::enable_if_t>> +auto random_array(RngO rngo, RngI rngi) { + using T = typename Array::value_type; + + auto make_tile = [rngi](auto& tile, auto const& rng) { + tile = random_tensor(rng, rngi); + return tile.norm(); }; - return TA::make_array(TA::get_default_world(), trange, - make_tile_meta(args...)); + return TA::make_array(TA::get_default_world(), TA::TiledRange(rngo), + make_tile); } -template -auto random_array(std::initializer_list> trange, - Args&&... args) { - return random_array(TA::TiledRange(trange), - std::forward(args)...); +/// +/// \tparam Array Non cv-qualified TA::DistArray type that has a nested +/// tile type. +/// Eg. TA::DistArray>> +/// \tparam RngO TA::TiledRange should be constructible form RngO type. +/// \tparam IxMap An invocable type that maps the index of an element in the +/// outer tensor to a value, allowing the construction of +/// TA::Range from that value. +/// \param rngo The TA::TiledRange of the result TA::DistArray. +/// \param ixmap An invocable that maps the index of an element in the +/// outer tensor to a value, allowing the construction of +/// TA::Range from that value. +/// \return A TA::DistArray of nested tile type with random elements. 
+template < + typename Array, typename RngO, typename IxMap, + typename = std::enable_if_t == 2>, + typename = std::enable_if_t>, + std::enable_if_t, bool> = true> +auto random_array(RngO rngo, IxMap ixmap) { + using T = typename Array::value_type; + + auto make_tile = [ixmap](auto& tile, auto const& rng) { + tile = random_tensor(rng, ixmap); + return tile.norm(); + }; + + return TA::make_array(TA::get_default_world(), TA::TiledRange(rngo), + make_tile); } +} // namespace fixture + +using fixture::random_array; +using fixture::random_tensor; + /// /// Succinctly call TA::detail::tensor_contract /// @@ -404,6 +505,9 @@ Result general_product(TensorA const& A, TensorB const& B, using TA::detail::max_nested_rank; using TA::detail::nested_rank; + // empty tensors + if (A.empty() || B.empty()) return Result{}; + static_assert(std::is_same_v); @@ -502,12 +606,22 @@ Result general_product(TensorA const& A, TensorB const& B, } } else { typename Result::value_type temp{}; - for (auto ix_I : rng_I) { + for (auto const& ix_I : rng_I) { apply_partial_perm(ix_A, ix_I, setup.I_to_A); apply_partial_perm(ix_B, ix_I, setup.I_to_B); - if constexpr (is_tot) - temp += general_product( + if constexpr (is_tot) { + auto temp_ = general_product( A(ix_A), B(ix_B), args...); + if constexpr (TA::detail::is_nested_tensor_v< + typename Result::value_type>) { + if (temp.empty()) + temp = std::move(temp_); + else + temp += temp_; + } else { + temp += temp_; + } + } else { TA_ASSERT(!(ix_A.empty() || ix_B.empty())); temp += A(ix_A) * B(ix_B); @@ -548,26 +662,14 @@ auto general_product(TA::DistArray A, TA::TiledRange result_trange; { - auto const rank = result_tensor.range().rank(); - auto const result_range = result_tensor.range(); - - TA::container::svector> tr1s(rank, {0}); - - TA::container::svector const ix_hi(result_range.upbound()); - for (auto d = 0; d < rank; ++d) { - TA::container::svector ix(result_range.lobound()); - for (auto& i = ix[d]; i < ix_hi[d]; ++i) { - auto const& elem_tensor = result_tensor(ix); - auto& tr1 = tr1s[d]; - tr1.emplace_back(tr1.back() + elem_tensor.range().extent(d)); - } + TA::container::svector tr1s(setup.rank_C); + for (auto [t, f] : setup.C_to_A) { + tr1s.at(t) = A.trange().at(f); } - - TA::container::svector tr1s_explicit; - tr1s_explicit.reserve(tr1s.size()); - for (auto const& v : tr1s) tr1s_explicit.emplace_back(v.begin(), v.end()); - - result_trange = TA::TiledRange(tr1s_explicit); + for (auto [t, f] : setup.C_to_B) { + tr1s.at(t) = B.trange().at(f); + } + result_trange = TiledRange(tr1s); } TA::DistArray C(world, result_trange); @@ -588,6 +690,15 @@ auto general_product(TA::DistArray A, return general_product(A, B, args...); } +template +auto general_product(TA::DistArray A, + TA::DistArray B, + Setups const&... args) { + auto A_dense = to_dense(A); + auto B_dense = to_dense(B); + return TA::to_sparse(general_product(A_dense, B_dense, args...)); +} + template >> auto manual_eval(OuterInnerSetup const& setups, ArrayA A, ArrayB B) { From 59fbafb9ed4dca8340f00902d8583883430a8218 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Mon, 3 Jun 2024 15:23:50 -0400 Subject: [PATCH 27/27] Add dox. 
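
A minimal standalone sketch of the reduction being documented, using plain
std::vector in place of TA::Tensor (extents and data are arbitrary): for a
row-major rank-5 tensor A, reduce_modes(A, 2) yields the rank-3 tensor
B(i,j,k) = Σ_l Σ_m A(i,j,k,l,m), and because the reduced trailing modes are
contiguous in memory each result element is the sum of one contiguous block:

    #include <cstddef>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main() {
      const std::size_t I = 2, J = 3, K = 4, L = 2, M = 5;  // extents
      std::vector<double> A(I * J * K * L * M);
      std::iota(A.begin(), A.end(), 0.0);                   // arbitrary data

      const std::size_t delta_vol = L * M;  // volume of the reduced modes
      std::vector<double> B(I * J * K, 0.0);
      for (std::size_t o = 0; o < B.size(); ++o)            // o == ordinal of (i,j,k)
        B[o] = std::accumulate(A.begin() + o * delta_vol,
                               A.begin() + (o + 1) * delta_vol, 0.0);

      std::cout << "B(0,0,0) = " << B[0] << '\n';           // sum of the first L*M entries
      return 0;
    }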
--- src/TiledArray/einsum/tiledarray.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/TiledArray/einsum/tiledarray.h b/src/TiledArray/einsum/tiledarray.h index 6a45c1e891..5d68770506 100644 --- a/src/TiledArray/einsum/tiledarray.h +++ b/src/TiledArray/einsum/tiledarray.h @@ -268,6 +268,18 @@ auto replicate_array(Array from, TiledRange const &prepend_trng) { return result; } +/// +/// Given a rank-N tensor and a ∂-rank such that ∂ in [0,N), returns a new +/// rank-N' tensor (where N' = N - ∂) by summing over the ∂ ranks from the +/// end of the input tensor's range. For example, reduce_modes(A, 2) where +/// A.range().rank() == 5 will result into a new tensor (B) of rank-3 such that +/// B(i,j,k) = Σ_l Σ_m A(i,j,k,l,m). +/// +/// \param orig Input Tensor. +/// \param dmodes Reduce this many modes from the end as implied in the +/// range of the input tensor. +/// \return Tensor with reduced rank. +/// template auto reduce_modes(Tensor const &orig, size_t drank) { if (drank == 0) return orig; @@ -315,6 +327,7 @@ auto reduce_modes(Tensor const &orig, size_t drank) { /// \param dmodes Reduce this many modes from the end as implied in the /// tiled range of the input array. /// \return Array with reduced rank. +/// \see reduce_modes(Tensor, size_t) /// template auto reduce_modes(TA::DistArray orig, size_t drank) { @@ -375,6 +388,12 @@ auto reduce_modes(TA::DistArray orig, size_t drank) { return result; } +/// +/// \tparam Ixs Iterable of indices. +/// \param map A map from the index type of \c Ixs to TiledRange1. +/// \param ixs Iterable of indices. +/// \return TiledRange object. +/// template TiledRange make_trange(RangeMap const &map, Ixs const &ixs) { container::svector tr1s;