From 6b41a9c5329d52f391599a389b5021cbf057d854 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 8 Dec 2022 19:40:26 +0100 Subject: [PATCH 1/3] add support for host operations --- benchmark/preconditioner/preconditioner.cpp | 12 +++ benchmark/solver/solver.cpp | 12 +++ core/factorization/elimination_forest.cpp | 35 +++++---- core/factorization/elimination_forest.hpp | 5 +- core/factorization/lu.cpp | 7 +- core/factorization/symbolic.cpp | 43 ++++++----- core/factorization/symbolic.hpp | 23 ++++-- .../test/factorization/elimination_forest.cpp | 75 ++++++++++--------- include/ginkgo/core/base/executor.hpp | 50 +++++++++++++ .../test/factorization/cholesky_kernels.cpp | 54 +++++++------ reference/test/factorization/lu_kernels.cpp | 6 +- test/factorization/cholesky_kernels.cpp | 29 ++++--- 12 files changed, 234 insertions(+), 117 deletions(-) diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 507a7676ba0..091f4fc6043 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -201,10 +201,16 @@ void run_preconditioner(const char* precond_name, auto gen_logger = std::make_shared(FLAGS_nested_names); exec->add_logger(gen_logger); + if (exec->get_master() != exec) { + exec->get_master()->add_logger(gen_logger); + } std::unique_ptr precond_op; for (auto i = 0u; i < ic_gen.get_num_repetitions(); ++i) { precond_op = precond->generate(system_matrix); } + if (exec->get_master() != exec) { + exec->get_master()->remove_logger(gko::lend(gen_logger)); + } exec->remove_logger(gko::lend(gen_logger)); gen_logger->write_data(this_precond_data["generate"]["components"], @@ -213,9 +219,15 @@ void run_preconditioner(const char* precond_name, auto apply_logger = std::make_shared(FLAGS_nested_names); exec->add_logger(apply_logger); + if (exec->get_master() != exec) { + exec->get_master()->add_logger(apply_logger); + } for (auto i = 0u; i < ic_apply.get_num_repetitions(); ++i) { precond_op->apply(lend(b), lend(x_clone)); } + if (exec->get_master() != exec) { + exec->get_master()->remove_logger(gko::lend(apply_logger)); + } exec->remove_logger(gko::lend(apply_logger)); apply_logger->write_data(this_precond_data["apply"]["components"], diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index b868052d32d..e6a22f1be6a 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -435,12 +435,18 @@ void solve_system(const std::string& solver_name, auto gen_logger = std::make_shared(FLAGS_nested_names); exec->add_logger(gen_logger); + if (exec->get_master() != exec) { + exec->get_master()->add_logger(gen_logger); + } auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_max_iters) ->generate(system_matrix); + if (exec->get_master() != exec) { + exec->get_master()->remove_logger(gko::lend(gen_logger)); + } exec->remove_logger(gko::lend(gen_logger)); gen_logger->write_data(solver_json["generate"]["components"], allocator, 1); @@ -459,9 +465,15 @@ void solve_system(const std::string& solver_name, auto apply_logger = std::make_shared(FLAGS_nested_names); exec->add_logger(apply_logger); + if (exec->get_master() != exec) { + exec->get_master()->add_logger(apply_logger); + } solver->apply(lend(b), lend(x_clone)); + if (exec->get_master() != exec) { + exec->get_master()->remove_logger(gko::lend(apply_logger)); + } exec->remove_logger(gko::lend(apply_logger)); apply_logger->write_data(solver_json["apply"]["components"], allocator, 1); diff --git a/core/factorization/elimination_forest.cpp b/core/factorization/elimination_forest.cpp index 77921aa9155..86bcbc29766 100644 --- a/core/factorization/elimination_forest.cpp +++ b/core/factorization/elimination_forest.cpp @@ -181,35 +181,38 @@ void elimination_forest::set_executor( template -elimination_forest compute_elim_forest( - const matrix::Csr* mtx) +void compute_elim_forest(const matrix::Csr* mtx, + std::unique_ptr>& forest) { const auto host_exec = mtx->get_executor()->get_master(); const auto host_mtx = make_temporary_clone(host_exec, mtx); const auto num_rows = static_cast(host_mtx->get_size()[0]); - elimination_forest forest{host_exec, num_rows}; + forest = + std::make_unique>(host_exec, num_rows); compute_elim_forest_parent_impl(host_exec, host_mtx->get_const_row_ptrs(), host_mtx->get_const_col_idxs(), num_rows, - forest.parents.get_data()); - compute_elim_forest_children_impl(forest.parents.get_const_data(), num_rows, - forest.child_ptrs.get_data(), - forest.children.get_data()); + forest->parents.get_data()); + compute_elim_forest_children_impl(forest->parents.get_const_data(), + num_rows, forest->child_ptrs.get_data(), + forest->children.get_data()); compute_elim_forest_postorder_impl( - host_exec, forest.parents.get_const_data(), - forest.child_ptrs.get_const_data(), forest.children.get_const_data(), - num_rows, forest.postorder.get_data(), forest.inv_postorder.get_data()); + host_exec, forest->parents.get_const_data(), + forest->child_ptrs.get_const_data(), forest->children.get_const_data(), + num_rows, forest->postorder.get_data(), + forest->inv_postorder.get_data()); compute_elim_forest_postorder_parent_impl( - forest.parents.get_const_data(), forest.inv_postorder.get_const_data(), - num_rows, forest.postorder_parents.get_data()); + forest->parents.get_const_data(), + forest->inv_postorder.get_const_data(), num_rows, + forest->postorder_parents.get_data()); - forest.set_executor(mtx->get_executor()); - return forest; + forest->set_executor(mtx->get_executor()); } #define GKO_DECLARE_COMPUTE_ELIM_FOREST(ValueType, IndexType) \ - elimination_forest compute_elim_forest( \ - const matrix::Csr* mtx) + void compute_elim_forest( \ + const matrix::Csr* mtx, \ + std::unique_ptr>& forest) GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COMPUTE_ELIM_FOREST); diff --git a/core/factorization/elimination_forest.hpp b/core/factorization/elimination_forest.hpp index affe9d4c0bf..c994db855c3 100644 --- a/core/factorization/elimination_forest.hpp +++ b/core/factorization/elimination_forest.hpp @@ -63,8 +63,9 @@ struct elimination_forest { template -elimination_forest compute_elim_forest( - const matrix::Csr* mtx); +void compute_elim_forest( + const matrix::Csr* mtx, + std::unique_ptr>& forest); } // namespace factorization diff --git a/core/factorization/lu.cpp b/core/factorization/lu.cpp index ddef5314b8a..f7ed51c6cb7 100644 --- a/core/factorization/lu.cpp +++ b/core/factorization/lu.cpp @@ -56,6 +56,9 @@ GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets); GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup); GKO_REGISTER_OPERATION(initialize, lu_factorization::initialize); GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize); +GKO_REGISTER_HOST_OPERATION(symbolic_cholesky, + gko::factorization::symbolic_cholesky); +GKO_REGISTER_HOST_OPERATION(symbolic_lu, gko::factorization::symbolic_lu); } // namespace @@ -93,9 +96,9 @@ std::unique_ptr Lu::generate_impl( std::unique_ptr factors; if (!parameters_.symbolic_factorization) { if (parameters_.symmetric_sparsity) { - factors = gko::factorization::symbolic_cholesky(mtx.get()); + exec->run(make_symbolic_cholesky(mtx.get(), factors)); } else { - factors = gko::factorization::symbolic_lu(mtx.get()); + exec->run(make_symbolic_lu(mtx.get(), factors)); } } else { const auto& symbolic = parameters_.symbolic_factorization; diff --git a/core/factorization/symbolic.cpp b/core/factorization/symbolic.cpp index adec4dfb2bd..88c06389fb6 100644 --- a/core/factorization/symbolic.cpp +++ b/core/factorization/symbolic.cpp @@ -58,6 +58,7 @@ GKO_REGISTER_OPERATION(cholesky_symbolic, GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum); GKO_REGISTER_OPERATION(initialize, lu_factorization::initialize); GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize); +GKO_REGISTER_HOST_OPERATION(compute_elim_forest, compute_elim_forest); } // namespace @@ -65,38 +66,40 @@ GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize); /** Computes the symbolic Cholesky factorization of the given matrix. */ template -std::unique_ptr> symbolic_cholesky( - const matrix::Csr* mtx) +void symbolic_cholesky( + const matrix::Csr* mtx, + std::unique_ptr>& factors) { using matrix_type = matrix::Csr; const auto exec = mtx->get_executor(); const auto host_exec = exec->get_master(); - const auto forest = compute_elim_forest(mtx); + std::unique_ptr> forest; + exec->run(make_compute_elim_forest(mtx, forest)); const auto num_rows = mtx->get_size()[0]; array row_ptrs{exec, num_rows + 1}; array tmp{exec}; exec->run( - make_cholesky_symbolic_count(mtx, forest, row_ptrs.get_data(), tmp)); + make_cholesky_symbolic_count(mtx, *forest, row_ptrs.get_data(), tmp)); exec->run(make_prefix_sum(row_ptrs.get_data(), num_rows + 1)); const auto factor_nnz = static_cast( exec->copy_val_to_host(row_ptrs.get_const_data() + num_rows)); - auto factor = matrix_type::create( + factors = matrix_type::create( exec, mtx->get_size(), array{exec, factor_nnz}, array{exec, factor_nnz}, std::move(row_ptrs)); - exec->run(make_cholesky_symbolic(mtx, forest, factor.get(), tmp)); - factor->sort_by_column_index(); - auto lt_factor = as(factor->transpose()); + exec->run(make_cholesky_symbolic(mtx, *forest, factors.get(), tmp)); + factors->sort_by_column_index(); + auto lt_factor = as(factors->transpose()); const auto scalar = initialize>({one()}, exec); const auto id = matrix::Identity::create(exec, num_rows); - lt_factor->apply(scalar.get(), id.get(), scalar.get(), factor.get()); - return factor; + lt_factor->apply(scalar.get(), id.get(), scalar.get(), factors.get()); } -#define GKO_DECLARE_SYMBOLIC_CHOLESKY(ValueType, IndexType) \ - std::unique_ptr> symbolic_cholesky( \ - const matrix::Csr* mtx) +#define GKO_DECLARE_SYMBOLIC_CHOLESKY(ValueType, IndexType) \ + void symbolic_cholesky( \ + const matrix::Csr* mtx, \ + std::unique_ptr>& factors) GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_CHOLESKY); @@ -109,8 +112,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_CHOLESKY); * "GSoFa: Scalable Sparse Symbolic LU Factorization on GPUs," arXiv 2021 */ template -std::unique_ptr> symbolic_lu( - const matrix::Csr* mtx) +void symbolic_lu(const matrix::Csr* mtx, + std::unique_ptr>& factors) { using matrix_type = matrix::Csr; const auto exec = mtx->get_executor(); @@ -179,16 +182,16 @@ std::unique_ptr> symbolic_lu( array out_val_array{exec, out_nnz}; exec->copy_from(host_exec.get(), out_nnz, out_col_idxs.data(), out_col_idx_array.get_data()); - auto result = matrix_type::create( + factors = matrix_type::create( exec, mtx->get_size(), std::move(out_val_array), std::move(out_col_idx_array), std::move(out_row_ptr_array)); - return result; } -#define GKO_DECLARE_SYMBOLIC_LU(ValueType, IndexType) \ - std::unique_ptr> symbolic_lu( \ - const matrix::Csr* mtx) +#define GKO_DECLARE_SYMBOLIC_LU(ValueType, IndexType) \ + void symbolic_lu( \ + const matrix::Csr* mtx, \ + std::unique_ptr>& factors) GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_LU); diff --git a/core/factorization/symbolic.hpp b/core/factorization/symbolic.hpp index b16ded3fcb2..e244993d555 100644 --- a/core/factorization/symbolic.hpp +++ b/core/factorization/symbolic.hpp @@ -37,15 +37,24 @@ namespace gko { namespace factorization { -/** Computes the symbolic Cholesky factorization of the given matrix. */ +/** + * Computes the symbolic Cholesky factorization of the given matrix. + * @param mtx the input matrix + * @param factors the output factors stored in a combined pattern + */ template -std::unique_ptr> symbolic_cholesky( - const matrix::Csr*); - -/** Computes the symbolic LU factorization of the given matrix. */ +void symbolic_cholesky( + const matrix::Csr* mtx, + std::unique_ptr>& factors); + +/** + * Computes the symbolic LU factorization of the given matrix. + * @param mtx the input matrix + * @param factors the output factors stored in a combined pattern + */ template -std::unique_ptr> symbolic_lu( - const matrix::Csr*); +void symbolic_lu(const matrix::Csr* mtx, + std::unique_ptr>& factors); } // namespace factorization diff --git a/core/test/factorization/elimination_forest.cpp b/core/test/factorization/elimination_forest.cpp index 36876431a23..ed177e7ccd0 100644 --- a/core/test/factorization/elimination_forest.cpp +++ b/core/test/factorization/elimination_forest.cpp @@ -86,19 +86,20 @@ TYPED_TEST(EliminationForest, WorksForExample) {0, 1, 0, 1, 1, 0, 0, 1, 0, 1}}, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); - GKO_ASSERT_ARRAY_EQ(forest.parents, + GKO_ASSERT_ARRAY_EQ(forest->parents, I({2, 4, 6, 8, 8, 6, 7, 8, 9, 10})); - GKO_ASSERT_ARRAY_EQ(forest.child_ptrs, + GKO_ASSERT_ARRAY_EQ(forest->child_ptrs, I({0, 0, 0, 1, 1, 2, 2, 4, 5, 8, 9, 10})); - GKO_ASSERT_ARRAY_EQ(forest.children, + GKO_ASSERT_ARRAY_EQ(forest->children, I({0, 1, 2, 5, 6, 3, 4, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.postorder, + GKO_ASSERT_ARRAY_EQ(forest->postorder, I({3, 1, 4, 0, 2, 5, 6, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.inv_postorder, + GKO_ASSERT_ARRAY_EQ(forest->inv_postorder, I({3, 1, 4, 0, 2, 5, 6, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.postorder_parents, + GKO_ASSERT_ARRAY_EQ(forest->postorder_parents, I({8, 2, 8, 4, 6, 6, 7, 8, 9, 10})); } @@ -122,19 +123,20 @@ TYPED_TEST(EliminationForest, WorksForSeparable) }, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); - GKO_ASSERT_ARRAY_EQ(forest.parents, + GKO_ASSERT_ARRAY_EQ(forest->parents, I({2, 2, 10, 4, 5, 9, 7, 9, 9, 10})); - GKO_ASSERT_ARRAY_EQ(forest.child_ptrs, + GKO_ASSERT_ARRAY_EQ(forest->child_ptrs, I({0, 0, 0, 2, 2, 3, 4, 4, 5, 5, 8, 10})); - GKO_ASSERT_ARRAY_EQ(forest.children, + GKO_ASSERT_ARRAY_EQ(forest->children, I({0, 1, 3, 4, 6, 5, 7, 8, 2, 9})); - GKO_ASSERT_ARRAY_EQ(forest.postorder, + GKO_ASSERT_ARRAY_EQ(forest->postorder, I({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.inv_postorder, + GKO_ASSERT_ARRAY_EQ(forest->inv_postorder, I({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.postorder_parents, + GKO_ASSERT_ARRAY_EQ(forest->postorder_parents, I({2, 2, 10, 4, 5, 9, 7, 9, 9, 10})); } @@ -157,18 +159,19 @@ TYPED_TEST(EliminationForest, WorksForPostOrderNotSelfInverse) {0, 0, 0, 0, 0, 0, 1, 0, 1, 1}, }, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); - GKO_ASSERT_ARRAY_EQ(forest.parents, + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); + GKO_ASSERT_ARRAY_EQ(forest->parents, I({2, 4, 6, 8, 5, 6, 7, 8, 9, 10})); - GKO_ASSERT_ARRAY_EQ(forest.child_ptrs, + GKO_ASSERT_ARRAY_EQ(forest->child_ptrs, I({0, 0, 0, 1, 1, 2, 3, 5, 6, 8, 9, 10})); - GKO_ASSERT_ARRAY_EQ(forest.children, + GKO_ASSERT_ARRAY_EQ(forest->children, I({0, 1, 4, 2, 5, 6, 3, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.postorder, + GKO_ASSERT_ARRAY_EQ(forest->postorder, I({3, 0, 2, 1, 4, 5, 6, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.inv_postorder, + GKO_ASSERT_ARRAY_EQ(forest->inv_postorder, I({1, 3, 2, 0, 4, 5, 6, 7, 8, 9})); - GKO_ASSERT_ARRAY_EQ(forest.postorder_parents, + GKO_ASSERT_ARRAY_EQ(forest->postorder_parents, I({8, 2, 6, 4, 5, 6, 7, 8, 9, 10})); } @@ -180,7 +183,8 @@ TYPED_TEST(EliminationForest, WorksForAni1) std::ifstream stream{gko::matrices::location_ani1_mtx}; auto mtx = gko::read(stream, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); // the elimination tree is a path gko::array iota_arr{this->ref, 36}; @@ -188,13 +192,13 @@ TYPED_TEST(EliminationForest, WorksForAni1) std::iota(iota_arr.get_data(), iota_arr.get_data() + 36, 1); std::iota(iota_arr2.get_data() + 1, iota_arr2.get_data() + 38, 0); iota_arr2.get_data()[0] = 0; - GKO_ASSERT_ARRAY_EQ(forest.parents, iota_arr); - GKO_ASSERT_ARRAY_EQ(forest.postorder_parents, iota_arr); - GKO_ASSERT_ARRAY_EQ(forest.child_ptrs, iota_arr2); + GKO_ASSERT_ARRAY_EQ(forest->parents, iota_arr); + GKO_ASSERT_ARRAY_EQ(forest->postorder_parents, iota_arr); + GKO_ASSERT_ARRAY_EQ(forest->child_ptrs, iota_arr2); std::iota(iota_arr.get_data(), iota_arr.get_data() + 36, 0); - GKO_ASSERT_ARRAY_EQ(forest.children, iota_arr); - GKO_ASSERT_ARRAY_EQ(forest.postorder, iota_arr); - GKO_ASSERT_ARRAY_EQ(forest.inv_postorder, iota_arr); + GKO_ASSERT_ARRAY_EQ(forest->children, iota_arr); + GKO_ASSERT_ARRAY_EQ(forest->postorder, iota_arr); + GKO_ASSERT_ARRAY_EQ(forest->inv_postorder, iota_arr); } @@ -205,29 +209,30 @@ TYPED_TEST(EliminationForest, WorksForAni1Amd) std::ifstream stream{gko::matrices::location_ani1_amd_mtx}; auto mtx = gko::read(stream, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); GKO_ASSERT_ARRAY_EQ( - forest.parents, + forest->parents, I({4, 2, 3, 4, 5, 29, 7, 8, 9, 27, 11, 12, 13, 14, 16, 16, 17, 18, 24, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36})); GKO_ASSERT_ARRAY_EQ( - forest.child_ptrs, + forest->child_ptrs, I({0, 0, 0, 1, 2, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12, 12, 14, 15, 16, 16, 17, 18, 19, 20, 22, 23, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36})); GKO_ASSERT_ARRAY_EQ( - forest.children, + forest->children, I({1, 2, 0, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 18, 23, 24, 25, 9, 26, 27, 5, 28, 29, 30, 31, 32, 33, 34, 35})); gko::array iota_arr{this->ref, 36}; std::iota(iota_arr.get_data(), iota_arr.get_data() + 36, 0); - GKO_ASSERT_ARRAY_EQ(forest.postorder, iota_arr); - GKO_ASSERT_ARRAY_EQ(forest.inv_postorder, iota_arr); + GKO_ASSERT_ARRAY_EQ(forest->postorder, iota_arr); + GKO_ASSERT_ARRAY_EQ(forest->inv_postorder, iota_arr); GKO_ASSERT_ARRAY_EQ( - forest.postorder_parents, + forest->postorder_parents, I({4, 2, 3, 4, 5, 29, 7, 8, 9, 27, 11, 12, 13, 14, 16, 16, 17, 18, 24, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36})); diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 6171fdb9663..98d1de86a7f 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -510,6 +510,56 @@ RegisteredOperation make_register_operation(const char* name, "semi-colon warnings") +/** + * Binds a host-side kernel (independent of executor type) to an Operation. + * + * It also defines a helper function which creates the associated operation. + * Any input arguments passed to the helper function are forwarded to the + * kernel when the operation is executed. + * The kernel name is searched for in the namespace where this macro is called. + * Host operations are used to make computations that are not part of the device + * kernels visible to profiling loggers and benchmarks. + * + * @param _name operation name + * @param _kernel kernel which will be bound to the operation + * + * Example + * ------- + * + * ```c++ + * void host_kernel(int) { + * // do some expensive computations + * } + * + * // Bind the kernels to the operation + * GKO_REGISTER_HOST_OPERATION(my_op, host_kernel); + * + * int main() { + * // create executor + * auto ref = ReferenceExecutor::create(); + * + * // create the operation + * auto op = make_my_op(5); // x = 5 + * + * ref->run(op); // run host kernel + * } + * ``` + * + * @ingroup Executor + */ +#define GKO_REGISTER_HOST_OPERATION(_name, _kernel) \ + template \ + auto make_##_name(Args&&... args) \ + { \ + return ::gko::detail::make_register_operation( \ + #_name, sizeof...(Args), \ + [&args...](auto) { _kernel(std::forward(args)...); }); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + #define GKO_DECLARE_EXECUTOR_FRIEND(_type, ...) friend class _type /** diff --git a/reference/test/factorization/cholesky_kernels.cpp b/reference/test/factorization/cholesky_kernels.cpp index 957f685c737..52d3225b0cf 100644 --- a/reference/test/factorization/cholesky_kernels.cpp +++ b/reference/test/factorization/cholesky_kernels.cpp @@ -99,11 +99,12 @@ TYPED_TEST(Cholesky, KernelSymbolicCountExample) {0, 0, 0, 1, 1, 0, 0, 1, 1, 0}, {0, 1, 0, 1, 1, 0, 0, 1, 0, 1}}, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); gko::array row_nnz{this->ref, 10}; gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, row_nnz.get_data(), this->tmp); + this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 2, 1, 2, 1, 3, 5, 4, 6})); } @@ -125,15 +126,16 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorizeExample) {0, 0, 0, 1, 1, 0, 0, 1, 1, 0}, {0, 1, 0, 1, 1, 0, 0, 1, 0, 1}}, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 26); gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, l_factor->get_row_ptrs(), this->tmp); + this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); gko::kernels::reference::components::prefix_sum( this->ref, l_factor->get_row_ptrs(), 11); gko::kernels::reference::cholesky::cholesky_symbolic_factorize( - this->ref, mtx.get(), forest, l_factor.get(), this->tmp); + this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, @@ -165,11 +167,12 @@ TYPED_TEST(Cholesky, KernelSymbolicCountSeparable) {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, {0, 0, 0, 0, 1, 0, 1, 0, 1, 1}}, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); gko::array row_nnz{this->ref, 10}; gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, row_nnz.get_data(), this->tmp); + this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 3, 1, 2, 2, 1, 2, 1, 6})); } @@ -191,15 +194,16 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorizeSeparable) {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, {0, 0, 0, 0, 1, 0, 1, 0, 1, 1}}, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 26); gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, l_factor->get_row_ptrs(), this->tmp); + this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); gko::kernels::reference::components::prefix_sum( this->ref, l_factor->get_row_ptrs(), 11); gko::kernels::reference::cholesky::cholesky_symbolic_factorize( - this->ref, mtx.get(), forest, l_factor.get(), this->tmp); + this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, @@ -221,11 +225,12 @@ TYPED_TEST(Cholesky, KernelSymbolicCountAni1) using index_type = typename TestFixture::index_type; std::ifstream stream{gko::matrices::location_ani1_mtx}; auto mtx = gko::read(stream, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); gko::array row_nnz{this->ref, mtx->get_size()[0]}; gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, row_nnz.get_data(), this->tmp); + this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); GKO_ASSERT_ARRAY_EQ( row_nnz, I({1, 2, 3, 3, 2, 2, 7, 7, 7, 8, 8, 7, @@ -242,17 +247,18 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorizeAni1) std::ifstream ref_stream{gko::matrices::location_ani1_chol_mtx}; auto mtx = gko::read(stream, this->ref); auto l_factor_ref = gko::read(ref_stream, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); auto l_factor = matrix_type::create(this->ref, l_factor_ref->get_size(), l_factor_ref->get_num_stored_elements()); gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, l_factor->get_row_ptrs(), this->tmp); + this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); gko::kernels::reference::components::prefix_sum( this->ref, l_factor->get_row_ptrs(), mtx->get_size()[0] + 1); gko::kernels::reference::cholesky::cholesky_symbolic_factorize( - this->ref, mtx.get(), forest, l_factor.get(), this->tmp); + this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, l_factor_ref); } @@ -268,7 +274,8 @@ TYPED_TEST(Cholesky, SymbolicFactorizeAni1) auto l_factor_ref = gko::read(ref_stream, this->ref); auto combined_factor_ref = this->combined_factor(l_factor_ref.get()); - auto combined_factor = gko::factorization::symbolic_cholesky(mtx.get()); + std::unique_ptr combined_factor; + gko::factorization::symbolic_cholesky(mtx.get(), combined_factor); GKO_ASSERT_MTX_EQ_SPARSITY(combined_factor, combined_factor_ref); } @@ -280,11 +287,12 @@ TYPED_TEST(Cholesky, KernelSymbolicCountAni1Amd) using index_type = typename TestFixture::index_type; std::ifstream stream{gko::matrices::location_ani1_amd_mtx}; auto mtx = gko::read(stream, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); gko::array row_nnz{this->ref, mtx->get_size()[0]}; gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, row_nnz.get_data(), this->tmp); + this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); GKO_ASSERT_ARRAY_EQ( row_nnz, I({1, 1, 2, 3, 5, 4, 1, 2, 3, 4, 1, 2, @@ -301,17 +309,18 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorizeAni1Amd) std::ifstream ref_stream{gko::matrices::location_ani1_amd_chol_mtx}; auto mtx = gko::read(stream, this->ref); auto l_factor_ref = gko::read(ref_stream, this->ref); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); auto l_factor = matrix_type::create(this->ref, l_factor_ref->get_size(), l_factor_ref->get_num_stored_elements()); gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, l_factor->get_row_ptrs(), this->tmp); + this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); gko::kernels::reference::components::prefix_sum( this->ref, l_factor->get_row_ptrs(), mtx->get_size()[0] + 1); gko::kernels::reference::cholesky::cholesky_symbolic_factorize( - this->ref, mtx.get(), forest, l_factor.get(), this->tmp); + this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, l_factor_ref); } @@ -327,7 +336,8 @@ TYPED_TEST(Cholesky, SymbolicFactorizeAni1Amd) auto l_factor_ref = gko::read(ref_stream, this->ref); auto combined_factor_ref = this->combined_factor(l_factor_ref.get()); - auto combined_factor = gko::factorization::symbolic_cholesky(mtx.get()); + std::unique_ptr combined_factor; + gko::factorization::symbolic_cholesky(mtx.get(), combined_factor); GKO_ASSERT_MTX_EQ_SPARSITY(combined_factor, combined_factor_ref); } diff --git a/reference/test/factorization/lu_kernels.cpp b/reference/test/factorization/lu_kernels.cpp index 177f398de50..49d24084ac9 100644 --- a/reference/test/factorization/lu_kernels.cpp +++ b/reference/test/factorization/lu_kernels.cpp @@ -118,7 +118,8 @@ TYPED_TEST(Lu, SymbolicCholeskyWorks) this->setup(gko::matrices::location_ani1_mtx, gko::matrices::location_ani1_lu_mtx); - auto lu = gko::factorization::symbolic_cholesky(this->mtx.get()); + std::unique_ptr> lu; + gko::factorization::symbolic_cholesky(this->mtx.get(), lu); GKO_ASSERT_MTX_EQ_SPARSITY(lu, this->mtx_lu); } @@ -131,7 +132,8 @@ TYPED_TEST(Lu, SymbolicLUWorks) this->setup(gko::matrices::location_ani1_nonsymm_mtx, gko::matrices::location_ani1_nonsymm_lu_mtx); - auto lu = gko::factorization::symbolic_lu(this->mtx.get()); + std::unique_ptr> lu; + gko::factorization::symbolic_lu(this->mtx.get(), lu); GKO_ASSERT_MTX_EQ_SPARSITY(lu, this->mtx_lu); } diff --git a/test/factorization/cholesky_kernels.cpp b/test/factorization/cholesky_kernels.cpp index 8c084d7358f..a69fa026017 100644 --- a/test/factorization/cholesky_kernels.cpp +++ b/test/factorization/cholesky_kernels.cpp @@ -116,15 +116,19 @@ TYPED_TEST(Cholesky, KernelSymbolicCount) SCOPED_TRACE(pair.first); const auto& mtx = pair.second; const auto dmtx = gko::clone(this->exec, mtx); - auto forest = gko::factorization::compute_elim_forest(mtx.get()); - auto dforest = gko::factorization::compute_elim_forest(dmtx.get()); + std::unique_ptr> + forest; + std::unique_ptr> + dforest; + gko::factorization::compute_elim_forest(mtx.get(), forest); + gko::factorization::compute_elim_forest(dmtx.get(), dforest); gko::array row_nnz{this->ref, mtx->get_size()[0]}; gko::array drow_nnz{this->exec, mtx->get_size()[0]}; gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, row_nnz.get_data(), this->tmp); + this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); gko::kernels::EXEC_NAMESPACE::cholesky::cholesky_symbolic_count( - this->exec, dmtx.get(), dforest, drow_nnz.get_data(), this->dtmp); + this->exec, dmtx.get(), *dforest, drow_nnz.get_data(), this->dtmp); GKO_ASSERT_ARRAY_EQ(drow_nnz, row_nnz); } @@ -141,10 +145,12 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorize) const auto& mtx = pair.second; const auto dmtx = gko::clone(this->exec, mtx); const auto num_rows = mtx->get_size()[0]; - const auto forest = gko::factorization::compute_elim_forest(mtx.get()); + std::unique_ptr> + forest; + gko::factorization::compute_elim_forest(mtx.get(), forest); gko::array row_ptrs{this->ref, num_rows + 1}; gko::kernels::reference::cholesky::cholesky_symbolic_count( - this->ref, mtx.get(), forest, row_ptrs.get_data(), this->tmp); + this->ref, mtx.get(), *forest, row_ptrs.get_data(), this->tmp); gko::kernels::reference::components::prefix_sum( this->ref, row_ptrs.get_data(), num_rows + 1); const auto nnz = @@ -157,16 +163,17 @@ TYPED_TEST(Cholesky, KernelSymbolicFactorize) gko::array{this->exec, nnz}, gko::array{this->exec, nnz}, row_ptrs); // need to call the device kernels to initialize dtmp - const auto dforest = - gko::factorization::compute_elim_forest(dmtx.get()); + std::unique_ptr> + dforest; + gko::factorization::compute_elim_forest(dmtx.get(), dforest); gko::array dtmp_ptrs{this->exec, num_rows + 1}; gko::kernels::EXEC_NAMESPACE::cholesky::cholesky_symbolic_count( - this->exec, dmtx.get(), dforest, dtmp_ptrs.get_data(), this->dtmp); + this->exec, dmtx.get(), *dforest, dtmp_ptrs.get_data(), this->dtmp); gko::kernels::reference::cholesky::cholesky_symbolic_factorize( - this->ref, mtx.get(), forest, l_factor.get(), this->tmp); + this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); gko::kernels::EXEC_NAMESPACE::cholesky::cholesky_symbolic_factorize( - this->exec, dmtx.get(), dforest, dl_factor.get(), this->dtmp); + this->exec, dmtx.get(), *dforest, dl_factor.get(), this->dtmp); GKO_ASSERT_MTX_EQ_SPARSITY(dl_factor, l_factor); } From 4852cefcf2f51d8529e75210544592133db9ec40 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 16 Dec 2022 18:30:48 +0100 Subject: [PATCH 2/3] add host operation test --- test/base/executor.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/base/executor.cpp b/test/base/executor.cpp index cbfe6f9e7cb..dfdf281e67c 100644 --- a/test/base/executor.cpp +++ b/test/base/executor.cpp @@ -104,6 +104,21 @@ TEST_F(Executor, RunsCorrectOperation) } +void host_operation(int& value) { value = 1234; } + +GKO_REGISTER_HOST_OPERATION(host_operation, host_operation); + + +TEST_F(Executor, RunsCorrectHostOperation) +{ + int value = 0; + + exec->run(make_host_operation(value)); + + ASSERT_EQ(1234, value); +} + + #ifndef GKO_COMPILING_REFERENCE From 93737711e56cffe76f2b35919bf7b4143a769352 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 16 Dec 2022 19:15:30 +0100 Subject: [PATCH 3/3] formatting improvements Co-authored-by: Yuhsiang M. Tsai --- core/factorization/symbolic.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/factorization/symbolic.hpp b/core/factorization/symbolic.hpp index e244993d555..312c15046cf 100644 --- a/core/factorization/symbolic.hpp +++ b/core/factorization/symbolic.hpp @@ -39,6 +39,7 @@ namespace factorization { /** * Computes the symbolic Cholesky factorization of the given matrix. + * * @param mtx the input matrix * @param factors the output factors stored in a combined pattern */ @@ -49,6 +50,7 @@ void symbolic_cholesky( /** * Computes the symbolic LU factorization of the given matrix. + * * @param mtx the input matrix * @param factors the output factors stored in a combined pattern */