Add support for host operations to executor #1232

Merged
merged 3 commits on Dec 18, 2022

12 changes: 12 additions & 0 deletions benchmark/preconditioner/preconditioner.cpp
@@ -201,10 +201,16 @@ void run_preconditioner(const char* precond_name,
         auto gen_logger =
             std::make_shared<OperationLogger>(FLAGS_nested_names);
         exec->add_logger(gen_logger);
+        if (exec->get_master() != exec) {
+            exec->get_master()->add_logger(gen_logger);
+        }
         std::unique_ptr<gko::LinOp> precond_op;
         for (auto i = 0u; i < ic_gen.get_num_repetitions(); ++i) {
             precond_op = precond->generate(system_matrix);
         }
+        if (exec->get_master() != exec) {
+            exec->get_master()->remove_logger(gko::lend(gen_logger));
+        }
         exec->remove_logger(gko::lend(gen_logger));
 
         gen_logger->write_data(this_precond_data["generate"]["components"],
@@ -213,9 +219,15 @@ void run_preconditioner(const char* precond_name,
         auto apply_logger =
             std::make_shared<OperationLogger>(FLAGS_nested_names);
         exec->add_logger(apply_logger);
+        if (exec->get_master() != exec) {
+            exec->get_master()->add_logger(apply_logger);
+        }
         for (auto i = 0u; i < ic_apply.get_num_repetitions(); ++i) {
             precond_op->apply(lend(b), lend(x_clone));
         }
+        if (exec->get_master() != exec) {
+            exec->get_master()->remove_logger(gko::lend(apply_logger));
+        }
         exec->remove_logger(gko::lend(apply_logger));
 
         apply_logger->write_data(this_precond_data["apply"]["components"],
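Note on the benchmark changes above (and the analogous solver changes below): with this PR, host operations are logged on the master (host) executor, so the benchmarks attach each OperationLogger to both the device executor and its master while timing. A minimal sketch of that pattern, using only the Ginkgo calls visible in the diff; the helper name and its Work parameter are illustrative, not part of the PR:

    // Attach the logger to the device executor and, if different, to its
    // master executor, so host operations are captured as well.
    template <typename Work>
    void run_with_operation_logger(std::shared_ptr<gko::Executor> exec,
                                   std::shared_ptr<OperationLogger> logger,
                                   Work&& work)
    {
        exec->add_logger(logger);
        if (exec->get_master() != exec) {
            exec->get_master()->add_logger(logger);
        }
        work();  // e.g. precond->generate(...) or solver->apply(...)
        if (exec->get_master() != exec) {
            exec->get_master()->remove_logger(gko::lend(logger));
        }
        exec->remove_logger(gko::lend(logger));
    }
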
12 changes: 12 additions & 0 deletions benchmark/solver/solver.cpp
@@ -435,12 +435,18 @@ void solve_system(const std::string& solver_name,
         auto gen_logger =
             std::make_shared<OperationLogger>(FLAGS_nested_names);
         exec->add_logger(gen_logger);
+        if (exec->get_master() != exec) {
+            exec->get_master()->add_logger(gen_logger);
+        }
 
         auto precond = precond_factory.at(precond_name)(exec);
         solver = generate_solver(exec, give(precond), solver_name,
                                  FLAGS_max_iters)
                      ->generate(system_matrix);
 
+        if (exec->get_master() != exec) {
+            exec->get_master()->remove_logger(gko::lend(gen_logger));
+        }
         exec->remove_logger(gko::lend(gen_logger));
         gen_logger->write_data(solver_json["generate"]["components"],
                                allocator, 1);
@@ -459,9 +465,15 @@ void solve_system(const std::string& solver_name,
         auto apply_logger =
             std::make_shared<OperationLogger>(FLAGS_nested_names);
         exec->add_logger(apply_logger);
+        if (exec->get_master() != exec) {
+            exec->get_master()->add_logger(apply_logger);
+        }
 
         solver->apply(lend(b), lend(x_clone));
 
+        if (exec->get_master() != exec) {
+            exec->get_master()->remove_logger(gko::lend(apply_logger));
+        }
         exec->remove_logger(gko::lend(apply_logger));
         apply_logger->write_data(solver_json["apply"]["components"],
                                  allocator, 1);
35 changes: 19 additions & 16 deletions core/factorization/elimination_forest.cpp
@@ -181,35 +181,38 @@ void elimination_forest<IndexType>::set_executor(
 
 
 template <typename ValueType, typename IndexType>
-elimination_forest<IndexType> compute_elim_forest(
-    const matrix::Csr<ValueType, IndexType>* mtx)
+void compute_elim_forest(const matrix::Csr<ValueType, IndexType>* mtx,
+                         std::unique_ptr<elimination_forest<IndexType>>& forest)
 {
     const auto host_exec = mtx->get_executor()->get_master();
     const auto host_mtx = make_temporary_clone(host_exec, mtx);
     const auto num_rows = static_cast<IndexType>(host_mtx->get_size()[0]);
-    elimination_forest<IndexType> forest{host_exec, num_rows};
+    forest =
+        std::make_unique<elimination_forest<IndexType>>(host_exec, num_rows);
     compute_elim_forest_parent_impl(host_exec, host_mtx->get_const_row_ptrs(),
                                     host_mtx->get_const_col_idxs(), num_rows,
-                                    forest.parents.get_data());
-    compute_elim_forest_children_impl(forest.parents.get_const_data(), num_rows,
-                                      forest.child_ptrs.get_data(),
-                                      forest.children.get_data());
+                                    forest->parents.get_data());
+    compute_elim_forest_children_impl(forest->parents.get_const_data(),
+                                      num_rows, forest->child_ptrs.get_data(),
+                                      forest->children.get_data());
     compute_elim_forest_postorder_impl(
-        host_exec, forest.parents.get_const_data(),
-        forest.child_ptrs.get_const_data(), forest.children.get_const_data(),
-        num_rows, forest.postorder.get_data(), forest.inv_postorder.get_data());
+        host_exec, forest->parents.get_const_data(),
+        forest->child_ptrs.get_const_data(), forest->children.get_const_data(),
+        num_rows, forest->postorder.get_data(),
+        forest->inv_postorder.get_data());
     compute_elim_forest_postorder_parent_impl(
-        forest.parents.get_const_data(), forest.inv_postorder.get_const_data(),
-        num_rows, forest.postorder_parents.get_data());
+        forest->parents.get_const_data(),
+        forest->inv_postorder.get_const_data(), num_rows,
+        forest->postorder_parents.get_data());
 
-    forest.set_executor(mtx->get_executor());
-    return forest;
+    forest->set_executor(mtx->get_executor());
 }
 
 
 #define GKO_DECLARE_COMPUTE_ELIM_FOREST(ValueType, IndexType) \
-    elimination_forest<IndexType> compute_elim_forest( \
-        const matrix::Csr<ValueType, IndexType>* mtx)
+    void compute_elim_forest( \
+        const matrix::Csr<ValueType, IndexType>* mtx, \
+        std::unique_ptr<elimination_forest<IndexType>>& forest)
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COMPUTE_ELIM_FOREST);
 
5 changes: 3 additions & 2 deletions core/factorization/elimination_forest.hpp
@@ -63,8 +63,9 @@ struct elimination_forest {
 
 
 template <typename ValueType, typename IndexType>
-elimination_forest<IndexType> compute_elim_forest(
-    const matrix::Csr<ValueType, IndexType>* mtx);
+void compute_elim_forest(
+    const matrix::Csr<ValueType, IndexType>* mtx,
+    std::unique_ptr<elimination_forest<IndexType>>& forest);
 
 
 }  // namespace factorization
7 changes: 5 additions & 2 deletions core/factorization/lu.cpp
@@ -56,6 +56,9 @@ GKO_REGISTER_OPERATION(build_lookup_offsets, csr::build_lookup_offsets);
 GKO_REGISTER_OPERATION(build_lookup, csr::build_lookup);
 GKO_REGISTER_OPERATION(initialize, lu_factorization::initialize);
 GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize);
+GKO_REGISTER_HOST_OPERATION(symbolic_cholesky,
+                            gko::factorization::symbolic_cholesky);
+GKO_REGISTER_HOST_OPERATION(symbolic_lu, gko::factorization::symbolic_lu);
 
 
 }  // namespace
@@ -93,9 +96,9 @@ std::unique_ptr<LinOp> Lu<ValueType, IndexType>::generate_impl(
     std::unique_ptr<matrix_type> factors;
     if (!parameters_.symbolic_factorization) {
         if (parameters_.symmetric_sparsity) {
-            factors = gko::factorization::symbolic_cholesky(mtx.get());
+            exec->run(make_symbolic_cholesky(mtx.get(), factors));
         } else {
-            factors = gko::factorization::symbolic_lu(mtx.get());
+            exec->run(make_symbolic_lu(mtx.get(), factors));
         }
     } else {
         const auto& symbolic = parameters_.symbolic_factorization;
43 changes: 23 additions & 20 deletions core/factorization/symbolic.cpp
@@ -58,45 +58,48 @@ GKO_REGISTER_OPERATION(cholesky_symbolic,
 GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
 GKO_REGISTER_OPERATION(initialize, lu_factorization::initialize);
 GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize);
+GKO_REGISTER_HOST_OPERATION(compute_elim_forest, compute_elim_forest);
 
 
 }  // namespace
 
 
 /** Computes the symbolic Cholesky factorization of the given matrix. */
 template <typename ValueType, typename IndexType>
-std::unique_ptr<matrix::Csr<ValueType, IndexType>> symbolic_cholesky(
-    const matrix::Csr<ValueType, IndexType>* mtx)
+void symbolic_cholesky(
+    const matrix::Csr<ValueType, IndexType>* mtx,
+    std::unique_ptr<matrix::Csr<ValueType, IndexType>>& factors)
 {
     using matrix_type = matrix::Csr<ValueType, IndexType>;
     const auto exec = mtx->get_executor();
     const auto host_exec = exec->get_master();
-    const auto forest = compute_elim_forest(mtx);
+    std::unique_ptr<elimination_forest<IndexType>> forest;
+    exec->run(make_compute_elim_forest(mtx, forest));
Review discussion on this change:

Member: Is there some drawback in doing exec->get_master()->run(...) instead of having a separate call? Then you don't need GKO_REGISTER_HOST_OPERATION at all, right?

Member Author (@upsj, Dec 8, 2022): It's not being logged with the executor the object lives on, which is also the reason we need the workaround in the benchmarks. With the suggested approach, we'd need to pass an additional Executor parameter, provide overloads for both OmpExecutor and ReferenceExecutor, and put them inside the kernels namespace, which IMO isn't a good place for core functionality.

Member: In my opinion, anything computationally expensive shouldn't happen on the host; it should be on the kernel side. That is also in line with the Ginkgo philosophy of kernels doing the hard work and the host orchestrating the calls. It is perfectly fine if these kernels are serial (reference, or OpenMP forced onto a single thread), but I don't think we should do anything intensive on the host.

Member Author (@upsj): The operations we are looking at here (symbolic factorizations, elimination tree, later on MC64, AMD, METIS, ...) are either inherently sequential or very hard to parallelize (see e.g. RCM), let alone implement on GPUs. So either we duplicate the same implementation in OpenMP and Reference and call the kernels for host data only, or we implement them in a single place, in this case core. I think the latter is the more sensible choice here.

Member: I agree that the operations are sequential and probably not suited for GPUs. My point is that they perform computations which are potentially expensive (as the problem size scales up) and hence should not live on the core side. Regarding the duplication, we can have a common kernel file (like we do for HIP and CUDA) and use the serial implementation for OpenMP (forcing it onto one thread) and reference alike.

Performing computation on the host side can also have implications for future design. For example, if we want to add stream/asynchronous tasking for these operations, keeping them on the kernel side would let us keep the asynchronous tasking/streaming kernel interface and keep the core side clean.

IMO avoiding this also keeps our logging and profiling interface clean, without fragmenting it into host operations and kernel operations.

Member Author (@upsj, Dec 9, 2022): I think we need to unpack the meaning of core and kernel a bit more here: core is everything in libginkgo.so, kernels are everything in libginkgo_<backend>.so. You are talking about a conceptual separation that IMO doesn't apply here; we already have some expensive operations in core, mainly factorization-related. The only thing this PR does is make them visible to profilers regardless of the executor the objects live on. The logging interface is not impacted by this change, since to the loggers, host and kernel operations look the same.

If we were to make them kernel operations, we would have to do a lot more work: kernels can't create Ginkgo objects, so we would need to operate on arrays and combine them into Ginkgo objects on the core side again. As a minor point, it would also double the compilation time and binary size of the resulting functions.

Member: Yes, I would advocate for making them kernel operations, so this might be something we discuss before merging this PR.

Allocations should not happen inside an operation, and I think that is all the more reason to have the allocations on the host side and the operations on the kernel side. We log and profile the allocations separately anyway, so having allocations inside the host operation makes the profiling and logging muddled.

Member Author (@upsj): We allocate a lot inside kernels, e.g. SpGEMM/SpGEAM, sparselib bindings, and all kernels that use temporary memory; removing these allocations or moving them into core would complicate the control flow significantly.

MarcelKoch marked this conversation as resolved.
     const auto num_rows = mtx->get_size()[0];
     array<IndexType> row_ptrs{exec, num_rows + 1};
     array<IndexType> tmp{exec};
     exec->run(
-        make_cholesky_symbolic_count(mtx, forest, row_ptrs.get_data(), tmp));
+        make_cholesky_symbolic_count(mtx, *forest, row_ptrs.get_data(), tmp));
     exec->run(make_prefix_sum(row_ptrs.get_data(), num_rows + 1));
     const auto factor_nnz = static_cast<size_type>(
         exec->copy_val_to_host(row_ptrs.get_const_data() + num_rows));
-    auto factor = matrix_type::create(
+    factors = matrix_type::create(
         exec, mtx->get_size(), array<ValueType>{exec, factor_nnz},
         array<IndexType>{exec, factor_nnz}, std::move(row_ptrs));
-    exec->run(make_cholesky_symbolic(mtx, forest, factor.get(), tmp));
-    factor->sort_by_column_index();
-    auto lt_factor = as<matrix_type>(factor->transpose());
+    exec->run(make_cholesky_symbolic(mtx, *forest, factors.get(), tmp));
+    factors->sort_by_column_index();
+    auto lt_factor = as<matrix_type>(factors->transpose());
     const auto scalar =
         initialize<matrix::Dense<ValueType>>({one<ValueType>()}, exec);
     const auto id = matrix::Identity<ValueType>::create(exec, num_rows);
-    lt_factor->apply(scalar.get(), id.get(), scalar.get(), factor.get());
-    return factor;
+    lt_factor->apply(scalar.get(), id.get(), scalar.get(), factors.get());
 }
 
 
-#define GKO_DECLARE_SYMBOLIC_CHOLESKY(ValueType, IndexType) \
-    std::unique_ptr<matrix::Csr<ValueType, IndexType>> symbolic_cholesky( \
-        const matrix::Csr<ValueType, IndexType>* mtx)
+#define GKO_DECLARE_SYMBOLIC_CHOLESKY(ValueType, IndexType) \
+    void symbolic_cholesky( \
+        const matrix::Csr<ValueType, IndexType>* mtx, \
+        std::unique_ptr<matrix::Csr<ValueType, IndexType>>& factors)
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_CHOLESKY);
 
@@ -109,8 +112,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_CHOLESKY);
  * "GSoFa: Scalable Sparse Symbolic LU Factorization on GPUs," arXiv 2021
  */
 template <typename ValueType, typename IndexType>
-std::unique_ptr<matrix::Csr<ValueType, IndexType>> symbolic_lu(
-    const matrix::Csr<ValueType, IndexType>* mtx)
+void symbolic_lu(const matrix::Csr<ValueType, IndexType>* mtx,
+                 std::unique_ptr<matrix::Csr<ValueType, IndexType>>& factors)
 {
     using matrix_type = matrix::Csr<ValueType, IndexType>;
     const auto exec = mtx->get_executor();
@@ -179,16 +182,16 @@ std::unique_ptr<matrix::Csr<ValueType, IndexType>> symbolic_lu(
     array<ValueType> out_val_array{exec, out_nnz};
     exec->copy_from(host_exec.get(), out_nnz, out_col_idxs.data(),
                     out_col_idx_array.get_data());
-    auto result = matrix_type::create(
+    factors = matrix_type::create(
         exec, mtx->get_size(), std::move(out_val_array),
         std::move(out_col_idx_array), std::move(out_row_ptr_array));
-    return result;
 }
 
 
-#define GKO_DECLARE_SYMBOLIC_LU(ValueType, IndexType) \
-    std::unique_ptr<matrix::Csr<ValueType, IndexType>> symbolic_lu( \
-        const matrix::Csr<ValueType, IndexType>* mtx)
+#define GKO_DECLARE_SYMBOLIC_LU(ValueType, IndexType) \
+    void symbolic_lu( \
+        const matrix::Csr<ValueType, IndexType>* mtx, \
+        std::unique_ptr<matrix::Csr<ValueType, IndexType>>& factors)
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SYMBOLIC_LU);
 
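To connect this diff to the review discussion above, here is a minimal, non-self-contained sketch of the pattern the PR introduces, using only names that appear in the diff; the exact expansion of GKO_REGISTER_HOST_OPERATION (presumably generating the make_* helper consumed by exec->run) is an assumption and is not shown:

    // Device kernel: implemented per backend and dispatched by the executor.
    GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize);
    // Host operation: implemented once in core, but still invoked through
    // exec->run(...) so loggers and profilers see it on the same executor.
    GKO_REGISTER_HOST_OPERATION(symbolic_lu, gko::factorization::symbolic_lu);

    // inside Lu<ValueType, IndexType>::generate_impl (cf. core/factorization/lu.cpp):
    std::unique_ptr<matrix_type> factors;
    exec->run(make_symbolic_lu(mtx.get(), factors));

The switch from a return value to an output parameter is presumably what allows the wrapped function to be registered and run like any other operation, since exec->run(...) itself does not return the result.
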
25 changes: 18 additions & 7 deletions core/factorization/symbolic.hpp
@@ -37,15 +37,26 @@ namespace gko {
 namespace factorization {
 
 
-/** Computes the symbolic Cholesky factorization of the given matrix. */
+/**
+ * Computes the symbolic Cholesky factorization of the given matrix.
+ *
+ * @param mtx the input matrix
+ * @param factors the output factors stored in a combined pattern
+ */
 template <typename ValueType, typename IndexType>
-std::unique_ptr<matrix::Csr<ValueType, IndexType>> symbolic_cholesky(
-    const matrix::Csr<ValueType, IndexType>*);
-
-/** Computes the symbolic LU factorization of the given matrix. */
+void symbolic_cholesky(
+    const matrix::Csr<ValueType, IndexType>* mtx,
+    std::unique_ptr<matrix::Csr<ValueType, IndexType>>& factors);
+
+/**
+ * Computes the symbolic LU factorization of the given matrix.
+ *
+ * @param mtx the input matrix
+ * @param factors the output factors stored in a combined pattern
+ */
 template <typename ValueType, typename IndexType>
-std::unique_ptr<matrix::Csr<ValueType, IndexType>> symbolic_lu(
-    const matrix::Csr<ValueType, IndexType>*);
+void symbolic_lu(const matrix::Csr<ValueType, IndexType>* mtx,
+                 std::unique_ptr<matrix::Csr<ValueType, IndexType>>& factors);
 
 
 }  // namespace factorization
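For callers of the symbolic factorization routines, the visible effect of this PR is the switch from a return value to an output parameter. A minimal usage sketch under that assumption; the value/index types and the pre-built CSR matrix mtx are placeholders, not part of the PR:

    #include "core/factorization/symbolic.hpp"

    // mtx is assumed to be a gko::matrix::Csr<double, int>* (e.g. the .get()
    // of a smart pointer) that already lives on some executor.
    std::unique_ptr<gko::matrix::Csr<double, int>> factors;
    gko::factorization::symbolic_lu(mtx, factors);
    // factors now holds the combined symbolic factor pattern on mtx's executor;
    // before this PR the call was written as: auto factors = symbolic_lu(mtx);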