[pgm] use index map to create coarse matrix
MarcelKoch committed Jul 11, 2024
1 parent eb0b324 commit c297afa
Showing 4 changed files with 89 additions and 152 deletions.
12 changes: 0 additions & 12 deletions core/distributed/matrix.cpp
@@ -106,7 +106,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::Matrix(
recv_offsets_(comm.size() + 1),
recv_sizes_(comm.size()),
gather_idxs_{exec},
non_local_to_global_{exec},
one_scalar_{},
local_mtx_{local_matrix_template->clone(exec)},
non_local_mtx_{non_local_matrix_template->clone(exec)}
@@ -134,7 +133,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::Matrix(
recv_offsets_(comm.size() + 1),
recv_sizes_(comm.size()),
gather_idxs_{exec},
non_local_to_global_{exec},
one_scalar_{},
non_local_mtx_(::gko::matrix::Coo<ValueType, LocalIndexType>::create(
exec, dim<2>{local_linop->get_size()[0], 0}))
@@ -159,7 +157,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::Matrix(
recv_offsets_(comm.size() + 1),
recv_sizes_(comm.size()),
gather_idxs_{exec},
non_local_to_global_{exec},
one_scalar_{}
{
this->set_size({imap_.get_global_size(), imap_.get_global_size()});
@@ -243,7 +240,6 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::convert_to(
result->recv_offsets_ = this->recv_offsets_;
result->recv_sizes_ = this->recv_sizes_;
result->send_sizes_ = this->send_sizes_;
result->non_local_to_global_ = this->non_local_to_global_;
result->set_size(this->get_size());
}

@@ -263,7 +259,6 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::move_to(
result->recv_offsets_ = std::move(this->recv_offsets_);
result->recv_sizes_ = std::move(this->recv_sizes_);
result->send_sizes_ = std::move(this->send_sizes_);
result->non_local_to_global_ = std::move(this->non_local_to_global_);
result->set_size(this->get_size());
this->set_size({});
}
@@ -315,11 +310,6 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::read_distributed(

auto non_local_col_idxs =
imap_.map_to_local(global_non_local_col_idxs, index_space::non_local);
non_local_to_global_ =
make_const_array_view(
imap_.get_executor(), imap_.get_remote_global_idxs().get_size(),
imap_.get_remote_global_idxs().get_const_flat_data())
.copy_to_array();

// read the local matrix data
const auto num_local_rows =
@@ -537,7 +527,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(
recv_offsets_ = other.recv_offsets_;
send_sizes_ = other.send_sizes_;
recv_sizes_ = other.recv_sizes_;
non_local_to_global_ = other.non_local_to_global_;
one_scalar_.init(this->get_executor(), dim<2>{1, 1});
one_scalar_->fill(one<value_type>());
}
@@ -562,7 +551,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(Matrix&& other)
recv_offsets_ = std::move(other.recv_offsets_);
send_sizes_ = std::move(other.send_sizes_);
recv_sizes_ = std::move(other.recv_sizes_);
non_local_to_global_ = std::move(other.non_local_to_global_);
one_scalar_.init(this->get_executor(), dim<2>{1, 1});
one_scalar_->fill(one<value_type>());
}
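The deletions above drop the cached non_local_to_global_ array; the same data is already held by the matrix's index_map, as the removed lines in read_distributed show. A minimal sketch of the equivalent on-demand lookup, using only calls that appear in the removed code (the helper itself is hypothetical and not part of this commit):

#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/distributed/index_map.hpp>

// Hypothetical helper, not part of the commit: rebuilds the removed
// non_local_to_global_ array on demand from an index_map. The calls mirror
// the removed lines in read_distributed above.
template <typename LocalIndexType, typename GlobalIndexType>
gko::array<GlobalIndexType> non_local_to_global(
    const gko::experimental::distributed::index_map<LocalIndexType,
                                                    GlobalIndexType>& imap)
{
    // the remote global indices are stored segmented by owning rank; their
    // flat view is exactly the former non_local_to_global_ content
    return gko::make_const_array_view(
               imap.get_executor(), imap.get_remote_global_idxs().get_size(),
               imap.get_remote_global_idxs().get_const_flat_data())
        .copy_to_array();
}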
202 changes: 68 additions & 134 deletions core/multigrid/pgm.cpp
@@ -13,6 +13,8 @@
#include <ginkgo/core/base/utils.hpp>
#include <ginkgo/core/distributed/base.hpp>
#include <ginkgo/core/distributed/matrix.hpp>
#include <ginkgo/core/distributed/partition.hpp>
#include <ginkgo/core/distributed/partition_helpers.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/csr.hpp>
@@ -247,13 +249,18 @@ Pgm<ValueType, IndexType>::generate_local(


#if GINKGO_BUILD_MPI


template <typename ValueType, typename IndexType>
template <typename GlobalIndexType>
void Pgm<ValueType, IndexType>::communicate(
array<GlobalIndexType> Pgm<ValueType, IndexType>::communicate_non_local_agg(
std::shared_ptr<const experimental::distributed::Matrix<
ValueType, IndexType, GlobalIndexType>>
matrix,
const array<IndexType>& local_agg, array<IndexType>& non_local_agg)
std::shared_ptr<
experimental::distributed::Partition<IndexType, GlobalIndexType>>
coarse_partition,
const array<IndexType>& local_agg)
{
auto exec = gko::as<LinOp>(matrix)->get_executor();
const auto comm = matrix->get_communicator();
Expand All @@ -270,20 +277,29 @@ void Pgm<ValueType, IndexType>::communicate(
send_agg.get_size(), local_agg.get_const_data(),
gather_idxs.get_const_data(), send_agg.get_data()));

// temporary index map without remote connections, used only to map
// local indices to global indices
experimental::distributed::index_map<IndexType, GlobalIndexType> imap(
exec, coarse_partition, comm.rank(), array<GlobalIndexType>{exec});
auto send_global_agg = imap.map_to_global(
send_agg, experimental::distributed::index_space::local);

array<GlobalIndexType> non_local_agg(exec, total_recv_size);

auto use_host_buffer = experimental::mpi::requires_host_buffer(exec, comm);
array<IndexType> host_recv_buffer(exec->get_master());
array<IndexType> host_send_buffer(exec->get_master());
array<GlobalIndexType> host_recv_buffer(exec->get_master());
array<GlobalIndexType> host_send_buffer(exec->get_master());
if (use_host_buffer) {
host_recv_buffer.resize_and_reset(total_recv_size);
host_send_buffer.resize_and_reset(total_send_size);
exec->get_master()->copy_from(exec, total_send_size,
send_agg.get_data(),
send_global_agg.get_data(),
host_send_buffer.get_data());
}
auto type = experimental::mpi::type_impl<IndexType>::get_type();
auto type = experimental::mpi::type_impl<GlobalIndexType>::get_type();

const auto send_ptr = use_host_buffer ? host_send_buffer.get_const_data()
: send_agg.get_const_data();
: send_global_agg.get_const_data();
auto recv_ptr = use_host_buffer ? host_recv_buffer.get_data()
: non_local_agg.get_data();
exec->synchronize();
@@ -294,92 +310,11 @@ void Pgm<ValueType, IndexType>::communicate(
exec->copy_from(exec->get_master(), total_recv_size, recv_ptr,
non_local_agg.get_data());
}
}
#endif


#define GKO_ASSERT_HOST_ARRAY(array) \
GKO_ASSERT(array.get_executor() == array.get_executor()->get_master())


template <typename IndexType>
void generate_non_local_map(
const std::vector<experimental::distributed::comm_index_type>& recv_offsets,
array<IndexType>& non_local_agg, array<IndexType>& non_local_col_map,
array<IndexType>& renumber)
{
GKO_ASSERT_HOST_ARRAY(non_local_agg);
GKO_ASSERT_HOST_ARRAY(non_local_col_map);
GKO_ASSERT_HOST_ARRAY(renumber);
auto exec = renumber.get_executor();
auto non_local_size = non_local_agg.get_size();
array<IndexType> part_id(exec, non_local_size);
array<IndexType> index(exec, non_local_size);

for (int i = 0; i + 1 < recv_offsets.size(); i++) {
for (auto j = recv_offsets.at(i); j < recv_offsets.at(i + 1); j++) {
part_id.get_data()[j] = i;
index.get_data()[j] = j;
}
}
// currently done on the host.
auto it = detail::make_zip_iterator(
part_id.get_data(), non_local_agg.get_data(), index.get_data());
// prepare tuple <part_id, local_agg, index>
// sort by <part_id, local_agg> (alternatively, a segmented sort)
std::sort(it, it + non_local_size);

renumber.get_data()[0] = 0;
// renumber (prefix sum), incrementing whenever <part_id, local_agg> changes
for (int i = 1; i < non_local_size; i++) {
if (part_id.get_const_data()[i] != part_id.get_const_data()[i - 1] ||
non_local_agg.get_const_data()[i] !=
non_local_agg.get_const_data()[i - 1]) {
renumber.get_data()[i] = renumber.get_data()[i - 1] + 1;
} else {
renumber.get_data()[i] = renumber.get_data()[i - 1];
}
}
renumber.get_data()[non_local_size] =
renumber.get_data()[non_local_size - 1] + 1;
// create col map
// for each entry i, col_map[tuple[i].index] = renumber[i]
for (int i = 0; i < non_local_size; i++) {
non_local_col_map.get_data()[index.get_data()[i]] =
renumber.get_data()[i];
}
}


template <typename IndexType>
void compute_communication(
const std::vector<experimental::distributed::comm_index_type> recv_offsets,
const array<IndexType>& non_local_agg, const array<IndexType>& renumber,
std::vector<experimental::distributed::comm_index_type>& new_recv_size,
std::vector<experimental::distributed::comm_index_type>& new_recv_offsets,
array<IndexType>& new_recv_gather_idxs)
{
GKO_ASSERT_HOST_ARRAY(non_local_agg);
GKO_ASSERT_HOST_ARRAY(renumber);
GKO_ASSERT_HOST_ARRAY(new_recv_gather_idxs);
new_recv_offsets.at(0) = 0;
for (int i = 0; i < new_recv_size.size(); i++) {
new_recv_size.at(i) =
renumber.get_const_data()[recv_offsets.at(i + 1)] -
renumber.get_const_data()[recv_offsets.at(i)];
new_recv_offsets.at(i + 1) =
new_recv_offsets.at(i) + new_recv_size.at(i);
}
IndexType non_local_num_agg = new_recv_offsets.back();
new_recv_gather_idxs.resize_and_reset(non_local_num_agg);
for (int i = 0; i < non_local_agg.get_size(); i++) {
new_recv_gather_idxs.get_data()[renumber.get_const_data()[i]] =
non_local_agg.get_const_data()[i];
}
return non_local_agg;
}


#undef GKO_ASSERT_HOST_ARRAY
#endif


template <typename ValueType, typename IndexType>
@@ -440,80 +375,79 @@ void Pgm<ValueType, IndexType>::generate()
}

auto distributed_setup = [&](auto matrix) {
using global_index_type =
typename std::decay_t<decltype(*matrix)>::global_index_type;

auto exec = gko::as<LinOp>(matrix)->get_executor();
auto comm =
gko::as<experimental::distributed::DistributedBase>(matrix)
->get_communicator();
auto num_rank = comm.size();
auto pgm_local_op =
gko::as<const csr_type>(matrix->get_local_matrix());
auto result = this->generate_local(pgm_local_op);

auto non_local_csr =
as<const csr_type>(matrix->get_non_local_matrix());
auto non_local_size = non_local_csr->get_size()[1];
array<IndexType> non_local_agg(exec, non_local_size);
// get agg information (prolong_row_gather row idx)
communicate(matrix, agg_, non_local_agg);
// generate non_local_col_map
non_local_agg.set_executor(exec->get_master());
array<IndexType> non_local_col_map(exec->get_master(),
non_local_size);
// add an additional entry at the tail so that the offset computation
// can handle it easily.
array<IndexType> renumber(exec->get_master(), non_local_size + 1);
auto recv_offsets = matrix->recv_offsets_;
generate_non_local_map(recv_offsets, non_local_agg,
non_local_col_map, renumber);

// get new recv_size and recv_offsets
std::vector<experimental::distributed::comm_index_type>
new_recv_size(num_rank);
std::vector<experimental::distributed::comm_index_type>
new_recv_offsets(num_rank + 1);
array<IndexType> new_recv_gather_idxs(exec->get_master());
compute_communication(recv_offsets, non_local_agg, renumber,
new_recv_size, new_recv_offsets,
new_recv_gather_idxs);

non_local_col_map.set_executor(exec);
IndexType non_local_num_agg = new_recv_gather_idxs.get_size();
// create the coarse partition
// the coarse partition will have only one range per part
// and only one part per rank.
// The global indices are ordered block-wise by rank, i.e. rank 0
// owns [0, ..., N_1), rank 1 owns [N_1, ..., N_2), ...
auto coarse_local_size =
static_cast<int64>(std::get<1>(result)->get_size()[0]);
auto coarse_partition = gko::share(
experimental::distributed::build_partition_from_local_size<
IndexType, global_index_type>(exec, comm,
coarse_local_size));

// get the non-local aggregates as coarse global indices
auto non_local_agg =
communicate_non_local_agg(matrix, coarse_partition, agg_);

// create a coarse index map based on the connection given by the
// non-local aggregates
auto coarse_imap =
experimental::distributed::index_map<IndexType,
global_index_type>(
exec, coarse_partition, comm.rank(), non_local_agg);

// a mapping from the fine non-local indices to the coarse non-local
// indices.
// non_local_agg already maps the fine non-local indices to coarse
// global indices, so mapping it with the coarse index map results
// in the coarse non-local indices.
auto non_local_map = coarse_imap.map_to_local(
non_local_agg,
experimental::distributed::index_space::non_local);

// build csr from row and col map
// unlike non-distributed version, generate_coarse uses different
// row and col maps.
auto non_local_csr =
as<const csr_type>(matrix->get_non_local_matrix());
auto result_non_local_csr = generate_coarse(
exec, non_local_csr.get(),
static_cast<IndexType>(std::get<1>(result)->get_size()[0]),
agg_, non_local_num_agg, non_local_col_map);
// use local and non-local to build coarse matrix
// also restriction and prolongation (Local-only-global matrix)
auto coarse_size =
static_cast<int64>(std::get<1>(result)->get_size()[0]);
comm.all_reduce(exec->get_master(), &coarse_size, 1, MPI_SUM);
new_recv_gather_idxs.set_executor(exec);
agg_, static_cast<IndexType>(coarse_imap.get_non_local_size()),
non_local_map);

// setup the generated linop.
using global_index_type =
typename std::decay_t<decltype(*matrix)>::global_index_type;
auto coarse = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm, gko::dim<2>(coarse_size, coarse_size),
std::get<1>(result), result_non_local_csr,
new_recv_size, new_recv_offsets, new_recv_gather_idxs));
exec, comm, std::move(coarse_imap), std::get<1>(result),
result_non_local_csr));
auto restrict_op = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
dim<2>(coarse_size,
dim<2>(coarse->get_size()[0],
gko::as<LinOp>(matrix)->get_size()[0]),
std::get<2>(result)));
auto prolong_op = share(
experimental::distributed::
Matrix<ValueType, IndexType, global_index_type>::create(
exec, comm,
dim<2>(gko::as<LinOp>(matrix)->get_size()[0],
coarse_size),
coarse->get_size()[0]),
std::get<0>(result)));
this->set_multigrid_level(prolong_op, coarse, restrict_op);
};
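The distributed_setup rewrite above delegates the coarse communication pattern to an index_map instead of the removed generate_non_local_map/compute_communication helpers. A condensed sketch of the final two steps, assuming non_local_agg already holds coarse global indices as produced by communicate_non_local_agg (the free function and its names are hypothetical, not part of this commit):

#include <memory>

#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/executor.hpp>
#include <ginkgo/core/distributed/index_map.hpp>
#include <ginkgo/core/distributed/partition.hpp>

// Hypothetical sketch, not part of the commit: given the coarse partition and
// the communicated coarse global aggregate indices of the non-local columns,
// build the coarse index map and translate the indices into the coarse
// non-local index space expected by generate_coarse.
template <typename LocalIndexType, typename GlobalIndexType>
gko::array<LocalIndexType> map_aggregates_to_coarse_cols(
    std::shared_ptr<const gko::Executor> exec,
    std::shared_ptr<const gko::experimental::distributed::Partition<
        LocalIndexType, GlobalIndexType>>
        coarse_partition,
    gko::experimental::distributed::comm_index_type rank,
    const gko::array<GlobalIndexType>& non_local_agg)
{
    // the coarse index map is fully determined by the coarse partition and
    // the remote coarse global indices this rank is connected to
    auto coarse_imap =
        gko::experimental::distributed::index_map<LocalIndexType,
                                                  GlobalIndexType>(
            exec, coarse_partition, rank, non_local_agg);
    // mapping the same global indices through the coarse index map yields
    // the coarse non-local column indices
    return coarse_imap.map_to_local(
        non_local_agg, gko::experimental::distributed::index_space::non_local);
}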
1 change: 0 additions & 1 deletion include/ginkgo/core/distributed/matrix.hpp
@@ -614,7 +614,6 @@ class Matrix
std::vector<comm_index_type> recv_offsets_;
std::vector<comm_index_type> recv_sizes_;
array<local_index_type> gather_idxs_;
array<global_index_type> non_local_to_global_;
gko::detail::DenseCache<value_type> one_scalar_;
gko::detail::DenseCache<value_type> host_send_buffer_;
gko::detail::DenseCache<value_type> host_recv_buffer_;
26 changes: 21 additions & 5 deletions include/ginkgo/core/multigrid/pgm.hpp
@@ -194,12 +194,28 @@ class Pgm : public EnableLinOp<Pgm<ValueType, IndexType>>,
std::shared_ptr<const matrix::Csr<ValueType, IndexType>> local_matrix);

#if GINKGO_BUILD_MPI
/**
* Communicates the non-local aggregates (as global indices).
*
* @tparam GlobalIndexType Global index type
*
* @param matrix a distributed matrix
* @param coarse_partition the coarse partition used to compute the new
* global indices
* @param local_agg the local aggregate indices
*
* @return the aggregates for the non-local columns. The aggregate indices
* are given in the new (coarse) global indexing.
*/
template <typename GlobalIndexType>
void communicate(std::shared_ptr<const experimental::distributed::Matrix<
ValueType, IndexType, GlobalIndexType>>
matrix,
const array<IndexType>& local_agg,
array<IndexType>& non_local_agg);
array<GlobalIndexType> communicate_non_local_agg(
std::shared_ptr<const experimental::distributed::Matrix<
ValueType, IndexType, GlobalIndexType>>
matrix,
std::shared_ptr<
experimental::distributed::Partition<IndexType, GlobalIndexType>>
coarse_partition,
const array<IndexType>& local_agg);
#endif

private:
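The declaration above returns the non-local aggregates in the coarse global indexing. Inside the implementation (see the pgm.cpp hunk earlier), the locally owned aggregate ids are lifted to global ids via an index_map that has no remote connections; a sketch of just that translation step (hypothetical free function and names, not part of this commit):

#include <memory>

#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/executor.hpp>
#include <ginkgo/core/distributed/index_map.hpp>
#include <ginkgo/core/distributed/partition.hpp>

// Hypothetical sketch, not part of the commit: an index_map constructed with
// an empty set of remote connections can still map indices in the local index
// space to their global counterparts, which is how communicate_non_local_agg
// converts the aggregate ids before exchanging them.
template <typename LocalIndexType, typename GlobalIndexType>
gko::array<GlobalIndexType> local_agg_to_global(
    std::shared_ptr<const gko::Executor> exec,
    std::shared_ptr<const gko::experimental::distributed::Partition<
        LocalIndexType, GlobalIndexType>>
        coarse_partition,
    gko::experimental::distributed::comm_index_type rank,
    const gko::array<LocalIndexType>& send_agg)
{
    // no remote connections are needed, since only the local -> global
    // mapping is used
    gko::experimental::distributed::index_map<LocalIndexType, GlobalIndexType>
        imap(exec, coarse_partition, rank, gko::array<GlobalIndexType>{exec});
    return imap.map_to_global(
        send_agg, gko::experimental::distributed::index_space::local);
}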
