Skip to content

Commit

Permalink
use matrix builders in spgemm
Browse files (browse the repository at this point in the history)
  • Loading branch information
upsj committed Jan 7, 2020
1 parent a5b011b commit 6cab36f
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 65 deletions.
25 changes: 5 additions & 20 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,17 +88,9 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *b, LinOp *x) const
using TCsr = Csr<ValueType, IndexType>;
if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
// if b is a CSR matrix, we compute a SpGeMM
auto exec = this->get_executor();
Array<IndexType> x_rows(exec);
Array<IndexType> x_cols(exec);
Array<ValueType> x_vals(exec);
auto x_csr = as<TCsr>(x);
this->get_executor()->run(
csr::make_spgemm(this, b_csr, x_rows, x_cols, x_vals));
auto new_x = TCsr::create(x_csr->get_executor(), x->get_size(),
std::move(x_vals), std::move(x_cols),
std::move(x_rows), x_csr->get_strategy());
new_x->move_to(x_csr);
this->get_executor()->run(csr::make_spgemm(this, b_csr, x_csr));
x_csr->make_srow();
} else {
// otherwise we assume that b is dense and compute a SpMV/SpMM
this->get_executor()->run(
Expand All @@ -115,18 +107,11 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *alpha, const LinOp *b,
using TCsr = Csr<ValueType, IndexType>;
if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
// if b is a CSR matrix, we compute a SpGeMM
auto exec = this->get_executor();
Array<IndexType> x_rows(exec);
Array<IndexType> x_cols(exec);
Array<ValueType> x_vals(exec);
auto x_csr = as<TCsr>(x);
auto x_copy = x_csr->clone();
this->get_executor()->run(csr::make_advanced_spgemm(
as<Dense>(alpha), this, b_csr, as<Dense>(beta), x_csr, x_rows,
x_cols, x_vals));
auto new_x = TCsr::create(x_csr->get_executor(), x->get_size(),
std::move(x_vals), std::move(x_cols),
std::move(x_rows), x_csr->get_strategy());
new_x->move_to(x_csr);
as<Dense>(alpha), this, b_csr, as<Dense>(beta), x_copy, x_csr));
x_csr->make_srow();
} else {
// otherwise we assume that b is dense and compute a SpMV/SpMM
this->get_executor()->run(
Expand Down
15 changes: 6 additions & 9 deletions core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,11 @@ namespace kernels {
const matrix::Dense<ValueType> *beta, \
matrix::Dense<ValueType> *c)

#define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType) \
void spgemm(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Csr<ValueType, IndexType> *b, \
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs, \
Array<ValueType> &c_vals)
#define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType) \
void spgemm(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Csr<ValueType, IndexType> *b, \
matrix::Csr<ValueType, IndexType> *c)

#define GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) \
void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec, \
Expand All @@ -76,9 +75,7 @@ namespace kernels {
const matrix::Csr<ValueType, IndexType> *b, \
const matrix::Dense<ValueType> *beta, \
const matrix::Csr<ValueType, IndexType> *d, \
Array<IndexType> &c_row_ptrs, \
Array<IndexType> &c_col_idxs, \
Array<ValueType> &c_vals)
matrix::Csr<ValueType, IndexType> *c)

#define GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \
void convert_to_dense(std::shared_ptr<const DefaultExecutor> exec, \
Expand Down
20 changes: 11 additions & 9 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/matrix/sellp.hpp>


#include "core/matrix/csr_builder.hpp"
#include "core/matrix/dense_kernels.hpp"
#include "core/synthesizer/implementation_selection.hpp"
#include "cuda/base/config.hpp"
Expand Down Expand Up @@ -423,8 +424,7 @@ template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
if (cusparse::is_supported<ValueType, IndexType>::value) {
auto handle = exec->get_cusparse_handle();
Expand All @@ -450,6 +450,10 @@ void spgemm(std::shared_ptr<const CudaExecutor> exec,
auto m = IndexType(a->get_size()[0]);
auto n = IndexType(b->get_size()[1]);
auto k = IndexType(a->get_size()[1]);
auto c_row_ptrs = c->get_row_ptrs();
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();

// allocate buffer
size_type buffer_size{};
Expand All @@ -461,8 +465,6 @@ void spgemm(std::shared_ptr<const CudaExecutor> exec,
auto buffer = buffer_array.get_data();

// count nnz
c_row_ptrs_array.resize_and_reset(m + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
IndexType c_nnz{};
cusparse::spgemm_nnz(handle, m, n, k, a_descr, a_nnz, a_row_ptrs,
a_col_idxs, b_descr, b_nnz, b_row_ptrs, b_col_idxs,
Expand Down Expand Up @@ -500,9 +502,7 @@ void advanced_spgemm(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *d,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array,
Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
if (cusparse::is_supported<ValueType, IndexType>::value) {
auto handle = exec->get_cusparse_handle();
Expand Down Expand Up @@ -534,6 +534,10 @@ void advanced_spgemm(std::shared_ptr<const CudaExecutor> exec,
auto m = IndexType(a->get_size()[0]);
auto n = IndexType(b->get_size()[1]);
auto k = IndexType(a->get_size()[1]);
auto c_row_ptrs = c->get_row_ptrs();
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();

// allocate buffer
size_type buffer_size{};
Expand All @@ -545,8 +549,6 @@ void advanced_spgemm(std::shared_ptr<const CudaExecutor> exec,
auto buffer = buffer_array.get_data();

// count nnz
c_row_ptrs_array.resize_and_reset(m + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
IndexType c_nnz{};
cusparse::spgemm_nnz(handle, m, n, k, a_descr, a_nnz, a_row_ptrs,
a_col_idxs, b_descr, b_nnz, b_row_ptrs, b_col_idxs,
Expand Down
20 changes: 11 additions & 9 deletions hip/matrix/csr_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/matrix/sellp.hpp>


#include "core/matrix/csr_builder.hpp"
#include "core/matrix/dense_kernels.hpp"
#include "core/synthesizer/implementation_selection.hpp"
#include "hip/base/config.hip.hpp"
Expand Down Expand Up @@ -452,8 +453,7 @@ template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const HipExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
if (hipsparse::is_supported<ValueType, IndexType>::value) {
auto handle = exec->get_hipsparse_handle();
Expand All @@ -479,6 +479,10 @@ void spgemm(std::shared_ptr<const HipExecutor> exec,
auto m = IndexType(a->get_size()[0]);
auto n = IndexType(b->get_size()[1]);
auto k = IndexType(a->get_size()[1]);
auto c_row_ptrs = c->get_row_ptrs();
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();

// allocate buffer
size_type buffer_size{};
Expand All @@ -490,8 +494,6 @@ void spgemm(std::shared_ptr<const HipExecutor> exec,
auto buffer = buffer_array.get_data();

// count nnz
c_row_ptrs_array.resize_and_reset(m + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
IndexType c_nnz{};
hipsparse::spgemm_nnz(
handle, m, n, k, a_descr, a_nnz, a_row_ptrs, a_col_idxs, b_descr,
Expand Down Expand Up @@ -529,9 +531,7 @@ void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *d,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array,
Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
if (hipsparse::is_supported<ValueType, IndexType>::value) {
auto handle = exec->get_hipsparse_handle();
Expand Down Expand Up @@ -561,6 +561,10 @@ void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,
auto m = IndexType(a->get_size()[0]);
auto n = IndexType(b->get_size()[1]);
auto k = IndexType(a->get_size()[1]);
auto c_row_ptrs = c->get_row_ptrs();
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();

// allocate buffer
size_type buffer_size{};
Expand Down Expand Up @@ -600,8 +604,6 @@ void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,
hipsparse::destroy(a_descr);

// count nnz for alpha * A * B + beta * D
c_row_ptrs_array.resize_and_reset(m + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
auto num_blocks = ceildiv(m, default_block_size);
hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::spgeam_nnz),
dim3(num_blocks), dim3(default_block_size), 0, 0,
Expand Down
20 changes: 11 additions & 9 deletions omp/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "core/base/iterator_factory.hpp"
#include "core/matrix/csr_builder.hpp"
#include "omp/components/format_conversion.hpp"


Expand Down Expand Up @@ -208,14 +209,12 @@ template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
auto num_rows = a->get_size()[0];

// first sweep: count nnz for each row
c_row_ptrs_array.resize_and_reset(num_rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
auto c_row_ptrs = c->get_row_ptrs();

std::unordered_set<IndexType> local_col_idxs;
#pragma omp parallel for firstprivate(local_col_idxs)
Expand All @@ -231,6 +230,9 @@ void spgemm(std::shared_ptr<const OmpExecutor> exec,

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[num_rows];
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
Expand Down Expand Up @@ -261,17 +263,14 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *d,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array,
Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
auto num_rows = a->get_size()[0];
auto valpha = alpha->at(0, 0);
auto vbeta = beta->at(0, 0);

// first sweep: count nnz for each row
c_row_ptrs_array.resize_and_reset(num_rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
auto c_row_ptrs = c->get_row_ptrs();

std::unordered_set<IndexType> local_col_idxs;
#pragma omp parallel for firstprivate(local_col_idxs)
Expand All @@ -292,6 +291,9 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[num_rows];
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
Expand Down
20 changes: 11 additions & 9 deletions reference/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "core/base/iterator_factory.hpp"
#include "core/matrix/csr_builder.hpp"
#include "reference/components/format_conversion.hpp"


Expand Down Expand Up @@ -207,14 +208,12 @@ template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
auto num_rows = a->get_size()[0];

// first sweep: count nnz for each row
c_row_ptrs_array.resize_and_reset(num_rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
auto c_row_ptrs = c->get_row_ptrs();

std::unordered_set<IndexType> local_col_idxs;
for (size_type a_row = 0; a_row < num_rows; ++a_row) {
Expand All @@ -229,6 +228,9 @@ void spgemm(std::shared_ptr<const ReferenceExecutor> exec,

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[num_rows];
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
Expand Down Expand Up @@ -258,17 +260,14 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *d,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array,
Array<ValueType> &c_vals_array)
matrix::Csr<ValueType, IndexType> *c)
{
auto num_rows = a->get_size()[0];
auto valpha = alpha->at(0, 0);
auto vbeta = beta->at(0, 0);

// first sweep: count nnz for each row
c_row_ptrs_array.resize_and_reset(num_rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();
auto c_row_ptrs = c->get_row_ptrs();

std::unordered_set<IndexType> local_col_idxs;
for (size_type a_row = 0; a_row < num_rows; ++a_row) {
Expand All @@ -288,6 +287,9 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[num_rows];
auto c_builder = CsrBuilder<ValueType, IndexType>{c};
auto &c_col_idxs_array = c_builder.get_col_idx_array();
auto &c_vals_array = c_builder.get_value_array();
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
Expand Down

0 comments on commit 6cab36f

Please sign in to comment.