Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BSR block SpGEMM implementation #1099

Merged
merged 16 commits into from
Apr 29, 2022
7 changes: 7 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -360,6 +360,13 @@ KOKKOSKERNELS_GENERATE_ETI(Sparse_spgemm_numeric spgemm_numeric
TYPE_LISTS FLOATS ORDINALS OFFSETS LAYOUTS DEVICES_W_SLOW_SPACE
)

KOKKOSKERNELS_GENERATE_ETI(Sparse_bspgemm_numeric bspgemm_numeric
COMPONENTS sparse
HEADER_LIST ETI_HEADERS
SOURCE_LIST SOURCES
TYPE_LISTS FLOATS ORDINALS OFFSETS LAYOUTS DEVICES_W_SLOW_SPACE
)

KOKKOSKERNELS_GENERATE_ETI(Sparse_spgemm_jacobi spgemm_jacobi
COMPONENTS sparse
HEADER_LIST ETI_HEADERS
660 changes: 660 additions & 0 deletions src/common/KokkosKernels_BlockHashmapAccumulator.hpp

Large diffs are not rendered by default.

144 changes: 144 additions & 0 deletions src/common/KokkosKernels_BlockUtils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Siva Rajamanickam (srajama@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef _KOKKOSKERNELS_BLOCKUTILS_HPP
#define _KOKKOSKERNELS_BLOCKUTILS_HPP

// #include <Kokkos_Atomic.hpp>
// #include <atomic>
#include "KokkosBatched_Gemm_Serial_Internal.hpp"

namespace KokkosSparse {
namespace Impl {

// Initializes block: A = [val, val, val, ....]
template <typename size_type, typename value_type>
KOKKOS_INLINE_FUNCTION void kk_block_init(
const size_type block_dim, value_type *dst,
const value_type val = static_cast<value_type>(
0)) { // Note: replaces __host__ std::fill() not to be called from GPU
for (auto end = dst + (block_dim * block_dim); dst < end; ++dst) {
*dst = val;
}
}

// Initializes block: A = B
template <typename size_type, typename value_type>
KOKKOS_INLINE_FUNCTION void kk_block_set(const size_type block_dim,
value_type *dst,
const value_type *val) {
memcpy(dst, val, block_dim * block_dim * sizeof(value_type));
}

// Performs A += B on blocks
template <typename size_type, typename value_type>
KOKKOS_INLINE_FUNCTION void kk_block_add(const size_type block_dim,
value_type *dst,
const value_type *val) {
const auto end = dst + block_dim * block_dim;
while (dst < end) {
*(dst++) += *(val++);
}
}

// Performs C += A * B on blocks
// Note: block is assumed to be row-major, dense matrix (no extra padding)
// Note: set clear=true to set C = 0 before increment
template <typename size_type, typename value_type,
typename DGEMM = KokkosBatched::SerialGemmInternal<
KokkosBatched::Algo::Gemm::Unblocked>>
KOKKOS_INLINE_FUNCTION void kk_block_dgemm(const size_type block_dim,
value_type *dst,
const value_type *valA,
const value_type *valB,
const bool clear = false) {
const auto ZERO = static_cast<value_type>(0);
const auto ONE = static_cast<value_type>(1);
DGEMM::invoke(block_dim, block_dim, block_dim, ONE, valA, block_dim, 1, valB,
block_dim, 1, clear ? ZERO : ONE, dst, block_dim, 1);
}

// dgemm: C = A * B
template <typename size_type, typename value_type>
KOKKOS_INLINE_FUNCTION void kk_block_set_mul(const size_type block_dim,
value_type *c_val,
const value_type *a_val,
const value_type *b_val) {
kk_block_dgemm(block_dim, c_val, a_val, b_val, true);
}

// dgemm: C += A * B
template <typename size_type, typename value_type>
KOKKOS_INLINE_FUNCTION void kk_block_add_mul(const size_type block_dim,
value_type *c_val,
const value_type *a_val,
const value_type *b_val) {
kk_block_dgemm(block_dim, c_val, a_val, b_val, false);
}

// Performs C += A * B (dense GEMM) on blocks
// Note: all pointers reference dense row-major blocks (no extra padding)
template <typename size_type, typename value_type>
KOKKOS_INLINE_FUNCTION void kk_vector_block_add_mul(const size_type block_dim,
value_type *dst,
const value_type *valA,
const value_type *valB) {
// NOTE: this should be replaced by batched DGEMM
// once atomic increment is supported there
for (size_type row = 0; row < block_dim; ++row) {
auto const row_offset = row * block_dim;
for (size_type col = 0; col < block_dim; ++col) {
auto v = &dst[row_offset + col];
auto vb = valB + col;
for (auto va = valA + row_offset, end = va + block_dim; va < end; ++va) {
Kokkos::atomic_add(v, (*va) * (*vb));
vb += block_dim;
}
}
}
}

} // namespace Impl
} // namespace KokkosSparse

#endif // _KOKKOSKERNELS_BLOCKUTILS_HPP
7 changes: 3 additions & 4 deletions src/common/KokkosKernels_HashmapAccumulator.hpp
Original file line number Diff line number Diff line change
@@ -344,20 +344,20 @@ struct HashmapAccumulator {
// Insertion is sequential, no race condition for the insertion.
// the mergeadd used in the numeric of KKMEM.
KOKKOS_INLINE_FUNCTION
int sequential_insert_into_hash_mergeAdd_TrackHashes(
void sequential_insert_into_hash_mergeAdd_TrackHashes(
key_type key, value_type value, size_type *used_size_,
size_type *used_hash_size, size_type *used_hashes) {
size_type hash, i, my_index;

if (key == -1) return __insert_success;
if (key == -1) return;

// issue-508, TODO: ensure that i < __max_value_size, but
// need information about length of keys, values, and hash_nexts first!
hash = __compute_hash(key, __hashOpRHS);
for (i = hash_begins[hash]; i != -1; i = hash_nexts[i]) {
if (keys[i] == key) {
values[i] = values[i] + value;
return __insert_success;
return;
}
}

@@ -371,7 +371,6 @@ struct HashmapAccumulator {
hash_begins[hash] = my_index;
keys[my_index] = key;
values[my_index] = value;
return __insert_success;
}

// no values. simply adds to the keys.
26 changes: 24 additions & 2 deletions src/common/KokkosKernels_IOUtils.hpp
Original file line number Diff line number Diff line change
@@ -59,6 +59,7 @@
#include <Kokkos_Core.hpp>
#include "Kokkos_Random.hpp"
#include "KokkosKernels_SimpleUtils.hpp"
#include "KokkosSparse_CrsMatrix.hpp"
#include <sys/stat.h>

namespace KokkosKernels {
@@ -94,7 +95,8 @@ template <typename ScalarType, typename OrdinalType, typename SizeType>
void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols,
SizeType &nnz, OrdinalType row_size_variance,
OrdinalType bandwidth, ScalarType *&values,
SizeType *&rowPtr, OrdinalType *&colInd) {
SizeType *&rowPtr, OrdinalType *&colInd,
OrdinalType block_elem_count = 1) {
rowPtr = new SizeType[nrows + 1];

OrdinalType elements_per_row = nrows ? nnz / nrows : 0;
@@ -138,7 +140,8 @@ void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols,
}
// Sample each value from uniform (-50, 50) for real types, or (-50 - 50i, 50
// + 50i) for complex types.
Kokkos::View<ScalarType *, Kokkos::HostSpace> valuesView(values, nnz);
Kokkos::View<ScalarType *, Kokkos::HostSpace> valuesView(
values, nnz * block_elem_count);
ScalarType randStart, randEnd;
getRandomBounds(50.0, randStart, randEnd);
Kokkos::Random_XorShift64_Pool<Kokkos::DefaultHostExecutionSpace> pool(13718);
@@ -443,6 +446,25 @@ crsMat_t kk_generate_sparse_matrix(
return crsmat;
}

template <typename bsrMat_t>
bsrMat_t kk_generate_sparse_matrix(
typename bsrMat_t::const_ordinal_type block_dim,
typename bsrMat_t::const_ordinal_type nrows,
typename bsrMat_t::const_ordinal_type ncols,
typename bsrMat_t::non_const_size_type &nnz,
typename bsrMat_t::const_ordinal_type row_size_variance,
typename bsrMat_t::const_ordinal_type bandwidth) {
typedef KokkosSparse::CrsMatrix<
typename bsrMat_t::value_type, typename bsrMat_t::ordinal_type,
typename bsrMat_t::device_type, typename bsrMat_t::memory_traits,
typename bsrMat_t::size_type>
crsMat_t;

const auto crs_mtx = kk_generate_sparse_matrix<crsMat_t>(
nrows * block_dim, ncols * block_dim, nnz, row_size_variance, bandwidth);
bsrMat_t bsrmat(crs_mtx, block_dim);
return bsrmat;
}
// TODO: need to fix the size_type. All over the reading inputs are lno_t.

template <typename stype>
71 changes: 71 additions & 0 deletions src/common/KokkosKernels_Sorting.hpp
Original file line number Diff line number Diff line change
@@ -61,6 +61,13 @@ struct DefaultComparator {
};
} // namespace Impl

// ----------------------------------
// BSR matrix/graph sorting utilities
// ----------------------------------

template <typename bsrMat_t>
void sort_bsr_matrix(const bsrMat_t& A);

// ----------------------------------
// CRS matrix/graph sorting utilities
// ----------------------------------
@@ -565,6 +572,70 @@ void sort_crs_matrix(const crsMat_t& A) {
A.graph.row_map, A.graph.entries, A.values);
}

namespace Impl {

template <typename T>
KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) {
T t = a;
a = b;
b = t;
}

} // namespace Impl

// Sort a BRS matrix: within each row, sort entries ascending by column and
// permute the values accordingly.
template <typename execution_space, typename rowmap_t, typename entries_t,
typename values_t,
typename lno_t = typename entries_t::non_const_value_type>
void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap,
const entries_t& entries, const values_t& values) {
// TODO: this is O(N^2) mock for debugging - do regular implementation based
// on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general
// Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ?
lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0;
if (numRows == 0) return;
const lno_t blocksize = blockdim * blockdim;

assert(values.extent(0) == entries.extent(0) * blocksize);
Kokkos::parallel_for(
"sort_bsr_matrix", Kokkos::RangePolicy<execution_space>(0, numRows),
KOKKOS_LAMBDA(lno_t i) {
const lno_t rowStart = rowmap(i);
const lno_t rowSize = rowmap(i + 1) - rowStart;
auto* e = entries.data() + rowStart;
auto* v = values.data() + rowStart * blocksize;
bool done = false;
while (!done) {
done = true;
for (lno_t j = 1; j < rowSize; ++j) {
const lno_t jp = j - 1;
if (e[jp] <= e[j]) continue;
Impl::kk_swap(e[jp], e[j]);
auto const vb = v + j * blocksize;
auto const vbp = v + jp * blocksize;
for (lno_t k = 0; k < blocksize;
++k) // std::swap_ranges(vb, vb + blocksize, vbp);
Impl::kk_swap(vb[k], vbp[k]);
done = false;
}
}
});
}

// Sort a BSR matrix (like CRS but single values are replaced with contignous
// blocks)
template <typename bsrMat_t>
void sort_bsr_matrix(const bsrMat_t& A) {
// NOTE: unlike rowmap, entries and values are non-const, so we can sort them
// directly
sort_bsr_matrix<typename bsrMat_t::execution_space,
typename bsrMat_t::row_map_type,
typename bsrMat_t::index_type::non_const_type,
typename bsrMat_t::values_type::non_const_type>(
A.blockDim(), A.graph.row_map, A.graph.entries, A.values);
}

// Sort a CRS graph: within each row, sort entries ascending by column.
template <typename execution_space, typename rowmap_t, typename entries_t>
void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) {
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
//@HEADER
// ************************************************************************
//
// KokkosKernels 0.9: Linear Algebra and Graph Kernels
// Copyright 2017 Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Siva Rajamanickam (srajama@sandia.gov)
//
// ************************************************************************
//@HEADER
*/


#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true
#include "KokkosKernels_config.h"

#include "KokkosSparse_bspgemm_numeric_spec.hpp"
namespace KokkosSparse {
namespace Impl {
@SPARSE_BSPGEMM_NUMERIC_ETI_INST_BLOCK@
} //IMPL
} //Kokkos
Loading