Skip to content

Commit

Permalink
Merge (#1669): Unify batch functionality: Matrix formats
Browse files Browse the repository at this point in the history
Unify and simplify batch functionality: Matrix formats (csr, dense, ell)

Related PR: #1669
  • Loading branch information
pratikvn authored Aug 23, 2024
2 parents 83a577c + 404de48 commit 70669c6
Show file tree
Hide file tree
Showing 57 changed files with 689 additions and 1,040 deletions.
3 changes: 3 additions & 0 deletions common/cuda_hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ set(CUDA_HIP_SOURCES
factorization/par_ilut_select_kernels.cpp
factorization/par_ilut_spgeam_kernels.cpp
factorization/par_ilut_sweep_kernels.cpp
matrix/batch_csr_kernels.cpp
matrix/batch_dense_kernels.cpp
matrix/batch_ell_kernels.cpp
matrix/coo_kernels.cpp
matrix/dense_kernels.cpp
matrix/diagonal_kernels.cpp
Expand Down
9 changes: 1 addition & 8 deletions common/cuda_hip/base/batch_multi_vector_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>

#include "common/cuda_hip/base/batch_struct.hpp"
#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
Expand All @@ -22,14 +23,6 @@
#include "common/cuda_hip/components/thread_ids.hpp"
#include "common/cuda_hip/components/warp_blas.hpp"

#if defined(GKO_COMPILING_CUDA)
#include "cuda/base/batch_struct.hpp"
#elif defined(GKO_COMPILING_HIP)
#include "hip/base/batch_struct.hip.hpp"
#else
#error "batch struct def missing"
#endif


namespace gko {
namespace kernels {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,22 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_CUDA_BASE_BATCH_STRUCT_HPP_
#define GKO_CUDA_BASE_BATCH_STRUCT_HPP_
#ifndef GKO_COMMON_CUDA_HIP_BASE_BATCH_STRUCT_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_BATCH_STRUCT_HPP_


#include <ginkgo/core/base/batch_multi_vector.hpp>
#include <ginkgo/core/base/math.hpp>

#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/unified/base/kernel_launch.hpp"
#include "core/base/batch_struct.hpp"


namespace gko {
namespace kernels {
namespace cuda {
namespace GKO_DEVICE_NAMESPACE {


/** @file batch_struct.hpp
Expand All @@ -33,10 +34,10 @@ namespace cuda {
* Generates an immutable uniform batch struct from a batch of multi-vectors.
*/
template <typename ValueType>
inline batch::multi_vector::uniform_batch<const cuda_type<ValueType>>
inline batch::multi_vector::uniform_batch<const device_type<ValueType>>
get_batch_struct(const batch::MultiVector<ValueType>* const op)
{
return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(),
return {as_device_type(op->get_const_values()), op->get_num_batch_items(),
static_cast<int32>(op->get_common_size()[1]),
static_cast<int32>(op->get_common_size()[0]),
static_cast<int32>(op->get_common_size()[1])};
Expand All @@ -46,19 +47,19 @@ get_batch_struct(const batch::MultiVector<ValueType>* const op)
* Generates a uniform batch struct from a batch of multi-vectors.
*/
template <typename ValueType>
inline batch::multi_vector::uniform_batch<cuda_type<ValueType>>
inline batch::multi_vector::uniform_batch<device_type<ValueType>>
get_batch_struct(batch::MultiVector<ValueType>* const op)
{
return {as_cuda_type(op->get_values()), op->get_num_batch_items(),
return {as_device_type(op->get_values()), op->get_num_batch_items(),
static_cast<int32>(op->get_common_size()[1]),
static_cast<int32>(op->get_common_size()[0]),
static_cast<int32>(op->get_common_size()[1])};
}


} // namespace cuda
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_CUDA_BASE_BATCH_STRUCT_HPP_
#endif // GKO_COMMON_CUDA_HIP_BASE_BATCH_STRUCT_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,34 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include "common/cuda_hip/matrix/batch_csr_kernels.hpp"

#include <thrust/functional.h>
#include <thrust/transform.h>

#include <ginkgo/core/base/batch_multi_vector.hpp>
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>
#include <ginkgo/core/matrix/batch_csr.hpp>

#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "core/base/batch_struct.hpp"
#include "core/matrix/batch_csr_kernels.hpp"
#include "core/matrix/batch_struct.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace batch_csr {


constexpr auto default_block_size = 256;


template <typename ValueType, typename IndexType>
void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
const batch::matrix::Csr<ValueType, IndexType>* mat,
Expand All @@ -15,8 +43,9 @@ void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
simple_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(mat_ub, b_ub, x_ub);
batch_single_kernels::simple_apply_kernel<<<num_blocks, default_block_size,
0, exec->get_stream()>>>(
mat_ub, b_ub, x_ub);
}


Expand All @@ -41,9 +70,9 @@ void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
advanced_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(alpha_ub, mat_ub, b_ub,
beta_ub, x_ub);
batch_single_kernels::advanced_apply_kernel<<<
num_blocks, default_block_size, 0, exec->get_stream()>>>(
alpha_ub, mat_ub, b_ub, beta_ub, x_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
Expand All @@ -59,8 +88,10 @@ void scale(std::shared_ptr<const DefaultExecutor> exec,
const auto col_scale_vals = col_scale->get_const_data();
const auto row_scale_vals = row_scale->get_const_data();
const auto mat_ub = get_batch_struct(input);
scale_kernel<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
as_device_type(col_scale_vals), as_device_type(row_scale_vals), mat_ub);
batch_single_kernels::
scale_kernel<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
as_device_type(col_scale_vals), as_device_type(row_scale_vals),
mat_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
Expand All @@ -77,10 +108,16 @@ void add_scaled_identity(std::shared_ptr<const DefaultExecutor> exec,
const auto alpha_ub = get_batch_struct(alpha);
const auto beta_ub = get_batch_struct(beta);
const auto mat_ub = get_batch_struct(mat);
add_scaled_identity_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(alpha_ub, beta_ub,
mat_ub);
batch_single_kernels::add_scaled_identity_kernel<<<
num_blocks, default_block_size, 0, exec->get_stream()>>>(
alpha_ub, beta_ub, mat_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
GKO_DECLARE_BATCH_CSR_ADD_SCALED_IDENTITY_KERNEL);


} // namespace batch_csr
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,36 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include <thrust/functional.h>
#include <thrust/transform.h>

#include <ginkgo/core/base/batch_multi_vector.hpp>
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>
#include <ginkgo/core/matrix/batch_csr.hpp>

#include "common/cuda_hip/base/batch_struct.hpp"
#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/thrust.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"
#include "common/cuda_hip/components/format_conversion.hpp"
#include "common/cuda_hip/components/reduction.hpp"
#include "common/cuda_hip/components/segment_scan.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
#include "common/cuda_hip/components/warp_blas.hpp"
#include "common/cuda_hip/matrix/batch_struct.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace batch_single_kernels {


template <typename ValueType, typename IndexType>
__device__ __forceinline__ void simple_apply(
const gko::batch::matrix::csr::batch_item<const ValueType, IndexType>& mat,
Expand All @@ -21,23 +51,11 @@ __device__ __forceinline__ void simple_apply(
}

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix::
csr::uniform_batch<
const ValueType,
IndexType>
mat,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
b,
const gko::batch::
multi_vector::
uniform_batch<
ValueType>
x)
__global__ __launch_bounds__(default_block_size) void simple_apply_kernel(
const gko::batch::matrix::csr::uniform_batch<const ValueType, IndexType>
mat,
const gko::batch::multi_vector::uniform_batch<const ValueType> b,
const gko::batch::multi_vector::uniform_batch<ValueType> x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
Expand Down Expand Up @@ -71,33 +89,13 @@ __device__ __forceinline__ void advanced_apply(
}

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void advanced_apply_kernel(const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
alpha,
const gko::batch::matrix::
csr::uniform_batch<
const ValueType,
IndexType>
mat,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
b,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
beta,
const gko::batch::
multi_vector::
uniform_batch<
ValueType>
x)
__global__ __launch_bounds__(default_block_size) void advanced_apply_kernel(
const gko::batch::multi_vector::uniform_batch<const ValueType> alpha,
const gko::batch::matrix::csr::uniform_batch<const ValueType, IndexType>
mat,
const gko::batch::multi_vector::uniform_batch<const ValueType> b,
const gko::batch::multi_vector::uniform_batch<const ValueType> beta,
const gko::batch::multi_vector::uniform_batch<ValueType> x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
Expand Down Expand Up @@ -196,3 +194,9 @@ __global__ void add_scaled_identity_kernel(
add_scaled_identity(alpha_b.values[0], beta_b.values[0], mat_b);
}
}


} // namespace batch_single_kernels
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Loading

0 comments on commit 70669c6

Please sign in to comment.