Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify and simplify batch functionality: Matrix formats (csr, dense, ell) #1669

Merged
merged 9 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions common/cuda_hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ set(CUDA_HIP_SOURCES
factorization/par_ilut_select_kernels.cpp
factorization/par_ilut_spgeam_kernels.cpp
factorization/par_ilut_sweep_kernels.cpp
matrix/batch_csr_kernels.cpp
matrix/batch_dense_kernels.cpp
matrix/batch_ell_kernels.cpp
matrix/coo_kernels.cpp
matrix/dense_kernels.cpp
matrix/diagonal_kernels.cpp
Expand Down
9 changes: 1 addition & 8 deletions common/cuda_hip/base/batch_multi_vector_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>

#include "common/cuda_hip/base/batch_struct.hpp"
#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
Expand All @@ -22,14 +23,6 @@
#include "common/cuda_hip/components/thread_ids.hpp"
#include "common/cuda_hip/components/warp_blas.hpp"

#if defined(GKO_COMPILING_CUDA)
#include "cuda/base/batch_struct.hpp"
#elif defined(GKO_COMPILING_HIP)
#include "hip/base/batch_struct.hip.hpp"
#else
#error "batch struct def missing"
#endif


namespace gko {
namespace kernels {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,22 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_CUDA_BASE_BATCH_STRUCT_HPP_
#define GKO_CUDA_BASE_BATCH_STRUCT_HPP_
#ifndef GKO_COMMON_CUDA_HIP_BASE_BATCH_STRUCT_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_BATCH_STRUCT_HPP_


#include <ginkgo/core/base/batch_multi_vector.hpp>
#include <ginkgo/core/base/math.hpp>

#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/unified/base/kernel_launch.hpp"
#include "core/base/batch_struct.hpp"


namespace gko {
namespace kernels {
namespace cuda {
namespace GKO_DEVICE_NAMESPACE {


/** @file batch_struct.hpp
Expand All @@ -33,10 +34,10 @@ namespace cuda {
* Generates an immutable uniform batch struct from a batch of multi-vectors.
*/
template <typename ValueType>
inline batch::multi_vector::uniform_batch<const cuda_type<ValueType>>
inline batch::multi_vector::uniform_batch<const device_type<ValueType>>
get_batch_struct(const batch::MultiVector<ValueType>* const op)
{
return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(),
return {as_device_type(op->get_const_values()), op->get_num_batch_items(),
static_cast<int32>(op->get_common_size()[1]),
static_cast<int32>(op->get_common_size()[0]),
static_cast<int32>(op->get_common_size()[1])};
Expand All @@ -46,19 +47,19 @@ get_batch_struct(const batch::MultiVector<ValueType>* const op)
* Generates a uniform batch struct from a batch of multi-vectors.
*/
template <typename ValueType>
inline batch::multi_vector::uniform_batch<cuda_type<ValueType>>
inline batch::multi_vector::uniform_batch<device_type<ValueType>>
get_batch_struct(batch::MultiVector<ValueType>* const op)
{
return {as_cuda_type(op->get_values()), op->get_num_batch_items(),
return {as_device_type(op->get_values()), op->get_num_batch_items(),
static_cast<int32>(op->get_common_size()[1]),
static_cast<int32>(op->get_common_size()[0]),
static_cast<int32>(op->get_common_size()[1])};
}


} // namespace cuda
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_CUDA_BASE_BATCH_STRUCT_HPP_
#endif // GKO_COMMON_CUDA_HIP_BASE_BATCH_STRUCT_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,34 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include "common/cuda_hip/matrix/batch_csr_kernels.hpp"

#include <thrust/functional.h>
#include <thrust/transform.h>

#include <ginkgo/core/base/batch_multi_vector.hpp>
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>
#include <ginkgo/core/matrix/batch_csr.hpp>

#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "core/base/batch_struct.hpp"
#include "core/matrix/batch_csr_kernels.hpp"
#include "core/matrix/batch_struct.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace batch_csr {


constexpr auto default_block_size = 256;


template <typename ValueType, typename IndexType>
void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
const batch::matrix::Csr<ValueType, IndexType>* mat,
Expand All @@ -15,8 +43,9 @@ void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
simple_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(mat_ub, b_ub, x_ub);
batch_single_kernels::simple_apply_kernel<<<num_blocks, default_block_size,
0, exec->get_stream()>>>(
mat_ub, b_ub, x_ub);
}


Expand All @@ -41,9 +70,9 @@ void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
advanced_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(alpha_ub, mat_ub, b_ub,
beta_ub, x_ub);
batch_single_kernels::advanced_apply_kernel<<<
num_blocks, default_block_size, 0, exec->get_stream()>>>(
alpha_ub, mat_ub, b_ub, beta_ub, x_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
Expand All @@ -59,8 +88,10 @@ void scale(std::shared_ptr<const DefaultExecutor> exec,
const auto col_scale_vals = col_scale->get_const_data();
const auto row_scale_vals = row_scale->get_const_data();
const auto mat_ub = get_batch_struct(input);
scale_kernel<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
as_device_type(col_scale_vals), as_device_type(row_scale_vals), mat_ub);
batch_single_kernels::
scale_kernel<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
as_device_type(col_scale_vals), as_device_type(row_scale_vals),
mat_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
Expand All @@ -77,10 +108,16 @@ void add_scaled_identity(std::shared_ptr<const DefaultExecutor> exec,
const auto alpha_ub = get_batch_struct(alpha);
const auto beta_ub = get_batch_struct(beta);
const auto mat_ub = get_batch_struct(mat);
add_scaled_identity_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(alpha_ub, beta_ub,
mat_ub);
batch_single_kernels::add_scaled_identity_kernel<<<
num_blocks, default_block_size, 0, exec->get_stream()>>>(
alpha_ub, beta_ub, mat_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
GKO_DECLARE_BATCH_CSR_ADD_SCALED_IDENTITY_KERNEL);


} // namespace batch_csr
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,36 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include <thrust/functional.h>
#include <thrust/transform.h>

#include <ginkgo/core/base/batch_multi_vector.hpp>
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>
#include <ginkgo/core/matrix/batch_csr.hpp>

#include "common/cuda_hip/base/batch_struct.hpp"
#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/thrust.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"
#include "common/cuda_hip/components/format_conversion.hpp"
#include "common/cuda_hip/components/reduction.hpp"
#include "common/cuda_hip/components/segment_scan.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
#include "common/cuda_hip/components/warp_blas.hpp"
#include "common/cuda_hip/matrix/batch_struct.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace batch_single_kernels {


template <typename ValueType, typename IndexType>
__device__ __forceinline__ void simple_apply(
const gko::batch::matrix::csr::batch_item<const ValueType, IndexType>& mat,
Expand All @@ -21,23 +51,11 @@ __device__ __forceinline__ void simple_apply(
}

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix::
csr::uniform_batch<
const ValueType,
IndexType>
mat,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
b,
const gko::batch::
multi_vector::
uniform_batch<
ValueType>
x)
__global__ __launch_bounds__(default_block_size) void simple_apply_kernel(
const gko::batch::matrix::csr::uniform_batch<const ValueType, IndexType>
mat,
const gko::batch::multi_vector::uniform_batch<const ValueType> b,
const gko::batch::multi_vector::uniform_batch<ValueType> x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
Expand Down Expand Up @@ -71,33 +89,13 @@ __device__ __forceinline__ void advanced_apply(
}

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void advanced_apply_kernel(const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
alpha,
const gko::batch::matrix::
csr::uniform_batch<
const ValueType,
IndexType>
mat,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
b,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
beta,
const gko::batch::
multi_vector::
uniform_batch<
ValueType>
x)
__global__ __launch_bounds__(default_block_size) void advanced_apply_kernel(
const gko::batch::multi_vector::uniform_batch<const ValueType> alpha,
const gko::batch::matrix::csr::uniform_batch<const ValueType, IndexType>
mat,
const gko::batch::multi_vector::uniform_batch<const ValueType> b,
const gko::batch::multi_vector::uniform_batch<const ValueType> beta,
const gko::batch::multi_vector::uniform_batch<ValueType> x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
Expand Down Expand Up @@ -196,3 +194,9 @@ __global__ void add_scaled_identity_kernel(
add_scaled_identity(alpha_b.values[0], beta_b.values[0], mat_b);
}
}


} // namespace batch_single_kernels
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Loading
Loading