Skip to content

Commit

Permalink
Use simple kernels for scalar apply, dense convert
Browse files Browse the repository at this point in the history
  • Loading branch information
pratikvn committed Aug 6, 2021
1 parent ffbf037 commit 7b1a52c
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 227 deletions.
75 changes: 75 additions & 0 deletions common/preconditioner/jacobi_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,81 @@ void invert_diagonal(std::shared_ptr<const DefaultExecutor> exec,
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL);


/**
 * Scaled application of the scalar Jacobi preconditioner:
 * every entry of `x` becomes `beta * x + alpha * b * diag[row]`.
 *
 * If `alpha` has more than one column, `alpha` and `beta` are read per
 * column; otherwise their first entry is used for all columns.
 *
 * @param exec   executor handed to run_kernel
 * @param diag   one scaling factor per row
 * @param alpha  scaling applied to the `b * diag` term
 * @param b      input vectors
 * @param beta   scaling applied to the previous content of `x`
 * @param x      vectors updated in place; its size defines the launch range
 */
template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                  const Array<ValueType> &diag,
                  const matrix::Dense<ValueType> *alpha,
                  const matrix::Dense<ValueType> *b,
                  const matrix::Dense<ValueType> *beta,
                  matrix::Dense<ValueType> *x)
{
    if (alpha->get_size()[1] <= 1) {
        // One (alpha, beta) pair shared by every column.
        run_kernel(
            exec,
            [] GKO_KERNEL(auto i, auto j, auto scale, auto alpha_vals,
                          auto rhs, auto beta_vals, auto sol) {
                sol(i, j) = beta_vals[0] * sol(i, j) +
                            alpha_vals[0] * rhs(i, j) * scale[i];
            },
            x->get_size(), diag.get_const_data(), alpha->get_const_values(), b,
            beta->get_const_values(), x);
        return;
    }

    // Column-wise scaling: entry j of alpha/beta belongs to column j.
    run_kernel(
        exec,
        [] GKO_KERNEL(auto i, auto j, auto scale, auto alpha_vals, auto rhs,
                      auto beta_vals, auto sol) {
            sol(i, j) = beta_vals[j] * sol(i, j) +
                        alpha_vals[j] * rhs(i, j) * scale[i];
        },
        x->get_size(), diag.get_const_data(), alpha->get_const_values(), b,
        beta->get_const_values(), x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


/**
 * Unscaled application of the scalar Jacobi preconditioner:
 * every entry of `x` is overwritten with `b * diag[row]`.
 *
 * @param exec  executor handed to run_kernel
 * @param diag  one scaling factor per row
 * @param b     input vectors
 * @param x     output vectors; its size defines the launch range
 */
template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                         const Array<ValueType> &diag,
                         const matrix::Dense<ValueType> *b,
                         matrix::Dense<ValueType> *x)
{
    run_kernel(
        exec,
        [] GKO_KERNEL(auto i, auto j, auto scale, auto rhs, auto sol) {
            // Row i of the input is scaled by the i-th diagonal factor.
            sol(i, j) = rhs(i, j) * scale[i];
        },
        x->get_size(), diag.get_const_data(), b, x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
    GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


/**
 * Writes the scalar Jacobi data into a dense row-major buffer: entry
 * (row, col) receives `blocks[row]` when `row == col` and zero otherwise.
 *
 * @param exec           executor handed to run_kernel
 * @param blocks         array holding one value per row
 * @param result_values  output buffer, addressed as `row * stride + col`
 * @param matrix_size    launch range of the kernel
 * @param result_stride  distance between consecutive rows of the output
 */
template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
                             const Array<ValueType> &blocks,
                             ValueType *result_values,
                             const gko::dim<2> &matrix_size,
                             size_type result_stride)
{
    run_kernel(
        exec,
        [] GKO_KERNEL(auto i, auto j, auto ld, auto diag_values, auto dense) {
            const auto idx = i * ld + j;
            if (i == j) {
                dense[idx] = diag_values[i];
            } else {
                dense[idx] = zero<ValueType>();
            }
        },
        matrix_size, result_stride, blocks.get_const_data(), result_values);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
    GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


} // namespace jacobi
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
Expand Down
66 changes: 0 additions & 66 deletions cuda/preconditioner/jacobi_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -230,17 +230,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);


// CUDA placeholder for the scalar Jacobi dense conversion. The definition has
// no body of its own; GKO_NOT_IMPLEMENTED supplies it.
// NOTE(review): presumably the macro expands to a body that reports the kernel
// as not implemented at run time - confirm against the macro definition.
template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
ValueType *result_values,
const gko::dim<2> &mat_size,
size_type result_stride) GKO_NOT_IMPLEMENTED;

// Instantiates the placeholder via the declaration macro passed below.
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_dense(
std::shared_ptr<const DefaultExecutor> exec, size_type num_blocks,
Expand All @@ -254,61 +243,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


/**
 * Launches the CUDA kernel `kernel::scalar_apply` for the scaled scalar
 * Jacobi application.
 * NOTE(review): the device kernel is defined elsewhere; presumably it computes
 * x = beta * x + alpha * diag .* b - confirm against its definition.
 *
 * @param exec   executor (not used by this launch)
 * @param diag   array whose data pointer is passed as the diagonal values
 * @param alpha  scaling factor(s), passed as a value pointer
 * @param b      input vectors; their size determines the launch size
 * @param beta   scaling factor(s), passed as a value pointer
 * @param x      output vectors, updated by the kernel
 */
template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                  const Array<ValueType> &diag,
                  const matrix::Dense<ValueType> *alpha,
                  const matrix::Dense<ValueType> *b,
                  const matrix::Dense<ValueType> *beta,
                  matrix::Dense<ValueType> *x)
{
    const auto b_size = b->get_size();
    const auto num_rows = b_size[0];
    const auto num_cols = b_size[1];
    // An empty input would give a grid of zero blocks, which is an invalid
    // launch configuration; there is nothing to compute in that case.
    if (num_rows == 0 || num_cols == 0) {
        return;
    }
    const auto b_stride = b->get_stride();
    const auto x_stride = x->get_stride();
    // One thread per matrix entry, rounded up to whole blocks.
    const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

    const auto b_values = b->get_const_values();
    const auto diag_values = diag.get_const_data();
    auto x_values = x->get_values();

    kernel::scalar_apply<<<grid_dim, default_block_size>>>(
        num_rows, num_cols, as_cuda_type(diag_values),
        as_cuda_type(alpha->get_const_values()), b_stride,
        as_cuda_type(b_values), as_cuda_type(beta->get_const_values()),
        x_stride, as_cuda_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


/**
 * Launches the CUDA kernel `kernel::simple_scalar_apply` for the unscaled
 * scalar Jacobi application.
 * NOTE(review): the device kernel is defined elsewhere; presumably it computes
 * x = diag .* b - confirm against its definition.
 *
 * @param exec  executor (not used by this launch)
 * @param diag  array whose data pointer is passed as the diagonal values
 * @param b     input vectors; their size determines the launch size
 * @param x     output vectors, written by the kernel
 */
template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                         const Array<ValueType> &diag,
                         const matrix::Dense<ValueType> *b,
                         matrix::Dense<ValueType> *x)
{
    const auto b_size = b->get_size();
    const auto num_rows = b_size[0];
    const auto num_cols = b_size[1];
    // An empty input would give a grid of zero blocks, which is an invalid
    // launch configuration; there is nothing to compute in that case.
    if (num_rows == 0 || num_cols == 0) {
        return;
    }
    const auto b_stride = b->get_stride();
    const auto x_stride = x->get_stride();
    // One thread per matrix entry, rounded up to whole blocks.
    const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

    const auto b_values = b->get_const_values();
    const auto diag_values = diag.get_const_data();
    auto x_values = x->get_values();

    kernel::simple_scalar_apply<<<grid_dim, default_block_size>>>(
        num_rows, num_cols, as_cuda_type(diag_values), b_stride,
        as_cuda_type(b_values), x_stride, as_cuda_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
    GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


} // namespace jacobi
} // namespace cuda
} // namespace kernels
Expand Down
32 changes: 0 additions & 32 deletions dpcpp/preconditioner/jacobi_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,17 +267,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);


// DPC++ placeholder for the scalar Jacobi dense conversion. The definition has
// no body of its own; GKO_NOT_IMPLEMENTED supplies it.
// NOTE(review): presumably the macro expands to a body that reports the kernel
// as not implemented at run time - confirm against the macro definition.
template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
ValueType *result_values,
const gko::dim<2> &mat_size,
size_type result_stride) GKO_NOT_IMPLEMENTED;

// Instantiates the placeholder via the declaration macro passed below.
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_dense(
std::shared_ptr<const DpcppExecutor> exec, size_type num_blocks,
Expand All @@ -291,27 +280,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


// DPC++ placeholder for the scaled scalar Jacobi application. The definition
// has no body of its own; GKO_NOT_IMPLEMENTED supplies it.
// NOTE(review): presumably the macro expands to a body that reports the kernel
// as not implemented at run time - confirm against the macro definition.
template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *alpha,
const matrix::Dense<ValueType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Dense<ValueType> *x) GKO_NOT_IMPLEMENTED;

// Instantiates the placeholder via the declaration macro passed below.
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


// DPC++ placeholder for the unscaled scalar Jacobi application. The definition
// has no body of its own; GKO_NOT_IMPLEMENTED supplies it.
// NOTE(review): presumably the macro expands to a body that reports the kernel
// as not implemented at run time - confirm against the macro definition.
template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *b,
matrix::Dense<ValueType> *x) GKO_NOT_IMPLEMENTED;

// Instantiates the placeholder via the declaration macro passed below.
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


} // namespace jacobi
} // namespace dpcpp
} // namespace kernels
Expand Down
67 changes: 0 additions & 67 deletions hip/preconditioner/jacobi_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,17 +244,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);


// HIP placeholder for the scalar Jacobi dense conversion. The definition has
// no body of its own; GKO_NOT_IMPLEMENTED supplies it.
// NOTE(review): presumably the macro expands to a body that reports the kernel
// as not implemented at run time - confirm against the macro definition.
template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
ValueType *result_values,
const gko::dim<2> &mat_size,
size_type result_stride) GKO_NOT_IMPLEMENTED;

// Instantiates the placeholder via the declaration macro passed below.
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


template <typename ValueType, typename IndexType>
void convert_to_dense(
std::shared_ptr<const HipExecutor> exec, size_type num_blocks,
Expand All @@ -268,62 +257,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


/**
 * Launches the HIP kernel `kernel::scalar_apply` for the scaled scalar
 * Jacobi application.
 * NOTE(review): the device kernel is defined elsewhere; presumably it computes
 * x = beta * x + alpha * diag .* b - confirm against its definition.
 *
 * @param exec   executor (not used by this launch)
 * @param diag   array whose data pointer is passed as the diagonal values
 * @param alpha  scaling factor(s), passed as a value pointer
 * @param b      input vectors; their size determines the launch size
 * @param beta   scaling factor(s), passed as a value pointer
 * @param x      output vectors, updated by the kernel
 */
template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                  const Array<ValueType> &diag,
                  const matrix::Dense<ValueType> *alpha,
                  const matrix::Dense<ValueType> *b,
                  const matrix::Dense<ValueType> *beta,
                  matrix::Dense<ValueType> *x)
{
    const auto b_size = b->get_size();
    const auto num_rows = b_size[0];
    const auto num_cols = b_size[1];
    // An empty input would give a grid of zero blocks, which is an invalid
    // launch configuration; there is nothing to compute in that case.
    if (num_rows == 0 || num_cols == 0) {
        return;
    }
    const auto b_stride = b->get_stride();
    const auto x_stride = x->get_stride();
    // One thread per matrix entry, rounded up to whole blocks.
    const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

    const auto b_values = b->get_const_values();
    const auto diag_values = diag.get_const_data();
    auto x_values = x->get_values();

    hipLaunchKernelGGL(
        kernel::scalar_apply, dim3(grid_dim), dim3(default_block_size), 0, 0,
        num_rows, num_cols, as_hip_type(diag_values),
        as_hip_type(alpha->get_const_values()), b_stride, as_hip_type(b_values),
        as_hip_type(beta->get_const_values()), x_stride, as_hip_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


/**
 * Launches the HIP kernel `kernel::simple_scalar_apply` for the unscaled
 * scalar Jacobi application.
 * NOTE(review): the device kernel is defined elsewhere; presumably it computes
 * x = diag .* b - confirm against its definition.
 *
 * @param exec  executor (not used by this launch)
 * @param diag  array whose data pointer is passed as the diagonal values
 * @param b     input vectors; their size determines the launch size
 * @param x     output vectors, written by the kernel
 */
template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                         const Array<ValueType> &diag,
                         const matrix::Dense<ValueType> *b,
                         matrix::Dense<ValueType> *x)
{
    const auto b_size = b->get_size();
    const auto num_rows = b_size[0];
    const auto num_cols = b_size[1];
    // An empty input would give a grid of zero blocks, which is an invalid
    // launch configuration; there is nothing to compute in that case.
    if (num_rows == 0 || num_cols == 0) {
        return;
    }
    const auto b_stride = b->get_stride();
    const auto x_stride = x->get_stride();
    // One thread per matrix entry, rounded up to whole blocks.
    const auto grid_dim = ceildiv(num_rows * num_cols, default_block_size);

    const auto b_values = b->get_const_values();
    const auto diag_values = diag.get_const_data();
    auto x_values = x->get_values();

    hipLaunchKernelGGL(kernel::simple_scalar_apply, dim3(grid_dim),
                       dim3(default_block_size), 0, 0, num_rows, num_cols,
                       as_hip_type(diag_values), b_stride,
                       as_hip_type(b_values), x_stride, as_hip_type(x_values));
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
    GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


} // namespace jacobi
} // namespace hip
} // namespace kernels
Expand Down
62 changes: 0 additions & 62 deletions omp/preconditioner/jacobi_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -696,68 +696,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL);


/**
 * Fills a dense row-major buffer from the scalar Jacobi data: entry
 * (row, col) receives `blocks[row]` when `row == col` and zero otherwise.
 * Rows are distributed over OpenMP threads.
 *
 * @param exec           executor (not used in the body)
 * @param blocks         array holding one value per row
 * @param result_values  output buffer, addressed as `row * stride + col`
 * @param matrix_size    number of rows and columns to write
 * @param result_stride  distance between consecutive rows of the output
 */
template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
                             const Array<ValueType> &blocks,
                             ValueType *result_values,
                             const gko::dim<2> &matrix_size,
                             size_type result_stride)
{
    const auto diag_values = blocks.get_const_data();
    const auto num_rows = matrix_size[0];
    const auto num_cols = matrix_size[1];
#pragma omp parallel for
    for (size_type row = 0; row < num_rows; ++row) {
        const auto row_values = result_values + row * result_stride;
        for (size_type col = 0; col < num_cols; ++col) {
            if (row == col) {
                row_values[col] = diag_values[row];
            } else {
                row_values[col] = zero<ValueType>();
            }
        }
    }
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
    GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


/**
 * Scaled application of the scalar Jacobi preconditioner on the host:
 * every entry of `x` becomes `beta * x + alpha * b * diag[row]`, using the
 * first entry of `alpha` and `beta`. Rows are distributed over OpenMP threads.
 *
 * @param exec   executor (not used in the body)
 * @param diag   one scaling factor per row
 * @param alpha  scaling applied to the `b * diag` term
 * @param b      input vectors
 * @param beta   scaling applied to the previous content of `x`
 * @param x      vectors updated in place; its size defines the loop bounds
 */
template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                  const Array<ValueType> &diag,
                  const matrix::Dense<ValueType> *alpha,
                  const matrix::Dense<ValueType> *b,
                  const matrix::Dense<ValueType> *beta,
                  matrix::Dense<ValueType> *x)
{
    const auto diag_values = diag.get_const_data();
    const auto num_rows = x->get_size()[0];
    const auto num_cols = x->get_size()[1];
#pragma omp parallel for
    for (size_type row = 0; row < num_rows; ++row) {
        for (size_type col = 0; col < num_cols; ++col) {
            // alpha and beta are read here, not up front, so they are never
            // accessed when x has no entries.
            x->at(row, col) =
                beta->at(0) * x->at(row, col) +
                alpha->at(0) * b->at(row, col) * diag_values[row];
        }
    }
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


/**
 * Unscaled application of the scalar Jacobi preconditioner on the host:
 * every entry of `x` is overwritten with `b * diag[row]`. Rows are
 * distributed over OpenMP threads.
 *
 * @param exec  executor (not used in the body)
 * @param diag  one scaling factor per row
 * @param b     input vectors
 * @param x     output vectors; its size defines the loop bounds
 */
template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
                         const Array<ValueType> &diag,
                         const matrix::Dense<ValueType> *b,
                         matrix::Dense<ValueType> *x)
{
    const auto diag_values = diag.get_const_data();
    const auto num_rows = x->get_size()[0];
    const auto num_cols = x->get_size()[1];
#pragma omp parallel for
    for (size_type row = 0; row < num_rows; ++row) {
        for (size_type col = 0; col < num_cols; ++col) {
            x->at(row, col) = b->at(row, col) * diag_values[row];
        }
    }
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
    GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


} // namespace jacobi
} // namespace omp
} // namespace kernels
Expand Down

0 comments on commit 7b1a52c

Please sign in to comment.