Skip to content

Commit

Permalink
Merge: Scalar Jacobi fixes and update to unified kernels.
Browse files Browse the repository at this point in the history
This PR aims to fix some of the incomplete functionalities that were left during the specialization for Scalar Jacobi.

Fixes #838

Related PR: #854
  • Loading branch information
pratikvn committed Aug 7, 2021
2 parents d9789d7 + c455486 commit 5492c98
Show file tree
Hide file tree
Showing 16 changed files with 455 additions and 275 deletions.
158 changes: 158 additions & 0 deletions common/preconditioner/jacobi_kernels.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2021, the Ginkgo authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include "core/preconditioner/jacobi_kernels.hpp"


#include <ginkgo/core/base/math.hpp>


#include "common/base/kernel_launch.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
/**
* @brief The Jacobi preconditioner namespace.
*
* @ingroup jacobi
*/
namespace jacobi {


template <typename ValueType>
void scalar_conj(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag, Array<ValueType> &conj_diag)
{
run_kernel(
exec,
[] GKO_KERNEL(auto elem, auto diag, auto conj_diag) {
conj_diag[elem] = conj(diag[elem]);
},
diag.get_num_elems(), diag, conj_diag);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL);


template <typename ValueType>
void invert_diagonal(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag, Array<ValueType> &inv_diag)
{
run_kernel(
exec,
[] GKO_KERNEL(auto elem, auto diag, auto inv_diag) {
inv_diag[elem] = safe_divide(one(diag[elem]), diag[elem]);
},
diag.get_num_elems(), diag, inv_diag);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL);


template <typename ValueType>
void scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *alpha,
const matrix::Dense<ValueType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Dense<ValueType> *x)
{
if (alpha->get_size()[1] > 1) {
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b,
auto beta, auto x) {
x(row, col) = beta[col] * x(row, col) +
alpha[col] * b(row, col) * diag[row];
},
x->get_size(), diag, alpha->get_const_values(), b,
beta->get_const_values(), x);
} else {
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto diag, auto alpha, auto b,
auto beta, auto x) {
x(row, col) =
beta[0] * x(row, col) + alpha[0] * b(row, col) * diag[row];
},
x->get_size(), diag, alpha->get_const_values(), b,
beta->get_const_values(), x);
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL);


template <typename ValueType>
void simple_scalar_apply(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &diag,
const matrix::Dense<ValueType> *b,
matrix::Dense<ValueType> *x)
{
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto diag, auto b, auto x) {
x(row, col) = b(row, col) * diag[row];
},
x->get_size(), diag, b, x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL);


template <typename ValueType>
void scalar_convert_to_dense(std::shared_ptr<const DefaultExecutor> exec,
const Array<ValueType> &blocks,
matrix::Dense<ValueType> *result)
{
run_kernel(
exec,
[] GKO_KERNEL(auto row, auto col, auto diag, auto result) {
result(row, col) = zero(diag[row]);
if (row == col) {
result(row, col) = diag[row];
}
},
result->get_size(), blocks, result);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);


} // namespace jacobi
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
47 changes: 0 additions & 47 deletions common/preconditioner/jacobi_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -213,50 +213,3 @@ __launch_bounds__(warps_per_block *config::warp_size) adaptive_transpose_jacobi(
});
}
}


namespace kernel {


template <typename ValueType>
__global__ __launch_bounds__(default_block_size) void scalar_apply(
size_type num_rows, size_type num_cols, const ValueType *__restrict__ diag,
const ValueType *__restrict__ alpha, size_type source_stride,
const ValueType *__restrict__ source_values,
const ValueType *__restrict__ beta, size_type result_stride,
ValueType *__restrict__ result_values)
{
const auto tidx = thread::get_thread_id_flat();
const auto row = tidx / num_cols;
const auto col = tidx % num_cols;

if (row < num_rows) {
auto diag_val =
diag[row] == zero<ValueType>() ? one<ValueType>() : diag[row];
result_values[row * result_stride + col] =
result_values[row * result_stride + col] * beta[0] +
alpha[0] * source_values[row * source_stride + col] / diag_val;
}
}


template <typename ValueType>
__global__ __launch_bounds__(default_block_size) void simple_scalar_apply(
size_type num_rows, size_type num_cols, const ValueType *__restrict__ diag,
size_type source_stride, const ValueType *__restrict__ source_values,
size_type result_stride, ValueType *__restrict__ result_values)
{
const auto tidx = thread::get_thread_id_flat();
const auto row = tidx / num_cols;
const auto col = tidx % num_cols;

if (row < num_rows) {
auto diag_val =
diag[row] == zero<ValueType>() ? one<ValueType>() : diag[row];
result_values[row * result_stride + col] =
source_values[row * source_stride + col] / diag_val;
}
}


} // namespace kernel
16 changes: 16 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,22 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL);

template <typename ValueType>
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL(ValueType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL);

template <typename ValueType>
GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL(ValueType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL);

template <typename ValueType>
GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL(ValueType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
Expand Down
101 changes: 66 additions & 35 deletions core/preconditioner/jacobi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,13 @@ GKO_REGISTER_OPERATION(apply, jacobi::apply);
GKO_REGISTER_OPERATION(scalar_apply, jacobi::scalar_apply);
GKO_REGISTER_OPERATION(find_blocks, jacobi::find_blocks);
GKO_REGISTER_OPERATION(generate, jacobi::generate);
GKO_REGISTER_OPERATION(scalar_conj, jacobi::scalar_conj);
GKO_REGISTER_OPERATION(invert_diagonal, jacobi::invert_diagonal);
GKO_REGISTER_OPERATION(transpose_jacobi, jacobi::transpose_jacobi);
GKO_REGISTER_OPERATION(conj_transpose_jacobi, jacobi::conj_transpose_jacobi);
GKO_REGISTER_OPERATION(convert_to_dense, jacobi::convert_to_dense);
GKO_REGISTER_OPERATION(scalar_convert_to_dense,
jacobi::scalar_convert_to_dense);
GKO_REGISTER_OPERATION(initialize_precisions, jacobi::initialize_precisions);


Expand Down Expand Up @@ -119,10 +123,14 @@ void Jacobi<ValueType, IndexType>::convert_to(
{
auto exec = this->get_executor();
auto tmp = matrix::Dense<ValueType>::create(exec, this->get_size());
exec->run(jacobi::make_convert_to_dense(
num_blocks_, parameters_.storage_optimization.block_wise,
parameters_.block_pointers, blocks_, storage_scheme_, tmp->get_values(),
tmp->get_stride()));
if (parameters_.max_block_size == 1) {
exec->run(jacobi::make_scalar_convert_to_dense(blocks_, tmp.get()));
} else {
exec->run(jacobi::make_convert_to_dense(
num_blocks_, parameters_.storage_optimization.block_wise,
parameters_.block_pointers, blocks_, storage_scheme_,
tmp->get_values(), tmp->get_stride()));
}
tmp->move_to(result);
}

Expand All @@ -141,29 +149,40 @@ void Jacobi<ValueType, IndexType>::write(mat_data &data) const
make_temporary_clone(this->get_executor()->get_master(), this);
data = {local_clone->get_size(), {}};

const auto ptrs = local_clone->parameters_.block_pointers.get_const_data();
for (size_type block = 0; block < local_clone->get_num_blocks(); ++block) {
const auto scheme = local_clone->get_storage_scheme();
const auto group_data = local_clone->blocks_.get_const_data() +
scheme.get_group_offset(block);
const auto block_size = ptrs[block + 1] - ptrs[block];
const auto precisions = local_clone->parameters_.storage_optimization
.block_wise.get_const_data();
const auto prec =
precisions ? precisions[block] : precision_reduction();
GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION(ValueType, prec, {
const auto block_data =
reinterpret_cast<const resolved_precision *>(group_data) +
scheme.get_block_offset(block);
for (IndexType row = 0; row < block_size; ++row) {
for (IndexType col = 0; col < block_size; ++col) {
data.nonzeros.emplace_back(
ptrs[block] + row, ptrs[block] + col,
static_cast<ValueType>(
block_data[row + col * scheme.get_stride()]));
if (parameters_.max_block_size == 1) {
for (IndexType row = 0; row < data.size[0]; ++row) {
data.nonzeros.emplace_back(
row, row,
static_cast<ValueType>(local_clone->get_blocks()[row]));
}
} else {
const auto ptrs =
local_clone->parameters_.block_pointers.get_const_data();
for (size_type block = 0; block < local_clone->get_num_blocks();
++block) {
const auto scheme = local_clone->get_storage_scheme();
const auto group_data = local_clone->blocks_.get_const_data() +
scheme.get_group_offset(block);
const auto block_size = ptrs[block + 1] - ptrs[block];
const auto precisions =
local_clone->parameters_.storage_optimization.block_wise
.get_const_data();
const auto prec =
precisions ? precisions[block] : precision_reduction();
GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION(ValueType, prec, {
const auto block_data =
reinterpret_cast<const resolved_precision *>(group_data) +
scheme.get_block_offset(block);
for (IndexType row = 0; row < block_size; ++row) {
for (IndexType col = 0; col < block_size; ++col) {
data.nonzeros.emplace_back(
ptrs[block] + row, ptrs[block] + col,
static_cast<ValueType>(
block_data[row + col * scheme.get_stride()]));
}
}
}
});
});
}
}
}

Expand All @@ -180,10 +199,15 @@ std::unique_ptr<LinOp> Jacobi<ValueType, IndexType>::transpose() const
res->blocks_.resize_and_reset(blocks_.get_num_elems());
res->conditioning_ = conditioning_;
res->parameters_ = parameters_;
this->get_executor()->run(jacobi::make_transpose_jacobi(
num_blocks_, parameters_.max_block_size,
parameters_.storage_optimization.block_wise, parameters_.block_pointers,
blocks_, storage_scheme_, res->blocks_));
if (parameters_.max_block_size == 1) {
res->blocks_ = blocks_;
} else {
this->get_executor()->run(jacobi::make_transpose_jacobi(
num_blocks_, parameters_.max_block_size,
parameters_.storage_optimization.block_wise,
parameters_.block_pointers, blocks_, storage_scheme_,
res->blocks_));
}

return std::move(res);
}
Expand All @@ -201,10 +225,16 @@ std::unique_ptr<LinOp> Jacobi<ValueType, IndexType>::conj_transpose() const
res->blocks_.resize_and_reset(blocks_.get_num_elems());
res->conditioning_ = conditioning_;
res->parameters_ = parameters_;
this->get_executor()->run(jacobi::make_conj_transpose_jacobi(
num_blocks_, parameters_.max_block_size,
parameters_.storage_optimization.block_wise, parameters_.block_pointers,
blocks_, storage_scheme_, res->blocks_));
if (parameters_.max_block_size == 1) {
this->get_executor()->run(
jacobi::make_scalar_conj(this->blocks_, res->blocks_));
} else {
this->get_executor()->run(jacobi::make_conj_transpose_jacobi(
num_blocks_, parameters_.max_block_size,
parameters_.storage_optimization.block_wise,
parameters_.block_pointers, blocks_, storage_scheme_,
res->blocks_));
}

return std::move(res);
}
Expand Down Expand Up @@ -244,7 +274,8 @@ void Jacobi<ValueType, IndexType>::generate(const LinOp *system_matrix,
auto temp = Array<ValueType>::view(diag_vt->get_executor(),
diag_vt->get_size()[0],
diag_vt->get_values());
this->blocks_ = temp;
this->blocks_ = Array<ValueType>(exec, temp.get_num_elems());
exec->run(jacobi::make_invert_diagonal(temp, this->blocks_));
this->num_blocks_ = diag_vt->get_size()[0];
} else {
auto csr_mtx = convert_to_with_sorting<csr_type>(exec, system_matrix,
Expand Down
Loading

0 comments on commit 5492c98

Please sign in to comment.