Skip to content

Commit

Permalink
[cuda,hip,dpcpp] disable optimized kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
pratikvn committed Aug 5, 2024
1 parent 4e43262 commit 42b403d
Show file tree
Hide file tree
Showing 10 changed files with 301 additions and 347 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ option(GINKGO_HIP_AMD_UNSAFE_ATOMIC "Compiler uses unsafe floating point atomic
option(GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS "Split template instantiations for slow-to-compile files. This improves parallel build performance" ON)
mark_as_advanced(GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS)
option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF)
option(GINKGO_BATCHED_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA/HIP batched solver algorithms" OFF)
option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON)
option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is OFF." OFF)
option(GINKGO_BUILD_PAPI_SDE "Build Ginkgo with PAPI SDE. Enabled if a system installation is found." ${PAPI_SDE_FOUND})
Expand Down
5 changes: 1 addition & 4 deletions core/solver/batch_bicgstab_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@
#include "core/base/kernel_declaration.hpp"


// Compile-time flag for the batched BiCGSTAB solver. It is false only when the
// build defines GINKGO_BATCHED_FULL_OPTIMIZATIONS and true otherwise. The macro
// name here must match the build option's spelling exactly; a misspelled guard
// silently pins the flag to true regardless of the build configuration.
#ifdef GINKGO_BATCHED_FULL_OPTIMIZATIONS
constexpr bool bicgstab_no_shared_vecs = false;
#else
// TODO: update when splitting kernels
constexpr bool bicgstab_no_shared_vecs = true;
#endif


namespace gko {
Expand Down
5 changes: 1 addition & 4 deletions core/solver/batch_cg_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@
#include "core/base/kernel_declaration.hpp"


// Compile-time flag for the batched CG solver. It is false only when the build
// defines GINKGO_BATCHED_FULL_OPTIMIZATIONS and true otherwise. The macro name
// here must match the build option's spelling exactly; a misspelled guard
// silently pins the flag to true regardless of the build configuration.
#ifdef GINKGO_BATCHED_FULL_OPTIMIZATIONS
constexpr bool cg_no_shared_vecs = false;
#else
// TODO: update when splitting compilation
constexpr bool cg_no_shared_vecs = true;
#endif


namespace gko {
Expand Down
123 changes: 58 additions & 65 deletions cuda/solver/batch_bicgstab_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -167,76 +167,69 @@ public:
value_type* const workspace_data = workspace.get_data();
// Only instantiate when full optimizations have been enabled. Otherwise,
// just use the default one with no shared memory.
#ifdef GINKGO_BATCHED_FULL_OPTIMIZATIONS
// TODO: split compilation
// Template parameters launch_apply_kernel<StopType, n_shared,
// prec_shared>
if (sconf.prec_shared) {
launch_apply_kernel<StopType, 9, true>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<StopType, 1, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<StopType, 2, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<StopType, 3, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<StopType, 4, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<StopType, 5, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 6:
launch_apply_kernel<StopType, 6, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 7:
launch_apply_kernel<StopType, 7, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 8:
launch_apply_kernel<StopType, 8, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 9:
launch_apply_kernel<StopType, 9, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
default:
GKO_NOT_IMPLEMENTED;
}
}
#else
// if (sconf.prec_shared) {
// launch_apply_kernel<StopType, 9, true>(
// sconf, logger, prec, mat, b.values, x.values, workspace_data,
// block_size, shared_size);
// } else {
// switch (sconf.n_shared) {
// case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
#endif
// break;
// case 1:
// launch_apply_kernel<StopType, 1, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 2:
// launch_apply_kernel<StopType, 2, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 3:
// launch_apply_kernel<StopType, 3, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 4:
// launch_apply_kernel<StopType, 4, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 5:
// launch_apply_kernel<StopType, 5, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 6:
// launch_apply_kernel<StopType, 6, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 7:
// launch_apply_kernel<StopType, 7, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 8:
// launch_apply_kernel<StopType, 8, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 9:
// launch_apply_kernel<StopType, 9, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// default:
// GKO_NOT_IMPLEMENTED;
// }
// }
}
private:
Expand Down
81 changes: 38 additions & 43 deletions cuda/solver/batch_cg_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -165,56 +165,51 @@ public:
value_type* const workspace_data = workspace.get_data();
// TODO: split compilation
// Only instantiate when full optimizations have been enabled. Otherwise,
// just use the default one with no shared memory.
#ifdef GINKGO_BATCHED_FULL_OPTIMIZATIONS
// Template parameters launch_apply_kernel<StopType, n_shared,
// prec_shared>
if (sconf.prec_shared) {
launch_apply_kernel<StopType, 5, true>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<StopType, 1, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<StopType, 2, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<StopType, 3, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<StopType, 4, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<StopType, 5, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
default:
GKO_NOT_IMPLEMENTED;
}
}
#else
// if (sconf.prec_shared) {
// launch_apply_kernel<StopType, 5, true>(
// sconf, logger, prec, mat, b.values, x.values, workspace_data,
// block_size, shared_size);
// } else {
// switch (sconf.n_shared) {
// case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
#endif
// break;
// case 1:
// launch_apply_kernel<StopType, 1, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 2:
// launch_apply_kernel<StopType, 2, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 3:
// launch_apply_kernel<StopType, 3, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 4:
// launch_apply_kernel<StopType, 4, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 5:
// launch_apply_kernel<StopType, 5, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// default:
// GKO_NOT_IMPLEMENTED;
// }
// }
}
private:
Expand Down
Loading

0 comments on commit 42b403d

Please sign in to comment.