Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Oneapi6 fix #1251

Merged
merged 9 commits into from
Mar 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ if(MSVC)
endif()
endif()

if(GINKGO_BUILD_DPCPP)
ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MAJOR_VERSION __LIBSYCL_MAJOR_VERSION)
ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION __SYCL_COMPILER_VERSION)
else()
set(GINKGO_DPCPP_MAJOR_VERSION "0")
endif()
configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in
${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp @ONLY)

Expand Down
1 change: 1 addition & 0 deletions cmake/GinkgoConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ set(GINKGO_HIP_VERSION @GINKGO_HIP_VERSION@)
set(GINKGO_AMD_ARCH_FLAGS @GINKGO_AMD_ARCH_FLAGS@)

set(GINKGO_DPCPP_VERSION @GINKGO_DPCPP_VERSION@)
set(GINKGO_DPCPP_MAJOR_VERSION @GINKGO_DPCPP_MAJOR_VERSION@)
set(GINKGO_DPCPP_FLAGS @GINKGO_DPCPP_FLAGS@)
set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@)
set(GINKGO_DPL_ROOT @GINKGO_DPL_ROOT@)
Expand Down
4 changes: 2 additions & 2 deletions cmake/build_helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ function(ginkgo_extract_clang_version CLANG_COMPILER GINKGO_CLANG_VERSION)
endfunction()

# Extract the DPC++ version
function(ginkgo_extract_dpcpp_version DPCPP_COMPILER GINKGO_DPCPP_VERSION)
function(ginkgo_extract_dpcpp_version DPCPP_COMPILER GINKGO_DPCPP_VERSION MACRO_VAR)
set(DPCPP_VERSION_PROG "#include <CL/sycl.hpp>\n#include <iostream>\n"
"int main() {std::cout << __SYCL_COMPILER_VERSION << '\\n'\;"
"int main() {std::cout << ${MACRO_VAR} << '\\n'\;"
"return 0\;}")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp" ${DPCPP_VERSION_PROG})
execute_process(COMMAND ${DPCPP_COMPILER} ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp
Expand Down
5 changes: 3 additions & 2 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ namespace csr {
constexpr int default_block_size = 512;
constexpr int warps_in_block = 4;
constexpr int spmv_block_size = warps_in_block * config::warp_size;
constexpr int classical_overweight = 32;
constexpr int classical_oversubscription = 32;


/**
Expand Down Expand Up @@ -240,7 +240,8 @@ void classical_spmv(syn::value_list<int, subwarp_size>,
const matrix::Dense<ValueType>* beta = nullptr)
{
const auto nwarps = exec->get_num_warps_per_sm() *
exec->get_num_multiprocessor() * classical_overweight;
exec->get_num_multiprocessor() *
classical_oversubscription;
const auto gridx =
std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size),
int64(nwarps / warps_in_block));
Expand Down
5 changes: 3 additions & 2 deletions cuda/matrix/sparsity_csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ namespace cuda {
namespace sparsity_csr {


constexpr int classical_overweight = 32;
constexpr int classical_oversubscription = 32;
constexpr int spmv_block_size = 128;
constexpr int warps_in_block = 4;

Expand Down Expand Up @@ -92,7 +92,8 @@ void classical_spmv(syn::value_list<int, subwarp_size>,
gko::acc::reduced_row_major<2, arithmetic_type, OutputValueType>;

const auto nwarps = exec->get_num_warps_per_sm() *
exec->get_num_multiprocessor() * classical_overweight;
exec->get_num_multiprocessor() *
classical_oversubscription;
const auto gridx =
std::min(ceildiv(a->get_size()[0], spmv_block_size / subwarp_size),
int64(nwarps / warps_in_block));
Expand Down
3 changes: 0 additions & 3 deletions dpcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION)
set(GINKGO_DPCPP_VERSION ${GINKGO_DPCPP_VERSION} PARENT_SCOPE)

find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}")
set(GINKGO_MKL_ROOT "${MKL_ROOT}" PARENT_SCOPE)
find_package(oneDPL REQUIRED HINTS "$ENV{DPL_ROOT}")
Expand Down
31 changes: 27 additions & 4 deletions dpcpp/base/executor.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void DpcppExecutor::raw_free(void* ptr) const noexcept
try {
queue_->wait_and_throw();
sycl::free(ptr, queue_->get_context());
} catch (cl::sycl::exception& err) {
} catch (sycl::exception& err) {
tcojean marked this conversation as resolved.
Show resolved Hide resolved
#if GKO_VERBOSE_LEVEL >= 1
// Unfortunately, if memory free fails, there's not much we can do
std::cerr << "Unrecoverable Dpcpp error on device "
Expand Down Expand Up @@ -231,7 +231,7 @@ void delete_queue(sycl::queue* queue)
}


::cl::sycl::property_list get_property_list(dpcpp_queue_property property)
sycl::property_list get_property_list(dpcpp_queue_property property)
{
if (property == dpcpp_queue_property::in_order) {
return {sycl::property::queue::in_order{}};
Expand All @@ -257,11 +257,11 @@ void DpcppExecutor::set_device_property(dpcpp_queue_property property)
if (!device.is_host()) {
try {
auto subgroup_sizes =
device.get_info<cl::sycl::info::device::sub_group_sizes>();
device.get_info<sycl::info::device::sub_group_sizes>();
for (auto& i : subgroup_sizes) {
this->get_exec_info().subgroup_sizes.push_back(i);
}
} catch (cl::sycl::exception& err) {
} catch (sycl::exception& err) {
GKO_NOT_SUPPORTED(device);
}
}
Expand All @@ -274,15 +274,38 @@ void DpcppExecutor::set_device_property(dpcpp_queue_property property)
}
this->get_exec_info().max_workgroup_size = static_cast<int>(
device.get_info<sycl::info::device::max_work_group_size>());
// They change the max_work_item_size with template parameter Dimension after
// major version 6 and adding the default = 3 is not in the same release.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would 3 be a sensible default at all? It sounds like that is a pretty CUDA-specific view? maybe just state that this is the dimensionality we use.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so. At least, it's the default dimension template choice from the spec.
They just forgot to add the default in some version when updating the code with updated spec

#if GINKGO_DPCPP_MAJOR_VERSION >= 6
auto max_workitem_sizes =
device.get_info<sycl::info::device::max_work_item_sizes<3>>();
#else
auto max_workitem_sizes =
device.get_info<sycl::info::device::max_work_item_sizes>();
#endif
// Get the max dimension of a sycl::id object
auto max_work_item_dimensions =
device.get_info<sycl::info::device::max_work_item_dimensions>();
for (uint32 i = 0; i < max_work_item_dimensions; i++) {
this->get_exec_info().max_workitem_sizes.push_back(
max_workitem_sizes[i]);
}

// Get the hardware threads per eu
if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
#if GINKGO_DPCPP_MAJOR_VERSION >= 6
this->get_exec_info().num_pu_per_cu = device.get_info<
sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
#else
this->get_exec_info().num_pu_per_cu = device.get_info<
sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
#endif
} else {
// To make the usage still valid.
// TODO: check the value for other vendor gpu or cpu.
this->get_exec_info().num_pu_per_cu = 1;
}

// Here we declare the queue with the property `in_order` which ensures the
// kernels are executed in the submission order. Otherwise, calls to
// `wait()` would be needed after every call to a DPC++ function or kernel.
Expand Down
2 changes: 1 addition & 1 deletion dpcpp/base/helper.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ bool validate(sycl::queue* queue, unsigned int workgroup_size,
{
auto device = queue->get_device();
auto subgroup_size_list =
device.get_info<cl::sycl::info::device::sub_group_sizes>();
device.get_info<sycl::info::device::sub_group_sizes>();
auto max_workgroup_size =
device.get_info<sycl::info::device::max_work_group_size>();
bool allowed = false;
Expand Down
2 changes: 1 addition & 1 deletion dpcpp/base/onemkl_bindings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct is_supported<std::complex<double>> : std::true_type {};


#define GKO_BIND_DOT(ValueType, Name, Func) \
inline void Name(::cl::sycl::queue& exec_queue, std::int64_t n, \
inline void Name(sycl::queue& exec_queue, std::int64_t n, \
const ValueType* x, std::int64_t incx, \
const ValueType* y, std::int64_t incy, ValueType* result) \
{ \
Expand Down
Loading