diff --git a/backends/include/dppl_sycl_context_interface.h b/backends/include/dppl_sycl_context_interface.h index e2164e388e..de5175bc40 100644 --- a/backends/include/dppl_sycl_context_interface.h +++ b/backends/include/dppl_sycl_context_interface.h @@ -27,6 +27,7 @@ #include "dppl_data_types.h" #include "dppl_sycl_types.h" +#include "dppl_sycl_platform_interface.h" #include "Support/DllExport.h" #include "Support/ExternC.h" #include "Support/MemOwnershipAttrs.h" @@ -34,15 +35,38 @@ DPPL_C_EXTERN_C_BEGIN +/*! + * @brief Checks if two DPPLSyclContextRef objects point to the same + * sycl::context. + * + * @param CtxRef1 First opaque pointer to the sycl context. + * @param CtxRef2 Second opaque pointer to the sycl context. + * @return True if the underlying sycl::context are same, false otherwise. + */ +DPPL_API +bool DPPLContext_AreEq (__dppl_keep const DPPLSyclContextRef CtxRef1, + __dppl_keep const DPPLSyclContextRef CtxRef2); + /*! * @brief Returns true if this SYCL context is a host context. * - * @param CtxRef A opaque pointer to a sycl::context. + * @param CtxRef An opaque pointer to a sycl::context. * @return True if the SYCL context is a host context, else False. */ DPPL_API bool DPPLContext_IsHost (__dppl_keep const DPPLSyclContextRef CtxRef); +/*! + * @brief Returns the sycl backend for the DPPLSyclContextRef pointer. + * + * @param CtxRef An opaque pointer to a sycl::context. + * @return The sycl backend for the DPPLSyclContextRef returned as + * a DPPLSyclBackendType enum type. + */ +DPPL_API +DPPLSyclBackendType +DPPLContext_GetBackend (__dppl_keep const DPPLSyclContextRef CtxRef); + /*! * @brief Delete the pointer after casting it to sycl::context * diff --git a/backends/include/dppl_sycl_device_interface.h b/backends/include/dppl_sycl_device_interface.h index 3ebe9a80ef..01e3c08101 100644 --- a/backends/include/dppl_sycl_device_interface.h +++ b/backends/include/dppl_sycl_device_interface.h @@ -28,6 +28,7 @@ #pragma once #include "dppl_data_types.h" +#include "dppl_sycl_enum_types.h" #include "dppl_sycl_types.h" #include "Support/DllExport.h" #include "Support/ExternC.h" @@ -35,22 +36,6 @@ DPPL_C_EXTERN_C_BEGIN -/*! - * @brief Redefinition of Sycl's device_type so that we do not have to include - * sycl.hpp here and in the Python bindings. - * - */ -typedef enum -{ - DPPL_CPU, - DPPL_GPU, - DPPL_ACCELERATOR, - DPPL_CUSTOM, - DPPL_AUTOMATIC, - DPPL_HOST, - DPPL_ALL -} DPPLSyclDeviceType; - /*! * @brief Prints out some of the info::deivice attributes for the device. * diff --git a/backends/include/dppl_sycl_enum_types.h b/backends/include/dppl_sycl_enum_types.h new file mode 100644 index 0000000000..0d8d73f091 --- /dev/null +++ b/backends/include/dppl_sycl_enum_types.h @@ -0,0 +1,91 @@ +//===--- dppl_sycl_enum_types.h - DPPL-SYCL interface ---*---C++ -----*----===// +// +// Python Data Parallel Processing Library (PyDPPL) +// +// Copyright 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This header defines DPPL specficif enum types that wrap corresponding Sycl +/// enum classes. These enums are defined primarily so that Python extensions +/// that use DPPL do not have to include Sycl headers directly. +/// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "Support/ExternC.h" + +DPPL_C_EXTERN_C_BEGIN + +/*! + * @brief Redefinition of DPC++-specific Sycl backend types. + * + */ +enum DPPLSyclBackendType +{ + DPPL_UNKNOWN_BACKEND = 0x0, + DPPL_OPENCL = 1 << 16, + DPPL_HOST = 1 << 15, + DPPL_LEVEL_ZERO = 1 << 14, + DPPL_CUDA = 1 << 13 +}; + +/*! + * @brief DPPL device types that are equivalent to Sycl's device_type. + * + */ +enum DPPLSyclDeviceType +{ + DPPL_CPU = 1 << 0, + DPPL_GPU = 1 << 1, + DPPL_ACCELERATOR = 1 << 2, + DPPL_CUSTOM = 1 << 3, + DPPL_AUTOMATIC = 1 << 4, + DPPL_HOST_DEVICE = 1 << 5, + DPPL_ALL = 1 << 6 + // IMP: before adding new values here look at DPPLSyclBackendType enum. The + // values should not overlap. +}; + +/*! + * @brief Supported types for kernel arguments to be passed to a Sycl kernel + * using DPPL. + * + * \todo Add support for sycl::buffer + * + */ +typedef enum +{ + DPPL_CHAR, + DPPL_SIGNED_CHAR, + DPPL_UNSIGNED_CHAR, + DPPL_SHORT, + DPPL_INT, + DPPL_UNSIGNED_INT, + DPPL_UNSIGNED_INT8, + DPPL_LONG, + DPPL_UNSIGNED_LONG, + DPPL_LONG_LONG, + DPPL_UNSIGNED_LONG_LONG, + DPPL_SIZE_T, + DPPL_FLOAT, + DPPL_DOUBLE, + DPPL_LONG_DOUBLE, + DPPL_VOID_PTR +} DPPLKernelArgType; + +DPPL_C_EXTERN_C_END diff --git a/backends/include/dppl_sycl_platform_interface.h b/backends/include/dppl_sycl_platform_interface.h index 459a39a1da..2b2e02d702 100644 --- a/backends/include/dppl_sycl_platform_interface.h +++ b/backends/include/dppl_sycl_platform_interface.h @@ -26,19 +26,46 @@ #pragma once #include "dppl_data_types.h" +#include "dppl_sycl_enum_types.h" #include "Support/DllExport.h" #include "Support/ExternC.h" +#include "Support/MemOwnershipAttrs.h" DPPL_C_EXTERN_C_BEGIN /*! - * @brief Get the number of sycl::platform available on the system. + * @brief Returns the number of sycl::platform available on the system. * * @return The number of available sycl::platforms. */ DPPL_API size_t DPPLPlatform_GetNumPlatforms (); +/*! + * @brief Returns the number of unique sycl backends on the system not counting + * the host backend. + * + * @return The number of unique sycl backends. + */ +DPPL_API +size_t DPPLPlatform_GetNumBackends (); + +/*! + * @brief Returns an array of the unique DPPLSyclBackendType values on the system. + * + * @return An array of DPPLSyclBackendType enum values. + */ +DPPL_API +__dppl_give DPPLSyclBackendType* DPPLPlatform_GetListOfBackends (); + +/*! + * @brief Frees an array of DPPLSyclBackendType enum values. + * + * @param BEArr An array of DPPLSyclBackendType enum values to be freed. + */ +DPPL_API +void DPPLPlatform_DeleteListOfBackends (__dppl_take DPPLSyclBackendType* BEArr); + /*! * @brief Prints out some selected info about all sycl::platform on the system. * diff --git a/backends/include/dppl_sycl_program_interface.h b/backends/include/dppl_sycl_program_interface.h index e14f45b9f5..952156ff79 100644 --- a/backends/include/dppl_sycl_program_interface.h +++ b/backends/include/dppl_sycl_program_interface.h @@ -74,7 +74,7 @@ DPPL_API __dppl_give DPPLSyclProgramRef DPPLProgram_CreateFromOCLSource (__dppl_keep const DPPLSyclContextRef Ctx, __dppl_keep const char *Source, - __dppl_keep const char *CompileOpts = nullptr); + __dppl_keep const char *CompileOpts); /*! * @brief Returns the SyclKernel with given name from the program, if not found diff --git a/backends/include/dppl_sycl_queue_interface.h b/backends/include/dppl_sycl_queue_interface.h index cc12c9af77..03e746da44 100644 --- a/backends/include/dppl_sycl_queue_interface.h +++ b/backends/include/dppl_sycl_queue_interface.h @@ -28,6 +28,7 @@ #pragma once #include "dppl_data_types.h" +#include "dppl_sycl_enum_types.h" #include "dppl_sycl_types.h" #include "Support/DllExport.h" #include "Support/ExternC.h" @@ -36,37 +37,33 @@ DPPL_C_EXTERN_C_BEGIN /*! - * @brief Supported types for kernel arguments to be passed to a Sycl kernel. + * @brief Delete the pointer after casting it to sycl::queue. * - * \todo Add support for sycl::buffer + * @param QRef A DPPLSyclQueueRef pointer that gets deleted. + */ +DPPL_API +void DPPLQueue_Delete (__dppl_take DPPLSyclQueueRef QRef); + +/*! + * @brief Checks if two DPPLSyclQueueRef objects point to the same sycl::queue. * + * @param QRef1 First opaque pointer to the sycl queue. + * @param QRef2 Second opaque pointer to the sycl queue. + * @return True if the underlying sycl::queue are same, false otherwise. */ -typedef enum -{ - DPPL_CHAR, - DPPL_SIGNED_CHAR, - DPPL_UNSIGNED_CHAR, - DPPL_SHORT, - DPPL_INT, - DPPL_UNSIGNED_INT, - DPPL_LONG, - DPPL_UNSIGNED_LONG, - DPPL_LONG_LONG, - DPPL_UNSIGNED_LONG_LONG, - DPPL_SIZE_T, - DPPL_FLOAT, - DPPL_DOUBLE, - DPPL_LONG_DOUBLE, - DPPL_VOID_PTR -} DPPLKernelArgType; +DPPL_API +bool DPPLQueue_AreEq (__dppl_keep const DPPLSyclQueueRef QRef1, + __dppl_keep const DPPLSyclQueueRef QRef2); /*! - * @brief Delete the pointer after casting it to sycl::queue. + * @brief Returns the Sycl backend for the provided sycl::queue. * - * @param QRef A DPPLSyclQueueRef pointer that gets deleted. + * @param QRef An opaque pointer to the sycl queue. + * @return A enum DPPLSyclBackendType corresponding to the backed for the + * queue. */ DPPL_API -void DPPLQueue_Delete (__dppl_take DPPLSyclQueueRef QRef); +DPPLSyclBackendType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef); /*! * @brief Returns the Sycl context for the queue. diff --git a/backends/include/dppl_sycl_queue_manager.h b/backends/include/dppl_sycl_queue_manager.h index 33956af07b..c71ed0e285 100644 --- a/backends/include/dppl_sycl_queue_manager.h +++ b/backends/include/dppl_sycl_queue_manager.h @@ -35,6 +35,7 @@ #include "dppl_data_types.h" #include "dppl_sycl_types.h" +#include "dppl_sycl_context_interface.h" #include "dppl_sycl_device_interface.h" #include "Support/DllExport.h" #include "Support/ExternC.h" @@ -63,8 +64,10 @@ __dppl_give DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue (); * raised if no such device exists. */ DPPL_API -__dppl_give DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclDeviceType DeviceTy, - size_t DNum); +__dppl_give DPPLSyclQueueRef +DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum); /*! * @brief Get the number of activated queues not including the global or @@ -75,56 +78,72 @@ __dppl_give DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclDeviceType DeviceTy, DPPL_API size_t DPPLQueueMgr_GetNumActivatedQueues (); + /*! - * @brief Get the number of GPU queues available on the system. + * @brief Get the number of available queues for given backend and device type + * combination. * - * @return The number of available GPU queues. + * @param BETy Type of Sycl backend. + * @param DeviceTy Type of Sycl device. + * @return The number of available queues. */ DPPL_API -size_t DPPLQueueMgr_GetNumGPUQueues (); +size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy); /*! - * @brief Get the number of CPU queues available on the system. + * @brief Returns True if the passed in queue and the current queue are the + * same, else returns False. * - * @return The number of available CPU queues. + * @param QRef An opaque pointer to a sycl::queue. + * @return True or False depending on whether the QRef argument is the same as + * the currently activated queue. */ DPPL_API -size_t DPPLQueueMgr_GetNumCPUQueues (); +bool DPPLQueueMgr_IsCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef); /*! -* @brief Set the default DPPL queue to the sycl::queue for the given device. -* -* If no such device is found the a runtime_error exception is thrown. +* @brief Set the default DPPL queue to the sycl::queue for the given backend +* and device type combination and return a DPPLSyclQueueRef for that queue. +* If no queue was created Null is returned to caller. * +* @param BETy Type of Sycl backend. * @param DeviceTy The type of Sycl device (sycl_device_type) -* @param DNum Device id for the device (defaults to 0) +* @param DNum Device id for the device +* @return A copy of the sycl::queue that was set as the new default queue. If no +* queue could be created then returns Null. */ DPPL_API -void DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclDeviceType DeviceTy, - size_t DNum); +__dppl_give DPPLSyclQueueRef +DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum); /*! * @brief Pushes a new sycl::queue object to the top of DPPL's thread-local * stack of a "activated" queues, and returns a copy of the queue to caller. * - * DPPL maintains a thread-local stack of sycl::queue objects to facilitate - * nested parallelism. The sycl::queue at the top of the stack is termed as the - * currently activated queue, and is always the one returned by + * The DPPL queue manager maintains a thread-local stack of sycl::queue objects + * to facilitate nested parallelism. The sycl::queue at the top of the stack is + * termed as the currently activated queue, and is always the one returned by * DPPLQueueMgr_GetCurrentQueue(). DPPLPushSyclQueueToStack creates a new * sycl::queue corresponding to the specified device and pushes it to the top * of the stack. A copy of the sycl::queue is returned to the caller wrapped * inside the opaque DPPLSyclQueueRef pointer. A runtime_error exception is * thrown when a new sycl::queue could not be created for the specified device. * + * @param BETy Type of Sycl backend. * @param DeviceTy The type of Sycl device (sycl_device_type) * @param DNum Device id for the device (defaults to 0) * * @return A copy of the sycl::queue that was pushed to the top of DPPL's - * stack of sycl::queue objects. + * stack of sycl::queue objects. Nullptr is returned if no such device exists. */ DPPL_API __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_PushQueue (DPPLSyclDeviceType DeviceTy, size_t DNum); +DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum); /*! * @brief Pops the top of stack element from DPPL's stack of activated diff --git a/backends/include/dppl_sycl_types.h b/backends/include/dppl_sycl_types.h index e3ccf3d0a8..ce1f74dd85 100644 --- a/backends/include/dppl_sycl_types.h +++ b/backends/include/dppl_sycl_types.h @@ -25,6 +25,10 @@ #pragma once +#include "Support/ExternC.h" + +DPPL_C_EXTERN_C_BEGIN + /*! * @brief Opaque pointer to a sycl::context * @@ -74,3 +78,5 @@ typedef struct DPPLOpaqueSyclQueue *DPPLSyclQueueRef; * @see sycl::usm */ typedef struct DPPLOpaqueSyclUSM *DPPLSyclUSMRef; + +DPPL_C_EXTERN_C_END diff --git a/backends/source/dppl_sycl_context_interface.cpp b/backends/source/dppl_sycl_context_interface.cpp index 4d97b2bb6c..cbdd722deb 100644 --- a/backends/source/dppl_sycl_context_interface.cpp +++ b/backends/source/dppl_sycl_context_interface.cpp @@ -36,6 +36,14 @@ namespace DEFINE_SIMPLE_CONVERSION_FUNCTIONS(context, DPPLSyclContextRef) } /* end of anonymous namespace */ +bool DPPLContext_AreEq (__dppl_keep const DPPLSyclContextRef CtxRef1, + __dppl_keep const DPPLSyclContextRef CtxRef2) +{ + if(!(CtxRef1 && CtxRef2)) + // \todo handle error + return false; + return (*unwrap(CtxRef1) == *unwrap(CtxRef2)); +} bool DPPLContext_IsHost (__dppl_keep const DPPLSyclContextRef CtxRef) { @@ -46,3 +54,23 @@ void DPPLContext_Delete (__dppl_take DPPLSyclContextRef CtxRef) { delete unwrap(CtxRef); } + +DPPLSyclBackendType +DPPLContext_GetBackend (__dppl_keep const DPPLSyclContextRef CtxRef) +{ + auto BE = unwrap(CtxRef)->get_platform().get_backend(); + + switch(BE) + { + case backend::host: + return DPPL_HOST; + case backend::opencl: + return DPPL_OPENCL; + case backend::level_zero: + return DPPL_LEVEL_ZERO; + case backend::cuda: + return DPPL_CUDA; + default: + return DPPL_UNKNOWN_BACKEND; + } +} diff --git a/backends/source/dppl_sycl_device_interface.cpp b/backends/source/dppl_sycl_device_interface.cpp index 68e50ab9b0..cf5a1d027c 100644 --- a/backends/source/dppl_sycl_device_interface.cpp +++ b/backends/source/dppl_sycl_device_interface.cpp @@ -56,20 +56,26 @@ void dump_device_info (const device & Device) << Device.get_info() << '\n'; ss << std::setw(4) << " " << std::left << std::setw(16) << "Device type"; - try { - if (Device.has(aspect::accelerator)) - ss << "accelerator" << '\n'; - else if (Device.has(aspect::cpu)) - ss << "cpu" << '\n'; - else if (Device.has(aspect::custom)) - ss << "custom" << '\n'; - else if (Device.has(aspect::gpu)) - ss << "gpu" << '\n'; - else if (Device.has(aspect::host)) - ss << "host" << '\n'; - } catch (runtime_error re) { - // \todo handle errors - ss << "unknown\n"; + auto devTy = Device.get_info(); + switch(devTy) + { + case info::device_type::cpu: + ss << "cpu" << '\n'; + break; + case info::device_type::gpu: + ss << "gpu" << '\n'; + break; + case info::device_type::accelerator: + ss << "accelerator" << '\n'; + break; + case info::device_type::custom: + ss << "custom" << '\n'; + break; + case info::device_type::host: + ss << "host" << '\n'; + break; + default: + ss << "unknown" << '\n'; } std::cout << ss.str(); diff --git a/backends/source/dppl_sycl_platform_interface.cpp b/backends/source/dppl_sycl_platform_interface.cpp index 09ec3cbb5e..d8b8a77606 100644 --- a/backends/source/dppl_sycl_platform_interface.cpp +++ b/backends/source/dppl_sycl_platform_interface.cpp @@ -27,95 +27,162 @@ #include "dppl_sycl_platform_interface.h" #include #include +#include #include #include using namespace cl::sycl; +namespace +{ +std::set +get_set_of_backends () +{ + std::set be_set; + for (auto p : platform::get_platforms()) { + if(p.is_host()) + continue; + auto be = p.get_backend(); + switch (be) + { + case backend::host: + be_set.insert(DPPLSyclBackendType::DPPL_HOST); + break; + case backend::cuda: + be_set.insert(DPPLSyclBackendType::DPPL_CUDA); + break; + case backend::level_zero: + be_set.insert(DPPLSyclBackendType::DPPL_LEVEL_ZERO); + break; + case backend::opencl: + be_set.insert(DPPLSyclBackendType::DPPL_OPENCL); + break; + default: + break; + } + } + return be_set; +} + +} // namespace + /*! - * Prints out the following sycl::info::platform attributes for each platform - * found on the system: - * - info::platform::name - * - info::platform::version - * - info::platform::vendor - * - info::platform::profile - * - backend (opencl, cuda, level-zero, host) - * - number of devices on the platform - * - * Additionally, for each device we print out: - * - info::device::name - * - info::device::driver_version - * - type of the device based on the aspects cpu, gpu, accelerator. - */ +* Prints out the following sycl::info::platform attributes for each platform +* found on the system: +* - info::platform::name +* - info::platform::version +* - info::platform::vendor +* - info::platform::profile +* - backend (opencl, cuda, level-zero, host) +* - number of devices on the platform +* +* Additionally, for each device we print out: +* - info::device::name +* - info::device::driver_version +* - type of the device based on the aspects cpu, gpu, accelerator. +*/ void DPPLPlatform_DumpInfo () { - size_t i = 0; - - // Print out the info for each platform - auto platforms = platform::get_platforms(); - for (auto &p : platforms) { - std::cout << "---Platform " << i << '\n'; - std::stringstream ss; - - auto vendor = p.get_info(); - if (vendor.empty()) - vendor = "unknown"; - - ss << std::setw(4) << " " << std::left << std::setw(12) << "Name" - << p.get_info() << '\n'; - ss << std::setw(4) << " " << std::left << std::setw(12) << "Version" - << p.get_info() << '\n'; - ss << std::setw(4) << " " << std::left << std::setw(12) << "Vendor" - << vendor << '\n'; - ss << std::setw(4) << " " << std::left << std::setw(12) << "Profile" - << p.get_info() << '\n'; - ss << std::setw(4) << " " << std::left << std::setw(12) << "Backend"; - p.is_host() ? (ss << "unknown") : (ss << p.get_backend()); - ss << '\n'; - - // Get number of devices on the platform - auto devices = p.get_devices(); - - ss << std::setw(4) << " " << std::left << std::setw(12) << "Devices" - << devices.size() << '\n'; - // Print some of the device information - for (auto dn = 0ul; dn < devices.size(); ++dn) { - ss << std::setw(4) << "---Device " << dn << '\n'; - ss << std::setw(8) << " " << std::left << std::setw(20) - << "Name" << devices[dn].get_info() << '\n'; - ss << std::setw(8) << " " << std::left << std::setw(20) - << "Driver version" - << devices[dn].get_info() << '\n'; - ss << std::setw(8) << " " << std::left << std::setw(20) - << "Device type"; - - try { - if (devices[dn].has(aspect::accelerator)) - ss << "accelerator" << '\n'; - else if (devices[dn].has(aspect::cpu)) - ss << "cpu" << '\n'; - else if (devices[dn].has(aspect::custom)) - ss << "custom" << '\n'; - else if (devices[dn].has(aspect::gpu)) - ss << "gpu" << '\n'; - else if (devices[dn].has(aspect::host)) - ss << "host" << '\n'; - } catch (runtime_error re) { - // \todo handle errors - ss << "unknown\n"; - } - } - - std::cout << ss.str(); - ++i; - } + size_t i = 0; + + // Print out the info for each platform + auto platforms = platform::get_platforms(); + for (auto &p : platforms) { + std::cout << "---Platform " << i << '\n'; + std::stringstream ss; + + auto vendor = p.get_info(); + if (vendor.empty()) + vendor = "unknown"; + + ss << std::setw(4) << " " << std::left << std::setw(12) << "Name" + << p.get_info() << '\n'; + ss << std::setw(4) << " " << std::left << std::setw(12) << "Version" + << p.get_info() << '\n'; + ss << std::setw(4) << " " << std::left << std::setw(12) << "Vendor" + << vendor << '\n'; + ss << std::setw(4) << " " << std::left << std::setw(12) << "Profile" + << p.get_info() << '\n'; + ss << std::setw(4) << " " << std::left << std::setw(12) << "Backend"; + p.is_host() ? (ss << "unknown") : (ss << p.get_backend()); + ss << '\n'; + + // Get number of devices on the platform + auto devices = p.get_devices(); + + ss << std::setw(4) << " " << std::left << std::setw(12) << "Devices" + << devices.size() << '\n'; + // Print some of the device information + for (auto dn = 0ul; dn < devices.size(); ++dn) { + ss << std::setw(4) << "---Device " << dn << '\n'; + ss << std::setw(8) << " " << std::left << std::setw(20) + << "Name" << devices[dn].get_info() << '\n'; + ss << std::setw(8) << " " << std::left << std::setw(20) + << "Driver version" + << devices[dn].get_info() << '\n'; + ss << std::setw(8) << " " << std::left << std::setw(20) + << "Device type"; + + auto devTy = devices[dn].get_info(); + switch (devTy) + { + case info::device_type::cpu: + ss << "cpu" << '\n'; + break; + case info::device_type::gpu: + ss << "gpu" << '\n'; + break; + case info::device_type::accelerator: + ss << "accelerator" << '\n'; + break; + case info::device_type::custom: + ss << "custom" << '\n'; + break; + case info::device_type::host: + ss << "host" << '\n'; + break; + default: + ss << "unknown" << '\n'; + } + } + std::cout << ss.str(); + ++i; + } } /*! - * Returns the number of sycl::platform on the system. - */ +* Returns the number of sycl::platform on the system. +*/ size_t DPPLPlatform_GetNumPlatforms () { return platform::get_platforms().size(); } + +size_t DPPLPlatform_GetNumBackends () +{ + return get_set_of_backends().size(); +} + +__dppl_give DPPLSyclBackendType *DPPLPlatform_GetListOfBackends () +{ + auto be_set = get_set_of_backends(); + + if (be_set.empty()) + return nullptr; + + DPPLSyclBackendType *BEArr = new DPPLSyclBackendType[be_set.size()]; + + auto i = 0ul; + for (auto be : be_set) { + BEArr[i] = be; + ++i; + } + + return BEArr; +} + +void DPPLPlatform_DeleteListOfBackends (__dppl_take DPPLSyclBackendType *BEArr) +{ + delete[] BEArr; +} diff --git a/backends/source/dppl_sycl_queue_interface.cpp b/backends/source/dppl_sycl_queue_interface.cpp index 7ddaece07e..81ccf64f9f 100644 --- a/backends/source/dppl_sycl_queue_interface.cpp +++ b/backends/source/dppl_sycl_queue_interface.cpp @@ -25,6 +25,7 @@ //===----------------------------------------------------------------------===// #include "dppl_sycl_queue_interface.h" +#include "dppl_sycl_context_interface.h" #include "Support/CBindingWrapping.h" #include /* SYCL headers */ @@ -71,6 +72,9 @@ bool set_kernel_arg (handler &cgh, size_t idx, __dppl_keep void *Arg, case DPPL_UNSIGNED_INT: cgh.set_arg(idx, *(unsigned int*)Arg); break; + case DPPL_UNSIGNED_INT8: + cgh.set_arg(idx, *(uint8_t*)Arg); + break; case DPPL_LONG: cgh.set_arg(idx, *(long*)Arg); break; @@ -117,6 +121,23 @@ void DPPLQueue_Delete (__dppl_take DPPLSyclQueueRef QRef) delete unwrap(QRef); } + +bool DPPLQueue_AreEq (__dppl_keep const DPPLSyclQueueRef QRef1, + __dppl_keep const DPPLSyclQueueRef QRef2) +{ + if(!(QRef1 && QRef2)) + // \todo handle error + return false; + return (*unwrap(QRef1) == *unwrap(QRef2)); +} + +DPPLSyclBackendType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef) +{ + auto Q = unwrap(QRef); + auto C = Q->get_context(); + return DPPLContext_GetBackend(wrap(&C)); +} + __dppl_give DPPLSyclDeviceRef DPPLQueue_GetDevice (__dppl_keep const DPPLSyclQueueRef QRef) { @@ -179,11 +200,11 @@ DPPLQueue_SubmitRange (__dppl_keep const DPPLSyclKernelRef KRef, "dimensions."); } }); - } catch (runtime_error re) { + } catch (runtime_error &re) { // \todo fix error handling std::cerr << re.what() << '\n'; return nullptr; - } catch (std::runtime_error sre) { + } catch (std::runtime_error &sre) { std::cerr << sre.what() << '\n'; return nullptr; } @@ -231,7 +252,7 @@ DPPLQueue_SubmitNDRange(__dppl_keep const DPPLSyclKernelRef KRef, break; case 3: cgh.parallel_for(nd_range<3>{{gRange[0], gRange[1], gRange[2]}, - {lRange[0], lRange[1], lRange[3]}}, + {lRange[0], lRange[1], lRange[2]}}, *Kernel); break; default: @@ -240,11 +261,11 @@ DPPLQueue_SubmitNDRange(__dppl_keep const DPPLSyclKernelRef KRef, "dimensions."); } }); - } catch (runtime_error re) { + } catch (runtime_error &re) { // \todo fix error handling std::cerr << re.what() << '\n'; return nullptr; - } catch (std::runtime_error sre) { + } catch (std::runtime_error &sre) { std::cerr << sre.what() << '\n'; return nullptr; } diff --git a/backends/source/dppl_sycl_queue_manager.cpp b/backends/source/dppl_sycl_queue_manager.cpp index 3981c528dc..8162e838d2 100644 --- a/backends/source/dppl_sycl_queue_manager.cpp +++ b/backends/source/dppl_sycl_queue_manager.cpp @@ -42,11 +42,6 @@ namespace // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPPLSyclQueueRef) -void error_reporter (const std::string & msg) -{ - throw std::runtime_error("Error: " + msg); -} - /*! * @brief A helper class to support the DPPLSyclQueuemanager. * @@ -57,158 +52,253 @@ void error_reporter (const std::string & msg) class QMgrHelper { public: - static std::vector& - cpu_queues_ () + using QVec = vector_class; + + static QVec* init_queues (backend BE, info::device_type DTy) { + QVec *queues = new QVec(); + auto Platforms = platform::get_platforms(); + for (auto &p : Platforms) { + if (p.is_host()) continue; + auto be = p.get_backend(); + auto Devices = p.get_devices(); + + if (Devices.size() == 1) { + auto d = Devices[0]; + auto devty = d.get_info(); + if(devty == DTy && be == BE) { + auto Ctx = context(d); + queues->emplace_back(Ctx, d); + break; + } + } + else { + vector_class SelectedDevices; + for(auto &d : Devices) { + auto devty = d.get_info(); + if(devty == DTy && be == BE) + SelectedDevices.push_back(d); + } + if (SelectedDevices.size() > 0) { + auto Ctx = context(SelectedDevices); + auto d = SelectedDevices[0]; + queues->emplace_back(Ctx, d); + } + } + } + return queues; + } + + static QVec& get_opencl_cpu_queues () + { + static QVec* queues = init_queues(backend::opencl, + info::device_type::cpu); + return *queues; + } + + static QVec& get_opencl_gpu_queues () { - static std::vector* cpu_queues = - QMgrHelper::init_queues(info::device_type::cpu); - return *cpu_queues; + static QVec* queues = init_queues(backend::opencl, + info::device_type::gpu); + return *queues; } - static std::vector& - gpu_queues_ () + static QVec get_level0_gpu_queues () { - static std::vector* gpu_queues = - QMgrHelper::init_queues(info::device_type::gpu); - return *gpu_queues; + static QVec* queues = init_queues(backend::level_zero, + info::device_type::gpu); + return *queues; } - static std::vector& - active_queues_ () + static QVec& get_active_queues () { - thread_local static std::vector* active_queues = - new std::vector({default_selector()}); + thread_local static QVec* active_queues = + new QVec({default_selector()}); return *active_queues; } static __dppl_give DPPLSyclQueueRef - getQueue (DPPLSyclDeviceType DeviceTy, size_t DNum); + getQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum); static __dppl_give DPPLSyclQueueRef getCurrentQueue (); - static void - setAsDefaultQueue (DPPLSyclDeviceType DeviceTy, size_t DNum); + static bool isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef); + + static __dppl_give DPPLSyclQueueRef + setAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum); static __dppl_give DPPLSyclQueueRef - pushSyclQueue (DPPLSyclDeviceType DeviceTy, size_t DNum); + pushSyclQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum); static void popSyclQueue (); - static cl::sycl::vector_class* - init_queues (info::device_type device_ty) - { - - auto queues = new std::vector(); - for(auto d : device::get_devices(device_ty)) - queues->emplace_back(d); - return queues; - } }; -// make function call like access to variable -// it is for minimizing code changes during replacing static vars with functions -// it could be refactored by replacing variable with function call -// scope of this variables is only this file -#define cpu_queues cpu_queues_() -#define gpu_queues gpu_queues_() -#define active_queues active_queues_() - - -//----------------------------- Public API -----------------------------------// - /*! * Allocates a new copy of the present top of stack queue, which can be the * default queue and returns to caller. The caller owns the pointer and is - * responsible for deallocating it. The helper function deleteQueue can be used - * is for that purpose. + * responsible for deallocating it. The helper function DPPLQueue_Delete should + * be used for that purpose. */ DPPLSyclQueueRef QMgrHelper::getCurrentQueue () { - if(active_queues.empty()) - error_reporter("No currently active queues."); - auto last = QMgrHelper::active_queues.size() - 1; - return wrap(new queue(QMgrHelper::active_queues[last])); + auto activated_q = get_active_queues(); + if(activated_q.empty()) { + // \todo handle error + std::cerr << "No currently active queues.\n"; + return nullptr; + } + auto last = activated_q.size() - 1; + return wrap(new queue(activated_q[last])); } /*! * Allocates a sycl::queue by copying from the cached {cpu|gpu}_queues vector * and returns it to the caller. The caller owns the pointer and is responsible - * for deallocating it. The helper function deleteQueue can be used is for that - * purpose. + * for deallocating it. The helper function DPPLQueue_Delete should + * be used for that purpose. */ -DPPLSyclQueueRef -QMgrHelper::getQueue (DPPLSyclDeviceType DeviceTy, size_t DNum) +__dppl_give DPPLSyclQueueRef +QMgrHelper::getQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum) { queue *QRef = nullptr; - switch (DeviceTy) + switch (BETy|DeviceTy) { - case DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { - if (DNum >= cpu_queues.size()) { - std::stringstream ss; - ss << "SYCL CPU device " << DNum << " not found on system."; - error_reporter(ss.str()); + auto cpuQs = get_opencl_cpu_queues(); + if (DNum >= cpuQs.size()) { + // \todo handle error + std::cerr << "OpenCL CPU device " << DNum + << " not found on system.\n"; + return nullptr; } - QRef = new queue(QMgrHelper::cpu_queues[DNum]); + QRef = new queue(cpuQs[DNum]); break; } - case DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { - if (DNum >= gpu_queues.size()) { - std::stringstream ss; - ss << "SYCL GPU device " << DNum << " not found on system."; - error_reporter(ss.str()); + auto gpuQs = get_opencl_gpu_queues(); + if (DNum >= gpuQs.size()) { + // \todo handle error + std::cerr << "OpenCL GPU device " << DNum + << " not found on system.\n"; + return nullptr; } - QRef = new queue(QMgrHelper::gpu_queues[DNum]); + QRef = new queue(gpuQs[DNum]); + break; + } + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + { + auto l0GpuQs = get_level0_gpu_queues(); + if (DNum >= l0GpuQs.size()) { + // \todo handle error + std::cerr << "Level-0 GPU device " << DNum + << " not found on system.\n"; + return nullptr; + } + QRef = new queue(l0GpuQs[DNum]); break; } default: - error_reporter("Unsupported device type."); + std::cerr << "Unsupported device type.\n"; + return nullptr; } return wrap(QRef); } +/*! + * Compares the context and device of the current queue to the context and + * device of the queue passed as input. Return true if both queues have the + * same context and device. + */ +bool QMgrHelper::isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) +{ + auto activated_q = get_active_queues(); + if(activated_q.empty()) { + // \todo handle error + std::cerr << "No currently active queues.\n"; + return false; + } + auto last = activated_q.size() - 1; + auto currQ = activated_q[last]; + return (*unwrap(QRef) == currQ); +} + /*! * Changes the first entry into the stack, i.e., the default queue to a new * sycl::queue corresponding to the device type and device number. */ -void -QMgrHelper::setAsDefaultQueue (DPPLSyclDeviceType DeviceTy, size_t DNum) +__dppl_give DPPLSyclQueueRef +QMgrHelper::setAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum) { - if(active_queues.empty()) - error_reporter("active queue vector is corrupted."); + queue *QRef = nullptr; + auto &activeQ = get_active_queues(); + if(activeQ.empty()) { + std::cerr << "active queue vector is corrupted.\n"; + return nullptr; + } - switch (DeviceTy) + switch (BETy|DeviceTy) { - case DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { - if (DNum >= cpu_queues.size()) { - std::stringstream ss; - ss << "SYCL CPU device " << DNum << " not found on system."; - error_reporter(ss.str()); + auto oclcpu_q = get_opencl_cpu_queues(); + if (DNum >= oclcpu_q.size()) { + // \todo handle error + std::cerr << "OpenCL CPU device " << DNum + << " not found on system\n."; + return nullptr; } - active_queues[0] = cpu_queues[DNum]; + activeQ[0] = oclcpu_q[DNum]; break; } - case DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { - if (DNum >= gpu_queues.size()) { - std::stringstream ss; - ss << "SYCL GPU device " << DNum << " not found on system."; - error_reporter(ss.str()); + auto oclgpu_q = get_opencl_gpu_queues(); + if (DNum >= oclgpu_q.size()) { + // \todo handle error + std::cerr << "OpenCL GPU device " << DNum + << " not found on system\n."; + return nullptr; } - active_queues[0] = gpu_queues[DNum]; + activeQ[0] = oclgpu_q[DNum]; + break; + } + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + { + auto l0gpu_q = get_level0_gpu_queues(); + if (DNum >= l0gpu_q.size()) { + // \todo handle error + std::cerr << "Level-0 GPU device " << DNum + << " not found on system\n."; + return nullptr; + } + activeQ[0] = l0gpu_q[DNum]; break; } default: { - error_reporter("Unsupported device type."); + std::cerr << "Unsupported device type.\n"; + return nullptr; } } + + QRef = new queue(activeQ[0]); + return wrap(QRef); } /*! @@ -217,40 +307,60 @@ QMgrHelper::setAsDefaultQueue (DPPLSyclDeviceType DeviceTy, size_t DNum) * cleaned up. The helper function DPPLDeleteSyclQueue() can be used is for that * purpose. */ -DPPLSyclQueueRef -QMgrHelper::pushSyclQueue (DPPLSyclDeviceType DeviceTy, size_t DNum) +__dppl_give DPPLSyclQueueRef +QMgrHelper::pushSyclQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum) { queue *QRef = nullptr; - if(active_queues.empty()) - error_reporter("Why is there no previous global context?"); + auto &activeQ = get_active_queues(); + if(activeQ.empty()) { + std::cerr << "Why is there no previous global context?\n"; + return nullptr; + } - switch (DeviceTy) + switch (BETy|DeviceTy) { - case DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { - if (DNum >= cpu_queues.size()) { - std::stringstream ss; - ss << "SYCL CPU device " << DNum << " not found on system."; - error_reporter(ss.str()); + if (DNum >= get_opencl_cpu_queues().size()) { + // \todo handle error + std::cerr << "OpenCL CPU device " << DNum + << " not found on system\n."; + return nullptr; } - active_queues.emplace_back(cpu_queues[DNum]); - QRef = new queue(active_queues[active_queues.size()-1]); + activeQ.emplace_back(get_opencl_cpu_queues()[DNum]); + QRef = new queue(activeQ[activeQ.size()-1]); break; } - case DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { - if (DNum >= gpu_queues.size()) { - std::stringstream ss; - ss << "SYCL GPU device " << DNum << " not found on system."; - error_reporter(ss.str()); + if (DNum >= get_opencl_gpu_queues().size()) { + // \todo handle error + std::cerr << "OpenCL GPU device " << DNum + << " not found on system\n."; + return nullptr; } - active_queues.emplace_back(gpu_queues[DNum]); - QRef = new queue(active_queues[active_queues.size()-1]); + activeQ.emplace_back(get_opencl_gpu_queues()[DNum]); + QRef = new queue(activeQ[get_active_queues().size()-1]); + break; + } + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + { + if (DNum >= get_level0_gpu_queues().size()) { + // \todo handle error + std::cerr << "Level-0 GPU device " << DNum + << " not found on system\n."; + return nullptr; + } + activeQ.emplace_back(get_level0_gpu_queues()[DNum]); + QRef = new queue(activeQ[get_active_queues().size()-1]); break; } default: { - error_reporter("Unsupported device type."); + std::cerr << "Unsupported device type.\n"; + return nullptr; } } @@ -269,38 +379,59 @@ void QMgrHelper::popSyclQueue () { // The first queue which is the "default" queue can not be removed. - if(active_queues.size() <= 1 ) - error_reporter("No active contexts"); - active_queues.pop_back(); + if(get_active_queues().size() <= 1 ) { + std::cerr << "No active contexts.\n"; + return; + } + get_active_queues().pop_back(); } } /* end of anonymous namespace */ +//----------------------------- Public API -----------------------------------// + /*! * Returns inside the number of activated queues not including the global queue * (QMgrHelper::active_queues[0]). */ size_t DPPLQueueMgr_GetNumActivatedQueues () { - if (QMgrHelper::active_queues.empty()) - error_reporter("No active contexts"); - return QMgrHelper::active_queues.size() - 1; -} - -/*! - * Returns the number of CPU queues. - */ -size_t DPPLQueueMgr_GetNumCPUQueues () -{ - return QMgrHelper::cpu_queues.size(); + if (QMgrHelper::get_active_queues().empty()) { + // \todo handle error + std::cerr << "No active contexts.\n"; + return 0; + } + return QMgrHelper::get_active_queues().size() - 1; } /*! - * Returns the number of GPU queues. + * Returns the number of available queues for a specific backend and device + * type combination. */ -size_t DPPLQueueMgr_GetNumGPUQueues () +size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy) { - return QMgrHelper::gpu_queues.size(); + switch (BETy|DeviceTy) + { + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + { + return QMgrHelper::get_opencl_cpu_queues().size(); + } + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + { + return QMgrHelper::get_opencl_gpu_queues().size(); + } + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + { + return QMgrHelper::get_level0_gpu_queues().size(); + } + default: + { + // \todo handle error + std::cerr << "Unsupported device type.\n"; + return 0; + } + } } /*! @@ -315,30 +446,43 @@ DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue () * Returns a copy of a sycl::queue corresponding to the specified device type * and device number. A runtime_error gets thrown if no such device exists. */ -DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclDeviceType DeviceTy, +DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { - return QMgrHelper::getQueue(DeviceTy, DNum); + return QMgrHelper::getQueue(BETy, DeviceTy, DNum); } +/*! + +* */ +bool DPPLQueueMgr_IsCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) +{ + return QMgrHelper::isCurrentQueue(QRef); +} /*! * The function sets the global queue, i.e., the sycl::queue object at * QMgrHelper::active_queues[0] vector to the sycl::queue corresponding to the - * specified device type and id. A runtime_error gets thrown if no such device - * exists. + * specified device type and id. If not queue was found for the backend and + * device, Null is returned. */ -void DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclDeviceType DeviceTy, size_t DNum) +__dppl_give DPPLSyclQueueRef +DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum) { - QMgrHelper::setAsDefaultQueue(DeviceTy, DNum); + return QMgrHelper::setAsDefaultQueue(BETy, DeviceTy, DNum); } /*! * \see QMgrHelper::pushSyclQueue() */ __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_PushQueue (DPPLSyclDeviceType DeviceTy, size_t DNum) +DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum) { - return QMgrHelper::pushSyclQueue(DeviceTy, DNum); + return QMgrHelper::pushSyclQueue(BETy, DeviceTy, DNum); } /*! diff --git a/backends/source/dppl_utils.cpp b/backends/source/dppl_utils.cpp index 4c0e9857dc..b3e4206679 100644 --- a/backends/source/dppl_utils.cpp +++ b/backends/source/dppl_utils.cpp @@ -27,5 +27,5 @@ void DPPLCString_Delete (__dppl_take const char* str) { - delete str; + delete[] str; } diff --git a/backends/tests/test_sycl_kernel_interface.cpp b/backends/tests/test_sycl_kernel_interface.cpp index 5a360dab62..4777e6e654 100644 --- a/backends/tests/test_sycl_kernel_interface.cpp +++ b/backends/tests/test_sycl_kernel_interface.cpp @@ -53,48 +53,60 @@ struct TestDPPLSyclKernelInterface : public ::testing::Test )CLC"; const char *CompileOpts ="-cl-fast-relaxed-math"; - DPPLSyclContextRef CtxRef = nullptr; - DPPLSyclQueueRef QueueRef = nullptr; - DPPLSyclProgramRef PRef = nullptr; - DPPLSyclKernelRef AddKernel = nullptr; - DPPLSyclKernelRef AxpyKernel = nullptr; - + size_t nOpenCLGpuQ = 0; TestDPPLSyclKernelInterface () - { - QueueRef = DPPLQueueMgr_GetQueue(DPPL_GPU, 0); - CtxRef = DPPLQueue_GetContext(QueueRef); - PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, - CompileOpts); - AddKernel = DPPLProgram_GetKernel(PRef, "add"); - AxpyKernel = DPPLProgram_GetKernel(PRef, "axpy"); - } - - ~TestDPPLSyclKernelInterface () - { - DPPLQueue_Delete(QueueRef); - DPPLContext_Delete(CtxRef); - DPPLProgram_Delete(PRef); - DPPLKernel_Delete(AddKernel); - DPPLKernel_Delete(AxpyKernel); - } + : nOpenCLGpuQ(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) + { } }; TEST_F (TestDPPLSyclKernelInterface, CheckGetFunctionName) { + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto QueueRef = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto CtxRef = DPPLQueue_GetContext(QueueRef); + auto PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, + CompileOpts); + auto AddKernel = DPPLProgram_GetKernel(PRef, "add"); + auto AxpyKernel = DPPLProgram_GetKernel(PRef, "axpy"); auto fnName1 = DPPLKernel_GetFunctionName(AddKernel); auto fnName2 = DPPLKernel_GetFunctionName(AxpyKernel); + ASSERT_STREQ("add", fnName1); ASSERT_STREQ("axpy", fnName2); + DPPLCString_Delete(fnName1); DPPLCString_Delete(fnName2); + + DPPLQueue_Delete(QueueRef); + DPPLContext_Delete(CtxRef); + DPPLProgram_Delete(PRef); + DPPLKernel_Delete(AddKernel); + DPPLKernel_Delete(AxpyKernel); } TEST_F (TestDPPLSyclKernelInterface, CheckGetNumArgs) { + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto QueueRef = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto CtxRef = DPPLQueue_GetContext(QueueRef); + auto PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, + CompileOpts); + auto AddKernel = DPPLProgram_GetKernel(PRef, "add"); + auto AxpyKernel = DPPLProgram_GetKernel(PRef, "axpy"); ASSERT_EQ(DPPLKernel_GetNumArgs(AddKernel), 3); ASSERT_EQ(DPPLKernel_GetNumArgs(AxpyKernel), 4); + + DPPLQueue_Delete(QueueRef); + DPPLContext_Delete(CtxRef); + DPPLProgram_Delete(PRef); + DPPLKernel_Delete(AddKernel); + DPPLKernel_Delete(AxpyKernel); } int diff --git a/backends/tests/test_sycl_platform_interface.cpp b/backends/tests/test_sycl_platform_interface.cpp index 3de7029b40..e8037b2466 100644 --- a/backends/tests/test_sycl_platform_interface.cpp +++ b/backends/tests/test_sycl_platform_interface.cpp @@ -29,13 +29,32 @@ struct TestDPPLSyclPlatformInterface : public ::testing::Test { }; - TEST_F (TestDPPLSyclPlatformInterface, CheckGetNumPlatforms) { auto nplatforms = DPPLPlatform_GetNumPlatforms(); EXPECT_GE(nplatforms, 0); } +TEST_F (TestDPPLSyclPlatformInterface, GetNumBackends) +{ + auto nbackends = DPPLPlatform_GetNumBackends(); + EXPECT_GE(nbackends, 0); +} + +TEST_F (TestDPPLSyclPlatformInterface, GetListOfBackends) +{ + auto nbackends = DPPLPlatform_GetNumBackends(); + auto backends = DPPLPlatform_GetListOfBackends(); + EXPECT_TRUE(backends != nullptr); + for(auto i = 0ul; i < nbackends; ++i) { + EXPECT_TRUE( + backends[i] == DPPLSyclBackendType::DPPL_CUDA || + backends[i] == DPPLSyclBackendType::DPPL_OPENCL || + backends[i] == DPPLSyclBackendType::DPPL_LEVEL_ZERO); + } + DPPLPlatform_DeleteListOfBackends(backends); +} + TEST_F (TestDPPLSyclPlatformInterface, CheckDPPLPlatformDumpInfo) { EXPECT_NO_FATAL_FAILURE(DPPLPlatform_DumpInfo()); @@ -44,7 +63,7 @@ TEST_F (TestDPPLSyclPlatformInterface, CheckDPPLPlatformDumpInfo) int main (int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - int ret = RUN_ALL_TESTS(); - return ret; + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; } diff --git a/backends/tests/test_sycl_program_interface.cpp b/backends/tests/test_sycl_program_interface.cpp index 3b2fff423b..027a81f6c3 100644 --- a/backends/tests/test_sycl_program_interface.cpp +++ b/backends/tests/test_sycl_program_interface.cpp @@ -122,65 +122,78 @@ struct TestDPPLSyclProgramInterface : public ::testing::Test } )CLC"; const char *CompileOpts ="-cl-fast-relaxed-math"; - - DPPLSyclContextRef CtxRef = nullptr; - DPPLSyclQueueRef QueueRef = nullptr; - DPPLSyclProgramRef PRef = nullptr; - DPPLSyclProgramRef PRef2 = nullptr; - - TestDPPLSyclProgramInterface () + std::ifstream spirvFile; + size_t spirvFileSize = 0; + std::vector spirvBuffer; + size_t nOpenCLGpuQ = 0; + + TestDPPLSyclProgramInterface () : + nOpenCLGpuQ(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)), + spirvFile{"./multi_kernel.spv", std::ios::binary | std::ios::ate}, + spirvFileSize(std::filesystem::file_size("./multi_kernel.spv")), + spirvBuffer(spirvFileSize) { - QueueRef = DPPLQueueMgr_GetQueue(DPPL_GPU, 0); - CtxRef = DPPLQueue_GetContext(QueueRef); - PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, - CompileOpts); - - // Create a program from a SPIR-V file - std::ifstream file{"./multi_kernel.spv", - std::ios::binary | std::ios::ate}; - auto fileSize = std::filesystem::file_size("./multi_kernel.spv"); - file.seekg(0, std::ios::beg); - std::vector buffer(fileSize); - file.read(buffer.data(), fileSize); - PRef2 = DPPLProgram_CreateFromOCLSpirv(CtxRef, buffer.data(), - fileSize); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer.data(), spirvFileSize); } ~TestDPPLSyclProgramInterface () { - DPPLQueue_Delete(QueueRef); - DPPLContext_Delete(CtxRef); - DPPLProgram_Delete(PRef); - DPPLProgram_Delete(PRef2); + spirvFile.close(); } }; TEST_F (TestDPPLSyclProgramInterface, CheckCreateFromOCLSource) { + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto QueueRef = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto CtxRef = DPPLQueue_GetContext(QueueRef); + auto PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, + CompileOpts); ASSERT_TRUE(PRef != nullptr); + ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "add")); + ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "axpy")); + + DPPLQueue_Delete(QueueRef); + DPPLContext_Delete(CtxRef); + DPPLProgram_Delete(PRef); } TEST_F (TestDPPLSyclProgramInterface, CheckCreateFromOCLSpirv) { - ASSERT_TRUE(PRef2 != nullptr); -} + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); -TEST_F (TestDPPLSyclProgramInterface, CheckHasKernelOCLSource) -{ + auto QueueRef = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto CtxRef = DPPLQueue_GetContext(QueueRef); + auto PRef = DPPLProgram_CreateFromOCLSpirv(CtxRef, spirvBuffer.data(), + spirvFileSize); + ASSERT_TRUE(PRef != nullptr); ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "add")); ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "axpy")); -} -TEST_F (TestDPPLSyclProgramInterface, CheckHasKernelSpirvSource) -{ - ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "add")); - ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "axpy")); + DPPLQueue_Delete(QueueRef); + DPPLContext_Delete(CtxRef); + DPPLProgram_Delete(PRef); } TEST_F (TestDPPLSyclProgramInterface, CheckGetKernelOCLSource) { + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto QueueRef = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto CtxRef = DPPLQueue_GetContext(QueueRef); + auto PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, + CompileOpts); auto AddKernel = DPPLProgram_GetKernel(PRef, "add"); auto AxpyKernel = DPPLProgram_GetKernel(PRef, "axpy"); + + ASSERT_TRUE(AddKernel != nullptr); + ASSERT_TRUE(AxpyKernel != nullptr); + auto syclQueue = reinterpret_cast(QueueRef); add_kernel_checker(syclQueue, AddKernel); @@ -188,12 +201,26 @@ TEST_F (TestDPPLSyclProgramInterface, CheckGetKernelOCLSource) DPPLKernel_Delete(AddKernel); DPPLKernel_Delete(AxpyKernel); + DPPLQueue_Delete(QueueRef); + DPPLContext_Delete(CtxRef); + DPPLProgram_Delete(PRef); } TEST_F (TestDPPLSyclProgramInterface, CheckGetKernelOCLSpirv) { - auto AddKernel = DPPLProgram_GetKernel(PRef2, "add"); - auto AxpyKernel = DPPLProgram_GetKernel(PRef2, "axpy"); + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto QueueRef = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto CtxRef = DPPLQueue_GetContext(QueueRef); + auto PRef = DPPLProgram_CreateFromOCLSpirv(CtxRef, spirvBuffer.data(), + spirvFileSize); + auto AddKernel = DPPLProgram_GetKernel(PRef, "add"); + auto AxpyKernel = DPPLProgram_GetKernel(PRef, "axpy"); + + ASSERT_TRUE(AddKernel != nullptr); + ASSERT_TRUE(AxpyKernel != nullptr); + auto syclQueue = reinterpret_cast(QueueRef); add_kernel_checker(syclQueue, AddKernel); @@ -201,12 +228,15 @@ TEST_F (TestDPPLSyclProgramInterface, CheckGetKernelOCLSpirv) DPPLKernel_Delete(AddKernel); DPPLKernel_Delete(AxpyKernel); + DPPLQueue_Delete(QueueRef); + DPPLContext_Delete(CtxRef); + DPPLProgram_Delete(PRef); } int main (int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - int ret = RUN_ALL_TESTS(); - return ret; + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; } diff --git a/backends/tests/test_sycl_queue_interface.cpp b/backends/tests/test_sycl_queue_interface.cpp index d284799540..9c853ef8aa 100644 --- a/backends/tests/test_sycl_queue_interface.cpp +++ b/backends/tests/test_sycl_queue_interface.cpp @@ -80,28 +80,184 @@ struct TestDPPLSyclQueueInterface : public ::testing::Test )CLC"; const char *CompileOpts ="-cl-fast-relaxed-math"; - DPPLSyclContextRef CtxRef = nullptr; - DPPLSyclQueueRef Queue = nullptr; - DPPLSyclProgramRef PRef = nullptr; - DPPLSyclProgramRef PRef2 = nullptr; TestDPPLSyclQueueInterface () + { } + + ~TestDPPLSyclQueueInterface () + { } +}; + +TEST_F (TestDPPLSyclQueueInterface, CheckAreEq) +{ + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); + auto Q2 = DPPLQueueMgr_GetCurrentQueue(); + EXPECT_TRUE(DPPLQueue_AreEq(Q1, Q2)); + + auto nOclGPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_GPU); + auto nOclCPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_CPU); { - Queue = DPPLQueueMgr_GetQueue(DPPL_GPU, 0); - CtxRef = DPPLQueue_GetContext(Queue); - PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, - CompileOpts); + if(!nOclGPU) + GTEST_SKIP_("No OpenCL GPUs available.\n"); + + auto Def_Q = DPPLQueueMgr_SetAsDefaultQueue( + DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_GPU, + 0 + ); + auto OclGPU_Q0 = DPPLQueueMgr_PushQueue( + DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_GPU, + 0 + ); + auto OclGPU_Q1 = DPPLQueueMgr_PushQueue( + DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_GPU, + 0 + ); + EXPECT_TRUE(DPPLQueue_AreEq(Def_Q, OclGPU_Q0)); + EXPECT_TRUE(DPPLQueue_AreEq(Def_Q, OclGPU_Q1)); + EXPECT_TRUE(DPPLQueue_AreEq(OclGPU_Q0, OclGPU_Q1)); + DPPLQueue_Delete(Def_Q); + DPPLQueue_Delete(OclGPU_Q0); + DPPLQueue_Delete(OclGPU_Q1); + DPPLQueueMgr_PopQueue(); + DPPLQueueMgr_PopQueue(); } - ~TestDPPLSyclQueueInterface () { - DPPLQueue_Delete(Queue); - DPPLContext_Delete(CtxRef); - DPPLProgram_Delete(PRef); + if(!nOclGPU || !nOclCPU) + GTEST_SKIP_("OpenCL GPUs and CPU not available.\n"); + auto GPU_Q = DPPLQueueMgr_PushQueue( + DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_GPU, + 0 + ); + auto CPU_Q = DPPLQueueMgr_PushQueue( + DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_CPU, + 0 + ); + EXPECT_FALSE(DPPLQueue_AreEq(GPU_Q, CPU_Q)); + DPPLQueueMgr_PopQueue(); + DPPLQueueMgr_PopQueue(); } -}; + +} + +TEST_F (TestDPPLSyclQueueInterface, CheckGetBackend) +{ + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); + auto BE = DPPLQueue_GetBackend(Q1); + EXPECT_TRUE((BE == DPPL_OPENCL) || + (BE == DPPL_LEVEL_ZERO) || + (BE == DPPL_CUDA) || + (BE == DPPL_HOST) + ); + DPPLQueue_Delete(Q1); + if(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + EXPECT_TRUE(DPPLQueue_GetBackend(Q) == DPPL_OPENCL); + DPPLQueue_Delete(Q); + DPPLQueueMgr_PopQueue(); + } + if(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + EXPECT_TRUE(DPPLQueue_GetBackend(Q) == DPPL_OPENCL); + DPPLQueue_Delete(Q); + DPPLQueueMgr_PopQueue(); + } + if(DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); + EXPECT_TRUE(DPPLQueue_GetBackend(Q) == DPPL_LEVEL_ZERO); + DPPLQueue_Delete(Q); + DPPLQueueMgr_PopQueue(); + } +} + +TEST_F (TestDPPLSyclQueueInterface, CheckGetContext) +{ + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); + auto Ctx = DPPLQueue_GetContext(Q1); + ASSERT_TRUE(Ctx != nullptr); + DPPLQueue_Delete(Q1); + DPPLContext_Delete(Ctx); + + if(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto OclGpuCtx = DPPLQueue_GetContext(Q); + ASSERT_TRUE(OclGpuCtx != nullptr); + DPPLQueue_Delete(Q); + DPPLContext_Delete(OclGpuCtx); + DPPLQueueMgr_PopQueue(); + } + if(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + auto OclCpuCtx = DPPLQueue_GetContext(Q); + ASSERT_TRUE(OclCpuCtx != nullptr); + DPPLQueue_Delete(Q); + DPPLContext_Delete(OclCpuCtx); + DPPLQueueMgr_PopQueue(); + } + if(DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); + auto L0Ctx = DPPLQueue_GetContext(Q); + ASSERT_TRUE(Ctx != nullptr); + DPPLQueue_Delete(Q); + DPPLContext_Delete(L0Ctx); + DPPLQueueMgr_PopQueue(); + } +} + +TEST_F (TestDPPLSyclQueueInterface, CheckGetDevice) +{ + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); + auto D = DPPLQueue_GetDevice(Q1); + ASSERT_TRUE(D != nullptr); + DPPLQueue_Delete(Q1); + DPPLDevice_Delete(D); + + if(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto OCLGPU_D = DPPLQueue_GetDevice(Q); + ASSERT_TRUE(OCLGPU_D != nullptr); + EXPECT_TRUE(DPPLDevice_IsGPU(OCLGPU_D)); + DPPLQueue_Delete(Q); + DPPLDevice_Delete(OCLGPU_D); + DPPLQueueMgr_PopQueue(); + } + if(DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + auto OCLCPU_D = DPPLQueue_GetDevice(Q); + ASSERT_TRUE(OCLCPU_D != nullptr); + EXPECT_TRUE(DPPLDevice_IsCPU(OCLCPU_D)); + DPPLQueue_Delete(Q); + DPPLDevice_Delete(OCLCPU_D); + DPPLQueueMgr_PopQueue(); + } + if(DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU)) { + auto Q = DPPLQueueMgr_PushQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); + auto L0GPU_D = DPPLQueue_GetDevice(Q); + ASSERT_TRUE(L0GPU_D != nullptr); + EXPECT_TRUE(DPPLDevice_IsGPU(L0GPU_D)); + DPPLQueue_Delete(Q); + DPPLDevice_Delete(L0GPU_D); + DPPLQueueMgr_PopQueue(); + } +} TEST_F (TestDPPLSyclQueueInterface, CheckSubmit) { + auto nOpenCLGpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU); + + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto Queue = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + auto CtxRef = DPPLQueue_GetContext(Queue); + auto PRef = DPPLProgram_CreateFromOCLSource(CtxRef, CLProgramStr, + CompileOpts); ASSERT_TRUE(PRef != nullptr); ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "init_arr")); ASSERT_TRUE(DPPLProgram_HasKernel(PRef, "add")); @@ -181,12 +337,16 @@ TEST_F (TestDPPLSyclQueueInterface, CheckSubmit) DPPLfree_with_queue((DPPLSyclUSMRef)a, Queue); DPPLfree_with_queue((DPPLSyclUSMRef)b, Queue); DPPLfree_with_queue((DPPLSyclUSMRef)c, Queue); + + DPPLQueue_Delete(Queue); + DPPLContext_Delete(CtxRef); + DPPLProgram_Delete(PRef); } int main (int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - int ret = RUN_ALL_TESTS(); - return ret; + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; } diff --git a/backends/tests/test_sycl_queue_manager.cpp b/backends/tests/test_sycl_queue_manager.cpp index 38faadc217..675fc01c4c 100644 --- a/backends/tests/test_sycl_queue_manager.cpp +++ b/backends/tests/test_sycl_queue_manager.cpp @@ -23,20 +23,24 @@ /// dppl_sycl_queue_interface.h and dppl_sycl_queue_manager.h. /// //===----------------------------------------------------------------------===// +#include "dppl_sycl_context_interface.h" #include "dppl_sycl_device_interface.h" #include "dppl_sycl_queue_interface.h" #include "dppl_sycl_queue_manager.h" #include #include +#include + using namespace std; +using namespace cl::sycl; namespace { void foo (size_t & num) { - auto q1 = DPPLQueueMgr_PushQueue(DPPL_CPU, 0); - auto q2 = DPPLQueueMgr_PushQueue(DPPL_GPU, 0); + auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + auto q2 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); // Capture the number of active queues in first num = DPPLQueueMgr_GetNumActivatedQueues(); DPPLQueueMgr_PopQueue(); @@ -47,7 +51,7 @@ namespace void bar (size_t & num) { - auto q1 = DPPLQueueMgr_PushQueue(DPPL_GPU, 0); + auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); // Capture the number of active queues in second num = DPPLQueueMgr_GetNumActivatedQueues(); DPPLQueueMgr_PopQueue(); @@ -67,39 +71,82 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetCurrentQueue) } -TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetQueue) +TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLCpuQ) { - auto numCpuQueues = DPPLQueueMgr_GetNumCPUQueues(); - auto numGpuQueues = DPPLQueueMgr_GetNumGPUQueues(); - if(numCpuQueues > 0) { - EXPECT_TRUE(DPPLQueueMgr_GetQueue(DPPL_CPU, 0) != nullptr); - auto non_existent_device_num = numCpuQueues+1; - try { - DPPLQueueMgr_GetQueue(DPPL_CPU, non_existent_device_num); - FAIL() << "SYCL CPU device " << non_existent_device_num - << "not found on system."; - } - catch (...) { } - } - if(numGpuQueues > 0) { - EXPECT_TRUE(DPPLQueueMgr_GetQueue(DPPL_GPU, 0) != nullptr); - auto non_existent_device_num = numGpuQueues+1; - try { - DPPLQueueMgr_GetQueue(DPPL_GPU, non_existent_device_num); - FAIL() << "SYCL GPU device " << non_existent_device_num - << "not found on system."; - } - catch (...) { } - } + auto nOpenCLCpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU); + if(!nOpenCLCpuQ) + GTEST_SKIP_("Skipping as no OpenCL CPU device found."); + + auto q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_CPU, 0); + EXPECT_TRUE(q != nullptr); + auto sycl_q = reinterpret_cast(q); + auto be = sycl_q->get_context().get_platform().get_backend(); + EXPECT_EQ(be, backend::opencl); + auto devty = sycl_q->get_device().get_info(); + EXPECT_EQ(devty, info::device_type::cpu); + + auto non_existent_device_num = nOpenCLCpuQ + 1; + // Non-existent device number should return nullptr + auto null_q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_CPU, + non_existent_device_num); + ASSERT_TRUE(null_q == nullptr); } +TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLGpuQ) +{ + auto nOpenCLGpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU); + if(!nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); + EXPECT_TRUE(q != nullptr); + auto sycl_q = reinterpret_cast(q); + auto be = sycl_q->get_context().get_platform().get_backend(); + EXPECT_EQ(be, backend::opencl); + auto devty = sycl_q->get_device().get_info(); + EXPECT_EQ(devty, info::device_type::gpu); + + auto non_existent_device_num = nOpenCLGpuQ + 1; + // Non-existent device number should return nullptr + auto null_q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, + non_existent_device_num); + ASSERT_TRUE(null_q == nullptr); +} + +TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetLevel0GpuQ) +{ + auto nL0GpuQ = DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU); + if(!nL0GpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto q = DPPLQueueMgr_GetQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); + EXPECT_TRUE(q != nullptr); + auto sycl_q = reinterpret_cast(q); + auto be = sycl_q->get_context().get_platform().get_backend(); + EXPECT_EQ(be, backend::level_zero); + auto devty = sycl_q->get_device().get_info(); + EXPECT_EQ(devty, info::device_type::gpu); + + auto non_existent_device_num = nL0GpuQ + 1; + // Non-existent device number should return nullptr + auto null_q = DPPLQueueMgr_GetQueue(DPPL_LEVEL_ZERO, DPPL_GPU, + non_existent_device_num); + ASSERT_TRUE(null_q == nullptr); +} TEST_F (TestDPPLSyclQueueManager, CheckGetNumActivatedQueues) { size_t num0, num1, num2, num4; + auto nOpenCLCpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU); + auto nOpenCLGpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU); + auto nL0GpuQ = DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU); + // Add a queue to main thread - auto q = DPPLQueueMgr_PushQueue(DPPL_CPU, 0); + if(!nOpenCLCpuQ || !nOpenCLGpuQ) + GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + + auto q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); std::thread first (foo, std::ref(num1)); std::thread second (bar, std::ref(num2)); @@ -123,7 +170,6 @@ TEST_F (TestDPPLSyclQueueManager, CheckGetNumActivatedQueues) DPPLQueue_Delete(q); } - TEST_F (TestDPPLSyclQueueManager, CheckDPPLDumpDeviceInfo) { auto q = DPPLQueueMgr_GetCurrentQueue(); @@ -131,11 +177,26 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLDumpDeviceInfo) EXPECT_NO_FATAL_FAILURE(DPPLQueue_Delete(q)); } +TEST_F (TestDPPLSyclQueueManager, CheckIsCurrentQueue) +{ + if(!DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) + GTEST_SKIP_("No OpenCL GPU.\n"); + + auto Q0 = DPPLQueueMgr_GetCurrentQueue(); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q0)); + auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q)); + EXPECT_FALSE(DPPLQueueMgr_IsCurrentQueue(Q0)); + DPPLQueue_Delete(Q); + DPPLQueueMgr_PopQueue(); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q0)); + DPPLQueue_Delete(Q0); +} int main (int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - int ret = RUN_ALL_TESTS(); - return ret; + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; } diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index a951f8a1f6..31c40e0d24 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -3,7 +3,7 @@ IF ERRORLEVEL 1 exit 1 REM conda uses %ERRORLEVEL% but FPGA scripts can set it. So it should be reseted. set ERRORLEVEL= -set "CC=dpcpp.exe" +set "CC=clang-cl.exe" set "CXX=dpcpp.exe" rmdir /S /Q build_cmake diff --git a/dpctl/_sycl_core.pxd b/dpctl/_sycl_core.pxd index c8860b3c6b..a95e5f28c5 100644 --- a/dpctl/_sycl_core.pxd +++ b/dpctl/_sycl_core.pxd @@ -37,6 +37,7 @@ cdef class SyclContext: @staticmethod cdef SyclContext _create (DPPLSyclContextRef ctxt) + cpdef bool equals (self, SyclContext ctxt) cdef DPPLSyclContextRef get_context_ref (self) @@ -108,6 +109,7 @@ cdef class SyclQueue: @staticmethod cdef SyclQueue _create (DPPLSyclQueueRef qref) + cpdef bool equals (self, SyclQueue q) cpdef SyclContext get_sycl_context (self) cpdef SyclDevice get_sycl_device (self) cdef DPPLSyclQueueRef get_queue_ref (self) diff --git a/dpctl/backend.pxd b/dpctl/backend.pxd index 3f587b77fe..c36a9ffed4 100644 --- a/dpctl/backend.pxd +++ b/dpctl/backend.pxd @@ -33,6 +33,43 @@ from libcpp cimport bool cdef extern from "dppl_utils.h": cdef void DPPLCString_Delete (const char *str) +cdef extern from "dppl_sycl_enum_types.h": + cdef enum _backend_type 'DPPLSyclBackendType': + _OPENCL 'DPPL_OPENCL' + _HOST 'DPPL_HOST' + _LEVEL_ZERO 'DPPL_LEVEL_ZERO' + _CUDA 'DPPL_CUDA' + _UNKNOWN_BACKEND 'DPPL_UNKNOWN_BACKEND' + + ctypedef _backend_type DPPLSyclBackendType + + cdef enum _device_type 'DPPLSyclDeviceType': + _GPU 'DPPL_GPU' + _CPU 'DPPL_CPU' + _ACCELERATOR 'DPPL_ACCELERATOR' + _HOST_DEVICE 'DPPL_HOST_DEVICE' + + ctypedef _device_type DPPLSyclDeviceType + + cdef enum _arg_data_type 'DPPLKernelArgType': + _CHAR 'DPPL_CHAR', + _SIGNED_CHAR 'DPPL_SIGNED_CHAR', + _UNSIGNED_CHAR 'DPPL_UNSIGNED_CHAR', + _SHORT 'DPPL_SHORT', + _INT 'DPPL_INT', + _UNSIGNED_INT 'DPPL_UNSIGNED_INT', + _UNSIGNED_INT8 'DPPL_UNSIGNED_INT8', + _LONG 'DPPL_LONG', + _UNSIGNED_LONG 'DPPL_UNSIGNED_LONG', + _LONG_LONG 'DPPL_LONG_LONG', + _UNSIGNED_LONG_LONG 'DPPL_UNSIGNED_LONG_LONG', + _SIZE_T 'DPPL_SIZE_T', + _FLOAT 'DPPL_FLOAT', + _DOUBLE 'DPPL_DOUBLE', + _LONG_DOUBLE 'DPPL_DOUBLE', + _VOID_PTR 'DPPL_VOID_PTR' + + ctypedef _arg_data_type DPPLKernelArgType cdef extern from "dppl_sycl_types.h": cdef struct DPPLOpaqueSyclContext @@ -52,28 +89,18 @@ cdef extern from "dppl_sycl_types.h": ctypedef DPPLOpaqueSyclUSM* DPPLSyclUSMRef -cdef extern from "dppl_sycl_context_interface.h": - cdef void DPPLContext_Delete (DPPLSyclContextRef CtxtRef) except + - - cdef extern from "dppl_sycl_device_interface.h": - cdef enum _device_type 'DPPLSyclDeviceType': - _GPU 'DPPL_GPU' - _CPU 'DPPL_CPU' - cdef void DPPLDevice_DumpInfo (const DPPLSyclDeviceRef DRef) except + - cdef void DPPLDevice_Delete (DPPLSyclDeviceRef DRef) except + - cdef void DPPLDevice_DumpInfo (const DPPLSyclDeviceRef DRef) except + - cdef bool DPPLDevice_IsAccelerator (const DPPLSyclDeviceRef DRef) except + - cdef bool DPPLDevice_IsCPU (const DPPLSyclDeviceRef DRef) except + - cdef bool DPPLDevice_IsGPU (const DPPLSyclDeviceRef DRef) except + - cdef bool DPPLDevice_IsHost (const DPPLSyclDeviceRef DRef) except + - cdef const char* DPPLDevice_GetDriverInfo (const DPPLSyclDeviceRef DRef) \ - except + - cdef const char* DPPLDevice_GetName (const DPPLSyclDeviceRef DRef) except + - cdef const char* DPPLDevice_GetVendorName (const DPPLSyclDeviceRef DRef) \ - except + - cdef bool DPPLDevice_IsHostUnifiedMemory (const DPPLSyclDeviceRef DRef) \ - except + + cdef void DPPLDevice_DumpInfo (const DPPLSyclDeviceRef DRef) + cdef void DPPLDevice_Delete (DPPLSyclDeviceRef DRef) + cdef void DPPLDevice_DumpInfo (const DPPLSyclDeviceRef DRef) + cdef bool DPPLDevice_IsAccelerator (const DPPLSyclDeviceRef DRef) + cdef bool DPPLDevice_IsCPU (const DPPLSyclDeviceRef DRef) + cdef bool DPPLDevice_IsGPU (const DPPLSyclDeviceRef DRef) + cdef bool DPPLDevice_IsHost (const DPPLSyclDeviceRef DRef) + cdef const char* DPPLDevice_GetDriverInfo (const DPPLSyclDeviceRef DRef) + cdef const char* DPPLDevice_GetName (const DPPLSyclDeviceRef DRef) + cdef const char* DPPLDevice_GetVendorName (const DPPLSyclDeviceRef DRef) + cdef bool DPPLDevice_IsHostUnifiedMemory (const DPPLSyclDeviceRef DRef) cdef extern from "dppl_sycl_event_interface.h": @@ -90,6 +117,17 @@ cdef extern from "dppl_sycl_kernel_interface.h": cdef extern from "dppl_sycl_platform_interface.h": cdef size_t DPPLPlatform_GetNumPlatforms () cdef void DPPLPlatform_DumpInfo () + cdef size_t DPPLPlatform_GetNumBackends () + cdef DPPLSyclBackendType *DPPLPlatform_GetListOfBackends () + cdef void DPPLPlatform_DeleteListOfBackends (DPPLSyclBackendType * BEs) + + +cdef extern from "dppl_sycl_context_interface.h": + cdef bool DPPLContext_AreEq (const DPPLSyclContextRef CtxRef1, + const DPPLSyclContextRef CtxRef2) + cdef DPPLSyclBackendType DPPLContext_GetBackend ( + const DPPLSyclContextRef CtxRef) + cdef void DPPLContext_Delete (DPPLSyclContextRef CtxRef) cdef extern from "dppl_sycl_program_interface.h": @@ -109,24 +147,10 @@ cdef extern from "dppl_sycl_program_interface.h": cdef extern from "dppl_sycl_queue_interface.h": - cdef enum _arg_data_type 'DPPLKernelArgType': - _CHAR 'DPPL_CHAR', - _SIGNED_CHAR 'DPPL_SIGNED_CHAR', - _UNSIGNED_CHAR 'DPPL_UNSIGNED_CHAR', - _SHORT 'DPPL_SHORT', - _INT 'DPPL_INT', - _UNSIGNED_INT 'DPPL_INT', - _LONG 'DPPL_LONG', - _UNSIGNED_LONG 'DPPL_UNSIGNED_LONG', - _LONG_LONG 'DPPL_LONG_LONG', - _UNSIGNED_LONG_LONG 'DPPL_UNSIGNED_LONG_LONG', - _SIZE_T 'DPPL_SIZE_T', - _FLOAT 'DPPL_FLOAT', - _DOUBLE 'DPPL_DOUBLE', - _LONG_DOUBLE 'DPPL_DOUBLE', - _VOID_PTR 'DPPL_VOID_PTR' - ctypedef _arg_data_type DPPLKernelArgType + cdef bool DPPLQueue_AreEq (const DPPLSyclQueueRef QRef1, + const DPPLSyclQueueRef QRef2) cdef void DPPLQueue_Delete (DPPLSyclQueueRef QRef) + cdef DPPLSyclBackendType DPPLQueue_GetBackend (const DPPLSyclQueueRef Q) cdef DPPLSyclContextRef DPPLQueue_GetContext (const DPPLSyclQueueRef Q) cdef DPPLSyclDeviceRef DPPLQueue_GetDevice (const DPPLSyclQueueRef Q) cdef DPPLSyclEventRef DPPLQueue_SubmitRange ( @@ -156,31 +180,32 @@ cdef extern from "dppl_sycl_queue_interface.h": cdef extern from "dppl_sycl_queue_manager.h": - cdef DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue () except + - cdef size_t DPPLQueueMgr_GetNumCPUQueues () except + - cdef size_t DPPLQueueMgr_GetNumGPUQueues () except + - cdef size_t DPPLQueueMgr_GetNumActivatedQueues () except + - cdef DPPLSyclQueueRef DPPLQueueMgr_GetQueue (_device_type DTy, - size_t device_num) except + - cdef void DPPLQueueMgr_PopQueue () except + - cdef DPPLSyclQueueRef DPPLQueueMgr_PushQueue (_device_type DTy, - size_t device_num) except + - cdef void DPPLQueueMgr_SetAsDefaultQueue (_device_type DTy, - size_t device_num) except + + cdef DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue () + cdef size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy) + cdef size_t DPPLQueueMgr_GetNumActivatedQueues () + cdef DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum) + cdef bool DPPLQueueMgr_IsCurrentQueue (const DPPLSyclQueueRef QRef) + cdef void DPPLQueueMgr_PopQueue () + cdef DPPLSyclQueueRef DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum) + cdef DPPLSyclQueueRef DPPLQueueMgr_SetAsDefaultQueue ( + DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, + size_t DNum + ) cdef extern from "dppl_sycl_usm_interface.h": - cdef DPPLSyclUSMRef DPPLmalloc_shared (size_t size, DPPLSyclQueueRef QRef) \ - except + - cdef DPPLSyclUSMRef DPPLmalloc_host (size_t size, DPPLSyclQueueRef QRef) \ - except + - cdef DPPLSyclUSMRef DPPLmalloc_device (size_t size, DPPLSyclQueueRef QRef) \ - except + - + cdef DPPLSyclUSMRef DPPLmalloc_shared (size_t size, DPPLSyclQueueRef QRef) + cdef DPPLSyclUSMRef DPPLmalloc_host (size_t size, DPPLSyclQueueRef QRef) + cdef DPPLSyclUSMRef DPPLmalloc_device (size_t size, DPPLSyclQueueRef QRef) cdef void DPPLfree_with_queue (DPPLSyclUSMRef MRef, - DPPLSyclQueueRef QRef) except + + DPPLSyclQueueRef QRef) cdef void DPPLfree_with_context (DPPLSyclUSMRef MRef, - DPPLSyclContextRef CRef) except + - + DPPLSyclContextRef CRef) cdef const char* DPPLUSM_GetPointerType (DPPLSyclUSMRef MRef, - DPPLSyclContextRef CRef) except + + DPPLSyclContextRef CRef) diff --git a/dpctl/sycl_core.pyx b/dpctl/sycl_core.pyx index 7a10b9d6d7..aa0b6e354f 100644 --- a/dpctl/sycl_core.pyx +++ b/dpctl/sycl_core.pyx @@ -40,9 +40,22 @@ _logger = logging.getLogger(__name__) class device_type(Enum): gpu = auto() cpu = auto() + accelerator = auto() + host_device = auto() +class backend_type(Enum): + opencl = auto() + level_zero = auto() + cuda = auto() + host = auto() -cdef class UnsupportedDeviceTypeError (Exception): +cdef class UnsupportedBackendError (Exception): + '''This exception is raised when a device type other than CPU or GPU is + encountered. + ''' + pass + +cdef class UnsupportedDeviceError (Exception): '''This exception is raised when a device type other than CPU or GPU is encountered. ''' @@ -66,6 +79,11 @@ cdef class SyclKernelInvalidRangeError (Exception): ''' pass +cdef class SyclQueueCreationError (Exception): + '''This exception is raised when a range that has more than three + dimensions or less than one dimension. + ''' + pass cdef class SyclContext: @@ -78,6 +96,12 @@ cdef class SyclContext: def __dealloc__ (self): DPPLContext_Delete(self._ctxt_ref) + cpdef bool equals (self, SyclContext ctxt): + """ Returns true if the SyclContext argument has the same _context_ref + as this SyclContext. + """ + return DPPLContext_AreEq(self._ctxt_ref, ctxt.get_context_ref()) + cdef DPPLSyclContextRef get_context_ref (self): return self._ctxt_ref @@ -270,9 +294,15 @@ cdef class SyclQueue: elif isinstance(arg, ctypes.c_uint): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._UNSIGNED_INT + elif isinstance(arg, ctypes.c_uint8): + kargs[idx] = (ctypes.addressof(arg)) + kargty[idx] = _arg_data_type._UNSIGNED_INT8 elif isinstance(arg, ctypes.c_long): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._LONG + elif isinstance(arg, ctypes.c_ulong): + kargs[idx] = (ctypes.addressof(arg)) + kargty[idx] = _arg_data_type._UNSIGNED_LONG elif isinstance(arg, ctypes.c_longlong): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._LONG_LONG @@ -320,6 +350,26 @@ cdef class SyclQueue: return ret + cpdef bool equals (self, SyclQueue q): + """ Returns true if the SyclQueue argument has the same _queue_ref + as this SycleQueue. + """ + return DPPLQueue_AreEq(self._queue_ref, q.get_queue_ref()) + + def get_sycl_backend (self): + """ Returns the Sycl bakend associated with the queue. + """ + cdef DPPLSyclBackendType BE = DPPLQueue_GetBackend(self._queue_ref) + if BE == _backend_type._OPENCL: + return backend_type.opencl + elif BE == _backend_type._LEVEL_ZERO: + return backend_type.level_zero + elif BE == _backend_type._HOST: + return backend_type.host + elif BE == _backend_type._CUDA: + return backend_type.cuda + else: + raise ValueError("Unknown backend type.") cpdef SyclContext get_sycl_context (self): return self._context @@ -449,74 +499,158 @@ cdef class SyclQueue: DPPLQueue_Memcpy(self._queue_ref, c_dest, c_src, count) -cdef class _SyclQueueManager: +cdef class _SyclRTManager: ''' Wrapper for the C API's sycl queue manager interface. ''' + cdef dict _backend_str_ty_dict + cdef dict _device_str_ty_dict + cdef dict _backend_enum_ty_dict + cdef dict _device_enum_ty_dict + + def __cinit__ (self): + + self._backend_str_ty_dict = { + "opencl" : _backend_type._OPENCL, + "level0" : _backend_type._LEVEL_ZERO, + } + + self._device_str_ty_dict = { + "gpu" : _device_type._GPU, + "cpu" : _device_type._CPU, + } + + self._backend_enum_ty_dict = { + backend_type.opencl : _backend_type._OPENCL, + backend_type.level_zero : _backend_type._LEVEL_ZERO, + } + + self._device_enum_ty_dict = { + device_type.cpu : _device_type._CPU, + device_type.gpu : _device_type._GPU, + } + + cdef _raise_queue_creation_error (self, str be, str dev, int devid, fname): + e = SyclQueueCreationError( + "Queue creation failed for :", be, dev, devid + ) + e.fname = fname + e.code = -1 + raise e - def _set_as_current_queue (self, device_ty, device_id): + + def _set_as_current_queue (self, backend_ty, device_ty, device_id): cdef DPPLSyclQueueRef queue_ref - if device_ty == device_type.gpu: - queue_ref = DPPLQueueMgr_PushQueue(_device_type._GPU, device_id) - elif device_ty == device_type.cpu: - queue_ref = DPPLQueueMgr_PushQueue(_device_type._CPU, device_id) - else: - e = UnsupportedDeviceTypeError("Device can only be cpu or gpu") - raise e - return SyclQueue._create(queue_ref) + try : + beTy = self._backend_str_ty_dict[backend_ty] + try : + devTy = self._device_str_ty_dict[device_ty] + queue_ref = DPPLQueueMgr_PushQueue(beTy, devTy, device_id) + if queue_ref is NULL: + self._raise_queue_creation_error( + backend_ty, device_ty, device_id, + "DPPLQueueMgr_PushQueue" + ) + return SyclQueue._create(queue_ref) + except KeyError: + raise UnsupportedDeviceError("Device can only be gpu or cpu") + except KeyError: + raise UnsupportedBackendError("Backend can only be opencl or " + "level-0") def _remove_current_queue (self): DPPLQueueMgr_PopQueue() - def has_sycl_platforms (self): - cdef size_t num_platforms = DPPLPlatform_GetNumPlatforms() - if num_platforms: - return True - else: - return False - - def get_num_platforms (self): - ''' Returns the number of available SYCL/OpenCL platforms. + def dump (self): + ''' Prints information about the Runtime object. ''' - return DPPLPlatform_GetNumPlatforms() + DPPLPlatform_DumpInfo() - def get_num_activated_queues (self): - ''' Return the number of currently activated queues for this thread. + def print_available_backends (self): + """ Prints the available backends. + """ + print(self._backend_ty_dict.keys()) + + def get_current_backend (self): + """ Returns the backend for the current queue as `backend_type` enum + """ + return self.get_current_queue().get_sycl_backend() + + def get_current_device_type (self): + ''' Returns current device type as `device_type` enum ''' - return DPPLQueueMgr_GetNumActivatedQueues() + return self.get_current_queue().get_sycl_device().get_device_type() def get_current_queue (self): ''' Returns the activated SYCL queue as a PyCapsule. ''' return SyclQueue._create(DPPLQueueMgr_GetCurrentQueue()) - def set_default_queue (self, device_ty, device_id): - if device_ty == device_type.gpu: - DPPLQueueMgr_SetAsDefaultQueue(_device_type._GPU, device_id) - elif device_ty == device_type.cpu: - DPPLQueueMgr_SetAsDefaultQueue(_device_type._CPU, device_id) - else: - e = UnsupportedDeviceTypeError("Device can only be cpu or gpu") - raise e + def get_num_activated_queues (self): + ''' Return the number of currently activated queues for this thread. + ''' + return DPPLQueueMgr_GetNumActivatedQueues() + + def get_num_platforms (self): + ''' Returns the number of available SYCL/OpenCL platforms. + ''' + return DPPLPlatform_GetNumPlatforms() - def has_gpu_queues (self): - cdef size_t num = DPPLQueueMgr_GetNumGPUQueues() + def get_num_queues (self, backend_ty, device_ty): + cdef size_t num = 0 + try : + beTy = self._backend_enum_ty_dict[backend_ty] + try : + devTy = self._device_enum_ty_dict[device_ty] + num = DPPLQueueMgr_GetNumQueues(beTy, devTy) + except KeyError: + raise UnsupportedDeviceError( + "Device can only be device_type.gpu or device_type.cpu" + ) + except KeyError: + raise UnsupportedBackendError( + "Backend can only be backend_type.opencl or " + "backend_type.level_zero" + ) + + return num + + def has_gpu_queues (self, backend_ty=backend_type.opencl): + cdef size_t num = 0 + try : + beTy = self._backend_enum_ty_dict[backend_ty] + num = DPPLQueueMgr_GetNumQueues(beTy, _device_type._GPU) + except KeyError: + raise UnsupportedBackendError( + "Backend can only be backend_type.opencl or " + "backend_type.level_zero" + ) if num: return True else: return False - def has_cpu_queues (self): - cdef size_t num = DPPLQueueMgr_GetNumCPUQueues() + def has_cpu_queues (self, backend_ty=backend_type.opencl): + cdef size_t num = 0 + try : + beTy = self._backend_enum_ty_dict[backend_ty] + num = DPPLQueueMgr_GetNumQueues(beTy, _device_type._CPU) + except KeyError: + raise UnsupportedBackendError( + "Backend can only be backend_type.opencl or " + "backend_type.level_zero" + ) if num: return True else: return False - def dump (self): - ''' Prints information about the Runtime object. - ''' - DPPLPlatform_DumpInfo() + def has_sycl_platforms (self): + cdef size_t num_platforms = DPPLPlatform_GetNumPlatforms() + if num_platforms: + return True + else: + return False def is_in_device_context (self): cdef size_t num = DPPLQueueMgr_GetNumActivatedQueues() @@ -525,30 +659,42 @@ cdef class _SyclQueueManager: else: return False - def get_current_device_type (self): - ''' Returns current device type as `device_type` enum - ''' - if self.is_in_device_context(): - return self.get_current_queue().get_sycl_device().get_device_type() - else: - return None + def set_default_queue (self, backend_ty, device_ty, device_id): + cdef DPPLSyclQueueRef ret + try : + beTy = self._backend_ty_dict[backend_ty] + try : + devTy = self._device_ty_dict[device_ty] + ret = DPPLQueueMgr_SetAsDefaultQueue(beTy, devTy, device_id) + if ret is NULL: + self._raise_queue_creation_error( + backend_ty, device_ty, device_id, + "DPPLQueueMgr_PushQueue" + ) + + except KeyError: + raise UnsupportedDeviceError("Device can only be gpu or cpu") + except KeyError: + raise UnsupportedBackendError("Backend can only be opencl or " + "level-0") # This private instance of the _SyclQueueManager should not be directly # accessed outside the module. -_qmgr = _SyclQueueManager() +_mgr = _SyclRTManager() # Global bound functions -dump = _qmgr.dump -get_current_queue = _qmgr.get_current_queue -get_current_device_type = _qmgr.get_current_device_type -get_num_platforms = _qmgr.get_num_platforms -get_num_activated_queues = _qmgr.get_num_activated_queues -has_cpu_queues = _qmgr.has_cpu_queues -has_gpu_queues = _qmgr.has_gpu_queues -has_sycl_platforms = _qmgr.has_sycl_platforms -set_default_queue = _qmgr.set_default_queue -is_in_device_context = _qmgr.is_in_device_context +dump = _mgr.dump +get_current_queue = _mgr.get_current_queue +get_current_device_type = _mgr.get_current_device_type +get_num_platforms = _mgr.get_num_platforms +get_num_activated_queues = _mgr.get_num_activated_queues +get_num_queues = _mgr.get_num_queues +has_cpu_queues = _mgr.has_cpu_queues +has_gpu_queues = _mgr.has_gpu_queues +has_sycl_platforms = _mgr.has_sycl_platforms +set_default_queue = _mgr.set_default_queue +is_in_device_context = _mgr.is_in_device_context def create_program_from_source (SyclQueue q, unicode source, unicode copts=""): @@ -565,13 +711,19 @@ def create_program_from_source (SyclQueue q, unicode source, unicode copts=""): copts (unicode) : Optional compilation flags that will be used when compiling the program. - Returns: - program (SyclProgram): A SyclProgram object wrapping the - syc::program returned by the C API. + Returns: + program (SyclProgram): A SyclProgram object wrapping the + syc::program returned by the C API. ''' - cdef DPPLSyclProgramRef Pref + BE = q.get_sycl_backend() + if BE != backend_type.opencl: + raise ValueError( + "Cannot create program for a ", BE, "type backend. Currently only " + "OpenCL devices are supported for program creations." + ) + cdef DPPLSyclProgramRef Pref cdef bytes bSrc = source.encode('utf8') cdef bytes bCOpts = copts.encode('utf8') cdef const char *Src = bSrc @@ -601,6 +753,12 @@ def create_program_from_spirv (SyclQueue q, const unsigned char[:] IL): program (SyclProgram): A SyclProgram object wrapping the syc::program returned by the C API. ''' + BE = q.get_sycl_backend() + if BE != backend_type.opencl: + raise ValueError( + "Cannot create program for a ", BE, "type backend. Currently only " + "OpenCL devices are supported for program creations." + ) cdef DPPLSyclProgramRef Pref cdef const unsigned char *dIL = &IL[0] @@ -616,7 +774,7 @@ def create_program_from_spirv (SyclQueue q, const unsigned char[:] IL): from contextlib import contextmanager @contextmanager -def device_context (dev=device_type.gpu, device_num=0): +def device_context (str queue_str="opencl:gpu:0"): # Create a new device context and add it to the front of the runtime's # deque of active contexts (SyclQueueManager.active_contexts_). # Also return a reference to the context. The behavior allows consumers @@ -625,14 +783,23 @@ def device_context (dev=device_type.gpu, device_num=0): # If set_context is unable to create a new context an exception is raised. try: + attrs = queue_str.split(':') + nattrs = len(attrs) + if (nattrs < 2 or nattrs > 3): + raise ValueError("Invalid device context string. Should be " + "backend:device:device_number or " + "backend:device. In the later case the " + "device_number defaults to 0") + if nattrs == 2: + attrs.append("0") ctxt = None - ctxt = _qmgr._set_as_current_queue(dev, device_num) + ctxt = _mgr._set_as_current_queue(attrs[0], attrs[1], int(attrs[2])) yield ctxt finally: # Code to release resource if ctxt: _logger.debug( "Removing the context from the stack of active contexts") - _qmgr._remove_current_queue() + _mgr._remove_current_queue() else: _logger.debug("No context was created so nothing to do") diff --git a/dpctl/tests/__init__.py b/dpctl/tests/__init__.py index 52c61b29d1..f04131d53a 100644 --- a/dpctl/tests/__init__.py +++ b/dpctl/tests/__init__.py @@ -25,6 +25,7 @@ from .test_dump_functions import * from .test_sycl_kernel_submit import * from .test_sycl_program import * +from .test_sycl_queue import * from .test_sycl_queue_manager import * from .test_sycl_queue_memcpy import * from .test_sycl_usm import * diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index 69b24c7405..fd07de1fee 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -28,7 +28,7 @@ import dpctl._memory as dpctl_mem import numpy as np -@unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") +@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") class Test1DKernelSubmit (unittest.TestCase): def test_create_program_from_source (self): @@ -37,7 +37,7 @@ def test_create_program_from_source (self): size_t index = get_global_id(0); \ c[index] = d*a[index] + b[index]; \ }" - with dpctl.device_context(dpctl.device_type.gpu, 0): + with dpctl.device_context("opencl:gpu:0"): q = dpctl.get_current_queue() prog = dpctl.create_program_from_source(q, oclSrc) axpyKernel = prog.get_sycl_kernel('axpy') diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index 1a87de88d3..c59ea631ed 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -27,8 +27,7 @@ import unittest import os - -@unittest.skipIf(not dpctl.has_gpu_queues(), "No SYCL platforms available") +@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") class TestProgramFromOCLSource (unittest.TestCase): def test_create_program_from_source (self): @@ -41,7 +40,7 @@ def test_create_program_from_source (self): size_t index = get_global_id(0); \ c[index] = a[index] + d*b[index]; \ }" - with dpctl.device_context(dpctl.device_type.gpu, 0): + with dpctl.device_context("opencl:gpu:0"): q = dpctl.get_current_queue() prog = dpctl.create_program_from_source(q, oclSrc) self.assertIsNotNone(prog) @@ -58,7 +57,7 @@ def test_create_program_from_source (self): self.assertEqual(axpyKernel.get_num_args(), 4) -@unittest.skipIf(not dpctl.has_gpu_queues(), "No SYCL platforms available") +@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") class TestProgramFromSPRIV (unittest.TestCase): def test_create_program_from_spirv(self): @@ -67,7 +66,7 @@ def test_create_program_from_spirv(self): spirv_file = os.path.join(CURR_DIR, 'input_files/multi_kernel.spv') with open(spirv_file, 'rb') as fin: spirv = fin.read() - with dpctl.device_context(dpctl.device_type.gpu, 0): + with dpctl.device_context("opencl:gpu:0"): q = dpctl.get_current_queue() prog = dpctl.create_program_from_spirv(q,spirv) self.assertIsNotNone(prog) @@ -82,6 +81,48 @@ def test_create_program_from_spirv(self): self.assertEqual(addKernel.get_num_args(), 3) self.assertEqual(axpyKernel.get_num_args(), 4) +@unittest.skipUnless( + dpctl.has_gpu_queues(backend_ty=dpctl.backend_type.level_zero), + "No Level0 GPU queues available" +) +class TestProgramForLevel0GPU (unittest.TestCase): + + def test_create_program_from_spirv(self): + + CURR_DIR = os.path.dirname(os.path.abspath(__file__)) + spirv_file = os.path.join(CURR_DIR, 'input_files/multi_kernel.spv') + with open(spirv_file, 'rb') as fin: + spirv = fin.read() + with dpctl.device_context("level0:gpu:0"): + q = dpctl.get_current_queue() + try: + prog = dpctl.create_program_from_spirv(q,spirv) + self.fail( + "Tried to create program for an unsupported Level0 GPU." + ) + except ValueError: + pass + + def test_create_program_from_source (self): + oclSrc = " \ + kernel void add(global int* a, global int* b, global int* c) { \ + size_t index = get_global_id(0); \ + c[index] = a[index] + b[index]; \ + } \ + kernel void axpy(global int* a, global int* b, global int* c, int d) { \ + size_t index = get_global_id(0); \ + c[index] = a[index] + d*b[index]; \ + }" + with dpctl.device_context("level0:gpu:0"): + q = dpctl.get_current_queue() + try: + prog = dpctl.create_program_from_source(q, oclSrc) + self.fail( + "Tried to create program for an unsupported Level0 GPU." + ) + except ValueError: + pass + if __name__ == '__main__': unittest.main() diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py new file mode 100644 index 0000000000..92a0dafab4 --- /dev/null +++ b/dpctl/tests/test_sycl_queue.py @@ -0,0 +1,49 @@ +##===------------- test_sycl_queue.py - dpctl -------*- Python -*---------===## +## +## Data Parallel Control (dpctl) +## +## Copyright 2020 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +##===----------------------------------------------------------------------===## +## +## \file +## Defines unit test cases for the SyclQueue classes defined in sycl_core.pyx. +##===----------------------------------------------------------------------===## + +import dpctl +import unittest + +class TestSyclQueue (unittest.TestCase): + @unittest.skipUnless( + dpctl.has_gpu_queues(), "No OpenCL GPU queues available" + ) + @unittest.skipUnless( + dpctl.has_cpu_queues(), "No OpenCL CPU queues available" + ) + def test_queue_not_equals (self): + with dpctl.device_context("opencl:gpu") as gpuQ0: + with dpctl.device_context("opencl:cpu") as cpuQ: + self.assertFalse(cpuQ.equals(gpuQ0)) + + @unittest.skipUnless( + dpctl.has_gpu_queues(), "No OpenCL GPU queues available" + ) + def test_queue_equals (self): + with dpctl.device_context("opencl:gpu") as gpuQ0: + with dpctl.device_context("opencl:gpu") as gpuQ1: + self.assertTrue(gpuQ0.equals(gpuQ1)) + +if __name__ == '__main__': + unittest.main() diff --git a/dpctl/tests/test_sycl_queue_manager.py b/dpctl/tests/test_sycl_queue_manager.py index 1eddef7fcb..0427daf73f 100644 --- a/dpctl/tests/test_sycl_queue_manager.py +++ b/dpctl/tests/test_sycl_queue_manager.py @@ -28,7 +28,7 @@ class TestGetNumPlatforms (unittest.TestCase): @unittest.skipIf(not dpctl.has_sycl_platforms(), - "No SYCL platforms available") + "No SYCL platforms available") def test_dpctl_get_num_platforms (self): if(dpctl.has_sycl_platforms): self.assertGreaterEqual(dpctl.get_num_platforms(), 1) @@ -56,14 +56,22 @@ class TestIsInDeviceContext (unittest.TestCase): def test_is_in_device_context_outside_device_ctxt (self): self.assertFalse(dpctl.is_in_device_context()) + @unittest.skipUnless( + dpctl.has_gpu_queues(), "No OpenCL GPU queues available" + ) def test_is_in_device_context_inside_device_ctxt (self): - with dpctl.device_context(dpctl.device_type.gpu): + with dpctl.device_context("opencl:gpu:0"): self.assertTrue(dpctl.is_in_device_context()) - @unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available") + @unittest.skipUnless( + dpctl.has_gpu_queues(), "No OpenCL GPU queues available" + ) + @unittest.skipUnless( + dpctl.has_cpu_queues(), "No OpenCL CPU queues available" + ) def test_is_in_device_context_inside_nested_device_ctxt (self): - with dpctl.device_context(dpctl.device_type.cpu): - with dpctl.device_context(dpctl.device_type.gpu): + with dpctl.device_context("opencl:cpu:0"): + with dpctl.device_context("opencl:gpu:0"): self.assertTrue(dpctl.is_in_device_context()) self.assertTrue(dpctl.is_in_device_context()) self.assertFalse(dpctl.is_in_device_context()) @@ -73,28 +81,38 @@ def test_is_in_device_context_inside_nested_device_ctxt (self): class TestIsInDeviceContext (unittest.TestCase): def test_get_current_device_type_outside_device_ctxt (self): - self.assertEqual(dpctl.get_current_device_type(), None) + self.assertNotEqual(dpctl.get_current_device_type(), None) def test_get_current_device_type_inside_device_ctxt (self): - self.assertEqual(dpctl.get_current_device_type(), None) + self.assertNotEqual(dpctl.get_current_device_type(), None) - with dpctl.device_context(dpctl.device_type.gpu): - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) + with dpctl.device_context("opencl:gpu:0"): + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.gpu + ) - self.assertEqual(dpctl.get_current_device_type(), None) + self.assertNotEqual(dpctl.get_current_device_type(), None) - @unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available") + @unittest.skipUnless( + dpctl.has_cpu_queues(), "No OpenCL CPU queues available" + ) def test_get_current_device_type_inside_nested_device_ctxt (self): - self.assertEqual(dpctl.get_current_device_type(), None) + self.assertNotEqual(dpctl.get_current_device_type(), None) - with dpctl.device_context(dpctl.device_type.cpu): - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) + with dpctl.device_context("opencl:cpu:0"): + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.cpu + ) - with dpctl.device_context(dpctl.device_type.gpu): - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) + with dpctl.device_context("opencl:gpu:0"): + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.gpu + ) + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.cpu + ) - self.assertEqual(dpctl.get_current_device_type(), None) + self.assertNotEqual(dpctl.get_current_device_type(), None) @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") @@ -103,27 +121,36 @@ class TestGetCurrentQueueInMultipleThreads (unittest.TestCase): def test_num_current_queues_outside_with_clause (self): self.assertEqual(dpctl.get_num_activated_queues(), 0) - @unittest.skipIf(not dpctl.has_gpu_queues(), "No GPU platforms available") - @unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available") + @unittest.skipUnless( + dpctl.has_gpu_queues(), "No OpenCL GPU queues available" + ) + @unittest.skipUnless( + dpctl.has_cpu_queues(), "No OpenCL CPU queues available" + ) def test_num_current_queues_inside_with_clause (self): - with dpctl.device_context(dpctl.device_type.cpu): + with dpctl.device_context("opencl:cpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 1) - with dpctl.device_context(dpctl.device_type.gpu): + with dpctl.device_context("opencl:gpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 2) self.assertEqual(dpctl.get_num_activated_queues(), 0) - @unittest.skipIf(not dpctl.has_gpu_queues(), "No GPU platforms available") - @unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available") + + @unittest.skipUnless( + dpctl.has_gpu_queues(), "No OpenCL GPU queues available" + ) + @unittest.skipUnless( + dpctl.has_cpu_queues(), "No OpenCL CPU queues available" + ) def test_num_current_queues_inside_threads (self): from threading import Thread, local def SessionThread (self): self.assertEqual(dpctl.get_num_activated_queues(), 0) - with dpctl.device_context(dpctl.device_type.gpu): + with dpctl.device_context("opencl:gpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 1) Session1 = Thread(target=SessionThread(self)) Session2 = Thread(target=SessionThread(self)) - with dpctl.device_context(dpctl.device_type.cpu): + with dpctl.device_context("opencl:cpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 1) Session1.start() Session2.start() diff --git a/dpctl/tests/test_sycl_usm.py b/dpctl/tests/test_sycl_usm.py index 895a9f5180..6a1d5ddc2f 100644 --- a/dpctl/tests/test_sycl_usm.py +++ b/dpctl/tests/test_sycl_usm.py @@ -47,12 +47,14 @@ def test_memory_without_context (self): # Without context self.assertEqual(mobj._usm_type(), 'shared') - @unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available") + @unittest.skipUnless( + dpctl.has_cpu_queues(), "No OpenCL CPU queues available" + ) def test_memory_cpu_context (self): mobj = self._create_memory() # CPU context - with dpctl.device_context(dpctl.device_type.cpu): + with dpctl.device_context("opencl:cpu:0"): # type respective to the context in which # memory was created usm_type = mobj._usm_type() @@ -65,12 +67,14 @@ def test_memory_cpu_context (self): # not in the same SYCL context self.assertTrue(usm_type in ['unknown', 'shared']) - @unittest.skipIf(not dpctl.has_gpu_queues(), "No GPU platforms available") + @unittest.skipUnless( + dpctl.has_gpu_queues(), "No OpenCL GPU queues available" + ) def test_memory_gpu_context (self): mobj = self._create_memory() # GPU context - with dpctl.device_context(dpctl.device_type.gpu): + with dpctl.device_context("opencl:gpu:0"): usm_type = mobj._usm_type() self.assertEqual(usm_type, 'shared') current_queue = dpctl.get_current_queue() diff --git a/scripts/build_for_develop.sh b/scripts/build_for_develop.sh index 63057d8099..3d54402846 100755 --- a/scripts/build_for_develop.sh +++ b/scripts/build_for_develop.sh @@ -27,6 +27,7 @@ cmake \ ../backends make V=1 -n -j 4 && make check && make install +#make V=1 -n -j 4 && make install popd cp install/lib/*.so dpctl/ @@ -47,4 +48,4 @@ export CXX=dpcpp export CFLAGS=-fPIC python setup.py clean --all python setup.py build develop -python -m unittest dpctl.tests +python -m unittest -v dpctl.tests