diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp index c83e9e7323036..57956cb64a67d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp @@ -19,8 +19,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( (void)hDevice; (void)pCommandBufferDesc; (void)phCommandBuffer; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -28,8 +28,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { (void)hCommandBuffer; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -37,8 +37,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { (void)hCommandBuffer; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -46,8 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { (void)hCommandBuffer; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -68,8 +68,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -86,8 +86,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -107,8 +107,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -134,8 +134,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -155,8 +155,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -175,8 +175,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -203,8 +203,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -232,8 +232,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( (void)pSyncPointWaitList; (void)pSyncPoint; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -247,7 +247,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( (void)phEventWaitList; (void)phEvent; - sycl::detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp index 86975e5097257..83264160e7002 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp @@ -72,17 +72,17 @@ std::string getCudaVersionString() { return stream.str(); } -void sycl::detail::ur::die(const char *Message) { +void detail::ur::die(const char *Message) { std::cerr << "ur_die: " << Message << std::endl; std::terminate(); } -void sycl::detail::ur::assertion(bool Condition, const char *Message) { +void detail::ur::assertion(bool Condition, const char *Message) { if (!Condition) die(Message); } -void sycl::detail::ur::cuPrint(const char *Message) { +void detail::ur::cuPrint(const char *Message) { std::cerr << "ur_print: " << Message << std::endl; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp index 5cfa609018b29..82b38c10d449c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp @@ -8,7 +8,6 @@ #pragma once #include -#include #include ur_result_t mapErrorUR(CUresult Result); @@ -37,8 +36,6 @@ extern thread_local char ErrorMessage[MaxMessageSize]; ur_result_t ErrorCode); /// ------ Error handling, matching OpenCL plugin semantics. -namespace sycl { -__SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { namespace ur { @@ -55,5 +52,3 @@ void assertion(bool Condition, const char *Message = nullptr); } // namespace ur } // namespace detail -} // __SYCL_INLINE_VER_NAMESPACE(_V1) -} // namespace sycl diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp index 74a32bdac2748..2b621383da094 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp @@ -66,7 +66,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( } case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { int Major = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hContext->getDevice()->get()) == CUDA_SUCCESS); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp index 52d4e3badc8f1..a81599d629a7a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp @@ -15,7 +15,7 @@ int getAttribute(ur_device_handle_t device, CUdevice_attribute attribute) { int value; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&value, attribute, device->get()) == CUDA_SUCCESS); return value; } @@ -53,11 +53,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: { int ComputeUnits = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&ComputeUnits, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(ComputeUnits >= 0); + detail::ur::assertion(ComputeUnits >= 0); return ReturnValue(static_cast(ComputeUnits)); } case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: { @@ -69,20 +69,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } ReturnSizes; int MaxX = 0, MaxY = 0, MaxZ = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxX, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(MaxX >= 0); + detail::ur::assertion(MaxX >= 0); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxY, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(MaxY >= 0); + detail::ur::assertion(MaxY >= 0); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxZ, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(MaxZ >= 0); + detail::ur::assertion(MaxZ >= 0); ReturnSizes.Sizes[0] = size_t(MaxX); ReturnSizes.Sizes[1] = size_t(MaxY); @@ -95,20 +95,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, size_t Sizes[MaxWorkItemDimensions]; } ReturnSizes; int MaxX = 0, MaxY = 0, MaxZ = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(MaxX >= 0); + detail::ur::assertion(MaxX >= 0); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxY, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(MaxY >= 0); + detail::ur::assertion(MaxY >= 0); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxZ, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(MaxZ >= 0); + detail::ur::assertion(MaxZ >= 0); ReturnSizes.Sizes[0] = size_t(MaxX); ReturnSizes.Sizes[1] = size_t(MaxY); @@ -118,12 +118,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { int MaxWorkGroupSize = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxWorkGroupSize, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(MaxWorkGroupSize >= 0); + detail::ur::assertion(MaxWorkGroupSize >= 0); return ReturnValue(size_t(MaxWorkGroupSize)); } @@ -172,14 +172,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { // Number of sub-groups = max block size / warp size + possible remainder int MaxThreads = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxThreads, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hDevice->get()) == CUDA_SUCCESS); int WarpSize = 0; - sycl::detail::ur::assertion( - cuDeviceGetAttribute(&WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, - hDevice->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetAttribute(&WarpSize, + CU_DEVICE_ATTRIBUTE_WARP_SIZE, + hDevice->get()) == CUDA_SUCCESS); int MaxWarps = (MaxThreads + WarpSize - 1) / WarpSize; return ReturnValue(MaxWarps); } @@ -187,7 +187,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // Volta provides independent thread scheduling // TODO: Revisit for previous generation GPUs int Major = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()) == CUDA_SUCCESS); @@ -197,7 +197,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ATOMIC_64: { int Major = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()) == CUDA_SUCCESS); @@ -214,7 +214,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { int Major = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()) == CUDA_SUCCESS); @@ -255,7 +255,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_BFLOAT16: { int Major = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()) == CUDA_SUCCESS); @@ -266,18 +266,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { // NVIDIA devices only support one sub-group size (the warp size) int WarpSize = 0; - sycl::detail::ur::assertion( - cuDeviceGetAttribute(&WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, - hDevice->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetAttribute(&WarpSize, + CU_DEVICE_ATTRIBUTE_WARP_SIZE, + hDevice->get()) == CUDA_SUCCESS); size_t Sizes[1] = {static_cast(WarpSize)}; return ReturnValue(Sizes, 1); } case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { int ClockFreq = 0; - sycl::detail::ur::assertion( - cuDeviceGetAttribute(&ClockFreq, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, - hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(ClockFreq >= 0); + detail::ur::assertion(cuDeviceGetAttribute(&ClockFreq, + CU_DEVICE_ATTRIBUTE_CLOCK_RATE, + hDevice->get()) == CUDA_SUCCESS); + detail::ur::assertion(ClockFreq >= 0); return ReturnValue(static_cast(ClockFreq) / 1000u); } case UR_DEVICE_INFO_ADDRESS_BITS: { @@ -292,8 +292,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // CL_DEVICE_TYPE_CUSTOM. size_t Global = 0; - sycl::detail::ur::assertion(cuDeviceTotalMem(&Global, hDevice->get()) == - CUDA_SUCCESS); + detail::ur::assertion(cuDeviceTotalMem(&Global, hDevice->get()) == + CUDA_SUCCESS); auto QuarterGlobal = static_cast(Global / 4u); @@ -308,7 +308,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (std::getenv("SYCL_PI_CUDA_ENABLE_IMAGE_SUPPORT") != nullptr) { Enabled = true; } else { - sycl::detail::ur::cuPrint( + detail::ur::cuPrint( "Images are not fully supported by the CUDA BE, their support is " "disabled by default. Their partial support can be activated by " "setting SYCL_PI_CUDA_ENABLE_IMAGE_SUPPORT environment variable at " @@ -332,17 +332,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int TexHeight = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&TexHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(TexHeight >= 0); + detail::ur::assertion(TexHeight >= 0); int SurfHeight = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&SurfHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(SurfHeight >= 0); + detail::ur::assertion(SurfHeight >= 0); int Min = std::min(TexHeight, SurfHeight); @@ -351,17 +351,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int TexWidth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&TexWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(TexWidth >= 0); + detail::ur::assertion(TexWidth >= 0); int SurfWidth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&SurfWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(SurfWidth >= 0); + detail::ur::assertion(SurfWidth >= 0); int Min = std::min(TexWidth, SurfWidth); @@ -370,17 +370,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int TexHeight = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&TexHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(TexHeight >= 0); + detail::ur::assertion(TexHeight >= 0); int SurfHeight = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&SurfHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(SurfHeight >= 0); + detail::ur::assertion(SurfHeight >= 0); int Min = std::min(TexHeight, SurfHeight); @@ -389,17 +389,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int TexWidth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&TexWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(TexWidth >= 0); + detail::ur::assertion(TexWidth >= 0); int SurfWidth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&SurfWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(SurfWidth >= 0); + detail::ur::assertion(SurfWidth >= 0); int Min = std::min(TexWidth, SurfWidth); @@ -408,17 +408,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { // Take the smaller of maximum surface and maximum texture depth. int TexDepth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&TexDepth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(TexDepth >= 0); + detail::ur::assertion(TexDepth >= 0); int SurfDepth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&SurfDepth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(SurfDepth >= 0); + detail::ur::assertion(SurfDepth >= 0); int Min = std::min(TexDepth, SurfDepth); @@ -427,17 +427,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { // Take the smaller of maximum surface and maximum texture width. int TexWidth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&TexWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(TexWidth >= 0); + detail::ur::assertion(TexWidth >= 0); int SurfWidth = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&SurfWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(SurfWidth >= 0); + detail::ur::assertion(SurfWidth >= 0); int Min = std::min(TexWidth, SurfWidth); @@ -459,7 +459,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { int MemBaseAddrAlign = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MemBaseAddrAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, hDevice->get()) == CUDA_SUCCESS); @@ -504,27 +504,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { int CacheSize = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(CacheSize >= 0); + detail::ur::assertion(CacheSize >= 0); // The L2 cache is global to the GPU. return ReturnValue(static_cast(CacheSize)); } case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { size_t Bytes = 0; // Runtime API has easy access to this value, driver API info is scarse. - sycl::detail::ur::assertion(cuDeviceTotalMem(&Bytes, hDevice->get()) == - CUDA_SUCCESS); + detail::ur::assertion(cuDeviceTotalMem(&Bytes, hDevice->get()) == + CUDA_SUCCESS); return ReturnValue(uint64_t{Bytes}); } case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: { int ConstantMemory = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&ConstantMemory, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(ConstantMemory >= 0); + detail::ur::assertion(ConstantMemory >= 0); return ReturnValue(static_cast(ConstantMemory)); } @@ -542,30 +542,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // CUDA has its own definition of "local memory", which maps to OpenCL's // "private memory". int LocalMemSize = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&LocalMemSize, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(LocalMemSize >= 0); + detail::ur::assertion(LocalMemSize >= 0); return ReturnValue(static_cast(LocalMemSize)); } case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { int ECCEnabled = 0; - sycl::detail::ur::assertion( - cuDeviceGetAttribute(&ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, - hDevice->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetAttribute(&ECCEnabled, + CU_DEVICE_ATTRIBUTE_ECC_ENABLED, + hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion((ECCEnabled == 0) | (ECCEnabled == 1)); + detail::ur::assertion((ECCEnabled == 0) | (ECCEnabled == 1)); auto Result = static_cast(ECCEnabled); return ReturnValue(Result); } case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: { int IsIntegrated = 0; - sycl::detail::ur::assertion( - cuDeviceGetAttribute(&IsIntegrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, - hDevice->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetAttribute(&IsIntegrated, + CU_DEVICE_ATTRIBUTE_INTEGRATED, + hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion((IsIntegrated == 0) | (IsIntegrated == 1)); + detail::ur::assertion((IsIntegrated == 0) | (IsIntegrated == 1)); auto result = static_cast(IsIntegrated); return ReturnValue(result); } @@ -620,9 +620,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_NAME: { static constexpr size_t MaxDeviceNameLength = 256u; char Name[MaxDeviceNameLength]; - sycl::detail::ur::assertion( - cuDeviceGetName(Name, MaxDeviceNameLength, hDevice->get()) == - CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetName(Name, MaxDeviceNameLength, + hDevice->get()) == CUDA_SUCCESS); return ReturnValue(Name, strlen(Name) + 1); } case UR_DEVICE_INFO_VENDOR: { @@ -641,13 +640,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_VERSION: { std::stringstream SS; int Major; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()) == CUDA_SUCCESS); SS << Major; int Minor; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()) == CUDA_SUCCESS); @@ -666,11 +665,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, int Major = 0; int Minor = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()) == CUDA_SUCCESS); @@ -847,27 +846,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { size_t FreeMemory = 0; size_t TotalMemory = 0; - sycl::detail::ur::assertion(cuMemGetInfo(&FreeMemory, &TotalMemory) == - CUDA_SUCCESS, - "failed cuMemGetInfo() API."); + detail::ur::assertion(cuMemGetInfo(&FreeMemory, &TotalMemory) == + CUDA_SUCCESS, + "failed cuMemGetInfo() API."); return ReturnValue(FreeMemory); } case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { int Value = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Value, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(Value >= 0); + detail::ur::assertion(Value >= 0); // Convert kilohertz to megahertz when returning. return ReturnValue(Value / 1000); } case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: { int Value = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Value, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(Value >= 0); + detail::ur::assertion(Value >= 0); return ReturnValue(Value); } case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { @@ -875,20 +874,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_DEVICE_ID: { int Value = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Value, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion(Value >= 0); + detail::ur::assertion(Value >= 0); return ReturnValue(Value); } case UR_DEVICE_INFO_UUID: { CUuuid UUID; #if (CUDA_VERSION >= 11040) - sycl::detail::ur::assertion(cuDeviceGetUuid_v2(&UUID, hDevice->get()) == - CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetUuid_v2(&UUID, hDevice->get()) == + CUDA_SUCCESS); #else - sycl::detail::ur::assertion(cuDeviceGetUuid(&UUID, hDevice->get()) == - CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetUuid(&UUID, hDevice->get()) == + CUDA_SUCCESS); #endif std::array Name; std::copy(UUID.bytes, UUID.bytes + 16, Name.begin()); @@ -896,13 +895,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: { int Major = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()) == CUDA_SUCCESS); int Minor = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()) == CUDA_SUCCESS); @@ -918,7 +917,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } else if (IsOrinAGX) { MemoryClockKHz = 3200000; } else { - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MemoryClockKHz, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, hDevice->get()) == CUDA_SUCCESS); @@ -928,7 +927,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (IsOrinAGX) { MemoryBusWidth = 256; } else { - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MemoryBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, hDevice->get()) == CUDA_SUCCESS); @@ -973,7 +972,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, &MaxRegisters, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, hDevice->get())); - sycl::detail::ur::assertion(MaxRegisters >= 0); + detail::ur::assertion(MaxRegisters >= 0); return ReturnValue(static_cast(MaxRegisters)); } @@ -984,12 +983,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_PCI_ADDRESS: { constexpr size_t AddressBufferSize = 13; char AddressBuffer[AddressBufferSize]; - sycl::detail::ur::assertion( - cuDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, hDevice->get()) == - CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, + hDevice->get()) == CUDA_SUCCESS); // CUDA API (8.x - 12.1) guarantees 12 bytes + \0 are written - sycl::detail::ur::assertion(strnlen(AddressBuffer, AddressBufferSize) == - 12); + detail::ur::assertion(strnlen(AddressBuffer, AddressBufferSize) == 12); return ReturnValue(AddressBuffer, strnlen(AddressBuffer, AddressBufferSize - 1) + 1); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp index 1cfc5cc40a4a6..792f69092682e 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp @@ -794,7 +794,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) { case CU_AD_FORMAT_FLOAT: return 4; default: - sycl::detail::ur::die("Invalid image format."); + detail::ur::die("Invalid image format."); return 0; } } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp index 8916197b73f1c..066c0498f1d0d 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp @@ -119,7 +119,7 @@ ur_result_t ur_event_handle_t_::record() { try { EventID = Queue->getNextEventID(); if (EventID == 0) { - sycl::detail::ur::die( + detail::ur::die( "Unrecoverable program state reached in event identifier overflow"); } Result = UR_CHECK_ERROR(cuEventRecord(EvEnd, Stream)); @@ -182,7 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, case UR_EVENT_INFO_CONTEXT: return ReturnValue(hEvent->getContext()); default: - sycl::detail::ur::die("Event info request not implemented"); + detail::ur::die("Event info request not implemented"); } return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -213,7 +213,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( default: break; } - sycl::detail::ur::die("Event Profiling info request not implemented"); + detail::ur::die("Event Profiling info request not implemented"); return {}; } @@ -221,7 +221,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t, ur_execution_info_t, ur_event_callback_t, void *) { - sycl::detail::ur::die("Event Callback not implemented in CUDA adapter"); + detail::ur::die("Event Callback not implemented in CUDA adapter"); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -254,8 +254,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { const auto RefCount = hEvent->incrementReferenceCount(); - sycl::detail::ur::assertion( - RefCount != 0, "Reference count overflow detected in urEventRetain."); + detail::ur::assertion(RefCount != 0, + "Reference count overflow detected in urEventRetain."); return UR_RESULT_SUCCESS; } @@ -265,9 +265,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - sycl::detail::ur::assertion( - hEvent->getReferenceCount() != 0, - "Reference count overflow detected in urEventRelease."); + detail::ur::assertion(hEvent->getReferenceCount() != 0, + "Reference count overflow detected in urEventRelease."); // decrement ref count. If it is 0, delete the event. if (hEvent->decrementReferenceCount() == 0) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp index 358f59c499e17..7d46ce039bab8 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp @@ -73,24 +73,24 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, size_t GlobalWorkSize[3] = {0, 0, 0}; int MaxBlockDimX{0}, MaxBlockDimY{0}, MaxBlockDimZ{0}; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxBlockDimX, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxBlockDimY, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxBlockDimZ, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, hDevice->get()) == CUDA_SUCCESS); int MaxGridDimX{0}, MaxGridDimY{0}, MaxGridDimZ{0}; - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxGridDimX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxGridDimY, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, hDevice->get()) == CUDA_SUCCESS); - sycl::detail::ur::assertion( + detail::ur::assertion( cuDeviceGetAttribute(&MaxGridDimZ, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, hDevice->get()) == CUDA_SUCCESS); @@ -101,7 +101,7 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { int MaxThreads = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuFuncGetAttribute(&MaxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hKernel->get()) == CUDA_SUCCESS); return ReturnValue(size_t(MaxThreads)); @@ -122,7 +122,7 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { // OpenCL LOCAL == CUDA SHARED int Bytes = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuFuncGetAttribute(&Bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, hKernel->get()) == CUDA_SUCCESS); return ReturnValue(uint64_t(Bytes)); @@ -130,17 +130,17 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { // Work groups should be multiples of the warp size int WarpSize = 0; - sycl::detail::ur::assertion( - cuDeviceGetAttribute(&WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, - hDevice->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetAttribute(&WarpSize, + CU_DEVICE_ATTRIBUTE_WARP_SIZE, + hDevice->get()) == CUDA_SUCCESS); return ReturnValue(static_cast(WarpSize)); } case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { // OpenCL PRIVATE == CUDA LOCAL int Bytes = 0; - sycl::detail::ur::assertion( - cuFuncGetAttribute(&Bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, - hKernel->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuFuncGetAttribute(&Bytes, + CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, + hKernel->get()) == CUDA_SUCCESS); return ReturnValue(uint64_t(Bytes)); } default: @@ -231,9 +231,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, return ReturnValue(""); case UR_KERNEL_INFO_NUM_REGS: { int NumRegs = 0; - sycl::detail::ur::assertion( - cuFuncGetAttribute(&NumRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, - hKernel->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuFuncGetAttribute(&NumRegs, + CU_FUNC_ATTRIBUTE_NUM_REGS, + hKernel->get()) == CUDA_SUCCESS); return ReturnValue(static_cast(NumRegs)); } default: @@ -254,15 +254,15 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: { // Sub-group size is equivalent to warp size int WarpSize = 0; - sycl::detail::ur::assertion( - cuDeviceGetAttribute(&WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, - hDevice->get()) == CUDA_SUCCESS); + detail::ur::assertion(cuDeviceGetAttribute(&WarpSize, + CU_DEVICE_ATTRIBUTE_WARP_SIZE, + hDevice->get()) == CUDA_SUCCESS); return ReturnValue(static_cast(WarpSize)); } case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: { // Number of sub-groups = max block size / warp size + possible remainder int MaxThreads = 0; - sycl::detail::ur::assertion( + detail::ur::assertion( cuFuncGetAttribute(&MaxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hKernel->get()) == CUDA_SUCCESS); int WarpSize = 0; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp index b19acea3159f2..f0c2765794762 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp @@ -162,8 +162,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { // error for which it is unclear if the function that reported it succeeded // or not. Either way, the state of the program is compromised and likely // unrecoverable. - sycl::detail::ur::die( - "Unrecoverable program state reached in urMemRelease"); + detail::ur::die("Unrecoverable program state reached in urMemRelease"); } return UR_RESULT_SUCCESS; @@ -331,7 +330,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( PixelTypeSizeBytes = 4; break; default: - sycl::detail::ur::die( + detail::ur::die( "urMemImageCreate given unsupported image_channel_data_type"); } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp index 05443eeed89df..32391fec5c136 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp @@ -265,7 +265,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( else if (CuFlags == CU_STREAM_NON_BLOCKING) Flags = UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; else - sycl::detail::ur::die("Unknown cuda stream"); + detail::ur::die("Unknown cuda stream"); std::vector ComputeCuStreams(1, CuStream); std::vector TransferCuStreams(0); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp index 36ec89fb9da3c..836e47f988e50 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp @@ -73,7 +73,7 @@ urSamplerRelease(ur_sampler_handle_t hSampler) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - sycl::detail::ur::assertion( + detail::ur::assertion( hSampler->getReferenceCount() != 0, "Reference count overflow detected in urSamplerRelease.");