diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 315a4ea81b..e5f01c2f4c 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -164,7 +164,7 @@ jobs:
       matrix:
         adapter: [
           {name: CUDA, triplet: nvptx64-nvidia-cuda},
-          {name: HIP, triplet: spir64}, # should be amdgcn-amdhsa, but build scripts for device binaries are currently broken for this target.
+          {name: HIP, triplet: amdgcn-amd-amdhsa},
          {name: L0, triplet: spir64}
        ]
        build_type: [Debug, Release]
@@ -198,6 +198,8 @@ jobs:
          -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
          -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
          -DUR_CONFORMANCE_TARGET_TRIPLES=${{matrix.adapter.triplet}}
+          ${{ matrix.adapter.name == 'HIP' && '-DAMD_ARCH=gfx1030' || '' }}
+          ${{ matrix.adapter.name == 'HIP' && '-DUR_HIP_PLATFORM=AMD' || '' }}

    - name: Build
      # This is so that device binaries can find the sycl runtime library
diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp
index 5761f24e0a..c752c3fd14 100644
--- a/source/adapters/cuda/enqueue.cpp
+++ b/source/adapters/cuda/enqueue.cpp
@@ -121,7 +121,10 @@ ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size,
   for (auto &UnmappedFlag : UnmappedMemAdviceFlags) {
     if (URAdviceFlags & UnmappedFlag) {
-      throw UR_RESULT_ERROR_INVALID_ENUMERATION;
+      setErrorMessage("Memory advice ignored because the CUDA backend does not "
+                      "support some of the specified flags",
+                      UR_RESULT_SUCCESS);
+      return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
     }
   }

@@ -1355,7 +1358,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
     ur_queue_handle_t hQueue, const void *pMem, size_t size,
     ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList,
     const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
-  unsigned int PointerRangeSize = 0;
+  std::ignore = flags;
+
+  size_t PointerRangeSize = 0;
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem));
   UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
@@ -1363,7 +1368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(

   // Certain cuda devices and Windows do not have support for some Unified
   // Memory features. cuMemPrefetchAsync requires concurrent memory access
-  // for managed memory. Therfore, ignore prefetch hint if concurrent managed
+  // for managed memory. Therefore, ignore prefetch hint if concurrent managed
   // memory access is not available.
   if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
     setErrorMessage("Prefetch hint ignored as device does not support "
@@ -1381,10 +1386,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
     return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
   }

-  // flags is currently unused so fail if set
-  if (flags != 0)
-    return UR_RESULT_ERROR_INVALID_VALUE;
-
   ur_result_t Result = UR_RESULT_SUCCESS;

   std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr};

@@ -1415,7 +1416,7 @@ UR_APIEXPORT ur_result_t UR_APICALL
 urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
                    ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) {
-  unsigned int PointerRangeSize = 0;
+  size_t PointerRangeSize = 0;
   UR_CHECK_ERROR(cuPointerGetAttribute(
       &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem));
   UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp
index e868793319..bee94d00a6 100644
--- a/source/adapters/cuda/program.cpp
+++ b/source/adapters/cuda/program.cpp
@@ -165,6 +165,42 @@ ur_result_t getKernelNames(ur_program_handle_t) {
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }

+/// Loads images from a list of PTX or CUBIN binaries.
+/// Note: No calls to CUDA driver API in this function, only store binaries
+/// for later.
+///
+/// Note: Only supports one device
+///
+ur_result_t createProgram(ur_context_handle_t hContext,
+                          ur_device_handle_t hDevice, size_t size,
+                          const uint8_t *pBinary,
+                          const ur_program_properties_t *pProperties,
+                          ur_program_handle_t *phProgram) {
+  UR_ASSERT(hContext->getDevice()->get() == hDevice->get(),
+            UR_RESULT_ERROR_INVALID_CONTEXT);
+  UR_ASSERT(size, UR_RESULT_ERROR_INVALID_SIZE);
+
+  std::unique_ptr<ur_program_handle_t_> RetProgram{
+      new ur_program_handle_t_{hContext}};
+
+  if (pProperties) {
+    if (pProperties->count > 0 && pProperties->pMetadatas == nullptr) {
+      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+    } else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr) {
+      return UR_RESULT_ERROR_INVALID_SIZE;
+    }
+    UR_CHECK_ERROR(
+        RetProgram->setMetadata(pProperties->pMetadatas, pProperties->count));
+  }
+
+  auto pBinary_string = reinterpret_cast<const char *>(pBinary);
+
+  UR_CHECK_ERROR(RetProgram->setBinary(pBinary_string, size));
+  *phProgram = RetProgram.release();
+
+  return UR_RESULT_SUCCESS;
+}
+
 /// CUDA will handle the PTX/CUBIN binaries internally through CUmodule object.
 /// So, urProgramCreateWithIL and urProgramCreateWithBinary are equivalent in
 /// terms of CUDA adapter. See \ref urProgramCreateWithBinary.
@@ -175,8 +211,8 @@ urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL,
   ur_device_handle_t hDevice = hContext->getDevice();
   auto pBinary = reinterpret_cast<const uint8_t *>(pIL);

-  return urProgramCreateWithBinary(hContext, hDevice, length, pBinary,
-                                   pProperties, phProgram);
+  return createProgram(hContext, hDevice, length, pBinary, pProperties,
+                       phProgram);
 }

 /// CUDA will handle the PTX/CUBIN binaries internally through a call to
@@ -185,7 +221,9 @@ urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL,
 UR_APIEXPORT ur_result_t UR_APICALL
 urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram,
                  const char *pOptions) {
-  return urProgramBuild(hContext, hProgram, pOptions);
+  UR_CHECK_ERROR(urProgramBuild(hContext, hProgram, pOptions));
+  hProgram->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
+  return UR_RESULT_SUCCESS;
 }

 /// Loads the images from a UR program into a CUmodule that can be
@@ -202,6 +240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext,
     ScopedContext Active(hProgram->getContext());
     hProgram->buildProgram(pOptions);
+    hProgram->BinaryType = UR_PROGRAM_BINARY_TYPE_EXECUTABLE;

   } catch (ur_result_t Err) {
     Result = Err;
@@ -241,6 +280,7 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count,
     RetProgram->setBinary(static_cast<const char *>(CuBin), CuBinSize);

     Result = RetProgram->buildProgram(pOptions);
+    RetProgram->BinaryType = UR_PROGRAM_BINARY_TYPE_EXECUTABLE;
   } catch (...) {
     // Upon error attempt cleanup
     UR_CHECK_ERROR(cuLinkDestroy(State));
@@ -287,6 +327,9 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice,
     return ReturnValue(hProgram->BuildOptions.c_str());
   case UR_PROGRAM_BUILD_INFO_LOG:
     return ReturnValue(hProgram->InfoLog, hProgram->MaxLogSize);
+  case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: {
+    return ReturnValue(hProgram->BinaryType);
+  }
   default:
     break;
   }
@@ -384,44 +427,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
   return UR_RESULT_SUCCESS;
 }

-/// Loads images from a list of PTX or CUBIN binaries.
-/// Note: No calls to CUDA driver API in this function, only store binaries
-/// for later.
-///
-/// Note: Only supports one device
-///
 UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
     ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
     const uint8_t *pBinary, const ur_program_properties_t *pProperties,
     ur_program_handle_t *phProgram) {
-  UR_ASSERT(hContext->getDevice()->get() == hDevice->get(),
-            UR_RESULT_ERROR_INVALID_CONTEXT);
-  UR_ASSERT(size, UR_RESULT_ERROR_INVALID_SIZE);
-  ur_result_t Result = UR_RESULT_SUCCESS;
+  UR_CHECK_ERROR(
+      createProgram(hContext, hDevice, size, pBinary, pProperties, phProgram));
+  (*phProgram)->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;

-  std::unique_ptr<ur_program_handle_t_> RetProgram{
-      new ur_program_handle_t_{hContext}};
-
-  if (pProperties) {
-    if (pProperties->count > 0 && pProperties->pMetadatas == nullptr) {
-      return UR_RESULT_ERROR_INVALID_NULL_POINTER;
-    } else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr) {
-      return UR_RESULT_ERROR_INVALID_SIZE;
-    }
-    Result =
-        RetProgram->setMetadata(pProperties->pMetadatas, pProperties->count);
-  }
-  UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
-
-  auto pBinary_string = reinterpret_cast<const char *>(pBinary);
-
-  Result = RetProgram->setBinary(pBinary_string, size);
-  UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
-
-  *phProgram = RetProgram.release();
-
-  return Result;
+  return UR_RESULT_SUCCESS;
 }

 // This entry point is only used for native specialization constants (SPIR-V),
diff --git a/source/adapters/cuda/program.hpp b/source/adapters/cuda/program.hpp
index e27c2d8863..30131a4120 100644
--- a/source/adapters/cuda/program.hpp
+++ b/source/adapters/cuda/program.hpp
@@ -25,6 +25,12 @@ struct ur_program_handle_t_ {
   std::atomic_uint32_t RefCount;
   ur_context_handle_t Context;

+  /* The ur_program_binary_type_t property is defined individually for every
+   * device in a program. However, since the CUDA adapter only has 1 device per
+   * context / program, there is no need to keep track of its value for each
+   * device. */
+  ur_program_binary_type_t BinaryType = UR_PROGRAM_BINARY_TYPE_NONE;
+
   // Metadata
   std::unordered_map<std::string, std::array<uint32_t, 3>>
       KernelReqdWorkGroupSizeMD;
diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp
index 7cec6def8b..5b473c050e 100644
--- a/source/adapters/hip/device.cpp
+++ b/source/adapters/hip/device.cpp
@@ -210,7 +210,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(uint64_t{MaxAlloc});
   }
   case UR_DEVICE_INFO_IMAGE_SUPPORTED: {
-    return ReturnValue(uint32_t{true});
+    return ReturnValue(true);
   }
   case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: {
     // This call doesn't match to HIP as it doesn't have images, but instead
@@ -218,6 +218,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     // but some searching found as of SM 2.x 128 are supported.
     return ReturnValue(128u);
   }
+  case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: {
+    // This call doesn't match to HIP as it doesn't have images, but instead
+    // surfaces and textures. No clear call in the HIP API to determine this,
+    // but some searching found as of SM 2.x 128 are supported.
+    return ReturnValue(128u);
+  }
   case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: {
     // This call doesn't match to HIP as it doesn't have images, but instead
     // surfaces and textures. No clear call in the HIP API to determine this,
@@ -339,7 +345,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(0u);
   }
   case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
-    uint64_t Config =
+    ur_device_fp_capability_flags_t Config =
         UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
         UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
         UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
@@ -350,12 +356,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(Config);
   }
   case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
-    uint64_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
-                      UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
-                      UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
-                      UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
-                      UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
-                      UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
+    ur_device_fp_capability_flags_t Config =
+        UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
+        UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
+        UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
+        UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
+        UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
+        UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
     return ReturnValue(Config);
   }
   case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
@@ -459,14 +466,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   }
   case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
     // The mandated minimum capability:
-    uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
-                          UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
+                                  UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
     return ReturnValue(Capability);
   }
   case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES:
   case UR_DEVICE_INFO_QUEUE_PROPERTIES: {
     // The mandated minimum capability:
-    uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
+    ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
     return ReturnValue(Capability);
   }
   case UR_DEVICE_INFO_BUILT_IN_KERNELS: {
@@ -730,9 +737,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   }

   case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
-    uint64_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
-                            UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
-                            UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE;
+    ur_memory_order_capability_flags_t Capabilities =
+        UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
+        UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
+        UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE;
     return ReturnValue(Capabilities);
   }
   case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
@@ -821,7 +829,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
   case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH:
   case UR_DEVICE_INFO_BFLOAT16:
-    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  case UR_DEVICE_INFO_IL_VERSION:
+  case UR_DEVICE_INFO_ASYNC_BARRIER:
+  case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT:
+    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;

   default:
     break;
@@ -939,15 +950,6 @@ ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
   if (pDeviceTimestamp) {
     UR_CHECK_ERROR(hipEventCreateWithFlags(&Event, hipEventDefault));
     UR_CHECK_ERROR(hipEventRecord(Event));
-  }
-  if (pHostTimestamp) {
-    using namespace std::chrono;
-    *pHostTimestamp =
-        duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
-            .count();
-  }
-
-  if (pDeviceTimestamp) {
     UR_CHECK_ERROR(hipEventSynchronize(Event));
     float ElapsedTime = 0.0f;
     UR_CHECK_ERROR(hipEventElapsedTime(&ElapsedTime,
@@ -955,5 +957,11 @@ ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
     *pDeviceTimestamp = (uint64_t)(ElapsedTime * (double)1e6);
   }

+  if (pHostTimestamp) {
+    using namespace std::chrono;
+    *pHostTimestamp =
+        duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
+            .count();
+  }
   return UR_RESULT_SUCCESS;
 }
diff --git a/source/adapters/hip/kernel.cpp b/source/adapters/hip/kernel.cpp
index 936589401e..642743ddbf 100644
--- a/source/adapters/hip/kernel.cpp
+++ b/source/adapters/hip/kernel.cpp
@@ -22,8 +22,12 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName,
     ScopedContext Active(hProgram->getContext()->getDevice());

     hipFunction_t HIPFunc;
-    UR_CHECK_ERROR(
-        hipModuleGetFunction(&HIPFunc, hProgram->get(), pKernelName));
+    hipError_t KernelError =
+        hipModuleGetFunction(&HIPFunc, hProgram->get(), pKernelName);
+    if (KernelError == hipErrorNotFound) {
+      return UR_RESULT_ERROR_INVALID_KERNEL_NAME;
+    }
+    UR_CHECK_ERROR(KernelError);

     std::string KernelNameWoffset = std::string(pKernelName) + "_with_offset";
     hipFunction_t HIPFuncWithOffsetParam;
@@ -321,3 +325,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle(
     const ur_kernel_native_properties_t *, ur_kernel_handle_t *) {
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }
+
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants(
+    [[maybe_unused]] ur_kernel_handle_t hKernel,
+    [[maybe_unused]] uint32_t count,
+    [[maybe_unused]] const ur_specialization_constant_info_t *pSpecConstants) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp
index 41cb2b94d0..3083d47744 100644
--- a/source/adapters/hip/memory.cpp
+++ b/source/adapters/hip/memory.cpp
@@ -11,6 +11,29 @@
 #include "memory.hpp"
 #include "context.hpp"
 #include
+#include
+
+namespace {
+
+size_t GetHipFormatPixelSize(hipArray_Format Format) {
+  switch (Format) {
+  case HIP_AD_FORMAT_UNSIGNED_INT8:
+  case HIP_AD_FORMAT_SIGNED_INT8:
+    return 1;
+  case HIP_AD_FORMAT_UNSIGNED_INT16:
+  case HIP_AD_FORMAT_SIGNED_INT16:
+  case HIP_AD_FORMAT_HALF:
+    return 2;
+  case HIP_AD_FORMAT_UNSIGNED_INT32:
+  case HIP_AD_FORMAT_SIGNED_INT32:
+  case HIP_AD_FORMAT_FLOAT:
+    return 4;
+  default:
+    detail::ur::die("Invalid HIP format specifier");
+  }
+}
+
+} // namespace

 /// Decreases the reference count of the Mem object.
 /// If this is zero, calls the relevant HIP Free function
@@ -234,7 +257,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,

   UR_ASSERT(MemInfoType <= UR_MEM_INFO_CONTEXT,
             UR_RESULT_ERROR_INVALID_ENUMERATION);
-  UR_ASSERT(hMemory->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);

   UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet);

@@ -243,9 +265,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
   switch (MemInfoType) {
   case UR_MEM_INFO_SIZE: {
     try {
-      size_t AllocSize = 0;
-      UR_CHECK_ERROR(hipMemGetAddressRange(
-          nullptr, &AllocSize, std::get<BufferMem>(hMemory->Mem).Ptr));
+      const auto MemVisitor = [](auto &&Mem) -> size_t {
+        using T = std::decay_t<decltype(Mem)>;
+        if constexpr (std::is_same_v<T, BufferMem>) {
+          size_t AllocSize = 0;
+          hipDeviceptr_t BasePtr = nullptr;
+          UR_CHECK_ERROR(hipMemGetAddressRange(&BasePtr, &AllocSize, Mem.Ptr));
+          return AllocSize;
+        } else if constexpr (std::is_same_v<T, SurfaceMem>) {
+          HIP_ARRAY3D_DESCRIPTOR ArrayDescriptor;
+          UR_CHECK_ERROR(hipArray3DGetDescriptor(&ArrayDescriptor, Mem.Array));
+          const auto PixelSizeBytes =
+              GetHipFormatPixelSize(ArrayDescriptor.Format) *
+              ArrayDescriptor.NumChannels;
+          const auto ImageSizeBytes =
+              PixelSizeBytes *
+              (ArrayDescriptor.Width ? ArrayDescriptor.Width : 1) *
+              (ArrayDescriptor.Height ? ArrayDescriptor.Height : 1) *
+              (ArrayDescriptor.Depth ? ArrayDescriptor.Depth : 1);
+          return ImageSizeBytes;
+        } else {
+          static_assert(ur_always_false_t<T>, "Not exhaustive visitor!");
+        }
+      };
+
+      const auto AllocSize = std::visit(MemVisitor, hMemory->Mem);
       return ReturnValue(AllocSize);
     } catch (ur_result_t Err) {
       return Err;
@@ -481,11 +525,91 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate(
   return Result;
 }

-/// \TODO Not implemented
-UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t,
-                                                      ur_image_info_t, size_t,
-                                                      void *, size_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory,
+                                                      ur_image_info_t propName,
+                                                      size_t propSize,
+                                                      void *pPropValue,
+                                                      size_t *pPropSizeRet) {
+  UR_ASSERT(hMemory->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  ScopedContext Active(hMemory->getContext()->getDevice());
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+
+  try {
+
+    HIP_ARRAY3D_DESCRIPTOR ArrayInfo;
+    UR_CHECK_ERROR(hipArray3DGetDescriptor(
+        &ArrayInfo, std::get<SurfaceMem>(hMemory->Mem).Array));
+
+    const auto hip2urFormat =
+        [](hipArray_Format HipFormat) -> ur_image_channel_type_t {
+      switch (HipFormat) {
+      case HIP_AD_FORMAT_UNSIGNED_INT8:
+        return UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
+      case HIP_AD_FORMAT_UNSIGNED_INT16:
+        return UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
+      case HIP_AD_FORMAT_UNSIGNED_INT32:
+        return UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
+      case HIP_AD_FORMAT_SIGNED_INT8:
+        return UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
+      case HIP_AD_FORMAT_SIGNED_INT16:
+        return UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
+      case HIP_AD_FORMAT_SIGNED_INT32:
+        return UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
+      case HIP_AD_FORMAT_HALF:
+        return UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
+      case HIP_AD_FORMAT_FLOAT:
+        return UR_IMAGE_CHANNEL_TYPE_FLOAT;
+
+      default:
+        detail::ur::die("Invalid Hip format specified.");
+      }
+    };
+
+    const auto hipFormatToElementSize =
+        [](hipArray_Format HipFormat) -> size_t {
+      switch (HipFormat) {
+      case HIP_AD_FORMAT_UNSIGNED_INT8:
+      case HIP_AD_FORMAT_SIGNED_INT8:
+        return 1;
+      case HIP_AD_FORMAT_UNSIGNED_INT16:
+      case HIP_AD_FORMAT_SIGNED_INT16:
+      case HIP_AD_FORMAT_HALF:
+        return 2;
+      case HIP_AD_FORMAT_UNSIGNED_INT32:
+      case HIP_AD_FORMAT_SIGNED_INT32:
+      case HIP_AD_FORMAT_FLOAT:
+        return 4;
+      default:
+        detail::ur::die("Invalid Hip format specified.");
+      }
+    };
+
+    switch (propName) {
+    case UR_IMAGE_INFO_FORMAT:
+      return ReturnValue(ur_image_format_t{UR_IMAGE_CHANNEL_ORDER_RGBA,
+                                           hip2urFormat(ArrayInfo.Format)});
+    case UR_IMAGE_INFO_WIDTH:
+      return ReturnValue(ArrayInfo.Width);
+    case UR_IMAGE_INFO_HEIGHT:
+      return ReturnValue(ArrayInfo.Height);
+    case UR_IMAGE_INFO_DEPTH:
+      return ReturnValue(ArrayInfo.Depth);
+    case UR_IMAGE_INFO_ELEMENT_SIZE:
+      return ReturnValue(hipFormatToElementSize(ArrayInfo.Format));
+    case UR_IMAGE_INFO_ROW_PITCH:
+    case UR_IMAGE_INFO_SLICE_PITCH:
+      return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
+
+    default:
+      return UR_RESULT_ERROR_INVALID_ENUMERATION;
+    }
+
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+  return UR_RESULT_SUCCESS;
 }

 UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) {
diff --git a/source/adapters/hip/platform.cpp b/source/adapters/hip/platform.cpp
index 139c59b89c..5f35b55f1f 100644
--- a/source/adapters/hip/platform.cpp
+++ b/source/adapters/hip/platform.cpp
@@ -9,6 +9,7 @@
 //===----------------------------------------------------------------------===//

 #include "platform.hpp"
+#include "context.hpp"

 hipEvent_t ur_platform_handle_t_::EvBase{nullptr};

@@ -90,6 +91,16 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries,
           PlatformIds[i].Devices.emplace_back(
               new ur_device_handle_t_{Device, Context, &PlatformIds[i]});
         }
+
+        // Setup EvBase
+        {
+          ScopedContext Active(PlatformIds.front().Devices.front().get());
+          hipEvent_t EvBase;
+          UR_CHECK_ERROR(hipEventCreate(&EvBase));
+          UR_CHECK_ERROR(hipEventRecord(EvBase, 0));
+
+          ur_platform_handle_t_::EvBase = EvBase;
+        }
       } catch (const std::bad_alloc &) {
         // Signal out-of-memory situation
         for (int i = 0; i < NumDevices; ++i) {
diff --git a/source/adapters/hip/sampler.cpp b/source/adapters/hip/sampler.cpp
index 840eb1a1b0..5a177d6a9f 100644
--- a/source/adapters/hip/sampler.cpp
+++ b/source/adapters/hip/sampler.cpp
@@ -80,3 +80,17 @@ ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) {

   return UR_RESULT_SUCCESS;
 }
+
+UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle(
+    [[maybe_unused]] ur_native_handle_t hNativeSampler,
+    [[maybe_unused]] ur_context_handle_t hContext,
+    [[maybe_unused]] const ur_sampler_native_properties_t *pProperties,
+    [[maybe_unused]] ur_sampler_handle_t *phSampler) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urSamplerGetNativeHandle([[maybe_unused]] ur_sampler_handle_t hSampler,
+                         [[maybe_unused]] ur_native_handle_t *phNativeSampler) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp
index 0e8ad3c605..26292b9528 100644
--- a/source/adapters/hip/ur_interface_loader.cpp
+++ b/source/adapters/hip/ur_interface_loader.cpp
@@ -38,11 +38,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable(
   if (UR_RESULT_SUCCESS != result) {
     return result;
   }
-  pDdiTable->pfnCreateWithNativeHandle = nullptr;
+  pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle;
   pDdiTable->pfnGet = urPlatformGet;
   pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion;
   pDdiTable->pfnGetInfo = urPlatformGetInfo;
-  pDdiTable->pfnGetNativeHandle = nullptr;
+  pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle;
   pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption;
   return UR_RESULT_SUCCESS;
 }
@@ -123,7 +123,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
   pDdiTable->pfnSetArgSampler = urKernelSetArgSampler;
   pDdiTable->pfnSetArgValue = urKernelSetArgValue;
   pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
-  pDdiTable->pfnSetSpecializationConstants = nullptr;
+  pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
   return UR_RESULT_SUCCESS;
 }

@@ -134,9 +134,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable(
     return result;
   }
   pDdiTable->pfnCreate = urSamplerCreate;
-  pDdiTable->pfnCreateWithNativeHandle = nullptr;
+  pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle;
   pDdiTable->pfnGetInfo = urSamplerGetInfo;
-  pDdiTable->pfnGetNativeHandle = nullptr;
+  pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle;
   pDdiTable->pfnRelease = urSamplerRelease;
   pDdiTable->pfnRetain = urSamplerRetain;
   return UR_RESULT_SUCCESS;
 }
diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp
index 8276d10048..a73f348b52 100644
--- a/source/common/ur_util.hpp
+++ b/source/common/ur_util.hpp
@@ -293,4 +293,6 @@ inline ur_result_t exceptionToResult(std::exception_ptr eptr) {
     }
 }

+template <typename T> inline constexpr bool ur_always_false_t = false;
+
 #endif /* UR_UTIL_H */
diff --git a/test/conformance/device/device_adapter_hip.match b/test/conformance/device/device_adapter_hip.match
index 478659b4e9..711bfe1224 100644
--- a/test/conformance/device/device_adapter_hip.match
+++ b/test/conformance/device/device_adapter_hip.match
@@ -1,25 +1,5 @@
-{{OPT}}urDeviceCreateWithNativeHandleTest.Success
-{{OPT}}urDeviceGetTest.InvalidValueNumEntries
-{{OPT}}urDeviceGetGlobalTimestampTest.Success
+urDeviceCreateWithNativeHandleTest.Success
+urDeviceGetTest.InvalidValueNumEntries
 {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_SINGLE_FP_CONFIG
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_DOUBLE_FP_CONFIG
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_QUEUE_PROPERTIES
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_IMAGE_SUPPORTED
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_IL_VERSION
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_SUPPORTED_PARTITIONS
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_PARTITION_TYPE
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GPU_EU_COUNT
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GPU_EU_SLICES
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_BFLOAT16
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_ASYNC_BARRIER
-{{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT
+urDeviceGetInfoTest.Success/UR_DEVICE_INFO_SUPPORTED_PARTITIONS
+urDeviceGetInfoTest.Success/UR_DEVICE_INFO_PARTITION_TYPE
diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt
index 10925b964f..9d19a34b93 100644
--- a/test/conformance/device_code/CMakeLists.txt
+++ b/test/conformance/device_code/CMakeLists.txt
@@ -18,9 +18,20 @@ macro(add_device_binary SOURCE_FILE)
     endif()
     foreach(TRIPLE ${TARGET_TRIPLES})
         set(EXE_PATH "${DEVICE_BINARY_DIR}/${KERNEL_NAME}_${TRIPLE}")
+        if(${TRIPLE} MATCHES "amd")
+            set(AMD_TARGET_BACKEND -Xsycl-target-backend=${TRIPLE})
+            set(AMD_OFFLOAD_ARCH --offload-arch=${AMD_ARCH})
+            set(AMD_NOGPULIB -nogpulib)
+        endif()
+        # images are not yet supported in sycl on AMD
+        if(${TRIPLE} MATCHES "amd" AND ${KERNEL_NAME} MATCHES "image_copy")
+            continue()
+        endif()
        add_custom_command(OUTPUT ${EXE_PATH}
-            COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
+            COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
+            ${AMD_TARGET_BACKEND} ${AMD_OFFLOAD_ARCH} ${AMD_NOGPULIB}
            ${SOURCE_FILE} -o ${EXE_PATH}
+            COMMAND ${CMAKE_COMMAND} -E env ${EXTRA_ENV} SYCL_DUMP_IMAGES=true ${EXE_PATH} || exit 0
            WORKING_DIRECTORY "${DEVICE_BINARY_DIR}"
diff --git a/test/conformance/enqueue/enqueue_adapter_hip.match b/test/conformance/enqueue/enqueue_adapter_hip.match
index 7a1c0d5b8e..9d48681c1a 100644
--- a/test/conformance/enqueue/enqueue_adapter_hip.match
+++ b/test/conformance/enqueue/enqueue_adapter_hip.match
@@ -1 +1,87 @@
-Segmentation fault
+{{OPT}}Segmentation Fault
+{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitInvalidEvent/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___copy_row_2D
+{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___copy_3d_2d
+{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueMemBufferFillTest.Success/AMD_HIP_BACKEND___{{.*}}___size__256__patternSize__256
+{{OPT}}urEnqueueMemBufferFillTest.Success/AMD_HIP_BACKEND___{{.*}}___size__1024__patternSize__256
+{{OPT}}urEnqueueMemBufferMapTest.SuccessMultiMaps/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueMemBufferReadTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueMemBufferWriteTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_row_2D
+{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_3d_2d
+{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urEnqueueMemImageCopyTest.Success/AMD_HIP_BACKEND___{{.*}}___1D
+{{OPT}}urEnqueueMemImageCopyTest.Success/AMD_HIP_BACKEND___{{.*}}___2D
+{{OPT}}urEnqueueMemImageCopyTest.Success/AMD_HIP_BACKEND___{{.*}}___3D
+{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/AMD_HIP_BACKEND___{{.*}}___1D
+{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/AMD_HIP_BACKEND___{{.*}}___2D
+{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/AMD_HIP_BACKEND___{{.*}}___3D
+{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/AMD_HIP_BACKEND___{{.*}}___1D
+{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/AMD_HIP_BACKEND___{{.*}}___2D
+{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/AMD_HIP_BACKEND___{{.*}}___3D
+{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/AMD_HIP_BACKEND___{{.*}}___1D +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/AMD_HIP_BACKEND___{{.*}}___2D +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/AMD_HIP_BACKEND___{{.*}}___3D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/AMD_HIP_BACKEND___{{.*}}___1D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/AMD_HIP_BACKEND___{{.*}}___3D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/AMD_HIP_BACKEND___{{.*}}___1D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/AMD_HIP_BACKEND___{{.*}}___3D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/AMD_HIP_BACKEND___{{.*}}___1D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/AMD_HIP_BACKEND___{{.*}}___3D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/AMD_HIP_BACKEND___{{.*}}___1D +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/AMD_HIP_BACKEND___{{.*}}___3D +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___1D +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___2D +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___3D +{{OPT}}urEnqueueMemImageReadTest.Success1D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageReadTest.Success3D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion1D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion2D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion3D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.Success1D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.Success3D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin1D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin2D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin3D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion1D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion2D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion3D/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1__width__1__height__1__patternSize__1 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__1__patternSize__256 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__1__patternSize__4 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__57__height__1__patternSize__1 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__1024__height__1__patternSize__256 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__1024__height__1__patternSize__1024 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__256__patternSize__1 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__256__patternSize__256 
+{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__256__patternSize__65536 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__234__width__233__height__1__patternSize__1 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__234__width__233__height__35__patternSize__1 +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__35__patternSize__128 +{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1__width__1__height__1 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__1 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__1024__height__1 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__256 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__234__width__233__height__23 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__234__width__233__height__1 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1__width__1__height__1 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__1 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__1024__height__1 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__1024__width__256__height__256 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__234__width__233__height__23 +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/AMD_HIP_BACKEND___{{.*}}___pitch__234__width__233__height__1 +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/AMD_HIP_BACKEND___{{.*}}___pitch__1__width__1__height__1 +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/AMD_HIP_BACKEND___{{.*}}___pitch__1__width__1__height__1 +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___pitch__1__width__1__height__1 +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/AMD_HIP_BACKEND___{{.*}}___pitch__1__width__1__height__1 +{{OPT}}urEnqueueUSMPrefetchWithParamTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT +{{OPT}}urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/AMD_HIP_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/kernel/kernel_adapter_hip.match b/test/conformance/kernel/kernel_adapter_hip.match index 7a1c0d5b8e..96d579f088 100644 --- a/test/conformance/kernel/kernel_adapter_hip.match +++ b/test/conformance/kernel/kernel_adapter_hip.match @@ -1 +1,25 @@ -Segmentation fault +{{OPT}}Segmentation Fault +{{OPT}}urKernelGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_REGS +{{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME +{{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_ARGS +{{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_REFERENCE_COUNT 
+{{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_CONTEXT +{{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_PROGRAM +{{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_ATTRIBUTES +{{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_REGS +{{OPT}}urKernelSetArgLocalTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgPointerTest.SuccessShared/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgSamplerTest.Success/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgSamplerTest.InvalidNullHandleKernel/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgSamplerTest.InvalidNullHandleArgValue/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgValueTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetArgValueTest.InvalidKernelArgumentSize/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetExecInfoUSMPointersTest.SuccessShared/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetSpecializationConstantsTest.Success/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urKernelSetSpecializationConstantsTest.InvalidSizeCount/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/kernel/urKernelSetArgSampler.cpp b/test/conformance/kernel/urKernelSetArgSampler.cpp index 4a044383ff..814b79a153 100644 --- a/test/conformance/kernel/urKernelSetArgSampler.cpp +++ b/test/conformance/kernel/urKernelSetArgSampler.cpp @@ -7,6 +7,14 @@ struct urKernelSetArgSamplerTest : uur::urKernelTest { void SetUp() { + // Images and samplers are not available on AMD + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend == UR_PLATFORM_BACKEND_HIP) { + GTEST_SKIP() << "Sampler are not supported on hip."; + } + program_name = "image_copy"; UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); ur_sampler_desc_t sampler_desc = { diff --git a/test/conformance/memory/memory_adapter_cuda.match b/test/conformance/memory/memory_adapter_cuda.match index 35c3504444..3b3da22161 100644 --- a/test/conformance/memory/memory_adapter_cuda.match +++ b/test/conformance/memory/memory_adapter_cuda.match @@ -3,6 +3,8 @@ urMemGetInfoTest.InvalidNullPointerParamValue/NVIDIA_CUDA_BACKEND___{{.*}}___UR_ urMemGetInfoTest.InvalidNullPointerParamValue/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT urMemGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_SIZE urMemGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT +urMemGetInfoImageTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_SIZE +urMemGetInfoImageTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT {{OPT}}urMemImageGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_IMAGE_INFO_FORMAT {{OPT}}urMemImageGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE 
{{OPT}}urMemImageGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_IMAGE_INFO_ROW_PITCH diff --git a/test/conformance/memory/memory_adapter_hip.match b/test/conformance/memory/memory_adapter_hip.match index 63dc9cd3cc..c6d4bdacfe 100644 --- a/test/conformance/memory/memory_adapter_hip.match +++ b/test/conformance/memory/memory_adapter_hip.match @@ -1,2 +1,9 @@ urMemBufferCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ -Segmentation fault +urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_SIZE +urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT +urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_SIZE +urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}}___UR_MEM_INFO_CONTEXT +{{OPT}}urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_IMAGE_INFO_ROW_PITCH +{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH +{{OPT}}Segmentation fault diff --git a/test/conformance/memory/memory_adapter_level_zero.match b/test/conformance/memory/memory_adapter_level_zero.match index 64b91fb9f3..00b085926f 100644 --- a/test/conformance/memory/memory_adapter_level_zero.match +++ b/test/conformance/memory/memory_adapter_level_zero.match @@ -6,4 +6,5 @@ urMemGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_ov urMemGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT urMemGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE urMemGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT +urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE {{Segmentation fault|Aborted}} diff --git a/test/conformance/memory/urMemGetInfo.cpp b/test/conformance/memory/urMemGetInfo.cpp index 355c2c009d..fcfcd429c5 100644 --- a/test/conformance/memory/urMemGetInfo.cpp +++ b/test/conformance/memory/urMemGetInfo.cpp @@ -2,18 +2,20 @@ // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
 // See LICENSE.TXT
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#include
 #include
 #include

 using urMemGetInfoTest = uur::urMemBufferTestWithParam<ur_mem_info_t>;

+static constexpr std::array mem_info_values{
+    UR_MEM_INFO_SIZE, UR_MEM_INFO_CONTEXT};
 static std::unordered_map<ur_mem_info_t, size_t> mem_info_size_map = {
     {UR_MEM_INFO_SIZE, sizeof(size_t)},
     {UR_MEM_INFO_CONTEXT, sizeof(ur_context_handle_t)},
 };

-UUR_TEST_SUITE_P(urMemGetInfoTest,
-                 ::testing::Values(UR_MEM_INFO_SIZE, UR_MEM_INFO_CONTEXT),
+UUR_TEST_SUITE_P(urMemGetInfoTest, ::testing::ValuesIn(mem_info_values),
                  uur::deviceTestWithParamPrinter<ur_mem_info_t>);

 TEST_P(urMemGetInfoTest, Success) {
@@ -71,3 +73,35 @@ TEST_P(urMemGetInfoTest, InvalidNullPointerPropSizeRet) {
         urMemGetInfo(buffer, UR_MEM_INFO_SIZE, 0, nullptr, nullptr),
         UR_RESULT_ERROR_INVALID_SIZE);
 }
+
+using urMemGetInfoImageTest = uur::urMemImageTestWithParam<ur_mem_info_t>;
+UUR_TEST_SUITE_P(urMemGetInfoImageTest, ::testing::ValuesIn(mem_info_values),
+                 uur::deviceTestWithParamPrinter<ur_mem_info_t>);
+
+TEST_P(urMemGetInfoImageTest, Success) {
+    ur_mem_info_t info = getParam();
+    size_t size;
+    ASSERT_SUCCESS(urMemGetInfo(image, info, 0, nullptr, &size));
+    ASSERT_NE(size, 0);
+
+    if (const auto expected_size = mem_info_size_map.find(info);
+        expected_size != mem_info_size_map.end()) {
+        ASSERT_EQ(expected_size->second, size);
+    }
+
+    std::vector<uint8_t> info_data(size);
+    ASSERT_SUCCESS(urMemGetInfo(image, info, size, info_data.data(), nullptr));
+
+    if (info == UR_MEM_INFO_SIZE) {
+        const size_t ExpectedPixelSize = sizeof(float) * 4 /*NumChannels*/;
+        const size_t ExpectedImageSize = ExpectedPixelSize * desc.arraySize *
+                                         desc.width * desc.height * desc.depth;
+        const size_t ImageSizeBytes =
+            *reinterpret_cast<size_t *>(info_data.data());
+        ASSERT_EQ(ImageSizeBytes, ExpectedImageSize);
+    } else if (info == UR_MEM_INFO_CONTEXT) {
+        ur_context_handle_t InfoContext =
+            *reinterpret_cast<ur_context_handle_t *>(info_data.data());
+        ASSERT_EQ(InfoContext, context);
+    }
+}
diff --git a/test/conformance/platform/platform_adapter_hip.match b/test/conformance/platform/platform_adapter_hip.match
index efd19f8b27..df63fbef05 100644
--- a/test/conformance/platform/platform_adapter_hip.match
+++ b/test/conformance/platform/platform_adapter_hip.match
@@ -1,4 +1 @@
 urPlatformGetTest.InvalidNumEntries
-urPlatformGetNativeHandleTest.Success
-urPlatformGetNativeHandleTest.InvalidNullHandlePlatform
-urPlatformGetNativeHandleTest.InvalidNullPointerNativePlatform
diff --git a/test/conformance/program/program_adapter_hip.match b/test/conformance/program/program_adapter_hip.match
index 7a1c0d5b8e..67f98ec2f7 100644
--- a/test/conformance/program/program_adapter_hip.match
+++ b/test/conformance/program/program_adapter_hip.match
@@ -1 +1,25 @@
-Segmentation fault
+{{OPT}}Segmentation Fault
+{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/AMD_HIP_BACKEND___{{.*}}_
+{{OPT}}urProgramGetBuildInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_BINARY_TYPE
+{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_STATUS
+{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_OPTIONS
+{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_LOG
+{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_BINARY_TYPE
+{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_STATUS +{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_OPTIONS +{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_LOG +{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_BINARY_TYPE +{{OPT}}urProgramGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_NUM_KERNELS +{{OPT}}urProgramGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_KERNEL_NAMES +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_REFERENCE_COUNT +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_CONTEXT +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_NUM_DEVICES +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_DEVICES +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_SOURCE +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_BINARY_SIZES +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_BINARIES +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_NUM_KERNELS +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_KERNEL_NAMES +{{OPT}}urProgramLinkTest.Success/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urProgramSetSpecializationConstantsTest.Success/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/queue/queue_adapter_hip.match b/test/conformance/queue/queue_adapter_hip.match index 16166a827c..46a22304cd 100644 --- a/test/conformance/queue/queue_adapter_hip.match +++ b/test/conformance/queue/queue_adapter_hip.match @@ -1,5 +1,3 @@ urQueueCreateTest.InvalidQueueProperties/AMD_HIP_BACKEND___{{.*}}_ -urQueueCreateWithParamTest.SuccessWithProperties/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE -urQueueCreateWithParamTest.SuccessWithProperties/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_FLAG_PROFILING_ENABLE urQueueGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_INFO_DEVICE_DEFAULT urQueueGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_INFO_SIZE diff --git a/test/conformance/sampler/sampler_adapter_hip.match b/test/conformance/sampler/sampler_adapter_hip.match index c690ae416a..e69de29bb2 100644 --- a/test/conformance/sampler/sampler_adapter_hip.match +++ b/test/conformance/sampler/sampler_adapter_hip.match @@ -1,3 +0,0 @@ -urSamplerGetNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urSamplerGetNativeHandleTest.InvalidNullHandleSampler/AMD_HIP_BACKEND___{{.*}}_ -urSamplerGetNativeHandleTest.InvalidNullPointerNativeHandle/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index 287310f679..875ceb63ef 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -266,6 +266,17 @@ std::string KernelsEnvironment::getSupportedILPostfix(uint32_t device_index) { return {}; } + // special case for AMD as it doesn't support IL. 
+    ur_platform_backend_t backend;
+    if (urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, sizeof(backend),
+                          &backend, nullptr)) {
+        error = "failed to get backend from platform.";
+        return {};
+    }
+    if (backend == UR_PLATFORM_BACKEND_HIP) {
+        return ".bin";
+    }
+
     auto device = instance->GetDevices()[device_index];
     std::string IL_version;
     if (uur::GetDeviceILVersion(device, IL_version)) {
diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h
index fbb8a48fb1..2c6cc1dde9 100644
--- a/test/conformance/testing/include/uur/fixtures.h
+++ b/test/conformance/testing/include/uur/fixtures.h
@@ -1032,15 +1032,37 @@ struct urKernelExecutionTest : urKernelTest {
         ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index, nullptr,
                                             mem_handle));

-        // This emulates the offset struct sycl adds for a 1D buffer accessor.
-        struct {
-            size_t offsets[1] = {0};
-        } accessor;
-        ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,
-                                           sizeof(accessor), nullptr,
-                                           &accessor));
-
-        current_arg_index += 2;
+        // SYCL device kernels have different interfaces depending on the
+        // backend being used. Typically a kernel which takes a buffer argument
+        // will take a pointer to the start of the buffer and a sycl::id param
+        // which is a struct that encodes the accessor to the buffer. However
+        // the AMD backend handles this differently and uses three separate
+        // arguments for each of the three dimensions of the accessor.
+
+        ur_platform_backend_t backend;
+        ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND,
+                                         sizeof(backend), &backend, nullptr));
+        if (backend == UR_PLATFORM_BACKEND_HIP) {
+            // this emulates the three offset params for buffer accessor on AMD.
+            size_t val = 0;
+            ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,
+                                               sizeof(size_t), nullptr, &val));
+            ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 2,
+                                               sizeof(size_t), nullptr, &val));
+            ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 3,
+                                               sizeof(size_t), nullptr, &val));
+            current_arg_index += 4;
+        } else {
+            // This emulates the offset struct sycl adds for a 1D buffer accessor.
+            struct {
+                size_t offsets[1] = {0};
+            } accessor;
+            ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,
+                                               sizeof(accessor), nullptr,
+                                               &accessor));
+            current_arg_index += 2;
+        }
+
         buffer_args.push_back(mem_handle);
         *out_buffer = mem_handle;
     }
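
A minimal usage sketch (illustrative only, not part of this diff) of how a client can observe the binary-type reporting added in source/adapters/cuda/program.cpp; the ctx, dev and prog handles are assumed to have been created earlier through the usual urContextCreate / urDeviceGet / urProgramCreateWithIL flow:

    // Illustrative only: ctx, dev and prog are hypothetical pre-existing handles.
    ur_program_binary_type_t BinaryType = UR_PROGRAM_BINARY_TYPE_NONE;

    // After urProgramCompile the CUDA adapter reports a compiled object ...
    if (urProgramCompile(ctx, prog, /*pOptions=*/nullptr) == UR_RESULT_SUCCESS) {
        urProgramGetBuildInfo(prog, dev, UR_PROGRAM_BUILD_INFO_BINARY_TYPE,
                              sizeof(BinaryType), &BinaryType, nullptr);
        // Expected: BinaryType == UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT
    }

    // ... while urProgramBuild (and urProgramLink) report an executable.
    if (urProgramBuild(ctx, prog, /*pOptions=*/nullptr) == UR_RESULT_SUCCESS) {
        urProgramGetBuildInfo(prog, dev, UR_PROGRAM_BUILD_INFO_BINARY_TYPE,
                              sizeof(BinaryType), &BinaryType, nullptr);
        // Expected: BinaryType == UR_PROGRAM_BINARY_TYPE_EXECUTABLE
    }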