From 18b21f726679e8457248d2b7e169ca901f27906a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Thu, 28 Sep 2023 14:30:44 +0100 Subject: [PATCH 1/2] [SYCL][CUDA] Fetch the adapter source from UR repo The CUDA adapter source files have been moved to the unified runtime repository at https://github.com/oneapi-src/unified-runtime This commit removes the sources files from intel/llvm and updates cmake to fetch them directly from the unified runtime repository. --- sycl/plugins/cuda/CMakeLists.txt | 70 +- sycl/plugins/cuda/pi_cuda.hpp | 20 +- sycl/plugins/unified_runtime/CMakeLists.txt | 66 +- .../ur/adapters/cuda/README.md | 7 + .../ur/adapters/cuda/adapter.cpp | 89 - .../ur/adapters/cuda/adapter.hpp | 11 - .../ur/adapters/cuda/command_buffer.cpp | 253 --- .../ur/adapters/cuda/command_buffer.hpp | 13 - .../ur/adapters/cuda/common.cpp | 139 -- .../ur/adapters/cuda/common.hpp | 59 - .../ur/adapters/cuda/context.cpp | 161 -- .../ur/adapters/cuda/context.hpp | 149 -- .../ur/adapters/cuda/device.cpp | 1212 ------------ .../ur/adapters/cuda/device.hpp | 119 -- .../ur/adapters/cuda/enqueue.cpp | 1690 ----------------- .../ur/adapters/cuda/enqueue.hpp | 16 - .../ur/adapters/cuda/event.cpp | 295 --- .../ur/adapters/cuda/event.hpp | 189 -- .../ur/adapters/cuda/image.cpp | 1061 ----------- .../ur/adapters/cuda/image.hpp | 32 - .../ur/adapters/cuda/kernel.cpp | 374 ---- .../ur/adapters/cuda/kernel.hpp | 206 -- .../ur/adapters/cuda/memory.cpp | 479 ----- .../ur/adapters/cuda/memory.hpp | 232 --- .../ur/adapters/cuda/platform.cpp | 195 -- .../ur/adapters/cuda/platform.hpp | 15 - .../ur/adapters/cuda/program.cpp | 452 ----- .../ur/adapters/cuda/program.hpp | 54 - .../ur/adapters/cuda/queue.cpp | 328 ---- .../ur/adapters/cuda/queue.hpp | 246 --- .../ur/adapters/cuda/sampler.cpp | 106 -- .../ur/adapters/cuda/sampler.hpp | 54 - .../ur/adapters/cuda/tracing.cpp | 109 -- .../ur/adapters/cuda/ur_interface_loader.cpp | 355 ---- .../unified_runtime/ur/adapters/cuda/usm.cpp | 503 ----- .../unified_runtime/ur/adapters/cuda/usm.hpp | 130 -- .../ur/adapters/cuda/usm_p2p.cpp | 69 - 37 files changed, 35 insertions(+), 9523 deletions(-) create mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/README.md delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/context.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/device.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/event.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/image.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/image.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp delete 
mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/platform.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/platform.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/program.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/program.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/queue.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/tracing.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/usm.cpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/usm.hpp delete mode 100644 sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt index 80d8d0c2f5525..700130e2f33fe 100644 --- a/sycl/plugins/cuda/CMakeLists.txt +++ b/sycl/plugins/cuda/CMakeLists.txt @@ -1,29 +1,11 @@ message(STATUS "Including the PI API CUDA backend.") - # cannot rely on cmake support for CUDA; it assumes runtime API is being used. - # we only require the CUDA driver API to be used - # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library. - -find_package(CUDA 10.1 REQUIRED) - -# Make imported library global to use it within the project. -add_library(cudadrv SHARED IMPORTED GLOBAL) - -if (WIN32) - set_target_properties( - cudadrv PROPERTIES - IMPORTED_IMPLIB ${CUDA_CUDA_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} - ) -else() - set_target_properties( - cudadrv PROPERTIES - IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} - ) -endif() - if (SYCL_ENABLE_XPTI_TRACING) + # cannot rely on cmake support for CUDA; it assumes runtime API is being used. + # we only require the CUDA driver API to be used + # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library. + find_package(CUDA 10.1 REQUIRED) + # The following two if's can be removed when FindCUDA -> FindCUDAToolkit. 
# CUDA_CUPTI_INCLUDE_DIR -> CUDAToolkit_CUPTI_INCLUDE_DIR include(FindCUDACupti) @@ -46,46 +28,15 @@ if (SYCL_ENABLE_XPTI_TRACING) ) endif() +# Get the CUDA adapter sources so they can be shared with the CUDA PI plugin +get_target_property(UR_CUDA_ADAPTER_SOURCES ur_adapter_cuda SOURCES) + add_sycl_plugin(cuda SOURCES + ${UR_CUDA_ADAPTER_SOURCES} # Some code is shared with the UR adapter "../unified_runtime/pi2ur.hpp" "../unified_runtime/pi2ur.cpp" - "../unified_runtime/ur/ur.hpp" - "../unified_runtime/ur/ur.cpp" - "../unified_runtime/ur/adapters/cuda/adapter.cpp" - "../unified_runtime/ur/adapters/cuda/adapter.hpp" - "../unified_runtime/ur/adapters/cuda/command_buffer.cpp" - "../unified_runtime/ur/adapters/cuda/command_buffer.hpp" - "../unified_runtime/ur/adapters/cuda/common.cpp" - "../unified_runtime/ur/adapters/cuda/common.hpp" - "../unified_runtime/ur/adapters/cuda/context.cpp" - "../unified_runtime/ur/adapters/cuda/context.hpp" - "../unified_runtime/ur/adapters/cuda/device.cpp" - "../unified_runtime/ur/adapters/cuda/device.hpp" - "../unified_runtime/ur/adapters/cuda/enqueue.cpp" - "../unified_runtime/ur/adapters/cuda/event.cpp" - "../unified_runtime/ur/adapters/cuda/event.hpp" - "../unified_runtime/ur/adapters/cuda/image.cpp" - "../unified_runtime/ur/adapters/cuda/image.hpp" - "../unified_runtime/ur/adapters/cuda/kernel.cpp" - "../unified_runtime/ur/adapters/cuda/kernel.hpp" - "../unified_runtime/ur/adapters/cuda/memory.cpp" - "../unified_runtime/ur/adapters/cuda/memory.hpp" - "../unified_runtime/ur/adapters/cuda/platform.cpp" - "../unified_runtime/ur/adapters/cuda/platform.hpp" - "../unified_runtime/ur/adapters/cuda/program.cpp" - "../unified_runtime/ur/adapters/cuda/program.hpp" - "../unified_runtime/ur/adapters/cuda/queue.cpp" - "../unified_runtime/ur/adapters/cuda/queue.hpp" - "../unified_runtime/ur/adapters/cuda/sampler.cpp" - "../unified_runtime/ur/adapters/cuda/sampler.hpp" - "../unified_runtime/ur/adapters/cuda/tracing.cpp" - "../unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp" - "../unified_runtime/ur/adapters/cuda/usm.cpp" - "../unified_runtime/ur/adapters/cuda/usm.hpp" - "../unified_runtime/ur/adapters/cuda/usm_p2p.cpp" - # --- "${sycl_inc_dir}/sycl/detail/pi.h" "${sycl_inc_dir}/sycl/detail/pi.hpp" "pi_cuda.hpp" @@ -94,7 +45,8 @@ add_sycl_plugin(cuda INCLUDE_DIRS ${sycl_inc_dir} ${XPTI_INCLUDE} - ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime + ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime # for Unified Runtime + ${UNIFIED_RUNTIME_SOURCE_DIR}/source/ # for adapters/cuda LIBRARIES cudadrv ${XPTI_LIBS} diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index b65c867c71a03..2b5d77b26ea9d 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -25,16 +25,16 @@ #define _PI_CUDA_PLUGIN_VERSION_STRING \ _PI_PLUGIN_VERSION_STRING(_PI_CUDA_PLUGIN_VERSION) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // Share code between the PI Plugin and UR Adapter #include diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index c5bbb404c56f9..df97b6eb07812 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -3,11 +3,15 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_DIR) include(FetchContent) - # The UR tag should be from the 'adapters' branch 
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG c791b8bba63af1c1880ae278e9d6df90021636dd) + set(UNIFIED_RUNTIME_TAG 00c7edb98f0c57ad968196a9cef393c380b6d6f7) set(UR_BUILD_ADAPTER_L0 ON) + + if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) + set(UR_BUILD_ADAPTER_CUDA ON) + endif() + set(UMF_ENABLE_POOL_TRACKING ON) message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime @@ -81,63 +85,7 @@ add_sycl_plugin(unified_runtime add_dependencies(sycl-runtime-libraries ur_adapter_level_zero) if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS) - # Build CUDA adapter - add_sycl_library("ur_adapter_cuda" SHARED - SOURCES - "ur/ur.hpp" - "ur/ur.cpp" - "ur/adapters/cuda/adapter.cpp" - "ur/adapters/cuda/adapter.hpp" - "ur/adapters/cuda/command_buffer.cpp" - "ur/adapters/cuda/command_buffer.hpp" - "ur/adapters/cuda/common.cpp" - "ur/adapters/cuda/common.hpp" - "ur/adapters/cuda/context.cpp" - "ur/adapters/cuda/context.hpp" - "ur/adapters/cuda/device.cpp" - "ur/adapters/cuda/device.hpp" - "ur/adapters/cuda/enqueue.cpp" - "ur/adapters/cuda/event.cpp" - "ur/adapters/cuda/event.hpp" - "ur/adapters/cuda/image.cpp" - "ur/adapters/cuda/image.hpp" - "ur/adapters/cuda/kernel.cpp" - "ur/adapters/cuda/kernel.hpp" - "ur/adapters/cuda/memory.cpp" - "ur/adapters/cuda/memory.hpp" - "ur/adapters/cuda/platform.cpp" - "ur/adapters/cuda/platform.hpp" - "ur/adapters/cuda/program.cpp" - "ur/adapters/cuda/program.hpp" - "ur/adapters/cuda/queue.cpp" - "ur/adapters/cuda/queue.hpp" - "ur/adapters/cuda/sampler.cpp" - "ur/adapters/cuda/sampler.hpp" - "ur/adapters/cuda/tracing.cpp" - "ur/adapters/cuda/ur_interface_loader.cpp" - "ur/adapters/cuda/usm.cpp" - "ur/adapters/cuda/usm.hpp" - "ur/adapters/cuda/usm_p2p.cpp" - INCLUDE_DIRS - ${sycl_inc_dir} - LIBRARIES - UnifiedRuntime-Headers - UnifiedRuntimeCommon - Threads::Threads - cudadrv - ) - - set_target_properties("ur_adapter_cuda" PROPERTIES - VERSION "0.0.0" - SOVERSION "0" - ) - - if(UMF_ENABLE_POOL_TRACKING) - target_compile_definitions("ur_adapter_cuda" PRIVATE - UMF_ENABLE_POOL_TRACKING) - else() - message(WARNING "CUDA adapter USM pools are disabled, set UMF_ENABLE_POOL_TRACKING to enable them") - endif() + add_dependencies(sycl-runtime-libraries ur_adapter_cuda) endif() if ("hip" IN_LIST SYCL_ENABLE_PLUGINS) diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/README.md b/sycl/plugins/unified_runtime/ur/adapters/cuda/README.md new file mode 100644 index 0000000000000..3ce735f91aa4e --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/README.md @@ -0,0 +1,7 @@ +# CUDA adapter +The source for the CUDA adapter has been moved to the +[adapters](https://github.com/oneapi-src/unified-runtime/tree/adapters) branch +of the [Unified Runtime](https://github.com/oneapi-src/unified-runtime/) repo. +Changes can be made by opening pull requests against that branch, and updating +the Unified Runtime commit in the parent +[CMakeLists.txt](../../../CMakeLists.txt). diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.cpp deleted file mode 100644 index e1179f487d4fd..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===--------- adapter.cpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "common.hpp" - -void enableCUDATracing(); -void disableCUDATracing(); - -struct ur_adapter_handle_t_ { - std::atomic RefCount = 0; - std::mutex Mutex; -}; - -ur_adapter_handle_t_ adapter{}; - -UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, - ur_loader_config_handle_t) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, - uint32_t *pNumAdapters) { - if (NumEntries > 0 && phAdapters) { - std::lock_guard Lock{adapter.Mutex}; - if (adapter.RefCount++ == 0) { - enableCUDATracing(); - } - - *phAdapters = &adapter; - } - - if (pNumAdapters) { - *pNumAdapters = 1; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - adapter.RefCount++; - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - std::lock_guard Lock{adapter.Mutex}; - if (--adapter.RefCount == 0) { - disableCUDATracing(); - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( - ur_adapter_handle_t, const char **ppMessage, int32_t *pError) { - std::ignore = pError; - *ppMessage = ErrorMessage; - return ErrorMessageCode; -} - -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, - ur_adapter_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_ADAPTER_INFO_BACKEND: - return ReturnValue(UR_ADAPTER_BACKEND_CUDA); - case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(adapter.RefCount.load()); - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.hpp deleted file mode 100644 index 7edf36e636dba..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/adapter.hpp +++ /dev/null @@ -1,11 +0,0 @@ -//===--------- adapter.hpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -struct ur_adapter_handle_t_; - -extern ur_adapter_handle_t_ adapter; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp deleted file mode 100644 index e2e1784d13e5b..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.cpp +++ /dev/null @@ -1,253 +0,0 @@ -//===--------- command_buffer.cpp - CUDA Adapter --------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "command_buffer.hpp" -#include "common.hpp" - -/// Stub implementations of UR experimental feature command-buffers - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - ur_exp_command_buffer_handle_t *phCommandBuffer) { - (void)hContext; - (void)hDevice; - (void)pCommandBufferDesc; - (void)phCommandBuffer; - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - (void)hCommandBuffer; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - (void)hCommandBuffer; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - (void)hCommandBuffer; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, - uint32_t workDim, const size_t *pGlobalWorkOffset, - const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hKernel; - (void)workDim; - (void)pGlobalWorkOffset; - (void)pGlobalWorkSize; - (void)pLocalWorkSize; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( - ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, - size_t size, uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)pDst; - (void)pSrc; - (void)size; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, - ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hSrcMem; - (void)hDstMem; - (void)srcOffset; - (void)dstOffset; - (void)size; - (void)numSyncPointsInWaitList; - 
(void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, - ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hSrcMem; - (void)hDstMem; - (void)srcOrigin; - (void)dstOrigin; - (void)region; - (void)srcRowPitch; - (void)srcSlicePitch; - (void)dstRowPitch; - (void)dstSlicePitch; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - size_t offset, size_t size, const void *pSrc, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)offset; - (void)size; - (void)pSrc; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)offset; - (void)size; - (void)pDst; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)bufferOffset; - (void)hostOffset; - (void)region; - (void)bufferRowPitch; - (void)bufferSlicePitch; - (void)hostRowPitch; - (void)hostSlicePitch; - (void)pSrc; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, 
ur_mem_handle_t hBuffer, - ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pDst, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)bufferOffset; - (void)hostOffset; - (void)region; - (void)bufferRowPitch; - (void)bufferSlicePitch; - (void)hostRowPitch; - (void)hostSlicePitch; - (void)pDst; - - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - (void)hCommandBuffer; - (void)hQueue; - (void)numEventsInWaitList; - (void)phEventWaitList; - (void)phEvent; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.hpp deleted file mode 100644 index 31ea4372ea2b1..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/command_buffer.hpp +++ /dev/null @@ -1,13 +0,0 @@ -//===--------- command_buffer.hpp - CUDA Adapter --------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -/// Stub implementation of command-buffers for CUDA - -struct ur_exp_command_buffer_handle_t_ {}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp deleted file mode 100644 index 5fcfe5993eee3..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.cpp +++ /dev/null @@ -1,139 +0,0 @@ -//===--------- common.cpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "common.hpp" - -#include - -#include - -ur_result_t mapErrorUR(CUresult Result) { - switch (Result) { - case CUDA_SUCCESS: - return UR_RESULT_SUCCESS; - case CUDA_ERROR_NOT_PERMITTED: - return UR_RESULT_ERROR_INVALID_OPERATION; - case CUDA_ERROR_INVALID_CONTEXT: - return UR_RESULT_ERROR_INVALID_CONTEXT; - case CUDA_ERROR_INVALID_DEVICE: - return UR_RESULT_ERROR_INVALID_DEVICE; - case CUDA_ERROR_INVALID_VALUE: - return UR_RESULT_ERROR_INVALID_VALUE; - case CUDA_ERROR_OUT_OF_MEMORY: - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - default: - return UR_RESULT_ERROR_UNKNOWN; - } -} - -void checkErrorUR(CUresult Result, const char *Function, int Line, - const char *File) { - if (Result == CUDA_SUCCESS || Result == CUDA_ERROR_DEINITIALIZED) { - return; - } - - if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr && - std::getenv("UR_SUPPRESS_ERROR_MESSAGE") == nullptr) { - const char *ErrorString = nullptr; - const char *ErrorName = nullptr; - cuGetErrorName(Result, &ErrorName); - cuGetErrorString(Result, &ErrorString); - std::stringstream SS; - SS << "\nUR CUDA ERROR:" - << "\n\tValue: " << Result - << "\n\tName: " << ErrorName - << "\n\tDescription: " << ErrorString - << "\n\tFunction: " << Function << "\n\tSource Location: " << File - << ":" << Line << "\n" - << std::endl; - std::cerr << SS.str(); - } - - if (std::getenv("PI_CUDA_ABORT") != nullptr || - std::getenv("UR_CUDA_ABORT") != nullptr) { - std::abort(); - } - - throw mapErrorUR(Result); -} - -void checkErrorUR(ur_result_t Result, const char *Function, int Line, - const char *File) { - if (Result == UR_RESULT_SUCCESS) { - return; - } - - if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr && - std::getenv("UR_SUPPRESS_ERROR_MESSAGE") == nullptr) { - std::stringstream SS; - SS << "\nUR ERROR:" - << "\n\tValue: " << Result - << "\n\tFunction: " << Function << "\n\tSource Location: " << File - << ":" << Line << "\n" - << std::endl; - std::cerr << SS.str(); - } - - if (std::getenv("PI_CUDA_ABORT") != nullptr) { - std::abort(); - } - - throw Result; -} - -std::string getCudaVersionString() { - int driver_version = 0; - cuDriverGetVersion(&driver_version); - // The version is returned as (1000 major + 10 minor). - std::stringstream stream; - stream << "CUDA " << driver_version / 1000 << "." 
- << driver_version % 1000 / 10; - return stream.str(); -} - -void detail::ur::die(const char *Message) { - std::cerr << "ur_die: " << Message << std::endl; - std::terminate(); -} - -void detail::ur::assertion(bool Condition, const char *Message) { - if (!Condition) - die(Message); -} - -void detail::ur::cuPrint(const char *Message) { - std::cerr << "ur_print: " << Message << std::endl; -} - -// Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR -thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS; -thread_local char ErrorMessage[MaxMessageSize]; - -// Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *pMessage, - ur_result_t ErrorCode) { - assert(strlen(pMessage) <= MaxMessageSize); - strcpy(ErrorMessage, pMessage); - ErrorMessageCode = ErrorCode; -} - -void setPluginSpecificMessage(CUresult cu_res) { - const char *error_string; - const char *error_name; - cuGetErrorName(cu_res, &error_name); - cuGetErrorString(cu_res, &error_string); - char *message = (char *)malloc(strlen(error_string) + strlen(error_name) + 2); - strcpy(message, error_name); - strcat(message, "\n"); - strcat(message, error_string); - - setErrorMessage(message, UR_RESULT_ERROR_ADAPTER_SPECIFIC); - free(message); -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp deleted file mode 100644 index 1f73a7030e6e5..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/common.hpp +++ /dev/null @@ -1,59 +0,0 @@ -//===--------- common.hpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -ur_result_t mapErrorUR(CUresult Result); - -/// Converts CUDA error into UR error codes, and outputs error information -/// to stderr. -/// If PI_CUDA_ABORT env variable is defined, it aborts directly instead of -/// throwing the error. This is intended for debugging purposes. -/// \return UR_RESULT_SUCCESS if \param Result was CUDA_SUCCESS. -/// \throw ur_result_t exception (integer) if input was not success. -/// -void checkErrorUR(CUresult Result, const char *Function, int Line, - const char *File); - -void checkErrorUR(ur_result_t Result, const char *Function, int Line, - const char *File); - -#define UR_CHECK_ERROR(Result) \ - checkErrorUR(Result, __func__, __LINE__, __FILE__) - -std::string getCudaVersionString(); - -constexpr size_t MaxMessageSize = 256; -extern thread_local ur_result_t ErrorMessageCode; -extern thread_local char ErrorMessage[MaxMessageSize]; - -// Utility function for setting a message and warning -[[maybe_unused]] void setErrorMessage(const char *pMessage, - ur_result_t ErrorCode); - -void setPluginSpecificMessage(CUresult cu_res); - -/// ------ Error handling, matching OpenCL plugin semantics. -namespace detail { -namespace ur { - -// Report error and no return (keeps compiler from printing warnings). -// TODO: Probably change that to throw a catchable exception, -// but for now it is useful to see every failure. 
-// -[[noreturn]] void die(const char *Message); - -// Reports error messages -void cuPrint(const char *Message); - -void assertion(bool Condition, const char *Message = nullptr); - -} // namespace ur -} // namespace detail diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp deleted file mode 100644 index 179902a538831..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/context.cpp +++ /dev/null @@ -1,161 +0,0 @@ -//===--------- context.cpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "context.hpp" -#include "usm.hpp" - -#include - -void ur_context_handle_t_::addPool(ur_usm_pool_handle_t Pool) { - std::lock_guard Lock(Mutex); - PoolHandles.insert(Pool); -} - -void ur_context_handle_t_::removePool(ur_usm_pool_handle_t Pool) { - std::lock_guard Lock(Mutex); - PoolHandles.erase(Pool); -} - -ur_usm_pool_handle_t -ur_context_handle_t_::getOwningURPool(umf_memory_pool_t *UMFPool) { - std::lock_guard Lock(Mutex); - for (auto &Pool : PoolHandles) { - if (Pool->hasUMFPool(UMFPool)) { - return Pool; - } - } - return nullptr; -} - -/// Create a UR CUDA context. -/// -/// By default creates a scoped context and keeps the last active CUDA context -/// on top of the CUDA context stack. -/// With the __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of -/// PI_TRUE creates a primary CUDA context and activates it on the CUDA context -/// stack. -/// -UR_APIEXPORT ur_result_t UR_APICALL -urContextCreate(uint32_t DeviceCount, const ur_device_handle_t *phDevices, - const ur_context_properties_t *pProperties, - ur_context_handle_t *phContext) { - std::ignore = DeviceCount; - std::ignore = pProperties; - - assert(DeviceCount == 1); - ur_result_t RetErr = UR_RESULT_SUCCESS; - - std::unique_ptr ContextPtr{nullptr}; - try { - ContextPtr = std::unique_ptr( - new ur_context_handle_t_{*phDevices}); - *phContext = ContextPtr.release(); - } catch (ur_result_t Err) { - RetErr = Err; - } catch (...) { - RetErr = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - return RetErr; -} - -UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( - ur_context_handle_t hContext, ur_context_info_t ContextInfoType, - size_t propSize, void *pContextInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet); - - switch (static_cast(ContextInfoType)) { - case UR_CONTEXT_INFO_NUM_DEVICES: - return ReturnValue(1); - case UR_CONTEXT_INFO_DEVICES: - return ReturnValue(hContext->getDevice()); - case UR_CONTEXT_INFO_REFERENCE_COUNT: - return ReturnValue(hContext->getReferenceCount()); - case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - uint32_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; - return ReturnValue(Capabilities); - } - case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - int Major = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - hContext->getDevice()->get())); - uint32_t Capabilities = - (Major >= 7) ? 
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM - : UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; - return ReturnValue(Capabilities); - } - case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: - // 2D USM memcpy is supported. - return ReturnValue(true); - case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: - // 2D USM operations currently not supported. - return ReturnValue(false); - - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urContextRelease(ur_context_handle_t hContext) { - if (hContext->decrementReferenceCount() > 0) { - return UR_RESULT_SUCCESS; - } - hContext->invokeExtendedDeleters(); - - std::unique_ptr Context{hContext}; - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urContextRetain(ur_context_handle_t hContext) { - assert(hContext->getReferenceCount() > 0); - - hContext->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( - ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - *phNativeContext = reinterpret_cast(hContext->get()); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( - ur_native_handle_t hNativeContext, uint32_t numDevices, - const ur_device_handle_t *phDevices, - const ur_context_native_properties_t *pProperties, - ur_context_handle_t *phContext) { - std::ignore = hNativeContext; - std::ignore = numDevices; - std::ignore = phDevices; - std::ignore = pProperties; - std::ignore = phContext; - - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( - ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter, - void *pUserData) { - hContext->setExtendedDeleter(pfnDeleter, pUserData); - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/context.hpp deleted file mode 100644 index a321c148940b2..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/context.hpp +++ /dev/null @@ -1,149 +0,0 @@ -//===--------- context.hpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -#include -#include -#include -#include - -#include "common.hpp" -#include "device.hpp" - -#include - -typedef void (*ur_context_extended_deleter_t)(void *user_data); - -/// UR context mapping to a CUDA context object. -/// -/// There is no direct mapping between a CUDA context and a UR context. -/// The main differences are described below: -/// -/// CUDA context vs UR context -/// -/// One of the main differences between the UR API and the CUDA driver API is -/// that the second modifies the state of the threads by assigning -/// `CUcontext` objects to threads. `CUcontext` objects store data associated -/// with a given device and control access to said device from the user side. 
-/// UR API context are objects that are passed to functions, and not bound -/// to threads. -/// The ur_context_handle_t_ object doesn't implement this behavior. It only -/// holds the CUDA context data. The RAII object \ref ScopedContext implements -/// the active context behavior. -/// -/// Primary vs User-defined context -/// -/// CUDA has two different types of context, the Primary context, -/// which is usable by all threads on a given process for a given device, and -/// the aforementioned custom contexts. -/// The CUDA documentation, confirmed with performance analysis, suggest using -/// the Primary context whenever possible. -/// The Primary context is also used by the CUDA Runtime API. -/// For UR applications to interop with CUDA Runtime API, they have to use -/// the primary context - and make that active in the thread. -/// The `ur_context_handle_t_` object can be constructed with a `kind` parameter -/// that allows to construct a Primary or `user-defined` context, so that -/// the UR object interface is always the same. -/// -/// Destructor callback -/// -/// Required to implement CP023, SYCL Extended Context Destruction, -/// the PI Context can store a number of callback functions that will be -/// called upon destruction of the UR Context. -/// See proposal for details. -/// https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md -/// -struct ur_context_handle_t_ { - - struct deleter_data { - ur_context_extended_deleter_t Function; - void *UserData; - - void operator()() { Function(UserData); } - }; - - using native_type = CUcontext; - - native_type CUContext; - ur_device_handle_t DeviceID; - std::atomic_uint32_t RefCount; - - ur_context_handle_t_(ur_device_handle_t_ *DevID) - : CUContext{DevID->getContext()}, DeviceID{DevID}, RefCount{1} { - urDeviceRetain(DeviceID); - }; - - ~ur_context_handle_t_() { urDeviceRelease(DeviceID); } - - void invokeExtendedDeleters() { - std::lock_guard Guard(Mutex); - for (auto &Deleter : ExtendedDeleters) { - Deleter(); - } - } - - void setExtendedDeleter(ur_context_extended_deleter_t Function, - void *UserData) { - std::lock_guard Guard(Mutex); - ExtendedDeleters.emplace_back(deleter_data{Function, UserData}); - } - - ur_device_handle_t getDevice() const noexcept { return DeviceID; } - - native_type get() const noexcept { return CUContext; } - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - void addPool(ur_usm_pool_handle_t Pool); - - void removePool(ur_usm_pool_handle_t Pool); - - ur_usm_pool_handle_t getOwningURPool(umf_memory_pool_t *UMFPool); - -private: - std::mutex Mutex; - std::vector ExtendedDeleters; - std::set PoolHandles; -}; - -namespace { -class ScopedContext { -public: - ScopedContext(ur_context_handle_t Context) { - if (!Context) { - throw UR_RESULT_ERROR_INVALID_CONTEXT; - } - - setContext(Context->get()); - } - - ScopedContext(CUcontext NativeContext) { setContext(NativeContext); } - - ~ScopedContext() {} - -private: - void setContext(CUcontext Desired) { - CUcontext Original = nullptr; - - UR_CHECK_ERROR(cuCtxGetCurrent(&Original)); - - // Make sure the desired context is active on the current thread, setting - // it if necessary - if (Original != Desired) { - UR_CHECK_ERROR(cuCtxSetCurrent(Desired)); - } - } -}; -} // namespace diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp 
b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp deleted file mode 100644 index ece3dca15a3b3..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp +++ /dev/null @@ -1,1212 +0,0 @@ -//===--------- device.cpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include - -#include "adapter.hpp" -#include "context.hpp" -#include "device.hpp" -#include "platform.hpp" - -int getAttribute(ur_device_handle_t device, CUdevice_attribute attribute) { - int value; - - UR_CHECK_ERROR(cuDeviceGetAttribute(&value, attribute, device->get())); - return value; -} - -uint64_t ur_device_handle_t_::getElapsedTime(CUevent ev) const { - float Milliseconds = 0.0f; - - // cuEventSynchronize waits till the event is ready for call to - // cuEventElapsedTime. - UR_CHECK_ERROR(cuEventSynchronize(EvBase)); - UR_CHECK_ERROR(cuEventSynchronize(ev)); - UR_CHECK_ERROR(cuEventElapsedTime(&Milliseconds, EvBase, ev)); - - return static_cast(Milliseconds * 1.0e6); -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, - ur_device_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - static constexpr uint32_t MaxWorkItemDimensions = 3u; - - ScopedContext Active(hDevice->getContext()); - - switch ((uint32_t)propName) { - case UR_DEVICE_INFO_TYPE: { - return ReturnValue(UR_DEVICE_TYPE_GPU); - } - case UR_DEVICE_INFO_VENDOR_ID: { - return ReturnValue(4318u); - } - case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: { - int ComputeUnits = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &ComputeUnits, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - hDevice->get())); - detail::ur::assertion(ComputeUnits >= 0); - return ReturnValue(static_cast(ComputeUnits)); - } - case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: { - return ReturnValue(MaxWorkItemDimensions); - } - case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { - struct { - size_t Sizes[MaxWorkItemDimensions]; - } ReturnSizes; - - int MaxX = 0, MaxY = 0, MaxZ = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxX, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, hDevice->get())); - detail::ur::assertion(MaxX >= 0); - - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxY, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, hDevice->get())); - detail::ur::assertion(MaxY >= 0); - - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxZ, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, hDevice->get())); - detail::ur::assertion(MaxZ >= 0); - - ReturnSizes.Sizes[0] = size_t(MaxX); - ReturnSizes.Sizes[1] = size_t(MaxY); - ReturnSizes.Sizes[2] = size_t(MaxZ); - return ReturnValue(ReturnSizes); - } - - case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: { - struct { - size_t Sizes[MaxWorkItemDimensions]; - } ReturnSizes; - int MaxX = 0, MaxY = 0, MaxZ = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, hDevice->get())); - detail::ur::assertion(MaxX >= 0); - - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxY, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, hDevice->get())); - detail::ur::assertion(MaxY >= 0); - - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxZ, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, hDevice->get())); - detail::ur::assertion(MaxZ >= 0); - - ReturnSizes.Sizes[0] = size_t(MaxX); - 
ReturnSizes.Sizes[1] = size_t(MaxY); - ReturnSizes.Sizes[2] = size_t(MaxZ); - return ReturnValue(ReturnSizes); - } - - case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { - int MaxWorkGroupSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxWorkGroupSize, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, - hDevice->get())); - - detail::ur::assertion(MaxWorkGroupSize >= 0); - - return ReturnValue(size_t(MaxWorkGroupSize)); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: { - return ReturnValue(1u); - } - case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { - // Number of sub-groups = max block size / warp size + possible remainder - int MaxThreads = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxThreads, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, - hDevice->get())); - int WarpSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get())); - int MaxWarps = (MaxThreads + WarpSize - 1) / WarpSize; - return ReturnValue(MaxWarps); - } - case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { - // Volta provides independent thread scheduling - // TODO: Revisit for previous generation GPUs - int Major = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - bool IFP = (Major >= 7); - return ReturnValue(IFP); - } - - case UR_DEVICE_INFO_ATOMIC_64: { - int Major = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - - bool Atomic64 = (Major >= 6) ? true : false; - return ReturnValue(Atomic64); - } - case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - ur_memory_order_capability_flags_t Capabilities = - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; - return ReturnValue(Capabilities); - } - case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { - int Major = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - uint64_t Capabilities = - (Major >= 7) ? 
UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM - : UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; - return ReturnValue(Capabilities); - } - - case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { - // SYCL2020 4.6.4.2 minimum mandated capabilities for - // atomic_fence_order_capabilities. - ur_memory_order_capability_flags_t Capabilities = - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | - UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; - return ReturnValue(Capabilities); - } - case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - // SYCL2020 4.6.4.2 minimum mandated capabilities for - // atomic_fence/memory_scope_capabilities. - // Because scopes are hierarchical, wider scopes support all narrower - // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - ur_memory_scope_capability_flags_t Capabilities = - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; - return ReturnValue(Capabilities); - } - case UR_DEVICE_INFO_BFLOAT16: { - int Major = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - - bool BFloat16 = (Major >= 8) ? true : false; - return ReturnValue(BFloat16); - } - case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { - // NVIDIA devices only support one sub-group size (the warp size) - int WarpSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get())); - size_t Sizes[1] = {static_cast(WarpSize)}; - return ReturnValue(Sizes, 1); - } - case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { - int ClockFreq = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &ClockFreq, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, hDevice->get())); - detail::ur::assertion(ClockFreq >= 0); - return ReturnValue(static_cast(ClockFreq) / 1000u); - } - case UR_DEVICE_INFO_ADDRESS_BITS: { - auto Bits = uint32_t{std::numeric_limits::digits}; - return ReturnValue(Bits); - } - case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: { - return ReturnValue(uint64_t{hDevice->getMaxAllocSize()}); - } - case UR_DEVICE_INFO_IMAGE_SUPPORTED: { - bool Enabled = false; - - if (std::getenv("SYCL_PI_CUDA_ENABLE_IMAGE_SUPPORT") != nullptr || - std::getenv("UR_CUDA_ENABLE_IMAGE_SUPPORT") != nullptr) { - Enabled = true; - } else { - detail::ur::cuPrint( - "Images are not fully supported by the CUDA BE, their support is " - "disabled by default. Their partial support can be activated by " - "setting SYCL_PI_CUDA_ENABLE_IMAGE_SUPPORT environment variable at " - "runtime."); - } - - return ReturnValue(Enabled); - } - case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: { - // This call doesn't match to CUDA as it doesn't have images, but instead - // surfaces and textures. No clear call in the CUDA API to determine this, - // but some searching found as of SM 2.x 128 are supported. - return ReturnValue(128u); - } - case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: { - // This call doesn't match to CUDA as it doesn't have images, but instead - // surfaces and textures. 
No clear call in the CUDA API to determine this, - // but some searching found as of SM 2.x 128 are supported. - return ReturnValue(128u); - } - case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { - // Take the smaller of maximum surface and maximum texture height. - int TexHeight = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &TexHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, - hDevice->get())); - detail::ur::assertion(TexHeight >= 0); - int SurfHeight = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &SurfHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, - hDevice->get())); - detail::ur::assertion(SurfHeight >= 0); - - int Min = std::min(TexHeight, SurfHeight); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { - // Take the smaller of maximum surface and maximum texture width. - int TexWidth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &TexWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, - hDevice->get())); - detail::ur::assertion(TexWidth >= 0); - int SurfWidth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &SurfWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, - hDevice->get())); - detail::ur::assertion(SurfWidth >= 0); - - int Min = std::min(TexWidth, SurfWidth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { - // Take the smaller of maximum surface and maximum texture height. - int TexHeight = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &TexHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, - hDevice->get())); - detail::ur::assertion(TexHeight >= 0); - int SurfHeight = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &SurfHeight, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, - hDevice->get())); - detail::ur::assertion(SurfHeight >= 0); - - int Min = std::min(TexHeight, SurfHeight); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { - // Take the smaller of maximum surface and maximum texture width. - int TexWidth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &TexWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, - hDevice->get())); - detail::ur::assertion(TexWidth >= 0); - int SurfWidth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &SurfWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, - hDevice->get())); - detail::ur::assertion(SurfWidth >= 0); - - int Min = std::min(TexWidth, SurfWidth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { - // Take the smaller of maximum surface and maximum texture depth. - int TexDepth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &TexDepth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, - hDevice->get())); - detail::ur::assertion(TexDepth >= 0); - int SurfDepth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &SurfDepth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, - hDevice->get())); - detail::ur::assertion(SurfDepth >= 0); - - int Min = std::min(TexDepth, SurfDepth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { - // Take the smaller of maximum surface and maximum texture width. 
- int TexWidth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &TexWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, - hDevice->get())); - detail::ur::assertion(TexWidth >= 0); - int SurfWidth = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &SurfWidth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, - hDevice->get())); - detail::ur::assertion(SurfWidth >= 0); - - int Min = std::min(TexWidth, SurfWidth); - - return ReturnValue(static_cast(Min)); - } - case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: { - return ReturnValue(0lu); - } - case UR_DEVICE_INFO_MAX_SAMPLERS: { - // This call is kind of meaningless for cuda, as samplers don't exist. - // Closest thing is textures, which is 128. - return ReturnValue(128u); - } - case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: { - // https://docs.nvidia.com/cuda/cuda-c-programming-guide/#function-parameters - // __global__ function parameters are passed to the device via constant - // memory and are limited to 4 KB. - return ReturnValue(4000lu); - } - case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { - int MemBaseAddrAlign = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute(&MemBaseAddrAlign, - CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, - hDevice->get())); - // Multiply by 8 as clGetDeviceInfo returns this value in bits - MemBaseAddrAlign *= 8; - return ReturnValue(MemBaseAddrAlign); - } - case UR_DEVICE_INFO_HALF_FP_CONFIG: { - // TODO: is this config consistent across all NVIDIA GPUs? - return ReturnValue(0u); - } - case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { - // TODO: is this config consistent across all NVIDIA GPUs? - ur_device_fp_capability_flags_t Config = - UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | - UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | - UR_DEVICE_FP_CAPABILITY_FLAG_FMA | - UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; - return ReturnValue(Config); - } - case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { - // TODO: is this config consistent across all NVIDIA GPUs? - ur_device_fp_capability_flags_t Config = - UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | - UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | - UR_DEVICE_FP_CAPABILITY_FLAG_FMA; - return ReturnValue(Config); - } - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: { - // TODO: is this config consistent across all NVIDIA GPUs? - return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE); - } - case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: { - // The value is documented for all existing GPUs in the CUDA programming - // guidelines, section "H.3.2. Global Memory". - return ReturnValue(128u); - } - case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { - int CacheSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, hDevice->get())); - detail::ur::assertion(CacheSize >= 0); - // The L2 cache is global to the GPU. - return ReturnValue(static_cast(CacheSize)); - } - case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { - size_t Bytes = 0; - // Runtime API has easy access to this value, driver API info is scarse. 
- detail::ur::assertion(cuDeviceTotalMem(&Bytes, hDevice->get()) == - CUDA_SUCCESS); - return ReturnValue(uint64_t{Bytes}); - } - case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: { - int ConstantMemory = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &ConstantMemory, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, - hDevice->get())); - detail::ur::assertion(ConstantMemory >= 0); - - return ReturnValue(static_cast<uint64_t>(ConstantMemory)); - } - case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: { - // TODO: is there a way to retrieve this from CUDA driver API? - // Hard coded to value returned by clinfo for OpenCL 1.2 CUDA | GeForce GTX - // 1060 3GB - return ReturnValue(9u); - } - case UR_DEVICE_INFO_LOCAL_MEM_TYPE: { - return ReturnValue(UR_DEVICE_LOCAL_MEM_TYPE_LOCAL); - } - case UR_DEVICE_INFO_LOCAL_MEM_SIZE: { - // OpenCL's "local memory" maps most closely to CUDA's "shared memory". - // CUDA has its own definition of "local memory", which maps to OpenCL's - // "private memory". - if (hDevice->maxLocalMemSizeChosen()) { - return ReturnValue( - static_cast<uint64_t>(hDevice->getMaxChosenLocalMem())); - } else { - int LocalMemSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &LocalMemSize, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, - hDevice->get())); - detail::ur::assertion(LocalMemSize >= 0); - return ReturnValue(static_cast<uint64_t>(LocalMemSize)); - } - } - case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { - int ECCEnabled = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, hDevice->get())); - - detail::ur::assertion((ECCEnabled == 0) | (ECCEnabled == 1)); - auto Result = static_cast<bool>(ECCEnabled); - return ReturnValue(Result); - } - case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: { - int IsIntegrated = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &IsIntegrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, hDevice->get())); - - detail::ur::assertion((IsIntegrated == 0) | (IsIntegrated == 1)); - auto result = static_cast<bool>(IsIntegrated); - return ReturnValue(result); - } - case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: { - // Hard coded to value returned by clinfo for OpenCL 1.2 CUDA | GeForce GTX - // 1060 3GB - return ReturnValue(1000lu); - } - case UR_DEVICE_INFO_ENDIAN_LITTLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_AVAILABLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_COMPILER_AVAILABLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_LINKER_AVAILABLE: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: { - auto Capability = ur_device_exec_capability_flags_t{ - UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL}; - return ReturnValue(Capability); - } - case UR_DEVICE_INFO_QUEUE_PROPERTIES: - return ReturnValue( - ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | - UR_QUEUE_FLAG_PROFILING_ENABLE)); - case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: { - // The mandated minimum capability: - ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE | - UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; - return ReturnValue(Capability); - } - case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: { - // The mandated minimum capability: - ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE; - return ReturnValue(Capability); - } - case UR_DEVICE_INFO_BUILT_IN_KERNELS: { - // An empty string is returned if no built-in kernels are supported by the - // device.
- return ReturnValue(""); - } - case UR_DEVICE_INFO_PLATFORM: { - return ReturnValue(hDevice->getPlatform()); - } - case UR_DEVICE_INFO_NAME: { - static constexpr size_t MaxDeviceNameLength = 256u; - char Name[MaxDeviceNameLength]; - UR_CHECK_ERROR(cuDeviceGetName(Name, MaxDeviceNameLength, hDevice->get())); - return ReturnValue(Name, strlen(Name) + 1); - } - case UR_DEVICE_INFO_VENDOR: { - return ReturnValue("NVIDIA Corporation"); - } - case UR_DEVICE_INFO_DRIVER_VERSION: { - auto Version = getCudaVersionString(); - return ReturnValue(Version.c_str()); - } - case UR_DEVICE_INFO_PROFILE: { - return ReturnValue("CUDA"); - } - case UR_DEVICE_INFO_REFERENCE_COUNT: { - return ReturnValue(hDevice->getReferenceCount()); - } - case UR_DEVICE_INFO_VERSION: { - std::stringstream SS; - int Major; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - SS << Major; - int Minor; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get())); - SS << "." << Minor; - return ReturnValue(SS.str().c_str()); - } - case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: { - return ReturnValue(""); - } - case UR_DEVICE_INFO_EXTENSIONS: { - - std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups "; - SupportedExtensions += "pi_ext_intel_devicelib_assert "; - SupportedExtensions += " "; - - int Major = 0; - int Minor = 0; - - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get())); - - if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) { - SupportedExtensions += "cl_khr_fp16 "; - } - - return ReturnValue(SupportedExtensions.c_str()); - } - case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { - // The minimum value for the FULL profile is 1 MB. - return ReturnValue(1024lu); - } - case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: { - return ReturnValue(true); - } - case UR_DEVICE_INFO_PARENT_DEVICE: { - return ReturnValue(nullptr); - } - case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { - if (pPropSizeRet) { - *pPropSizeRet = 0; - } - return UR_RESULT_SUCCESS; - } - - case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_PARTITION_TYPE: { - if (pPropSizeRet) { - *pPropSizeRet = 0; - } - return UR_RESULT_SUCCESS; - } - - // Intel USM extensions - - case UR_DEVICE_INFO_USM_HOST_SUPPORT: { - // from cl_intel_unified_shared_memory: "The host memory access capabilities - // apply to any host allocation." 
- // - // query if/how the device can access page-locked host memory, possibly - // through PCIe, using the same pointer as the host - uint32_t Value = {}; - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) { - // the device shares a unified address space with the host - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= - 6) { - // compute capability 6.x introduces operations that are atomic with - // respect to other CPUs and GPUs in the system - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - } else { - // on GPU architectures with compute capability lower than 6.x, atomic - // operations from the GPU to CPU memory will not be atomic with respect - // to CPU initiated atomic operations - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - } - } - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The device memory access capabilities apply to any device allocation - // associated with this device." - // - // query how the device can access memory allocated on the device itself (?) - uint32_t Value = - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The single device shared memory access capabilities apply to any shared - // allocation associated with this device." - // - // query if/how the device can access managed memory associated to it - uint32_t Value = {}; - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY)) { - // the device can allocate managed memory on this system - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS; - } - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - // the device can coherently access managed memory concurrently with the - // CPU - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= - 6) { - // compute capability 6.x introduces operations that are atomic with - // respect to other CPUs and GPUs in the system - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - } - } - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The cross-device shared memory access capabilities apply to any shared - // allocation associated with this device, or to any shared memory - // allocation on another device that also supports the same cross-device - // shared memory access capability." 
- // - // query if/how the device can access managed memory associated to other - // devices - uint32_t Value = {}; - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY)) { - // the device can allocate managed memory on this system - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS; - } - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - // all devices with the CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS - // attribute can coherently access managed memory concurrently with the - // CPU - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - } - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= - 6) { - // compute capability 6.x introduces operations that are atomic with - // respect to other CPUs and GPUs in the system - if (Value & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS) - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS; - if (Value & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS) - Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - } - return ReturnValue(Value); - } - case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { - // from cl_intel_unified_shared_memory: - // "The shared system memory access capabilities apply to any allocations - // made by a system allocator, such as malloc or new." - // - // query if/how the device can access pageable host memory allocated by the - // system allocator - uint32_t Value = {}; - if (getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS)) { - // the device supports coherently accessing pageable memory without - // calling cuMemHostRegister/cudaHostRegister on it - if (getAttribute(hDevice, - CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED)) { - // the link between the device and the host supports native atomic - // operations - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS; - } else { - // the link between the device and the host does not support native - // atomic operations - Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS | - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS; - } - } - return ReturnValue(Value); - } - case UR_DEVICE_INFO_ASYNC_BARRIER: { - int Value = getAttribute(hDevice, - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= 8; - return ReturnValue(static_cast<ur_bool_t>(Value)); - } - case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { - int Major = - getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR); - int Minor = - getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR); - std::string Result = std::to_string(Major) + "." + std::to_string(Minor); - return ReturnValue(Result.c_str()); - } - - case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { - size_t FreeMemory = 0; - size_t TotalMemory = 0; - detail::ur::assertion(cuMemGetInfo(&FreeMemory, &TotalMemory) == - CUDA_SUCCESS, - "failed cuMemGetInfo() API."); - return ReturnValue(FreeMemory); - } - case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { - int Value = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Value, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, hDevice->get())); - detail::ur::assertion(Value >= 0); - // Convert kilohertz to megahertz when returning.
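For example, a device for which CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE reports 877000 (kHz) is returned by the case below as 877000 / 1000 = 877 MHz.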
- return ReturnValue(Value / 1000); - } - case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: { - int Value = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Value, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, hDevice->get())); - detail::ur::assertion(Value >= 0); - return ReturnValue(Value); - } - case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { - return ReturnValue(int32_t{1}); - } - case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { - // On CUDA bindless images are supported. - return ReturnValue(true); - } - case UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP: { - // On CUDA bindless images can be backed by shared (managed) USM. - return ReturnValue(true); - } - case UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP: { - // On CUDA 1D bindless image USM is not supported. - // More specifically, linear filtering is not supported. - return ReturnValue(false); - } - case UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP: { - // On CUDA 2D bindless image USM is supported. - return ReturnValue(true); - } - case UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP: { - int32_t tex_pitch_align = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &tex_pitch_align, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, - hDevice->get())); - return ReturnValue(tex_pitch_align); - } - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_WIDTH_EXP: { - int32_t tex_max_linear_width = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &tex_max_linear_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, hDevice->get())); - return ReturnValue(tex_max_linear_width); - } - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_HEIGHT_EXP: { - int32_t tex_max_linear_height = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &tex_max_linear_height, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, hDevice->get())); - return ReturnValue(tex_max_linear_height); - } - case UR_DEVICE_INFO_MAX_IMAGE_LINEAR_PITCH_EXP: { - int32_t tex_max_linear_pitch = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &tex_max_linear_pitch, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, hDevice->get())); - return ReturnValue(tex_max_linear_pitch); - } - case UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP: { - // CUDA supports mipmaps. - return ReturnValue(true); - } - case UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP: { - // CUDA supports anisotropic filtering. - return ReturnValue(true); - } - case UR_DEVICE_INFO_MIPMAP_MAX_ANISOTROPY_EXP: { - // CUDA has no query for this, but documentation states max value is 16. - return ReturnValue(16.f); - } - case UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP: { - // CUDA supports creation of images from individual mipmap levels. - return ReturnValue(true); - } - - case UR_DEVICE_INFO_INTEROP_MEMORY_IMPORT_SUPPORT_EXP: { - // CUDA supports importing external memory. - return ReturnValue(true); - } - case UR_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP: { - // CUDA does not support exporting its own device memory. - return ReturnValue(false); - } - case UR_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP: { - // CUDA supports importing external semaphores. - return ReturnValue(true); - } - case UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP: { - // CUDA does not support exporting semaphores or events.
- return ReturnValue(false); - } - case UR_DEVICE_INFO_DEVICE_ID: { - int Value = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Value, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, hDevice->get())); - detail::ur::assertion(Value >= 0); - return ReturnValue(Value); - } - case UR_DEVICE_INFO_UUID: { - CUuuid UUID; -#if (CUDA_VERSION >= 11040) - detail::ur::assertion(cuDeviceGetUuid_v2(&UUID, hDevice->get()) == - CUDA_SUCCESS); -#else - detail::ur::assertion(cuDeviceGetUuid(&UUID, hDevice->get()) == - CUDA_SUCCESS); -#endif - std::array<unsigned char, 16> Name; - std::copy(UUID.bytes, UUID.bytes + 16, Name.begin()); - return ReturnValue(Name.data(), 16); - } - case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: { - int Major = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - - int Minor = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get())); - - // Some specific devices seem to need special handling. See reference - // https://github.com/jeffhammond/HPCInfo/blob/master/cuda/gpu-detect.cu - bool IsXavierAGX = Major == 7 && Minor == 2; - bool IsOrinAGX = Major == 8 && Minor == 7; - - int MemoryClockKHz = 0; - if (IsXavierAGX) { - MemoryClockKHz = 2133000; - } else if (IsOrinAGX) { - MemoryClockKHz = 3200000; - } else { - UR_CHECK_ERROR(cuDeviceGetAttribute(&MemoryClockKHz, - CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, - hDevice->get())); - } - - int MemoryBusWidth = 0; - if (IsOrinAGX) { - MemoryBusWidth = 256; - } else { - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MemoryBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, - hDevice->get())); - } - - uint32_t MemoryBandwidth = MemoryClockKHz * MemoryBusWidth * 250; - - return ReturnValue(MemoryBandwidth); - } - case UR_DEVICE_INFO_IL_VERSION: { - std::string ILVersion = "nvptx-"; - - int DriverVersion = 0; - cuDriverGetVersion(&DriverVersion); - int Major = DriverVersion / 1000; - int Minor = DriverVersion % 1000 / 10; - - // We can work out which ptx ISA version we support based on the versioning - // table published here - // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes - // Major versions that we support are consistent in how they line up, so we - // can derive that easily. The minor versions for version 10 don't line up - // the same so it needs a special case. This is not ideal but it does seem - // to be the best bet to avoid a maintenance burden here. - ILVersion += std::to_string(Major - 4) + "."; - if (Major == 10) { - ILVersion += std::to_string(Minor + 3); - } else if (Major >= 11) { - ILVersion += std::to_string(Minor); - } else { - return UR_RESULT_ERROR_INVALID_VALUE; - } - - return ReturnValue(ILVersion.data(), ILVersion.size()); - } - case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: { - // Maximum number of 32-bit registers available to a thread block. - // Note: This number is shared by all thread blocks simultaneously resident - // on a multiprocessor.
- int MaxRegisters{-1}; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxRegisters, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, - hDevice->get())); - - detail::ur::assertion(MaxRegisters >= 0); - - return ReturnValue(static_cast<uint32_t>(MaxRegisters)); - } - case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: - return ReturnValue(false); - case UR_DEVICE_INFO_IMAGE_SRGB: - return ReturnValue(false); - case UR_DEVICE_INFO_PCI_ADDRESS: { - constexpr size_t AddressBufferSize = 13; - char AddressBuffer[AddressBufferSize]; - UR_CHECK_ERROR( - cuDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, hDevice->get())); - // CUDA API (8.x - 12.1) guarantees 12 bytes + \0 are written - detail::ur::assertion(strnlen(AddressBuffer, AddressBufferSize) == 12); - return ReturnValue(AddressBuffer, - strnlen(AddressBuffer, AddressBufferSize - 1) + 1); - } - case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: - return ReturnValue(false); - // TODO: Investigate if this information is available on CUDA. - case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: - return ReturnValue(false); - case UR_DEVICE_INFO_ESIMD_SUPPORT: - return ReturnValue(false); - case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: - case UR_DEVICE_INFO_GPU_EU_COUNT: - case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: - case UR_DEVICE_INFO_GPU_EU_SLICES: - case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: - case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: - case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - - default: - break; - } - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -/// \return UR_RESULT_SUCCESS if the function is executed successfully -/// CUDA devices are always root devices so retain always returns success. -UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { - std::ignore = hDevice; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urDevicePartition(ur_device_handle_t, const ur_device_partition_properties_t *, - uint32_t, ur_device_handle_t *, uint32_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// \return UR_RESULT_SUCCESS always since CUDA devices are always root -/// devices. -UR_APIEXPORT ur_result_t UR_APICALL -urDeviceRelease(ur_device_handle_t hDevice) { - std::ignore = hDevice; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, - ur_device_type_t DeviceType, - uint32_t NumEntries, - ur_device_handle_t *phDevices, - uint32_t *pNumDevices) { - ur_result_t Result = UR_RESULT_SUCCESS; - const bool AskingForAll = DeviceType == UR_DEVICE_TYPE_ALL; - const bool AskingForDefault = DeviceType == UR_DEVICE_TYPE_DEFAULT; - const bool AskingForGPU = DeviceType == UR_DEVICE_TYPE_GPU; - const bool ReturnDevices = AskingForDefault || AskingForAll || AskingForGPU; - - size_t NumDevices = ReturnDevices ? hPlatform->Devices.size() : 0; - - try { - if (pNumDevices) { - *pNumDevices = NumDevices; - } - - if (ReturnDevices && phDevices) { - for (size_t i = 0; i < std::min(size_t(NumEntries), NumDevices); ++i) { - phDevices[i] = hPlatform->Devices[i].get(); - } - } - - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -/// Gets the native CUDA handle of a UR device object -/// -/// \param[in] device The UR device to get the native CUDA object of. -/// \param[out] nativeHandle Set to the native handle of the UR device object.
-/// -/// \return UR_RESULT_SUCCESS - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( - ur_device_handle_t hDevice, ur_native_handle_t *phNativeHandle) { - *phNativeHandle = reinterpret_cast<ur_native_handle_t>(hDevice->get()); - return UR_RESULT_SUCCESS; -} - -/// Creates a UR device object from a CUDA device handle. -/// NOTE: The created UR object does not take ownership of the native handle. -/// -/// \param[in] nativeHandle The native handle to create UR device object from. -/// \param[in] platform is the UR platform of the device. -/// \param[out] device Set to the UR device object created from native handle. -/// -/// \return TBD - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t hNativeDevice, ur_platform_handle_t hPlatform, - const ur_device_native_properties_t *pProperties, - ur_device_handle_t *phDevice) { - std::ignore = pProperties; - - // We can't cast between ur_native_handle_t and CUdevice, so memcpy the bits - // instead - CUdevice CuDevice = 0; - memcpy(&CuDevice, &hNativeDevice, sizeof(CUdevice)); - - auto IsDevice = [=](std::unique_ptr<ur_device_handle_t_> &Dev) { - return Dev->get() == CuDevice; - }; - - // If a platform is provided just check if the device is in it - if (hPlatform) { - auto SearchRes = std::find_if(begin(hPlatform->Devices), - end(hPlatform->Devices), IsDevice); - if (SearchRes != end(hPlatform->Devices)) { - *phDevice = SearchRes->get(); - return UR_RESULT_SUCCESS; - } - } - - // Get list of platforms - uint32_t NumPlatforms = 0; - ur_adapter_handle_t AdapterHandle = &adapter; - ur_result_t Result = - urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms); - if (Result != UR_RESULT_SUCCESS) - return Result; - - ur_platform_handle_t *Plat = static_cast<ur_platform_handle_t *>( - malloc(NumPlatforms * sizeof(ur_platform_handle_t))); - Result = urPlatformGet(&AdapterHandle, 1, NumPlatforms, Plat, nullptr); - if (Result != UR_RESULT_SUCCESS) - return Result; - - // Iterate through platforms to find device that matches nativeHandle - for (uint32_t j = 0; j < NumPlatforms; ++j) { - auto SearchRes = - std::find_if(begin(Plat[j]->Devices), end(Plat[j]->Devices), IsDevice); - if (SearchRes != end(Plat[j]->Devices)) { - *phDevice = static_cast<ur_device_handle_t>((*SearchRes).get()); - return UR_RESULT_SUCCESS; - } - } - - // If the provided nativeHandle cannot be matched to an - // existing device return error - return UR_RESULT_ERROR_INVALID_OPERATION; -} - -ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, - uint64_t *pDeviceTimestamp, - uint64_t *pHostTimestamp) { - CUevent Event; - ScopedContext Active(hDevice->getContext()); - - if (pDeviceTimestamp) { - UR_CHECK_ERROR(cuEventCreate(&Event, CU_EVENT_DEFAULT)); - UR_CHECK_ERROR(cuEventRecord(Event, 0)); - } - if (pHostTimestamp) { - - using namespace std::chrono; - *pHostTimestamp = - duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()) - .count(); - } - - if (pDeviceTimestamp) { - UR_CHECK_ERROR(cuEventSynchronize(Event)); - *pDeviceTimestamp = hDevice->getElapsedTime(Event); - } - - return UR_RESULT_SUCCESS; -} - -/// \return If available, the first binary that is PTX -/// -UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( - ur_device_handle_t hDevice, const ur_device_binary_t *pBinaries, - uint32_t NumBinaries, uint32_t *pSelectedBinary) { - std::ignore = hDevice; - - // Look for an image for the NVPTX64 target, and return the first one that is - // found - for (uint32_t i = 0; i < NumBinaries; i++) { - if (strcmp(pBinaries[i].pDeviceTargetSpec, - UR_DEVICE_BINARY_TARGET_NVPTX64) == 0) { -
*pSelectedBinary = i; - return UR_RESULT_SUCCESS; - } - } - - // No image can be loaded for the given device - return UR_RESULT_ERROR_INVALID_BINARY; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.hpp deleted file mode 100644 index 696630bd10ca0..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.hpp +++ /dev/null @@ -1,119 +0,0 @@ -//===--------- device.hpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include <ur/ur.hpp> - -struct ur_device_handle_t_ { -private: - using native_type = CUdevice; - - native_type CuDevice; - CUcontext CuContext; - CUevent EvBase; // CUDA event used as base counter - std::atomic_uint32_t RefCount; - ur_platform_handle_t Platform; - - static constexpr uint32_t MaxWorkItemDimensions = 3u; - size_t MaxWorkItemSizes[MaxWorkItemDimensions]; - size_t MaxWorkGroupSize{0}; - size_t MaxAllocSize{0}; - int MaxBlockDimY{0}; - int MaxBlockDimZ{0}; - int MaxRegsPerBlock{0}; - int MaxCapacityLocalMem{0}; - int MaxChosenLocalMem{0}; - bool MaxLocalMemSizeChosen{false}; - -public: - ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase, - ur_platform_handle_t platform) - : CuDevice(cuDevice), CuContext(cuContext), EvBase(evBase), RefCount{1}, - Platform(platform) { - - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimY, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, cuDevice)); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimZ, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, cuDevice)); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, - cuDevice)); - - // Set local mem max size if env var is present - static const char *LocalMemSizePtrUR = - std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE"); - static const char *LocalMemSizePtrPI = - std::getenv("SYCL_PI_CUDA_MAX_LOCAL_MEM_SIZE"); - static const char *LocalMemSizePtr = - LocalMemSizePtrUR ? LocalMemSizePtrUR - : (LocalMemSizePtrPI ? LocalMemSizePtrPI : nullptr); - - if (LocalMemSizePtr) { - cuDeviceGetAttribute( - &MaxCapacityLocalMem, - CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice); - MaxChosenLocalMem = std::atoi(LocalMemSizePtr); - MaxLocalMemSizeChosen = true; - } - - // Max size of memory object allocation in bytes. - // The minimum value is max(min(1024 × 1024 × - // 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE), - // 32 × 1024 × 1024) for devices that are not of type - // CL_DEVICE_TYPE_CUSTOM.
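Worked through for a hypothetical device with 8 GiB of global memory: Global / 4 = 2 GiB, min(1 GiB, 2 GiB) = 1 GiB, and max(1 GiB, 32 MiB) = 1 GiB, so the code below yields MaxAllocSize = 1 GiB; only devices with less than 4 GiB of memory are limited by the quarter-of-global term.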
- size_t Global = 0; - UR_CHECK_ERROR(cuDeviceTotalMem(&Global, cuDevice)); - - auto QuarterGlobal = static_cast<uint32_t>(Global / 4u); - - MaxAllocSize = std::max(std::min(1024u * 1024u * 1024u, QuarterGlobal), - 32u * 1024u * 1024u); - } - - ~ur_device_handle_t_() { cuDevicePrimaryCtxRelease(CuDevice); } - - native_type get() const noexcept { return CuDevice; }; - - CUcontext getContext() const noexcept { return CuContext; }; - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - ur_platform_handle_t getPlatform() const noexcept { return Platform; }; - - uint64_t getElapsedTime(CUevent) const; - - void saveMaxWorkItemSizes(size_t Size, - size_t *SaveMaxWorkItemSizes) noexcept { - memcpy(MaxWorkItemSizes, SaveMaxWorkItemSizes, Size); - }; - - void saveMaxWorkGroupSize(int Value) noexcept { MaxWorkGroupSize = Value; }; - - void getMaxWorkItemSizes(size_t RetSize, - size_t *RetMaxWorkItemSizes) const noexcept { - memcpy(RetMaxWorkItemSizes, MaxWorkItemSizes, RetSize); - }; - - size_t getMaxWorkGroupSize() const noexcept { return MaxWorkGroupSize; }; - - size_t getMaxBlockDimY() const noexcept { return MaxBlockDimY; }; - - size_t getMaxBlockDimZ() const noexcept { return MaxBlockDimZ; }; - - size_t getMaxRegsPerBlock() const noexcept { return MaxRegsPerBlock; }; - - size_t getMaxAllocSize() const noexcept { return MaxAllocSize; }; - - int getMaxCapacityLocalMem() const noexcept { return MaxCapacityLocalMem; }; - - int getMaxChosenLocalMem() const noexcept { return MaxChosenLocalMem; }; - - bool maxLocalMemSizeChosen() { return MaxLocalMemSizeChosen; }; -}; - -int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp deleted file mode 100644 index ec1adce808681..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.cpp +++ /dev/null @@ -1,1690 +0,0 @@ -//===--------- enqueue.cpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "enqueue.hpp" -#include "common.hpp" -#include "context.hpp" -#include "event.hpp" -#include "kernel.hpp" -#include "memory.hpp" -#include "queue.hpp" - -#include <cmath> -#include <cuda.h> - -ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, CUstream Stream, - uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList) { - UR_ASSERT(EventWaitList, UR_RESULT_SUCCESS); - - try { - ScopedContext Active(CommandQueue->getContext()); - - auto Result = forLatestEvents( - EventWaitList, NumEventsInWaitList, - [Stream](ur_event_handle_t Event) -> ur_result_t { - if (Event->getStream() == Stream) { - return UR_RESULT_SUCCESS; - } else { - UR_CHECK_ERROR(cuStreamWaitEvent(Stream, Event->get(), 0)); - return UR_RESULT_SUCCESS; - } - }); - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...)
{ - return UR_RESULT_ERROR_UNKNOWN; - } -} - -template <typename PtrT> -void getUSMHostOrDevicePtr(PtrT USMPtr, CUmemorytype *OutMemType, - CUdeviceptr *OutDevPtr, PtrT *OutHostPtr) { - // do not throw if cuPointerGetAttribute returns CUDA_ERROR_INVALID_VALUE - // checks with UR_CHECK_ERROR are not suggested - CUresult Ret = cuPointerGetAttribute( - OutMemType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)USMPtr); - // ARRAY, UNIFIED types are not supported! - assert(*OutMemType != CU_MEMORYTYPE_ARRAY && - *OutMemType != CU_MEMORYTYPE_UNIFIED); - - // pointer not known to the CUDA subsystem (possibly a system allocated ptr) - if (Ret == CUDA_ERROR_INVALID_VALUE) { - *OutMemType = CU_MEMORYTYPE_HOST; - *OutDevPtr = 0; - *OutHostPtr = USMPtr; - - // todo: resets the above "non-stick" error - } else if (Ret == CUDA_SUCCESS) { - *OutDevPtr = (*OutMemType == CU_MEMORYTYPE_DEVICE) - ? reinterpret_cast<CUdeviceptr>(USMPtr) - : 0; - *OutHostPtr = (*OutMemType == CU_MEMORYTYPE_HOST) ? USMPtr : nullptr; - } else { - UR_CHECK_ERROR(Ret); - } -} - -ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size, - ur_usm_advice_flags_t URAdviceFlags, - CUdevice Device) { - std::unordered_map<ur_usm_advice_flags_t, CUmem_advise> - URToCUMemAdviseDeviceFlagsMap = { - {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, CU_MEM_ADVISE_SET_READ_MOSTLY}, - {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, - CU_MEM_ADVISE_UNSET_READ_MOSTLY}, - {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - CU_MEM_ADVISE_SET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, - CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE, - CU_MEM_ADVISE_SET_ACCESSED_BY}, - {UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE, - CU_MEM_ADVISE_UNSET_ACCESSED_BY}, - }; - for (auto &FlagPair : URToCUMemAdviseDeviceFlagsMap) { - if (URAdviceFlags & FlagPair.first) { - UR_CHECK_ERROR(cuMemAdvise(DevPtr, Size, FlagPair.second, Device)); - } - } - - std::unordered_map<ur_usm_advice_flags_t, CUmem_advise> - URToCUMemAdviseHostFlagsMap = { - {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, - CU_MEM_ADVISE_SET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, - CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST, - CU_MEM_ADVISE_SET_ACCESSED_BY}, - {UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST, - CU_MEM_ADVISE_UNSET_ACCESSED_BY}, - }; - - for (auto &FlagPair : URToCUMemAdviseHostFlagsMap) { - if (URAdviceFlags & FlagPair.first) { - UR_CHECK_ERROR(cuMemAdvise(DevPtr, Size, FlagPair.second, CU_DEVICE_CPU)); - } - } - - std::array<ur_usm_advice_flags_t, 4> UnmappedMemAdviceFlags = { - UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, - UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY, - UR_USM_ADVICE_FLAG_BIAS_CACHED, UR_USM_ADVICE_FLAG_BIAS_UNCACHED}; - - for (auto &UnmappedFlag : UnmappedMemAdviceFlags) { - if (URAdviceFlags & UnmappedFlag) { - throw UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - return UR_RESULT_SUCCESS; -} - -// Determine local work sizes that result in uniform work groups. -// The default threadsPerBlock only requires handling the first work_dim -// dimension. -void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock, - const size_t *GlobalWorkSize, const uint32_t WorkDim, - const size_t MaxThreadsPerBlock[3], - ur_kernel_handle_t Kernel, uint32_t LocalSize) { - assert(ThreadsPerBlock != nullptr); - assert(GlobalWorkSize != nullptr); - assert(Kernel != nullptr); - int MinGrid, MaxBlockSize; - size_t MaxBlockDim[3]; - - // The below assumes a three dimensional range but this is not guaranteed by - // UR.
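For example, a workDim of 1 with GlobalWorkSize = {1000} is padded below to GlobalSizeNormalized = {1000, 1, 1}, so the second and third dimensions fall through the per-dimension logic harmlessly.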
- size_t GlobalSizeNormalized[3] = {1, 1, 1}; - for (uint32_t i = 0; i < WorkDim; i++) { - GlobalSizeNormalized[i] = GlobalWorkSize[i]; - } - - MaxBlockDim[1] = Device->getMaxBlockDimY(); - MaxBlockDim[2] = Device->getMaxBlockDimZ(); - - UR_CHECK_ERROR( - cuOccupancyMaxPotentialBlockSize(&MinGrid, &MaxBlockSize, Kernel->get(), - NULL, LocalSize, MaxThreadsPerBlock[0])); - - ThreadsPerBlock[2] = std::min(GlobalSizeNormalized[2], MaxBlockDim[2]); - ThreadsPerBlock[1] = - std::min(GlobalSizeNormalized[1], - std::min(MaxBlockSize / ThreadsPerBlock[2], MaxBlockDim[1])); - MaxBlockDim[0] = MaxBlockSize / (ThreadsPerBlock[1] * ThreadsPerBlock[2]); - ThreadsPerBlock[0] = std::min( - MaxThreadsPerBlock[0], std::min(GlobalSizeNormalized[0], MaxBlockDim[0])); - - static auto IsPowerOf2 = [](size_t Value) -> bool { - return Value && !(Value & (Value - 1)); - }; - - // Find a local work group size that is a divisor of the global - // work group size to produce uniform work groups. - // Additionally, for best compute utilisation, the local size has - // to be a power of two. - while (0u != (GlobalSizeNormalized[0] % ThreadsPerBlock[0]) || - !IsPowerOf2(ThreadsPerBlock[0])) { - --ThreadsPerBlock[0]; - } -} - -// Helper to verify out-of-registers case (exceeded block max registers). -// If the kernel requires a number of registers for the entire thread -// block exceeds the hardware limitations, then the cuLaunchKernel call -// will fail to launch with CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES error. -bool hasExceededMaxRegistersPerBlock(ur_device_handle_t Device, - ur_kernel_handle_t Kernel, - size_t BlockSize) { - return BlockSize * Kernel->getRegsPerThread() > Device->getMaxRegsPerBlock(); -} - -/// Enqueues a wait on the given CUstream for all specified events (See -/// \ref enqueueEventWaitWithBarrier.) If the events list is empty, the enqueued -/// wait will wait on all previous events in the queue. -/// -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // This function makes one stream work on the previous work (or work - // represented by input events) and then all future work waits on that stream. 
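The barrier described in this comment is built from the standard CUDA record-then-wait idiom; a minimal sketch under assumed names (OtherStream and ChosenStream are placeholders, not identifiers from this file):

    // One marker event per foreign stream; the chosen stream waits on each
    // marker, so work enqueued after the barrier orders behind all of them.
    CUevent Marker;
    UR_CHECK_ERROR(cuEventCreate(&Marker, CU_EVENT_DISABLE_TIMING));
    UR_CHECK_ERROR(cuEventRecord(Marker, OtherStream));
    UR_CHECK_ERROR(cuStreamWaitEvent(ChosenStream, Marker, 0));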
- try { - ScopedContext Active(hQueue->getContext()); - uint32_t StreamToken; - ur_stream_guard_ Guard; - CUstream CuStream = hQueue->getNextComputeStream( - numEventsInWaitList, phEventWaitList, Guard, &StreamToken); - { - std::lock_guard GuardBarrier(hQueue->BarrierMutex); - if (hQueue->BarrierEvent == nullptr) { - UR_CHECK_ERROR( - cuEventCreate(&hQueue->BarrierEvent, CU_EVENT_DISABLE_TIMING)); - } - if (numEventsInWaitList == 0) { // wait on all work - if (hQueue->BarrierTmpEvent == nullptr) { - UR_CHECK_ERROR( - cuEventCreate(&hQueue->BarrierTmpEvent, CU_EVENT_DISABLE_TIMING)); - } - hQueue->syncStreams( - [CuStream, TmpEvent = hQueue->BarrierTmpEvent](CUstream s) { - if (CuStream != s) { - // record a new CUDA event on every stream and make one stream - // wait for these events - UR_CHECK_ERROR(cuEventRecord(TmpEvent, s)); - UR_CHECK_ERROR(cuStreamWaitEvent(CuStream, TmpEvent, 0)); - } - }); - } else { // wait just on given events - forLatestEvents(phEventWaitList, numEventsInWaitList, - [CuStream](ur_event_handle_t Event) -> ur_result_t { - if (Event->getQueue()->hasBeenSynchronized( - Event->getComputeStreamToken())) { - return UR_RESULT_SUCCESS; - } else { - UR_CHECK_ERROR( - cuStreamWaitEvent(CuStream, Event->get(), 0)); - return UR_RESULT_SUCCESS; - } - }); - } - - UR_CHECK_ERROR(cuEventRecord(hQueue->BarrierEvent, CuStream)); - for (unsigned int i = 0; i < hQueue->ComputeAppliedBarrier.size(); i++) { - hQueue->ComputeAppliedBarrier[i] = false; - } - for (unsigned int i = 0; i < hQueue->TransferAppliedBarrier.size(); i++) { - hQueue->TransferAppliedBarrier[i] = false; - } - } - - if (phEvent) { - *phEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, hQueue, CuStream, StreamToken); - UR_CHECK_ERROR((*phEvent)->start()); - UR_CHECK_ERROR((*phEvent)->record()); - } - - return UR_RESULT_SUCCESS; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } -} - -/// Enqueues a wait on the given CUstream for all events. -/// See \ref enqueueEventWait -/// TODO: Add support for multiple streams once the Event class is properly -/// refactored. 
-/// -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitList, phEvent); -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Preconditions - UR_ASSERT(hQueue->getContext() == hKernel->getContext(), - UR_RESULT_ERROR_INVALID_KERNEL); - UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - - if (*pGlobalWorkSize == 0) { - return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitList, phEvent); - } - - // Set the number of threads per block to the number of threads per warp - // by default unless user has provided a better number - size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; - size_t MaxWorkGroupSize = 0u; - size_t MaxThreadsPerBlock[3] = {}; - bool ProvidedLocalWorkGroupSize = (pLocalWorkSize != nullptr); - uint32_t LocalSize = hKernel->getLocalSize(); - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - // Set the active context here as guessLocalWorkSize needs an active context - ScopedContext Active(hQueue->getContext()); - { - size_t *ReqdThreadsPerBlock = hKernel->ReqdThreadsPerBlock; - MaxWorkGroupSize = hQueue->Device->getMaxWorkGroupSize(); - hQueue->Device->getMaxWorkItemSizes(sizeof(MaxThreadsPerBlock), - MaxThreadsPerBlock); - - if (ProvidedLocalWorkGroupSize) { - auto IsValid = [&](int Dim) { - if (ReqdThreadsPerBlock[Dim] != 0 && - pLocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim]) - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - - if (pLocalWorkSize[Dim] > MaxThreadsPerBlock[Dim]) - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - // Checks that local work sizes are a divisor of the global work sizes - // which includes that the local work sizes are neither larger than - // the global work sizes and not 0. - if (0u == pLocalWorkSize[Dim]) - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - if (0u != (pGlobalWorkSize[Dim] % pLocalWorkSize[Dim])) - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - ThreadsPerBlock[Dim] = pLocalWorkSize[Dim]; - return UR_RESULT_SUCCESS; - }; - - size_t KernelLocalWorkGroupSize = 0; - for (size_t Dim = 0; Dim < workDim; Dim++) { - auto Err = IsValid(Dim); - if (Err != UR_RESULT_SUCCESS) - return Err; - // If no error then sum the total local work size per dim. 
- KernelLocalWorkGroupSize += pLocalWorkSize[Dim]; - } - - if (hasExceededMaxRegistersPerBlock(hQueue->Device, hKernel, - KernelLocalWorkGroupSize)) { - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - } - } else { - guessLocalWorkSize(hQueue->Device, ThreadsPerBlock, pGlobalWorkSize, - workDim, MaxThreadsPerBlock, hKernel, LocalSize); - } - } - - if (MaxWorkGroupSize < - ThreadsPerBlock[0] * ThreadsPerBlock[1] * ThreadsPerBlock[2]) { - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - } - - size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - - for (size_t i = 0; i < workDim; i++) { - BlocksPerGrid[i] = - (pGlobalWorkSize[i] + ThreadsPerBlock[i] - 1) / ThreadsPerBlock[i]; - } - - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - uint32_t StreamToken; - ur_stream_guard_ Guard; - CUstream CuStream = hQueue->getNextComputeStream( - numEventsInWaitList, phEventWaitList, Guard, &StreamToken); - CUfunction CuFunc = hKernel->get(); - - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - // Set the implicit global offset parameter if kernel has offset variant - if (hKernel->get_with_offset_parameter()) { - std::uint32_t CudaImplicitOffset[3] = {0, 0, 0}; - if (pGlobalWorkOffset) { - for (size_t i = 0; i < workDim; i++) { - CudaImplicitOffset[i] = - static_cast<std::uint32_t>(pGlobalWorkOffset[i]); - if (pGlobalWorkOffset[i] != 0) { - CuFunc = hKernel->get_with_offset_parameter(); - } - } - } - hKernel->setImplicitOffsetArg(sizeof(CudaImplicitOffset), - CudaImplicitOffset); - } - - auto &ArgIndices = hKernel->getArgIndices(); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_KERNEL_LAUNCH, hQueue, CuStream, StreamToken)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - if (hQueue->getContext()->getDevice()->maxLocalMemSizeChosen()) { - // Set up local memory requirements for kernel. - auto Device = hQueue->getContext()->getDevice(); - if (Device->getMaxChosenLocalMem() < 0) { - bool EnvVarHasURPrefix = - (std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr); - setErrorMessage(EnvVarHasURPrefix ? "Invalid value specified for " - "UR_CUDA_MAX_LOCAL_MEM_SIZE" - : "Invalid value specified for " - "SYCL_PI_CUDA_MAX_LOCAL_MEM_SIZE", - UR_RESULT_ERROR_ADAPTER_SPECIFIC); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) { - setErrorMessage("Too much local memory allocated for device", - UR_RESULT_ERROR_ADAPTER_SPECIFIC); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - if (LocalSize > static_cast<uint32_t>(Device->getMaxChosenLocalMem())) { - bool EnvVarHasURPrefix = - (std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr); - setErrorMessage( - EnvVarHasURPrefix - ? "Local memory for kernel exceeds the amount requested using " - "UR_CUDA_MAX_LOCAL_MEM_SIZE. Try increasing the value of " - "UR_CUDA_MAX_LOCAL_MEM_SIZE." - : "Local memory for kernel exceeds the amount requested using " - "SYCL_PI_CUDA_MAX_LOCAL_MEM_SIZE. 
Try increasing the " - "value of SYCL_PI_CUDA_MAX_LOCAL_MEM_SIZE.", - UR_RESULT_ERROR_ADAPTER_SPECIFIC); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - UR_CHECK_ERROR(cuFuncSetAttribute( - CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, - Device->getMaxChosenLocalMem())); - } - - UR_CHECK_ERROR(cuLaunchKernel( - CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2], - ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize, - CuStream, const_cast<void **>(ArgIndices.data()), nullptr)); - if (LocalSize != 0) - hKernel->clearLocalSize(); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -/// General 3D memory copy operation. -/// This function requires the corresponding CUDA context to be at the top of -/// the context stack -/// If the source and/or destination is on the device, SrcPtr and/or DstPtr -/// must be a pointer to a CUdeviceptr -static ur_result_t commonEnqueueMemBufferCopyRect( - CUstream cu_stream, ur_rect_region_t region, const void *SrcPtr, - const CUmemorytype_enum SrcType, ur_rect_offset_t src_offset, - size_t src_row_pitch, size_t src_slice_pitch, void *DstPtr, - const CUmemorytype_enum DstType, ur_rect_offset_t dst_offset, - size_t dst_row_pitch, size_t dst_slice_pitch) { - - UR_ASSERT(SrcType == CU_MEMORYTYPE_DEVICE || SrcType == CU_MEMORYTYPE_HOST, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(DstType == CU_MEMORYTYPE_DEVICE || DstType == CU_MEMORYTYPE_HOST, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - src_row_pitch = - (!src_row_pitch) ? region.width + src_offset.x : src_row_pitch; - src_slice_pitch = (!src_slice_pitch) - ? ((region.height + src_offset.y) * src_row_pitch) - : src_slice_pitch; - dst_row_pitch = - (!dst_row_pitch) ? region.width + dst_offset.x : dst_row_pitch; - dst_slice_pitch = (!dst_slice_pitch) - ? ((region.height + dst_offset.y) * dst_row_pitch) - : dst_slice_pitch; - - CUDA_MEMCPY3D params = {}; - - params.WidthInBytes = region.width; - params.Height = region.height; - params.Depth = region.depth; - - params.srcMemoryType = SrcType; - params.srcDevice = SrcType == CU_MEMORYTYPE_DEVICE - ? *static_cast<const CUdeviceptr *>(SrcPtr) - : 0; - params.srcHost = SrcType == CU_MEMORYTYPE_HOST ? SrcPtr : nullptr; - params.srcXInBytes = src_offset.x; - params.srcY = src_offset.y; - params.srcZ = src_offset.z; - params.srcPitch = src_row_pitch; - params.srcHeight = src_slice_pitch / src_row_pitch; - - params.dstMemoryType = DstType; - params.dstDevice = - DstType == CU_MEMORYTYPE_DEVICE ? *static_cast<CUdeviceptr *>(DstPtr) : 0; - params.dstHost = DstType == CU_MEMORYTYPE_HOST ? 
DstPtr : nullptr; - params.dstXInBytes = dst_offset.x; - params.dstY = dst_offset.y; - params.dstZ = dst_offset.z; - params.dstPitch = dst_row_pitch; - params.dstHeight = dst_slice_pitch / dst_row_pitch; - - UR_CHECK_ERROR(cuMemcpy3DAsync(&params, cu_stream)); - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pDst, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - CUdeviceptr DevPtr = hBuffer->Mem.BufferMem.get(); - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_READ_RECT, hQueue, CuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemBufferCopyRect( - CuStream, region, &DevPtr, CU_MEMORYTYPE_DEVICE, bufferOrigin, - bufferRowPitch, bufferSlicePitch, pDst, CU_MEMORYTYPE_HOST, hostOrigin, - hostRowPitch, hostSlicePitch); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingRead) { - UR_CHECK_ERROR(cuStreamSynchronize(CuStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - CUdeviceptr DevPtr = hBuffer->Mem.BufferMem.get(); - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - try { - ScopedContext active(hQueue->getContext()); - CUstream cuStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, cuStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_WRITE_RECT, hQueue, cuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemBufferCopyRect( - cuStream, region, pSrc, CU_MEMORYTYPE_HOST, hostOrigin, hostRowPitch, - hostSlicePitch, &DevPtr, CU_MEMORYTYPE_DEVICE, bufferOrigin, - bufferRowPitch, bufferSlicePitch); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingWrite) { - UR_CHECK_ERROR(cuStreamSynchronize(cuStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - UR_ASSERT(size + dstOffset <= 
hBufferDst->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(size + srcOffset <= hBufferSrc->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - ur_result_t Result = UR_RESULT_SUCCESS; - - auto Stream = hQueue->getNextTransferStream(); - Result = - enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_COPY, hQueue, Stream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - auto Src = hBufferSrc->Mem.BufferMem.get() + srcOffset; - auto Dst = hBufferDst->Mem.BufferMem.get() + dstOffset; - - UR_CHECK_ERROR(cuMemcpyDtoDAsync(Dst, Src, size, Stream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - CUdeviceptr SrcPtr = hBufferSrc->Mem.BufferMem.get(); - CUdeviceptr DstPtr = hBufferDst->Mem.BufferMem.get(); - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_COPY_RECT, hQueue, CuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - Result = commonEnqueueMemBufferCopyRect( - CuStream, region, &SrcPtr, CU_MEMORYTYPE_DEVICE, srcOrigin, srcRowPitch, - srcSlicePitch, &DstPtr, CU_MEMORYTYPE_DEVICE, dstOrigin, dstRowPitch, - dstSlicePitch); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t err) { - Result = err; - } - return Result; -} - -// CUDA has no memset functions that allow setting values more than 4 bytes. UR -// API lets you pass an arbitrary "pattern" to the buffer fill, which can be -// more than 4 bytes. We must break up the pattern into 1 byte values, and set -// the buffer using multiple strided calls. The first 4 bytes of the pattern -// are set using cuMemsetD32Async, then each subsequent byte is set using -// cuMemsetD2D8Async, which is called once per byte. -ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize, - size_t Size, const void *pPattern, - CUdeviceptr Ptr) { - // Calculate the number of patterns, stride, number of times the pattern - // needs to be applied, and the number of times the first 32 bit pattern - // needs to be applied.
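Worked through for a hypothetical 12-byte pattern filling 120 bytes: NumberOfSteps = 12, Pitch = 12, Height = 10 and Count32 = 30, so cuMemsetD32Async stamps the first 4 pattern bytes across the whole region (rows start at multiples of 12, which are 4-byte aligned, so bytes 0-3 of each row end up correct), and eight cuMemsetD2D8Async calls (steps 4 through 11) each overwrite one byte per 12-byte row.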
- auto NumberOfSteps = PatternSize / sizeof(uint8_t); - auto Pitch = NumberOfSteps * sizeof(uint8_t); - auto Height = Size / NumberOfSteps; - auto Count32 = Size / sizeof(uint32_t); - - // Get 4-byte chunk of the pattern and call cuMemsetD32Async - auto Value = *(static_cast<const uint32_t *>(pPattern)); - UR_CHECK_ERROR(cuMemsetD32Async(Ptr, Value, Count32, Stream)); - for (auto step = 4u; step < NumberOfSteps; ++step) { - // take 1 byte of the pattern - Value = *(static_cast<const uint8_t *>(pPattern) + step); - - // offset the pointer to the part of the buffer we want to write to - auto OffsetPtr = Ptr + (step * sizeof(uint8_t)); - - // set all of the pattern chunks - UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, Value, sizeof(uint8_t), - Height, Stream)); - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - UR_ASSERT(size + offset <= hBuffer->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - - auto Stream = hQueue->getNextTransferStream(); - ur_result_t Result = - enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_FILL, hQueue, Stream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - auto DstDevice = hBuffer->Mem.BufferMem.get() + offset; - auto N = size / patternSize; - - // pattern size in bytes - switch (patternSize) { - case 1: { - auto Value = *static_cast<const uint8_t *>(pPattern); - UR_CHECK_ERROR(cuMemsetD8Async(DstDevice, Value, N, Stream)); - break; - } - case 2: { - auto Value = *static_cast<const uint16_t *>(pPattern); - UR_CHECK_ERROR(cuMemsetD16Async(DstDevice, Value, N, Stream)); - break; - } - case 4: { - auto Value = *static_cast<const uint32_t *>(pPattern); - UR_CHECK_ERROR(cuMemsetD32Async(DstDevice, Value, N, Stream)); - break; - } - default: { - Result = commonMemSetLargePattern(Stream, patternSize, size, pPattern, - DstDevice); - break; - } - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } -} - -static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) { - switch (ArrayDesc.Format) { - case CU_AD_FORMAT_UNSIGNED_INT8: - case CU_AD_FORMAT_SIGNED_INT8: - return 1; - case CU_AD_FORMAT_UNSIGNED_INT16: - case CU_AD_FORMAT_SIGNED_INT16: - case CU_AD_FORMAT_HALF: - return 2; - case CU_AD_FORMAT_UNSIGNED_INT32: - case CU_AD_FORMAT_SIGNED_INT32: - case CU_AD_FORMAT_FLOAT: - return 4; - default: - detail::ur::die("Invalid image format."); - return 0; - } -} - -/// General ND memory copy operation for images (where N > 1). 
-/// This function requires the corresponding CUDA context to be at the top of -/// the context stack -/// If the source and/or destination is an array, SrcPtr and/or DstPtr -/// must be a pointer to a CUarray -static ur_result_t commonEnqueueMemImageNDCopy( - CUstream CuStream, ur_mem_type_t ImgType, const ur_rect_region_t Region, - const void *SrcPtr, const CUmemorytype_enum SrcType, - const ur_rect_offset_t SrcOffset, void *DstPtr, - const CUmemorytype_enum DstType, const ur_rect_offset_t DstOffset) { - UR_ASSERT(SrcType == CU_MEMORYTYPE_ARRAY || SrcType == CU_MEMORYTYPE_HOST, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(DstType == CU_MEMORYTYPE_ARRAY || DstType == CU_MEMORYTYPE_HOST, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - if (ImgType == UR_MEM_TYPE_IMAGE2D) { - CUDA_MEMCPY2D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = SrcType; - if (SrcType == CU_MEMORYTYPE_ARRAY) { - CpyDesc.srcArray = *static_cast(SrcPtr); - CpyDesc.srcXInBytes = SrcOffset.x; - CpyDesc.srcY = SrcOffset.y; - } else { - CpyDesc.srcHost = SrcPtr; - } - CpyDesc.dstMemoryType = DstType; - if (DstType == CU_MEMORYTYPE_ARRAY) { - CpyDesc.dstArray = *static_cast(DstPtr); - CpyDesc.dstXInBytes = DstOffset.x; - CpyDesc.dstY = DstOffset.y; - } else { - CpyDesc.dstHost = DstPtr; - } - CpyDesc.WidthInBytes = Region.width; - CpyDesc.Height = Region.height; - UR_CHECK_ERROR(cuMemcpy2DAsync(&CpyDesc, CuStream)); - return UR_RESULT_SUCCESS; - } - if (ImgType == UR_MEM_TYPE_IMAGE3D) { - CUDA_MEMCPY3D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = SrcType; - if (SrcType == CU_MEMORYTYPE_ARRAY) { - CpyDesc.srcArray = *static_cast(SrcPtr); - CpyDesc.srcXInBytes = SrcOffset.x; - CpyDesc.srcY = SrcOffset.y; - CpyDesc.srcZ = SrcOffset.z; - } else { - CpyDesc.srcHost = SrcPtr; - } - CpyDesc.dstMemoryType = DstType; - if (DstType == CU_MEMORYTYPE_ARRAY) { - CpyDesc.dstArray = *static_cast(DstPtr); - CpyDesc.dstXInBytes = DstOffset.x; - CpyDesc.dstY = DstOffset.y; - CpyDesc.dstZ = DstOffset.z; - } else { - CpyDesc.dstHost = DstPtr; - } - CpyDesc.WidthInBytes = Region.width; - CpyDesc.Height = Region.height; - CpyDesc.Depth = Region.depth; - UR_CHECK_ERROR(cuMemcpy3DAsync(&CpyDesc, CuStream)); - return UR_RESULT_SUCCESS; - } - return UR_RESULT_ERROR_INVALID_VALUE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, - ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = rowPitch; - std::ignore = slicePitch; - - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - CUarray Array = hImage->Mem.SurfaceMem.getArray(); - - CUDA_ARRAY_DESCRIPTOR ArrayDesc; - UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array)); - - int ElementByteSize = imageElementByteSize(ArrayDesc); - - size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels; - size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width; - - ur_mem_type_t ImgType = hImage->Mem.SurfaceMem.getImageType(); - - std::unique_ptr RetImplEvent{nullptr}; - if (phEvent) { 
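The image read path below reduces the element-space origin to byte units and hands commonEnqueueMemImageNDCopy a CUDA_MEMCPY2D. A minimal sketch combining both steps for a hypothetical 4-channel float image (assumes an initialized context, an existing CUarray large enough for the region, and a valid stream; error handling elided):

```cpp
#include <cuda.h>

void readRegionSketch(CUarray Array, CUstream Stream, void *HostDst,
                      size_t OriginX, size_t OriginY, size_t WidthElems,
                      size_t HeightElems) {
  const size_t ElementByteSize = 4; // CU_AD_FORMAT_FLOAT
  const size_t NumChannels = 4;     // e.g. RGBA
  size_t ByteOffsetX = OriginX * ElementByteSize * NumChannels;
  size_t BytesToCopy = ElementByteSize * NumChannels * WidthElems;

  CUDA_MEMCPY2D Cpy = {};
  Cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;
  Cpy.srcArray = Array;
  Cpy.srcXInBytes = ByteOffsetX; // x offset is expressed in bytes
  Cpy.srcY = OriginY;            // y offset stays in rows
  Cpy.dstMemoryType = CU_MEMORYTYPE_HOST;
  Cpy.dstHost = HostDst;
  Cpy.WidthInBytes = BytesToCopy;
  Cpy.Height = HeightElems;
  cuMemcpy2DAsync(&Cpy, Stream); // error handling elided in this sketch
}
```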
- RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_IMAGE_READ, hQueue, CuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - if (ImgType == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR( - cuMemcpyAtoHAsync(pDst, Array, ByteOffsetX, BytesToCopy, CuStream)); - } else { - ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height, - region.depth}; - ur_rect_offset_t SrcOffset = {ByteOffsetX, origin.y, origin.z}; - - Result = commonEnqueueMemImageNDCopy( - CuStream, ImgType, AdjustedRegion, &Array, CU_MEMORYTYPE_ARRAY, - SrcOffset, pDst, CU_MEMORYTYPE_HOST, ur_rect_offset_t{}); - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - if (blockingRead) { - UR_CHECK_ERROR(cuStreamSynchronize(CuStream)); - } - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, - ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = blockingWrite; - std::ignore = rowPitch; - std::ignore = slicePitch; - - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - CUarray Array = hImage->Mem.SurfaceMem.getArray(); - - CUDA_ARRAY_DESCRIPTOR ArrayDesc; - UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array)); - - int ElementByteSize = imageElementByteSize(ArrayDesc); - - size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels; - size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width; - - std::unique_ptr RetImplEvent{nullptr}; - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_IMAGE_WRITE, hQueue, CuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - ur_mem_type_t ImgType = hImage->Mem.SurfaceMem.getImageType(); - if (ImgType == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR( - cuMemcpyHtoAAsync(Array, ByteOffsetX, pSrc, BytesToCopy, CuStream)); - } else { - ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height, - region.depth}; - ur_rect_offset_t DstOffset = {ByteOffsetX, origin.y, origin.z}; - - Result = commonEnqueueMemImageNDCopy( - CuStream, ImgType, AdjustedRegion, pSrc, CU_MEMORYTYPE_HOST, - ur_rect_offset_t{}, &Array, CU_MEMORYTYPE_ARRAY, DstOffset); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t Err) { - return Err; - } catch (...) 
{ - return UR_RESULT_ERROR_UNKNOWN; - } - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - UR_ASSERT(hImageSrc->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hImageDst->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hImageSrc->Mem.SurfaceMem.getImageType() == - hImageDst->Mem.SurfaceMem.getImageType(), - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - CUarray SrcArray = hImageSrc->Mem.SurfaceMem.getArray(); - CUarray DstArray = hImageDst->Mem.SurfaceMem.getArray(); - - CUDA_ARRAY_DESCRIPTOR SrcArrayDesc; - UR_CHECK_ERROR(cuArrayGetDescriptor(&SrcArrayDesc, SrcArray)); - CUDA_ARRAY_DESCRIPTOR DstArrayDesc; - UR_CHECK_ERROR(cuArrayGetDescriptor(&DstArrayDesc, DstArray)); - - UR_ASSERT(SrcArrayDesc.Format == DstArrayDesc.Format, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - int ElementByteSize = imageElementByteSize(SrcArrayDesc); - - size_t DstByteOffsetX = - dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels; - size_t SrcByteOffsetX = - srcOrigin.x * ElementByteSize * DstArrayDesc.NumChannels; - size_t BytesToCopy = - ElementByteSize * SrcArrayDesc.NumChannels * region.width; - - std::unique_ptr RetImplEvent{nullptr}; - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_IMAGE_COPY, hQueue, CuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - ur_mem_type_t ImgType = hImageSrc->Mem.SurfaceMem.getImageType(); - if (ImgType == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR(cuMemcpyAtoA(DstArray, DstByteOffsetX, SrcArray, - SrcByteOffsetX, BytesToCopy)); - } else { - ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height, - region.depth}; - ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z}; - ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z}; - - Result = commonEnqueueMemImageNDCopy( - CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY, - SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset); - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return Result; -} - -/// Implements mapping on the host using a BufferRead operation. -/// Mapped pointers are stored in the pi_mem object. -/// If the buffer uses pinned host memory a pointer to that memory is returned -/// and no read operation is done. 
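This protocol, together with the matching unmap further below, amounts to a simple staging round trip. A host-only mock makes the shape of it explicit; all names here are illustrative, not the adapter's types:

```cpp
#include <cassert>
#include <cstring>
#include <vector>

// Host-only mock of the map/unmap staging protocol.
struct MockBuffer {
  std::vector<unsigned char> Device;  // stands in for the device allocation
  std::vector<unsigned char> Staging; // stands in for the host map pointer
  size_t MapOffset = 0, MapSize = 0;

  void *map(size_t Offset, size_t Size, bool Read) {
    MapOffset = Offset;
    MapSize = Size;
    Staging.resize(Size);
    if (Read) // mirrors the urEnqueueMemBufferRead issued by a readable map
      std::memcpy(Staging.data(), Device.data() + Offset, Size);
    return Staging.data();
  }

  void unmap(bool Write) {
    if (Write) // mirrors the urEnqueueMemBufferWrite issued on unmap
      std::memcpy(Device.data() + MapOffset, Staging.data(), MapSize);
    Staging.clear();
  }
};

int main() {
  MockBuffer Buf;
  Buf.Device.assign(16, 0);
  auto *P = static_cast<unsigned char *>(Buf.map(4, 8, /*Read=*/true));
  P[0] = 42;                 // host-side update through the mapped pointer
  Buf.unmap(/*Write=*/true); // written back at the recorded offset
  assert(Buf.Device[4] == 42);
}
```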
-/// -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { - UR_ASSERT(hBuffer->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(offset + size <= hBuffer->Mem.BufferMem.getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - - ur_result_t Result = UR_RESULT_ERROR_INVALID_MEM_OBJECT; - const bool IsPinned = - hBuffer->Mem.BufferMem.MemAllocMode == - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr; - - // Currently no support for overlapping regions - if (hBuffer->Mem.BufferMem.getMapPtr() != nullptr) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - // Allocate a pointer in the host to store the mapped information - auto HostPtr = hBuffer->Mem.BufferMem.mapToPtr(size, offset, mapFlags); - *ppRetMap = hBuffer->Mem.BufferMem.getMapPtr(); - if (HostPtr) { - Result = UR_RESULT_SUCCESS; - } - - if (!IsPinned && - ((mapFlags & UR_MAP_FLAG_READ) || (mapFlags & UR_MAP_FLAG_WRITE))) { - // Pinned host memory is already on host so it doesn't need to be read. - Result = urEnqueueMemBufferRead(hQueue, hBuffer, blockingMap, offset, size, - HostPtr, numEventsInWaitList, - phEventWaitList, phEvent); - } else { - ScopedContext Active(hQueue->getContext()); - - if (IsPinned) { - Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList, - nullptr); - } - - if (phEvent) { - try { - *phEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_MAP, hQueue, hQueue->getNextTransferStream()); - UR_CHECK_ERROR((*phEvent)->start()); - UR_CHECK_ERROR((*phEvent)->record()); - } catch (ur_result_t Err) { - Result = Err; - } - } - } - - return Result; -} - -/// Implements the unmap from the host, using a BufferWrite operation. -/// Requires the mapped pointer to be already registered in the given memobj. -/// If memobj uses pinned host memory, this will not do a write. -/// -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - UR_ASSERT(hMem->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hMem->Mem.BufferMem.getMapPtr() != nullptr, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hMem->Mem.BufferMem.getMapPtr() == pMappedPtr, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - const bool IsPinned = - hMem->Mem.BufferMem.MemAllocMode == - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr; - - if (!IsPinned && (hMem->Mem.BufferMem.getMapFlags() & UR_MAP_FLAG_WRITE)) { - // Pinned host memory is only on host so it doesn't need to be written to. 
- Result = urEnqueueMemBufferWrite( - hQueue, hMem, true, hMem->Mem.BufferMem.getMapOffset(), - hMem->Mem.BufferMem.getMapSize(), pMappedPtr, numEventsInWaitList, - phEventWaitList, phEvent); - } else { - ScopedContext Active(hQueue->getContext()); - - if (IsPinned) { - Result = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList, - nullptr); - } - - if (phEvent) { - try { - *phEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_UNMAP, hQueue, hQueue->getNextTransferStream()); - UR_CHECK_ERROR((*phEvent)->start()); - UR_CHECK_ERROR((*phEvent)->record()); - } catch (ur_result_t Err) { - Result = Err; - } - } - } - - hMem->Mem.BufferMem.unmap(pMappedPtr); - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *ptr, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr EventPtr{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - uint32_t StreamToken; - ur_stream_guard_ Guard; - CUstream CuStream = hQueue->getNextComputeStream( - numEventsInWaitList, phEventWaitList, Guard, &StreamToken); - UR_CHECK_ERROR(enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList)); - if (phEvent) { - EventPtr = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_USM_FILL, hQueue, CuStream, StreamToken)); - UR_CHECK_ERROR(EventPtr->start()); - } - - auto N = size / patternSize; - switch (patternSize) { - case 1: - UR_CHECK_ERROR(cuMemsetD8Async( - (CUdeviceptr)ptr, *((const uint8_t *)pPattern) & 0xFF, N, CuStream)); - break; - case 2: - UR_CHECK_ERROR(cuMemsetD16Async((CUdeviceptr)ptr, - *((const uint16_t *)pPattern) & 0xFFFF, N, - CuStream)); - break; - case 4: - UR_CHECK_ERROR(cuMemsetD32Async( - (CUdeviceptr)ptr, *((const uint32_t *)pPattern) & 0xFFFFFFFF, N, - CuStream)); - break; - default: - commonMemSetLargePattern(CuStream, patternSize, size, pPattern, - (CUdeviceptr)ptr); - break; - } - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - *phEvent = EventPtr.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - ur_result_t Result = UR_RESULT_SUCCESS; - - std::unique_ptr EventPtr{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - if (phEvent) { - EventPtr = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_USM_MEMCPY, hQueue, CuStream)); - UR_CHECK_ERROR(EventPtr->start()); - } - UR_CHECK_ERROR( - cuMemcpyAsync((CUdeviceptr)pDst, (CUdeviceptr)pSrc, size, CuStream)); - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - } - if (blocking) { - UR_CHECK_ERROR(cuStreamSynchronize(CuStream)); - } - if (phEvent) { - *phEvent = EventPtr.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - 
unsigned int PointerRangeSize = 0; - UR_CHECK_ERROR(cuPointerGetAttribute( - &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem)); - UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); - ur_device_handle_t Device = hQueue->getContext()->getDevice(); - - // Certain cuda devices and Windows do not have support for some Unified - // Memory features. cuMemPrefetchAsync requires concurrent memory access - // for managed memory. Therefore, ignore the prefetch hint if concurrent - // managed memory access is not available. - if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - setErrorMessage("Prefetch hint ignored as device does not support " - "concurrent managed access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - unsigned int IsManaged; - UR_CHECK_ERROR(cuPointerGetAttribute( - &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem)); - if (!IsManaged) { - setErrorMessage("Prefetch hint ignored as prefetch only works with USM", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - // flags is currently unused so fail if set - if (flags != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - if (phEvent) { - EventPtr = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_COPY, hQueue, CuStream)); - UR_CHECK_ERROR(EventPtr->start()); - } - UR_CHECK_ERROR( - cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream)); - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - *phEvent = EventPtr.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -/// USM: memadvise API to govern behavior of automatic migration mechanisms -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - unsigned int PointerRangeSize = 0; - UR_CHECK_ERROR(cuPointerGetAttribute( - &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem)); - UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); - - // Certain cuda devices and Windows do not have support for some Unified - // Memory features. Passing CU_MEM_ADVISE_SET/CLEAR_PREFERRED_LOCATION or - // CU_MEM_ADVISE_SET/CLEAR_ACCESSED_BY to cuMemAdvise on a GPU device - // requires the GPU device to report a non-zero value for - // CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Therefore, ignore the - // memory advice if concurrent managed memory access is not available. - if ((advice & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) || - (advice & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) || - (advice & UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE) || - (advice & UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE) || - (advice & UR_USM_ADVICE_FLAG_DEFAULT)) { - ur_device_handle_t Device = hQueue->getContext()->getDevice(); - if (!getAttribute(Device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) { - setErrorMessage("Mem advise ignored as device does not support " - "concurrent managed access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - // TODO: If ptr points to valid system-allocated pageable memory we should - // check that the device also has the - // CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS property.
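setCuMemAdvise, used further below for the non-default advice path, boils down to translating UR advice flags into cuMemAdvise calls. A hedged sketch of that translation; the flag-to-advice pairs shown are an illustrative subset, not necessarily the adapter's exact table:

```cpp
#include <cuda.h>
#include <ur_api.h>

ur_result_t setCuMemAdviseSketch(CUdeviceptr Ptr, size_t Size,
                                 ur_usm_advice_flags_t Advice, CUdevice Dev) {
  // Apply one CUDA advice when the corresponding UR flag is set.
  auto Apply = [&](ur_usm_advice_flags_t Flag, CUmem_advise CuAdvice) {
    if (Advice & Flag)
      cuMemAdvise(Ptr, Size, CuAdvice, Dev); // error handling elided
  };
  Apply(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, CU_MEM_ADVISE_SET_READ_MOSTLY);
  Apply(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, CU_MEM_ADVISE_UNSET_READ_MOSTLY);
  Apply(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION,
        CU_MEM_ADVISE_SET_PREFERRED_LOCATION);
  Apply(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION,
        CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION);
  Apply(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE,
        CU_MEM_ADVISE_SET_ACCESSED_BY);
  Apply(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE,
        CU_MEM_ADVISE_UNSET_ACCESSED_BY);
  return UR_RESULT_SUCCESS;
}
```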
- } - - unsigned int IsManaged; - UR_CHECK_ERROR(cuPointerGetAttribute( - &IsManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem)); - if (!IsManaged) { - setErrorMessage( - "Memory advice ignored as memory advice only works with USM", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - - if (phEvent) { - EventPtr = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_USM_ADVISE, hQueue, hQueue->getNextTransferStream())); - UR_CHECK_ERROR(EventPtr->start()); - } - - if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_READ_MOSTLY, - hQueue->getContext()->getDevice()->get())); - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION, - hQueue->getContext()->getDevice()->get())); - UR_CHECK_ERROR(cuMemAdvise((CUdeviceptr)pMem, size, - CU_MEM_ADVISE_UNSET_ACCESSED_BY, - hQueue->getContext()->getDevice()->get())); - } else { - Result = setCuMemAdvise((CUdeviceptr)pMem, size, advice, - hQueue->getContext()->getDevice()->get()); - } - - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - *phEvent = EventPtr.release(); - } - } catch (ur_result_t err) { - Result = err; - } catch (...) { - Result = UR_RESULT_ERROR_UNKNOWN; - } - return Result; -} - -// TODO: Implement this. Remember to return true for -// PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT when it is implemented. -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t, void *, size_t, size_t, const void *, size_t, size_t, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - - try { - ScopedContext active(hQueue->getContext()); - CUstream cuStream = hQueue->getNextTransferStream(); - result = enqueueEventsWait(hQueue, cuStream, numEventsInWaitList, - phEventWaitList); - - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_COPY_RECT, hQueue, cuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - // Determine the direction of copy using cuPointerGetAttribute - // for both the SrcPtr and DstPtr - CUDA_MEMCPY2D CpyDesc = {}; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - - getUSMHostOrDevicePtr(pSrc, &CpyDesc.srcMemoryType, &CpyDesc.srcDevice, - &CpyDesc.srcHost); - getUSMHostOrDevicePtr(pDst, &CpyDesc.dstMemoryType, &CpyDesc.dstDevice, - &CpyDesc.dstHost); - - CpyDesc.dstPitch = dstPitch; - CpyDesc.srcPitch = srcPitch; - CpyDesc.WidthInBytes = width; - CpyDesc.Height = height; - - UR_CHECK_ERROR(cuMemcpy2DAsync(&CpyDesc, cuStream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - if (blocking) { - UR_CHECK_ERROR(cuStreamSynchronize(cuStream)); - } - } catch (ur_result_t err) { - result = err; - } - return result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst,
uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(!hBuffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(offset + size <= hBuffer->Mem.BufferMem.Size, - UR_RESULT_ERROR_INVALID_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - CUdeviceptr DevPtr = hBuffer->Mem.BufferMem.get(); - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_READ, hQueue, CuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - UR_CHECK_ERROR(cuMemcpyDtoHAsync(pDst, DevPtr + offset, size, CuStream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingRead) { - UR_CHECK_ERROR(cuStreamSynchronize(CuStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t Err) { - Result = Err; - } - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(!hBuffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(offset + size <= hBuffer->Mem.BufferMem.Size, - UR_RESULT_ERROR_INVALID_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - CUdeviceptr DevPtr = hBuffer->Mem.BufferMem.get(); - std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr}; - - try { - ScopedContext Active(hQueue->getContext()); - CUstream CuStream = hQueue->getNextTransferStream(); - - Result = enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList); - - if (phEvent) { - RetImplEvent = - std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative( - UR_COMMAND_MEM_BUFFER_WRITE, hQueue, CuStream)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - UR_CHECK_ERROR(cuMemcpyHtoDAsync(DevPtr + offset, pSrc, size, CuStream)); - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - } - - if (blockingWrite) { - UR_CHECK_ERROR(cuStreamSynchronize(CuStream)); - } - - if (phEvent) { - *phEvent = RetImplEvent.release(); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingWrite, size_t count, size_t offset, const void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - // Since CUDA requires the global variable to be referenced by name, we use - // metadata to find the correct name to access it by.
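Once the name is known, the access pattern is plain driver API: resolve the module-scope symbol, bounds-check, then copy through the returned device pointer. A minimal sketch (assumes an initialized context, a loaded CUmodule, and a symbol name that exists in it):

```cpp
#include <cuda.h>

CUresult writeDeviceGlobal(CUmodule Mod, const char *Name, const void *Src,
                           size_t Count, size_t Offset) {
  CUdeviceptr Global = 0;
  size_t GlobalSize = 0;
  // Resolve the module-scope symbol by name.
  CUresult Err = cuModuleGetGlobal(&Global, &GlobalSize, Mod, Name);
  if (Err != CUDA_SUCCESS)
    return Err;
  if (Offset + Count > GlobalSize) // same bounds check as the adapter
    return CUDA_ERROR_INVALID_VALUE;
  // Copy host data into the global at the requested offset.
  return cuMemcpyHtoD(Global + Offset, Src, Count);
}
```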
- auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); - if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - std::string DeviceGlobalName = DeviceGlobalNameIt->second; - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - CUdeviceptr DeviceGlobal = 0; - size_t DeviceGlobalSize = 0; - UR_CHECK_ERROR(cuModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, - hProgram->get(), - DeviceGlobalName.c_str())); - - if (offset + count > DeviceGlobalSize) - return UR_RESULT_ERROR_INVALID_VALUE; - - return urEnqueueUSMMemcpy( - hQueue, blockingWrite, reinterpret_cast<void *>(DeviceGlobal + offset), - pSrc, count, numEventsInWaitList, phEventWaitList, phEvent); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingRead, size_t count, size_t offset, void *pDst, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - // Since CUDA requires the global variable to be referenced by name, we use - // metadata to find the correct name to access it by. - auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); - if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - std::string DeviceGlobalName = DeviceGlobalNameIt->second; - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - CUdeviceptr DeviceGlobal = 0; - size_t DeviceGlobalSize = 0; - UR_CHECK_ERROR(cuModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, - hProgram->get(), - DeviceGlobalName.c_str())); - - if (offset + count > DeviceGlobalSize) - return UR_RESULT_ERROR_INVALID_VALUE; - - return urEnqueueUSMMemcpy( - hQueue, blockingRead, pDst, - reinterpret_cast<const void *>(DeviceGlobal + offset), count, - numEventsInWaitList, phEventWaitList, phEvent); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -/// Host Pipes -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - (void)hQueue; - (void)hProgram; - (void)pipe_symbol; - (void)blocking; - (void)pDst; - (void)size; - (void)numEventsInWaitList; - (void)phEventWaitList; - (void)phEvent; - - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - (void)hQueue; - (void)hProgram; - (void)pipe_symbol; - (void)blocking; - (void)pSrc; - (void)size; - (void)numEventsInWaitList; - (void)phEventWaitList; - (void)phEvent; - - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.hpp deleted file mode 100644 index d49853b38dccb..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/enqueue.hpp +++ /dev/null @@ -1,16 +0,0 @@ -//===--------- enqueue.hpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include -#include - -ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, CUstream Stream, - uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp deleted file mode 100644 index 18d861c4e9ee5..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/event.cpp +++ /dev/null @@ -1,295 +0,0 @@ -//===--------- event.cpp - CUDA Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "event.hpp" -#include "common.hpp" -#include "context.hpp" -#include "device.hpp" -#include "queue.hpp" - -#include -#include - -ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, - ur_context_handle_t Context, - ur_queue_handle_t Queue, CUstream Stream, - uint32_t StreamToken) - : CommandType{Type}, RefCount{1}, HasOwnership{true}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr}, - EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} { - - bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; - - UR_CHECK_ERROR(cuEventCreate( - &EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING)); - - if (ProfilingEnabled) { - UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT)); - UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT)); - } - - if (Queue != nullptr) { - urQueueRetain(Queue); - } - urContextRetain(Context); -} - -ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, - CUevent EventNative) - : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{std::numeric_limits::max()}, EvEnd{EventNative}, - EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} { - urContextRetain(Context); -} - -ur_event_handle_t_::~ur_event_handle_t_() { - if (Queue != nullptr) { - urQueueRelease(Queue); - } - urContextRelease(Context); -} - -ur_result_t ur_event_handle_t_::start() { - assert(!isStarted()); - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - if (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE) { - // NOTE: This relies on the default stream to be unused. 
- UR_CHECK_ERROR(cuEventRecord(EvQueued, 0)); - UR_CHECK_ERROR(cuEventRecord(EvStart, Stream)); - } - } catch (ur_result_t Err) { - Result = Err; - } - - IsStarted = true; - return Result; -} - -bool ur_event_handle_t_::isCompleted() const noexcept { - if (!IsRecorded) { - return false; - } - if (!HasBeenWaitedOn) { - const CUresult Result = cuEventQuery(EvEnd); - if (Result != CUDA_SUCCESS && Result != CUDA_ERROR_NOT_READY) { - UR_CHECK_ERROR(Result); - return false; - } - if (Result == CUDA_ERROR_NOT_READY) { - return false; - } - } - return true; -} - -uint64_t ur_event_handle_t_::getQueuedTime() const { - assert(isStarted()); - return Queue->get_device()->getElapsedTime(EvQueued); -} - -uint64_t ur_event_handle_t_::getStartTime() const { - assert(isStarted()); - return Queue->get_device()->getElapsedTime(EvStart); -} - -uint64_t ur_event_handle_t_::getEndTime() const { - assert(isStarted() && isRecorded()); - return Queue->get_device()->getElapsedTime(EvEnd); -} - -ur_result_t ur_event_handle_t_::record() { - - if (isRecorded() || !isStarted()) { - return UR_RESULT_ERROR_INVALID_EVENT; - } - - ur_result_t Result = UR_RESULT_SUCCESS; - - UR_ASSERT(Queue, UR_RESULT_ERROR_INVALID_QUEUE); - - try { - EventID = Queue->getNextEventID(); - if (EventID == 0) { - detail::ur::die( - "Unrecoverable program state reached in event identifier overflow"); - } - UR_CHECK_ERROR(cuEventRecord(EvEnd, Stream)); - } catch (ur_result_t error) { - Result = error; - } - - if (Result == UR_RESULT_SUCCESS) { - IsRecorded = true; - } - - return Result; -} - -ur_result_t ur_event_handle_t_::wait() { - ur_result_t Result = UR_RESULT_SUCCESS; - try { - UR_CHECK_ERROR(cuEventSynchronize(EvEnd)); - HasBeenWaitedOn = true; - } catch (ur_result_t error) { - Result = error; - } - - return Result; -} - -ur_result_t ur_event_handle_t_::release() { - if (!backendHasOwnership()) - return UR_RESULT_SUCCESS; - - assert(Queue != nullptr); - - UR_CHECK_ERROR(cuEventDestroy(EvEnd)); - - if (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE) { - UR_CHECK_ERROR(cuEventDestroy(EvQueued)); - UR_CHECK_ERROR(cuEventDestroy(EvStart)); - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, - ur_event_info_t propName, - size_t propValueSize, - void *pPropValue, - size_t *pPropValueSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); - - switch (propName) { - case UR_EVENT_INFO_COMMAND_QUEUE: - return ReturnValue(hEvent->getQueue()); - case UR_EVENT_INFO_COMMAND_TYPE: - return ReturnValue(hEvent->getCommandType()); - case UR_EVENT_INFO_REFERENCE_COUNT: - return ReturnValue(hEvent->getReferenceCount()); - case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: - return ReturnValue(hEvent->getExecutionStatus()); - case UR_EVENT_INFO_CONTEXT: - return ReturnValue(hEvent->getContext()); - default: - detail::ur::die("Event info request not implemented"); - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -/// Obtain profiling information from PI CUDA events -/// \TODO Timings from CUDA are only elapsed time. 
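Those timings are produced by measuring elapsed time between recorded CUevents. A standalone sketch of the record/synchronize/measure pattern, simplified here to a single start/end pair (assumes an initialized context and stream; a memset stands in for the timed work):

```cpp
#include <cuda.h>

float elapsedMsSketch(CUstream Stream, CUdeviceptr Ptr, size_t Bytes) {
  CUevent Start, End;
  cuEventCreate(&Start, CU_EVENT_DEFAULT); // timing-capable events
  cuEventCreate(&End, CU_EVENT_DEFAULT);
  cuEventRecord(Start, Stream);            // like EvStart in start()
  cuMemsetD8Async(Ptr, 0, Bytes, Stream);  // the timed work
  cuEventRecord(End, Stream);              // like EvEnd in record()
  cuEventSynchronize(End);                 // like wait()
  float Ms = 0.f;
  cuEventElapsedTime(&Ms, Start, End);     // elapsed time, in milliseconds
  cuEventDestroy(Start);
  cuEventDestroy(End);
  return Ms;
}
```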
-UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( - ur_event_handle_t hEvent, ur_profiling_info_t propName, - size_t propValueSize, void *pPropValue, size_t *pPropValueSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); - - ur_queue_handle_t Queue = hEvent->getQueue(); - if (Queue == nullptr || !(Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE)) { - return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; - } - - switch (propName) { - case UR_PROFILING_INFO_COMMAND_QUEUED: - case UR_PROFILING_INFO_COMMAND_SUBMIT: - // Note: No user for this case - return ReturnValue(static_cast(hEvent->getQueuedTime())); - case UR_PROFILING_INFO_COMMAND_START: - return ReturnValue(static_cast(hEvent->getStartTime())); - case UR_PROFILING_INFO_COMMAND_END: - return ReturnValue(static_cast(hEvent->getEndTime())); - default: - break; - } - detail::ur::die("Event Profiling info request not implemented"); - return {}; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t, - ur_execution_info_t, - ur_event_callback_t, - void *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - try { - auto Context = phEventWaitList[0]->getContext(); - ScopedContext Active(Context); - - auto WaitFunc = [Context](ur_event_handle_t Event) -> ur_result_t { - UR_ASSERT(Event, UR_RESULT_ERROR_INVALID_EVENT); - UR_ASSERT(Event->getContext() == Context, - UR_RESULT_ERROR_INVALID_CONTEXT); - - return Event->wait(); - }; - return forLatestEvents(phEventWaitList, numEvents, WaitFunc); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - const auto RefCount = hEvent->incrementReferenceCount(); - - detail::ur::assertion(RefCount != 0, - "Reference count overflow detected in urEventRetain."); - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - // double delete or someone is messing with the ref count. - // either way, cannot safely proceed. - detail::ur::assertion(hEvent->getReferenceCount() != 0, - "Reference count overflow detected in urEventRelease."); - - // decrement ref count. If it is 0, delete the event. - if (hEvent->decrementReferenceCount() == 0) { - std::unique_ptr event_ptr{hEvent}; - ur_result_t Result = UR_RESULT_ERROR_INVALID_EVENT; - try { - ScopedContext Active(hEvent->getContext()); - Result = hEvent->release(); - } catch (...) 
{ - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - return Result; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( - ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { - *phNativeEvent = reinterpret_cast(hEvent->get()); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( - ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { - std::ignore = pProperties; - - std::unique_ptr EventPtr{nullptr}; - - *phEvent = ur_event_handle_t_::makeWithNative( - hContext, reinterpret_cast(hNativeEvent)); - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/event.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/event.hpp deleted file mode 100644 index 4c788532c224e..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/event.hpp +++ /dev/null @@ -1,189 +0,0 @@ -//===--------- event.hpp - CUDA Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -#include "queue.hpp" - -/// UR Event mapping to CUevent -/// -struct ur_event_handle_t_ { -public: - using native_type = CUevent; - - ur_result_t record(); - - ur_result_t wait(); - - ur_result_t start(); - - native_type get() const noexcept { return EvEnd; }; - - ur_queue_handle_t getQueue() const noexcept { return Queue; } - - CUstream getStream() const noexcept { return Stream; } - - uint32_t getComputeStreamToken() const noexcept { return StreamToken; } - - ur_command_t getCommandType() const noexcept { return CommandType; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - bool isRecorded() const noexcept { return IsRecorded; } - - bool isStarted() const noexcept { return IsStarted; } - - bool isCompleted() const noexcept; - - uint32_t getExecutionStatus() const noexcept { - - if (!isRecorded()) { - return UR_EVENT_STATUS_SUBMITTED; - } - - if (!isCompleted()) { - return UR_EVENT_STATUS_RUNNING; - } - return UR_EVENT_STATUS_COMPLETE; - } - - ur_context_handle_t getContext() const noexcept { return Context; }; - - uint32_t incrementReferenceCount() { return ++RefCount; } - - uint32_t decrementReferenceCount() { return --RefCount; } - - uint32_t getEventID() const noexcept { return EventID; } - - bool backendHasOwnership() const noexcept { return HasOwnership; } - - // Returns the counter time when the associated command(s) were enqueued - // - uint64_t getQueuedTime() const; - - // Returns the counter time when the associated command(s) started execution - // - uint64_t getStartTime() const; - - // Returns the counter time when the associated command(s) completed - // - uint64_t getEndTime() const; - - // construct a native CUDA. This maps closely to the underlying CUDA event. 
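getExecutionStatus() above derives submitted/running/complete from the recorded state plus a cuEventQuery poll, where CUDA_ERROR_NOT_READY means "still running" rather than failure. A minimal sketch of that poll (assumes a recorded CUevent):

```cpp
#include <cuda.h>

bool eventCompletedSketch(CUevent Ev) {
  CUresult Res = cuEventQuery(Ev);
  if (Res == CUDA_SUCCESS)
    return true;  // work preceding the event has finished
  if (Res == CUDA_ERROR_NOT_READY)
    return false; // still in flight; not an error
  // Any other code is a real error; a full implementation would surface it.
  return false;
}
```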
- static ur_event_handle_t - makeNative(ur_command_t Type, ur_queue_handle_t Queue, CUstream Stream, - uint32_t StreamToken = std::numeric_limits<uint32_t>::max()) { - return new ur_event_handle_t_(Type, Queue->getContext(), Queue, Stream, - StreamToken); - } - - static ur_event_handle_t makeWithNative(ur_context_handle_t context, - CUevent eventNative) { - return new ur_event_handle_t_(context, eventNative); - } - - ur_result_t release(); - - ~ur_event_handle_t_(); - -private: - // This constructor is private to force programmers to use the makeNative / - // make_user static members in order to create a pi_event for CUDA. - ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context, - ur_queue_handle_t Queue, CUstream Stream, - uint32_t StreamToken); - - // This constructor is private to force programmers to use the - // makeWithNative for event interop - ur_event_handle_t_(ur_context_handle_t Context, CUevent EventNative); - - ur_command_t CommandType; // The type of command associated with event. - - std::atomic_uint32_t RefCount; // Event reference count. - - bool HasOwnership; // Signifies if event owns the native type. - - bool HasBeenWaitedOn; // Signifies whether the event has been waited - // on through a call to wait(), which implies - // that it has completed. - - bool IsRecorded; // Signifies whether a native CUDA event has been recorded - // yet. - bool IsStarted; // Signifies whether the operation associated with the - // UR event has started or not - - uint32_t StreamToken; - uint32_t EventID; // Queue identifier of the event. - - native_type EvEnd; // CUDA event handle. If this ur_event_handle_t represents - // a user event, this will be nullptr. - - native_type EvStart; // CUDA event handle associated with the start - - native_type EvQueued; // CUDA event handle associated with the time - // the command was enqueued - - ur_queue_handle_t Queue; // ur_queue_handle_t associated with the event. If - // this is a user event, this will be nullptr. - - CUstream Stream; // CUstream associated with the event. If this is a user - // event, this will be uninitialized. - - ur_context_handle_t Context; // ur_context_handle_t associated with the event. - // If this is a native event, this will be the - // same context associated with the queue member. -}; - -// Iterate over `event_wait_list` and apply the given callback `f` to the -// latest event on each queue therein. The callback must take a single -// ur_event_handle_t argument and return a ur_result_t. If the callback returns -// an error, the iteration terminates and the error is returned. -template <typename Func> -ur_result_t forLatestEvents(const ur_event_handle_t *EventWaitList, - std::size_t NumEventsInWaitList, Func &&F) { - - if (EventWaitList == nullptr || NumEventsInWaitList == 0) { - return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; - } - - // Fast path if we only have a single event - if (NumEventsInWaitList == 1) { - return F(EventWaitList[0]); - } - - std::vector<ur_event_handle_t> Events{EventWaitList, - EventWaitList + NumEventsInWaitList}; - std::sort(Events.begin(), Events.end(), - [](ur_event_handle_t Event0, ur_event_handle_t Event1) { - // Tiered sort creating sublists of streams (smallest value first) - // in which the corresponding events are sorted into a sequence of - // newest first.
- return Event0->getStream() < Event1->getStream() || - (Event0->getStream() == Event1->getStream() && - Event0->getEventID() > Event1->getEventID()); - }); - - CUstream LastSeenStream = 0; - for (size_t i = 0; i < Events.size(); i++) { - auto Event = Events[i]; - if (!Event || (i != 0 && Event->getStream() == LastSeenStream)) { - continue; - } - - LastSeenStream = Event->getStream(); - - auto Result = F(Event); - if (Result != UR_RESULT_SUCCESS) { - return Result; - } - } - - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/image.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/image.cpp deleted file mode 100644 index 1b11cade5cebc..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/image.cpp +++ /dev/null @@ -1,1061 +0,0 @@ -//===--------- image.cpp - CUDA Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include - -#include "common.hpp" -#include "context.hpp" -#include "enqueue.hpp" -#include "event.hpp" -#include "image.hpp" -#include "memory.hpp" -#include "queue.hpp" -#include "sampler.hpp" -#include "ur/ur.hpp" -#include "ur_api.h" - -ur_result_t urCalculateNumChannels(ur_image_channel_order_t order, - unsigned int *NumChannels) { - switch (order) { - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_A: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_R: - *NumChannels = 1; - return UR_RESULT_SUCCESS; - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RG: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RA: - *NumChannels = 2; - return UR_RESULT_SUCCESS; - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGB: - return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGBA: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_ARGB: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_BGRA: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_ABGR: - *NumChannels = 4; - return UR_RESULT_SUCCESS; - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RX: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGX: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_RGBX: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_SRGBA: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_INTENSITY: - case ur_image_channel_order_t::UR_IMAGE_CHANNEL_ORDER_LUMINANCE: - default: - return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; - } -} - -/// Convert a UR image format to a CUDA image format and -/// get the pixel size in bytes. -/// /param image_channel_type is the ur_image_channel_type_t. -/// /param image_channel_order is the ur_image_channel_order_t. -/// this is used for normalized channel formats, as CUDA -/// combines the channel format and order for normalized -/// channel types. -/// /param return_cuda_format will be set to the equivalent cuda -/// format if not nullptr. -/// /param return_pixel_size_bytes will be set to the pixel -/// byte size if not nullptr. 
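A host-only model of the forLatestEvents traversal just defined: after the tiered sort, visiting the first event seen per stream is enough, since that is the newest one. The types below are simplified stand-ins, not the adapter's handles:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for an event: its stream and a monotonically growing ID.
struct Ev {
  uintptr_t Stream;
  uint32_t Id;
};

// Visit only the newest event per stream, mirroring forLatestEvents.
template <typename Func> void forLatest(std::vector<Ev> Events, Func F) {
  std::sort(Events.begin(), Events.end(), [](const Ev &A, const Ev &B) {
    return A.Stream < B.Stream || (A.Stream == B.Stream && A.Id > B.Id);
  });
  uintptr_t LastStream = ~uintptr_t{0}; // sentinel: no stream seen yet
  for (const Ev &E : Events) {
    if (E.Stream == LastStream)
      continue; // an older event on a stream we already handled
    LastStream = E.Stream;
    F(E); // e.g. wait on this event
  }
}

int main() {
  // Two streams, three events: only (1,7) and (2,5) should be visited.
  forLatest({{1, 3}, {2, 5}, {1, 7}}, [](const Ev &E) {
    std::printf("stream %zu id %u\n", (size_t)E.Stream, E.Id);
  });
}
```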
-ur_result_t -urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type, - ur_image_channel_order_t image_channel_order, - CUarray_format *return_cuda_format, - size_t *return_pixel_size_bytes) { - - CUarray_format cuda_format; - size_t pixel_size_bytes = 0; - unsigned int num_channels = 0; - UR_CHECK_ERROR(urCalculateNumChannels(image_channel_order, &num_channels)); - - switch (image_channel_type) { -#define CASE(FROM, TO, SIZE) \ - case FROM: { \ - cuda_format = TO; \ - pixel_size_bytes = SIZE * num_channels; \ - break; \ - } - - CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, CU_AD_FORMAT_UNSIGNED_INT8, 1) - CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8, CU_AD_FORMAT_SIGNED_INT8, 1) - CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, CU_AD_FORMAT_UNSIGNED_INT16, 2) - CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16, CU_AD_FORMAT_SIGNED_INT16, 2) - CASE(UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT, CU_AD_FORMAT_HALF, 2) - CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, CU_AD_FORMAT_UNSIGNED_INT32, 4) - CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32, CU_AD_FORMAT_SIGNED_INT32, 4) - CASE(UR_IMAGE_CHANNEL_TYPE_FLOAT, CU_AD_FORMAT_FLOAT, 4) - -#undef CASE - default: - break; - } - - // These new formats were introduced in CUDA 11.5 -#if CUDA_VERSION >= 11050 - - // If none of the above channel types were passed, check those below - if (pixel_size_bytes == 0) { - - // We can't use a switch statement here because these single - // UR_IMAGE_CHANNEL_TYPEs can correspond to multiple [u/s]norm CU_AD_FORMATs - // depending on the number of channels. We use a std::map instead to - // retrieve the correct CUDA format - - // map < <channel type, num channels>, <CUDA format, pixel size bytes> > - const std::map<std::pair<ur_image_channel_type_t, unsigned int>, - std::pair<CUarray_format, size_t>> - norm_channel_type_map{ - {{UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, 1}, - {CU_AD_FORMAT_UNORM_INT8X1, 1}}, - {{UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, 2}, - {CU_AD_FORMAT_UNORM_INT8X2, 2}}, - {{UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, 4}, - {CU_AD_FORMAT_UNORM_INT8X4, 4}}, - - {{UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, 1}, - {CU_AD_FORMAT_SNORM_INT8X1, 1}}, - {{UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, 2}, - {CU_AD_FORMAT_SNORM_INT8X2, 2}}, - {{UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, 4}, - {CU_AD_FORMAT_SNORM_INT8X4, 4}}, - - {{UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, 1}, - {CU_AD_FORMAT_UNORM_INT16X1, 2}}, - {{UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, 2}, - {CU_AD_FORMAT_UNORM_INT16X2, 4}}, - {{UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, 4}, - {CU_AD_FORMAT_UNORM_INT16X4, 8}}, - - {{UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, 1}, - {CU_AD_FORMAT_SNORM_INT16X1, 2}}, - {{UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, 2}, - {CU_AD_FORMAT_SNORM_INT16X2, 4}}, - {{UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, 4}, - {CU_AD_FORMAT_SNORM_INT16X4, 8}}, - }; - - try { - auto cuda_format_and_size = norm_channel_type_map.at( - std::make_pair(image_channel_type, num_channels)); - cuda_format = cuda_format_and_size.first; - pixel_size_bytes = cuda_format_and_size.second; - } catch (std::out_of_range &e) { - return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; - } - } - -#endif - - if (return_cuda_format) { - *return_cuda_format = cuda_format; - } - if (return_pixel_size_bytes) { - *return_pixel_size_bytes = pixel_size_bytes; - } - return UR_RESULT_SUCCESS; -} - -ur_result_t -cudaToUrImageChannelFormat(CUarray_format cuda_format, - ur_image_channel_type_t *return_image_channel_type) { - - switch (cuda_format) { -#define CUDA_TO_UR_IMAGE_CHANNEL_TYPE(FROM, TO) \ - case FROM: { \ - *return_image_channel_type = TO; \ - return UR_RESULT_SUCCESS; \ - } - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNSIGNED_INT8, - UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8); -
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNSIGNED_INT16, - UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNSIGNED_INT32, - UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SIGNED_INT8, - UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SIGNED_INT16, - UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SIGNED_INT32, - UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_HALF, - UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_FLOAT, - UR_IMAGE_CHANNEL_TYPE_FLOAT); -#if CUDA_VERSION >= 11050 - - // Note that the CUDA UNORM and SNORM formats also encode the number of - // channels. - // Since UR does not encode this, we map different CUDA formats to the same - // UR channel type. - // Since this function is only called from `urBindlessImagesImageGetInfoExp` - // which has access to `CUDA_ARRAY3D_DESCRIPTOR`, we can determine the - // number of channels in the calling function. - - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT8X1, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT8); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT8X2, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT8); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT8X4, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT8); - - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT16X1, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT16); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT16X2, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT16); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT16X4, - UR_IMAGE_CHANNEL_TYPE_UNORM_INT16); - - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT8X1, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT8); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT8X2, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT8); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT8X4, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT8); - - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT16X1, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT16); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT16X2, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT16); - CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT16X4, - UR_IMAGE_CHANNEL_TYPE_SNORM_INT16); -#endif -#undef MAP - default: - return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED; - } -} - -ur_result_t urTextureCreate(ur_sampler_handle_t hSampler, - const ur_image_desc_t *pImageDesc, - CUDA_RESOURCE_DESC ResourceDesc, - ur_exp_image_handle_t *phRetImage) { - - try { - /// pi_sampler_properties - /// | | - /// ----------------------------------- - /// | 31 30 ... 
6 | N/A - /// | 5 | mip filter mode - /// | 4 3 2 | addressing mode - /// | 1 | filter mode - /// | 0 | normalize coords - CUDA_TEXTURE_DESC ImageTexDesc = {}; - CUaddress_mode AddrMode = {}; - ur_sampler_addressing_mode_t AddrModeProp = hSampler->getAddressingMode(); - if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE - - UR_SAMPLER_ADDRESSING_MODE_NONE)) { - AddrMode = CU_TR_ADDRESS_MODE_CLAMP; - } else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP - - UR_SAMPLER_ADDRESSING_MODE_NONE)) { - AddrMode = CU_TR_ADDRESS_MODE_BORDER; - } else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT - - UR_SAMPLER_ADDRESSING_MODE_NONE)) { - AddrMode = CU_TR_ADDRESS_MODE_WRAP; - } else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT - - UR_SAMPLER_ADDRESSING_MODE_NONE)) { - AddrMode = CU_TR_ADDRESS_MODE_MIRROR; - } - CUfilter_mode FilterMode; - ur_sampler_filter_mode_t FilterModeProp = hSampler->getFilterMode(); - FilterMode = - FilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT; - ImageTexDesc.filterMode = FilterMode; - - // Mipmap attributes - CUfilter_mode MipFilterMode; - ur_sampler_filter_mode_t MipFilterModeProp = hSampler->getMipFilterMode(); - MipFilterMode = - MipFilterModeProp ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT; - ImageTexDesc.mipmapFilterMode = MipFilterMode; - ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp; - ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp; - ImageTexDesc.maxAnisotropy = hSampler->MaxAnisotropy; - - // The address modes can interfere with other dimensions, - // e.g. 1D texture sampling can be interfered with when setting other - // dimension address modes despite their nonexistence. - ImageTexDesc.addressMode[0] = AddrMode; // 1D - ImageTexDesc.addressMode[1] = - pImageDesc->height > 0 ? AddrMode : ImageTexDesc.addressMode[1]; // 2D - ImageTexDesc.addressMode[2] = - pImageDesc->depth > 0 ? AddrMode : ImageTexDesc.addressMode[2]; // 3D - - // flags takes the normalized coordinates setting -- unnormalized is default - ImageTexDesc.flags = (hSampler->isNormalizedCoords()) - ? CU_TRSF_NORMALIZED_COORDINATES - : ImageTexDesc.flags; - - // CUDA default promotes 8-bit and 16-bit integers to float between [0,1] - // This flag prevents this behaviour. - ImageTexDesc.flags |= CU_TRSF_READ_AS_INTEGER; - - CUtexObject Texture; - UR_CHECK_ERROR( - cuTexObjectCreate(&Texture, &ResourceDesc, &ImageTexDesc, nullptr)); - *phRetImage = (ur_exp_image_handle_t)Texture; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - std::ignore = pUSMDesc; - std::ignore = pool; - - UR_ASSERT((widthInBytes > 0), UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT((height > 0), UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT((elementSizeBytes > 0), UR_RESULT_ERROR_INVALID_VALUE); - - // elementSizeBytes can only take on values of 4, 8, or 16; - // smaller element sizes must be promoted to 4.
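The allocation below relies on cuMemAllocPitch, which may widen each row to an alignment-friendly pitch that the caller must honour when indexing. A small sketch with hypothetical sizes (assumes an initialized context):

```cpp
#include <cstdio>
#include <cuda.h>

void pitchedAllocSketch() {
  CUdeviceptr Ptr = 0;
  size_t Pitch = 0;
  // 1000-byte-wide rows, 64 rows, 4-byte elements (hypothetical sizes).
  if (cuMemAllocPitch(&Ptr, &Pitch, 1000, 64, 4) == CUDA_SUCCESS) {
    std::printf("requested 1000 B/row, got pitch %zu B\n", Pitch);
    // Row r, byte column c lives at Ptr + r * Pitch + c.
    cuMemFree(Ptr);
  }
}
```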
- if (elementSizeBytes < 4) { - elementSizeBytes = 4; - } - UR_ASSERT((elementSizeBytes == 4 || elementSizeBytes == 8 || - elementSizeBytes == 16), - UR_RESULT_ERROR_INVALID_VALUE); - ur_result_t Result = UR_RESULT_SUCCESS; - try { - ScopedContext Active(hDevice->getContext()); - UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, - widthInBytes, height, elementSizeBytes)); - } catch (ur_result_t error) { - Result = error; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesUnsampledImageHandleDestroyExp(ur_context_handle_t hContext, - ur_device_handle_t hDevice, - ur_exp_image_handle_t hImage) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - UR_CHECK_ERROR(cuSurfObjectDestroy((CUsurfObject)hImage)); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesSampledImageHandleDestroyExp(ur_context_handle_t hContext, - ur_device_handle_t hDevice, - ur_exp_image_handle_t hImage) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - UR_CHECK_ERROR(cuTexObjectDestroy((CUtexObject)hImage)); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_exp_image_mem_handle_t *phImageMem) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - // Populate descriptor - CUDA_ARRAY3D_DESCRIPTOR array_desc = {}; - - UR_CHECK_ERROR(urCalculateNumChannels(pImageFormat->channelOrder, - &array_desc.NumChannels)); - - UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, - pImageFormat->channelOrder, - &array_desc.Format, nullptr)); - - array_desc.Flags = 0; // No flags required - array_desc.Width = pImageDesc->width; - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - array_desc.Height = 0; - array_desc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - array_desc.Height = pImageDesc->height; - array_desc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - array_desc.Height = pImageDesc->height; - array_desc.Depth = pImageDesc->depth; - } - - ScopedContext Active(hDevice->getContext()); - - // Allocate a cuArray - if (pImageDesc->numMipLevel == 1) { - CUarray ImageArray; - - try { - UR_CHECK_ERROR(cuArray3DCreate(&ImageArray, &array_desc)); - *phImageMem = (ur_exp_image_mem_handle_t)ImageArray; - } catch (ur_result_t Err) { - cuArrayDestroy(ImageArray); - return Err; - } catch (...) { - cuArrayDestroy(ImageArray); - return UR_RESULT_ERROR_UNKNOWN; - } - } else // Allocate a cuMipmappedArray - { - CUmipmappedArray mip_array; - array_desc.Flags = CUDA_ARRAY3D_SURFACE_LDST; - - try { - UR_CHECK_ERROR(cuMipmappedArrayCreate(&mip_array, &array_desc, - pImageDesc->numMipLevel)); - *phImageMem = (ur_exp_image_mem_handle_t)mip_array; - } catch (ur_result_t Err) { - cuMipmappedArrayDestroy(mip_array); - return Err; - } catch (...) 
{ - cuMipmappedArrayDestroy(mip_array); - return UR_RESULT_ERROR_UNKNOWN; - } - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_handle_t hImageMem) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - ScopedContext Active(hDevice->getContext()); - try { - UR_CHECK_ERROR(cuArrayDestroy((CUarray)hImageMem)); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_handle_t hImageMem, const ur_image_format_t *pImageFormat, - const ur_image_desc_t *pImageDesc, ur_mem_handle_t *phMem, - ur_exp_image_handle_t *phImage) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - unsigned int NumChannels = 0; - UR_CHECK_ERROR( - urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); - - CUarray_format format; - size_t PixelSizeBytes; - UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, - pImageFormat->channelOrder, &format, - &PixelSizeBytes)); - - try { - - ScopedContext Active(hDevice->getContext()); - - CUDA_RESOURCE_DESC image_res_desc = {}; - - // We have a CUarray - image_res_desc.resType = CU_RESOURCE_TYPE_ARRAY; - image_res_desc.res.array.hArray = (CUarray)hImageMem; - - // We create surfaces in the unsampled images case as it conforms to how - // CUDA deals with unsampled images. - CUsurfObject surface; - UR_CHECK_ERROR(cuSurfObjectCreate(&surface, &image_res_desc)); - *phImage = (ur_exp_image_handle_t)surface; - - auto urMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, (CUarray)hImageMem, surface, pImageDesc->type}); - - if (urMemObj == nullptr) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phMem = urMemObj.release(); - - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_handle_t hImageMem, const ur_image_format_t *pImageFormat, - const ur_image_desc_t *pImageDesc, ur_sampler_handle_t hSampler, - ur_mem_handle_t *phMem, ur_exp_image_handle_t *phImage) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - ScopedContext Active(hDevice->getContext()); - - unsigned int NumChannels = 0; - UR_CHECK_ERROR( - urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); - - CUarray_format format; - size_t PixelSizeBytes; - UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, - pImageFormat->channelOrder, &format, - &PixelSizeBytes)); - - try { - CUDA_RESOURCE_DESC image_res_desc = {}; - - unsigned int mem_type; - // If this function doesn't return successfully, we assume that hImageMem is - // a CUarray or CUmipmappedArray. If this function returns successfully, we - // check whether hImageMem is device memory (even managed memory isn't - // considered shared). 
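The comment above describes a three-way classification; a minimal standalone sketch of just that decision, assuming only the CUDA driver API (the enum and helper names are illustrative, not adapter code):

    #include <cuda.h>

    enum class ImageMemKind { ArrayOrMipmap, DeviceUsm, Unsupported };

    // Mirror the adapter's probe: if CUDA cannot report a memory type for the
    // handle, treat it as a (mipmapped) array; device memory means a USM
    // pointer; anything else is rejected.
    inline ImageMemKind classifyImageMem(void *Handle) {
      unsigned int MemType = 0;
      CUresult Err = cuPointerGetAttribute(
          &MemType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)Handle);
      if (Err != CUDA_SUCCESS)
        return ImageMemKind::ArrayOrMipmap;
      return MemType == CU_MEMORYTYPE_DEVICE ? ImageMemKind::DeviceUsm
                                             : ImageMemKind::Unsupported;
    }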
- CUresult Err = cuPointerGetAttribute( - &mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)hImageMem); - if (Err != CUDA_SUCCESS) { - // We have a CUarray - if (pImageDesc->numMipLevel == 1) { - image_res_desc.resType = CU_RESOURCE_TYPE_ARRAY; - image_res_desc.res.array.hArray = (CUarray)hImageMem; - } - // We have a CUmipmappedArray - else { - image_res_desc.resType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; - image_res_desc.res.mipmap.hMipmappedArray = (CUmipmappedArray)hImageMem; - } - } else if (mem_type == CU_MEMORYTYPE_DEVICE) { - // We have a USM pointer - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - image_res_desc.resType = CU_RESOURCE_TYPE_LINEAR; - image_res_desc.res.linear.devPtr = (CUdeviceptr)hImageMem; - image_res_desc.res.linear.format = format; - image_res_desc.res.linear.numChannels = NumChannels; - image_res_desc.res.linear.sizeInBytes = - pImageDesc->width * PixelSizeBytes; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - image_res_desc.resType = CU_RESOURCE_TYPE_PITCH2D; - image_res_desc.res.pitch2D.devPtr = (CUdeviceptr)hImageMem; - image_res_desc.res.pitch2D.format = format; - image_res_desc.res.pitch2D.numChannels = NumChannels; - image_res_desc.res.pitch2D.width = pImageDesc->width; - image_res_desc.res.pitch2D.height = pImageDesc->height; - image_res_desc.res.pitch2D.pitchInBytes = pImageDesc->rowPitch; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - // Cannot create 3D image from USM. - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else { - // Unknown image memory type. - return UR_RESULT_ERROR_INVALID_VALUE; - } - - UR_CHECK_ERROR( - urTextureCreate(hSampler, pImageDesc, image_res_desc, phImage)); - - auto urMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, (CUarray)hImageMem, (CUtexObject)*phImage, hSampler, - pImageDesc->type}); - - if (urMemObj == nullptr) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phMem = urMemObj.release(); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( - ur_queue_handle_t hQueue, void *pDst, void *pSrc, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, - ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, - ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || - imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || - imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE), - UR_RESULT_ERROR_INVALID_VALUE); - - unsigned int NumChannels = 0; - size_t PixelSizeBytes = 0; - - UR_CHECK_ERROR( - urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); - - // We need to get this now in bytes for calculating the total image size - // later. - UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, - pImageFormat->channelOrder, nullptr, - &PixelSizeBytes)); - - try { - ScopedContext Active(hQueue->getContext()); - CUstream Stream = hQueue->getNextTransferStream(); - enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); - // We have to use a different copy function for each image dimensionality. 
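The dimensionality dispatch that follows pairs 1D copies with cuMemcpyHtoAAsync/cuMemcpyAtoHAsync, 2D with cuMemcpy2DAsync, and 3D with cuMemcpy3DAsync. A distilled sketch of the 2D host-to-array case (the helper and its parameters are illustrative; the adapter builds the descriptor in place):

    #include <cuda.h>

    // Enqueue a tightly packed host buffer into a CUarray. Width/Height are
    // given in pixels; CUDA wants row widths and pitches in bytes.
    inline CUresult copyHostToArray2D(CUarray Dst, const void *Src,
                                      size_t WidthPx, size_t HeightPx,
                                      size_t PixelSizeBytes, CUstream Stream) {
      CUDA_MEMCPY2D Desc = {};
      Desc.srcMemoryType = CU_MEMORYTYPE_HOST;
      Desc.srcHost = Src;
      Desc.srcPitch = WidthPx * PixelSizeBytes; // packed host rows
      Desc.dstMemoryType = CU_MEMORYTYPE_ARRAY;
      Desc.dstArray = Dst;
      Desc.WidthInBytes = WidthPx * PixelSizeBytes;
      Desc.Height = HeightPx;
      return cuMemcpy2DAsync(&Desc, Stream);
    }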
- - if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width; - char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes); - UR_CHECK_ERROR( - cuMemcpyHtoAAsync((CUarray)pDst, dstOffset.x * PixelSizeBytes, - (void *)SrcWithOffset, CopyExtentBytes, Stream)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - CUDA_MEMCPY2D cpy_desc = {}; - cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; - cpy_desc.srcHost = pSrc; - cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes; - cpy_desc.srcY = srcOffset.y; - cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes; - cpy_desc.dstY = dstOffset.y; - cpy_desc.srcPitch = hostExtent.width * PixelSizeBytes; - if (pImageDesc->rowPitch == 0) { - cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.dstArray = (CUarray)pDst; - } else { - // Pitched memory - cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE; - cpy_desc.dstDevice = (CUdeviceptr)pDst; - cpy_desc.dstPitch = pImageDesc->rowPitch; - } - cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; - cpy_desc.Height = copyExtent.height; - UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes; - cpy_desc.srcY = srcOffset.y; - cpy_desc.srcZ = srcOffset.z; - cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes; - cpy_desc.dstY = dstOffset.y; - cpy_desc.dstZ = dstOffset.z; - cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; - cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = hostExtent.width * PixelSizeBytes; - cpy_desc.srcHeight = hostExtent.height; - cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.dstArray = (CUarray)pDst; - cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; - cpy_desc.Height = copyExtent.height; - cpy_desc.Depth = copyExtent.depth; - UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); - } - } else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width; - size_t src_offset_bytes = PixelSizeBytes * srcOffset.x; - void *dst_with_offset = - (void *)((char *)pDst + (PixelSizeBytes * dstOffset.x)); - UR_CHECK_ERROR(cuMemcpyAtoHAsync(dst_with_offset, (CUarray)pSrc, - src_offset_bytes, CopyExtentBytes, - Stream)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - CUDA_MEMCPY2D cpy_desc = {}; - cpy_desc.srcXInBytes = srcOffset.x; - cpy_desc.srcY = srcOffset.y; - cpy_desc.dstXInBytes = dstOffset.x; - cpy_desc.dstY = dstOffset.y; - cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; - cpy_desc.dstHost = pDst; - if (pImageDesc->rowPitch == 0) { - cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; - } else { - // Pitched memory - cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE; - cpy_desc.srcPitch = pImageDesc->rowPitch; - cpy_desc.srcDevice = (CUdeviceptr)pSrc; - } - cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; - cpy_desc.Height = copyExtent.height; - UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - CUDA_MEMCPY3D cpy_desc = {}; - cpy_desc.srcXInBytes = srcOffset.x; - cpy_desc.srcY = srcOffset.y; - cpy_desc.srcZ = srcOffset.z; - cpy_desc.dstXInBytes = dstOffset.x; 
- cpy_desc.dstY = dstOffset.y; - cpy_desc.dstZ = dstOffset.z; - cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - cpy_desc.srcArray = (CUarray)pSrc; - cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; - cpy_desc.dstHost = pDst; - cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width; - cpy_desc.Height = copyExtent.height; - cpy_desc.Depth = copyExtent.depth; - UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); - } - } else { - /// imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE - /// TODO: implement device to device copy - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - if (phEvent) { - auto NewEvent = ur_event_handle_t_::makeNative(UR_COMMAND_MEM_IMAGE_COPY, - hQueue, Stream); - NewEvent->record(); - *phEvent = NewEvent; - } - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( - ur_exp_image_mem_handle_t hImageMem, ur_image_info_t propName, - void *pPropValue, size_t *pPropSizeRet) { - - CUDA_ARRAY3D_DESCRIPTOR ArrayDesc; - UR_CHECK_ERROR(cuArray3DGetDescriptor(&ArrayDesc, (CUarray)hImageMem)); - switch (propName) { - case UR_IMAGE_INFO_WIDTH: - if (pPropValue) { - *(size_t *)pPropValue = ArrayDesc.Width; - } - if (pPropSizeRet) { - *pPropSizeRet = sizeof(size_t); - } - return UR_RESULT_SUCCESS; - case UR_IMAGE_INFO_HEIGHT: - if (pPropValue) { - *(size_t *)pPropValue = ArrayDesc.Height; - } - if (pPropSizeRet) { - *pPropSizeRet = sizeof(size_t); - } - return UR_RESULT_SUCCESS; - case UR_IMAGE_INFO_DEPTH: - if (pPropValue) { - *(size_t *)pPropValue = ArrayDesc.Depth; - } - if (pPropSizeRet) { - *pPropSizeRet = sizeof(size_t); - } - return UR_RESULT_SUCCESS; - case UR_IMAGE_INFO_FORMAT: - ur_image_channel_type_t ChannelType; - ur_image_channel_order_t ChannelOrder; - UR_CHECK_ERROR(cudaToUrImageChannelFormat(ArrayDesc.Format, &ChannelType)); - // CUDA does not have a notion of channel "order" in the same way that - // SYCL 1.2.1 does. - switch (ArrayDesc.NumChannels) { - case 1: - ChannelOrder = UR_IMAGE_CHANNEL_ORDER_R; - break; - case 2: - ChannelOrder = UR_IMAGE_CHANNEL_ORDER_RG; - break; - case 4: - ChannelOrder = UR_IMAGE_CHANNEL_ORDER_RGBA; - break; - } - if (pPropValue) { - ((ur_image_format_t *)pPropValue)->channelType = ChannelType; - ((ur_image_format_t *)pPropValue)->channelOrder = ChannelOrder; - } - if (pPropSizeRet) { - *pPropSizeRet = sizeof(ur_image_format_t); - } - return UR_RESULT_SUCCESS; - default: - return UR_RESULT_ERROR_INVALID_VALUE; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_handle_t hImageMem, uint32_t mipmapLevel, - ur_exp_image_mem_handle_t *phImageMem) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - try { - ScopedContext Active(hDevice->getContext()); - CUarray ImageArray; - UR_CHECK_ERROR(cuMipmappedArrayGetLevel( - &ImageArray, (CUmipmappedArray)hImageMem, mipmapLevel)); - *phImageMem = (ur_exp_image_mem_handle_t)ImageArray; - } catch (ur_result_t Err) { - return Err; - } catch (...)
{ - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_handle_t hMem) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - ScopedContext Active(hDevice->getContext()); - try { - UR_CHECK_ERROR(cuMipmappedArrayDestroy((CUmipmappedArray)hMem)); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportOpaqueFDExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - ur_exp_interop_mem_desc_t *pInteropMemDesc, - ur_exp_interop_mem_handle_t *phInteropMem) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - try { - ScopedContext Active(hDevice->getContext()); - - CUDA_EXTERNAL_MEMORY_HANDLE_DESC extMemDesc = {}; - extMemDesc.size = size; - - void *pNext = const_cast<void *>(pInteropMemDesc->pNext); - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = - reinterpret_cast<const ur_base_desc_t *>(pNext); - if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR) { - const ur_exp_file_descriptor_t *FileDescriptor = - reinterpret_cast<const ur_exp_file_descriptor_t *>(pNext); - - extMemDesc.handle.fd = FileDescriptor->fd; - extMemDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD; - } else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - pNext = const_cast<void *>(BaseDesc->pNext); - } - - CUexternalMemory extMem; - UR_CHECK_ERROR(cuImportExternalMemory(&extMem, &extMemDesc)); - *phInteropMem = (ur_exp_interop_mem_handle_t)extMem; - - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_exp_interop_mem_handle_t hInteropMem, - ur_exp_image_mem_handle_t *phImageMem) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - unsigned int NumChannels = 0; - UR_CHECK_ERROR( - urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels)); - - CUarray_format format; - UR_CHECK_ERROR(urToCudaImageChannelFormat( - pImageFormat->channelType, pImageFormat->channelOrder, &format, nullptr)); - - try { - ScopedContext Active(hDevice->getContext()); - - CUDA_ARRAY3D_DESCRIPTOR ArrayDesc = {}; - ArrayDesc.Width = pImageDesc->width; - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = pImageDesc->depth; - ArrayDesc.NumChannels = NumChannels; - ArrayDesc.Format = format; - - CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapDesc = {}; - mipmapDesc.numLevels = 1; - mipmapDesc.arrayDesc = ArrayDesc; - - CUmipmappedArray memMipMap; - UR_CHECK_ERROR(cuExternalMemoryGetMappedMipmappedArray( - &memMipMap, (CUexternalMemory)hInteropMem, &mipmapDesc)); - - CUarray memArray; - UR_CHECK_ERROR(cuMipmappedArrayGetLevel(&memArray, memMipMap, 0)); - - *phImageMem = (ur_exp_image_mem_handle_t)memArray; - - } catch (ur_result_t Err) { - return Err; - } catch (...)
{ - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_interop_mem_handle_t hInteropMem) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - try { - ScopedContext Active(hDevice->getContext()); - UR_CHECK_ERROR(cuDestroyExternalMemory((CUexternalMemory)hInteropMem)); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesImportExternalSemaphoreOpaqueFDExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, - ur_exp_interop_semaphore_handle_t *phInteropSemaphoreHandle) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - try { - ScopedContext Active(hDevice->getContext()); - - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC extSemDesc = {}; - - void *pNext = const_cast<void *>(pInteropSemaphoreDesc->pNext); - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = - reinterpret_cast<const ur_base_desc_t *>(pNext); - if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR) { - const ur_exp_file_descriptor_t *FileDescriptor = - reinterpret_cast<const ur_exp_file_descriptor_t *>(pNext); - - extSemDesc.handle.fd = FileDescriptor->fd; - extSemDesc.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD; - } else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - pNext = const_cast<void *>(BaseDesc->pNext); - } - - CUexternalSemaphore semaphore; - UR_CHECK_ERROR(cuImportExternalSemaphore(&semaphore, &extSemDesc)); - - *phInteropSemaphoreHandle = (ur_exp_interop_semaphore_handle_t)semaphore; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_interop_semaphore_handle_t hInteropSemaphore) { - UR_ASSERT((hContext->getDevice()->get() == hDevice->get()), - UR_RESULT_ERROR_INVALID_CONTEXT); - - try { - ScopedContext Active(hDevice->getContext()); - UR_CHECK_ERROR( - cuDestroyExternalSemaphore((CUexternalSemaphore)hInteropSemaphore)); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - - try { - ScopedContext Active(hQueue->getContext()); - CUstream Stream = hQueue->getNextTransferStream(); - - enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); - - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS SemWaitParams = {}; - - // Wait for one external semaphore - UR_CHECK_ERROR(cuWaitExternalSemaphoresAsync( - (CUexternalSemaphore *)&hSemaphore, &SemWaitParams, 1 /* numExtSems */, - Stream)); - - if (phEvent) { - auto NewEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP, hQueue, Stream); - NewEvent->record(); - *phEvent = NewEvent; - } - } catch (ur_result_t Err) { - return Err; - } catch (...)
{ - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - - try { - ScopedContext Active(hQueue->getContext()); - CUstream Stream = hQueue->getNextTransferStream(); - - enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList); - - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS SemSignalParams = {}; - - // Signal one external semaphore - UR_CHECK_ERROR(cuSignalExternalSemaphoresAsync( - (CUexternalSemaphore *)&hSemaphore, &SemSignalParams, - 1 /* numExtSems */, Stream)); - - if (phEvent) { - auto NewEvent = ur_event_handle_t_::makeNative( - UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP, hQueue, Stream); - NewEvent->record(); - *phEvent = NewEvent; - } - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/image.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/image.hpp deleted file mode 100644 index af1d9fd194893..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/image.hpp +++ /dev/null @@ -1,32 +0,0 @@ -//===--------- image.hpp - CUDA Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include -#include - -#include "common.hpp" -ur_result_t urCalculateNumChannels(ur_image_channel_order_t order, - unsigned int *num_channels); - -ur_result_t -urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type, - ur_image_channel_order_t image_channel_order, - CUarray_format *return_cuda_format, - size_t *return_pixel_types_size_bytes); - -ur_result_t -cudaToUrImageChannelFormat(CUarray_format cuda_format, - ur_image_channel_type_t *return_image_channel_type); - -ur_result_t urTextureCreate(ur_context_handle_t hContext, - ur_sampler_desc_t SamplerDesc, - const ur_image_desc_t *pImageDesc, - CUDA_RESOURCE_DESC ResourceDesc, - ur_exp_image_handle_t *phRetImage); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp deleted file mode 100644 index e2fa09e4ddece..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp +++ /dev/null @@ -1,374 +0,0 @@ -//===--------- kernel.cpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "kernel.hpp" -#include "memory.hpp" -#include "sampler.hpp" - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, - ur_kernel_handle_t *phKernel) { - ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr Kernel{nullptr}; - - try { - ScopedContext Active(hProgram->getContext()); - - CUfunction CuFunc; - CUresult FunctionResult = - cuModuleGetFunction(&CuFunc, hProgram->get(), pKernelName); - - // We can't add this as a generic mapping in UR_CHECK_ERROR since cuda's - // NOT_FOUND error applies to more than just functions. - if (FunctionResult == CUDA_ERROR_NOT_FOUND) { - throw UR_RESULT_ERROR_INVALID_KERNEL_NAME; - } else { - UR_CHECK_ERROR(FunctionResult); - } - - std::string KernelNameWithOffset = - std::string(pKernelName) + "_with_offset"; - CUfunction CuFuncWithOffsetParam; - CUresult OffsetRes = cuModuleGetFunction( - &CuFuncWithOffsetParam, hProgram->get(), KernelNameWithOffset.c_str()); - - // If there is no kernel with global offset parameter we mark it as missing - if (OffsetRes == CUDA_ERROR_NOT_FOUND) { - CuFuncWithOffsetParam = nullptr; - } else { - UR_CHECK_ERROR(OffsetRes); - } - Kernel = std::unique_ptr( - new ur_kernel_handle_t_{CuFunc, CuFuncWithOffsetParam, pKernelName, - hProgram, hProgram->getContext()}); - } catch (ur_result_t Err) { - Result = Err; - } catch (...) { - Result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phKernel = Kernel.release(); - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_group_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { - size_t GlobalWorkSize[3] = {0, 0, 0}; - - int MaxBlockDimX{0}, MaxBlockDimY{0}, MaxBlockDimZ{0}; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimX, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimY, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimZ, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, hDevice->get())); - - int MaxGridDimX{0}, MaxGridDimY{0}, MaxGridDimZ{0}; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxGridDimX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxGridDimY, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxGridDimZ, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, hDevice->get())); - - GlobalWorkSize[0] = MaxBlockDimX * MaxGridDimX; - GlobalWorkSize[1] = MaxBlockDimY * MaxGridDimY; - GlobalWorkSize[2] = MaxBlockDimZ * MaxGridDimZ; - return ReturnValue(GlobalWorkSize, 3); - } - case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { - int MaxThreads = 0; - UR_CHECK_ERROR(cuFuncGetAttribute( - &MaxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hKernel->get())); - return ReturnValue(size_t(MaxThreads)); - } - case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { - size_t GroupSize[3] = {0, 0, 0}; - const auto &ReqdWGSizeMDMap = - hKernel->get_program()->KernelReqdWorkGroupSizeMD; - const auto ReqdWGSizeMD = ReqdWGSizeMDMap.find(hKernel->getName()); - if (ReqdWGSizeMD != ReqdWGSizeMDMap.end()) { - const auto ReqdWGSize = 
ReqdWGSizeMD->second; - GroupSize[0] = std::get<0>(ReqdWGSize); - GroupSize[1] = std::get<1>(ReqdWGSize); - GroupSize[2] = std::get<2>(ReqdWGSize); - } - return ReturnValue(GroupSize, 3); - } - case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { - // OpenCL LOCAL == CUDA SHARED - int Bytes = 0; - UR_CHECK_ERROR(cuFuncGetAttribute( - &Bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, hKernel->get())); - return ReturnValue(uint64_t(Bytes)); - } - case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { - // Work groups should be multiples of the warp size - int WarpSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get())); - return ReturnValue(static_cast(WarpSize)); - } - case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { - // OpenCL PRIVATE == CUDA LOCAL - int Bytes = 0; - UR_CHECK_ERROR(cuFuncGetAttribute( - &Bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, hKernel->get())); - return ReturnValue(uint64_t(Bytes)); - } - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - UR_ASSERT(hKernel->getReferenceCount() > 0u, UR_RESULT_ERROR_INVALID_KERNEL); - - hKernel->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelRelease(ur_kernel_handle_t hKernel) { - // double delete or someone is messing with the ref count. - // either way, cannot safely proceed. - UR_ASSERT(hKernel->getReferenceCount() != 0, UR_RESULT_ERROR_INVALID_KERNEL); - - // decrement ref count. If it is 0, delete the program. - if (hKernel->decrementReferenceCount() == 0) { - // no internal cuda resources to clean up. Just delete it. - delete hKernel; - return UR_RESULT_SUCCESS; - } - - return UR_RESULT_SUCCESS; -} - -// TODO(ur): Not implemented on cuda atm. Also, need to add tests for this -// feature. 
-UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( - ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { - (void)hKernel; - (void)phNativeKernel; - - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( - ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_value_properties_t *pProperties, - const void *pArgValue) { - std::ignore = pProperties; - UR_ASSERT(argSize, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - hKernel->setKernelArg(argIndex, argSize, pArgValue); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( - ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_local_properties_t *pProperties) { - std::ignore = pProperties; - UR_ASSERT(argSize, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - hKernel->setKernelLocalArg(argIndex, argSize); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, - ur_kernel_info_t propName, - size_t propSize, - void *pKernelInfo, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet); - - switch (propName) { - case UR_KERNEL_INFO_FUNCTION_NAME: - return ReturnValue(hKernel->getName()); - case UR_KERNEL_INFO_NUM_ARGS: - return ReturnValue(hKernel->getNumArgs()); - case UR_KERNEL_INFO_REFERENCE_COUNT: - return ReturnValue(hKernel->getReferenceCount()); - case UR_KERNEL_INFO_CONTEXT: - return ReturnValue(hKernel->getContext()); - case UR_KERNEL_INFO_PROGRAM: - return ReturnValue(hKernel->get_program()); - case UR_KERNEL_INFO_ATTRIBUTES: - return ReturnValue(""); - case UR_KERNEL_INFO_NUM_REGS: { - int NumRegs = 0; - UR_CHECK_ERROR(cuFuncGetAttribute(&NumRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, - hKernel->get())); - return ReturnValue(static_cast(NumRegs)); - } - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_sub_group_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - switch (propName) { - case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: { - // Sub-group size is equivalent to warp size - int WarpSize = 0; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get())); - return ReturnValue(static_cast(WarpSize)); - } - case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: { - // Number of sub-groups = max block size / warp size + possible remainder - int MaxThreads = 0; - UR_CHECK_ERROR(cuFuncGetAttribute( - &MaxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hKernel->get())); - int WarpSize = 0; - urKernelGetSubGroupInfo(hKernel, hDevice, - UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE, - sizeof(uint32_t), &WarpSize, nullptr); - int MaxWarps = (MaxThreads + WarpSize - 1) / WarpSize; - return ReturnValue(static_cast(MaxWarps)); - } - case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: { - // Return value of 0 => not specified - // TODO: Revisit if PTX is generated for compile-time work-group sizes - return ReturnValue(0); - } - case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: { - // Return value of 0 => unspecified or 
"auto" sub-group size - // Correct for now, since warp size may be read from special register - // TODO: Return warp size once default is primary sub-group size - // TODO: Revisit if we can recover [[sub_group_size]] attribute from PTX - return ReturnValue(0); - } - default: - break; - } - - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_pointer_properties_t *pProperties, - const void *pArgValue) { - std::ignore = pProperties; - hKernel->setKernelArg(argIndex, sizeof(pArgValue), pArgValue); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_mem_obj_properties_t *Properties, - ur_mem_handle_t hArgValue) { - std::ignore = Properties; - - // Below sets kernel arg when zero-sized buffers are handled. - // In such case the corresponding memory is null. - if (hArgValue == nullptr) { - hKernel->setKernelArg(argIndex, 0, nullptr); - return UR_RESULT_SUCCESS; - } - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - if (hArgValue->MemType == ur_mem_handle_t_::Type::Surface) { - CUDA_ARRAY3D_DESCRIPTOR arrayDesc; - UR_CHECK_ERROR(cuArray3DGetDescriptor( - &arrayDesc, hArgValue->Mem.SurfaceMem.getArray())); - if (arrayDesc.Format != CU_AD_FORMAT_UNSIGNED_INT32 && - arrayDesc.Format != CU_AD_FORMAT_SIGNED_INT32 && - arrayDesc.Format != CU_AD_FORMAT_HALF && - arrayDesc.Format != CU_AD_FORMAT_FLOAT) { - setErrorMessage("PI CUDA kernels only support images with channel " - "types int32, uint32, float, and half.", - UR_RESULT_ERROR_ADAPTER_SPECIFIC); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - CUsurfObject CuSurf = hArgValue->Mem.SurfaceMem.getSurface(); - hKernel->setKernelArg(argIndex, sizeof(CuSurf), (void *)&CuSurf); - } else { - CUdeviceptr CuPtr = hArgValue->Mem.BufferMem.get(); - hKernel->setKernelArg(argIndex, sizeof(CUdeviceptr), (void *)&CuPtr); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -// A NOP for the CUDA backend -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( - ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize, - const ur_kernel_exec_info_properties_t *pProperties, - const void *pPropValue) { - std::ignore = hKernel; - std::ignore = propSize; - std::ignore = pPropValue; - std::ignore = pProperties; - - switch (propName) { - case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: - case UR_KERNEL_EXEC_INFO_USM_PTRS: - case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: - return UR_RESULT_SUCCESS; - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, - ur_program_handle_t hProgram, - const ur_kernel_native_properties_t *pProperties, - ur_kernel_handle_t *phKernel) { - std::ignore = hNativeKernel; - std::ignore = hContext; - std::ignore = hProgram; - std::ignore = pProperties; - std::ignore = phKernel; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_sampler_properties_t *pProperties, - ur_sampler_handle_t hArgValue) { - std::ignore = pProperties; - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - uint32_t SamplerProps = hArgValue->Props; - hKernel->setKernelArg(argIndex, sizeof(uint32_t), (void *)&SamplerProps); - } catch 
(ur_result_t Err) { - Result = Err; - } - return Result; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.hpp deleted file mode 100644 index ea4e565d3f44b..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.hpp +++ /dev/null @@ -1,206 +0,0 @@ -//===--------- kernel.hpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include <ur_api.h> -#include <ur/ur.hpp> - -#include <array> -#include <atomic> -#include <cassert> -#include <numeric> - -#include "program.hpp" - -/// Implementation of a UR Kernel for CUDA -/// -/// UR Kernels are used to set kernel arguments, -/// creating a state on the Kernel object for a given -/// invocation. This is not the case for CUfunction objects, -/// which are simply passed together with the arguments on the invocation. -/// The UR Kernel implementation for CUDA stores the list of arguments, -/// argument sizes, and offsets to emulate the interface of UR Kernel, -/// saving the arguments for the later dispatch. -/// Note that in the UR API, local memory is specified as a size per -/// individual argument, but in CUDA only the total usage of shared -/// memory is required since it is not passed as a parameter. -/// A compiler pass converts the UR API local memory model into the -/// CUDA shared model. This object simply calculates the total of -/// shared memory, and the initial offsets of each parameter. -struct ur_kernel_handle_t_ { - using native_type = CUfunction; - - native_type Function; - native_type FunctionWithOffsetParam; - std::string Name; - ur_context_handle_t Context; - ur_program_handle_t Program; - std::atomic_uint32_t RefCount; - - static constexpr uint32_t ReqdThreadsPerBlockDimensions = 3u; - size_t ReqdThreadsPerBlock[ReqdThreadsPerBlockDimensions]; - int RegsPerThread{0}; - - /// Structure that holds the arguments to the kernel. - /// Note each argument size is known, since it comes - /// from the kernel signature. - /// This is not something that can be queried from the CUDA API - /// so there is a hard-coded size (\ref MaxParamBytes) - /// and a storage. - struct arguments { - static constexpr size_t MaxParamBytes = 4000u; - using args_t = std::array<char, MaxParamBytes>; - using args_size_t = std::vector<size_t>; - using args_index_t = std::vector<void *>; - args_t Storage; - args_size_t ParamSizes; - args_index_t Indices; - args_size_t OffsetPerIndex; - - std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0}; - - arguments() { - // Place the implicit offset index at the end of the indices collection - Indices.emplace_back(&ImplicitOffsetArgs); - } - - /// Add an argument to the kernel. - /// If the argument existed before, it is replaced. - /// Otherwise, it is added. - /// Gaps are filled with empty arguments. - /// Implicit offset argument is kept at the back of the indices collection.
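Concretely, the bookkeeping described above produces the following state for two hypothetical arguments, an 8-byte pointer at index 0 followed by a 4-byte int at index 1 (illustrative values only):

    // After addArg(0, 8, &Ptr) and addArg(1, 4, &Val):
    //   ParamSizes     = { 8, 4 }
    //   Storage        = [ Ptr bytes 0..7 ][ Val bytes 8..11 ] ...
    //   Indices        = { &Storage[0], &Storage[8], &ImplicitOffsetArgs }
    //   OffsetPerIndex = { 0, 0 }   // no local (shared) memory requested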
- void addArg(size_t Index, size_t Size, const void *Arg, - size_t LocalSize = 0) { - if (Index + 2 > Indices.size()) { - // Move implicit offset argument index with the end - Indices.resize(Index + 2, Indices.back()); - // Ensure enough space for the new argument - ParamSizes.resize(Index + 1); - OffsetPerIndex.resize(Index + 1); - } - ParamSizes[Index] = Size; - // calculate the insertion point on the array - size_t InsertPos = std::accumulate(std::begin(ParamSizes), - std::begin(ParamSizes) + Index, 0); - // Update the stored value for the argument - std::memcpy(&Storage[InsertPos], Arg, Size); - Indices[Index] = &Storage[InsertPos]; - OffsetPerIndex[Index] = LocalSize; - } - - void addLocalArg(size_t Index, size_t Size) { - size_t LocalOffset = this->getLocalSize(); - - // maximum required alignment is the size of the largest vector type - const size_t MaxAlignment = sizeof(double) * 16; - - // for arguments smaller than the maximum alignment simply align to the - // size of the argument - const size_t Alignment = std::min(MaxAlignment, Size); - - // align the argument - size_t AlignedLocalOffset = LocalOffset; - size_t Pad = LocalOffset % Alignment; - if (Pad != 0) { - AlignedLocalOffset += Alignment - Pad; - } - - addArg(Index, sizeof(size_t), (const void *)&(AlignedLocalOffset), - Size + (AlignedLocalOffset - LocalOffset)); - } - - void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) { - assert(Size == sizeof(std::uint32_t) * 3); - std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size); - } - - void clearLocalSize() { - std::fill(std::begin(OffsetPerIndex), std::end(OffsetPerIndex), 0); - } - - const args_index_t &getIndices() const noexcept { return Indices; } - - uint32_t getLocalSize() const { - return std::accumulate(std::begin(OffsetPerIndex), - std::end(OffsetPerIndex), 0); - } - } Args; - - ur_kernel_handle_t_(CUfunction Func, CUfunction FuncWithOffsetParam, - const char *Name, ur_program_handle_t Program, - ur_context_handle_t Context) - : Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam}, - Name{Name}, Context{Context}, Program{Program}, RefCount{1} { - urProgramRetain(Program); - urContextRetain(Context); - /// Note: this code assumes that there is only one device per context - ur_result_t RetError = urKernelGetGroupInfo( - this, Context->getDevice(), - UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, - sizeof(ReqdThreadsPerBlock), ReqdThreadsPerBlock, nullptr); - (void)RetError; - assert(RetError == UR_RESULT_SUCCESS); - UR_CHECK_ERROR( - cuFuncGetAttribute(&RegsPerThread, CU_FUNC_ATTRIBUTE_NUM_REGS, Func)); - } - - ~ur_kernel_handle_t_() { - urProgramRelease(Program); - urContextRelease(Context); - } - - ur_program_handle_t get_program() const noexcept { return Program; } - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - native_type get() const noexcept { return Function; }; - - native_type get_with_offset_parameter() const noexcept { - return FunctionWithOffsetParam; - }; - - bool has_with_offset_parameter() const noexcept { - return FunctionWithOffsetParam != nullptr; - } - - ur_context_handle_t getContext() const noexcept { return Context; }; - - const char *getName() const noexcept { return Name.c_str(); } - - /// Get the number of kernel arguments, excluding the implicit global offset. 
- /// Note this only returns the current known number of arguments, not the - /// real one required by the kernel, since this cannot be queried from - /// the CUDA Driver API - size_t getNumArgs() const noexcept { return Args.Indices.size() - 1; } - - void setKernelArg(int Index, size_t Size, const void *Arg) { - Args.addArg(Index, Size, Arg); - } - - void setKernelLocalArg(int Index, size_t Size) { - Args.addLocalArg(Index, Size); - } - - void setImplicitOffsetArg(size_t Size, std::uint32_t *ImplicitOffset) { - return Args.setImplicitOffset(Size, ImplicitOffset); - } - - const arguments::args_index_t &getArgIndices() const { - return Args.getIndices(); - } - - uint32_t getLocalSize() const noexcept { return Args.getLocalSize(); } - - void clearLocalSize() { Args.clearLocalSize(); } - - size_t getRegsPerThread() const noexcept { return RegsPerThread; }; -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp deleted file mode 100644 index d51ba73d67e2a..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.cpp +++ /dev/null @@ -1,479 +0,0 @@ -//===--------- memory.cpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "common.hpp" -#include "context.hpp" -#include "memory.hpp" - -/// Creates a UR Memory object using a CUDA memory allocation. -/// Can trigger a manual copy depending on the mode. -/// \TODO Implement USE_HOST_PTR using cuHostRegister - See #9789 -/// -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( - ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, - const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { - // Validate flags - if (flags & - (UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER)) { - UR_ASSERT(pProperties && pProperties->pHost, - UR_RESULT_ERROR_INVALID_HOST_PTR); - } - UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - - // Currently, USE_HOST_PTR is not implemented using host register - // since this triggers a weird segfault after program ends. - // Setting this constant to true enables testing that behavior. - const bool EnableUseHostPtr = false; - const bool PerformInitialCopy = - (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) || - ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && !EnableUseHostPtr); - ur_result_t Result = UR_RESULT_SUCCESS; - ur_mem_handle_t MemObj = nullptr; - - try { - ScopedContext Active(hContext); - CUdeviceptr Ptr = 0; - auto HostPtr = pProperties ? 
pProperties->pHost : nullptr; - - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode AllocMode = - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic; - - if ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && EnableUseHostPtr) { - UR_CHECK_ERROR( - cuMemHostRegister(HostPtr, size, CU_MEMHOSTREGISTER_DEVICEMAP)); - UR_CHECK_ERROR(cuMemHostGetDevicePointer(&Ptr, HostPtr, 0)); - AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::UseHostPtr; - } else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { - UR_CHECK_ERROR(cuMemAllocHost(&HostPtr, size)); - UR_CHECK_ERROR(cuMemHostGetDevicePointer(&Ptr, HostPtr, 0)); - AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr; - } else { - UR_CHECK_ERROR(cuMemAlloc(&Ptr, size)); - if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { - AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::CopyIn; - } - } - - ur_mem_handle_t parentBuffer = nullptr; - - auto URMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, parentBuffer, flags, AllocMode, Ptr, HostPtr, size}); - if (URMemObj != nullptr) { - MemObj = URMemObj.release(); - if (PerformInitialCopy) { - // Operates on the default stream of the current CUDA context. - UR_CHECK_ERROR(cuMemcpyHtoD(Ptr, HostPtr, size)); - // Synchronize with default stream implicitly used by cuMemcpyHtoD - // to make buffer data available on device before any other UR call - // uses it. - CUstream defaultStream = 0; - UR_CHECK_ERROR(cuStreamSynchronize(defaultStream)); - } - } else { - Result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - } catch (ur_result_t Err) { - Result = Err; - } catch (...) { - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - *phBuffer = MemObj; - - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - UR_ASSERT(hMem->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_MEM_OBJECT); - hMem->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -/// Decreases the reference count of the Mem object. -/// If this is zero, calls the relevant CUDA Free function -/// \return UR_RESULT_SUCCESS unless deallocation error -UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - - // Do nothing if there are other references - if (hMem->decrementReferenceCount() > 0) { - return UR_RESULT_SUCCESS; - } - - // make sure hMem is released in case checkErrorUR throws - std::unique_ptr MemObjPtr(hMem); - - if (hMem->isSubBuffer()) { - return UR_RESULT_SUCCESS; - } - - ScopedContext Active(MemObjPtr->getContext()); - - if (hMem->MemType == ur_mem_handle_t_::Type::Buffer) { - switch (MemObjPtr->Mem.BufferMem.MemAllocMode) { - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::CopyIn: - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic: - UR_CHECK_ERROR(cuMemFree(MemObjPtr->Mem.BufferMem.Ptr)); - break; - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::UseHostPtr: - UR_CHECK_ERROR(cuMemHostUnregister(MemObjPtr->Mem.BufferMem.HostPtr)); - break; - case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr: - UR_CHECK_ERROR(cuMemFreeHost(MemObjPtr->Mem.BufferMem.HostPtr)); - }; - } else if (hMem->MemType == ur_mem_handle_t_::Type::Surface) { - UR_CHECK_ERROR( - cuSurfObjectDestroy(MemObjPtr->Mem.SurfaceMem.getSurface())); - UR_CHECK_ERROR(cuArrayDestroy(MemObjPtr->Mem.SurfaceMem.getArray())); - } - - } catch (ur_result_t Err) { - Result = Err; - } catch (...) 
{ - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - if (Result != UR_RESULT_SUCCESS) { - // A reported CUDA error is either an implementation error or an asynchronous - // CUDA error for which it is unclear if the function that reported it - // succeeded or not. Either way, the state of the program is compromised and - // likely unrecoverable. - detail::ur::die("Unrecoverable program state reached in urMemRelease"); - } - - return UR_RESULT_SUCCESS; -} - -/// Gets the native CUDA handle of a UR mem object -/// -/// \param[in] hMem The UR mem to get the native CUDA object of. -/// \param[out] phNativeMem Set to the native handle of the UR mem object. -/// -/// \return UR_RESULT_SUCCESS -UR_APIEXPORT ur_result_t UR_APICALL -urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { - *phNativeMem = - reinterpret_cast<ur_native_handle_t>(hMem->Mem.BufferMem.get()); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, - ur_mem_info_t MemInfoType, - size_t propSize, - void *pMemInfo, - size_t *pPropSizeRet) { - UR_ASSERT(hMemory->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); - - ScopedContext Active(hMemory->getContext()); - - switch (MemInfoType) { - case UR_MEM_INFO_SIZE: { - try { - size_t AllocSize = 0; - UR_CHECK_ERROR(cuMemGetAddressRange(nullptr, &AllocSize, - hMemory->Mem.BufferMem.Ptr)); - return ReturnValue(AllocSize); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - } - case UR_MEM_INFO_CONTEXT: { - return ReturnValue(hMemory->getContext()); - } - - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, const ur_mem_native_properties_t *, - ur_mem_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, const ur_image_format_t *, - const ur_image_desc_t *, const ur_mem_native_properties_t *, - ur_mem_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// \TODO Not implemented -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( - ur_context_handle_t hContext, ur_mem_flags_t flags, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - void *pHost, ur_mem_handle_t *phMem) { - if (flags & - (UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER | UR_MEM_FLAG_USE_HOST_POINTER)) { - UR_ASSERT(pHost, UR_RESULT_ERROR_INVALID_HOST_PTR); - } - const bool PerformInitialCopy = - (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) || - ((flags & UR_MEM_FLAG_USE_HOST_POINTER)); - - UR_ASSERT(pImageDesc->stype == UR_STRUCTURE_TYPE_IMAGE_DESC, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->type <= UR_MEM_TYPE_IMAGE1D_BUFFER, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->numMipLevel == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->numSamples == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - if (!pHost) { - UR_ASSERT(pImageDesc->rowPitch == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - UR_ASSERT(pImageDesc->slicePitch == 0, - UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - } - - ur_result_t Result = UR_RESULT_SUCCESS; - - // We only support RGBA channel order - // TODO: check SYCL CTS and spec.
May also have to support BGRA - UR_ASSERT(pImageFormat->channelOrder == UR_IMAGE_CHANNEL_ORDER_RGBA, - UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); - - // We have to use cuArray3DCreate, which has some caveats. The height and - // depth parameters must be set to 0 to produce 1D or 2D arrays. pImageDesc - // gives a minimum value of 1, so we need to convert those values. - CUDA_ARRAY3D_DESCRIPTOR ArrayDesc; - ArrayDesc.NumChannels = 4; // Only support 4 channel image - ArrayDesc.Flags = 0; // No flags required - ArrayDesc.Width = pImageDesc->width; - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - ArrayDesc.Height = 0; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = pImageDesc->depth; - } - - // We need to get this now in bytes for calculating the total image size later - size_t PixelTypeSizeBytes; - - switch (pImageFormat->channelType) { - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: - ArrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: - ArrayDesc.Format = CU_AD_FORMAT_SIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: - ArrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: - ArrayDesc.Format = CU_AD_FORMAT_SIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: - ArrayDesc.Format = CU_AD_FORMAT_HALF; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: - ArrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT32; - PixelTypeSizeBytes = 4; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: - ArrayDesc.Format = CU_AD_FORMAT_SIGNED_INT32; - PixelTypeSizeBytes = 4; - break; - case UR_IMAGE_CHANNEL_TYPE_FLOAT: - ArrayDesc.Format = CU_AD_FORMAT_FLOAT; - PixelTypeSizeBytes = 4; - break; - default: - detail::ur::die( - "urMemImageCreate given unsupported image_channel_data_type"); - } - - // When a dimension isn't used pImageDesc has the size set to 1 - size_t PixelSizeBytes = - PixelTypeSizeBytes * 4; // 4 is the only number of channels we support - size_t ImageSizeBytes = PixelSizeBytes * pImageDesc->width * - pImageDesc->height * pImageDesc->depth; - - ScopedContext Active(hContext); - CUarray ImageArray = nullptr; - try { - UR_CHECK_ERROR(cuArray3DCreate(&ImageArray, &ArrayDesc)); - } catch (ur_result_t Err) { - if (Err == UR_RESULT_ERROR_INVALID_VALUE) { - return UR_RESULT_ERROR_INVALID_IMAGE_SIZE; - } - return Err; - } catch (...)
{ - return UR_RESULT_ERROR_UNKNOWN; - } - - try { - if (PerformInitialCopy) { - // We have to use a different copy function for each image dimensionality - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR(cuMemcpyHtoA(ImageArray, 0, pHost, ImageSizeBytes)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - CUDA_MEMCPY2D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - CpyDesc.dstArray = ImageArray; - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - UR_CHECK_ERROR(cuMemcpy2D(&CpyDesc)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - CUDA_MEMCPY3D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; - CpyDesc.dstArray = ImageArray; - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - CpyDesc.Depth = pImageDesc->depth; - UR_CHECK_ERROR(cuMemcpy3D(&CpyDesc)); - } - } - - // CUDA_RESOURCE_DESC is a union of different structs, shown here - // https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TEXOBJECT.html - // We need to fill it as described here to use it for a surface or texture - // https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__SURFOBJECT.html - // CUDA_RESOURCE_DESC::resType must be CU_RESOURCE_TYPE_ARRAY and - // CUDA_RESOURCE_DESC::res::array::hArray must be set to a valid CUDA array - // handle. - // CUDA_RESOURCE_DESC::flags must be set to zero - - CUDA_RESOURCE_DESC ImageResDesc; - ImageResDesc.res.array.hArray = ImageArray; - ImageResDesc.resType = CU_RESOURCE_TYPE_ARRAY; - ImageResDesc.flags = 0; - - CUsurfObject Surface; - UR_CHECK_ERROR(cuSurfObjectCreate(&Surface, &ImageResDesc)); - - auto MemObj = std::unique_ptr(new ur_mem_handle_t_( - hContext, ImageArray, Surface, flags, pImageDesc->type, phMem)); - - if (MemObj == nullptr) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phMem = MemObj.release(); - } catch (ur_result_t Err) { - if (ImageArray) { - cuArrayDestroy(ImageArray); - } - return Err; - } catch (...) { - if (ImageArray) { - cuArrayDestroy(ImageArray); - } - return UR_RESULT_ERROR_UNKNOWN; - } - - return Result; -} - -/// \TODO Not implemented -UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t, - ur_image_info_t, size_t, - void *, size_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// Implements a buffer partition in the CUDA backend. -/// A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented -/// as an offset over an existing CUDA allocation. -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( - ur_mem_handle_t hBuffer, ur_mem_flags_t flags, - ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, - ur_mem_handle_t *phMem) { - UR_ASSERT(hBuffer, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT((flags & UR_MEM_FLAGS_MASK) == 0, - UR_RESULT_ERROR_INVALID_ENUMERATION); - UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(!hBuffer->isSubBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - - // Default value for flags means UR_MEM_FLAG_READ_WRITE. 
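[Editor's note: for reviewers unfamiliar with the image path being removed above, it reduces to three driver-API steps: describe the array, upload the pixels, wrap the array in a surface object. A minimal standalone sketch follows; CreateRgba8Surface and the CHECK macro are illustrative names, not adapter entry points, and it assumes cuInit has run and a context is current.]

```cpp
#include <cuda.h>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Abort on any driver-API failure; reused by the later sketches in this note.
#define CHECK(Call)                                                            \
  do {                                                                         \
    if (CUresult Res = (Call); Res != CUDA_SUCCESS) {                          \
      std::fprintf(stderr, "%s failed: %d\n", #Call, static_cast<int>(Res));   \
      std::exit(1);                                                            \
    }                                                                          \
  } while (0)

// Build a 2D RGBA8 CUDA array, upload host pixels, and wrap it in a surface.
void CreateRgba8Surface(const unsigned char *HostPixels, size_t Width,
                        size_t Height, CUarray *OutArray,
                        CUsurfObject *OutSurf) {
  CUDA_ARRAY3D_DESCRIPTOR Desc = {};
  Desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
  Desc.NumChannels = 4; // four channels: RGBA
  Desc.Width = Width;
  Desc.Height = Height; // Depth stays 0, so this is a 2D array
  CHECK(cuArray3DCreate(OutArray, &Desc));

  CUDA_MEMCPY2D Cpy = {};
  Cpy.srcMemoryType = CU_MEMORYTYPE_HOST;
  Cpy.srcHost = HostPixels;
  Cpy.srcPitch = Width * 4; // tightly packed rows of 4-byte pixels
  Cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  Cpy.dstArray = *OutArray;
  Cpy.WidthInBytes = Width * 4;
  Cpy.Height = Height;
  CHECK(cuMemcpy2D(&Cpy));

  // Surface creation needs exactly resType, res.array.hArray, and zero flags.
  CUDA_RESOURCE_DESC ResDesc = {};
  ResDesc.resType = CU_RESOURCE_TYPE_ARRAY;
  ResDesc.res.array.hArray = *OutArray;
  CHECK(cuSurfObjectCreate(OutSurf, &ResDesc));
}
```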
- if (flags == 0) { - flags = UR_MEM_FLAG_READ_WRITE; - } - - UR_ASSERT(!(flags & - (UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER | - UR_MEM_FLAG_ALLOC_HOST_POINTER | UR_MEM_FLAG_USE_HOST_POINTER)), - UR_RESULT_ERROR_INVALID_VALUE); - if (hBuffer->MemFlags & UR_MEM_FLAG_WRITE_ONLY) { - UR_ASSERT(!(flags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_READ_ONLY)), - UR_RESULT_ERROR_INVALID_VALUE); - } - if (hBuffer->MemFlags & UR_MEM_FLAG_READ_ONLY) { - UR_ASSERT(!(flags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY)), - UR_RESULT_ERROR_INVALID_VALUE); - } - - UR_ASSERT(bufferCreateType == UR_BUFFER_CREATE_TYPE_REGION, - UR_RESULT_ERROR_INVALID_ENUMERATION); - UR_ASSERT(pRegion != nullptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(phMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); - - UR_ASSERT(pRegion->size != 0u, UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - - assert((pRegion->origin <= (pRegion->origin + pRegion->size)) && "Overflow"); - UR_ASSERT( - ((pRegion->origin + pRegion->size) <= hBuffer->Mem.BufferMem.getSize()), - UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - // Retained indirectly due to retaining parent buffer below. - ur_context_handle_t Context = hBuffer->Context; - - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode AllocMode = - ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic; - - assert(hBuffer->Mem.BufferMem.Ptr != - ur_mem_handle_t_::MemImpl::BufferMem::native_type{0}); - ur_mem_handle_t_::MemImpl::BufferMem::native_type Ptr = - hBuffer->Mem.BufferMem.Ptr + pRegion->origin; - - void *HostPtr = nullptr; - if (hBuffer->Mem.BufferMem.HostPtr) { - HostPtr = - static_cast(hBuffer->Mem.BufferMem.HostPtr) + pRegion->origin; - } - - std::unique_ptr MemObj{nullptr}; - try { - MemObj = std::unique_ptr{new ur_mem_handle_t_{ - Context, hBuffer, flags, AllocMode, Ptr, HostPtr, pRegion->size}}; - } catch (ur_result_t Err) { - *phMem = nullptr; - return Err; - } catch (...) { - *phMem = nullptr; - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phMem = MemObj.release(); - return UR_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp deleted file mode 100644 index 33f8a3342f05d..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/memory.hpp +++ /dev/null @@ -1,232 +0,0 @@ -//===--------- memory.hpp - CUDA Adapter ----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include -#include - -#include "common.hpp" - -/// UR Mem mapping to CUDA memory allocations, both data and texture/surface. -/// \brief Represents non-SVM allocations on the CUDA backend. -/// Keeps tracks of all mapped regions used for Map/Unmap calls. -/// Only one region can be active at the same time per allocation. -struct ur_mem_handle_t_ { - // Context where the memory object is accessible - ur_context_handle_t Context; - - /// Reference counting of the handler - std::atomic_uint32_t RefCount; - enum class Type { Buffer, Surface, Texture } MemType; - - // Original mem flags passed - ur_mem_flags_t MemFlags; - - /// A UR Memory object represents either plain memory allocations ("Buffers" - /// in OpenCL) or typed allocations ("Images" in OpenCL). - /// In CUDA their API handlers are different. 
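[Editor's note: the buffer-partition scheme that ends above is worth restating, since no CUDA call is involved at all: a sub-buffer is only the parent's device pointer plus the region origin. A minimal sketch under that reading; SubBuffer and its fields are illustrative names, not adapter types.]

```cpp
#include <cuda.h>
#include <cassert>
#include <cstddef>

struct SubBuffer {
  CUdeviceptr Parent; // base of the existing CUDA allocation
  CUdeviceptr Ptr;    // Parent + Origin, what kernels actually receive
  size_t Size;
};

SubBuffer MakeSubBuffer(CUdeviceptr Parent, size_t ParentSize, size_t Origin,
                        size_t Size) {
  assert(Origin <= Origin + Size && "Overflow");        // same check as above
  assert(Origin + Size <= ParentSize && "Out of range");
  return SubBuffer{Parent, Parent + Origin, Size};      // no new allocation
}
```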
Whereas "Buffers" are allocated - /// as pointer-like structs, "Images" are stored in Textures or Surfaces. - /// This union allows implementation to use either from the same handler. - union MemImpl { - // Handler for plain, pointer-based CUDA allocations - struct BufferMem { - using native_type = CUdeviceptr; - - // If this allocation is a sub-buffer (i.e., a view on an existing - // allocation), this is the pointer to the parent handler structure - ur_mem_handle_t Parent; - // CUDA handler for the pointer - native_type Ptr; - - /// Pointer associated with this device on the host - void *HostPtr; - /// Size of the allocation in bytes - size_t Size; - /// Size of the active mapped region. - size_t MapSize; - /// Offset of the active mapped region. - size_t MapOffset; - /// Pointer to the active mapped region, if any - void *MapPtr; - /// Original flags for the mapped region - ur_map_flags_t MapFlags; - - /** AllocMode - * classic: Just a normal buffer allocated on the device via cuda malloc - * use_host_ptr: Use an address on the host for the device - * copy_in: The data for the device comes from the host but the host - pointer is not available later for re-use - * alloc_host_ptr: Uses pinned-memory allocation - */ - enum class AllocMode { - Classic, - UseHostPtr, - CopyIn, - AllocHostPtr, - } MemAllocMode; - - native_type get() const noexcept { return Ptr; } - - size_t getSize() const noexcept { return Size; } - - void *getMapPtr() const noexcept { return MapPtr; } - - size_t getMapSize() const noexcept { return MapSize; } - - size_t getMapOffset() const noexcept { return MapOffset; } - - /// Returns a pointer to data visible on the host that contains - /// the data on the device associated with this allocation. - /// The offset is used to index into the CUDA allocation. - void *mapToPtr(size_t Size, size_t Offset, - ur_map_flags_t Flags) noexcept { - assert(MapPtr == nullptr); - MapSize = Size; - MapOffset = Offset; - MapFlags = Flags; - if (HostPtr) { - MapPtr = static_cast(HostPtr) + Offset; - } else { - // TODO: Allocate only what is needed based on the offset - MapPtr = static_cast(malloc(this->getSize())); - } - return MapPtr; - } - - /// Detach the allocation from the host memory. - void unmap(void *) noexcept { - assert(MapPtr != nullptr); - - if (MapPtr != HostPtr) { - free(MapPtr); - } - MapPtr = nullptr; - MapSize = 0; - MapOffset = 0; - } - - ur_map_flags_t getMapFlags() const noexcept { - assert(MapPtr != nullptr); - return MapFlags; - } - } BufferMem; - - // Handler data for surface object (i.e. 
Images) - struct SurfaceMem { - CUarray Array; - CUsurfObject SurfObj; - ur_mem_type_t ImageType; - - CUarray getArray() const noexcept { return Array; } - - CUsurfObject getSurface() const noexcept { return SurfObj; } - - ur_mem_type_t getImageType() const noexcept { return ImageType; } - } SurfaceMem; - - struct ImageMem { - CUarray Array; - void *Handle; - ur_mem_type_t ImageType; - ur_sampler_handle_t Sampler; - - CUarray get_array() const noexcept { return Array; } - - void *get_handle() const noexcept { return Handle; } - - ur_mem_type_t get_image_type() const noexcept { return ImageType; } - - ur_sampler_handle_t get_sampler() const noexcept { return Sampler; } - } ImageMem; - } Mem; - - /// Constructs the UR mem handler for a non-typed allocation ("buffer") - ur_mem_handle_t_(ur_context_handle_t Context, ur_mem_handle_t Parent, - ur_mem_flags_t MemFlags, MemImpl::BufferMem::AllocMode Mode, - CUdeviceptr Ptr, void *HostPtr, size_t Size) - : Context{Context}, RefCount{1}, MemType{Type::Buffer}, - MemFlags{MemFlags} { - Mem.BufferMem.Ptr = Ptr; - Mem.BufferMem.Parent = Parent; - Mem.BufferMem.HostPtr = HostPtr; - Mem.BufferMem.Size = Size; - Mem.BufferMem.MapSize = 0; - Mem.BufferMem.MapOffset = 0; - Mem.BufferMem.MapPtr = nullptr; - Mem.BufferMem.MapFlags = UR_MAP_FLAG_WRITE; - Mem.BufferMem.MemAllocMode = Mode; - if (isSubBuffer()) { - urMemRetain(Mem.BufferMem.Parent); - } else { - urContextRetain(Context); - } - }; - - /// Constructs the UR allocation for an Image object (surface in CUDA) - ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, - CUsurfObject Surf, ur_mem_flags_t MemFlags, - ur_mem_type_t ImageType, void *HostPtr) - : Context{Context}, RefCount{1}, MemType{Type::Surface}, - MemFlags{MemFlags} { - (void)HostPtr; - - Mem.SurfaceMem.Array = Array; - Mem.SurfaceMem.SurfObj = Surf; - Mem.SurfaceMem.ImageType = ImageType; - urContextRetain(Context); - } - - /// Constructs the UR allocation for an unsampled image object - ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, - CUsurfObject Surf, ur_mem_type_t ImageType) - : Context{Context}, RefCount{1}, MemType{Type::Surface} { - - Mem.ImageMem.Array = Array; - Mem.ImageMem.Handle = (void *)Surf; - Mem.ImageMem.ImageType = ImageType; - Mem.ImageMem.Sampler = nullptr; - urContextRetain(Context); - } - - /// Constructs the UR allocation for a sampled image object - ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUtexObject Tex, - ur_sampler_handle_t Sampler, ur_mem_type_t ImageType) - : Context{Context}, RefCount{1}, MemType{Type::Texture} { - - Mem.ImageMem.Array = Array; - Mem.ImageMem.Handle = (void *)Tex; - Mem.ImageMem.ImageType = ImageType; - Mem.ImageMem.Sampler = Sampler; - urContextRetain(Context); - } - - ~ur_mem_handle_t_() { - if (isBuffer() && isSubBuffer()) { - urMemRelease(Mem.BufferMem.Parent); - return; - } - urContextRelease(Context); - } - - bool isBuffer() const noexcept { return MemType == Type::Buffer; } - - bool isSubBuffer() const noexcept { - return (isBuffer() && (Mem.BufferMem.Parent != nullptr)); - } - - bool isImage() const noexcept { return MemType == Type::Surface; } - - ur_context_handle_t getContext() const noexcept { return Context; } - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/platform.cpp 
b/sycl/plugins/unified_runtime/ur/adapters/cuda/platform.cpp deleted file mode 100644 index 876f83921de23..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/platform.cpp +++ /dev/null @@ -1,195 +0,0 @@ -//===--------- platform.cpp - CUDA Adapter --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "platform.hpp" -#include "common.hpp" -#include "context.hpp" -#include "device.hpp" - -#include -#include -#include - -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( - ur_platform_handle_t hPlatform, ur_platform_info_t PlatformInfoType, - size_t Size, void *pPlatformInfo, size_t *pSizeRet) { - - UR_ASSERT(hPlatform, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UrReturnHelper ReturnValue(Size, pPlatformInfo, pSizeRet); - - switch (PlatformInfoType) { - case UR_PLATFORM_INFO_NAME: - return ReturnValue("NVIDIA CUDA BACKEND"); - case UR_PLATFORM_INFO_VENDOR_NAME: - return ReturnValue("NVIDIA Corporation"); - case UR_PLATFORM_INFO_PROFILE: - return ReturnValue("FULL PROFILE"); - case UR_PLATFORM_INFO_VERSION: { - auto Version = getCudaVersionString(); - return ReturnValue(Version.c_str()); - } - case UR_PLATFORM_INFO_EXTENSIONS: { - return ReturnValue(""); - } - case UR_PLATFORM_INFO_BACKEND: { - return ReturnValue(UR_PLATFORM_BACKEND_CUDA); - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - - return UR_RESULT_SUCCESS; -} - -/// Obtains the CUDA platform. -/// There is only one CUDA platform, and contains all devices on the system. -/// Triggers the CUDA Driver initialization (cuInit) the first time, so this -/// must be the first PI API called. -/// -/// However because multiple devices in a context is not currently supported, -/// place each device in a separate platform. 
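[Editor's note: the platform-per-device model described above rests on plain device enumeration. A standalone sketch of that shape, with each ordinal reported as its own "platform"; the printed labels are illustrative only.]

```cpp
#include <cuda.h>
#include <cstdio>

int main() {
  if (cuInit(0) != CUDA_SUCCESS) // first driver-API call, as noted above
    return 1;
  int NumDevices = 0;
  cuDeviceGetCount(&NumDevices);
  for (int I = 0; I < NumDevices; ++I) {
    CUdevice Dev;
    cuDeviceGet(&Dev, I);
    char Name[256];
    cuDeviceGetName(Name, static_cast<int>(sizeof(Name)), Dev);
    std::printf("platform %d: %s\n", I, Name); // one platform wraps one device
  }
  return 0;
}
```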
-UR_APIEXPORT ur_result_t UR_APICALL -urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, - ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - - try { - static std::once_flag InitFlag; - static uint32_t NumPlatforms = 1; - static std::vector Platforms; - - UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - - std::call_once( - InitFlag, - [](ur_result_t &Result) { - UR_CHECK_ERROR(cuInit(0)); - int NumDevices = 0; - UR_CHECK_ERROR(cuDeviceGetCount(&NumDevices)); - try { - // make one platform per device - NumPlatforms = NumDevices; - Platforms.resize(NumDevices); - - for (int i = 0; i < NumDevices; ++i) { - CUdevice Device; - UR_CHECK_ERROR(cuDeviceGet(&Device, i)); - CUcontext Context; - UR_CHECK_ERROR(cuDevicePrimaryCtxRetain(&Context, Device)); - - ScopedContext active(Context); - CUevent EvBase; - UR_CHECK_ERROR(cuEventCreate(&EvBase, CU_EVENT_DEFAULT)); - - // Use default stream to record base event counter - UR_CHECK_ERROR(cuEventRecord(EvBase, 0)); - - Platforms[i].Devices.emplace_back(new ur_device_handle_t_{ - Device, Context, EvBase, &Platforms[i]}); - { - const auto &Dev = Platforms[i].Devices.back().get(); - size_t MaxWorkGroupSize = 0u; - size_t MaxThreadsPerBlock[3] = {}; - UR_CHECK_ERROR(urDeviceGetInfo( - Dev, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, - sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr)); - - UR_CHECK_ERROR(urDeviceGetInfo( - Dev, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr)); - - Dev->saveMaxWorkItemSizes(sizeof(MaxThreadsPerBlock), - MaxThreadsPerBlock); - Dev->saveMaxWorkGroupSize(MaxWorkGroupSize); - } - } - } catch (const std::bad_alloc &) { - // Signal out-of-memory situation - for (int i = 0; i < NumDevices; ++i) { - Platforms[i].Devices.clear(); - } - Platforms.clear(); - Result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } catch (ur_result_t Err) { - // Clear and rethrow to allow retry - for (int i = 0; i < NumDevices; ++i) { - Platforms[i].Devices.clear(); - } - Platforms.clear(); - Result = Err; - throw Err; - } catch (...) { - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - throw; - } - }, - Result); - - if (pNumPlatforms != nullptr) { - *pNumPlatforms = NumPlatforms; - } - - if (phPlatforms != nullptr) { - for (unsigned i = 0; i < std::min(NumEntries, NumPlatforms); ++i) { - phPlatforms[i] = &Platforms[i]; - } - } - - return Result; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( - ur_platform_handle_t hDriver, ur_api_version_t *pVersion) { - std::ignore = hDriver; - *pVersion = UR_API_VERSION_CURRENT; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( - ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { - std::ignore = hPlatform; - std::ignore = phNativePlatform; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( - ur_native_handle_t hNativePlatform, - const ur_platform_native_properties_t *pProperties, - ur_platform_handle_t *phPlatform) { - std::ignore = hNativePlatform; - std::ignore = pProperties; - std::ignore = phPlatform; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -// Get CUDA plugin specific backend option. -// Current support is only for optimization options. 
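[Editor's note: the initialization in urPlatformGet above leans on the std::call_once idiom, with the result smuggled out through a reference argument. The shape, isolated, with illustrative names; note that once the flag is set the callable is skipped, so only the call that actually runs the body can write an error through the reference.]

```cpp
#include <mutex>

static std::once_flag InitFlag;

int Initialize() {
  int Result = 0; // 0 stands in for "success"
  std::call_once(
      InitFlag,
      [](int &Res) {
        // One-time setup (cuInit, device discovery) goes here; failures are
        // reported through the reference so the first caller can observe them.
        Res = 0;
      },
      Result); // call_once forwards the lvalue, binding it to int&
  return Result;
}
```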
-// Return empty string for cuda. -// TODO: Determine correct string to be passed. -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( - ur_platform_handle_t hPlatform, const char *pFrontendOption, - const char **ppPlatformOption) { - std::ignore = hPlatform; - using namespace std::literals; - if (pFrontendOption == nullptr) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - if (pFrontendOption == "-O0"sv || pFrontendOption == "-O1"sv || - pFrontendOption == "-O2"sv || pFrontendOption == "-O3"sv || - pFrontendOption == ""sv) { - *ppPlatformOption = ""; - return UR_RESULT_SUCCESS; - } - return UR_RESULT_ERROR_INVALID_VALUE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/platform.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/platform.hpp deleted file mode 100644 index c9b6550610eb8..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/platform.hpp +++ /dev/null @@ -1,15 +0,0 @@ -//===--------- platform.hpp - CUDA Adapter --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -struct ur_platform_handle_t_ { - std::vector> Devices; -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/program.cpp deleted file mode 100644 index 7e238dd7fe22b..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/program.cpp +++ /dev/null @@ -1,452 +0,0 @@ -//===--------- program.cpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "program.hpp" - -bool getMaxRegistersJitOptionValue(const std::string &BuildOptions, - unsigned int &Value) { - using namespace std::string_view_literals; - const std::size_t OptionPos = BuildOptions.find_first_of("maxrregcount"sv); - if (OptionPos == std::string::npos) { - return false; - } - - const std::size_t DelimPos = BuildOptions.find('=', OptionPos + 1u); - if (DelimPos == std::string::npos) { - return false; - } - - const std::size_t Length = BuildOptions.length(); - const std::size_t StartPos = DelimPos + 1u; - if (DelimPos == std::string::npos || StartPos >= Length) { - return false; - } - - std::size_t Pos = StartPos; - while (Pos < Length && - std::isdigit(static_cast(BuildOptions[Pos]))) { - Pos++; - } - - const std::string ValueString = BuildOptions.substr(StartPos, Pos - StartPos); - if (ValueString.empty()) { - return false; - } - - Value = static_cast(std::stoi(ValueString)); - return true; -} - -ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Context) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, - Context{Context}, KernelReqdWorkGroupSizeMD{} { - urContextRetain(Context); -} - -ur_program_handle_t_::~ur_program_handle_t_() { urContextRelease(Context); } - -std::pair -splitMetadataName(const std::string &metadataName) { - size_t splitPos = metadataName.rfind('@'); - if (splitPos == std::string::npos) - return std::make_pair(metadataName, std::string{}); - return std::make_pair(metadataName.substr(0, splitPos), - metadataName.substr(splitPos, metadataName.length())); -} - -ur_result_t -ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata, - size_t Length) { - for (size_t i = 0; i < Length; ++i) { - const ur_program_metadata_t MetadataElement = Metadata[i]; - std::string MetadataElementName{MetadataElement.pName}; - - auto [Prefix, Tag] = splitMetadataName(MetadataElementName); - - if (Tag == __SYCL_UR_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE) { - // If metadata is reqd_work_group_size, record it for the corresponding - // kernel name. - size_t MDElemsSize = MetadataElement.size - sizeof(std::uint64_t); - - // Expect between 1 and 3 32-bit integer values. - UR_ASSERT(MDElemsSize >= sizeof(std::uint32_t) && - MDElemsSize <= sizeof(std::uint32_t) * 3, - UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE); - - // Get pointer to data, skipping 64-bit size at the start of the data. - const char *ValuePtr = - reinterpret_cast(MetadataElement.value.pData) + - sizeof(std::uint64_t); - // Read values and pad with 1's for values not present. - std::uint32_t ReqdWorkGroupElements[] = {1, 1, 1}; - std::memcpy(ReqdWorkGroupElements, ValuePtr, MDElemsSize); - KernelReqdWorkGroupSizeMD[Prefix] = - std::make_tuple(ReqdWorkGroupElements[0], ReqdWorkGroupElements[1], - ReqdWorkGroupElements[2]); - } else if (Tag == __SYCL_UR_PROGRAM_METADATA_GLOBAL_ID_MAPPING) { - const char *MetadataValPtr = - reinterpret_cast(MetadataElement.value.pData) + - sizeof(std::uint64_t); - const char *MetadataValPtrEnd = - MetadataValPtr + MetadataElement.size - sizeof(std::uint64_t); - GlobalIDMD[Prefix] = std::string{MetadataValPtr, MetadataValPtrEnd}; - } - } - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_program_handle_t_::setBinary(const char *Source, size_t Length) { - // Do not re-set program binary data which has already been set as that will - // delete the old binary data. 
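[Editor's note: the option scan in getMaxRegistersJitOptionValue above is: locate the token, skip to '=', consume digits. A compact restatement; it deliberately uses std::string::find to match the whole "maxrregcount" token, where the helper above uses find_first_of, whose any-of-these-characters semantics can match earlier than intended.]

```cpp
#include <cctype>
#include <string>

bool ParseMaxRegisters(const std::string &Options, unsigned &Value) {
  const size_t Opt = Options.find("maxrregcount"); // whole-token search
  if (Opt == std::string::npos)
    return false;
  const size_t Eq = Options.find('=', Opt);
  if (Eq == std::string::npos || Eq + 1 >= Options.size())
    return false;
  size_t Pos = Eq + 1;
  unsigned Parsed = 0;
  bool AnyDigit = false;
  while (Pos < Options.size() &&
         std::isdigit(static_cast<unsigned char>(Options[Pos]))) {
    Parsed = Parsed * 10 + static_cast<unsigned>(Options[Pos] - '0');
    ++Pos;
    AnyDigit = true;
  }
  if (!AnyDigit)
    return false;
  Value = Parsed;
  return true;
}

// e.g. ParseMaxRegisters("-foo maxrregcount=32", V) sets V to 32.
```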
- UR_ASSERT(Binary == nullptr && BinarySizeInBytes == 0, - UR_RESULT_ERROR_INVALID_OPERATION); - Binary = Source; - BinarySizeInBytes = Length; - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { - if (BuildOptions) { - this->BuildOptions = BuildOptions; - } - - constexpr const unsigned int NumberOfOptions = 4u; - - std::vector Options(NumberOfOptions); - std::vector OptionVals(NumberOfOptions); - - // Pass a buffer for info messages - Options[0] = CU_JIT_INFO_LOG_BUFFER; - OptionVals[0] = (void *)InfoLog; - // Pass the size of the info buffer - Options[1] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - OptionVals[1] = (void *)(long)MaxLogSize; - // Pass a buffer for error message - Options[2] = CU_JIT_ERROR_LOG_BUFFER; - OptionVals[2] = (void *)ErrorLog; - // Pass the size of the error buffer - Options[3] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; - OptionVals[3] = (void *)(long)MaxLogSize; - - if (!this->BuildOptions.empty()) { - unsigned int MaxRegs; - bool Valid = getMaxRegistersJitOptionValue(BuildOptions, MaxRegs); - if (Valid) { - Options.push_back(CU_JIT_MAX_REGISTERS); - OptionVals.push_back(reinterpret_cast(MaxRegs)); - } - } - - UR_CHECK_ERROR(cuModuleLoadDataEx(&Module, static_cast(Binary), - Options.size(), Options.data(), - OptionVals.data())); - - BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS; - - // If no exception, result is correct - return UR_RESULT_SUCCESS; -} - -/// Finds kernel names by searching for entry points in the PTX source, as the -/// CUDA driver API doesn't expose an operation for this. -/// Note: This is currently only being used by the SYCL program class for the -/// has_kernel method, so an alternative would be to move the has_kernel -/// query to UR and use cuModuleGetFunction to check for a kernel. -/// Note: Another alternative is to add kernel names as metadata, like with -/// reqd_work_group_size. -ur_result_t getKernelNames(ur_program_handle_t) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -/// CUDA will handle the PTX/CUBIN binaries internally through CUmodule object. -/// So, urProgramCreateWithIL and urProgramCreateWithBinary are equivalent in -/// terms of CUDA adapter. See \ref urProgramCreateWithBinary. -UR_APIEXPORT ur_result_t UR_APICALL -urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, - size_t length, const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { - ur_device_handle_t hDevice = hContext->getDevice(); - auto pBinary = reinterpret_cast(pIL); - - return urProgramCreateWithBinary(hContext, hDevice, length, pBinary, - pProperties, phProgram); -} - -/// CUDA will handle the PTX/CUBIN binaries internally through a call to -/// cuModuleLoadDataEx. So, urProgramCompile and urProgramBuild are equivalent -/// in terms of CUDA adapter. \TODO Implement asynchronous compilation -UR_APIEXPORT ur_result_t UR_APICALL -urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, - const char *pOptions) { - return urProgramBuild(hContext, hProgram, pOptions); -} - -/// Loads the images from a UR program into a CUmodule that can be -/// used later on to extract functions (kernels). -/// See \ref ur_program_handle_t for implementation details. 
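[Editor's note: the JIT-option plumbing buildProgram sets up above is easy to misread, since scalar values travel through the void* slots by value. A standalone sketch of loading PTX with fixed log buffers; LoadPtx is an illustrative name, and Ptx is assumed to hold a NUL-terminated module valid for the current context's device.]

```cpp
#include <cuda.h>
#include <cstdint>
#include <cstdio>

constexpr size_t MaxLogSize = 8192;
static char InfoLog[MaxLogSize];
static char ErrorLog[MaxLogSize];

CUresult LoadPtx(CUmodule *Module, const char *Ptx) {
  CUjit_option Options[] = {CU_JIT_INFO_LOG_BUFFER,
                            CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
                            CU_JIT_ERROR_LOG_BUFFER,
                            CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
  // Pointers are passed as-is; sizes are passed by value inside the void*.
  void *Values[] = {InfoLog, (void *)(uintptr_t)MaxLogSize, ErrorLog,
                    (void *)(uintptr_t)MaxLogSize};
  CUresult Res = cuModuleLoadDataEx(Module, Ptx, 4, Options, Values);
  if (Res != CUDA_SUCCESS)
    std::fprintf(stderr, "JIT failed: %s\n", ErrorLog); // driver fills the log
  return Res;
}
```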
-UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, - ur_program_handle_t hProgram, - const char *pOptions) { - std::ignore = hContext; - - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hProgram->getContext()); - - hProgram->buildProgram(pOptions); - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -/// Creates a new UR program object that is the outcome of linking all input -/// programs. -/// \TODO Implement linker options, requires mapping of OpenCL to CUDA -UR_APIEXPORT ur_result_t UR_APICALL -urProgramLink(ur_context_handle_t hContext, uint32_t count, - const ur_program_handle_t *phPrograms, const char *pOptions, - ur_program_handle_t *phProgram) { - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext Active(hContext); - - CUlinkState State; - std::unique_ptr RetProgram{ - new ur_program_handle_t_{hContext}}; - - UR_CHECK_ERROR(cuLinkCreate(0, nullptr, nullptr, &State)); - try { - for (size_t i = 0; i < count; ++i) { - ur_program_handle_t Program = phPrograms[i]; - UR_CHECK_ERROR(cuLinkAddData( - State, CU_JIT_INPUT_PTX, const_cast(Program->Binary), - Program->BinarySizeInBytes, nullptr, 0, nullptr, nullptr)); - } - void *CuBin = nullptr; - size_t CuBinSize = 0; - UR_CHECK_ERROR(cuLinkComplete(State, &CuBin, &CuBinSize)); - - Result = - RetProgram->setBinary(static_cast(CuBin), CuBinSize); - - Result = RetProgram->buildProgram(pOptions); - } catch (...) { - // Upon error attempt cleanup - UR_CHECK_ERROR(cuLinkDestroy(State)); - throw; - } - - UR_CHECK_ERROR(cuLinkDestroy(State)); - *phProgram = RetProgram.release(); - - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -/// Created a UR program object from a CUDA program handle. -/// TODO: Implement this. -/// NOTE: The created UR object takes ownership of the native handle. -/// -/// \param[in] nativeHandle The native handle to create UR program object from. -/// \param[in] context The UR context of the program. -/// \param[out] program Set to the UR program object created from native handle. 
-/// -/// \return TBD -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, - const ur_program_native_properties_t *, ur_program_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, - ur_program_build_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { - std::ignore = hDevice; - - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_PROGRAM_BUILD_INFO_STATUS: { - return ReturnValue(hProgram->BuildStatus); - } - case UR_PROGRAM_BUILD_INFO_OPTIONS: - return ReturnValue(hProgram->BuildOptions.c_str()); - case UR_PROGRAM_BUILD_INFO_LOG: - return ReturnValue(hProgram->InfoLog, hProgram->MaxLogSize); - default: - break; - } - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, - size_t propSize, void *pProgramInfo, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet); - - switch (propName) { - case UR_PROGRAM_INFO_REFERENCE_COUNT: - return ReturnValue(hProgram->getReferenceCount()); - case UR_PROGRAM_INFO_CONTEXT: - return ReturnValue(hProgram->Context); - case UR_PROGRAM_INFO_NUM_DEVICES: - return ReturnValue(1u); - case UR_PROGRAM_INFO_DEVICES: - return ReturnValue(&hProgram->Context->DeviceID, 1); - case UR_PROGRAM_INFO_SOURCE: - return ReturnValue(hProgram->Binary); - case UR_PROGRAM_INFO_BINARY_SIZES: - return ReturnValue(&hProgram->BinarySizeInBytes, 1); - case UR_PROGRAM_INFO_BINARIES: - return ReturnValue(&hProgram->Binary, 1); - case UR_PROGRAM_INFO_KERNEL_NAMES: - /* TODO: Add implementation for getKernelNames */ - UR_ASSERT(getKernelNames(hProgram), UR_RESULT_ERROR_UNSUPPORTED_FEATURE); - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - case UR_PROGRAM_INFO_NUM_KERNELS: - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - default: - break; - } - return UR_RESULT_ERROR_INVALID_ENUMERATION; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urProgramRetain(ur_program_handle_t hProgram) { - UR_ASSERT(hProgram->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_PROGRAM); - hProgram->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -/// Decreases the reference count of a ur_program_handle_t object. -/// When the reference count reaches 0, it unloads the module from -/// the context. -UR_APIEXPORT ur_result_t UR_APICALL -urProgramRelease(ur_program_handle_t hProgram) { - // double delete or someone is messing with the ref count. - // either way, cannot safely proceed. - UR_ASSERT(hProgram->getReferenceCount() != 0, - UR_RESULT_ERROR_INVALID_PROGRAM); - - // decrement ref count. If it is 0, delete the program. - if (hProgram->decrementReferenceCount() == 0) { - - std::unique_ptr ProgramPtr{hProgram}; - - ur_result_t Result = UR_RESULT_ERROR_INVALID_PROGRAM; - - try { - ScopedContext Active(hProgram->getContext()); - auto cuModule = hProgram->get(); - // "0" is a valid handle for a cuModule, so the best way to check if we - // actually loaded a module and need to unload it is to look at the build - // status. - if (hProgram->BuildStatus == UR_PROGRAM_BUILD_STATUS_SUCCESS) { - UR_CHECK_ERROR(cuModuleUnload(cuModule)); - Result = UR_RESULT_SUCCESS; - } else if (hProgram->BuildStatus == UR_PROGRAM_BUILD_STATUS_NONE) { - // Nothing to free. - Result = UR_RESULT_SUCCESS; - } - } catch (...) 
{ - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - return Result; - } - - return UR_RESULT_SUCCESS; -} - -/// Gets the native CUDA handle of a UR program object -/// -/// \param[in] program The UR program handle to get the native CUDA object of. -/// \param[out] nativeHandle Set to the native handle of the UR program object. -/// -/// \return ur_result_t -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( - ur_program_handle_t hProgram, ur_native_handle_t *nativeHandle) { - *nativeHandle = reinterpret_cast(hProgram->get()); - return UR_RESULT_SUCCESS; -} - -/// Loads images from a list of PTX or CUBIN binaries. -/// Note: No calls to CUDA driver API in this function, only store binaries -/// for later. -/// -/// Note: Only supports one device -/// -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const uint8_t *pBinary, const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { - UR_ASSERT(hContext->getDevice()->get() == hDevice->get(), - UR_RESULT_ERROR_INVALID_CONTEXT); - UR_ASSERT(size, UR_RESULT_ERROR_INVALID_SIZE); - - ur_result_t Result = UR_RESULT_SUCCESS; - - std::unique_ptr RetProgram{ - new ur_program_handle_t_{hContext}}; - - if (pProperties) { - if (pProperties->count > 0 && pProperties->pMetadatas == nullptr) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - Result = - RetProgram->setMetadata(pProperties->pMetadatas, pProperties->count); - } - UR_ASSERT(Result == UR_RESULT_SUCCESS, Result); - - auto pBinary_string = reinterpret_cast(pBinary); - - Result = RetProgram->setBinary(pBinary_string, size); - UR_ASSERT(Result == UR_RESULT_SUCCESS, Result); - - *phProgram = RetProgram.release(); - - return Result; -} - -// This entry point is only used for native specialization constants (SPIR-V), -// and the CUDA plugin is AOT only so this entry point is not supported. -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( - ur_program_handle_t, uint32_t, const ur_specialization_constant_info_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( - ur_device_handle_t hDevice, ur_program_handle_t hProgram, - const char *pFunctionName, void **ppFunctionPointer) { - // Check if device passed is the same the device bound to the context - UR_ASSERT(hDevice == hProgram->getContext()->getDevice(), - UR_RESULT_ERROR_INVALID_DEVICE); - - CUfunction Func; - CUresult Ret = cuModuleGetFunction(&Func, hProgram->get(), pFunctionName); - *ppFunctionPointer = Func; - ur_result_t Result = UR_RESULT_SUCCESS; - - if (Ret != CUDA_SUCCESS && Ret != CUDA_ERROR_NOT_FOUND) - UR_CHECK_ERROR(Ret); - if (Ret == CUDA_ERROR_NOT_FOUND) { - *ppFunctionPointer = 0; - Result = UR_RESULT_ERROR_INVALID_FUNCTION_NAME; - } - - return Result; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/program.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/program.hpp deleted file mode 100644 index 99ed9a3862917..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/program.hpp +++ /dev/null @@ -1,54 +0,0 @@ -//===--------- program.hpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include -#include - -#include -#include - -#include "context.hpp" - -struct ur_program_handle_t_ { - using native_type = CUmodule; - native_type Module; - const char *Binary; - size_t BinarySizeInBytes; - std::atomic_uint32_t RefCount; - ur_context_handle_t Context; - - // Metadata - std::unordered_map> - KernelReqdWorkGroupSizeMD; - std::unordered_map GlobalIDMD; - - constexpr static size_t MaxLogSize = 8192u; - - char ErrorLog[MaxLogSize], InfoLog[MaxLogSize]; - std::string BuildOptions; - ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; - - ur_program_handle_t_(ur_context_handle_t Context); - ~ur_program_handle_t_(); - - ur_result_t setMetadata(const ur_program_metadata_t *Metadata, size_t Length); - - ur_result_t setBinary(const char *Binary, size_t BinarySizeInBytes); - - ur_result_t buildProgram(const char *BuildOptions); - ur_context_handle_t getContext() const { return Context; }; - - native_type get() const noexcept { return Module; }; - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp deleted file mode 100644 index 2a3d18994991c..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp +++ /dev/null @@ -1,328 +0,0 @@ -//===--------- queue.cpp - CUDA Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "queue.hpp" -#include "common.hpp" -#include "context.hpp" -#include "event.hpp" - -#include -#include - -void ur_queue_handle_t_::computeStreamWaitForBarrierIfNeeded(CUstream Stream, - uint32_t StreamI) { - if (BarrierEvent && !ComputeAppliedBarrier[StreamI]) { - UR_CHECK_ERROR(cuStreamWaitEvent(Stream, BarrierEvent, 0)); - ComputeAppliedBarrier[StreamI] = true; - } -} - -void ur_queue_handle_t_::transferStreamWaitForBarrierIfNeeded( - CUstream Stream, uint32_t StreamI) { - if (BarrierEvent && !TransferAppliedBarrier[StreamI]) { - UR_CHECK_ERROR(cuStreamWaitEvent(Stream, BarrierEvent, 0)); - TransferAppliedBarrier[StreamI] = true; - } -} - -CUstream ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) { - uint32_t StreamI; - uint32_t Token; - while (true) { - if (NumComputeStreams < ComputeStreams.size()) { - // the check above is for performance - so as not to lock mutex every time - std::lock_guard guard(ComputeStreamMutex); - // The second check is done after mutex is locked so other threads can not - // change NumComputeStreams after that - if (NumComputeStreams < ComputeStreams.size()) { - UR_CHECK_ERROR(cuStreamCreateWithPriority( - &ComputeStreams[NumComputeStreams++], Flags, Priority)); - } - } - Token = ComputeStreamIndex++; - StreamI = Token % ComputeStreams.size(); - // if a stream has been reused before it was next selected round-robin - // fashion, we want to delay its next use and instead select another one - // that is more likely to have completed all the enqueued work. 
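[Editor's note: the stream selection that continues below combines lazy pool growth under double-checked locking with a round-robin index. Isolated as a sketch, under the same invariant as the adapter code: every caller completes its create step before taking a token, so the slot a token lands on is always initialized. StreamPool is an illustrative type; stream-creation errors are elided.]

```cpp
#include <cuda.h>
#include <atomic>
#include <mutex>
#include <vector>

struct StreamPool {
  std::vector<CUstream> Streams;
  std::atomic<unsigned> NumCreated{0};
  std::atomic<unsigned> Index{0};
  std::mutex Mutex;

  explicit StreamPool(unsigned Capacity) : Streams(Capacity) {}

  CUstream Next() {
    if (NumCreated < Streams.size()) { // cheap check, no lock on the hot path
      std::lock_guard<std::mutex> Lock(Mutex);
      // Re-check under the lock: another thread may have grown the pool.
      if (NumCreated < Streams.size())
        cuStreamCreateWithPriority(&Streams[NumCreated++],
                                   CU_STREAM_NON_BLOCKING, /*priority=*/0);
    }
    // Round-robin token over the fixed capacity.
    return Streams[Index++ % Streams.size()];
  }
};
```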
- if (DelayCompute[StreamI]) { - DelayCompute[StreamI] = false; - } else { - break; - } - } - if (StreamToken) { - *StreamToken = Token; - } - CUstream res = ComputeStreams[StreamI]; - computeStreamWaitForBarrierIfNeeded(res, StreamI); - return res; -} - -CUstream ur_queue_handle_t_::getNextComputeStream( - uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, - ur_stream_guard_ &Guard, uint32_t *StreamToken) { - for (uint32_t i = 0; i < NumEventsInWaitList; i++) { - uint32_t Token = EventWaitList[i]->getComputeStreamToken(); - if (reinterpret_cast(EventWaitList[i]->getQueue()) == - this && - canReuseStream(Token)) { - std::unique_lock ComputeSyncGuard(ComputeStreamSyncMutex); - // redo the check after lock to avoid data races on - // LastSyncComputeStreams - if (canReuseStream(Token)) { - uint32_t StreamI = Token % DelayCompute.size(); - DelayCompute[StreamI] = true; - if (StreamToken) { - *StreamToken = Token; - } - Guard = ur_stream_guard_{std::move(ComputeSyncGuard)}; - CUstream Result = EventWaitList[i]->getStream(); - computeStreamWaitForBarrierIfNeeded(Result, StreamI); - return Result; - } - } - } - Guard = {}; - return getNextComputeStream(StreamToken); -} - -CUstream ur_queue_handle_t_::getNextTransferStream() { - if (TransferStreams.empty()) { // for example in in-order queue - return getNextComputeStream(); - } - if (NumTransferStreams < TransferStreams.size()) { - // the check above is for performance - so as not to lock mutex every time - std::lock_guard Guard(TransferStreamMutex); - // The second check is done after mutex is locked so other threads can not - // change NumTransferStreams after that - if (NumTransferStreams < TransferStreams.size()) { - UR_CHECK_ERROR(cuStreamCreateWithPriority( - &TransferStreams[NumTransferStreams++], Flags, Priority)); - } - } - uint32_t StreamI = TransferStreamIndex++ % TransferStreams.size(); - CUstream Result = TransferStreams[StreamI]; - transferStreamWaitForBarrierIfNeeded(Result, StreamI); - return Result; -} - -/// Creates a `ur_queue_handle_t` object on the CUDA backend. -/// Valid properties -/// * __SYCL_PI_CUDA_USE_DEFAULT_STREAM -> CU_STREAM_DEFAULT -/// * __SYCL_PI_CUDA_SYNC_WITH_DEFAULT -> CU_STREAM_NON_BLOCKING -UR_APIEXPORT ur_result_t UR_APICALL -urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProps, ur_queue_handle_t *phQueue) { - try { - std::unique_ptr Queue{nullptr}; - - if (hContext->getDevice() != hDevice) { - *phQueue = nullptr; - return UR_RESULT_ERROR_INVALID_DEVICE; - } - - unsigned int Flags = CU_STREAM_NON_BLOCKING; - ur_queue_flags_t URFlags = 0; - // '0' is the default priority, per CUDA Toolkit 12.2 and earlier - int Priority = 0; - bool IsOutOfOrder = false; - if (pProps && pProps->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) { - URFlags = pProps->flags; - if (URFlags == UR_QUEUE_FLAG_USE_DEFAULT_STREAM) { - Flags = CU_STREAM_DEFAULT; - } else if (URFlags == UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM) { - Flags = 0; - } - - if (URFlags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { - IsOutOfOrder = true; - } - if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) { - ScopedContext Active(hContext); - UR_CHECK_ERROR(cuCtxGetStreamPriorityRange(nullptr, &Priority)); - } else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) { - ScopedContext Active(hContext); - UR_CHECK_ERROR(cuCtxGetStreamPriorityRange(&Priority, nullptr)); - } - } - - std::vector ComputeCuStreams( - IsOutOfOrder ?
ur_queue_handle_t_::DefaultNumComputeStreams : 1); - std::vector TransferCuStreams( - IsOutOfOrder ? ur_queue_handle_t_::DefaultNumTransferStreams : 0); - - Queue = std::unique_ptr(new ur_queue_handle_t_{ - std::move(ComputeCuStreams), std::move(TransferCuStreams), hContext, - hDevice, Flags, URFlags, Priority}); - - *phQueue = Queue.release(); - - return UR_RESULT_SUCCESS; - } catch (ur_result_t Err) { - - return Err; - - } catch (...) { - - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - assert(hQueue->getReferenceCount() > 0); - - hQueue->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - if (hQueue->decrementReferenceCount() > 0) { - return UR_RESULT_SUCCESS; - } - - try { - std::unique_ptr Queue(hQueue); - - if (!hQueue->backendHasOwnership()) - return UR_RESULT_SUCCESS; - - ScopedContext Active(hQueue->getContext()); - - hQueue->forEachStream([](CUstream S) { - UR_CHECK_ERROR(cuStreamSynchronize(S)); - UR_CHECK_ERROR(cuStreamDestroy(S)); - }); - - return UR_RESULT_SUCCESS; - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - ur_result_t Result = UR_RESULT_SUCCESS; - - try { - ScopedContext active(hQueue->getContext()); - - hQueue->syncStreams( - [](CUstream s) { UR_CHECK_ERROR(cuStreamSynchronize(s)); }); - - } catch (ur_result_t Err) { - - Result = Err; - - } catch (...) { - - Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - - return Result; -} - -// There is no CUDA counterpart for queue flushing and we don't run into the -// same problem of having to flush cross-queue dependencies as some of the -// other plugins, so it can be left as no-op. 
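[Editor's note: the flag translation performed by urQueueCreate above can be read in isolation: the default-stream and sync-with-default requests pick the CUstream flags, and the priority hints query the context's allowed range. A sketch under the assumption that a context is current; error handling elided, names illustrative.]

```cpp
#include <cuda.h>

struct StreamConfig {
  unsigned int Flags;
  int Priority;
};

StreamConfig TranslateQueueFlags(bool UseDefaultStream, bool SyncWithDefault,
                                 bool HighPriority, bool LowPriority) {
  StreamConfig Cfg{CU_STREAM_NON_BLOCKING, 0}; // the defaults, as above
  if (UseDefaultStream)
    Cfg.Flags = CU_STREAM_DEFAULT;
  else if (SyncWithDefault)
    Cfg.Flags = 0; // blocking stream that synchronizes with the default one
  int Least = 0, Greatest = 0;
  cuCtxGetStreamPriorityRange(&Least, &Greatest);
  if (HighPriority)
    Cfg.Priority = Greatest; // numerically lowest value = highest priority
  else if (LowPriority)
    Cfg.Priority = Least;
  return Cfg;
}
```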
-UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - std::ignore = hQueue; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { - std::ignore = pDesc; - - ScopedContext Active(hQueue->getContext()); - *phNativeQueue = - reinterpret_cast(hQueue->getNextComputeStream()); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( - ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, - ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, - ur_queue_handle_t *phQueue) { - (void)hDevice; - - unsigned int CuFlags; - CUstream CuStream = reinterpret_cast(hNativeQueue); - - UR_CHECK_ERROR(cuStreamGetFlags(CuStream, &CuFlags)); - - ur_queue_flags_t Flags = 0; - if (CuFlags == CU_STREAM_DEFAULT) - Flags = UR_QUEUE_FLAG_USE_DEFAULT_STREAM; - else if (CuFlags == CU_STREAM_NON_BLOCKING) - Flags = UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; - else - detail::ur::die("Unknown cuda stream"); - - std::vector ComputeCuStreams(1, CuStream); - std::vector TransferCuStreams(0); - - // Create queue and set num_compute_streams to 1, as computeCuStreams has - // valid stream - *phQueue = - new ur_queue_handle_t_{std::move(ComputeCuStreams), - std::move(TransferCuStreams), - hContext, - hContext->getDevice(), - CuFlags, - Flags, - /*backend_owns*/ pProperties->isNativeHandleOwned}; - (*phQueue)->NumComputeStreams = 1; - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propValueSize, - void *pPropValue, - size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_QUEUE_INFO_CONTEXT: - return ReturnValue(hQueue->Context); - case UR_QUEUE_INFO_DEVICE: - return ReturnValue(hQueue->Device); - case UR_QUEUE_INFO_REFERENCE_COUNT: - return ReturnValue(hQueue->getReferenceCount()); - case UR_QUEUE_INFO_FLAGS: - return ReturnValue(hQueue->URFlags); - case UR_QUEUE_INFO_EMPTY: { - try { - bool IsReady = hQueue->allOf([](CUstream S) -> bool { - const CUresult Ret = cuStreamQuery(S); - if (Ret == CUDA_SUCCESS) - return true; - - if (Ret == CUDA_ERROR_NOT_READY) - return false; - - UR_CHECK_ERROR(Ret); - return false; - }); - return ReturnValue(IsReady); - } catch (ur_result_t Err) { - return Err; - } catch (...) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } - } - case UR_QUEUE_INFO_DEVICE_DEFAULT: - case UR_QUEUE_INFO_SIZE: - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.hpp deleted file mode 100644 index 4f2721b13aed6..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/queue.hpp +++ /dev/null @@ -1,246 +0,0 @@ -//===--------- queue.hpp - CUDA Adapter -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include - -#include -#include -#include - -using ur_stream_guard_ = std::unique_lock; - -/// UR queue mapping on to CUstream objects. 
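[Editor's note: the emptiness probe behind UR_QUEUE_INFO_EMPTY above hinges on cuStreamQuery answering without blocking, with anything other than the two expected codes treated as a real error. A free-function sketch over a caller-provided stream list.]

```cpp
#include <cuda.h>
#include <vector>

// Returns 1 if every stream has drained, 0 if work is pending, -1 on error.
int QueueIsEmpty(const std::vector<CUstream> &Streams) {
  for (CUstream S : Streams) {
    const CUresult Ret = cuStreamQuery(S); // non-blocking status check
    if (Ret == CUDA_ERROR_NOT_READY)
      return 0;
    if (Ret != CUDA_SUCCESS)
      return -1;
  }
  return 1;
}
```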
-/// -struct ur_queue_handle_t_ { - - using native_type = CUstream; - static constexpr int DefaultNumComputeStreams = 128; - static constexpr int DefaultNumTransferStreams = 64; - - std::vector ComputeStreams; - std::vector TransferStreams; - // DelayCompute keeps track of which streams have been recently reused and - // their next use should be delayed. If a stream has been recently reused it - // will be skipped the next time it would be selected round-robin style. When - // skipped, its delay flag is cleared. - std::vector DelayCompute; - // keep track of which streams have the barrier applied - std::vector ComputeAppliedBarrier; - std::vector TransferAppliedBarrier; - ur_context_handle_t_ *Context; - ur_device_handle_t_ *Device; - CUevent BarrierEvent = nullptr; - CUevent BarrierTmpEvent = nullptr; - std::atomic_uint32_t RefCount; - std::atomic_uint32_t EventCount; - std::atomic_uint32_t ComputeStreamIndex; - std::atomic_uint32_t TransferStreamIndex; - unsigned int NumComputeStreams; - unsigned int NumTransferStreams; - unsigned int LastSyncComputeStreams; - unsigned int LastSyncTransferStreams; - unsigned int Flags; - ur_queue_flags_t URFlags; - int Priority; - // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be - // locked at the same time, ComputeStreamSyncMutex should be locked first - // to avoid deadlocks - std::mutex ComputeStreamSyncMutex; - std::mutex ComputeStreamMutex; - std::mutex TransferStreamMutex; - std::mutex BarrierMutex; - bool HasOwnership; - - ur_queue_handle_t_(std::vector &&ComputeStreams, - std::vector &&TransferStreams, - ur_context_handle_t_ *Context, ur_device_handle_t_ *Device, - unsigned int Flags, ur_queue_flags_t URFlags, int Priority, - bool BackendOwns = true) - : ComputeStreams{std::move(ComputeStreams)}, - TransferStreams{std::move(TransferStreams)}, - DelayCompute(this->ComputeStreams.size(), false), - ComputeAppliedBarrier(this->ComputeStreams.size()), - TransferAppliedBarrier(this->TransferStreams.size()), Context{Context}, - Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIndex{0}, - TransferStreamIndex{0}, NumComputeStreams{0}, NumTransferStreams{0}, - LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags), - URFlags(URFlags), Priority(Priority), HasOwnership{BackendOwns} { - urContextRetain(Context); - urDeviceRetain(Device); - } - - ~ur_queue_handle_t_() { - urContextRelease(Context); - urDeviceRelease(Device); - } - - void computeStreamWaitForBarrierIfNeeded(CUstream Stream, uint32_t StreamI); - void transferStreamWaitForBarrierIfNeeded(CUstream Stream, uint32_t StreamI); - - // getNextComputeStream/getNextTransferStream() functions return streams from - // appropriate pools in round-robin fashion - native_type getNextComputeStream(uint32_t *StreamToken = nullptr); - // this overload tries to select a stream that was used by one of the dependencies. - // If that is not possible returns a new stream.
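[Editor's note: the applied-barrier bookkeeping declared above reduces to one helper: each stream waits on the barrier event at most once, recorded in a per-stream flag. A sketch with illustrative names, assuming Barrier was recorded earlier with cuEventRecord.]

```cpp
#include <cuda.h>
#include <cstddef>
#include <vector>

void WaitForBarrierOnce(CUstream Stream, size_t StreamI, CUevent Barrier,
                        std::vector<bool> &Applied) {
  if (Barrier && !Applied[StreamI]) {
    cuStreamWaitEvent(Stream, Barrier, 0); // future work waits; host does not
    Applied[StreamI] = true;               // do not re-arm on the next pick
  }
}
```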
If a stream is reused it - // returns a lock that needs to remain locked as long as the stream is in use - native_type getNextComputeStream(uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_stream_guard_ &Guard, - uint32_t *StreamToken = nullptr); - native_type getNextTransferStream(); - native_type get() { return getNextComputeStream(); }; - - bool hasBeenSynchronized(uint32_t StreamToken) { - // stream token not associated with one of the compute streams - if (StreamToken == std::numeric_limits::max()) { - return false; - } - return LastSyncComputeStreams > StreamToken; - } - - bool canReuseStream(uint32_t StreamToken) { - // stream token not associated with one of the compute streams - if (StreamToken == std::numeric_limits::max()) { - return false; - } - // If the command represented by the stream token was not the last command - // enqueued to the stream we can not reuse the stream - we need to allow for - // commands enqueued after it and the one we are about to enqueue to run - // concurrently - bool IsLastCommand = - (ComputeStreamIndex - StreamToken) <= ComputeStreams.size(); - // If there was a barrier enqueued to the queue after the command - // represented by the stream token we should not reuse the stream, as we can - // not take that stream into account for the bookkeeping for the next - // barrier - such a stream would not be synchronized with. Performance-wise - // it does not matter that we do not reuse the stream, as the work - // represented by the stream token is guaranteed to be complete by the - // barrier before any work we are about to enqueue to the stream will start, - // so the event does not need to be synchronized with. - return IsLastCommand && !hasBeenSynchronized(StreamToken); - } - - template bool allOf(T &&F) { - { - std::lock_guard ComputeGuard(ComputeStreamMutex); - unsigned int End = std::min( - static_cast(ComputeStreams.size()), NumComputeStreams); - if (!std::all_of(ComputeStreams.begin(), ComputeStreams.begin() + End, F)) - return false; - } - { - std::lock_guard TransferGuard(TransferStreamMutex); - unsigned int End = - std::min(static_cast(TransferStreams.size()), - NumTransferStreams); - if (!std::all_of(TransferStreams.begin(), TransferStreams.begin() + End, - F)) - return false; - } - return true; - } - - template void forEachStream(T &&F) { - { - std::lock_guard compute_guard(ComputeStreamMutex); - unsigned int End = std::min( - static_cast(ComputeStreams.size()), NumComputeStreams); - for (unsigned int i = 0; i < End; i++) { - F(ComputeStreams[i]); - } - } - { - std::lock_guard transfer_guard(TransferStreamMutex); - unsigned int End = - std::min(static_cast(TransferStreams.size()), - NumTransferStreams); - for (unsigned int i = 0; i < End; i++) { - F(TransferStreams[i]); - } - } - } - - template void syncStreams(T &&F) { - auto SyncCompute = [&F, &Streams = ComputeStreams, &Delay = DelayCompute]( - unsigned int Start, unsigned int Stop) { - for (unsigned int i = Start; i < Stop; i++) { - F(Streams[i]); - Delay[i] = false; - } - }; - auto SyncTransfer = [&F, &streams = TransferStreams](unsigned int Start, - unsigned int Stop) { - for (unsigned int i = Start; i < Stop; i++) { - F(streams[i]); - } - }; - { - unsigned int Size = static_cast(ComputeStreams.size()); - std::lock_guard ComputeSyncGuard(ComputeStreamSyncMutex); - std::lock_guard ComputeGuard(ComputeStreamMutex); - unsigned int Start = LastSyncComputeStreams; - unsigned int End = NumComputeStreams < Size ? 
NumComputeStreams - : ComputeStreamIndex.load(); - if (ResetUsed) { - LastSyncComputeStreams = End; - } - if (End - Start >= Size) { - SyncCompute(0, Size); - } else { - Start %= Size; - End %= Size; - if (Start <= End) { - SyncCompute(Start, End); - } else { - SyncCompute(Start, Size); - SyncCompute(0, End); - } - } - } - { - unsigned int Size = static_cast(TransferStreams.size()); - if (!Size) { - return; - } - std::lock_guard TransferGuard(TransferStreamMutex); - unsigned int Start = LastSyncTransferStreams; - unsigned int End = NumTransferStreams < Size ? NumTransferStreams - : TransferStreamIndex.load(); - if (ResetUsed) { - LastSyncTransferStreams = End; - } - if (End - Start >= Size) { - SyncTransfer(0, Size); - } else { - Start %= Size; - End %= Size; - if (Start <= End) { - SyncTransfer(Start, End); - } else { - SyncTransfer(Start, Size); - SyncTransfer(0, End); - } - } - } - } - - ur_context_handle_t_ *getContext() const { return Context; }; - - ur_device_handle_t_ *get_device() const { return Device; }; - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - uint32_t getNextEventID() noexcept { return ++EventCount; } - - bool backendHasOwnership() const noexcept { return HasOwnership; } -}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp deleted file mode 100644 index e561f4902b1d5..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp +++ /dev/null @@ -1,106 +0,0 @@ -//===--------- sampler.cpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp deleted file mode 100644 index e561f4902b1d5..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp +++ /dev/null @@ -1,106 +0,0 @@ -//===--------- sampler.cpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "sampler.hpp" -#include "common.hpp" - -UR_APIEXPORT ur_result_t UR_APICALL -urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, - ur_sampler_handle_t *phSampler) { - std::unique_ptr<ur_sampler_handle_t_> Sampler{ - new ur_sampler_handle_t_(hContext)}; - - if (pDesc && pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) { - Sampler->Props |= pDesc->normalizedCoords; - Sampler->Props |= pDesc->filterMode << 1; - Sampler->Props |= pDesc->addressingMode << 2; - } else { - // Set default values - Sampler->Props |= true; // Normalized Coords - Sampler->Props |= UR_SAMPLER_ADDRESSING_MODE_CLAMP << 2; - } - - void *pNext = const_cast<void *>(pDesc->pNext); - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = - reinterpret_cast<const ur_base_desc_t *>(pNext); - if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES) { - const ur_exp_sampler_mip_properties_t *SamplerMipProperties = - reinterpret_cast<const ur_exp_sampler_mip_properties_t *>(pNext); - Sampler->MaxMipmapLevelClamp = SamplerMipProperties->maxMipmapLevelClamp; - Sampler->MinMipmapLevelClamp = SamplerMipProperties->minMipmapLevelClamp; - Sampler->MaxAnisotropy = SamplerMipProperties->maxAnisotropy; - Sampler->Props |= SamplerMipProperties->mipFilterMode << 5; - } - pNext = const_cast<void *>(BaseDesc->pNext); - } - - *phSampler = Sampler.release(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, - size_t propValueSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_SAMPLER_INFO_REFERENCE_COUNT: - return ReturnValue(hSampler->getReferenceCount()); - case UR_SAMPLER_INFO_CONTEXT: - return ReturnValue(hSampler->Context); - case UR_SAMPLER_INFO_NORMALIZED_COORDS: { - bool NormCoordsProp = hSampler->isNormalizedCoords(); - return ReturnValue(NormCoordsProp); - } - case UR_SAMPLER_INFO_FILTER_MODE: { - ur_sampler_filter_mode_t FilterProp = hSampler->getFilterMode(); - return ReturnValue(FilterProp); - } - case UR_SAMPLER_INFO_ADDRESSING_MODE: { - ur_sampler_addressing_mode_t AddressingProp = hSampler->getAddressingMode(); - return ReturnValue(AddressingProp); - } - default: - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - return {}; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urSamplerRetain(ur_sampler_handle_t hSampler) { - hSampler->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urSamplerRelease(ur_sampler_handle_t hSampler) { - // double delete or someone is messing with the ref count. - // either way, cannot safely proceed. - detail::ur::assertion( - hSampler->getReferenceCount() != 0, - "Reference count overflow detected in urSamplerRelease."); - - // decrement ref count. If it is 0, delete the sampler.
- if (hSampler->decrementReferenceCount() == 0) { - delete hSampler; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urSamplerGetNativeHandle(ur_sampler_handle_t, ur_native_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( - ur_native_handle_t, ur_context_handle_t, - const ur_sampler_native_properties_t *, ur_sampler_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp deleted file mode 100644 index 8c362b98c9e80..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp +++ /dev/null @@ -1,54 +0,0 @@ -//===--------- sampler.hpp - CUDA Adapter ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <ur/ur.hpp> - -/// Implementation of samplers for CUDA -/// -/// Sampler property layout: -/// | <bits> | <usage> -/// ----------------------------------- -/// | 31 30 ... 6 | N/A -/// | 5 | mip filter mode -/// | 4 3 2 | addressing mode -/// | 1 | filter mode -/// | 0 | normalize coords -struct ur_sampler_handle_t_ { - std::atomic_uint32_t RefCount; - uint32_t Props; - float MinMipmapLevelClamp; - float MaxMipmapLevelClamp; - float MaxAnisotropy; - ur_context_handle_t Context; - - ur_sampler_handle_t_(ur_context_handle_t Context) - : RefCount(1), Props(0), MinMipmapLevelClamp(0.0f), - MaxMipmapLevelClamp(0.0f), MaxAnisotropy(0.0f), Context(Context) {} - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - ur_bool_t isNormalizedCoords() const noexcept { - return static_cast<ur_bool_t>(Props & 0b1); - } - - ur_sampler_filter_mode_t getFilterMode() const noexcept { - return static_cast<ur_sampler_filter_mode_t>((Props >> 1) & 0b1); - } - - ur_sampler_addressing_mode_t getAddressingMode() const noexcept { - return static_cast<ur_sampler_addressing_mode_t>((Props >> 2) & 0b111); - } - - ur_sampler_filter_mode_t getMipFilterMode() const noexcept { - return static_cast<ur_sampler_filter_mode_t>((Props >> 5) & 0b1); - } -};
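
The bit layout documented in the header above packs all sampler state into the single 32-bit Props word, and the getters recover each field with a shift and a mask, mirroring the shifts applied in urSamplerCreate. A self-contained example of the same packing and unpacking (the field values are illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Props = 0;
      Props |= 1u;      // bit 0: normalized coords
      Props |= 1u << 1; // bit 1: filter mode
      Props |= 2u << 2; // bits 2-4: addressing mode
      Props |= 1u << 5; // bit 5: mip filter mode

      assert((Props & 0b1) == 1);          // isNormalizedCoords
      assert(((Props >> 1) & 0b1) == 1);   // getFilterMode
      assert(((Props >> 2) & 0b111) == 2); // getAddressingMode
      assert(((Props >> 5) & 0b1) == 1);   // getMipFilterMode
      return 0;
    }
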
diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/tracing.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/tracing.cpp deleted file mode 100644 index 9c0183960eebb..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/tracing.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===-------------- tracing.cpp - CUDA Host API Tracing --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifdef XPTI_ENABLE_INSTRUMENTATION -#include <xpti/xpti_data_types.h> -#include <xpti/xpti_trace_framework.h> -#endif - -#include <cuda.h> -#ifdef XPTI_ENABLE_INSTRUMENTATION -#include <cupti.h> -#endif // XPTI_ENABLE_INSTRUMENTATION - -#include <exception> -#include <iostream> - -#ifdef XPTI_ENABLE_INSTRUMENTATION -constexpr auto CUDA_CALL_STREAM_NAME = "sycl.experimental.cuda.call"; -constexpr auto CUDA_DEBUG_STREAM_NAME = "sycl.experimental.cuda.debug"; - -thread_local uint64_t CallCorrelationID = 0; -thread_local uint64_t DebugCorrelationID = 0; - -static xpti_td *GCallEvent = nullptr; -static xpti_td *GDebugEvent = nullptr; - -constexpr auto GVerStr = "0.1"; -constexpr int GMajVer = 0; -constexpr int GMinVer = 1; - -static void cuptiCallback(void *, CUpti_CallbackDomain, CUpti_CallbackId CBID, - const void *CBData) { - if (xptiTraceEnabled()) { - const auto *CBInfo = static_cast<const CUpti_CallbackData *>(CBData); - - if (CBInfo->callbackSite == CUPTI_API_ENTER) { - CallCorrelationID = xptiGetUniqueId(); - DebugCorrelationID = xptiGetUniqueId(); - } - - const char *FuncName = CBInfo->functionName; - uint32_t FuncID = static_cast<uint32_t>(CBID); - uint16_t TraceTypeArgs = CBInfo->callbackSite == CUPTI_API_ENTER - ? xpti::trace_function_with_args_begin - : xpti::trace_function_with_args_end; - uint16_t TraceType = CBInfo->callbackSite == CUPTI_API_ENTER - ? xpti::trace_function_begin - : xpti::trace_function_end; - - uint8_t CallStreamID = xptiRegisterStream(CUDA_CALL_STREAM_NAME); - uint8_t DebugStreamID = xptiRegisterStream(CUDA_DEBUG_STREAM_NAME); - - xptiNotifySubscribers(CallStreamID, TraceType, GCallEvent, nullptr, - CallCorrelationID, FuncName); - - xpti::function_with_args_t Payload{ - FuncID, FuncName, const_cast<void *>(CBInfo->functionParams), - CBInfo->functionReturnValue, CBInfo->context}; - xptiNotifySubscribers(DebugStreamID, TraceTypeArgs, GDebugEvent, nullptr, - DebugCorrelationID, &Payload); - } -} -#endif - -void enableCUDATracing() { -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (!xptiTraceEnabled()) - return; - - xptiRegisterStream(CUDA_CALL_STREAM_NAME); - xptiInitialize(CUDA_CALL_STREAM_NAME, GMajVer, GMinVer, GVerStr); - xptiRegisterStream(CUDA_DEBUG_STREAM_NAME); - xptiInitialize(CUDA_DEBUG_STREAM_NAME, GMajVer, GMinVer, GVerStr); - - uint64_t Dummy; - xpti::payload_t CUDAPayload("CUDA Plugin Layer"); - GCallEvent = - xptiMakeEvent("CUDA Plugin Layer", &CUDAPayload, - xpti::trace_algorithm_event, xpti_at::active, &Dummy); - - xpti::payload_t CUDADebugPayload("CUDA Plugin Debug Layer"); - GDebugEvent = - xptiMakeEvent("CUDA Plugin Debug Layer", &CUDADebugPayload, - xpti::trace_algorithm_event, xpti_at::active, &Dummy); - - CUpti_SubscriberHandle Subscriber; - cuptiSubscribe(&Subscriber, cuptiCallback, nullptr); - cuptiEnableDomain(1, Subscriber, CUPTI_CB_DOMAIN_DRIVER_API); - cuptiEnableCallback(0, Subscriber, CUPTI_CB_DOMAIN_DRIVER_API, - CUPTI_DRIVER_TRACE_CBID_cuGetErrorString); - cuptiEnableCallback(0, Subscriber, CUPTI_CB_DOMAIN_DRIVER_API, - CUPTI_DRIVER_TRACE_CBID_cuGetErrorName); -#endif -} - -void disableCUDATracing() { -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (!xptiTraceEnabled()) - return; - - xptiFinalize(CUDA_CALL_STREAM_NAME); - xptiFinalize(CUDA_DEBUG_STREAM_NAME); -#endif // XPTI_ENABLE_INSTRUMENTATION -}
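
Each traced driver-API call in the deleted tracing.cpp produces a begin/end pair of XPTI notifications, correlated by IDs captured when the call is entered. The control flow of cuptiCallback reduces to the following standalone sketch (CallbackSite, traceCallback, and the console output are illustrative stand-ins, not the CUPTI/XPTI APIs):

    #include <cstdint>
    #include <iostream>

    enum CallbackSite { API_ENTER, API_EXIT };

    thread_local uint64_t CorrelationID = 0;

    void traceCallback(CallbackSite Site, const char *FuncName) {
      if (Site == API_ENTER) {
        static uint64_t NextID = 0;
        CorrelationID = ++NextID; // fresh ID captured on entry
        std::cout << "begin " << FuncName << " #" << CorrelationID << "\n";
      } else {
        // Exit reuses the ID captured at entry on the same thread.
        std::cout << "end " << FuncName << " #" << CorrelationID << "\n";
      }
    }

    int main() {
      traceCallback(API_ENTER, "cuLaunchKernel");
      traceCallback(API_EXIT, "cuLaunchKernel");
      return 0;
    }
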
diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp deleted file mode 100644 index 73eace5818dfd..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp +++ /dev/null @@ -1,355 +0,0 @@ -//===--------- ur_interface_loader.cpp - Unified Runtime -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <ur_api.h> -#include <ur_ddi.h> - -namespace { - -// TODO - this is a duplicate of what is in the L0 plugin -// We should move this to somewhere common -ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { - if (pDdiTable == nullptr) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - // Pre 1.0 we enforce that loader and adapter must have the same version. - // Post 1.0 only a major version match should be required. - if (version != UR_API_VERSION_CURRENT) { - return UR_RESULT_ERROR_UNSUPPORTED_VERSION; - } - return UR_RESULT_SUCCESS; -} -} // namespace - -#if defined(__cplusplus) -extern "C" { -#endif - -UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( - ur_api_version_t version, ur_platform_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreateWithNativeHandle = nullptr; - pDdiTable->pfnGet = urPlatformGet; - pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; - pDdiTable->pfnGetInfo = urPlatformGetInfo; - pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; - pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( - ur_api_version_t version, ur_context_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreate = urContextCreate; - pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urContextGetInfo; - pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; - pDdiTable->pfnRelease = urContextRelease; - pDdiTable->pfnRetain = urContextRetain; - pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( - ur_api_version_t version, ur_event_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urEventGetInfo; - pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; - pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; - pDdiTable->pfnRelease = urEventRelease; - pDdiTable->pfnRetain = urEventRetain; - pDdiTable->pfnSetCallback = urEventSetCallback; - pDdiTable->pfnWait = urEventWait; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( - ur_api_version_t version, ur_program_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnBuild = urProgramBuild; - pDdiTable->pfnCompile = urProgramCompile; - pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; - pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; - pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; -
pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; - pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; - pDdiTable->pfnGetInfo = urProgramGetInfo; - pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; - pDdiTable->pfnLink = urProgramLink; - pDdiTable->pfnRelease = urProgramRelease; - pDdiTable->pfnRetain = urProgramRetain; - pDdiTable->pfnSetSpecializationConstants = - urProgramSetSpecializationConstants; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( - ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreate = urKernelCreate; - pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; - pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; - pDdiTable->pfnGetInfo = urKernelGetInfo; - pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; - pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; - pDdiTable->pfnRelease = urKernelRelease; - pDdiTable->pfnRetain = urKernelRetain; - pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; - pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; - pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; - pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; - pDdiTable->pfnSetArgValue = urKernelSetArgValue; - pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; - pDdiTable->pfnSetSpecializationConstants = nullptr; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( - ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreate = urSamplerCreate; - pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urSamplerGetInfo; - pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; - pDdiTable->pfnRelease = urSamplerRelease; - pDdiTable->pfnRetain = urSamplerRetain; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL -urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnBufferCreate = urMemBufferCreate; - pDdiTable->pfnBufferPartition = urMemBufferPartition; - pDdiTable->pfnBufferCreateWithNativeHandle = - urMemBufferCreateWithNativeHandle; - pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urMemGetInfo; - pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; - pDdiTable->pfnImageCreate = urMemImageCreate; - pDdiTable->pfnImageGetInfo = urMemImageGetInfo; - pDdiTable->pfnRelease = urMemRelease; - pDdiTable->pfnRetain = urMemRetain; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( - ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; - pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; - pDdiTable->pfnEventsWait = urEnqueueEventsWait; - pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; - pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; - pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; - 
pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; - pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; - pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; - pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; - pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; - pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; - pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; - pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; - pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; - pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; - pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; - pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; - pDdiTable->pfnUSMFill = urEnqueueUSMFill; - pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; - pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; - pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; - pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; - pDdiTable->pfnReadHostPipe = urEnqueueReadHostPipe; - pDdiTable->pfnWriteHostPipe = urEnqueueWriteHostPipe; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( - ur_api_version_t version, ur_global_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnInit = urInit; - pDdiTable->pfnTearDown = urTearDown; - pDdiTable->pfnAdapterGet = urAdapterGet; - pDdiTable->pfnAdapterRelease = urAdapterRelease; - pDdiTable->pfnAdapterRetain = urAdapterRetain; - pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; - pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; - - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( - ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreate = urQueueCreate; - pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; - pDdiTable->pfnFinish = urQueueFinish; - pDdiTable->pfnFlush = urQueueFlush; - pDdiTable->pfnGetInfo = urQueueGetInfo; - pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; - pDdiTable->pfnRelease = urQueueRelease; - pDdiTable->pfnRetain = urQueueRetain; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL -urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; - pDdiTable->pfnFree = urUSMFree; - pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; - pDdiTable->pfnHostAlloc = urUSMHostAlloc; - pDdiTable->pfnPoolCreate = urUSMPoolCreate; - pDdiTable->pfnPoolRetain = urUSMPoolRetain; - pDdiTable->pfnPoolRelease = urUSMPoolRelease; - pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo; - pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( - ur_api_version_t version, ur_device_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; - pDdiTable->pfnGet = urDeviceGet; - pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; - pDdiTable->pfnGetInfo = urDeviceGetInfo; - pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; - pDdiTable->pfnPartition = urDevicePartition; - 
pDdiTable->pfnRelease = urDeviceRelease; - pDdiTable->pfnRetain = urDeviceRetain; - pDdiTable->pfnSelectBinary = urDeviceSelectBinary; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_command_buffer_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; - } - pDdiTable->pfnCreateExp = urCommandBufferCreateExp; - pDdiTable->pfnRetainExp = urCommandBufferRetainExp; - pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; - pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; - pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadRectExp = - urCommandBufferAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferWriteExp = - urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - urCommandBufferAppendMembufferWriteRectExp; - pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; - - return retVal; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( - ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; - } - pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; - pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; - pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; - - return retVal; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( - ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnUnsampledImageHandleDestroyExp = - urBindlessImagesUnsampledImageHandleDestroyExp; - pDdiTable->pfnSampledImageHandleDestroyExp = - urBindlessImagesSampledImageHandleDestroyExp; - pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; - pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; - pDdiTable->pfnUnsampledImageCreateExp = - urBindlessImagesUnsampledImageCreateExp; - pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; - pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; - pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; - pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; - pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; - pDdiTable->pfnImportOpaqueFDExp = urBindlessImagesImportOpaqueFDExp; - pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; - pDdiTable->pfnReleaseInteropExp = urBindlessImagesReleaseInteropExp; - pDdiTable->pfnImportExternalSemaphoreOpaqueFDExp = - urBindlessImagesImportExternalSemaphoreOpaqueFDExp; - pDdiTable->pfnDestroyExternalSemaphoreExp = - urBindlessImagesDestroyExternalSemaphoreExp; - pDdiTable->pfnWaitExternalSemaphoreExp = - urBindlessImagesWaitExternalSemaphoreExp; - pDdiTable->pfnSignalExternalSemaphoreExp = - 
urBindlessImagesSignalExternalSemaphoreExp; - return UR_RESULT_SUCCESS; -} - -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( - ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; - return UR_RESULT_SUCCESS; -} - -#if defined(__cplusplus) -} // extern "C" -#endif
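
The tables in the deleted loader above are how the Unified Runtime loader discovers an adapter's entry points: it requests one DDI table per API area, then dispatches every call through function pointers. A sketch of the consuming side, assuming the same <ur_api.h>/<ur_ddi.h> headers the loader uses (error handling simplified; this is not the loader's actual code):

    #include <ur_api.h>
    #include <ur_ddi.h>

    // Ask the adapter to fill in its queue entry points for this API version,
    // then dispatch through the table instead of calling functions directly.
    ur_result_t wireQueueTable(ur_queue_dditable_t &Table) {
      ur_result_t Res = urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, &Table);
      if (Res != UR_RESULT_SUCCESS)
        return Res; // null table or version mismatch, per validateProcInputs
      // Example dispatch: Table.pfnFinish(Queue) instead of urQueueFinish(Queue).
      return UR_RESULT_SUCCESS;
    }
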
diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm.cpp deleted file mode 100644 index d272a836e600a..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm.cpp +++ /dev/null @@ -1,503 +0,0 @@ -//===--------- usm.cpp - CUDA Adapter -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <cassert> - -#include "adapter.hpp" -#include "common.hpp" -#include "context.hpp" -#include "device.hpp" -#include "event.hpp" -#include "platform.hpp" -#include "queue.hpp" -#include "usm.hpp" - -#include <cuda.h> - -/// USM: Implements USM Host allocations using CUDA Pinned Memory -/// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#page-locked-host-memory -UR_APIEXPORT ur_result_t UR_APICALL -urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, - ur_usm_pool_handle_t hPool, size_t size, void **ppMem) { - auto alignment = pUSMDesc ? pUSMDesc->align : 0u; - UR_ASSERT(!pUSMDesc || - (alignment == 0 || ((alignment & (alignment - 1)) == 0)), - UR_RESULT_ERROR_INVALID_VALUE); - - if (!hPool) { - return USMHostAllocImpl(ppMem, hContext, nullptr, size, alignment); - } - - auto UMFPool = hPool->HostMemPool.get(); - *ppMem = umfPoolAlignedMalloc(UMFPool, size, alignment); - if (*ppMem == nullptr) { - auto umfErr = umfPoolGetLastAllocationError(UMFPool); - return umf::umf2urResult(umfErr); - } - return UR_RESULT_SUCCESS; -} - -/// USM: Implements USM device allocations using a normal CUDA device pointer -/// -UR_APIEXPORT ur_result_t UR_APICALL -urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t hPool, - size_t size, void **ppMem) { - auto alignment = pUSMDesc ? pUSMDesc->align : 0u; - UR_ASSERT(!pUSMDesc || - (alignment == 0 || ((alignment & (alignment - 1)) == 0)), - UR_RESULT_ERROR_INVALID_VALUE); - - if (!hPool) { - return USMDeviceAllocImpl(ppMem, hContext, hDevice, nullptr, size, - alignment); - } - - auto UMFPool = hPool->DeviceMemPool.get(); - *ppMem = umfPoolAlignedMalloc(UMFPool, size, alignment); - if (*ppMem == nullptr) { - auto umfErr = umfPoolGetLastAllocationError(UMFPool); - return umf::umf2urResult(umfErr); - } - return UR_RESULT_SUCCESS; -} - -/// USM: Implements USM Shared allocations using CUDA Managed Memory -/// -UR_APIEXPORT ur_result_t UR_APICALL -urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t hPool, - size_t size, void **ppMem) { - auto alignment = pUSMDesc ? pUSMDesc->align : 0u; - UR_ASSERT(!pUSMDesc || - (alignment == 0 || ((alignment & (alignment - 1)) == 0)), - UR_RESULT_ERROR_INVALID_VALUE); - - if (!hPool) { - return USMSharedAllocImpl(ppMem, hContext, hDevice, nullptr, nullptr, size, - alignment); - } - - auto UMFPool = hPool->SharedMemPool.get(); - *ppMem = umfPoolAlignedMalloc(UMFPool, size, alignment); - if (*ppMem == nullptr) { - auto umfErr = umfPoolGetLastAllocationError(UMFPool); - return umf::umf2urResult(umfErr); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Pointer) { - ur_result_t Result = UR_RESULT_SUCCESS; - try { - ScopedContext Active(Context); - bool IsManaged; - unsigned int Type; - void *AttributeValues[2] = {&IsManaged, &Type}; - CUpointer_attribute Attributes[2] = {CU_POINTER_ATTRIBUTE_IS_MANAGED, - CU_POINTER_ATTRIBUTE_MEMORY_TYPE}; - UR_CHECK_ERROR(cuPointerGetAttributes(2, Attributes, AttributeValues, - (CUdeviceptr)Pointer)); - UR_ASSERT(Type == CU_MEMORYTYPE_DEVICE || Type == CU_MEMORYTYPE_HOST, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - if (IsManaged || Type == CU_MEMORYTYPE_DEVICE) { - // Memory allocated with cuMemAlloc and cuMemAllocManaged must be freed - // with cuMemFree - UR_CHECK_ERROR(cuMemFree((CUdeviceptr)Pointer)); - } else { - // Memory allocated with cuMemAllocHost must be freed with cuMemFreeHost - UR_CHECK_ERROR(cuMemFreeHost(Pointer)); - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -/// USM: Frees the given USM pointer associated with the context. -/// -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, - void *pMem) { - if (auto Pool = umfPoolByPtr(pMem)) - return umf::umf2urResult(umfPoolFree(Pool, pMem)); - return USMFreeImpl(hContext, pMem); -} - -ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_device_mem_flags_t *, - size_t Size, uint32_t Alignment) { - try { - ScopedContext Active(Context); - UR_CHECK_ERROR(cuMemAlloc((CUdeviceptr *)ResultPtr, Size)); - } catch (ur_result_t Err) { - return Err; - } - -#ifdef NDEBUG - std::ignore = Alignment; -#else - assert((Alignment == 0 || - reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0)); -#endif - return UR_RESULT_SUCCESS; -} - -ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_host_mem_flags_t *, - ur_usm_device_mem_flags_t *, size_t Size, - uint32_t Alignment) { - try { - ScopedContext Active(Context); - UR_CHECK_ERROR(cuMemAllocManaged((CUdeviceptr *)ResultPtr, Size, - CU_MEM_ATTACH_GLOBAL)); - } catch (ur_result_t Err) { - return Err; - } - -#ifdef NDEBUG - std::ignore = Alignment; -#else - assert((Alignment == 0 || - reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0)); -#endif - return UR_RESULT_SUCCESS; -} - -ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_usm_host_mem_flags_t *, size_t Size, - uint32_t Alignment) { - try { - ScopedContext Active(Context); - UR_CHECK_ERROR(cuMemAllocHost(ResultPtr, Size)); - } catch (ur_result_t Err) { - return Err; - } - -#ifdef NDEBUG - std::ignore = Alignment; -#else - assert((Alignment == 0 || - reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0)); -#endif - return UR_RESULT_SUCCESS; -}
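
All three allocation entry points above validate alignment with the same bit trick: alignment & (alignment - 1) is zero exactly when alignment is zero or a power of two, because subtracting one flips every bit at and below the lowest set bit. A worked example:

    #include <cstdint>

    constexpr bool isPowerOfTwoOrZero(uint32_t A) { return (A & (A - 1)) == 0; }

    // 64 = 0b1000000 and 63 = 0b0111111 share no bits, so 64 passes.
    static_assert(isPowerOfTwoOrZero(0)); // 0 means "default alignment"
    static_assert(isPowerOfTwoOrZero(64));
    // 48 = 0b110000 and 47 = 0b101111 share bit 5, so 48 is rejected.
    static_assert(!isPowerOfTwoOrZero(48));

    int main() { return 0; }
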
UR_APIEXPORT ur_result_t UR_APICALL -urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, - ur_usm_alloc_info_t propName, size_t propValueSize, - void *pPropValue, size_t *pPropValueSizeRet) { - ur_result_t Result = UR_RESULT_SUCCESS; - - UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); - - try { - ScopedContext Active(hContext); - switch (propName) { - case UR_USM_ALLOC_INFO_TYPE: { - unsigned int Value; - // do not throw if cuPointerGetAttribute returns CUDA_ERROR_INVALID_VALUE - CUresult Ret = cuPointerGetAttribute( - &Value, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr)pMem); - if (Ret == CUDA_ERROR_INVALID_VALUE) { - // pointer not known to the CUDA subsystem - return ReturnValue(UR_USM_TYPE_UNKNOWN); - } - checkErrorUR(Ret, __func__, __LINE__ - 5, __FILE__); - if (Value) { - // pointer to managed memory - return ReturnValue(UR_USM_TYPE_SHARED); - } - UR_CHECK_ERROR(cuPointerGetAttribute( - &Value, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)pMem)); - UR_ASSERT(Value == CU_MEMORYTYPE_DEVICE || Value == CU_MEMORYTYPE_HOST, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - if (Value == CU_MEMORYTYPE_DEVICE) { - // pointer to device memory - return ReturnValue(UR_USM_TYPE_DEVICE); - } - if (Value == CU_MEMORYTYPE_HOST) { - // pointer to host memory - return ReturnValue(UR_USM_TYPE_HOST); - } - // should never get here -#ifdef _MSC_VER - __assume(0); -#else - __builtin_unreachable(); -#endif - } - case UR_USM_ALLOC_INFO_BASE_PTR: { -#if CUDA_VERSION >= 10020 - // CU_POINTER_ATTRIBUTE_RANGE_START_ADDR was introduced in CUDA 10.2 - void *Base; - UR_CHECK_ERROR(cuPointerGetAttribute( - &Base, CU_POINTER_ATTRIBUTE_RANGE_START_ADDR, (CUdeviceptr)pMem)); - return ReturnValue(Base); -#else - return UR_RESULT_ERROR_INVALID_VALUE; -#endif - } - case UR_USM_ALLOC_INFO_SIZE: { -#if CUDA_VERSION >= 10020 - // CU_POINTER_ATTRIBUTE_RANGE_SIZE was introduced in CUDA 10.2 - size_t Value; - UR_CHECK_ERROR(cuPointerGetAttribute( - &Value, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem)); - return ReturnValue(Value); -#else - return UR_RESULT_ERROR_INVALID_VALUE; -#endif - } - case UR_USM_ALLOC_INFO_DEVICE: { - // get device index associated with this pointer - unsigned int DeviceIndex; - UR_CHECK_ERROR(cuPointerGetAttribute(&DeviceIndex, - CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL, - (CUdeviceptr)pMem)); - - // currently each device is in its own platform, so find the platform at - // the same index - std::vector<ur_platform_handle_t> Platforms; - Platforms.resize(DeviceIndex + 1); - ur_adapter_handle_t AdapterHandle = &adapter; - Result = urPlatformGet(&AdapterHandle, 1, DeviceIndex + 1, - Platforms.data(), nullptr); - - // get the device from the platform - ur_device_handle_t Device = Platforms[DeviceIndex]->Devices[0].get(); - return ReturnValue(Device); - } - case UR_USM_ALLOC_INFO_POOL: { - auto UMFPool = umfPoolByPtr(pMem); - if (!UMFPool) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - ur_usm_pool_handle_t Pool = hContext->getOwningURPool(UMFPool); - if (!Pool) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - return ReturnValue(Pool); - } - default: - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - } catch (ur_result_t Err) { - Result = Err; - } - return Result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t Context, - void *HostPtr, size_t Size) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - UR_ASSERT(!HostPtr, UR_RESULT_ERROR_INVALID_VALUE); - UR_ASSERT(Size > 0, UR_RESULT_ERROR_INVALID_VALUE); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, - void *HostPtr) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - UR_ASSERT(!HostPtr, UR_RESULT_ERROR_INVALID_VALUE); - return UR_RESULT_SUCCESS; -}
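
urUSMGetMemAllocInfo lets a caller classify any pointer after the fact, which is how a runtime can distinguish host, device, shared, and unknown allocations. A sketch of a client-side query against the entry point above (the context handle is assumed to exist elsewhere, and the return-code check is elided for brevity):

    #include <ur_api.h>

    ur_usm_type_t classify(ur_context_handle_t hContext, const void *Ptr) {
      ur_usm_type_t Type = UR_USM_TYPE_UNKNOWN;
      // Yields UR_USM_TYPE_UNKNOWN for pointers CUDA has never seen, per the
      // CUDA_ERROR_INVALID_VALUE handling above.
      urUSMGetMemAllocInfo(hContext, Ptr, UR_USM_ALLOC_INFO_TYPE, sizeof(Type),
                           &Type, nullptr);
      return Type;
    }
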
umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx, - ur_device_handle_t Dev) { - Context = Ctx; - Device = Dev; - // There isn't a way to query this in cuda, and there isn't much info on - // cuda's approach to alignment or transfer granularity between host and - // device. Within UMF this is only used to influence alignment, and since we - // discard that in our alloc implementations it seems we can safely ignore - // this as well, for now. - MinPageSize = 0; - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align, - void **Ptr) { - auto Res = allocateImpl(Ptr, Size, Align); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -enum umf_result_t USMMemoryProvider::free(void *Ptr, size_t Size) { - (void)Size; - - auto Res = USMFreeImpl(Context, Ptr); - if (Res != UR_RESULT_SUCCESS) { - getLastStatusRef() = Res; - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - return UMF_RESULT_SUCCESS; -} - -void USMMemoryProvider::get_last_native_error(const char **ErrMsg, - int32_t *ErrCode) { - (void)ErrMsg; - *ErrCode = static_cast<int32_t>(getLastStatusRef()); -} - -umf_result_t USMMemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) { - (void)Ptr; - *PageSize = MinPageSize; - - return UMF_RESULT_SUCCESS; -} - -ur_result_t USMSharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMSharedAllocImpl(ResultPtr, Context, Device, nullptr, nullptr, Size, - Alignment); -} - -ur_result_t USMDeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMDeviceAllocImpl(ResultPtr, Context, Device, nullptr, Size, - Alignment); -} - -ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) { - return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment); -} - -ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc) - : Context(Context) { - const void *pNext = PoolDesc->pNext; - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = static_cast<const ur_base_desc_t *>(pNext); - switch (BaseDesc->stype) { - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *Limits = - reinterpret_cast<const ur_usm_pool_limits_desc_t *>(BaseDesc); - for (auto &config : DisjointPoolConfigs.Configs) { - config.MaxPoolableSize = Limits->maxPoolableSize; - config.SlabMinSize = Limits->minDriverAllocSize; - } - break; - } - default: { - throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); - } - } - pNext = BaseDesc->pNext; - } - - auto MemProvider = - umf::memoryProviderMakeUnique<USMHostMemoryProvider>(Context, nullptr) - .second; - - HostMemPool = - umf::poolMakeUnique<usm::DisjointPool, 1>( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host]) - .second; - - auto Device = Context->DeviceID; - MemProvider = - umf::memoryProviderMakeUnique<USMDeviceMemoryProvider>(Context, Device) - .second; - DeviceMemPool = - umf::poolMakeUnique<usm::DisjointPool, 1>( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device]) - .second; - - MemProvider = - umf::memoryProviderMakeUnique<USMSharedMemoryProvider>(Context, Device) - .second; - SharedMemPool = - umf::poolMakeUnique<usm::DisjointPool, 1>( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared]) - .second; - Context->addPool(this); -}
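
The pool constructor above accepts exactly one chained extension, ur_usm_pool_limits_desc_t, and throws on anything else. A sketch of how a caller would build that pNext chain (the limit values are illustrative only):

    #include <ur_api.h>

    ur_usm_pool_desc_t makePoolDesc(ur_usm_pool_limits_desc_t &Limits) {
      Limits.stype = UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC;
      Limits.pNext = nullptr;
      Limits.maxPoolableSize = 1 << 20;     // pool allocations up to 1 MiB
      Limits.minDriverAllocSize = 64 << 10; // request 64 KiB slabs at minimum

      ur_usm_pool_desc_t Desc{};
      Desc.stype = UR_STRUCTURE_TYPE_USM_POOL_DESC;
      Desc.pNext = &Limits; // walked by the constructor's while loop above
      return Desc;
    }
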
bool ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) { - return DeviceMemPool.get() == umf_pool || SharedMemPool.get() == umf_pool || - HostMemPool.get() == umf_pool; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( - ur_context_handle_t Context, ///< [in] handle of the context object - ur_usm_pool_desc_t - *PoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with - ///< ::ur_usm_pool_limits_desc_t - ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool -) { - // Without pool tracking we can't free pool allocations. -#ifdef UMF_ENABLE_POOL_TRACKING - if (PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - try { - *Pool = reinterpret_cast<ur_usm_pool_handle_t>( - new ur_usm_pool_handle_t_(Context, PoolDesc)); - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } - return UR_RESULT_SUCCESS; -#else - std::ignore = Context; - std::ignore = PoolDesc; - std::ignore = Pool; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -#endif -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain( - ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - Pool->incrementReferenceCount(); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( - ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - if (Pool->decrementReferenceCount() > 0) { - return UR_RESULT_SUCCESS; - } - Pool->Context->removePool(Pool); - delete Pool; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( - ur_usm_pool_handle_t hPool, ///< [in] handle of the USM memory pool - ur_usm_pool_info_t propName, ///< [in] name of the pool property to query - size_t propSize, ///< [in] size in bytes of the pool property value provided - void *pPropValue, ///< [out][optional][typename(propName, propSize)] value - ///< of the pool property - size_t *pPropSizeRet ///< [out][optional] size in bytes returned in pool - ///< property value -) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: { - return ReturnValue(hPool->getReferenceCount()); - } - case UR_USM_POOL_INFO_CONTEXT: { - return ReturnValue(hPool->Context); - } - default: { - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - } -}
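
Taken together, the entry points above give pools a simple lifecycle: create with a descriptor, allocate by passing the pool handle, free (which routes back to the owning UMF pool), and release. A sketch of that round trip (note that creation only succeeds when the adapter was built with UMF_ENABLE_POOL_TRACKING, and most error handling is elided):

    #include <ur_api.h>

    void poolRoundTrip(ur_context_handle_t hContext) {
      ur_usm_pool_desc_t Desc{};
      Desc.stype = UR_STRUCTURE_TYPE_USM_POOL_DESC;

      ur_usm_pool_handle_t Pool = nullptr;
      if (urUSMPoolCreate(hContext, &Desc, &Pool) != UR_RESULT_SUCCESS)
        return; // e.g. pool tracking disabled at build time

      void *Mem = nullptr;
      urUSMHostAlloc(hContext, /*pUSMDesc=*/nullptr, Pool, 1024, &Mem);
      urUSMFree(hContext, Mem); // umfPoolByPtr finds the owning pool
      urUSMPoolRelease(Pool);   // drops the reference taken at creation
    }
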
diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm.hpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm.hpp deleted file mode 100644 index d4cfba7641f30..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm.hpp +++ /dev/null @@ -1,130 +0,0 @@ -//===--------- usm.hpp - CUDA Adapter -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "common.hpp" - -#include <umf_helpers.hpp> -#include <ur_pool_manager.hpp> - -usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); - -struct ur_usm_pool_handle_t_ { - std::atomic_uint32_t RefCount = 1; - - ur_context_handle_t Context = nullptr; - - usm::DisjointPoolAllConfigs DisjointPoolConfigs = - usm::DisjointPoolAllConfigs(); - - umf::pool_unique_handle_t DeviceMemPool; - umf::pool_unique_handle_t SharedMemPool; - umf::pool_unique_handle_t HostMemPool; - - ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_usm_pool_desc_t *PoolDesc); - - uint32_t incrementReferenceCount() noexcept { return ++RefCount; } - - uint32_t decrementReferenceCount() noexcept { return --RefCount; } - - uint32_t getReferenceCount() const noexcept { return RefCount; } - - bool hasUMFPool(umf_memory_pool_t *umf_pool); -}; - -// Exception type to pass allocation errors -class UsmAllocationException { - const ur_result_t Error; - -public: - UsmAllocationException(ur_result_t Err) : Error{Err} {} - ur_result_t getError() const { return Error; } -}; - -// Implements memory allocation via driver API for USM allocator interface. -class USMMemoryProvider { -private: - ur_result_t &getLastStatusRef() { - static thread_local ur_result_t LastStatus = UR_RESULT_SUCCESS; - return LastStatus; - } - -protected: - ur_context_handle_t Context; - ur_device_handle_t Device; - size_t MinPageSize; - - // Internal allocation routine which must be implemented for each allocation - // type - virtual ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) = 0; - -public: - umf_result_t initialize(ur_context_handle_t Ctx, ur_device_handle_t Dev); - umf_result_t alloc(size_t Size, size_t Align, void **Ptr); - umf_result_t free(void *Ptr, size_t Size); - void get_last_native_error(const char **ErrMsg, int32_t *ErrCode); - umf_result_t get_min_page_size(void *, size_t *); - umf_result_t get_recommended_page_size(size_t, size_t *) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - umf_result_t purge_lazy(void *, size_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - umf_result_t purge_force(void *, size_t) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - }; - virtual const char *get_name() = 0; - - virtual ~USMMemoryProvider() = default; -}; - -// Allocation routines for shared memory type -class USMSharedMemoryProvider final : public USMMemoryProvider { -public: - const char *get_name() override { return "USMSharedMemoryProvider"; } - -protected: - ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; -}; - -// Allocation routines for device memory type -class USMDeviceMemoryProvider final : public USMMemoryProvider { -public: - const char *get_name() override { return "USMDeviceMemoryProvider"; } - -protected: - ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; -}; - -// Allocation routines for host memory type -class USMHostMemoryProvider final : public USMMemoryProvider { -public: - const char *get_name() override { return "USMHostMemoryProvider"; } - -protected: - ur_result_t allocateImpl(void **ResultPtr, size_t Size, - uint32_t Alignment) override; -}; - -ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t Device, - ur_usm_device_mem_flags_t *Flags, size_t Size, - uint32_t Alignment); - -ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, -
ur_device_handle_t Device, - ur_usm_host_mem_flags_t *, - ur_usm_device_mem_flags_t *, size_t Size, - uint32_t Alignment); - -ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_usm_host_mem_flags_t *Flags, size_t Size, - uint32_t Alignment); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp deleted file mode 100644 index ed580dd5d8065..0000000000000 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/usm_p2p.cpp +++ /dev/null @@ -1,69 +0,0 @@ -//===--------- usm_p2p.cpp - CUDA Adapter----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "common.hpp" -#include "context.hpp" - -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { - - ur_result_t result = UR_RESULT_SUCCESS; - try { - ScopedContext active(commandDevice->getContext()); - UR_CHECK_ERROR(cuCtxEnablePeerAccess(peerDevice->getContext(), 0)); - } catch (ur_result_t err) { - result = err; - } - return result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { - - ur_result_t result = UR_RESULT_SUCCESS; - try { - ScopedContext active(commandDevice->getContext()); - UR_CHECK_ERROR(cuCtxDisablePeerAccess(peerDevice->getContext())); - } catch (ur_result_t err) { - result = err; - } - return result; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, - ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { - - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - int value; - CUdevice_P2PAttribute cu_attr; - try { - ScopedContext active(commandDevice->getContext()); - switch (propName) { - case UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED: { - cu_attr = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED; - break; - } - case UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED: { - cu_attr = CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED; - break; - } - default: { - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - UR_CHECK_ERROR(cuDeviceGetP2PAttribute( - &value, cu_attr, commandDevice->get(), peerDevice->get())); - } catch (ur_result_t err) { - return err; - } - return ReturnValue(value); -} From 345a913cdc908be75bc3d188ff9c1d5eb6570795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Mon, 2 Oct 2023 14:01:21 +0100 Subject: [PATCH 2/2] Update UR commit --- sycl/plugins/unified_runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index df97b6eb07812..7f8059df7c9c6 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -4,7 +4,7 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG 00c7edb98f0c57ad968196a9cef393c380b6d6f7) + set(UNIFIED_RUNTIME_TAG 6a0eb7eff8a955fcd53edc79653f6bc85ef922f9) 
set(UR_BUILD_ADAPTER_L0 ON)