diff --git a/sycl/plugins/hip/CMakeLists.txt b/sycl/plugins/hip/CMakeLists.txt
index 5af49f82e2b6e..6f9bcbcd71818 100644
--- a/sycl/plugins/hip/CMakeLists.txt
+++ b/sycl/plugins/hip/CMakeLists.txt
@@ -87,12 +87,46 @@ set(HIP_HEADERS "${PI_HIP_INCLUDE_DIR};${PI_HIP_HSA_INCLUDE_DIR}")
 # Create pi_hip library
 add_sycl_plugin(hip
   SOURCES
+    # Some code is shared with the UR adapter
+    "../unified_runtime/pi2ur.hpp"
+    "../unified_runtime/pi2ur.cpp"
+    "../unified_runtime/ur/ur.hpp"
+    "../unified_runtime/ur/ur.cpp"
+    "../unified_runtime/ur/adapters/hip/common.cpp"
+    "../unified_runtime/ur/adapters/hip/common.hpp"
+    "../unified_runtime/ur/adapters/hip/context.cpp"
+    "../unified_runtime/ur/adapters/hip/context.hpp"
+    "../unified_runtime/ur/adapters/hip/device.cpp"
+    "../unified_runtime/ur/adapters/hip/device.hpp"
+    "../unified_runtime/ur/adapters/hip/enqueue.cpp"
+    "../unified_runtime/ur/adapters/hip/event.cpp"
+    "../unified_runtime/ur/adapters/hip/event.hpp"
+    "../unified_runtime/ur/adapters/hip/platform.cpp"
+    "../unified_runtime/ur/adapters/hip/platform.hpp"
+    "../unified_runtime/ur/adapters/hip/memory.cpp"
+    "../unified_runtime/ur/adapters/hip/memory.hpp"
+    "../unified_runtime/ur/adapters/hip/sampler.cpp"
+    "../unified_runtime/ur/adapters/hip/sampler.hpp"
+    "../unified_runtime/ur/adapters/hip/usm.cpp"
+    "../unified_runtime/ur/adapters/hip/program.cpp"
+    "../unified_runtime/ur/adapters/hip/program.hpp"
+    "../unified_runtime/ur/adapters/hip/kernel.cpp"
+    "../unified_runtime/ur/adapters/hip/kernel.hpp"
+    "../unified_runtime/ur/adapters/hip/queue.cpp"
+    "../unified_runtime/ur/adapters/hip/queue.hpp"
+    "../unified_runtime/ur/adapters/hip/command_buffer.cpp"
+    "../unified_runtime/ur/adapters/hip/command_buffer.hpp"
+    "../unified_runtime/ur/adapters/hip/usm_p2p.cpp"
+    "../unified_runtime/ur/adapters/hip/ur_interface_loader.cpp"
     "${sycl_inc_dir}/sycl/detail/pi.h"
     "${sycl_inc_dir}/sycl/detail/pi.hpp"
     "pi_hip.hpp"
     "pi_hip.cpp"
   INCLUDE_DIRS
     ${sycl_plugin_dir}
+    ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime
+  LIBRARIES
+    UnifiedRuntime-Headers
   HEADER
     ${CMAKE_CURRENT_SOURCE_DIR}/include/features.hpp
 )
diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp
index b829800631db2..4338efa69a9a5 100644
--- a/sycl/plugins/hip/pi_hip.cpp
+++ b/sycl/plugins/hip/pi_hip.cpp
@@ -27,5847 +27,8 @@
 #include <string.h>
 #include <string_view>
 
-namespace {
-// Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be
-// indexed, but on NVidia it is an opaque type and needs to go through
-// cuArrayGetDescriptor so implement a utility function to get the array
-// properties
-inline void getArrayDesc(hipArray *array, hipArray_Format &format,
-                         size_t &channels) {
-#if defined(__HIP_PLATFORM_AMD__)
-  format = array->Format;
-  channels = array->NumChannels;
-#elif defined(__HIP_PLATFORM_NVIDIA__)
-  CUDA_ARRAY_DESCRIPTOR arrayDesc;
-  cuArrayGetDescriptor(&arrayDesc, (CUarray)array);
-
-  format = arrayDesc.Format;
-  channels = arrayDesc.NumChannels;
-#else
-#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
-#endif
-}
-
-// NVidia HIP headers guard hipArray3DCreate behind __CUDACC__, this does not
-// seem to be required and we're not using nvcc to build the HIP PI plugin so
-// add the translation function here
-#if defined(__HIP_PLATFORM_NVIDIA__) && !defined(__CUDACC__)
-inline static hipError_t
-hipArray3DCreate(hiparray *pHandle,
-                 const HIP_ARRAY3D_DESCRIPTOR *pAllocateArray) {
-  return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray));
-}
-#endif
-
-// hipArray gets turned into cudaArray when using the HIP NVIDIA platform, and
-// some CUDA APIs use cudaArray* and others use CUarray, these two represent the
-// same type, however when building cudaArray appears as an opaque type, so it
-// needs to be explicitly casted to CUarray. In order for this to work for both
-// AMD and NVidia we introduce an second hipArray type that will be CUarray for
-// NVIDIA and hipArray* for AMD so that we can place the explicit casts when
-// necessary for NVIDIA and they will be no-ops for AMD.
-#if defined(__HIP_PLATFORM_NVIDIA__)
-typedef CUarray hipCUarray;
-#elif defined(__HIP_PLATFORM_AMD__)
-typedef hipArray *hipCUarray;
-#else
-#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
-#endif
-
-// Add missing HIP to CUDA defines
-#if defined(__HIP_PLATFORM_NVIDIA__)
-#define hipMemoryType CUmemorytype
-#define hipMemoryTypeHost CU_MEMORYTYPE_HOST
-#define hipMemoryTypeDevice CU_MEMORYTYPE_DEVICE
-#define hipMemoryTypeArray CU_MEMORYTYPE_ARRAY
-#define hipMemoryTypeUnified CU_MEMORYTYPE_UNIFIED
-#endif
-
-std::string getHipVersionString() {
-  int driver_version = 0;
-  if (hipDriverGetVersion(&driver_version) != hipSuccess) {
-    return "";
-  }
-  // The version is returned as (1000 major + 10 minor).
-  std::stringstream stream;
-  stream << "HIP " << driver_version / 1000 << "."
-         << driver_version % 1000 / 10;
-  return stream.str();
-}
-
-pi_result map_error(hipError_t result) {
-  switch (result) {
-  case hipSuccess:
-    return PI_SUCCESS;
-  case hipErrorInvalidContext:
-    return PI_ERROR_INVALID_CONTEXT;
-  case hipErrorInvalidDevice:
-    return PI_ERROR_INVALID_DEVICE;
-  case hipErrorInvalidValue:
-    return PI_ERROR_INVALID_VALUE;
-  case hipErrorOutOfMemory:
-    return PI_ERROR_OUT_OF_HOST_MEMORY;
-  case hipErrorLaunchOutOfResources:
-    return PI_ERROR_OUT_OF_RESOURCES;
-  default:
-    return PI_ERROR_UNKNOWN;
-  }
-}
-
-// Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR
-constexpr size_t MaxMessageSize = 256;
-thread_local pi_result ErrorMessageCode = PI_SUCCESS;
-thread_local char ErrorMessage[MaxMessageSize];
-
-// Utility function for setting a message and warning
-[[maybe_unused]] static void setErrorMessage(const char *message,
-                                             pi_result error_code) {
-  assert(strlen(message) <= MaxMessageSize);
-  strcpy(ErrorMessage, message);
-  ErrorMessageCode = error_code;
-}
-
-// Returns plugin specific error and warning messages
-pi_result hip_piPluginGetLastError(char **message) {
-  *message = &ErrorMessage[0];
-  return ErrorMessageCode;
-}
-
-// Returns plugin specific backend option.
-// Current support is only for optimization options.
-// Return empty string for hip.
-// TODO: Determine correct string to be passed.
-pi_result hip_piPluginGetBackendOption(pi_platform, const char *frontend_option,
-                                       const char **backend_option) {
-  using namespace std::literals;
-  if (frontend_option == nullptr)
-    return PI_ERROR_INVALID_VALUE;
-  if (frontend_option == "-O0"sv || frontend_option == "-O1"sv ||
-      frontend_option == "-O2"sv || frontend_option == "-O3"sv ||
-      frontend_option == ""sv) {
-    *backend_option = "";
-    return PI_SUCCESS;
-  }
-  return PI_ERROR_INVALID_VALUE;
-}
-
-// Iterates over the event wait list, returns correct pi_result error codes.
-// Invokes the callback for the latest event of each queue in the wait list.
-// The callback must take a single pi_event argument and return a pi_result.
-template <typename Func>
-pi_result forLatestEvents(const pi_event *event_wait_list,
-                          std::size_t num_events_in_wait_list, Func &&f) {
-
-  if (event_wait_list == nullptr || num_events_in_wait_list == 0) {
-    return PI_ERROR_INVALID_EVENT_WAIT_LIST;
-  }
-
-  // Fast path if we only have a single event
-  if (num_events_in_wait_list == 1) {
-    return f(event_wait_list[0]);
-  }
-
-  std::vector<pi_event> events{event_wait_list,
-                               event_wait_list + num_events_in_wait_list};
-  std::sort(events.begin(), events.end(), [](pi_event e0, pi_event e1) {
-    // Tiered sort creating sublists of streams (smallest value first) in which
-    // the corresponding events are sorted into a sequence of newest first.
-    return e0->get_stream() < e1->get_stream() ||
-           (e0->get_stream() == e1->get_stream() &&
-            e0->get_event_id() > e1->get_event_id());
-  });
-
-  bool first = true;
-  hipStream_t lastSeenStream = 0;
-  for (pi_event event : events) {
-    if (!event || (!first && event->get_stream() == lastSeenStream)) {
-      continue;
-    }
-
-    first = false;
-    lastSeenStream = event->get_stream();
-
-    auto result = f(event);
-    if (result != PI_SUCCESS) {
-      return result;
-    }
-  }
-
-  return PI_SUCCESS;
-}
-
-/// Converts HIP error into PI error codes, and outputs error information
-/// to stderr.
-/// If PI_HIP_ABORT env variable is defined, it aborts directly instead of
-/// throwing the error. This is intended for debugging purposes.
-/// \return PI_SUCCESS if \param result was hipSuccess.
-/// \throw pi_error exception (integer) if input was not success.
-///
-pi_result check_error(hipError_t result, const char *function, int line,
-                      const char *file) {
-  if (result == hipSuccess) {
-    return PI_SUCCESS;
-  }
-
-  if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr) {
-    const char *errorString = nullptr;
-    const char *errorName = nullptr;
-    errorName = hipGetErrorName(result);
-    errorString = hipGetErrorString(result);
-    std::stringstream ss;
-    ss << "\nPI HIP ERROR:"
-       << "\n\tValue:           " << result
-       << "\n\tName:            " << errorName
-       << "\n\tDescription:     " << errorString
-       << "\n\tFunction:        " << function << "\n\tSource Location: " << file
-       << ":" << line << "\n"
-       << std::endl;
-    std::cerr << ss.str();
-  }
-
-  if (std::getenv("PI_HIP_ABORT") != nullptr) {
-    std::abort();
-  }
-
-  throw map_error(result);
-}
-
-/// \cond NODOXY
-#define PI_CHECK_ERROR(result) check_error(result, __func__, __LINE__, __FILE__)
-
-/// RAII type to guarantee recovering original HIP context
-/// Scoped context is used across all PI HIP plugin implementation
-/// to activate the PI Context on the current thread, matching the
-/// HIP driver semantics where the context used for the HIP Driver
-/// API is the one active on the thread.
-/// The implementation tries to avoid replacing the hipCtx_t if it cans
-class ScopedContext {
-  pi_context placedContext_;
-  hipCtx_t original_;
-  bool needToRecover_;
-
-public:
-  ScopedContext(pi_context ctxt) : placedContext_{ctxt}, needToRecover_{false} {
-
-    if (!placedContext_) {
-      throw PI_ERROR_INVALID_CONTEXT;
-    }
-
-    hipCtx_t desired = placedContext_->get();
-    PI_CHECK_ERROR(hipCtxGetCurrent(&original_));
-    if (original_ != desired) {
-      // Sets the desired context as the active one for the thread
-      PI_CHECK_ERROR(hipCtxSetCurrent(desired));
-      if (original_ == nullptr) {
-        // No context is installed on the current thread
-        // This is the most common case. We can activate the context in the
-        // thread and leave it there until all the PI context referring to the
-        // same underlying HIP context are destroyed. This emulates
-        // the behaviour of the HIP runtime api, and avoids costly context
-        // switches. No action is required on this side of the if.
-      } else {
-        needToRecover_ = true;
-      }
-    }
-  }
-
-  ~ScopedContext() {
-    if (needToRecover_) {
-      PI_CHECK_ERROR(hipCtxSetCurrent(original_));
-    }
-  }
-};
-
-/// \cond NODOXY
-template <typename T, typename Assign>
-pi_result getInfoImpl(size_t param_value_size, void *param_value,
-                      size_t *param_value_size_ret, T value, size_t value_size,
-                      Assign &&assign_func) {
-
-  if (param_value != nullptr) {
-
-    if (param_value_size < value_size) {
-      return PI_ERROR_INVALID_VALUE;
-    }
-
-    assign_func(param_value, value, value_size);
-  }
-
-  if (param_value_size_ret != nullptr) {
-    *param_value_size_ret = value_size;
-  }
-
-  return PI_SUCCESS;
-}
-
-template <typename T>
-pi_result getInfo(size_t param_value_size, void *param_value,
-                  size_t *param_value_size_ret, T value) {
-
-  auto assignment = [](void *param_value, T value, size_t value_size) {
-    (void)value_size;
-    *static_cast<T *>(param_value) = value;
-  };
-
-  return getInfoImpl(param_value_size, param_value, param_value_size_ret, value,
-                     sizeof(T), std::move(assignment));
-}
-
-template <typename T>
-pi_result getInfoArray(size_t array_length, size_t param_value_size,
-                       void *param_value, size_t *param_value_size_ret,
-                       T *value) {
-
-  auto assignment = [](void *param_value, T *value, size_t value_size) {
-    memcpy(param_value, static_cast<const void *>(value), value_size);
-  };
-
-  return getInfoImpl(param_value_size, param_value, param_value_size_ret, value,
-                     array_length * sizeof(T), std::move(assignment));
-}
-
-template <>
-pi_result getInfo<const char *>(size_t param_value_size, void *param_value,
-                                size_t *param_value_size_ret,
-                                const char *value) {
-  return getInfoArray(strlen(value) + 1, param_value_size, param_value,
-                      param_value_size_ret, value);
-}
-
-int getAttribute(pi_device device, hipDeviceAttribute_t attribute) {
-  int value;
-  sycl::detail::pi::assertion(
-      hipDeviceGetAttribute(&value, attribute, device->get()) == hipSuccess);
-  return value;
-}
-/// \endcond
-
-void simpleGuessLocalWorkSize(size_t *threadsPerBlock,
-                              const size_t *global_work_size,
-                              const size_t maxThreadsPerBlock[3],
-                              [[maybe_unused]] pi_kernel kernel) {
-  assert(threadsPerBlock != nullptr);
-  assert(global_work_size != nullptr);
-  assert(kernel != nullptr);
-  // int recommendedBlockSize, minGrid;
-
-  // PI_CHECK_ERROR(hipOccupancyMaxPotentialBlockSize(
-  //    &minGrid, &recommendedBlockSize, kernel->get(),
-  //    0, 0));
-
-  //(void)minGrid; // Not used, avoid warnings
-
-  threadsPerBlock[0] = std::min(maxThreadsPerBlock[0], global_work_size[0]);
-
-  // Find a local work group size that is a divisor of the global
-  // work group size to produce uniform work groups.
-  while (0u != (global_work_size[0] % threadsPerBlock[0])) {
-    --threadsPerBlock[0];
-  }
-}
-
-pi_result enqueueEventsWait(pi_queue command_queue, hipStream_t stream,
-                            pi_uint32 num_events_in_wait_list,
-                            const pi_event *event_wait_list) {
-  if (!event_wait_list) {
-    return PI_SUCCESS;
-  }
-  try {
-    ScopedContext active(command_queue->get_context());
-
-    auto result = forLatestEvents(
-        event_wait_list, num_events_in_wait_list,
-        [stream](pi_event event) -> pi_result {
-          if (event->get_stream() == stream) {
-            return PI_SUCCESS;
-          } else {
-            return PI_CHECK_ERROR(hipStreamWaitEvent(stream, event->get(), 0));
-          }
-        });
-
-    if (result != PI_SUCCESS) {
-      return result;
-    }
-    return PI_SUCCESS;
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_UNKNOWN;
-  }
-}
-
-} // anonymous namespace
-
-/// ------ Error handling, matching OpenCL plugin semantics.
-namespace sycl {
-__SYCL_INLINE_VER_NAMESPACE(_V1) {
-namespace detail {
-namespace pi {
-
-// Report error and no return (keeps compiler from printing warnings).
-// TODO: Probably change that to throw a catchable exception,
-//       but for now it is useful to see every failure.
-//
-[[noreturn]] void die(const char *Message) {
-  std::cerr << "pi_die: " << Message << std::endl;
-  std::terminate();
-}
-
-// Reports error messages
-void hipPrint(const char *Message) {
-  std::cerr << "pi_print: " << Message << std::endl;
-}
-
-void assertion(bool Condition, const char *Message) {
-  if (!Condition)
-    die(Message);
-}
-
-} // namespace pi
-} // namespace detail
-} // __SYCL_INLINE_VER_NAMESPACE(_V1)
-} // namespace sycl
-
-//--------------
-// PI object implementation
-
-extern "C" {
-
-// Required in a number of functions, so forward declare here
-pi_result hip_piEnqueueEventsWait(pi_queue command_queue,
-                                  pi_uint32 num_events_in_wait_list,
-                                  const pi_event *event_wait_list,
-                                  pi_event *event);
-pi_result hip_piEnqueueEventsWaitWithBarrier(pi_queue command_queue,
-                                             pi_uint32 num_events_in_wait_list,
-                                             const pi_event *event_wait_list,
-                                             pi_event *event);
-pi_result hip_piEventRelease(pi_event event);
-pi_result hip_piEventRetain(pi_event event);
-
-} // extern "C"
-
-/// \endcond
-
-void _pi_queue::compute_stream_wait_for_barrier_if_needed(hipStream_t stream,
-                                                          pi_uint32 stream_i) {
-  if (barrier_event_ && !compute_applied_barrier_[stream_i]) {
-    PI_CHECK_ERROR(hipStreamWaitEvent(stream, barrier_event_, 0));
-    compute_applied_barrier_[stream_i] = true;
-  }
-}
-
-void _pi_queue::transfer_stream_wait_for_barrier_if_needed(hipStream_t stream,
-                                                           pi_uint32 stream_i) {
-  if (barrier_event_ && !transfer_applied_barrier_[stream_i]) {
-    PI_CHECK_ERROR(hipStreamWaitEvent(stream, barrier_event_, 0));
-    transfer_applied_barrier_[stream_i] = true;
-  }
-}
-
-hipStream_t _pi_queue::get_next_compute_stream(pi_uint32 *stream_token) {
-  pi_uint32 stream_i;
-  pi_uint32 token;
-  while (true) {
-    if (num_compute_streams_ < compute_streams_.size()) {
-      // the check above is for performance - so as not to lock mutex every time
-      std::lock_guard<std::mutex> guard(compute_stream_mutex_);
-      // The second check is done after mutex is locked so other threads can not
-      // change num_compute_streams_ after that
-      if (num_compute_streams_ < compute_streams_.size()) {
-        PI_CHECK_ERROR(hipStreamCreateWithFlags(
-            &compute_streams_[num_compute_streams_++], flags_));
-      }
-    }
-    token = compute_stream_idx_++;
-    stream_i = token % compute_streams_.size();
-    // if a stream has been reused before it was next selected round-robin
-    // fashion, we want to delay its next use and instead select another one
-    // that is more likely to have completed all the enqueued work.
-    if (delay_compute_[stream_i]) {
-      delay_compute_[stream_i] = false;
-    } else {
-      break;
-    }
-  }
-  if (stream_token) {
-    *stream_token = token;
-  }
-  hipStream_t res = compute_streams_[stream_i];
-  compute_stream_wait_for_barrier_if_needed(res, stream_i);
-  return res;
-}
-
-hipStream_t _pi_queue::get_next_compute_stream(
-    pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
-    _pi_stream_guard &guard, pi_uint32 *stream_token) {
-  for (pi_uint32 i = 0; i < num_events_in_wait_list; i++) {
-    pi_uint32 token = event_wait_list[i]->get_compute_stream_token();
-    if (event_wait_list[i]->get_queue() == this && can_reuse_stream(token)) {
-      std::unique_lock<std::mutex> compute_sync_guard(
-          compute_stream_sync_mutex_);
-      // redo the check after lock to avoid data races on
-      // last_sync_compute_streams_
-      if (can_reuse_stream(token)) {
-        pi_uint32 stream_i = token % delay_compute_.size();
-        delay_compute_[stream_i] = true;
-        if (stream_token) {
-          *stream_token = token;
-        }
-        guard = _pi_stream_guard{std::move(compute_sync_guard)};
-        hipStream_t res = event_wait_list[i]->get_stream();
-        compute_stream_wait_for_barrier_if_needed(res, stream_i);
-        return res;
-      }
-    }
-  }
-  guard = {};
-  return get_next_compute_stream(stream_token);
-}
-
-hipStream_t _pi_queue::get_next_transfer_stream() {
-  if (transfer_streams_.empty()) { // for example in in-order queue
-    return get_next_compute_stream();
-  }
-  if (num_transfer_streams_ < transfer_streams_.size()) {
-    // the check above is for performance - so as not to lock mutex every time
-    std::lock_guard<std::mutex> guard(transfer_stream_mutex_);
-    // The second check is done after mutex is locked so other threads can not
-    // change num_transfer_streams_ after that
-    if (num_transfer_streams_ < transfer_streams_.size()) {
-      PI_CHECK_ERROR(hipStreamCreateWithFlags(
-          &transfer_streams_[num_transfer_streams_++], flags_));
-    }
-  }
-  pi_uint32 stream_i = transfer_stream_idx_++ % transfer_streams_.size();
-  hipStream_t res = transfer_streams_[stream_i];
-  transfer_stream_wait_for_barrier_if_needed(res, stream_i);
-  return res;
-}
-
-_pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue,
-                     hipStream_t stream, pi_uint32 stream_token)
-    : commandType_{type}, refCount_{1}, hasBeenWaitedOn_{false},
-      isRecorded_{false}, isStarted_{false}, streamToken_{stream_token},
-      evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr}, queue_{queue},
-      stream_{stream}, context_{context} {
-
-  assert(type != PI_COMMAND_TYPE_USER);
-
-  bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE;
-
-  PI_CHECK_ERROR(hipEventCreateWithFlags(
-      &evEnd_, profilingEnabled ? hipEventDefault : hipEventDisableTiming));
-
-  if (profilingEnabled) {
-    PI_CHECK_ERROR(hipEventCreateWithFlags(&evQueued_, hipEventDefault));
-    PI_CHECK_ERROR(hipEventCreateWithFlags(&evStart_, hipEventDefault));
-  }
-
-  if (queue_ != nullptr) {
-    hip_piQueueRetain(queue_);
-  }
-  hip_piContextRetain(context_);
-}
-
-_pi_event::~_pi_event() {
-  if (queue_ != nullptr) {
-    hip_piQueueRelease(queue_);
-  }
-  hip_piContextRelease(context_);
-}
-
-pi_result _pi_event::start() {
-  assert(!is_started());
-  pi_result result = PI_SUCCESS;
-
-  try {
-    if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
-      // NOTE: This relies on the default stream to be unused.
-      PI_CHECK_ERROR(hipEventRecord(evQueued_, 0));
-      PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get()));
-    }
-  } catch (pi_result error) {
-    result = error;
-  }
-
-  isStarted_ = true;
-  return result;
-}
-
-bool _pi_event::is_completed() const noexcept {
-  if (!isRecorded_) {
-    return false;
-  }
-  if (!hasBeenWaitedOn_) {
-    const hipError_t ret = hipEventQuery(evEnd_);
-    if (ret != hipSuccess && ret != hipErrorNotReady) {
-      PI_CHECK_ERROR(ret);
-      return false;
-    }
-    if (ret == hipErrorNotReady) {
-      return false;
-    }
-  }
-  return true;
-}
-
-pi_uint64 _pi_event::get_queued_time() const {
-  float miliSeconds = 0.0f;
-  assert(is_started());
-
-  // hipEventSynchronize waits till the event is ready for call to
-  // hipEventElapsedTime.
-  PI_CHECK_ERROR(hipEventSynchronize(evStart_));
-  PI_CHECK_ERROR(hipEventSynchronize(evEnd_));
-
-  PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, evStart_, evEnd_));
-  return static_cast<pi_uint64>(miliSeconds * 1.0e6);
-}
-
-pi_uint64 _pi_event::get_start_time() const {
-  float miliSeconds = 0.0f;
-  assert(is_started());
-
-  // hipEventSynchronize waits till the event is ready for call to
-  // hipEventElapsedTime.
-  PI_CHECK_ERROR(hipEventSynchronize(_pi_platform::evBase_));
-  PI_CHECK_ERROR(hipEventSynchronize(evStart_));
-
-  PI_CHECK_ERROR(
-      hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evStart_));
-  return static_cast<pi_uint64>(miliSeconds * 1.0e6);
-}
-
-pi_uint64 _pi_event::get_end_time() const {
-  float miliSeconds = 0.0f;
-  assert(is_started() && is_recorded());
-
-  // hipEventSynchronize waits till the event is ready for call to
-  // hipEventElapsedTime.
-  PI_CHECK_ERROR(hipEventSynchronize(_pi_platform::evBase_));
-  PI_CHECK_ERROR(hipEventSynchronize(evEnd_));
-
-  PI_CHECK_ERROR(
-      hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evEnd_));
-  return static_cast<pi_uint64>(miliSeconds * 1.0e6);
-}
-
-pi_result _pi_event::record() {
-
-  if (is_recorded() || !is_started()) {
-    return PI_ERROR_INVALID_EVENT;
-  }
-
-  pi_result result = PI_ERROR_INVALID_OPERATION;
-
-  if (!queue_) {
-    return PI_ERROR_INVALID_QUEUE;
-  }
-
-  try {
-    eventId_ = queue_->get_next_event_id();
-    if (eventId_ == 0) {
-      sycl::detail::pi::die(
-          "Unrecoverable program state reached in event identifier overflow");
-    }
-    result = PI_CHECK_ERROR(hipEventRecord(evEnd_, stream_));
-  } catch (pi_result error) {
-    result = error;
-  }
-
-  if (result == PI_SUCCESS) {
-    isRecorded_ = true;
-  }
-
-  return result;
-}
-
-pi_result _pi_event::wait() {
-  pi_result retErr;
-  try {
-    retErr = PI_CHECK_ERROR(hipEventSynchronize(evEnd_));
-    hasBeenWaitedOn_ = true;
-  } catch (pi_result error) {
-    retErr = error;
-  }
-
-  return retErr;
-}
-
-pi_result _pi_event::release() {
-  assert(queue_ != nullptr);
-  PI_CHECK_ERROR(hipEventDestroy(evEnd_));
-
-  if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
-    PI_CHECK_ERROR(hipEventDestroy(evQueued_));
-    PI_CHECK_ERROR(hipEventDestroy(evStart_));
-  }
-
-  return PI_SUCCESS;
-}
-
-// makes all future work submitted to queue wait for all work captured in event.
-pi_result enqueueEventWait(pi_queue queue, pi_event event) {
-  // for native events, the hipStreamWaitEvent call is used.
-  // This makes all future work submitted to stream wait for all
-  // work captured in event.
-  queue->for_each_stream([e = event->get()](hipStream_t s) {
-    PI_CHECK_ERROR(hipStreamWaitEvent(s, e, 0));
-  });
-  return PI_SUCCESS;
-}
-
-_pi_program::_pi_program(pi_context ctxt)
-    : module_{nullptr}, binary_{}, binarySizeInBytes_{0}, refCount_{1},
-      context_{ctxt} {
-  hip_piContextRetain(context_);
-}
-
-_pi_program::~_pi_program() { hip_piContextRelease(context_); }
-
-pi_result _pi_program::set_binary(const char *source, size_t length) {
-  assert((binary_ == nullptr && binarySizeInBytes_ == 0) &&
-         "Re-setting program binary data which has already been set");
-  binary_ = source;
-  binarySizeInBytes_ = length;
-  return PI_SUCCESS;
-}
-
-pi_result _pi_program::build_program(const char *build_options) {
-
-  this->buildOptions_ = build_options;
-
-  constexpr const unsigned int numberOfOptions = 4u;
-
-  hipJitOption options[numberOfOptions];
-  void *optionVals[numberOfOptions];
-
-  // Pass a buffer for info messages
-  options[0] = hipJitOptionInfoLogBuffer;
-  optionVals[0] = (void *)infoLog_;
-  // Pass the size of the info buffer
-  options[1] = hipJitOptionInfoLogBufferSizeBytes;
-  optionVals[1] = (void *)(long)MAX_LOG_SIZE;
-  // Pass a buffer for error message
-  options[2] = hipJitOptionErrorLogBuffer;
-  optionVals[2] = (void *)errorLog_;
-  // Pass the size of the error buffer
-  options[3] = hipJitOptionErrorLogBufferSizeBytes;
-  optionVals[3] = (void *)(long)MAX_LOG_SIZE;
-
-  auto result = PI_CHECK_ERROR(
-      hipModuleLoadDataEx(&module_, static_cast<const void *>(binary_),
-                          numberOfOptions, options, optionVals));
-
-  const auto success = (result == PI_SUCCESS);
-
-  buildStatus_ =
-      success ? PI_PROGRAM_BUILD_STATUS_SUCCESS : PI_PROGRAM_BUILD_STATUS_ERROR;
-
-  // If no exception, result is correct
-  return success ? PI_SUCCESS : PI_ERROR_BUILD_PROGRAM_FAILURE;
-}
-
-/// Finds kernel names by searching for entry points in the PTX source, as the
-/// HIP driver API doesn't expose an operation for this.
-/// Note: This is currently only being used by the SYCL program class for the
-///       has_kernel method, so an alternative would be to move the has_kernel
-///       query to PI and use hipModuleGetFunction to check for a kernel.
-std::string getKernelNames(pi_program program) {
-  (void)program;
-  sycl::detail::pi::die("getKernelNames not implemented");
-  return {};
-}
-
-/// RAII object that calls the reference count release function on the held PI
-/// object on destruction.
-///
-/// The `dismiss` function stops the release from happening on destruction.
-template <typename T> class ReleaseGuard {
-private:
-  T Captive;
-
-  static pi_result callRelease(pi_device Captive) {
-    return hip_piDeviceRelease(Captive);
-  }
-
-  static pi_result callRelease(pi_context Captive) {
-    return hip_piContextRelease(Captive);
-  }
-
-  static pi_result callRelease(pi_mem Captive) {
-    return hip_piMemRelease(Captive);
-  }
-
-  static pi_result callRelease(pi_program Captive) {
-    return hip_piProgramRelease(Captive);
-  }
-
-  static pi_result callRelease(pi_kernel Captive) {
-    return hip_piKernelRelease(Captive);
-  }
-
-  static pi_result callRelease(pi_queue Captive) {
-    return hip_piQueueRelease(Captive);
-  }
-
-  static pi_result callRelease(pi_event Captive) {
-    return hip_piEventRelease(Captive);
-  }
-
-public:
-  ReleaseGuard() = delete;
-  /// Obj can be `nullptr`.
-  explicit ReleaseGuard(T Obj) : Captive(Obj) {}
-  ReleaseGuard(ReleaseGuard &&Other) noexcept : Captive(Other.Captive) {
-    Other.Captive = nullptr;
-  }
-
-  ReleaseGuard(const ReleaseGuard &) = delete;
-
-  /// Calls the related PI object release function if the object held is not
-  /// `nullptr` or if `dismiss` has not been called.
-  ~ReleaseGuard() {
-    if (Captive != nullptr) {
-      pi_result ret = callRelease(Captive);
-      if (ret != PI_SUCCESS) {
-        // A reported HIP error is either an implementation or an asynchronous
-        // HIP error for which it is unclear if the function that reported it
-        // succeeded or not. Either way, the state of the program is compromised
-        // and likely unrecoverable.
-        sycl::detail::pi::die(
-            "Unrecoverable program state reached in hip_piMemRelease");
-      }
-    }
-  }
-
-  ReleaseGuard &operator=(const ReleaseGuard &) = delete;
-
-  ReleaseGuard &operator=(ReleaseGuard &&Other) {
-    Captive = Other.Captive;
-    Other.Captive = nullptr;
-    return *this;
-  }
-
-  /// End the guard and do not release the reference count of the held
-  /// PI object.
-  void dismiss() { Captive = nullptr; }
-};
-
-//-- PI API implementation
-extern "C" {
-
-/// Obtains the HIP platform.
-/// There is only one HIP platform, and contains all devices on the system.
-/// Triggers the HIP Driver initialization (hipInit) the first time, so this
-/// must be the first PI API called.
-///
-/// However because multiple devices in a context is not currently supported,
-/// place each device in a separate platform.
-///
-pi_result hip_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
-                             pi_uint32 *num_platforms) {
-
-  try {
-    static std::once_flag initFlag;
-    static pi_uint32 numPlatforms = 1;
-    static std::vector<_pi_platform> platformIds;
-
-    if (num_entries == 0 and platforms != nullptr) {
-      return PI_ERROR_INVALID_VALUE;
-    }
-    if (platforms == nullptr and num_platforms == nullptr) {
-      return PI_ERROR_INVALID_VALUE;
-    }
-
-    pi_result err = PI_SUCCESS;
-
-    std::call_once(
-        initFlag,
-        [](pi_result &err) {
-          if (hipInit(0) != hipSuccess) {
-            numPlatforms = 0;
-            return;
-          }
-          int numDevices = 0;
-          hipError_t hipErrorCode = hipGetDeviceCount(&numDevices);
-          if (hipErrorCode == hipErrorNoDevice) {
-            numPlatforms = 0;
-            return;
-          }
-          err = PI_CHECK_ERROR(hipErrorCode);
-          if (numDevices == 0) {
-            numPlatforms = 0;
-            return;
-          }
-          try {
-            numPlatforms = numDevices;
-            platformIds.resize(numDevices);
-
-            for (int i = 0; i < numDevices; ++i) {
-              hipDevice_t device;
-              err = PI_CHECK_ERROR(hipDeviceGet(&device, i));
-              platformIds[i].devices_.emplace_back(
-                  new _pi_device{device, &platformIds[i]});
-            }
-          } catch (const std::bad_alloc &) {
-            // Signal out-of-memory situation
-            for (int i = 0; i < numDevices; ++i) {
-              platformIds[i].devices_.clear();
-            }
-            platformIds.clear();
-            err = PI_ERROR_OUT_OF_HOST_MEMORY;
-          } catch (...) {
-            // Clear and rethrow to allow retry
-            for (int i = 0; i < numDevices; ++i) {
-              platformIds[i].devices_.clear();
-            }
-            platformIds.clear();
-            throw;
-          }
-        },
-        err);
-
-    if (num_platforms != nullptr) {
-      *num_platforms = numPlatforms;
-    }
-
-    if (platforms != nullptr) {
-      for (unsigned i = 0; i < std::min(num_entries, numPlatforms); ++i) {
-        platforms[i] = &platformIds[i];
-      }
-    }
-
-    return err;
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_OUT_OF_RESOURCES;
-  }
-}
-
-pi_result hip_piPlatformGetInfo([[maybe_unused]] pi_platform platform,
-                                pi_platform_info param_name,
-                                size_t param_value_size, void *param_value,
-                                size_t *param_value_size_ret) {
-  assert(platform != nullptr);
-
-  switch (param_name) {
-  case PI_PLATFORM_INFO_NAME:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   "AMD HIP BACKEND");
-  case PI_PLATFORM_INFO_VENDOR:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   "AMD Corporation");
-  case PI_PLATFORM_INFO_PROFILE:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   "FULL PROFILE");
-  case PI_PLATFORM_INFO_VERSION: {
-    auto version = getHipVersionString();
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   version.c_str());
-  }
-  case PI_PLATFORM_INFO_EXTENSIONS: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, "");
-  }
-  case PI_EXT_PLATFORM_INFO_BACKEND: {
-    return getInfo<pi_platform_backend>(param_value_size, param_value,
-                                        param_value_size_ret,
-                                        PI_EXT_PLATFORM_BACKEND_HIP);
-  }
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-  sycl::detail::pi::die("Platform info request not implemented");
-  return {};
-}
-
-/// \param devices List of devices available on the system
-/// \param num_devices Number of elements in the list of devices
-/// Requesting a non-GPU device triggers an error, all PI HIP devices
-/// are GPUs.
-///
-pi_result hip_piDevicesGet(pi_platform platform, pi_device_type device_type,
-                           pi_uint32 num_entries, pi_device *devices,
-                           pi_uint32 *num_devices) {
-
-  pi_result err = PI_SUCCESS;
-  const bool askingForDefault = device_type == PI_DEVICE_TYPE_DEFAULT;
-  const bool askingForGPU = device_type & PI_DEVICE_TYPE_GPU;
-  const bool returnDevices = askingForDefault || askingForGPU;
-
-  size_t numDevices = returnDevices ? platform->devices_.size() : 0;
-
-  try {
-    if (num_devices) {
-      *num_devices = numDevices;
-    }
-
-    if (returnDevices && devices) {
-      for (size_t i = 0; i < std::min(size_t(num_entries), numDevices); ++i) {
-        devices[i] = platform->devices_[i].get();
-      }
-    }
-
-    return err;
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_OUT_OF_RESOURCES;
-  }
-}
-
-/// \return PI_SUCCESS if the function is exehipted successfully
-/// HIP devices are always root devices so retain always returns success.
-pi_result hip_piDeviceRetain(pi_device device) {
-  (void)device;
-  return PI_SUCCESS;
-}
-
-pi_result hip_piContextGetInfo(pi_context context, pi_context_info param_name,
-                               size_t param_value_size, void *param_value,
-                               size_t *param_value_size_ret) {
-
-  switch (param_name) {
-  case PI_CONTEXT_INFO_NUM_DEVICES:
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1);
-  case PI_CONTEXT_INFO_DEVICES:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   context->get_device());
-  case PI_CONTEXT_INFO_REFERENCE_COUNT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   context->get_reference_count());
-  case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT:
-    return getInfo<pi_bool>(param_value_size, param_value, param_value_size_ret,
-                            true);
-  case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT:
-  case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT:
-    // 2D USM operations currently not supported.
-    return getInfo<pi_bool>(param_value_size, param_value, param_value_size_ret,
-                            false);
-  case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
-  case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
-  case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES:
-  case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
-    // These queries should be dealt with in context_impl.cpp by calling the
-    // queries of each device separately and building the intersection set.
-    setErrorMessage("These queries should have never come here.",
-                    PI_ERROR_INVALID_ARG_VALUE);
-    return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
-  }
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-
-  return PI_ERROR_OUT_OF_RESOURCES;
-}
-
-pi_result hip_piContextRetain(pi_context context) {
-  assert(context != nullptr);
-  assert(context->get_reference_count() > 0);
-
-  context->increment_reference_count();
-  return PI_SUCCESS;
-}
-
-pi_result hip_piextContextSetExtendedDeleter(
-    pi_context context, pi_context_extended_deleter function, void *user_data) {
-  context->set_extended_deleter(function, user_data);
-  return PI_SUCCESS;
-}
-
-/// Not applicable to HIP, devices cannot be partitioned.
-///
-pi_result hip_piDevicePartition(pi_device device,
-                                const pi_device_partition_property *properties,
-                                pi_uint32 num_devices, pi_device *out_devices,
-                                pi_uint32 *out_num_devices) {
-  (void)device;
-  (void)properties;
-  (void)num_devices;
-  (void)out_devices;
-  (void)out_num_devices;
-
-  return PI_ERROR_INVALID_OPERATION;
-}
-
-/// \return If available, the first binary that is PTX
-///
-pi_result hip_piextDeviceSelectBinary(pi_device device,
-                                      pi_device_binary *binaries,
-                                      pi_uint32 num_binaries,
-                                      pi_uint32 *selected_binary) {
-  (void)device;
-  if (!binaries) {
-    sycl::detail::pi::die("No list of device images provided");
-  }
-  if (num_binaries < 1) {
-    sycl::detail::pi::die("No binary images in the list");
-  }
-
-  // Look for an image for the HIP target, and return the first one that is
-  // found
-#if defined(__HIP_PLATFORM_AMD__)
-  const char *binary_type = __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN;
-#elif defined(__HIP_PLATFORM_NVIDIA__)
-  const char *binary_type = __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64;
-#else
-#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
-#endif
-
-  for (pi_uint32 i = 0; i < num_binaries; i++) {
-    if (strcmp(binaries[i]->DeviceTargetSpec, binary_type) == 0) {
-      *selected_binary = i;
-      return PI_SUCCESS;
-    }
-  }
-
-  // No image can be loaded for the given device
-  return PI_ERROR_INVALID_BINARY;
-}
-
-pi_result hip_piextGetDeviceFunctionPointer([[maybe_unused]] pi_device device,
-                                            pi_program program,
-                                            const char *func_name,
-                                            pi_uint64 *func_pointer_ret) {
-  // Check if device passed is the same the device bound to the context
-  assert(device == program->get_context()->get_device());
-  assert(func_pointer_ret != nullptr);
-
-  hipFunction_t func;
-  hipError_t ret = hipModuleGetFunction(&func, program->get(), func_name);
-  *func_pointer_ret = reinterpret_cast<pi_uint64>(func);
-  pi_result retError = PI_SUCCESS;
-
-  if (ret != hipSuccess && ret != hipErrorNotFound)
-    retError = PI_CHECK_ERROR(ret);
-  if (ret == hipErrorNotFound) {
-    *func_pointer_ret = 0;
-    retError = PI_ERROR_INVALID_KERNEL_NAME;
-  }
-
-  return retError;
-}
-
-/// \return PI_SUCCESS always since HIP devices are always root devices.
-///
-pi_result hip_piDeviceRelease(pi_device device) {
-  (void)device;
-  return PI_SUCCESS;
-}
-
-pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
-                              size_t param_value_size, void *param_value,
-                              size_t *param_value_size_ret) {
-
-  static constexpr pi_uint32 max_work_item_dimensions = 3u;
-
-  assert(device != nullptr);
-
-  switch (param_name) {
-  case PI_DEVICE_INFO_TYPE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_DEVICE_TYPE_GPU);
-  }
-  case PI_DEVICE_INFO_VENDOR_ID: {
-#if defined(__HIP_PLATFORM_AMD__)
-    pi_uint32 vendor_id = 4098u;
-#elif defined(__HIP_PLATFORM_NVIDIA__)
-    pi_uint32 vendor_id = 4318u;
-#else
-    pi_uint32 vendor_id = 0u;
-#endif
-
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   vendor_id);
-  }
-  case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: {
-    int compute_units = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&compute_units,
-                              hipDeviceAttributeMultiprocessorCount,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(compute_units >= 0);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_uint32(compute_units));
-  }
-  case PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   max_work_item_dimensions);
-  }
-  case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: {
-    size_t return_sizes[max_work_item_dimensions];
-
-    int max_x = 0, max_y = 0, max_z = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxBlockDimX,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(max_x >= 0);
-
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxBlockDimY,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(max_y >= 0);
-
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxBlockDimZ,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(max_z >= 0);
-
-    return_sizes[0] = size_t(max_x);
-    return_sizes[1] = size_t(max_y);
-    return_sizes[2] = size_t(max_z);
-    return getInfoArray(max_work_item_dimensions, param_value_size, param_value,
-                        param_value_size_ret, return_sizes);
-  }
-
-  case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D: {
-    size_t return_sizes[max_work_item_dimensions];
-    int max_x = 0, max_y = 0, max_z = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxGridDimX,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(max_x >= 0);
-
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxGridDimY,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(max_y >= 0);
-
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxGridDimZ,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(max_z >= 0);
-
-    return_sizes[0] = size_t(max_x);
-    return_sizes[1] = size_t(max_y);
-    return_sizes[2] = size_t(max_z);
-    return getInfoArray(max_work_item_dimensions, param_value_size, param_value,
-                        param_value_size_ret, return_sizes);
-  }
-
-  case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: {
-    int max_work_group_size = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_work_group_size,
-                              hipDeviceAttributeMaxThreadsPerBlock,
-                              device->get()) == hipSuccess);
-
-    sycl::detail::pi::assertion(max_work_group_size >= 0);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   size_t(max_work_group_size));
-  }
-  case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
-  }
-  case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  }
-  case PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
-  }
-  case PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS: {
-    // Number of sub-groups = max block size / warp size + possible remainder
-    int max_threads = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_threads,
-                              hipDeviceAttributeMaxThreadsPerBlock,
-                              device->get()) == hipSuccess);
-    int warpSize = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
-                              device->get()) == hipSuccess);
-    int maxWarps = (max_threads + warpSize - 1) / warpSize;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   static_cast<uint32_t>(maxWarps));
-  }
-  case PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: {
-    // Volta provides independent thread scheduling
-    // TODO: Revisit for previous generation GPUs
-    int major = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor,
-                              device->get()) == hipSuccess);
-    bool ifp = (major >= 7);
-    return getInfo(param_value_size, param_value, param_value_size_ret, ifp);
-  }
-  case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
-    int warpSize = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
-                              device->get()) == hipSuccess);
-    size_t sizes[1] = {static_cast<size_t>(warpSize)};
-    return getInfoArray<size_t>(1, param_value_size, param_value,
-                                param_value_size_ret, sizes);
-  }
-  case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: {
-    int clock_freq = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&clock_freq, hipDeviceAttributeClockRate,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(clock_freq >= 0);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_uint32(clock_freq) / 1000u);
-  }
-  case PI_DEVICE_INFO_ADDRESS_BITS: {
-    auto bits = pi_uint32{std::numeric_limits<uintptr_t>::digits};
-    return getInfo(param_value_size, param_value, param_value_size_ret, bits);
-  }
-  case PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: {
-    // Max size of memory object allocation in bytes.
-    // The minimum value is max(min(1024 × 1024 ×
-    // 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE),
-    // 32 × 1024 × 1024) for devices that are not of type
-    // CL_DEVICE_TYPE_HIPSTOM.
-
-    size_t global = 0;
-    sycl::detail::pi::assertion(hipDeviceTotalMem(&global, device->get()) ==
-                                hipSuccess);
-
-    auto quarter_global = static_cast<pi_uint32>(global / 4u);
-
-    auto max_alloc = std::max(std::min(1024u * 1024u * 1024u, quarter_global),
-                              32u * 1024u * 1024u);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_uint64{max_alloc});
-  }
-  case PI_DEVICE_INFO_IMAGE_SUPPORT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_TRUE);
-  }
-  case PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS: {
-    // This call doesn't match to HIP as it doesn't have images, but instead
-    // surfaces and textures. No clear call in the HIP API to determine this,
-    // but some searching found as of SM 2.x 128 are supported.
-    return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
-  }
-  case PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: {
-    // This call doesn't match to HIP as it doesn't have images, but instead
-    // surfaces and textures. No clear call in the HIP API to determine this,
-    // but some searching found as of SM 2.x 128 are supported.
-    return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
-  }
-
-  case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: {
-    // Take the smaller of maximum surface and maximum texture height.
-    int tex_height = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture2DHeight,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(tex_height >= 0);
-    int surf_height = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&surf_height,
-                              hipDeviceAttributeMaxTexture2DHeight,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(surf_height >= 0);
-
-    int min = std::min(tex_height, surf_height);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret, min);
-  }
-  case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: {
-    // Take the smaller of maximum surface and maximum texture width.
-    int tex_width = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture2DWidth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(tex_width >= 0);
-    int surf_width = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture2DWidth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(surf_width >= 0);
-
-    int min = std::min(tex_width, surf_width);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret, min);
-  }
-  case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: {
-    // Take the smaller of maximum surface and maximum texture height.
-    int tex_height = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture3DHeight,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(tex_height >= 0);
-    int surf_height = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&surf_height,
-                              hipDeviceAttributeMaxTexture3DHeight,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(surf_height >= 0);
-
-    int min = std::min(tex_height, surf_height);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret, min);
-  }
-  case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: {
-    // Take the smaller of maximum surface and maximum texture width.
-    int tex_width = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture3DWidth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(tex_width >= 0);
-    int surf_width = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture3DWidth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(surf_width >= 0);
-
-    int min = std::min(tex_width, surf_width);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret, min);
-  }
-  case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: {
-    // Take the smaller of maximum surface and maximum texture depth.
-    int tex_depth = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&tex_depth, hipDeviceAttributeMaxTexture3DDepth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(tex_depth >= 0);
-    int surf_depth = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&surf_depth, hipDeviceAttributeMaxTexture3DDepth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(surf_depth >= 0);
-
-    int min = std::min(tex_depth, surf_depth);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret, min);
-  }
-  case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: {
-    // Take the smaller of maximum surface and maximum texture width.
-    int tex_width = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture1DWidth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(tex_width >= 0);
-    int surf_width = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture1DWidth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(surf_width >= 0);
-
-    int min = std::min(tex_width, surf_width);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret, min);
-  }
-  case PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   size_t(0));
-  }
-  case PI_DEVICE_INFO_MAX_SAMPLERS: {
-    // This call is kind of meaningless for HIP, as samplers don't exist.
-    // Closest thing is textures, which is 128.
-    return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
-  }
-  case PI_DEVICE_INFO_MAX_PARAMETER_SIZE: {
-    // __global__ function parameters are passed to the device via constant
-    // memory and are limited to 4 KB.
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   size_t{4000u});
-  }
-  case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: {
-    int mem_base_addr_align = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&mem_base_addr_align,
-                              hipDeviceAttributeTextureAlignment,
-                              device->get()) == hipSuccess);
-    // Multiply by 8 as clGetDeviceInfo returns this value in bits
-    mem_base_addr_align *= 8;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   mem_base_addr_align);
-  }
-  case PI_DEVICE_INFO_HALF_FP_CONFIG: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
-  }
-  case PI_DEVICE_INFO_SINGLE_FP_CONFIG: {
-    auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
-                  PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA |
-                  PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
-    return getInfo(param_value_size, param_value, param_value_size_ret, config);
-  }
-  case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: {
-    auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
-                  PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA;
-    return getInfo(param_value_size, param_value, param_value_size_ret, config);
-  }
-  case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
-  }
-  case PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: {
-    // The value is dohipmented for all existing GPUs in the HIP programming
-    // guidelines, section "H.3.2. Global Memory".
-    return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
-  }
-  case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: {
-    int cache_size = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&cache_size, hipDeviceAttributeL2CacheSize,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(cache_size >= 0);
-    // The L2 cache is global to the GPU.
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_uint64(cache_size));
-  }
-  case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: {
-    size_t bytes = 0;
-    // Runtime API has easy access to this value, driver API info is scarse.
-    sycl::detail::pi::assertion(hipDeviceTotalMem(&bytes, device->get()) ==
-                                hipSuccess);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_uint64{bytes});
-  }
-  case PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: {
-    unsigned int constant_memory = 0;
-
-    // hipDeviceGetAttribute takes a int*, however the size of the constant
-    // memory on AMD GPU may be larger than what can fit in the positive part
-    // of a signed integer, so use an unsigned integer and cast the pointer to
-    // int*.
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(reinterpret_cast<int *>(&constant_memory),
-                              hipDeviceAttributeTotalConstantMemory,
-                              device->get()) == hipSuccess);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_uint64(constant_memory));
-  }
-  case PI_DEVICE_INFO_MAX_CONSTANT_ARGS: {
-    // TODO: is there a way to retrieve this from HIP driver API?
-    // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX
-    // 1060 3GB
-    return getInfo(param_value_size, param_value, param_value_size_ret, 9u);
-  }
-  case PI_DEVICE_INFO_LOCAL_MEM_TYPE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_DEVICE_LOCAL_MEM_TYPE_LOCAL);
-  }
-  case PI_DEVICE_INFO_LOCAL_MEM_SIZE: {
-    // OpenCL's "local memory" maps most closely to HIP's "shared memory".
-    // HIP has its own definition of "local memory", which maps to OpenCL's
-    // "private memory".
-    int local_mem_size = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&local_mem_size,
-                              hipDeviceAttributeMaxSharedMemoryPerBlock,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(local_mem_size >= 0);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_uint64(local_mem_size));
-  }
-  case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: {
-    int ecc_enabled = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&ecc_enabled, hipDeviceAttributeEccEnabled,
-                              device->get()) == hipSuccess);
-
-    sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1));
-    auto result = static_cast<pi_bool>(ecc_enabled);
-    return getInfo(param_value_size, param_value, param_value_size_ret, result);
-  }
-  case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: {
-    int is_integrated = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&is_integrated, hipDeviceAttributeIntegrated,
-                              device->get()) == hipSuccess);
-
-    sycl::detail::pi::assertion((is_integrated == 0) | (is_integrated == 1));
-    auto result = static_cast<pi_bool>(is_integrated);
-    return getInfo(param_value_size, param_value, param_value_size_ret, result);
-  }
-  case PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: {
-    // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX
-    // 1060 3GB
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   size_t{1000u});
-  }
-  case PI_DEVICE_INFO_ENDIAN_LITTLE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_TRUE);
-  }
-  case PI_DEVICE_INFO_AVAILABLE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_TRUE);
-  }
-  case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_TRUE);
-  }
-  case PI_DEVICE_INFO_COMPILER_AVAILABLE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_TRUE);
-  }
-  case PI_DEVICE_INFO_LINKER_AVAILABLE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_TRUE);
-  }
-  case PI_DEVICE_INFO_EXECUTION_CAPABILITIES: {
-    auto capability = PI_DEVICE_EXEC_CAPABILITIES_KERNEL;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   capability);
-  }
-  case PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
-    // The mandated minimum capability:
-    auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE |
-                      PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   capability);
-  }
-  case PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
-    // The mandated minimum capability:
-    auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   capability);
-  }
-  case PI_DEVICE_INFO_BUILT_IN_KERNELS: {
-    // An empty string is returned if no built-in kernels are supported by the
-    // device.
-    return getInfo(param_value_size, param_value, param_value_size_ret, "");
-  }
-  case PI_DEVICE_INFO_PLATFORM: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   device->get_platform());
-  }
-  case PI_DEVICE_INFO_NAME: {
-    static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u;
-    char name[MAX_DEVICE_NAME_LENGTH];
-    sycl::detail::pi::assertion(hipDeviceGetName(name, MAX_DEVICE_NAME_LENGTH,
-                                                 device->get()) == hipSuccess);
-
-    // On AMD GPUs hipDeviceGetName returns an empty string, so return the arch
-    // name instead, this is also what AMD OpenCL devices return.
-    if (strlen(name) == 0) {
-      hipDeviceProp_t props;
-      sycl::detail::pi::assertion(
-          hipGetDeviceProperties(&props, device->get()) == hipSuccess);
-
-      return getInfoArray(strlen(props.gcnArchName) + 1, param_value_size,
-                          param_value, param_value_size_ret, props.gcnArchName);
-    }
-    return getInfoArray(strlen(name) + 1, param_value_size, param_value,
-                        param_value_size_ret, name);
-  }
-  case PI_DEVICE_INFO_VENDOR: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   "AMD Corporation");
-  }
-  case PI_DEVICE_INFO_DRIVER_VERSION: {
-    auto version = getHipVersionString();
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   version.c_str());
-  }
-  case PI_DEVICE_INFO_PROFILE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, "HIP");
-  }
-  case PI_DEVICE_INFO_REFERENCE_COUNT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   device->get_reference_count());
-  }
-  case PI_DEVICE_INFO_VERSION: {
-    std::stringstream s;
-
-    hipDeviceProp_t props;
-    sycl::detail::pi::assertion(hipGetDeviceProperties(&props, device->get()) ==
-                                hipSuccess);
-#if defined(__HIP_PLATFORM_NVIDIA__)
-    s << props.major << "." << props.minor;
-#elif defined(__HIP_PLATFORM_AMD__)
-    s << props.gcnArchName;
-#else
-#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
-#endif
-
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   s.str().c_str());
-  }
-  case PI_DEVICE_INFO_OPENCL_C_VERSION: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, "");
-  }
-  case PI_DEVICE_INFO_BACKEND_VERSION: {
-    // TODO: return some meaningful for backend_version below
-    return getInfo(param_value_size, param_value, param_value_size_ret, "");
-  }
-  case PI_DEVICE_INFO_EXTENSIONS: {
-    // TODO: Remove comment when HIP support native asserts.
-    // DEVICELIB_ASSERT extension is set so fallback assert
-    // postprocessing is NOP. HIP 4.3 docs indicate support for
-    // native asserts are in progress
-    std::string SupportedExtensions = "";
-    SupportedExtensions += PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT;
-    SupportedExtensions += " ";
-
-    hipDeviceProp_t props;
-    sycl::detail::pi::assertion(hipGetDeviceProperties(&props, device->get()) ==
-                                hipSuccess);
-    if (props.arch.hasDoubles) {
-      SupportedExtensions += "cl_khr_fp64 ";
-    }
-
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   SupportedExtensions.c_str());
-  }
-  case PI_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
-    // The minimum value for the FULL profile is 1 MB.
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   size_t{1024u});
-  }
-  case PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   PI_TRUE);
-  }
-  case PI_DEVICE_INFO_PARENT_DEVICE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   nullptr);
-  }
-  case PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
-  }
-  case PI_DEVICE_INFO_PARTITION_PROPERTIES: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   static_cast<pi_device_partition_property>(0u));
-  }
-  case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
-  }
-  case PI_DEVICE_INFO_PARTITION_TYPE: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   static_cast<pi_device_partition_property>(0u));
-  }
-
-    // Intel USM extensions
-
-  case PI_DEVICE_INFO_USM_HOST_SUPPORT: {
-    // from cl_intel_unified_shared_memory: "The host memory access capabilities
-    // apply to any host allocation."
-    //
-    // query if/how the device can access page-locked host memory, possibly
-    // through PCIe, using the same pointer as the host
-    pi_bitfield value = {};
-    // if (getAttribute(device, HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) {
-    // the device shares a unified address space with the host
-    if (getAttribute(device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
-      // compute capability 6.x introduces operations that are atomic with
-      // respect to other CPUs and GPUs in the system
-      value = PI_USM_ACCESS | PI_USM_ATOMIC_ACCESS | PI_USM_CONCURRENT_ACCESS |
-              PI_USM_CONCURRENT_ATOMIC_ACCESS;
-    } else {
-      // on GPU architectures with compute capability lower than 6.x, atomic
-      // operations from the GPU to CPU memory will not be atomic with respect
-      // to CPU initiated atomic operations
-      value = PI_USM_ACCESS | PI_USM_CONCURRENT_ACCESS;
-    }
-    //}
-    return getInfo(param_value_size, param_value, param_value_size_ret, value);
-  }
-  case PI_DEVICE_INFO_USM_DEVICE_SUPPORT: {
-    // from cl_intel_unified_shared_memory:
-    // "The device memory access capabilities apply to any device allocation
-    // associated with this device."
-    //
-    // query how the device can access memory allocated on the device itself (?)
-    pi_bitfield value = PI_USM_ACCESS | PI_USM_ATOMIC_ACCESS |
-                        PI_USM_CONCURRENT_ACCESS |
-                        PI_USM_CONCURRENT_ATOMIC_ACCESS;
-    return getInfo(param_value_size, param_value, param_value_size_ret, value);
-  }
-  case PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: {
-    // from cl_intel_unified_shared_memory:
-    // "The single device shared memory access capabilities apply to any shared
-    // allocation associated with this device."
-    //
-    // query if/how the device can access managed memory associated to it
-    pi_bitfield value = {};
-    if (getAttribute(device, hipDeviceAttributeManagedMemory)) {
-      // the device can allocate managed memory on this system
-      value = PI_USM_ACCESS | PI_USM_ATOMIC_ACCESS;
-    }
-    if (getAttribute(device, hipDeviceAttributeConcurrentManagedAccess)) {
-      // the device can coherently access managed memory concurrently with the
-      // CPU
-      value |= PI_USM_CONCURRENT_ACCESS;
-      if (getAttribute(device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
-        // compute capability 6.x introduces operations that are atomic with
-        // respect to other CPUs and GPUs in the system
-        value |= PI_USM_CONCURRENT_ATOMIC_ACCESS;
-      }
-    }
-    return getInfo(param_value_size, param_value, param_value_size_ret, value);
-  }
-  case PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: {
-    // from cl_intel_unified_shared_memory:
-    // "The cross-device shared memory access capabilities apply to any shared
-    // allocation associated with this device, or to any shared memory
-    // allocation on another device that also supports the same cross-device
-    // shared memory access capability."
-    //
-    // query if/how the device can access managed memory associated to other
-    // devices
-    pi_bitfield value = {};
-    if (getAttribute(device, hipDeviceAttributeManagedMemory)) {
-      // the device can allocate managed memory on this system
-      value |= PI_USM_ACCESS;
-    }
-    if (getAttribute(device, hipDeviceAttributeConcurrentManagedAccess)) {
-      // all devices with the CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS
-      // attribute can coherently access managed memory concurrently with the
-      // CPU
-      value |= PI_USM_CONCURRENT_ACCESS;
-    }
-    if (getAttribute(device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
-      // compute capability 6.x introduces operations that are atomic with
-      // respect to other CPUs and GPUs in the system
-      if (value & PI_USM_ACCESS)
-        value |= PI_USM_ATOMIC_ACCESS;
-      if (value & PI_USM_CONCURRENT_ACCESS)
-        value |= PI_USM_CONCURRENT_ATOMIC_ACCESS;
-    }
-    return getInfo(param_value_size, param_value, param_value_size_ret, value);
-  }
-  case PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: {
-    // from cl_intel_unified_shared_memory:
-    // "The shared system memory access capabilities apply to any allocations
-    // made by a system allocator, such as malloc or new."
-    //
-    // query if/how the device can access pageable host memory allocated by the
-    // system allocator
-    pi_bitfield value = {};
-    if (getAttribute(device, hipDeviceAttributePageableMemoryAccess)) {
-      // the link between the device and the host does not support native
-      // atomic operations
-      value = PI_USM_ACCESS | PI_USM_CONCURRENT_ACCESS;
-    }
-    return getInfo(param_value_size, param_value, param_value_size_ret, value);
-  }
-
-  case PI_DEVICE_INFO_ATOMIC_64: {
-    // TODO: Reconsider it when AMD supports SYCL_USE_NATIVE_FP_ATOMICS.
-    hipDeviceProp_t props;
-    sycl::detail::pi::assertion(hipGetDeviceProperties(&props, device->get()) ==
-                                hipSuccess);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   props.arch.hasGlobalInt64Atomics &&
-                       props.arch.hasSharedInt64Atomics);
-  }
-
-  case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: {
-    size_t FreeMemory = 0;
-    size_t TotalMemory = 0;
-    sycl::detail::pi::assertion(hipMemGetInfo(&FreeMemory, &TotalMemory) ==
-                                    hipSuccess,
-                                "failed hipMemGetInfo() API.");
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   FreeMemory);
-  }
-
-  case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: {
-    int value = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&value, hipDeviceAttributeMemoryClockRate,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(value >= 0);
-    // Convert kilohertz to megahertz when returning.
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   value / 1000);
-  }
-
-  case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: {
-    int value = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&value, hipDeviceAttributeMemoryBusWidth,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(value >= 0);
-    return getInfo(param_value_size, param_value, param_value_size_ret, value);
-  }
-  case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   pi_int32{1});
-  }
-
-  case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
-    pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED |
-                                                PI_MEMORY_ORDER_ACQUIRE |
-                                                PI_MEMORY_ORDER_RELEASE;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   capabilities);
-  }
-  case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
-  case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
-    // SYCL2020 4.6.4.2 minimum mandated capabilities for
-    // atomic_fence/memory_scope_capabilities.
-    // Because scopes are hierarchical, wider scopes support all narrower
-    // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
-    // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
-    pi_memory_scope_capabilities capabilities = PI_MEMORY_SCOPE_WORK_ITEM |
-                                                PI_MEMORY_SCOPE_SUB_GROUP |
-                                                PI_MEMORY_SCOPE_WORK_GROUP;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   capabilities);
-  }
-  case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: {
-    // SYCL2020 4.6.4.2 minimum mandated capabilities for
-    // atomic_fence_order_capabilities.
-    pi_memory_order_capabilities capabilities =
-        PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
-        PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL;
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   capabilities);
-  }
-
-  case PI_DEVICE_INFO_DEVICE_ID: {
-    int value = 0;
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&value, hipDeviceAttributePciDeviceId,
-                              device->get()) == hipSuccess);
-    sycl::detail::pi::assertion(value >= 0);
-    return getInfo(param_value_size, param_value, param_value_size_ret, value);
-  }
-
-  case PI_DEVICE_INFO_UUID: {
-#if ((HIP_VERSION_MAJOR == 5 && HIP_VERSION_MINOR >= 2) ||                     \
-     HIP_VERSION_MAJOR > 5)
-    hipUUID uuid = {};
-    // Supported since 5.2+
-    sycl::detail::pi::assertion(hipDeviceGetUuid(&uuid, device->get()) ==
-                                hipSuccess);
-    std::array<unsigned char, 16> name;
-    std::copy(uuid.bytes, uuid.bytes + 16, name.begin());
-    return getInfoArray(16, param_value_size, param_value, param_value_size_ret,
-                        name.data());
-#endif
-    return PI_ERROR_INVALID_VALUE;
-  }
-  case PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT: {
-    // The mem-channel buffer property is not supported on HIP devices.
-    return getInfo<pi_bool>(param_value_size, param_value, param_value_size_ret,
-                            false);
-  }
-  case PI_DEVICE_INFO_IMAGE_SRGB: {
-    // The sRGB images are not supported on HIP device.
-    return getInfo<pi_bool>(param_value_size, param_value, param_value_size_ret,
-                            false);
-  }
-
-  case PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: {
-    // Maximum number of 32-bit registers available to a thread block.
-    // Note: This number is shared by all thread blocks simultaneously resident
-    // on a multiprocessor.
-    int max_registers{-1};
-    sycl::detail::pi::assertion(
-        hipDeviceGetAttribute(&max_registers,
-                              hipDeviceAttributeMaxRegistersPerBlock,
-                              device->get()) == hipSuccess);
-
-    sycl::detail::pi::assertion(max_registers >= 0);
-
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   static_cast<uint32_t>(max_registers));
-  }
-
-  case PI_DEVICE_INFO_PCI_ADDRESS: {
-    constexpr size_t AddressBufferSize = 13;
-    char AddressBuffer[AddressBufferSize];
-    sycl::detail::pi::assertion(
-        hipDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, device->get()) ==
-        hipSuccess);
-    // A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API
-    // is not guaranteed to use this format. In practice, it uses this format,
-    // at least in 5.3-5.5. To be on the safe side, we make sure the terminating
-    // \0 is set.
-    AddressBuffer[AddressBufferSize - 1] = '\0';
-    sycl::detail::pi::assertion(strnlen(AddressBuffer, AddressBufferSize) > 0);
-    return getInfoArray(strnlen(AddressBuffer, AddressBufferSize - 1) + 1,
-                        param_value_size, param_value, param_value_size_ret,
-                        AddressBuffer);
-  }
-  // TODO: Investigate if this information is available on HIP.
-  case PI_DEVICE_INFO_GPU_EU_COUNT:
-  case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
-  case PI_DEVICE_INFO_GPU_SLICES:
-  case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
-  case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
-  case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
-  case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH:
-  case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS:
-  case PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER:
-    setErrorMessage("HIP backend does not support this query",
-                    PI_ERROR_INVALID_ARG_VALUE);
-    return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
-
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-  sycl::detail::pi::die("Device info request not implemented");
-  return {};
-}
-
-/// Exposes the HIP device handle wrapped by a PI device object.
-///
-/// \param[in] device The PI device whose underlying HIP device is requested.
-/// \param[out] nativeHandle Receives the HIP device handle converted to
-/// pi_native_handle.
-///
-/// \return PI_SUCCESS
-pi_result hip_piextDeviceGetNativeHandle(pi_device device,
-                                         pi_native_handle *nativeHandle) {
-  const auto hipDevice = device->get();
-  *nativeHandle = static_cast<pi_native_handle>(hipDevice);
-  return PI_SUCCESS;
-}
-
-/// Creates a PI device object from a HIP device handle.
-/// TODO: Implement this. The function currently only reports that it is not
-/// implemented through sycl::detail::pi::die().
-/// NOTE: The created PI object takes ownership of the native handle.
-///
-/// \param[in] nativeHandle The native handle to create PI device object from.
-/// \param[in] platform is the PI platform of the device.
-/// \param[out] device Set to the PI device object created from native handle.
-///
-/// \return TBD
-pi_result hip_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle,
-                                                pi_platform platform,
-                                                pi_device *device) {
-  // None of the arguments is consumed until this entry point is implemented.
-  static_cast<void>(nativeHandle);
-  static_cast<void>(platform);
-  static_cast<void>(device);
-
-  sycl::detail::pi::die(
-      "Creation of PI device from native handle not implemented");
-  // Keeps a value on every path of this non-void function.
-  return {};
-}
-
-/* Context APIs */
-
-/// Create a PI HIP context.
-///
-/// By default creates a scoped context and keeps the last active HIP context
-/// on top of the HIP context stack.
-/// With the __SYCL_PI_CONTEXT_PROPERTIES_HIP_PRIMARY key/id and a value of
-/// PI_TRUE retains the device's primary HIP context and pushes it onto the
-/// HIP context stack.
-///
-/// \param[in] properties 0 terminated array of key/id-value combinations. Can
-/// be nullptr. Only accepts property key/id
-/// __SYCL_PI_CONTEXT_PROPERTIES_HIP_PRIMARY with a pi_bool value.
-/// \param[in] num_devices Number of devices; asserted to be exactly 1.
-/// \param[in] devices Non-null device array; only devices[0] is used.
-/// \param[in] pfn_notify Callback, currently unused; asserted to be nullptr.
-/// \param[in] user_data User data for callback; asserted to be nullptr.
-/// \param[out] retcontext Set to created context on success.
-///
-/// \return PI_SUCCESS on success, otherwise an error return code.
-pi_result hip_piContextCreate(
-    const pi_context_properties *properties,
-    [[maybe_unused]] pi_uint32 num_devices, const pi_device *devices,
-    [[maybe_unused]] void (*pfn_notify)(const char *errinfo,
-                                        const void *private_info, size_t cb,
-                                        [[maybe_unused]] void *user_data),
-    [[maybe_unused]] void *user_data, pi_context *retcontext) {
-
-  assert(devices != nullptr);
-  // TODO: How to implement context callback?
-  assert(pfn_notify == nullptr);
-  assert(user_data == nullptr);
-  assert(num_devices == 1);
-  // Need a valid output location for the created context
-  assert(retcontext != nullptr);
-  pi_result errcode_ret = PI_SUCCESS;
-
-  // Parse properties: the list is a sequence of (id, value) pairs ending at 0.
-  bool property_hip_primary = false;
-  while (properties && (0 != *properties)) {
-    // Consume property ID.
-    pi_context_properties id = *properties;
-    ++properties;
-    // Consume property value.
-    pi_context_properties value = *properties;
-    ++properties;
-    switch (id) {
-    case __SYCL_PI_CONTEXT_PROPERTIES_HIP_PRIMARY:
-      assert(value == PI_FALSE || value == PI_TRUE);
-      property_hip_primary = static_cast<bool>(value);
-      break;
-    default:
-      // Unknown property.
-      sycl::detail::pi::die(
-          "Unknown piContextCreate property in property list");
-      return PI_ERROR_INVALID_VALUE;
-    }
-  }
-
-  std::unique_ptr<_pi_context> piContextPtr{nullptr};
-  try {
-    hipCtx_t current = nullptr;
-
-    if (property_hip_primary) {
-      // Use the HIP primary context and assume that we want to use it
-      // immediately as we want to forgo context switches.
-      hipCtx_t Ctxt;
-      errcode_ret =
-          PI_CHECK_ERROR(hipDevicePrimaryCtxRetain(&Ctxt, devices[0]->get()));
-      piContextPtr = std::unique_ptr<_pi_context>(
-          new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
-      errcode_ret = PI_CHECK_ERROR(hipCtxPushCurrent(Ctxt));
-    } else {
-      // Create a scoped context, remembering the currently active one first.
-      hipCtx_t newContext;
-      PI_CHECK_ERROR(hipCtxGetCurrent(&current));
-      errcode_ret = PI_CHECK_ERROR(
-          hipCtxCreate(&newContext, hipDeviceMapHost, devices[0]->get()));
-      piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{
-          _pi_context::kind::user_defined, newContext, *devices});
-    }
-
-    static std::once_flag initFlag;
-    std::call_once(
-        initFlag,
-        [](pi_result &) {
-          // Use default stream to record base event counter
-          PI_CHECK_ERROR(
-              hipEventCreateWithFlags(&_pi_platform::evBase_, hipEventDefault));
-          PI_CHECK_ERROR(hipEventRecord(_pi_platform::evBase_, 0));
-        },
-        errcode_ret);
-
-    // For non-primary scoped contexts keep the last active on top of the stack
-    // as `hipCtxCreate` replaces it implicitly otherwise.
-    // Primary contexts are kept on top of the stack, so the previous context
-    // is not queried and therefore not recovered.
-    if (current != nullptr) {
-      PI_CHECK_ERROR(hipCtxSetCurrent(current));
-    }
-
-    *retcontext = piContextPtr.release();
-  } catch (pi_result err) {
-    errcode_ret = err;
-  } catch (...) {
-    errcode_ret = PI_ERROR_OUT_OF_RESOURCES;
-  }
-  return errcode_ret;
-}
-
-/// Drops one reference to a PI HIP context and tears it down on the last one.
-///
-/// While other references remain, the call only decrements the count. Once
-/// the count reaches zero the extended deleters are invoked, the _pi_context
-/// object is deleted, and the underlying HIP context is either destroyed
-/// (non-primary context) or popped from the context stack and released on its
-/// device (primary context).
-///
-/// \param[in] ctxt Context to release; asserted to be non-null.
-///
-/// \return PI_SUCCESS while references remain, otherwise the result of
-/// destroying or releasing the HIP context.
-pi_result hip_piContextRelease(pi_context ctxt) {
-
-  assert(ctxt != nullptr);
-
-  if (ctxt->decrement_reference_count() > 0) {
-    return PI_SUCCESS;
-  }
-  ctxt->invoke_extended_deleters();
-
-  // Takes ownership so the PI object is deleted on every exit path below,
-  // including when PI_CHECK_ERROR throws.
-  std::unique_ptr<_pi_context> context{ctxt};
-
-  if (!ctxt->is_primary()) {
-    hipCtx_t hipCtxt = ctxt->get();
-    // hipCtxSynchronize is not supported for AMD platform so we can just
-    // destroy the context, for NVIDIA make sure it's synchronized.
-#if defined(__HIP_PLATFORM_NVIDIA__)
-    hipCtx_t current = nullptr;
-    PI_CHECK_ERROR(hipCtxGetCurrent(&current));
-    if (hipCtxt != current) {
-      PI_CHECK_ERROR(hipCtxPushCurrent(hipCtxt));
-    }
-    PI_CHECK_ERROR(hipCtxSynchronize());
-    PI_CHECK_ERROR(hipCtxGetCurrent(&current));
-    if (hipCtxt == current) {
-      PI_CHECK_ERROR(hipCtxPopCurrent(&current));
-    }
-#endif
-    return PI_CHECK_ERROR(hipCtxDestroy(hipCtxt));
-  }
-
-  // Primary context is not destroyed, but released
-  hipDevice_t hipDev = ctxt->get_device()->get();
-  hipCtx_t current;
-  PI_CHECK_ERROR(hipCtxPopCurrent(&current));
-  return PI_CHECK_ERROR(hipDevicePrimaryCtxRelease(hipDev));
-}
-
-/// Exposes the HIP context handle wrapped by a PI context object.
-///
-/// \param[in] context The PI context whose underlying HIP context is
-/// requested.
-/// \param[out] nativeHandle Receives the HIP context handle reinterpreted as
-/// pi_native_handle.
-///
-/// \return PI_SUCCESS
-pi_result hip_piextContextGetNativeHandle(pi_context context,
-                                          pi_native_handle *nativeHandle) {
-  const auto hipContext = context->get();
-  *nativeHandle = reinterpret_cast<pi_native_handle>(hipContext);
-  return PI_SUCCESS;
-}
-
-/// Creates a PI context object from a HIP context handle.
-/// TODO: Implement this. The function currently only reports that it is not
-/// implemented through sycl::detail::pi::die().
-/// NOTE: The created PI object takes ownership of the native handle.
-///
-/// \param[in] nativeHandle The native handle to create PI context object from.
-/// \param[in] num_devices Currently unused.
-/// \param[in] devices Currently unused.
-/// \param[in] ownNativeHandle Currently unused.
-/// \param[out] context Set to the PI context object created from native handle.
-///
-/// \return TBD
-pi_result hip_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle,
-                                                 pi_uint32 num_devices,
-                                                 const pi_device *devices,
-                                                 bool ownNativeHandle,
-                                                 pi_context *context) {
-  // None of the arguments is consumed until this entry point is implemented.
-  static_cast<void>(nativeHandle);
-  static_cast<void>(num_devices);
-  static_cast<void>(devices);
-  static_cast<void>(ownNativeHandle);
-  static_cast<void>(context);
-
-  sycl::detail::pi::die(
-      "Creation of PI context from native handle not implemented");
-  // Keeps a value on every path of this non-void function.
-  return {};
-}
-
-/// Creates a PI Memory object using a HIP memory allocation.
-/// Can trigger a manual copy depending on the mode.
-/// \TODO Implement USE_HOST_PTR using hipHostRegister
-///
-/// \param[in] context Context that is made active for the allocation and
-/// stored in the new memory object.
-/// \param[in] flags Selects the allocation mode (PI_MEM_FLAGS_HOST_PTR_USE,
-/// PI_MEM_FLAGS_HOST_PTR_ALLOC, PI_MEM_FLAGS_HOST_PTR_COPY) and whether an
-/// initial host-to-device copy is performed.
-/// \param[in] size Size forwarded to the HIP allocation and copy calls.
-/// \param[in] host_ptr Source of the initial copy; replaced locally by the
-/// hipHostMalloc result when PI_MEM_FLAGS_HOST_PTR_ALLOC is set.
-/// \param[out] ret_mem Set to the new memory object on success and to nullptr
-/// on failure. Asserted to be non-null.
-/// \param[in] properties Asserted to be nullptr or to point at 0.
-///
-/// \return PI_SUCCESS on success, otherwise the error code caught from the
-/// failing call (PI_ERROR_OUT_OF_RESOURCES for non-pi_result exceptions).
-pi_result
-hip_piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
-                      void *host_ptr, pi_mem *ret_mem,
-                      [[maybe_unused]] const pi_mem_properties *properties) {
-  // Need input memory object
-  assert(ret_mem != nullptr);
-  assert((properties == nullptr || *properties == 0) &&
-         "no mem properties goes to HIP RT yet");
-  // Currently, USE_HOST_PTR is not implemented using host register
-  // since this triggers a weird segfault after program ends.
-  // Setting this constant to true enables testing that behavior.
-  const bool enableUseHostPtr = false;
-  const bool performInitialCopy =
-      (flags & PI_MEM_FLAGS_HOST_PTR_COPY) ||
-      ((flags & PI_MEM_FLAGS_HOST_PTR_USE) && !enableUseHostPtr);
-  pi_result retErr = PI_SUCCESS;
-  // Owns the new object until every step has succeeded, so a failing initial
-  // copy cannot hand an unowned object back together with an error code.
-  std::unique_ptr<_pi_mem> piMemObj{nullptr};
-
-  try {
-    ScopedContext active(context);
-    void *ptr;
-    _pi_mem::mem_::buffer_mem_::alloc_mode allocMode =
-        _pi_mem::mem_::buffer_mem_::alloc_mode::classic;
-
-    if ((flags & PI_MEM_FLAGS_HOST_PTR_USE) && enableUseHostPtr) {
-      retErr = PI_CHECK_ERROR(
-          hipHostRegister(host_ptr, size, hipHostRegisterMapped));
-      retErr = PI_CHECK_ERROR(hipHostGetDevicePointer(&ptr, host_ptr, 0));
-      allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr;
-    } else if (flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) {
-      retErr = PI_CHECK_ERROR(hipHostMalloc(&host_ptr, size));
-      retErr = PI_CHECK_ERROR(hipHostGetDevicePointer(&ptr, host_ptr, 0));
-      allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
-    } else {
-      retErr = PI_CHECK_ERROR(hipMalloc(&ptr, size));
-      if (flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
-        allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::copy_in;
-      }
-    }
-
-    if (retErr == PI_SUCCESS) {
-      pi_mem parentBuffer = nullptr;
-
-      auto devPtr =
-          reinterpret_cast<_pi_mem::mem_::mem_::buffer_mem_::native_type>(ptr);
-      // A failing `new` throws and is handled by the catch blocks below, so no
-      // null check is needed here.
-      piMemObj = std::unique_ptr<_pi_mem>(new _pi_mem{
-          context, parentBuffer, allocMode, devPtr, host_ptr, size});
-      if (performInitialCopy) {
-        // Operates on the default stream of the current HIP context.
-        retErr = PI_CHECK_ERROR(hipMemcpyHtoD(devPtr, host_ptr, size));
-        // Synchronize with default stream implicitly used by hipMemcpyHtoD
-        // to make buffer data available on device before any other PI call
-        // uses it.
-        if (retErr == PI_SUCCESS) {
-          hipStream_t defaultStream = 0;
-          retErr = PI_CHECK_ERROR(hipStreamSynchronize(defaultStream));
-        }
-      }
-    }
-  } catch (pi_result err) {
-    retErr = err;
-  } catch (...) {
-    retErr = PI_ERROR_OUT_OF_RESOURCES;
-  }
-
-  // Only hand the object out on success, mirroring hip_piMemBufferPartition.
-  // NOTE(review): the HIP allocation itself is not freed on the failure path
-  // here; confirm whether that is handled elsewhere.
-  if (retErr != PI_SUCCESS) {
-    piMemObj.reset();
-  }
-  *ret_mem = piMemObj.release();
-
-  return retErr;
-}
-
-/// Decreases the reference count of the Mem object.
-/// Once the count drops to zero the PI object is deleted and, unless it is a
-/// sub-buffer, the HIP free function matching its allocation is called.
-/// \return PI_SUCCESS; a failure while freeing is reported through
-/// sycl::detail::pi::die() instead of an error code
-///
-pi_result hip_piMemRelease(pi_mem memObj) {
-  assert((memObj != nullptr) && "PI_ERROR_INVALID_MEM_OBJECTS");
-
-  pi_result status = PI_SUCCESS;
-
-  try {
-
-    // Other references are still alive: nothing to free.
-    if (memObj->decrement_reference_count() > 0) {
-      return PI_SUCCESS;
-    }
-
-    // From here on the PI object is owned locally, so it is deleted even when
-    // PI_CHECK_ERROR throws.
-    std::unique_ptr<_pi_mem> owner(memObj);
-
-    // A sub-buffer has no native allocation of its own to free here.
-    if (owner->is_sub_buffer()) {
-      return PI_SUCCESS;
-    }
-
-    ScopedContext active(owner->get_context());
-
-    if (owner->mem_type_ == _pi_mem::mem_type::buffer) {
-      auto &bufferMem = owner->mem_.buffer_mem_;
-      switch (bufferMem.allocMode_) {
-      case _pi_mem::mem_::buffer_mem_::alloc_mode::copy_in:
-      case _pi_mem::mem_::buffer_mem_::alloc_mode::classic:
-        status = PI_CHECK_ERROR(hipFree((void *)bufferMem.ptr_));
-        break;
-      case _pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr:
-        status = PI_CHECK_ERROR(hipHostUnregister(bufferMem.hostPtr_));
-        break;
-      case _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr:
-        status = PI_CHECK_ERROR(hipFreeHost(bufferMem.hostPtr_));
-        break;
-      }
-    } else if (owner->mem_type_ == _pi_mem::mem_type::surface) {
-      auto &surfaceMem = owner->mem_.surface_mem_;
-      // Destroy the surface object first, then free its backing array.
-      status =
-          PI_CHECK_ERROR(hipDestroySurfaceObject(surfaceMem.get_surface()));
-      auto array = surfaceMem.get_array();
-      status = PI_CHECK_ERROR(hipFreeArray(array));
-    }
-
-  } catch (pi_result err) {
-    status = err;
-  } catch (...) {
-    status = PI_ERROR_OUT_OF_RESOURCES;
-  }
-
-  if (status != PI_SUCCESS) {
-    // A reported HIP error is either an implementation or an asynchronous HIP
-    // error for which it is unclear if the function that reported it succeeded
-    // or not. Either way, the state of the program is compromised and likely
-    // unrecoverable.
-    sycl::detail::pi::die(
-        "Unrecoverable program state reached in hip_piMemRelease");
-  }
-
-  return PI_SUCCESS;
-}
-
-/// Implements a buffer partition in the HIP backend.
-/// A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented
-/// as an offset over an existing HIP allocation.
-///
-/// \param[in] parent_buffer Buffer to partition; asserted to be a non-null
-/// buffer that is not itself a sub-buffer.
-/// \param[in] flags 0 (treated as PI_MEM_FLAGS_ACCESS_RW) or
-/// PI_MEM_FLAGS_ACCESS_RW; anything else trips an assertion.
-/// \param[in] buffer_create_type Asserted to be PI_BUFFER_CREATE_TYPE_REGION.
-/// \param[in] buffer_create_info Non-null pointer that is read as a
-/// pi_buffer_region (origin and size inside the parent).
-/// \param[out] memObj Set to the sub-buffer on success, nullptr on failure.
-///
-/// \return PI_SUCCESS, the pi_result caught while creating the sub-buffer, or
-/// PI_ERROR_OUT_OF_HOST_MEMORY for any other exception.
-pi_result hip_piMemBufferPartition(
-    pi_mem parent_buffer, pi_mem_flags flags,
-    [[maybe_unused]] pi_buffer_create_type buffer_create_type,
-    void *buffer_create_info, pi_mem *memObj) {
-  assert((parent_buffer != nullptr) && "PI_ERROR_INVALID_MEM_OBJECT");
-  assert(parent_buffer->is_buffer() && "PI_ERROR_INVALID_MEM_OBJECTS");
-  assert(!parent_buffer->is_sub_buffer() && "PI_ERROR_INVALID_MEM_OBJECT");
-
-  // Default value for flags means PI_MEM_FLAGS_ACCESS_RW.
-  if (flags == 0) {
-    flags = PI_MEM_FLAGS_ACCESS_RW;
-  }
-
-  assert((flags == PI_MEM_FLAGS_ACCESS_RW) && "PI_ERROR_INVALID_VALUE");
-  assert((buffer_create_type == PI_BUFFER_CREATE_TYPE_REGION) &&
-         "PI_ERROR_INVALID_VALUE");
-  assert((buffer_create_info != nullptr) && "PI_ERROR_INVALID_VALUE");
-  assert(memObj != nullptr);
-
-  const auto bufferRegion =
-      *reinterpret_cast<pi_buffer_region>(buffer_create_info);
-  assert((bufferRegion.size != 0u) && "PI_ERROR_INVALID_BUFFER_SIZE");
-
-  // The region must not wrap around and must lie inside the parent buffer.
-  assert((bufferRegion.origin <= (bufferRegion.origin + bufferRegion.size)) &&
-         "Overflow");
-  assert(((bufferRegion.origin + bufferRegion.size) <=
-          parent_buffer->mem_.buffer_mem_.get_size()) &&
-         "PI_ERROR_INVALID_BUFFER_SIZE");
-  assert(parent_buffer->mem_.buffer_mem_.ptr_ !=
-         _pi_mem::mem_::buffer_mem_::native_type{0});
-
-  auto &parentMem = parent_buffer->mem_.buffer_mem_;
-
-  // Device-side view: the parent allocation shifted by the region origin.
-  _pi_mem::mem_::buffer_mem_::native_type subBufferPtr =
-      parentMem.get_with_offset(bufferRegion.origin);
-
-  // Host-side view: only present when the parent carries a host pointer.
-  void *subBufferHostPtr =
-      parentMem.hostPtr_
-          ? static_cast<char *>(parentMem.hostPtr_) + bufferRegion.origin
-          : nullptr;
-
-  // Retained indirectly due to retaining parent buffer below.
-  pi_context context = parent_buffer->context_;
-  _pi_mem::mem_::buffer_mem_::alloc_mode subBufferMode =
-      _pi_mem::mem_::buffer_mem_::alloc_mode::classic;
-
-  // NOTE(review): presumably releases parent_buffer on scope exit unless
-  // dismissed; confirm against the ReleaseGuard definition.
-  ReleaseGuard<pi_mem> releaseGuard(parent_buffer);
-
-  std::unique_ptr<_pi_mem> subBuffer{nullptr};
-  try {
-    ScopedContext active(context);
-
-    subBuffer.reset(new _pi_mem{context, parent_buffer, subBufferMode,
-                                subBufferPtr, subBufferHostPtr,
-                                bufferRegion.size});
-  } catch (pi_result err) {
-    *memObj = nullptr;
-    return err;
-  } catch (...) {
-    *memObj = nullptr;
-    return PI_ERROR_OUT_OF_HOST_MEMORY;
-  }
-
-  // Success: keep the parent as it is and hand ownership to the caller.
-  releaseGuard.dismiss();
-  *memObj = subBuffer.release();
-  return PI_SUCCESS;
-}
-
-pi_result hip_piMemGetInfo(pi_mem memObj, pi_mem_info queriedInfo,
-                           size_t expectedQuerySize, void *queryOutput,
-                           size_t *writtenQuerySize) {
-  (void)memObj;
-  (void)queriedInfo;
-  (void)expectedQuerySize;
-  (void)queryOutput;
-  (void)writtenQuerySize;
-
-  sycl::detail::pi::die("hip_piMemGetInfo not implemented");
-}
-
-/// Gets the native HIP handle of a PI mem object
-///
-/// \param[in] mem The PI mem to get the native HIP object of.
-/// \param[out] nativeHandle Set to the native handle of the PI mem object.
-///
-/// \return PI_SUCCESS
-pi_result hip_piextMemGetNativeHandle(pi_mem mem,
-                                      pi_native_handle *nativeHandle) {
-#if defined(__HIP_PLATFORM_NVIDIA__)
-  if (sizeof(_pi_mem::mem_::buffer_mem_::native_type) >
-      sizeof(pi_native_handle)) {
-    // Check that all the upper bits that cannot be represented by
-    // pi_native_handle are empty.
-    // NOTE: The following shift might trigger a warning, but the check in the
-    // if above makes sure that this does not underflow.
-    _pi_mem::mem_::buffer_mem_::native_type upperBits =
-        mem->mem_.buffer_mem_.get() >> (sizeof(pi_native_handle) * CHAR_BIT);
-    if (upperBits) {
-      // Return an error if any of the remaining bits is non-zero.
-      return PI_ERROR_INVALID_MEM_OBJECT;
-    }
-  }
-  *nativeHandle = static_cast<pi_native_handle>(mem->mem_.buffer_mem_.get());
-#elif defined(__HIP_PLATFORM_AMD__)
-  *nativeHandle =
-      reinterpret_cast<pi_native_handle>(mem->mem_.buffer_mem_.get());
-#else
-#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
-#endif
-  return PI_SUCCESS;
-}
-
-/// Created a PI mem object from a HIP mem handle.
-/// TODO: Implement this.
-/// NOTE: The created PI object takes ownership of the native handle.
-///
-/// \param[in] nativeHandle The native handle to create PI mem object from.
-/// \param[in] context The PI context of the memory allocation.
-/// \param[in] ownNativeHandle Indicates if we own the native memory handle or
-/// it came from interop that asked to not transfer the ownership to SYCL RT.
-/// \param[out] mem Set to the PI mem object created from native handle.
-///
-/// \return TBD
-pi_result hip_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle,
-                                             pi_context context,
-                                             bool ownNativeHandle,
-                                             pi_mem *mem) {
-  (void)nativeHandle;
-  (void)context;
-  (void)ownNativeHandle;
-  (void)mem;
-
-  sycl::detail::pi::die(
-      "Creation of PI mem from native handle not implemented");
-  return {};
-}
-
-/// Created a PI image mem object from a HIP image mem handle.
-/// TODO: Implement this.
-/// NOTE: The created PI object takes ownership of the native handle.
-///
-/// \param[in] nativeHandle The native handle to create PI mem object from.
-/// \param[in] context The PI context of the memory allocation.
-/// \param[in] ownNativeHandle Indicates if we own the native memory handle or
-/// it came from interop that asked to not transfer the ownership to SYCL RT.
-/// \param[in] ImageFormat The format of the image.
-/// \param[in] ImageDesc The description information for the image.
-/// \param[out] mem Set to the PI mem object created from native handle.
-///
-/// \return TBD
-pi_result hip_piextMemImageCreateWithNativeHandle(
-    pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle,
-    const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc,
-    pi_mem *mem) {
-  (void)nativeHandle;
-  (void)context;
-  (void)ownNativeHandle;
-  (void)ImageFormat;
-  (void)ImageDesc;
-  (void)mem;
-
-  sycl::detail::pi::die(
-      "Creation of PI mem from native image handle not implemented");
-  return {};
-}
-
-/// Creates a `pi_queue` object on the HIP backend.
-/// Valid properties
-/// * __SYCL_PI_HIP_USE_DEFAULT_STREAM -> hipStreamDefault
-/// * __SYCL_PI_HIP_SYNC_WITH_DEFAULT -> hipStreamNonBlocking
-/// \return Pi queue object mapping to a HIPStream
-///
-pi_result hip_piQueueCreate(pi_context context, pi_device device,
-                            pi_queue_properties properties, pi_queue *queue) {
-  try {
-    std::unique_ptr<_pi_queue> queueImpl{nullptr};
-
-    if (context->get_device() != device) {
-      *queue = nullptr;
-      return PI_ERROR_INVALID_DEVICE;
-    }
-
-    unsigned int flags = 0;
-
-    const bool is_out_of_order =
-        properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
-
-    std::vector<hipStream_t> computeHipStreams(
-        is_out_of_order ? _pi_queue::default_num_compute_streams : 1);
-    std::vector<hipStream_t> transferHipStreams(
-        is_out_of_order ? _pi_queue::default_num_transfer_streams : 0);
-
-    queueImpl = std::unique_ptr<_pi_queue>(new _pi_queue{
-        std::move(computeHipStreams), std::move(transferHipStreams), context,
-        device, properties, flags});
-
-    *queue = queueImpl.release();
-
-    return PI_SUCCESS;
-  } catch (pi_result err) {
-
-    return err;
-
-  } catch (...) {
-
-    return PI_ERROR_OUT_OF_RESOURCES;
-  }
-}
-pi_result hip_piextQueueCreate(pi_context Context, pi_device Device,
-                               pi_queue_properties *Properties,
-                               pi_queue *Queue) {
-  assert(Properties);
-  // Expect flags mask to be passed first.
-  assert(Properties[0] == PI_QUEUE_FLAGS);
-  if (Properties[0] != PI_QUEUE_FLAGS)
-    return PI_ERROR_INVALID_VALUE;
-  pi_queue_properties Flags = Properties[1];
-  // Extra data isn't supported yet.
-  assert(Properties[2] == 0);
-  if (Properties[2] != 0)
-    return PI_ERROR_INVALID_VALUE;
-  return hip_piQueueCreate(Context, Device, Flags, Queue);
-}
-
-pi_result hip_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name,
-                             size_t param_value_size, void *param_value,
-                             size_t *param_value_size_ret) {
-  assert(command_queue != nullptr);
-
-  switch (param_name) {
-  case PI_QUEUE_INFO_CONTEXT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   command_queue->context_);
-  case PI_QUEUE_INFO_DEVICE:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   command_queue->device_);
-  case PI_QUEUE_INFO_REFERENCE_COUNT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   command_queue->get_reference_count());
-  case PI_QUEUE_INFO_PROPERTIES:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   command_queue->properties_);
-  case PI_EXT_ONEAPI_QUEUE_INFO_EMPTY: {
-    bool IsReady = command_queue->all_of([](hipStream_t s) -> bool {
-      const hipError_t ret = hipStreamQuery(s);
-      if (ret == hipSuccess)
-        return true;
-
-      if (ret == hipErrorNotReady)
-        return false;
-
-      PI_CHECK_ERROR(ret);
-      return false;
-    });
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   IsReady);
-  }
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-  sycl::detail::pi::die("Queue info request not implemented");
-  return {};
-}
-
-pi_result hip_piQueueRetain(pi_queue command_queue) {
-  assert(command_queue != nullptr);
-  assert(command_queue->get_reference_count() > 0);
-
-  command_queue->increment_reference_count();
-  return PI_SUCCESS;
-}
-
-pi_result hip_piQueueRelease(pi_queue command_queue) {
-  assert(command_queue != nullptr);
-
-  if (command_queue->decrement_reference_count() > 0) {
-    return PI_SUCCESS;
-  }
-
-  try {
-    std::unique_ptr<_pi_queue> queueImpl(command_queue);
-
-    ScopedContext active(command_queue->get_context());
-
-    command_queue->for_each_stream([](hipStream_t s) {
-      PI_CHECK_ERROR(hipStreamSynchronize(s));
-      PI_CHECK_ERROR(hipStreamDestroy(s));
-    });
-
-    return PI_SUCCESS;
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_OUT_OF_RESOURCES;
-  }
-}
-
-pi_result hip_piQueueFinish(pi_queue command_queue) {
-
-  // set default result to a negative result (avoid false-positve tests)
-  pi_result result = PI_ERROR_OUT_OF_HOST_MEMORY;
-
-  try {
-
-    assert(command_queue !=
-           nullptr); // need PI_ERROR_INVALID_EXTERNAL_HANDLE error code
-    ScopedContext active(command_queue->get_context());
-
-    command_queue->sync_streams<true>([&result](hipStream_t s) {
-      result = PI_CHECK_ERROR(hipStreamSynchronize(s));
-    });
-
-  } catch (pi_result err) {
-
-    result = err;
-
-  } catch (...) {
-
-    result = PI_ERROR_OUT_OF_RESOURCES;
-  }
-
-  return result;
-}
-
-// There is no HIP counterpart for queue flushing and we don't run into the
-// same problem of having to flush cross-queue dependencies as some of the
-// other plugins, so it can be left as no-op.
-pi_result hip_piQueueFlush(pi_queue command_queue) {
-  (void)command_queue;
-  return PI_SUCCESS;
-}
-
-/// Gets the native HIP handle of a PI queue object
-///
-/// \param[in] queue The PI queue to get the native HIP object of.
-/// \param[out] nativeHandle Set to the native handle of the PI queue object.
-///
-/// \return PI_SUCCESS
-pi_result hip_piextQueueGetNativeHandle(pi_queue queue,
-                                        pi_native_handle *nativeHandle,
-                                        int32_t *NativeHandleDesc) {
-  *NativeHandleDesc = 0;
-  ScopedContext active(queue->get_context());
-  *nativeHandle =
-      reinterpret_cast<pi_native_handle>(queue->get_next_compute_stream());
-  return PI_SUCCESS;
-}
-
-/// Created a PI queue object from a HIP queue handle.
-/// TODO: Implement this.
-/// NOTE: The created PI object takes ownership of the native handle.
-///
-/// \param[in] nativeHandle The native handle to create PI queue object from.
-/// \param[in] context is the PI context of the queue.
-/// \param[out] queue Set to the PI queue object created from native handle.
-/// \param ownNativeHandle tells if SYCL RT should assume the ownership of
-///        the native handle, if it can.
-///
-///
-/// \return TBD
-pi_result hip_piextQueueCreateWithNativeHandle(
-    pi_native_handle nativeHandle, int32_t NativeHandleDesc, pi_context context,
-    pi_device device, bool ownNativeHandle, pi_queue_properties *Properties,
-    pi_queue *queue) {
-  (void)nativeHandle;
-  (void)NativeHandleDesc;
-  (void)context;
-  (void)device;
-  (void)ownNativeHandle;
-  (void)Properties;
-  (void)queue;
-  sycl::detail::pi::die(
-      "Creation of PI queue from native handle not implemented");
-  return {};
-}
-
-pi_result hip_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer,
-                                      pi_bool blocking_write, size_t offset,
-                                      size_t size, void *ptr,
-                                      pi_uint32 num_events_in_wait_list,
-                                      const pi_event *event_wait_list,
-                                      pi_event *event) {
-
-  assert(buffer != nullptr);
-  assert(command_queue != nullptr);
-  pi_result retErr = PI_SUCCESS;
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-    retErr = enqueueEventsWait(command_queue, hipStream,
-                               num_events_in_wait_list, event_wait_list);
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_WRITE, command_queue, hipStream));
-      retImplEv->start();
-    }
-
-    retErr = PI_CHECK_ERROR(
-        hipMemcpyHtoDAsync(buffer->mem_.buffer_mem_.get_with_offset(offset),
-                           ptr, size, hipStream));
-
-    if (event) {
-      retErr = retImplEv->record();
-    }
-
-    if (blocking_write) {
-      retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
-    }
-
-    if (event) {
-      *event = retImplEv.release();
-    }
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
-                                     pi_bool blocking_read, size_t offset,
-                                     size_t size, void *ptr,
-                                     pi_uint32 num_events_in_wait_list,
-                                     const pi_event *event_wait_list,
-                                     pi_event *event) {
-
-  assert(buffer != nullptr);
-  assert(command_queue != nullptr);
-  pi_result retErr = PI_SUCCESS;
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-    retErr = enqueueEventsWait(command_queue, hipStream,
-                               num_events_in_wait_list, event_wait_list);
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_READ, command_queue, hipStream));
-      retImplEv->start();
-    }
-
-    retErr = PI_CHECK_ERROR(hipMemcpyDtoHAsync(
-        ptr, buffer->mem_.buffer_mem_.get_with_offset(offset), size,
-        hipStream));
-
-    if (event) {
-      retErr = retImplEv->record();
-    }
-
-    if (blocking_read) {
-      retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
-    }
-
-    if (event) {
-      *event = retImplEv.release();
-    }
-
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piEventsWait(pi_uint32 num_events, const pi_event *event_list) {
-
-  try {
-    assert(num_events != 0);
-    assert(event_list);
-    if (num_events == 0) {
-      return PI_ERROR_INVALID_VALUE;
-    }
-
-    if (!event_list) {
-      return PI_ERROR_INVALID_EVENT;
-    }
-
-    auto context = event_list[0]->get_context();
-    ScopedContext active(context);
-
-    auto waitFunc = [context](pi_event event) -> pi_result {
-      if (!event) {
-        return PI_ERROR_INVALID_EVENT;
-      }
-
-      if (event->get_context() != context) {
-        return PI_ERROR_INVALID_CONTEXT;
-      }
-
-      return event->wait();
-    };
-    return forLatestEvents(event_list, num_events, waitFunc);
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_OUT_OF_RESOURCES;
-  }
-}
-
-pi_result hip_piKernelCreate(pi_program program, const char *kernel_name,
-                             pi_kernel *kernel) {
-  assert(kernel != nullptr);
-  assert(program != nullptr);
-
-  pi_result retErr = PI_SUCCESS;
-  std::unique_ptr<_pi_kernel> retKernel{nullptr};
-
-  try {
-    ScopedContext active(program->get_context());
-
-    hipFunction_t hipFunc;
-    retErr = PI_CHECK_ERROR(
-        hipModuleGetFunction(&hipFunc, program->get(), kernel_name));
-
-    std::string kernel_name_woffset = std::string(kernel_name) + "_with_offset";
-    hipFunction_t hipFuncWithOffsetParam;
-    hipError_t offsetRes = hipModuleGetFunction(
-        &hipFuncWithOffsetParam, program->get(), kernel_name_woffset.c_str());
-
-    // If there is no kernel with global offset parameter we mark it as missing
-    if (offsetRes == hipErrorNotFound) {
-      hipFuncWithOffsetParam = nullptr;
-    } else {
-      retErr = PI_CHECK_ERROR(offsetRes);
-    }
-
-    retKernel = std::unique_ptr<_pi_kernel>(
-        new _pi_kernel{hipFunc, hipFuncWithOffsetParam, kernel_name, program,
-                       program->get_context()});
-  } catch (pi_result err) {
-    retErr = err;
-  } catch (...) {
-    retErr = PI_ERROR_OUT_OF_HOST_MEMORY;
-  }
-
-  *kernel = retKernel.release();
-  return retErr;
-}
-
-pi_result hip_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
-                             size_t arg_size, const void *arg_value) {
-
-  assert(kernel != nullptr);
-  pi_result retErr = PI_SUCCESS;
-  try {
-    if (arg_value) {
-      kernel->set_kernel_arg(arg_index, arg_size, arg_value);
-    } else {
-      kernel->set_kernel_local_arg(arg_index, arg_size);
-    }
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
-                                      const pi_mem_obj_property *arg_properties,
-                                      const pi_mem *arg_value) {
-  std::ignore = arg_properties;
-
-  assert(kernel != nullptr);
-  assert(arg_value != nullptr);
-
-  // Below sets kernel arg when zero-sized buffers are handled.
-  // In such case the corresponding memory is null.
-  if (*arg_value == nullptr) {
-    kernel->set_kernel_arg(arg_index, 0, nullptr);
-    return PI_SUCCESS;
-  }
-
-  pi_result retErr = PI_SUCCESS;
-  try {
-    pi_mem arg_mem = *arg_value;
-
-    if (arg_mem->mem_type_ == _pi_mem::mem_type::surface) {
-      auto array = arg_mem->mem_.surface_mem_.get_array();
-      hipArray_Format Format;
-      size_t NumChannels;
-      getArrayDesc(array, Format, NumChannels);
-      if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 &&
-          Format != HIP_AD_FORMAT_SIGNED_INT32 &&
-          Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) {
-        sycl::detail::pi::die(
-            "PI HIP kernels only support images with channel types int32, "
-            "uint32, float, and half.");
-      }
-      hipSurfaceObject_t hipSurf = arg_mem->mem_.surface_mem_.get_surface();
-      kernel->set_kernel_arg(arg_index, sizeof(hipSurf), (void *)&hipSurf);
-    } else
-
-    {
-      void *hipPtr = arg_mem->mem_.buffer_mem_.get_void();
-      kernel->set_kernel_arg(arg_index, sizeof(void *), (void *)&hipPtr);
-    }
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index,
-                                       const pi_sampler *arg_value) {
-
-  assert(kernel != nullptr);
-  assert(arg_value != nullptr);
-
-  pi_result retErr = PI_SUCCESS;
-  try {
-    pi_uint32 samplerProps = (*arg_value)->props_;
-    kernel->set_kernel_arg(arg_index, sizeof(pi_uint32), (void *)&samplerProps);
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piEnqueueKernelLaunch(
-    pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim,
-    const size_t *global_work_offset, const size_t *global_work_size,
-    const size_t *local_work_size, pi_uint32 num_events_in_wait_list,
-    const pi_event *event_wait_list, pi_event *event) {
-
-  // Preconditions
-  assert(command_queue != nullptr);
-  assert(command_queue->get_context() == kernel->get_context());
-  assert(kernel != nullptr);
-  assert(global_work_offset != nullptr);
-  assert(work_dim > 0);
-  assert(work_dim < 4);
-
-  if (*global_work_size == 0) {
-    return hip_piEnqueueEventsWaitWithBarrier(
-        command_queue, num_events_in_wait_list, event_wait_list, event);
-  }
-
-  // Set the number of threads per block to the number of threads per warp
-  // by default unless user has provided a better number
-  size_t threadsPerBlock[3] = {32u, 1u, 1u};
-  size_t maxWorkGroupSize = 0u;
-  size_t maxThreadsPerBlock[3] = {};
-  bool providedLocalWorkGroupSize = (local_work_size != nullptr);
-
-  {
-    pi_result retError = hip_piDeviceGetInfo(
-        command_queue->device_, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES,
-        sizeof(maxThreadsPerBlock), maxThreadsPerBlock, nullptr);
-    assert(retError == PI_SUCCESS);
-    (void)retError;
-
-    retError = hip_piDeviceGetInfo(
-        command_queue->device_, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
-        sizeof(maxWorkGroupSize), &maxWorkGroupSize, nullptr);
-    assert(retError == PI_SUCCESS);
-    // The maxWorkGroupsSize = 1024 for AMD GPU
-    // The maxThreadsPerBlock = {1024, 1024, 1024}
-
-    if (providedLocalWorkGroupSize) {
-      auto isValid = [&](int dim) {
-        if (local_work_size[dim] > maxThreadsPerBlock[dim])
-          return PI_ERROR_INVALID_WORK_GROUP_SIZE;
-        // Checks that local work sizes are a divisor of the global work sizes
-        // which includes that the local work sizes are neither larger than the
-        // global work sizes and not 0.
-        if (0u == local_work_size[dim])
-          return PI_ERROR_INVALID_WORK_GROUP_SIZE;
-        if (0u != (global_work_size[dim] % local_work_size[dim]))
-          return PI_ERROR_INVALID_WORK_GROUP_SIZE;
-        threadsPerBlock[dim] = local_work_size[dim];
-        return PI_SUCCESS;
-      };
-
-      for (size_t dim = 0; dim < work_dim; dim++) {
-        auto err = isValid(dim);
-        if (err != PI_SUCCESS)
-          return err;
-      }
-    } else {
-      simpleGuessLocalWorkSize(threadsPerBlock, global_work_size,
-                               maxThreadsPerBlock, kernel);
-    }
-  }
-
-  if (maxWorkGroupSize <
-      size_t(threadsPerBlock[0] * threadsPerBlock[1] * threadsPerBlock[2])) {
-    return PI_ERROR_INVALID_WORK_GROUP_SIZE;
-  }
-
-  size_t blocksPerGrid[3] = {1u, 1u, 1u};
-
-  for (size_t i = 0; i < work_dim; i++) {
-    blocksPerGrid[i] =
-        (global_work_size[i] + threadsPerBlock[i] - 1) / threadsPerBlock[i];
-  }
-
-  pi_result retError = PI_SUCCESS;
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-
-    pi_uint32 stream_token;
-    _pi_stream_guard guard;
-    hipStream_t hipStream = command_queue->get_next_compute_stream(
-        num_events_in_wait_list, event_wait_list, guard, &stream_token);
-    hipFunction_t hipFunc = kernel->get();
-
-    retError = enqueueEventsWait(command_queue, hipStream,
-                                 num_events_in_wait_list, event_wait_list);
-
-    // Set the implicit global offset parameter if kernel has offset variant
-    if (kernel->get_with_offset_parameter()) {
-      std::uint32_t hip_implicit_offset[3] = {0, 0, 0};
-      if (global_work_offset) {
-        for (size_t i = 0; i < work_dim; i++) {
-          hip_implicit_offset[i] =
-              static_cast<std::uint32_t>(global_work_offset[i]);
-          if (global_work_offset[i] != 0) {
-            hipFunc = kernel->get_with_offset_parameter();
-          }
-        }
-      }
-      kernel->set_implicit_offset_arg(sizeof(hip_implicit_offset),
-                                      hip_implicit_offset);
-    }
-
-    auto argIndices = kernel->get_arg_indices();
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(
-          _pi_event::make_native(PI_COMMAND_TYPE_NDRANGE_KERNEL, command_queue,
-                                 hipStream, stream_token));
-      retImplEv->start();
-    }
-
-    // Set local mem max size if env var is present
-    static const char *local_mem_sz_ptr =
-        std::getenv("SYCL_PI_HIP_MAX_LOCAL_MEM_SIZE");
-
-    if (local_mem_sz_ptr) {
-      int device_max_local_mem = 0;
-      retError = PI_CHECK_ERROR(hipDeviceGetAttribute(
-          &device_max_local_mem, hipDeviceAttributeMaxSharedMemoryPerBlock,
-          command_queue->get_device()->get()));
-
-      static const int env_val = std::atoi(local_mem_sz_ptr);
-      if (env_val <= 0 || env_val > device_max_local_mem) {
-        setErrorMessage("Invalid value specified for "
-                        "SYCL_PI_HIP_MAX_LOCAL_MEM_SIZE",
-                        PI_ERROR_PLUGIN_SPECIFIC_ERROR);
-        return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
-      }
-      retError = PI_CHECK_ERROR(hipFuncSetAttribute(
-          hipFunc, hipFuncAttributeMaxDynamicSharedMemorySize, env_val));
-    }
-
-    retError = PI_CHECK_ERROR(hipModuleLaunchKernel(
-        hipFunc, blocksPerGrid[0], blocksPerGrid[1], blocksPerGrid[2],
-        threadsPerBlock[0], threadsPerBlock[1], threadsPerBlock[2],
-        kernel->get_local_size(), hipStream, argIndices.data(), nullptr));
-
-    kernel->clear_local_size();
-
-    if (event) {
-      retError = retImplEv->record();
-      *event = retImplEv.release();
-    }
-  } catch (pi_result err) {
-    retError = err;
-  }
-  return retError;
-}
-
-pi_result hip_piMemImageCreate(pi_context context, pi_mem_flags flags,
-                               const pi_image_format *image_format,
-                               const pi_image_desc *image_desc, void *host_ptr,
-                               pi_mem *ret_mem) {
-
-  // Need input memory object
-  assert(ret_mem != nullptr);
-  const bool performInitialCopy = (flags & PI_MEM_FLAGS_HOST_PTR_COPY) ||
-                                  ((flags & PI_MEM_FLAGS_HOST_PTR_USE));
-  pi_result retErr = PI_SUCCESS;
-
-  // We only support RBGA channel order
-  // TODO: check SYCL CTS and spec. May also have to support BGRA
-  if (image_format->image_channel_order !=
-      pi_image_channel_order::PI_IMAGE_CHANNEL_ORDER_RGBA) {
-    sycl::detail::pi::die(
-        "hip_piMemImageCreate only supports RGBA channel order");
-  }
-
-  // We have to use cuArray3DCreate, which has some caveats. The height and
-  // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc gives
-  // a minimum value of 1, so we need to convert the answer.
-  HIP_ARRAY3D_DESCRIPTOR array_desc;
-  array_desc.NumChannels = 4; // Only support 4 channel image
-  array_desc.Flags = 0;       // No flags required
-  array_desc.Width = image_desc->image_width;
-  if (image_desc->image_type == PI_MEM_TYPE_IMAGE1D) {
-    array_desc.Height = 0;
-    array_desc.Depth = 0;
-  } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE2D) {
-    array_desc.Height = image_desc->image_height;
-    array_desc.Depth = 0;
-  } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE3D) {
-    array_desc.Height = image_desc->image_height;
-    array_desc.Depth = image_desc->image_depth;
-  }
-
-  // We need to get this now in bytes for calculating the total image size later
-  size_t pixel_type_size_bytes;
-
-  switch (image_format->image_channel_data_type) {
-  case PI_IMAGE_CHANNEL_TYPE_UNORM_INT8:
-  case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8:
-    array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT8;
-    pixel_type_size_bytes = 1;
-    break;
-  case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8:
-    array_desc.Format = HIP_AD_FORMAT_SIGNED_INT8;
-    pixel_type_size_bytes = 1;
-    break;
-  case PI_IMAGE_CHANNEL_TYPE_UNORM_INT16:
-  case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16:
-    array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT16;
-    pixel_type_size_bytes = 2;
-    break;
-  case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16:
-    array_desc.Format = HIP_AD_FORMAT_SIGNED_INT16;
-    pixel_type_size_bytes = 2;
-    break;
-  case PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT:
-    array_desc.Format = HIP_AD_FORMAT_HALF;
-    pixel_type_size_bytes = 2;
-    break;
-  case PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32:
-    array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT32;
-    pixel_type_size_bytes = 4;
-    break;
-  case PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32:
-    array_desc.Format = HIP_AD_FORMAT_SIGNED_INT32;
-    pixel_type_size_bytes = 4;
-    break;
-  case PI_IMAGE_CHANNEL_TYPE_FLOAT:
-    array_desc.Format = HIP_AD_FORMAT_FLOAT;
-    pixel_type_size_bytes = 4;
-    break;
-  default:
-    sycl::detail::pi::die(
-        "hip_piMemImageCreate given unsupported image_channel_data_type");
-  }
-
-  // When a dimension isn't used image_desc has the size set to 1
-  size_t pixel_size_bytes =
-      pixel_type_size_bytes * 4; // 4 is the only number of channels we support
-  size_t image_size_bytes = pixel_size_bytes * image_desc->image_width *
-                            image_desc->image_height * image_desc->image_depth;
-
-  ScopedContext active(context);
-  hipArray *image_array;
-  retErr = PI_CHECK_ERROR(hipArray3DCreate(
-      reinterpret_cast<hipCUarray *>(&image_array), &array_desc));
-
-  try {
-    if (performInitialCopy) {
-      // We have to use a different copy function for each image dimensionality
-      if (image_desc->image_type == PI_MEM_TYPE_IMAGE1D) {
-        retErr = PI_CHECK_ERROR(
-            hipMemcpyHtoA(image_array, 0, host_ptr, image_size_bytes));
-      } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE2D) {
-        hip_Memcpy2D cpy_desc;
-        memset(&cpy_desc, 0, sizeof(cpy_desc));
-        cpy_desc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
-        cpy_desc.srcHost = host_ptr;
-        cpy_desc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
-        cpy_desc.dstArray = reinterpret_cast<hipCUarray>(image_array);
-        cpy_desc.WidthInBytes = pixel_size_bytes * image_desc->image_width;
-        cpy_desc.Height = image_desc->image_height;
-        retErr = PI_CHECK_ERROR(hipMemcpyParam2D(&cpy_desc));
-      } else if (image_desc->image_type == PI_MEM_TYPE_IMAGE3D) {
-        HIP_MEMCPY3D cpy_desc;
-        memset(&cpy_desc, 0, sizeof(cpy_desc));
-        cpy_desc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
-        cpy_desc.srcHost = host_ptr;
-        cpy_desc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
-        cpy_desc.dstArray = reinterpret_cast<hipCUarray>(image_array);
-        cpy_desc.WidthInBytes = pixel_size_bytes * image_desc->image_width;
-        cpy_desc.Height = image_desc->image_height;
-        cpy_desc.Depth = image_desc->image_depth;
-        retErr = PI_CHECK_ERROR(hipDrvMemcpy3D(&cpy_desc));
-      }
-    }
-
-    // HIP_RESOURCE_DESC is a union of different structs, shown here
-    // We need to fill it as described here to use it for a surface or texture
-    // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and
-    // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array
-    // handle.
-    // HIP_RESOURCE_DESC::flags must be set to zero
-
-    hipResourceDesc image_res_desc;
-    image_res_desc.res.array.array = image_array;
-    image_res_desc.resType = hipResourceTypeArray;
-
-    hipSurfaceObject_t surface;
-    retErr = PI_CHECK_ERROR(hipCreateSurfaceObject(&surface, &image_res_desc));
-
-    auto piMemObj = std::unique_ptr<_pi_mem>(new _pi_mem{
-        context, image_array, surface, image_desc->image_type, host_ptr});
-
-    if (piMemObj == nullptr) {
-      return PI_ERROR_OUT_OF_HOST_MEMORY;
-    }
-
-    *ret_mem = piMemObj.release();
-  } catch (pi_result err) {
-    PI_CHECK_ERROR(hipFreeArray(image_array));
-    return err;
-  } catch (...) {
-    PI_CHECK_ERROR(hipFreeArray(image_array));
-    return PI_ERROR_UNKNOWN;
-  }
-  return retErr;
-}
-
-/// \TODO Not implemented
-pi_result hip_piMemImageGetInfo(pi_mem image, pi_image_info param_name,
-                                size_t param_value_size, void *param_value,
-                                size_t *param_value_size_ret) {
-  (void)image;
-  (void)param_name;
-  (void)param_value_size;
-  (void)param_value;
-  (void)param_value_size_ret;
-
-  sycl::detail::pi::die("hip_piMemImageGetInfo not implemented");
-  return {};
-}
-
-pi_result hip_piMemRetain(pi_mem mem) {
-  assert(mem != nullptr);
-  assert(mem->get_reference_count() > 0);
-  mem->increment_reference_count();
-  return PI_SUCCESS;
-}
-
-/// Not used as HIP backend only creates programs from binary.
-/// See \ref hip_piclProgramCreateWithBinary.
-///
-pi_result hip_piclProgramCreateWithSource(pi_context context, pi_uint32 count,
-                                          const char **strings,
-                                          const size_t *lengths,
-                                          pi_program *program) {
-  (void)context;
-  (void)count;
-  (void)strings;
-  (void)lengths;
-  (void)program;
-
-  sycl::detail::pi::hipPrint("hip_piclProgramCreateWithSource not implemented");
-  return PI_ERROR_INVALID_OPERATION;
-}
-
-/// Loads the images from a PI program into a HIPmodule that can be
-/// used later on to extract functions (kernels).
-/// See \ref _pi_program for implementation details.
-///
-pi_result hip_piProgramBuild(
-    pi_program program, [[maybe_unused]] pi_uint32 num_devices,
-    [[maybe_unused]] const pi_device *device_list, const char *options,
-    [[maybe_unused]] void (*pfn_notify)(pi_program program, void *user_data),
-    [[maybe_unused]] void *user_data) {
-
-  assert(program != nullptr);
-  assert(num_devices == 1 || num_devices == 0);
-  assert(device_list != nullptr || num_devices == 0);
-  assert(pfn_notify == nullptr);
-  assert(user_data == nullptr);
-  pi_result retError = PI_SUCCESS;
-
-  try {
-    ScopedContext active(program->get_context());
-
-    program->build_program(options);
-
-  } catch (pi_result err) {
-    retError = err;
-  }
-  return retError;
-}
-
-/// \TODO Not implemented
-pi_result hip_piProgramCreate(pi_context context, const void *il, size_t length,
-                              pi_program *res_program) {
-  (void)context;
-  (void)il;
-  (void)length;
-  (void)res_program;
-
-  sycl::detail::pi::die("hip_piProgramCreate not implemented");
-  return {};
-}
-
-/// Loads images from a list of PTX or HIPBIN binaries.
-/// Note: No calls to HIP driver API in this function, only store binaries
-/// for later.
-///
-/// Note: Only supports one device
-///
-pi_result hip_piProgramCreateWithBinary(
-    pi_context context, [[maybe_unused]] pi_uint32 num_devices,
-    [[maybe_unused]] const pi_device *device_list, const size_t *lengths,
-    const unsigned char **binaries, size_t num_metadata_entries,
-    const pi_device_binary_property *metadata, pi_int32 *binary_status,
-    pi_program *program) {
-  (void)num_metadata_entries;
-  (void)metadata;
-  (void)binary_status;
-
-  assert(context != nullptr);
-  assert(binaries != nullptr);
-  assert(program != nullptr);
-  assert(device_list != nullptr);
-  assert(num_devices == 1 && "HIP contexts are for a single device");
-  assert((context->get_device()->get() == device_list[0]->get()) &&
-         "Mismatch between devices context and passed context when creating "
-         "program from binary");
-
-  pi_result retError = PI_SUCCESS;
-
-  std::unique_ptr<_pi_program> retProgram{new _pi_program{context}};
-
-  // TODO: Set metadata here and use reqd_work_group_size information.
-  // See cuda_piProgramCreateWithBinary
-
-  const bool has_length = (lengths != nullptr);
-  size_t length = has_length
-                      ? lengths[0]
-                      : strlen(reinterpret_cast<const char *>(binaries[0])) + 1;
-
-  assert(length != 0);
-
-  retProgram->set_binary(reinterpret_cast<const char *>(binaries[0]), length);
-
-  *program = retProgram.release();
-
-  return retError;
-}
-
-pi_result hip_piProgramGetInfo(pi_program program, pi_program_info param_name,
-                               size_t param_value_size, void *param_value,
-                               size_t *param_value_size_ret) {
-  assert(program != nullptr);
-
-  switch (param_name) {
-  case PI_PROGRAM_INFO_REFERENCE_COUNT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   program->get_reference_count());
-  case PI_PROGRAM_INFO_CONTEXT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   program->context_);
-  case PI_PROGRAM_INFO_NUM_DEVICES:
-    return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
-  case PI_PROGRAM_INFO_DEVICES:
-    return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
-                        &program->context_->deviceId_);
-  case PI_PROGRAM_INFO_SOURCE:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   program->binary_);
-  case PI_PROGRAM_INFO_BINARY_SIZES:
-    return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
-                        &program->binarySizeInBytes_);
-  case PI_PROGRAM_INFO_BINARIES:
-    return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
-                        &program->binary_);
-  case PI_PROGRAM_INFO_KERNEL_NAMES: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   getKernelNames(program).c_str());
-  }
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-  sycl::detail::pi::die("Program info request not implemented");
-  return {};
-}
-
-pi_result hip_piProgramLink(pi_context context, pi_uint32 num_devices,
-                            const pi_device *device_list, const char *options,
-                            pi_uint32 num_input_programs,
-                            const pi_program *input_programs,
-                            void (*pfn_notify)(pi_program program,
-                                               void *user_data),
-                            void *user_data, pi_program *ret_program) {
-  (void)context;
-  (void)num_devices;
-  (void)device_list;
-  (void)options;
-  (void)num_input_programs;
-  (void)input_programs;
-  (void)pfn_notify;
-  (void)user_data;
-  (void)ret_program;
-  sycl::detail::pi::die(
-      "hip_piProgramLink: linking not supported with hip backend");
-  return {};
-}
-
-/// Creates a new program that is the outcome of the compilation of the headers
-///  and the program.
-/// \TODO Implement asynchronous compilation
-///
-pi_result hip_piProgramCompile(
-    pi_program program, [[maybe_unused]] pi_uint32 num_devices,
-    [[maybe_unused]] const pi_device *device_list, const char *options,
-    [[maybe_unused]] pi_uint32 num_input_headers,
-    const pi_program *input_headers, const char **header_include_names,
-    [[maybe_unused]] void (*pfn_notify)(pi_program program, void *user_data),
-    [[maybe_unused]] void *user_data) {
-  (void)input_headers;
-  (void)header_include_names;
-
-  assert(program != nullptr);
-  assert(num_devices == 1 || num_devices == 0);
-  assert(device_list != nullptr || num_devices == 0);
-  assert(pfn_notify == nullptr);
-  assert(user_data == nullptr);
-  assert(num_input_headers == 0);
-  pi_result retError = PI_SUCCESS;
-
-  try {
-    ScopedContext active(program->get_context());
-
-    program->build_program(options);
-
-  } catch (pi_result err) {
-    retError = err;
-  }
-  return retError;
-}
-
-pi_result hip_piProgramGetBuildInfo(pi_program program, pi_device device,
-                                    pi_program_build_info param_name,
-                                    size_t param_value_size, void *param_value,
-                                    size_t *param_value_size_ret) {
-  (void)device;
-
-  assert(program != nullptr);
-
-  switch (param_name) {
-  case PI_PROGRAM_BUILD_INFO_STATUS: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   program->buildStatus_);
-  }
-  case PI_PROGRAM_BUILD_INFO_OPTIONS:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   program->buildOptions_.c_str());
-  case PI_PROGRAM_BUILD_INFO_LOG:
-    return getInfoArray(program->MAX_LOG_SIZE, param_value_size, param_value,
-                        param_value_size_ret, program->infoLog_);
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-  sycl::detail::pi::die("Program Build info request not implemented");
-  return {};
-}
-
-pi_result hip_piProgramRetain(pi_program program) {
-  assert(program != nullptr);
-  assert(program->get_reference_count() > 0);
-  program->increment_reference_count();
-  return PI_SUCCESS;
-}
-
-/// Decreases the reference count of a pi_program object.
-/// When the reference count reaches 0, it unloads the module from
-/// the context.
-pi_result hip_piProgramRelease(pi_program program) {
-  assert(program != nullptr);
-
-  // double delete or someone is messing with the ref count.
-  // either way, cannot safely proceed.
-  assert(program->get_reference_count() != 0 &&
-         "Reference count overflow detected in hip_piProgramRelease.");
-
-  // decrement ref count. If it is 0, delete the program.
-  if (program->decrement_reference_count() == 0) {
-
-    std::unique_ptr<_pi_program> program_ptr{program};
-
-    pi_result result = PI_ERROR_INVALID_PROGRAM;
-
-    try {
-      ScopedContext active(program->get_context());
-      auto hipModule = program->get();
-      result = PI_CHECK_ERROR(hipModuleUnload(hipModule));
-    } catch (...) {
-      result = PI_ERROR_OUT_OF_RESOURCES;
-    }
-
-    return result;
-  }
-
-  return PI_SUCCESS;
-}
-
-/// Gets the native HIP handle of a PI program object
-///
-/// \param[in] program The PI program to get the native HIP object of.
-/// \param[out] nativeHandle Set to the native handle of the PI program object.
-///
-/// \return TBD
-pi_result hip_piextProgramGetNativeHandle(pi_program program,
-                                          pi_native_handle *nativeHandle) {
-  *nativeHandle = reinterpret_cast<pi_native_handle>(program->get());
-  return PI_SUCCESS;
-}
-
-/// Created a PI program object from a HIP program handle.
-/// TODO: Implement this.
-/// NOTE: The created PI object takes ownership of the native handle.
-///
-/// \param[in] nativeHandle The native handle to create PI program object from.
-/// \param[in] context The PI context of the program.
-/// \param[in] ownNativeHandle tells if should assume the ownership of
-///            the native handle.
-/// \param[out] program Set to the PI program object created from native handle.
-///
-/// \return TBD
-pi_result hip_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle,
-                                                 pi_context context,
-                                                 bool ownNativeHandle,
-                                                 pi_program *program) {
-  (void)nativeHandle;
-  (void)context;
-  (void)ownNativeHandle;
-  (void)program;
-
-  sycl::detail::pi::die(
-      "Creation of PI program from native handle not implemented");
-  return {};
-}
-
-pi_result hip_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name,
-                              size_t param_value_size, void *param_value,
-                              size_t *param_value_size_ret) {
-
-  if (kernel != nullptr) {
-
-    switch (param_name) {
-    case PI_KERNEL_INFO_FUNCTION_NAME:
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     kernel->get_name());
-    case PI_KERNEL_INFO_NUM_ARGS:
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     kernel->get_num_args());
-    case PI_KERNEL_INFO_REFERENCE_COUNT:
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     kernel->get_reference_count());
-    case PI_KERNEL_INFO_CONTEXT: {
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     kernel->get_context());
-    }
-    case PI_KERNEL_INFO_PROGRAM: {
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     kernel->get_program());
-    }
-    case PI_KERNEL_INFO_ATTRIBUTES: {
-      return getInfo(param_value_size, param_value, param_value_size_ret, "");
-    }
-    default: {
-      __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-    }
-    }
-  }
-
-  return PI_ERROR_INVALID_KERNEL;
-}
-
-pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
-                                   pi_kernel_group_info param_name,
-                                   size_t param_value_size, void *param_value,
-                                   size_t *param_value_size_ret) {
-
-  // here we want to query about a kernel's hip blocks!
-
-  if (kernel != nullptr) {
-
-    switch (param_name) {
-    case PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
-      size_t global_work_size[3] = {0, 0, 0};
-
-      int max_block_dimX{0}, max_block_dimY{0}, max_block_dimZ{0};
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&max_block_dimX, hipDeviceAttributeMaxBlockDimX,
-                                device->get()) == hipSuccess);
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&max_block_dimY, hipDeviceAttributeMaxBlockDimY,
-                                device->get()) == hipSuccess);
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&max_block_dimZ, hipDeviceAttributeMaxBlockDimZ,
-                                device->get()) == hipSuccess);
-
-      int max_grid_dimX{0}, max_grid_dimY{0}, max_grid_dimZ{0};
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&max_grid_dimX, hipDeviceAttributeMaxGridDimX,
-                                device->get()) == hipSuccess);
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&max_grid_dimY, hipDeviceAttributeMaxGridDimY,
-                                device->get()) == hipSuccess);
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&max_grid_dimZ, hipDeviceAttributeMaxGridDimZ,
-                                device->get()) == hipSuccess);
-
-      global_work_size[0] = max_block_dimX * max_grid_dimX;
-      global_work_size[1] = max_block_dimY * max_grid_dimY;
-      global_work_size[2] = max_block_dimZ * max_grid_dimZ;
-      return getInfoArray(3, param_value_size, param_value,
-                          param_value_size_ret, global_work_size);
-    }
-    case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: {
-      int max_threads = 0;
-      sycl::detail::pi::assertion(
-          hipFuncGetAttribute(&max_threads,
-                              HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
-                              kernel->get()) == hipSuccess);
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     size_t(max_threads));
-    }
-    case PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
-      // Returns the work-group size specified in the kernel source or IL.
-      // If the work-group size is not specified in the kernel source or IL,
-      // (0, 0, 0) is returned.
-      // https://www.khronos.org/registry/OpenCL/sdk/2.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
-
-      // TODO: can we extract the work group size from the PTX?
-      size_t group_size[3] = {0, 0, 0};
-      return getInfoArray(3, param_value_size, param_value,
-                          param_value_size_ret, group_size);
-    }
-    case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: {
-      // OpenCL LOCAL == HIP SHARED
-      int bytes = 0;
-      sycl::detail::pi::assertion(
-          hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
-                              kernel->get()) == hipSuccess);
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     pi_uint64(bytes));
-    }
-    case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
-      // Work groups should be multiples of the warp size
-      int warpSize = 0;
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
-                                device->get()) == hipSuccess);
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     static_cast<size_t>(warpSize));
-    }
-    case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
-      // OpenCL PRIVATE == HIP LOCAL
-      int bytes = 0;
-      sycl::detail::pi::assertion(
-          hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
-                              kernel->get()) == hipSuccess);
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     pi_uint64(bytes));
-    }
-    case PI_KERNEL_GROUP_INFO_NUM_REGS: {
-      sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in "
-                            "piKernelGetGroupInfo not implemented\n");
-      return {};
-    }
-
-    default:
-      __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-    }
-  }
-
-  return PI_ERROR_INVALID_KERNEL;
-}
-
-pi_result hip_piKernelGetSubGroupInfo(
-    pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name,
-    size_t input_value_size, const void *input_value, size_t param_value_size,
-    void *param_value, size_t *param_value_size_ret) {
-  (void)input_value_size;
-  (void)input_value;
-
-  if (kernel != nullptr) {
-    switch (param_name) {
-    case PI_KERNEL_MAX_SUB_GROUP_SIZE: {
-      // Sub-group size is equivalent to warp size
-      int warpSize = 0;
-      sycl::detail::pi::assertion(
-          hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
-                                device->get()) == hipSuccess);
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     static_cast<uint32_t>(warpSize));
-    }
-    case PI_KERNEL_MAX_NUM_SUB_GROUPS: {
-      // Number of sub-groups = max block size / warp size + possible remainder
-      int max_threads = 0;
-      sycl::detail::pi::assertion(
-          hipFuncGetAttribute(&max_threads,
-                              HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
-                              kernel->get()) == hipSuccess);
-      int warpSize = 0;
-      hip_piKernelGetSubGroupInfo(kernel, device, PI_KERNEL_MAX_SUB_GROUP_SIZE,
-                                  0, nullptr, sizeof(uint32_t), &warpSize,
-                                  nullptr);
-      int maxWarps = (max_threads + warpSize - 1) / warpSize;
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     static_cast<uint32_t>(maxWarps));
-    }
-    case PI_KERNEL_COMPILE_NUM_SUB_GROUPS: {
-      // Return value of 0 => not specified
-      // TODO: Revisit if PTX is generated for compile-time work-group sizes
-      return getInfo(param_value_size, param_value, param_value_size_ret, 0);
-    }
-    case PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: {
-      // Return value of 0 => unspecified or "auto" sub-group size
-      // Correct for now, since warp size may be read from special register
-      // TODO: Return warp size once default is primary sub-group size
-      // TODO: Revisit if we can recover [[sub_group_size]] attribute from PTX
-      return getInfo(param_value_size, param_value, param_value_size_ret, 0);
-    }
-    default:
-      __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-    }
-  }
-  return PI_ERROR_INVALID_KERNEL;
-}
-
-pi_result hip_piKernelRetain(pi_kernel kernel) {
-  assert(kernel != nullptr);
-  assert(kernel->get_reference_count() > 0u);
-
-  kernel->increment_reference_count();
-  return PI_SUCCESS;
-}
-
-pi_result hip_piKernelRelease(pi_kernel kernel) {
-  assert(kernel != nullptr);
-
-  // double delete or someone is messing with the ref count.
-  // either way, cannot safely proceed.
-  assert(kernel->get_reference_count() != 0 &&
-         "Reference count overflow detected in hip_piKernelRelease.");
-
-  // decrement ref count. If it is 0, delete the program.
-  if (kernel->decrement_reference_count() == 0) {
-    // no internal hip resources to clean up. Just delete it.
-    delete kernel;
-    return PI_SUCCESS;
-  }
-
-  return PI_SUCCESS;
-}
-
-// A NOP for the HIP backend
-pi_result hip_piKernelSetExecInfo(pi_kernel kernel,
-                                  pi_kernel_exec_info param_name,
-                                  size_t param_value_size,
-                                  const void *param_value) {
-  (void)kernel;
-  (void)param_name;
-  (void)param_value_size;
-  (void)param_value;
-
-  return PI_SUCCESS;
-}
-
-pi_result hip_piextProgramSetSpecializationConstant(pi_program, pi_uint32,
-                                                    size_t, const void *) {
-  // This entry point is only used for native specialization constants (SPIR-V),
-  // and the HIP plugin is AOT only so this entry point is not supported.
-  sycl::detail::pi::die("Native specialization constants are not supported");
-  return {};
-}
-
-pi_result hip_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index,
-                                       size_t arg_size, const void *arg_value) {
-  kernel->set_kernel_arg(arg_index, arg_size, arg_value);
-  return PI_SUCCESS;
-}
-
-//
-// Events
-//
-pi_result hip_piEventCreate(pi_context context, pi_event *event) {
-  (void)context;
-  (void)event;
-
-  sycl::detail::pi::die("PI Event Create not implemented in HIP backend");
-}
-
-pi_result hip_piEventGetInfo(pi_event event, pi_event_info param_name,
-                             size_t param_value_size, void *param_value,
-                             size_t *param_value_size_ret) {
-  assert(event != nullptr);
-
-  switch (param_name) {
-  case PI_EVENT_INFO_COMMAND_QUEUE:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   event->get_queue());
-  case PI_EVENT_INFO_COMMAND_TYPE:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   event->get_command_type());
-  case PI_EVENT_INFO_REFERENCE_COUNT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   event->get_reference_count());
-  case PI_EVENT_INFO_COMMAND_EXECUTION_STATUS: {
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   static_cast<pi_event_status>(event->get_execution_status()));
-  }
-  case PI_EVENT_INFO_CONTEXT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   event->get_context());
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-
-  return PI_ERROR_INVALID_EVENT;
-}
-
-/// Obtain profiling information from PI HIP events
-/// Timings from HIP are only elapsed time.
-pi_result hip_piEventGetProfilingInfo(pi_event event,
-                                      pi_profiling_info param_name,
-                                      size_t param_value_size,
-                                      void *param_value,
-                                      size_t *param_value_size_ret) {
-
-  assert(event != nullptr);
-
-  pi_queue queue = event->get_queue();
-  if (queue == nullptr ||
-      !(queue->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE)) {
-    return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE;
-  }
-
-  switch (param_name) {
-  case PI_PROFILING_INFO_COMMAND_QUEUED:
-  case PI_PROFILING_INFO_COMMAND_SUBMIT:
-    // Note: No user for this case
-    return getInfo<pi_uint64>(param_value_size, param_value,
-                              param_value_size_ret, event->get_queued_time());
-  case PI_PROFILING_INFO_COMMAND_START:
-    return getInfo<pi_uint64>(param_value_size, param_value,
-                              param_value_size_ret, event->get_start_time());
-  case PI_PROFILING_INFO_COMMAND_END:
-    return getInfo<pi_uint64>(param_value_size, param_value,
-                              param_value_size_ret, event->get_end_time());
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-  sycl::detail::pi::die("Event Profiling info request not implemented");
-  return {};
-}
-
-pi_result hip_piEventSetCallback(pi_event event,
-                                 pi_int32 command_exec_callback_type,
-                                 pfn_notify notify, void *user_data) {
-  (void)event;
-  (void)command_exec_callback_type;
-  (void)notify;
-  (void)user_data;
-
-  sycl::detail::pi::die("Event Callback not implemented in HIP backend");
-  return PI_SUCCESS;
-}
-
-pi_result hip_piEventSetStatus(pi_event event, pi_int32 execution_status) {
-  (void)event;
-  (void)execution_status;
-
-  sycl::detail::pi::die("Event Set Status not implemented in HIP backend");
-  return PI_ERROR_INVALID_VALUE;
-}
-
-pi_result hip_piEventRetain(pi_event event) {
-  assert(event != nullptr);
-
-  const auto refCount = event->increment_reference_count();
-
-  sycl::detail::pi::assertion(
-      refCount != 0, "Reference count overflow detected in hip_piEventRetain.");
-
-  return PI_SUCCESS;
-}
-
-pi_result hip_piEventRelease(pi_event event) {
-  assert(event != nullptr);
-
-  // double delete or someone is messing with the ref count.
-  // either way, cannot safely proceed.
-  sycl::detail::pi::assertion(
-      event->get_reference_count() != 0,
-      "Reference count overflow detected in hip_piEventRelease.");
-
-  // decrement ref count. If it is 0, delete the event.
-  if (event->decrement_reference_count() == 0) {
-    std::unique_ptr<_pi_event> event_ptr{event};
-    pi_result result = PI_ERROR_INVALID_EVENT;
-    try {
-      ScopedContext active(event->get_context());
-      result = event->release();
-    } catch (...) {
-      result = PI_ERROR_OUT_OF_RESOURCES;
-    }
-    return result;
-  }
-
-  return PI_SUCCESS;
-}
-
-/// Enqueues a wait on the given queue for all events.
-/// See \ref enqueueEventWait
-///
-/// Currently queues are represented by a single in-order stream, therefore
-/// every command is an implicit barrier and so hip_piEnqueueEventsWait has the
-/// same behavior as hip_piEnqueueEventsWaitWithBarrier. So
-/// hip_piEnqueueEventsWait can just call hip_piEnqueueEventsWaitWithBarrier.
-pi_result hip_piEnqueueEventsWait(pi_queue command_queue,
-                                  pi_uint32 num_events_in_wait_list,
-                                  const pi_event *event_wait_list,
-                                  pi_event *event) {
-  return hip_piEnqueueEventsWaitWithBarrier(
-      command_queue, num_events_in_wait_list, event_wait_list, event);
-}
-
-/// Enqueues a wait on the given queue for all specified events.
-/// See \ref enqueueEventWaitWithBarrier
-///
-/// If the events list is empty, the enqueued wait will wait on all previous
-/// events in the queue.
-pi_result hip_piEnqueueEventsWaitWithBarrier(pi_queue command_queue,
-                                             pi_uint32 num_events_in_wait_list,
-                                             const pi_event *event_wait_list,
-                                             pi_event *event) {
-  if (!command_queue) {
-    return PI_ERROR_INVALID_QUEUE;
-  }
-
-  pi_result result;
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    pi_uint32 stream_token;
-    _pi_stream_guard guard;
-    hipStream_t hipStream = command_queue->get_next_compute_stream(
-        num_events_in_wait_list, event_wait_list, guard, &stream_token);
-    {
-      std::lock_guard<std::mutex> guard(command_queue->barrier_mutex_);
-      if (command_queue->barrier_event_ == nullptr) {
-        PI_CHECK_ERROR(hipEventCreate(&command_queue->barrier_event_));
-      }
-      if (num_events_in_wait_list == 0) { //  wait on all work
-        if (command_queue->barrier_tmp_event_ == nullptr) {
-          PI_CHECK_ERROR(hipEventCreate(&command_queue->barrier_tmp_event_));
-        }
-        command_queue->sync_streams(
-            [hipStream,
-             tmp_event = command_queue->barrier_tmp_event_](hipStream_t s) {
-              if (hipStream != s) {
-                PI_CHECK_ERROR(hipEventRecord(tmp_event, s));
-                PI_CHECK_ERROR(hipStreamWaitEvent(hipStream, tmp_event, 0));
-              }
-            });
-      } else { // wait just on given events
-        forLatestEvents(event_wait_list, num_events_in_wait_list,
-                        [hipStream](pi_event event) -> pi_result {
-                          if (event->get_queue()->has_been_synchronized(
-                                  event->get_compute_stream_token())) {
-                            return PI_SUCCESS;
-                          } else {
-                            return PI_CHECK_ERROR(
-                                hipStreamWaitEvent(hipStream, event->get(), 0));
-                          }
-                        });
-      }
-
-      result = PI_CHECK_ERROR(
-          hipEventRecord(command_queue->barrier_event_, hipStream));
-      for (unsigned int i = 0;
-           i < command_queue->compute_applied_barrier_.size(); i++) {
-        command_queue->compute_applied_barrier_[i] = false;
-      }
-      for (unsigned int i = 0;
-           i < command_queue->transfer_applied_barrier_.size(); i++) {
-        command_queue->transfer_applied_barrier_[i] = false;
-      }
-    }
-    if (result != PI_SUCCESS) {
-      return result;
-    }
-
-    if (event) {
-      *event = _pi_event::make_native(PI_COMMAND_TYPE_MARKER, command_queue,
-                                      hipStream, stream_token);
-      (*event)->start();
-      (*event)->record();
-    }
-
-    return PI_SUCCESS;
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_UNKNOWN;
-  }
-}
-
-/// Gets the native HIP handle of a PI event object
-///
-/// \param[in] event The PI event to get the native HIP object of.
-/// \param[out] nativeHandle Set to the native handle of the PI event object.
-///
-/// \return PI_SUCCESS on success. PI_ERROR_INVALID_EVENT if given a user event.
-pi_result hip_piextEventGetNativeHandle(pi_event event,
-                                        pi_native_handle *nativeHandle) {
-  *nativeHandle = reinterpret_cast<pi_native_handle>(event->get());
-  return PI_SUCCESS;
-}
-
-/// Created a PI event object from a HIP event handle.
-/// TODO: Implement this.
-/// NOTE: The created PI object takes ownership of the native handle.
-///
-/// \param[in] nativeHandle The native handle to create PI event object from.
-/// \param[out] event Set to the PI event object created from native handle.
-///
-/// \return TBD
-pi_result hip_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle,
-                                               pi_context context,
-                                               bool ownNativeHandle,
-                                               pi_event *event) {
-  (void)nativeHandle;
-  (void)context;
-  (void)ownNativeHandle;
-  (void)event;
-
-  sycl::detail::pi::die(
-      "Creation of PI event from native handle not implemented");
-  return {};
-}
-
-/// Creates a PI sampler object
-///
-/// \param[in] context The context the sampler is created for.
-/// \param[in] sampler_properties The properties for the sampler.
-/// \param[out] result_sampler Set to the resulting sampler object.
-///
-/// \return PI_SUCCESS on success. PI_ERROR_INVALID_VALUE if given an invalid
-/// property
-///         or if there is multiple of properties from the same category.
-pi_result hip_piSamplerCreate(pi_context context,
-                              const pi_sampler_properties *sampler_properties,
-                              pi_sampler *result_sampler) {
-  std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler(context)};
-
-  bool propSeen[3] = {false, false, false};
-  for (size_t i = 0; sampler_properties[i] != 0; i += 2) {
-    switch (sampler_properties[i]) {
-    case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS:
-      if (propSeen[0]) {
-        return PI_ERROR_INVALID_VALUE;
-      }
-      propSeen[0] = true;
-      retImplSampl->props_ |= sampler_properties[i + 1];
-      break;
-    case PI_SAMPLER_PROPERTIES_FILTER_MODE:
-      if (propSeen[1]) {
-        return PI_ERROR_INVALID_VALUE;
-      }
-      propSeen[1] = true;
-      retImplSampl->props_ |=
-          (sampler_properties[i + 1] - PI_SAMPLER_FILTER_MODE_NEAREST) << 1;
-      break;
-    case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE:
-      if (propSeen[2]) {
-        return PI_ERROR_INVALID_VALUE;
-      }
-      propSeen[2] = true;
-      retImplSampl->props_ |=
-          (sampler_properties[i + 1] - PI_SAMPLER_ADDRESSING_MODE_NONE) << 2;
-      break;
-    default:
-      return PI_ERROR_INVALID_VALUE;
-    }
-  }
-
-  if (!propSeen[0]) {
-    retImplSampl->props_ |= PI_TRUE;
-  }
-  // Default filter mode to CL_FILTER_NEAREST
-  if (!propSeen[2]) {
-    retImplSampl->props_ |=
-        (PI_SAMPLER_ADDRESSING_MODE_CLAMP % PI_SAMPLER_ADDRESSING_MODE_NONE)
-        << 2;
-  }
-
-  *result_sampler = retImplSampl.release();
-  return PI_SUCCESS;
-}
-
-/// Gets information from a PI sampler object
-///
-/// \param[in] sampler The sampler to get the information from.
-/// \param[in] param_name The name of the information to get.
-/// \param[in] param_value_size The size of the param_value.
-/// \param[out] param_value Set to information value.
-/// \param[out] param_value_size_ret Set to the size of the information value.
-///
-/// \return PI_SUCCESS on success.
-pi_result hip_piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name,
-                               size_t param_value_size, void *param_value,
-                               size_t *param_value_size_ret) {
-  assert(sampler != nullptr);
-
-  switch (param_name) {
-  case PI_SAMPLER_INFO_REFERENCE_COUNT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   sampler->get_reference_count());
-  case PI_SAMPLER_INFO_CONTEXT:
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   sampler->context_);
-  case PI_SAMPLER_INFO_NORMALIZED_COORDS: {
-    pi_bool norm_coords_prop = static_cast<pi_bool>(sampler->props_ & 0x1);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   norm_coords_prop);
-  }
-  case PI_SAMPLER_INFO_FILTER_MODE: {
-    pi_sampler_filter_mode filter_prop = static_cast<pi_sampler_filter_mode>(
-        ((sampler->props_ >> 1) & 0x1) + PI_SAMPLER_FILTER_MODE_NEAREST);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   filter_prop);
-  }
-  case PI_SAMPLER_INFO_ADDRESSING_MODE: {
-    pi_sampler_addressing_mode addressing_prop =
-        static_cast<pi_sampler_addressing_mode>(
-            (sampler->props_ >> 2) + PI_SAMPLER_ADDRESSING_MODE_NONE);
-    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   addressing_prop);
-  }
-  default:
-    __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
-  }
-  return {};
-}
-
-/// Retains a PI sampler object, incrementing its reference count.
-///
-/// \param[in] sampler The sampler to increment the reference count of.
-///
-/// \return PI_SUCCESS.
-pi_result hip_piSamplerRetain(pi_sampler sampler) {
-  assert(sampler != nullptr);
-  sampler->increment_reference_count();
-  return PI_SUCCESS;
-}
-
-/// Releases a PI sampler object, decrementing its reference count. If the
-/// reference count reaches zero, the sampler object is destroyed.
-///
-/// \param[in] sampler The sampler to decrement the reference count of.
-///
-/// \return PI_SUCCESS.
-pi_result hip_piSamplerRelease(pi_sampler sampler) {
-  assert(sampler != nullptr);
-
-  // double delete or someone is messing with the ref count.
-  // either way, cannot safely proceed.
-  sycl::detail::pi::assertion(
-      sampler->get_reference_count() != 0,
-      "Reference count overflow detected in hip_piSamplerRelease.");
-
-  // decrement ref count. If it is 0, delete the sampler.
-  if (sampler->decrement_reference_count() == 0) {
-    delete sampler;
-  }
-
-  return PI_SUCCESS;
-}
-
-/// General 3D memory copy operation.
-/// This function requires the corresponding HIP context to be at the top of
-/// the context stack
-/// If the source and/or destination is on the device, src_ptr and/or dst_ptr
-/// must be a pointer to a hipDevPtr
-static pi_result commonEnqueueMemBufferCopyRect(
-    hipStream_t hip_stream, pi_buff_rect_region region, const void *src_ptr,
-    const hipMemoryType src_type, pi_buff_rect_offset src_offset,
-    size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr,
-    const hipMemoryType dst_type, pi_buff_rect_offset dst_offset,
-    size_t dst_row_pitch, size_t dst_slice_pitch) {
-
-  assert(region != nullptr);
-  assert(src_offset != nullptr);
-  assert(dst_offset != nullptr);
-
-  assert(src_type == hipMemoryTypeDevice || src_type == hipMemoryTypeHost);
-  assert(dst_type == hipMemoryTypeDevice || dst_type == hipMemoryTypeHost);
-
-  src_row_pitch = (!src_row_pitch) ? region->width_bytes : src_row_pitch;
-  src_slice_pitch = (!src_slice_pitch) ? (region->height_scalar * src_row_pitch)
-                                       : src_slice_pitch;
-  dst_row_pitch = (!dst_row_pitch) ? region->width_bytes : dst_row_pitch;
-  dst_slice_pitch = (!dst_slice_pitch) ? (region->height_scalar * dst_row_pitch)
-                                       : dst_slice_pitch;
-
-  HIP_MEMCPY3D params;
-
-  params.WidthInBytes = region->width_bytes;
-  params.Height = region->height_scalar;
-  params.Depth = region->depth_scalar;
-
-  params.srcMemoryType = src_type;
-  params.srcDevice = src_type == hipMemoryTypeDevice
-                         ? *static_cast<const hipDeviceptr_t *>(src_ptr)
-                         : 0;
-  params.srcHost = src_type == hipMemoryTypeHost ? src_ptr : nullptr;
-  params.srcXInBytes = src_offset->x_bytes;
-  params.srcY = src_offset->y_scalar;
-  params.srcZ = src_offset->z_scalar;
-  params.srcPitch = src_row_pitch;
-  params.srcHeight = src_slice_pitch / src_row_pitch;
-
-  params.dstMemoryType = dst_type;
-  params.dstDevice = dst_type == hipMemoryTypeDevice
-                         ? *reinterpret_cast<hipDeviceptr_t *>(dst_ptr)
-                         : 0;
-  params.dstHost = dst_type == hipMemoryTypeHost ? dst_ptr : nullptr;
-  params.dstXInBytes = dst_offset->x_bytes;
-  params.dstY = dst_offset->y_scalar;
-  params.dstZ = dst_offset->z_scalar;
-  params.dstPitch = dst_row_pitch;
-  params.dstHeight = dst_slice_pitch / dst_row_pitch;
-
-  return PI_CHECK_ERROR(hipDrvMemcpy3DAsync(&params, hip_stream));
-
-  return PI_SUCCESS;
-}
-
-pi_result hip_piEnqueueMemBufferReadRect(
-    pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
-    pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
-    pi_buff_rect_region region, size_t buffer_row_pitch,
-    size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
-    void *ptr, pi_uint32 num_events_in_wait_list,
-    const pi_event *event_wait_list, pi_event *event) {
-
-  assert(buffer != nullptr);
-  assert(command_queue != nullptr);
-
-  pi_result retErr = PI_SUCCESS;
-  void *devPtr = buffer->mem_.buffer_mem_.get_void();
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-
-    retErr = enqueueEventsWait(command_queue, hipStream,
-                               num_events_in_wait_list, event_wait_list);
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT, command_queue, hipStream));
-      retImplEv->start();
-    }
-
-    retErr = commonEnqueueMemBufferCopyRect(
-        hipStream, region, &devPtr, hipMemoryTypeDevice, buffer_offset,
-        buffer_row_pitch, buffer_slice_pitch, ptr, hipMemoryTypeHost,
-        host_offset, host_row_pitch, host_slice_pitch);
-
-    if (event) {
-      retErr = retImplEv->record();
-    }
-
-    if (blocking_read) {
-      retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
-    }
-
-    if (event) {
-      *event = retImplEv.release();
-    }
-
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piEnqueueMemBufferWriteRect(
-    pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
-    pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
-    pi_buff_rect_region region, size_t buffer_row_pitch,
-    size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
-    const void *ptr, pi_uint32 num_events_in_wait_list,
-    const pi_event *event_wait_list, pi_event *event) {
-
-  assert(buffer != nullptr);
-  assert(command_queue != nullptr);
-
-  pi_result retErr = PI_SUCCESS;
-  void *devPtr = buffer->mem_.buffer_mem_.get_void();
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-    retErr = enqueueEventsWait(command_queue, hipStream,
-                               num_events_in_wait_list, event_wait_list);
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT, command_queue, hipStream));
-      retImplEv->start();
-    }
-
-    retErr = commonEnqueueMemBufferCopyRect(
-        hipStream, region, ptr, hipMemoryTypeHost, host_offset, host_row_pitch,
-        host_slice_pitch, &devPtr, hipMemoryTypeDevice, buffer_offset,
-        buffer_row_pitch, buffer_slice_pitch);
-
-    if (event) {
-      retErr = retImplEv->record();
-    }
-
-    if (blocking_write) {
-      retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
-    }
-
-    if (event) {
-      *event = retImplEv.release();
-    }
-
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
-                                     pi_mem dst_buffer, size_t src_offset,
-                                     size_t dst_offset, size_t size,
-                                     pi_uint32 num_events_in_wait_list,
-                                     const pi_event *event_wait_list,
-                                     pi_event *event) {
-  if (!command_queue) {
-    return PI_ERROR_INVALID_QUEUE;
-  }
-
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    pi_result result;
-    auto stream = command_queue->get_next_transfer_stream();
-
-    if (event_wait_list) {
-      result = enqueueEventsWait(command_queue, stream, num_events_in_wait_list,
-                                 event_wait_list);
-    }
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_COPY, command_queue, stream));
-      result = retImplEv->start();
-    }
-
-    auto src = src_buffer->mem_.buffer_mem_.get_with_offset(src_offset);
-    auto dst = dst_buffer->mem_.buffer_mem_.get_with_offset(dst_offset);
-
-    result = PI_CHECK_ERROR(hipMemcpyDtoDAsync(dst, src, size, stream));
-
-    if (event) {
-      result = retImplEv->record();
-      *event = retImplEv.release();
-    }
-
-    return result;
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_UNKNOWN;
-  }
-}
-
-pi_result hip_piEnqueueMemBufferCopyRect(
-    pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer,
-    pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
-    pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
-    size_t dst_row_pitch, size_t dst_slice_pitch,
-    pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
-    pi_event *event) {
-
-  assert(src_buffer != nullptr);
-  assert(dst_buffer != nullptr);
-  assert(command_queue != nullptr);
-
-  pi_result retErr = PI_SUCCESS;
-  void *srcPtr = src_buffer->mem_.buffer_mem_.get_void();
-  void *dstPtr = dst_buffer->mem_.buffer_mem_.get_void();
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-    retErr = enqueueEventsWait(command_queue, hipStream,
-                               num_events_in_wait_list, event_wait_list);
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT, command_queue, hipStream));
-      retImplEv->start();
-    }
-
-    retErr = commonEnqueueMemBufferCopyRect(
-        hipStream, region, &srcPtr, hipMemoryTypeDevice, src_origin,
-        src_row_pitch, src_slice_pitch, &dstPtr, hipMemoryTypeDevice,
-        dst_origin, dst_row_pitch, dst_slice_pitch);
-
-    if (event) {
-      retImplEv->record();
-      *event = retImplEv.release();
-    }
-
-  } catch (pi_result err) {
-    retErr = err;
-  }
-  return retErr;
-}
-
-pi_result hip_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
-                                     const void *pattern, size_t pattern_size,
-                                     size_t offset, size_t size,
-                                     pi_uint32 num_events_in_wait_list,
-                                     const pi_event *event_wait_list,
-                                     pi_event *event) {
-  assert(command_queue != nullptr);
-
-  auto args_are_multiples_of_pattern_size =
-      (offset % pattern_size == 0) || (size % pattern_size == 0);
-
-  auto pattern_is_valid = (pattern != nullptr);
-
-  auto pattern_size_is_valid =
-      ((pattern_size & (pattern_size - 1)) == 0) && // is power of two
-      (pattern_size > 0) && (pattern_size <= 128);  // falls within valid range
-
-  assert(args_are_multiples_of_pattern_size && pattern_is_valid &&
-         pattern_size_is_valid);
-  (void)args_are_multiples_of_pattern_size;
-  (void)pattern_is_valid;
-  (void)pattern_size_is_valid;
-
-  std::unique_ptr<_pi_event> retImplEv{nullptr};
-
-  try {
-    ScopedContext active(command_queue->get_context());
-
-    auto stream = command_queue->get_next_transfer_stream();
-    pi_result result;
-    if (event_wait_list) {
-      result = enqueueEventsWait(command_queue, stream, num_events_in_wait_list,
-                                 event_wait_list);
-    }
-
-    if (event) {
-      retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_FILL, command_queue, stream));
-      result = retImplEv->start();
-    }
-
-    auto dstDevice = buffer->mem_.buffer_mem_.get_with_offset(offset);
-    auto N = size / pattern_size;
-
-    // pattern size in bytes
-    switch (pattern_size) {
-    case 1: {
-      auto value = *static_cast<const uint8_t *>(pattern);
-      result = PI_CHECK_ERROR(hipMemsetD8Async(dstDevice, value, N, stream));
-      break;
-    }
-    case 2: {
-      auto value = *static_cast<const uint16_t *>(pattern);
-      result = PI_CHECK_ERROR(hipMemsetD16Async(dstDevice, value, N, stream));
-      break;
-    }
-    case 4: {
-      auto value = *static_cast<const uint32_t *>(pattern);
-      result = PI_CHECK_ERROR(hipMemsetD32Async(dstDevice, value, N, stream));
-      break;
-    }
-
-    default: {
-      // HIP has no memset functions that allow setting values more than 4
-      // bytes. PI API lets you pass an arbitrary "pattern" to the buffer
-      // fill, which can be more than 4 bytes. We must break up the pattern
-      // into 1 byte values, and set the buffer using multiple strided calls.
-      // The first 4 patterns are set using hipMemsetD32Async then all
-      // subsequent 1 byte patterns are set using hipMemset2DAsync which is
-      // called for each pattern.
-
-      // Calculate the number of patterns, stride, number of times the pattern
-      // needs to be applied, and the number of times the first 32 bit pattern
-      // needs to be applied.
-      auto number_of_steps = pattern_size / sizeof(uint8_t);
-      auto pitch = number_of_steps * sizeof(uint8_t);
-      auto height = size / number_of_steps;
-      auto count_32 = size / sizeof(uint32_t);
-
-      // Get 4-byte chunk of the pattern and call hipMemsetD32Async
-      auto value = *(static_cast<const uint32_t *>(pattern));
-      result =
-          PI_CHECK_ERROR(hipMemsetD32Async(dstDevice, value, count_32, stream));
-      for (auto step = 4u; step < number_of_steps; ++step) {
-        // take 1 byte of the pattern
-        value = *(static_cast<const uint8_t *>(pattern) + step);
-
-        // offset the pointer to the part of the buffer we want to write to
-        auto offset_ptr = reinterpret_cast<void *>(
-            reinterpret_cast<uint8_t *>(dstDevice) + (step * sizeof(uint8_t)));
-
-        // set all of the pattern chunks
-        result = PI_CHECK_ERROR(hipMemset2DAsync(
-            offset_ptr, pitch, value, sizeof(uint8_t), height, stream));
-      }
-      break;
-    }
-    }
-
-    if (event) {
-      result = retImplEv->record();
-      *event = retImplEv.release();
-    }
-
-    return result;
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_UNKNOWN;
-  }
-}
-
-static size_t imageElementByteSize(hipArray_Format array_format) {
-  switch (array_format) {
-  case HIP_AD_FORMAT_UNSIGNED_INT8:
-  case HIP_AD_FORMAT_SIGNED_INT8:
-    return 1;
-  case HIP_AD_FORMAT_UNSIGNED_INT16:
-  case HIP_AD_FORMAT_SIGNED_INT16:
-  case HIP_AD_FORMAT_HALF:
-    return 2;
-  case HIP_AD_FORMAT_UNSIGNED_INT32:
-  case HIP_AD_FORMAT_SIGNED_INT32:
-  case HIP_AD_FORMAT_FLOAT:
-    return 4;
-  default:
-    return 0;
-  }
-  sycl::detail::pi::die("Invalid iamge format.");
-  return 0;
-}
-
-/// General ND memory copy operation for images (where N > 1).
-/// This function requires the corresponding HIP context to be at the top of
-/// the context stack
-/// If the source and/or destination is an array, src_ptr and/or dst_ptr
-/// must be a pointer to a hipArray
-
-static pi_result commonEnqueueMemImageNDCopy(
-    hipStream_t hip_stream, pi_mem_type img_type, const size_t *region,
-    const void *src_ptr, const hipMemoryType src_type, const size_t *src_offset,
-    void *dst_ptr, const hipMemoryType dst_type, const size_t *dst_offset) {
-  assert(region != nullptr);
-
-  assert(src_type == hipMemoryTypeArray || src_type == hipMemoryTypeHost);
-  assert(dst_type == hipMemoryTypeArray || dst_type == hipMemoryTypeHost);
-
-  if (img_type == PI_MEM_TYPE_IMAGE2D) {
-    hip_Memcpy2D cpyDesc;
-    memset(&cpyDesc, 0, sizeof(cpyDesc));
-    cpyDesc.srcMemoryType = src_type;
-    if (src_type == hipMemoryTypeArray) {
-      cpyDesc.srcArray =
-          reinterpret_cast<hipCUarray>(const_cast<void *>(src_ptr));
-      cpyDesc.srcXInBytes = src_offset[0];
-      cpyDesc.srcY = src_offset[1];
-    } else {
-      cpyDesc.srcHost = src_ptr;
-    }
-    cpyDesc.dstMemoryType = dst_type;
-    if (dst_type == hipMemoryTypeArray) {
-      cpyDesc.dstArray =
-          reinterpret_cast<hipCUarray>(const_cast<void *>(dst_ptr));
-      cpyDesc.dstXInBytes = dst_offset[0];
-      cpyDesc.dstY = dst_offset[1];
-    } else {
-      cpyDesc.dstHost = dst_ptr;
-    }
-    cpyDesc.WidthInBytes = region[0];
-    cpyDesc.Height = region[1];
-    return PI_CHECK_ERROR(hipMemcpyParam2DAsync(&cpyDesc, hip_stream));
-  }
-
-  if (img_type == PI_MEM_TYPE_IMAGE3D) {
-
-    HIP_MEMCPY3D cpyDesc;
-    memset(&cpyDesc, 0, sizeof(cpyDesc));
-    cpyDesc.srcMemoryType = src_type;
-    if (src_type == hipMemoryTypeArray) {
-      cpyDesc.srcArray =
-          reinterpret_cast<hipCUarray>(const_cast<void *>(src_ptr));
-      cpyDesc.srcXInBytes = src_offset[0];
-      cpyDesc.srcY = src_offset[1];
-      cpyDesc.srcZ = src_offset[2];
-    } else {
-      cpyDesc.srcHost = src_ptr;
-    }
-    cpyDesc.dstMemoryType = dst_type;
-    if (dst_type == hipMemoryTypeArray) {
-      cpyDesc.dstArray = reinterpret_cast<hipCUarray>(dst_ptr);
-      cpyDesc.dstXInBytes = dst_offset[0];
-      cpyDesc.dstY = dst_offset[1];
-      cpyDesc.dstZ = dst_offset[2];
-    } else {
-      cpyDesc.dstHost = dst_ptr;
-    }
-    cpyDesc.WidthInBytes = region[0];
-    cpyDesc.Height = region[1];
-    cpyDesc.Depth = region[2];
-    return PI_CHECK_ERROR(hipDrvMemcpy3DAsync(&cpyDesc, hip_stream));
-    return PI_ERROR_UNKNOWN;
-  }
-
-  return PI_ERROR_INVALID_VALUE;
-}
-
-pi_result hip_piEnqueueMemImageRead(pi_queue command_queue, pi_mem image,
-                                    pi_bool blocking_read, const size_t *origin,
-                                    const size_t *region, size_t row_pitch,
-                                    size_t slice_pitch, void *ptr,
-                                    pi_uint32 num_events_in_wait_list,
-                                    const pi_event *event_wait_list,
-                                    pi_event *event) {
-  (void)row_pitch;
-  (void)slice_pitch;
-
-  assert(command_queue != nullptr);
-  assert(image != nullptr);
-  assert(image->mem_type_ == _pi_mem::mem_type::surface);
-
-  pi_result retErr = PI_SUCCESS;
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-
-    if (event_wait_list) {
-      retErr = enqueueEventsWait(command_queue, hipStream,
-                                 num_events_in_wait_list, event_wait_list);
-    }
-
-    hipArray *array = image->mem_.surface_mem_.get_array();
-
-    hipArray_Format Format;
-    size_t NumChannels;
-    getArrayDesc(array, Format, NumChannels);
-
-    int elementByteSize = imageElementByteSize(Format);
-
-    size_t byteOffsetX = origin[0] * elementByteSize * NumChannels;
-    size_t bytesToCopy = elementByteSize * NumChannels * region[0];
-
-    pi_mem_type imgType = image->mem_.surface_mem_.get_image_type();
-
-    size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
-    size_t srcOffset[3] = {byteOffsetX, origin[1], origin[2]};
-
-    retErr = commonEnqueueMemImageNDCopy(hipStream, imgType, adjustedRegion,
-                                         array, hipMemoryTypeArray, srcOffset,
-                                         ptr, hipMemoryTypeHost, nullptr);
-
-    if (retErr != PI_SUCCESS) {
-      return retErr;
-    }
-
-    if (event) {
-      auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_IMAGE_READ,
-                                              command_queue, hipStream);
-      new_event->record();
-      *event = new_event;
-    }
-
-    if (blocking_read) {
-      retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
-    }
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_UNKNOWN;
-  }
-  return PI_SUCCESS;
-  return retErr;
-}
-
-pi_result hip_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image,
-                                     pi_bool blocking_write,
-                                     const size_t *origin, const size_t *region,
-                                     size_t input_row_pitch,
-                                     size_t input_slice_pitch, const void *ptr,
-                                     pi_uint32 num_events_in_wait_list,
-                                     const pi_event *event_wait_list,
-                                     pi_event *event) {
-  (void)blocking_write;
-  (void)input_row_pitch;
-  (void)input_slice_pitch;
-  assert(command_queue != nullptr);
-  assert(image != nullptr);
-  assert(image->mem_type_ == _pi_mem::mem_type::surface);
-
-  pi_result retErr = PI_SUCCESS;
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-
-    if (event_wait_list) {
-      retErr = enqueueEventsWait(command_queue, hipStream,
-                                 num_events_in_wait_list, event_wait_list);
-    }
-
-    hipArray *array = image->mem_.surface_mem_.get_array();
-
-    hipArray_Format Format;
-    size_t NumChannels;
-    getArrayDesc(array, Format, NumChannels);
-
-    int elementByteSize = imageElementByteSize(Format);
-
-    size_t byteOffsetX = origin[0] * elementByteSize * NumChannels;
-    size_t bytesToCopy = elementByteSize * NumChannels * region[0];
-
-    pi_mem_type imgType = image->mem_.surface_mem_.get_image_type();
-
-    size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
-    size_t dstOffset[3] = {byteOffsetX, origin[1], origin[2]};
-
-    retErr = commonEnqueueMemImageNDCopy(hipStream, imgType, adjustedRegion,
-                                         ptr, hipMemoryTypeHost, nullptr, array,
-                                         hipMemoryTypeArray, dstOffset);
-
-    if (retErr != PI_SUCCESS) {
-      return retErr;
-    }
-
-    if (event) {
-      auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_IMAGE_WRITE,
-                                              command_queue, hipStream);
-      new_event->record();
-      *event = new_event;
-    }
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_UNKNOWN;
-  }
-
-  return PI_SUCCESS;
-
-  return retErr;
-}
-
-pi_result hip_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image,
-                                    pi_mem dst_image, const size_t *src_origin,
-                                    const size_t *dst_origin,
-                                    const size_t *region,
-                                    pi_uint32 num_events_in_wait_list,
-                                    const pi_event *event_wait_list,
-                                    pi_event *event) {
-
-  assert(src_image->mem_type_ == _pi_mem::mem_type::surface);
-  assert(dst_image->mem_type_ == _pi_mem::mem_type::surface);
-  assert(src_image->mem_.surface_mem_.get_image_type() ==
-         dst_image->mem_.surface_mem_.get_image_type());
-
-  pi_result retErr = PI_SUCCESS;
-
-  try {
-    ScopedContext active(command_queue->get_context());
-    hipStream_t hipStream = command_queue->get_next_transfer_stream();
-    if (event_wait_list) {
-      retErr = enqueueEventsWait(command_queue, hipStream,
-                                 num_events_in_wait_list, event_wait_list);
-    }
-
-    hipArray *srcArray = src_image->mem_.surface_mem_.get_array();
-    hipArray_Format srcFormat;
-    size_t srcNumChannels;
-    getArrayDesc(srcArray, srcFormat, srcNumChannels);
-
-    hipArray *dstArray = dst_image->mem_.surface_mem_.get_array();
-    hipArray_Format dstFormat;
-    size_t dstNumChannels;
-    getArrayDesc(dstArray, dstFormat, dstNumChannels);
-
-    assert(srcFormat == dstFormat);
-    assert(srcNumChannels == dstNumChannels);
-
-    int elementByteSize = imageElementByteSize(srcFormat);
-
-    size_t dstByteOffsetX = dst_origin[0] * elementByteSize * srcNumChannels;
-    size_t srcByteOffsetX = src_origin[0] * elementByteSize * dstNumChannels;
-    size_t bytesToCopy = elementByteSize * srcNumChannels * region[0];
-
-    pi_mem_type imgType = src_image->mem_.surface_mem_.get_image_type();
-
-    size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
-    size_t srcOffset[3] = {srcByteOffsetX, src_origin[1], src_origin[2]};
-    size_t dstOffset[3] = {dstByteOffsetX, dst_origin[1], dst_origin[2]};
-
-    retErr = commonEnqueueMemImageNDCopy(
-        hipStream, imgType, adjustedRegion, srcArray, hipMemoryTypeArray,
-        srcOffset, dstArray, hipMemoryTypeArray, dstOffset);
-
-    if (retErr != PI_SUCCESS) {
-      return retErr;
-    }
-
-    if (event) {
-      auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_IMAGE_COPY,
-                                              command_queue, hipStream);
-      new_event->record();
-      *event = new_event;
-    }
-  } catch (pi_result err) {
-    return err;
-  } catch (...) {
-    return PI_ERROR_UNKNOWN;
-  }
-
-  return PI_SUCCESS;
-  return retErr;
-}
-
-/// \TODO Not implemented in HIP.
-pi_result hip_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image,
-                                    const void *fill_color,
-                                    const size_t *origin, const size_t *region,
-                                    pi_uint32 num_events_in_wait_list,
-                                    const pi_event *event_wait_list,
-                                    pi_event *event) {
-  (void)command_queue;
-  (void)image;
-  (void)fill_color;
-  (void)origin;
-  (void)region;
-  (void)num_events_in_wait_list;
-  (void)event_wait_list;
-  (void)event;
-
-  sycl::detail::pi::die("hip_piEnqueueMemImageFill not implemented");
-  return {};
-}
-
-/// Implements mapping on the host using a BufferRead operation.
-/// Mapped pointers are stored in the pi_mem object.
-/// If the buffer uses pinned host memory a pointer to that memory is returned
-/// and no read operation is done.
-///
-pi_result hip_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
-                                    pi_bool blocking_map,
-                                    pi_map_flags map_flags, size_t offset,
-                                    size_t size,
-                                    pi_uint32 num_events_in_wait_list,
-                                    const pi_event *event_wait_list,
-                                    pi_event *event, void **ret_map) {
-  assert(ret_map != nullptr);
-  assert(command_queue != nullptr);
-  assert(buffer != nullptr);
-  assert(buffer->mem_type_ == _pi_mem::mem_type::buffer);
-
-  pi_result ret_err = PI_ERROR_INVALID_OPERATION;
-  const bool is_pinned = buffer->mem_.buffer_mem_.allocMode_ ==
-                         _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
-
-  // Currently no support for overlapping regions
-  if (buffer->mem_.buffer_mem_.get_map_ptr() != nullptr) {
-    return ret_err;
-  }
-
-  // Allocate a pointer in the host to store the mapped information
-  auto hostPtr = buffer->mem_.buffer_mem_.map_to_ptr(offset, map_flags);
-  *ret_map = buffer->mem_.buffer_mem_.get_map_ptr();
-  if (hostPtr) {
-    ret_err = PI_SUCCESS;
-  }
-
-  if (!is_pinned && ((map_flags & PI_MAP_READ) || (map_flags & PI_MAP_WRITE))) {
-    // Pinned host memory is already on host so it doesn't need to be read.
-    ret_err = hip_piEnqueueMemBufferRead(
-        command_queue, buffer, blocking_map, offset, size, hostPtr,
-        num_events_in_wait_list, event_wait_list, event);
-  } else {
-    ScopedContext active(command_queue->get_context());
-
-    if (is_pinned) {
-      ret_err = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
-                                        event_wait_list, nullptr);
-    }
-
-    if (event) {
-      try {
-        *event = _pi_event::make_native(
-            PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue,
-            command_queue->get_next_transfer_stream());
-        (*event)->start();
-        (*event)->record();
-      } catch (pi_result error) {
-        ret_err = error;
-      }
-    }
-  }
-
-  return ret_err;
-}
-
-/// Implements the unmap from the host, using a BufferWrite operation.
-/// Requires the mapped pointer to be already registered in the given memobj.
-/// If memobj uses pinned host memory, this will not do a write.
-///
-pi_result hip_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
-                                void *mapped_ptr,
-                                pi_uint32 num_events_in_wait_list,
-                                const pi_event *event_wait_list,
-                                pi_event *event) {
-  pi_result ret_err = PI_SUCCESS;
-
-  assert(command_queue != nullptr);
-  assert(mapped_ptr != nullptr);
-  assert(memobj != nullptr);
-  assert(memobj->mem_type_ == _pi_mem::mem_type::buffer);
-  assert(memobj->mem_.buffer_mem_.get_map_ptr() != nullptr);
-  assert(memobj->mem_.buffer_mem_.get_map_ptr() == mapped_ptr);
-
-  const bool is_pinned = memobj->mem_.buffer_mem_.allocMode_ ==
-                         _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
-
-  if (!is_pinned &&
-      ((memobj->mem_.buffer_mem_.get_map_flags() & PI_MAP_WRITE) ||
-       (memobj->mem_.buffer_mem_.get_map_flags() &
-        PI_MAP_WRITE_INVALIDATE_REGION))) {
-    // Pinned host memory is only on host so it doesn't need to be written to.
-    ret_err = hip_piEnqueueMemBufferWrite(
-        command_queue, memobj, true,
-        memobj->mem_.buffer_mem_.get_map_offset(mapped_ptr),
-        memobj->mem_.buffer_mem_.get_size(), mapped_ptr,
-        num_events_in_wait_list, event_wait_list, event);
-  } else {
-    ScopedContext active(command_queue->get_context());
-
-    if (is_pinned) {
-      ret_err = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list,
-                                        event_wait_list, nullptr);
-    }
-
-    if (event) {
-      try {
-        *event = _pi_event::make_native(
-            PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, command_queue,
-            command_queue->get_next_transfer_stream());
-        (*event)->start();
-        (*event)->record();
-      } catch (pi_result error) {
-        ret_err = error;
-      }
-    }
-  }
-
-  memobj->mem_.buffer_mem_.unmap(mapped_ptr);
-  return ret_err;
-}
-
-/// USM: Implements USM Host allocations using HIP Pinned Memory
-///
-pi_result
-hip_piextUSMHostAlloc(void **result_ptr, pi_context context,
-                      [[maybe_unused]] pi_usm_mem_properties *properties,
-                      size_t size, [[maybe_unused]] pi_uint32 alignment) {
-  assert(result_ptr != nullptr);
-  assert(context != nullptr);
-  assert(properties == nullptr || *properties == 0);
-  pi_result result = PI_SUCCESS;
-  try {
-    ScopedContext active(context);
-    result = PI_CHECK_ERROR(hipHostMalloc(result_ptr, size));
-  } catch (pi_result error) {
-    result = error;
-  }
-
-  assert(alignment == 0 ||
-         (result == PI_SUCCESS &&
-          reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
-  return result;
-}
-
-/// USM: Implements USM device allocations using a normal HIP device pointer
-///
-pi_result
-hip_piextUSMDeviceAlloc(void **result_ptr, pi_context context,
-                        [[maybe_unused]] pi_device device,
-                        [[maybe_unused]] pi_usm_mem_properties *properties,
-                        size_t size, [[maybe_unused]] pi_uint32 alignment) {
-  assert(result_ptr != nullptr);
-  assert(context != nullptr);
-  assert(device != nullptr);
-  assert(properties == nullptr || *properties == 0);
-  pi_result result = PI_SUCCESS;
-  try {
-    ScopedContext active(context);
-    result = PI_CHECK_ERROR(hipMalloc(result_ptr, size));
-  } catch (pi_result error) {
-    result = error;
-  }
-
-  assert(alignment == 0 ||
-         (result == PI_SUCCESS &&
-          reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
-  return result;
-}
-
-/// USM: Implements USM Shared allocations using HIP Managed Memory
-///
-pi_result
-hip_piextUSMSharedAlloc(void **result_ptr, pi_context context,
-                        [[maybe_unused]] pi_device device,
-                        [[maybe_unused]] pi_usm_mem_properties *properties,
-                        size_t size, [[maybe_unused]] pi_uint32 alignment) {
-  assert(result_ptr != nullptr);
-  assert(context != nullptr);
-  assert(device != nullptr);
-  assert(properties == nullptr || *properties == 0);
-  pi_result result = PI_SUCCESS;
-  try {
-    ScopedContext active(context);
-    result =
-        PI_CHECK_ERROR(hipMallocManaged(result_ptr, size, hipMemAttachGlobal));
-  } catch (pi_result error) {
-    result = error;
-  }
-
-  assert(alignment == 0 ||
-         (result == PI_SUCCESS &&
-          reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
-  return result;
-}
-
-/// USM: Frees the given USM pointer associated with the context.
-///
-pi_result hip_piextUSMFree(pi_context context, void *ptr) {
-
-  assert(context != nullptr);
-  pi_result result = PI_SUCCESS;
-  try {
-    ScopedContext active(context);
-    unsigned int type;
-    hipPointerAttribute_t hipPointerAttributeType;
-    result =
-        PI_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, ptr));
-    type = hipPointerAttributeType.memoryType;
-    assert(type == hipMemoryTypeDevice or type == hipMemoryTypeHost);
-    if (type == hipMemoryTypeDevice) {
-      result = PI_CHECK_ERROR(hipFree(ptr));
-    }
-    if (type == hipMemoryTypeHost) {
-      result = PI_CHECK_ERROR(hipFreeHost(ptr));
-    }
-  } catch (pi_result error) {
-    result = error;
-  }
-  return result;
-}
-
-pi_result hip_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value,
-                                    size_t count,
-                                    pi_uint32 num_events_in_waitlist,
-                                    const pi_event *events_waitlist,
-                                    pi_event *event) {
-
-  assert(queue != nullptr);
-  assert(ptr != nullptr);
-  pi_result result = PI_SUCCESS;
-  std::unique_ptr<_pi_event> event_ptr{nullptr};
-
-  try {
-    ScopedContext active(queue->get_context());
-    pi_uint32 stream_token;
-    _pi_stream_guard guard;
-    hipStream_t hipStream = queue->get_next_compute_stream(
-        num_events_in_waitlist, events_waitlist, guard, &stream_token);
-    result = enqueueEventsWait(queue, hipStream, num_events_in_waitlist,
-                               events_waitlist);
-    if (event) {
-      event_ptr = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_FILL, queue, hipStream, stream_token));
-      event_ptr->start();
-    }
-    result = PI_CHECK_ERROR(
-        hipMemsetD8Async(reinterpret_cast<hipDeviceptr_t>(ptr),
-                         (unsigned char)value & 0xFF, count, hipStream));
-    if (event) {
-      result = event_ptr->record();
-      *event = event_ptr.release();
-    }
-  } catch (pi_result err) {
-    result = err;
-  }
-
-  return result;
-}
-
-pi_result hip_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking,
-                                    void *dst_ptr, const void *src_ptr,
-                                    size_t size,
-                                    pi_uint32 num_events_in_waitlist,
-                                    const pi_event *events_waitlist,
-                                    pi_event *event) {
-  assert(queue != nullptr);
-  assert(dst_ptr != nullptr);
-  assert(src_ptr != nullptr);
-  pi_result result = PI_SUCCESS;
-
-  std::unique_ptr<_pi_event> event_ptr{nullptr};
-
-  try {
-    ScopedContext active(queue->get_context());
-    hipStream_t hipStream = queue->get_next_transfer_stream();
-    result = enqueueEventsWait(queue, hipStream, num_events_in_waitlist,
-                               events_waitlist);
-    if (event) {
-      event_ptr = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_COPY, queue, hipStream));
-      event_ptr->start();
-    }
-    result = PI_CHECK_ERROR(
-        hipMemcpyAsync(dst_ptr, src_ptr, size, hipMemcpyDefault, hipStream));
-    if (event) {
-      result = event_ptr->record();
-    }
-    if (blocking) {
-      result = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
-    }
-    if (event) {
-      *event = event_ptr.release();
-    }
-  } catch (pi_result err) {
-    result = err;
-  }
-  return result;
-}
-
-pi_result hip_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
-                                      size_t size, pi_usm_migration_flags flags,
-                                      pi_uint32 num_events_in_waitlist,
-                                      const pi_event *events_waitlist,
-                                      pi_event *event) {
-
-  // flags is currently unused so fail if set
-  if (flags != 0)
-    return PI_ERROR_INVALID_VALUE;
-  assert(queue != nullptr);
-  assert(ptr != nullptr);
-  pi_result result = PI_SUCCESS;
-  std::unique_ptr<_pi_event> event_ptr{nullptr};
-
-  try {
-    ScopedContext active(queue->get_context());
-    hipStream_t hipStream = queue->get_next_transfer_stream();
-    result = enqueueEventsWait(queue, hipStream, num_events_in_waitlist,
-                               events_waitlist);
-    if (event) {
-      event_ptr = std::unique_ptr<_pi_event>(_pi_event::make_native(
-          PI_COMMAND_TYPE_MEM_BUFFER_COPY, queue, hipStream));
-      event_ptr->start();
-    }
-    result = PI_CHECK_ERROR(hipMemPrefetchAsync(
-        ptr, size, queue->get_context()->get_device()->get(), hipStream));
-    if (event) {
-      result = event_ptr->record();
-      *event = event_ptr.release();
-    }
-  } catch (pi_result err) {
-    result = err;
-  }
-
-  return result;
-}
-
-/// USM: memadvise API to govern behavior of automatic migration mechanisms
-pi_result hip_piextUSMEnqueueMemAdvise(pi_queue queue,
-                                       [[maybe_unused]] const void *ptr,
-                                       size_t length, pi_mem_advice advice,
-                                       pi_event *event) {
-  (void)length;
-  (void)advice;
-
-  assert(queue != nullptr);
-  assert(ptr != nullptr);
-  // TODO implement a mapping to hipMemAdvise once the expected behaviour
-  // of piextUSMEnqueueMemAdvise is detailed in the USM extension
-  return hip_piEnqueueEventsWait(queue, 0, nullptr, event);
-
-  return PI_SUCCESS;
-}
-
-// TODO: Implement this. Remember to return true for
-//       PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT when it is implemented.
-pi_result hip_piextUSMEnqueueFill2D(pi_queue, void *, size_t, size_t,
-                                    const void *, size_t, size_t, pi_uint32,
-                                    const pi_event *, pi_event *) {
-  sycl::detail::pi::die("piextUSMEnqueueFill2D: not implemented");
-  return {};
-}
-
-// TODO: Implement this. Remember to return true for
-//       PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT when it is implemented.
-pi_result hip_piextUSMEnqueueMemset2D(pi_queue, void *, size_t, int, size_t,
-                                      size_t, pi_uint32, const pi_event *,
-                                      pi_event *) {
-  sycl::detail::pi::die("hip_piextUSMEnqueueMemset2D: not implemented");
-  return {};
-}
-
-/// 2D Memcpy API
-///
-/// \param queue is the queue to submit to
-/// \param blocking is whether this operation should block the host
-/// \param dst_ptr is the location the data will be copied
-/// \param dst_pitch is the total width of the destination memory including
-/// padding
-/// \param src_ptr is the data to be copied
-/// \param dst_pitch is the total width of the source memory including padding
-/// \param width is width in bytes of each row to be copied
-/// \param height is height the columns to be copied
-/// \param num_events_in_waitlist is the number of events to wait on
-/// \param events_waitlist is an array of events to wait on
-/// \param event is the event that represents this operation
-pi_result hip_piextUSMEnqueueMemcpy2D(pi_queue queue, pi_bool blocking,
-                                      void *dst_ptr, size_t dst_pitch,
-                                      const void *src_ptr, size_t src_pitch,
-                                      size_t width, size_t height,
-                                      pi_uint32 num_events_in_wait_list,
-                                      const pi_event *event_wait_list,
-                                      pi_event *event) {
-  assert(queue != nullptr);
-
-  pi_result result = PI_SUCCESS;
-
-  try {
-    ScopedContext active(queue->get_context());
-    hipStream_t hipStream = queue->get_next_transfer_stream();
-    result = enqueueEventsWait(queue, hipStream, num_events_in_wait_list,
-                               event_wait_list);
-    if (event) {
-      (*event) = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT,
-                                        queue, hipStream);
-      (*event)->start();
-    }
-
-    result = PI_CHECK_ERROR(hipMemcpy2DAsync(dst_ptr, dst_pitch, src_ptr,
-                                             src_pitch, width, height,
-                                             hipMemcpyDefault, hipStream));
-
-    if (event) {
-      (*event)->record();
-    }
-    if (blocking) {
-      result = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
-    }
-  } catch (pi_result err) {
-    result = err;
-  }
-
-  return result;
-}
-
-/// API to query information about USM allocated pointers
-/// Valid Queries:
-///   PI_MEM_ALLOC_TYPE returns host/device/shared pi_host_usm value
-///   PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if
-///                         the queried pointer fell inside an allocation.
-///                         Result must fit in void *
-///   PI_MEM_ALLOC_SIZE returns how big the queried pointer's
-///                     allocation is in bytes. Result is a size_t.
-///   PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against
-///
-/// \param context is the pi_context
-/// \param ptr is the pointer to query
-/// \param param_name is the type of query to perform
-/// \param param_value_size is the size of the result in bytes
-/// \param param_value is the result
-/// \param param_value_ret is how many bytes were written
-pi_result hip_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
-                                      pi_mem_alloc_info param_name,
-                                      size_t param_value_size,
-                                      void *param_value,
-                                      size_t *param_value_size_ret) {
-
-  assert(context != nullptr);
-  assert(ptr != nullptr);
-  pi_result result = PI_SUCCESS;
-  hipPointerAttribute_t hipPointerAttributeType;
-
-  try {
-    ScopedContext active(context);
-    switch (param_name) {
-    case PI_MEM_ALLOC_TYPE: {
-      unsigned int value;
-      // do not throw if hipPointerGetAttribute returns hipErrorInvalidValue
-      hipError_t ret = hipPointerGetAttributes(&hipPointerAttributeType, ptr);
-      if (ret == hipErrorInvalidValue) {
-        // pointer not known to the HIP subsystem
-        return getInfo(param_value_size, param_value, param_value_size_ret,
-                       PI_MEM_TYPE_UNKNOWN);
-      }
-      result = check_error(ret, __func__, __LINE__ - 5, __FILE__);
-      value = hipPointerAttributeType.isManaged;
-      if (value) {
-        // pointer to managed memory
-        return getInfo(param_value_size, param_value, param_value_size_ret,
-                       PI_MEM_TYPE_SHARED);
-      }
-      result = PI_CHECK_ERROR(
-          hipPointerGetAttributes(&hipPointerAttributeType, ptr));
-      value = hipPointerAttributeType.memoryType;
-      assert(value == hipMemoryTypeDevice or value == hipMemoryTypeHost);
-      if (value == hipMemoryTypeDevice) {
-        // pointer to device memory
-        return getInfo(param_value_size, param_value, param_value_size_ret,
-                       PI_MEM_TYPE_DEVICE);
-      }
-      if (value == hipMemoryTypeHost) {
-        // pointer to host memory
-        return getInfo(param_value_size, param_value, param_value_size_ret,
-                       PI_MEM_TYPE_HOST);
-      }
-      // should never get here
-      __builtin_unreachable();
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     PI_MEM_TYPE_UNKNOWN);
-    }
-    case PI_MEM_ALLOC_BASE_PTR: {
-      return PI_ERROR_INVALID_VALUE;
-    }
-    case PI_MEM_ALLOC_SIZE: {
-      return PI_ERROR_INVALID_VALUE;
-    }
-
-    case PI_MEM_ALLOC_DEVICE: {
-      // get device index associated with this pointer
-      result = PI_CHECK_ERROR(
-          hipPointerGetAttributes(&hipPointerAttributeType, ptr));
-      int device_idx = hipPointerAttributeType.device;
-
-      // currently each device is in its own platform, so find the platform at
-      // the same index
-      std::vector<pi_platform> platforms;
-      platforms.resize(device_idx + 1);
-      result = hip_piPlatformsGet(device_idx + 1, platforms.data(), nullptr);
-
-      // get the device from the platform
-      pi_device device = platforms[device_idx]->devices_[0].get();
-      return getInfo(param_value_size, param_value, param_value_size_ret,
-                     device);
-    }
-    }
-  } catch (pi_result error) {
-    result = error;
-  }
-
-  return result;
-}
-
-pi_result hip_piextEnqueueDeviceGlobalVariableWrite(
-    pi_queue queue, pi_program program, const char *name,
-    pi_bool blocking_write, size_t count, size_t offset, const void *src,
-    pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
-    pi_event *event) {
-  (void)queue;
-  (void)program;
-  (void)name;
-  (void)blocking_write;
-  (void)count;
-  (void)offset;
-  (void)src;
-  (void)num_events_in_wait_list;
-  (void)event_wait_list;
-  (void)event;
-
-  sycl::detail::pi::die(
-      "hip_piextEnqueueDeviceGlobalVariableWrite not implemented");
-  return {};
-}
-
-pi_result hip_piextEnqueueDeviceGlobalVariableRead(
-    pi_queue queue, pi_program program, const char *name, pi_bool blocking_read,
-    size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list,
-    const pi_event *event_wait_list, pi_event *event) {
-  (void)queue;
-  (void)program;
-  (void)name;
-  (void)blocking_read;
-  (void)count;
-  (void)offset;
-  (void)dst;
-  (void)num_events_in_wait_list;
-  (void)event_wait_list;
-  (void)event;
-
-  sycl::detail::pi::die(
-      "hip_piextEnqueueDeviceGlobalVariableRead not implemented");
-}
-
-/// Host Pipes
-pi_result hip_piextEnqueueReadHostPipe(pi_queue queue, pi_program program,
-                                       const char *pipe_symbol,
-                                       pi_bool blocking, void *ptr, size_t size,
-                                       pi_uint32 num_events_in_waitlist,
-                                       const pi_event *events_waitlist,
-                                       pi_event *event) {
-  (void)queue;
-  (void)program;
-  (void)pipe_symbol;
-  (void)blocking;
-  (void)ptr;
-  (void)size;
-  (void)num_events_in_waitlist;
-  (void)events_waitlist;
-  (void)event;
-
-  sycl::detail::pi::die("hip_piextEnqueueReadHostPipe not implemented");
-  return {};
-}
-
-pi_result hip_piextEnqueueWriteHostPipe(
-    pi_queue queue, pi_program program, const char *pipe_symbol,
-    pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist,
-    const pi_event *events_waitlist, pi_event *event) {
-  (void)queue;
-  (void)program;
-  (void)pipe_symbol;
-  (void)blocking;
-  (void)ptr;
-  (void)size;
-  (void)num_events_in_waitlist;
-  (void)events_waitlist;
-  (void)event;
-
-  sycl::detail::pi::die("hip_piextEnqueueWriteHostPipe not implemented");
-  return {};
-}
-pi_result
-hip_piextCommandBufferCreate(pi_context context, pi_device device,
-                             const pi_ext_command_buffer_desc *desc,
-                             pi_ext_command_buffer *ret_command_buffer) {
-  (void)context;
-  (void)device;
-  (void)desc;
-  (void)ret_command_buffer;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferRetain(pi_ext_command_buffer command_buffer) {
-  (void)command_buffer;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferRelease(pi_ext_command_buffer command_buffer) {
-  (void)command_buffer;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) {
-  (void)command_buffer;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferNDRangeKernel(
-    pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
-    const size_t *global_work_offset, const size_t *global_work_size,
-    const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
-    const pi_ext_sync_point *sync_point_wait_list,
-    pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)kernel;
-  (void)work_dim;
-  (void)global_work_offset;
-  (void)global_work_size;
-  (void)local_work_size;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result
-hip_piextCommandBufferMemcpyUSM(pi_ext_command_buffer command_buffer,
-                                void *dst_ptr, const void *src_ptr, size_t size,
-                                pi_uint32 num_sync_points_in_wait_list,
-                                const pi_ext_sync_point *sync_point_wait_list,
-                                pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)dst_ptr;
-  (void)src_ptr;
-  (void)size;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferMemBufferCopy(
-    pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
-    size_t src_offset, size_t dst_offset, size_t size,
-    pi_uint32 num_sync_points_in_wait_list,
-    const pi_ext_sync_point *sync_point_wait_list,
-    pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)src_buffer;
-  (void)dst_buffer;
-  (void)src_offset;
-  (void)dst_offset;
-  (void)size;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferMemBufferCopyRect(
-    pi_ext_command_buffer command_buffer, pi_mem src_buffer, pi_mem dst_buffer,
-    pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
-    pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
-    size_t dst_row_pitch, size_t dst_slice_pitch,
-    pi_uint32 num_sync_points_in_wait_list,
-    const pi_ext_sync_point *sync_point_wait_list,
-    pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)src_buffer;
-  (void)dst_buffer;
-  (void)src_origin;
-  (void)dst_origin;
-  (void)region;
-  (void)src_row_pitch;
-  (void)src_slice_pitch;
-  (void)dst_row_pitch;
-  (void)dst_slice_pitch;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferMemBufferRead(
-    pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
-    size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list,
-    const pi_ext_sync_point *sync_point_wait_list,
-    pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)buffer;
-  (void)offset;
-  (void)size;
-  (void)dst;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferMemBufferReadRect(
-    pi_ext_command_buffer command_buffer, pi_mem buffer,
-    pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
-    pi_buff_rect_region region, size_t buffer_row_pitch,
-    size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
-    void *ptr, pi_uint32 num_sync_points_in_wait_list,
-    const pi_ext_sync_point *sync_point_wait_list,
-    pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)buffer;
-  (void)buffer_offset;
-  (void)host_offset;
-  (void)region;
-  (void)buffer_row_pitch;
-  (void)buffer_slice_pitch;
-  (void)host_row_pitch;
-  (void)host_slice_pitch;
-  (void)ptr;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferMemBufferWrite(
-    pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
-    size_t size, const void *ptr, pi_uint32 num_sync_points_in_wait_list,
-    const pi_ext_sync_point *sync_point_wait_list,
-    pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)buffer;
-  (void)offset;
-  (void)size;
-  (void)ptr;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextCommandBufferMemBufferWriteRect(
-    pi_ext_command_buffer command_buffer, pi_mem buffer,
-    pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
-    pi_buff_rect_region region, size_t buffer_row_pitch,
-    size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
-    const void *ptr, pi_uint32 num_sync_points_in_wait_list,
-    const pi_ext_sync_point *sync_point_wait_list,
-    pi_ext_sync_point *sync_point) {
-  (void)command_buffer;
-  (void)buffer;
-  (void)buffer_offset;
-  (void)host_offset;
-  (void)region;
-  (void)buffer_row_pitch;
-  (void)buffer_slice_pitch;
-  (void)host_row_pitch;
-  (void)host_slice_pitch;
-  (void)ptr;
-  (void)num_sync_points_in_wait_list;
-  (void)sync_point_wait_list;
-  (void)sync_point;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-pi_result hip_piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer,
-                                        pi_queue queue,
-                                        pi_uint32 num_events_in_wait_list,
-                                        const pi_event *event_wait_list,
-                                        pi_event *event) {
-  (void)command_buffer;
-  (void)queue;
-  (void)num_events_in_wait_list;
-  (void)event_wait_list;
-  (void)event;
-
-  sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
-  return {};
-}
-
-// This API is called by Sycl RT to notify the end of the plugin lifetime.
-// Windows: dynamically loaded plugins might have been unloaded already
-// when this is called. Sycl RT holds onto the PI plugin so it can be
-// called safely. But this is not transitive. If the PI plugin in turn
-// dynamically loaded a different DLL, that may have been unloaded.
-// TODO: add a global variable lifetime management code here (see
-// pi_level_zero.cpp for reference) Currently this is just a NOOP.
-pi_result hip_piTearDown(void *PluginParameter) {
-  (void)PluginParameter;
-  return PI_SUCCESS;
-}
-
-pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime,
-                                      uint64_t *HostTime) {
-  if (!DeviceTime && !HostTime)
-    return PI_SUCCESS;
-
-  _pi_event::native_type event;
-
-  ScopedContext active(Device->get_context());
-
-  if (DeviceTime) {
-    PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault));
-    PI_CHECK_ERROR(hipEventRecord(event));
-  }
-  if (HostTime) {
-    using namespace std::chrono;
-    *HostTime =
-        duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
-            .count();
-  }
-
-  if (DeviceTime) {
-    PI_CHECK_ERROR(hipEventSynchronize(event));
-
-    float elapsedTime = 0.0f;
-    PI_CHECK_ERROR(
-        hipEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event));
-    *DeviceTime = (uint64_t)(elapsedTime * (double)1e6);
-  }
-  return PI_SUCCESS;
-}
-
-pi_result hip_piextEnablePeerAccess(pi_device command_device,
-                                    pi_device peer_device) {
-
-  std::ignore = command_device;
-  std::ignore = peer_device;
-
-  setErrorMessage("piextEnablePeerAccess not "
-                  "implemented in hip backend",
-                  PI_ERROR_PLUGIN_SPECIFIC_ERROR);
-  return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
-}
-
-pi_result hip_piextDisablePeerAccess(pi_device command_device,
-                                     pi_device peer_device) {
-
-  std::ignore = command_device;
-  std::ignore = peer_device;
-
-  setErrorMessage("piextDisablePeerAccess not "
-                  "implemented in hip backend",
-                  PI_ERROR_PLUGIN_SPECIFIC_ERROR);
-  return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
-}
-
-pi_result hip_piextPeerAccessGetInfo(pi_device command_device,
-                                     pi_device peer_device, pi_peer_attr attr,
-                                     size_t param_value_size, void *param_value,
-                                     size_t *param_value_size_ret) {
-  std::ignore = command_device;
-  std::ignore = peer_device;
-  std::ignore = attr;
-  // Zero return value indicates that all of the queries currently return false.
-  return getInfo(param_value_size, param_value, param_value_size_ret,
-                 pi_int32{0});
-}
+//-- PI API implementation
+extern "C" {
 
 const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING;
 
@@ -5891,158 +52,164 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
   (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&hip_api);
 
   // Platform
-  _PI_CL(piPlatformsGet, hip_piPlatformsGet)
-  _PI_CL(piPlatformGetInfo, hip_piPlatformGetInfo)
+  _PI_CL(piPlatformsGet, pi2ur::piPlatformsGet)
+  _PI_CL(piPlatformGetInfo, pi2ur::piPlatformGetInfo)
   // Device
-  _PI_CL(piDevicesGet, hip_piDevicesGet)
-  _PI_CL(piDeviceGetInfo, hip_piDeviceGetInfo)
-  _PI_CL(piDevicePartition, hip_piDevicePartition)
-  _PI_CL(piDeviceRetain, hip_piDeviceRetain)
-  _PI_CL(piDeviceRelease, hip_piDeviceRelease)
-  _PI_CL(piextDeviceSelectBinary, hip_piextDeviceSelectBinary)
-  _PI_CL(piextGetDeviceFunctionPointer, hip_piextGetDeviceFunctionPointer)
-  _PI_CL(piextDeviceGetNativeHandle, hip_piextDeviceGetNativeHandle)
+  _PI_CL(piDevicesGet, pi2ur::piDevicesGet)
+  _PI_CL(piDeviceGetInfo, pi2ur::piDeviceGetInfo)
+  _PI_CL(piDevicePartition, pi2ur::piDevicePartition)
+  _PI_CL(piDeviceRetain, pi2ur::piDeviceRetain)
+  _PI_CL(piDeviceRelease, pi2ur::piDeviceRelease)
+  _PI_CL(piextDeviceSelectBinary, pi2ur::piextDeviceSelectBinary)
+  _PI_CL(piextGetDeviceFunctionPointer, pi2ur::piextGetDeviceFunctionPointer)
+  _PI_CL(piextDeviceGetNativeHandle, pi2ur::piextDeviceGetNativeHandle)
   _PI_CL(piextDeviceCreateWithNativeHandle,
-         hip_piextDeviceCreateWithNativeHandle)
+         pi2ur::piextDeviceCreateWithNativeHandle)
   // Context
-  _PI_CL(piextContextSetExtendedDeleter, hip_piextContextSetExtendedDeleter)
-  _PI_CL(piContextCreate, hip_piContextCreate)
-  _PI_CL(piContextGetInfo, hip_piContextGetInfo)
-  _PI_CL(piContextRetain, hip_piContextRetain)
-  _PI_CL(piContextRelease, hip_piContextRelease)
-  _PI_CL(piextContextGetNativeHandle, hip_piextContextGetNativeHandle)
+  _PI_CL(piextContextSetExtendedDeleter, pi2ur::piextContextSetExtendedDeleter)
+  _PI_CL(piContextCreate, pi2ur::piContextCreate)
+  _PI_CL(piContextGetInfo, pi2ur::piContextGetInfo)
+  _PI_CL(piContextRetain, pi2ur::piContextRetain)
+  _PI_CL(piContextRelease, pi2ur::piContextRelease)
+  _PI_CL(piextContextGetNativeHandle, pi2ur::piextContextGetNativeHandle)
   _PI_CL(piextContextCreateWithNativeHandle,
-         hip_piextContextCreateWithNativeHandle)
+         pi2ur::piextContextCreateWithNativeHandle)
   // Queue
-  _PI_CL(piQueueCreate, hip_piQueueCreate)
-  _PI_CL(piextQueueCreate, hip_piextQueueCreate)
-  _PI_CL(piQueueGetInfo, hip_piQueueGetInfo)
-  _PI_CL(piQueueFinish, hip_piQueueFinish)
-  _PI_CL(piQueueFlush, hip_piQueueFlush)
-  _PI_CL(piQueueRetain, hip_piQueueRetain)
-  _PI_CL(piQueueRelease, hip_piQueueRelease)
-  _PI_CL(piextQueueGetNativeHandle, hip_piextQueueGetNativeHandle)
-  _PI_CL(piextQueueCreateWithNativeHandle, hip_piextQueueCreateWithNativeHandle)
+  _PI_CL(piQueueCreate, pi2ur::piQueueCreate)
+  _PI_CL(piextQueueCreate, pi2ur::piextQueueCreate)
+  _PI_CL(piQueueGetInfo, pi2ur::piQueueGetInfo)
+  _PI_CL(piQueueFinish, pi2ur::piQueueFinish)
+  _PI_CL(piQueueFlush, pi2ur::piQueueFlush)
+  _PI_CL(piQueueRetain, pi2ur::piQueueRetain)
+  _PI_CL(piQueueRelease, pi2ur::piQueueRelease)
+  _PI_CL(piextQueueGetNativeHandle, pi2ur::piextQueueGetNativeHandle)
+  _PI_CL(piextQueueCreateWithNativeHandle,
+         pi2ur::piextQueueCreateWithNativeHandle)
   // Memory
-  _PI_CL(piMemBufferCreate, hip_piMemBufferCreate)
-  _PI_CL(piMemImageCreate, hip_piMemImageCreate)
-  _PI_CL(piMemGetInfo, hip_piMemGetInfo)
-  _PI_CL(piMemImageGetInfo, hip_piMemImageGetInfo)
-  _PI_CL(piMemRetain, hip_piMemRetain)
-  _PI_CL(piMemRelease, hip_piMemRelease)
-  _PI_CL(piMemBufferPartition, hip_piMemBufferPartition)
-  _PI_CL(piextMemGetNativeHandle, hip_piextMemGetNativeHandle)
-  _PI_CL(piextMemCreateWithNativeHandle, hip_piextMemCreateWithNativeHandle)
+  _PI_CL(piMemBufferCreate, pi2ur::piMemBufferCreate)
+  _PI_CL(piMemImageCreate, pi2ur::piMemImageCreate)
+  _PI_CL(piMemGetInfo, pi2ur::piMemGetInfo)
+  _PI_CL(piMemImageGetInfo, pi2ur::piMemImageGetInfo)
+  _PI_CL(piMemRetain, pi2ur::piMemRetain)
+  _PI_CL(piMemRelease, pi2ur::piMemRelease)
+  _PI_CL(piMemBufferPartition, pi2ur::piMemBufferPartition)
+  _PI_CL(piextMemGetNativeHandle, pi2ur::piextMemGetNativeHandle)
+  _PI_CL(piextMemCreateWithNativeHandle, pi2ur::piextMemCreateWithNativeHandle)
   // Program
-  _PI_CL(piProgramCreate, hip_piProgramCreate)
-  _PI_CL(piclProgramCreateWithSource, hip_piclProgramCreateWithSource)
-  _PI_CL(piProgramCreateWithBinary, hip_piProgramCreateWithBinary)
-  _PI_CL(piProgramGetInfo, hip_piProgramGetInfo)
-  _PI_CL(piProgramCompile, hip_piProgramCompile)
-  _PI_CL(piProgramBuild, hip_piProgramBuild)
-  _PI_CL(piProgramLink, hip_piProgramLink)
-  _PI_CL(piProgramGetBuildInfo, hip_piProgramGetBuildInfo)
-  _PI_CL(piProgramRetain, hip_piProgramRetain)
-  _PI_CL(piProgramRelease, hip_piProgramRelease)
-  _PI_CL(piextProgramGetNativeHandle, hip_piextProgramGetNativeHandle)
+  _PI_CL(piProgramCreate, pi2ur::piProgramCreate)
+  _PI_CL(piclProgramCreateWithSource, pi2ur::piclProgramCreateWithSource)
+  _PI_CL(piProgramCreateWithBinary, pi2ur::piProgramCreateWithBinary)
+  _PI_CL(piProgramGetInfo, pi2ur::piProgramGetInfo)
+  _PI_CL(piProgramCompile, pi2ur::piProgramCompile)
+  _PI_CL(piProgramBuild, pi2ur::piProgramBuild)
+  _PI_CL(piProgramLink, pi2ur::piProgramLink)
+  _PI_CL(piProgramGetBuildInfo, pi2ur::piProgramGetBuildInfo)
+  _PI_CL(piProgramRetain, pi2ur::piProgramRetain)
+  _PI_CL(piProgramRelease, pi2ur::piProgramRelease)
+  _PI_CL(piextProgramGetNativeHandle, pi2ur::piextProgramGetNativeHandle)
   _PI_CL(piextProgramCreateWithNativeHandle,
-         hip_piextProgramCreateWithNativeHandle)
-  // Kernel
-  _PI_CL(piKernelCreate, hip_piKernelCreate)
-  _PI_CL(piKernelSetArg, hip_piKernelSetArg)
-  _PI_CL(piKernelGetInfo, hip_piKernelGetInfo)
-  _PI_CL(piKernelGetGroupInfo, hip_piKernelGetGroupInfo)
-  _PI_CL(piKernelGetSubGroupInfo, hip_piKernelGetSubGroupInfo)
-  _PI_CL(piKernelRetain, hip_piKernelRetain)
-  _PI_CL(piKernelRelease, hip_piKernelRelease)
-  _PI_CL(piKernelSetExecInfo, hip_piKernelSetExecInfo)
+         pi2ur::piextProgramCreateWithNativeHandle)
   _PI_CL(piextProgramSetSpecializationConstant,
-         hip_piextProgramSetSpecializationConstant)
-  _PI_CL(piextKernelSetArgPointer, hip_piextKernelSetArgPointer)
+         pi2ur::piextProgramSetSpecializationConstant)
+  // Kernel
+  _PI_CL(piKernelCreate, pi2ur::piKernelCreate)
+  _PI_CL(piKernelSetArg, pi2ur::piKernelSetArg)
+  _PI_CL(piKernelGetInfo, pi2ur::piKernelGetInfo)
+  _PI_CL(piKernelGetGroupInfo, pi2ur::piKernelGetGroupInfo)
+  _PI_CL(piKernelGetSubGroupInfo, pi2ur::piKernelGetSubGroupInfo)
+  _PI_CL(piKernelRetain, pi2ur::piKernelRetain)
+  _PI_CL(piKernelRelease, pi2ur::piKernelRelease)
+  _PI_CL(piKernelSetExecInfo, pi2ur::piKernelSetExecInfo)
+  _PI_CL(piextKernelSetArgPointer, pi2ur::piKernelSetArgPointer)
   // Event
-  _PI_CL(piEventCreate, hip_piEventCreate)
-  _PI_CL(piEventGetInfo, hip_piEventGetInfo)
-  _PI_CL(piEventGetProfilingInfo, hip_piEventGetProfilingInfo)
-  _PI_CL(piEventsWait, hip_piEventsWait)
-  _PI_CL(piEventSetCallback, hip_piEventSetCallback)
-  _PI_CL(piEventSetStatus, hip_piEventSetStatus)
-  _PI_CL(piEventRetain, hip_piEventRetain)
-  _PI_CL(piEventRelease, hip_piEventRelease)
-  _PI_CL(piextEventGetNativeHandle, hip_piextEventGetNativeHandle)
-  _PI_CL(piextEventCreateWithNativeHandle, hip_piextEventCreateWithNativeHandle)
+  _PI_CL(piEventCreate, pi2ur::piEventCreate)
+  _PI_CL(piEventGetInfo, pi2ur::piEventGetInfo)
+  _PI_CL(piEventGetProfilingInfo, pi2ur::piEventGetProfilingInfo)
+  _PI_CL(piEventsWait, pi2ur::piEventsWait)
+  _PI_CL(piEventSetCallback, pi2ur::piEventSetCallback)
+  _PI_CL(piEventSetStatus, pi2ur::piEventSetStatus)
+  _PI_CL(piEventRetain, pi2ur::piEventRetain)
+  _PI_CL(piEventRelease, pi2ur::piEventRelease)
+  _PI_CL(piextEventGetNativeHandle, pi2ur::piextEventGetNativeHandle)
+  _PI_CL(piextEventCreateWithNativeHandle,
+         pi2ur::piextEventCreateWithNativeHandle)
   // Sampler
-  _PI_CL(piSamplerCreate, hip_piSamplerCreate)
-  _PI_CL(piSamplerGetInfo, hip_piSamplerGetInfo)
-  _PI_CL(piSamplerRetain, hip_piSamplerRetain)
-  _PI_CL(piSamplerRelease, hip_piSamplerRelease)
-  // Queue commands
-  _PI_CL(piEnqueueKernelLaunch, hip_piEnqueueKernelLaunch)
-  _PI_CL(piEnqueueEventsWait, hip_piEnqueueEventsWait)
-  _PI_CL(piEnqueueEventsWaitWithBarrier, hip_piEnqueueEventsWaitWithBarrier)
-  _PI_CL(piEnqueueMemBufferRead, hip_piEnqueueMemBufferRead)
-  _PI_CL(piEnqueueMemBufferReadRect, hip_piEnqueueMemBufferReadRect)
-  _PI_CL(piEnqueueMemBufferWrite, hip_piEnqueueMemBufferWrite)
-  _PI_CL(piEnqueueMemBufferWriteRect, hip_piEnqueueMemBufferWriteRect)
-  _PI_CL(piEnqueueMemBufferCopy, hip_piEnqueueMemBufferCopy)
-  _PI_CL(piEnqueueMemBufferCopyRect, hip_piEnqueueMemBufferCopyRect)
-  _PI_CL(piEnqueueMemBufferFill, hip_piEnqueueMemBufferFill)
-  _PI_CL(piEnqueueMemImageRead, hip_piEnqueueMemImageRead)
-  _PI_CL(piEnqueueMemImageWrite, hip_piEnqueueMemImageWrite)
-  _PI_CL(piEnqueueMemImageCopy, hip_piEnqueueMemImageCopy)
-  _PI_CL(piEnqueueMemImageFill, hip_piEnqueueMemImageFill)
-  _PI_CL(piEnqueueMemBufferMap, hip_piEnqueueMemBufferMap)
-  _PI_CL(piEnqueueMemUnmap, hip_piEnqueueMemUnmap)
+  _PI_CL(piSamplerCreate, pi2ur::piSamplerCreate)
+  _PI_CL(piSamplerGetInfo, pi2ur::piSamplerGetInfo)
+  _PI_CL(piSamplerRetain, pi2ur::piSamplerRetain)
+  _PI_CL(piSamplerRelease, pi2ur::piSamplerRelease)
+  // Enqueue commands
+  _PI_CL(piEnqueueKernelLaunch, pi2ur::piEnqueueKernelLaunch)
+  _PI_CL(piEnqueueEventsWait, pi2ur::piEnqueueEventsWait)
+  _PI_CL(piEnqueueEventsWaitWithBarrier, pi2ur::piEnqueueEventsWaitWithBarrier)
+  _PI_CL(piEnqueueMemBufferRead, pi2ur::piEnqueueMemBufferRead)
+  _PI_CL(piEnqueueMemBufferReadRect, pi2ur::piEnqueueMemBufferReadRect)
+  _PI_CL(piEnqueueMemBufferWrite, pi2ur::piEnqueueMemBufferWrite)
+  _PI_CL(piEnqueueMemBufferWriteRect, pi2ur::piEnqueueMemBufferWriteRect)
+  _PI_CL(piEnqueueMemBufferCopy, pi2ur::piEnqueueMemBufferCopy)
+  _PI_CL(piEnqueueMemBufferCopyRect, pi2ur::piEnqueueMemBufferCopyRect)
+  _PI_CL(piEnqueueMemBufferFill, pi2ur::piEnqueueMemBufferFill)
+  _PI_CL(piEnqueueMemImageRead, pi2ur::piEnqueueMemImageRead)
+  _PI_CL(piEnqueueMemImageWrite, pi2ur::piEnqueueMemImageWrite)
+  _PI_CL(piEnqueueMemImageCopy, pi2ur::piEnqueueMemImageCopy)
+  _PI_CL(piEnqueueMemImageFill, pi2ur::piEnqueueMemImageFill)
+  _PI_CL(piEnqueueMemBufferMap, pi2ur::piEnqueueMemBufferMap)
+  _PI_CL(piEnqueueMemUnmap, pi2ur::piEnqueueMemUnmap)
   // USM
-  _PI_CL(piextUSMHostAlloc, hip_piextUSMHostAlloc)
-  _PI_CL(piextUSMDeviceAlloc, hip_piextUSMDeviceAlloc)
-  _PI_CL(piextUSMSharedAlloc, hip_piextUSMSharedAlloc)
-  _PI_CL(piextUSMFree, hip_piextUSMFree)
-  _PI_CL(piextUSMEnqueueMemset, hip_piextUSMEnqueueMemset)
-  _PI_CL(piextUSMEnqueueMemcpy, hip_piextUSMEnqueueMemcpy)
-  _PI_CL(piextUSMEnqueuePrefetch, hip_piextUSMEnqueuePrefetch)
-  _PI_CL(piextUSMEnqueueMemAdvise, hip_piextUSMEnqueueMemAdvise)
-  _PI_CL(piextUSMEnqueueMemcpy2D, hip_piextUSMEnqueueMemcpy2D)
-  _PI_CL(piextUSMEnqueueFill2D, hip_piextUSMEnqueueFill2D)
-  _PI_CL(piextUSMEnqueueMemset2D, hip_piextUSMEnqueueMemset2D)
-  _PI_CL(piextUSMGetMemAllocInfo, hip_piextUSMGetMemAllocInfo)
+  _PI_CL(piextUSMHostAlloc, pi2ur::piextUSMHostAlloc)
+  _PI_CL(piextUSMDeviceAlloc, pi2ur::piextUSMDeviceAlloc)
+  _PI_CL(piextUSMSharedAlloc, pi2ur::piextUSMSharedAlloc)
+  _PI_CL(piextUSMFree, pi2ur::piextUSMFree)
+  _PI_CL(piextUSMEnqueueMemset, pi2ur::piextUSMEnqueueMemset)
+  _PI_CL(piextUSMEnqueueMemcpy, pi2ur::piextUSMEnqueueMemcpy)
+  _PI_CL(piextUSMEnqueuePrefetch, pi2ur::piextUSMEnqueuePrefetch)
+  _PI_CL(piextUSMEnqueueMemAdvise, pi2ur::piextUSMEnqueueMemAdvise)
+  _PI_CL(piextUSMEnqueueMemcpy2D, pi2ur::piextUSMEnqueueMemcpy2D)
+  _PI_CL(piextUSMEnqueueFill2D, pi2ur::piextUSMEnqueueFill2D)
+  _PI_CL(piextUSMEnqueueMemset2D, pi2ur::piextUSMEnqueueMemset2D)
+  _PI_CL(piextUSMGetMemAllocInfo, pi2ur::piextUSMGetMemAllocInfo)
   // Device global variable
   _PI_CL(piextEnqueueDeviceGlobalVariableWrite,
-         hip_piextEnqueueDeviceGlobalVariableWrite)
+         pi2ur::piextEnqueueDeviceGlobalVariableWrite)
   _PI_CL(piextEnqueueDeviceGlobalVariableRead,
-         hip_piextEnqueueDeviceGlobalVariableRead)
+         pi2ur::piextEnqueueDeviceGlobalVariableRead)
 
   // Host Pipe
-  _PI_CL(piextEnqueueReadHostPipe, hip_piextEnqueueReadHostPipe)
-  _PI_CL(piextEnqueueWriteHostPipe, hip_piextEnqueueWriteHostPipe)
+  _PI_CL(piextEnqueueReadHostPipe, pi2ur::piextEnqueueReadHostPipe)
+  _PI_CL(piextEnqueueWriteHostPipe, pi2ur::piextEnqueueWriteHostPipe)
+
+  _PI_CL(piextKernelSetArgMemObj, pi2ur::piextKernelSetArgMemObj)
+  _PI_CL(piextKernelSetArgSampler, pi2ur::piextKernelSetArgSampler)
+  _PI_CL(piPluginGetLastError, pi2ur::piPluginGetLastError)
+  _PI_CL(piTearDown, pi2ur::piTearDown)
+  _PI_CL(piGetDeviceAndHostTimer, pi2ur::piGetDeviceAndHostTimer)
+  _PI_CL(piPluginGetBackendOption, pi2ur::piPluginGetBackendOption)
 
   // command-buffer
-  _PI_CL(piextCommandBufferCreate, hip_piextCommandBufferCreate)
-  _PI_CL(piextCommandBufferRetain, hip_piextCommandBufferRetain)
-  _PI_CL(piextCommandBufferRelease, hip_piextCommandBufferRelease)
-  _PI_CL(piextCommandBufferNDRangeKernel, hip_piextCommandBufferNDRangeKernel)
-  _PI_CL(piextCommandBufferMemcpyUSM, hip_piextCommandBufferMemcpyUSM)
-  _PI_CL(piextCommandBufferMemBufferCopy, hip_piextCommandBufferMemBufferCopy)
+  _PI_CL(piextCommandBufferCreate, pi2ur::piextCommandBufferCreate)
+  _PI_CL(piextCommandBufferRetain, pi2ur::piextCommandBufferRetain)
+  _PI_CL(piextCommandBufferRelease, pi2ur::piextCommandBufferRelease)
+  _PI_CL(piextCommandBufferNDRangeKernel,
+         pi2ur::piextCommandBufferNDRangeKernel)
+  _PI_CL(piextCommandBufferMemcpyUSM, pi2ur::piextCommandBufferMemcpyUSM)
+  _PI_CL(piextCommandBufferMemBufferCopy,
+         pi2ur::piextCommandBufferMemBufferCopy)
   _PI_CL(piextCommandBufferMemBufferCopyRect,
-         hip_piextCommandBufferMemBufferCopyRect)
-  _PI_CL(piextCommandBufferMemBufferRead, hip_piextCommandBufferMemBufferRead)
+         pi2ur::piextCommandBufferMemBufferCopyRect)
+  _PI_CL(piextCommandBufferMemBufferRead,
+         pi2ur::piextCommandBufferMemBufferRead)
   _PI_CL(piextCommandBufferMemBufferReadRect,
-         hip_piextCommandBufferMemBufferReadRect)
-  _PI_CL(piextCommandBufferMemBufferWrite, hip_piextCommandBufferMemBufferWrite)
+         pi2ur::piextCommandBufferMemBufferReadRect)
+  _PI_CL(piextCommandBufferMemBufferWrite,
+         pi2ur::piextCommandBufferMemBufferWrite)
   _PI_CL(piextCommandBufferMemBufferWriteRect,
-         hip_piextCommandBufferMemBufferWriteRect)
-  _PI_CL(piextEnqueueCommandBuffer, hip_piextEnqueueCommandBuffer)
-
-  _PI_CL(piextKernelSetArgMemObj, hip_piextKernelSetArgMemObj)
-  _PI_CL(piextKernelSetArgSampler, hip_piextKernelSetArgSampler)
-  _PI_CL(piPluginGetLastError, hip_piPluginGetLastError)
-  _PI_CL(piTearDown, hip_piTearDown)
-  _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer)
-  _PI_CL(piPluginGetBackendOption, hip_piPluginGetBackendOption)
+         pi2ur::piextCommandBufferMemBufferWriteRect)
+  _PI_CL(piextEnqueueCommandBuffer, pi2ur::piextEnqueueCommandBuffer)
 
   // Peer to Peer
-  _PI_CL(piextEnablePeerAccess, hip_piextEnablePeerAccess)
-  _PI_CL(piextDisablePeerAccess, hip_piextDisablePeerAccess)
-  _PI_CL(piextPeerAccessGetInfo, hip_piextPeerAccessGetInfo)
+  _PI_CL(piextEnablePeerAccess, pi2ur::piextEnablePeerAccess)
+  _PI_CL(piextDisablePeerAccess, pi2ur::piextDisablePeerAccess)
+  _PI_CL(piextPeerAccessGetInfo, pi2ur::piextPeerAccessGetInfo)
 
 #undef _PI_CL
 
@@ -6056,5 +223,3 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
 #endif
 
 } // extern "C"
-
-hipEvent_t _pi_platform::evBase_{nullptr};
diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp
index b5648a8de42ed..3ab21101228fe 100644
--- a/sycl/plugins/hip/pi_hip.hpp
+++ b/sycl/plugins/hip/pi_hip.hpp
@@ -39,925 +39,59 @@
 #include <string>
 #include <vector>
 
-extern "C" {
-
-/// \cond INGORE_BLOCK_IN_DOXYGEN
-pi_result hip_piContextRetain(pi_context);
-pi_result hip_piContextRelease(pi_context);
-pi_result hip_piDeviceRelease(pi_device);
-pi_result hip_piDeviceRetain(pi_device);
-pi_result hip_piProgramRetain(pi_program);
-pi_result hip_piProgramRelease(pi_program);
-pi_result hip_piQueueRelease(pi_queue);
-pi_result hip_piQueueRetain(pi_queue);
-pi_result hip_piMemRetain(pi_mem);
-pi_result hip_piMemRelease(pi_mem);
-pi_result hip_piKernelRetain(pi_kernel);
-pi_result hip_piKernelRelease(pi_kernel);
-/// \endcond
-}
+#include <ur/adapters/hip/command_buffer.hpp>
+#include <ur/adapters/hip/context.hpp>
+#include <ur/adapters/hip/device.hpp>
+#include <ur/adapters/hip/event.hpp>
+#include <ur/adapters/hip/kernel.hpp>
+#include <ur/adapters/hip/memory.hpp>
+#include <ur/adapters/hip/platform.hpp>
+#include <ur/adapters/hip/program.hpp>
+#include <ur/adapters/hip/queue.hpp>
+#include <ur/adapters/hip/sampler.hpp>
+
+#include "pi2ur.hpp"
 
 using _pi_stream_guard = std::unique_lock<std::mutex>;
 
-/// A PI platform stores all known PI devices,
-///  in the HIP plugin this is just a vector of
-///  available devices since initialization is done
-///  when devices are used.
-///
-struct _pi_platform {
-  static hipEvent_t evBase_; // HIP event used as base counter
-  std::vector<std::unique_ptr<_pi_device>> devices_;
+struct _pi_platform : ur_platform_handle_t_ {
+  using ur_platform_handle_t_::ur_platform_handle_t_;
 };
 
-/// PI device mapping to a hipDevice_t.
-/// Includes an observer pointer to the platform,
-/// and implements the reference counting semantics since
-/// HIP objects are not refcounted.
-///
-struct _pi_device {
-private:
-  using native_type = hipDevice_t;
-
-  native_type cuDevice_;
-  std::atomic_uint32_t refCount_;
-  pi_platform platform_;
-  pi_context context_;
-
-public:
-  _pi_device(native_type cuDevice, pi_platform platform)
-      : cuDevice_(cuDevice), refCount_{1}, platform_(platform) {}
-
-  native_type get() const noexcept { return cuDevice_; };
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
-
-  pi_platform get_platform() const noexcept { return platform_; };
-
-  void set_context(pi_context ctx) { context_ = ctx; };
-
-  pi_context get_context() { return context_; };
+struct _pi_device : ur_device_handle_t_ {
+  using ur_device_handle_t_::ur_device_handle_t_;
 };
 
-/// PI context mapping to a HIP context object.
-///
-/// There is no direct mapping between a HIP context and a PI context,
-/// main differences described below:
-///
-/// <b> HIP context vs PI context </b>
-///
-/// One of the main differences between the PI API and the HIP driver API is
-/// that the second modifies the state of the threads by assigning
-/// `hipCtx_t` objects to threads. `hipCtx_t` objects store data associated
-/// with a given device and control access to said device from the user side.
-/// PI API context are objects that are passed to functions, and not bound
-/// to threads.
-/// The _pi_context object doesn't implement this behavior, only holds the
-/// HIP context data. The RAII object \ref ScopedContext implements the active
-/// context behavior.
-///
-/// <b> Primary vs User-defined context </b>
-///
-/// HIP has two different types of context, the Primary context,
-/// which is usable by all threads on a given process for a given device, and
-/// the aforementioned custom contexts.
-/// HIP documentation, and performance analysis, indicates it is recommended
-/// to use Primary context whenever possible.
-/// Primary context is used as well by the HIP Runtime API.
-/// For PI applications to interop with HIP Runtime API, they have to use
-/// the primary context - and make that active in the thread.
-/// The `_pi_context` object can be constructed with a `kind` parameter
-/// that allows to construct a Primary or `user-defined` context, so that
-/// the PI object interface is always the same.
-///
-///  <b> Destructor callback </b>
-///
-///  Required to implement CP023, SYCL Extended Context Destruction,
-///  the PI Context can store a number of callback functions that will be
-///  called upon destruction of the PI Context.
-///  See proposal for details.
-///
-struct _pi_context {
-
-  struct deleter_data {
-    pi_context_extended_deleter function;
-    void *user_data;
-
-    void operator()() { function(user_data); }
-  };
-
-  using native_type = hipCtx_t;
-
-  enum class kind { primary, user_defined } kind_;
-  native_type hipContext_;
-  _pi_device *deviceId_;
-  std::atomic_uint32_t refCount_;
-
-  _pi_context(kind k, hipCtx_t ctxt, _pi_device *devId)
-      : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1} {
-    deviceId_->set_context(this);
-    hip_piDeviceRetain(deviceId_);
-  };
-
-  ~_pi_context() { hip_piDeviceRelease(deviceId_); }
-
-  void invoke_extended_deleters() {
-    std::lock_guard<std::mutex> guard(mutex_);
-    for (auto &deleter : extended_deleters_) {
-      deleter();
-    }
-  }
-
-  void set_extended_deleter(pi_context_extended_deleter function,
-                            void *user_data) {
-    std::lock_guard<std::mutex> guard(mutex_);
-    extended_deleters_.emplace_back(deleter_data{function, user_data});
-  }
-
-  pi_device get_device() const noexcept { return deviceId_; }
-
-  native_type get() const noexcept { return hipContext_; }
-
-  bool is_primary() const noexcept { return kind_ == kind::primary; }
-
-  pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
-
-  pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
-
-private:
-  std::mutex mutex_;
-  std::vector<deleter_data> extended_deleters_;
+struct _pi_context : ur_context_handle_t_ {
+  using ur_context_handle_t_::ur_context_handle_t_;
 };
 
-/// PI Mem mapping to HIP memory allocations, both data and texture/surface.
-/// \brief Represents non-SVM allocations on the HIP backend.
-/// Keeps tracks of all mapped regions used for Map/Unmap calls.
-/// Only one region can be active at the same time per allocation.
-struct _pi_mem {
-
-  // TODO: Move as much shared data up as possible
-  using pi_context = _pi_context *;
-
-  // Context where the memory object is accessibles
-  pi_context context_;
-
-  /// Reference counting of the handler
-  std::atomic_uint32_t refCount_;
-  enum class mem_type { buffer, surface } mem_type_;
-
-  /// A PI Memory object represents either plain memory allocations ("Buffers"
-  /// in OpenCL) or typed allocations ("Images" in OpenCL).
-  /// In HIP their API handlers are different. Whereas "Buffers" are allocated
-  /// as pointer-like structs, "Images" are stored in Textures or Surfaces
-  /// This union allows implementation to use either from the same handler.
-  union mem_ {
-    // Handler for plain, pointer-based HIP allocations
-    struct buffer_mem_ {
-      using native_type = hipDeviceptr_t;
-
-      // If this allocation is a sub-buffer (i.e., a view on an existing
-      // allocation), this is the pointer to the parent handler structure
-      pi_mem parent_;
-      // HIP handler for the pointer
-      native_type ptr_;
-
-      /// Pointer associated with this device on the host
-      void *hostPtr_;
-      /// Size of the allocation in bytes
-      size_t size_;
-      /// Offset of the active mapped region.
-      size_t mapOffset_;
-      /// Pointer to the active mapped region, if any
-      void *mapPtr_;
-      /// Original flags for the mapped region
-      pi_map_flags mapFlags_;
-
-      /** alloc_mode
-       * classic: Just a normal buffer allocated on the device via hip malloc
-       * use_host_ptr: Use an address on the host for the device
-       * copy_in: The data for the device comes from the host but the host
-       pointer is not available later for re-use
-       * alloc_host_ptr: Uses pinned-memory allocation
-      */
-      enum class alloc_mode {
-        classic,
-        use_host_ptr,
-        copy_in,
-        alloc_host_ptr
-      } allocMode_;
-
-      native_type get() const noexcept { return ptr_; }
-
-      native_type get_with_offset(size_t offset) const noexcept {
-        return reinterpret_cast<native_type>(reinterpret_cast<uint8_t *>(ptr_) +
-                                             offset);
-      }
-
-      void *get_void() const noexcept { return reinterpret_cast<void *>(ptr_); }
-
-      size_t get_size() const noexcept { return size_; }
-
-      void *get_map_ptr() const noexcept { return mapPtr_; }
-
-      size_t get_map_offset(void *ptr) const noexcept {
-        (void)ptr;
-        return mapOffset_;
-      }
-
-      /// Returns a pointer to data visible on the host that contains
-      /// the data on the device associated with this allocation.
-      /// The offset is used to index into the HIP allocation.
-      ///
-      void *map_to_ptr(size_t offset, pi_map_flags flags) noexcept {
-        assert(mapPtr_ == nullptr);
-        mapOffset_ = offset;
-        mapFlags_ = flags;
-        if (hostPtr_) {
-          mapPtr_ = static_cast<char *>(hostPtr_) + offset;
-        } else {
-          // TODO: Allocate only what is needed based on the offset
-          mapPtr_ = static_cast<void *>(malloc(this->get_size()));
-        }
-        return mapPtr_;
-      }
-
-      /// Detach the allocation from the host memory.
-      void unmap(void *ptr) noexcept {
-        (void)ptr;
-        assert(mapPtr_ != nullptr);
-
-        if (mapPtr_ != hostPtr_) {
-          free(mapPtr_);
-        }
-        mapPtr_ = nullptr;
-        mapOffset_ = 0;
-      }
-
-      pi_map_flags get_map_flags() const noexcept {
-        assert(mapPtr_ != nullptr);
-        return mapFlags_;
-      }
-    } buffer_mem_;
-
-    // Handler data for surface object (i.e. Images)
-    struct surface_mem_ {
-      hipArray *array_;
-      hipSurfaceObject_t surfObj_;
-      pi_mem_type imageType_;
-
-      hipArray *get_array() const noexcept { return array_; }
-
-      hipSurfaceObject_t get_surface() const noexcept { return surfObj_; }
-
-      pi_mem_type get_image_type() const noexcept { return imageType_; }
-    } surface_mem_;
-  } mem_;
-
-  /// Constructs the PI MEM handler for a non-typed allocation ("buffer")
-  _pi_mem(pi_context ctxt, pi_mem parent, mem_::buffer_mem_::alloc_mode mode,
-          hipDeviceptr_t ptr, void *host_ptr, size_t size)
-      : context_{ctxt}, refCount_{1}, mem_type_{mem_type::buffer} {
-    mem_.buffer_mem_.ptr_ = ptr;
-    mem_.buffer_mem_.parent_ = parent;
-    mem_.buffer_mem_.hostPtr_ = host_ptr;
-    mem_.buffer_mem_.size_ = size;
-    mem_.buffer_mem_.mapOffset_ = 0;
-    mem_.buffer_mem_.mapPtr_ = nullptr;
-    mem_.buffer_mem_.mapFlags_ = PI_MAP_WRITE;
-    mem_.buffer_mem_.allocMode_ = mode;
-    if (is_sub_buffer()) {
-      hip_piMemRetain(mem_.buffer_mem_.parent_);
-    } else {
-      hip_piContextRetain(context_);
-    }
-  };
-
-  /// Constructs the PI allocation for an Image object
-  _pi_mem(pi_context ctxt, hipArray *array, hipSurfaceObject_t surf,
-          pi_mem_type image_type, void *host_ptr)
-      : context_{ctxt}, refCount_{1}, mem_type_{mem_type::surface} {
-    (void)host_ptr;
-    mem_.surface_mem_.array_ = array;
-    mem_.surface_mem_.imageType_ = image_type;
-    mem_.surface_mem_.surfObj_ = surf;
-    hip_piContextRetain(context_);
-  }
-
-  ~_pi_mem() {
-    if (mem_type_ == mem_type::buffer) {
-      if (is_sub_buffer()) {
-        hip_piMemRelease(mem_.buffer_mem_.parent_);
-        return;
-      }
-    }
-    hip_piContextRelease(context_);
-  }
-
-  // TODO: Move as many shared funcs up as possible
-  bool is_buffer() const noexcept { return mem_type_ == mem_type::buffer; }
-
-  bool is_sub_buffer() const noexcept {
-    return (is_buffer() && (mem_.buffer_mem_.parent_ != nullptr));
-  }
-
-  bool is_image() const noexcept { return mem_type_ == mem_type::surface; }
-
-  pi_context get_context() const noexcept { return context_; }
-
-  pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
-
-  pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
+struct _pi_mem : ur_mem_handle_t_ {
+  using ur_mem_handle_t_::ur_mem_handle_t_;
 };
 
-/// PI queue mapping on to hipStream_t objects.
-///
-struct _pi_queue {
-  using native_type = hipStream_t;
-  static constexpr int default_num_compute_streams = 64;
-  static constexpr int default_num_transfer_streams = 16;
-
-  std::vector<native_type> compute_streams_;
-  std::vector<native_type> transfer_streams_;
-  // delay_compute_ keeps track of which streams have been recently reused and
-  // their next use should be delayed. If a stream has been recently reused it
-  // will be skipped the next time it would be selected round-robin style. When
-  // skipped, its delay flag is cleared.
-  std::vector<bool> delay_compute_;
-  // keep track of which streams have applied barrier
-  std::vector<bool> compute_applied_barrier_;
-  std::vector<bool> transfer_applied_barrier_;
-  _pi_context *context_;
-  _pi_device *device_;
-  pi_queue_properties properties_;
-  hipEvent_t barrier_event_ = nullptr;
-  hipEvent_t barrier_tmp_event_ = nullptr;
-  std::atomic_uint32_t refCount_;
-  std::atomic_uint32_t eventCount_;
-  std::atomic_uint32_t compute_stream_idx_;
-  std::atomic_uint32_t transfer_stream_idx_;
-  unsigned int num_compute_streams_;
-  unsigned int num_transfer_streams_;
-  unsigned int last_sync_compute_streams_;
-  unsigned int last_sync_transfer_streams_;
-  unsigned int flags_;
-  // When compute_stream_sync_mutex_ and compute_stream_mutex_ both need to be
-  // locked at the same time, compute_stream_sync_mutex_ should be locked first
-  // to avoid deadlocks
-  std::mutex compute_stream_sync_mutex_;
-  std::mutex compute_stream_mutex_;
-  std::mutex transfer_stream_mutex_;
-  std::mutex barrier_mutex_;
-
-  _pi_queue(std::vector<native_type> &&compute_streams,
-            std::vector<native_type> &&transfer_streams, _pi_context *context,
-            _pi_device *device, pi_queue_properties properties,
-            unsigned int flags)
-      : compute_streams_{std::move(compute_streams)},
-        transfer_streams_{std::move(transfer_streams)},
-        delay_compute_(compute_streams_.size(), false),
-        compute_applied_barrier_(compute_streams_.size()),
-        transfer_applied_barrier_(transfer_streams_.size()), context_{context},
-        device_{device}, properties_{properties}, refCount_{1}, eventCount_{0},
-        compute_stream_idx_{0}, transfer_stream_idx_{0},
-        num_compute_streams_{0}, num_transfer_streams_{0},
-        last_sync_compute_streams_{0}, last_sync_transfer_streams_{0},
-        flags_(flags) {
-    hip_piContextRetain(context_);
-    hip_piDeviceRetain(device_);
-  }
-
-  ~_pi_queue() {
-    hip_piContextRelease(context_);
-    hip_piDeviceRelease(device_);
-  }
-
-  void compute_stream_wait_for_barrier_if_needed(hipStream_t stream,
-                                                 pi_uint32 stream_i);
-  void transfer_stream_wait_for_barrier_if_needed(hipStream_t stream,
-                                                  pi_uint32 stream_i);
-
-  // get_next_compute/transfer_stream() functions return streams from
-  // appropriate pools in round-robin fashion
-  native_type get_next_compute_stream(pi_uint32 *stream_token = nullptr);
-  // this overload tries select a stream that was used by one of dependancies.
-  // If that is not possible returns a new stream. If a stream is reused it
-  // returns a lock that needs to remain locked as long as the stream is in use
-  native_type get_next_compute_stream(pi_uint32 num_events_in_wait_list,
-                                      const pi_event *event_wait_list,
-                                      _pi_stream_guard &guard,
-                                      pi_uint32 *stream_token = nullptr);
-  native_type get_next_transfer_stream();
-  native_type get() { return get_next_compute_stream(); };
-
-  bool has_been_synchronized(pi_uint32 stream_token) {
-    // stream token not associated with one of the compute streams
-    if (stream_token == std::numeric_limits<pi_uint32>::max()) {
-      return false;
-    }
-    return last_sync_compute_streams_ > stream_token;
-  }
-
-  bool can_reuse_stream(pi_uint32 stream_token) {
-    // stream token not associated with one of the compute streams
-    if (stream_token == std::numeric_limits<pi_uint32>::max()) {
-      return false;
-    }
-    // If the command represented by the stream token was not the last command
-    // enqueued to the stream we can not reuse the stream - we need to allow for
-    // commands enqueued after it and the one we are about to enqueue to run
-    // concurrently
-    bool is_last_command =
-        (compute_stream_idx_ - stream_token) <= compute_streams_.size();
-    // If there was a barrier enqueued to the queue after the command
-    // represented by the stream token we should not reuse the stream, as we can
-    // not take that stream into account for the bookkeeping for the next
-    // barrier - such a stream would not be synchronized with. Performance-wise
-    // it does not matter that we do not reuse the stream, as the work
-    // represented by the stream token is guaranteed to be complete by the
-    // barrier before any work we are about to enqueue to the stream will start,
-    // so the event does not need to be synchronized with.
-    return is_last_command && !has_been_synchronized(stream_token);
-  }
-
-  template <typename T> bool all_of(T &&f) {
-    {
-      std::lock_guard<std::mutex> compute_guard(compute_stream_mutex_);
-      unsigned int end =
-          std::min(static_cast<unsigned int>(compute_streams_.size()),
-                   num_compute_streams_);
-      if (!std::all_of(compute_streams_.begin(), compute_streams_.begin() + end,
-                       f))
-        return false;
-    }
-    {
-      std::lock_guard<std::mutex> transfer_guard(transfer_stream_mutex_);
-      unsigned int end =
-          std::min(static_cast<unsigned int>(transfer_streams_.size()),
-                   num_transfer_streams_);
-      if (!std::all_of(transfer_streams_.begin(),
-                       transfer_streams_.begin() + end, f))
-        return false;
-    }
-    return true;
-  }
-
-  template <typename T> void for_each_stream(T &&f) {
-    {
-      std::lock_guard<std::mutex> compute_guard(compute_stream_mutex_);
-      unsigned int end =
-          std::min(static_cast<unsigned int>(compute_streams_.size()),
-                   num_compute_streams_);
-      for (unsigned int i = 0; i < end; i++) {
-        f(compute_streams_[i]);
-      }
-    }
-    {
-      std::lock_guard<std::mutex> transfer_guard(transfer_stream_mutex_);
-      unsigned int end =
-          std::min(static_cast<unsigned int>(transfer_streams_.size()),
-                   num_transfer_streams_);
-      for (unsigned int i = 0; i < end; i++) {
-        f(transfer_streams_[i]);
-      }
-    }
-  }
-
-  template <bool ResetUsed = false, typename T> void sync_streams(T &&f) {
-    auto sync_compute = [&f, &streams = compute_streams_,
-                         &delay = delay_compute_](unsigned int start,
-                                                  unsigned int stop) {
-      for (unsigned int i = start; i < stop; i++) {
-        f(streams[i]);
-        delay[i] = false;
-      }
-    };
-    auto sync_transfer = [&f, &streams = transfer_streams_](unsigned int start,
-                                                            unsigned int stop) {
-      for (unsigned int i = start; i < stop; i++) {
-        f(streams[i]);
-      }
-    };
-    {
-      unsigned int size = static_cast<unsigned int>(compute_streams_.size());
-      std::lock_guard<std::mutex> compute_sync_guard(
-          compute_stream_sync_mutex_);
-      std::lock_guard<std::mutex> compute_guard(compute_stream_mutex_);
-      unsigned int start = last_sync_compute_streams_;
-      unsigned int end = num_compute_streams_ < size
-                             ? num_compute_streams_
-                             : compute_stream_idx_.load();
-      if (end - start >= size) {
-        sync_compute(0, size);
-      } else {
-        start %= size;
-        end %= size;
-        if (start < end) {
-          sync_compute(start, end);
-        } else {
-          sync_compute(start, size);
-          sync_compute(0, end);
-        }
-      }
-      if (ResetUsed) {
-        last_sync_compute_streams_ = end;
-      }
-    }
-    {
-      unsigned int size = static_cast<unsigned int>(transfer_streams_.size());
-      if (size > 0) {
-        std::lock_guard<std::mutex> transfer_guard(transfer_stream_mutex_);
-        unsigned int start = last_sync_transfer_streams_;
-        unsigned int end = num_transfer_streams_ < size
-                               ? num_transfer_streams_
-                               : transfer_stream_idx_.load();
-        if (end - start >= size) {
-          sync_transfer(0, size);
-        } else {
-          start %= size;
-          end %= size;
-          if (start < end) {
-            sync_transfer(start, end);
-          } else {
-            sync_transfer(start, size);
-            sync_transfer(0, end);
-          }
-        }
-        if (ResetUsed) {
-          last_sync_transfer_streams_ = end;
-        }
-      }
-    }
-  }
-
-  _pi_context *get_context() const { return context_; };
-
-  _pi_device *get_device() const { return device_; };
-
-  pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
-
-  pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
-
-  pi_uint32 get_next_event_id() noexcept { return ++eventCount_; }
+struct _pi_queue : ur_queue_handle_t_ {
+  using ur_queue_handle_t_::ur_queue_handle_t_;
 };
 
-typedef void (*pfn_notify)(pi_event event, pi_int32 eventCommandStatus,
-                           void *userData);
-/// PI Event mapping to hipEvent_t
-///
-struct _pi_event {
-public:
-  using native_type = hipEvent_t;
-
-  pi_result record();
-
-  pi_result wait();
-
-  pi_result start();
-
-  native_type get() const noexcept { return evEnd_; };
-
-  pi_queue get_queue() const noexcept { return queue_; }
-
-  hipStream_t get_stream() const noexcept { return stream_; }
-
-  pi_uint32 get_compute_stream_token() const noexcept { return streamToken_; }
-
-  pi_command_type get_command_type() const noexcept { return commandType_; }
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
-
-  bool is_recorded() const noexcept { return isRecorded_; }
-
-  bool is_started() const noexcept { return isStarted_; }
-
-  bool is_completed() const noexcept;
-
-  pi_int32 get_execution_status() const noexcept {
-
-    if (!is_recorded()) {
-      return PI_EVENT_SUBMITTED;
-    }
-
-    if (!is_completed()) {
-      return PI_EVENT_RUNNING;
-    }
-    return PI_EVENT_COMPLETE;
-  }
-
-  pi_context get_context() const noexcept { return context_; };
-
-  pi_uint32 increment_reference_count() { return ++refCount_; }
-
-  pi_uint32 decrement_reference_count() { return --refCount_; }
-
-  pi_uint32 get_event_id() const noexcept { return eventId_; }
-
-  // Returns the counter time when the associated command(s) were enqueued
-  //
-  pi_uint64 get_queued_time() const;
-
-  // Returns the counter time when the associated command(s) started execution
-  //
-  pi_uint64 get_start_time() const;
-
-  // Returns the counter time when the associated command(s) completed
-  //
-  pi_uint64 get_end_time() const;
-
-  // construct a native HIP. This maps closely to the underlying HIP event.
-  static pi_event
-  make_native(pi_command_type type, pi_queue queue, hipStream_t stream,
-              pi_uint32 stream_token = std::numeric_limits<pi_uint32>::max()) {
-    return new _pi_event(type, queue->get_context(), queue, stream,
-                         stream_token);
-  }
-
-  pi_result release();
-
-  ~_pi_event();
-
-private:
-  // This constructor is private to force programmers to use the make_native /
-  // make_user static members in order to create a pi_event for HIP.
-  _pi_event(pi_command_type type, pi_context context, pi_queue queue,
-            hipStream_t stream, pi_uint32 stream_token);
-
-  pi_command_type commandType_; // The type of command associated with event.
-
-  std::atomic_uint32_t refCount_; // Event reference count.
-
-  bool hasBeenWaitedOn_; // Signifies whether the event has been waited
-                         // on through a call to wait(), which implies
-                         // that it has completed.
-
-  bool isRecorded_; // Signifies wether a native HIP event has been recorded
-                    // yet.
-  bool isStarted_;  // Signifies wether the operation associated with the
-                    // PI event has started or not
-                    //
-
-  pi_uint32 streamToken_;
-  pi_uint32 eventId_; // Queue identifier of the event.
-
-  native_type evEnd_; // HIP event handle. If this _pi_event represents a user
-                      // event, this will be nullptr.
-
-  native_type evStart_; // HIP event handle associated with the start
-
-  native_type evQueued_; // HIP event handle associated with the time
-                         // the command was enqueued
-
-  pi_queue queue_; // pi_queue associated with the event. If this is a user
-                   // event, this will be nullptr.
-
-  hipStream_t stream_; // hipStream_t associated with the event. If this is a
-                       // user event, this will be uninitialized.
-
-  pi_context context_; // pi_context associated with the event. If this is a
-                       // native event, this will be the same context associated
-                       // with the queue_ member.
+struct _pi_event : ur_event_handle_t_ {
+  using ur_event_handle_t_::ur_event_handle_t_;
 };
 
-/// Implementation of PI Program on HIP Module object
-///
-struct _pi_program {
-  using native_type = hipModule_t;
-  native_type module_;
-  const char *binary_;
-  size_t binarySizeInBytes_;
-  std::atomic_uint32_t refCount_;
-  _pi_context *context_;
-
-  constexpr static size_t MAX_LOG_SIZE = 8192u;
-
-  char errorLog_[MAX_LOG_SIZE], infoLog_[MAX_LOG_SIZE];
-  std::string buildOptions_;
-  pi_program_build_status buildStatus_ = PI_PROGRAM_BUILD_STATUS_NONE;
-
-  _pi_program(pi_context ctxt);
-  ~_pi_program();
-
-  pi_result set_binary(const char *binary, size_t binarySizeInBytes);
-
-  pi_result build_program(const char *build_options);
-
-  pi_context get_context() const { return context_; };
-
-  native_type get() const noexcept { return module_; };
-
-  pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
-
-  pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
+struct _pi_program : ur_program_handle_t_ {
+  using ur_program_handle_t_::ur_program_handle_t_;
 };
 
-/// Implementation of a PI Kernel for HIP
-///
-/// PI Kernels are used to set kernel arguments,
-/// creating a state on the Kernel object for a given
-/// invocation. This is not the case of HIPFunction objects,
-/// which are simply passed together with the arguments on the invocation.
-/// The PI Kernel implementation for HIP stores the list of arguments,
-/// argument sizes and offsets to emulate the interface of PI Kernel,
-/// saving the arguments for the later dispatch.
-/// Note that in PI API, the Local memory is specified as a size per
-/// individual argument, but in HIP only the total usage of shared
-/// memory is required since it is not passed as a parameter.
-/// A compiler pass converts the PI API local memory model into the
-/// HIP shared model. This object simply calculates the total of
-/// shared memory, and the initial offsets of each parameter.
-///
-struct _pi_kernel {
-  using native_type = hipFunction_t;
-
-  native_type function_;
-  native_type functionWithOffsetParam_;
-  std::string name_;
-  pi_context context_;
-  pi_program program_;
-  std::atomic_uint32_t refCount_;
-
-  /// Structure that holds the arguments to the kernel.
-  /// Note earch argument size is known, since it comes
-  /// from the kernel signature.
-  /// This is not something can be queried from the HIP API
-  /// so there is a hard-coded size (\ref MAX_PARAM_BYTES)
-  /// and a storage.
-  ///
-  struct arguments {
-    static constexpr size_t MAX_PARAM_BYTES = 4000u;
-    using args_t = std::array<char, MAX_PARAM_BYTES>;
-    using args_size_t = std::vector<size_t>;
-    using args_index_t = std::vector<void *>;
-    args_t storage_;
-    args_size_t paramSizes_;
-    args_index_t indices_;
-    args_size_t offsetPerIndex_;
-
-    std::uint32_t implicitOffsetArgs_[3] = {0, 0, 0};
-
-    arguments() {
-      // Place the implicit offset index at the end of the indicies collection
-      indices_.emplace_back(&implicitOffsetArgs_);
-    }
-
-    /// Adds an argument to the kernel.
-    /// If the argument existed before, it is replaced.
-    /// Otherwise, it is added.
-    /// Gaps are filled with empty arguments.
-    /// Implicit offset argument is kept at the back of the indices collection.
-    void add_arg(size_t index, size_t size, const void *arg,
-                 size_t localSize = 0) {
-      if (index + 2 > indices_.size()) {
-        // Move implicit offset argument index with the end
-        indices_.resize(index + 2, indices_.back());
-        // Ensure enough space for the new argument
-        paramSizes_.resize(index + 1);
-        offsetPerIndex_.resize(index + 1);
-      }
-      paramSizes_[index] = size;
-      // calculate the insertion point on the array
-      size_t insertPos = std::accumulate(std::begin(paramSizes_),
-                                         std::begin(paramSizes_) + index, 0);
-      // Update the stored value for the argument
-      std::memcpy(&storage_[insertPos], arg, size);
-      indices_[index] = &storage_[insertPos];
-      offsetPerIndex_[index] = localSize;
-    }
-
-    void add_local_arg(size_t index, size_t size) {
-      size_t localOffset = this->get_local_size();
-
-      // maximum required alignment is the size of the largest vector type
-      const size_t max_alignment = sizeof(double) * 16;
-
-      // for arguments smaller than the maximum alignment simply align to the
-      // size of the argument
-      const size_t alignment = std::min(max_alignment, size);
-
-      // align the argument
-      size_t alignedLocalOffset = localOffset;
-      if (localOffset % alignment != 0) {
-        alignedLocalOffset += alignment - (localOffset % alignment);
-      }
-
-      add_arg(index, sizeof(size_t), (const void *)&(alignedLocalOffset),
-              size + (alignedLocalOffset - localOffset));
-    }
-
-    void set_implicit_offset(size_t size, std::uint32_t *implicitOffset) {
-      assert(size == sizeof(std::uint32_t) * 3);
-      std::memcpy(implicitOffsetArgs_, implicitOffset, size);
-    }
-
-    void clear_local_size() {
-      std::fill(std::begin(offsetPerIndex_), std::end(offsetPerIndex_), 0);
-    }
-
-    args_index_t get_indices() const noexcept { return indices_; }
-
-    pi_uint32 get_local_size() const {
-      return std::accumulate(std::begin(offsetPerIndex_),
-                             std::end(offsetPerIndex_), 0);
-    }
-  } args_;
-
-  _pi_kernel(hipFunction_t func, hipFunction_t funcWithOffsetParam,
-             const char *name, pi_program program, pi_context ctxt)
-      : function_{func}, functionWithOffsetParam_{funcWithOffsetParam},
-        name_{name}, context_{ctxt}, program_{program}, refCount_{1} {
-    hip_piProgramRetain(program_);
-    hip_piContextRetain(context_);
-  }
-
-  _pi_kernel(hipFunction_t func, const char *name, pi_program program,
-             pi_context ctxt)
-      : _pi_kernel{func, nullptr, name, program, ctxt} {}
-
-  ~_pi_kernel() {
-    hip_piProgramRelease(program_);
-    hip_piContextRelease(context_);
-  }
-
-  pi_program get_program() const noexcept { return program_; }
-
-  pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
-
-  pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
-
-  native_type get() const noexcept { return function_; };
-
-  native_type get_with_offset_parameter() const noexcept {
-    return functionWithOffsetParam_;
-  };
-
-  bool has_with_offset_parameter() const noexcept {
-    return functionWithOffsetParam_ != nullptr;
-  }
-
-  pi_context get_context() const noexcept { return context_; };
-
-  const char *get_name() const noexcept { return name_.c_str(); }
-
-  /// Returns the number of arguments, excluding the implicit global offset.
-  /// Note this only returns the current known number of arguments, not the
-  /// real one required by the kernel, since this cannot be queried from
-  /// the HIP Driver API
-  pi_uint32 get_num_args() const noexcept { return args_.indices_.size() - 1; }
-
-  void set_kernel_arg(int index, size_t size, const void *arg) {
-    args_.add_arg(index, size, arg);
-  }
-
-  void set_kernel_local_arg(int index, size_t size) {
-    args_.add_local_arg(index, size);
-  }
-
-  void set_implicit_offset_arg(size_t size, std::uint32_t *implicitOffset) {
-    args_.set_implicit_offset(size, implicitOffset);
-  }
-
-  arguments::args_index_t get_arg_indices() const {
-    return args_.get_indices();
-  }
-
-  pi_uint32 get_local_size() const noexcept { return args_.get_local_size(); }
-
-  void clear_local_size() { args_.clear_local_size(); }
+struct _pi_kernel : ur_kernel_handle_t_ {
+  using ur_kernel_handle_t_::ur_kernel_handle_t_;
 };
 
-/// Implementation of samplers for HIP
-///
-/// Sampler property layout:
-/// | 31 30 ... 6 5 |      4 3 2      |     1      |         0        |
-/// |      N/A      | addressing mode | fiter mode | normalize coords |
-struct _pi_sampler {
-  std::atomic_uint32_t refCount_;
-  pi_uint32 props_;
-  pi_context context_;
-
-  _pi_sampler(pi_context context)
-      : refCount_(1), props_(0), context_(context) {}
-
-  pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
-
-  pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
-
-  pi_uint32 get_reference_count() const noexcept { return refCount_; }
+struct _pi_sampler : ur_sampler_handle_t_ {
+  using ur_sampler_handle_t_::ur_sampler_handle_t_;
 };
 
-struct _pi_ext_command_buffer {};
-
-// -------------------------------------------------------------
-// Helper types and functions
-//
+struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ {
+  using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_;
+};
 
 #endif // PI_HIP_HPP
diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 88c631c3b6795..fb6e850b7329f 100755
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -174,6 +174,65 @@ if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
   )
 endif()
 
+if ("hip" IN_LIST SYCL_ENABLE_PLUGINS)
+  # Build the HIP adapter as the shared library ur_adapter_hip
+  add_sycl_library("ur_adapter_hip" SHARED 
+    SOURCES
+      "ur/ur.hpp"
+      "ur/ur.cpp"
+      "ur/usm_allocator.cpp"
+      "ur/usm_allocator.hpp"
+      "ur/adapters/hip/common.cpp"
+      "ur/adapters/hip/common.hpp"
+      "ur/adapters/hip/context.cpp"
+      "ur/adapters/hip/context.hpp"
+      "ur/adapters/hip/device.cpp"
+      "ur/adapters/hip/device.hpp"
+      "ur/adapters/hip/enqueue.cpp"
+      "ur/adapters/hip/event.cpp"
+      "ur/adapters/hip/event.hpp"
+      "ur/adapters/hip/platform.cpp"
+      "ur/adapters/hip/platform.hpp"
+      "ur/adapters/hip/memory.cpp"
+      "ur/adapters/hip/memory.hpp"
+      "ur/adapters/hip/sampler.cpp"
+      "ur/adapters/hip/sampler.hpp"
+      "ur/adapters/hip/usm.cpp"
+      "ur/adapters/hip/program.cpp"
+      "ur/adapters/hip/program.hpp"
+      "ur/adapters/hip/kernel.cpp"
+      "ur/adapters/hip/kernel.hpp"
+      "ur/adapters/hip/queue.cpp"
+      "ur/adapters/hip/queue.hpp"
+      "ur/adapters/hip/command_buffer.hpp"
+      "ur/adapters/hip/command_buffer.cpp"
+      "ur/adapters/hip/usm_p2p.cpp"
+      "ur/adapters/hip/ur_interface_loader.cpp"
+    INCLUDE_DIRS
+      ${sycl_inc_dir}
+    LIBRARIES
+      UnifiedRuntime-Headers
+      Threads::Threads
+  )
+
+  set_target_properties("ur_adapter_hip" PROPERTIES
+    VERSION "0.0.0"
+    SOVERSION "0"
+  )
+
+  if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD")
+    target_link_libraries(ur_adapter_hip PUBLIC rocmdrv)
+    # Define __HIP_PLATFORM_AMD__ so the adapter sources select the AMD paths
+    target_compile_definitions(ur_adapter_hip PRIVATE __HIP_PLATFORM_AMD__)
+  elseif("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA")
+    target_link_libraries(ur_adapter_hip PUBLIC cudadrv cudart)
+    # Define __HIP_PLATFORM_NVIDIA__ so the sources select the NVIDIA paths
+    target_compile_definitions(ur_adapter_hip PRIVATE __HIP_PLATFORM_NVIDIA__)
+  else()
+    message(FATAL_ERROR "Unspecified PI HIP platform please set SYCL_BUILD_PI_HIP_PLATFORM to 'AMD' or 'NVIDIA'")
+  endif()
+endif()
+
 if (TARGET UnifiedRuntimeLoader)
   set_target_properties(hello_world PROPERTIES EXCLUDE_FROM_ALL 1 EXCLUDE_FROM_DEFAULT_BUILD 1)
   # Install the UR loader.
diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp
index 12cba34aeb2a3..80b48c58b4c6a 100644
--- a/sycl/plugins/unified_runtime/pi2ur.hpp
+++ b/sycl/plugins/unified_runtime/pi2ur.hpp
@@ -291,7 +291,7 @@ inline pi_result ur2piPlatformInfoValue(ur_platform_info_t ParamName,
       case UR_PLATFORM_BACKEND_CUDA:
         return PI_EXT_PLATFORM_BACKEND_CUDA;
       case UR_PLATFORM_BACKEND_HIP:
-        return PI_EXT_PLATFORM_BACKEND_CUDA;
+        return PI_EXT_PLATFORM_BACKEND_HIP;
       default:
         die("UR_PLATFORM_INFO_BACKEND: unhandled value");
       }
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.cpp
new file mode 100644
index 0000000000000..4a559e33a1273
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.cpp
@@ -0,0 +1,129 @@
+//===--------- command_buffer.cpp - HIP Adapter ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "command_buffer.hpp"
+#include "common.hpp"
+
+/// Stub implementations of UR experimental feature command-buffers
+
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
+    ur_context_handle_t, ur_device_handle_t,
+    const ur_exp_command_buffer_desc_t *, ur_exp_command_buffer_handle_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urCommandBufferRetainExp(ur_exp_command_buffer_handle_t) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
+    ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t,
+    const size_t *, const size_t *, const size_t *, uint32_t,
+    const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp(
+    ur_exp_command_buffer_handle_t, void *, const void *, size_t, uint32_t,
+    const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
+    ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, size_t,
+    size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp(
+    ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t,
+    ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t,
+    size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT
+ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp(
+    ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t,
+    const void *, uint32_t, const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT
+ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp(
+    ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, void *,
+    uint32_t, const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT
+ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp(
+    ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t,
+    ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *,
+    uint32_t, const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT
+ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp(
+    ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t,
+    ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *,
+    uint32_t, const ur_exp_command_buffer_sync_point_t *,
+    ur_exp_command_buffer_sync_point_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
+    ur_exp_command_buffer_handle_t, ur_queue_handle_t, uint32_t,
+    const ur_event_handle_t *, ur_event_handle_t *) {
+  detail::ur::die("Experimental Command-buffer feature is not "
+                  "implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // not reached: die() terminates
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.hpp
new file mode 100644
index 0000000000000..9bcdbfeccf17d
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/command_buffer.hpp
@@ -0,0 +1,13 @@
+//===--------- command_buffer.hpp - HIP Adapter ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include <ur/ur.hpp>
+
+/// Stub handle: command-buffers are not implemented for HIP, so no state.
+struct ur_exp_command_buffer_handle_t_ {};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/common.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/common.cpp
new file mode 100644
index 0000000000000..36740fb0147a4
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/common.cpp
@@ -0,0 +1,103 @@
+//===--------- common.cpp - HIP Adapter -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#include "common.hpp"
+
+#include <sstream>
+
+ur_result_t mapErrorUR(hipError_t Result) { // HIP status -> UR result code
+  switch (Result) {
+  case hipSuccess:
+    return UR_RESULT_SUCCESS;
+  case hipErrorInvalidContext:
+    return UR_RESULT_ERROR_INVALID_CONTEXT;
+  case hipErrorInvalidDevice:
+    return UR_RESULT_ERROR_INVALID_DEVICE;
+  case hipErrorInvalidValue:
+    return UR_RESULT_ERROR_INVALID_VALUE;
+  case hipErrorOutOfMemory:
+    return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; // NOTE(review): device OOM too?
+  case hipErrorLaunchOutOfResources:
+    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  default: // every other HIP error has no dedicated UR code here
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+}
+
+// Returns on hipSuccess; otherwise reports, optionally aborts, then throws.
+ur_result_t checkErrorUR(hipError_t Result, const char *Function, int Line,
+                         const char *File) {
+  if (Result == hipSuccess)
+    return UR_RESULT_SUCCESS;
+
+  // Either suppression variable (legacy PI name or UR name) silences the report.
+  if (std::getenv("SYCL_PI_SUPPRESS_ERROR_MESSAGE") == nullptr &&
+      std::getenv("UR_SUPPRESS_ERROR_MESSAGE") == nullptr) {
+    const char *ErrorName = hipGetErrorName(Result);
+    const char *ErrorString = hipGetErrorString(Result);
+    std::cerr << "\nUR HIP ERROR:"
+              << "\n\tValue:           " << Result
+              << "\n\tName:            " << ErrorName
+              << "\n\tDescription:     " << ErrorString
+              << "\n\tFunction:        " << Function
+              << "\n\tSource Location: " << File << ":" << Line << "\n\n";
+  }
+
+  // Either abort variable turns a failed HIP call into a hard abort.
+  if (std::getenv("PI_HIP_ABORT") != nullptr ||
+      std::getenv("UR_HIP_ABORT") != nullptr) {
+    std::abort();
+  }
+
+  throw mapErrorUR(Result); // failures surface as a thrown ur_result_t
+}
+
+hipError_t getHipVersionString(std::string &Version) {
+  int DriverVersion = 0;
+  auto Result = hipDriverGetVersion(&DriverVersion);
+  if (Result != hipSuccess) {
+    return Result; // Version is left untouched on failure
+  }
+  // Decoded as 1000 * major + 10 * minor. NOTE(review): confirm for AMD HIP.
+  std::stringstream Stream;
+  Stream << "HIP " << DriverVersion / 1000 << "." << DriverVersion % 1000 / 10;
+  Version = Stream.str();
+  return Result;
+}
+
+void detail::ur::die(const char *pMessage) {
+  std::cerr << "ur_die: " << pMessage << '\n';
+  std::terminate(); // does not return
+}
+
+void detail::ur::assertion(bool Condition, const char *pMessage) {
+  if (!Condition) // checked unconditionally; there is no NDEBUG guard here
+    die(pMessage);
+}
+
+void detail::ur::hipPrint(const char *pMessage) {
+  std::cerr << "ur_print: " << pMessage << '\n'; // stderr only, then returns
+}
+
+// Thread-local state reported for UR_RESULT_ADAPTER_SPECIFIC_ERROR
+thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS;
+thread_local char ErrorMessage[MaxMessageSize];
+
+// Stores a message and its result code in the calling thread's error state
+[[maybe_unused]] void setErrorMessage(const char *pMessage,
+                                      ur_result_t ErrorCode) {
+  assert(strlen(pMessage) < MaxMessageSize);
+  strncpy(ErrorMessage, pMessage, MaxMessageSize - 1); // last byte unwritten
+  ErrorMessageCode = ErrorCode;
+}
+
+// Returns the calling thread's last adapter-specific message and code; common
+// implementation that can be shared between adapters. Platform is unused.
+ur_result_t urGetLastResult(ur_platform_handle_t, const char **ppMessage) {
+  *ppMessage = &ErrorMessage[0];
+  return ErrorMessageCode;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/common.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/common.hpp
new file mode 100644
index 0000000000000..b1ebaf4c84df1
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/common.hpp
@@ -0,0 +1,176 @@
+//===--------- common.hpp - HIP Adapter -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include <hip/hip_runtime.h>
+#include <ur/ur.hpp>
+
+// Hipify doesn't support cuArrayGetDescriptor. On AMD the hipArray can just
+// be indexed, but on NVIDIA it is an opaque type and needs to go through
+// cuArrayGetDescriptor, so implement a utility function to get the array
+// properties.
+inline void getArrayDesc(hipArray *Array, hipArray_Format &Format,
+                         size_t &Channels) {
+#if defined(__HIP_PLATFORM_AMD__)
+  Channels = Array->NumChannels;
+  Format = Array->Format;
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+  CUDA_ARRAY_DESCRIPTOR Descriptor;
+  cuArrayGetDescriptor(&Descriptor, reinterpret_cast<CUarray>(Array));
+  // The status returned by the driver call is not inspected here.
+  Channels = Descriptor.NumChannels;
+  Format = Descriptor.Format;
+#else
+#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+}
+
+// The HIP headers for the NVIDIA platform guard hipArray3DCreate behind
+// __CUDACC__. That guard does not seem to be required and the UR HIP adapter
+// is not built with nvcc, so the translation function is added here.
+#if defined(__HIP_PLATFORM_NVIDIA__) && !defined(__CUDACC__)
+inline static hipError_t
+hipArray3DCreate(hiparray *pHandle,
+                 const HIP_ARRAY3D_DESCRIPTOR *pAllocateArray) {
+  return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray));
+}
+#endif
+
+// hipArray gets turned into cudaArray when using the HIP NVIDIA platform, and
+// some CUDA APIs use cudaArray* while others use CUarray. These two represent
+// the same type, however when building cudaArray appears as an opaque type,
+// so it needs to be explicitly cast to CUarray. In order for this to work for
+// both AMD and NVIDIA we introduce a second hipArray type that will be
+// CUarray for NVIDIA and hipArray* for AMD, so that the explicit casts can be
+// placed where NVIDIA needs them while being no-ops for AMD.
+#if defined(__HIP_PLATFORM_NVIDIA__)
+using hipCUarray = CUarray;
+#elif defined(__HIP_PLATFORM_AMD__)
+using hipCUarray = hipArray *;
+#else
+#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+
+// Add HIP-to-CUDA memory type defines that are missing on the NVIDIA platform
+#if defined(__HIP_PLATFORM_NVIDIA__)
+#define hipMemoryType CUmemorytype
+#define hipMemoryTypeHost CU_MEMORYTYPE_HOST
+#define hipMemoryTypeDevice CU_MEMORYTYPE_DEVICE
+#define hipMemoryTypeArray CU_MEMORYTYPE_ARRAY
+#define hipMemoryTypeUnified CU_MEMORYTYPE_UNIFIED
+#endif
+
+ur_result_t mapErrorUR(hipError_t Result); // HIP error -> UR result code
+
+ur_result_t checkErrorUR(hipError_t Result, const char *Function, int Line,
+                         const char *File);
+
+#define UR_CHECK_ERROR(result)                                                 \
+  checkErrorUR(result, __func__, __LINE__, __FILE__)
+
+hipError_t getHipVersionString(std::string &Version); // "HIP <major>.<minor>"
+
+constexpr size_t MaxMessageSize = 256; // capacity of ErrorMessage, in chars
+extern thread_local ur_result_t ErrorMessageCode;
+extern thread_local char ErrorMessage[MaxMessageSize];
+
+// Utility function for storing a message and the result code that goes with it
+[[maybe_unused]] void setErrorMessage(const char *Message,
+                                      ur_result_t ErrorCode);
+
+/// ------ Error handling, matching OpenCL plugin semantics.
+namespace detail {
+namespace ur {
+
+// Reports a fatal error and never returns (the [[noreturn]] attribute also
+// keeps the compiler from printing warnings in callers).
+// TODO: Probably change that to throw a catchable exception,
+//       but for now it is useful to see every failure.
+[[noreturn]] void die(const char *pMessage);
+
+// Reports error messages
+void hipPrint(const char *pMessage);
+// Calls die(pMessage) when Condition is false; the message is optional
+void assertion(bool Condition, const char *pMessage = nullptr);
+
+} // namespace ur
+} // namespace detail
+
+/// RAII object that calls the reference count release function on the held UR
+/// object on destruction.
+///
+/// The `dismiss` function stops the release from happening on destruction.
+template <typename T> class ReleaseGuard {
+private:
+  T Captive;
+
+  static ur_result_t callRelease(ur_device_handle_t Captive) {
+    return urDeviceRelease(Captive);
+  }
+
+  static ur_result_t callRelease(ur_context_handle_t Captive) {
+    return urContextRelease(Captive);
+  }
+
+  static ur_result_t callRelease(ur_mem_handle_t Captive) {
+    return urMemRelease(Captive);
+  }
+
+  static ur_result_t callRelease(ur_program_handle_t Captive) {
+    return urProgramRelease(Captive);
+  }
+
+  static ur_result_t callRelease(ur_kernel_handle_t Captive) {
+    return urKernelRelease(Captive);
+  }
+
+  static ur_result_t callRelease(ur_queue_handle_t Captive) {
+    return urQueueRelease(Captive);
+  }
+
+  static ur_result_t callRelease(ur_event_handle_t Captive) {
+    return urEventRelease(Captive);
+  }
+
+public:
+  ReleaseGuard() = delete;
+  /// Obj can be `nullptr`.
+  explicit ReleaseGuard(T Obj) : Captive(Obj) {}
+  ReleaseGuard(ReleaseGuard &&Other) noexcept : Captive(Other.Captive) {
+    Other.Captive = nullptr;
+  }
+
+  ReleaseGuard(const ReleaseGuard &) = delete;
+
+  /// Calls the related UR object release function if the object held is not
+  /// `nullptr`, i.e. unless `dismiss` has been called or it was moved from.
+  ~ReleaseGuard() {
+    if (Captive != nullptr) {
+      ur_result_t Status = callRelease(Captive);
+      if (Status != UR_RESULT_SUCCESS) {
+        // A failed release is an implementation or asynchronous HIP error;
+        // the program state is compromised and likely unrecoverable.
+        detail::ur::die("Unrecoverable program state reached in ReleaseGuard");
+      }
+    }
+  }
+
+  ReleaseGuard &operator=(const ReleaseGuard &) = delete;
+
+  /// Swaps with `Other`, which then releases the object this guard held.
+  ReleaseGuard &operator=(ReleaseGuard &&Other) noexcept {
+    T Previous = Captive;
+    Captive = Other.Captive;
+    Other.Captive = Previous;
+    return *this;
+  }
+
+  /// End the guard and do not release the reference count of the held
+  /// UR object.
+  void dismiss() { Captive = nullptr; }
+};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp
new file mode 100644
index 0000000000000..fe392e36cc225
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/context.cpp
@@ -0,0 +1,161 @@
+//===--------- context.cpp - HIP Adapter ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "context.hpp"
+
+/// Create a UR HIP context.
+///
+/// Creates a user-defined (scoped) HIP context on the first device in
+/// `phDevices` and puts the previously current HIP context, if any, back.
+///
+UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
+    uint32_t DeviceCount, const ur_device_handle_t *phDevices,
+    const ur_context_properties_t *, ur_context_handle_t *phContext) {
+  std::ignore = DeviceCount; // only otherwise used by the assert below
+  assert(DeviceCount == 1);
+  ur_result_t RetErr = UR_RESULT_SUCCESS;
+
+  std::unique_ptr<ur_context_handle_t_> ContextPtr{nullptr};
+  try {
+    hipCtx_t Current = nullptr;
+
+    // Create a scoped context, remembering which one was current before.
+    hipCtx_t NewContext;
+    UR_CHECK_ERROR(hipCtxGetCurrent(&Current));
+    RetErr = UR_CHECK_ERROR(
+        hipCtxCreate(&NewContext, hipDeviceMapHost, phDevices[0]->get()));
+    ContextPtr = std::unique_ptr<ur_context_handle_t_>(new ur_context_handle_t_{
+        ur_context_handle_t_::kind::UserDefined, NewContext, *phDevices});
+
+    static std::once_flag InitFlag;
+    std::call_once(
+        InitFlag,
+        [](ur_result_t &) {
+          // Runs once: record the base event on the default stream (0)
+          UR_CHECK_ERROR(hipEventCreateWithFlags(&ur_platform_handle_t_::EvBase,
+                                                 hipEventDefault));
+          UR_CHECK_ERROR(hipEventRecord(ur_platform_handle_t_::EvBase, 0));
+        },
+        RetErr);
+
+    // Keep the previously active context on top of the stack, because
+    // `hipCtxCreate` implicitly replaces it with the new context otherwise.
+    // When no context was active before the call there is nothing to
+    // restore and the newly created context stays current.
+    if (Current != nullptr) {
+      UR_CHECK_ERROR(hipCtxSetCurrent(Current));
+    }
+
+    *phContext = ContextPtr.release();
+  } catch (ur_result_t Err) {
+    RetErr = Err;
+  } catch (...) { // anything that is not a ur_result_t
+    RetErr = UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+  return RetErr;
+}
+
+/// Answers context property queries for the HIP adapter. Properties that are
+/// not handled here yield UR_RESULT_ERROR_INVALID_ENUMERATION.
+///
+UR_APIEXPORT ur_result_t UR_APICALL
+urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName,
+                 size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+
+  switch (uint32_t{propName}) {
+  case UR_CONTEXT_INFO_NUM_DEVICES:
+    // A context of this adapter always reports a single device.
+    return ReturnValue(1);
+  case UR_CONTEXT_INFO_DEVICES:
+    return ReturnValue(hContext->getDevice());
+  case UR_CONTEXT_INFO_REFERENCE_COUNT:
+    return ReturnValue(hContext->getReferenceCount());
+  case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT:
+    // 2D USM memcpy is supported.
+    return ReturnValue(true);
+  case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT:
+    // 2D USM fill is currently not supported.
+    return ReturnValue(false);
+  case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
+  case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
+  case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES:
+  case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES:
+    // These queries should be dealt with in context_impl.cpp by calling the
+    // queries of each device separately and building the intersection set.
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  default:
+    // Every other property is unknown to this adapter.
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  }
+}
+
+/// Drops one reference; the last release tears the context down.
+UR_APIEXPORT ur_result_t UR_APICALL
+urContextRelease(ur_context_handle_t hContext) {
+  if (hContext->decrementReferenceCount() > 0) {
+    return UR_RESULT_SUCCESS;
+  }
+  hContext->invokeExtendedDeleters();
+  std::unique_ptr<ur_context_handle_t_> context{hContext};
+  // UR_CHECK_ERROR throws ur_result_t; return it rather than let it escape.
+  try {
+    if (hContext->isPrimary()) {
+      // Primary context is not destroyed, but released
+      hipDevice_t HIPDev = hContext->getDevice()->get();
+      hipCtx_t Current = nullptr;
+      UR_CHECK_ERROR(hipCtxPopCurrent(&Current));
+      return UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDev));
+    }
+    hipCtx_t HIPCtxt = hContext->get();
+    // hipCtxSynchronize is not supported for AMD platform so we can just
+    // destroy the context, for NVIDIA make sure it's synchronized.
+#if defined(__HIP_PLATFORM_NVIDIA__)
+    hipCtx_t Current = nullptr;
+    UR_CHECK_ERROR(hipCtxGetCurrent(&Current));
+    if (HIPCtxt != Current) {
+      UR_CHECK_ERROR(hipCtxPushCurrent(HIPCtxt));
+    }
+    UR_CHECK_ERROR(hipCtxSynchronize());
+    UR_CHECK_ERROR(hipCtxGetCurrent(&Current));
+    if (HIPCtxt == Current) {
+      UR_CHECK_ERROR(hipCtxPopCurrent(&Current));
+    }
+#endif
+    return UR_CHECK_ERROR(hipCtxDestroy(HIPCtxt));
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urContextRetain(ur_context_handle_t hContext) {
+  assert(hContext->getReferenceCount() > 0);
+
+  hContext->incrementReferenceCount(); // take one more reference
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle(
+    ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) {
+  *phNativeContext = reinterpret_cast<ur_native_handle_t>(hContext->get());
+  return UR_RESULT_SUCCESS; // the hipCtx_t is handed out without a retain
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle(
+    ur_native_handle_t, uint32_t, const ur_device_handle_t *,
+    const ur_context_native_properties_t *, ur_context_handle_t *) {
+  return UR_RESULT_ERROR_INVALID_OPERATION; // all arguments are ignored
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter(
+    ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter,
+    void *pUserData) {
+  hContext->setExtendedDeleter(pfnDeleter, pUserData); // stored on the context
+  return UR_RESULT_SUCCESS;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp
new file mode 100644
index 0000000000000..aa61e1e84b4aa
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/context.hpp
@@ -0,0 +1,152 @@
+//===--------- context.hpp - HIP Adapter ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include "common.hpp"
+#include "device.hpp"
+#include "platform.hpp"
+
+typedef void (*ur_context_extended_deleter_t)(void *UserData);
+
+/// UR context mapping to a HIP context object.
+///
+/// There is no direct mapping between a HIP context and a UR context.
+/// The main differences are described below:
+///
+/// <b> HIP context vs UR context </b>
+///
+/// One of the main differences between the UR API and the HIP driver API is
+/// that the second modifies the state of the threads by assigning
+/// `hipCtx_t` objects to threads. `hipCtx_t` objects store data associated
+/// with a given device and control access to said device from the user side.
+/// UR API contexts are objects that are passed to functions, and are not
+/// bound to threads.
+/// The ur_context_handle_t_ object doesn't implement this behavior. It only
+/// holds the HIP context data. The RAII object \ref ScopedContext implements
+/// the active context behavior.
+///
+/// <b> Primary vs UserDefined context </b>
+///
+/// HIP has two different types of context, the Primary context,
+/// which is usable by all threads on a given process for a given device, and
+/// the aforementioned custom contexts.
+/// The HIP documentation, and performance analysis, suggest using the Primary
+/// context whenever possible. The Primary context is also used by the HIP
+/// Runtime API. For UR applications to interop with HIP Runtime API, they have
+/// to use the primary context - and make that active in the thread. The
+/// `ur_context_handle_t_` object can be constructed with a `kind` parameter
+/// that allows to construct a Primary or `UserDefined` context, so that
+/// the UR object interface is always the same.
+///
+///  <b> Destructor callback </b>
+///
+///  Required to implement CP023, SYCL Extended Context Destruction,
+///  the UR Context can store a number of callback functions that will be
+///  called upon destruction of the UR Context.
+///  See proposal for details.
+///  https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md
+///
+struct ur_context_handle_t_ {
+
+  /// A user callback paired with the opaque pointer it is invoked with.
+  struct deleter_data {
+    ur_context_extended_deleter_t Function;
+    void *UserData;
+    void operator()() { Function(UserData); }
+  };
+
+  using native_type = hipCtx_t;
+
+  enum class kind { Primary, UserDefined } Kind;
+  native_type HIPContext;
+  ur_device_handle_t DeviceId;
+  std::atomic_uint32_t RefCount;
+
+  /// Starts with a reference count of one, registers itself with the device
+  /// and retains that device until the context is destroyed.
+  ur_context_handle_t_(kind K, hipCtx_t Ctxt, ur_device_handle_t DevId)
+      : Kind{K}, HIPContext{Ctxt}, DeviceId{DevId}, RefCount{1} {
+    DeviceId->setContext(this);
+    urDeviceRetain(DeviceId);
+  }
+
+  ~ur_context_handle_t_() { urDeviceRelease(DeviceId); }
+
+  /// Runs every registered deleter, in registration order, under the lock.
+  void invokeExtendedDeleters() {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    for (deleter_data &Callback : ExtendedDeleters)
+      Callback();
+  }
+
+  /// Registers a callback for invokeExtendedDeleters().
+  void setExtendedDeleter(ur_context_extended_deleter_t Function,
+                          void *UserData) {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    ExtendedDeleters.push_back(deleter_data{Function, UserData});
+  }
+
+  ur_device_handle_t getDevice() const noexcept { return DeviceId; }
+  native_type get() const noexcept { return HIPContext; }
+  bool isPrimary() const noexcept { return kind::Primary == Kind; }
+
+  // Increment and decrement return the updated count.
+  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
+  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
+  uint32_t getReferenceCount() const noexcept { return RefCount.load(); }
+
+private:
+  std::mutex Mutex;
+  std::vector<deleter_data> ExtendedDeleters;
+};
+
+namespace {
+/// RAII type that makes the HIP context of a UR context current on the
+/// calling thread and guarantees recovering the original HIP context.
+/// Scoped context is used across the UR HIP adapter implementation because
+/// the HIP driver API operates on the context that is active on the thread.
+/// The implementation avoids replacing the thread's hipCtx_t when the
+/// requested context is already the active one.
+class ScopedContext {
+  ur_context_handle_t PlacedContext;
+  hipCtx_t Original;
+  bool NeedToRecover;
+
+public:
+  /// Throws UR_RESULT_ERROR_INVALID_CONTEXT when `Ctxt` is null.
+  ScopedContext(ur_context_handle_t Ctxt)
+      : PlacedContext{Ctxt}, NeedToRecover{false} {
+    if (PlacedContext == nullptr) {
+      throw UR_RESULT_ERROR_INVALID_CONTEXT;
+    }
+
+    const hipCtx_t Desired = PlacedContext->get();
+    UR_CHECK_ERROR(hipCtxGetCurrent(&Original));
+    if (Original == Desired) {
+      // The requested context is already active: nothing to set or undo.
+      return;
+    }
+
+    // Sets the desired context as the active one for the thread
+    UR_CHECK_ERROR(hipCtxSetCurrent(Desired));
+
+    // When no context was installed on the thread (the most common case) the
+    // new one is left in place, which emulates the behaviour of the HIP
+    // runtime API and avoids costly context switches. Only a different,
+    // previously active context has to be put back on destruction.
+    NeedToRecover = (Original != nullptr);
+  }
+
+  /// Re-activates the original context when the constructor displaced one.
+  ~ScopedContext() {
+    if (NeedToRecover) {
+      UR_CHECK_ERROR(hipCtxSetCurrent(Original));
+    }
+  }
+};
+} // namespace
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/device.cpp
new file mode 100644
index 0000000000000..866819ca3c07f
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/device.cpp
@@ -0,0 +1,986 @@
+//===--------- device.cpp - HIP Adapter -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "device.hpp"
+#include "context.hpp"
+#include "event.hpp"
+
+#include <sstream>
+
+int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute) {
+  int Value;
+  const auto Status = hipDeviceGetAttribute(&Value, Attribute, Device->get());
+  detail::ur::assertion(Status == hipSuccess); // a failed query is fatal
+  return Value;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
+                                                    ur_device_info_t propName,
+                                                    size_t propSize,
+                                                    void *pPropValue,
+                                                    size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+
+  static constexpr uint32_t MaxWorkItemDimensions = 3u;
+
+  switch ((uint32_t)propName) {
+  case UR_DEVICE_INFO_TYPE: {
+    return ReturnValue(UR_DEVICE_TYPE_GPU);
+  }
+  case UR_DEVICE_INFO_VENDOR_ID: {
+#if defined(__HIP_PLATFORM_AMD__)
+    uint32_t VendorId = 4098u;
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+    uint32_t VendorId = 4318u;
+#else
+    uint32_t VendorId = 0u;
+#endif
+    return ReturnValue(VendorId);
+  }
+  case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: {
+    int ComputeUnits = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&ComputeUnits,
+                              hipDeviceAttributeMultiprocessorCount,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(ComputeUnits >= 0);
+    return ReturnValue(static_cast<uint32_t>(ComputeUnits));
+  }
+  case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: {
+    return ReturnValue(MaxWorkItemDimensions);
+  }
+  case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: {
+    struct {
+      size_t sizes[MaxWorkItemDimensions];
+    } return_sizes;
+
+    int MaxX = 0, MaxY = 0, MaxZ = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxX,
+                                                hipDeviceAttributeMaxBlockDimX,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(MaxX >= 0);
+
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxY,
+                                                hipDeviceAttributeMaxBlockDimY,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(MaxY >= 0);
+
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxZ,
+                                                hipDeviceAttributeMaxBlockDimZ,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(MaxZ >= 0);
+
+    return_sizes.sizes[0] = size_t(MaxX);
+    return_sizes.sizes[1] = size_t(MaxY);
+    return_sizes.sizes[2] = size_t(MaxZ);
+    return ReturnValue(return_sizes);
+  }
+
+  case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: {
+    struct {
+      size_t sizes[MaxWorkItemDimensions];
+    } return_sizes;
+
+    int MaxX = 0, MaxY = 0, MaxZ = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxX,
+                                                hipDeviceAttributeMaxGridDimX,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(MaxX >= 0);
+
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxY,
+                                                hipDeviceAttributeMaxGridDimY,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(MaxY >= 0);
+
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxZ,
+                                                hipDeviceAttributeMaxGridDimZ,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(MaxZ >= 0);
+
+    return_sizes.sizes[0] = size_t(MaxX);
+    return_sizes.sizes[1] = size_t(MaxY);
+    return_sizes.sizes[2] = size_t(MaxZ);
+    return ReturnValue(return_sizes);
+  }
+
+  case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: {
+    int MaxWorkGroupSize = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&MaxWorkGroupSize,
+                              hipDeviceAttributeMaxThreadsPerBlock,
+                              hDevice->get()) == hipSuccess);
+
+    detail::ur::assertion(MaxWorkGroupSize >= 0);
+
+    return ReturnValue(size_t(MaxWorkGroupSize));
+  }
+  case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: {
+    return ReturnValue(0u);
+  }
+  case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: {
+    return ReturnValue(1u);
+  }
+  case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: {
+    return ReturnValue(0u);
+  }
+  case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: {
+    // Number of sub-groups = max block size / warp size + possible remainder
+    int MaxThreads = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&MaxThreads, hipDeviceAttributeMaxThreadsPerBlock,
+                              hDevice->get()) == hipSuccess);
+    int WarpSize = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&WarpSize,
+                                                hipDeviceAttributeWarpSize,
+                                                hDevice->get()) == hipSuccess);
+    int MaxWarps = (MaxThreads + WarpSize - 1) / WarpSize;
+    return ReturnValue(MaxWarps);
+  }
+  case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: {
+    // Volta provides independent thread scheduling
+    // TODO: Revisit for previous generation GPUs
+    int Major = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&Major, hipDeviceAttributeComputeCapabilityMajor,
+                              hDevice->get()) == hipSuccess);
+    bool IFP = (Major >= 7);
+    return ReturnValue(IFP);
+  }
+  case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
+    int WarpSize = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&WarpSize,
+                                                hipDeviceAttributeWarpSize,
+                                                hDevice->get()) == hipSuccess);
+    size_t Sizes[1] = {static_cast<size_t>(WarpSize)};
+    return ReturnValue(Sizes, 1);
+  }
+  case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: {
+    int ClockFreq = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&ClockFreq,
+                                                hipDeviceAttributeClockRate,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(ClockFreq >= 0);
+    return ReturnValue(static_cast<uint32_t>(ClockFreq) / 1000u);
+  }
+  case UR_DEVICE_INFO_ADDRESS_BITS: {
+    auto Bits = uint32_t{std::numeric_limits<uintptr_t>::digits};
+    return ReturnValue(Bits);
+  }
+  case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: {
+    // Max size of memory object allocation in bytes.
+    // The minimum value is max(min(1024 × 1024 ×
+    // 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE),
+    // 32 × 1024 × 1024) for devices that are not of type
+    // CL_DEVICE_TYPE_CUSTOM.
+
+    size_t Global = 0;
+    detail::ur::assertion(hipDeviceTotalMem(&Global, hDevice->get()) ==
+                          hipSuccess);
+
+    auto QuarterGlobal = static_cast<uint32_t>(Global / 4u);
+
+    auto MaxAlloc = std::max(std::min(1024u * 1024u * 1024u, QuarterGlobal),
+                             32u * 1024u * 1024u);
+
+    return ReturnValue(uint64_t{MaxAlloc});
+  }
+  case UR_DEVICE_INFO_IMAGE_SUPPORTED: {
+    return ReturnValue(uint32_t{true});
+  }
+  case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: {
+    // This call doesn't match to HIP as it doesn't have images, but instead
+    // surfaces and textures. No clear call in the HIP API to determine this,
+    // but some searching found as of SM 2.x 128 are supported.
+    return ReturnValue(128u);
+  }
+  case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: {
+    // This call doesn't match to HIP as it doesn't have images, but instead
+    // surfaces and textures. No clear call in the HIP API to determine this,
+    // but some searching found as of SM 2.x 128 are supported.
+    return ReturnValue(128u);
+  }
+  case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: {
+    // Take the smaller of maximum surface and maximum texture height.
+    int TexHeight = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&TexHeight, hipDeviceAttributeMaxTexture2DHeight,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(TexHeight >= 0);
+    int SurfHeight = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&SurfHeight, hipDeviceAttributeMaxTexture2DHeight,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(SurfHeight >= 0);
+
+    int Min = std::min(TexHeight, SurfHeight);
+
+    return ReturnValue(static_cast<size_t>(Min));
+  }
+  case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: {
+    // Take the smaller of maximum surface and maximum texture width.
+    int TexWidth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&TexWidth, hipDeviceAttributeMaxTexture2DWidth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(TexWidth >= 0);
+    int SurfWidth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&SurfWidth, hipDeviceAttributeMaxTexture2DWidth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(SurfWidth >= 0);
+
+    int Min = std::min(TexWidth, SurfWidth);
+
+    return ReturnValue(static_cast<size_t>(Min));
+  }
+  case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: {
+    // Take the smaller of maximum surface and maximum texture height.
+    int TexHeight = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&TexHeight, hipDeviceAttributeMaxTexture3DHeight,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(TexHeight >= 0);
+    int SurfHeight = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&SurfHeight, hipDeviceAttributeMaxTexture3DHeight,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(SurfHeight >= 0);
+
+    int Min = std::min(TexHeight, SurfHeight);
+
+    return ReturnValue(static_cast<size_t>(Min));
+  }
+  case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: {
+    // Take the smaller of maximum surface and maximum texture width.
+    int TexWidth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&TexWidth, hipDeviceAttributeMaxTexture3DWidth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(TexWidth >= 0);
+    int SurfWidth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&SurfWidth, hipDeviceAttributeMaxTexture3DWidth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(SurfWidth >= 0);
+
+    int Min = std::min(TexWidth, SurfWidth);
+
+    return ReturnValue(static_cast<size_t>(Min));
+  }
+  case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: {
+    // Take the smaller of maximum surface and maximum texture depth.
+    int TexDepth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&TexDepth, hipDeviceAttributeMaxTexture3DDepth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(TexDepth >= 0);
+    int SurfDepth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&SurfDepth, hipDeviceAttributeMaxTexture3DDepth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(SurfDepth >= 0);
+
+    int Min = std::min(TexDepth, SurfDepth);
+
+    return ReturnValue(static_cast<size_t>(Min));
+  }
+  case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: {
+    // Take the smaller of maximum surface and maximum texture width.
+    int TexWidth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&TexWidth, hipDeviceAttributeMaxTexture1DWidth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(TexWidth >= 0);
+    int SurfWidth = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&SurfWidth, hipDeviceAttributeMaxTexture1DWidth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(SurfWidth >= 0);
+
+    int Min = std::min(TexWidth, SurfWidth);
+
+    return ReturnValue(static_cast<size_t>(Min));
+  }
+  case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: {
+    return ReturnValue(0lu);
+  }
+  case UR_DEVICE_INFO_MAX_SAMPLERS: {
+    // This call is kind of meaningless for HIP, as samplers don't exist.
+    // Closest thing is textures, which is 128.
+    return ReturnValue(128u);
+  }
+  case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: {
+    // __global__ function parameters are passed to the device via constant
+    // memory and are limited to 4 KB.
+    return ReturnValue(4000lu);
+  }
+  case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: {
+    int MemBaseAddrAlign = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&MemBaseAddrAlign,
+                              hipDeviceAttributeTextureAlignment,
+                              hDevice->get()) == hipSuccess);
+    // Multiply by 8 as clGetDeviceInfo returns this value in bits
+    MemBaseAddrAlign *= 8;
+    return ReturnValue(MemBaseAddrAlign);
+  }
+  case UR_DEVICE_INFO_HALF_FP_CONFIG: {
+    return ReturnValue(0u);
+  }
+  case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
+    uint64_t Config =
+        UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
+        UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
+        UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
+        UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
+        UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
+        UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
+        UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
+    return ReturnValue(Config);
+  }
+  case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
+    uint64_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
+                      UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
+                      UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
+                      UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
+                      UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
+                      UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
+    return ReturnValue(Config);
+  }
+  case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
+    return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
+  }
+  case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: {
+    // The value is documented for all existing GPUs in the HIP programming
+    // guidelines, section "H.3.2. Global Memory".
+    return ReturnValue(128u);
+  }
+  case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: {
+    int CacheSize = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&CacheSize,
+                                                hipDeviceAttributeL2CacheSize,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(CacheSize >= 0);
+    // The L2 cache is global to the GPU.
+    return ReturnValue(static_cast<uint64_t>(CacheSize));
+  }
+  case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: {
+    size_t Bytes = 0;
+    // Runtime API has easy access to this value, driver API info is scarce.
+    detail::ur::assertion(hipDeviceTotalMem(&Bytes, hDevice->get()) ==
+                          hipSuccess);
+    return ReturnValue(uint64_t{Bytes});
+  }
+  case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: {
+    int ConstantMemory = 0;
+
+    // hipDeviceGetAttribute takes a int*, however the size of the constant
+    // memory on AMD GPU may be larger than what can fit in the positive part
+    // of a signed integer, so use an unsigned integer and cast the pointer to
+    // int*.
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&ConstantMemory,
+                              hipDeviceAttributeTotalConstantMemory,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(ConstantMemory >= 0);
+
+    return ReturnValue(static_cast<uint64_t>(ConstantMemory));
+  }
+  case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: {
+    // TODO: is there a way to retrieve this from HIP driver API?
+    // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX
+    // 1060 3GB
+    return ReturnValue(9u);
+  }
+  case UR_DEVICE_INFO_LOCAL_MEM_TYPE: {
+    return ReturnValue(UR_DEVICE_LOCAL_MEM_TYPE_LOCAL);
+  }
+  case UR_DEVICE_INFO_LOCAL_MEM_SIZE: {
+    // OpenCL's "local memory" maps most closely to HIP's "shared memory".
+    // HIP has its own definition of "local memory", which maps to OpenCL's
+    // "private memory".
+    int LocalMemSize = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&LocalMemSize,
+                              hipDeviceAttributeMaxSharedMemoryPerBlock,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(LocalMemSize >= 0);
+    return ReturnValue(static_cast<uint64_t>(LocalMemSize));
+  }
+  case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: {
+    int EccEnabled = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&EccEnabled,
+                                                hipDeviceAttributeEccEnabled,
+                                                hDevice->get()) == hipSuccess);
+
+    detail::ur::assertion((EccEnabled == 0) | (EccEnabled == 1));
+    auto Result = static_cast<bool>(EccEnabled);
+    return ReturnValue(Result);
+  }
+  case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: {
+    int IsIntegrated = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&IsIntegrated,
+                                                hipDeviceAttributeIntegrated,
+                                                hDevice->get()) == hipSuccess);
+
+    detail::ur::assertion((IsIntegrated == 0) | (IsIntegrated == 1));
+    auto Result = static_cast<bool>(IsIntegrated);
+    return ReturnValue(Result);
+  }
+  case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: {
+    // Hard coded to value returned by clinfo for OpenCL 1.2 HIP | GeForce GTX
+    // 1060 3GB
+    return ReturnValue(1000lu);
+  }
+  case UR_DEVICE_INFO_ENDIAN_LITTLE: {
+    return ReturnValue(true);
+  }
+  case UR_DEVICE_INFO_AVAILABLE: {
+    return ReturnValue(true);
+  }
+  case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: {
+    return ReturnValue(true);
+  }
+  case UR_DEVICE_INFO_COMPILER_AVAILABLE: {
+    return ReturnValue(true);
+  }
+  case UR_DEVICE_INFO_LINKER_AVAILABLE: {
+    return ReturnValue(true);
+  }
+  case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: {
+    auto Capability = ur_device_exec_capability_flags_t{
+        UR_DEVICE_EXEC_CAPABILITY_FLAG_KERNEL};
+    return ReturnValue(Capability);
+  }
+  case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
+    // The mandated minimum capability:
+    uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
+                          UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    return ReturnValue(Capability);
+  }
+  case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES:
+  case UR_DEVICE_INFO_QUEUE_PROPERTIES: {
+    // The mandated minimum capability:
+    uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
+    return ReturnValue(Capability);
+  }
+  case UR_DEVICE_INFO_BUILT_IN_KERNELS: {
+    // An empty string is returned if no built-in kernels are supported by the
+    // device.
+    return ReturnValue("");
+  }
+  case UR_DEVICE_INFO_PLATFORM: {
+    return ReturnValue(hDevice->getPlatform());
+  }
+  case UR_DEVICE_INFO_NAME: {
+    static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u;
+    char Name[MAX_DEVICE_NAME_LENGTH];
+    detail::ur::assertion(hipDeviceGetName(Name, MAX_DEVICE_NAME_LENGTH,
+                                           hDevice->get()) == hipSuccess);
+    // On AMD GPUs hipDeviceGetName returns an empty string, so return the arch
+    // name instead, this is also what AMD OpenCL devices return.
+    if (strlen(Name) == 0) {
+      hipDeviceProp_t Props;
+      detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
+                            hipSuccess);
+
+      return ReturnValue(Props.gcnArchName, strlen(Props.gcnArchName) + 1);
+    }
+    return ReturnValue(Name, strlen(Name) + 1);
+  }
+  case UR_DEVICE_INFO_VENDOR: {
+    return ReturnValue("AMD Corporation");
+  }
+  case UR_DEVICE_INFO_DRIVER_VERSION: {
+    std::string Version;
+    detail::ur::assertion(getHipVersionString(Version) == hipSuccess);
+    return ReturnValue(Version.c_str());
+  }
+  case UR_DEVICE_INFO_PROFILE: {
+    return ReturnValue("HIP");
+  }
+  case UR_DEVICE_INFO_REFERENCE_COUNT: {
+    return ReturnValue(hDevice->getReferenceCount());
+  }
+  case UR_DEVICE_INFO_VERSION: {
+    std::stringstream S;
+
+    hipDeviceProp_t Props;
+    detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
+                          hipSuccess);
+#if defined(__HIP_PLATFORM_NVIDIA__)
+    S << Props.major << "." << Props.minor;
+#elif defined(__HIP_PLATFORM_AMD__)
+    S << Props.gcnArchName;
+#else
+#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+    return ReturnValue(S.str().c_str());
+  }
+  case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: {
+    return ReturnValue("");
+  }
+  case UR_DEVICE_INFO_EXTENSIONS: {
+    // TODO: Remove comment when HIP support native asserts.
+    // DEVICELIB_ASSERT extension is set so fallback assert
+    // postprocessing is NOP. HIP 4.3 docs indicate support for
+    // native asserts are in progress
+    std::string SupportedExtensions = "";
+    SupportedExtensions += "pi_ext_intel_devicelib_assert ";
+    SupportedExtensions += " ";
+
+    hipDeviceProp_t Props;
+    detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
+                          hipSuccess);
+
+    if (Props.arch.hasDoubles) {
+      SupportedExtensions += "cl_khr_fp64 ";
+    }
+
+    return ReturnValue(SupportedExtensions.c_str());
+  }
+  case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
+    // The minimum value for the FULL profile is 1 MB.
+    return ReturnValue(1024lu);
+  }
+  case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: {
+    return ReturnValue(true);
+  }
+  case UR_DEVICE_INFO_PARENT_DEVICE: {
+    return ReturnValue(nullptr);
+  }
+  case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: {
+    return ReturnValue(0u);
+  }
+  case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: {
+    return ReturnValue(static_cast<ur_device_partition_t>(0u));
+  }
+  case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: {
+    return ReturnValue(0u);
+  }
+  case UR_DEVICE_INFO_PARTITION_TYPE: {
+    return ReturnValue(static_cast<ur_device_partition_t>(0u));
+  }
+
+  // Intel USM extensions
+  case UR_DEVICE_INFO_USM_HOST_SUPPORT: {
+    // from cl_intel_unified_shared_memory: "The host memory access capabilities
+    // apply to any host allocation."
+    //
+    // query if/how the device can access page-locked host memory, possibly
+    // through PCIe, using the same pointer as the host
+    ur_device_usm_access_capability_flags_t Value = {};
+    // if (getAttribute(device, HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) {
+    // the device shares a unified address space with the host
+    if (getAttribute(hDevice, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
+      // compute capability 6.x introduces operations that are atomic with
+      // respect to other CPUs and GPUs in the system
+      Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS |
+              UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS |
+              UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS |
+              UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS;
+    } else {
+      // on GPU architectures with compute capability lower than 6.x, atomic
+      // operations from the GPU to CPU memory will not be atomic with respect
+      // to CPU initiated atomic operations
+      Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS |
+              UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS;
+    }
+    return ReturnValue(Value);
+  }
+  case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: {
+    // from cl_intel_unified_shared_memory:
+    // "The device memory access capabilities apply to any device allocation
+    // associated with this device."
+    //
+    // query how the device can access memory allocated on the device itself (?)
+    ur_device_usm_access_capability_flags_t Value =
+        UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS |
+        UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS |
+        UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS |
+        UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS;
+    return ReturnValue(Value);
+  }
+  case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: {
+    // from cl_intel_unified_shared_memory:
+    // "The single device shared memory access capabilities apply to any shared
+    // allocation associated with this device."
+    //
+    // query if/how the device can access managed memory associated to it
+    ur_device_usm_access_capability_flags_t Value = {};
+    if (getAttribute(hDevice, hipDeviceAttributeManagedMemory)) {
+      // the device can allocate managed memory on this system
+      Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS |
+              UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS;
+    }
+    if (getAttribute(hDevice, hipDeviceAttributeConcurrentManagedAccess)) {
+      // the device can coherently access managed memory concurrently with the
+      // CPU
+      Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS;
+      if (getAttribute(hDevice, hipDeviceAttributeComputeCapabilityMajor) >=
+          6) {
+        // compute capability 6.x introduces operations that are atomic with
+        // respect to other CPUs and GPUs in the system
+        Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS;
+      }
+    }
+    return ReturnValue(Value);
+  }
+  case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: {
+    // from cl_intel_unified_shared_memory:
+    // "The cross-device shared memory access capabilities apply to any shared
+    // allocation associated with this device, or to any shared memory
+    // allocation on another device that also supports the same cross-device
+    // shared memory access capability."
+    //
+    // query if/how the device can access managed memory associated to other
+    // devices
+    ur_device_usm_access_capability_flags_t Value = {};
+    if (getAttribute(hDevice, hipDeviceAttributeManagedMemory)) {
+      // the device can allocate managed memory on this system
+      Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS;
+    }
+    if (getAttribute(hDevice, hipDeviceAttributeConcurrentManagedAccess)) {
+      // all devices with the CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS
+      // attribute can coherently access managed memory concurrently with the
+      // CPU
+      Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS;
+    }
+    if (getAttribute(hDevice, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
+      // compute capability 6.x introduces operations that are atomic with
+      // respect to other CPUs and GPUs in the system
+      if (Value & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)
+        Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_ACCESS;
+      if (Value & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS)
+        Value |= UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS;
+    }
+    return ReturnValue(Value);
+  }
+  case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: {
+    // from cl_intel_unified_shared_memory:
+    // "The shared system memory access capabilities apply to any allocations
+    // made by a system allocator, such as malloc or new."
+    //
+    // query if/how the device can access pageable host memory allocated by the
+    // system allocator
+    ur_device_usm_access_capability_flags_t Value = {};
+    if (getAttribute(hDevice, hipDeviceAttributePageableMemoryAccess)) {
+      // the link between the device and the host does not support native
+      // atomic operations
+      Value = UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS |
+              UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_CONCURRENT_ACCESS;
+    }
+    return ReturnValue(Value);
+  }
+
+  case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: {
+    int Major = 0, Minor = 0;
+    detail::ur::assertion(hipDeviceComputeCapability(
+                              &Major, &Minor, hDevice->get()) == hipSuccess);
+    std::string Result = std::to_string(Major) + "." + std::to_string(Minor);
+    return ReturnValue(Result.c_str());
+  }
+
+  case UR_DEVICE_INFO_ATOMIC_64: {
+    hipDeviceProp_t Props;
+    detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
+                          hipSuccess);
+    return ReturnValue(Props.arch.hasGlobalInt64Atomics &&
+                       Props.arch.hasSharedInt64Atomics);
+  }
+
+  case UR_DEVICE_INFO_GLOBAL_MEM_FREE: {
+    size_t FreeMemory = 0;
+    size_t TotalMemory = 0;
+    detail::ur::assertion(hipMemGetInfo(&FreeMemory, &TotalMemory) ==
+                              hipSuccess,
+                          "failed hipMemGetInfo() API.");
+    return ReturnValue(FreeMemory);
+  }
+
+  case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: {
+    int Value = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&Value, hipDeviceAttributeMemoryClockRate,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(Value >= 0);
+    // Convert kilohertz to megahertz when returning.
+    return ReturnValue(Value / 1000);
+  }
+
+  case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: {
+    int Value = 0;
+    detail::ur::assertion(
+        hipDeviceGetAttribute(&Value, hipDeviceAttributeMemoryBusWidth,
+                              hDevice->get()) == hipSuccess);
+    detail::ur::assertion(Value >= 0);
+    return ReturnValue(Value);
+  }
+  case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: {
+    return ReturnValue(int32_t{1});
+  }
+
+  case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
+    uint64_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
+                            UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
+                            UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE;
+    return ReturnValue(Capabilities);
+  }
+  case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
+  case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
+    // SYCL2020 4.6.4.2 minimum mandated capabilities for
+    // atomic_fence/memory_scope_capabilities.
+    // Because scopes are hierarchical, wider scopes support all narrower
+    // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
+    // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
+    uint64_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM |
+                            UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP |
+                            UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP;
+    return ReturnValue(Capabilities);
+  }
+  case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: {
+    // SYCL2020 4.6.4.2 minimum mandated capabilities for
+    // atomic_fence_order_capabilities.
+    ur_memory_order_capability_flags_t Capabilities =
+        UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
+        UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
+        UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
+        UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
+    return ReturnValue(Capabilities);
+  }
+  case UR_DEVICE_INFO_DEVICE_ID: {
+    int Value = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&Value,
+                                                hipDeviceAttributePciDeviceId,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(Value >= 0);
+    return ReturnValue(Value);
+  }
+  case UR_DEVICE_INFO_UUID: {
+#if ((HIP_VERSION_MAJOR == 5 && HIP_VERSION_MINOR >= 2) ||                     \
+     HIP_VERSION_MAJOR > 5)
+    hipUUID UUID = {};
+    // Supported since 5.2+
+    detail::ur::assertion(hipDeviceGetUuid(&UUID, hDevice->get()) ==
+                          hipSuccess);
+    std::array<unsigned char, 16> Name;
+    std::copy(UUID.bytes, UUID.bytes + 16, Name.begin());
+    return ReturnValue(Name.data(), 16);
+#endif
+    return UR_RESULT_ERROR_INVALID_VALUE;
+  }
+  case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: {
+    // Maximum number of 32-bit registers available to a thread block.
+    // Note: This number is shared by all thread blocks simultaneously resident
+    // on a multiprocessor.
+    int MaxRegisters{-1};
+    UR_CHECK_ERROR(hipDeviceGetAttribute(
+        &MaxRegisters, hipDeviceAttributeMaxRegistersPerBlock, hDevice->get()));
+
+    detail::ur::assertion(MaxRegisters >= 0);
+
+    return ReturnValue(static_cast<uint32_t>(MaxRegisters));
+  }
+  case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT:
+    return ReturnValue(false);
+  case UR_DEVICE_INFO_IMAGE_SRGB:
+    return ReturnValue(false);
+  case UR_DEVICE_INFO_PCI_ADDRESS: {
+    constexpr size_t AddressBufferSize = 13;
+    char AddressBuffer[AddressBufferSize];
+    detail::ur::assertion(hipDeviceGetPCIBusId(AddressBuffer, AddressBufferSize,
+                                               hDevice->get()) == hipSuccess);
+    // A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API
+    // is not guaranteed to use this format. In practice, it uses this format,
+    // at least in 5.3-5.5. To be on the safe side, we make sure the terminating
+    // \0 is set.
+    AddressBuffer[AddressBufferSize - 1] = '\0';
+    detail::ur::assertion(strnlen(AddressBuffer, AddressBufferSize) > 0);
+    return ReturnValue(AddressBuffer,
+                       strnlen(AddressBuffer, AddressBufferSize - 1) + 1);
+  }
+  case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED:
+    return ReturnValue(false);
+  // TODO: Investigate if this information is available on HIP.
+  case UR_DEVICE_INFO_GPU_EU_COUNT:
+  case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
+  case UR_DEVICE_INFO_GPU_EU_SLICES:
+  case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
+  case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
+  case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
+  case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH:
+  case UR_DEVICE_INFO_BFLOAT16:
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+
+  default:
+    break;
+  }
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// Retaining is a no-op here because HIP devices are always root devices.
+/// \return UR_RESULT_SUCCESS unconditionally; the handle is not inspected.
+UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t) {
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urDevicePartition(ur_device_handle_t, const ur_device_partition_properties_t *,
+                  uint32_t, ur_device_handle_t *, uint32_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // partitioning is not supported
+}
+
+/// Releasing is a no-op here because HIP devices are always root devices.
+/// \return UR_RESULT_SUCCESS unconditionally; the handle is not inspected.
+UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t) {
+  return UR_RESULT_SUCCESS;
+}
+
+/// Reports the devices of \p hPlatform. Only the DEFAULT, GPU and ALL device
+/// types match this adapter's devices; any other type yields a count of 0.
+UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,
+                                                ur_device_type_t DeviceType,
+                                                uint32_t NumEntries,
+                                                ur_device_handle_t *phDevices,
+                                                uint32_t *pNumDevices) {
+  const bool TypeMatches = DeviceType == UR_DEVICE_TYPE_DEFAULT ||
+                           DeviceType == UR_DEVICE_TYPE_GPU ||
+                           DeviceType == UR_DEVICE_TYPE_ALL;
+  const size_t Available = TypeMatches ? hPlatform->Devices.size() : 0;
+
+  try {
+    UR_ASSERT(pNumDevices || phDevices, UR_RESULT_ERROR_INVALID_VALUE);
+
+    if (pNumDevices) {
+      *pNumDevices = Available;
+    }
+
+    // A non-matching type leaves Available at 0, so nothing is copied then.
+    if (phDevices) {
+      const size_t Count = std::min(size_t(NumEntries), Available);
+      for (size_t Idx = 0; Idx != Count; ++Idx) {
+        phDevices[Idx] = hPlatform->Devices[Idx].get();
+      }
+    }
+    return UR_RESULT_SUCCESS;
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+}
+
+/// Exposes the HIP device wrapped by a UR device object.
+///
+/// \param[in] hDevice UR device whose underlying HIP device is requested.
+/// \param[out] phNativeHandle Receives the HIP device cast to a native handle.
+/// \return UR_RESULT_SUCCESS
+UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
+    ur_device_handle_t hDevice, ur_native_handle_t *phNativeHandle) {
+  const auto NativeDevice = hDevice->get();
+  *phNativeHandle = reinterpret_cast<ur_native_handle_t>(NativeDevice);
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
+    ur_native_handle_t, ur_platform_handle_t,
+    const ur_device_native_properties_t *, ur_device_handle_t *) {
+  return UR_RESULT_ERROR_INVALID_OPERATION; // arguments are never read
+}
+
+/// Selects the first binary whose target spec matches the HIP platform this
+/// adapter was compiled for. \return UR_RESULT_SUCCESS when one is found.
+UR_APIEXPORT ur_result_t UR_APICALL
+urDeviceSelectBinary(ur_device_handle_t, const ur_device_binary_t *pBinaries,
+                     uint32_t NumBinaries, uint32_t *pSelectedBinary) {
+  // An empty candidate list fails this check with INVALID_ARGUMENT.
+  UR_ASSERT(NumBinaries > 0, UR_RESULT_ERROR_INVALID_ARGUMENT);
+
+  // The wanted target spec is chosen at build time from the HIP platform
+  // macro: the AMDGCN spec on AMD, the NVPTX64 spec on NVIDIA.
+#if defined(__HIP_PLATFORM_AMD__)
+  const char *WantedSpec = UR_DEVICE_BINARY_TARGET_AMDGCN;
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+  const char *WantedSpec = UR_DEVICE_BINARY_TARGET_NVPTX64;
+#else
+#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+  for (uint32_t Idx = 0; Idx != NumBinaries; ++Idx) {
+    if (strcmp(pBinaries[Idx].pDeviceTargetSpec, WantedSpec) != 0)
+      continue;
+    *pSelectedBinary = Idx;
+    return UR_RESULT_SUCCESS;
+  }
+
+  // No candidate targets this platform.
+  return UR_RESULT_ERROR_INVALID_BINARY;
+}
+
+/// Captures host and/or device timestamps in nanoseconds; a null output
+/// pointer is skipped. The device value is relative to the platform's EvBase.
+ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
+                                                   uint64_t *pDeviceTimestamp,
+                                                   uint64_t *pHostTimestamp) {
+  if (!pDeviceTimestamp && !pHostTimestamp)
+    return UR_RESULT_SUCCESS;
+  ur_event_handle_t_::native_type Event;
+  ScopedContext Active(hDevice->getContext());
+  if (pDeviceTimestamp) {
+    UR_CHECK_ERROR(hipEventCreateWithFlags(&Event, hipEventDefault));
+    UR_CHECK_ERROR(hipEventRecord(Event));
+  }
+  if (pHostTimestamp) {
+    using namespace std::chrono;
+    const auto SinceEpoch = steady_clock::now().time_since_epoch();
+    *pHostTimestamp = duration_cast<nanoseconds>(SinceEpoch).count();
+  }
+  if (pDeviceTimestamp) {
+    UR_CHECK_ERROR(hipEventSynchronize(Event));
+    float ElapsedTime = 0.0f;
+    UR_CHECK_ERROR(hipEventElapsedTime(&ElapsedTime,
+                                       ur_platform_handle_t_::EvBase, Event));
+    // hipEventElapsedTime reports milliseconds; scale to nanoseconds.
+    *pDeviceTimestamp = static_cast<uint64_t>(ElapsedTime * 1e6);
+    // The event is no longer needed; destroy it so it does not leak.
+    UR_CHECK_ERROR(hipEventDestroy(Event));
+  }
+  return UR_RESULT_SUCCESS;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp
new file mode 100644
index 0000000000000..9a56652957663
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/device.hpp
@@ -0,0 +1,42 @@
+//===--------- device.hpp - HIP Adapter -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include "common.hpp"
+
+#include <ur/ur.hpp>
+
+/// UR device mapping to a hipDevice_t.
+/// Includes an observer pointer to the platform and the context last set
+/// through setContext(), and carries its own reference counter since
+/// HIP objects are not refcounted.
+struct ur_device_handle_t_ {
+private:
+  using native_type = hipDevice_t;
+
+  native_type HIPDevice;
+  std::atomic_uint32_t RefCount;
+  ur_platform_handle_t Platform;
+  // Starts out null so getContext() is well defined before setContext().
+  ur_context_handle_t Context{nullptr};
+
+public:
+  ur_device_handle_t_(native_type HipDevice, ur_platform_handle_t Platform)
+      : HIPDevice(HipDevice), RefCount{1}, Platform(Platform) {}
+
+  native_type get() const noexcept { return HIPDevice; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount; }
+
+  ur_platform_handle_t getPlatform() const noexcept { return Platform; }
+
+  void setContext(ur_context_handle_t Ctxt) { Context = Ctxt; }
+  ur_context_handle_t getContext() { return Context; }
+};
+
+int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute);
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp
new file mode 100644
index 0000000000000..c5042f64bcc7b
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/enqueue.cpp
@@ -0,0 +1,1419 @@
+//===--------- enqueue.cpp - HIP Adapter -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "common.hpp"
+#include "context.hpp"
+#include "event.hpp"
+#include "kernel.hpp"
+#include "memory.hpp"
+#include "queue.hpp"
+
+namespace {
+
+static size_t imageElementByteSize(hipArray_Format ArrayFormat) {
+  switch (ArrayFormat) {
+  case HIP_AD_FORMAT_UNSIGNED_INT8:
+  case HIP_AD_FORMAT_SIGNED_INT8:
+    return 1; // 8-bit integer formats
+  case HIP_AD_FORMAT_UNSIGNED_INT16:
+  case HIP_AD_FORMAT_SIGNED_INT16:
+  case HIP_AD_FORMAT_HALF:
+    return 2; // 16-bit integer and half formats
+  case HIP_AD_FORMAT_UNSIGNED_INT32:
+  case HIP_AD_FORMAT_SIGNED_INT32:
+  case HIP_AD_FORMAT_FLOAT:
+    return 4; // 32-bit integer and float formats
+  default:
+    detail::ur::die("Invalid image format."); // any other format is rejected
+  }
+  return 0; // only reached if die() returns -- presumably it never does
+}
+
+ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue,
+                              hipStream_t Stream, uint32_t NumEventsInWaitList,
+                              const ur_event_handle_t *EventWaitList) {
+  if (!EventWaitList) { // no wait list: nothing to enqueue
+    return UR_RESULT_SUCCESS;
+  }
+  try {
+    ScopedContext Active(CommandQueue->getContext());
+
+    auto Result = forLatestEvents(
+        EventWaitList, NumEventsInWaitList,
+        [Stream](ur_event_handle_t Event) -> ur_result_t {
+          if (Event->getStream() == Stream) { // same stream: no wait issued
+            return UR_RESULT_SUCCESS;
+          } else {
+            return UR_CHECK_ERROR(hipStreamWaitEvent(Stream, Event->get(), 0));
+          }
+        });
+
+    if (Result != UR_RESULT_SUCCESS) {
+      return Result;
+    }
+    return UR_RESULT_SUCCESS;
+  } catch (ur_result_t Err) { // thrown UR error codes become the return value
+    return Err;
+  } catch (...) { // anything else is reported as an unknown error
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+}
+
+void simpleGuessLocalWorkSize(size_t *ThreadsPerBlock,
+                              const size_t *GlobalWorkSize,
+                              const size_t MaxThreadsPerBlock[3],
+                              ur_kernel_handle_t Kernel) {
+  assert(ThreadsPerBlock != nullptr);
+  assert(GlobalWorkSize != nullptr);
+  assert(Kernel != nullptr);
+
+  std::ignore = Kernel; // otherwise referenced only by the assert above
+
+  ThreadsPerBlock[0] = std::min(MaxThreadsPerBlock[0], GlobalWorkSize[0]);
+
+  // Shrink dimension 0 to the largest value that divides the global size
+  // evenly, giving uniform work groups. Other dimensions are left untouched.
+  while (GlobalWorkSize[0] % ThreadsPerBlock[0]) {
+    --ThreadsPerBlock[0];
+  }
+}
+} // namespace
+
+/// Enqueues an asynchronous host-to-buffer copy on the next transfer stream.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+    size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  UR_ASSERT((phEventWaitList == nullptr) == (numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never copy ahead of dependencies that failed to enqueue
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_WRITE, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    Result = UR_CHECK_ERROR(
+        hipMemcpyHtoDAsync(hBuffer->Mem.BufferMem.getWithOffset(offset),
+                           const_cast<void *>(pSrc), size, HIPStream));
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+    }
+
+    if (blockingWrite) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(HIPStream));
+    }
+
+    if (phEvent) {
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+  return Result;
+}
+
+/// Enqueues an asynchronous buffer-to-host copy on the next transfer stream.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
+    size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  UR_ASSERT((phEventWaitList == nullptr) == (numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never copy ahead of dependencies that failed to enqueue
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_READ, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    Result = UR_CHECK_ERROR(hipMemcpyDtoHAsync(
+        pDst, hBuffer->Mem.BufferMem.getWithOffset(offset), size, HIPStream));
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+    }
+
+    if (blockingRead) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(HIPStream));
+    }
+
+    if (phEvent) {
+      *phEvent = RetImplEvent.release();
+    }
+
+  } catch (ur_result_t err) {
+    Result = err;
+  }
+  return Result;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
+    ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim,
+    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
+    const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  UR_ASSERT(hQueue->getContext() == hKernel->getContext(),
+            UR_RESULT_ERROR_INVALID_QUEUE);
+  UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
+  UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
+
+  if (*pGlobalWorkSize == 0) { // empty dimension 0: enqueue only a barrier
+    return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList,
+                                          phEventWaitList, phEvent);
+  }
+
+  // Set the number of threads per block to the number of threads per warp
+  // by default unless user has provided a better number
+  size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
+  size_t MaxWorkGroupSize = 0u;
+  size_t MaxThreadsPerBlock[3] = {};
+  bool ProvidedLocalWorkGroupSize = (pLocalWorkSize != nullptr);
+
+  {
+    ur_result_t Result = urDeviceGetInfo(
+        hQueue->Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES,
+        sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr);
+    UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
+
+    Result =
+        urDeviceGetInfo(hQueue->Device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
+                        sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr);
+    UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
+
+    // The MaxWorkGroupSize = 1024 for AMD GPU
+    // The MaxThreadsPerBlock = {1024, 1024, 1024}
+
+    if (ProvidedLocalWorkGroupSize) {
+      auto isValid = [&](int dim) {
+        UR_ASSERT(pLocalWorkSize[dim] <= MaxThreadsPerBlock[dim],
+                  UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE);
+        // A local size must be non-zero (checked first so the modulo below
+        // cannot divide by zero) and must evenly divide the global size.
+        UR_ASSERT(pLocalWorkSize[dim] != 0,
+                  UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE);
+        UR_ASSERT((pGlobalWorkSize[dim] % pLocalWorkSize[dim]) == 0,
+                  UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE);
+        ThreadsPerBlock[dim] = pLocalWorkSize[dim];
+        return UR_RESULT_SUCCESS;
+      };
+
+      for (size_t dim = 0; dim < workDim; dim++) {
+        auto err = isValid(dim);
+        if (err != UR_RESULT_SUCCESS)
+          return err;
+      }
+    } else {
+      simpleGuessLocalWorkSize(ThreadsPerBlock, pGlobalWorkSize,
+                               MaxThreadsPerBlock, hKernel);
+    }
+  }
+
+  UR_ASSERT(MaxWorkGroupSize >= size_t(ThreadsPerBlock[0] * ThreadsPerBlock[1] *
+                                       ThreadsPerBlock[2]),
+            UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE);
+
+  size_t BlocksPerGrid[3] = {1u, 1u, 1u};
+
+  for (size_t i = 0; i < workDim; i++) {
+    BlocksPerGrid[i] =
+        (pGlobalWorkSize[i] + ThreadsPerBlock[i] - 1) / ThreadsPerBlock[i];
+  }
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+
+    uint32_t StreamToken;
+    ur_stream_quard Guard;
+    hipStream_t HIPStream = hQueue->getNextComputeStream(
+        numEventsInWaitList, phEventWaitList, Guard, &StreamToken);
+    hipFunction_t HIPFunc = hKernel->get();
+
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never launch ahead of dependencies that failed
+
+    // Set the implicit global offset parameter if kernel has offset variant
+    if (hKernel->getWithOffsetParameter()) {
+      std::uint32_t hip_implicit_offset[3] = {0, 0, 0};
+      if (pGlobalWorkOffset) {
+        for (size_t i = 0; i < workDim; i++) {
+          hip_implicit_offset[i] =
+              static_cast<std::uint32_t>(pGlobalWorkOffset[i]);
+          if (pGlobalWorkOffset[i] != 0) {
+            HIPFunc = hKernel->getWithOffsetParameter();
+          }
+        }
+      }
+      hKernel->setImplicitOffsetArg(sizeof(hip_implicit_offset),
+                                    hip_implicit_offset);
+    }
+
+    auto ArgIndices = hKernel->getArgIndices();
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_KERNEL_LAUNCH, hQueue, HIPStream, StreamToken));
+      RetImplEvent->start();
+    }
+
+    // Set local mem max size if env var is present
+    static const char *LocalMemSzPtrUR =
+        std::getenv("UR_HIP_MAX_LOCAL_MEM_SIZE");
+    static const char *LocalMemSzPtrPI =
+        std::getenv("SYCL_PI_HIP_MAX_LOCAL_MEM_SIZE");
+    static const char *LocalMemSzPtr =
+        LocalMemSzPtrUR ? LocalMemSzPtrUR : LocalMemSzPtrPI;
+
+    if (LocalMemSzPtr) {
+      int DeviceMaxLocalMem = 0;
+      Result = UR_CHECK_ERROR(hipDeviceGetAttribute(
+          &DeviceMaxLocalMem, hipDeviceAttributeMaxSharedMemoryPerBlock,
+          hQueue->getDevice()->get()));
+
+      static const int EnvVal = std::atoi(LocalMemSzPtr);
+      if (EnvVal <= 0 || EnvVal > DeviceMaxLocalMem) {
+        setErrorMessage(LocalMemSzPtrUR ? "Invalid value specified for "
+                                          "UR_HIP_MAX_LOCAL_MEM_SIZE"
+                                        : "Invalid value specified for "
+                                          "SYCL_PI_HIP_MAX_LOCAL_MEM_SIZE",
+                        UR_RESULT_ERROR_ADAPTER_SPECIFIC);
+        return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
+      }
+      Result = UR_CHECK_ERROR(hipFuncSetAttribute(
+          HIPFunc, hipFuncAttributeMaxDynamicSharedMemorySize, EnvVal));
+    }
+
+    Result = UR_CHECK_ERROR(hipModuleLaunchKernel(
+        HIPFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
+        ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2],
+        hKernel->getLocalSize(), HIPStream, ArgIndices.data(), nullptr));
+
+    hKernel->clearLocalSize();
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t err) {
+    Result = err;
+  }
+  return Result;
+}
+
+/// Enqueues a wait on the given queue for all events.
+/// See \ref enqueueEventsWait
+///
+/// Currently queues are represented by a single in-order stream, therefore
+/// every command is an implicit barrier and so urEnqueueEventsWait has the
+/// same behavior as urEnqueueEventsWaitWithBarrier. So urEnqueueEventsWait
+/// can just call urEnqueueEventsWaitWithBarrier.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
+    ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList,
+                                        phEventWaitList, phEvent);
+}
+
+/// Enqueues a wait on the given queue for all specified events.
+/// See \ref enqueueEventWaitWithBarrier
+///
+/// If the events list is empty, the enqueued wait will wait on all previous
+/// events in the queue.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
+    ur_queue_handle_t hQueue, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  UR_ASSERT(!(phEventWaitList == NULL && numEventsInWaitList > 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    uint32_t StreamToken;
+    ur_stream_quard Guard;
+    hipStream_t HIPStream = hQueue->getNextComputeStream(
+        numEventsInWaitList, phEventWaitList, Guard, &StreamToken);
+    // Held while the barrier events and applied-barrier flags are updated.
+    {
+      std::lock_guard<std::mutex> BarrierLock(hQueue->BarrierMutex);
+      if (hQueue->BarrierEvent == nullptr) {
+        UR_CHECK_ERROR(hipEventCreate(&hQueue->BarrierEvent));
+      }
+      if (numEventsInWaitList == 0) { //  wait on all work
+        if (hQueue->BarrierTmpEvent == nullptr) {
+          UR_CHECK_ERROR(hipEventCreate(&hQueue->BarrierTmpEvent));
+        }
+        hQueue->syncStreams(
+            [HIPStream, TmpEvent = hQueue->BarrierTmpEvent](hipStream_t S) {
+              if (HIPStream != S) {
+                UR_CHECK_ERROR(hipEventRecord(TmpEvent, S));
+                UR_CHECK_ERROR(hipStreamWaitEvent(HIPStream, TmpEvent, 0));
+              }
+            });
+      } else { // wait just on given events
+        forLatestEvents(
+            phEventWaitList, numEventsInWaitList,
+            [HIPStream](ur_event_handle_t Event) -> ur_result_t {
+              if (Event->getQueue()->hasBeenSynchronized(
+                      Event->getComputeStreamToken())) {
+                return UR_RESULT_SUCCESS;
+              } else {
+                return UR_CHECK_ERROR(
+                    hipStreamWaitEvent(HIPStream, Event->get(), 0));
+              }
+            });
+      }
+
+      Result = UR_CHECK_ERROR(hipEventRecord(hQueue->BarrierEvent, HIPStream));
+      for (unsigned int i = 0; i < hQueue->ComputeAppliedBarrier.size(); i++) {
+        hQueue->ComputeAppliedBarrier[i] = false;
+      }
+      for (unsigned int i = 0; i < hQueue->TransferAppliedBarrier.size(); i++) {
+        hQueue->TransferAppliedBarrier[i] = false;
+      }
+    }
+    if (Result != UR_RESULT_SUCCESS) {
+      return Result;
+    }
+
+    // Own the new event until it is fully set up so a throw cannot leak it.
+    if (phEvent) {
+      std::unique_ptr<ur_event_handle_t_> Event(ur_event_handle_t_::makeNative(
+          UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, hQueue, HIPStream, StreamToken));
+      Event->start();
+      Event->record();
+      *phEvent = Event.release();
+    }
+
+    return UR_RESULT_SUCCESS;
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+}
+
+/// General 3D memory copy operation.
+/// This function requires the corresponding HIP context to be at the top of
+/// the context stack
+/// If the source and/or destination is on the device, SrcPtr and/or DstPtr
+/// must be a pointer to a hipDevPtr. Zero pitches are derived from Region.
+static ur_result_t commonEnqueueMemBufferCopyRect(
+    hipStream_t HipStream, ur_rect_region_t Region, const void *SrcPtr,
+    const hipMemoryType SrcType, ur_rect_offset_t SrcOffset, size_t SrcRowPitch,
+    size_t SrcSlicePitch, void *DstPtr, const hipMemoryType DstType,
+    ur_rect_offset_t DstOffset, size_t DstRowPitch, size_t DstSlicePitch) {
+
+  assert(SrcType == hipMemoryTypeDevice || SrcType == hipMemoryTypeHost);
+  assert(DstType == hipMemoryTypeDevice || DstType == hipMemoryTypeHost);
+
+  SrcRowPitch = (!SrcRowPitch) ? Region.width : SrcRowPitch;
+  SrcSlicePitch =
+      (!SrcSlicePitch) ? (Region.height * SrcRowPitch) : SrcSlicePitch;
+  DstRowPitch = (!DstRowPitch) ? Region.width : DstRowPitch;
+  DstSlicePitch =
+      (!DstSlicePitch) ? (Region.height * DstRowPitch) : DstSlicePitch;
+
+  HIP_MEMCPY3D Params = {}; // zero every field the code below does not set
+
+  Params.WidthInBytes = Region.width;
+  Params.Height = Region.height;
+  Params.Depth = Region.depth;
+
+  Params.srcMemoryType = SrcType;
+  Params.srcDevice = SrcType == hipMemoryTypeDevice
+                         ? *static_cast<const hipDeviceptr_t *>(SrcPtr)
+                         : 0;
+  Params.srcHost = SrcType == hipMemoryTypeHost ? SrcPtr : nullptr;
+  Params.srcXInBytes = SrcOffset.x;
+  Params.srcY = SrcOffset.y;
+  Params.srcZ = SrcOffset.z;
+  Params.srcPitch = SrcRowPitch;
+  Params.srcHeight = SrcSlicePitch / SrcRowPitch;
+
+  Params.dstMemoryType = DstType;
+  Params.dstDevice = DstType == hipMemoryTypeDevice
+                         ? *reinterpret_cast<hipDeviceptr_t *>(DstPtr)
+                         : 0;
+  Params.dstHost = DstType == hipMemoryTypeHost ? DstPtr : nullptr;
+  Params.dstXInBytes = DstOffset.x;
+  Params.dstY = DstOffset.y;
+  Params.dstZ = DstOffset.z;
+  Params.dstPitch = DstRowPitch;
+  Params.dstHeight = DstSlicePitch / DstRowPitch;
+
+  return UR_CHECK_ERROR(hipDrvMemcpy3DAsync(&Params, HipStream));
+}
+
+/// Enqueues an asynchronous rectangular buffer-to-host copy.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead,
+    ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+    ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+    size_t hostRowPitch, size_t hostSlicePitch, void *pDst,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT((phEventWaitList == nullptr) == (numEventsInWaitList == 0),
+            UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST);
+  UR_ASSERT(!(region.width == 0 || region.height == 0 || region.depth == 0),
+            UR_RESULT_ERROR_INVALID_SIZE);
+  UR_ASSERT(!(bufferRowPitch != 0 && bufferRowPitch < region.width),
+            UR_RESULT_ERROR_INVALID_SIZE);
+  UR_ASSERT(!(hostRowPitch != 0 && hostRowPitch < region.width),
+            UR_RESULT_ERROR_INVALID_SIZE);
+  UR_ASSERT(!(bufferSlicePitch != 0 &&
+              bufferSlicePitch < region.height * bufferRowPitch),
+            UR_RESULT_ERROR_INVALID_SIZE);
+  UR_ASSERT(!(bufferSlicePitch != 0 && bufferSlicePitch % bufferRowPitch != 0),
+            UR_RESULT_ERROR_INVALID_SIZE);
+  UR_ASSERT(
+      !(hostSlicePitch != 0 && hostSlicePitch < region.height * hostRowPitch),
+      UR_RESULT_ERROR_INVALID_SIZE);
+  UR_ASSERT(!(hostSlicePitch != 0 && hostSlicePitch % hostRowPitch != 0),
+            UR_RESULT_ERROR_INVALID_SIZE);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  void *DevPtr = hBuffer->Mem.BufferMem.getVoid();
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never copy ahead of dependencies that failed to enqueue
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_READ_RECT, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    Result = commonEnqueueMemBufferCopyRect(
+        HIPStream, region, &DevPtr, hipMemoryTypeDevice, bufferOrigin,
+        bufferRowPitch, bufferSlicePitch, pDst, hipMemoryTypeHost, hostOrigin,
+        hostRowPitch, hostSlicePitch);
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+    }
+
+    if (blockingRead) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(HIPStream));
+    }
+
+    if (phEvent) {
+      *phEvent = RetImplEvent.release();
+    }
+
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+  return Result;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite,
+    ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin,
+    ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch,
+    size_t hostRowPitch, size_t hostSlicePitch, void *pSrc,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  void *DevPtr = hBuffer->Mem.BufferMem.getVoid();
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never copy ahead of dependencies that failed to enqueue
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_WRITE_RECT, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    // Host memory is the source and the buffer's device pointer the target.
+    Result = commonEnqueueMemBufferCopyRect(
+        HIPStream, region, pSrc, hipMemoryTypeHost, hostOrigin, hostRowPitch,
+        hostSlicePitch, &DevPtr, hipMemoryTypeDevice, bufferOrigin,
+        bufferRowPitch, bufferSlicePitch);
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+    }
+
+    if (blockingWrite) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(HIPStream));
+    }
+
+    if (phEvent) {
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+  return Result;
+}
+
+/// Enqueues an asynchronous device-to-device copy between two buffers.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
+    ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(size + srcOffset <= hBufferSrc->Mem.BufferMem.getSize(),
+            UR_RESULT_ERROR_INVALID_SIZE);
+  UR_ASSERT(size + dstOffset <= hBufferDst->Mem.BufferMem.getSize(),
+            UR_RESULT_ERROR_INVALID_SIZE);
+
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    auto Stream = hQueue->getNextTransferStream();
+
+    // enqueueEventsWait() accepts a null wait list, so no guard is needed.
+    ur_result_t Result = enqueueEventsWait(hQueue, Stream, numEventsInWaitList,
+                                           phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never copy ahead of dependencies that failed to enqueue
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_COPY, hQueue, Stream));
+      Result = RetImplEvent->start();
+    }
+
+    auto Src = hBufferSrc->Mem.BufferMem.getWithOffset(srcOffset);
+    auto Dst = hBufferDst->Mem.BufferMem.getWithOffset(dstOffset);
+
+    Result = UR_CHECK_ERROR(hipMemcpyDtoDAsync(Dst, Src, size, Stream));
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+      *phEvent = RetImplEvent.release();
+    }
+
+    return Result;
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc,
+    ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin,
+    ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch,
+    size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  void *SrcPtr = hBufferSrc->Mem.BufferMem.getVoid();
+  void *DstPtr = hBufferDst->Mem.BufferMem.getVoid();
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never copy ahead of dependencies that failed to enqueue
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_COPY_RECT, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    // Both ends are device memory, passed as pointers to the device pointers.
+    Result = commonEnqueueMemBufferCopyRect(
+        HIPStream, region, &SrcPtr, hipMemoryTypeDevice, srcOrigin, srcRowPitch,
+        srcSlicePitch, &DstPtr, hipMemoryTypeDevice, dstOrigin, dstRowPitch,
+        dstSlicePitch);
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+  return Result;
+}
+
+// HIP has no memset functions that allow setting values more than 4 bytes. UR
+// API lets you pass an arbitrary "pattern" to the buffer fill, which can be
+// more than 4 bytes. We must break up the pattern into 1 byte values, and set
+// the buffer using multiple strided calls. The first 4 bytes of the pattern
+// are written with one hipMemsetD32Async call; every remaining pattern byte
+// is then written with its own strided hipMemset2DAsync call.
+ur_result_t commonMemSetLargePattern(hipStream_t Stream, uint32_t PatternSize,
+                                     size_t Size, const void *pPattern,
+                                     hipDeviceptr_t Ptr) {
+  // Calculate the number of patterns, stride, number of times the pattern
+  // needs to be applied, and the number of times the first 32 bit pattern
+  // needs to be applied.
+  auto NumberOfSteps = PatternSize / sizeof(uint8_t);
+  auto Pitch = NumberOfSteps * sizeof(uint8_t);
+  auto Height = Size / NumberOfSteps;
+  auto Count32 = Size / sizeof(uint32_t);
+
+  // Get 4-byte chunk of the pattern and call hipMemsetD32Async
+  auto Value = *(static_cast<const uint32_t *>(pPattern));
+  auto Result = UR_CHECK_ERROR(hipMemsetD32Async(Ptr, Value, Count32, Stream));
+  if (Result != UR_RESULT_SUCCESS) {
+    return Result;
+  }
+  for (auto step = 4u; step < NumberOfSteps; ++step) {
+    // take 1 byte of the pattern
+    Value = *(static_cast<const uint8_t *>(pPattern) + step);
+
+    // offset the pointer to the part of the buffer we want to write to
+    auto OffsetPtr = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(Ptr) +
+                                              (step * sizeof(uint8_t)));
+
+    // write this byte once per pattern repetition (Height rows, Pitch apart)
+    Result = UR_CHECK_ERROR(hipMemset2DAsync(OffsetPtr, Pitch, Value,
+                                             sizeof(uint8_t), Height, Stream));
+    if (Result != UR_RESULT_SUCCESS) {
+      return Result;
+    }
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+/// Enqueues an asynchronous fill of a buffer range with a repeated pattern.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern,
+    size_t patternSize, size_t offset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(size + offset <= hBuffer->Mem.BufferMem.getSize(),
+            UR_RESULT_ERROR_INVALID_SIZE);
+  auto ArgsAreMultiplesOfPatternSize =
+      patternSize && !(offset % patternSize) && !(size % patternSize);
+
+  auto PatternIsValid = (pPattern != nullptr);
+
+  auto PatternSizeIsValid =
+      ((patternSize & (patternSize - 1)) == 0) && // is power of two
+      (patternSize > 0) && (patternSize <= 128);  // falls within valid range
+
+  UR_ASSERT(ArgsAreMultiplesOfPatternSize && PatternIsValid &&
+                PatternSizeIsValid,
+            UR_RESULT_ERROR_INVALID_VALUE);
+  std::ignore = ArgsAreMultiplesOfPatternSize;
+  std::ignore = PatternIsValid;
+  std::ignore = PatternSizeIsValid;
+
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+
+    auto Stream = hQueue->getNextTransferStream();
+    // enqueueEventsWait() accepts a null wait list, so no guard is needed.
+    ur_result_t Result = enqueueEventsWait(hQueue, Stream, numEventsInWaitList,
+                                           phEventWaitList);
+    if (Result != UR_RESULT_SUCCESS)
+      return Result; // never fill ahead of dependencies that failed to enqueue
+
+    if (phEvent) {
+      RetImplEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_FILL, hQueue, Stream));
+      Result = RetImplEvent->start();
+    }
+
+    auto DstDevice = hBuffer->Mem.BufferMem.getWithOffset(offset);
+    auto N = size / patternSize;
+
+    // pattern size in bytes
+    switch (patternSize) {
+    case 1: {
+      auto Value = *static_cast<const uint8_t *>(pPattern);
+      Result = UR_CHECK_ERROR(hipMemsetD8Async(DstDevice, Value, N, Stream));
+      break;
+    }
+    case 2: {
+      auto Value = *static_cast<const uint16_t *>(pPattern);
+      Result = UR_CHECK_ERROR(hipMemsetD16Async(DstDevice, Value, N, Stream));
+      break;
+    }
+    case 4: {
+      auto Value = *static_cast<const uint32_t *>(pPattern);
+      Result = UR_CHECK_ERROR(hipMemsetD32Async(DstDevice, Value, N, Stream));
+      break;
+    }
+
+    default: {
+      Result = commonMemSetLargePattern(Stream, patternSize, size, pPattern,
+                                        DstDevice);
+      break;
+    }
+    }
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+      *phEvent = RetImplEvent.release();
+    }
+
+    return Result;
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+}
+
+/// General ND memory copy operation for images (where N > 1).
+/// This function requires the corresponding HIP context to be at the top of
+/// the context stack
+/// If the source and/or destination is an array, SrcPtr and/or DstPtr
+/// must be a pointer to a hipArray
+static ur_result_t commonEnqueueMemImageNDCopy(
+    hipStream_t HipStream, ur_mem_type_t ImgType, const size_t *Region,
+    const void *SrcPtr, const hipMemoryType SrcType, const size_t *SrcOffset,
+    void *DstPtr, const hipMemoryType DstType, const size_t *DstOffset) {
+  UR_ASSERT(SrcType == hipMemoryTypeArray || SrcType == hipMemoryTypeHost,
+            UR_RESULT_ERROR_INVALID_VALUE);
+  UR_ASSERT(DstType == hipMemoryTypeArray || DstType == hipMemoryTypeHost,
+            UR_RESULT_ERROR_INVALID_VALUE);
+
+  // 2D images: copy Region[0] bytes per row for Region[1] rows.
+  if (ImgType == UR_MEM_TYPE_IMAGE2D) {
+    hip_Memcpy2D CpyDesc;
+    memset(&CpyDesc, 0, sizeof(CpyDesc));
+    CpyDesc.srcMemoryType = SrcType;
+    if (SrcType == hipMemoryTypeArray) {
+      CpyDesc.srcArray =
+          reinterpret_cast<hipCUarray>(const_cast<void *>(SrcPtr));
+      CpyDesc.srcXInBytes = SrcOffset[0];
+      CpyDesc.srcY = SrcOffset[1];
+    } else {
+      CpyDesc.srcHost = SrcPtr;
+    }
+    CpyDesc.dstMemoryType = DstType;
+    if (DstType == hipMemoryTypeArray) {
+      CpyDesc.dstArray = reinterpret_cast<hipCUarray>(DstPtr);
+      CpyDesc.dstXInBytes = DstOffset[0];
+      CpyDesc.dstY = DstOffset[1];
+    } else {
+      CpyDesc.dstHost = DstPtr;
+    }
+    CpyDesc.WidthInBytes = Region[0];
+    CpyDesc.Height = Region[1];
+    return UR_CHECK_ERROR(hipMemcpyParam2DAsync(&CpyDesc, HipStream));
+  }
+
+  // 3D images: same as the 2D case with Region[2] slices and a Z offset.
+  if (ImgType == UR_MEM_TYPE_IMAGE3D) {
+    HIP_MEMCPY3D CpyDesc;
+    memset(&CpyDesc, 0, sizeof(CpyDesc));
+    CpyDesc.srcMemoryType = SrcType;
+    if (SrcType == hipMemoryTypeArray) {
+      CpyDesc.srcArray =
+          reinterpret_cast<hipCUarray>(const_cast<void *>(SrcPtr));
+      CpyDesc.srcXInBytes = SrcOffset[0];
+      CpyDesc.srcY = SrcOffset[1];
+      CpyDesc.srcZ = SrcOffset[2];
+    } else {
+      CpyDesc.srcHost = SrcPtr;
+    }
+    CpyDesc.dstMemoryType = DstType;
+    if (DstType == hipMemoryTypeArray) {
+      CpyDesc.dstArray = reinterpret_cast<hipCUarray>(DstPtr);
+      CpyDesc.dstXInBytes = DstOffset[0];
+      CpyDesc.dstY = DstOffset[1];
+      CpyDesc.dstZ = DstOffset[2];
+    } else {
+      CpyDesc.dstHost = DstPtr;
+    }
+    CpyDesc.WidthInBytes = Region[0];
+    CpyDesc.Height = Region[1];
+    CpyDesc.Depth = Region[2];
+    return UR_CHECK_ERROR(hipDrvMemcpy3DAsync(&CpyDesc, HipStream));
+  }
+
+  // Any other image type is not handled by this helper.
+  return UR_RESULT_ERROR_INVALID_VALUE;
+}
+
+/// Enqueues a read of a region of a surface image into host memory.
+///
+/// The x origin and the region width are converted from pixels to bytes using
+/// the element size and channel count of the underlying HIP array. The two
+/// unnamed size_t parameters are ignored.
+///
+/// \param hQueue is the queue to submit to
+/// \param hImage is the image to read from; must be a surface
+/// \param blockingRead is whether to wait for the copy before returning
+/// \param origin is the first pixel of the region to read
+/// \param region is the extent of the region in pixels
+/// \param pDst is the host memory receiving the data
+/// \param numEventsInWaitList is the number of events to wait on
+/// \param phEventWaitList is an array of events to wait on
+/// \param phEvent optionally receives the event representing this operation
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead,
+    ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t,
+    void *pDst, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  // Owns the event until it is handed to the caller, so every early exit
+  // below disposes of it.
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+
+    if (phEventWaitList) {
+      Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                                 phEventWaitList);
+      // Do not enqueue the copy if its dependencies could not be enqueued.
+      if (Result != UR_RESULT_SUCCESS) {
+        return Result;
+      }
+    }
+
+    if (phEvent) {
+      // record() rejects events that were never started, so the event has to
+      // be started before the copy is enqueued.
+      RetImplEvent =
+          std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative(
+              UR_COMMAND_MEM_IMAGE_READ, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    hipArray *Array = hImage->Mem.SurfaceMem.getArray();
+
+    hipArray_Format Format;
+    size_t NumChannels;
+    getArrayDesc(Array, Format, NumChannels);
+
+    int ElementByteSize = imageElementByteSize(Format);
+
+    // The copy descriptor expects the x offset and the row width in bytes.
+    size_t ByteOffsetX = origin.x * ElementByteSize * NumChannels;
+    size_t BytesToCopy = ElementByteSize * NumChannels * region.width;
+
+    auto ImgType = hImage->Mem.SurfaceMem.getImageType();
+
+    size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.depth};
+    size_t SrcOffset[3] = {ByteOffsetX, origin.y, origin.z};
+
+    // The destination is host memory, so no destination offset is needed.
+    Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion,
+                                         Array, hipMemoryTypeArray, SrcOffset,
+                                         pDst, hipMemoryTypeHost, nullptr);
+
+    if (Result != UR_RESULT_SUCCESS) {
+      return Result;
+    }
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+      if (Result != UR_RESULT_SUCCESS) {
+        return Result;
+      }
+    }
+
+    if (blockingRead) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(HIPStream));
+    }
+
+    if (phEvent) {
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+
+  return Result;
+}
+
+/// Enqueues a write of host memory into a region of a surface image.
+///
+/// The x origin and the region width are converted from pixels to bytes using
+/// the element size and channel count of the underlying HIP array. The two
+/// unnamed size_t parameters are ignored.
+///
+/// \param hQueue is the queue to submit to
+/// \param hImage is the image to write to; must be a surface
+/// \param blockingWrite is whether to wait for the copy before returning
+/// \param origin is the first pixel of the region to write
+/// \param region is the extent of the region in pixels
+/// \param pSrc is the host memory providing the data
+/// \param numEventsInWaitList is the number of events to wait on
+/// \param phEventWaitList is an array of events to wait on
+/// \param phEvent optionally receives the event representing this operation
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite,
+    ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t,
+    void *pSrc, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  // Owns the event until it is handed to the caller, so every early exit
+  // below disposes of it.
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+
+    if (phEventWaitList) {
+      Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                                 phEventWaitList);
+      // Do not enqueue the copy if its dependencies could not be enqueued.
+      if (Result != UR_RESULT_SUCCESS) {
+        return Result;
+      }
+    }
+
+    if (phEvent) {
+      // record() rejects events that were never started, so the event has to
+      // be started before the copy is enqueued.
+      RetImplEvent =
+          std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative(
+              UR_COMMAND_MEM_IMAGE_WRITE, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    hipArray *Array = hImage->Mem.SurfaceMem.getArray();
+
+    hipArray_Format Format;
+    size_t NumChannels;
+    getArrayDesc(Array, Format, NumChannels);
+
+    int ElementByteSize = imageElementByteSize(Format);
+
+    // The copy descriptor expects the x offset and the row width in bytes.
+    size_t ByteOffsetX = origin.x * ElementByteSize * NumChannels;
+    size_t BytesToCopy = ElementByteSize * NumChannels * region.width;
+
+    auto ImgType = hImage->Mem.SurfaceMem.getImageType();
+
+    size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.depth};
+    size_t DstOffset[3] = {ByteOffsetX, origin.y, origin.z};
+
+    // The source is host memory, so no source offset is needed.
+    Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion,
+                                         pSrc, hipMemoryTypeHost, nullptr,
+                                         Array, hipMemoryTypeArray, DstOffset);
+
+    if (Result != UR_RESULT_SUCCESS) {
+      return Result;
+    }
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+      if (Result != UR_RESULT_SUCCESS) {
+        return Result;
+      }
+    }
+
+    // Honour the blocking flag the same way urEnqueueMemImageRead does, so
+    // the caller may reuse pSrc as soon as a blocking write returns.
+    if (blockingWrite) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(HIPStream));
+    }
+
+    if (phEvent) {
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+
+  return Result;
+}
+
+/// Enqueues a copy of a region from one surface image to another.
+///
+/// Both images must be surfaces of the same image type, and their HIP arrays
+/// must share the same element format and channel count. The x origins and
+/// the region width are converted from pixels to bytes.
+///
+/// \param hQueue is the queue to submit to
+/// \param hImageSrc is the image to copy from
+/// \param hImageDst is the image to copy to
+/// \param srcOrigin is the first pixel of the region in the source image
+/// \param dstOrigin is the first pixel of the region in the destination image
+/// \param region is the extent of the region in pixels
+/// \param numEventsInWaitList is the number of events to wait on
+/// \param phEventWaitList is an array of events to wait on
+/// \param phEvent optionally receives the event representing this operation
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc,
+    ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin,
+    ur_rect_offset_t dstOrigin, ur_rect_region_t region,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  UR_ASSERT(hImageSrc->MemType == ur_mem_handle_t_::Type::Surface,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  UR_ASSERT(hImageDst->MemType == ur_mem_handle_t_::Type::Surface,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  UR_ASSERT(hImageSrc->Mem.SurfaceMem.getImageType() ==
+                hImageDst->Mem.SurfaceMem.getImageType(),
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  // Owns the event until it is handed to the caller, so every early exit
+  // below disposes of it.
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+    if (phEventWaitList) {
+      Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                                 phEventWaitList);
+      // Do not enqueue the copy if its dependencies could not be enqueued.
+      if (Result != UR_RESULT_SUCCESS) {
+        return Result;
+      }
+    }
+
+    hipArray *SrcArray = hImageSrc->Mem.SurfaceMem.getArray();
+    hipArray_Format SrcFormat;
+    size_t SrcNumChannels;
+    getArrayDesc(SrcArray, SrcFormat, SrcNumChannels);
+
+    hipArray *DstArray = hImageDst->Mem.SurfaceMem.getArray();
+    hipArray_Format DstFormat;
+    size_t DstNumChannels;
+    getArrayDesc(DstArray, DstFormat, DstNumChannels);
+
+    UR_ASSERT(SrcFormat == DstFormat,
+              UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+    UR_ASSERT(SrcNumChannels == DstNumChannels,
+              UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+
+    if (phEvent) {
+      // record() rejects events that were never started, so the event has to
+      // be started before the copy is enqueued.
+      RetImplEvent =
+          std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative(
+              UR_COMMAND_MEM_IMAGE_COPY, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    int ElementByteSize = imageElementByteSize(SrcFormat);
+
+    // The copy descriptor expects x offsets and the row width in bytes.
+    size_t SrcByteOffsetX = srcOrigin.x * ElementByteSize * SrcNumChannels;
+    size_t DstByteOffsetX = dstOrigin.x * ElementByteSize * DstNumChannels;
+    size_t BytesToCopy = ElementByteSize * SrcNumChannels * region.width;
+
+    auto ImgType = hImageSrc->Mem.SurfaceMem.getImageType();
+
+    size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.depth};
+    size_t SrcOffset[3] = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
+    size_t DstOffset[3] = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};
+
+    Result = commonEnqueueMemImageNDCopy(
+        HIPStream, ImgType, AdjustedRegion, SrcArray, hipMemoryTypeArray,
+        SrcOffset, DstArray, hipMemoryTypeArray, DstOffset);
+
+    if (Result != UR_RESULT_SUCCESS) {
+      return Result;
+    }
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+      if (Result != UR_RESULT_SUCCESS) {
+        return Result;
+      }
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+
+  return Result;
+}
+
+/// Maps a region of a buffer into host memory.
+///
+/// The mapping is registered on the ur_mem_handle_t object and the mapped
+/// pointer is returned through ppRetMap. For buffers that are not backed by
+/// pinned host memory, a map with read or write access is implemented as a
+/// buffer read into the mapped pointer. Otherwise no data is transferred:
+/// pinned buffers only wait on the given events, and an event for the map
+/// command is created if the caller asked for one.
+///
+/// Only one mapping per buffer is supported at a time.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap,
+    ur_map_flags_t mapFlags, size_t offset, size_t size,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent, void **ppRetMap) {
+  UR_ASSERT(hBuffer->MemType == ur_mem_handle_t_::Type::Buffer,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  UR_ASSERT(offset + size <= hBuffer->Mem.BufferMem.getSize(),
+            UR_RESULT_ERROR_INVALID_SIZE);
+
+  auto &Buffer = hBuffer->Mem.BufferMem;
+  const bool UsesPinnedHostMem =
+      Buffer.MemAllocMode ==
+      ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr;
+
+  // An existing mapping means this request would overlap it; that is not
+  // supported yet.
+  if (Buffer.getMapPtr() != nullptr) {
+    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  }
+
+  // Register the mapping and fetch the host pointer backing it.
+  auto MappedHostPtr = Buffer.mapToPtr(size, offset, mapFlags);
+  *ppRetMap = Buffer.getMapPtr();
+
+  const bool NeedsDeviceRead =
+      !UsesPinnedHostMem &&
+      (mapFlags & (UR_MAP_FLAG_READ | UR_MAP_FLAG_WRITE)) != 0;
+  if (NeedsDeviceRead) {
+    // Bring the current device contents into the mapped host memory.
+    return urEnqueueMemBufferRead(hQueue, hBuffer, blockingMap, offset, size,
+                                  MappedHostPtr, numEventsInWaitList,
+                                  phEventWaitList, phEvent);
+  }
+
+  ur_result_t Status = MappedHostPtr ? UR_RESULT_SUCCESS
+                                     : UR_RESULT_ERROR_INVALID_OPERATION;
+
+  ScopedContext Active(hQueue->getContext());
+
+  // Pinned host memory is already on the host, so it only has to wait for
+  // the commands it depends on.
+  if (UsesPinnedHostMem) {
+    Status = urEnqueueEventsWait(hQueue, numEventsInWaitList, phEventWaitList,
+                                 nullptr);
+  }
+
+  if (phEvent) {
+    try {
+      *phEvent = ur_event_handle_t_::makeNative(
+          UR_COMMAND_MEM_BUFFER_MAP, hQueue, hQueue->getNextTransferStream());
+      (*phEvent)->start();
+      (*phEvent)->record();
+    } catch (ur_result_t Error) {
+      Status = Error;
+    }
+  }
+
+  return Status;
+}
+
+/// Unmaps a previously mapped buffer region.
+///
+/// pMappedPtr must be the pointer currently registered as the mapping of
+/// hMem. For buffers that are not backed by pinned host memory and were
+/// mapped for writing, the mapped range is written back with a blocking
+/// buffer write. Otherwise no data is transferred: pinned buffers only wait
+/// on the given events, and an event for the unmap command is created if the
+/// caller asked for one. The mapping is removed from hMem in every case.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
+    ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  ur_result_t Status = UR_RESULT_SUCCESS;
+  UR_ASSERT(hMem->MemType == ur_mem_handle_t_::Type::Buffer,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  UR_ASSERT(hMem->Mem.BufferMem.getMapPtr() != nullptr,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  UR_ASSERT(hMem->Mem.BufferMem.getMapPtr() == pMappedPtr,
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+
+  auto &Buffer = hMem->Mem.BufferMem;
+  const bool UsesPinnedHostMem =
+      Buffer.MemAllocMode ==
+      ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr;
+
+  const bool NeedsWriteBack =
+      !UsesPinnedHostMem &&
+      (Buffer.getMapFlags() &
+       (UR_MAP_FLAG_WRITE | UR_MAP_FLAG_WRITE_INVALIDATE_REGION)) != 0;
+
+  if (NeedsWriteBack) {
+    // Push the host-side modifications back into the buffer.
+    Status = urEnqueueMemBufferWrite(hQueue, hMem, true, Buffer.getMapOffset(),
+                                     Buffer.getMapSize(), pMappedPtr,
+                                     numEventsInWaitList, phEventWaitList,
+                                     phEvent);
+  } else {
+    ScopedContext Active(hQueue->getContext());
+
+    // Pinned host memory lives on the host only, so there is nothing to
+    // write; it just waits for the commands it depends on.
+    if (UsesPinnedHostMem) {
+      Status = urEnqueueEventsWait(hQueue, numEventsInWaitList,
+                                   phEventWaitList, nullptr);
+    }
+
+    if (phEvent) {
+      try {
+        *phEvent = ur_event_handle_t_::makeNative(
+            UR_COMMAND_MEM_UNMAP, hQueue, hQueue->getNextTransferStream());
+        (*phEvent)->start();
+        (*phEvent)->record();
+      } catch (ur_result_t Error) {
+        Status = Error;
+      }
+    }
+  }
+
+  Buffer.unmap(pMappedPtr);
+  return Status;
+}
+
+/// Enqueues a fill of USM memory with a repeated pattern.
+///
+/// Patterns of 1, 2 and 4 bytes map directly onto the HIP memset calls of
+/// the matching width; every other pattern size is delegated to
+/// commonMemSetLargePattern.
+///
+/// \param hQueue is the queue to submit to
+/// \param ptr is the USM memory to fill
+/// \param patternSize is the size of the pattern in bytes; must not be zero
+/// \param pPattern is the pattern to replicate
+/// \param size is the number of bytes to fill
+/// \param numEventsInWaitList is the number of events to wait on
+/// \param phEventWaitList is an array of events to wait on
+/// \param phEvent optionally receives the event representing this operation
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
+    ur_queue_handle_t hQueue, void *ptr, size_t patternSize,
+    const void *pPattern, size_t size, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  // The element count below divides by patternSize.
+  UR_ASSERT(patternSize != 0, UR_RESULT_ERROR_INVALID_SIZE);
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    uint32_t StreamToken;
+    ur_stream_quard Guard;
+    hipStream_t HIPStream = hQueue->getNextComputeStream(
+        numEventsInWaitList, phEventWaitList, Guard, &StreamToken);
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (phEvent) {
+      EventPtr =
+          std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative(
+              UR_COMMAND_USM_FILL, hQueue, HIPStream, StreamToken));
+      EventPtr->start();
+    }
+
+    // Number of pattern-sized elements to write.
+    auto N = size / patternSize;
+    switch (patternSize) {
+    case 1:
+      Result = UR_CHECK_ERROR(hipMemsetD8Async(
+          reinterpret_cast<hipDeviceptr_t>(ptr),
+          *static_cast<const uint8_t *>(pPattern), N, HIPStream));
+      break;
+    case 2:
+      Result = UR_CHECK_ERROR(hipMemsetD16Async(
+          reinterpret_cast<hipDeviceptr_t>(ptr),
+          *static_cast<const uint16_t *>(pPattern), N, HIPStream));
+      break;
+    case 4:
+      Result = UR_CHECK_ERROR(hipMemsetD32Async(
+          reinterpret_cast<hipDeviceptr_t>(ptr),
+          *static_cast<const uint32_t *>(pPattern), N, HIPStream));
+      break;
+
+    default:
+      Result = commonMemSetLargePattern(HIPStream, patternSize, size, pPattern,
+                                        reinterpret_cast<hipDeviceptr_t>(ptr));
+      break;
+    }
+
+    if (phEvent) {
+      Result = EventPtr->record();
+      *phEvent = EventPtr.release();
+    }
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+
+  return Result;
+}
+
+/// Enqueues a copy of size bytes between two USM allocations.
+///
+/// The copy direction is left to HIP (hipMemcpyDefault). When blocking is
+/// set, the call waits for the transfer stream before returning. The event,
+/// if requested, is handed to the caller only once everything else succeeded.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
+    ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc,
+    size_t size, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  const bool WantsEvent = phEvent != nullptr;
+  std::unique_ptr<ur_event_handle_t_> PendingEvent{nullptr};
+  ur_result_t Status = UR_RESULT_SUCCESS;
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t TransferStream = hQueue->getNextTransferStream();
+
+    Status = enqueueEventsWait(hQueue, TransferStream, numEventsInWaitList,
+                               phEventWaitList);
+
+    if (WantsEvent) {
+      PendingEvent.reset(ur_event_handle_t_::makeNative(
+          UR_COMMAND_USM_MEMCPY, hQueue, TransferStream));
+      PendingEvent->start();
+    }
+
+    Status = UR_CHECK_ERROR(
+        hipMemcpyAsync(pDst, pSrc, size, hipMemcpyDefault, TransferStream));
+
+    if (WantsEvent) {
+      Status = PendingEvent->record();
+    }
+
+    if (blocking) {
+      Status = UR_CHECK_ERROR(hipStreamSynchronize(TransferStream));
+    }
+
+    // Ownership passes to the caller only after nothing above has thrown.
+    if (WantsEvent) {
+      *phEvent = PendingEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    Status = Err;
+  }
+
+  return Status;
+}
+
+/// Enqueues a prefetch of USM memory to the device of the queue's context.
+///
+/// \param hQueue is the queue to submit to
+/// \param pMem is the start of the memory to prefetch
+/// \param size is the number of bytes to prefetch; it must not exceed the
+/// size of the address range pMem belongs to
+/// \param flags is currently unused and must be zero
+/// \param numEventsInWaitList is the number of events to wait on
+/// \param phEventWaitList is an array of events to wait on
+/// \param phEvent optionally receives the event representing this operation
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
+    ur_queue_handle_t hQueue, const void *pMem, size_t size,
+    ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList,
+    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  std::unique_ptr<ur_event_handle_t_> EventPtr{nullptr};
+
+  // Everything that goes through UR_CHECK_ERROR stays inside the try block so
+  // that a thrown ur_result_t cannot escape this C entry point.
+  try {
+    void *HIPDevicePtr = const_cast<void *>(pMem);
+    // NOTE(review): HIP appears to report the range size as a size_t, so use
+    // a full-width, zero-initialised variable to receive it - confirm
+    // against the HIP documentation.
+    size_t PointerRangeSize = 0;
+    UR_CHECK_ERROR(hipPointerGetAttribute(
+        &PointerRangeSize, HIP_POINTER_ATTRIBUTE_RANGE_SIZE,
+        reinterpret_cast<hipDeviceptr_t>(HIPDevicePtr)));
+    UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
+
+    // flags is currently unused so fail if set
+    if (flags != 0)
+      return UR_RESULT_ERROR_INVALID_VALUE;
+
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (phEvent) {
+      EventPtr =
+          std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative(
+              UR_COMMAND_USM_PREFETCH, hQueue, HIPStream));
+      EventPtr->start();
+    }
+    Result = UR_CHECK_ERROR(hipMemPrefetchAsync(
+        pMem, size, hQueue->getContext()->getDevice()->get(), HIPStream));
+    if (phEvent) {
+      Result = EventPtr->record();
+      *phEvent = EventPtr.release();
+    }
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+
+  return Result;
+}
+
+/// Enqueues memory advice for a USM range.
+///
+/// The advice flags are currently ignored: after validating that size fits
+/// in the address range pMem belongs to, this only enqueues an events wait
+/// so that phEvent, if requested, is populated.
+///
+/// \param hQueue is the queue to submit to
+/// \param pMem is the start of the memory the advice applies to
+/// \param size is the number of bytes the advice applies to
+/// \param phEvent optionally receives the event representing this operation
+UR_APIEXPORT ur_result_t UR_APICALL
+urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
+                   ur_usm_advice_flags_t, ur_event_handle_t *phEvent) {
+  // UR_CHECK_ERROR stays inside a try block so that a thrown ur_result_t
+  // cannot escape this C entry point.
+  try {
+    void *HIPDevicePtr = const_cast<void *>(pMem);
+    // NOTE(review): HIP appears to report the range size as a size_t, so use
+    // a full-width, zero-initialised variable to receive it - confirm
+    // against the HIP documentation.
+    size_t PointerRangeSize = 0;
+    UR_CHECK_ERROR(hipPointerGetAttribute(
+        &PointerRangeSize, HIP_POINTER_ATTRIBUTE_RANGE_SIZE,
+        reinterpret_cast<hipDeviceptr_t>(HIPDevicePtr)));
+    UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE);
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+
+  // TODO implement a mapping to hipMemAdvise once the expected behaviour
+  // of urEnqueueUSMAdvise is detailed in the USM extension
+  return urEnqueueEventsWait(hQueue, 0, nullptr, phEvent);
+}
+
+/// 2D USM fill is not implemented by this adapter; all arguments are
+/// ignored.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE unconditionally.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D(
+    ur_queue_handle_t, void *, size_t, size_t, const void *, size_t, size_t,
+    uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// 2D Memcpy API
+///
+/// \param hQueue is the queue to submit to
+/// \param blocking is whether this operation should block the host
+/// \param pDst is the location the data will be copied
+/// \param dstPitch is the total width of the destination memory including
+/// padding
+/// \param pSrc is the data to be copied
+/// \param srcPitch is the total width of the source memory including padding
+/// \param width is the width in bytes of each row to be copied
+/// \param height is the number of rows to be copied
+/// \param numEventsInWaitList is the number of events to wait on
+/// \param phEventWaitList is an array of events to wait on
+/// \param phEvent is the event that represents this operation
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D(
+    ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch,
+    const void *pSrc, size_t srcPitch, size_t width, size_t height,
+    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
+    ur_event_handle_t *phEvent) {
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  // Owns the event until the operation has been enqueued, so that a failure
+  // never leaves a half-initialised event in *phEvent.
+  std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
+
+  try {
+    ScopedContext Active(hQueue->getContext());
+    hipStream_t HIPStream = hQueue->getNextTransferStream();
+    Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList,
+                               phEventWaitList);
+    if (phEvent) {
+      RetImplEvent =
+          std::unique_ptr<ur_event_handle_t_>(ur_event_handle_t_::makeNative(
+              UR_COMMAND_USM_MEMCPY_2D, hQueue, HIPStream));
+      RetImplEvent->start();
+    }
+
+    Result =
+        UR_CHECK_ERROR(hipMemcpy2DAsync(pDst, dstPitch, pSrc, srcPitch, width,
+                                        height, hipMemcpyDefault, HIPStream));
+
+    if (phEvent) {
+      Result = RetImplEvent->record();
+    }
+    if (blocking) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(HIPStream));
+    }
+    if (phEvent) {
+      *phEvent = RetImplEvent.release();
+    }
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+
+  return Result;
+}
+
+/// Writing device global variables is not implemented by this adapter; all
+/// arguments are ignored.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE unconditionally.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
+    ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t,
+    size_t, const void *, uint32_t, const ur_event_handle_t *,
+    ur_event_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// Reading device global variables is not implemented by this adapter; all
+/// arguments are ignored.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE unconditionally.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead(
+    ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t,
+    size_t, void *, uint32_t, const ur_event_handle_t *,
+    ur_event_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// Reading from host pipes is not implemented by this adapter; all arguments
+/// are ignored.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE unconditionally.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
+    ur_queue_handle_t, ur_program_handle_t, const char *, bool, void *,
+    size_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// Writing to host pipes is not implemented by this adapter; all arguments
+/// are ignored.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE unconditionally.
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe(
+    ur_queue_handle_t, ur_program_handle_t, const char *, bool, void *,
+    size_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/event.cpp
new file mode 100644
index 0000000000000..93faf2def0ac5
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/event.cpp
@@ -0,0 +1,316 @@
+//===--------- event.cpp - HIP Adapter ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "event.hpp"
+#include "common.hpp"
+#include "context.hpp"
+#include "platform.hpp"
+
+/// Creates an event for a command of the given type.
+///
+/// The end-of-command HIP event is always created. The queued and start
+/// events are only created when the queue has profiling enabled; without
+/// profiling, the end event is created with timing disabled. The event
+/// retains the context and, when present, the queue.
+ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type,
+                                       ur_context_handle_t Context,
+                                       ur_queue_handle_t Queue,
+                                       hipStream_t Stream, uint32_t StreamToken)
+    : CommandType{Type}, RefCount{1}, HasBeenWaitedOn{false}, IsRecorded{false},
+      IsStarted{false}, StreamToken{StreamToken}, EvEnd{nullptr},
+      EvStart{nullptr}, EvQueued{nullptr}, Queue{Queue}, Stream{Stream},
+      Context{Context} {
+
+  // The queue is treated as optional further down, so it must not be
+  // dereferenced unconditionally here.
+  const bool ProfilingEnabled =
+      Queue != nullptr && (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE);
+
+  UR_CHECK_ERROR(hipEventCreateWithFlags(
+      &EvEnd, ProfilingEnabled ? hipEventDefault : hipEventDisableTiming));
+
+  if (ProfilingEnabled) {
+    UR_CHECK_ERROR(hipEventCreateWithFlags(&EvQueued, hipEventDefault));
+    UR_CHECK_ERROR(hipEventCreateWithFlags(&EvStart, hipEventDefault));
+  }
+
+  if (Queue != nullptr) {
+    urQueueRetain(Queue);
+  }
+  urContextRetain(Context);
+}
+
+/// Drops the references taken in the constructor: the queue (if there is
+/// one) first, then the context.
+ur_event_handle_t_::~ur_event_handle_t_() {
+  const bool HasQueue = Queue != nullptr;
+  if (HasQueue) {
+    urQueueRelease(Queue);
+  }
+
+  urContextRelease(Context);
+}
+
+/// Marks the event as started.
+///
+/// When the queue has profiling enabled, this also records the queued and
+/// start HIP events. The event is flagged as started even if recording
+/// fails; the failure is reported through the return value.
+ur_result_t ur_event_handle_t_::start() {
+  assert(!isStarted());
+  ur_result_t Status = UR_RESULT_SUCCESS;
+
+  try {
+    const bool ProfilingEnabled =
+        (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;
+    if (ProfilingEnabled) {
+      // NOTE: This relies on the default stream to be unused.
+      UR_CHECK_ERROR(hipEventRecord(EvQueued, 0));
+      UR_CHECK_ERROR(hipEventRecord(EvStart, Queue->get()));
+    }
+  } catch (ur_result_t Error) {
+    Status = Error;
+  }
+
+  IsStarted = true;
+  return Status;
+}
+
+/// Reports whether the command this event represents has finished.
+///
+/// An event that was never recorded is not complete; an event that has
+/// already been waited on is complete without querying HIP. Otherwise the
+/// end event is queried.
+bool ur_event_handle_t_::isCompleted() const noexcept {
+  if (!IsRecorded) {
+    return false;
+  }
+  if (HasBeenWaitedOn) {
+    return true;
+  }
+
+  const hipError_t QueryStatus = hipEventQuery(EvEnd);
+  if (QueryStatus == hipSuccess) {
+    return true;
+  }
+  if (QueryStatus != hipErrorNotReady) {
+    // A genuine failure, as opposed to "still running": let the common
+    // error check report it.
+    UR_CHECK_ERROR(QueryStatus);
+  }
+  return false;
+}
+
+/// Returns the time, in nanoseconds relative to the platform base event, at
+/// which the associated command(s) were enqueued.
+///
+/// This reads the queued event recorded by start(), which is only recorded
+/// when the queue has profiling enabled.
+uint64_t ur_event_handle_t_::getQueuedTime() const {
+  float MilliSeconds = 0.0f;
+  assert(isStarted());
+
+  // hipEventSynchronize waits till the event is ready for call to
+  // hipEventElapsedTime.
+  UR_CHECK_ERROR(hipEventSynchronize(ur_platform_handle_t_::EvBase));
+  UR_CHECK_ERROR(hipEventSynchronize(EvQueued));
+
+  // Measure against the platform base event, like getStartTime() and
+  // getEndTime(), so the three timestamps are comparable.
+  UR_CHECK_ERROR(hipEventElapsedTime(&MilliSeconds,
+                                     ur_platform_handle_t_::EvBase, EvQueued));
+  // hipEventElapsedTime reports milliseconds; convert to nanoseconds.
+  return static_cast<uint64_t>(MilliSeconds * 1.0e6);
+}
+
+/// Returns the time, in nanoseconds relative to the platform base event, at
+/// which the start event was reached.
+uint64_t ur_event_handle_t_::getStartTime() const {
+  assert(isStarted());
+
+  // Both events must have completed before hipEventElapsedTime can be
+  // called on them.
+  UR_CHECK_ERROR(hipEventSynchronize(ur_platform_handle_t_::EvBase));
+  UR_CHECK_ERROR(hipEventSynchronize(EvStart));
+
+  float ElapsedMs = 0.0f;
+  UR_CHECK_ERROR(hipEventElapsedTime(&ElapsedMs,
+                                     ur_platform_handle_t_::EvBase, EvStart));
+
+  // Milliseconds to nanoseconds.
+  constexpr double NsPerMs = 1.0e6;
+  return static_cast<uint64_t>(ElapsedMs * NsPerMs);
+}
+
+/// Returns the time, in nanoseconds relative to the platform base event, at
+/// which the end event was reached.
+uint64_t ur_event_handle_t_::getEndTime() const {
+  assert(isStarted() && isRecorded());
+
+  // Both events must have completed before hipEventElapsedTime can be
+  // called on them.
+  UR_CHECK_ERROR(hipEventSynchronize(ur_platform_handle_t_::EvBase));
+  UR_CHECK_ERROR(hipEventSynchronize(EvEnd));
+
+  float ElapsedMs = 0.0f;
+  UR_CHECK_ERROR(
+      hipEventElapsedTime(&ElapsedMs, ur_platform_handle_t_::EvBase, EvEnd));
+
+  // Milliseconds to nanoseconds.
+  constexpr double NsPerMs = 1.0e6;
+  return static_cast<uint64_t>(ElapsedMs * NsPerMs);
+}
+
+/// Records the end event on the event's stream.
+///
+/// The event must have been started and must not have been recorded before;
+/// otherwise UR_RESULT_ERROR_INVALID_EVENT is returned. An event without a
+/// queue yields UR_RESULT_ERROR_INVALID_QUEUE. On success the event is
+/// flagged as recorded and has been assigned the queue's next event id.
+ur_result_t ur_event_handle_t_::record() {
+  if (isRecorded()) {
+    return UR_RESULT_ERROR_INVALID_EVENT;
+  }
+  if (!isStarted()) {
+    return UR_RESULT_ERROR_INVALID_EVENT;
+  }
+
+  UR_ASSERT(Queue, UR_RESULT_ERROR_INVALID_QUEUE);
+
+  ur_result_t Status = UR_RESULT_ERROR_INVALID_OPERATION;
+  try {
+    EventId = Queue->getNextEventId();
+    // An id of zero is treated as the id counter having wrapped around.
+    if (EventId == 0) {
+      detail::ur::die(
+          "Unrecoverable program state reached in event identifier overflow");
+    }
+    Status = UR_CHECK_ERROR(hipEventRecord(EvEnd, Stream));
+  } catch (ur_result_t Error) {
+    Status = Error;
+  }
+
+  if (Status == UR_RESULT_SUCCESS) {
+    IsRecorded = true;
+  }
+  return Status;
+}
+
+/// Blocks until the end event has completed.
+///
+/// The event is flagged as waited-on only when the synchronisation did not
+/// throw; a thrown ur_result_t is returned to the caller instead.
+ur_result_t ur_event_handle_t_::wait() {
+  try {
+    const ur_result_t Status = UR_CHECK_ERROR(hipEventSynchronize(EvEnd));
+    HasBeenWaitedOn = true;
+    return Status;
+  } catch (ur_result_t Error) {
+    return Error;
+  }
+}
+
+/// Destroys the HIP events owned by this event.
+///
+/// The queued and start events are only destroyed when the queue has
+/// profiling enabled, mirroring the condition under which the constructor
+/// creates them.
+ur_result_t ur_event_handle_t_::release() {
+  assert(Queue != nullptr);
+
+  UR_CHECK_ERROR(hipEventDestroy(EvEnd));
+
+  const bool ProfilingEnabled =
+      (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;
+  if (ProfilingEnabled) {
+    UR_CHECK_ERROR(hipEventDestroy(EvQueued));
+    UR_CHECK_ERROR(hipEventDestroy(EvStart));
+  }
+
+  return UR_RESULT_SUCCESS;
+}
+
+/// Waits for all events in the list to complete.
+///
+/// Every event must be non-null and belong to the same context as the first
+/// event in the list.
+///
+/// \param numEvents is the number of events in phEventWaitList; must be > 0
+/// \param phEventWaitList is the array of events to wait on
+///
+/// \return UR_RESULT_SUCCESS when all waits succeeded, otherwise the first
+/// error reported while validating or waiting.
+UR_APIEXPORT ur_result_t UR_APICALL
+urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) {
+  UR_ASSERT(numEvents > 0, UR_RESULT_ERROR_INVALID_VALUE);
+  // The first event is dereferenced below to obtain the context, so both the
+  // list and its first entry have to be validated up front.
+  UR_ASSERT(phEventWaitList, UR_RESULT_ERROR_INVALID_NULL_POINTER);
+  UR_ASSERT(phEventWaitList[0], UR_RESULT_ERROR_INVALID_EVENT);
+
+  try {
+
+    auto Context = phEventWaitList[0]->getContext();
+    ScopedContext Active(Context);
+
+    auto WaitFunc = [Context](ur_event_handle_t Event) -> ur_result_t {
+      UR_ASSERT(Event, UR_RESULT_ERROR_INVALID_EVENT);
+      UR_ASSERT(Event->getContext() == Context,
+                UR_RESULT_ERROR_INVALID_CONTEXT);
+
+      return Event->wait();
+    };
+    return forLatestEvents(phEventWaitList, numEvents, WaitFunc);
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+}
+
+/// Queries a property of an event.
+///
+/// Supported properties are the queue, command type, reference count,
+/// execution status and context. Any other property yields
+/// UR_RESULT_ERROR_INVALID_ENUMERATION. Passing a non-null pPropValue with a
+/// zero propValueSize yields UR_RESULT_ERROR_INVALID_SIZE.
+UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent,
+                                                   ur_event_info_t propName,
+                                                   size_t propValueSize,
+                                                   void *pPropValue,
+                                                   size_t *pPropValueSizeRet) {
+  UR_ASSERT(!(pPropValue && propValueSize == 0), UR_RESULT_ERROR_INVALID_SIZE);
+
+  UrReturnHelper Reply(propValueSize, pPropValue, pPropValueSizeRet);
+
+  if (propName == UR_EVENT_INFO_COMMAND_QUEUE) {
+    return Reply(hEvent->getQueue());
+  }
+  if (propName == UR_EVENT_INFO_COMMAND_TYPE) {
+    return Reply(hEvent->getCommandType());
+  }
+  if (propName == UR_EVENT_INFO_REFERENCE_COUNT) {
+    return Reply(hEvent->getReferenceCount());
+  }
+  if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) {
+    return Reply(hEvent->getExecutionStatus());
+  }
+  if (propName == UR_EVENT_INFO_CONTEXT) {
+    return Reply(hEvent->getContext());
+  }
+
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// Obtain profiling information from UR HIP events
+/// Timings from HIP are only elapsed time.
+///
+/// \param hEvent is the event to query
+/// \param propName is the profiling property to return
+/// \param propValueSize is the size in bytes of the pPropValue storage
+/// \param pPropValue optionally receives the property value
+/// \param pPropValueSizeRet optionally receives the size of the property
+///
+/// \return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE if the event has no
+/// queue or the queue was created without profiling,
+/// UR_RESULT_ERROR_INVALID_ENUMERATION for an unknown propName, otherwise the
+/// result of writing the requested value.
+UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo(
+    ur_event_handle_t hEvent, ur_profiling_info_t propName,
+    size_t propValueSize, void *pPropValue, size_t *pPropValueSizeRet) {
+
+  UR_ASSERT(!(pPropValue && propValueSize == 0), UR_RESULT_ERROR_INVALID_VALUE);
+
+  ur_queue_handle_t Queue = hEvent->getQueue();
+  if (Queue == nullptr || !(Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
+    return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
+  }
+
+  UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet);
+  switch (propName) {
+  case UR_PROFILING_INFO_COMMAND_QUEUED:
+  case UR_PROFILING_INFO_COMMAND_SUBMIT:
+    // Note: No user for this case
+    return ReturnValue(static_cast<uint64_t>(hEvent->getQueuedTime()));
+  case UR_PROFILING_INFO_COMMAND_START:
+    return ReturnValue(static_cast<uint64_t>(hEvent->getStartTime()));
+  case UR_PROFILING_INFO_COMMAND_END:
+    return ReturnValue(static_cast<uint64_t>(hEvent->getEndTime()));
+  default:
+    break;
+  }
+  // An unknown property must not be reported as success; this matches
+  // urEventGetInfo.
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// Event callbacks are not implemented by this adapter; all arguments are
+/// ignored.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE unconditionally.
+UR_APIEXPORT ur_result_t UR_APICALL
+urEventSetCallback(ur_event_handle_t, ur_execution_info_t, ur_event_callback_t,
+                   void *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// Increments the reference count of an event.
+///
+/// A count that wraps around to zero is reported through the adapter's
+/// assertion helper.
+UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) {
+  const auto UpdatedCount = hEvent->incrementReferenceCount();
+  const bool Wrapped = UpdatedCount == 0;
+
+  detail::ur::assertion(!Wrapped,
+                        "Reference count overflow detected in urEventRetain.");
+
+  return UR_RESULT_SUCCESS;
+}
+
+/// Decrements the reference count of an event and destroys the event once
+/// the count reaches zero.
+///
+/// \return UR_RESULT_SUCCESS while references remain; otherwise the result
+/// of releasing the event's HIP resources, or
+/// UR_RESULT_ERROR_OUT_OF_RESOURCES if that release throws.
+UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) {
+  // A count of zero here means a double release or a corrupted count;
+  // either way it is not safe to continue.
+  detail::ur::assertion(hEvent->getReferenceCount() != 0,
+                        "Reference count overflow detected in urEventRelease.");
+
+  // Other references remain: nothing else to do.
+  if (hEvent->decrementReferenceCount() != 0) {
+    return UR_RESULT_SUCCESS;
+  }
+
+  // Last reference gone: the object is deleted when this owner goes out of
+  // scope, after its HIP resources have been released below.
+  std::unique_ptr<ur_event_handle_t_> Owner{hEvent};
+  try {
+    ScopedContext Active(hEvent->getContext());
+    return hEvent->release();
+  } catch (...) {
+    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+}
+
+/// Gets the native HIP handle of a UR event object.
+///
+/// The handle returned is the event's end-of-command HIP event.
+///
+/// \param[in] hEvent The UR event to get the native HIP object of.
+/// \param[out] phNativeEvent Set to the native handle of the UR event object.
+///
+/// \return UR_RESULT_SUCCESS
+UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle(
+    ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) {
+  const auto NativeEvent = hEvent->get();
+  *phNativeEvent = reinterpret_cast<ur_native_handle_t>(NativeEvent);
+  return UR_RESULT_SUCCESS;
+}
+
+/// Creates a UR event object from a HIP event handle.
+/// TODO: Implement this.
+/// NOTE: Once implemented, the created UR object is meant to take ownership
+/// of the native handle.
+///
+/// The parameters are, in order: the native handle to create the UR event
+/// object from, the context, the native properties, and the output UR event.
+/// All of them are currently ignored.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+UR_APIEXPORT ur_result_t UR_APICALL
+urEventCreateWithNativeHandle(ur_native_handle_t, ur_context_handle_t,
+                              const ur_event_native_properties_t *,
+                              ur_event_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/event.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/event.hpp
new file mode 100644
index 0000000000000..5960f384cdfd5
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/event.hpp
@@ -0,0 +1,172 @@
+//===--------- event.hpp - HIP Adapter -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include "common.hpp"
+#include "queue.hpp"
+
+/// UR Event mapping to hipEvent_t
+/// Bundles the queued, start and end HIP events with queue, stream, context.
+struct ur_event_handle_t_ {
+public:
+  using native_type = hipEvent_t;
+
+  ur_result_t record();
+
+  ur_result_t wait();
+
+  ur_result_t start();
+  // The native handle exposed for this event is the end event (EvEnd).
+  native_type get() const noexcept { return EvEnd; };
+
+  ur_queue_handle_t getQueue() const noexcept { return Queue; }
+
+  hipStream_t getStream() const noexcept { return Stream; }
+
+  uint32_t getComputeStreamToken() const noexcept { return StreamToken; }
+
+  ur_command_t getCommandType() const noexcept { return CommandType; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount; }
+
+  bool isRecorded() const noexcept { return IsRecorded; }
+
+  bool isStarted() const noexcept { return IsStarted; }
+
+  bool isCompleted() const noexcept;
+
+  uint32_t getExecutionStatus() const noexcept {
+    // Not recorded -> submitted; recorded but unfinished -> running.
+    if (!isRecorded()) {
+      return UR_EVENT_STATUS_SUBMITTED;
+    }
+
+    if (!isCompleted()) {
+      return UR_EVENT_STATUS_RUNNING;
+    }
+    return UR_EVENT_STATUS_COMPLETE;
+  }
+
+  ur_context_handle_t getContext() const noexcept { return Context; };
+
+  uint32_t incrementReferenceCount() { return ++RefCount; }
+
+  uint32_t decrementReferenceCount() { return --RefCount; }
+
+  uint32_t getEventId() const noexcept { return EventId; }
+
+  // Returns the counter time when the associated command(s) were enqueued
+  uint64_t getQueuedTime() const;
+
+  // Returns the counter time when the associated command(s) started execution
+  uint64_t getStartTime() const;
+
+  // Returns the counter time when the associated command(s) completed
+  uint64_t getEndTime() const;
+
+  // Allocates a UR event bound to Queue, Queue's context and Stream.
+  static ur_event_handle_t
+  makeNative(ur_command_t Type, ur_queue_handle_t Queue, hipStream_t Stream,
+             uint32_t StreamToken = std::numeric_limits<uint32_t>::max()) {
+    return new ur_event_handle_t_(Type, Queue->getContext(), Queue, Stream,
+                                  StreamToken);
+  }
+
+  ur_result_t release();
+
+  ~ur_event_handle_t_();
+
+private:
+  // This constructor is private to force programmers to use the makeNative
+  // static member in order to create a ur_event_handle_t for HIP.
+  ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context,
+                     ur_queue_handle_t Queue, hipStream_t Stream,
+                     uint32_t StreamToken);
+
+  ur_command_t CommandType; // The type of command associated with event.
+
+  std::atomic_uint32_t RefCount; // Event reference count.
+
+  bool HasBeenWaitedOn; // Signifies whether the event has been waited
+                        // on through a call to wait(), which implies
+                        // that it has completed.
+
+  bool IsRecorded; // Signifies whether a native HIP event has been recorded
+                   // yet.
+  bool IsStarted;  // Signifies whether the operation associated with the
+                   // UR event has started or not
+                   //
+
+  uint32_t StreamToken;
+  uint32_t EventId; // Queue identifier of the event.
+
+  native_type EvEnd; // HIP event handle. If this ur_event_handle_t_
+                     // represents a user event, this will be nullptr.
+
+  native_type EvStart; // HIP event handle associated with the start
+
+  native_type EvQueued; // HIP event handle associated with the time
+                        // the command was enqueued
+
+  ur_queue_handle_t Queue; // ur_queue_handle_t associated with the event. If
+                           // this is a user event, this will be nullptr.
+
+  hipStream_t Stream; // hipStream_t associated with the event. If this is a
+                      // user event, this will be uninitialized.
+
+  ur_context_handle_t Context; // ur_context_handle_t associated with the event.
+                               // If this is a native event, this will be the
+                               // same context associated with the Queue member.
+};
+
+// Apply the callback `F` to the latest event of every stream that appears in
+// `EventWaitList`. `F` takes a single ur_event_handle_t and returns a
+// ur_result_t; the first non-success result ends the iteration and is
+// returned. Null entries in a list of two or more events are skipped.
+template <typename Func>
+ur_result_t forLatestEvents(const ur_event_handle_t *EventWaitList,
+                            size_t NumEventsInWaitList, Func &&F) {
+
+  if (EventWaitList == nullptr || NumEventsInWaitList == 0) {
+    return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST;
+  }
+
+  // Fast path if we only have a single event; it is forwarded to F unchecked.
+  if (NumEventsInWaitList == 1) {
+    return F(EventWaitList[0]);
+  }
+
+  std::vector<ur_event_handle_t> Events{EventWaitList,
+                                        EventWaitList + NumEventsInWaitList};
+  // Drop null entries up front: the comparator below dereferences both of its
+  // operands, so a null handle must never reach std::sort.
+  Events.erase(std::remove(Events.begin(), Events.end(), nullptr),
+               Events.end());
+  std::sort(Events.begin(), Events.end(),
+            [](ur_event_handle_t E0, ur_event_handle_t E1) {
+              // Group by stream (smallest handle first); within one stream
+              // the event with the highest id, i.e. the newest, comes first.
+              return E0->getStream() < E1->getStream() ||
+                     (E0->getStream() == E1->getStream() &&
+                      E0->getEventId() > E1->getEventId());
+            });
+
+  hipStream_t LastSeenStream = nullptr;
+  for (size_t i = 0; i < Events.size(); i++) {
+    auto Event = Events[i];
+    if (i != 0 && Event->getStream() == LastSeenStream) {
+      continue; // an older event of a stream that was already handled
+    }
+    LastSeenStream = Event->getStream();
+    auto Result = F(Event);
+    if (Result != UR_RESULT_SUCCESS) {
+      return Result;
+    }
+  }
+  return UR_RESULT_SUCCESS;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.cpp
new file mode 100644
index 0000000000000..709657ab0c947
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.cpp
@@ -0,0 +1,325 @@
+//===--------- kernel.cpp - HIP Adapter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "kernel.hpp"
+#include "memory.hpp"
+#include "sampler.hpp"
+
+/// Creates a kernel object for the function `pKernelName` of `hProgram`.
+/// The companion `<name>_with_offset` entry point is looked up as well and
+/// recorded as missing (nullptr) when HIP reports hipErrorNotFound for it.
+UR_APIEXPORT ur_result_t UR_APICALL
+urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName,
+               ur_kernel_handle_t *phKernel) {
+  std::unique_ptr<ur_kernel_handle_t_> NewKernel;
+  ur_result_t Status = UR_RESULT_SUCCESS;
+  try {
+    ScopedContext Active(hProgram->getContext());
+    hipFunction_t Entry;
+    Status = UR_CHECK_ERROR(
+        hipModuleGetFunction(&Entry, hProgram->get(), pKernelName));
+
+    // Variant of the kernel that takes the global offset parameter.
+    hipFunction_t EntryWithOffset;
+    const std::string OffsetName = std::string(pKernelName) + "_with_offset";
+    const hipError_t LookupErr = hipModuleGetFunction(
+        &EntryWithOffset, hProgram->get(), OffsetName.c_str());
+    if (LookupErr != hipErrorNotFound) {
+      Status = UR_CHECK_ERROR(LookupErr);
+    } else {
+      EntryWithOffset = nullptr;
+    }
+    NewKernel.reset(
+        new ur_kernel_handle_t_{Entry, EntryWithOffset, pKernelName, hProgram,
+                                hProgram->getContext()});
+  } catch (ur_result_t Err) {
+    Status = Err;
+  } catch (...) {
+    Status = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+  }
+
+  *phKernel = NewKernel.release();
+  return Status;
+}
+
+/// Answers work-group queries (`propName`) for `hKernel` on `hDevice` through
+/// `pPropValue`/`pPropSizeRet`. Unknown properties yield
+/// UR_RESULT_ERROR_INVALID_ENUMERATION; failed HIP queries trip an assertion.
+UR_APIEXPORT ur_result_t UR_APICALL
+urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
+                     ur_kernel_group_info_t propName, size_t propSize,
+                     void *pPropValue, size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+  switch (propName) {
+  case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: {
+    size_t GlobalWorkSize[3] = {0, 0, 0};
+    int MaxBlockDimX{0}, MaxBlockDimY{0}, MaxBlockDimZ{0};
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxBlockDimX,
+                                                hipDeviceAttributeMaxBlockDimX,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxBlockDimY,
+                                                hipDeviceAttributeMaxBlockDimY,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxBlockDimZ,
+                                                hipDeviceAttributeMaxBlockDimZ,
+                                                hDevice->get()) == hipSuccess);
+
+    int MaxGridDimX{0}, MaxGridDimY{0}, MaxGridDimZ{0};
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxGridDimX,
+                                                hipDeviceAttributeMaxGridDimX,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxGridDimY,
+                                                hipDeviceAttributeMaxGridDimY,
+                                                hDevice->get()) == hipSuccess);
+    detail::ur::assertion(hipDeviceGetAttribute(&MaxGridDimZ,
+                                                hipDeviceAttributeMaxGridDimZ,
+                                                hDevice->get()) == hipSuccess);
+    // Multiply in size_t: both limits are ints and their product can exceed
+    // INT_MAX, which would be signed overflow if computed as int * int.
+    GlobalWorkSize[0] = static_cast<size_t>(MaxBlockDimX) * MaxGridDimX;
+    GlobalWorkSize[1] = static_cast<size_t>(MaxBlockDimY) * MaxGridDimY;
+    GlobalWorkSize[2] = static_cast<size_t>(MaxBlockDimZ) * MaxGridDimZ;
+    return ReturnValue(GlobalWorkSize, 3);
+  }
+  case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: {
+    int MaxThreads = 0;
+    detail::ur::assertion(
+        hipFuncGetAttribute(&MaxThreads,
+                            HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+                            hKernel->get()) == hipSuccess);
+    return ReturnValue(size_t(MaxThreads));
+  }
+  case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: {
+    size_t group_size[3] = {0, 0, 0};
+    // Meant to report the work-group size fixed in the kernel source or IL,
+    // with (0, 0, 0) standing for "not specified"; always (0, 0, 0) for now.
+    // https://www.khronos.org/registry/OpenCL/sdk/2.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
+    // TODO: can we extract the work group size from the PTX?
+    return ReturnValue(group_size, 3);
+  }
+  case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: {
+    // OpenCL LOCAL == HIP SHARED
+    int Bytes = 0;
+    detail::ur::assertion(
+        hipFuncGetAttribute(&Bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
+                            hKernel->get()) == hipSuccess);
+    return ReturnValue(uint64_t(Bytes));
+  }
+  case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
+    // Work groups should be multiples of the warp size
+    int WarpSize = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&WarpSize,
+                                                hipDeviceAttributeWarpSize,
+                                                hDevice->get()) == hipSuccess);
+    return ReturnValue(static_cast<size_t>(WarpSize));
+  }
+  case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: {
+    // OpenCL PRIVATE == HIP LOCAL
+    int Bytes = 0;
+    detail::ur::assertion(
+        hipFuncGetAttribute(&Bytes, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
+                            hKernel->get()) == hipSuccess);
+    return ReturnValue(uint64_t(Bytes));
+  }
+  default:
+    break;
+  }
+
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) {
+  UR_ASSERT(hKernel->getReferenceCount() > 0u, UR_RESULT_ERROR_INVALID_KERNEL);
+  // The count was non-zero above, so the handle is live: add one reference.
+  hKernel->incrementReferenceCount();
+  return UR_RESULT_SUCCESS;
+}
+
+/// Drops one reference to `hKernel` and destroys the kernel object once the
+/// count reaches zero.
+UR_APIEXPORT ur_result_t UR_APICALL
+urKernelRelease(ur_kernel_handle_t hKernel) {
+  // A count of zero here means a double release or a corrupted ref count;
+  // either way, cannot safely proceed.
+  UR_ASSERT(hKernel->getReferenceCount() != 0, UR_RESULT_ERROR_INVALID_KERNEL);
+  // Nothing native is freed here: deleting the object is the whole cleanup,
+  // and its destructor releases the program and context it retained.
+  const bool WasLastReference = hKernel->decrementReferenceCount() == 0;
+  if (WasLastReference) {
+    delete hKernel;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+// TODO(ur): Not implemented for HIP yet; both arguments are ignored and
+// UNSUPPORTED_FEATURE is returned. Tests are needed once this is implemented.
+UR_APIEXPORT ur_result_t UR_APICALL
+urKernelGetNativeHandle(ur_kernel_handle_t, ur_native_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// Stores a by-value kernel argument, or a local one if pArgValue is null.
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue(
+    ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize,
+    const ur_kernel_arg_value_properties_t *, const void *pArgValue) {
+  try {
+    if (pArgValue == nullptr) {
+      hKernel->setKernelLocalArg(argIndex, argSize);
+    } else {
+      hKernel->setKernelArg(argIndex, argSize, pArgValue);
+    }
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel,
+                                                    ur_kernel_info_t propName,
+                                                    size_t propSize,
+                                                    void *pKernelInfo,
+                                                    size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pKernelInfo, pPropSizeRet);
+  // Each supported property is answered from the kernel object's accessors.
+  switch (propName) {
+  case UR_KERNEL_INFO_FUNCTION_NAME:
+    return ReturnValue(hKernel->getName());
+  case UR_KERNEL_INFO_NUM_ARGS:
+    return ReturnValue(hKernel->getNumArgs());
+  case UR_KERNEL_INFO_REFERENCE_COUNT:
+    return ReturnValue(hKernel->getReferenceCount());
+  case UR_KERNEL_INFO_CONTEXT:
+    return ReturnValue(hKernel->getContext());
+  case UR_KERNEL_INFO_PROGRAM:
+    return ReturnValue(hKernel->getProgram());
+  case UR_KERNEL_INFO_ATTRIBUTES:
+    return ReturnValue("");
+  default:
+    break;
+  }
+  // Any other property value is rejected.
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
+                        ur_kernel_sub_group_info_t propName, size_t propSize,
+                        void *pPropValue, size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+  switch (propName) {
+  case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: {
+    // Sub-group size is equivalent to warp size, queried from hDevice
+    int WarpSize = 0;
+    detail::ur::assertion(hipDeviceGetAttribute(&WarpSize,
+                                                hipDeviceAttributeWarpSize,
+                                                hDevice->get()) == hipSuccess);
+    return ReturnValue(static_cast<uint32_t>(WarpSize));
+  }
+  case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: {
+    // Number of sub-groups = max block size / warp size, rounded up
+    int MaxThreads = 0;
+    detail::ur::assertion(
+        hipFuncGetAttribute(&MaxThreads,
+                            HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+                            hKernel->get()) == hipSuccess);
+    int WarpSize = 0;
+    urKernelGetSubGroupInfo(hKernel, hDevice,
+                            UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE,
+                            sizeof(uint32_t), &WarpSize, nullptr);
+    int MaxWarps = (MaxThreads + WarpSize - 1) / WarpSize;
+    return ReturnValue(static_cast<uint32_t>(MaxWarps));
+  }
+  case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: {
+    // Return value of 0 => not specified
+    // TODO: Revisit if PTX is generated for compile-time work-group sizes
+    return ReturnValue(0);
+  }
+  case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: {
+    // Return value of 0 => unspecified or "auto" sub-group size
+    // Correct for now, since warp size may be read from special register
+    // TODO: Return warp size once default is primary sub-group size
+    // TODO: Revisit if we can recover [[sub_group_size]] attribute from PTX
+    return ReturnValue(0);
+  }
+  default:
+    break;
+  }
+  // Any other property value is rejected.
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(
+    ur_kernel_handle_t hKernel, uint32_t argIndex,
+    const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) {
+  hKernel->setKernelArg(argIndex, sizeof(pArgValue), pArgValue);
+  return UR_RESULT_SUCCESS; // sizeof(void *) bytes were read from pArgValue
+}
+
+/// Binds memory object `hArgValue` to argument `argIndex`: surfaces go in as
+/// their hipSurfaceObject_t, anything else as its BufferMem.getVoid() pointer.
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(
+    ur_kernel_handle_t hKernel, uint32_t argIndex,
+    const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) {
+  // A null handle stands for a zero-sized buffer: record an empty argument.
+  if (!hArgValue) {
+    hKernel->setKernelArg(argIndex, 0, nullptr);
+    return UR_RESULT_SUCCESS;
+  }
+
+  try {
+    if (hArgValue->MemType != ur_mem_handle_t_::Type::Surface) {
+      void *DevicePtr = hArgValue->Mem.BufferMem.getVoid();
+      hKernel->setKernelArg(argIndex, sizeof(void *), (void *)&DevicePtr);
+      return UR_RESULT_SUCCESS;
+    }
+    auto Array = hArgValue->Mem.SurfaceMem.getArray();
+    hipArray_Format ChannelFormat;
+    size_t ChannelCount;
+    getArrayDesc(Array, ChannelFormat, ChannelCount);
+    const bool IsSupported = ChannelFormat == HIP_AD_FORMAT_UNSIGNED_INT32 ||
+                             ChannelFormat == HIP_AD_FORMAT_SIGNED_INT32 ||
+                             ChannelFormat == HIP_AD_FORMAT_HALF ||
+                             ChannelFormat == HIP_AD_FORMAT_FLOAT;
+    if (!IsSupported) {
+      detail::ur::die(
+          "UR HIP kernels only support images with channel types int32, "
+          "uint32, float, and half.");
+    }
+    hipSurfaceObject_t Surface = hArgValue->Mem.SurfaceMem.getSurface();
+    hKernel->setKernelArg(argIndex, sizeof(Surface), (void *)&Surface);
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+/// Passes the sampler's `Props` value as a 32-bit by-value kernel argument.
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler(
+    ur_kernel_handle_t hKernel, uint32_t argIndex,
+    const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) {
+  try {
+    uint32_t Props = hArgValue->Props;
+    hKernel->setKernelArg(argIndex, sizeof(Props), (void *)&Props);
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+// A NOP for the HIP backend: arguments are ignored, success is returned.
+UR_APIEXPORT ur_result_t UR_APICALL
+urKernelSetExecInfo(ur_kernel_handle_t, ur_kernel_exec_info_t, size_t,
+                    const ur_kernel_exec_info_properties_t *, const void *) {
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle(
+    ur_native_handle_t, ur_context_handle_t, ur_program_handle_t,
+    const ur_kernel_native_properties_t *, ur_kernel_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // all arguments are ignored
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.hpp
new file mode 100644
index 0000000000000..0e4f3c0ea8bd0
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/kernel.hpp
@@ -0,0 +1,193 @@
+//===--------- kernel.hpp - HIP Adapter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include <ur_api.h>
+
+#include <atomic>
+#include <cassert>
+#include <numeric>
+
+#include "program.hpp"
+
+/// Implementation of a UR Kernel for HIP
+///
+/// UR Kernels are used to set kernel arguments,
+/// creating a state on the Kernel object for a given
+/// invocation. This is not the case of HIPFunction objects,
+/// which are simply passed together with the arguments on the invocation.
+/// The UR Kernel implementation for HIP stores the list of arguments,
+/// argument sizes, and offsets to emulate the interface of UR Kernel,
+/// saving the arguments for the later dispatch.
+/// Note that in UR API, the Local memory is specified as a size per
+/// individual argument, but in HIP only the total usage of shared
+/// memory is required since it is not passed as a parameter.
+/// A compiler pass converts the UR API local memory model into the
+/// HIP shared model. This object simply calculates the total of
+/// shared memory, and the initial offsets of each parameter.
+struct ur_kernel_handle_t_ {
+  using native_type = hipFunction_t;
+
+  native_type Function;
+  native_type FunctionWithOffsetParam;
+  std::string Name;
+  ur_context_handle_t Context;
+  ur_program_handle_t Program;
+  std::atomic_uint32_t RefCount;
+
+  /// Structure that holds the arguments to the kernel.
+  /// Note each argument size is known, since it comes
+  /// from the kernel signature.
+  /// This is not something that can be queried from the HIP API
+  /// so there is a hard-coded size (\ref MAX_PARAM_BYTES)
+  /// and a storage.
+  struct arguments {
+    static constexpr size_t MAX_PARAM_BYTES = 4000u;
+    using args_t = std::array<char, MAX_PARAM_BYTES>;
+    using args_size_t = std::vector<size_t>;
+    using args_index_t = std::vector<void *>;
+    args_t Storage;
+    args_size_t ParamSizes;
+    args_index_t Indices;
+    args_size_t OffsetPerIndex;
+
+    std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0};
+
+    arguments() {
+      // Place the implicit offset index at the end of the indices collection
+      Indices.emplace_back(&ImplicitOffsetArgs);
+    }
+
+    /// Add an argument to the kernel.
+    /// If the argument existed before, it is replaced.
+    /// Otherwise, it is added.
+    /// Gaps are filled with empty arguments.
+    /// Implicit offset argument is kept at the back of the indices collection.
+    void addArg(size_t Index, size_t Size, const void *Arg,
+                size_t LocalSize = 0) {
+      if (Index + 2 > Indices.size()) {
+        // Move implicit offset argument Index with the end
+        Indices.resize(Index + 2, Indices.back());
+        // Ensure enough space for the new argument
+        ParamSizes.resize(Index + 1);
+        OffsetPerIndex.resize(Index + 1);
+      }
+      ParamSizes[Index] = Size;
+      // insertion point = sum of the sizes of all preceding arguments
+      size_t InsertPos = std::accumulate(
+          std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0});
+      // Store the value; a zero-sized argument may arrive with a null Arg,
+      // which must not be handed to memcpy.
+      if (Size != 0) {
+        std::memcpy(&Storage[InsertPos], Arg, Size);
+      }
+      Indices[Index] = &Storage[InsertPos];
+      OffsetPerIndex[Index] = LocalSize;
+    }
+
+    void addLocalArg(size_t Index, size_t Size) {
+      size_t LocalOffset = this->getLocalSize();
+      // maximum required alignment is the size of the largest vector type
+      const size_t MaxAlignment = sizeof(double) * 16;
+      // for arguments smaller than the maximum alignment simply align to the
+      // size of the argument
+      const size_t Alignment = std::min(MaxAlignment, Size);
+      // align the argument; a zero-sized argument has Alignment == 0, needs
+      // no padding and must not reach the modulo below
+      size_t AlignedLocalOffset = LocalOffset;
+      const size_t Pad = Alignment != 0 ? LocalOffset % Alignment : 0;
+      if (Pad != 0) {
+        AlignedLocalOffset += Alignment - Pad;
+      }
+      addArg(Index, sizeof(size_t), (const void *)&AlignedLocalOffset,
+             Size + AlignedLocalOffset - LocalOffset);
+    }
+
+    void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) {
+      assert(Size == sizeof(std::uint32_t) * 3);
+      std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size);
+    }
+
+    void clearLocalSize() {
+      std::fill(std::begin(OffsetPerIndex), std::end(OffsetPerIndex), 0);
+    }
+
+    const args_index_t &getIndices() const noexcept { return Indices; }
+
+    uint32_t getLocalSize() const {
+      return std::accumulate(std::begin(OffsetPerIndex),
+                             std::end(OffsetPerIndex), 0);
+    }
+  } Args;
+
+  ur_kernel_handle_t_(hipFunction_t Func, hipFunction_t FuncWithOffsetParam,
+                      const char *Name, ur_program_handle_t Program,
+                      ur_context_handle_t Ctxt)
+      : Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam},
+        Name{Name}, Context{Ctxt}, Program{Program}, RefCount{1} {
+    urProgramRetain(Program);
+    urContextRetain(Context);
+  }
+
+  ur_kernel_handle_t_(hipFunction_t Func, const char *Name,
+                      ur_program_handle_t Program, ur_context_handle_t Ctxt)
+      : ur_kernel_handle_t_{Func, nullptr, Name, Program, Ctxt} {}
+
+  ~ur_kernel_handle_t_() {
+    urProgramRelease(Program);
+    urContextRelease(Context);
+  }
+
+  ur_program_handle_t getProgram() const noexcept { return Program; }
+
+  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
+
+  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount; }
+
+  native_type get() const noexcept { return Function; };
+
+  native_type getWithOffsetParameter() const noexcept {
+    return FunctionWithOffsetParam;
+  };
+
+  bool hasWithOffsetParameter() const noexcept {
+    return FunctionWithOffsetParam != nullptr;
+  }
+
+  ur_context_handle_t getContext() const noexcept { return Context; };
+
+  const char *getName() const noexcept { return Name.c_str(); }
+
+  /// Get the number of kernel arguments, excluding the implicit global offset.
+  /// Note this only returns the current known number of arguments, not the
+  /// real one required by the kernel, since this cannot be queried from
+  /// the HIP Driver API
+  uint32_t getNumArgs() const noexcept { return Args.Indices.size() - 1; }
+
+  void setKernelArg(int Index, size_t Size, const void *Arg) {
+    Args.addArg(Index, Size, Arg);
+  }
+
+  void setKernelLocalArg(int Index, size_t Size) {
+    Args.addLocalArg(Index, Size);
+  }
+
+  void setImplicitOffsetArg(size_t Size, std::uint32_t *ImplicitOffset) {
+    return Args.setImplicitOffset(Size, ImplicitOffset);
+  }
+
+  const arguments::args_index_t &getArgIndices() const {
+    return Args.getIndices();
+  }
+
+  uint32_t getLocalSize() const noexcept { return Args.getLocalSize(); }
+
+  void clearLocalSize() { Args.clearLocalSize(); }
+};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/memory.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/memory.cpp
new file mode 100644
index 0000000000000..3401b5beff148
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/memory.cpp
@@ -0,0 +1,498 @@
+#include "memory.hpp"
+#include "context.hpp"
+#include <cassert>
+
+/// Decreases the reference count of the Mem object.
+/// When it reaches zero, calls the relevant HIP free function and deletes
+/// the handle. \return UR_RESULT_SUCCESS; a reported HIP error ends in die().
+UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) {
+  ur_result_t Result = UR_RESULT_SUCCESS;
+
+  try {
+
+    // Do nothing if there are other references
+    if (hMem->decrementReferenceCount() > 0) {
+      return UR_RESULT_SUCCESS;
+    }
+
+    // Own the handle so it is deleted even if UR_CHECK_ERROR throws below
+    std::unique_ptr<ur_mem_handle_t_> uniqueMemObj(hMem);
+    // A sub-buffer frees no HIP memory here; only its handle is deleted
+    if (hMem->isSubBuffer()) {
+      return UR_RESULT_SUCCESS;
+    }
+
+    ScopedContext Active(uniqueMemObj->getContext());
+
+    if (hMem->MemType == ur_mem_handle_t_::Type::Buffer) {
+      switch (uniqueMemObj->Mem.BufferMem.MemAllocMode) {
+      case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::CopyIn:
+      case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic:
+        Result =
+            UR_CHECK_ERROR(hipFree((void *)uniqueMemObj->Mem.BufferMem.Ptr));
+        break;
+      case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::UseHostPtr:
+        Result = UR_CHECK_ERROR(
+            hipHostUnregister(uniqueMemObj->Mem.BufferMem.HostPtr));
+        break;
+      case ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr:
+        Result =
+            UR_CHECK_ERROR(hipFreeHost(uniqueMemObj->Mem.BufferMem.HostPtr));
+      };
+    }
+
+    else if (hMem->MemType == ur_mem_handle_t_::Type::Surface) {
+      Result = UR_CHECK_ERROR(
+          hipDestroySurfaceObject(uniqueMemObj->Mem.SurfaceMem.getSurface()));
+      auto Array = uniqueMemObj->Mem.SurfaceMem.getArray();
+      Result = UR_CHECK_ERROR(hipFreeArray(Array));
+    }
+
+  } catch (ur_result_t Err) {
+    Result = Err;
+  } catch (...) {
+    Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+
+  if (Result != UR_RESULT_SUCCESS) {
+    // A reported HIP error is either an implementation or an asynchronous HIP
+    // error for which it is unclear if the function that reported it succeeded
+    // or not. Either way, the state of the program is compromised and likely
+    // unrecoverable.
+    detail::ur::die("Unrecoverable program state reached in urMemRelease");
+  }
+
+  return UR_RESULT_SUCCESS;
+}
+
+/// Creates a UR buffer backed by a HIP allocation selected from \p flags and
+/// optionally copies pProperties->pHost into it. On failure *phBuffer is null.
+/// \TODO Implement USE_HOST_PTR using hipHostRegister - See #9789
+UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
+    ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size,
+    const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) {
+  // Validate flags
+  UR_ASSERT((flags & UR_MEM_FLAGS_MASK) == 0,
+            UR_RESULT_ERROR_INVALID_ENUMERATION);
+  if (flags &
+      (UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER)) {
+    UR_ASSERT(pProperties && pProperties->pHost,
+              UR_RESULT_ERROR_INVALID_HOST_PTR);
+  }
+  // A zero-sized buffer is not allowed
+  UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_BUFFER_SIZE);
+
+  // Currently, USE_HOST_PTR is not implemented using host register
+  // since this triggers a weird segfault after program ends.
+  // Setting this constant to true enables testing that behavior.
+  const bool EnableUseHostPtr = false;
+  const bool PerformInitialCopy =
+      (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) ||
+      ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && !EnableUseHostPtr);
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  ur_mem_handle_t RetMemObj = nullptr;
+
+  try {
+    ScopedContext Active(hContext);
+    void *Ptr;
+    auto pHost = pProperties ? pProperties->pHost : nullptr;
+    ur_mem_handle_t_::MemImpl::BufferMem::AllocMode AllocMode =
+        ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic;
+
+    if ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && EnableUseHostPtr) {
+      Result =
+          UR_CHECK_ERROR(hipHostRegister(pHost, size, hipHostRegisterMapped));
+      Result = UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0));
+      AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::UseHostPtr;
+    } else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) {
+      Result = UR_CHECK_ERROR(hipHostMalloc(&pHost, size));
+      Result = UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0));
+      AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::AllocHostPtr;
+    } else {
+      Result = UR_CHECK_ERROR(hipMalloc(&Ptr, size));
+      if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) {
+        AllocMode = ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::CopyIn;
+      }
+    }
+
+    if (Result == UR_RESULT_SUCCESS) {
+      ur_mem_handle_t parentBuffer = nullptr;
+
+      auto DevPtr =
+          reinterpret_cast<ur_mem_handle_t_::MemImpl::BufferMem::native_type>(
+              Ptr);
+      // `new` throws rather than returning null. Keep ownership until all steps
+      // succeeded so a failure never publishes a handle (NOTE: Ptr still leaks).
+      auto URMemObj = std::unique_ptr<ur_mem_handle_t_>(new ur_mem_handle_t_{
+          hContext, parentBuffer, flags, AllocMode, DevPtr, pHost, size});
+      if (PerformInitialCopy) {
+        // Operates on the default stream of the current HIP context.
+        Result = UR_CHECK_ERROR(hipMemcpyHtoD(DevPtr, pHost, size));
+        // Synchronize with default stream implicitly used by hipMemcpyHtoD
+        // to make buffer data available on device before any other UR call
+        // uses it.
+        if (Result == UR_RESULT_SUCCESS) {
+          hipStream_t defaultStream = 0;
+          Result = UR_CHECK_ERROR(hipStreamSynchronize(defaultStream));
+        }
+      }
+      if (Result == UR_RESULT_SUCCESS) {
+        RetMemObj = URMemObj.release();
+      }
+    }
+  } catch (ur_result_t Err) {
+    Result = Err;
+  } catch (...) {
+    Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+
+  *phBuffer = RetMemObj;
+
+  return Result;
+}
+
+/// Implements a buffer partition in the HIP backend.
+/// A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented
+/// as an offset over an existing HIP allocation; the new handle retains hBuffer.
+UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition(
+    ur_mem_handle_t hBuffer, ur_mem_flags_t flags,
+    ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion,
+    ur_mem_handle_t *phMem) {
+  UR_ASSERT((flags & UR_MEM_FLAGS_MASK) == 0,
+            UR_RESULT_ERROR_INVALID_ENUMERATION);
+  UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  UR_ASSERT(!hBuffer->isSubBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+
+  // Default value for flags means UR_MEM_FLAG_READ_WRITE.
+  if (flags == 0) {
+    flags = UR_MEM_FLAG_READ_WRITE;
+  }
+
+  UR_ASSERT(!(flags &
+              (UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER |
+               UR_MEM_FLAG_ALLOC_HOST_POINTER | UR_MEM_FLAG_USE_HOST_POINTER)),
+            UR_RESULT_ERROR_INVALID_VALUE);
+  if (hBuffer->MemFlags & UR_MEM_FLAG_WRITE_ONLY) {
+    UR_ASSERT(!(flags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_READ_ONLY)),
+              UR_RESULT_ERROR_INVALID_VALUE);
+  }
+  if (hBuffer->MemFlags & UR_MEM_FLAG_READ_ONLY) {
+    UR_ASSERT(!(flags & (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY)),
+              UR_RESULT_ERROR_INVALID_VALUE);
+  }
+
+  UR_ASSERT(bufferCreateType == UR_BUFFER_CREATE_TYPE_REGION,
+            UR_RESULT_ERROR_INVALID_ENUMERATION);
+
+  UR_ASSERT(pRegion->size != 0u, UR_RESULT_ERROR_INVALID_BUFFER_SIZE);
+  // Phrased so the check stays correct even if origin + size would wrap around.
+  UR_ASSERT(pRegion->origin <= hBuffer->Mem.BufferMem.getSize() &&
+            pRegion->size <= hBuffer->Mem.BufferMem.getSize() - pRegion->origin,
+            UR_RESULT_ERROR_INVALID_BUFFER_SIZE);
+  // Retained indirectly due to retaining parent buffer below.
+  ur_context_handle_t Context = hBuffer->Context;
+  ur_mem_handle_t_::MemImpl::BufferMem::AllocMode AllocMode =
+      ur_mem_handle_t_::MemImpl::BufferMem::AllocMode::Classic;
+
+  UR_ASSERT(hBuffer->Mem.BufferMem.Ptr !=
+                ur_mem_handle_t_::MemImpl::BufferMem::native_type{0},
+            UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  ur_mem_handle_t_::MemImpl::BufferMem::native_type Ptr =
+      hBuffer->Mem.BufferMem.getWithOffset(pRegion->origin);
+
+  void *HostPtr = nullptr;
+  if (hBuffer->Mem.BufferMem.HostPtr) {
+    HostPtr =
+        static_cast<char *>(hBuffer->Mem.BufferMem.HostPtr) + pRegion->origin;
+  }
+
+  // No release guard on hBuffer: the parent is only retained inside the new
+  // handle's constructor, so a failed creation must leave its count untouched.
+  std::unique_ptr<ur_mem_handle_t_> RetMemObj{nullptr};
+  try {
+    ScopedContext Active(Context);
+
+    RetMemObj = std::unique_ptr<ur_mem_handle_t_>{new ur_mem_handle_t_{
+        Context, hBuffer, flags, AllocMode, Ptr, HostPtr, pRegion->size}};
+  } catch (ur_result_t Err) {
+    *phMem = nullptr;
+    return Err;
+  } catch (...) {
+    *phMem = nullptr;
+    return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+  }
+
+  // Success: ownership of the sub-buffer handle passes to the caller.
+  *phMem = RetMemObj.release();
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory,
+                                                 ur_mem_info_t MemInfoType,
+                                                 size_t propSize,
+                                                 void *pMemInfo,
+                                                 size_t *pPropSizeRet) {
+  // Answers size and context queries for buffer objects; images are rejected.
+  UR_ASSERT(MemInfoType <= UR_MEM_INFO_CONTEXT,
+            UR_RESULT_ERROR_INVALID_ENUMERATION);
+  UR_ASSERT(hMemory->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+
+  UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet);
+  // NOTE(review): SIZE is asked of HIP, not BufferMem.Size; check sub-buffers.
+  ScopedContext Active(hMemory->getContext());
+
+  switch (MemInfoType) {
+  case UR_MEM_INFO_SIZE: {
+    try {
+      size_t AllocSize = 0;
+      UR_CHECK_ERROR(hipMemGetAddressRange(nullptr, &AllocSize,
+                                           hMemory->Mem.BufferMem.Ptr));
+      return ReturnValue(AllocSize);
+    } catch (ur_result_t Err) {
+      return Err;
+    } catch (...) {
+      return UR_RESULT_ERROR_UNKNOWN;
+    }
+  }
+  case UR_MEM_INFO_CONTEXT: {
+    return ReturnValue(hMemory->getContext()); // Handle returned without retain.
+  }
+
+  default:
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  }
+}
+
+/// Gets the native HIP handle of a UR mem object (its device pointer).
+/// NOTE(review): always reads the BufferMem view, even for images - confirm.
+/// \param[in] hMem The UR mem to get the native HIP object of.
+/// \param[out] phNativeMem Set to the native handle of the UR mem object.
+///
+/// \return UR_RESULT_SUCCESS, or INVALID_MEM_OBJECT if it cannot be encoded.
+UR_APIEXPORT ur_result_t UR_APICALL
+urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) {
+#if defined(__HIP_PLATFORM_NVIDIA__)
+  if (sizeof(ur_mem_handle_t_::MemImpl::BufferMem::native_type) >
+      sizeof(ur_native_handle_t)) {
+    // Check that all the upper bits that cannot be represented by
+    // ur_native_handle_t are empty.
+    // NOTE: The following shift might trigger a warning, but the check in the
+    // if above makes sure the shift count is below the operand's bit width.
+    ur_mem_handle_t_::MemImpl::BufferMem::native_type UpperBits =
+        hMem->Mem.BufferMem.get() >> (sizeof(ur_native_handle_t) * CHAR_BIT);
+    if (UpperBits) {
+      // Return an error if any of the remaining bits is non-zero.
+      return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
+    }
+  }
+  *phNativeMem =
+      reinterpret_cast<ur_native_handle_t>(hMem->Mem.BufferMem.get());
+#elif defined(__HIP_PLATFORM_AMD__)
+  *phNativeMem =
+      reinterpret_cast<ur_native_handle_t>(hMem->Mem.BufferMem.get());
+#else
+#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
+#endif
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle(
+    ur_native_handle_t, ur_context_handle_t, const ur_mem_native_properties_t *,
+    ur_mem_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // Stub: all arguments are ignored.
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle(
+    ur_native_handle_t, ur_context_handle_t, const ur_image_format_t *,
+    const ur_image_desc_t *, const ur_mem_native_properties_t *,
+    ur_mem_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // Stub: all arguments are ignored.
+}
+
+/// Creates a UR image backed by a HIP array that is exposed through a surface.
+UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate(
+    ur_context_handle_t hContext, ur_mem_flags_t flags,
+    const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc,
+    void *pHost, ur_mem_handle_t *phMem) {
+
+  // Validate the flags and the host pointer some of them require
+  UR_ASSERT((flags & UR_MEM_FLAGS_MASK) == 0,
+            UR_RESULT_ERROR_INVALID_ENUMERATION);
+  if (flags &
+      (UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER | UR_MEM_FLAG_USE_HOST_POINTER)) {
+    UR_ASSERT(pHost, UR_RESULT_ERROR_INVALID_HOST_PTR);
+  }
+
+  const bool PerformInitialCopy =
+      (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) ||
+      ((flags & UR_MEM_FLAG_USE_HOST_POINTER));
+
+  UR_ASSERT(pImageDesc->stype == UR_STRUCTURE_TYPE_IMAGE_DESC,
+            UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+  UR_ASSERT(pImageDesc->type <= UR_MEM_TYPE_IMAGE1D_BUFFER,
+            UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+  UR_ASSERT(pImageDesc->numMipLevel == 0,
+            UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+  UR_ASSERT(pImageDesc->numSamples == 0,
+            UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+  if (!pHost) {
+    UR_ASSERT(pImageDesc->rowPitch == 0,
+              UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+    UR_ASSERT(pImageDesc->slicePitch == 0,
+              UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+  }
+
+  ur_result_t Result = UR_RESULT_SUCCESS;
+
+  // We only support RGBA channel order
+  // TODO: check SYCL CTS and spec. May also have to support BGRA
+  UR_ASSERT(pImageFormat->channelOrder == UR_IMAGE_CHANNEL_ORDER_RGBA,
+            UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION);
+
+  // We have to use hipArray3DCreate, which has some caveats. The height and
+  // depth parameters must be set to 0 to produce 1D or 2D arrays. image_desc
+  // gives a minimum value of 1, so we need to convert the answer.
+  HIP_ARRAY3D_DESCRIPTOR ArrayDesc;
+  ArrayDesc.NumChannels = 4; // Only support 4 channel image
+  ArrayDesc.Flags = 0;       // No flags required
+  ArrayDesc.Width = pImageDesc->width;
+  // Unused dimensions stay 0; anything but a 1D/2D/3D image is rejected.
+  ArrayDesc.Height = ArrayDesc.Depth = 0;
+  if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
+    ArrayDesc.Height = pImageDesc->height;
+    ArrayDesc.Depth = pImageDesc->depth;
+  } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
+    ArrayDesc.Height = pImageDesc->height;
+  } else if (pImageDesc->type != UR_MEM_TYPE_IMAGE1D) {
+    return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+  }
+
+  // We need to get this now in bytes for calculating the total image size later
+  size_t PixelTypeSizeBytes;
+
+  switch (pImageFormat->channelType) {
+
+  case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8:
+  case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8:
+    ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT8;
+    PixelTypeSizeBytes = 1;
+    break;
+  case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8:
+    ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT8;
+    PixelTypeSizeBytes = 1;
+    break;
+  case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16:
+  case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16:
+    ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT16;
+    PixelTypeSizeBytes = 2;
+    break;
+  case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16:
+    ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT16;
+    PixelTypeSizeBytes = 2;
+    break;
+  case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT:
+    ArrayDesc.Format = HIP_AD_FORMAT_HALF;
+    PixelTypeSizeBytes = 2;
+    break;
+  case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32:
+    ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT32;
+    PixelTypeSizeBytes = 4;
+    break;
+  case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32:
+    ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT32;
+    PixelTypeSizeBytes = 4;
+    break;
+  case UR_IMAGE_CHANNEL_TYPE_FLOAT:
+    ArrayDesc.Format = HIP_AD_FORMAT_FLOAT;
+    PixelTypeSizeBytes = 4;
+    break;
+  default:
+    // urMemImageCreate given unsupported image_channel_data_type
+    return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+  }
+
+  // When a dimension isn't used image_desc has the size set to 1
+  size_t PixelSizeBytes =
+      PixelTypeSizeBytes * 4; // 4 is the only number of channels we support
+  size_t ImageSizeBytes = PixelSizeBytes * pImageDesc->width *
+                          pImageDesc->height * pImageDesc->depth;
+
+  ScopedContext Active(hContext);
+  hipArray *ImageArray = nullptr;
+  Result = UR_CHECK_ERROR(hipArray3DCreate(
+      reinterpret_cast<hipCUarray *>(&ImageArray), &ArrayDesc));
+
+  try {
+    if (PerformInitialCopy) {
+      // We have to use a different copy function for each image dimensionality
+      if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
+        Result =
+            UR_CHECK_ERROR(hipMemcpyHtoA(ImageArray, 0, pHost, ImageSizeBytes));
+      } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
+        hip_Memcpy2D CpyDesc;
+        memset(&CpyDesc, 0, sizeof(CpyDesc));
+        CpyDesc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
+        CpyDesc.srcHost = pHost;
+        CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
+        CpyDesc.dstArray = reinterpret_cast<hipCUarray>(ImageArray);
+        CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width;
+        CpyDesc.Height = pImageDesc->height;
+        Result = UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc));
+      } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
+        HIP_MEMCPY3D CpyDesc;
+        memset(&CpyDesc, 0, sizeof(CpyDesc));
+        CpyDesc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
+        CpyDesc.srcHost = pHost;
+        CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
+        CpyDesc.dstArray = reinterpret_cast<hipCUarray>(ImageArray);
+        CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width;
+        CpyDesc.Height = pImageDesc->height;
+        CpyDesc.Depth = pImageDesc->depth;
+        Result = UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc));
+      }
+    }
+
+    // HIP_RESOURCE_DESC is a union of different structs, shown here
+    // We need to fill it as described here to use it for a surface or texture
+    // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and
+    // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array
+    // handle.
+    // HIP_RESOURCE_DESC::flags must be set to zero
+
+    hipResourceDesc ImageResDesc;
+    ImageResDesc.res.array.array = ImageArray;
+    ImageResDesc.resType = hipResourceTypeArray;
+
+    hipSurfaceObject_t Surface;
+    Result = UR_CHECK_ERROR(hipCreateSurfaceObject(&Surface, &ImageResDesc));
+
+    auto URMemObj = std::unique_ptr<ur_mem_handle_t_>(new ur_mem_handle_t_{
+        hContext, ImageArray, Surface, flags, pImageDesc->type, pHost});
+    // `new` throws on failure; the null check below is purely defensive.
+    if (URMemObj == nullptr) {
+      return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    }
+
+    *phMem = URMemObj.release();
+  } catch (ur_result_t Err) {
+    static_cast<void>(hipFreeArray(ImageArray)); // Best effort; must not throw.
+    return Err;
+  } catch (...) {
+    static_cast<void>(hipFreeArray(ImageArray)); // Best effort; must not throw.
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+  return Result;
+}
+
+/// \TODO Not implemented: every query reports UNSUPPORTED_FEATURE.
+UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t,
+                                                      ur_image_info_t, size_t,
+                                                      void *, size_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) {
+  UR_ASSERT(hMem->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+  hMem->incrementReferenceCount(); // Adds one reference; new count is unused.
+  return UR_RESULT_SUCCESS;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/memory.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/memory.hpp
new file mode 100644
index 0000000000000..0219084d8b2c8
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/memory.hpp
@@ -0,0 +1,198 @@
+//===--------- memory.hpp - HIP Adapter -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include "common.hpp"
+#include <cassert>
+
+/// UR Mem mapping to HIP memory allocations, both data and texture/surface.
+/// \brief Represents non-SVM allocations on the HIP backend.
+/// Keeps track of the mapped region used for Map/Unmap calls.
+/// Only one region can be active at the same time per allocation.
+struct ur_mem_handle_t_ {
+
+  // TODO: Move as much shared data up as possible
+  using ur_context = ur_context_handle_t_ *;
+  using ur_mem = ur_mem_handle_t_ *;
+
+  // Context where the memory object is accessible
+  ur_context Context;
+
+  /// Reference counting of the handler
+  std::atomic_uint32_t RefCount;
+  enum class Type { Buffer, Surface } MemType;
+
+  // Original mem flags passed
+  ur_mem_flags_t MemFlags;
+
+  /// A UR Memory object represents either plain memory allocations ("Buffers"
+  /// in OpenCL) or typed allocations ("Images" in OpenCL).
+  /// In HIP their API handlers are different. Whereas "Buffers" are allocated
+  /// as pointer-like structs, "Images" are stored in Textures or Surfaces.
+  /// This union allows implementation to use either from the same handler.
+  union MemImpl {
+    // Handler for plain, pointer-based HIP allocations
+    struct BufferMem {
+      using native_type = hipDeviceptr_t;
+
+      // If this allocation is a sub-buffer (i.e., a view on an existing
+      // allocation), this is the pointer to the parent handler structure
+      ur_mem Parent;
+      // HIP handler for the pointer
+      native_type Ptr;
+
+      /// Pointer associated with this device on the host
+      void *HostPtr;
+      /// Size of the allocation in bytes
+      size_t Size;
+      /// Size of the active mapped region.
+      size_t MapSize;
+      /// Offset of the active mapped region.
+      size_t MapOffset;
+      /// Pointer to the active mapped region, if any
+      void *MapPtr;
+      /// Original flags for the mapped region
+      ur_map_flags_t MapFlags;
+
+      /** AllocMode
+       * Classic: Just a normal buffer allocated on the device via hip malloc
+       * UseHostPtr: Use an address on the host for the device
+       * CopyIn: The data for the device comes from the host but the host
+       pointer is not available later for re-use
+       * AllocHostPtr: Uses pinned-memory allocation
+      */
+      enum class AllocMode {
+        Classic,
+        UseHostPtr,
+        CopyIn,
+        AllocHostPtr
+      } MemAllocMode;
+
+      native_type get() const noexcept { return Ptr; }
+
+      native_type getWithOffset(size_t Offset) const noexcept {
+        return reinterpret_cast<native_type>(reinterpret_cast<uint8_t *>(Ptr) +
+                                             Offset);
+      }
+
+      void *getVoid() const noexcept { return reinterpret_cast<void *>(Ptr); }
+
+      size_t getSize() const noexcept { return Size; }
+
+      void *getMapPtr() const noexcept { return MapPtr; }
+
+      size_t getMapSize() const noexcept { return MapSize; }
+
+      size_t getMapOffset() const noexcept { return MapOffset; }
+
+      /// Returns a pointer to data visible on the host that contains
+      /// the data on the device associated with this allocation.
+      /// The offset is used to index into the HIP allocation.
+      /// Without a HostPtr a staging buffer is malloc'ed; unmap() frees it.
+      void *mapToPtr(size_t Size, size_t Offset,
+                     ur_map_flags_t Flags) noexcept {
+        assert(MapPtr == nullptr);
+        MapSize = Size;
+        MapOffset = Offset;
+        MapFlags = Flags;
+        if (HostPtr) {
+          MapPtr = static_cast<char *>(HostPtr) + Offset;
+        } else {
+          // TODO: Allocate only what is needed based on the offset
+          MapPtr = static_cast<void *>(malloc(this->getSize()));
+        }
+        return MapPtr;
+      }
+
+      /// Detach the allocation from the host memory.
+      void unmap(void *) noexcept {
+        assert(MapPtr != nullptr);
+        // With a HostPtr, MapPtr aliases it (plus offset) and is not ours to free
+        if (HostPtr == nullptr) {
+          free(MapPtr);
+        }
+        MapPtr = nullptr;
+        MapSize = 0;
+        MapOffset = 0;
+      }
+
+      ur_map_flags_t getMapFlags() const noexcept {
+        assert(MapPtr != nullptr);
+        return MapFlags;
+      }
+    } BufferMem;
+
+    // Handler data for surface object (i.e. Images)
+    struct SurfaceMem {
+      hipArray *Array;
+      hipSurfaceObject_t SurfObj;
+      ur_mem_type_t ImageType;
+
+      hipArray *getArray() const noexcept { return Array; }
+
+      hipSurfaceObject_t getSurface() const noexcept { return SurfObj; }
+
+      ur_mem_type_t getImageType() const noexcept { return ImageType; }
+    } SurfaceMem;
+  } Mem;
+
+  /// Constructs the UR MEM handler for a non-typed allocation ("buffer")
+  ur_mem_handle_t_(ur_context Ctxt, ur_mem Parent, ur_mem_flags_t MemFlags,
+                   MemImpl::BufferMem::AllocMode Mode, hipDeviceptr_t Ptr,
+                   void *HostPtr, size_t Size)
+      : Context{Ctxt}, RefCount{1}, MemType{Type::Buffer}, MemFlags{MemFlags} {
+    Mem.BufferMem.Ptr = Ptr;
+    Mem.BufferMem.Parent = Parent;
+    Mem.BufferMem.HostPtr = HostPtr;
+    Mem.BufferMem.Size = Size;
+    Mem.BufferMem.MapSize = 0;
+    Mem.BufferMem.MapOffset = 0;
+    Mem.BufferMem.MapPtr = nullptr;
+    Mem.BufferMem.MapFlags = UR_MAP_FLAG_WRITE;
+    Mem.BufferMem.MemAllocMode = Mode;
+    if (isSubBuffer()) {
+      urMemRetain(Mem.BufferMem.Parent);
+    } else {
+      urContextRetain(Context);
+    }
+  };
+
+  /// Constructs the UR allocation for an Image object
+  ur_mem_handle_t_(ur_context Ctxt, hipArray *Array, hipSurfaceObject_t Surf,
+                   ur_mem_flags_t MemFlags, ur_mem_type_t ImageType, void *)
+      : Context{Ctxt}, RefCount{1}, MemType{Type::Surface}, MemFlags{MemFlags} {
+    Mem.SurfaceMem.Array = Array;
+    Mem.SurfaceMem.ImageType = ImageType;
+    Mem.SurfaceMem.SurfObj = Surf;
+    urContextRetain(Context);
+  }
+
+  ~ur_mem_handle_t_() {
+    if (isBuffer() && isSubBuffer()) {
+      urMemRelease(Mem.BufferMem.Parent);
+      return;
+    }
+    urContextRelease(Context);
+  }
+
+  bool isBuffer() const noexcept { return MemType == Type::Buffer; }
+
+  bool isSubBuffer() const noexcept {
+    return (isBuffer() && (Mem.BufferMem.Parent != nullptr));
+  }
+
+  bool isImage() const noexcept { return MemType == Type::Surface; }
+
+  ur_context getContext() const noexcept { return Context; }
+
+  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
+
+  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount; }
+};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/platform.cpp
new file mode 100644
index 0000000000000..11f8fc55d44ce
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/platform.cpp
@@ -0,0 +1,153 @@
+//===--------- platform.cpp - HIP Adapter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "platform.hpp"
+
+hipEvent_t ur_platform_handle_t_::EvBase{nullptr}; // Static member; starts null.
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urPlatformGetInfo(ur_platform_handle_t, ur_platform_info_t propName,
+                  size_t propSize, void *pPropValue, size_t *pSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet);
+  // The platform handle is unused: the answers do not depend on it.
+  switch (propName) {
+  case UR_PLATFORM_INFO_NAME:
+    return ReturnValue("AMD HIP BACKEND");
+  case UR_PLATFORM_INFO_VENDOR_NAME:
+    return ReturnValue("AMD Corporation");
+  case UR_PLATFORM_INFO_PROFILE:
+    return ReturnValue("FULL PROFILE");
+  case UR_PLATFORM_INFO_VERSION: {
+    std::string Version;
+    detail::ur::assertion(getHipVersionString(Version) == hipSuccess);
+    return ReturnValue(Version.c_str());
+  }
+  case UR_PLATFORM_INFO_BACKEND: {
+    return ReturnValue(UR_PLATFORM_BACKEND_HIP);
+  }
+  case UR_PLATFORM_INFO_EXTENSIONS: {
+    return ReturnValue(""); // No extensions are advertised.
+  }
+  default:
+    return UR_RESULT_ERROR_INVALID_ENUMERATION;
+  }
+
+  return UR_RESULT_SUCCESS; // Unreachable: every switch branch returns.
+}
+
+/// Obtains the HIP platform.
+/// There is only one HIP platform, and contains all devices on the system.
+/// Triggers the HIP Driver initialization (hipInit) the first time, so this
+/// must be the first UR API called.
+///
+/// However because multiple devices in a context is not currently supported,
+/// place each device in a separate platform.
+UR_APIEXPORT ur_result_t UR_APICALL
+urPlatformGet(uint32_t NumEntries, ur_platform_handle_t *phPlatforms,
+              uint32_t *pNumPlatforms) {
+
+  try {
+    static std::once_flag InitFlag;
+    static uint32_t NumPlatforms = 1;
+    static std::vector<ur_platform_handle_t_> PlatformIds;
+
+    UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE);
+    UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_VALUE);
+
+    ur_result_t Result = UR_RESULT_SUCCESS;
+
+    std::call_once(
+        InitFlag,
+        [](ur_result_t &Err) {
+          if (hipInit(0) != hipSuccess) {
+            NumPlatforms = 0;
+            return;
+          }
+          int NumDevices = 0;
+          Err = UR_CHECK_ERROR(hipGetDeviceCount(&NumDevices));
+          if (NumDevices == 0) {
+            NumPlatforms = 0;
+            return;
+          }
+          try {
+            // make one platform per device
+            NumPlatforms = NumDevices;
+            PlatformIds.resize(NumDevices);
+
+            for (int i = 0; i < NumDevices; ++i) {
+              hipDevice_t Device;
+              Err = UR_CHECK_ERROR(hipDeviceGet(&Device, i));
+              PlatformIds[i].Devices.emplace_back(
+                  new ur_device_handle_t_{Device, &PlatformIds[i]});
+            }
+          } catch (const std::bad_alloc &) {
+            // Signal out-of-memory situation. Clearing the vector destroys
+            // every platform together with the devices it owns; indexing it
+            // here would be out of bounds if resize() itself had thrown.
+            NumPlatforms = 0;
+            PlatformIds.clear();
+            Err = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+          } catch (...) {
+            // Clear and rethrow to allow retry. No platform is usable now, so
+            // report none until a later call initialises them successfully
+            // (call_once leaves the flag unset when the callable throws).
+            NumPlatforms = 0;
+            PlatformIds.clear();
+            throw;
+          }
+        },
+        Result);
+
+    if (pNumPlatforms != nullptr) {
+      *pNumPlatforms = NumPlatforms;
+    }
+
+    if (phPlatforms != nullptr) {
+      for (unsigned i = 0; i < std::min(NumEntries, NumPlatforms); ++i) {
+        phPlatforms[i] = &PlatformIds[i];
+      }
+    }
+
+    return Result;
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urPlatformGetApiVersion(ur_platform_handle_t, ur_api_version_t *pVersion) {
+  *pVersion = UR_API_VERSION_CURRENT; // Same answer whatever the platform.
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t) {
+  return UR_RESULT_SUCCESS; // No-op: the flags are ignored.
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) {
+  return UR_RESULT_SUCCESS; // No-op: the argument is ignored.
+}
+
+// Get HIP adapter specific backend option.
+// Current support is only for optimization options, which all map to the
+// empty string for HIP; any other option is rejected with INVALID_VALUE.
+// TODO: Determine correct string to be passed.
+UR_APIEXPORT ur_result_t UR_APICALL
+urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption,
+                           const char **ppPlatformOption) {
+  using namespace std::literals; // ""sv: compare contents, not pointers.
+  if (pFrontendOption == "-O0"sv || pFrontendOption == "-O1"sv ||
+      pFrontendOption == "-O2"sv || pFrontendOption == "-O3"sv ||
+      pFrontendOption == ""sv) {
+    *ppPlatformOption = "";
+    return UR_RESULT_SUCCESS;
+  }
+  return UR_RESULT_ERROR_INVALID_VALUE;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/platform.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/platform.hpp
new file mode 100644
index 0000000000000..86e24d952cc78
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/platform.hpp
@@ -0,0 +1,23 @@
+//===--------- platform.hpp - HIP Adapter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include "common.hpp"
+#include "device.hpp"
+
+#include <vector>
+
+/// A UR platform stores all known UR devices.
+/// In the HIP adapter this is just a vector of the
+/// available devices, since initialization is done
+/// when devices are used.
+///
+struct ur_platform_handle_t_ {
+  static hipEvent_t EvBase; // HIP event used as base counter
+  std::vector<std::unique_ptr<ur_device_handle_t_>> Devices;
+};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/program.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/program.cpp
new file mode 100644
index 0000000000000..a66c444c4d9f8
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/program.cpp
@@ -0,0 +1,301 @@
+//===--------- program.cpp - HIP Adapter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "program.hpp"
+
+ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Ctxt)
+    : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1},
+      Context{Ctxt} {
+  urContextRetain(Context);
+}
+
+ur_program_handle_t_::~ur_program_handle_t_() { urContextRelease(Context); }
+
+ur_result_t ur_program_handle_t_::setBinary(const char *Source, size_t Length) {
+  // Do not re-set program binary data which has already been set as that will
+  // delete the old binary data.
+  UR_ASSERT(Binary == nullptr && BinarySizeInBytes == 0,
+            UR_RESULT_ERROR_INVALID_OPERATION);
+  Binary = Source;
+  BinarySizeInBytes = Length;
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
+  if (BuildOptions) {
+    this->BuildOptions = BuildOptions;
+  }
+  // Load Binary into Module via the HIP JIT, capturing its info/error logs.
+  constexpr const unsigned int NumberOfOptions = 4u;
+
+  hipJitOption Options[NumberOfOptions];
+  void *OptionVals[NumberOfOptions];
+  // Only these log-buffer options reach the JIT; BuildOptions is just stored.
+  // Pass a buffer for info messages
+  Options[0] = hipJitOptionInfoLogBuffer;
+  OptionVals[0] = (void *)InfoLog;
+  // Pass the size of the info buffer
+  Options[1] = hipJitOptionInfoLogBufferSizeBytes;
+  OptionVals[1] = (void *)(long)MAX_LOG_SIZE;
+  // Pass a buffer for error message
+  Options[2] = hipJitOptionErrorLogBuffer;
+  OptionVals[2] = (void *)ErrorLog;
+  // Pass the size of the error buffer
+  Options[3] = hipJitOptionErrorLogBufferSizeBytes;
+  OptionVals[3] = (void *)(long)MAX_LOG_SIZE;
+
+  auto Result = UR_CHECK_ERROR(
+      hipModuleLoadDataEx(&Module, static_cast<const void *>(Binary),
+                          NumberOfOptions, Options, OptionVals));
+
+  const bool Success = (Result == UR_RESULT_SUCCESS);
+
+  BuildStatus =
+      Success ? UR_PROGRAM_BUILD_STATUS_SUCCESS : UR_PROGRAM_BUILD_STATUS_ERROR;
+
+  // Reached only if UR_CHECK_ERROR did not throw; map the status for callers
+  return Success ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE;
+}
+
+/// Would list the kernel names of a program; the HIP driver API doesn't expose
+/// an operation for this, so the query always reports an unsupported feature.
+/// Note: This is currently only being used by the SYCL program class for the
+///       has_kernel method, so an alternative would be to move the has_kernel
+///       query to UR and use hipModuleGetFunction to check for a kernel.
+ur_result_t getKernelNames(ur_program_handle_t) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// HIP will handle the PTX/HIPBIN binaries internally through hipModule_t
+/// object. So, urProgramCreateWithIL and urProgramCreateWithBinary are
+/// equivalent in terms of HIP adapter. See \ref urProgramCreateWithBinary.
+UR_APIEXPORT ur_result_t UR_APICALL
+urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL,
+                      size_t length, const ur_program_properties_t *pProperties,
+                      ur_program_handle_t *phProgram) {
+  ur_device_handle_t hDevice = hContext->getDevice();
+  const auto pBinary = reinterpret_cast<const uint8_t *>(pIL);
+
+  return urProgramCreateWithBinary(hContext, hDevice, length, pBinary,
+                                   pProperties, phProgram);
+}
+
+/// HIP will handle the PTX/HIPBIN binaries internally through a call to
+/// hipModuleLoadDataEx. So, urProgramCompile and urProgramBuild are equivalent
+/// in terms of HIP adapter. \TODO Implement asynchronous compilation
+UR_APIEXPORT ur_result_t UR_APICALL
+urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram,
+                 const char *pOptions) {
+  return urProgramBuild(hContext, hProgram, pOptions);
+}
+
+/// Loads the images from a UR program into a hipModule_t that can be
+/// used later on to extract functions (kernels).
+/// See \ref ur_program_handle_t for implementation details.
+UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t,
+                                                   ur_program_handle_t hProgram,
+                                                   const char *pOptions) {
+  ur_result_t Result = UR_RESULT_SUCCESS;
+
+  try {
+    ScopedContext Active(hProgram->getContext());
+    // Keep the status reported by buildProgram instead of discarding it.
+    Result = hProgram->buildProgram(pOptions);
+
+  } catch (ur_result_t Err) {
+    Result = Err;
+  }
+  return Result;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urProgramLink(ur_context_handle_t, uint32_t,
+                                                  const ur_program_handle_t *,
+                                                  const char *,
+                                                  ur_program_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+/// Creates a UR program object from a HIP program handle.
+/// TODO: Implement this.
+/// NOTE: Once implemented, the UR object is meant to own the native handle.
+///
+/// \param[in] hNativeProgram The native handle to create the program from.
+/// \param[in] hContext The UR context of the program.
+/// \param[out] phProgram Set to the UR program created from native handle.
+///
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle(
+    ur_native_handle_t, ur_context_handle_t,
+    const ur_program_native_properties_t *, ur_program_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t,
+                      ur_program_build_info_t propName, size_t propSize,
+                      void *pPropValue, size_t *pPropSizeRet) {
+  // Ignore unused parameter
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+
+  switch (propName) {
+  case UR_PROGRAM_BUILD_INFO_STATUS: {
+    return ReturnValue(hProgram->BuildStatus);
+  }
+  case UR_PROGRAM_BUILD_INFO_OPTIONS:
+    return ReturnValue(hProgram->BuildOptions.c_str());
+  case UR_PROGRAM_BUILD_INFO_LOG:
+    return ReturnValue(hProgram->InfoLog, hProgram->MAX_LOG_SIZE);
+  default:
+    break;
+  }
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName,
+                 size_t propSize, void *pProgramInfo, size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pProgramInfo, pPropSizeRet);
+
+  switch (propName) {
+  case UR_PROGRAM_INFO_REFERENCE_COUNT:
+    return ReturnValue(hProgram->getReferenceCount());
+  case UR_PROGRAM_INFO_CONTEXT:
+    return ReturnValue(hProgram->Context);
+  case UR_PROGRAM_INFO_NUM_DEVICES:
+    return ReturnValue(1u);
+  case UR_PROGRAM_INFO_DEVICES:
+    return ReturnValue(&hProgram->Context->DeviceId, 1);
+  case UR_PROGRAM_INFO_SOURCE:
+    return ReturnValue(hProgram->Binary);
+  case UR_PROGRAM_INFO_BINARY_SIZES:
+    return ReturnValue(&hProgram->BinarySizeInBytes, 1);
+  case UR_PROGRAM_INFO_BINARIES:
+    return ReturnValue(&hProgram->Binary, 1);
+  case UR_PROGRAM_INFO_KERNEL_NAMES:
+    return getKernelNames(hProgram);
+  default:
+    break;
+  }
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urProgramRetain(ur_program_handle_t hProgram) {
+  UR_ASSERT(hProgram->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_PROGRAM);
+  hProgram->incrementReferenceCount();
+  return UR_RESULT_SUCCESS;
+}
+
+/// Decreases the reference count of a ur_program_handle_t object.
+/// When the reference count reaches 0, it unloads the module (if one was
+/// loaded) from the context and deletes the program object.
+UR_APIEXPORT ur_result_t UR_APICALL
+urProgramRelease(ur_program_handle_t hProgram) {
+  // double delete or someone is messing with the ref count.
+  // either way, cannot safely proceed.
+  UR_ASSERT(hProgram->getReferenceCount() != 0,
+            UR_RESULT_ERROR_INVALID_PROGRAM);
+
+  // decrement ref count. If it is 0, delete the program.
+  if (hProgram->decrementReferenceCount() == 0) {
+    // Owns the program from here on, so it is deleted on every return path.
+    std::unique_ptr<ur_program_handle_t_> ProgramPtr{hProgram};
+    // A program that was never built has no module, so there is nothing to
+    // unload and releasing it must still succeed.
+    ur_result_t Result = UR_RESULT_SUCCESS;
+    try {
+      ScopedContext Active(hProgram->getContext());
+      if (auto HIPModule = hProgram->get())
+        Result = UR_CHECK_ERROR(hipModuleUnload(HIPModule));
+    } catch (...) {
+      Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
+    }
+
+    return Result;
+  }
+
+  return UR_RESULT_SUCCESS;
+}
+
+/// Gets the native HIP handle of a UR program object
+///
+/// \param[in] hProgram The UR program to get the native HIP object of.
+/// \param[out] phNativeProgram Set to the native handle of the UR program
+/// object.
+///
+/// \return UR_RESULT_SUCCESS
+UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
+    ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) {
+  *phNativeProgram = reinterpret_cast<ur_native_handle_t>(hProgram->get());
+  return UR_RESULT_SUCCESS;
+}
+
+/// Creates a UR program from a PTX or HIPBin binary.
+/// Note: No calls to HIP driver API in this function, the binary pointer is
+/// only stored for later.
+///
+/// Note: Only supports one device
+///
+/// \param[in] size Size of the binary in bytes; must not be zero.
+/// \param[in] pBinary The binary image; the pointer is stored, not copied.
+/// \param[out] phProgram Set to the newly created UR program.
+/// \return UR_RESULT_ERROR_INVALID_BINARY for a null or empty binary,
+/// UR_RESULT_ERROR_INVALID_CONTEXT if hDevice does not match the context.
+UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
+    ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
+    const uint8_t *pBinary, const ur_program_properties_t *,
+    ur_program_handle_t *phProgram) {
+  UR_ASSERT(pBinary != nullptr && size != 0, UR_RESULT_ERROR_INVALID_BINARY);
+  UR_ASSERT(hContext->getDevice()->get() == hDevice->get(),
+            UR_RESULT_ERROR_INVALID_CONTEXT);
+
+  std::unique_ptr<ur_program_handle_t_> RetProgram{
+      new ur_program_handle_t_{hContext}};
+
+  // TODO: Set metadata here and use reqd_work_group_size information.
+  // See urProgramCreateWithBinary in CUDA adapter.
+
+  // size was checked to be non-zero above, so it is stored as given.
+  const auto *BinaryChars = reinterpret_cast<const char *>(pBinary);
+  const ur_result_t Result = RetProgram->setBinary(BinaryChars, size);
+  UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
+
+  // Hand the program over to the caller only once it is fully set up.
+  *phProgram = RetProgram.release();
+
+  return Result;
+}
+
+// This entry point is only used for native specialization constants (SPIR-V),
+// and the HIP plugin is AOT only so this entry point is not supported.
+UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants(
+    ur_program_handle_t, uint32_t, const ur_specialization_constant_info_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer(
+    ur_device_handle_t hDevice, ur_program_handle_t hProgram,
+    const char *pFunctionName, void **ppFunctionPointer) {
+  // Check if device passed is the same the device bound to the context
+  UR_ASSERT(hDevice == hProgram->getContext()->getDevice(),
+            UR_RESULT_ERROR_INVALID_DEVICE);
+  // Start from null so a failed lookup never publishes an indeterminate value.
+  hipFunction_t Func = nullptr;
+  hipError_t Ret = hipModuleGetFunction(&Func, hProgram->get(), pFunctionName);
+  *ppFunctionPointer = Func;
+  ur_result_t Result = UR_RESULT_SUCCESS;
+
+  if (Ret != hipSuccess && Ret != hipErrorNotFound)
+    Result = UR_CHECK_ERROR(Ret);
+  if (Ret == hipErrorNotFound) {
+    *ppFunctionPointer = nullptr;
+    Result = UR_RESULT_ERROR_INVALID_FUNCTION_NAME;
+  }
+
+  return Result;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/program.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/program.hpp
new file mode 100644
index 0000000000000..9c233dbd99598
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/program.hpp
@@ -0,0 +1,46 @@
+//===--------- program.hpp - HIP Adapter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include <ur_api.h>
+
+#include <atomic>
+
+#include "context.hpp"
+
+/// Implementation of UR Program on HIP Module object
+struct ur_program_handle_t_ {
+  using native_type = hipModule_t;
+  native_type Module; // null until buildProgram has loaded the binary
+  const char *Binary; // borrowed pointer recorded by setBinary, never freed
+  size_t BinarySizeInBytes;
+  std::atomic_uint32_t RefCount;
+  ur_context_handle_t Context; // retained for the lifetime of the program
+
+  constexpr static size_t MAX_LOG_SIZE = 8192u;
+  // Zero-filled so the build-log query never exposes uninitialized memory.
+  char ErrorLog[MAX_LOG_SIZE] = {}, InfoLog[MAX_LOG_SIZE] = {};
+  std::string BuildOptions;
+  ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE;
+
+  ur_program_handle_t_(ur_context_handle_t Ctxt);
+  ~ur_program_handle_t_();
+
+  ur_result_t setBinary(const char *Binary, size_t BinarySizeInBytes);
+
+  ur_result_t buildProgram(const char *BuildOptions);
+  ur_context_handle_t getContext() const { return Context; };
+
+  native_type get() const noexcept { return Module; };
+
+  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
+
+  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount; }
+};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/queue.cpp
new file mode 100644
index 0000000000000..19447bcf8ae93
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/queue.cpp
@@ -0,0 +1,270 @@
+//===--------- queue.cpp - HIP Adapter ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "queue.hpp"
+#include "context.hpp"
+#include "event.hpp"
+
+void ur_queue_handle_t_::computeStreamWaitForBarrierIfNeeded(
+    hipStream_t Stream, uint32_t Stream_i) {
+  if (BarrierEvent && !ComputeAppliedBarrier[Stream_i]) {
+    UR_CHECK_ERROR(hipStreamWaitEvent(Stream, BarrierEvent, 0));
+    ComputeAppliedBarrier[Stream_i] = true;
+  }
+}
+
+void ur_queue_handle_t_::transferStreamWaitForBarrierIfNeeded(
+    hipStream_t Stream, uint32_t Stream_i) {
+  if (BarrierEvent && !TransferAppliedBarrier[Stream_i]) {
+    UR_CHECK_ERROR(hipStreamWaitEvent(Stream, BarrierEvent, 0));
+    TransferAppliedBarrier[Stream_i] = true;
+  }
+}
+
+hipStream_t ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) {
+  uint32_t Stream_i;
+  uint32_t Token;
+  while (true) {
+    if (NumComputeStreams < ComputeStreams.size()) {
+      // the check above is for performance - so as not to lock mutex every time
+      std::lock_guard<std::mutex> guard(ComputeStreamMutex);
+      // The second check is done after mutex is locked so other threads can not
+      // change NumComputeStreams after that
+      if (NumComputeStreams < ComputeStreams.size()) {
+        UR_CHECK_ERROR(hipStreamCreateWithFlags(
+            &ComputeStreams[NumComputeStreams++], Flags));
+      }
+    }
+    Token = ComputeStreamIdx++;
+    Stream_i = Token % ComputeStreams.size();
+    // if a stream has been reused before it was next selected round-robin
+    // fashion, we want to delay its next use and instead select another one
+    // that is more likely to have completed all the enqueued work.
+    if (DelayCompute[Stream_i]) {
+      DelayCompute[Stream_i] = false;
+    } else {
+      break;
+    }
+  }
+  if (StreamToken) {
+    *StreamToken = Token;
+  }
+  hipStream_t Res = ComputeStreams[Stream_i];
+  computeStreamWaitForBarrierIfNeeded(Res, Stream_i);
+  return Res;
+}
+
+hipStream_t ur_queue_handle_t_::getNextComputeStream(
+    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
+    ur_stream_quard &Guard, uint32_t *StreamToken) {
+  for (uint32_t i = 0; i < NumEventsInWaitList; i++) {
+    uint32_t Token = EventWaitList[i]->getComputeStreamToken();
+    if (EventWaitList[i]->getQueue() == this && canReuseStream(Token)) {
+      std::unique_lock<std::mutex> ComputeSyncGuard(ComputeStreamSyncMutex);
+      // redo the check after lock to avoid data races on
+      // LastSyncComputeStreams
+      if (canReuseStream(Token)) {
+        uint32_t Stream_i = Token % DelayCompute.size();
+        DelayCompute[Stream_i] = true;
+        if (StreamToken) {
+          *StreamToken = Token;
+        }
+        Guard = ur_stream_quard{std::move(ComputeSyncGuard)};
+        hipStream_t Res = EventWaitList[i]->getStream();
+        computeStreamWaitForBarrierIfNeeded(Res, Stream_i);
+        return Res;
+      }
+    }
+  }
+  Guard = {};
+  return getNextComputeStream(StreamToken);
+}
+
+hipStream_t ur_queue_handle_t_::getNextTransferStream() {
+  if (TransferStreams.empty()) { // for example in in-order queue
+    return getNextComputeStream();
+  }
+  if (NumTransferStreams < TransferStreams.size()) {
+    // the check above is for performance - so as not to lock mutex every time
+    std::lock_guard<std::mutex> Guard(TransferStreamMutex);
+    // The second check is done after mutex is locked so other threads can not
+    // change NumTransferStreams after that
+    if (NumTransferStreams < TransferStreams.size()) {
+      UR_CHECK_ERROR(hipStreamCreateWithFlags(
+          &TransferStreams[NumTransferStreams++], Flags));
+    }
+  }
+  uint32_t Stream_i = TransferStreamIdx++ % TransferStreams.size();
+  hipStream_t Res = TransferStreams[Stream_i];
+  transferStreamWaitForBarrierIfNeeded(Res, Stream_i);
+  return Res;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice,
+              const ur_queue_properties_t *pProps, ur_queue_handle_t *phQueue) {
+  try {
+    std::unique_ptr<ur_queue_handle_t_> QueueImpl{nullptr};
+
+    if (hContext->getDevice() != hDevice) {
+      *phQueue = nullptr;
+      return UR_RESULT_ERROR_INVALID_DEVICE;
+    }
+
+    unsigned int Flags = 0;
+
+    const bool IsOutOfOrder =
+        pProps ? pProps->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
+               : false;
+
+    std::vector<hipStream_t> ComputeHipStreams(
+        IsOutOfOrder ? ur_queue_handle_t_::DefaultNumComputeStreams : 1);
+    std::vector<hipStream_t> TransferHipStreams(
+        IsOutOfOrder ? ur_queue_handle_t_::DefaultNumTransferStreams : 0);
+
+    QueueImpl = std::unique_ptr<ur_queue_handle_t_>(new ur_queue_handle_t_{
+        std::move(ComputeHipStreams), std::move(TransferHipStreams), hContext,
+        hDevice, Flags, pProps ? pProps->flags : 0});
+
+    *phQueue = QueueImpl.release();
+
+    return UR_RESULT_SUCCESS;
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue,
+                                                   ur_queue_info_t propName,
+                                                   size_t propValueSize,
+                                                   void *pPropValue,
+                                                   size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet);
+  switch (propName) {
+  case UR_QUEUE_INFO_CONTEXT:
+    return ReturnValue(hQueue->Context);
+  case UR_QUEUE_INFO_DEVICE:
+    return ReturnValue(hQueue->Device);
+  case UR_QUEUE_INFO_REFERENCE_COUNT:
+    return ReturnValue(hQueue->getReferenceCount());
+  case UR_QUEUE_INFO_FLAGS:
+    return ReturnValue(hQueue->URFlags);
+  case UR_QUEUE_INFO_EMPTY: {
+    bool IsReady = hQueue->allOf([](hipStream_t S) -> bool {
+      const hipError_t Ret = hipStreamQuery(S);
+      if (Ret == hipSuccess)
+        return true;
+      // Not idle (or failed): keep any exception from UR_CHECK_ERROR in here.
+      try {
+        UR_CHECK_ERROR(Ret);
+      } catch (...) {
+        return false;
+      }
+      return false;
+    });
+    return ReturnValue(IsReady);
+  }
+  default:
+    break;
+  }
+  // Unknown property: report it instead of silently returning success.
+  return UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) {
+  UR_ASSERT(hQueue->getReferenceCount() > 0, UR_RESULT_ERROR_INVALID_QUEUE);
+
+  hQueue->incrementReferenceCount();
+  return UR_RESULT_SUCCESS;
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) {
+  if (hQueue->decrementReferenceCount() > 0) {
+    return UR_RESULT_SUCCESS;
+  }
+  // Last reference dropped: destroy the created streams and free the queue.
+  try {
+    std::unique_ptr<ur_queue_handle_t_> QueueImpl(hQueue);
+
+    ScopedContext Active(hQueue->getContext());
+
+    hQueue->forEachStream([](hipStream_t S) {
+      UR_CHECK_ERROR(hipStreamSynchronize(S));
+      UR_CHECK_ERROR(hipStreamDestroy(S));
+    });
+
+    return UR_RESULT_SUCCESS;
+  } catch (ur_result_t Err) {
+    return Err;
+  } catch (...) {
+    return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) {
+  // set default result to a negative result (avoid false-positive tests)
+  ur_result_t Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
+
+  try {
+    // Result ends up as the status of the last stream that was synchronized.
+    ScopedContext Active(hQueue->getContext());
+
+    hQueue->syncStreams<true>([&Result](hipStream_t S) {
+      Result = UR_CHECK_ERROR(hipStreamSynchronize(S));
+    });
+
+  } catch (ur_result_t Err) {
+    Result = Err;
+  } catch (...) {
+    Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
+  }
+
+  return Result;
+}
+
+// There is no HIP counterpart for queue flushing and we don't run into the
+// same problem of having to flush cross-queue dependencies as some of the
+// other plugins, so it can be left as no-op.
+UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t) {
+  return UR_RESULT_SUCCESS;
+}
+
+/// Gets the native HIP handle of a UR queue object
+///
+/// \param[in] hQueue The UR queue to get the native HIP object of.
+/// \param[out] phNativeQueue Set to the native handle of the UR queue object.
+///
+/// \return UR_RESULT_SUCCESS
+UR_APIEXPORT ur_result_t UR_APICALL
+urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *,
+                       ur_native_handle_t *phNativeQueue) {
+  ScopedContext Active(hQueue->getContext());
+  *phNativeQueue =
+      reinterpret_cast<ur_native_handle_t>(hQueue->getNextComputeStream());
+  return UR_RESULT_SUCCESS;
+}
+
+/// Creates a UR queue object from a HIP queue handle.
+/// TODO: Implement this.
+/// NOTE: Once implemented, the UR object is meant to own the native handle.
+///
+/// \param[in] hNativeQueue The native handle to create UR queue object from.
+/// \param[in] hContext is the UR context of the queue.
+/// \param[out] phQueue Set to the UR queue object created from native handle.
+/// \param pProperties->isNativeHandleOwned tells if SYCL RT should assume the
+///        ownership of the native handle, if it can.
+///
+/// All parameters are currently ignored.
+/// \return UR_RESULT_ERROR_UNSUPPORTED_FEATURE
+UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
+    ur_native_handle_t, ur_context_handle_t, ur_device_handle_t,
+    const ur_queue_native_properties_t *, ur_queue_handle_t *) {
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/queue.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/queue.hpp
new file mode 100644
index 0000000000000..ac8aeaf37c373
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/queue.hpp
@@ -0,0 +1,238 @@
+//===--------- queue.hpp - HIP Adapter -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+#pragma once
+
+#include "common.hpp"
+
+using ur_stream_quard = std::unique_lock<std::mutex>;
+
+/// UR queue mapping on to hipStream_t objects.
+///
+struct ur_queue_handle_t_ {
+  using native_type = hipStream_t;
+  static constexpr int DefaultNumComputeStreams = 64;
+  static constexpr int DefaultNumTransferStreams = 16;
+
+  std::vector<native_type> ComputeStreams;
+  std::vector<native_type> TransferStreams;
+  // DelayCompute keeps track of which streams have been recently reused and
+  // their next use should be delayed. If a stream has been recently reused it
+  // will be skipped the next time it would be selected round-robin style. When
+  // skipped, its delay flag is cleared.
+  std::vector<bool> DelayCompute;
+  // keep track of which streams have applied barrier
+  std::vector<bool> ComputeAppliedBarrier;
+  std::vector<bool> TransferAppliedBarrier;
+  ur_context_handle_t Context;
+  ur_device_handle_t Device;
+  hipEvent_t BarrierEvent = nullptr;
+  hipEvent_t BarrierTmpEvent = nullptr;
+  std::atomic_uint32_t RefCount;
+  std::atomic_uint32_t EventCount;
+  std::atomic_uint32_t ComputeStreamIdx;
+  std::atomic_uint32_t TransferStreamIdx;
+  unsigned int NumComputeStreams;
+  unsigned int NumTransferStreams;
+  unsigned int LastSyncComputeStreams;
+  unsigned int LastSyncTransferStreams;
+  unsigned int Flags;
+  ur_queue_flags_t URFlags;
+  // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be
+  // locked at the same time, ComputeStreamSyncMutex should be locked first
+  // to avoid deadlocks
+  std::mutex ComputeStreamSyncMutex;
+  std::mutex ComputeStreamMutex;
+  std::mutex TransferStreamMutex;
+  std::mutex BarrierMutex;
+
+  ur_queue_handle_t_(std::vector<native_type> &&ComputeStreams,
+                     std::vector<native_type> &&TransferStreams,
+                     ur_context_handle_t Context, ur_device_handle_t Device,
+                     unsigned int Flags, ur_queue_flags_t URFlags)
+      : ComputeStreams{std::move(ComputeStreams)},
+        TransferStreams{std::move(TransferStreams)},
+        DelayCompute(this->ComputeStreams.size(), false),
+        ComputeAppliedBarrier(this->ComputeStreams.size()),
+        TransferAppliedBarrier(this->TransferStreams.size()), Context{Context},
+        Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIdx{0},
+        TransferStreamIdx{0}, NumComputeStreams{0}, NumTransferStreams{0},
+        LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags),
+        URFlags(URFlags) {
+    urContextRetain(Context);
+    urDeviceRetain(Device);
+  }
+
+  ~ur_queue_handle_t_() {
+    urContextRelease(Context);
+    urDeviceRelease(Device);
+  }
+
+  void computeStreamWaitForBarrierIfNeeded(hipStream_t Stream,
+                                           uint32_t Stream_i);
+  void transferStreamWaitForBarrierIfNeeded(hipStream_t Stream,
+                                            uint32_t Stream_i);
+
+  // getNextCompute/TransferStream() functions return streams from
+  // appropriate pools in round-robin fashion
+  native_type getNextComputeStream(uint32_t *StreamToken = nullptr);
+  // This overload tries to select a stream that was used by one of the
+  // dependencies. If that is not possible it returns a new stream. If a stream
+  // is reused, Guard receives a lock that must stay locked while it is in use.
+  native_type getNextComputeStream(uint32_t NumEventsInWaitList,
+                                   const ur_event_handle_t *EventWaitList,
+                                   ur_stream_quard &Guard,
+                                   uint32_t *StreamToken = nullptr);
+  native_type getNextTransferStream();
+  native_type get() { return getNextComputeStream(); };
+
+  bool hasBeenSynchronized(uint32_t StreamToken) {
+    // stream token not associated with one of the compute streams
+    if (StreamToken == std::numeric_limits<uint32_t>::max()) {
+      return false;
+    }
+    return LastSyncComputeStreams > StreamToken;
+  }
+
+  bool canReuseStream(uint32_t StreamToken) {
+    // stream token not associated with one of the compute streams
+    if (StreamToken == std::numeric_limits<uint32_t>::max()) {
+      return false;
+    }
+    // If the command represented by the stream token was not the last command
+    // enqueued to the stream we can not reuse the stream - we need to allow for
+    // commands enqueued after it and the one we are about to enqueue to run
+    // concurrently
+    bool IsLastCommand =
+        (ComputeStreamIdx - StreamToken) <= ComputeStreams.size();
+    // If there was a barrier enqueued to the queue after the command
+    // represented by the stream token we should not reuse the stream, as we can
+    // not take that stream into account for the bookkeeping for the next
+    // barrier - such a stream would not be synchronized with. Performance-wise
+    // it does not matter that we do not reuse the stream, as the work
+    // represented by the stream token is guaranteed to be complete by the
+    // barrier before any work we are about to enqueue to the stream will start,
+    // so the event does not need to be synchronized with.
+    return IsLastCommand && !hasBeenSynchronized(StreamToken);
+  }
+
+  template <typename T> bool allOf(T &&F) {
+    {
+      std::lock_guard<std::mutex> ComputeGuard(ComputeStreamMutex);
+      unsigned int End = std::min(
+          static_cast<unsigned int>(ComputeStreams.size()), NumComputeStreams);
+      if (!std::all_of(ComputeStreams.begin(), ComputeStreams.begin() + End, F))
+        return false;
+    }
+    {
+      std::lock_guard<std::mutex> TransferGuard(TransferStreamMutex);
+      unsigned int End =
+          std::min(static_cast<unsigned int>(TransferStreams.size()),
+                   NumTransferStreams);
+      if (!std::all_of(TransferStreams.begin(), TransferStreams.begin() + End,
+                       F))
+        return false;
+    }
+    return true;
+  }
+
+  template <typename T> void forEachStream(T &&F) {
+    {
+      std::lock_guard<std::mutex> ComputeGuard(ComputeStreamMutex);
+      unsigned int End = std::min(
+          static_cast<unsigned int>(ComputeStreams.size()), NumComputeStreams);
+      for (unsigned int i = 0; i < End; i++) {
+        F(ComputeStreams[i]);
+      }
+    }
+    {
+      std::lock_guard<std::mutex> TransferGuard(TransferStreamMutex);
+      unsigned int End =
+          std::min(static_cast<unsigned int>(TransferStreams.size()),
+                   NumTransferStreams);
+      for (unsigned int i = 0; i < End; i++) {
+        F(TransferStreams[i]);
+      }
+    }
+  }
+
+  template <bool ResetUsed = false, typename T> void syncStreams(T &&F) {
+    auto SyncCompute = [&F, &Streams = ComputeStreams, &Delay = DelayCompute](
+                           unsigned int Start, unsigned int Stop) {
+      for (unsigned int i = Start; i < Stop; i++) {
+        F(Streams[i]);
+        Delay[i] = false;
+      }
+    };
+    auto SyncTransfer = [&F, &Streams = TransferStreams](unsigned int Start,
+                                                         unsigned int Stop) {
+      for (unsigned int i = Start; i < Stop; i++) {
+        F(Streams[i]);
+      }
+    };
+    {
+      unsigned int Size = static_cast<unsigned int>(ComputeStreams.size());
+      std::lock_guard<std::mutex> ComputeSyncGuard(ComputeStreamSyncMutex);
+      std::lock_guard<std::mutex> ComputeGuard(ComputeStreamMutex);
+      unsigned int Start = LastSyncComputeStreams;
+      unsigned int End = NumComputeStreams < Size ? NumComputeStreams
+                                                  : ComputeStreamIdx.load();
+      if (End - Start >= Size) {
+        SyncCompute(0, Size);
+      } else {
+        Start %= Size;
+        End %= Size;
+        if (Start < End) {
+          SyncCompute(Start, End);
+        } else {
+          SyncCompute(Start, Size);
+          SyncCompute(0, End);
+        }
+      }
+      if (ResetUsed) {
+        LastSyncComputeStreams = End;
+      }
+    }
+    {
+      unsigned int Size = static_cast<unsigned int>(TransferStreams.size());
+      if (!Size) {
+        return;
+      }
+      std::lock_guard<std::mutex> TransferGuard(TransferStreamMutex);
+      unsigned int Start = LastSyncTransferStreams;
+      unsigned int End = NumTransferStreams < Size ? NumTransferStreams
+                                                   : TransferStreamIdx.load();
+      if (End - Start >= Size) {
+        SyncTransfer(0, Size);
+      } else {
+        Start %= Size;
+        End %= Size;
+        if (Start < End) {
+          SyncTransfer(Start, End);
+        } else {
+          SyncTransfer(Start, Size);
+          SyncTransfer(0, End);
+        }
+      }
+      if (ResetUsed) {
+        LastSyncTransferStreams = End;
+      }
+    }
+  }
+
+  ur_context_handle_t getContext() const { return Context; };
+
+  ur_device_handle_t getDevice() const { return Device; };
+
+  uint32_t incrementReferenceCount() noexcept { return ++RefCount; }
+
+  uint32_t decrementReferenceCount() noexcept { return --RefCount; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount; }
+
+  uint32_t getNextEventId() noexcept { return ++EventCount; }
+};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.cpp
new file mode 100644
index 0000000000000..8c9464aa9a587
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.cpp
@@ -0,0 +1,80 @@
+//===--------- sampler.cpp - HIP Adapter ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include "sampler.hpp"
+#include "common.hpp"
+
+ur_result_t urSamplerCreate(ur_context_handle_t hContext,
+                            const ur_sampler_desc_t *pDesc,
+                            ur_sampler_handle_t *phSampler) {
+  std::unique_ptr<ur_sampler_handle_t_> Sampler{
+      new ur_sampler_handle_t_(hContext)};
+
+  // Props packing: bit 0 normalized coords, bit 1 filter, bits 2+ addressing.
+  const bool HasDesc = pDesc && pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC;
+  if (!HasDesc) {
+    // No usable descriptor: normalized coords with clamp addressing.
+    Sampler->Props |= true | (UR_SAMPLER_ADDRESSING_MODE_CLAMP << 2);
+  } else {
+    Sampler->Props |= pDesc->normalizedCoords | (pDesc->filterMode << 1) |
+                      (pDesc->addressingMode << 2);
+  }
+
+  *phSampler = Sampler.release();
+  return UR_RESULT_SUCCESS;
+}
+
+/// Returns one sampler property, decoded from the packed Props word.
+ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler,
+                             ur_sampler_info_t propName, size_t propValueSize,
+                             void *pPropValue, size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet);
+
+  switch (propName) {
+  case UR_SAMPLER_INFO_REFERENCE_COUNT:
+    return ReturnValue(hSampler->getReferenceCount());
+  case UR_SAMPLER_INFO_CONTEXT:
+    return ReturnValue(hSampler->Context);
+  case UR_SAMPLER_INFO_NORMALIZED_COORDS:
+    // Only bit 0 is this flag; higher bits hold the filter/addressing modes.
+    return ReturnValue((hSampler->Props & 0x1) != 0);
+  case UR_SAMPLER_INFO_FILTER_MODE: {
+    auto FilterProp =
+        static_cast<ur_sampler_filter_mode_t>((hSampler->Props >> 1) & 0x1);
+    return ReturnValue(FilterProp);
+  }
+  case UR_SAMPLER_INFO_ADDRESSING_MODE: {
+    auto AddressingProp =
+        static_cast<ur_sampler_addressing_mode_t>((hSampler->Props >> 2) & 0x7);
+    return ReturnValue(AddressingProp);
+  }
+  default:
+    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
+  }
+  return {};
+}
+
+ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) {
+  hSampler->incrementReferenceCount(); // returned count is not needed here
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) {
+  // A count of zero at this point means a double release or a corrupted
+  // counter; neither case can be handled safely.
+  detail::ur::assertion(
+      hSampler->getReferenceCount() != 0,
+      "Reference count overflow detected in urSamplerRelease.");
+
+  // Drop one reference; the sampler is destroyed when the last one is gone.
+  const uint32_t Remaining = hSampler->decrementReferenceCount();
+  if (Remaining == 0)
+    delete hSampler;
+
+  return UR_RESULT_SUCCESS;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.hpp b/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.hpp
new file mode 100644
index 0000000000000..b1c98f0171741
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/sampler.hpp
@@ -0,0 +1,31 @@
+//===--------- sampler.hpp - HIP Adapter ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include <ur/ur.hpp>
+
+#include "context.hpp"
+
+/// Sampler object of the HIP adapter.
+///
+/// Props packs every sampler setting into a single 32-bit word:
+/// | 31 30 ... 6 5 |      4 3 2      |      1      |         0        |
+/// |      N/A      | addressing mode | filter mode | normalize coords |
+struct ur_sampler_handle_t_ {
+  std::atomic_uint32_t RefCount;
+  uint32_t Props;
+  ur_context_handle_t Context;
+
+  ur_sampler_handle_t_(ur_context_handle_t Context)
+      : RefCount{1}, Props{0}, Context{Context} {}
+
+  uint32_t incrementReferenceCount() noexcept { return RefCount += 1; }
+
+  uint32_t decrementReferenceCount() noexcept { return RefCount -= 1; }
+
+  uint32_t getReferenceCount() const noexcept { return RefCount.load(); }
+};
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp
new file mode 100644
index 0000000000000..580b9916fb485
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/ur_interface_loader.cpp
@@ -0,0 +1,304 @@
+//===--------- ur_interface_loader.cpp - Unified Runtime  ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include <ur_api.h>
+#include <ur_ddi.h>
+
+namespace {
+
+// TODO - this is a duplicate of what is in the L0 plugin
+// We should move this to somewhere common
+// Checks the arguments shared by the urGet*ProcAddrTable functions below.
+ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) {
+  if (!pDdiTable) {
+    return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+  }
+  // Pre 1.0 the loader and adapter must report exactly the same version.
+  // Post 1.0 only a major version match should be required.
+  const bool SameVersion = (version == UR_API_VERSION_CURRENT);
+  return SameVersion ? UR_RESULT_SUCCESS
+                     : UR_RESULT_ERROR_UNSUPPORTED_VERSION;
+}
+} // namespace
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// Fills the platform DDI table; entries set to nullptr get no handler here.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable(
+    ur_api_version_t version, ur_platform_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnGet = urPlatformGet;
+  pDdiTable->pfnGetInfo = urPlatformGetInfo;
+  pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion;
+  pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption;
+  pDdiTable->pfnGetNativeHandle = nullptr;
+  pDdiTable->pfnCreateWithNativeHandle = nullptr;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the context DDI table once the version and table pointer check out.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable(
+    ur_api_version_t version, ur_context_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnCreate = urContextCreate;
+  pDdiTable->pfnRetain = urContextRetain;
+  pDdiTable->pfnRelease = urContextRelease;
+  pDdiTable->pfnGetInfo = urContextGetInfo;
+  pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle;
+  pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle;
+  pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the event DDI table once the version and table pointer check out.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable(
+    ur_api_version_t version, ur_event_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnRetain = urEventRetain;
+  pDdiTable->pfnRelease = urEventRelease;
+  pDdiTable->pfnWait = urEventWait;
+  pDdiTable->pfnSetCallback = urEventSetCallback;
+  pDdiTable->pfnGetInfo = urEventGetInfo;
+  pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo;
+  pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle;
+  pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the program DDI table once the version and table pointer check out.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable(
+    ur_api_version_t version, ur_program_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnCreateWithIL = urProgramCreateWithIL;
+  pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary;
+  pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle;
+  pDdiTable->pfnBuild = urProgramBuild;
+  pDdiTable->pfnCompile = urProgramCompile;
+  pDdiTable->pfnLink = urProgramLink;
+  pDdiTable->pfnRetain = urProgramRetain;
+  pDdiTable->pfnRelease = urProgramRelease;
+  pDdiTable->pfnGetInfo = urProgramGetInfo;
+  pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo;
+  pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer;
+  pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle;
+  pDdiTable->pfnSetSpecializationConstants =
+      urProgramSetSpecializationConstants;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the kernel DDI table; entries set to nullptr get no handler here.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
+    ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnCreate = urKernelCreate;
+  pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle;
+  pDdiTable->pfnRetain = urKernelRetain;
+  pDdiTable->pfnRelease = urKernelRelease;
+  pDdiTable->pfnGetInfo = urKernelGetInfo;
+  pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo;
+  pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo;
+  pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle;
+  pDdiTable->pfnSetArgValue = urKernelSetArgValue;
+  pDdiTable->pfnSetArgPointer = urKernelSetArgPointer;
+  pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj;
+  pDdiTable->pfnSetArgSampler = urKernelSetArgSampler;
+  pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
+  pDdiTable->pfnSetArgLocal = nullptr;
+  pDdiTable->pfnSetSpecializationConstants = nullptr;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the sampler DDI table; entries set to nullptr get no handler here.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable(
+    ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnCreate = urSamplerCreate;
+  pDdiTable->pfnRetain = urSamplerRetain;
+  pDdiTable->pfnRelease = urSamplerRelease;
+  pDdiTable->pfnGetInfo = urSamplerGetInfo;
+  pDdiTable->pfnGetNativeHandle = nullptr;
+  pDdiTable->pfnCreateWithNativeHandle = nullptr;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the memory-object DDI table (buffers and images).
+UR_DLLEXPORT ur_result_t UR_APICALL
+urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnBufferCreate = urMemBufferCreate;
+  pDdiTable->pfnBufferPartition = urMemBufferPartition;
+  pDdiTable->pfnImageCreate = urMemImageCreate;
+  pDdiTable->pfnRetain = urMemRetain;
+  pDdiTable->pfnRelease = urMemRelease;
+  pDdiTable->pfnGetInfo = urMemGetInfo;
+  pDdiTable->pfnImageGetInfo = urMemImageGetInfo;
+  pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle;
+  pDdiTable->pfnBufferCreateWithNativeHandle =
+      urMemBufferCreateWithNativeHandle;
+  pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the enqueue DDI table once the version and table pointer check out.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
+    ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch;
+  pDdiTable->pfnEventsWait = urEnqueueEventsWait;
+  pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier;
+  pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead;
+  pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect;
+  pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite;
+  pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect;
+  pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy;
+  pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect;
+  pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill;
+  pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap;
+  pDdiTable->pfnMemUnmap = urEnqueueMemUnmap;
+  pDdiTable->pfnMemImageRead = urEnqueueMemImageRead;
+  pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite;
+  pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy;
+  pDdiTable->pfnUSMFill = urEnqueueUSMFill;
+  pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D;
+  pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy;
+  pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D;
+  pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch;
+  pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise;
+  pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead;
+  pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite;
+  pDdiTable->pfnReadHostPipe = urEnqueueReadHostPipe;
+  pDdiTable->pfnWriteHostPipe = urEnqueueWriteHostPipe;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the global DDI table (init and tear-down entry points).
+UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable(
+    ur_api_version_t version, ur_global_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+
+  pDdiTable->pfnTearDown = urTearDown;
+  pDdiTable->pfnInit = urInit;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the queue DDI table once the version and table pointer check out.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable(
+    ur_api_version_t version, ur_queue_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnCreate = urQueueCreate;
+  pDdiTable->pfnRetain = urQueueRetain;
+  pDdiTable->pfnRelease = urQueueRelease;
+  pDdiTable->pfnFlush = urQueueFlush;
+  pDdiTable->pfnFinish = urQueueFinish;
+  pDdiTable->pfnGetInfo = urQueueGetInfo;
+  pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle;
+  pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the USM DDI table; the pool entries are set to nullptr here.
+UR_DLLEXPORT ur_result_t UR_APICALL
+urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnHostAlloc = urUSMHostAlloc;
+  pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc;
+  pDdiTable->pfnSharedAlloc = urUSMSharedAlloc;
+  pDdiTable->pfnFree = urUSMFree;
+  pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo;
+  pDdiTable->pfnPoolCreate = nullptr;
+  pDdiTable->pfnPoolRetain = nullptr;
+  pDdiTable->pfnPoolRelease = nullptr;
+  pDdiTable->pfnPoolGetInfo = nullptr;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the device DDI table once the version and table pointer check out.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable(
+    ur_api_version_t version, ur_device_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnGet = urDeviceGet;
+  pDdiTable->pfnGetInfo = urDeviceGetInfo;
+  pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps;
+  pDdiTable->pfnRetain = urDeviceRetain;
+  pDdiTable->pfnRelease = urDeviceRelease;
+  pDdiTable->pfnPartition = urDevicePartition;
+  pDdiTable->pfnSelectBinary = urDeviceSelectBinary;
+  pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle;
+  pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle;
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the experimental command-buffer DDI table.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
+    ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnCreateExp = urCommandBufferCreateExp;
+  pDdiTable->pfnRetainExp = urCommandBufferRetainExp;
+  pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp;
+  pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp;
+  pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp;
+  pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp;
+  pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp;
+  pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp;
+  pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp;
+  pDdiTable->pfnAppendMembufferWriteExp =
+      urCommandBufferAppendMembufferWriteExp;
+  pDdiTable->pfnAppendMembufferCopyRectExp =
+      urCommandBufferAppendMembufferCopyRectExp;
+  pDdiTable->pfnAppendMembufferReadRectExp =
+      urCommandBufferAppendMembufferReadRectExp;
+  pDdiTable->pfnAppendMembufferWriteRectExp =
+      urCommandBufferAppendMembufferWriteRectExp;
+
+  return UR_RESULT_SUCCESS;
+}
+
+// Fills the experimental USM peer-to-peer DDI table.
+UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable(
+    ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) {
+  const ur_result_t Status = validateProcInputs(version, pDdiTable);
+  if (Status != UR_RESULT_SUCCESS)
+    return Status;
+  pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp;
+  pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp;
+  pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp;
+
+  return UR_RESULT_SUCCESS;
+}
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp
new file mode 100644
index 0000000000000..8be25d0128612
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/usm.cpp
@@ -0,0 +1,190 @@
+//===--------- usm.cpp - HIP Adapter ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------===//
+
+#include <cassert>
+
+#include "common.hpp"
+#include "context.hpp"
+#include "device.hpp"
+#include "platform.hpp"
+
+/// USM: Implements USM Host allocations using HIP Pinned Memory
+UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(
+    ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
+    [[maybe_unused]] ur_usm_pool_handle_t pool, size_t size, void **ppMem) {
+  // Zero means "no alignment requested"; anything else must be a power of two.
+  const auto Align = pUSMDesc ? pUSMDesc->align : 0;
+  UR_ASSERT(Align == 0 || (Align & (Align - 1)) == 0,
+            UR_RESULT_ERROR_INVALID_VALUE);
+
+  ur_result_t Status = UR_RESULT_SUCCESS;
+  try {
+    ScopedContext Active(hContext);
+    Status = UR_CHECK_ERROR(hipHostMalloc(ppMem, size));
+  } catch (ur_result_t Error) {
+    Status = Error;
+  }
+
+  // The requested alignment is only verified by assert, never enforced.
+  if (Status == UR_RESULT_SUCCESS) {
+    assert(Align == 0 || reinterpret_cast<std::uintptr_t>(*ppMem) % Align == 0);
+  }
+
+  return Status;
+}
+
+/// USM: Implements USM device allocations using a normal HIP device pointer
+UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(
+    ur_context_handle_t hContext, ur_device_handle_t,
+    const ur_usm_desc_t *pUSMDesc, [[maybe_unused]] ur_usm_pool_handle_t pool,
+    size_t size, void **ppMem) {
+  // Zero means "no alignment requested"; anything else must be a power of two.
+  const auto Align = pUSMDesc ? pUSMDesc->align : 0;
+  UR_ASSERT(Align == 0 || (Align & (Align - 1)) == 0,
+            UR_RESULT_ERROR_INVALID_VALUE);
+
+  ur_result_t Status = UR_RESULT_SUCCESS;
+  try {
+    ScopedContext Active(hContext);
+    Status = UR_CHECK_ERROR(hipMalloc(ppMem, size));
+  } catch (ur_result_t Error) {
+    Status = Error;
+  }
+
+  if (Status == UR_RESULT_SUCCESS) {
+    assert(Align == 0 || reinterpret_cast<std::uintptr_t>(*ppMem) % Align == 0);
+  }
+
+  return Status;
+}
+
+/// USM: Implements USM Shared allocations using HIP Managed Memory
+UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(
+    ur_context_handle_t hContext, ur_device_handle_t,
+    const ur_usm_desc_t *pUSMDesc, [[maybe_unused]] ur_usm_pool_handle_t pool,
+    size_t size, void **ppMem) {
+  // Zero means "no alignment requested"; anything else must be a power of two.
+  const auto Align = pUSMDesc ? pUSMDesc->align : 0;
+  UR_ASSERT(Align == 0 || (Align & (Align - 1)) == 0,
+            UR_RESULT_ERROR_INVALID_VALUE);
+
+  ur_result_t Status = UR_RESULT_SUCCESS;
+  try {
+    ScopedContext Active(hContext);
+    Status = UR_CHECK_ERROR(hipMallocManaged(ppMem, size, hipMemAttachGlobal));
+  } catch (ur_result_t Error) {
+    Status = Error;
+  }
+
+  if (Status == UR_RESULT_SUCCESS) {
+    assert(Align == 0 || reinterpret_cast<std::uintptr_t>(*ppMem) % Align == 0);
+  }
+
+  return Status;
+}
+
+/// USM: Frees the given USM pointer associated with the context.
+UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
+                                              void *pMem) {
+  ur_result_t Status = UR_RESULT_SUCCESS;
+  try {
+    ScopedContext Active(hContext);
+    // Look up the memory type HIP reports for the pointer; it decides which
+    // free call is used below.
+    hipPointerAttribute_t Attributes;
+    Status = UR_CHECK_ERROR(hipPointerGetAttributes(&Attributes, pMem));
+    const unsigned int Type = Attributes.memoryType;
+    UR_ASSERT(Type == hipMemoryTypeDevice || Type == hipMemoryTypeHost,
+              UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+    if (Type == hipMemoryTypeDevice) {
+      Status = UR_CHECK_ERROR(hipFree(pMem));
+    }
+    if (Type == hipMemoryTypeHost) {
+      Status = UR_CHECK_ERROR(hipFreeHost(pMem));
+    }
+  } catch (ur_result_t Error) {
+    Status = Error;
+  }
+  return Status;
+}
+
+/// Queries a property of the USM allocation that pMem points into.
+UR_APIEXPORT ur_result_t UR_APICALL
+urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
+                     ur_usm_alloc_info_t propName, size_t propValueSize,
+                     void *pPropValue, size_t *pPropValueSizeRet) {
+  ur_result_t Result = UR_RESULT_SUCCESS;
+  hipPointerAttribute_t hipPointerAttributeType;
+  UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet);
+
+  try {
+    ScopedContext Active(hContext);
+    switch (propName) {
+    case UR_USM_ALLOC_INFO_TYPE: {
+      // do not throw if hipPointerGetAttribute returns hipErrorInvalidValue
+      hipError_t Ret = hipPointerGetAttributes(&hipPointerAttributeType, pMem);
+      if (Ret == hipErrorInvalidValue) {
+        // pointer not known to the HIP subsystem
+        return ReturnValue(UR_USM_TYPE_UNKNOWN);
+      }
+      Result = checkErrorUR(Ret, __func__, __LINE__ - 5, __FILE__);
+      if (hipPointerAttributeType.isManaged) {
+        // pointer to managed memory
+        return ReturnValue(UR_USM_TYPE_SHARED);
+      }
+      Result = UR_CHECK_ERROR(
+          hipPointerGetAttributes(&hipPointerAttributeType, pMem));
+      const unsigned int Value = hipPointerAttributeType.memoryType;
+      UR_ASSERT(Value == hipMemoryTypeDevice || Value == hipMemoryTypeHost,
+                UR_RESULT_ERROR_INVALID_MEM_OBJECT);
+      if (Value == hipMemoryTypeDevice) {
+        // pointer to device memory
+        return ReturnValue(UR_USM_TYPE_DEVICE);
+      }
+      if (Value == hipMemoryTypeHost) {
+        // pointer to host memory
+        return ReturnValue(UR_USM_TYPE_HOST);
+      }
+      // should never get here
+#ifdef _MSC_VER
+      __assume(0);
+#else
+      __builtin_unreachable();
+#endif
+      return ReturnValue(UR_USM_TYPE_UNKNOWN);
+    }
+    case UR_USM_ALLOC_INFO_BASE_PTR:
+    case UR_USM_ALLOC_INFO_SIZE:
+      return UR_RESULT_ERROR_INVALID_VALUE;
+    case UR_USM_ALLOC_INFO_DEVICE: {
+      // get device index associated with this pointer
+      Result = UR_CHECK_ERROR(
+          hipPointerGetAttributes(&hipPointerAttributeType, pMem));
+      int DeviceIdx = hipPointerAttributeType.device;
+
+      // currently each device is in its own platform, so find the platform at
+      // the same index
+      std::vector<ur_platform_handle_t> Platforms;
+      Platforms.resize(DeviceIdx + 1);
+      Result = urPlatformGet(DeviceIdx + 1, Platforms.data(), nullptr);
+      // stop here rather than dereference a platform slot that was not filled
+      if (Result != UR_RESULT_SUCCESS)
+        return Result;
+      UR_ASSERT(Platforms[DeviceIdx], UR_RESULT_ERROR_INVALID_DEVICE);
+      // get the device from the platform
+      ur_device_handle_t Device = Platforms[DeviceIdx]->Devices[0].get();
+      return ReturnValue(Device);
+    }
+    default:
+      return UR_RESULT_ERROR_INVALID_ENUMERATION;
+    }
+  } catch (ur_result_t Error) {
+    Result = Error;
+  }
+  return Result;
+}
diff --git a/sycl/plugins/unified_runtime/ur/adapters/hip/usm_p2p.cpp b/sycl/plugins/unified_runtime/ur/adapters/hip/usm_p2p.cpp
new file mode 100644
index 0000000000000..aefcf4755558a
--- /dev/null
+++ b/sycl/plugins/unified_runtime/ur/adapters/hip/usm_p2p.cpp
@@ -0,0 +1,31 @@
+//===--------- usm_p2p.cpp - HIP Adapter---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------===//
+
+#include "common.hpp"
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urUsmP2PEnablePeerAccessExp(ur_device_handle_t, ur_device_handle_t) {
+  detail::ur::die(
+      "urUsmP2PEnablePeerAccessExp is not implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // presumably not reached
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL
+urUsmP2PDisablePeerAccessExp(ur_device_handle_t, ur_device_handle_t) {
+  detail::ur::die(
+      "urUsmP2PDisablePeerAccessExp is not implemented for HIP adapter.");
+  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; // presumably not reached
+}
+
+UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
+    ur_device_handle_t, ur_device_handle_t, ur_exp_peer_info_t, size_t propSize,
+    void *pPropValue, size_t *pPropSizeRet) {
+  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
+  // The query kind is ignored: every peer-info query is answered with 0 (false).
+  return ReturnValue(uint32_t{0});
+}
diff --git a/sycl/unittests/pi/hip/CMakeLists.txt b/sycl/unittests/pi/hip/CMakeLists.txt
index 5965cbff1a1da..eee75b0447551 100644
--- a/sycl/unittests/pi/hip/CMakeLists.txt
+++ b/sycl/unittests/pi/hip/CMakeLists.txt
@@ -22,6 +22,7 @@ target_include_directories(PiHipTests
     "${sycl_inc_dir}/sycl/detail/"
     "${sycl_inc_dir}"
     "${sycl_plugin_dir}/hip/"
+    "${sycl_plugin_dir}/unified_runtime/"
 )
 
 if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD")
@@ -37,4 +38,5 @@ endif()
 target_link_libraries(PiHipTests
   PRIVATE
     rocmdrv
+    UnifiedRuntime-Headers
 )