diff --git a/buildbot/configure.py b/buildbot/configure.py index 7a26a460108d2..3c04cc005f427 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -30,8 +30,8 @@ def do_configure(args): libclc_gen_remangled_variants = 'OFF' sycl_build_pi_cuda = 'OFF' sycl_build_pi_esimd_cpu = 'OFF' - sycl_build_pi_rocm = 'OFF' - sycl_build_pi_rocm_platform = 'AMD' + sycl_build_pi_hip = 'OFF' + sycl_build_pi_hip_platform = 'AMD' sycl_werror = 'ON' llvm_enable_assertions = 'ON' llvm_enable_doxygen = 'OFF' @@ -48,7 +48,7 @@ def do_configure(args): if args.enable_esimd_cpu_emulation: sycl_build_pi_esimd_cpu = 'ON' - if args.cuda or args.rocm: + if args.cuda or args.hip: llvm_enable_projects += ';libclc' if args.cuda: @@ -57,20 +57,20 @@ def do_configure(args): libclc_gen_remangled_variants = 'ON' sycl_build_pi_cuda = 'ON' - if args.rocm: - if args.rocm_platform == 'AMD': + if args.hip: + if args.hip_platform == 'AMD': llvm_targets_to_build += ';AMDGPU' libclc_targets_to_build += ';amdgcn--;amdgcn--amdhsa' - # The ROCm plugin for AMD uses lld for linking + # The HIP plugin for AMD uses lld for linking llvm_enable_projects += ';lld' - elif args.rocm_platform == 'NVIDIA' and not args.cuda: + elif args.hip_platform == 'NVIDIA' and not args.cuda: llvm_targets_to_build += ';NVPTX' libclc_targets_to_build += ';nvptx64--;nvptx64--nvidiacl' libclc_gen_remangled_variants = 'ON' - sycl_build_pi_rocm_platform = args.rocm_platform - sycl_build_pi_rocm = 'ON' + sycl_build_pi_hip_platform = args.hip_platform + sycl_build_pi_hip = 'ON' if args.no_werror: sycl_werror = 'OFF' @@ -107,8 +107,8 @@ def do_configure(args): "-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build), "-DLIBCLC_GENERATE_REMANGLED_VARIANTS={}".format(libclc_gen_remangled_variants), "-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda), - "-DSYCL_BUILD_PI_ROCM={}".format(sycl_build_pi_rocm), - "-DSYCL_BUILD_PI_ROCM_PLATFORM={}".format(sycl_build_pi_rocm_platform), + "-DSYCL_BUILD_PI_HIP={}".format(sycl_build_pi_hip), + "-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform), "-DLLVM_BUILD_TOOLS=ON", "-DSYCL_ENABLE_WERROR={}".format(sycl_werror), "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), @@ -178,8 +178,8 @@ def main(): parser.add_argument("-t", "--build-type", metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release") parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA") - parser.add_argument("--rocm", action='store_true', help="switch from OpenCL to ROCm") - parser.add_argument("--rocm-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose ROCm backend") + parser.add_argument("--hip", action='store_true', help="switch from OpenCL to HIP") + parser.add_argument("--hip-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose hardware platform for HIP backend") parser.add_argument("--arm", action='store_true', help="build ARM support rather than x86") parser.add_argument("--enable-esimd-cpu-emulation", action='store_true', help="build with ESIMD_CPU emulation support") parser.add_argument("--no-assertions", action='store_true', help="build without assertions") diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 2623a85227515..8ca27d106e57f 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -90,8 +90,8 @@ install(DIRECTORY ${OpenCL_INCLUDE_DIR}/CL option(SYCL_BUILD_PI_CUDA "Enables the CUDA backend for the Plugin Interface" OFF) -option(SYCL_BUILD_PI_ROCM - "Enables the ROCM backend for the Plugin Interface" OFF) +option(SYCL_BUILD_PI_HIP + "Enables the HIP backend for the Plugin Interface" OFF) # Configure SYCL version macro set(sycl_inc_dir ${CMAKE_CURRENT_SOURCE_DIR}/include) @@ -279,16 +279,16 @@ if(SYCL_BUILD_PI_CUDA) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_cuda) endif() -if(SYCL_BUILD_PI_ROCM) +if(SYCL_BUILD_PI_HIP) # Ensure that libclc is enabled. list(FIND LLVM_ENABLE_PROJECTS libclc LIBCLC_FOUND) if( LIBCLC_FOUND EQUAL -1 ) message(FATAL_ERROR - "ROCM support requires adding \"libclc\" to the CMake argument \"LLVM_ENABLE_PROJECTS\"") + "HIP support requires adding \"libclc\" to the CMake argument \"LLVM_ENABLE_PROJECTS\"") endif() - add_dependencies(sycl-toolchain libspirv-builtins pi_rocm) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_rocm) + add_dependencies(sycl-toolchain libspirv-builtins pi_hip) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_hip) endif() # TODO : Remove 'if (NOT MSVC)' when CM_EMU supports Windows diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index 762e818cb552f..07a8169c6b1aa 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -9,8 +9,8 @@ and a wide range of compute accelerators such as GPU and FPGA. - [Build DPC++ toolchain](#build-dpc-toolchain) - [Build DPC++ toolchain with libc++ library](#build-dpc-toolchain-with-libc-library) - [Build DPC++ toolchain with support for NVIDIA CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda) - - [Build DPC++ toolchain with support for AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm) - - [Build DPC++ toolchain with support for NVIDIA ROCm](#build-dpc-toolchain-with-support-for-nvidia-rocm) + - [Build DPC++ toolchain with support for HIP AMD](#build-dpc-toolchain-with-support-for-hip-amd) + - [Build DPC++ toolchain with support for HIP NVIDIA](#build-dpc-toolchain-with-support-for-hip-nvidia) - [Build DPC++ toolchain with support for ESIMD CPU Emulation](#build-dpc-toolchain-with-support-for-esimd-cpu) - [Build Doxygen documentation](#build-doxygen-documentation) - [Deployment](#deployment) @@ -30,7 +30,7 @@ and a wide range of compute accelerators such as GPU and FPGA. - [C++ standard](#c-standard) - [Known Issues and Limitations](#known-issues-and-limitations) - [CUDA back-end limitations](#cuda-back-end-limitations) - - [ROCm back-end limitations](#rocm-back-end-limitations) + - [HIP back-end limitations](#hip-back-end-limitations) - [Find More](#find-more) ## Prerequisites @@ -108,8 +108,8 @@ flags can be found by launching the script with `--help`): * `--system-ocl` -> Don't download OpenCL headers and library via CMake but use the system ones * `--no-werror` -> Don't treat warnings as errors when compiling llvm * `--cuda` -> use the cuda backend (see [Nvidia CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda)) -* `--rocm` -> use the rocm backend (see [AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm)) -* `--rocm-platform` -> select the platform used by the rocm backend, `AMD` or `NVIDIA` (see [AMD ROCm](#build-dpc-toolchain-with-support-for-amd-rocm) or see [NVIDIA ROCm](#build-dpc-toolchain-with-support-for-nvidia-rocm)) +* `--hip` -> use the HIP backend (see [HIP](#build-dpc-toolchain-with-support-for-hip-amd)) +* `--hip-platform` -> select the platform used by the hip backend, `AMD` or `NVIDIA` (see [HIP AMD](#build-dpc-toolchain-with-support-for-hip-amd) or see [HIP NVIDIA](#build-dpc-toolchain-with-support-for-hip-nvidia)) * '--enable-esimd-cpu-emulation' -> enable ESIMD CPU emulation (see [ESIMD CPU emulation](#build-dpc-toolchain-with-support-for-esimd-cpu)) * `--shared-libs` -> Build shared libraries * `-t` -> Build type (debug or release) @@ -161,11 +161,12 @@ a Titan RTX GPU (SM 71), but it should work on any GPU compatible with SM 50 or above. The default SM for the NVIDIA CUDA backend is 5.0. Users can specify lower values, but some features may not be supported. -### Build DPC++ toolchain with support for AMD ROCm -There is experimental support for DPC++ for ROCm devices. +### Build DPC++ toolchain with support for HIP AMD -To enable support for ROCm devices, follow the instructions for the Linux -DPC++ toolchain, but add the `--rocm` flag to `configure.py` +There is experimental support for DPC++ for HIP devices. + +To enable support for HIP devices, follow the instructions for the Linux +DPC++ toolchain, but add the `--hip` flag to `configure.py` Enabling this flag requires an installation of ROCm 4.2.0 on the system, refer to @@ -176,31 +177,31 @@ Currently, the only combination tested is Ubuntu 18.04 with ROCm 4.2.0 using a V [LLD](https://llvm.org/docs/AMDGPUUsage.html) is necessary for the AMD GPU compilation chain. The AMDGPU backend generates a standard ELF [ELF] relocatable code object that can be linked by lld to produce a standard ELF shared code object which can be loaded and executed on an AMDGPU target. -So if you want to support AMD ROCm, you should also build the lld project. +So if you want to support HIP AMD, you should also build the lld project. [LLD Build Guide](https://lld.llvm.org/) The following CMake variables can be updated to change where CMake is looking -for the ROCm installation: +for the HIP installation: -* `SYCL_BUILD_PI_ROCM_INCLUDE_DIR`: Path to HIP include directory (default +* `SYCL_BUILD_PI_HIP_INCLUDE_DIR`: Path to HIP include directory (default `/opt/rocm/hip/include`). -* `SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR`: Path to HSA include directory (default +* `SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR`: Path to HSA include directory (default `/opt/rocm/hsa/include`). -* `SYCL_BUILD_PI_ROCM_AMD_LIBRARY`: Path to HIP runtime library (default +* `SYCL_BUILD_PI_HIP_AMD_LIBRARY`: Path to HIP runtime library (default `/opt/rocm/hip/lib/libamdhip64.so`). -### Build DPC++ toolchain with support for NVIDIA ROCm +### Build DPC++ toolchain with support for HIP NVIDIA -There is experimental support for DPC++ for using ROCm on NVIDIA devices. +There is experimental support for DPC++ for using HIP on NVIDIA devices. This is a compatibility feature and the [CUDA backend](#build-dpc-toolchain-with-support-for-nvidia-cuda) should be preferred to run on NVIDIA GPUs. -To enable support for NVIDIA ROCm devices, follow the instructions for the Linux -DPC++ toolchain, but add the `--rocm` and `--rocm-platform NVIDIA` flags to +To enable support for HIP NVIDIA devices, follow the instructions for the Linux +DPC++ toolchain, but add the `--hip` and `--hip-platform NVIDIA` flags to `configure.py`. -Enabling this flag requires ROCm to be installed, more specifically +Enabling this flag requires HIP to be installed, more specifically [HIP NVCC](https://rocmdocs.amd.com/en/latest/Installation_Guide/HIP-Installation.html#nvidia-platform), as well as CUDA to be installed, see [NVIDIA CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html). @@ -468,7 +469,7 @@ skipped. If CUDA support has been built, it is tested only if there are CUDA devices available. -If testing with ROCm for AMD make sure to specify the GPU being used +If testing with HIP for AMD make sure to specify the GPU being used by adding `-Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=` to the CMake variable `SYCL_CLANG_EXTRA_FLAGS`. @@ -589,14 +590,14 @@ and run following command: clang++ -fsycl simple-sycl-app.cpp -o simple-sycl-app.exe ``` -When building for CUDA or NVIDIA ROCm, use the CUDA target triple as follows: +When building for CUDA or HIP NVIDIA, use the CUDA target triple as follows: ```bash clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ simple-sycl-app.cpp -o simple-sycl-app-cuda.exe ``` -When building for ROCm, use the ROCm target triple and specify the +When building for HIP AMD, use the AMD target triple and specify the target architecture with `-Xsycl-target-backend --offload-arch=` as follows: @@ -638,8 +639,8 @@ execution, so SYCL runtime will use `default_selector` logic to select one of accelerators available in the system or SYCL host device. In this case, the behavior of the `default_selector` can be altered using the `SYCL_BE` environment variable, setting `PI_CUDA` forces -the usage of the CUDA backend (if available), `PI_ROCM` forces -the usage of the ROCm backend (if available), `PI_OPENCL` will +the usage of the CUDA backend (if available), `PI_HIP` forces +the usage of the HIP backend (if available), `PI_OPENCL` will force the usage of the OpenCL backend. ```bash @@ -783,15 +784,15 @@ which contains all the symbols required. * The NVIDIA OpenCL headers conflict with the OpenCL headers required for this project and may cause compilation issues on some platforms -### ROCm back-end limitations +### HIP back-end limitations * For supported Operating Systems, please refer to the [Supported Operating Systems](https://github.com/RadeonOpenCompute/ROCm#supported-operating-systems) * The only combination tested is Ubuntu 18.04 with ROCm 4.2 using a Vega20 gfx906. * Judging from the current [test](https://github.com/zjin-lcf/oneAPI-DirectProgramming) results, - there is still a lot of room for improvement in ROCm back-end support. The current problems include three aspects. + there is still a lot of room for improvement in HIP back-end support. The current problems include three aspects. The first one is at compile time: the `barrier` and `atomic` keywords are not supported. - The second is at runtime: when calling `hipMemcpyDtoHAsync` ROCm API, the program will cause an exception if the input data size is too large. - The third is calculation accuracy: the ROCm backend has obvious errors in the calculation results of some float type operators + The second is at runtime: when calling `hipMemcpyDtoHAsync` HIP API, the program will cause an exception if the input data size is too large. + The third is calculation accuracy: the HIP backend has obvious errors in the calculation results of some float type operators ## Find More diff --git a/sycl/include/CL/sycl/backend_types.hpp b/sycl/include/CL/sycl/backend_types.hpp index 28c05a2ad215d..42568222c0f10 100644 --- a/sycl/include/CL/sycl/backend_types.hpp +++ b/sycl/include/CL/sycl/backend_types.hpp @@ -27,7 +27,7 @@ enum class backend : char { cuda = 3, all = 4, esimd_cpu = 5, - rocm = 6, + hip = 6, }; template struct interop; @@ -58,8 +58,8 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) { case backend::esimd_cpu: Out << "esimd_cpu"; break; - case backend::rocm: - Out << "rocm"; + case backend::hip: + Out << "hip"; break; case backend::all: Out << "all"; diff --git a/sycl/include/CL/sycl/detail/hip_definitions.hpp b/sycl/include/CL/sycl/detail/hip_definitions.hpp index 555609139446f..30265cbb6c475 100644 --- a/sycl/include/CL/sycl/detail/hip_definitions.hpp +++ b/sycl/include/CL/sycl/detail/hip_definitions.hpp @@ -1,4 +1,4 @@ -//==------------ hip_definitions.hpp - SYCL ROCM backend ------------------==// +//==------------ hip_definitions.hpp - SYCL HIP backend -------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index dcde4e1d807d7..e03dd9580b43f 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -66,13 +66,13 @@ bool trace(TraceLevel level); #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" #define __SYCL_ESIMD_CPU_PLUGIN_NAME "pi_esimd_cpu.dll" -#define __SYCL_ROCM_PLUGIN_NAME "libpi_rocm.dll" +#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" #else #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.so" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.so" #define __SYCL_ESIMD_CPU_PLUGIN_NAME "libpi_esimd_cpu.so" -#define __SYCL_ROCM_PLUGIN_NAME "libpi_rocm.so" +#define __SYCL_HIP_PLUGIN_NAME "libpi_hip.so" #endif // Report error and no return (keeps compiler happy about no return statements). diff --git a/sycl/plugins/CMakeLists.txt b/sycl/plugins/CMakeLists.txt index 10f22d881da84..fb153cdf74548 100644 --- a/sycl/plugins/CMakeLists.txt +++ b/sycl/plugins/CMakeLists.txt @@ -8,8 +8,8 @@ if(SYCL_BUILD_PI_CUDA) add_subdirectory(cuda) endif() -if(SYCL_BUILD_PI_ROCM) - add_subdirectory(rocm) +if(SYCL_BUILD_PI_HIP) + add_subdirectory(hip) endif() add_subdirectory(opencl) diff --git a/sycl/plugins/hip/CMakeLists.txt b/sycl/plugins/hip/CMakeLists.txt new file mode 100644 index 0000000000000..7b3d9c395b478 --- /dev/null +++ b/sycl/plugins/hip/CMakeLists.txt @@ -0,0 +1,83 @@ +# Set default PI HIP platform to AMD +set(SYCL_BUILD_PI_HIP_PLATFORM "AMD" CACHE STRING "PI HIP platform, AMD or NVIDIA") + +message(STATUS "Including the PI API HIP backend for ${SYCL_BUILD_PI_HIP_PLATFORM}.") + +# Set default HIP include dirs +set(SYCL_BUILD_PI_HIP_INCLUDE_DIR "/opt/rocm/hip/include" CACHE STRING "HIP include dir") +set(SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR "/opt/rocm/hsa/include" CACHE STRING "HSA include dir") +set(HIP_HEADERS "${SYCL_BUILD_PI_HIP_INCLUDE_DIR};${SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR}") + +# Create pi_hip library +add_library(pi_hip SHARED + "${sycl_inc_dir}/CL/sycl/detail/pi.h" + "${sycl_inc_dir}/CL/sycl/detail/pi.hpp" + "pi_hip.hpp" + "pi_hip.cpp" +) +add_dependencies(sycl-toolchain pi_hip) +set_target_properties(pi_hip PROPERTIES LINKER_LANGUAGE CXX) +target_link_libraries(pi_hip PUBLIC OpenCL-Headers) + +# Setup include directories +target_include_directories(pi_hip + PRIVATE + ${sycl_inc_dir} + ${sycl_plugin_dir} +) + +if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD") + # Import HIP runtime library + set(SYCL_BUILD_PI_HIP_AMD_LIBRARY "/opt/rocm/hip/lib/libamdhip64.so" CACHE STRING "HIP AMD runtime library") + add_library(rocmdrv SHARED IMPORTED GLOBAL) + + set_target_properties( + rocmdrv PROPERTIES + IMPORTED_LOCATION ${SYCL_BUILD_PI_HIP_AMD_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" + ) + target_link_libraries(pi_hip PUBLIC rocmdrv) + + # Set HIP define to select AMD platform + target_compile_definitions(pi_hip PRIVATE __HIP_PLATFORM_AMD__) + + # Make sure lld is built as part of the toolchain + add_dependencies(sycl-toolchain lld) +elseif("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA") + # Import CUDA libraries + find_package(CUDA REQUIRED) + list(APPEND HIP_HEADERS ${CUDA_INCLUDE_DIRS}) + + # cudadrv may be defined by the CUDA plugin + if(NOT TARGET cudadrv) + add_library(cudadrv SHARED IMPORTED GLOBAL) + set_target_properties( + cudadrv PROPERTIES + IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" + ) + endif() + + add_library(cudart SHARED IMPORTED GLOBAL) + set_target_properties( + cudart PROPERTIES + IMPORTED_LOCATION ${CUDA_CUDART_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" + ) + target_link_libraries(pi_hip PUBLIC cudadrv cudart) + + # Set HIP define to select NVIDIA platform + target_compile_definitions(pi_hip PRIVATE __HIP_PLATFORM_NVIDIA__) +else() + message(FATAL_ERROR "Unspecified PI HIP platform please set SYCL_BUILD_PI_HIP_PLATFORM to 'AMD' or 'NVIDIA'") +endif() + +add_common_options(pi_hip) + +install(TARGETS pi_hip + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_hip + RUNTIME DESTINATION "bin" COMPONENT pi_hip +) diff --git a/sycl/plugins/rocm/pi_rocm.cpp b/sycl/plugins/hip/pi_hip.cpp similarity index 84% rename from sycl/plugins/rocm/pi_rocm.cpp rename to sycl/plugins/hip/pi_hip.cpp index aa4bbe5270efd..d5019a234d30c 100644 --- a/sycl/plugins/rocm/pi_rocm.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1,4 +1,4 @@ -//==---------- pi_rocm.cpp - HIP Plugin -----------------------------------==// +//==---------- pi_hip.cpp - HIP Plugin ------------------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -/// \file pi_rocm.cpp +/// \file pi_hip.cpp /// Implementation of HIP Plugin. /// -/// \ingroup sycl_pi_rocm +/// \ingroup sycl_pi_hip #include #include #include -#include +#include #include #include @@ -367,12 +367,12 @@ void assertion(bool Condition, const char *Message) { extern "C" { // Required in a number of functions, so forward declare here -pi_result rocm_piEnqueueEventsWait(pi_queue command_queue, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event); -pi_result rocm_piEventRelease(pi_event event); -pi_result rocm_piEventRetain(pi_event event); +pi_result hip_piEnqueueEventsWait(pi_queue command_queue, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); +pi_result hip_piEventRelease(pi_event event); +pi_result hip_piEventRetain(pi_event event); } // extern "C" @@ -396,16 +396,16 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue) } if (queue_ != nullptr) { - rocm_piQueueRetain(queue_); + hip_piQueueRetain(queue_); } - rocm_piContextRetain(context_); + hip_piContextRetain(context_); } _pi_event::~_pi_event() { if (queue_ != nullptr) { - rocm_piQueueRelease(queue_); + hip_piQueueRelease(queue_); } - rocm_piContextRelease(context_); + hip_piContextRelease(context_); } pi_result _pi_event::start() { @@ -518,10 +518,10 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) { _pi_program::_pi_program(pi_context ctxt) : module_{nullptr}, binary_{}, binarySizeInBytes_{0}, refCount_{1}, context_{ctxt} { - rocm_piContextRetain(context_); + hip_piContextRetain(context_); } -_pi_program::~_pi_program() { rocm_piContextRelease(context_); } +_pi_program::~_pi_program() { hip_piContextRelease(context_); } pi_result _pi_program::set_binary(const char *source, size_t length) { assert((binary_ == nullptr && binarySizeInBytes_ == 0) && @@ -597,31 +597,31 @@ template class ReleaseGuard { T Captive; static pi_result callRelease(pi_device Captive) { - return rocm_piDeviceRelease(Captive); + return hip_piDeviceRelease(Captive); } static pi_result callRelease(pi_context Captive) { - return rocm_piContextRelease(Captive); + return hip_piContextRelease(Captive); } static pi_result callRelease(pi_mem Captive) { - return rocm_piMemRelease(Captive); + return hip_piMemRelease(Captive); } static pi_result callRelease(pi_program Captive) { - return rocm_piProgramRelease(Captive); + return hip_piProgramRelease(Captive); } static pi_result callRelease(pi_kernel Captive) { - return rocm_piKernelRelease(Captive); + return hip_piKernelRelease(Captive); } static pi_result callRelease(pi_queue Captive) { - return rocm_piQueueRelease(Captive); + return hip_piQueueRelease(Captive); } static pi_result callRelease(pi_event Captive) { - return rocm_piEventRelease(Captive); + return hip_piEventRelease(Captive); } public: @@ -645,7 +645,7 @@ template class ReleaseGuard { // succeeded or not. Either way, the state of the program is compromised // and likely unrecoverable. cl::sycl::detail::pi::die( - "Unrecoverable program state reached in rocm_piMemRelease"); + "Unrecoverable program state reached in hip_piMemRelease"); } } } @@ -671,8 +671,8 @@ extern "C" { /// Triggers the HIP Driver initialization (hipInit) the first time, so this /// must be the first PI API called. /// -pi_result rocm_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, - pi_uint32 *num_platforms) { +pi_result hip_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, + pi_uint32 *num_platforms) { try { static std::once_flag initFlag; @@ -742,16 +742,16 @@ pi_result rocm_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, } } -pi_result rocm_piPlatformGetInfo(pi_platform platform, - pi_platform_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piPlatformGetInfo(pi_platform platform, + pi_platform_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { assert(platform != nullptr); switch (param_name) { case PI_PLATFORM_INFO_NAME: return getInfo(param_value_size, param_value, param_value_size_ret, - "AMD ROCM BACKEND"); + "AMD HIP BACKEND"); case PI_PLATFORM_INFO_VENDOR: return getInfo(param_value_size, param_value, param_value_size_ret, "AMD Corporation"); @@ -778,9 +778,9 @@ pi_result rocm_piPlatformGetInfo(pi_platform platform, /// Requesting a non-GPU device triggers an error, all PI HIP devices /// are GPUs. /// -pi_result rocm_piDevicesGet(pi_platform platform, pi_device_type device_type, - pi_uint32 num_entries, pi_device *devices, - pi_uint32 *num_devices) { +pi_result hip_piDevicesGet(pi_platform platform, pi_device_type device_type, + pi_uint32 num_entries, pi_device *devices, + pi_uint32 *num_devices) { pi_result err = PI_SUCCESS; const bool askingForDefault = device_type == PI_DEVICE_TYPE_DEFAULT; @@ -810,11 +810,11 @@ pi_result rocm_piDevicesGet(pi_platform platform, pi_device_type device_type, /// \return PI_SUCCESS if the function is exehipted successfully /// HIP devices are always root devices so retain always returns success. -pi_result rocm_piDeviceRetain(pi_device device) { return PI_SUCCESS; } +pi_result hip_piDeviceRetain(pi_device device) { return PI_SUCCESS; } -pi_result rocm_piContextGetInfo(pi_context context, pi_context_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piContextGetInfo(pi_context context, pi_context_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { switch (param_name) { case PI_CONTEXT_INFO_NUM_DEVICES: @@ -832,7 +832,7 @@ pi_result rocm_piContextGetInfo(pi_context context, pi_context_info param_name, return PI_OUT_OF_RESOURCES; } -pi_result rocm_piContextRetain(pi_context context) { +pi_result hip_piContextRetain(pi_context context) { assert(context != nullptr); assert(context->get_reference_count() > 0); @@ -840,7 +840,7 @@ pi_result rocm_piContextRetain(pi_context context) { return PI_SUCCESS; } -pi_result rocm_piextContextSetExtendedDeleter( +pi_result hip_piextContextSetExtendedDeleter( pi_context context, pi_context_extended_deleter function, void *user_data) { context->set_extended_deleter(function, user_data); return PI_SUCCESS; @@ -848,7 +848,7 @@ pi_result rocm_piextContextSetExtendedDeleter( /// Not applicable to HIP, devices cannot be partitioned. /// -pi_result rocm_piDevicePartition( +pi_result hip_piDevicePartition( pi_device device, const cl_device_partition_property *properties, // TODO: untie from OpenCL pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices) { @@ -857,10 +857,10 @@ pi_result rocm_piDevicePartition( /// \return If available, the first binary that is PTX /// -pi_result rocm_piextDeviceSelectBinary(pi_device device, - pi_device_binary *binaries, - pi_uint32 num_binaries, - pi_uint32 *selected_binary) { +pi_result hip_piextDeviceSelectBinary(pi_device device, + pi_device_binary *binaries, + pi_uint32 num_binaries, + pi_uint32 *selected_binary) { if (!binaries) { cl::sycl::detail::pi::die("No list of device images provided"); } @@ -868,7 +868,7 @@ pi_result rocm_piextDeviceSelectBinary(pi_device device, cl::sycl::detail::pi::die("No binary images in the list"); } - // Look for an image for the ROCm target, and return the first one that is + // Look for an image for the HIP target, and return the first one that is // found #if defined(__HIP_PLATFORM_AMD__) const char *binary_type = __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN; @@ -889,22 +889,22 @@ pi_result rocm_piextDeviceSelectBinary(pi_device device, return PI_INVALID_BINARY; } -pi_result rocm_piextGetDeviceFunctionPointer(pi_device device, - pi_program program, - const char *function_name, - pi_uint64 *function_pointer_ret) { +pi_result hip_piextGetDeviceFunctionPointer(pi_device device, + pi_program program, + const char *function_name, + pi_uint64 *function_pointer_ret) { cl::sycl::detail::pi::die( - "rocm_piextGetDeviceFunctionPointer not implemented"); + "hip_piextGetDeviceFunctionPointer not implemented"); return {}; } /// \return PI_SUCCESS always since HIP devices are always root devices. /// -pi_result rocm_piDeviceRelease(pi_device device) { return PI_SUCCESS; } +pi_result hip_piDeviceRelease(pi_device device) { return PI_SUCCESS; } -pi_result rocm_piDeviceGetInfo(pi_device device, pi_device_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { static constexpr pi_uint32 max_work_item_dimensions = 3u; @@ -1203,7 +1203,7 @@ pi_result rocm_piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t(0)); } case PI_DEVICE_INFO_MAX_SAMPLERS: { - // This call is kind of meaningless for rocm, as samplers don't exist. + // This call is kind of meaningless for HIP, as samplers don't exist. // Closest thing is textures, which is 128. return getInfo(param_value_size, param_value, param_value_size_ret, 128u); } @@ -1564,8 +1564,8 @@ pi_result rocm_piDeviceGetInfo(pi_device device, pi_device_info param_name, /// \param[out] nativeHandle Set to the native handle of the PI device object. /// /// \return PI_SUCCESS -pi_result rocm_piextDeviceGetNativeHandle(pi_device device, - pi_native_handle *nativeHandle) { +pi_result hip_piextDeviceGetNativeHandle(pi_device device, + pi_native_handle *nativeHandle) { *nativeHandle = static_cast(device->get()); return PI_SUCCESS; } @@ -1579,9 +1579,9 @@ pi_result rocm_piextDeviceGetNativeHandle(pi_device device, /// \param[out] device Set to the PI device object created from native handle. /// /// \return TBD -pi_result rocm_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_platform platform, - pi_device *device) { +pi_result hip_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_platform platform, + pi_device *device) { cl::sycl::detail::pi::die( "Creation of PI device from native handle not implemented"); return {}; @@ -1607,12 +1607,12 @@ pi_result rocm_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, /// \param[out] retcontext Set to created context on success. /// /// \return PI_SUCCESS on success, otherwise an error return code. -pi_result rocm_piContextCreate(const pi_context_properties *properties, - pi_uint32 num_devices, const pi_device *devices, - void (*pfn_notify)(const char *errinfo, - const void *private_info, - size_t cb, void *user_data), - void *user_data, pi_context *retcontext) { +pi_result hip_piContextCreate(const pi_context_properties *properties, + pi_uint32 num_devices, const pi_device *devices, + void (*pfn_notify)(const char *errinfo, + const void *private_info, + size_t cb, void *user_data), + void *user_data, pi_context *retcontext) { assert(devices != nullptr); // TODO: How to implement context callback? @@ -1624,7 +1624,7 @@ pi_result rocm_piContextCreate(const pi_context_properties *properties, pi_result errcode_ret = PI_SUCCESS; // Parse properties. - bool property_rocm_primary = false; + bool property_hip_primary = false; while (properties && (0 != *properties)) { // Consume property ID. pi_context_properties id = *properties; @@ -1635,7 +1635,7 @@ pi_result rocm_piContextCreate(const pi_context_properties *properties, switch (id) { case __SYCL_PI_CONTEXT_PROPERTIES_HIP_PRIMARY: assert(value == PI_FALSE || value == PI_TRUE); - property_rocm_primary = static_cast(value); + property_hip_primary = static_cast(value); break; default: // Unknown property. @@ -1648,7 +1648,7 @@ pi_result rocm_piContextCreate(const pi_context_properties *properties, try { hipCtx_t current = nullptr; - if (property_rocm_primary) { + if (property_hip_primary) { // Use the HIP primary context and assume that we want to use it // immediately as we want to forge context switches. hipCtx_t Ctxt; @@ -1689,7 +1689,7 @@ pi_result rocm_piContextCreate(const pi_context_properties *properties, return errcode_ret; } -pi_result rocm_piContextRelease(pi_context ctxt) { +pi_result hip_piContextRelease(pi_context ctxt) { assert(ctxt != nullptr); @@ -1737,8 +1737,8 @@ pi_result rocm_piContextRelease(pi_context ctxt) { /// \param[out] nativeHandle Set to the native handle of the PI context object. /// /// \return PI_SUCCESS -pi_result rocm_piextContextGetNativeHandle(pi_context context, - pi_native_handle *nativeHandle) { +pi_result hip_piextContextGetNativeHandle(pi_context context, + pi_native_handle *nativeHandle) { *nativeHandle = reinterpret_cast(context->get()); return PI_SUCCESS; } @@ -1751,11 +1751,11 @@ pi_result rocm_piextContextGetNativeHandle(pi_context context, /// \param[out] context Set to the PI context object created from native handle. /// /// \return TBD -pi_result rocm_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_uint32 num_devices, - const pi_device *devices, - bool ownNativeHandle, - pi_context *context) { +pi_result hip_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_uint32 num_devices, + const pi_device *devices, + bool ownNativeHandle, + pi_context *context) { cl::sycl::detail::pi::die( "Creation of PI context from native handle not implemented"); return {}; @@ -1765,12 +1765,12 @@ pi_result rocm_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, /// Can trigger a manual copy depending on the mode. /// \TODO Implement USE_HOST_PTR using cuHostRegister /// -pi_result rocm_piMemBufferCreate(pi_context context, pi_mem_flags flags, - size_t size, void *host_ptr, pi_mem *ret_mem, - const pi_mem_properties *properties) { +pi_result hip_piMemBufferCreate(pi_context context, pi_mem_flags flags, + size_t size, void *host_ptr, pi_mem *ret_mem, + const pi_mem_properties *properties) { // Need input memory object assert(ret_mem != nullptr); - assert(properties == nullptr && "no mem properties goes to rocm RT yet"); + assert(properties == nullptr && "no mem properties goes to HIP RT yet"); // Currently, USE_HOST_PTR is not implemented using host register // since this triggers a weird segfault after program ends. // Setting this constant to true enables testing that behavior. @@ -1842,7 +1842,7 @@ pi_result rocm_piMemBufferCreate(pi_context context, pi_mem_flags flags, /// If this is zero, calls the relevant HIP Free function /// \return PI_SUCCESS unless deallocation error /// -pi_result rocm_piMemRelease(pi_mem memObj) { +pi_result hip_piMemRelease(pi_mem memObj) { assert((memObj != nullptr) && "PI_INVALID_MEM_OBJECTS"); pi_result ret = PI_SUCCESS; @@ -1899,7 +1899,7 @@ pi_result rocm_piMemRelease(pi_mem memObj) { // or not. Either way, the state of the program is compromised and likely // unrecoverable. cl::sycl::detail::pi::die( - "Unrecoverable program state reached in rocm_piMemRelease"); + "Unrecoverable program state reached in hip_piMemRelease"); } return PI_SUCCESS; @@ -1909,9 +1909,9 @@ pi_result rocm_piMemRelease(pi_mem memObj) { /// A buffer partition (or a sub-buffer, in OpenCL terms) is simply implemented /// as an offset over an existing HIP allocation. /// -pi_result rocm_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags, - pi_buffer_create_type buffer_create_type, - void *buffer_create_info, pi_mem *memObj) { +pi_result hip_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags, + pi_buffer_create_type buffer_create_type, + void *buffer_create_info, pi_mem *memObj) { assert((parent_buffer != nullptr) && "PI_INVALID_MEM_OBJECT"); assert(parent_buffer->is_buffer() && "PI_INVALID_MEM_OBJECTS"); assert(!parent_buffer->is_sub_buffer() && "PI_INVALID_MEM_OBJECT"); @@ -1973,11 +1973,11 @@ pi_result rocm_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags, return PI_SUCCESS; } -pi_result rocm_piMemGetInfo(pi_mem memObj, cl_mem_info queriedInfo, - size_t expectedQuerySize, void *queryOutput, - size_t *writtenQuerySize) { +pi_result hip_piMemGetInfo(pi_mem memObj, cl_mem_info queriedInfo, + size_t expectedQuerySize, void *queryOutput, + size_t *writtenQuerySize) { - cl::sycl::detail::pi::die("rocm_piMemGetInfo not implemented"); + cl::sycl::detail::pi::die("hip_piMemGetInfo not implemented"); } /// Gets the native HIP handle of a PI mem object @@ -1995,8 +1995,8 @@ pi_result rocm_piMemGetInfo(pi_mem memObj, cl_mem_info queriedInfo, /// \param[out] mem Set to the PI mem object created from native handle. /// /// \return TBD -pi_result rocm_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_mem *mem) { +pi_result hip_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_mem *mem) { cl::sycl::detail::pi::die( "Creation of PI mem from native handle not implemented"); return {}; @@ -2008,8 +2008,8 @@ pi_result rocm_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, /// * __SYCL_PI_HIP_SYNC_WITH_DEFAULT -> hipStreamNonBlocking /// \return Pi queue object mapping to a HIPStream /// -pi_result rocm_piQueueCreate(pi_context context, pi_device device, - pi_queue_properties properties, pi_queue *queue) { +pi_result hip_piQueueCreate(pi_context context, pi_device device, + pi_queue_properties properties, pi_queue *queue) { try { pi_result err = PI_SUCCESS; @@ -2045,9 +2045,9 @@ pi_result rocm_piQueueCreate(pi_context context, pi_device device, } } -pi_result rocm_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { assert(command_queue != nullptr); switch (param_name) { @@ -2070,7 +2070,7 @@ pi_result rocm_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, return {}; } -pi_result rocm_piQueueRetain(pi_queue command_queue) { +pi_result hip_piQueueRetain(pi_queue command_queue) { assert(command_queue != nullptr); assert(command_queue->get_reference_count() > 0); @@ -2078,7 +2078,7 @@ pi_result rocm_piQueueRetain(pi_queue command_queue) { return PI_SUCCESS; } -pi_result rocm_piQueueRelease(pi_queue command_queue) { +pi_result hip_piQueueRelease(pi_queue command_queue) { assert(command_queue != nullptr); if (command_queue->decrement_reference_count() > 0) { @@ -2102,7 +2102,7 @@ pi_result rocm_piQueueRelease(pi_queue command_queue) { } } -pi_result rocm_piQueueFinish(pi_queue command_queue) { +pi_result hip_piQueueFinish(pi_queue command_queue) { // set default result to a negative result (avoid false-positve tests) pi_result result = PI_OUT_OF_HOST_MEMORY; @@ -2132,8 +2132,8 @@ pi_result rocm_piQueueFinish(pi_queue command_queue) { /// \param[out] nativeHandle Set to the native handle of the PI queue object. /// /// \return PI_SUCCESS -pi_result rocm_piextQueueGetNativeHandle(pi_queue queue, - pi_native_handle *nativeHandle) { +pi_result hip_piextQueueGetNativeHandle(pi_queue queue, + pi_native_handle *nativeHandle) { *nativeHandle = reinterpret_cast(queue->get()); return PI_SUCCESS; } @@ -2150,22 +2150,22 @@ pi_result rocm_piextQueueGetNativeHandle(pi_queue queue, /// /// /// \return TBD -pi_result rocm_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, - pi_queue *queue, - bool ownNativeHandle) { +pi_result hip_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context context, + pi_queue *queue, + bool ownNativeHandle) { (void)ownNativeHandle; cl::sycl::detail::pi::die( "Creation of PI queue from native handle not implemented"); return {}; } -pi_result rocm_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_write, size_t offset, - size_t size, void *ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +pi_result hip_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, + pi_bool blocking_write, size_t offset, + size_t size, void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { assert(buffer != nullptr); assert(command_queue != nullptr); @@ -2176,8 +2176,8 @@ pi_result rocm_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, try { ScopedContext active(command_queue->get_context()); - retErr = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); if (event) { retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native( @@ -2206,12 +2206,12 @@ pi_result rocm_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, return retErr; } -pi_result rocm_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_read, size_t offset, - size_t size, void *ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +pi_result hip_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer, + pi_bool blocking_read, size_t offset, + size_t size, void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { assert(buffer != nullptr); assert(command_queue != nullptr); @@ -2222,8 +2222,8 @@ pi_result rocm_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer, try { ScopedContext active(command_queue->get_context()); - retErr = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); if (event) { retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native( @@ -2253,7 +2253,7 @@ pi_result rocm_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer, return retErr; } -pi_result rocm_piEventsWait(pi_uint32 num_events, const pi_event *event_list) { +pi_result hip_piEventsWait(pi_uint32 num_events, const pi_event *event_list) { try { assert(num_events != 0); @@ -2288,8 +2288,8 @@ pi_result rocm_piEventsWait(pi_uint32 num_events, const pi_event *event_list) { } } -pi_result rocm_piKernelCreate(pi_program program, const char *kernel_name, - pi_kernel *kernel) { +pi_result hip_piKernelCreate(pi_program program, const char *kernel_name, + pi_kernel *kernel) { assert(kernel != nullptr); assert(program != nullptr); @@ -2328,8 +2328,8 @@ pi_result rocm_piKernelCreate(pi_program program, const char *kernel_name, return retErr; } -pi_result rocm_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, const void *arg_value) { +pi_result hip_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, + size_t arg_size, const void *arg_value) { assert(kernel != nullptr); pi_result retErr = PI_SUCCESS; @@ -2345,8 +2345,8 @@ pi_result rocm_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, return retErr; } -pi_result rocm_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, - const pi_mem *arg_value) { +pi_result hip_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, + const pi_mem *arg_value) { assert(kernel != nullptr); assert(arg_value != nullptr); @@ -2381,8 +2381,8 @@ pi_result rocm_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, return retErr; } -pi_result rocm_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, - const pi_sampler *arg_value) { +pi_result hip_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, + const pi_sampler *arg_value) { assert(kernel != nullptr); assert(arg_value != nullptr); @@ -2397,7 +2397,7 @@ pi_result rocm_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, return retErr; } -pi_result rocm_piEnqueueKernelLaunch( +pi_result hip_piEnqueueKernelLaunch( pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, @@ -2419,13 +2419,13 @@ pi_result rocm_piEnqueueKernelLaunch( bool providedLocalWorkGroupSize = (local_work_size != nullptr); { - pi_result retError = rocm_piDeviceGetInfo( + pi_result retError = hip_piDeviceGetInfo( command_queue->device_, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(maxThreadsPerBlock), maxThreadsPerBlock, nullptr); assert(retError == PI_SUCCESS); (void)retError; - retError = rocm_piDeviceGetInfo( + retError = hip_piDeviceGetInfo( command_queue->device_, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, sizeof(maxWorkGroupSize), &maxWorkGroupSize, nullptr); assert(retError == PI_SUCCESS); @@ -2474,23 +2474,23 @@ pi_result rocm_piEnqueueKernelLaunch( hipStream_t hipStream = command_queue->get(); hipFunction_t hipFunc = kernel->get(); - retError = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + retError = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); // Set the implicit global offset parameter if kernel has offset variant if (kernel->get_with_offset_parameter()) { - std::uint32_t rocm_implicit_offset[3] = {0, 0, 0}; + std::uint32_t hip_implicit_offset[3] = {0, 0, 0}; if (global_work_offset) { for (size_t i = 0; i < work_dim; i++) { - rocm_implicit_offset[i] = + hip_implicit_offset[i] = static_cast(global_work_offset[i]); if (global_work_offset[i] != 0) { hipFunc = kernel->get_with_offset_parameter(); } } } - kernel->set_implicit_offset_arg(sizeof(rocm_implicit_offset), - rocm_implicit_offset); + kernel->set_implicit_offset_arg(sizeof(hip_implicit_offset), + hip_implicit_offset); } auto argIndices = kernel->get_arg_indices(); @@ -2521,21 +2521,22 @@ pi_result rocm_piEnqueueKernelLaunch( } /// \TODO Not implemented -pi_result rocm_piEnqueueNativeKernel( - pi_queue queue, void (*user_func)(void *), void *args, size_t cb_args, - pi_uint32 num_mem_objects, const pi_mem *mem_list, - const void **args_mem_loc, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { +pi_result +hip_piEnqueueNativeKernel(pi_queue queue, void (*user_func)(void *), void *args, + size_t cb_args, pi_uint32 num_mem_objects, + const pi_mem *mem_list, const void **args_mem_loc, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { cl::sycl::detail::pi::die("Not implemented in HIP backend"); return {}; } /// \TODO Not implemented -pi_result rocm_piMemImageCreate(pi_context context, pi_mem_flags flags, - const pi_image_format *image_format, - const pi_image_desc *image_desc, void *host_ptr, - pi_mem *ret_mem) { +pi_result hip_piMemImageCreate(pi_context context, pi_mem_flags flags, + const pi_image_format *image_format, + const pi_image_desc *image_desc, void *host_ptr, + pi_mem *ret_mem) { // Need input memory object assert(ret_mem != nullptr); @@ -2548,7 +2549,7 @@ pi_result rocm_piMemImageCreate(pi_context context, pi_mem_flags flags, if (image_format->image_channel_order != pi_image_channel_order::PI_IMAGE_CHANNEL_ORDER_RGBA) { cl::sycl::detail::pi::die( - "rocm_piMemImageCreate only supports RGBA channel order"); + "hip_piMemImageCreate only supports RGBA channel order"); } // We have to use cuArray3DCreate, which has some caveats. The height and @@ -2609,7 +2610,7 @@ pi_result rocm_piMemImageCreate(pi_context context, pi_mem_flags flags, break; default: cl::sycl::detail::pi::die( - "rocm_piMemImageCreate given unsupported image_channel_data_type"); + "hip_piMemImageCreate given unsupported image_channel_data_type"); } // When a dimension isn't used image_desc has the size set to 1 @@ -2686,14 +2687,14 @@ pi_result rocm_piMemImageCreate(pi_context context, pi_mem_flags flags, } /// \TODO Not implemented -pi_result rocm_piMemImageGetInfo(pi_mem image, pi_image_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - cl::sycl::detail::pi::die("rocm_piMemImageGetInfo not implemented"); +pi_result hip_piMemImageGetInfo(pi_mem image, pi_image_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + cl::sycl::detail::pi::die("hip_piMemImageGetInfo not implemented"); return {}; } -pi_result rocm_piMemRetain(pi_mem mem) { +pi_result hip_piMemRetain(pi_mem mem) { assert(mem != nullptr); assert(mem->get_reference_count() > 0); mem->increment_reference_count(); @@ -2701,14 +2702,14 @@ pi_result rocm_piMemRetain(pi_mem mem) { } /// Not used as HIP backend only creates programs from binary. -/// See \ref rocm_piclProgramCreateWithBinary. +/// See \ref hip_piclProgramCreateWithBinary. /// -pi_result rocm_piclProgramCreateWithSource(pi_context context, pi_uint32 count, - const char **strings, - const size_t *lengths, - pi_program *program) { +pi_result hip_piclProgramCreateWithSource(pi_context context, pi_uint32 count, + const char **strings, + const size_t *lengths, + pi_program *program) { cl::sycl::detail::pi::hipPrint( - "rocm_piclProgramCreateWithSource not implemented"); + "hip_piclProgramCreateWithSource not implemented"); return PI_INVALID_OPERATION; } @@ -2716,11 +2717,11 @@ pi_result rocm_piclProgramCreateWithSource(pi_context context, pi_uint32 count, /// used later on to extract functions (kernels). /// See \ref _pi_program for implementation details. /// -pi_result rocm_piProgramBuild(pi_program program, pi_uint32 num_devices, - const pi_device *device_list, const char *options, - void (*pfn_notify)(pi_program program, - void *user_data), - void *user_data) { +pi_result hip_piProgramBuild(pi_program program, pi_uint32 num_devices, + const pi_device *device_list, const char *options, + void (*pfn_notify)(pi_program program, + void *user_data), + void *user_data) { assert(program != nullptr); assert(num_devices == 1 || num_devices == 0); @@ -2741,9 +2742,9 @@ pi_result rocm_piProgramBuild(pi_program program, pi_uint32 num_devices, } /// \TODO Not implemented -pi_result rocm_piProgramCreate(pi_context context, const void *il, - size_t length, pi_program *res_program) { - cl::sycl::detail::pi::die("rocm_piProgramCreate not implemented"); +pi_result hip_piProgramCreate(pi_context context, const void *il, size_t length, + pi_program *res_program) { + cl::sycl::detail::pi::die("hip_piProgramCreate not implemented"); return {}; } @@ -2753,7 +2754,7 @@ pi_result rocm_piProgramCreate(pi_context context, const void *il, /// /// Note: Only supports one device /// -pi_result rocm_piProgramCreateWithBinary( +pi_result hip_piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, size_t num_metadata_entries, const pi_device_binary_property *metadata, @@ -2788,9 +2789,9 @@ pi_result rocm_piProgramCreateWithBinary( return retError; } -pi_result rocm_piProgramGetInfo(pi_program program, pi_program_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piProgramGetInfo(pi_program program, pi_program_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { assert(program != nullptr); switch (param_name) { @@ -2829,7 +2830,7 @@ pi_result rocm_piProgramGetInfo(pi_program program, pi_program_info param_name, /// and the program. /// \TODO Implement asynchronous compilation /// -pi_result rocm_piProgramCompile( +pi_result hip_piProgramCompile( pi_program program, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_headers, const pi_program *input_headers, const char **header_include_names, @@ -2853,10 +2854,10 @@ pi_result rocm_piProgramCompile( return retError; } -pi_result rocm_piProgramGetBuildInfo(pi_program program, pi_device device, - cl_program_build_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piProgramGetBuildInfo(pi_program program, pi_device device, + cl_program_build_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { assert(program != nullptr); @@ -2878,7 +2879,7 @@ pi_result rocm_piProgramGetBuildInfo(pi_program program, pi_device device, return {}; } -pi_result rocm_piProgramRetain(pi_program program) { +pi_result hip_piProgramRetain(pi_program program) { assert(program != nullptr); assert(program->get_reference_count() > 0); program->increment_reference_count(); @@ -2888,13 +2889,13 @@ pi_result rocm_piProgramRetain(pi_program program) { /// Decreases the reference count of a pi_program object. /// When the reference count reaches 0, it unloads the module from /// the context. -pi_result rocm_piProgramRelease(pi_program program) { +pi_result hip_piProgramRelease(pi_program program) { assert(program != nullptr); // double delete or someone is messing with the ref count. // either way, cannot safely proceed. assert(program->get_reference_count() != 0 && - "Reference count overflow detected in rocm_piProgramRelease."); + "Reference count overflow detected in hip_piProgramRelease."); // decrement ref count. If it is 0, delete the program. if (program->decrement_reference_count() == 0) { @@ -2923,8 +2924,8 @@ pi_result rocm_piProgramRelease(pi_program program) { /// \param[out] nativeHandle Set to the native handle of the PI program object. /// /// \return TBD -pi_result rocm_piextProgramGetNativeHandle(pi_program program, - pi_native_handle *nativeHandle) { +pi_result hip_piextProgramGetNativeHandle(pi_program program, + pi_native_handle *nativeHandle) { *nativeHandle = reinterpret_cast(program->get()); return PI_SUCCESS; } @@ -2938,17 +2939,17 @@ pi_result rocm_piextProgramGetNativeHandle(pi_program program, /// \param[out] program Set to the PI program object created from native handle. /// /// \return TBD -pi_result rocm_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, - pi_program *program) { +pi_result hip_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context context, + pi_program *program) { cl::sycl::detail::pi::die( "Creation of PI program from native handle not implemented"); return {}; } -pi_result rocm_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { if (kernel != nullptr) { @@ -2982,12 +2983,12 @@ pi_result rocm_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, return PI_INVALID_KERNEL; } -pi_result rocm_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { - // here we want to query about a kernel's rocm blocks! + // here we want to query about a kernel's hip blocks! if (kernel != nullptr) { @@ -3047,7 +3048,7 @@ pi_result rocm_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, return PI_INVALID_KERNEL; } -pi_result rocm_piKernelGetSubGroupInfo( +pi_result hip_piKernelGetSubGroupInfo( pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { @@ -3070,9 +3071,9 @@ pi_result rocm_piKernelGetSubGroupInfo( HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel->get()) == hipSuccess); int warpSize = 0; - rocm_piKernelGetSubGroupInfo(kernel, device, PI_KERNEL_MAX_SUB_GROUP_SIZE, - 0, nullptr, sizeof(uint32_t), &warpSize, - nullptr); + hip_piKernelGetSubGroupInfo(kernel, device, PI_KERNEL_MAX_SUB_GROUP_SIZE, + 0, nullptr, sizeof(uint32_t), &warpSize, + nullptr); int maxWarps = (max_threads + warpSize - 1) / warpSize; return getInfo(param_value_size, param_value, param_value_size_ret, static_cast(maxWarps)); @@ -3096,7 +3097,7 @@ pi_result rocm_piKernelGetSubGroupInfo( return PI_INVALID_KERNEL; } -pi_result rocm_piKernelRetain(pi_kernel kernel) { +pi_result hip_piKernelRetain(pi_kernel kernel) { assert(kernel != nullptr); assert(kernel->get_reference_count() > 0u); @@ -3104,17 +3105,17 @@ pi_result rocm_piKernelRetain(pi_kernel kernel) { return PI_SUCCESS; } -pi_result rocm_piKernelRelease(pi_kernel kernel) { +pi_result hip_piKernelRelease(pi_kernel kernel) { assert(kernel != nullptr); // double delete or someone is messing with the ref count. // either way, cannot safely proceed. assert(kernel->get_reference_count() != 0 && - "Reference count overflow detected in rocm_piKernelRelease."); + "Reference count overflow detected in hip_piKernelRelease."); // decrement ref count. If it is 0, delete the program. if (kernel->decrement_reference_count() == 0) { - // no internal rocm resources to clean up. Just delete it. + // no internal hip resources to clean up. Just delete it. delete kernel; return PI_SUCCESS; } @@ -3123,16 +3124,15 @@ pi_result rocm_piKernelRelease(pi_kernel kernel) { } // A NOP for the HIP backend -pi_result rocm_piKernelSetExecInfo(pi_kernel kernel, - pi_kernel_exec_info param_name, - size_t param_value_size, - const void *param_value) { +pi_result hip_piKernelSetExecInfo(pi_kernel kernel, + pi_kernel_exec_info param_name, + size_t param_value_size, + const void *param_value) { return PI_SUCCESS; } -pi_result rocm_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, - size_t arg_size, - const void *arg_value) { +pi_result hip_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, + size_t arg_size, const void *arg_value) { kernel->set_kernel_arg(arg_index, arg_size, arg_value); return PI_SUCCESS; } @@ -3140,13 +3140,13 @@ pi_result rocm_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, // // Events // -pi_result rocm_piEventCreate(pi_context context, pi_event *event) { +pi_result hip_piEventCreate(pi_context context, pi_event *event) { cl::sycl::detail::pi::die("PI Event Create not implemented in HIP backend"); } -pi_result rocm_piEventGetInfo(pi_event event, pi_event_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piEventGetInfo(pi_event event, pi_event_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { assert(event != nullptr); switch (param_name) { @@ -3175,11 +3175,11 @@ pi_result rocm_piEventGetInfo(pi_event event, pi_event_info param_name, /// Obtain profiling information from PI HIP events /// \TODO Untie from OpenCL, timings from HIP are only elapsed time. -pi_result rocm_piEventGetProfilingInfo(pi_event event, - pi_profiling_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piEventGetProfilingInfo(pi_event event, + pi_profiling_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { assert(event != nullptr); @@ -3206,40 +3206,39 @@ pi_result rocm_piEventGetProfilingInfo(pi_event event, return {}; } -pi_result rocm_piEventSetCallback(pi_event event, - pi_int32 command_exec_callback_type, - pfn_notify notify, void *user_data) { +pi_result hip_piEventSetCallback(pi_event event, + pi_int32 command_exec_callback_type, + pfn_notify notify, void *user_data) { cl::sycl::detail::pi::die("Event Callback not implemented in HIP backend"); return PI_SUCCESS; } -pi_result rocm_piEventSetStatus(pi_event event, pi_int32 execution_status) { +pi_result hip_piEventSetStatus(pi_event event, pi_int32 execution_status) { cl::sycl::detail::pi::die("Event Set Status not implemented in HIP backend"); return PI_INVALID_VALUE; } -pi_result rocm_piEventRetain(pi_event event) { +pi_result hip_piEventRetain(pi_event event) { assert(event != nullptr); const auto refCount = event->increment_reference_count(); cl::sycl::detail::pi::assertion( - refCount != 0, - "Reference count overflow detected in rocm_piEventRetain."); + refCount != 0, "Reference count overflow detected in hip_piEventRetain."); return PI_SUCCESS; } -pi_result rocm_piEventRelease(pi_event event) { +pi_result hip_piEventRelease(pi_event event) { assert(event != nullptr); // double delete or someone is messing with the ref count. // either way, cannot safely proceed. cl::sycl::detail::pi::assertion( event->get_reference_count() != 0, - "Reference count overflow detected in rocm_piEventRelease."); + "Reference count overflow detected in hip_piEventRelease."); // decrement ref count. If it is 0, delete the event. if (event->decrement_reference_count() == 0) { @@ -3260,10 +3259,10 @@ pi_result rocm_piEventRelease(pi_event event) { /// Enqueues a wait on the given CUstream for all events. /// See \ref enqueueEventWait /// -pi_result rocm_piEnqueueEventsWait(pi_queue command_queue, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +pi_result hip_piEnqueueEventsWait(pi_queue command_queue, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { if (!command_queue) { return PI_INVALID_QUEUE; } @@ -3303,8 +3302,8 @@ pi_result rocm_piEnqueueEventsWait(pi_queue command_queue, /// \param[out] nativeHandle Set to the native handle of the PI event object. /// /// \return PI_SUCCESS on success. PI_INVALID_EVENT if given a user event. -pi_result rocm_piextEventGetNativeHandle(pi_event event, - pi_native_handle *nativeHandle) { +pi_result hip_piextEventGetNativeHandle(pi_event event, + pi_native_handle *nativeHandle) { *nativeHandle = reinterpret_cast(event->get()); return PI_SUCCESS; } @@ -3317,10 +3316,10 @@ pi_result rocm_piextEventGetNativeHandle(pi_event event, /// \param[out] event Set to the PI event object created from native handle. /// /// \return TBD -pi_result rocm_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, - pi_context context, - bool ownNativeHandle, - pi_event *event) { +pi_result hip_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context context, + bool ownNativeHandle, + pi_event *event) { cl::sycl::detail::pi::die( "Creation of PI event from native handle not implemented"); return {}; @@ -3334,9 +3333,9 @@ pi_result rocm_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, /// /// \return PI_SUCCESS on success. PI_INVALID_VALUE if given an invalid property /// or if there is multiple of properties from the same category. -pi_result rocm_piSamplerCreate(pi_context context, - const pi_sampler_properties *sampler_properties, - pi_sampler *result_sampler) { +pi_result hip_piSamplerCreate(pi_context context, + const pi_sampler_properties *sampler_properties, + pi_sampler *result_sampler) { std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler(context)}; bool propSeen[3] = {false, false, false}; @@ -3391,9 +3390,9 @@ pi_result rocm_piSamplerCreate(pi_context context, /// \param[out] param_value_size_ret Set to the size of the information value. /// /// \return PI_SUCCESS on success. -pi_result rocm_piSamplerGetInfo(pi_sampler sampler, cl_sampler_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piSamplerGetInfo(pi_sampler sampler, cl_sampler_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { assert(sampler != nullptr); switch (param_name) { @@ -3432,7 +3431,7 @@ pi_result rocm_piSamplerGetInfo(pi_sampler sampler, cl_sampler_info param_name, /// \param[in] sampler The sampler to increment the reference count of. /// /// \return PI_SUCCESS. -pi_result rocm_piSamplerRetain(pi_sampler sampler) { +pi_result hip_piSamplerRetain(pi_sampler sampler) { assert(sampler != nullptr); sampler->increment_reference_count(); return PI_SUCCESS; @@ -3444,14 +3443,14 @@ pi_result rocm_piSamplerRetain(pi_sampler sampler) { /// \param[in] sampler The sampler to decrement the reference count of. /// /// \return PI_SUCCESS. -pi_result rocm_piSamplerRelease(pi_sampler sampler) { +pi_result hip_piSamplerRelease(pi_sampler sampler) { assert(sampler != nullptr); // double delete or someone is messing with the ref count. // either way, cannot safely proceed. cl::sycl::detail::pi::assertion( sampler->get_reference_count() != 0, - "Reference count overflow detected in rocm_piSamplerRelease."); + "Reference count overflow detected in hip_piSamplerRelease."); // decrement ref count. If it is 0, delete the sampler. if (sampler->decrement_reference_count() == 0) { @@ -3520,7 +3519,7 @@ static pi_result commonEnqueueMemBufferCopyRect( return PI_SUCCESS; } -pi_result rocm_piEnqueueMemBufferReadRect( +pi_result hip_piEnqueueMemBufferReadRect( pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, @@ -3539,8 +3538,8 @@ pi_result rocm_piEnqueueMemBufferReadRect( try { ScopedContext active(command_queue->get_context()); - retErr = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); if (event) { retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native( @@ -3571,7 +3570,7 @@ pi_result rocm_piEnqueueMemBufferReadRect( return retErr; } -pi_result rocm_piEnqueueMemBufferWriteRect( +pi_result hip_piEnqueueMemBufferWriteRect( pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, pi_buff_rect_region region, size_t buffer_row_pitch, @@ -3590,8 +3589,8 @@ pi_result rocm_piEnqueueMemBufferWriteRect( try { ScopedContext active(command_queue->get_context()); - retErr = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); if (event) { retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native( @@ -3622,12 +3621,12 @@ pi_result rocm_piEnqueueMemBufferWriteRect( return retErr; } -pi_result rocm_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, - pi_mem dst_buffer, size_t src_offset, - size_t dst_offset, size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +pi_result hip_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, + pi_mem dst_buffer, size_t src_offset, + size_t dst_offset, size_t size, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { if (!command_queue) { return PI_INVALID_QUEUE; } @@ -3638,8 +3637,8 @@ pi_result rocm_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, ScopedContext active(command_queue->get_context()); if (event_wait_list) { - rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); } pi_result result; @@ -3669,7 +3668,7 @@ pi_result rocm_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, } } -pi_result rocm_piEnqueueMemBufferCopyRect( +pi_result hip_piEnqueueMemBufferCopyRect( pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, @@ -3690,8 +3689,8 @@ pi_result rocm_piEnqueueMemBufferCopyRect( try { ScopedContext active(command_queue->get_context()); - retErr = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + retErr = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); if (event) { retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native( @@ -3715,12 +3714,12 @@ pi_result rocm_piEnqueueMemBufferCopyRect( return retErr; } -pi_result rocm_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, - const void *pattern, size_t pattern_size, - size_t offset, size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +pi_result hip_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, + const void *pattern, size_t pattern_size, + size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { assert(command_queue != nullptr); auto args_are_multiples_of_pattern_size = @@ -3744,8 +3743,8 @@ pi_result rocm_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, ScopedContext active(command_queue->get_context()); if (event_wait_list) { - rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); } pi_result result; @@ -3922,11 +3921,13 @@ static pi_result commonEnqueueMemImageNDCopy( return PI_INVALID_VALUE; } -pi_result rocm_piEnqueueMemImageRead( - pi_queue command_queue, pi_mem image, pi_bool blocking_read, - const size_t *origin, const size_t *region, size_t row_pitch, - size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { +pi_result hip_piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, + pi_bool blocking_read, const size_t *origin, + const size_t *region, size_t row_pitch, + size_t slice_pitch, void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { assert(command_queue != nullptr); assert(image != nullptr); @@ -3939,8 +3940,8 @@ pi_result rocm_piEnqueueMemImageRead( ScopedContext active(command_queue->get_context()); if (event_wait_list) { - rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); } hipArray *array = image->mem_.surface_mem_.get_array(); @@ -3986,13 +3987,14 @@ pi_result rocm_piEnqueueMemImageRead( return retErr; } -pi_result -rocm_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, - pi_bool blocking_write, const size_t *origin, - const size_t *region, size_t input_row_pitch, - size_t input_slice_pitch, const void *ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { +pi_result hip_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, + pi_bool blocking_write, + const size_t *origin, const size_t *region, + size_t input_row_pitch, + size_t input_slice_pitch, const void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { assert(command_queue != nullptr); assert(image != nullptr); @@ -4005,8 +4007,8 @@ rocm_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, ScopedContext active(command_queue->get_context()); if (event_wait_list) { - rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); } hipArray *array = image->mem_.surface_mem_.get_array(); @@ -4050,13 +4052,13 @@ rocm_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, return retErr; } -pi_result rocm_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, - pi_mem dst_image, const size_t *src_origin, - const size_t *dst_origin, - const size_t *region, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +pi_result hip_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, + pi_mem dst_image, const size_t *src_origin, + const size_t *dst_origin, + const size_t *region, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { assert(src_image->mem_type_ == _pi_mem::mem_type::surface); assert(dst_image->mem_type_ == _pi_mem::mem_type::surface); @@ -4070,8 +4072,8 @@ pi_result rocm_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, ScopedContext active(command_queue->get_context()); if (event_wait_list) { - rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); } hipArray *srcArray = src_image->mem_.surface_mem_.get_array(); @@ -4124,13 +4126,13 @@ pi_result rocm_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, } /// \TODO Not implemented in HIP, requires untie from OpenCL -pi_result rocm_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, - const void *fill_color, - const size_t *origin, const size_t *region, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { - cl::sycl::detail::pi::die("rocm_piEnqueueMemImageFill not implemented"); +pi_result hip_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, + const void *fill_color, + const size_t *origin, const size_t *region, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { + cl::sycl::detail::pi::die("hip_piEnqueueMemImageFill not implemented"); return {}; } @@ -4140,13 +4142,13 @@ pi_result rocm_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, /// and no read operation is done. /// \TODO Untie types from OpenCL /// -pi_result rocm_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, - pi_bool blocking_map, - pi_map_flags map_flags, size_t offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event, void **ret_map) { +pi_result hip_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, + pi_bool blocking_map, + pi_map_flags map_flags, size_t offset, + size_t size, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event, void **ret_map) { assert(ret_map != nullptr); assert(command_queue != nullptr); assert(buffer != nullptr); @@ -4170,15 +4172,15 @@ pi_result rocm_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, if (!is_pinned && ((map_flags & PI_MAP_READ) || (map_flags & PI_MAP_WRITE))) { // Pinned host memory is already on host so it doesn't need to be read. - ret_err = rocm_piEnqueueMemBufferRead( + ret_err = hip_piEnqueueMemBufferRead( command_queue, buffer, blocking_map, offset, size, hostPtr, num_events_in_wait_list, event_wait_list, event); } else { ScopedContext active(command_queue->get_context()); if (is_pinned) { - ret_err = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + ret_err = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); } if (event) { @@ -4200,11 +4202,11 @@ pi_result rocm_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, /// Requires the mapped pointer to be already registered in the given memobj. /// If memobj uses pinned host memory, this will not do a write. /// -pi_result rocm_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, - void *mapped_ptr, - pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, - pi_event *event) { +pi_result hip_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, + void *mapped_ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { pi_result ret_err = PI_SUCCESS; assert(command_queue != nullptr); @@ -4222,7 +4224,7 @@ pi_result rocm_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, (memobj->mem_.buffer_mem_.get_map_flags() & PI_MAP_WRITE_INVALIDATE_REGION))) { // Pinned host memory is only on host so it doesn't need to be written to. - ret_err = rocm_piEnqueueMemBufferWrite( + ret_err = hip_piEnqueueMemBufferWrite( command_queue, memobj, true, memobj->mem_.buffer_mem_.get_map_offset(mapped_ptr), memobj->mem_.buffer_mem_.get_size(), mapped_ptr, @@ -4231,8 +4233,8 @@ pi_result rocm_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, ScopedContext active(command_queue->get_context()); if (is_pinned) { - ret_err = rocm_piEnqueueEventsWait(command_queue, num_events_in_wait_list, - event_wait_list, nullptr); + ret_err = hip_piEnqueueEventsWait(command_queue, num_events_in_wait_list, + event_wait_list, nullptr); } if (event) { @@ -4253,9 +4255,9 @@ pi_result rocm_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, /// USM: Implements USM Host allocations using HIP Pinned Memory /// -pi_result rocm_piextUSMHostAlloc(void **result_ptr, pi_context context, - pi_usm_mem_properties *properties, size_t size, - pi_uint32 alignment) { +pi_result hip_piextUSMHostAlloc(void **result_ptr, pi_context context, + pi_usm_mem_properties *properties, size_t size, + pi_uint32 alignment) { assert(result_ptr != nullptr); assert(context != nullptr); assert(properties == nullptr); @@ -4275,10 +4277,10 @@ pi_result rocm_piextUSMHostAlloc(void **result_ptr, pi_context context, /// USM: Implements USM device allocations using a normal HIP device pointer /// -pi_result rocm_piextUSMDeviceAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment) { +pi_result hip_piextUSMDeviceAlloc(void **result_ptr, pi_context context, + pi_device device, + pi_usm_mem_properties *properties, + size_t size, pi_uint32 alignment) { assert(result_ptr != nullptr); assert(context != nullptr); assert(device != nullptr); @@ -4299,10 +4301,10 @@ pi_result rocm_piextUSMDeviceAlloc(void **result_ptr, pi_context context, /// USM: Implements USM Shared allocations using HIP Managed Memory /// -pi_result rocm_piextUSMSharedAlloc(void **result_ptr, pi_context context, - pi_device device, - pi_usm_mem_properties *properties, - size_t size, pi_uint32 alignment) { +pi_result hip_piextUSMSharedAlloc(void **result_ptr, pi_context context, + pi_device device, + pi_usm_mem_properties *properties, + size_t size, pi_uint32 alignment) { assert(result_ptr != nullptr); assert(context != nullptr); assert(device != nullptr); @@ -4324,7 +4326,7 @@ pi_result rocm_piextUSMSharedAlloc(void **result_ptr, pi_context context, /// USM: Frees the given USM pointer associated with the context. /// -pi_result rocm_piextUSMFree(pi_context context, void *ptr) { +pi_result hip_piextUSMFree(pi_context context, void *ptr) { assert(context != nullptr); pi_result result = PI_SUCCESS; @@ -4348,11 +4350,11 @@ pi_result rocm_piextUSMFree(pi_context context, void *ptr) { return result; } -pi_result rocm_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, - size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { +pi_result hip_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, + size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event) { assert(queue != nullptr); assert(ptr != nullptr); @@ -4362,8 +4364,8 @@ pi_result rocm_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, try { ScopedContext active(queue->get_context()); - result = rocm_piEnqueueEventsWait(queue, num_events_in_waitlist, - events_waitlist, nullptr); + result = hip_piEnqueueEventsWait(queue, num_events_in_waitlist, + events_waitlist, nullptr); if (event) { event_ptr = std::unique_ptr<_pi_event>( _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_FILL, queue)); @@ -4383,12 +4385,12 @@ pi_result rocm_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, return result; } -pi_result rocm_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, - void *dst_ptr, const void *src_ptr, - size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { +pi_result hip_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, + void *dst_ptr, const void *src_ptr, + size_t size, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event) { assert(queue != nullptr); assert(dst_ptr != nullptr); @@ -4399,8 +4401,8 @@ pi_result rocm_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, try { ScopedContext active(queue->get_context()); - result = rocm_piEnqueueEventsWait(queue, num_events_in_waitlist, - events_waitlist, nullptr); + result = hip_piEnqueueEventsWait(queue, num_events_in_waitlist, + events_waitlist, nullptr); if (event) { event_ptr = std::unique_ptr<_pi_event>( _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_COPY, queue)); @@ -4424,12 +4426,11 @@ pi_result rocm_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, return result; } -pi_result rocm_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, - size_t size, - pi_usm_migration_flags flags, - pi_uint32 num_events_in_waitlist, - const pi_event *events_waitlist, - pi_event *event) { +pi_result hip_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, + size_t size, pi_usm_migration_flags flags, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event) { assert(queue != nullptr); assert(ptr != nullptr); @@ -4443,8 +4444,8 @@ pi_result rocm_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, try { ScopedContext active(queue->get_context()); - result = rocm_piEnqueueEventsWait(queue, num_events_in_waitlist, - events_waitlist, nullptr); + result = hip_piEnqueueEventsWait(queue, num_events_in_waitlist, + events_waitlist, nullptr); if (event) { event_ptr = std::unique_ptr<_pi_event>( _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_COPY, queue)); @@ -4464,15 +4465,15 @@ pi_result rocm_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, } /// USM: memadvise API to govern behavior of automatic migration mechanisms -pi_result rocm_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, - size_t length, pi_mem_advice advice, - pi_event *event) { +pi_result hip_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, + size_t length, pi_mem_advice advice, + pi_event *event) { assert(queue != nullptr); assert(ptr != nullptr); // TODO implement a mapping to hipMemAdvise once the expected behaviour // of piextUSMEnqueueMemAdvise is detailed in the USM extension - return rocm_piEnqueueEventsWait(queue, 0, nullptr, event); + return hip_piEnqueueEventsWait(queue, 0, nullptr, event); return PI_SUCCESS; } @@ -4493,11 +4494,11 @@ pi_result rocm_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, /// \param param_value_size is the size of the result in bytes /// \param param_value is the result /// \param param_value_ret is how many bytes were written -pi_result rocm_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, - pi_mem_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { +pi_result hip_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, + pi_mem_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { assert(context != nullptr); assert(ptr != nullptr); @@ -4557,7 +4558,7 @@ pi_result rocm_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, static_cast(hipPointerAttributeType.devicePointer); value = *devicePointer; pi_platform platform; - result = rocm_piPlatformsGet(0, &platform, nullptr); + result = hip_piPlatformsGet(0, &platform, nullptr); pi_device device = platform->devices_[value].get(); return getInfo(param_value_size, param_value, param_value_size_ret, device); @@ -4573,7 +4574,7 @@ pi_result rocm_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, // This API is called by Sycl RT to notify the end of the plugin lifetime. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. -pi_result rocm_piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } const char SupportedVersion[] = _PI_H_VERSION_STRING; @@ -4594,122 +4595,120 @@ pi_result piPluginInit(pi_plugin *PluginInit) { sizeof(PluginInit->PiFunctionTable)); // Forward calls to HIP RT. -#define _PI_CL(pi_api, rocm_api) \ - (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&rocm_api); +#define _PI_CL(pi_api, hip_api) \ + (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&hip_api); // Platform - _PI_CL(piPlatformsGet, rocm_piPlatformsGet) - _PI_CL(piPlatformGetInfo, rocm_piPlatformGetInfo) + _PI_CL(piPlatformsGet, hip_piPlatformsGet) + _PI_CL(piPlatformGetInfo, hip_piPlatformGetInfo) // Device - _PI_CL(piDevicesGet, rocm_piDevicesGet) - _PI_CL(piDeviceGetInfo, rocm_piDeviceGetInfo) - _PI_CL(piDevicePartition, rocm_piDevicePartition) - _PI_CL(piDeviceRetain, rocm_piDeviceRetain) - _PI_CL(piDeviceRelease, rocm_piDeviceRelease) - _PI_CL(piextDeviceSelectBinary, rocm_piextDeviceSelectBinary) - _PI_CL(piextGetDeviceFunctionPointer, rocm_piextGetDeviceFunctionPointer) - _PI_CL(piextDeviceGetNativeHandle, rocm_piextDeviceGetNativeHandle) + _PI_CL(piDevicesGet, hip_piDevicesGet) + _PI_CL(piDeviceGetInfo, hip_piDeviceGetInfo) + _PI_CL(piDevicePartition, hip_piDevicePartition) + _PI_CL(piDeviceRetain, hip_piDeviceRetain) + _PI_CL(piDeviceRelease, hip_piDeviceRelease) + _PI_CL(piextDeviceSelectBinary, hip_piextDeviceSelectBinary) + _PI_CL(piextGetDeviceFunctionPointer, hip_piextGetDeviceFunctionPointer) + _PI_CL(piextDeviceGetNativeHandle, hip_piextDeviceGetNativeHandle) _PI_CL(piextDeviceCreateWithNativeHandle, - rocm_piextDeviceCreateWithNativeHandle) + hip_piextDeviceCreateWithNativeHandle) // Context - _PI_CL(piextContextSetExtendedDeleter, rocm_piextContextSetExtendedDeleter) - _PI_CL(piContextCreate, rocm_piContextCreate) - _PI_CL(piContextGetInfo, rocm_piContextGetInfo) - _PI_CL(piContextRetain, rocm_piContextRetain) - _PI_CL(piContextRelease, rocm_piContextRelease) - _PI_CL(piextContextGetNativeHandle, rocm_piextContextGetNativeHandle) + _PI_CL(piextContextSetExtendedDeleter, hip_piextContextSetExtendedDeleter) + _PI_CL(piContextCreate, hip_piContextCreate) + _PI_CL(piContextGetInfo, hip_piContextGetInfo) + _PI_CL(piContextRetain, hip_piContextRetain) + _PI_CL(piContextRelease, hip_piContextRelease) + _PI_CL(piextContextGetNativeHandle, hip_piextContextGetNativeHandle) _PI_CL(piextContextCreateWithNativeHandle, - rocm_piextContextCreateWithNativeHandle) + hip_piextContextCreateWithNativeHandle) // Queue - _PI_CL(piQueueCreate, rocm_piQueueCreate) - _PI_CL(piQueueGetInfo, rocm_piQueueGetInfo) - _PI_CL(piQueueFinish, rocm_piQueueFinish) - _PI_CL(piQueueRetain, rocm_piQueueRetain) - _PI_CL(piQueueRelease, rocm_piQueueRelease) - _PI_CL(piextQueueGetNativeHandle, rocm_piextQueueGetNativeHandle) - _PI_CL(piextQueueCreateWithNativeHandle, - rocm_piextQueueCreateWithNativeHandle) + _PI_CL(piQueueCreate, hip_piQueueCreate) + _PI_CL(piQueueGetInfo, hip_piQueueGetInfo) + _PI_CL(piQueueFinish, hip_piQueueFinish) + _PI_CL(piQueueRetain, hip_piQueueRetain) + _PI_CL(piQueueRelease, hip_piQueueRelease) + _PI_CL(piextQueueGetNativeHandle, hip_piextQueueGetNativeHandle) + _PI_CL(piextQueueCreateWithNativeHandle, hip_piextQueueCreateWithNativeHandle) // Memory - _PI_CL(piMemBufferCreate, rocm_piMemBufferCreate) - _PI_CL(piMemImageCreate, rocm_piMemImageCreate) - _PI_CL(piMemGetInfo, rocm_piMemGetInfo) - _PI_CL(piMemImageGetInfo, rocm_piMemImageGetInfo) - _PI_CL(piMemRetain, rocm_piMemRetain) - _PI_CL(piMemRelease, rocm_piMemRelease) - _PI_CL(piMemBufferPartition, rocm_piMemBufferPartition) - //_PI_CL(piextMemGetNativeHandle, rocm_piextMemGetNativeHandle) - _PI_CL(piextMemCreateWithNativeHandle, rocm_piextMemCreateWithNativeHandle) + _PI_CL(piMemBufferCreate, hip_piMemBufferCreate) + _PI_CL(piMemImageCreate, hip_piMemImageCreate) + _PI_CL(piMemGetInfo, hip_piMemGetInfo) + _PI_CL(piMemImageGetInfo, hip_piMemImageGetInfo) + _PI_CL(piMemRetain, hip_piMemRetain) + _PI_CL(piMemRelease, hip_piMemRelease) + _PI_CL(piMemBufferPartition, hip_piMemBufferPartition) + //_PI_CL(piextMemGetNativeHandle, hip_piextMemGetNativeHandle) + _PI_CL(piextMemCreateWithNativeHandle, hip_piextMemCreateWithNativeHandle) // Program - _PI_CL(piProgramCreate, rocm_piProgramCreate) - _PI_CL(piclProgramCreateWithSource, rocm_piclProgramCreateWithSource) - _PI_CL(piProgramCreateWithBinary, rocm_piProgramCreateWithBinary) - _PI_CL(piProgramGetInfo, rocm_piProgramGetInfo) - _PI_CL(piProgramCompile, rocm_piProgramCompile) - _PI_CL(piProgramBuild, rocm_piProgramBuild) - _PI_CL(piProgramGetBuildInfo, rocm_piProgramGetBuildInfo) - _PI_CL(piProgramRetain, rocm_piProgramRetain) - _PI_CL(piProgramRelease, rocm_piProgramRelease) - _PI_CL(piextProgramGetNativeHandle, rocm_piextProgramGetNativeHandle) + _PI_CL(piProgramCreate, hip_piProgramCreate) + _PI_CL(piclProgramCreateWithSource, hip_piclProgramCreateWithSource) + _PI_CL(piProgramCreateWithBinary, hip_piProgramCreateWithBinary) + _PI_CL(piProgramGetInfo, hip_piProgramGetInfo) + _PI_CL(piProgramCompile, hip_piProgramCompile) + _PI_CL(piProgramBuild, hip_piProgramBuild) + _PI_CL(piProgramGetBuildInfo, hip_piProgramGetBuildInfo) + _PI_CL(piProgramRetain, hip_piProgramRetain) + _PI_CL(piProgramRelease, hip_piProgramRelease) + _PI_CL(piextProgramGetNativeHandle, hip_piextProgramGetNativeHandle) _PI_CL(piextProgramCreateWithNativeHandle, - rocm_piextProgramCreateWithNativeHandle) + hip_piextProgramCreateWithNativeHandle) // Kernel - _PI_CL(piKernelCreate, rocm_piKernelCreate) - _PI_CL(piKernelSetArg, rocm_piKernelSetArg) - _PI_CL(piKernelGetInfo, rocm_piKernelGetInfo) - _PI_CL(piKernelGetGroupInfo, rocm_piKernelGetGroupInfo) - _PI_CL(piKernelGetSubGroupInfo, rocm_piKernelGetSubGroupInfo) - _PI_CL(piKernelRetain, rocm_piKernelRetain) - _PI_CL(piKernelRelease, rocm_piKernelRelease) - _PI_CL(piKernelSetExecInfo, rocm_piKernelSetExecInfo) - _PI_CL(piextKernelSetArgPointer, rocm_piextKernelSetArgPointer) + _PI_CL(piKernelCreate, hip_piKernelCreate) + _PI_CL(piKernelSetArg, hip_piKernelSetArg) + _PI_CL(piKernelGetInfo, hip_piKernelGetInfo) + _PI_CL(piKernelGetGroupInfo, hip_piKernelGetGroupInfo) + _PI_CL(piKernelGetSubGroupInfo, hip_piKernelGetSubGroupInfo) + _PI_CL(piKernelRetain, hip_piKernelRetain) + _PI_CL(piKernelRelease, hip_piKernelRelease) + _PI_CL(piKernelSetExecInfo, hip_piKernelSetExecInfo) + _PI_CL(piextKernelSetArgPointer, hip_piextKernelSetArgPointer) // Event - _PI_CL(piEventCreate, rocm_piEventCreate) - _PI_CL(piEventGetInfo, rocm_piEventGetInfo) - _PI_CL(piEventGetProfilingInfo, rocm_piEventGetProfilingInfo) - _PI_CL(piEventsWait, rocm_piEventsWait) - _PI_CL(piEventSetCallback, rocm_piEventSetCallback) - _PI_CL(piEventSetStatus, rocm_piEventSetStatus) - _PI_CL(piEventRetain, rocm_piEventRetain) - _PI_CL(piEventRelease, rocm_piEventRelease) - _PI_CL(piextEventGetNativeHandle, rocm_piextEventGetNativeHandle) - _PI_CL(piextEventCreateWithNativeHandle, - rocm_piextEventCreateWithNativeHandle) + _PI_CL(piEventCreate, hip_piEventCreate) + _PI_CL(piEventGetInfo, hip_piEventGetInfo) + _PI_CL(piEventGetProfilingInfo, hip_piEventGetProfilingInfo) + _PI_CL(piEventsWait, hip_piEventsWait) + _PI_CL(piEventSetCallback, hip_piEventSetCallback) + _PI_CL(piEventSetStatus, hip_piEventSetStatus) + _PI_CL(piEventRetain, hip_piEventRetain) + _PI_CL(piEventRelease, hip_piEventRelease) + _PI_CL(piextEventGetNativeHandle, hip_piextEventGetNativeHandle) + _PI_CL(piextEventCreateWithNativeHandle, hip_piextEventCreateWithNativeHandle) // Sampler - _PI_CL(piSamplerCreate, rocm_piSamplerCreate) - _PI_CL(piSamplerGetInfo, rocm_piSamplerGetInfo) - _PI_CL(piSamplerRetain, rocm_piSamplerRetain) - _PI_CL(piSamplerRelease, rocm_piSamplerRelease) + _PI_CL(piSamplerCreate, hip_piSamplerCreate) + _PI_CL(piSamplerGetInfo, hip_piSamplerGetInfo) + _PI_CL(piSamplerRetain, hip_piSamplerRetain) + _PI_CL(piSamplerRelease, hip_piSamplerRelease) // Queue commands - _PI_CL(piEnqueueKernelLaunch, rocm_piEnqueueKernelLaunch) - _PI_CL(piEnqueueNativeKernel, rocm_piEnqueueNativeKernel) - _PI_CL(piEnqueueEventsWait, rocm_piEnqueueEventsWait) - _PI_CL(piEnqueueMemBufferRead, rocm_piEnqueueMemBufferRead) - _PI_CL(piEnqueueMemBufferReadRect, rocm_piEnqueueMemBufferReadRect) - _PI_CL(piEnqueueMemBufferWrite, rocm_piEnqueueMemBufferWrite) - _PI_CL(piEnqueueMemBufferWriteRect, rocm_piEnqueueMemBufferWriteRect) - _PI_CL(piEnqueueMemBufferCopy, rocm_piEnqueueMemBufferCopy) - _PI_CL(piEnqueueMemBufferCopyRect, rocm_piEnqueueMemBufferCopyRect) - _PI_CL(piEnqueueMemBufferFill, rocm_piEnqueueMemBufferFill) - _PI_CL(piEnqueueMemImageRead, rocm_piEnqueueMemImageRead) - _PI_CL(piEnqueueMemImageWrite, rocm_piEnqueueMemImageWrite) - _PI_CL(piEnqueueMemImageCopy, rocm_piEnqueueMemImageCopy) - _PI_CL(piEnqueueMemImageFill, rocm_piEnqueueMemImageFill) - _PI_CL(piEnqueueMemBufferMap, rocm_piEnqueueMemBufferMap) - _PI_CL(piEnqueueMemUnmap, rocm_piEnqueueMemUnmap) + _PI_CL(piEnqueueKernelLaunch, hip_piEnqueueKernelLaunch) + _PI_CL(piEnqueueNativeKernel, hip_piEnqueueNativeKernel) + _PI_CL(piEnqueueEventsWait, hip_piEnqueueEventsWait) + _PI_CL(piEnqueueMemBufferRead, hip_piEnqueueMemBufferRead) + _PI_CL(piEnqueueMemBufferReadRect, hip_piEnqueueMemBufferReadRect) + _PI_CL(piEnqueueMemBufferWrite, hip_piEnqueueMemBufferWrite) + _PI_CL(piEnqueueMemBufferWriteRect, hip_piEnqueueMemBufferWriteRect) + _PI_CL(piEnqueueMemBufferCopy, hip_piEnqueueMemBufferCopy) + _PI_CL(piEnqueueMemBufferCopyRect, hip_piEnqueueMemBufferCopyRect) + _PI_CL(piEnqueueMemBufferFill, hip_piEnqueueMemBufferFill) + _PI_CL(piEnqueueMemImageRead, hip_piEnqueueMemImageRead) + _PI_CL(piEnqueueMemImageWrite, hip_piEnqueueMemImageWrite) + _PI_CL(piEnqueueMemImageCopy, hip_piEnqueueMemImageCopy) + _PI_CL(piEnqueueMemImageFill, hip_piEnqueueMemImageFill) + _PI_CL(piEnqueueMemBufferMap, hip_piEnqueueMemBufferMap) + _PI_CL(piEnqueueMemUnmap, hip_piEnqueueMemUnmap) // USM - _PI_CL(piextUSMHostAlloc, rocm_piextUSMHostAlloc) - _PI_CL(piextUSMDeviceAlloc, rocm_piextUSMDeviceAlloc) - _PI_CL(piextUSMSharedAlloc, rocm_piextUSMSharedAlloc) - _PI_CL(piextUSMFree, rocm_piextUSMFree) - _PI_CL(piextUSMEnqueueMemset, rocm_piextUSMEnqueueMemset) - _PI_CL(piextUSMEnqueueMemcpy, rocm_piextUSMEnqueueMemcpy) - _PI_CL(piextUSMEnqueuePrefetch, rocm_piextUSMEnqueuePrefetch) - _PI_CL(piextUSMEnqueueMemAdvise, rocm_piextUSMEnqueueMemAdvise) - _PI_CL(piextUSMGetMemAllocInfo, rocm_piextUSMGetMemAllocInfo) - - _PI_CL(piextKernelSetArgMemObj, rocm_piextKernelSetArgMemObj) - _PI_CL(piextKernelSetArgSampler, rocm_piextKernelSetArgSampler) - _PI_CL(piTearDown, rocm_piTearDown) + _PI_CL(piextUSMHostAlloc, hip_piextUSMHostAlloc) + _PI_CL(piextUSMDeviceAlloc, hip_piextUSMDeviceAlloc) + _PI_CL(piextUSMSharedAlloc, hip_piextUSMSharedAlloc) + _PI_CL(piextUSMFree, hip_piextUSMFree) + _PI_CL(piextUSMEnqueueMemset, hip_piextUSMEnqueueMemset) + _PI_CL(piextUSMEnqueueMemcpy, hip_piextUSMEnqueueMemcpy) + _PI_CL(piextUSMEnqueuePrefetch, hip_piextUSMEnqueuePrefetch) + _PI_CL(piextUSMEnqueueMemAdvise, hip_piextUSMEnqueueMemAdvise) + _PI_CL(piextUSMGetMemAllocInfo, hip_piextUSMGetMemAllocInfo) + + _PI_CL(piextKernelSetArgMemObj, hip_piextKernelSetArgMemObj) + _PI_CL(piextKernelSetArgSampler, hip_piextKernelSetArgSampler) + _PI_CL(piTearDown, hip_piTearDown) #undef _PI_CL diff --git a/sycl/plugins/rocm/pi_rocm.hpp b/sycl/plugins/hip/pi_hip.hpp similarity index 85% rename from sycl/plugins/rocm/pi_rocm.hpp rename to sycl/plugins/hip/pi_hip.hpp index 9888651809675..09048f2570bc3 100644 --- a/sycl/plugins/rocm/pi_rocm.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -1,4 +1,4 @@ -//===-- pi_rocm.hpp - ROCM Plugin -----------------------------------------===// +//===-- pi_hip.hpp - HIP Plugin -------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,17 +6,17 @@ // //===----------------------------------------------------------------------===// -/// \defgroup sycl_pi_rocm ROCM Plugin +/// \defgroup sycl_pi_hip HIP Plugin /// \ingroup sycl_pi -/// \file pi_rocm.hpp -/// Declarations for ROCM Plugin. It is the interface between the -/// device-agnostic SYCL runtime layer and underlying ROCM runtime. +/// \file pi_hip.hpp +/// Declarations for HIP Plugin. It is the interface between the +/// device-agnostic SYCL runtime layer and underlying HIP runtime. /// -/// \ingroup sycl_pi_rocm +/// \ingroup sycl_pi_hip -#ifndef PI_ROCM_HPP -#define PI_ROCM_HPP +#ifndef PI_HIP_HPP +#define PI_HIP_HPP #include "CL/sycl/detail/pi.h" #include @@ -35,23 +35,23 @@ extern "C" { /// \cond INGORE_BLOCK_IN_DOXYGEN -pi_result rocm_piContextRetain(pi_context); -pi_result rocm_piContextRelease(pi_context); -pi_result rocm_piDeviceRelease(pi_device); -pi_result rocm_piDeviceRetain(pi_device); -pi_result rocm_piProgramRetain(pi_program); -pi_result rocm_piProgramRelease(pi_program); -pi_result rocm_piQueueRelease(pi_queue); -pi_result rocm_piQueueRetain(pi_queue); -pi_result rocm_piMemRetain(pi_mem); -pi_result rocm_piMemRelease(pi_mem); -pi_result rocm_piKernelRetain(pi_kernel); -pi_result rocm_piKernelRelease(pi_kernel); +pi_result hip_piContextRetain(pi_context); +pi_result hip_piContextRelease(pi_context); +pi_result hip_piDeviceRelease(pi_device); +pi_result hip_piDeviceRetain(pi_device); +pi_result hip_piProgramRetain(pi_program); +pi_result hip_piProgramRelease(pi_program); +pi_result hip_piQueueRelease(pi_queue); +pi_result hip_piQueueRetain(pi_queue); +pi_result hip_piMemRetain(pi_mem); +pi_result hip_piMemRelease(pi_mem); +pi_result hip_piKernelRetain(pi_kernel); +pi_result hip_piKernelRelease(pi_kernel); /// \endcond } /// A PI platform stores all known PI devices, -/// in the ROCM plugin this is just a vector of +/// in the HIP plugin this is just a vector of /// available devices since initialization is done /// when devices are used. /// @@ -62,7 +62,7 @@ struct _pi_platform { /// PI device mapping to a hipDevice_t. /// Includes an observer pointer to the platform, /// and implements the reference counting semantics since -/// ROCM objects are not refcounted. +/// HIP objects are not refcounted. /// class _pi_device { using native_type = hipDevice_t; @@ -82,32 +82,32 @@ class _pi_device { pi_platform get_platform() const noexcept { return platform_; }; }; -/// PI context mapping to a ROCM context object. +/// PI context mapping to a HIP context object. /// -/// There is no direct mapping between a ROCM context and a PI context, +/// There is no direct mapping between a HIP context and a PI context, /// main differences described below: /// -/// ROCM context vs PI context +/// HIP context vs PI context /// -/// One of the main differences between the PI API and the ROCM driver API is +/// One of the main differences between the PI API and the HIP driver API is /// that the second modifies the state of the threads by assigning /// `hipCtx_t` objects to threads. `hipCtx_t` objects store data associated /// with a given device and control access to said device from the user side. /// PI API context are objects that are passed to functions, and not bound /// to threads. /// The _pi_context object doesn't implement this behavior, only holds the -/// ROCM context data. The RAII object \ref ScopedContext implements the active +/// HIP context data. The RAII object \ref ScopedContext implements the active /// context behavior. /// /// Primary vs User-defined context /// -/// ROCM has two different types of context, the Primary context, +/// HIP has two different types of context, the Primary context, /// which is usable by all threads on a given process for a given device, and /// the aforementioned custom contexts. -/// ROCM documentation, and performance analysis, indicates it is recommended +/// HIP documentation, and performance analysis, indicates it is recommended /// to use Primary context whenever possible. -/// Primary context is used as well by the ROCM Runtime API. -/// For PI applications to interop with ROCM Runtime API, they have to use +/// Primary context is used as well by the HIP Runtime API. +/// For PI applications to interop with HIP Runtime API, they have to use /// the primary context - and make that active in the thread. /// The `_pi_context` object can be constructed with a `kind` parameter /// that allows to construct a Primary or `user-defined` context, so that @@ -136,15 +136,15 @@ struct _pi_context { _pi_device *deviceId_; std::atomic_uint32_t refCount_; - hipEvent_t evBase_; // ROCM event used as base counter + hipEvent_t evBase_; // HIP event used as base counter _pi_context(kind k, hipCtx_t ctxt, _pi_device *devId) : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1}, evBase_(nullptr) { - rocm_piDeviceRetain(deviceId_); + hip_piDeviceRetain(deviceId_); }; - ~_pi_context() { rocm_piDeviceRelease(deviceId_); } + ~_pi_context() { hip_piDeviceRelease(deviceId_); } void invoke_extended_deleters() { std::lock_guard guard(mutex_); @@ -176,8 +176,8 @@ struct _pi_context { std::vector extended_deleters_; }; -/// PI Mem mapping to ROCM memory allocations, both data and texture/surface. -/// \brief Represents non-SVM allocations on the ROCM backend. +/// PI Mem mapping to HIP memory allocations, both data and texture/surface. +/// \brief Represents non-SVM allocations on the HIP backend. /// Keeps tracks of all mapped regions used for Map/Unmap calls. /// Only one region can be active at the same time per allocation. struct _pi_mem { @@ -194,18 +194,18 @@ struct _pi_mem { /// A PI Memory object represents either plain memory allocations ("Buffers" /// in OpenCL) or typed allocations ("Images" in OpenCL). - /// In ROCM their API handlers are different. Whereas "Buffers" are allocated + /// In HIP their API handlers are different. Whereas "Buffers" are allocated /// as pointer-like structs, "Images" are stored in Textures or Surfaces /// This union allows implementation to use either from the same handler. union mem_ { - // Handler for plain, pointer-based ROCM allocations + // Handler for plain, pointer-based HIP allocations struct buffer_mem_ { using native_type = hipDeviceptr_t; // If this allocation is a sub-buffer (i.e., a view on an existing // allocation), this is the pointer to the parent handler structure pi_mem parent_; - // ROCM handler for the pointer + // HIP handler for the pointer native_type ptr_; /// Pointer associated with this device on the host @@ -220,7 +220,7 @@ struct _pi_mem { pi_map_flags mapFlags_; /** alloc_mode - * classic: Just a normal buffer allocated on the device via rocm malloc + * classic: Just a normal buffer allocated on the device via hip malloc * use_host_ptr: Use an address on the host for the device * copy_in: The data for the device comes from the host but the host pointer is not available later for re-use @@ -250,7 +250,7 @@ struct _pi_mem { /// Returns a pointer to data visible on the host that contains /// the data on the device associated with this allocation. - /// The offset is used to index into the ROCM allocation. + /// The offset is used to index into the HIP allocation. /// void *map_to_ptr(size_t offset, pi_map_flags flags) noexcept { assert(mapPtr_ == nullptr); @@ -309,9 +309,9 @@ struct _pi_mem { mem_.buffer_mem_.mapFlags_ = PI_MAP_WRITE; mem_.buffer_mem_.allocMode_ = mode; if (is_sub_buffer()) { - rocm_piMemRetain(mem_.buffer_mem_.parent_); + hip_piMemRetain(mem_.buffer_mem_.parent_); } else { - rocm_piContextRetain(context_); + hip_piContextRetain(context_); } }; @@ -322,17 +322,17 @@ struct _pi_mem { mem_.surface_mem_.array_ = array; mem_.surface_mem_.imageType_ = image_type; mem_.surface_mem_.surfObj_ = surf; - rocm_piContextRetain(context_); + hip_piContextRetain(context_); } ~_pi_mem() { if (mem_type_ == mem_type::buffer) { if (is_sub_buffer()) { - rocm_piMemRelease(mem_.buffer_mem_.parent_); + hip_piMemRelease(mem_.buffer_mem_.parent_); return; } } - rocm_piContextRelease(context_); + hip_piContextRelease(context_); } // TODO: Move as many shared funcs up as possible @@ -369,13 +369,13 @@ struct _pi_queue { pi_queue_properties properties) : stream_{stream}, context_{context}, device_{device}, properties_{properties}, refCount_{1}, eventCount_{0} { - rocm_piContextRetain(context_); - rocm_piDeviceRetain(device_); + hip_piContextRetain(context_); + hip_piDeviceRetain(device_); } ~_pi_queue() { - rocm_piContextRelease(context_); - rocm_piDeviceRelease(device_); + hip_piContextRelease(context_); + hip_piDeviceRelease(device_); } native_type get() const noexcept { return stream_; }; @@ -451,7 +451,7 @@ class _pi_event { // pi_uint64 get_end_time() const; - // construct a native ROCM. This maps closely to the underlying ROCM event. + // construct a native HIP. This maps closely to the underlying HIP event. static pi_event make_native(pi_command_type type, pi_queue queue) { return new _pi_event(type, queue->get_context(), queue); } @@ -462,7 +462,7 @@ class _pi_event { private: // This constructor is private to force programmers to use the make_native / - // make_user static members in order to create a pi_event for ROCM. + // make_user static members in order to create a pi_event for HIP. _pi_event(pi_command_type type, pi_context context, pi_queue queue); pi_command_type commandType_; // The type of command associated with event. @@ -472,7 +472,7 @@ class _pi_event { bool isCompleted_; // Signifies whether the operations have completed // - bool isRecorded_; // Signifies wether a native ROCM event has been recorded + bool isRecorded_; // Signifies wether a native HIP event has been recorded // yet. bool isStarted_; // Signifies wether the operation associated with the // PI event has started or not @@ -480,12 +480,12 @@ class _pi_event { pi_uint32 eventId_; // Queue identifier of the event. - native_type evEnd_; // ROCM event handle. If this _pi_event represents a user + native_type evEnd_; // HIP event handle. If this _pi_event represents a user // event, this will be nullptr. - native_type evStart_; // ROCM event handle associated with the start + native_type evStart_; // HIP event handle associated with the start - native_type evQueued_; // ROCM event handle associated with the time + native_type evQueued_; // HIP event handle associated with the time // the command was enqueued pi_queue queue_; // pi_queue associated with the event. If this is a user @@ -496,7 +496,7 @@ class _pi_event { // with the queue_ member. }; -/// Implementation of PI Program on ROCM Module object +/// Implementation of PI Program on HIP Module object /// struct _pi_program { using native_type = hipModule_t; @@ -530,20 +530,20 @@ struct _pi_program { pi_uint32 get_reference_count() const noexcept { return refCount_; } }; -/// Implementation of a PI Kernel for ROCM +/// Implementation of a PI Kernel for HIP /// /// PI Kernels are used to set kernel arguments, /// creating a state on the Kernel object for a given /// invocation. This is not the case of HIPFunction objects, /// which are simply passed together with the arguments on the invocation. -/// The PI Kernel implementation for ROCM stores the list of arguments, +/// The PI Kernel implementation for HIP stores the list of arguments, /// argument sizes and offsets to emulate the interface of PI Kernel, /// saving the arguments for the later dispatch. /// Note that in PI API, the Local memory is specified as a size per -/// individual argument, but in ROCM only the total usage of shared +/// individual argument, but in HIP only the total usage of shared /// memory is required since it is not passed as a parameter. /// A compiler pass converts the PI API local memory model into the -/// ROCM shared model. This object simply calculates the total of +/// HIP shared model. This object simply calculates the total of /// shared memory, and the initial offsets of each parameter. /// struct _pi_kernel { @@ -559,7 +559,7 @@ struct _pi_kernel { /// Structure that holds the arguments to the kernel. /// Note earch argument size is known, since it comes /// from the kernel signature. - /// This is not something can be queried from the ROCM API + /// This is not something can be queried from the HIP API /// so there is a hard-coded size (\ref MAX_PARAM_BYTES) /// and a storage. /// @@ -630,8 +630,8 @@ struct _pi_kernel { const char *name, pi_program program, pi_context ctxt) : function_{func}, functionWithOffsetParam_{funcWithOffsetParam}, name_{name}, context_{ctxt}, program_{program}, refCount_{1} { - rocm_piProgramRetain(program_); - rocm_piContextRetain(context_); + hip_piProgramRetain(program_); + hip_piContextRetain(context_); } _pi_kernel(hipFunction_t func, const char *name, pi_program program, @@ -639,8 +639,8 @@ struct _pi_kernel { : _pi_kernel{func, nullptr, name, program, ctxt} {} ~_pi_kernel() { - rocm_piProgramRelease(program_); - rocm_piContextRelease(context_); + hip_piProgramRelease(program_); + hip_piContextRelease(context_); } pi_program get_program() const noexcept { return program_; } @@ -668,7 +668,7 @@ struct _pi_kernel { /// Returns the number of arguments, excluding the implicit global offset. /// Note this only returns the current known number of arguments, not the /// real one required by the kernel, since this cannot be queried from - /// the ROCM Driver API + /// the HIP Driver API pi_uint32 get_num_args() const noexcept { return args_.indices_.size() - 1; } void set_kernel_arg(int index, size_t size, const void *arg) { @@ -692,7 +692,7 @@ struct _pi_kernel { void clear_local_size() { args_.clear_local_size(); } }; -/// Implementation of samplers for ROCM +/// Implementation of samplers for HIP /// /// Sampler property layout: /// | 31 30 ... 6 5 | 4 3 2 | 1 | 0 | @@ -716,4 +716,4 @@ struct _pi_sampler { // Helper types and functions // -#endif // PI_ROCM_HPP +#endif // PI_HIP_HPP diff --git a/sycl/plugins/rocm/CMakeLists.txt b/sycl/plugins/rocm/CMakeLists.txt deleted file mode 100644 index bee2599fc237a..0000000000000 --- a/sycl/plugins/rocm/CMakeLists.txt +++ /dev/null @@ -1,83 +0,0 @@ -# Set default PI ROCm platform to AMD -set(SYCL_BUILD_PI_ROCM_PLATFORM "AMD" CACHE STRING "PI ROCm platform, AMD or NVIDIA") - -message(STATUS "Including the PI API ROCM backend for ${SYCL_BUILD_PI_ROCM_PLATFORM}.") - -# Set default ROCm include dirs -set(SYCL_BUILD_PI_ROCM_INCLUDE_DIR "/opt/rocm/hip/include" CACHE STRING "HIP include dir") -set(SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR "/opt/rocm/hsa/include" CACHE STRING "HSA include dir") -set(HIP_HEADERS "${SYCL_BUILD_PI_ROCM_INCLUDE_DIR};${SYCL_BUILD_PI_ROCM_HSA_INCLUDE_DIR}") - -# Create pi_rocm library -add_library(pi_rocm SHARED - "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "${sycl_inc_dir}/CL/sycl/detail/pi.hpp" - "pi_rocm.hpp" - "pi_rocm.cpp" -) -add_dependencies(sycl-toolchain pi_rocm) -set_target_properties(pi_rocm PROPERTIES LINKER_LANGUAGE CXX) -target_link_libraries(pi_rocm PUBLIC OpenCL-Headers) - -# Setup include directories -target_include_directories(pi_rocm - PRIVATE - ${sycl_inc_dir} - ${sycl_plugin_dir} -) - -if("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "AMD") - # Import HIP runtime library - set(SYCL_BUILD_PI_ROCM_AMD_LIBRARY "/opt/rocm/hip/lib/libamdhip64.so" CACHE STRING "HIP AMD runtime library") - add_library(rocmdrv SHARED IMPORTED GLOBAL) - - set_target_properties( - rocmdrv PROPERTIES - IMPORTED_LOCATION ${SYCL_BUILD_PI_ROCM_AMD_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - ) - target_link_libraries(pi_rocm PUBLIC rocmdrv) - - # Set HIP define to select AMD platform - target_compile_definitions(pi_rocm PRIVATE __HIP_PLATFORM_AMD__) - - # Make sure lld is built as part of the toolchain - add_dependencies(sycl-toolchain lld) - elseif("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "NVIDIA") - # Import CUDA libraries - find_package(CUDA REQUIRED) - list(APPEND HIP_HEADERS ${CUDA_INCLUDE_DIRS}) - - # cudadrv may be defined by the CUDA plugin - if(NOT TARGET cudadrv) - add_library(cudadrv SHARED IMPORTED GLOBAL) - set_target_properties( - cudadrv PROPERTIES - IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - ) - endif() - - add_library(cudart SHARED IMPORTED GLOBAL) - set_target_properties( - cudart PROPERTIES - IMPORTED_LOCATION ${CUDA_CUDART_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" - ) - target_link_libraries(pi_rocm PUBLIC cudadrv cudart) - - # Set HIP define to select NVIDIA platform - target_compile_definitions(pi_rocm PRIVATE __HIP_PLATFORM_NVIDIA__) -else() - message(FATAL_ERROR "Unspecified PI ROCM platform please set SYCL_BUILD_PI_ROCM_PLATFORM to 'AMD' or 'NVIDIA'") -endif() - -add_common_options(pi_rocm) - -install(TARGETS pi_rocm - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_rocm - RUNTIME DESTINATION "bin" COMPONENT pi_rocm -) diff --git a/sycl/source/detail/config.cpp b/sycl/source/detail/config.cpp index f7bab10f513cb..1b6d58d5ebce3 100644 --- a/sycl/source/detail/config.cpp +++ b/sycl/source/detail/config.cpp @@ -175,7 +175,7 @@ const std::array, 6> &getSyclBeMap() { {"opencl", backend::opencl}, {"level_zero", backend::level_zero}, {"cuda", backend::cuda}, - {"rocm", backend::rocm}, + {"hip", backend::hip}, {"*", backend::all}}}; return SyclBeMap; } diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 995ba885b46b2..9a89de13ecd88 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -37,5 +37,5 @@ CONFIG(INTEL_ENABLE_OFFLOAD_ANNOTATIONS, 1, __SYCL_INTEL_ENABLE_OFFLOAD_ANNOTATI CONFIG(SYCL_OVERRIDE_PI_OPENCL, 1024, __SYCL_OVERRIDE_PI_OPENCL) CONFIG(SYCL_OVERRIDE_PI_LEVEL_ZERO, 1024, __SYCL_OVERRIDE_PI_LEVEL_ZERO) CONFIG(SYCL_OVERRIDE_PI_CUDA, 1024, __SYCL_OVERRIDE_PI_CUDA) -CONFIG(SYCL_OVERRIDE_PI_ROCM, 1024, __SYCL_OVERRIDE_PI_ROCM) +CONFIG(SYCL_OVERRIDE_PI_HIP, 1024, __SYCL_OVERRIDE_PI_HIP) CONFIG(SYCL_ENABLE_DEFAULT_CONTEXTS, 1, __SYCL_ENABLE_DEFAULT_CONTEXTS) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 8c2b08778c397..b1a22a9b9ab74 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -139,7 +139,7 @@ template <> class SYCLConfig { {"PI_LEVEL_ZERO", backend::level_zero}, {"PI_LEVEL0", backend::level_zero}, // for backward compatibility {"PI_CUDA", backend::cuda}, - {"PI_ROCM", backend::rocm}}}; + {"PI_HIP", backend::hip}}}; if (ValStr) { auto It = std::find_if( std::begin(SyclBeMap), std::end(SyclBeMap), @@ -148,7 +148,7 @@ template <> class SYCLConfig { }); if (It == SyclBeMap.end()) pi::die("Invalid backend. " - "Valid values are PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA/PI_ROCM"); + "Valid values are PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA/PI_HIP"); static backend Backend = It->second; BackendPtr = &Backend; } diff --git a/sycl/source/detail/device_filter.cpp b/sycl/source/detail/device_filter.cpp index 6ff840c578290..ba6b15ba8cb1b 100644 --- a/sycl/source/detail/device_filter.cpp +++ b/sycl/source/detail/device_filter.cpp @@ -76,7 +76,7 @@ device_filter::device_filter(const std::string &FilterString) { std::string Message = std::string("Invalid device filter: ") + FilterString + "\nPossible backend values are " - "{host,opencl,level_zero,cuda,rocm,*}.\n" + "{host,opencl,level_zero,cuda,hip,*}.\n" "Possible device types are {host,cpu,gpu,acc,*}.\n" "Device number should be an non-negative integer.\n"; throw cl::sycl::invalid_parameter_error(Message, PI_INVALID_VALUE); diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 4c5a323c35499..d7745f148c89c 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -289,21 +289,21 @@ std::vector> findPlugins() { const char *CUDAPluginName = SYCLConfig::get() ? SYCLConfig::get() : __SYCL_CUDA_PLUGIN_NAME; - const char *ROCMPluginName = SYCLConfig::get() - ? SYCLConfig::get() - : __SYCL_ROCM_PLUGIN_NAME; + const char *HIPPluginName = SYCLConfig::get() + ? SYCLConfig::get() + : __SYCL_HIP_PLUGIN_NAME; device_filter_list *FilterList = SYCLConfig::get(); if (!FilterList) { PluginNames.emplace_back(OpenCLPluginName, backend::opencl); PluginNames.emplace_back(L0PluginName, backend::level_zero); PluginNames.emplace_back(CUDAPluginName, backend::cuda); - PluginNames.emplace_back(ROCMPluginName, backend::rocm); + PluginNames.emplace_back(HIPPluginName, backend::hip); } else { std::vector Filters = FilterList->get(); bool OpenCLFound = false; bool LevelZeroFound = false; bool CudaFound = false; - bool RocmFound = false; + bool HIPFound = false; for (const device_filter &Filter : Filters) { backend Backend = Filter.Backend; if (!OpenCLFound && @@ -320,9 +320,9 @@ std::vector> findPlugins() { PluginNames.emplace_back(CUDAPluginName, backend::cuda); CudaFound = true; } - if (!RocmFound && (Backend == backend::rocm || Backend == backend::all)) { - PluginNames.emplace_back(ROCMPluginName, backend::rocm); - RocmFound = true; + if (!HIPFound && (Backend == backend::hip || Backend == backend::all)) { + PluginNames.emplace_back(HIPPluginName, backend::hip); + HIPFound = true; } } } @@ -427,11 +427,11 @@ static void initializePlugins(std::vector *Plugins) { // Use the CUDA plugin as the GlobalPlugin GlobalPlugin = std::make_shared(PluginInformation, backend::cuda, Library); - } else if (InteropBE == backend::rocm && - PluginNames[I].first.find("rocm") != std::string::npos) { - // Use the ROCM plugin as the GlobalPlugin + } else if (InteropBE == backend::hip && + PluginNames[I].first.find("hip") != std::string::npos) { + // Use the HIP plugin as the GlobalPlugin GlobalPlugin = - std::make_shared(PluginInformation, backend::rocm, Library); + std::make_shared(PluginInformation, backend::hip, Library); } else if (InteropBE == backend::level_zero && PluginNames[I].first.find("level_zero") != std::string::npos) { // Use the LEVEL_ZERO plugin as the GlobalPlugin diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index 0518112672fae..5f345343cef10 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -77,31 +77,31 @@ if(SYCL_BUILD_PI_CUDA) add_dependencies(check-sycl check-sycl-cuda) endif() -if(SYCL_BUILD_PI_ROCM) - add_custom_target(check-sycl-rocm) - if("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "NVIDIA") - add_lit_testsuite(check-sycl-rocm-ptx "Running device-agnostic SYCL regression tests for ROCm NVidia PTX" +if(SYCL_BUILD_PI_HIP) + add_custom_target(check-sycl-hip) + if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA") + add_lit_testsuite(check-sycl-hip-ptx "Running device-agnostic SYCL regression tests for HIP NVidia PTX" ${CMAKE_CURRENT_BINARY_DIR} ARGS ${RT_TEST_ARGS} - PARAMS "SYCL_TRIPLE=nvptx64-nvidia-cuda;SYCL_PLUGIN=rocm" + PARAMS "SYCL_TRIPLE=nvptx64-nvidia-cuda;SYCL_PLUGIN=hip" DEPENDS ${SYCL_TEST_DEPS} EXCLUDE_FROM_CHECK_ALL ) - add_dependencies(check-sycl-rocm check-sycl-rocm-ptx) - elseif("${SYCL_BUILD_PI_ROCM_PLATFORM}" STREQUAL "AMD") - add_lit_testsuite(check-sycl-rocm-gcn "Running device-agnostic SYCL regression tests for ROCm AMDGCN" + add_dependencies(check-sycl-hip check-sycl-hip-ptx) + elseif("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD") + add_lit_testsuite(check-sycl-hip-gcn "Running device-agnostic SYCL regression tests for HIP AMDGCN" ${CMAKE_CURRENT_BINARY_DIR} ARGS ${RT_TEST_ARGS} - PARAMS "SYCL_TRIPLE=amdgcn-amd-amdhsa;SYCL_PLUGIN=rocm" + PARAMS "SYCL_TRIPLE=amdgcn-amd-amdhsa;SYCL_PLUGIN=hip" DEPENDS ${SYCL_TEST_DEPS} EXCLUDE_FROM_CHECK_ALL ) - add_dependencies(check-sycl-rocm check-sycl-rocm-gcn) + add_dependencies(check-sycl-hip check-sycl-hip-gcn) else() - message(FATAL_ERROR "SYCL_BUILD_PI_ROCM_PLATFORM must be set to either 'AMD' or 'NVIDIA' (set to: '${SYCL_BUILD_PI_ROCM_PLATFORM}')") + message(FATAL_ERROR "SYCL_BUILD_PI_HIP_PLATFORM must be set to either 'AMD' or 'NVIDIA' (set to: '${SYCL_BUILD_PI_HIP_PLATFORM}')") endif() - add_dependencies(check-sycl check-sycl-rocm) + add_dependencies(check-sycl check-sycl-hip) endif() diff --git a/sycl/test/basic_tests/built-ins.cpp b/sycl/test/basic_tests/built-ins.cpp index 8f221dfc5419a..05f0e94e4042c 100644 --- a/sycl/test/basic_tests/built-ins.cpp +++ b/sycl/test/basic_tests/built-ins.cpp @@ -5,7 +5,7 @@ // UNSUPPORTED: cuda // // Hits an assertion with AMD: -// XFAIL: rocm_amd +// XFAIL: hip_amd #include diff --git a/sycl/test/basic_tests/exceptions-SYCL-2020.cpp b/sycl/test/basic_tests/exceptions-SYCL-2020.cpp index 91d8bdbb9ff4b..192e9b4fcc1f5 100644 --- a/sycl/test/basic_tests/exceptions-SYCL-2020.cpp +++ b/sycl/test/basic_tests/exceptions-SYCL-2020.cpp @@ -94,9 +94,9 @@ int main() { sycl::backend_traits::errc someESIMDErrCode{EC}; sycl::errc_for anotherESIMDErrCode{EC}; assert(someESIMDErrCode == anotherESIMDErrCode); - sycl::backend_traits::errc someROCMErrCode{EC}; - sycl::errc_for anotherROCMErrCode{EC}; - assert(someROCMErrCode == anotherROCMErrCode); + sycl::backend_traits::errc someHIPErrCode{EC}; + sycl::errc_for anotherHIPErrCode{EC}; + assert(someHIPErrCode == anotherHIPErrCode); std::cout << "OK" << std::endl; return 0; diff --git a/sycl/test/basic_tests/plugin_overrides_negative.cpp b/sycl/test/basic_tests/plugin_overrides_negative.cpp index cecf969fa07f8..04dd2289c430e 100644 --- a/sycl/test/basic_tests/plugin_overrides_negative.cpp +++ b/sycl/test/basic_tests/plugin_overrides_negative.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl %s -o %t.out -// RUN: env SYCL_OVERRIDE_PI_OPENCL=opencl_test env SYCL_OVERRIDE_PI_LEVEL_ZERO=l0_test env SYCL_OVERRIDE_PI_CUDA=cuda_test env SYCL_OVERRIDE_PI_ROCM=rocm_test env SYCL_PI_TRACE=-1 %t.out > %t.log 2>&1 +// RUN: env SYCL_OVERRIDE_PI_OPENCL=opencl_test env SYCL_OVERRIDE_PI_LEVEL_ZERO=l0_test env SYCL_OVERRIDE_PI_CUDA=cuda_test env SYCL_OVERRIDE_PI_HIP=hip_test env SYCL_PI_TRACE=-1 %t.out > %t.log 2>&1 // RUN: FileCheck %s --input-file %t.log #include @@ -13,4 +13,4 @@ int main() { // CHECK: SYCL_PI_TRACE[all]: Check if plugin is present. Failed to load plugin: opencl_test // CHECK: SYCL_PI_TRACE[all]: Check if plugin is present. Failed to load plugin: l0_test // CHECK: SYCL_PI_TRACE[all]: Check if plugin is present. Failed to load plugin: cuda_test -// CHECK: SYCL_PI_TRACE[all]: Check if plugin is present. Failed to load plugin: rocm_test +// CHECK: SYCL_PI_TRACE[all]: Check if plugin is present. Failed to load plugin: hip_test diff --git a/sycl/test/basic_tests/plugin_overrides_positive.cpp b/sycl/test/basic_tests/plugin_overrides_positive.cpp index 63f44ecdf056e..fee1874431bbc 100644 --- a/sycl/test/basic_tests/plugin_overrides_positive.cpp +++ b/sycl/test/basic_tests/plugin_overrides_positive.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -DFAKE_PLUGIN -shared %s -o %t_fake_plugin.so // RUN: %clangxx -fsycl %s -o %t.out -// RUN: env SYCL_OVERRIDE_PI_OPENCL=%t_fake_plugin.so env SYCL_OVERRIDE_PI_LEVEL_ZERO=%t_fake_plugin.so env SYCL_OVERRIDE_PI_CUDA=%t_fake_plugin.so env SYCL_OVERRIDE_PI_ROCM=%t_fake_plugin.so env SYCL_PI_TRACE=-1 %t.out > %t.log 2>&1 +// RUN: env SYCL_OVERRIDE_PI_OPENCL=%t_fake_plugin.so env SYCL_OVERRIDE_PI_LEVEL_ZERO=%t_fake_plugin.so env SYCL_OVERRIDE_PI_CUDA=%t_fake_plugin.so env SYCL_OVERRIDE_PI_HIP=%t_fake_plugin.so env SYCL_PI_TRACE=-1 %t.out > %t.log 2>&1 // RUN: FileCheck %s --input-file %t.log // REQUIRES: linux diff --git a/sycl/test/esimd/odr.cpp b/sycl/test/esimd/odr.cpp index 464564f6d09ce..315a53d3dac04 100644 --- a/sycl/test/esimd/odr.cpp +++ b/sycl/test/esimd/odr.cpp @@ -9,10 +9,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %t1.o %t2.o -o %t.exe // // CUDA does not support intrinsics generated by the ESIMD compilation path: -// UNSUPPORTED: cuda -// -// Linking issues with AMD: -// XFAIL: rocm_amd +// UNSUPPORTED: cuda || hip_amd #include #include diff --git a/sycl/test/extensions/group-algorithm.cpp b/sycl/test/extensions/group-algorithm.cpp index 7698da76f565c..53359e19d6c81 100644 --- a/sycl/test/extensions/group-algorithm.cpp +++ b/sycl/test/extensions/group-algorithm.cpp @@ -4,7 +4,7 @@ // // Missing __spirv_GroupIAdd, __spirv_GroupAll, __spirv_GroupBroadcast, // __spirv_GroupAny, __spirv_GroupSMin on AMD: -// XFAIL: rocm_amd +// XFAIL: hip_amd // TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3. // That requires either adding a switch to clang (-spirv-max-version=1.3) or diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 1b53db20f3ef1..4b101919a1411 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -104,7 +104,7 @@ config.available_features.add('cuda') if triple == 'amdgcn-amd-amdhsa': - config.available_features.add('rocm_amd') + config.available_features.add('hip_amd') # For AMD the specific GPU has to be specified with --offload-arch if not re.match('.*--offload-arch.*', config.sycl_clang_extra_flags): raise Exception("Error: missing --offload-arch flag when trying to " \ diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt index 737605bf9c4c4..a4f5674826721 100644 --- a/sycl/tools/CMakeLists.txt +++ b/sycl/tools/CMakeLists.txt @@ -24,24 +24,24 @@ target_link_libraries(get_device_count_by_type LevelZeroLoader::Headers OpenCL-ICD ${LEVEL_ZERO_LIBRARY} - # The CUDA and ROCm for NVIDA plugins need cudadrv - $<$,$,$>>:cudadrv> - # The ROCm for AMD plugin needs rocmdrv - $<$,$>:rocmdrv> - # The ROCm for NVIDIA plugin also needs cudart - $<$,$>:cudart> + # The CUDA and HIP for NVIDA plugins need cudadrv + $<$,$,$>>:cudadrv> + # The HIP for AMD plugin needs rocmdrv + $<$,$>:rocmdrv> + # The HIP for NVIDIA plugin also needs cudart + $<$,$>:cudart> ) target_compile_definitions(get_device_count_by_type PRIVATE $<$:USE_PI_CUDA> - $<$:USE_PI_ROCM> - # For ROCm set the HIP define depending on the platform - $<$,$>:__HIP_PLATFORM_AMD__> - $<$,$>:__HIP_PLATFORM_NVIDIA__> + $<$:USE_PI_HIP> + # For HIP set defines depending on the platform + $<$,$>:__HIP_PLATFORM_AMD__> + $<$,$>:__HIP_PLATFORM_NVIDIA__> ) -if(SYCL_BUILD_PI_ROCM) +if(SYCL_BUILD_PI_HIP) target_include_directories(get_device_count_by_type PRIVATE - ${SYCL_BUILD_PI_ROCM_INCLUDE_DIR}) + ${SYCL_BUILD_PI_HIP_INCLUDE_DIR}) endif() diff --git a/sycl/tools/get_device_count_by_type.cpp b/sycl/tools/get_device_count_by_type.cpp index b8d1dbe030869..c22fd90e31259 100644 --- a/sycl/tools/get_device_count_by_type.cpp +++ b/sycl/tools/get_device_count_by_type.cpp @@ -20,9 +20,9 @@ #include #endif // USE_PI_CUDA -#ifdef USE_PI_ROCM +#ifdef USE_PI_HIP #include -#endif // USE_PI_ROCM +#endif // USE_PI_HIP #include #include @@ -36,7 +36,7 @@ static const std::string help = " Help\n" " Example: ./get_device_count_by_type cpu opencl\n" " Supported device types: cpu/gpu/accelerator/default/all\n" - " Supported backends: PI_CUDA/PI_ROCM/PI_OPENCL/PI_LEVEL_ZERO \n" + " Supported backends: PI_CUDA/PI_HIP/PI_OPENCL/PI_LEVEL_ZERO \n" " Output format: :"; // Return the string with all characters translated to lower case. @@ -228,10 +228,10 @@ static bool queryCUDA(cl_device_type deviceType, cl_uint &deviceCount, #endif } -static bool queryROCm(cl_device_type deviceType, cl_uint &deviceCount, - std::string &msg) { +static bool queryHIP(cl_device_type deviceType, cl_uint &deviceCount, + std::string &msg) { deviceCount = 0u; -#ifdef USE_PI_ROCM +#ifdef USE_PI_HIP switch (deviceType) { case CL_DEVICE_TYPE_DEFAULT: // Fall through. case CL_DEVICE_TYPE_ALL: // Fall through. @@ -239,18 +239,18 @@ static bool queryROCm(cl_device_type deviceType, cl_uint &deviceCount, int count = 0; hipError_t err = hipGetDeviceCount(&count); if (err != hipSuccess || count < 0) { - msg = "ERROR: ROCm error querying device count"; + msg = "ERROR: HIP error querying device count"; return false; } if (count < 1) { - msg = "ERROR: ROCm no device found"; + msg = "ERROR: HIP no device found"; return false; } deviceCount = static_cast(count); #if defined(__HIP_PLATFORM_AMD__) - msg = "rocm-amd "; + msg = "hip-amd "; #elif defined(__HIP_PLATFORM_NVIDIA__) - msg = "rocm-nvidia "; + msg = "hip-nvidia "; #else #error("Must define one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif @@ -258,13 +258,13 @@ static bool queryROCm(cl_device_type deviceType, cl_uint &deviceCount, return true; } break; default: - msg = "WARNING: ROCm unsupported device type "; + msg = "WARNING: HIP unsupported device type "; msg += deviceTypeToString(deviceType); return true; } #else (void)deviceType; - msg = "ERROR: ROCm not supported"; + msg = "ERROR: HIP not supported"; deviceCount = 0u; return false; @@ -311,8 +311,8 @@ int main(int argc, char *argv[]) { querySuccess = queryLevelZero(deviceType, deviceCount, msg); } else if (backend == "cuda" || backend == "pi_cuda") { querySuccess = queryCUDA(deviceType, deviceCount, msg); - } else if (backend == "rocm" || backend == "pi_rocm") { - querySuccess = queryROCm(deviceType, deviceCount, msg); + } else if (backend == "hip" || backend == "pi_hip") { + querySuccess = queryHIP(deviceType, deviceCount, msg); } else { msg = "ERROR: Unknown backend " + backend + "\n" + help + "\n"; } diff --git a/sycl/unittests/SYCL2020/KernelBundle.cpp b/sycl/unittests/SYCL2020/KernelBundle.cpp index 175dbb928e1e7..b30b63f0040cf 100644 --- a/sycl/unittests/SYCL2020/KernelBundle.cpp +++ b/sycl/unittests/SYCL2020/KernelBundle.cpp @@ -71,8 +71,8 @@ TEST(KernelBundle, GetKernelBundleFromKernel) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cout << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cout << "Test is not supported on HIP platform, skipping\n"; return; } @@ -109,8 +109,8 @@ TEST(KernelBundle, KernelBundleAndItsDevImageStateConsistency) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cout << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cout << "Test is not supported on HIP platform, skipping\n"; return; } diff --git a/sycl/unittests/SYCL2020/SpecConstDefaultValues.cpp b/sycl/unittests/SYCL2020/SpecConstDefaultValues.cpp index d755e24433454..8bdf85b162c9d 100644 --- a/sycl/unittests/SYCL2020/SpecConstDefaultValues.cpp +++ b/sycl/unittests/SYCL2020/SpecConstDefaultValues.cpp @@ -85,8 +85,8 @@ TEST(SpecConstDefaultValues, DefaultValuesAreSet) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cerr << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cerr << "Test is not supported on HIP platform, skipping\n"; return; } @@ -130,8 +130,8 @@ TEST(SpecConstDefaultValues, DefaultValuesAreOverriden) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cerr << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cerr << "Test is not supported on HIP platform, skipping\n"; return; } diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 714c0f20fec7e..637defd908088 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -167,7 +167,7 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ExpectedValue{ {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "rocm"}}, {{"BackendName", "*"}}}; + {{"BackendName", "hip"}}, {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } diff --git a/sycl/unittests/misc/KernelBuildOptions.cpp b/sycl/unittests/misc/KernelBuildOptions.cpp index 91d84e30c39de..90f3c3235aab6 100644 --- a/sycl/unittests/misc/KernelBuildOptions.cpp +++ b/sycl/unittests/misc/KernelBuildOptions.cpp @@ -223,8 +223,8 @@ TEST(KernelBuildOptions, KernelBundleBasic) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cerr << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cerr << "Test is not supported on HIP platform, skipping\n"; return; } @@ -263,8 +263,8 @@ TEST(KernelBuildOptions, Program) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cerr << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cerr << "Test is not supported on HIP platform, skipping\n"; return; } diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp index d573d4f48a3d8..4be98b9790fac 100644 --- a/sycl/unittests/pi/BackendString.hpp +++ b/sycl/unittests/pi/BackendString.hpp @@ -12,7 +12,7 @@ inline const char *GetBackendString(cl::sycl::backend backend) { #define PI_BACKEND_STR(backend_name) \ case cl::sycl::backend::backend_name: \ return #backend_name - PI_BACKEND_STR(rocm); + PI_BACKEND_STR(hip); PI_BACKEND_STR(cuda); PI_BACKEND_STR(host); PI_BACKEND_STR(opencl); diff --git a/sycl/unittests/pi/EnqueueMemTest.cpp b/sycl/unittests/pi/EnqueueMemTest.cpp index 15a286059b22d..edd1a85070ac3 100644 --- a/sycl/unittests/pi/EnqueueMemTest.cpp +++ b/sycl/unittests/pi/EnqueueMemTest.cpp @@ -74,8 +74,8 @@ class EnqueueMemTest : public testing::TestWithParam { detail::plugin plugin = GetParam(); - if (plugin.getBackend() == sycl::backend::rocm && sizeof(T) > 4) { - std::cerr << "ROCm plugin doesn't support patterns larger than 4 bytes, " + if (plugin.getBackend() == sycl::backend::hip && sizeof(T) > 4) { + std::cerr << "HIP plugin doesn't support patterns larger than 4 bytes, " "skipping\n"; GTEST_SKIP(); } diff --git a/sycl/unittests/program_manager/EliminatedArgMask.cpp b/sycl/unittests/program_manager/EliminatedArgMask.cpp index d64e8c7c0b476..1b4e1c9c3764a 100644 --- a/sycl/unittests/program_manager/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/EliminatedArgMask.cpp @@ -197,8 +197,8 @@ TEST(EliminatedArgMask, KernelBundleWith2Kernels) { } else if (Plt.get_backend() == sycl::backend::cuda) { std::cerr << "Test is not supported on CUDA platform, skipping\n"; return; - } else if (Plt.get_backend() == sycl::backend::rocm) { - std::cout << "Test is not supported on ROCm platform, skipping\n"; + } else if (Plt.get_backend() == sycl::backend::hip) { + std::cout << "Test is not supported on HIP platform, skipping\n"; return; } diff --git a/sycl/unittests/program_manager/itt_annotations.cpp b/sycl/unittests/program_manager/itt_annotations.cpp index 625d90bf024b9..6431c9333ae99 100644 --- a/sycl/unittests/program_manager/itt_annotations.cpp +++ b/sycl/unittests/program_manager/itt_annotations.cpp @@ -257,8 +257,8 @@ TEST(ITTNotify, UseKernelBundle) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cerr << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cerr << "Test is not supported on HIP platform, skipping\n"; return; } @@ -298,8 +298,8 @@ TEST(ITTNotify, VarNotSet) { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cerr << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cerr << "Test is not supported on HIP platform, skipping\n"; return; } diff --git a/sycl/unittests/scheduler/RequiredWGSize.cpp b/sycl/unittests/scheduler/RequiredWGSize.cpp index 3d4fb1ddffb90..dc2530a2eb20f 100644 --- a/sycl/unittests/scheduler/RequiredWGSize.cpp +++ b/sycl/unittests/scheduler/RequiredWGSize.cpp @@ -240,8 +240,8 @@ static void performChecks() { return; } - if (Plt.get_backend() == sycl::backend::rocm) { - std::cerr << "Test is not supported on ROCm platform, skipping\n"; + if (Plt.get_backend() == sycl::backend::hip) { + std::cerr << "Test is not supported on HIP platform, skipping\n"; return; }