From ad4226708ab093241da6fe98183785e96c5fde5d Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 22 May 2023 15:20:33 -0700 Subject: [PATCH 01/12] [SYCL] [L0] Add queue property to select submission mode. --- sycl/include/sycl/detail/pi.h | 2 + .../include/sycl/detail/properties_traits.def | 2172 ++++++++++++++++- sycl/include/sycl/detail/property_helper.hpp | 4 +- .../sycl/properties/queue_properties.def | 5 + sycl/plugins/level_zero/pi_level_zero.cpp | 41 +- sycl/plugins/level_zero/pi_level_zero.hpp | 4 + sycl/source/detail/queue_impl.hpp | 22 + 7 files changed, 2219 insertions(+), 31 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 799e820a1946c..d7c8a49311afd 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -652,6 +652,8 @@ constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_BATCHED_SUBMISSION = (1 << 7); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_IMMEDIATE_SUBMISSION = (1 << 8); // clang-format on typedef enum { diff --git a/sycl/include/sycl/detail/properties_traits.def b/sycl/include/sycl/detail/properties_traits.def index 581380cc51f05..d7c8a49311afd 100644 --- a/sycl/include/sycl/detail/properties_traits.def +++ b/sycl/include/sycl/detail/properties_traits.def @@ -1,19 +1,2153 @@ -__SYCL_PARAM_TRAITS_SPEC(sycl::property::buffer::use_host_ptr) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::buffer::use_mutex) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::buffer::context_bound) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_host_ptr) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_mutex) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::context_bound) -__SYCL_PARAM_TRAITS_SPEC( - sycl::ext::oneapi::property::buffer::use_pinned_host_memory) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::noinit) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::no_init) -__SYCL_PARAM_TRAITS_SPEC( - sycl::property::context::cuda::use_primary_context) // Deprecated -__SYCL_PARAM_TRAITS_SPEC( - sycl::ext::oneapi::cuda::property::context::use_primary_context) // Deprecated -__SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order) -__SYCL_PARAM_TRAITS_SPEC(sycl::property::reduction::initialize_to_identity) -__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_low) -__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_high) -__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal) +//==---------- pi.h - Plugin Interface -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// \defgroup sycl_pi The Plugin Interface +// TODO: link to sphinx page + +/// \file Main Plugin Interface header file. +/// +/// This is the definition of a generic offload Plugin Interface (PI), which is +/// used by the SYCL implementation to connect to multiple device back-ends, +/// e.g. to OpenCL. The interface is intentionally kept C-only for the +/// purpose of having full flexibility and interoperability with different +/// environments. +/// +/// \ingroup sycl_pi + +#ifndef _PI_H_ +#define _PI_H_ + +// Every single change in PI API should be accompanied with the minor +// version increase (+1). In the cases where backward compatibility is not +// maintained there should be a (+1) change to the major version in +// addition to the increase of the minor. +// +// PI version changes log: +// -- Version 1.2: +// 1. (Binary backward compatibility breaks) Two fields added to the +// pi_device_binary_struct structure: +// pi_device_binary_property_set PropertySetsBegin; +// pi_device_binary_property_set PropertySetsEnd; +// 2. A number of types needed to define pi_device_binary_property_set added. +// 3. Added new ownership argument to piextContextCreateWithNativeHandle. +// 4. Add interoperability interfaces for kernel. +// 4.6 Added new ownership argument to piextQueueCreateWithNativeHandle which +// changes the API version from 3.5 to 4.6. +// 5.7 Added new context and ownership arguments to +// piextEventCreateWithNativeHandle +// 6.8 Added new ownership argument to piextProgramCreateWithNativeHandle. Added +// piQueueFlush function. +// 7.9 Added new context and ownership arguments to +// piextMemCreateWithNativeHandle. +// 8.10 Added new optional device argument to piextQueueCreateWithNativeHandle +// 9.11 Use values of OpenCL enums directly, rather than including ``; +// NOTE that this results in a changed API for `piProgramGetBuildInfo`. +// 10.12 Change enum value PI_MEM_ADVICE_UNKNOWN from 0 to 999, and set enum +// PI_MEM_ADVISE_RESET to 0. +// 10.13 Added new PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS queue property. +// 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for +// piDeviceGetInfo. +// 11.15 piEventCreate creates even in the signalled state now. +// 11.16 Add PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE and +// PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH as an extension for +// piDeviceGetInfo. +// 11.17 Added new PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW and +// PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH queue properties. +// 11.18 Add new parameter name PI_EXT_ONEAPI_QUEUE_INFO_EMPTY to +// _pi_queue_info. +// 12.19 Add new PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE piDevicePartition +// scheme. Sub-sub-devices (representing compute slice) creation via +// partitioning by affinity domain is disabled by default and can be temporarily +// restored via SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING +// environment variable. +// 12.20 Added piextQueueCreate API to be used instead of piQueueCreate, also +// added PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES for piDeviceGetInfo. +// Both are needed to support sycl_ext_intel_queue_index extension. +// 12.21 Added new piextUSMEnqueueFill2D, piextUSMEnqueueMemset2D, and +// piextUSMEnqueueMemcpy2D functions. Added new +// PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT, +// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT, and +// PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT context info query +// descriptors. +// 12.22 Add piGetDeviceAndHostTimer to query device wall-clock timestamp +// 12.23 Added new piextEnqueueDeviceGlobalVariableWrite and +// piextEnqueueDeviceGlobalVariableRead functions. +// 12.24 Added new PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG property to the +// _pi_kernel_exec_info. Defined _pi_kernel_cache_config enum with values of +// the new PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG property. +// 12.25 Added PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES and +// PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES for piDeviceGetInfo. +// 12.26 Added piextEnqueueReadHostPipe and piextEnqueueWriteHostPipe functions. +// 12.27 Added new queue create and get APIs for immediate commandlists +// piextQueueCreate2, piextQueueCreateWithNativeHandle2, +// piextQueueGetNativeHandle2 +// 12.28 Added piextMemImageCreateWithNativeHandle for creating images from +// native handles. +// 12.29 Support PI_EXT_PLATFORM_INFO_BACKEND query in piPlatformGetInfo +// 12.30 Added PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT device info query. +// 12.31 Added PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP device +// info query. + +#define _PI_H_VERSION_MAJOR 12 +#define _PI_H_VERSION_MINOR 31 + +#define _PI_STRING_HELPER(a) #a +#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) +#define _PI_TRIPLE_CONCAT(a, b, c) _PI_STRING_HELPER(a.b.c) + +// This is the macro that plugins should all use to define their version. +// _PI_PLUGIN_VERSION_STRING will be printed when environment variable +// SYCL_PI_TRACE is set to 1. PluginVersion should be defined for each plugin +// in plugins/*/pi_*.hpp. PluginVersion should be incremented with each change +// to the plugin. +#define _PI_PLUGIN_VERSION_STRING(PluginVersion) \ + _PI_TRIPLE_CONCAT(_PI_H_VERSION_MAJOR, _PI_H_VERSION_MINOR, PluginVersion) + +#define _PI_H_VERSION_STRING \ + _PI_CONCAT(_PI_H_VERSION_MAJOR, _PI_H_VERSION_MINOR) + +// This will be used to check the major versions of plugins versus the major +// versions of PI. +#define _PI_STRING_SUBSTITUTE(X) _PI_STRING_HELPER(X) +#define _PI_PLUGIN_VERSION_CHECK(PI_API_VERSION, PI_PLUGIN_VERSION) \ + if (strncmp(PI_API_VERSION, PI_PLUGIN_VERSION, \ + sizeof(_PI_STRING_SUBSTITUTE(_PI_H_VERSION_MAJOR))) < 0) { \ + return PI_ERROR_INVALID_OPERATION; \ + } + +// NOTE: This file presents a maping of OpenCL to PI enums, constants and +// typedefs. The general approach taken was to replace `CL_` prefix with `PI_`. +// Please consider this when adding or modifying values, as the strict value +// match is required. +// TODO: We should consider re-implementing PI enums and constants and only +// perform a mapping of PI to OpenCL in the pi_opencl backend. +#include + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +using pi_int32 = int32_t; +using pi_uint32 = uint32_t; +using pi_uint64 = uint64_t; +using pi_bool = pi_uint32; +using pi_bitfield = pi_uint64; +using pi_native_handle = uintptr_t; + +// +// NOTE: prefer to map 1:1 to OpenCL so that no translation is needed +// for PI <-> OpenCL ways. The PI <-> to other BE translation is almost +// always needed anyway. +// +typedef enum { +#define _PI_ERRC(NAME, VAL) NAME = VAL, +#define _PI_ERRC_WITH_MSG(NAME, VAL, MSG) NAME = VAL, +#include +#undef _PI_ERRC +#undef _PI_ERRC_WITH_MSG +} _pi_result; + +typedef enum { + PI_EVENT_COMPLETE = 0x0, + PI_EVENT_RUNNING = 0x1, + PI_EVENT_SUBMITTED = 0x2, + PI_EVENT_QUEUED = 0x3 +} _pi_event_status; + +typedef enum { + PI_PLATFORM_INFO_EXTENSIONS = 0x0904, + PI_PLATFORM_INFO_NAME = 0x0902, + PI_PLATFORM_INFO_PROFILE = 0x0900, + PI_PLATFORM_INFO_VENDOR = 0x0903, + PI_PLATFORM_INFO_VERSION = 0x0901, + PI_EXT_PLATFORM_INFO_BACKEND = 0x21000 // returns pi_platform_backend +} _pi_platform_info; + +typedef enum { + PI_PROGRAM_BUILD_INFO_STATUS = 0x1181, + PI_PROGRAM_BUILD_INFO_OPTIONS = 0x1182, + PI_PROGRAM_BUILD_INFO_LOG = 0x1183, + PI_PROGRAM_BUILD_INFO_BINARY_TYPE = 0x1184 +} _pi_program_build_info; + +typedef enum { + PI_PROGRAM_BUILD_STATUS_NONE = -1, + PI_PROGRAM_BUILD_STATUS_ERROR = -2, + PI_PROGRAM_BUILD_STATUS_SUCCESS = 0, + PI_PROGRAM_BUILD_STATUS_IN_PROGRESS = -3 +} _pi_program_build_status; + +typedef enum { + PI_PROGRAM_BINARY_TYPE_NONE = 0x0, + PI_PROGRAM_BINARY_TYPE_COMPILED_OBJECT = 0x1, + PI_PROGRAM_BINARY_TYPE_LIBRARY = 0x2, + PI_PROGRAM_BINARY_TYPE_EXECUTABLE = 0x4 +} _pi_program_binary_type; + +// NOTE: this is made 64-bit to match the size of cl_device_type to +// make the translation to OpenCL transparent. +// +typedef enum : pi_uint64 { + PI_DEVICE_TYPE_DEFAULT = + (1 << 0), ///< The default device available in the PI plugin. + PI_DEVICE_TYPE_ALL = 0xFFFFFFFF, ///< All devices available in the PI plugin. + PI_DEVICE_TYPE_CPU = (1 << 1), ///< A PI device that is the host processor. + PI_DEVICE_TYPE_GPU = (1 << 2), ///< A PI device that is a GPU. + PI_DEVICE_TYPE_ACC = (1 << 3), ///< A PI device that is a + ///< dedicated accelerator. + PI_DEVICE_TYPE_CUSTOM = (1 << 4) ///< A PI device that is a custom device. +} _pi_device_type; + +typedef enum { + PI_EXT_PLATFORM_BACKEND_UNKNOWN = 0, ///< The backend is not a recognized one + PI_EXT_PLATFORM_BACKEND_LEVEL_ZERO = 1, ///< The backend is Level Zero + PI_EXT_PLATFORM_BACKEND_OPENCL = 2, ///< The backend is OpenCL + PI_EXT_PLATFORM_BACKEND_CUDA = 3, ///< The backend is CUDA + PI_EXT_PLATFORM_BACKEND_HIP = 4, ///< The backend is HIP + PI_EXT_PLATFORM_BACKEND_ESIMD = 5, ///< The backend is ESIMD +} _pi_platform_backend; + +typedef enum { + PI_DEVICE_MEM_CACHE_TYPE_NONE = 0x0, + PI_DEVICE_MEM_CACHE_TYPE_READ_ONLY_CACHE = 0x1, + PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE = 0x2 +} _pi_device_mem_cache_type; + +typedef enum { + PI_DEVICE_LOCAL_MEM_TYPE_LOCAL = 0x1, + PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL = 0x2 +} _pi_device_local_mem_type; + +typedef enum { + PI_DEVICE_INFO_TYPE = 0x1000, + PI_DEVICE_INFO_VENDOR_ID = 0x1001, + PI_DEVICE_INFO_MAX_COMPUTE_UNITS = 0x1002, + PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = 0x1003, + PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = 0x1005, + PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = 0x1004, + PI_DEVICE_INFO_SINGLE_FP_CONFIG = 0x101B, + PI_DEVICE_INFO_HALF_FP_CONFIG = 0x1033, + PI_DEVICE_INFO_DOUBLE_FP_CONFIG = 0x1032, + PI_DEVICE_INFO_QUEUE_PROPERTIES = 0x102A, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = 0x1006, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = 0x1007, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = 0x1008, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = 0x1009, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = 0x100A, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = 0x100B, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = 0x1034, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = 0x1036, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = 0x1037, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = 0x1038, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = 0x1039, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = 0x103A, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = 0x103B, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = 0x103C, + PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = 0x100C, + PI_DEVICE_INFO_ADDRESS_BITS = 0x100D, + PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = 0x1010, + PI_DEVICE_INFO_IMAGE_SUPPORT = 0x1016, + PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = 0x100E, + PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = 0x100F, + PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = 0x1011, + PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = 0x1012, + PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = 0x1013, + PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = 0x1014, + PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = 0x1015, + PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = 0x1040, + PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = 0x1041, + PI_DEVICE_INFO_MAX_SAMPLERS = 0x1018, + PI_DEVICE_INFO_MAX_PARAMETER_SIZE = 0x1017, + PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = 0x1019, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = 0x101C, + PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = 0x101D, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = 0x101E, + PI_DEVICE_INFO_GLOBAL_MEM_SIZE = 0x101F, + PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = 0x1020, + PI_DEVICE_INFO_MAX_CONSTANT_ARGS = 0x1021, + PI_DEVICE_INFO_LOCAL_MEM_TYPE = 0x1022, + PI_DEVICE_INFO_LOCAL_MEM_SIZE = 0x1023, + PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = 0x1024, + PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = 0x1035, + PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = 0x1025, + PI_DEVICE_INFO_ENDIAN_LITTLE = 0x1026, + PI_DEVICE_INFO_AVAILABLE = 0x1027, + PI_DEVICE_INFO_COMPILER_AVAILABLE = 0x1028, + PI_DEVICE_INFO_LINKER_AVAILABLE = 0x103E, + PI_DEVICE_INFO_EXECUTION_CAPABILITIES = 0x1029, + PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = 0x104E, + PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = 0x102A, + PI_DEVICE_INFO_BUILT_IN_KERNELS = 0x103F, + PI_DEVICE_INFO_PLATFORM = 0x1031, + PI_DEVICE_INFO_REFERENCE_COUNT = 0x1047, + PI_DEVICE_INFO_IL_VERSION = 0x105B, + PI_DEVICE_INFO_NAME = 0x102B, + PI_DEVICE_INFO_VENDOR = 0x102C, + PI_DEVICE_INFO_DRIVER_VERSION = 0x102D, + PI_DEVICE_INFO_PROFILE = 0x102E, + PI_DEVICE_INFO_VERSION = 0x102F, + PI_DEVICE_INFO_OPENCL_C_VERSION = 0x103D, + PI_DEVICE_INFO_EXTENSIONS = 0x1030, + PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = 0x1049, + PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = 0x1048, + PI_DEVICE_INFO_PARENT_DEVICE = 0x1042, + PI_DEVICE_INFO_PARTITION_PROPERTIES = 0x1044, + PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = 0x1043, + PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = 0x1045, + PI_DEVICE_INFO_PARTITION_TYPE = 0x1046, + PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 0x105C, + PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 0x105D, + PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 0x4108, + PI_DEVICE_INFO_USM_HOST_SUPPORT = 0x4190, + PI_DEVICE_INFO_USM_DEVICE_SUPPORT = 0x4191, + PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = 0x4192, + PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = 0x4193, + PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = 0x4194, + // Intel UUID extension. + PI_DEVICE_INFO_UUID = 0x106A, + // These are Intel-specific extensions. + PI_DEVICE_INFO_DEVICE_ID = 0x4251, + PI_DEVICE_INFO_PCI_ADDRESS = 0x10020, + PI_DEVICE_INFO_GPU_EU_COUNT = 0x10021, + PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH = 0x10022, + PI_DEVICE_INFO_GPU_SLICES = 0x10023, + PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE = 0x10024, + PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 0x10025, + PI_DEVICE_INFO_MAX_MEM_BANDWIDTH = 0x10026, + PI_DEVICE_INFO_IMAGE_SRGB = 0x10027, + // Return true if sub-device should do its own program build + PI_DEVICE_INFO_BUILD_ON_SUBDEVICE = 0x10028, + PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY = 0x10029, + // Return 0 if device doesn't have any memory modules. Return the minimum of + // the clock rate values if there are several memory modules on the device. + PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE = 0x10030, + // Return 0 if device doesn't have any memory modules. Return the minimum of + // the bus width values if there are several memory modules on the device. + PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH = 0x10031, + // Return 1 if the device doesn't have a notion of a "queue index". Otherwise, + // return the number of queue indices that are available for this device. + PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 0x10032, + PI_DEVICE_INFO_ATOMIC_64 = 0x10110, + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, + PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, + PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, + PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, + // Return whether bfloat16 math functions are supported by device + PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS = 0x1FFFF, + PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000, + PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001, + PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, + PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003, + PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004, + PI_EXT_CODEPLAY_DEVICE_INFO_SUPPORTS_FUSION = 0x20005, + PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x20006, + PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x20007, + PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT = 0x20008, + // The number of max registers per block (device specific) + PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP = 0x20009, +} _pi_device_info; + +typedef enum { + PI_PROGRAM_INFO_REFERENCE_COUNT = 0x1160, + PI_PROGRAM_INFO_CONTEXT = 0x1161, + PI_PROGRAM_INFO_NUM_DEVICES = 0x1162, + PI_PROGRAM_INFO_DEVICES = 0x1163, + PI_PROGRAM_INFO_SOURCE = 0x1164, + PI_PROGRAM_INFO_BINARY_SIZES = 0x1165, + PI_PROGRAM_INFO_BINARIES = 0x1166, + PI_PROGRAM_INFO_NUM_KERNELS = 0x1167, + PI_PROGRAM_INFO_KERNEL_NAMES = 0x1168 +} _pi_program_info; + +typedef enum { + PI_CONTEXT_INFO_DEVICES = 0x1081, + PI_CONTEXT_INFO_PLATFORM = 0x1084, + PI_CONTEXT_INFO_NUM_DEVICES = 0x1083, + PI_CONTEXT_INFO_PROPERTIES = 0x1082, + PI_CONTEXT_INFO_REFERENCE_COUNT = 0x1080, + // Atomics capabilities extensions + PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010, + PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011, + PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10012, + PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10013, + // Native 2D USM memory operation support + PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT = 0x30000, + PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT = 0x30001, + PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT = 0x30002 +} _pi_context_info; + +typedef enum { + PI_QUEUE_INFO_CONTEXT = 0x1090, + PI_QUEUE_INFO_DEVICE = 0x1091, + PI_QUEUE_INFO_DEVICE_DEFAULT = 0x1095, + PI_QUEUE_INFO_PROPERTIES = 0x1093, + PI_QUEUE_INFO_REFERENCE_COUNT = 0x1092, + PI_QUEUE_INFO_SIZE = 0x1094, + // Return 'true' if all commands previously submitted to the queue have + // completed, otherwise return 'false'. + PI_EXT_ONEAPI_QUEUE_INFO_EMPTY = 0x2096 +} _pi_queue_info; + +typedef enum { + PI_KERNEL_INFO_FUNCTION_NAME = 0x1190, + PI_KERNEL_INFO_NUM_ARGS = 0x1191, + PI_KERNEL_INFO_REFERENCE_COUNT = 0x1192, + PI_KERNEL_INFO_CONTEXT = 0x1193, + PI_KERNEL_INFO_PROGRAM = 0x1194, + PI_KERNEL_INFO_ATTRIBUTES = 0x1195 +} _pi_kernel_info; + +typedef enum { + PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0x11B5, + PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = 0x11B0, + PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = 0x11B1, + PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = 0x11B2, + PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 0x11B3, + PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = 0x11B4, + // The number of registers used by the compiled kernel (device specific) + PI_KERNEL_GROUP_INFO_NUM_REGS = 0x10112 +} _pi_kernel_group_info; + +typedef enum { + PI_IMAGE_INFO_FORMAT = 0x1110, + PI_IMAGE_INFO_ELEMENT_SIZE = 0x1111, + PI_IMAGE_INFO_ROW_PITCH = 0x1112, + PI_IMAGE_INFO_SLICE_PITCH = 0x1113, + PI_IMAGE_INFO_WIDTH = 0x1114, + PI_IMAGE_INFO_HEIGHT = 0x1115, + PI_IMAGE_INFO_DEPTH = 0x1116 +} _pi_image_info; + +typedef enum { + PI_KERNEL_MAX_SUB_GROUP_SIZE = 0x2033, + PI_KERNEL_MAX_NUM_SUB_GROUPS = 0x11B9, + PI_KERNEL_COMPILE_NUM_SUB_GROUPS = 0x11BA, + PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL = 0x410A +} _pi_kernel_sub_group_info; + +typedef enum { + PI_EVENT_INFO_COMMAND_QUEUE = 0x11D0, + PI_EVENT_INFO_CONTEXT = 0x11D4, + PI_EVENT_INFO_COMMAND_TYPE = 0x11D1, + PI_EVENT_INFO_COMMAND_EXECUTION_STATUS = 0x11D3, + PI_EVENT_INFO_REFERENCE_COUNT = 0x11D2 +} _pi_event_info; + +typedef enum { + PI_COMMAND_TYPE_NDRANGE_KERNEL = 0x11F0, + PI_COMMAND_TYPE_MEM_BUFFER_READ = 0x11F3, + PI_COMMAND_TYPE_MEM_BUFFER_WRITE = 0x11F4, + PI_COMMAND_TYPE_MEM_BUFFER_COPY = 0x11F5, + PI_COMMAND_TYPE_MEM_BUFFER_MAP = 0x11FB, + PI_COMMAND_TYPE_MEM_BUFFER_UNMAP = 0x11FD, + PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT = 0x1201, + PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT = 0x1202, + PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT = 0x1203, + PI_COMMAND_TYPE_USER = 0x1204, + PI_COMMAND_TYPE_MEM_BUFFER_FILL = 0x1207, + PI_COMMAND_TYPE_IMAGE_READ = 0x11F6, + PI_COMMAND_TYPE_IMAGE_WRITE = 0x11F7, + PI_COMMAND_TYPE_IMAGE_COPY = 0x11F8, + PI_COMMAND_TYPE_NATIVE_KERNEL = 0x11F2, + PI_COMMAND_TYPE_COPY_BUFFER_TO_IMAGE = 0x11FA, + PI_COMMAND_TYPE_COPY_IMAGE_TO_BUFFER = 0x11F9, + PI_COMMAND_TYPE_MAP_IMAGE = 0x11FC, + PI_COMMAND_TYPE_MARKER = 0x11FE, + PI_COMMAND_TYPE_ACQUIRE_GL_OBJECTS = 0x11FF, + PI_COMMAND_TYPE_RELEASE_GL_OBJECTS = 0x1200, + PI_COMMAND_TYPE_BARRIER = 0x1205, + PI_COMMAND_TYPE_MIGRATE_MEM_OBJECTS = 0x1206, + PI_COMMAND_TYPE_FILL_IMAGE = 0x1208, + PI_COMMAND_TYPE_SVM_FREE = 0x1209, + PI_COMMAND_TYPE_SVM_MEMCPY = 0x120A, + PI_COMMAND_TYPE_SVM_MEMFILL = 0x120B, + PI_COMMAND_TYPE_SVM_MAP = 0x120C, + PI_COMMAND_TYPE_SVM_UNMAP = 0x120D, + PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_READ = 0x418E, + PI_COMMAND_TYPE_DEVICE_GLOBAL_VARIABLE_WRITE = 0x418F +} _pi_command_type; + +typedef enum { + PI_MEM_TYPE_BUFFER = 0x10F0, + PI_MEM_TYPE_IMAGE2D = 0x10F1, + PI_MEM_TYPE_IMAGE3D = 0x10F2, + PI_MEM_TYPE_IMAGE2D_ARRAY = 0x10F3, + PI_MEM_TYPE_IMAGE1D = 0x10F4, + PI_MEM_TYPE_IMAGE1D_ARRAY = 0x10F5, + PI_MEM_TYPE_IMAGE1D_BUFFER = 0x10F6 +} _pi_mem_type; + +typedef enum { + // Device-specific value opaque in PI API. + PI_MEM_ADVICE_RESET = 0, + PI_MEM_ADVICE_CUDA_SET_READ_MOSTLY = 101, + PI_MEM_ADVICE_CUDA_UNSET_READ_MOSTLY = 102, + PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION = 103, + PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION = 104, + PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY = 105, + PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY = 106, + PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION_HOST = 107, + PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST = 108, + PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST = 109, + PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST = 110, + PI_MEM_ADVICE_UNKNOWN = 999, +} _pi_mem_advice; + +typedef enum { + PI_IMAGE_CHANNEL_ORDER_A = 0x10B1, + PI_IMAGE_CHANNEL_ORDER_R = 0x10B0, + PI_IMAGE_CHANNEL_ORDER_RG = 0x10B2, + PI_IMAGE_CHANNEL_ORDER_RA = 0x10B3, + PI_IMAGE_CHANNEL_ORDER_RGB = 0x10B4, + PI_IMAGE_CHANNEL_ORDER_RGBA = 0x10B5, + PI_IMAGE_CHANNEL_ORDER_BGRA = 0x10B6, + PI_IMAGE_CHANNEL_ORDER_ARGB = 0x10B7, + PI_IMAGE_CHANNEL_ORDER_ABGR = 0x10C3, + PI_IMAGE_CHANNEL_ORDER_INTENSITY = 0x10B8, + PI_IMAGE_CHANNEL_ORDER_LUMINANCE = 0x10B9, + PI_IMAGE_CHANNEL_ORDER_Rx = 0x10BA, + PI_IMAGE_CHANNEL_ORDER_RGx = 0x10BB, + PI_IMAGE_CHANNEL_ORDER_RGBx = 0x10BC, + PI_IMAGE_CHANNEL_ORDER_sRGBA = 0x10C1 +} _pi_image_channel_order; + +typedef enum { + PI_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0x10D0, + PI_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 0x10D1, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 0x10D2, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 0x10D3, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 0x10D4, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 0x10D5, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010 = 0x10D6, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 0x10D7, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 0x10D8, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 0x10D9, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 0x10DA, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 0x10DB, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 0x10DC, + PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 0x10DD, + PI_IMAGE_CHANNEL_TYPE_FLOAT = 0x10DE +} _pi_image_channel_type; + +typedef enum { PI_BUFFER_CREATE_TYPE_REGION = 0x1220 } _pi_buffer_create_type; + +const pi_bool PI_TRUE = 1; +const pi_bool PI_FALSE = 0; + +typedef enum { + PI_SAMPLER_INFO_REFERENCE_COUNT = 0x1150, + PI_SAMPLER_INFO_CONTEXT = 0x1151, + PI_SAMPLER_INFO_NORMALIZED_COORDS = 0x1152, + PI_SAMPLER_INFO_ADDRESSING_MODE = 0x1153, + PI_SAMPLER_INFO_FILTER_MODE = 0x1154, + PI_SAMPLER_INFO_MIP_FILTER_MODE = 0x1155, + PI_SAMPLER_INFO_LOD_MIN = 0x1156, + PI_SAMPLER_INFO_LOD_MAX = 0x1157 +} _pi_sampler_info; + +typedef enum { + PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 0x1134, + PI_SAMPLER_ADDRESSING_MODE_REPEAT = 0x1133, + PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 0x1131, + PI_SAMPLER_ADDRESSING_MODE_CLAMP = 0x1132, + PI_SAMPLER_ADDRESSING_MODE_NONE = 0x1130 +} _pi_sampler_addressing_mode; + +typedef enum { + PI_SAMPLER_FILTER_MODE_NEAREST = 0x1140, + PI_SAMPLER_FILTER_MODE_LINEAR = 0x1141, +} _pi_sampler_filter_mode; + +using pi_context_properties = intptr_t; + +using pi_device_exec_capabilities = pi_bitfield; +constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = + (1 << 0); +constexpr pi_device_exec_capabilities + PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = (1 << 1); + +using pi_sampler_properties = pi_bitfield; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = + 0x1152; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = 0x1153; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = 0x1154; + +using pi_memory_order_capabilities = pi_bitfield; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED = 0x01; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQUIRE = 0x02; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10; + +using pi_memory_scope_capabilities = pi_bitfield; +constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM = 0x01; +constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP = 0x02; +constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04; +constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08; +constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10; + +typedef enum { + PI_PROFILING_INFO_COMMAND_QUEUED = 0x1280, + PI_PROFILING_INFO_COMMAND_SUBMIT = 0x1281, + PI_PROFILING_INFO_COMMAND_START = 0x1282, + PI_PROFILING_INFO_COMMAND_END = 0x1283 +} _pi_profiling_info; + +// NOTE: this is made 64-bit to match the size of cl_mem_flags to +// make the translation to OpenCL transparent. +// TODO: populate +// +using pi_mem_flags = pi_bitfield; +// Access +constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = (1 << 0); +constexpr pi_mem_flags PI_MEM_ACCESS_READ_ONLY = (1 << 2); +// Host pointer +constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = (1 << 3); +constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = (1 << 5); +constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC = (1 << 4); + +// flags passed to Map operations +using pi_map_flags = pi_bitfield; +constexpr pi_map_flags PI_MAP_READ = (1 << 0); +constexpr pi_map_flags PI_MAP_WRITE = (1 << 1); +constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION = (1 << 2); +// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to +// make the translation to OpenCL transparent. +using pi_mem_properties = pi_bitfield; +constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL = 0x4213; +constexpr pi_mem_properties PI_MEM_PROPERTIES_ALLOC_BUFFER_LOCATION = 0x419E; + +// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to +// make the translation to OpenCL transparent. +using pi_usm_mem_properties = pi_bitfield; +constexpr pi_usm_mem_properties PI_MEM_ALLOC_FLAGS = 0x4195; +constexpr pi_usm_mem_properties PI_MEM_ALLOC_WRTITE_COMBINED = (1 << 0); +constexpr pi_usm_mem_properties PI_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE = + (1 << 1); +constexpr pi_usm_mem_properties PI_MEM_ALLOC_INITIAL_PLACEMENT_HOST = (1 << 2); +// Hints that the device/shared allocation will not be written on device. +constexpr pi_usm_mem_properties PI_MEM_ALLOC_DEVICE_READ_ONLY = (1 << 3); + +constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION = 0x419E; + +// NOTE: queue properties are implemented this way to better support bit +// manipulations +using pi_queue_properties = pi_bitfield; +constexpr pi_queue_properties PI_QUEUE_FLAGS = -1; +constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX = -2; +// clang-format off +constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); +constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1); +constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2); +constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_BATCHED_SUBMISSION = (1 << 7); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_IMMEDIATE_SUBMISSION = (1 << 8); +// clang-format on + +typedef enum { + // No preference for SLM or data cache. + PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT = 0x0, + // Large SLM size. + PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM = 0x1, + // Large General Data size. + PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA = 0x2 +} _pi_kernel_cache_config; + +using pi_result = _pi_result; +using pi_platform_info = _pi_platform_info; +using pi_platform_backend = _pi_platform_backend; +using pi_device_type = _pi_device_type; +using pi_device_mem_cache_type = _pi_device_mem_cache_type; +using pi_device_local_mem_type = _pi_device_local_mem_type; +using pi_device_info = _pi_device_info; +using pi_program_info = _pi_program_info; +using pi_context_info = _pi_context_info; +using pi_queue_info = _pi_queue_info; +using pi_image_info = _pi_image_info; +using pi_kernel_info = _pi_kernel_info; +using pi_kernel_group_info = _pi_kernel_group_info; +using pi_kernel_sub_group_info = _pi_kernel_sub_group_info; +using pi_event_info = _pi_event_info; +using pi_command_type = _pi_command_type; +using pi_mem_type = _pi_mem_type; +using pi_mem_advice = _pi_mem_advice; +using pi_image_channel_order = _pi_image_channel_order; +using pi_image_channel_type = _pi_image_channel_type; +using pi_buffer_create_type = _pi_buffer_create_type; +using pi_sampler_addressing_mode = _pi_sampler_addressing_mode; +using pi_sampler_filter_mode = _pi_sampler_filter_mode; +using pi_sampler_info = _pi_sampler_info; +using pi_event_status = _pi_event_status; +using pi_program_build_info = _pi_program_build_info; +using pi_program_build_status = _pi_program_build_status; +using pi_program_binary_type = _pi_program_binary_type; +using pi_kernel_info = _pi_kernel_info; +using pi_profiling_info = _pi_profiling_info; +using pi_kernel_cache_config = _pi_kernel_cache_config; + +// For compatibility with OpenCL define this not as enum. +using pi_device_partition_property = intptr_t; +static constexpr pi_device_partition_property PI_DEVICE_PARTITION_EQUALLY = + 0x1086; +static constexpr pi_device_partition_property PI_DEVICE_PARTITION_BY_COUNTS = + 0x1087; +static constexpr pi_device_partition_property + PI_DEVICE_PARTITION_BY_COUNTS_LIST_END = 0x0; +static constexpr pi_device_partition_property + PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = 0x1088; +static constexpr pi_device_partition_property + PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE = 0x1089; + +// For compatibility with OpenCL define this not as enum. +using pi_device_affinity_domain = pi_bitfield; +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NUMA = + (1 << 0); +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE = + (1 << 1); +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE = + (1 << 2); +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE = + (1 << 3); +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE = + (1 << 4); +static constexpr pi_device_affinity_domain + PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = (1 << 5); + +// For compatibility with OpenCL define this not as enum. +using pi_device_fp_config = pi_bitfield; +static constexpr pi_device_fp_config PI_FP_DENORM = (1 << 0); +static constexpr pi_device_fp_config PI_FP_INF_NAN = (1 << 1); +static constexpr pi_device_fp_config PI_FP_ROUND_TO_NEAREST = (1 << 2); +static constexpr pi_device_fp_config PI_FP_ROUND_TO_ZERO = (1 << 3); +static constexpr pi_device_fp_config PI_FP_ROUND_TO_INF = (1 << 4); +static constexpr pi_device_fp_config PI_FP_FMA = (1 << 5); +static constexpr pi_device_fp_config PI_FP_SOFT_FLOAT = (1 << 6); +static constexpr pi_device_fp_config PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = + (1 << 7); + +// For compatibility with OpenCL define this not as enum. +using pi_device_exec_capabilities = pi_bitfield; +static constexpr pi_device_exec_capabilities PI_EXEC_KERNEL = (1 << 0); +static constexpr pi_device_exec_capabilities PI_EXEC_NATIVE_KERNEL = (1 << 1); + +// Entry type, matches OpenMP for compatibility +struct _pi_offload_entry_struct { + void *addr; + char *name; + size_t size; + int32_t flags; + int32_t reserved; +}; + +using _pi_offload_entry = _pi_offload_entry_struct *; + +// A type of a binary image property. +typedef enum { + PI_PROPERTY_TYPE_UNKNOWN, + PI_PROPERTY_TYPE_UINT32, // 32-bit integer + PI_PROPERTY_TYPE_BYTE_ARRAY, // byte array + PI_PROPERTY_TYPE_STRING // null-terminated string +} pi_property_type; + +// Device binary image property. +// If the type size of the property value is fixed and is no greater than +// 64 bits, then ValAddr is 0 and the value is stored in the ValSize field. +// Example - PI_PROPERTY_TYPE_UINT32, which is 32-bit +struct _pi_device_binary_property_struct { + char *Name; // null-terminated property name + void *ValAddr; // address of property value + uint32_t Type; // _pi_property_type + uint64_t ValSize; // size of property value in bytes +}; + +typedef _pi_device_binary_property_struct *pi_device_binary_property; + +// Named array of properties. +struct _pi_device_binary_property_set_struct { + char *Name; // the name + pi_device_binary_property PropertiesBegin; // array start + pi_device_binary_property PropertiesEnd; // array end +}; + +typedef _pi_device_binary_property_set_struct *pi_device_binary_property_set; + +/// Types of device binary. +using pi_device_binary_type = uint8_t; +// format is not determined +static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE = 0; +// specific to a device +static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE = 1; +// portable binary types go next +// SPIR-V +static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV = 2; +// LLVM bitcode +static constexpr pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3; + +// Device binary descriptor version supported by this library. +static const uint16_t PI_DEVICE_BINARY_VERSION = 1; + +// The kind of offload model the binary employs; must be 4 for SYCL +static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; + +/// Target identification strings for +/// pi_device_binary_struct.DeviceTargetSpec +/// +/// A device type represented by a particular target +/// triple requires specific binary images. We need +/// to map the image type onto the device target triple +/// +#define __SYCL_PI_DEVICE_BINARY_TARGET_UNKNOWN "" +/// SPIR-V 32-bit image <-> "spir", 32-bit OpenCL device +#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV32 "spir" +/// SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device +#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64 "spir64" +/// Device-specific binary images produced from SPIR-V 64-bit <-> +/// various "spir64_*" triples for specific 64-bit OpenCL devices +#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64 "spir64_x86_64" +#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN "spir64_gen" +#define __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA "spir64_fpga" +/// PTX 64-bit image <-> "nvptx64", 64-bit NVIDIA PTX device +#define __SYCL_PI_DEVICE_BINARY_TARGET_NVPTX64 "nvptx64" +#define __SYCL_PI_DEVICE_BINARY_TARGET_AMDGCN "amdgcn" + +/// Extension to denote native support of assert feature by an arbitrary device +/// piDeviceGetInfo call should return this extension when the device supports +/// native asserts if supported extensions' names are requested +#define PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT \ + "pi_ext_intel_devicelib_assert" + +/// Device binary image property set names recognized by the SYCL runtime. +/// Name must be consistent with +/// PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS defined in +/// PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SPEC_CONST_MAP "SYCL/specialization constants" +/// PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES defined in +/// PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SPEC_CONST_DEFAULT_VALUES_MAP \ + "SYCL/specialization constants default values" +/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" +/// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt" +/// PropertySetRegistry::SYCL_KERNEL_PROGRAM_METADATA defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_PROGRAM_METADATA "SYCL/program metadata" +/// PropertySetRegistry::SYCL_MISC_PROP defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SYCL_MISC_PROP "SYCL/misc properties" +/// PropertySetRegistry::SYCL_ASSERT_USED defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED "SYCL/assert used" +/// PropertySetRegistry::SYCL_EXPORTED_SYMBOLS defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS "SYCL/exported symbols" +/// PropertySetRegistry::SYCL_DEVICE_GLOBALS defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SYCL_DEVICE_GLOBALS "SYCL/device globals" +/// PropertySetRegistry::SYCL_DEVICE_REQUIREMENTS defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS \ + "SYCL/device requirements" +/// PropertySetRegistry::SYCL_HOST_PIPES defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SYCL_HOST_PIPES "SYCL/host pipes" + +/// Program metadata tags recognized by the PI backends. For kernels the tag +/// must appear after the kernel name. +#define __SYCL_PI_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE \ + "@reqd_work_group_size" +#define __SYCL_PI_PROGRAM_METADATA_GLOBAL_ID_MAPPING "@global_id_mapping" + +/// This struct is a record of the device binary information. If the Kind field +/// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec +/// field can still be specific and denote e.g. FPGA target. It must match the +/// __tgt_device_image structure generated by the clang-offload-wrapper tool +/// when their Version field match. +struct pi_device_binary_struct { + /// version of this structure - for backward compatibility; + /// all modifications which change order/type/offsets of existing fields + /// should increment the version. + uint16_t Version; + /// the type of offload model the binary employs; must be 4 for SYCL + uint8_t Kind; + /// format of the binary data - SPIR-V, LLVM IR bitcode,... + uint8_t Format; + /// null-terminated string representation of the device's target architecture + /// which holds one of: + /// __SYCL_PI_DEVICE_BINARY_TARGET_UNKNOWN - unknown + /// __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV32 - general value for 32-bit OpenCL + /// devices + /// __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64 - general value for 64-bit OpenCL + /// devices + /// __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64 - 64-bit OpenCL CPU device + /// __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_GEN - GEN GPU device (64-bit + /// OpenCL) + /// __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA - 64-bit OpenCL FPGA device + const char *DeviceTargetSpec; + /// a null-terminated string; target- and compiler-specific options + /// which are suggested to use to "compile" program at runtime + const char *CompileOptions; + /// a null-terminated string; target- and compiler-specific options + /// which are suggested to use to "link" program at runtime + const char *LinkOptions; + /// Pointer to the manifest data start + const char *ManifestStart; + /// Pointer to the manifest data end + const char *ManifestEnd; + /// Pointer to the target code start + const unsigned char *BinaryStart; + /// Pointer to the target code end + const unsigned char *BinaryEnd; + /// the offload entry table + _pi_offload_entry EntriesBegin; + _pi_offload_entry EntriesEnd; + // Array of preperty sets; e.g. specialization constants symbol-int ID map is + // propagated to runtime with this mechanism. + pi_device_binary_property_set PropertySetsBegin; + pi_device_binary_property_set PropertySetsEnd; + // TODO Other fields like entries, link options can be propagated using + // the property set infrastructure. This will improve binary compatibility and + // add flexibility. +}; +using pi_device_binary = pi_device_binary_struct *; + +// pi_buffer_region structure repeats cl_buffer_region, used for sub buffers. +struct pi_buffer_region_struct { + size_t origin; + size_t size; +}; +using pi_buffer_region = pi_buffer_region_struct *; + +// pi_buff_rect_offset structure is 3D offset argument passed to buffer rect +// operations (piEnqueueMemBufferCopyRect, etc). +struct pi_buff_rect_offset_struct { + size_t x_bytes; + size_t y_scalar; + size_t z_scalar; +}; +using pi_buff_rect_offset = pi_buff_rect_offset_struct *; + +// pi_buff_rect_region structure represents size of 3D region passed to buffer +// rect operations (piEnqueueMemBufferCopyRect, etc). +struct pi_buff_rect_region_struct { + size_t width_bytes; + size_t height_scalar; + size_t depth_scalar; +}; +using pi_buff_rect_region = pi_buff_rect_region_struct *; + +// pi_image_offset structure is 3D offset argument passed to image operations +// (piEnqueueMemImageRead, etc). +struct pi_image_offset_struct { + size_t x; + size_t y; + size_t z; +}; +using pi_image_offset = pi_image_offset_struct *; + +// pi_image_region structure represents size of 3D region passed to image +// operations (piEnqueueMemImageRead, etc). +struct pi_image_region_struct { + size_t width; + size_t height; + size_t depth; +}; +using pi_image_region = pi_image_region_struct *; + +// Offload binaries descriptor version supported by this library. +static const uint16_t PI_DEVICE_BINARIES_VERSION = 1; + +/// This struct is a record of all the device code that may be offloaded. +/// It must match the __tgt_bin_desc structure generated by +/// the clang-offload-wrapper tool when their Version field match. +struct pi_device_binaries_struct { + /// version of this structure - for backward compatibility; + /// all modifications which change order/type/offsets of existing fields + /// should increment the version. + uint16_t Version; + /// Number of device binaries in this descriptor + uint16_t NumDeviceBinaries; + /// Device binaries data + pi_device_binary DeviceBinaries; + /// the offload entry table (not used, for compatibility with OpenMP) + _pi_offload_entry *HostEntriesBegin; + _pi_offload_entry *HostEntriesEnd; +}; +using pi_device_binaries = pi_device_binaries_struct *; + +// Opaque types that make reading build log errors easier. +struct _pi_platform; +struct _pi_device; +struct _pi_context; +struct _pi_queue; +struct _pi_mem; +struct _pi_program; +struct _pi_kernel; +struct _pi_event; +struct _pi_sampler; + +using pi_platform = _pi_platform *; +using pi_device = _pi_device *; +using pi_context = _pi_context *; +using pi_queue = _pi_queue *; +using pi_mem = _pi_mem *; +using pi_program = _pi_program *; +using pi_kernel = _pi_kernel *; +using pi_event = _pi_event *; +using pi_sampler = _pi_sampler *; + +typedef struct { + pi_image_channel_order image_channel_order; + pi_image_channel_type image_channel_data_type; +} _pi_image_format; + +typedef struct { + pi_mem_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + pi_uint32 num_mip_levels; + pi_uint32 num_samples; + pi_mem buffer; +} _pi_image_desc; + +using pi_image_format = _pi_image_format; +using pi_image_desc = _pi_image_desc; + +typedef enum { PI_MEM_CONTEXT = 0x1106, PI_MEM_SIZE = 0x1102 } _pi_mem_info; + +using pi_mem_info = _pi_mem_info; + +// +// Following section contains SYCL RT Plugin Interface (PI) functions. +// They are 3 distinct categories: +// +// 1) Ones having direct analogy in OpenCL and needed for the core SYCL +// functionality are started with just "pi" prefix in their names. +// 2) Those having direct analogy in OpenCL but only needed for SYCL +// interoperability with OpenCL are started with "picl" prefix. +// 3) Functions having no direct analogy in OpenCL, started with "piext". +// +// TODO: describe interfaces in Doxygen format +// + +struct _pi_plugin; +using pi_plugin = _pi_plugin; + +// PI Plugin Initialise. +// Plugin will check the PI version of Plugin Interface, +// populate the PI Version it supports, update targets field and populate +// PiFunctionTable with Supported APIs. The pointers are in a predetermined +// order in pi.def file. +__SYCL_EXPORT pi_result piPluginInit(pi_plugin *plugin_info); + +// +// Platform +// +__SYCL_EXPORT pi_result piPlatformsGet(pi_uint32 num_entries, + pi_platform *platforms, + pi_uint32 *num_platforms); + +__SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform platform, + pi_platform_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +/// Gets the native handle of a PI platform object. +/// +/// \param platform is the PI platform to get the native handle of. +/// \param nativeHandle is the native handle of platform. +__SYCL_EXPORT pi_result piextPlatformGetNativeHandle( + pi_platform platform, pi_native_handle *nativeHandle); + +/// Creates PI platform object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI device from. +/// \param platform is the PI platform created from the native handle. +__SYCL_EXPORT pi_result piextPlatformCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_platform *platform); + +__SYCL_EXPORT pi_result piDevicesGet(pi_platform platform, + pi_device_type device_type, + pi_uint32 num_entries, pi_device *devices, + pi_uint32 *num_devices); + +/// Returns requested info for provided native device +/// Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT for +/// PI_DEVICE_INFO_EXTENSIONS query when the device supports native asserts +__SYCL_EXPORT pi_result piDeviceGetInfo(pi_device device, + pi_device_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piDeviceRetain(pi_device device); + +__SYCL_EXPORT pi_result piDeviceRelease(pi_device device); + +__SYCL_EXPORT pi_result piDevicePartition( + pi_device device, const pi_device_partition_property *properties, + pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices); + +/// Gets the native handle of a PI device object. +/// +/// \param device is the PI device to get the native handle of. +/// \param nativeHandle is the native handle of device. +__SYCL_EXPORT pi_result +piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle); + +/// Creates PI device object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI device from. +/// \param platform is the platform of the device (optional). +/// \param device is the PI device created from the native handle. +__SYCL_EXPORT pi_result piextDeviceCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_platform platform, pi_device *device); + +/// Selects the most appropriate device binary based on runtime information +/// and the IR characteristics. +/// +__SYCL_EXPORT pi_result piextDeviceSelectBinary(pi_device device, + pi_device_binary *binaries, + pi_uint32 num_binaries, + pi_uint32 *selected_binary_ind); + +/// Retrieves a device function pointer to a user-defined function +/// \arg \c function_name. \arg \c function_pointer_ret is set to 0 if query +/// failed. +/// +/// \arg \c program must be built before calling this API. \arg \c device +/// must present in the list of devices returned by \c get_device method for +/// \arg \c program. +/// +/// If a fallback method determines the function exists but the address is +/// not available PI_ERROR_FUNCTION_ADDRESS_IS_NOT_AVAILABLE is returned. If the +/// address does not exist PI_ERROR_INVALID_KERNEL_NAME is returned. +__SYCL_EXPORT pi_result piextGetDeviceFunctionPointer( + pi_device device, pi_program program, const char *function_name, + pi_uint64 *function_pointer_ret); + +// +// Context +// +__SYCL_EXPORT pi_result piContextCreate( + const pi_context_properties *properties, pi_uint32 num_devices, + const pi_device *devices, + void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, + void *user_data), + void *user_data, pi_context *ret_context); + +__SYCL_EXPORT pi_result piContextGetInfo(pi_context context, + pi_context_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piContextRetain(pi_context context); + +__SYCL_EXPORT pi_result piContextRelease(pi_context context); + +typedef void (*pi_context_extended_deleter)(void *user_data); + +__SYCL_EXPORT pi_result piextContextSetExtendedDeleter( + pi_context context, pi_context_extended_deleter func, void *user_data); + +/// Gets the native handle of a PI context object. +/// +/// \param context is the PI context to get the native handle of. +/// \param nativeHandle is the native handle of context. +__SYCL_EXPORT pi_result +piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle); + +/// Creates PI context object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// NOTE: The number of devices and the list of devices is needed for Level Zero +/// backend because there is no possilibity to query this information from +/// context handle for Level Zero. If backend has API to query a list of devices +/// from the context native handle then these parameters are ignored. +/// +/// \param nativeHandle is the native handle to create PI context from. +/// \param numDevices is the number of devices in the context. Parameter is +/// ignored if number of devices can be queried from the context native +/// handle for a backend. +/// \param devices is the list of devices in the context. Parameter is ignored +/// if devices can be queried from the context native handle for a +/// backend. +/// \param pluginOwnsNativeHandle Indicates whether the created PI object +/// should take ownership of the native handle. +/// \param context is the PI context created from the native handle. +/// \return PI_SUCCESS if successfully created pi_context from the handle. +/// PI_ERROR_OUT_OF_HOST_MEMORY if can't allocate memory for the +/// pi_context object. PI_ERROR_INVALID_VALUE if numDevices == 0 or +/// devices is NULL but backend doesn't have API to query a list of +/// devices from the context native handle. PI_UNKNOWN_ERROR in case of +/// another error. +__SYCL_EXPORT pi_result piextContextCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_uint32 numDevices, + const pi_device *devices, bool pluginOwnsNativeHandle, pi_context *context); + +// +// Queue +// + +// TODO: Remove during next ABI break and rename piextQueueCreate to +// piQueueCreate. +__SYCL_EXPORT pi_result piQueueCreate(pi_context context, pi_device device, + pi_queue_properties properties, + pi_queue *queue); +/// \param properties points to a zero-terminated array of extra data describing +/// desired queue properties. Format is +/// {[PROPERTY[, property-specific elements of data]*,]* 0} +__SYCL_EXPORT pi_result piextQueueCreate(pi_context context, pi_device device, + pi_queue_properties *properties, + pi_queue *queue); +/// \param properties points to a zero-terminated array of extra data describing +/// desired queue properties. Format is +/// {[PROPERTY[, property-specific elements of data]*,]* 0} +__SYCL_EXPORT pi_result piextQueueCreate2(pi_context context, pi_device device, + pi_queue_properties *properties, + pi_queue *queue); + +__SYCL_EXPORT pi_result piQueueGetInfo(pi_queue command_queue, + pi_queue_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piQueueRetain(pi_queue command_queue); + +__SYCL_EXPORT pi_result piQueueRelease(pi_queue command_queue); + +__SYCL_EXPORT pi_result piQueueFinish(pi_queue command_queue); + +__SYCL_EXPORT pi_result piQueueFlush(pi_queue command_queue); + +/// Gets the native handle of a PI queue object. +/// +/// \param queue is the PI queue to get the native handle of. +/// \param nativeHandle is the native handle of queue. +__SYCL_EXPORT pi_result +piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle); + +/// Gets the native handle of a PI queue object. +/// +/// \param queue is the PI queue to get the native handle of. +/// \param nativeHandle is the native handle of queue or commandlist. +/// \param nativeHandleDesc provides additional properties of the native handle. +__SYCL_EXPORT pi_result piextQueueGetNativeHandle2( + pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc); + +/// Creates PI queue object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI queue from. +/// \param context is the PI context of the queue. +/// \param device is the PI device associated with the native device used when +/// creating the native queue. This parameter is optional but some backends +/// may fail to create the right PI queue if omitted. +/// \param pluginOwnsNativeHandle Indicates whether the created PI object +/// should take ownership of the native handle. +/// \param queue is the PI queue created from the native handle. +__SYCL_EXPORT pi_result piextQueueCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_context context, pi_device device, + bool pluginOwnsNativeHandle, pi_queue *queue); + +/// Creates PI queue object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI queue from. +/// \param nativeHandleDesc provides additional properties of the native handle. +/// \param context is the PI context of the queue. +/// \param device is the PI device associated with the native device used when +/// creating the native queue. This parameter is optional but some backends +/// may fail to create the right PI queue if omitted. +/// \param pluginOwnsNativeHandle Indicates whether the created PI object +/// should take ownership of the native handle. +/// \param Properties holds queue properties. +/// \param queue is the PI queue created from the native handle. +__SYCL_EXPORT pi_result piextQueueCreateWithNativeHandle2( + pi_native_handle nativeHandle, int32_t nativeHandleDesc, pi_context context, + pi_device device, bool pluginOwnsNativeHandle, + pi_queue_properties *Properties, pi_queue *queue); + +// +// Memory +// +__SYCL_EXPORT pi_result piMemBufferCreate( + pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, + pi_mem *ret_mem, const pi_mem_properties *properties = nullptr); + +__SYCL_EXPORT pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, + const pi_image_format *image_format, + const pi_image_desc *image_desc, + void *host_ptr, pi_mem *ret_mem); + +__SYCL_EXPORT pi_result piMemGetInfo(pi_mem mem, pi_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piMemImageGetInfo(pi_mem image, + pi_image_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piMemRetain(pi_mem mem); + +__SYCL_EXPORT pi_result piMemRelease(pi_mem mem); + +__SYCL_EXPORT pi_result piMemBufferPartition( + pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, + void *buffer_create_info, pi_mem *ret_mem); + +/// Gets the native handle of a PI mem object. +/// +/// \param mem is the PI mem to get the native handle of. +/// \param nativeHandle is the native handle of mem. +__SYCL_EXPORT pi_result piextMemGetNativeHandle(pi_mem mem, + pi_native_handle *nativeHandle); + +/// Creates PI mem object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI mem from. +/// \param context The PI context of the memory allocation. +/// \param ownNativeHandle Indicates if we own the native memory handle or it +/// came from interop that asked to not transfer the ownership to SYCL RT. +/// \param mem is the PI mem created from the native handle. +__SYCL_EXPORT pi_result piextMemCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, + pi_mem *mem); + +/// Creates PI image object from a native handle. +/// +/// \param nativeHandle is the native handle to create PI image from. +/// \param context The PI context of the memory allocation. +/// \param ownNativeHandle Indicates if we own the native memory handle or it +/// came from interop that asked to not transfer the ownership to SYCL RT. +/// \param ImageFormat is the pi_image_format struct that +/// specifies the image channnel order and channel data type that +/// match what the nativeHandle uses +/// \param ImageDesc is the pi_image_desc struct that specifies +/// the image dimension, pitch, slice and other information about +/// the nativeHandle +/// \param img is the PI img created from the native handle. +__SYCL_EXPORT pi_result piextMemImageCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, + const pi_image_format *ImageFormat, const pi_image_desc *ImageDesc, + pi_mem *img); + +// +// Program +// + +__SYCL_EXPORT pi_result piProgramCreate(pi_context context, const void *il, + size_t length, pi_program *res_program); + +__SYCL_EXPORT pi_result piclProgramCreateWithSource(pi_context context, + pi_uint32 count, + const char **strings, + const size_t *lengths, + pi_program *ret_program); + +/// Creates a PI program for a context and loads the given binary into it. +/// +/// \param context is the PI context to associate the program with. +/// \param num_devices is the number of devices in device_list. +/// \param device_list is a pointer to a list of devices. These devices must all +/// be in context. +/// \param lengths is an array of sizes in bytes of the binary in binaries. +/// \param binaries is a pointer to a list of program binaries. +/// \param num_metadata_entries is the number of metadata entries in metadata. +/// \param metadata is a pointer to a list of program metadata entries. The +/// use of metadata entries is backend-defined. +/// \param binary_status returns whether the program binary was loaded +/// succesfully or not, for each device in device_list. +/// binary_status is ignored if it is null and otherwise +/// it must be an array of num_devices elements. +/// \param ret_program is the PI program created from the program binaries. +__SYCL_EXPORT pi_result piProgramCreateWithBinary( + pi_context context, pi_uint32 num_devices, const pi_device *device_list, + const size_t *lengths, const unsigned char **binaries, + size_t num_metadata_entries, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *ret_program); + +__SYCL_EXPORT pi_result piProgramGetInfo(pi_program program, + pi_program_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result +piProgramLink(pi_context context, pi_uint32 num_devices, + const pi_device *device_list, const char *options, + pi_uint32 num_input_programs, const pi_program *input_programs, + void (*pfn_notify)(pi_program program, void *user_data), + void *user_data, pi_program *ret_program); + +__SYCL_EXPORT pi_result piProgramCompile( + pi_program program, pi_uint32 num_devices, const pi_device *device_list, + const char *options, pi_uint32 num_input_headers, + const pi_program *input_headers, const char **header_include_names, + void (*pfn_notify)(pi_program program, void *user_data), void *user_data); + +__SYCL_EXPORT pi_result piProgramBuild( + pi_program program, pi_uint32 num_devices, const pi_device *device_list, + const char *options, + void (*pfn_notify)(pi_program program, void *user_data), void *user_data); + +__SYCL_EXPORT pi_result piProgramGetBuildInfo( + pi_program program, pi_device device, _pi_program_build_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piProgramRetain(pi_program program); + +__SYCL_EXPORT pi_result piProgramRelease(pi_program program); + +/// Sets a specialization constant to a specific value. +/// +/// Note: Only used when specialization constants are natively supported (SPIR-V +/// binaries), and not when they are emulated (AOT binaries). +/// +/// \param prog the program object which will use the value +/// \param spec_id integer ID of the constant +/// \param spec_size size of the value +/// \param spec_value bytes of the value +__SYCL_EXPORT pi_result +piextProgramSetSpecializationConstant(pi_program prog, pi_uint32 spec_id, + size_t spec_size, const void *spec_value); + +/// Gets the native handle of a PI program object. +/// +/// \param program is the PI program to get the native handle of. +/// \param nativeHandle is the native handle of program. +__SYCL_EXPORT pi_result +piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle); + +/// Creates PI program object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI program from. +/// \param context is the PI context of the program. +/// \param pluginOwnsNativeHandle Indicates whether the created PI object +/// should take ownership of the native handle. +/// \param program is the PI program created from the native handle. +__SYCL_EXPORT pi_result piextProgramCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_context context, + bool pluginOwnsNativeHandle, pi_program *program); + +// +// Kernel +// + +typedef enum { + /// indicates that the kernel might access data through USM ptrs + PI_USM_INDIRECT_ACCESS, + /// provides an explicit list of pointers that the kernel will access + PI_USM_PTRS = 0x4203, + /// provides the preferred cache configuration (large slm or large data) + PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG = 0x4204 +} _pi_kernel_exec_info; + +using pi_kernel_exec_info = _pi_kernel_exec_info; + +__SYCL_EXPORT pi_result piKernelCreate(pi_program program, + const char *kernel_name, + pi_kernel *ret_kernel); + +__SYCL_EXPORT pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, + size_t arg_size, const void *arg_value); + +__SYCL_EXPORT pi_result piKernelGetInfo(pi_kernel kernel, + pi_kernel_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_group_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +/// API to query information from the sub-group from a kernel +/// +/// \param kernel is the pi_kernel to query +/// \param device is the device the kernel is executed on +/// \param param_name is a pi_kernel_sub_group_info enum value that +/// specifies the informtation queried for. +/// \param input_value_size is the size of input value passed in +/// ptr input_value param +/// \param input_value is the ptr to the input value passed. +/// \param param_value_size is the size of the value in bytes. +/// \param param_value is a pointer to the value to set. +/// \param param_value_size_ret is a pointer to return the size of data in +/// param_value ptr. +/// +/// All queries expect a return of 4 bytes in param_value_size, +/// param_value_size_ret, and a uint32_t value should to be written in +/// param_value ptr. +/// Note: This behaviour differs from OpenCL. OpenCL returns size_t. +__SYCL_EXPORT pi_result piKernelGetSubGroupInfo( + pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, + size_t input_value_size, const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piKernelRetain(pi_kernel kernel); + +__SYCL_EXPORT pi_result piKernelRelease(pi_kernel kernel); + +/// Sets up pointer arguments for CL kernels. An extra indirection +/// is required due to CL argument conventions. +/// +/// \param kernel is the kernel to be launched +/// \param arg_index is the index of the kernel argument +/// \param arg_size is the size in bytes of the argument (ignored in CL) +/// \param arg_value is the pointer argument +__SYCL_EXPORT pi_result piextKernelSetArgPointer(pi_kernel kernel, + pi_uint32 arg_index, + size_t arg_size, + const void *arg_value); + +/// API to set attributes controlling kernel execution +/// +/// \param kernel is the pi kernel to execute +/// \param param_name is a pi_kernel_exec_info value that specifies the info +/// passed to the kernel +/// \param param_value_size is the size of the value in bytes +/// \param param_value is a pointer to the value to set for the kernel +/// +/// If param_name is PI_USM_INDIRECT_ACCESS, the value will be a ptr to +/// the pi_bool value PI_TRUE +/// If param_name is PI_USM_PTRS, the value will be an array of ptrs +__SYCL_EXPORT pi_result piKernelSetExecInfo(pi_kernel kernel, + pi_kernel_exec_info value_name, + size_t param_value_size, + const void *param_value); + +/// Creates PI kernel object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI kernel from. +/// \param context is the PI context of the kernel. +/// \param program is the PI program of the kernel. +/// \param pluginOwnsNativeHandle Indicates whether the created PI object +/// should take ownership of the native handle. +/// \param kernel is the PI kernel created from the native handle. +__SYCL_EXPORT pi_result piextKernelCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_context context, pi_program program, + bool pluginOwnsNativeHandle, pi_kernel *kernel); + +/// Gets the native handle of a PI kernel object. +/// +/// \param kernel is the PI kernel to get the native handle of. +/// \param nativeHandle is the native handle of kernel. +__SYCL_EXPORT pi_result +piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle); + +// +// Events +// + +/// Create PI event object in a signalled/completed state. +/// +/// \param context is the PI context of the event. +/// \param ret_event is the PI even created. +__SYCL_EXPORT pi_result piEventCreate(pi_context context, pi_event *ret_event); + +__SYCL_EXPORT pi_result piEventGetInfo(pi_event event, pi_event_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piEventGetProfilingInfo(pi_event event, + pi_profiling_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piEventsWait(pi_uint32 num_events, + const pi_event *event_list); + +__SYCL_EXPORT pi_result piEventSetCallback( + pi_event event, pi_int32 command_exec_callback_type, + void (*pfn_notify)(pi_event event, pi_int32 event_command_status, + void *user_data), + void *user_data); + +__SYCL_EXPORT pi_result piEventSetStatus(pi_event event, + pi_int32 execution_status); + +__SYCL_EXPORT pi_result piEventRetain(pi_event event); + +__SYCL_EXPORT pi_result piEventRelease(pi_event event); + +/// Gets the native handle of a PI event object. +/// +/// \param event is the PI event to get the native handle of. +/// \param nativeHandle is the native handle of event. +__SYCL_EXPORT pi_result +piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle); + +/// Creates PI event object from a native handle. +/// NOTE: The created PI object takes ownership of the native handle. +/// +/// \param nativeHandle is the native handle to create PI event from. +/// \param context is the corresponding PI context +/// \param pluginOwnsNativeHandle Indicates whether the created PI object +/// should take ownership of the native handle. +/// \param event is the PI event created from the native handle. +__SYCL_EXPORT pi_result piextEventCreateWithNativeHandle( + pi_native_handle nativeHandle, pi_context context, bool ownNativeHandle, + pi_event *event); + +// +// Sampler +// +__SYCL_EXPORT pi_result piSamplerCreate( + pi_context context, const pi_sampler_properties *sampler_properties, + pi_sampler *result_sampler); + +__SYCL_EXPORT pi_result piSamplerGetInfo(pi_sampler sampler, + pi_sampler_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +__SYCL_EXPORT pi_result piSamplerRetain(pi_sampler sampler); + +__SYCL_EXPORT pi_result piSamplerRelease(pi_sampler sampler); + +// +// Queue Commands +// +__SYCL_EXPORT pi_result piEnqueueKernelLaunch( + pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, + const size_t *global_work_offset, const size_t *global_work_size, + const size_t *local_work_size, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueNativeKernel( + pi_queue queue, void (*user_func)(void *), void *args, size_t cb_args, + pi_uint32 num_mem_objects, const pi_mem *mem_list, + const void **args_mem_loc, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueEventsWait(pi_queue command_queue, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueEventsWaitWithBarrier( + pi_queue command_queue, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemBufferRead( + pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, + size_t size, void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemBufferReadRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result +piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, + pi_bool blocking_write, size_t offset, size_t size, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemBufferWriteRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result +piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, + pi_mem dst_buffer, size_t src_offset, size_t dst_offset, + size_t size, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( + pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, + pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, + pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, + size_t dst_row_pitch, size_t dst_slice_pitch, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); + +__SYCL_EXPORT pi_result +piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, + const void *pattern, size_t pattern_size, size_t offset, + size_t size, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemImageRead( + pi_queue command_queue, pi_mem image, pi_bool blocking_read, + pi_image_offset origin, pi_image_region region, size_t row_pitch, + size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemImageWrite( + pi_queue command_queue, pi_mem image, pi_bool blocking_write, + pi_image_offset origin, pi_image_region region, size_t input_row_pitch, + size_t input_slice_pitch, const void *ptr, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemImageCopy( + pi_queue command_queue, pi_mem src_image, pi_mem dst_image, + pi_image_offset src_origin, pi_image_offset dst_origin, + pi_image_region region, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result +piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, + const void *fill_color, const size_t *origin, + const size_t *region, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +__SYCL_EXPORT pi_result piEnqueueMemBufferMap( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, + pi_map_flags map_flags, size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event, void **ret_map); + +__SYCL_EXPORT pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, + void *mapped_ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +// Extension to allow backends to process a PI memory object before adding it +// as an argument for a kernel. +// Note: This is needed by the CUDA backend to extract the device pointer to +// the memory as the kernels uses it rather than the PI object itself. +__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel kernel, + pi_uint32 arg_index, + const pi_mem *arg_value); + +// Extension to allow backends to process a PI sampler object before adding it +// as an argument for a kernel. +// Note: This is needed by the CUDA backend to extract the properties of the +// sampler as the kernels uses it rather than the PI object itself. +__SYCL_EXPORT pi_result piextKernelSetArgSampler(pi_kernel kernel, + pi_uint32 arg_index, + const pi_sampler *arg_value); + +/// +// USM +/// +typedef enum { + PI_USM_HOST_SUPPORT = 0x4190, + PI_USM_DEVICE_SUPPORT = 0x4191, + PI_USM_SINGLE_SHARED_SUPPORT = 0x4192, + PI_USM_CROSS_SHARED_SUPPORT = 0x4193, + PI_USM_SYSTEM_SHARED_SUPPORT = 0x4194 +} _pi_usm_capability_query; + +typedef enum : pi_bitfield { + PI_USM_ACCESS = (1 << 0), + PI_USM_ATOMIC_ACCESS = (1 << 1), + PI_USM_CONCURRENT_ACCESS = (1 << 2), + PI_USM_CONCURRENT_ATOMIC_ACCESS = (1 << 3) +} _pi_usm_capabilities; + +typedef enum { + PI_MEM_ALLOC_TYPE = 0x419A, + PI_MEM_ALLOC_BASE_PTR = 0x419B, + PI_MEM_ALLOC_SIZE = 0x419C, + PI_MEM_ALLOC_DEVICE = 0x419D, +} _pi_mem_alloc_info; + +typedef enum { + PI_MEM_TYPE_UNKNOWN = 0x4196, + PI_MEM_TYPE_HOST = 0x4197, + PI_MEM_TYPE_DEVICE = 0x4198, + PI_MEM_TYPE_SHARED = 0x4199 +} _pi_usm_type; + +// Flag is used for piProgramUSMEnqueuePrefetch. PI_USM_MIGRATION_TBD0 is a +// placeholder for future developments and should not change the behaviour of +// piProgramUSMEnqueuePrefetch +typedef enum : pi_bitfield { + PI_USM_MIGRATION_TBD0 = (1 << 0) +} _pi_usm_migration_flags; + +using pi_usm_capability_query = _pi_usm_capability_query; +using pi_usm_capabilities = _pi_usm_capabilities; +using pi_mem_alloc_info = _pi_mem_alloc_info; +using pi_usm_type = _pi_usm_type; +using pi_usm_migration_flags = _pi_usm_migration_flags; + +/// Allocates host memory accessible by the device. +/// +/// \param result_ptr contains the allocated memory +/// \param context is the pi_context +/// \param properties are optional allocation properties +/// \param size is the size of the allocation +/// \param alignment is the desired alignment of the allocation +__SYCL_EXPORT pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, + pi_usm_mem_properties *properties, + size_t size, pi_uint32 alignment); + +/// Allocates device memory +/// +/// \param result_ptr contains the allocated memory +/// \param context is the pi_context +/// \param device is the device the memory will be allocated on +/// \param properties are optional allocation properties +/// \param size is the size of the allocation +/// \param alignment is the desired alignment of the allocation +__SYCL_EXPORT pi_result piextUSMDeviceAlloc(void **result_ptr, + pi_context context, + pi_device device, + pi_usm_mem_properties *properties, + size_t size, pi_uint32 alignment); + +/// Allocates memory accessible on both host and device +/// +/// \param result_ptr contains the allocated memory +/// \param context is the pi_context +/// \param device is the device the memory will be allocated on +/// \param properties are optional allocation properties +/// \param size is the size of the allocation +/// \param alignment is the desired alignment of the allocation +__SYCL_EXPORT pi_result piextUSMSharedAlloc(void **result_ptr, + pi_context context, + pi_device device, + pi_usm_mem_properties *properties, + size_t size, pi_uint32 alignment); + +/// Indicates that the allocated USM memory is no longer needed on the runtime +/// side. The actual freeing of the memory may be done in a blocking or deferred +/// manner, e.g. to avoid issues with indirect memory access from kernels. +/// +/// \param context is the pi_context of the allocation +/// \param ptr is the memory to be freed +__SYCL_EXPORT pi_result piextUSMFree(pi_context context, void *ptr); + +/// USM Memset API +/// +/// \param queue is the queue to submit to +/// \param ptr is the ptr to memset +/// \param value is value to set. It is interpreted as an 8-bit value and the +/// upper +/// 24 bits are ignored +/// \param count is the size in bytes to memset +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, + pi_int32 value, size_t count, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); + +/// USM Memcpy API +/// +/// \param queue is the queue to submit to +/// \param blocking is whether this operation should block the host +/// \param src_ptr is the data to be copied +/// \param dst_ptr is the location the data will be copied +/// \param size is number of bytes to copy +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, + void *dst_ptr, + const void *src_ptr, size_t size, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); + +/// Hint to migrate memory to the device +/// +/// \param queue is the queue to submit to +/// \param ptr points to the memory to migrate +/// \param size is the number of bytes to migrate +/// \param flags is a bitfield used to specify memory migration options +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueuePrefetch( + pi_queue queue, const void *ptr, size_t size, pi_usm_migration_flags flags, + pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, + pi_event *event); + +/// USM Memadvise API +/// +/// \param queue is the queue to submit to +/// \param ptr is the data to be advised +/// \param length is the size in bytes of the memory to advise +/// \param advice is device specific advice +/// \param event is the event that represents this operation +// USM memadvise API to govern behavior of automatic migration mechanisms +__SYCL_EXPORT pi_result piextUSMEnqueueMemAdvise(pi_queue queue, + const void *ptr, size_t length, + pi_mem_advice advice, + pi_event *event); + +/// API to query information about USM allocated pointers +/// Valid Queries: +/// PI_MEM_ALLOC_TYPE returns host/device/shared pi_host_usm value +/// PI_MEM_ALLOC_BASE_PTR returns the base ptr of an allocation if +/// the queried pointer fell inside an allocation. +/// Result must fit in void * +/// PI_MEM_ALLOC_SIZE returns how big the queried pointer's +/// allocation is in bytes. Result is a size_t. +/// PI_MEM_ALLOC_DEVICE returns the pi_device this was allocated against +/// +/// \param context is the pi_context +/// \param ptr is the pointer to query +/// \param param_name is the type of query to perform +/// \param param_value_size is the size of the result in bytes +/// \param param_value is the result +/// \param param_value_size_ret is how many bytes were written +__SYCL_EXPORT pi_result piextUSMGetMemAllocInfo( + pi_context context, const void *ptr, pi_mem_alloc_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + +/// USM 2D fill API +/// +/// \param queue is the queue to submit to +/// \param ptr is the ptr to fill +/// \param pitch is the total width of the destination memory including padding +/// \param pattern is a pointer with the bytes of the pattern to set +/// \param pattern_size is the size in bytes of the pattern +/// \param width is width in bytes of each row to fill +/// \param height is height the columns to fill +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueFill2D(pi_queue queue, void *ptr, + size_t pitch, size_t pattern_size, + const void *pattern, size_t width, + size_t height, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); + +/// USM 2D Memset API +/// +/// \param queue is the queue to submit to +/// \param ptr is the ptr to fill +/// \param pitch is the total width of the destination memory including padding +/// \param value the value to fill into the region in \param ptr +/// \param width is width in bytes of each row to fill +/// \param height is height the columns to fill +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueMemset2D( + pi_queue queue, void *ptr, size_t pitch, int value, size_t width, + size_t height, pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, pi_event *event); + +/// USM 2D Memcpy API +/// +/// \param queue is the queue to submit to +/// \param blocking is whether this operation should block the host +/// \param dst_ptr is the location the data will be copied +/// \param dst_pitch is the total width of the destination memory including +/// padding +/// \param src_ptr is the data to be copied +/// \param src_pitch is the total width of the source memory including padding +/// \param width is width in bytes of each row to be copied +/// \param height is height the columns to be copied +/// \param num_events_in_waitlist is the number of events to wait on +/// \param events_waitlist is an array of events to wait on +/// \param event is the event that represents this operation +__SYCL_EXPORT pi_result piextUSMEnqueueMemcpy2D( + pi_queue queue, pi_bool blocking, void *dst_ptr, size_t dst_pitch, + const void *src_ptr, size_t src_pitch, size_t width, size_t height, + pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, + pi_event *event); + +/// +/// Device global variable +/// + +/// API for writing data from host to a device global variable. +/// +/// \param queue is the queue +/// \param program is the program containing the device global variable +/// \param blocking_write is true if the write should block +/// \param name is the unique identifier for the device global variable +/// \param count is the number of bytes to copy +/// \param offset is the byte offset into the device global variable to start +/// copying +/// \param src is a pointer to where the data must be copied from +/// \param num_events_in_wait_list is a number of events in the wait list +/// \param event_wait_list is the wait list +/// \param event is the resulting event +pi_result piextEnqueueDeviceGlobalVariableWrite( + pi_queue queue, pi_program program, const char *name, + pi_bool blocking_write, size_t count, size_t offset, const void *src, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); + +/// API reading data from a device global variable to host. +/// +/// \param queue is the queue +/// \param program is the program containing the device global variable +/// \param blocking_read is true if the read should block +/// \param name is the unique identifier for the device global variable +/// \param count is the number of bytes to copy +/// \param offset is the byte offset into the device global variable to start +/// copying +/// \param dst is a pointer to where the data must be copied to +/// \param num_events_in_wait_list is a number of events in the wait list +/// \param event_wait_list is the wait list +/// \param event is the resulting event +pi_result piextEnqueueDeviceGlobalVariableRead( + pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, + size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +/// +/// Plugin +/// +/// +// Host Pipes +/// + +/// Read from pipe of a given name +/// +/// @param queue a valid host command-queue in which the read / write command +/// will be queued. command_queue and program must be created with the same +/// OpenCL context. +/// @param program a program object with a successfully built executable. +/// @param pipe_symbol the name of the program scope pipe global variable. +/// @param blocking indicate if the read and write operations are blocking or +/// non-blocking +/// @param ptr a pointer to buffer in host memory that will hold resulting data +/// from pipe +/// @param size size of the memory region to read or write, in bytes. +/// @param num_events_in_waitlist number of events in the wait list. +/// @param events_waitlist specify events that need to complete before this +/// particular command can be executed. +/// @param event returns an event object that identifies this read / write +/// command and can be used to query or queue a wait for this command to +/// complete. +__SYCL_EXPORT pi_result piextEnqueueReadHostPipe( + pi_queue queue, pi_program program, const char *pipe_symbol, + pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, pi_event *event); + +/// Write to pipe of a given name +/// +/// @param queue a valid host command-queue in which the read / write command +/// will be queued. command_queue and program must be created with the same +/// OpenCL context. +/// @param program a program object with a successfully built executable. +/// @param pipe_symbol the name of the program scope pipe global variable. +/// @param blocking indicate if the read and write operations are blocking or +/// non-blocking +/// @param ptr a pointer to buffer in host memory that holds data to be written +/// to host pipe. +/// @param size size of the memory region to read or write, in bytes. +/// @param num_events_in_waitlist number of events in the wait list. +/// @param events_waitlist specify events that need to complete before this +/// particular command can be executed. +/// @param event returns an event object that identifies this read / write +/// command and can be used to query or queue a wait for this command to +/// complete. +__SYCL_EXPORT pi_result piextEnqueueWriteHostPipe( + pi_queue queue, pi_program program, const char *pipe_symbol, + pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, pi_event *event); + +/// API to get Plugin internal data, opaque to SYCL RT. Some devices whose +/// device code is compiled by the host compiler (e.g. CPU emulators) may use it +/// to access some device code functionality implemented in/behind the plugin. +/// \param opaque_data_param - unspecified argument, interpretation is specific +/// to a plugin \param opaque_data_return - placeholder for the returned opaque +/// data. +__SYCL_EXPORT pi_result piextPluginGetOpaqueData(void *opaque_data_param, + void **opaque_data_return); + +/// API to notify that the plugin should clean up its resources. +/// No PI calls should be made until the next piPluginInit call. +/// \param PluginParameter placeholder for future use, currenly not used. +__SYCL_EXPORT pi_result piTearDown(void *PluginParameter); + +/// API to get Plugin specific warning and error messages. +/// \param message is a returned address to the first element in the message the +/// plugin owns the error message string. The string is thread-local. As a +/// result, different threads may return different errors. A message is +/// overwritten by the following error or warning that is produced within the +/// given thread. The memory is cleaned up at the end of the thread's lifetime. +/// +/// \return PI_SUCCESS if plugin is indicating non-fatal warning. Any other +/// error code indicates that plugin considers this to be a fatal error and the +/// Returns the global timestamp from \param device , and syncronized host +/// timestamp +__SYCL_EXPORT pi_result piPluginGetLastError(char **message); + +/// API to get backend specific option. +/// \param frontend_option is a string that contains frontend option. +/// \param backend_option is used to return the backend option corresponding to +/// frontend option. +/// +/// \return PI_SUCCESS is returned for valid frontend_option. If a valid backend +/// option is not available, an empty string is returned. +__SYCL_EXPORT pi_result piPluginGetBackendOption(pi_platform platform, + const char *frontend_option, + const char **backend_option); + +/// Queries device for it's global timestamp in nanoseconds, and updates +/// HostTime with the value of the host timer at the closest possible point in +/// time to that at which DeviceTime was returned. +/// +/// \param Device device to query for timestamp +/// \param DeviceTime pointer to store device timestamp in nanoseconds. Optional +/// argument, can be nullptr +/// \param HostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will +/// not be written +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime); + +struct _pi_plugin { + // PI version supported by host passed to the plugin. The Plugin + // checks and writes the appropriate Function Pointers in + // PiFunctionTable. + // TODO: Work on version fields and their handshaking mechanism. + // Some choices are: + // - Use of integers to keep major and minor version. + // - Keeping char* Versions. + char PiVersion[20]; + // Plugin edits this. + char PluginVersion[20]; + char *Targets; + struct FunctionPointers { +#define _PI_API(api) decltype(::api) *api; +#include + } PiFunctionTable; +}; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // _PI_H_ diff --git a/sycl/include/sycl/detail/property_helper.hpp b/sycl/include/sycl/detail/property_helper.hpp index 8be70fe39ae58..d918c9e080ca1 100644 --- a/sycl/include/sycl/detail/property_helper.hpp +++ b/sycl/include/sycl/detail/property_helper.hpp @@ -43,8 +43,10 @@ enum DataLessPropKind { QueuePriorityNormal = 16, QueuePriorityLow = 17, QueuePriorityHigh = 18, + QueueSubmissionBatched = 19, + QueueSubmissionImmediate = 20, // Indicates the last known dataless property. - LastKnownDataLessPropKind = 18, + LastKnownDataLessPropKind = 20, // Exceeding 32 may cause ABI breaking change on some of OSes. DataLessPropKindSize = 32 }; diff --git a/sycl/include/sycl/properties/queue_properties.def b/sycl/include/sycl/properties/queue_properties.def index a888c0ffe44aa..f76259eb9e926 100644 --- a/sycl/include/sycl/properties/queue_properties.def +++ b/sycl/include/sycl/properties/queue_properties.def @@ -21,6 +21,11 @@ __SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, priority_high, __SYCL_DATA_LESS_PROP(ext::oneapi::cuda::property::queue, use_default_stream, UseDefaultStream) +__SYCL_DATA_LESS_PROP(ext::oneapi::level_zero::property::queue, batched_submission, + QueueSubmissionBatched) +__SYCL_DATA_LESS_PROP(ext::oneapi::level_zero::property::queue, immediate_submission, + QueueSubmissionImmediate) + // Deprecated alias for ext::oneapi::cuda::property::queue. __SYCL_MANUALLY_DEFINED_PROP(property::queue::cuda, use_default_stream) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index b56e5dd47079d..c852f0a4fdd0f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -739,6 +739,16 @@ bool _pi_queue::isPriorityHigh() const { return ((this->Properties & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) != 0); } +bool _pi_queue::isBatchedSubmission() const { + return ((this->Properties & PI_EXT_ONEAPI_QUEUE_FLAG_BATCHED_SUBMISSION) != + 0); +} + +bool _pi_queue::isImmediateSubmission() const { + return ((this->Properties & PI_EXT_ONEAPI_QUEUE_FLAG_IMMEDIATE_SUBMISSION) != + 0); +} + pi_result _pi_queue::resetCommandList(pi_command_list_ptr_t CommandList, bool MakeAvailable, std::vector &EventListToCleanup, @@ -980,17 +990,24 @@ _pi_queue::_pi_queue(std::vector &ComputeQueues, Properties(PiQueueProperties) { // Set the type of commandlists the queue will use. - bool Default = !ImmediateCommandlistEnvVarIsSet; - UsingImmCmdLists = Device->useImmediateCommandLists(); - urPrint("ImmCmdList env var is set (%s), OldAPI (%s)\n", - (ImmediateCommandlistEnvVarIsSet ? "YES" : "NO"), - (OldAPI ? "YES" : "NO")); - - if (OldAPI && Default) - // The default when called from pre-compiled binaries is to not use - // immediate command lists. + // When user-selected submission mode, ignore env var setting. + if (isBatchedSubmission()) { UsingImmCmdLists = false; - urPrint("ImmCmdList setting (%s)\n", (UsingImmCmdLists ? "YES" : "NO")); + } else if (isImmediateSubmission()) { + UsingImmCmdLists = true; + } else { + bool Default = !ImmediateCommandlistEnvVarIsSet; + UsingImmCmdLists = Device->useImmediateCommandLists(); + urPrint("ImmCmdList env var is set (%s), OldAPI (%s)\n", + (ImmediateCommandlistEnvVarIsSet ? "YES" : "NO"), + (OldAPI ? "YES" : "NO")); + + if (OldAPI && Default) + // The default when called from pre-compiled binaries is to not use + // immediate command lists. + UsingImmCmdLists = false; + urPrint("ImmCmdList setting (%s)\n", (UsingImmCmdLists ? "YES" : "NO")); + } // Compute group initialization. // First, see if the queue's device allows for round-robin or it is @@ -2598,7 +2615,9 @@ pi_result piextQueueCreateInternal(pi_context Context, pi_device Device, PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH)), + PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH | + PI_EXT_ONEAPI_QUEUE_FLAG_BATCHED_SUBMISSION | + PI_EXT_ONEAPI_QUEUE_FLAG_IMMEDIATE_SUBMISSION)), PI_ERROR_INVALID_VALUE); PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 76309bac4cf06..839d60867034f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -591,6 +591,10 @@ struct _pi_queue : _ur_object { bool isPriorityLow() const; bool isPriorityHigh() const; + // Returns true if the queue has explicitly selected submission mode. + bool isBatchedSubmission() const; + bool isImmediateSubmission() const; + // adjust the queue's batch size, knowing that the current command list // is being closed with a full batch. // For copy commands, IsCopy is set to 'true'. diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index f497b3fd84327..31fdde7996739 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -469,6 +469,28 @@ class queue_impl { } CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH; } + // Track that submission modes do not conflict. + bool SubmissionSeen = false; + if (PropList.has_property< + ext::oneapi::level_zero::property::queue::batched_submission>()) { + if (SubmissionSeen) { + throw sycl::exception( + make_error_code(errc::invalid), + "Queue cannot be constructed with different submission modes."); + } + SubmissionSeen = true; + CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_BATCHED_SUBMISSION; + } + if (PropList.has_property< + ext::oneapi::level_zero::property::queue::immediate_submission>()) { + if (SubmissionSeen) { + throw sycl::exception( + make_error_code(errc::invalid), + "Queue cannot be constructed with different submission modes."); + } + SubmissionSeen = true; + CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_IMMEDIATE_SUBMISSION; + } return CreationFlags; } From 950fd8d7abfd16546908c442c7044f1d1e8a4f68 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Mon, 22 May 2023 23:09:17 -0700 Subject: [PATCH 02/12] Updated symbols and doc. --- ..._ext_oneapi_queue_submission_mode.asciidoc | 104 +++++++++++ sycl/test/abi/sycl_symbols_linux.dump | 166 +----------------- 2 files changed, 109 insertions(+), 161 deletions(-) create mode 100755 sycl/doc/extensions/supported/sycl_ext_oneapi_queue_submission_mode.asciidoc diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_submission_mode.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_submission_mode.asciidoc new file mode 100755 index 0000000000000..ef9e8ca319900 --- /dev/null +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_submission_mode.asciidoc @@ -0,0 +1,104 @@ += sycl_ext_oneapi_queue_submission_mode + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2023-2023 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 7 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This extension is implemented and fully supported by {dpcpp}. + + +== Backend support status + +This extension is currently implemented in {dpcpp} only for GPU devices and +only when using the Level Zero backend. Use of this extension in on other devices +or backends may have no effect on the submission mode. + +== Overview + +This extension enables specifying one of two submission modes for a queue. + +The property `batched_submission` specifies that submissions to a SYCL queue +should be collected into groups before actual submission to the hardware. + +The property `immediate_submission` specifies that submissions to a +SYCL queue should be submitted to the hardware immediately. + +When the submission mode is left unspecified a platform-specific default +is chosen by the implementation. + + + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_QUEUE_SUBMISSION_MODE` to one of the values defined in +the table below. Applications can test for the existence of this macro to +determine if the implementation supports this feature, or applications can test +the macro's value to determine which of the extension's features the +implementation supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + + +=== Guidelines for using queue submission properties + +The default queue submission mode chosen by the implementation is usually +optimal. + +In some cases it may be necessary to explicitly select +a submission mode based on the types of activities the queue is expected to handle. +For example, when kernel runtimes are very short the submission time on the host +may be as long or longer than the actual runtime of the kernel. In this case, doing +batched submissions may be preferable so that the submission overhead is amortized +over a number of kernel executions. + diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index bf362be8fe262..60e458d1c4233 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4093,6 +4093,8 @@ _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEE _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue14discard_eventsEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue15priority_normalEEET_v +_ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue18batched_submissionEEET_v +_ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue20immediate_submissionEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_8property5queue16enable_profilingEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_8property5queue4cuda18use_default_streamEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_8property5queue8in_orderEEET_v @@ -4102,6 +4104,8 @@ _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue12priority_lowEE _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue13priority_highEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue14discard_eventsEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue15priority_normalEEEbv +_ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue18batched_submissionEEEbv +_ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue20immediate_submissionEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue16enable_profilingEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue4cuda18use_default_streamEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue8in_orderEEEbv @@ -4140,41 +4144,9 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6de _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi3EEEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device22max_global_work_groupsEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext8codeplay12experimental4info6device15supports_fusionEEENT_11return_typeEv -_ZNK4sycl3_V16detail11image_plain10getSamplerEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext8codeplay12experimental4info6device28max_registers_per_work_groupEEENT_11return_typeEv +_ZNK4sycl3_V16detail11image_plain10getSamplerEv _ZNK4sycl3_V16detail11image_plain11getRowPitchEv -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi8property5queue15priority_normalEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property5image12use_host_ptrEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property5image13context_boundEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property5image9use_mutexEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property5queue8in_orderEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property6buffer12use_host_ptrEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property6buffer13context_boundEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property6buffer9use_mutexEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property6noinitEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property7no_initEEET_v -_ZNK4sycl3_V16detail11image_plain12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_3ext6oneapi8property5queue12priority_lowEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_3ext6oneapi8property5queue13priority_highEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_3ext6oneapi8property5queue15priority_normalEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property5image12use_host_ptrEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property5image13context_boundEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property5image9use_mutexEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property5queue8in_orderEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property6buffer12use_host_ptrEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property6buffer13context_boundEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property6buffer9use_mutexEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property6noinitEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property7no_initEEEbv -_ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv _ZNK4sycl3_V16detail11image_plain13getSlicePitchEv _ZNK4sycl3_V16detail11image_plain14getChannelTypeEv _ZNK4sycl3_V16detail11image_plain14getElementSizeEv @@ -4188,38 +4160,6 @@ _ZNK4sycl3_V16detail11stream_impl22get_max_statement_sizeEv _ZNK4sycl3_V16detail11stream_impl25get_work_item_buffer_sizeEv _ZNK4sycl3_V16detail11stream_impl4sizeEv _ZNK4sycl3_V16detail11stream_impl8get_sizeEv -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue15priority_normalEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property5image12use_host_ptrEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property5image13context_boundEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property5image9use_mutexEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property5queue8in_orderEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property6buffer12use_host_ptrEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property6buffer13context_boundEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property6buffer9use_mutexEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property6noinitEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property7no_initEEET_v -_ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_3ext6oneapi8property5queue12priority_lowEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_3ext6oneapi8property5queue13priority_highEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_3ext6oneapi8property5queue15priority_normalEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property5image12use_host_ptrEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property5image13context_boundEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property5image9use_mutexEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property5queue8in_orderEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6buffer12use_host_ptrEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6buffer13context_boundEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6buffer9use_mutexEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6noinitEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7no_initEEEbv -_ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv _ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv @@ -4422,38 +4362,6 @@ _ZNK4sycl3_V16kernel8get_infoINS0_4info6kernel15reference_countEEENS0_6detail19i _ZNK4sycl3_V16kernel8get_infoINS0_4info6kernel7contextEEENS0_6detail19is_kernel_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel8get_infoINS0_4info6kernel8num_argsEEENS0_6detail19is_kernel_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel9getNativeEv -_ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi8property5queue15priority_normalEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property5image12use_host_ptrEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property5image13context_boundEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property5image9use_mutexEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property5queue8in_orderEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property6buffer12use_host_ptrEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property6buffer13context_boundEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property6buffer9use_mutexEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property6noinitEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property7no_initEEET_v -_ZNK4sycl3_V16stream12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v -_ZNK4sycl3_V16stream12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_3ext6oneapi8property5queue12priority_lowEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_3ext6oneapi8property5queue13priority_highEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_3ext6oneapi8property5queue15priority_normalEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property5image12use_host_ptrEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property5image13context_boundEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property5image9use_mutexEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property5queue8in_orderEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property6buffer12use_host_ptrEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property6buffer13context_boundEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property6buffer9use_mutexEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property6noinitEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property7no_initEEEbv -_ZNK4sycl3_V16stream12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv _ZNK4sycl3_V16stream22get_max_statement_sizeEv _ZNK4sycl3_V16stream25get_work_item_buffer_sizeEv _ZNK4sycl3_V16stream4sizeEv @@ -4463,38 +4371,6 @@ _ZNK4sycl3_V16streamneERKS1_ _ZNK4sycl3_V17context11get_backendEv _ZNK4sycl3_V17context11get_devicesEv _ZNK4sycl3_V17context12get_platformEv -_ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi8property5queue15priority_normalEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property5image12use_host_ptrEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property5image13context_boundEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property5image9use_mutexEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property5queue8in_orderEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property6buffer12use_host_ptrEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property6buffer13context_boundEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property6buffer9use_mutexEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property6noinitEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property7no_initEEET_v -_ZNK4sycl3_V17context12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v -_ZNK4sycl3_V17context12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_3ext6oneapi8property5queue12priority_lowEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_3ext6oneapi8property5queue13priority_highEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_3ext6oneapi8property5queue15priority_normalEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property5image12use_host_ptrEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property5image13context_boundEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property5image9use_mutexEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property5queue8in_orderEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property6buffer12use_host_ptrEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property6buffer13context_boundEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property6buffer9use_mutexEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property6noinitEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property7no_initEEEbv -_ZNK4sycl3_V17context12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv _ZNK4sycl3_V17context3getEv _ZNK4sycl3_V17context7is_hostEv _ZNK4sycl3_V17context8get_infoINS0_4info7context15reference_countEEENS0_6detail20is_context_info_descIT_E11return_typeEv @@ -4508,38 +4384,6 @@ _ZNK4sycl3_V17context9getNativeEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb -_ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi8property5queue15priority_normalEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property5image12use_host_ptrEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property5image13context_boundEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property5image9use_mutexEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property5queue8in_orderEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property6buffer12use_host_ptrEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property6buffer13context_boundEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property6buffer9use_mutexEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property6noinitEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property7no_initEEET_v -_ZNK4sycl3_V17sampler12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v -_ZNK4sycl3_V17sampler12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_3ext6oneapi8property5queue12priority_lowEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_3ext6oneapi8property5queue13priority_highEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_3ext6oneapi8property5queue15priority_normalEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property5image12use_host_ptrEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property5image13context_boundEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property5image9use_mutexEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property5queue8in_orderEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property6buffer12use_host_ptrEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property6buffer13context_boundEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property6buffer9use_mutexEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property6noinitEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property7no_initEEEbv -_ZNK4sycl3_V17sampler12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv _ZNK4sycl3_V17sampler18get_filtering_modeEv _ZNK4sycl3_V17sampler19get_addressing_modeEv _ZNK4sycl3_V17sampler33get_coordinate_normalization_modeEv From ebed42e06837a5592cb61495ec718d6ecde4d6d1 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 27 Jun 2023 13:03:00 -0700 Subject: [PATCH 03/12] Changed queue property names. --- sycl/plugins/unified_runtime/pi2ur.hpp | 4 ++-- .../ur/adapters/level_zero/ur_level_zero_queue.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index a767626ab2443..6c7594bf62d8a 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1306,9 +1306,9 @@ inline pi_result piextQueueCreate(pi_context Context, pi_device Device, UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_LOW; if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) UrProperties.flags |= UR_QUEUE_FLAG_PRIORITY_HIGH; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_BATCHED_SUBMISSION) + if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_BATCHED) UrProperties.flags |= UR_QUEUE_FLAG_BATCHED_SUBMISSION; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_IMMEDIATE_SUBMISSION) + if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_IMMEDIATE) UrProperties.flags |= UR_QUEUE_FLAG_IMMEDIATE_SUBMISSION; ur_queue_index_properties_t IndexProperties{}; diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp index 977860e12c601..51b6733f3c24a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp @@ -1327,11 +1327,11 @@ bool ur_queue_handle_t_::isPriorityHigh() const { } bool ur_queue_handle_t_::isBatchedSubmission() const { - return ((this->Properties & UR_QUEUE_FLAG_BATCHED_SUBMISSION) != 0); + return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_BATCHED) != 0); } bool ur_queue_handle_t_::isImmediateSubmission() const { - return ((this->Properties & UR_QUEUE_FLAG_IMMEDIATE_SUBMISSION) != 0); + return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) != 0); } bool ur_queue_handle_t_::isInOrderQueue() const { From 9b7c2129a69fae4af6615412adb156fa8c51cb14 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 27 Jun 2023 16:54:02 -0700 Subject: [PATCH 04/12] Updated linux symbol file. --- sycl/test/abi/sycl_symbols_linux.dump | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a2acc0e1036e2..9562789737c2b 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4180,8 +4180,6 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext8codeplay12experimental4info6 _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext8codeplay12experimental4info6device28max_registers_per_work_groupEEENT_11return_typeEv _ZNK4sycl3_V16detail11image_plain10getSamplerEv _ZNK4sycl3_V16detail11image_plain11getRowPitchEv -<<<<<<< HEAD -======= _ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16detail11image_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -4216,7 +4214,6 @@ _ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V16detail11image_plain12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv ->>>>>>> 2ddb42c40c6737ce9faaecfdc9c5f684eba237e4 _ZNK4sycl3_V16detail11image_plain13getSlicePitchEv _ZNK4sycl3_V16detail11image_plain14getChannelTypeEv _ZNK4sycl3_V16detail11image_plain14getElementSizeEv @@ -4230,8 +4227,6 @@ _ZNK4sycl3_V16detail11stream_impl22get_max_statement_sizeEv _ZNK4sycl3_V16detail11stream_impl25get_work_item_buffer_sizeEv _ZNK4sycl3_V16detail11stream_impl4sizeEv _ZNK4sycl3_V16detail11stream_impl8get_sizeEv -<<<<<<< HEAD -======= _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -4266,7 +4261,6 @@ _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv ->>>>>>> 2ddb42c40c6737ce9faaecfdc9c5f684eba237e4 _ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv @@ -4488,8 +4482,6 @@ _ZNK4sycl3_V16kernel8get_infoINS0_4info6kernel15reference_countEEENS0_6detail19i _ZNK4sycl3_V16kernel8get_infoINS0_4info6kernel7contextEEENS0_6detail19is_kernel_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel8get_infoINS0_4info6kernel8num_argsEEENS0_6detail19is_kernel_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel9getNativeEv -<<<<<<< HEAD -======= _ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16stream12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -4524,7 +4516,6 @@ _ZNK4sycl3_V16stream12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V16stream12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V16stream12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V16stream12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv ->>>>>>> 2ddb42c40c6737ce9faaecfdc9c5f684eba237e4 _ZNK4sycl3_V16stream22get_max_statement_sizeEv _ZNK4sycl3_V16stream25get_work_item_buffer_sizeEv _ZNK4sycl3_V16stream4sizeEv @@ -4534,8 +4525,6 @@ _ZNK4sycl3_V16streamneERKS1_ _ZNK4sycl3_V17context11get_backendEv _ZNK4sycl3_V17context11get_devicesEv _ZNK4sycl3_V17context12get_platformEv -<<<<<<< HEAD -======= _ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V17context12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -4570,7 +4559,6 @@ _ZNK4sycl3_V17context12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V17context12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V17context12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V17context12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv ->>>>>>> 2ddb42c40c6737ce9faaecfdc9c5f684eba237e4 _ZNK4sycl3_V17context3getEv _ZNK4sycl3_V17context7is_hostEv _ZNK4sycl3_V17context8get_infoINS0_4info7context15reference_countEEENS0_6detail20is_context_info_descIT_E11return_typeEv @@ -4584,8 +4572,6 @@ _ZNK4sycl3_V17context9getNativeEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb -<<<<<<< HEAD -======= _ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V17sampler12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -4620,7 +4606,6 @@ _ZNK4sycl3_V17sampler12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V17sampler12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V17sampler12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V17sampler12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv ->>>>>>> 2ddb42c40c6737ce9faaecfdc9c5f684eba237e4 _ZNK4sycl3_V17sampler18get_filtering_modeEv _ZNK4sycl3_V17sampler19get_addressing_modeEv _ZNK4sycl3_V17sampler33get_coordinate_normalization_modeEv From b5c7d623b1a56988c3f858762e3af84baee026d4 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 29 Jun 2023 11:19:53 -0700 Subject: [PATCH 05/12] Fixes for internal submission mode selection. --- sycl/plugins/unified_runtime/pi2ur.hpp | 4 ++- .../level_zero/ur_level_zero_context.cpp | 2 +- .../level_zero/ur_level_zero_event.cpp | 8 +++--- .../level_zero/ur_level_zero_kernel.cpp | 2 +- .../level_zero/ur_level_zero_queue.cpp | 28 +++++++++---------- sycl/test-e2e/Plugin/queue_submit_mode.cpp | 6 ++-- 6 files changed, 26 insertions(+), 24 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 01fd05e5cf278..436c182259760 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1452,7 +1452,9 @@ inline pi_result piextQueueCreate(pi_context Context, pi_device Device, PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | - PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH)), + PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH | + PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_BATCHED | + PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_IMMEDIATE)), PI_ERROR_INVALID_VALUE); PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_context.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_context.cpp index 34b4e5ceb7229..740db3ab8212c 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_context.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_context.cpp @@ -583,7 +583,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( bool UseCopyEngine, bool AllowBatching, ze_command_queue_handle_t *ForcedCmdQueue) { // Immediate commandlists have been pre-allocated and are always available. - if (Queue->Device->ImmCommandListUsed) { + if (Queue->UsingImmCmdLists) { CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList(); if (CommandList->second.EventList.size() > ImmCmdListsEventCleanupThreshold) { diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_event.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_event.cpp index 2eaa671b21d07..0ecc8ad630c77 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_event.cpp @@ -113,7 +113,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( } } - if (!Queue->Device->ImmCommandListUsed) { + if (!Queue->UsingImmCmdLists) { std::unique_lock Lock(Queue->Mutex); resetCommandLists(Queue); } @@ -268,7 +268,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( for (auto &QueueGroup : QueueMap) { bool UseCopyEngine = QueueGroup.second.Type != ur_queue_handle_t_::queue_type::Compute; - if (Queue->Device->ImmCommandListUsed) { + if (Queue->UsingImmCmdLists) { // If immediate command lists are being used, each will act as their own // queue, so we must insert a barrier into each. for (auto &ImmCmdList : QueueGroup.second.ImmCmdLists) @@ -585,7 +585,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( } } if (auto Q = Event->UrQueue) { - if (Q->Device->ImmCommandListUsed && Q->isInOrderQueue()) + if (Q->UsingImmCmdLists && Q->isInOrderQueue()) // Use information about waited event to cleanup completed events in // the in-order queue. CleanupEventsInImmCmdLists( @@ -1027,7 +1027,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( this->UrEventList = nullptr; if (CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr) { - if (CurQueue->Device->ImmCommandListUsed) { + if (CurQueue->UsingImmCmdLists) { if (ReuseDiscardedEvents && CurQueue->isDiscardEvents()) { // If queue is in-order with discarded events and if // new command list is different from the last used command list then diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp index 04bef4242b1b7..1a45c64c6d7af 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp @@ -209,7 +209,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( if (IndirectAccessTrackingEnabled) Queue->KernelsToBeSubmitted.push_back(Kernel); - if (Queue->Device->ImmCommandListUsed && IndirectAccessTrackingEnabled) { + if (Queue->UsingImmCmdLists && IndirectAccessTrackingEnabled) { // If using immediate commandlists then gathering of indirect // references and appending to the queue (which means submission) // must be done together. diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp index bc036824d7212..6f0b956c81f2a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_queue.cpp @@ -29,7 +29,7 @@ ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked, bool QueueSynced, ur_event_handle_t CompletedEvent) { // Handle only immediate command lists here. - if (!UrQueue || !UrQueue->Device->ImmCommandListUsed) + if (!UrQueue || !UrQueue->UsingImmCmdLists) return UR_RESULT_SUCCESS; ur_event_handle_t_ *UrCompletedEvent = @@ -102,7 +102,7 @@ ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, ur_result_t resetCommandLists(ur_queue_handle_t Queue) { // Handle immediate command lists here, they don't need to be reset and we // only need to cleanup events. - if (Queue->Device->ImmCommandListUsed) { + if (Queue->UsingImmCmdLists) { UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*locked*/)); return UR_RESULT_SUCCESS; } @@ -192,7 +192,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( // because immediate command lists are not associated with level zero // queue. Conservatively return false in this case because last event is // discarded and we can't check its status. - if (Queue->Device->ImmCommandListUsed) + if (Queue->UsingImmCmdLists) return ReturnValue(false); } @@ -207,7 +207,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( for (const auto &QueueMap : {Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID}) { for (const auto &QueueGroup : QueueMap) { - if (Queue->Device->ImmCommandListUsed) { + if (Queue->UsingImmCmdLists) { // Immediate command lists are not associated with any Level Zero // queue, that's why we have to check status of events in each // immediate command list. Start checking from the end and exit early @@ -342,7 +342,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( uint32_t RepeatCount) -> ur_result_t { ur_command_list_ptr_t CommandList; while (RepeatCount--) { - if (Q->Device->ImmCommandListUsed) { + if (Q->UsingImmCmdLists) { CommandList = Q->getQueueGroup(UseCopyEngine).getImmCmdList(); } else { // Heuristically create some number of regular command-list to reuse. @@ -620,7 +620,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( ur_queue_handle_t UrQueue ///< [in] handle of the queue to be finished. ) { - if (UrQueue->Device->ImmCommandListUsed) { + if (UrQueue->UsingImmCmdLists) { // Lock automatically releases when this goes out of scope. std::scoped_lock Lock(UrQueue->Mutex); @@ -677,7 +677,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( // Reset signalled command lists and return them back to the cache of // available command lists. Events in the immediate command lists are cleaned // up in synchronize(). - if (!UrQueue->Device->ImmCommandListUsed) { + if (!UrQueue->UsingImmCmdLists) { std::unique_lock Lock(UrQueue->Mutex); resetCommandLists(UrQueue); } @@ -932,7 +932,7 @@ ur_queue_handle_t_::ur_queue_handle_t_( CopyQueueGroup.NextIndex = CopyQueueGroup.LowerIndex; // Create space to hold immediate commandlists corresponding to the // ZeQueues - if (Device->ImmCommandListUsed) { + if (UsingImmCmdLists) { CopyQueueGroup.ImmCmdLists = std::vector( CopyQueueGroup.ZeQueues.size(), CommandListMap.end()); } @@ -1040,7 +1040,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, this->LastUsedCommandList = CommandList; - if (!Device->ImmCommandListUsed) { + if (!UsingImmCmdLists) { // Batch if allowed to, but don't batch if we know there are no kernels // from this queue that are currently executing. This is intended to get // kernels started as soon as possible when there are no kernels from this @@ -1093,7 +1093,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, CaptureIndirectAccesses(); } - if (!Device->ImmCommandListUsed) { + if (!UsingImmCmdLists) { // In this mode all inner-batch events have device visibility only, // and we want the last command in the batch to signal a host-visible // event that anybody waiting for any event in the batch will @@ -1204,7 +1204,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, // Check global control to make every command blocking for debugging. if (IsBlocking || (UrL0Serialize & UrL0SerializeBlock) != 0) { - if (Device->ImmCommandListUsed) { + if (UsingImmCmdLists) { synchronize(); } else { // Wait until command lists attached to the command queue are executed. @@ -1404,7 +1404,7 @@ ur_result_t ur_queue_handle_t_::synchronize() { // so they can be reused later for (auto &QueueMap : {ComputeQueueGroupsByTID, CopyQueueGroupsByTID}) { for (auto &QueueGroup : QueueMap) { - if (Device->ImmCommandListUsed) { + if (UsingImmCmdLists) { for (auto &ImmCmdList : QueueGroup.second.ImmCmdLists) { if (ImmCmdList == this->CommandListMap.end()) continue; @@ -1420,7 +1420,7 @@ ur_result_t ur_queue_handle_t_::synchronize() { // Otherwise sync all L0 queues/immediate command-lists. for (auto &QueueMap : {ComputeQueueGroupsByTID, CopyQueueGroupsByTID}) { for (auto &QueueGroup : QueueMap) { - if (Device->ImmCommandListUsed) { + if (UsingImmCmdLists) { for (auto &ImmCmdList : QueueGroup.second.ImmCmdLists) syncImmCmdList(this, ImmCmdList); } else { @@ -1694,7 +1694,7 @@ ur_command_list_ptr_t ur_queue_handle_t_::eventOpenCommandList(ur_event_handle_t Event) { using IsCopy = bool; - if (Device->ImmCommandListUsed) { + if (UsingImmCmdLists) { // When using immediate commandlists there are no open command lists. return CommandListMap.end(); } diff --git a/sycl/test-e2e/Plugin/queue_submit_mode.cpp b/sycl/test-e2e/Plugin/queue_submit_mode.cpp index 1860289a3e3e7..a3e0c15b9cc3f 100755 --- a/sycl/test-e2e/Plugin/queue_submit_mode.cpp +++ b/sycl/test-e2e/Plugin/queue_submit_mode.cpp @@ -1,4 +1,4 @@ -// R EQUIRES: gpu, level_zero +// REQUIRES: gpu, level_zero // RUN: %{build} %level_zero_options -o %t.out // RUN: env ZE_DEBUG=4 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{run} %t.out 0 2>&1 | FileCheck %s --check-prefixes=CHECK-STD // RUN: env ZE_DEBUG=4 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{run} %t.out 1 2>&1 | FileCheck %s --check-prefixes=CHECK-IMM @@ -26,9 +26,9 @@ int main(int argc, char *argv[]) { } property_list P; if (Immediate) - P = ext::oneapi::level_zero::property::queue::immediate_submission(); + P = ext::oneapi::property::queue::immediate_submission(); else - P = ext::oneapi::level_zero::property::queue::batched_submission(); + P = ext::oneapi::property::queue::batched_submission(); // CHECK-STD: zeCommandListCreateImmediate = 1 // CHECK-IMM: zeCommandListCreateImmediate = 2 From 788d0e0de3cddab755a7373f8d981c6688bdb9bb Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Fri, 30 Jun 2023 16:20:01 -0700 Subject: [PATCH 06/12] Updated windows symbol file. --- sycl/test/abi/sycl_symbols_windows.dump | 64 +++++++++++++------------ 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d5cf3c79deae0..532c1ab08dec3 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -25,9 +25,9 @@ ??$get_info@U?$max_work_groups@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$01@23@XZ ??$get_info@U?$max_work_groups@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$id@$02@12@XZ ??$get_info@U?$max_work_groups@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$02@23@XZ -??$get_info@U?$max_work_item_sizes@$02@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$range@$02@12@XZ ??$get_info@U?$max_work_item_sizes@$00@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$range@$00@12@XZ ??$get_info@U?$max_work_item_sizes@$01@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$range@$01@12@XZ +??$get_info@U?$max_work_item_sizes@$02@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$range@$02@12@XZ ??$get_info@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ ??$get_info@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic64@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ @@ -204,6 +204,7 @@ ??$get_profiling_info@Ucommand_end@event_profiling@info@_V1@sycl@@@event@_V1@sycl@@QEBA_KXZ ??$get_profiling_info@Ucommand_start@event_profiling@info@_V1@sycl@@@event@_V1@sycl@@QEBA_KXZ ??$get_profiling_info@Ucommand_submit@event_profiling@info@_V1@sycl@@@event@_V1@sycl@@QEBA_KXZ +??$get_property@Vbatched_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVbatched_submission@0property@oneapi@ext@12@XZ ??$get_property@Vcompute_index@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVcompute_index@0property@intel@ext@12@XZ ??$get_property@Vcontext_bound@buffer@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA?AVcontext_bound@buffer@property@23@XZ ??$get_property@Vcontext_bound@buffer@property@_V1@sycl@@@context@_V1@sycl@@QEBA?AVcontext_bound@buffer@property@12@XZ @@ -217,6 +218,7 @@ ??$get_property@Vcontext_bound@image@property@_V1@sycl@@@stream@_V1@sycl@@QEBA?AVcontext_bound@image@property@12@XZ ??$get_property@Vdiscard_events@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVdiscard_events@0property@oneapi@ext@12@XZ ??$get_property@Venable_profiling@queue@property@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVenable_profiling@0property@12@XZ +??$get_property@Vimmediate_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVimmediate_submission@0property@oneapi@ext@12@XZ ??$get_property@Vin_order@queue@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA?AVin_order@queue@property@23@XZ ??$get_property@Vin_order@queue@property@_V1@sycl@@@context@_V1@sycl@@QEBA?AVin_order@queue@property@12@XZ ??$get_property@Vin_order@queue@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVin_order@queue@property@23@XZ @@ -228,10 +230,10 @@ ??$get_property@Vinitialize_to_identity@reduction@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVinitialize_to_identity@reduction@property@23@XZ ??$get_property@Vinitialize_to_identity@reduction@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA?AVinitialize_to_identity@reduction@property@12@XZ ??$get_property@Vinitialize_to_identity@reduction@property@_V1@sycl@@@stream@_V1@sycl@@QEBA?AVinitialize_to_identity@reduction@property@12@XZ -??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA?AVmem_channel@buffer@property@12@XZ ??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA?AVmem_channel@buffer@property@23@XZ -??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVmem_channel@buffer@property@23@XZ ??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@context@_V1@sycl@@QEBA?AVmem_channel@buffer@property@12@XZ +??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVmem_channel@buffer@property@23@XZ +??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA?AVmem_channel@buffer@property@12@XZ ??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@stream@_V1@sycl@@QEBA?AVmem_channel@buffer@property@12@XZ ??$get_property@Vno_init@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA?AVno_init@property@23@XZ ??$get_property@Vno_init@property@_V1@sycl@@@context@_V1@sycl@@QEBA?AVno_init@property@12@XZ @@ -298,6 +300,7 @@ ??$get_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVuse_primary_context@cuda@context@property@23@XZ ??$get_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA?AVuse_primary_context@cuda@context@property@12@XZ ??$get_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@stream@_V1@sycl@@QEBA?AVuse_primary_context@cuda@context@property@12@XZ +??$has_property@Vbatched_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Vcompute_index@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Vcontext_bound@buffer@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vcontext_bound@buffer@property@_V1@sycl@@@context@_V1@sycl@@QEBA_NXZ @@ -311,6 +314,7 @@ ??$has_property@Vcontext_bound@image@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ ??$has_property@Vdiscard_events@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Venable_profiling@queue@property@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ +??$has_property@Vimmediate_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Vin_order@queue@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vin_order@queue@property@_V1@sycl@@@context@_V1@sycl@@QEBA_NXZ ??$has_property@Vin_order@queue@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ @@ -322,11 +326,11 @@ ??$has_property@Vinitialize_to_identity@reduction@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vinitialize_to_identity@reduction@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA_NXZ ??$has_property@Vinitialize_to_identity@reduction@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ +??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@context@_V1@sycl@@QEBA_NXZ -??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ -??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA_NXZ ??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ -??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ +??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA_NXZ +??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ ??$has_property@Vno_init@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vno_init@property@_V1@sycl@@@context@_V1@sycl@@QEBA_NXZ ??$has_property@Vno_init@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ @@ -392,12 +396,6 @@ ??$has_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA_NXZ ??$has_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ -??0executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@AEBVcontext@56@@Z -?make_edge@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEAVnode@34567@0@Z -??0executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z -??1modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ -??0executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z -?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ??0AccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z @@ -505,6 +503,9 @@ ??0exception_list@_V1@sycl@@QEAA@$$QEAV012@@Z ??0exception_list@_V1@sycl@@QEAA@AEBV012@@Z ??0exception_list@_V1@sycl@@QEAA@XZ +??0executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@AEBVcontext@56@@Z +??0executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z +??0executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z ??0filter_selector@ONEAPI@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0filter_selector@ONEAPI@_V1@sycl@@QEAA@AEBV0123@@Z ??0filter_selector@ONEAPI@_V1@sycl@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z @@ -565,6 +566,10 @@ ??0kernel_id@_V1@sycl@@AEAA@PEBD@Z ??0kernel_id@_V1@sycl@@QEAA@$$QEAV012@@Z ??0kernel_id@_V1@sycl@@QEAA@AEBV012@@Z +??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z +??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z +??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z +??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVcontext@56@AEBVdevice@56@AEBVproperty_list@56@@Z ??0node@experimental@oneapi@ext@_V1@sycl@@AEAA@AEBV?$shared_ptr@Vnode_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0node@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z ??0node@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z @@ -606,8 +611,6 @@ ??0stream_impl@detail@_V1@sycl@@QEAA@_K0AEBVproperty_list@23@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@AEBUcode_location@123@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ -?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z -??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??1AccessorBaseHost@detail@_V1@sycl@@QEAA@XZ ??1AccessorImplHost@detail@_V1@sycl@@QEAA@XZ ??1LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ @@ -628,6 +631,7 @@ ??1event@_V1@sycl@@QEAA@XZ ??1exception@_V1@sycl@@UEAA@XZ ??1exception_list@_V1@sycl@@QEAA@XZ +??1executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1filter_selector@ONEAPI@_V1@sycl@@UEAA@XZ ??1filter_selector@oneapi@ext@_V1@sycl@@UEAA@XZ ??1fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAA@XZ @@ -639,6 +643,7 @@ ??1kernel@_V1@sycl@@QEAA@XZ ??1kernel_bundle_plain@detail@_V1@sycl@@QEAA@XZ ??1kernel_id@_V1@sycl@@QEAA@XZ +??1modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1node@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1platform@_V1@sycl@@QEAA@XZ ??1queue@_V1@sycl@@QEAA@XZ @@ -663,10 +668,6 @@ ??4?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z -?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$command_graph@$0A@@34567@@Z -?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEAVqueue@67@@Z -??1executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ -?add@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA?AVnode@34567@AEBVproperty_list@67@@Z ??4AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4AccessorImplHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z @@ -704,6 +705,8 @@ ??4exception@_V1@sycl@@QEAAAEAV012@AEBV012@@Z ??4exception_list@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4exception_list@_V1@sycl@@QEAAAEAV012@AEBV012@@Z +??4executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z +??4executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4filter_selector@ONEAPI@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4filter_selector@ONEAPI@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4filter_selector@oneapi@ext@_V1@sycl@@QEAAAEAV01234@$$QEAV01234@@Z @@ -723,6 +726,8 @@ ??4kernel_bundle_plain@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@AEBV012@@Z +??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z +??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4node@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z ??4node@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z ??4platform@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z @@ -833,11 +838,12 @@ ?accessGlobalOffset@stream_impl@detail@_V1@sycl@@QEAA?AV?$accessor@I$00$0EAF@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@34@AEAVhandler@34@@Z ?add@device_global_map@detail@_V1@sycl@@YAXPEBXPEBD@Z ?add@host_pipe_map@detail@_V1@sycl@@YAXPEBXPEBD@Z +?add@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA?AVnode@34567@AEBVproperty_list@67@@Z ?addHostAccessorAndWait@detail@_V1@sycl@@YAXPEAVAccessorImplHost@123@@Z ?addHostSampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVSampledImageAccessorImplHost@123@@Z ?addHostUnsampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVUnsampledImageAccessorImplHost@123@@Z +?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z ?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z -??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z ?addInteropObject@buffer_impl@detail@_V1@sycl@@QEBAXAEAV?$vector@_KV?$allocator@_K@std@@@std@@@Z ?addOrReplaceAccessorProperties@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBVproperty_list@34@@Z ?addOrReplaceAccessorProperties@buffer_plain@detail@_V1@sycl@@IEAAXAEBVproperty_list@34@@Z @@ -887,10 +893,8 @@ ?barrier@handler@_V1@sycl@@QEAAXXZ ?begin@exception_list@_V1@sycl@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@Vexception_ptr@std@@@std@@@std@@@std@@XZ ?begin@kernel_bundle_plain@detail@_V1@sycl@@IEBAPEBVdevice_image_plain@234@XZ -??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVcontext@56@AEBVdevice@56@AEBVproperty_list@56@@Z -??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z -??4executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z -??4executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z +?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEAVqueue@67@@Z +?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z ?build_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z ?canReuseHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NPEAX_K@Z ?cancel_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ @@ -931,9 +935,9 @@ ?end@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ ?end@exception_list@_V1@sycl@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@Vexception_ptr@std@@@std@@@std@@@std@@XZ ?end@kernel_bundle_plain@detail@_V1@sycl@@IEBAPEBVdevice_image_plain@234@XZ -??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z -?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ -?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z +?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEAVqueue@67@@Z +?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z +?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NXZ ?ext_codeplay_supports_fusion@queue@_V1@sycl@@QEBA_NXZ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z @@ -971,9 +975,9 @@ ?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEAVqueue@67@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ -??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z +?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z +?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ ?find_device_intersection@detail@_V1@sycl@@YA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@@Z ?flush@stream_impl@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z ?flush@stream_impl@detail@_V1@sycl@@QEAAXXZ @@ -1207,7 +1211,7 @@ ?make_device@detail@_V1@sycl@@YA?AVdevice@23@_KW4backend@23@@Z ?make_device@level_zero@oneapi@ext@_V1@sycl@@YA?AVdevice@45@AEBVplatform@45@_K@Z ?make_device@opencl@_V1@sycl@@YA?AVdevice@23@_K@Z -?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NXZ +?make_edge@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEAVnode@34567@0@Z ?make_error_code@_V1@sycl@@YA?AVerror_code@std@@W4errc@12@@Z ?make_event@detail@_V1@sycl@@YA?AVevent@23@_KAEBVcontext@23@W4backend@23@@Z ?make_event@detail@_V1@sycl@@YA?AVevent@23@_KAEBVcontext@23@_NW4backend@23@@Z @@ -4923,7 +4927,7 @@ ?throw_asynchronous@queue@_V1@sycl@@QEAAXXZ ?unmap@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@1V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?unset_flag@stream@_V1@sycl@@AEBAXI@Z -?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z +?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$command_graph@$0A@@34567@@Z ?updateHostMemory@SYCLMemObjT@detail@_V1@sycl@@IEAAXQEAX@Z ?updateHostMemory@SYCLMemObjT@detail@_V1@sycl@@IEAAXXZ ?useHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NXZ From fd269cdfa0ea0031bc7e9a555b9f509db3b5e0fb Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Fri, 7 Jul 2023 14:03:37 -0700 Subject: [PATCH 07/12] Adjust event scope management. --- .../unified_runtime/ur/adapters/level_zero/device.cpp | 4 ---- .../unified_runtime/ur/adapters/level_zero/device.hpp | 5 ----- .../unified_runtime/ur/adapters/level_zero/event.cpp | 7 +++---- .../unified_runtime/ur/adapters/level_zero/queue.cpp | 11 ++++++++--- .../unified_runtime/ur/adapters/level_zero/queue.hpp | 5 +++++ 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.cpp index 9b3113647e6f4..18b1e3b4a3ee1 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.cpp @@ -971,10 +971,6 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, ImmCommandListUsed = this->useImmediateCommandLists(); - if (ImmCommandListUsed == ImmCmdlistMode::NotUsed) { - ZeEventsScope = DeviceEventsSetting; - } - uint32_t numQueueGroups = 0; ZE2UR_CALL(zeDeviceGetCommandQueueGroupProperties, (ZeDevice, &numQueueGroups, nullptr)); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.hpp index 7edb43ab96ddf..c47613c720110 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/device.hpp @@ -135,11 +135,6 @@ struct ur_device_handle_t_ : _ur_object { // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; - // Scope of events used for events on the device - // Can be adjusted with UR_L0_DEVICE_SCOPE_EVENTS - // for non-immediate command lists - EventsScope ZeEventsScope = AllHostVisible; - bool isSubDevice() { return RootDevice != nullptr; } // Is this a Data Center GPU Max series (aka PVC)? diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/event.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/event.cpp index d113e33bf2cee..1492ee13e62dd 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/event.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/event.cpp @@ -496,7 +496,7 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( this->Mutex); if (!HostVisibleEvent) { - if (UrQueue->Device->ZeEventsScope != OnDemandHostVisibleProxy) + if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) die("getOrCreateHostVisibleEvent: missing host-visible event"); // Submit the command(s) signalling the proxy event to the queue. @@ -536,8 +536,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( ///< events to wait for completion ) { for (uint32_t I = 0; I < NumEvents; I++) { - if (EventWaitList[I]->UrQueue->Device->ZeEventsScope == - OnDemandHostVisibleProxy) { + if (EventWaitList[I]->UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) { // Make sure to add all host-visible "proxy" event signals if needed. // This ensures that all signalling commands are submitted below and // thus proxy events can be waited without a deadlock. @@ -1156,7 +1155,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( // // Make sure that event1.wait() will wait for a host-visible // event that is signalled before the command2 is enqueued. - if (CurQueue->Device->ZeEventsScope != AllHostVisible) { + if (CurQueue->ZeEventsScope != AllHostVisible) { CurQueue->executeAllOpenCommandLists(); } } diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.cpp index 550cd08a3211a..adc9a993a81d0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.cpp @@ -863,6 +863,12 @@ ur_queue_handle_t_::ur_queue_handle_t_( else UsingImmCmdLists = Device->useImmediateCommandLists(); + // Set events scope for this queue. Non-immediate can be controlled by env + // var. Immediate always uses AllHostVisible. + if (!UsingImmCmdLists) { + ZeEventsScope = DeviceEventsSetting; + } + // Compute group initialization. // First, see if the queue's device allows for round-robin or it is // fixed to one particular compute CCS (it is so for sub-sub-devices). @@ -1102,7 +1108,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, // in the command list is not empty, otherwise we are going to just create // and remove proxy event right away and dereference deleted object // afterwards. - if (Device->ZeEventsScope == LastCommandInBatchHostVisible && + if (ZeEventsScope == LastCommandInBatchHostVisible && !CommandList->second.EventList.empty()) { // If there are only internal events in the command list then we don't // need to create host proxy event. @@ -1479,8 +1485,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, if (!HostVisible.has_value()) { // Internal/discarded events do not need host-scope visibility. - HostVisible = - IsInternal ? false : Queue->Device->ZeEventsScope == AllHostVisible; + HostVisible = IsInternal ? false : Queue->ZeEventsScope == AllHostVisible; } // If event is discarded then try to get event from the queue cache. diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.hpp index 9ae5e17ea24f4..5485bfd173e61 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/queue.hpp @@ -212,6 +212,11 @@ struct ur_queue_handle_t_ : _ur_object { // constructed, the caller chooses the type of commandlists to use. bool UsingImmCmdLists = false; + // Scope of events used for events on the queue + // Can be adjusted with UR_L0_DEVICE_SCOPE_EVENTS + // for non-immediate command lists + EventsScope ZeEventsScope = AllHostVisible; + // Keeps track of the event associated with the last enqueued command into // this queue. this is used to add dependency with the last command to add // in-order semantics and updated with the latest event each time a new From e0067ff9a7f69fcfd884f3b7b5aa90725191c000 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Tue, 11 Jul 2023 13:23:23 -0700 Subject: [PATCH 08/12] Changed extension to be intel-only. --- ...> sycl_ext_intel_queue_submission_mode.asciidoc} | 12 +++++++----- sycl/include/sycl/detail/pi.h | 4 ++-- sycl/include/sycl/properties/queue_properties.def | 4 ++-- sycl/plugins/unified_runtime/pi2ur.hpp | 8 ++++---- sycl/source/detail/queue_impl.hpp | 13 ++++--------- sycl/source/feature_test.hpp.in | 1 + sycl/test-e2e/Plugin/queue_submit_mode.cpp | 4 ++-- 7 files changed, 22 insertions(+), 24 deletions(-) rename sycl/doc/extensions/supported/{sycl_ext_oneapi_queue_submission_mode.asciidoc => sycl_ext_intel_queue_submission_mode.asciidoc} (86%) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_submission_mode.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc similarity index 86% rename from sycl/doc/extensions/supported/sycl_ext_oneapi_queue_submission_mode.asciidoc rename to sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc index ef9e8ca319900..c0208db2f5152 100755 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_submission_mode.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc @@ -1,4 +1,4 @@ -= sycl_ext_oneapi_queue_submission_mode += sycl_ext_intel_queue_submission_mode :source-highlighter: coderay :coderay-linenums-mode: table @@ -48,9 +48,8 @@ This extension is implemented and fully supported by {dpcpp}. == Backend support status -This extension is currently implemented in {dpcpp} only for GPU devices and -only when using the Level Zero backend. Use of this extension in on other devices -or backends may have no effect on the submission mode. +This extension is currently implemented in {dpcpp} only for Intel GPU devices +when using the Level Zero backend. == Overview @@ -74,7 +73,7 @@ is chosen by the implementation. This extension provides a feature-test macro as described in the core SYCL specification. An implementation supporting this extension must predefine the -macro `SYCL_EXT_ONEAPI_QUEUE_SUBMISSION_MODE` to one of the values defined in +macro `SYCL_EXT_INTEL_QUEUE_SUBMISSION_MODE` to one of the values defined in the table below. Applications can test for the existence of this macro to determine if the implementation supports this feature, or applications can test the macro's value to determine which of the extension's features the @@ -102,3 +101,6 @@ may be as long or longer than the actual runtime of the kernel. In this case, do batched submissions may be preferable so that the submission overhead is amortized over a number of kernel executions. +The immediate submission mode has been extensively tested on +IntelĀ® Data Center Max Series GPUs. On other Intel GPUs the recommendation is +to use the platform defaults for submission mode. \ No newline at end of file diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index b08a8fd29ae20..a39b37437378d 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -660,8 +660,8 @@ constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_BATCHED = (1 << 7); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); +constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED = (1 << 7); +constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); // clang-format on typedef enum { diff --git a/sycl/include/sycl/properties/queue_properties.def b/sycl/include/sycl/properties/queue_properties.def index 73625da6fbe68..d3e606639b416 100644 --- a/sycl/include/sycl/properties/queue_properties.def +++ b/sycl/include/sycl/properties/queue_properties.def @@ -17,9 +17,9 @@ __SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, priority_low, QueuePriorityLow) __SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, priority_high, QueuePriorityHigh) -__SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, batched_submission, +__SYCL_DATA_LESS_PROP(ext::intel::property::queue, batched_submission, QueueSubmissionBatched) -__SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, immediate_submission, +__SYCL_DATA_LESS_PROP(ext::intel::property::queue, immediate_submission, QueueSubmissionImmediate) __SYCL_DATA_LESS_PROP(ext::oneapi::cuda::property::queue, use_default_stream, diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 9f3b4d11eb08d..26853b097675b 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1456,8 +1456,8 @@ inline pi_result piextQueueCreate(pi_context Context, pi_device Device, PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH | - PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_BATCHED | - PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_IMMEDIATE)), + PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED | + PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE)), PI_ERROR_INVALID_VALUE); PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); @@ -1484,9 +1484,9 @@ inline pi_result piextQueueCreate(pi_context Context, pi_device Device, UrProperties.flags |= UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; if (Properties[1] & __SYCL_PI_CUDA_USE_DEFAULT_STREAM) UrProperties.flags |= UR_QUEUE_FLAG_USE_DEFAULT_STREAM; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_BATCHED) + if (Properties[1] & PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED) UrProperties.flags |= UR_QUEUE_FLAG_SUBMISSION_BATCHED; - if (Properties[1] & PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_IMMEDIATE) + if (Properties[1] & PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE) UrProperties.flags |= UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE; ur_queue_index_properties_t IndexProperties{}; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 4cebd20cb4874..dfa5f60b04adc 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -477,24 +477,19 @@ class queue_impl { // Track that submission modes do not conflict. bool SubmissionSeen = false; if (PropList - .has_property()) { - if (SubmissionSeen) { - throw sycl::exception( - make_error_code(errc::invalid), - "Queue cannot be constructed with different submission modes."); - } + .has_property()) { SubmissionSeen = true; - CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_BATCHED; + CreationFlags |= PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED; } if (PropList.has_property< - ext::oneapi::property::queue::immediate_submission>()) { + ext::intel::property::queue::immediate_submission>()) { if (SubmissionSeen) { throw sycl::exception( make_error_code(errc::invalid), "Queue cannot be constructed with different submission modes."); } SubmissionSeen = true; - CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_SUBMISSION_IMMEDIATE; + CreationFlags |= PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE; } return CreationFlags; } diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index c3766cd57beeb..06ae1776299bf 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -83,6 +83,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { #define SYCL_EXT_INTEL_CACHE_CONFIG 1 #define SYCL_EXT_CODEPLAY_MAX_REGISTERS_PER_WORK_GROUP_QUERY 1 #define SYCL_EXT_ONEAPI_DEVICE_GLOBAL 1 +#define SYCL_EXT_INTEL_QUEUE_SUBMISSION_MODE 1 #ifndef __has_include #define __has_include(x) 0 diff --git a/sycl/test-e2e/Plugin/queue_submit_mode.cpp b/sycl/test-e2e/Plugin/queue_submit_mode.cpp index a3e0c15b9cc3f..ba99cc1138826 100755 --- a/sycl/test-e2e/Plugin/queue_submit_mode.cpp +++ b/sycl/test-e2e/Plugin/queue_submit_mode.cpp @@ -26,9 +26,9 @@ int main(int argc, char *argv[]) { } property_list P; if (Immediate) - P = ext::oneapi::property::queue::immediate_submission(); + P = ext::intel::property::queue::immediate_submission(); else - P = ext::oneapi::property::queue::batched_submission(); + P = ext::intel::property::queue::batched_submission(); // CHECK-STD: zeCommandListCreateImmediate = 1 // CHECK-IMM: zeCommandListCreateImmediate = 2 From 4f2e380d10584e5f4b250554ae537dc7326546ef Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 12 Jul 2023 10:15:58 -0700 Subject: [PATCH 09/12] Changed name of property. --- .../supported/sycl_ext_intel_queue_submission_mode.asciidoc | 4 ++-- sycl/include/sycl/detail/pi.h | 2 +- sycl/include/sycl/properties/queue_properties.def | 2 +- sycl/plugins/unified_runtime/pi2ur.hpp | 4 ++-- sycl/source/detail/queue_impl.hpp | 6 +++--- sycl/test-e2e/Plugin/queue_submit_mode.cpp | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc index c0208db2f5152..36329adca8a8d 100755 --- a/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc @@ -55,8 +55,8 @@ when using the Level Zero backend. This extension enables specifying one of two submission modes for a queue. -The property `batched_submission` specifies that submissions to a SYCL queue -should be collected into groups before actual submission to the hardware. +The property `no_immediate_submission` specifies that submissions to a SYCL +queue should be collected into groups before actual submission to the hardware. The property `immediate_submission` specifies that submissions to a SYCL queue should be submitted to the hardware immediately. diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index a39b37437378d..2f6cd0ce24b42 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -660,7 +660,7 @@ constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); -constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED = (1 << 7); +constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE = (1 << 7); constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); // clang-format on diff --git a/sycl/include/sycl/properties/queue_properties.def b/sycl/include/sycl/properties/queue_properties.def index d3e606639b416..7c3c3dc5016a0 100644 --- a/sycl/include/sycl/properties/queue_properties.def +++ b/sycl/include/sycl/properties/queue_properties.def @@ -17,7 +17,7 @@ __SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, priority_low, QueuePriorityLow) __SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, priority_high, QueuePriorityHigh) -__SYCL_DATA_LESS_PROP(ext::intel::property::queue, batched_submission, +__SYCL_DATA_LESS_PROP(ext::intel::property::queue, no_immediate_submission, QueueSubmissionBatched) __SYCL_DATA_LESS_PROP(ext::intel::property::queue, immediate_submission, QueueSubmissionImmediate) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 26853b097675b..a0ca8c8a1d500 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1456,7 +1456,7 @@ inline pi_result piextQueueCreate(pi_context Context, pi_device Device, PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH | - PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED | + PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE | PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE)), PI_ERROR_INVALID_VALUE); @@ -1484,7 +1484,7 @@ inline pi_result piextQueueCreate(pi_context Context, pi_device Device, UrProperties.flags |= UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; if (Properties[1] & __SYCL_PI_CUDA_USE_DEFAULT_STREAM) UrProperties.flags |= UR_QUEUE_FLAG_USE_DEFAULT_STREAM; - if (Properties[1] & PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED) + if (Properties[1] & PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE) UrProperties.flags |= UR_QUEUE_FLAG_SUBMISSION_BATCHED; if (Properties[1] & PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE) UrProperties.flags |= UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index dfa5f60b04adc..93d443ae6a2c9 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -476,10 +476,10 @@ class queue_impl { } // Track that submission modes do not conflict. bool SubmissionSeen = false; - if (PropList - .has_property()) { + if (PropList.has_property< + ext::intel::property::queue::no_immediate_submission>()) { SubmissionSeen = true; - CreationFlags |= PI_EXT_QUEUE_FLAG_SUBMISSION_BATCHED; + CreationFlags |= PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE; } if (PropList.has_property< ext::intel::property::queue::immediate_submission>()) { diff --git a/sycl/test-e2e/Plugin/queue_submit_mode.cpp b/sycl/test-e2e/Plugin/queue_submit_mode.cpp index ba99cc1138826..84f430215bcd4 100755 --- a/sycl/test-e2e/Plugin/queue_submit_mode.cpp +++ b/sycl/test-e2e/Plugin/queue_submit_mode.cpp @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { if (Immediate) P = ext::intel::property::queue::immediate_submission(); else - P = ext::intel::property::queue::batched_submission(); + P = ext::intel::property::queue::no_immediate_submission(); // CHECK-STD: zeCommandListCreateImmediate = 1 // CHECK-IMM: zeCommandListCreateImmediate = 2 From 7455f67287e7ec7c353d69f92d5b403c3c26643d Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 12 Jul 2023 12:04:28 -0700 Subject: [PATCH 10/12] Changed name of property. --- .../sycl_ext_intel_queue_submission_mode.asciidoc | 4 ++-- sycl/include/sycl/properties/queue_properties.def | 4 ++-- sycl/source/detail/queue_impl.hpp | 4 ++-- sycl/test-e2e/Plugin/queue_submit_mode.cpp | 4 ++-- sycl/test/abi/pi_level_zero_symbol_check.dump | 6 +++--- sycl/test/abi/sycl_symbols_linux.dump | 14 +++++++------- sycl/test/abi/sycl_symbols_windows.dump | 8 ++++---- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc index 36329adca8a8d..1a06a04d54866 100755 --- a/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc @@ -55,10 +55,10 @@ when using the Level Zero backend. This extension enables specifying one of two submission modes for a queue. -The property `no_immediate_submission` specifies that submissions to a SYCL +The property `no_immediate_command_list` specifies that submissions to a SYCL queue should be collected into groups before actual submission to the hardware. -The property `immediate_submission` specifies that submissions to a +The property `immediate_command_list` specifies that submissions to a SYCL queue should be submitted to the hardware immediately. When the submission mode is left unspecified a platform-specific default diff --git a/sycl/include/sycl/properties/queue_properties.def b/sycl/include/sycl/properties/queue_properties.def index 7c3c3dc5016a0..6e0f3fd700952 100644 --- a/sycl/include/sycl/properties/queue_properties.def +++ b/sycl/include/sycl/properties/queue_properties.def @@ -17,9 +17,9 @@ __SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, priority_low, QueuePriorityLow) __SYCL_DATA_LESS_PROP(ext::oneapi::property::queue, priority_high, QueuePriorityHigh) -__SYCL_DATA_LESS_PROP(ext::intel::property::queue, no_immediate_submission, +__SYCL_DATA_LESS_PROP(ext::intel::property::queue, no_immediate_command_list, QueueSubmissionBatched) -__SYCL_DATA_LESS_PROP(ext::intel::property::queue, immediate_submission, +__SYCL_DATA_LESS_PROP(ext::intel::property::queue, immediate_command_list, QueueSubmissionImmediate) __SYCL_DATA_LESS_PROP(ext::oneapi::cuda::property::queue, use_default_stream, diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ba3a4518ba712..fb9c7e455dc46 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -486,12 +486,12 @@ class queue_impl { // Track that submission modes do not conflict. bool SubmissionSeen = false; if (PropList.has_property< - ext::intel::property::queue::no_immediate_submission>()) { + ext::intel::property::queue::no_immediate_command_list>()) { SubmissionSeen = true; CreationFlags |= PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE; } if (PropList.has_property< - ext::intel::property::queue::immediate_submission>()) { + ext::intel::property::queue::immediate_command_list>()) { if (SubmissionSeen) { throw sycl::exception( make_error_code(errc::invalid), diff --git a/sycl/test-e2e/Plugin/queue_submit_mode.cpp b/sycl/test-e2e/Plugin/queue_submit_mode.cpp index 84f430215bcd4..f89854d7df2ce 100755 --- a/sycl/test-e2e/Plugin/queue_submit_mode.cpp +++ b/sycl/test-e2e/Plugin/queue_submit_mode.cpp @@ -26,9 +26,9 @@ int main(int argc, char *argv[]) { } property_list P; if (Immediate) - P = ext::intel::property::queue::immediate_submission(); + P = ext::intel::property::queue::immediate_command_list(); else - P = ext::intel::property::queue::no_immediate_submission(); + P = ext::intel::property::queue::no_immediate_command_list(); // CHECK-STD: zeCommandListCreateImmediate = 1 // CHECK-IMM: zeCommandListCreateImmediate = 2 diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index daeefdedb3066..2efc1eaa6fb3d 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -101,6 +101,8 @@ piextContextSetExtendedDeleter piextDeviceCreateWithNativeHandle piextDeviceGetNativeHandle piextDeviceSelectBinary +piextDisablePeerAccess +piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueReadHostPipe piextEnqueueWriteHostPipe @@ -115,6 +117,7 @@ piextKernelSetArgSampler piextMemCreateWithNativeHandle piextMemGetNativeHandle piextMemImageCreateWithNativeHandle +piextPeerAccessGetInfo piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -136,6 +139,3 @@ piextUSMFree piextUSMGetMemAllocInfo piextUSMHostAlloc piextUSMSharedAlloc -piextEnablePeerAccess -piextDisablePeerAccess -piextPeerAccessGetInfo diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 579b7e448df0e..8c291e1786d40 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3948,6 +3948,9 @@ _ZN4sycl3_V16detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_imp _ZN4sycl3_V16detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE _ZN4sycl3_V16device11get_devicesENS0_4info11device_typeE _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental12architectureE +_ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE +_ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_ +_ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_ _ZN4sycl3_V16deviceC1EP13_cl_device_id _ZN4sycl3_V16deviceC1ERKNS0_15device_selectorE _ZN4sycl3_V16deviceC1Ev @@ -4120,24 +4123,24 @@ _ZNK4sycl3_V15queue11get_backendEv _ZNK4sycl3_V15queue11get_contextEv _ZNK4sycl3_V15queue11is_in_orderEv _ZNK4sycl3_V15queue12get_propertyINS0_3ext5intel8property5queue13compute_indexEEET_v +_ZNK4sycl3_V15queue12get_propertyINS0_3ext5intel8property5queue22immediate_command_listEEET_v +_ZNK4sycl3_V15queue12get_propertyINS0_3ext5intel8property5queue25no_immediate_command_listEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi4cuda8property5queue18use_default_streamEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue14discard_eventsEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue15priority_normalEEET_v -_ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue18batched_submissionEEET_v -_ZNK4sycl3_V15queue12get_propertyINS0_3ext6oneapi8property5queue20immediate_submissionEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_8property5queue16enable_profilingEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_8property5queue4cuda18use_default_streamEEET_v _ZNK4sycl3_V15queue12get_propertyINS0_8property5queue8in_orderEEET_v _ZNK4sycl3_V15queue12has_propertyINS0_3ext5intel8property5queue13compute_indexEEEbv +_ZNK4sycl3_V15queue12has_propertyINS0_3ext5intel8property5queue22immediate_command_listEEEbv +_ZNK4sycl3_V15queue12has_propertyINS0_3ext5intel8property5queue25no_immediate_command_listEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi4cuda8property5queue18use_default_streamEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue12priority_lowEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue13priority_highEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue14discard_eventsEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue15priority_normalEEEbv -_ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue18batched_submissionEEEbv -_ZNK4sycl3_V15queue12has_propertyINS0_3ext6oneapi8property5queue20immediate_submissionEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue16enable_profilingEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue4cuda18use_default_streamEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue8in_orderEEEbv @@ -4460,9 +4463,6 @@ _ZNK4sycl3_V16device8get_infoINS0_4info6device8atomic64EEENS0_6detail19is_device _ZNK4sycl3_V16device8get_infoINS0_4info6device8platformEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device9vendor_idEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device9getNativeEv -_ZN4sycl3_V16device26ext_oneapi_can_access_peerERKS1_NS0_3ext6oneapi11peer_accessE -_ZN4sycl3_V16device29ext_oneapi_enable_peer_accessERKS1_ -_ZN4sycl3_V16device30ext_oneapi_disable_peer_accessERKS1_ _ZNK4sycl3_V16kernel11get_backendEv _ZNK4sycl3_V16kernel11get_contextEv _ZNK4sycl3_V16kernel13getNativeImplEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index a5503b7416855..74cafa3beef4d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -206,7 +206,6 @@ ??$get_profiling_info@Ucommand_end@event_profiling@info@_V1@sycl@@@event@_V1@sycl@@QEBA_KXZ ??$get_profiling_info@Ucommand_start@event_profiling@info@_V1@sycl@@@event@_V1@sycl@@QEBA_KXZ ??$get_profiling_info@Ucommand_submit@event_profiling@info@_V1@sycl@@@event@_V1@sycl@@QEBA_KXZ -??$get_property@Vbatched_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVbatched_submission@0property@oneapi@ext@12@XZ ??$get_property@Vcompute_index@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVcompute_index@0property@intel@ext@12@XZ ??$get_property@Vcontext_bound@buffer@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA?AVcontext_bound@buffer@property@23@XZ ??$get_property@Vcontext_bound@buffer@property@_V1@sycl@@@context@_V1@sycl@@QEBA?AVcontext_bound@buffer@property@12@XZ @@ -220,7 +219,7 @@ ??$get_property@Vcontext_bound@image@property@_V1@sycl@@@stream@_V1@sycl@@QEBA?AVcontext_bound@image@property@12@XZ ??$get_property@Vdiscard_events@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVdiscard_events@0property@oneapi@ext@12@XZ ??$get_property@Venable_profiling@queue@property@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVenable_profiling@0property@12@XZ -??$get_property@Vimmediate_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVimmediate_submission@0property@oneapi@ext@12@XZ +??$get_property@Vimmediate_command_list@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVimmediate_command_list@0property@intel@ext@12@XZ ??$get_property@Vin_order@queue@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA?AVin_order@queue@property@23@XZ ??$get_property@Vin_order@queue@property@_V1@sycl@@@context@_V1@sycl@@QEBA?AVin_order@queue@property@12@XZ ??$get_property@Vin_order@queue@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVin_order@queue@property@23@XZ @@ -237,6 +236,7 @@ ??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVmem_channel@buffer@property@23@XZ ??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA?AVmem_channel@buffer@property@12@XZ ??$get_property@Vmem_channel@buffer@property@_V1@sycl@@@stream@_V1@sycl@@QEBA?AVmem_channel@buffer@property@12@XZ +??$get_property@Vno_immediate_command_list@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVno_immediate_command_list@0property@intel@ext@12@XZ ??$get_property@Vno_init@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA?AVno_init@property@23@XZ ??$get_property@Vno_init@property@_V1@sycl@@@context@_V1@sycl@@QEBA?AVno_init@property@12@XZ ??$get_property@Vno_init@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVno_init@property@23@XZ @@ -302,7 +302,6 @@ ??$get_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA?AVuse_primary_context@cuda@context@property@23@XZ ??$get_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA?AVuse_primary_context@cuda@context@property@12@XZ ??$get_property@Vuse_primary_context@cuda@context@property@_V1@sycl@@@stream@_V1@sycl@@QEBA?AVuse_primary_context@cuda@context@property@12@XZ -??$has_property@Vbatched_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Vcompute_index@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Vcontext_bound@buffer@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vcontext_bound@buffer@property@_V1@sycl@@@context@_V1@sycl@@QEBA_NXZ @@ -316,7 +315,7 @@ ??$has_property@Vcontext_bound@image@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ ??$has_property@Vdiscard_events@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Venable_profiling@queue@property@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ -??$has_property@Vimmediate_submission@queue@property@oneapi@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ +??$has_property@Vimmediate_command_list@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Vin_order@queue@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vin_order@queue@property@_V1@sycl@@@context@_V1@sycl@@QEBA_NXZ ??$has_property@Vin_order@queue@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ @@ -333,6 +332,7 @@ ??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@sampler@_V1@sycl@@QEBA_NXZ ??$has_property@Vmem_channel@buffer@property@_V1@sycl@@@stream@_V1@sycl@@QEBA_NXZ +??$has_property@Vno_immediate_command_list@queue@property@intel@ext@_V1@sycl@@@queue@_V1@sycl@@QEBA_NXZ ??$has_property@Vno_init@property@_V1@sycl@@@buffer_plain@detail@_V1@sycl@@IEBA_NXZ ??$has_property@Vno_init@property@_V1@sycl@@@context@_V1@sycl@@QEBA_NXZ ??$has_property@Vno_init@property@_V1@sycl@@@image_plain@detail@_V1@sycl@@IEBA_NXZ From db89376882acc4fc24fb9b0fa76a1ac977e86ca3 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Wed, 12 Jul 2023 16:20:53 -0700 Subject: [PATCH 11/12] Updated documentation and changed macro name. --- ...ntel_queue_immediate_command_list.asciidoc | 160 ++++++++++++++++++ ...l_ext_intel_queue_submission_mode.asciidoc | 106 ------------ sycl/source/feature_test.hpp.in | 2 +- 3 files changed, 161 insertions(+), 107 deletions(-) create mode 100755 sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc delete mode 100755 sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc new file mode 100755 index 0000000000000..244d6324bd5b0 --- /dev/null +++ b/sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc @@ -0,0 +1,160 @@ += sycl_ext_intel_queue_immediate_command_list + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2023-2023 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 7 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This extension is implemented and fully supported by {dpcpp}. + +However, the immediate command list feature (which is exposed by this extension) +has been well-tested only on Intel (R) Data Center Max Series GPUs (aka PVC). +Use of this extension to specify immediate command lists is not recommended +for other Intel GPUs. + + +== Backend support status + +The properties added by this extension are a hint, which all backends accept. +However, in the current {dpcpp} implementation, the hint is only meaningful +on the Level Zero backend. + +== Overview + +This extension enables specifying one of two submission modes for a queue. + +The property `no_immediate_command_list` specifies that submissions to a SYCL +queue should be collected into groups before actual submission to the hardware. + +The property `immediate_command_list` specifies that submissions to a +SYCL queue should be submitted to the hardware immediately. + +When the submission mode is left unspecified a platform-specific default +is chosen by the implementation. + +When commands are submitted to a SYCL queue that uses the Level Zero backend, +those commands can be submitted to the hardware in one of two ways: +either through an immediate command list or through a standard command queue. +Commands submitted through an immediate command list are immediately submitted +to the device while commands submitted through a standard command queue may be +batched with other commands before they are submitted. By default the +implementation chooses a method that works best for most workloads. + +In most cases, applications should rely on the default behavior. +However, sometimes it is advantageous for the application to choose one method +or the other. This extension provides a way for applications to select either +of these two methods via a queue property. + +For example, when kernel runtimes are very short, the submission time on the +host may be as long or longer than the actual runtime of the kernel. In this +case, doing batched submissions may be preferable so that the submission +overhead is amortized over a number of kernel executions. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST` to one of the values +defined in the table below. Applications can test for the existence of this +macro to determine if the implementation supports this feature, or +applications can test the macro's value to determine which of the +extension's features the implementation supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + +=== New queue properties +This extension adds the following new properties that can be used when +constructing a queue object. + +```c++ +namespace ext { +namespace intel { + +// Use immediate command lists +property_list P1{property::queue::immediate_command_list()}; +queue Q1{P1}; + +// Do not use immediate command lists +property_list P2{property::queue::no_immediate_command_list()}; +queue Q2{P2}; + +... + +} // namespace intel +} // namespace ext +``` + + +Both properties are hints, which are ignored unless the backend is Level Zero. + +The property `immediate_command_list` requests that the implementation use an +immediate command list when commands are submitted to this queue. As a result, +these commands are submitted immediately to the device. + +The property `no_immediate_command_list` requests that the implementation use +a standard command queue instead of an immediate command list. As a result, +commands submitted to this queue may be batched with other commands before +being submitted to the device. + +These two properties are mutually exclusive. Constructing a queue with both +properties causes the constructor to throw a synchronous exception with +the `errc::invalid` error code. + + +== Interaction with the SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS environment variable + +{dpcpp} supports an environment variable named +SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS which also controls +the use of immediate command lists in SYCL queues. When that +environment variable is used in conjunction with the properties in this +extension, the properties take precedence. The environment variable has +no effect on queues constructed with one of these properties, however it +still affects queues that were not constructed with either of these properties. diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc deleted file mode 100755 index 1a06a04d54866..0000000000000 --- a/sycl/doc/extensions/supported/sycl_ext_intel_queue_submission_mode.asciidoc +++ /dev/null @@ -1,106 +0,0 @@ -= sycl_ext_intel_queue_submission_mode - -:source-highlighter: coderay -:coderay-linenums-mode: table - -// This section needs to be after the document title. -:doctype: book -:toc2: -:toc: left -:encoding: utf-8 -:lang: en -:dpcpp: pass:[DPC++] - -// Set the default source code type in this document to C++, -// for syntax highlighting purposes. This is needed because -// docbook uses c++ and html5 uses cpp. -:language: {basebackend@docbook:c++:cpp} - - -== Notice - -[%hardbreaks] -Copyright (C) 2023-2023 Intel Corporation. All rights reserved. - -Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks -of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by -permission by Khronos. - - -== Contact - -To report problems with this extension, please open a new issue at: - -https://github.com/intel/llvm/issues - - -== Dependencies - -This extension is written against the SYCL 2020 revision 7 specification. All -references below to the "core SYCL specification" or to section numbers in the -SYCL specification refer to that revision. - - -== Status - -This extension is implemented and fully supported by {dpcpp}. - - -== Backend support status - -This extension is currently implemented in {dpcpp} only for Intel GPU devices -when using the Level Zero backend. - -== Overview - -This extension enables specifying one of two submission modes for a queue. - -The property `no_immediate_command_list` specifies that submissions to a SYCL -queue should be collected into groups before actual submission to the hardware. - -The property `immediate_command_list` specifies that submissions to a -SYCL queue should be submitted to the hardware immediately. - -When the submission mode is left unspecified a platform-specific default -is chosen by the implementation. - - - - -== Specification - -=== Feature test macro - -This extension provides a feature-test macro as described in the core SYCL -specification. An implementation supporting this extension must predefine the -macro `SYCL_EXT_INTEL_QUEUE_SUBMISSION_MODE` to one of the values defined in -the table below. Applications can test for the existence of this macro to -determine if the implementation supports this feature, or applications can test -the macro's value to determine which of the extension's features the -implementation supports. - -[%header,cols="1,5"] -|=== -|Value -|Description - -|1 -|Initial version of this extension. -|=== - - -=== Guidelines for using queue submission properties - -The default queue submission mode chosen by the implementation is usually -optimal. - -In some cases it may be necessary to explicitly select -a submission mode based on the types of activities the queue is expected to handle. -For example, when kernel runtimes are very short the submission time on the host -may be as long or longer than the actual runtime of the kernel. In this case, doing -batched submissions may be preferable so that the submission overhead is amortized -over a number of kernel executions. - -The immediate submission mode has been extensively tested on -IntelĀ® Data Center Max Series GPUs. On other Intel GPUs the recommendation is -to use the platform defaults for submission mode. \ No newline at end of file diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 06ae1776299bf..6752728771626 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -83,7 +83,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { #define SYCL_EXT_INTEL_CACHE_CONFIG 1 #define SYCL_EXT_CODEPLAY_MAX_REGISTERS_PER_WORK_GROUP_QUERY 1 #define SYCL_EXT_ONEAPI_DEVICE_GLOBAL 1 -#define SYCL_EXT_INTEL_QUEUE_SUBMISSION_MODE 1 +#define SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST 1 #ifndef __has_include #define __has_include(x) 0 From a3c473f097d01e272891a7a94de9118de93c0029 Mon Sep 17 00:00:00 2001 From: Rajiv Deodhar Date: Thu, 13 Jul 2023 08:31:31 -0700 Subject: [PATCH 12/12] Updated doc. --- ...ntel_queue_immediate_command_list.asciidoc | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc index 244d6324bd5b0..4c929de9cf55d 100755 --- a/sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_intel_queue_immediate_command_list.asciidoc @@ -59,17 +59,6 @@ on the Level Zero backend. == Overview -This extension enables specifying one of two submission modes for a queue. - -The property `no_immediate_command_list` specifies that submissions to a SYCL -queue should be collected into groups before actual submission to the hardware. - -The property `immediate_command_list` specifies that submissions to a -SYCL queue should be submitted to the hardware immediately. - -When the submission mode is left unspecified a platform-specific default -is chosen by the implementation. - When commands are submitted to a SYCL queue that uses the Level Zero backend, those commands can be submitted to the hardware in one of two ways: either through an immediate command list or through a standard command queue. @@ -115,21 +104,13 @@ This extension adds the following new properties that can be used when constructing a queue object. ```c++ -namespace ext { -namespace intel { - -// Use immediate command lists -property_list P1{property::queue::immediate_command_list()}; -queue Q1{P1}; +namespace sycl::ext::intel::property::queue { -// Do not use immediate command lists -property_list P2{property::queue::no_immediate_command_list()}; -queue Q2{P2}; +struct immediate_command_list {}; +struct no_immediate_command_list {}; -... +} // namespace sycl::ext::intel::property::queue -} // namespace intel -} // namespace ext ``` @@ -148,6 +129,22 @@ These two properties are mutually exclusive. Constructing a queue with both properties causes the constructor to throw a synchronous exception with the `errc::invalid` error code. +== Example +```c++ +#include + +namespace syclintel = sycl::ext::intel; + +int main() { + // Use immediate command lists + sycl::queue q1{syclintel::property::queue::immediate_command_list{}}; + ... + + // Do not use immediate command lists + sycl::queue q2{syclintel::property::queue::no_immediate_command_list{}}; + ... +} +``` == Interaction with the SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS environment variable