diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 5e003771d06c8..fde97657ff641 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -47,8 +47,11 @@ namespace { // This is an experimental option to test performance of device to device copy // operations on copy engines (versus compute engine) static const bool UseCopyEngineForD2DCopy = [] { - const char *CopyEngineForD2DCopy = + const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE_FOR_D2D_COPY"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY"); + const char *CopyEngineForD2DCopy = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + return (CopyEngineForD2DCopy && (std::stoi(CopyEngineForD2DCopy) != 0)); }(); @@ -56,8 +59,12 @@ static const bool UseCopyEngineForD2DCopy = [] { // available in the device, in Level Zero plugin for copy operations submitted // to an in-order queue. The default is 1. static const bool UseCopyEngineForInOrderQueue = [] { - const char *CopyEngineForInOrderQueue = + const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE"); + const char *CopyEngineForInOrderQueue = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + return (!CopyEngineForInOrderQueue || (std::stoi(CopyEngineForInOrderQueue) != 0)); }(); @@ -65,8 +72,12 @@ static const bool UseCopyEngineForInOrderQueue = [] { // This is an experimental option that allows the use of multiple command lists // when submitting barriers. The default is 0. static const bool UseMultipleCmdlistBarriers = [] { - const char *UseMultipleCmdlistBarriersFlag = + const char *UrRet = std::getenv("UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS"); + const char *UseMultipleCmdlistBarriersFlag = + UrRet ? UrRet : (PiRet ? 
PiRet : nullptr); + if (!UseMultipleCmdlistBarriersFlag) return true; return std::stoi(UseMultipleCmdlistBarriersFlag) > 0; @@ -75,8 +86,11 @@ static const bool UseMultipleCmdlistBarriers = [] { // This is an experimental option that allows to disable caching of events in // the context. static const bool DisableEventsCaching = [] { + const char *UrRet = std::getenv("UR_L0_DISABLE_EVENTS_CACHING"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING"); const char *DisableEventsCachingFlag = - std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING"); + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (!DisableEventsCachingFlag) return false; return std::stoi(DisableEventsCachingFlag) != 0; @@ -85,8 +99,11 @@ static const bool DisableEventsCaching = [] { // This is an experimental option that allows reset and reuse of uncompleted // events in the in-order queue with discard_events property. static const bool ReuseDiscardedEvents = [] { + const char *UrRet = std::getenv("UR_L0_REUSE_DISCARDED_EVENTS"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_REUSE_DISCARDED_EVENTS"); const char *ReuseDiscardedEventsFlag = - std::getenv("SYCL_PI_LEVEL_ZERO_REUSE_DISCARDED_EVENTS"); + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (!ReuseDiscardedEventsFlag) return true; return std::stoi(ReuseDiscardedEventsFlag) > 0; @@ -95,8 +112,11 @@ static const bool ReuseDiscardedEvents = [] { // Due to a bug with 2D memory copy to and from non-USM pointers, this option is // disabled by default. static const bool UseMemcpy2DOperations = [] { + const char *UrRet = std::getenv("UR_L0_USE_NATIVE_USM_MEMCPY2D"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_NATIVE_USM_MEMCPY2D"); const char *UseMemcpy2DOperationsFlag = - std::getenv("SYCL_PI_LEVEL_ZERO_USE_NATIVE_USM_MEMCPY2D"); + UrRet ? UrRet : (PiRet ? 
PiRet : nullptr); + if (!UseMemcpy2DOperationsFlag) return false; return std::stoi(UseMemcpy2DOperationsFlag) > 0; @@ -130,7 +150,9 @@ static inline pi_result mapError(ze_result_t Result) { // paths be less likely affected. // static bool doEagerInit = [] { - const char *EagerInit = std::getenv("SYCL_EAGER_INIT"); + const char *UrRet = std::getenv("UR_L0_EAGER_INIT"); + const char *PiRet = std::getenv("SYCL_EAGER_INIT"); + const char *EagerInit = UrRet ? UrRet : (PiRet ? PiRet : nullptr); return EagerInit ? std::atoi(EagerInit) != 0 : false; }(); @@ -138,8 +160,11 @@ static bool doEagerInit = [] { // here. Setting it to 256 gave best possible performance for several // benchmarks. static const pi_uint32 MaxNumEventsPerPool = [] { - const auto MaxNumEventsPerPoolEnv = - std::getenv("ZE_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL"); + const char *UrRet = std::getenv("UR_L0_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL"); + const char *PiRet = std::getenv("ZE_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL"); + const char *MaxNumEventsPerPoolEnv = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + pi_uint32 Result = MaxNumEventsPerPoolEnv ? std::atoi(MaxNumEventsPerPoolEnv) : 256; if (Result <= 0) @@ -177,7 +202,7 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle) { } // anonymous namespace -// SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE can be set to an integer (>=0) in +// UR_L0_USE_COMPUTE_ENGINE can be set to an integer (>=0) in // which case all compute commands will be submitted to the command-queue // with the given index in the compute command group. If it is instead set // to negative then all available compute engines may be used. @@ -185,8 +210,10 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle) { // The default value is "0". 
// static const std::pair getRangeOfAllowedComputeEngines() { - static const char *EnvVar = - std::getenv("SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE"); + static const char *UrRet = std::getenv("UR_L0_USE_COMPUTE_ENGINE"); + static const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE"); + static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + // If the environment variable is not set only use "0" CCS for now. // TODO: allow all CCSs when HW support is complete. if (!EnvVar) @@ -466,8 +493,13 @@ pi_result _pi_queue::addEventToQueueCache(pi_event Event) { // If number of events in the immediate command list exceeds this threshold then // cleanup process for those events is executed. static const size_t ImmCmdListsEventCleanupThreshold = [] { - const char *ImmCmdListsEventCleanupThresholdStr = std::getenv( + const char *UrRet = + std::getenv("UR_L0_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD"); + const char *PiRet = std::getenv( "SYCL_PI_LEVEL_ZERO_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD"); + const char *ImmCmdListsEventCleanupThresholdStr = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + static constexpr int Default = 1000; if (!ImmCmdListsEventCleanupThresholdStr) return Default; @@ -484,8 +516,12 @@ static const size_t ImmCmdListsEventCleanupThreshold = [] { // Get value of the threshold for number of active command lists allowed before // we start heuristically cleaning them up. static const size_t CmdListsCleanupThreshold = [] { - const char *CmdListsCleanupThresholdStr = + const char *UrRet = std::getenv("UR_L0_COMMANDLISTS_CLEANUP_THRESHOLD"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_COMMANDLISTS_CLEANUP_THRESHOLD"); + const char *CmdListsCleanupThresholdStr = + UrRet ? UrRet : (PiRet ? 
PiRet : nullptr); + static constexpr int Default = 20; if (!CmdListsCleanupThresholdStr) return Default; @@ -826,9 +862,17 @@ static const zeCommandListBatchConfig ZeCommandListBatchConfig(bool IsCopy) { zeCommandListBatchConfig Config{}; // default initialize // Default value of 0. This specifies to use dynamic batch size adjustment. - const auto BatchSizeStr = - (IsCopy) ? std::getenv("SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE") - : std::getenv("SYCL_PI_LEVEL_ZERO_BATCH_SIZE"); + const char *UrRet = nullptr; + const char *PiRet = nullptr; + if (IsCopy) { + UrRet = std::getenv("UR_L0_COPY_BATCH_SIZE"); + PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE"); + } else { + UrRet = std::getenv("UR_L0_BATCH_SIZE"); + PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_BATCH_SIZE"); + } + const char *BatchSizeStr = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (BatchSizeStr) { pi_int32 BatchSizeStrVal = std::atoi(BatchSizeStr); // Level Zero may only support a limted number of commands per command @@ -861,10 +905,9 @@ static const zeCommandListBatchConfig ZeCommandListBatchConfig(bool IsCopy) { Val = std::stoi(BatchConfig.substr(Pos)); } catch (...) 
{ if (IsCopy) - urPrint( - "SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE: failed to parse value\n"); + urPrint("UR_L0_COPY_BATCH_SIZE: failed to parse value\n"); else - urPrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: failed to parse value\n"); + urPrint("UR_L0_BATCH_SIZE: failed to parse value\n"); break; } switch (Ord) { @@ -887,21 +930,20 @@ static const zeCommandListBatchConfig ZeCommandListBatchConfig(bool IsCopy) { die("Unexpected batch config"); } if (IsCopy) - urPrint("SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE: dynamic batch param " + urPrint("UR_L0_COPY_BATCH_SIZE: dynamic batch param " "#%d: %d\n", (int)Ord, (int)Val); else - urPrint( - "SYCL_PI_LEVEL_ZERO_BATCH_SIZE: dynamic batch param #%d: %d\n", - (int)Ord, (int)Val); + urPrint("UR_L0_BATCH_SIZE: dynamic batch param #%d: %d\n", (int)Ord, + (int)Val); }; } else { // Negative batch sizes are silently ignored. if (IsCopy) - urPrint("SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE: ignored negative value\n"); + urPrint("UR_L0_COPY_BATCH_SIZE: ignored negative value\n"); else - urPrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: ignored negative value\n"); + urPrint("UR_L0_BATCH_SIZE: ignored negative value\n"); } } return Config; @@ -922,7 +964,10 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] { // Temporarily check whether immediate command list env var has been set. This // affects default behavior of make_queue API. static const bool ImmediateCommandlistEnvVarIsSet = [] { - return std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); + const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); + const char *PiRet = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); + return (UrRet != nullptr || 
PiRet != nullptr); }(); _pi_queue::_pi_queue(std::vector &ComputeQueues, @@ -1893,9 +1938,9 @@ pi_result _pi_queue::executeOpenCommandList(bool IsCopy) { } static const bool FilterEventWaitList = [] { - const char *Ret = std::getenv("SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST"); - const bool RetVal = Ret ? std::stoi(Ret) : 0; - return RetVal; + const char *UrRet = std::getenv("UR_L0_FILTER_EVENT_WAIT_LIST"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST"); + return (UrRet ? std::stoi(UrRet) : (PiRet ? std::stoi(PiRet) : 0)); }(); pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList( @@ -2398,7 +2443,7 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, return ReturnValue(pi_uint32{Context->RefCount.load()}); case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: // 2D USM memcpy is supported unless disabled through - // SYCL_PI_LEVEL_ZERO_USE_NATIVE_USM_MEMCPY2D. + // UR_L0_USE_NATIVE_USM_MEMCPY2D. return ReturnValue(pi_bool{UseMemcpy2DOperations}); case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT: case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: @@ -2901,8 +2946,12 @@ pi_result piQueueFinish(pi_queue Queue) { // TODO: this currently exhibits some issues in the driver, so // we control this with an env var. Remove this control when // we settle one way or the other. - static bool HoldLock = - std::getenv("SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK") != nullptr; + static const char *UrRet = std::getenv("UR_L0_QUEUE_FINISH_HOLD_LOCK"); + static const char *PiRet = + std::getenv("SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK"); + static const bool HoldLock = + UrRet ? std::stoi(UrRet) : (PiRet ? std::stoi(PiRet) : 0); + if (!HoldLock) { Lock.unlock(); } @@ -5793,7 +5842,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, // If we have a list of events to make the barrier from, then we can create a // barrier on these and use the resulting event as our future barrier. 
// We use the same approach if - // SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a + // UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a // positive value. // We use the same approach if we have in-order queue because every command // depends on previous one, so we don't need to insert barrier to multiple @@ -6346,8 +6395,10 @@ pi_result piEnqueueMemBufferCopyRect( // Default to using compute engine for fill operation, but allow to // override this with an environment variable. static bool PreferCopyEngine = [] { - const char *Env = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_FILL"); - return Env ? std::stoi(Env) != 0 : false; + const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE_FOR_FILL"); + const char *PiRet = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_FILL"); + return (UrRet ? std::stoi(UrRet) : (PiRet ? std::stoi(PiRet) : 0)); }(); // PI interfaces must have queue's and buffer's mutexes locked on entry. @@ -7188,7 +7239,10 @@ enum class USMAllocationForceResidencyType { // Returns the desired USM residency setting static USMAllocationForceResidencyType USMAllocationForceResidency = [] { - const auto Str = std::getenv("SYCL_PI_LEVEL_ZERO_USM_RESIDENT"); + const char *UrRet = std::getenv("UR_L0_USM_RESIDENT"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USM_RESIDENT"); + const char *Str = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (!Str) return USMAllocationForceResidencyType::P2PDevices; switch (std::atoi(Str)) { @@ -8599,8 +8653,12 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, // cross-tile traffic. // static const bool SingleRootDeviceBufferMigration = [] { - const char *EnvStr = + const char *UrRet = + std::getenv("UR_L0_SINGLE_ROOT_DEVICE_BUFFER_MIGRATION"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_SINGLE_ROOT_DEVICE_BUFFER_MIGRATION"); + const char *EnvStr = UrRet ? UrRet : (PiRet ? 
PiRet : nullptr); + if (EnvStr) return (std::stoi(EnvStr) != 0); // The default is to migrate normally, which may not always be the diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 4bd077a2b0c81..ae7ae6375bea0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -1028,7 +1028,7 @@ ur_result_t urDeviceGetInfo( return UR_RESULT_SUCCESS; } -// SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE can be set to an integer value, or +// UR_L0_USE_COPY_ENGINE can be set to an integer value, or // a pair of integer values of the form "lower_index:upper_index". // Here, the indices point to copy engines in a list of all available copy // engines. @@ -1038,7 +1038,10 @@ ur_result_t urDeviceGetInfo( // available copy engines can be used. const std::pair getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { - static const char *EnvVar = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + static const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); + static const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + // If the environment variable is not set, no copy engines are used when // immediate commandlists are being used. For standard commandlists all are // used. 
@@ -1061,7 +1064,7 @@ getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { - urPrint("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " + urPrint("UR_L0_USE_COPY_ENGINE: invalid value provided, " "default set.\n"); LowerCopyEngineIndex = 0; UpperCopyEngineIndex = INT_MAX; @@ -1085,8 +1088,10 @@ _ur_device_handle_t::useImmediateCommandLists() { // If immediate commandlist setting is not explicitly set, then use the device // default. static const int ImmediateCommandlistsSetting = [] { + char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); + char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); const char *ImmediateCommandlistsSettingStr = - std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); + UrRet ? UrRet : (PiRet ? PiRet : nullptr); if (!ImmediateCommandlistsSettingStr) return -1; return std::stoi(ImmediateCommandlistsSettingStr); @@ -1114,8 +1119,10 @@ _ur_device_handle_t::useImmediateCommandLists() { // Get value of device scope events env var setting or default setting static const EventsScope DeviceEventsSetting = [] { + char *UrRet = std::getenv("UR_L0_DEVICE_SCOPE_EVENTS"); + char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS"); const char *DeviceEventsSettingStr = - std::getenv("SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS"); + UrRet ? UrRet : (PiRet ? PiRet : nullptr); if (DeviceEventsSettingStr) { // Override the default if user has explicitly chosen the events scope. switch (std::stoi(DeviceEventsSettingStr)) { @@ -1532,7 +1539,7 @@ ur_result_t urDevicePartition( // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. 
// However, if - // SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that + // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that // still expose CSlices in partitioning by affinity domain for compatibility // reasons. if (Properties[0] == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp index c94080e547c63..2698355522225 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp @@ -175,7 +175,7 @@ struct _ur_device_handle_t : _ur_object { ImmCmdlistMode ImmCommandListUsed{}; // Scope of events used for events on the device - // Can be adjusted with SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS + // Can be adjusted with UR_L0_DEVICE_SCOPE_EVENTS // for non-immediate command lists EventsScope ZeEventsScope = AllHostVisible; diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp index 1c8a665aafc43..16b4c1ef4e582 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.hpp @@ -335,13 +335,17 @@ extern const bool UseUSMAllocator; // Controls support of the indirect access kernels and deferred memory release. const bool IndirectAccessTrackingEnabled = [] { - return std::getenv("SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY") != - nullptr; + char *UrRet = std::getenv("UR_L0_TRACK_INDIRECT_ACCESS_MEMORY"); + char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY"); + const bool RetVal = UrRet ? std::stoi(UrRet) : (PiRet ? 
std::stoi(PiRet) : 0); + return RetVal; }(); const bool ExposeCSliceInAffinityPartitioning = [] { - const char *Flag = + char *UrRet = std::getenv("UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING"); + char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING"); + const char *Flag = UrRet ? UrRet : (PiRet ? PiRet : 0); return Flag ? std::atoi(Flag) != 0 : false; }(); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_mem.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_mem.cpp index f193a1cd8ad63..080cb2eb5d201 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_mem.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_mem.cpp @@ -10,6 +10,9 @@ bool ShouldUseUSMAllocator() { // Enable allocator by default if it's not explicitly disabled - return std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_USM_ALLOCATOR") == nullptr; + const char *UrRet = std::getenv("UR_L0_DISABLE_USM_ALLOCATOR"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_USM_ALLOCATOR"); + const char *Ret = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + return Ret == nullptr; } const bool UseUSMAllocator = ShouldUseUSMAllocator(); \ No newline at end of file diff --git a/sycl/plugins/unified_runtime/ur/ur.cpp b/sycl/plugins/unified_runtime/ur/ur.cpp index 4997f72063d34..67a6ac4bb391d 100644 --- a/sycl/plugins/unified_runtime/ur/ur.cpp +++ b/sycl/plugins/unified_runtime/ur/ur.cpp @@ -12,7 +12,10 @@ // Controls tracing UR calls from within the UR itself. bool PrintTrace = [] { - const char *Trace = std::getenv("SYCL_PI_TRACE"); + const char *UrRet = std::getenv("UR_L0_TRACE"); + const char *PiRet = std::getenv("SYCL_PI_TRACE"); + const char *Trace = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + const int TraceValue = Trace ? 
std::stoi(Trace) : 0; if (TraceValue == -1 || TraceValue == 2) { // Means print all traces return true; diff --git a/sycl/plugins/unified_runtime/ur/ur.hpp b/sycl/plugins/unified_runtime/ur/ur.hpp index 0ce7fe9b9885e..70a52aabe290c 100644 --- a/sycl/plugins/unified_runtime/ur/ur.hpp +++ b/sycl/plugins/unified_runtime/ur/ur.hpp @@ -67,8 +67,9 @@ const int UR_EXT_USM_CAPS_CONCURRENT_ATOMIC_ACCESS = 1 << 3; // overhead from mutex locking. Default value is 0 which means that single // thread mode is disabled. static const bool SingleThreadMode = [] { - const char *Ret = std::getenv("SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE"); - const bool RetVal = Ret ? std::stoi(Ret) : 0; + const char *UrRet = std::getenv("UR_L0_SINGLE_THREAD_MODE"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE"); + const bool RetVal = UrRet ? std::stoi(UrRet) : (PiRet ? std::stoi(PiRet) : 0); return RetVal; }(); diff --git a/sycl/plugins/unified_runtime/ur/usm_allocator.cpp b/sycl/plugins/unified_runtime/ur/usm_allocator.cpp index 31490a6ce8513..74e143306c672 100644 --- a/sycl/plugins/unified_runtime/ur/usm_allocator.cpp +++ b/sycl/plugins/unified_runtime/ur/usm_allocator.cpp @@ -840,7 +840,7 @@ USMAllocContext::~USMAllocContext() { std::cout << "Current Pool Size " << pImpl->getParams().limits->TotalSize.load() << std::endl; const char *Label = MT; - std::cout << "Suggested Setting: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=;" + std::cout << "Suggested Setting: UR_L0_USM_ALLOCATOR=;" << std::string(1, tolower(*Label)) << std::string(Label + 1) << ":" << HighBucketSize << "," << HighPeakSlabsInUse << ",64K" << std::endl; diff --git a/sycl/plugins/unified_runtime/ur/usm_allocator_config.cpp b/sycl/plugins/unified_runtime/ur/usm_allocator_config.cpp index 67e70d2c5f401..30b67945ad28a 100644 --- a/sycl/plugins/unified_runtime/ur/usm_allocator_config.cpp +++ b/sycl/plugins/unified_runtime/ur/usm_allocator_config.cpp @@ -61,7 +61,7 @@ USMAllocatorConfig::USMAllocatorConfig() { 
Configs[MemType::SharedReadOnly].SlabMinSize = 2_MB; // Parse optional parameters of this form: - // SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=[EnableBuffers][;[MaxPoolSize][;memtypelimits]...] + // UR_L0_USM_ALLOCATOR=[EnableBuffers][;[MaxPoolSize][;memtypelimits]...] // memtypelimits: [:] // memtype: host|device|shared // limits: [MaxPoolableSize][,[Capacity][,SlabMinSize]] @@ -83,7 +83,7 @@ USMAllocatorConfig::USMAllocatorConfig() { // Default 64KB host and device, 2MB shared. // // Example of usage: - // SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=1;32M;host:1M,4,64K;device:1M,4,64K;shared:0,0,2M + // UR_L0_USM_ALLOCATOR=1;32M;host:1M,4,64K;device:1M,4,64K;shared:0,0,2M auto GetValue = [=](std::string &Param, size_t Length, size_t &Setting) { size_t Multiplier = 1; @@ -181,7 +181,10 @@ USMAllocatorConfig::USMAllocatorConfig() { auto limits = std::make_shared(); // Update pool settings if specified in environment. - char *PoolParams = getenv("SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR"); + const char *UrRet = std::getenv("UR_L0_USM_ALLOCATOR"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR"); + const char *PoolParams = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (PoolParams != nullptr) { std::string Params(PoolParams); size_t Pos = Params.find(';'); @@ -220,7 +223,13 @@ USMAllocatorConfig::USMAllocatorConfig() { } } - char *PoolTraceVal = getenv("SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR_TRACE"); + const char *UrRetUsmAllocator = std::getenv("UR_L0_USM_ALLOCATOR_TRACE"); + const char *PiRetUsmAllocator = + std::getenv("SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR_TRACE"); + const char *PoolTraceVal = + UrRetUsmAllocator ? UrRetUsmAllocator + : (PiRetUsmAllocator ? PiRetUsmAllocator : nullptr); + int PoolTrace = 0; if (PoolTraceVal != nullptr) { PoolTrace = std::atoi(PoolTraceVal);