@@ -170,7 +170,10 @@ enum DebugLevel {
170170};
171171
172172// Controls Level Zero calls tracing.
173- static int ZeDebug = ZE_DEBUG_NONE;
173+ static const int ZeDebug = [] {
174+ const char *DebugMode = std::getenv (" ZE_DEBUG" );
175+ return DebugMode ? std::atoi (DebugMode) : ZE_DEBUG_NONE;
176+ }();
174177
175178static void zePrint (const char *Format, ...) {
176179 if (ZeDebug & ZE_DEBUG_BASIC) {
@@ -843,20 +846,100 @@ static const int ZeMaxCommandListCacheSize = [] {
843846 return CommandListCacheSizeValue;
844847}();
845848
846- static const pi_uint32 ZeCommandListBatchSize = [] {
849+ // Configuration of the command-list batching.
850+ typedef struct {
851+ // Default value of 0. This specifies to use dynamic batch size adjustment.
852+ // Other values will try to collect specified amount of commands.
853+ pi_uint32 Size{0 };
854+
855+ // If doing dynamic batching, specifies start batch size.
856+ pi_uint32 DynamicSizeStart{4 };
857+
858+ // The maximum size for dynamic batch.
859+ pi_uint32 DynamicSizeMax{16 };
860+
861+ // The step size for dynamic batch increases.
862+ pi_uint32 DynamicSizeStep{1 };
863+
864+ // Thresholds for when increase batch size (number of closed early is small
865+ // and number of closed full is high).
866+ pi_uint32 NumTimesClosedEarlyThreshold{2 };
867+ pi_uint32 NumTimesClosedFullThreshold{10 };
868+
869+ // Tells the starting size of a batch.
870+ pi_uint32 startSize () const { return Size > 0 ? Size : DynamicSizeStart; }
871+ // Tells is we are doing dynamic batch size adjustment.
872+ bool dynamic () const { return Size == 0 ; }
873+ } zeCommandListBatchConfig;
874+
875+ static const zeCommandListBatchConfig ZeCommandListBatch = [] {
876+ zeCommandListBatchConfig Config{}; // default initialize
877+
847878 // Default value of 0. This specifies to use dynamic batch size adjustment.
848- pi_uint32 BatchSizeVal = 0 ;
849879 const auto BatchSizeStr = std::getenv (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE" );
850880 if (BatchSizeStr) {
851881 pi_int32 BatchSizeStrVal = std::atoi (BatchSizeStr);
852882 // Level Zero may only support a limted number of commands per command
853883 // list. The actual upper limit is not specified by the Level Zero
854884 // Specification. For now we allow an arbitrary upper limit.
855- // Negative numbers will be silently ignored.
856- if (BatchSizeStrVal >= 0 )
857- BatchSizeVal = BatchSizeStrVal;
885+ if (BatchSizeStrVal > 0 ) {
886+ Config.Size = BatchSizeStrVal;
887+ } else if (BatchSizeStrVal == 0 ) {
888+ Config.Size = 0 ;
889+ // We are requested to do dynamic batching. Collect specifics, if any.
890+ // The extended format supported is ":" separated values.
891+ //
892+ // NOTE: these extra settings are experimental and are intended to
893+ // be used only for finding a better default heuristic.
894+ //
895+ std::string BatchConfig (BatchSizeStr);
896+ size_t Ord = 0 ;
897+ size_t Pos = 0 ;
898+ while (true ) {
899+ if (++Ord > 5 )
900+ break ;
901+
902+ Pos = BatchConfig.find (" :" , Pos);
903+ if (Pos == std::string::npos)
904+ break ;
905+ ++Pos; // past the ":"
906+
907+ pi_uint32 Val;
908+ try {
909+ Val = std::stoi (BatchConfig.substr (Pos));
910+ } catch (...) {
911+ zePrint (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE: failed to parse value\n " );
912+ break ;
913+ }
914+ switch (Ord) {
915+ case 1 :
916+ Config.DynamicSizeStart = Val;
917+ break ;
918+ case 2 :
919+ Config.DynamicSizeMax = Val;
920+ break ;
921+ case 3 :
922+ Config.DynamicSizeStep = Val;
923+ break ;
924+ case 4 :
925+ Config.NumTimesClosedEarlyThreshold = Val;
926+ break ;
927+ case 5 :
928+ Config.NumTimesClosedFullThreshold = Val;
929+ break ;
930+ default :
931+ die (" Unexpected batch config" );
932+ }
933+ zePrint (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE: dynamic batch param #%d: %d\n " ,
934+ (int )Ord, (int )Val);
935+ };
936+
937+ } else {
938+ // Negative batch sizes are silently ignored.
939+ zePrint (" SYCL_PI_LEVEL_ZERO_BATCH_SIZE: ignored negative value\n " );
940+ }
858941 }
859- return BatchSizeVal ;
942+ return Config ;
860943}();
861944
862945// Retrieve an available command list to be used in a PI call
@@ -1000,7 +1083,7 @@ pi_result _pi_context::getAvailableCommandList(
10001083
10011084void _pi_queue::adjustBatchSizeForFullBatch () {
10021085 // QueueBatchSize of 0 means never allow batching.
1003- if (QueueBatchSize == 0 || !UseDynamicBatching )
1086+ if (QueueBatchSize == 0 || !ZeCommandListBatch. dynamic () )
10041087 return ;
10051088
10061089 NumTimesClosedFull += 1 ;
@@ -1009,19 +1092,20 @@ void _pi_queue::adjustBatchSizeForFullBatch() {
10091092 // the number of times it has been closed full is high, then raise
10101093 // the batching size slowly. Don't raise it if it is already pretty
10111094 // high.
1012- if (NumTimesClosedEarly <= 2 && NumTimesClosedFull > 10 ) {
1013- if (QueueBatchSize < 16 ) {
1014- QueueBatchSize = QueueBatchSize + 1 ;
1095+ if (NumTimesClosedEarly <= ZeCommandListBatch.NumTimesClosedEarlyThreshold &&
1096+ NumTimesClosedFull > ZeCommandListBatch.NumTimesClosedFullThreshold ) {
1097+ if (QueueBatchSize < ZeCommandListBatch.DynamicSizeMax ) {
1098+ QueueBatchSize += ZeCommandListBatch.DynamicSizeStep ;
10151099 zePrint (" Raising QueueBatchSize to %d\n " , QueueBatchSize);
10161100 }
10171101 NumTimesClosedEarly = 0 ;
10181102 NumTimesClosedFull = 0 ;
10191103 }
10201104}
10211105
1022- void _pi_queue::adjustBatchSizeForPartialBatch (pi_uint32 PartialBatchSize ) {
1106+ void _pi_queue::adjustBatchSizeForPartialBatch () {
10231107 // QueueBatchSize of 0 means never allow batching.
1024- if (QueueBatchSize == 0 || !UseDynamicBatching )
1108+ if (QueueBatchSize == 0 || !ZeCommandListBatch. dynamic () )
10251109 return ;
10261110
10271111 NumTimesClosedEarly += 1 ;
@@ -1032,7 +1116,7 @@ void _pi_queue::adjustBatchSizeForPartialBatch(pi_uint32 PartialBatchSize) {
10321116 // batch size that will be able to be closed full at least once
10331117 // in a while.
10341118 if (NumTimesClosedEarly > (NumTimesClosedFull + 1 ) * 3 ) {
1035- QueueBatchSize = PartialBatchSize - 1 ;
1119+ QueueBatchSize = OpenCommandList-> second . size () - 1 ;
10361120 if (QueueBatchSize < 1 )
10371121 QueueBatchSize = 1 ;
10381122 zePrint (" Lowering QueueBatchSize to %d\n " , QueueBatchSize);
@@ -1057,10 +1141,11 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
10571141 // kernels started as soon as possible when there are no kernels from this
10581142 // queue awaiting execution, while allowing batching to occur when there
10591143 // are kernels already executing. Also, if we are using fixed size batching,
1060- // as indicated by !UseDynamicBatching, then just ignore CurrentlyEmpty
1061- // as we want to strictly follow the batching the user specified.
1144+ // as indicated by !ZeCommandListBatch.dynamic(), then just ignore
1145+ // CurrentlyEmpty as we want to strictly follow the batching the user
1146+ // specified.
10621147 if (OKToBatchCommand && this ->isBatchingAllowed () &&
1063- (!UseDynamicBatching || !CurrentlyEmpty)) {
1148+ (!ZeCommandListBatch. dynamic () || !CurrentlyEmpty)) {
10641149
10651150 if (hasOpenCommandList () && OpenCommandList != CommandList)
10661151 die (" executeCommandList: OpenCommandList should be equal to"
@@ -1207,7 +1292,7 @@ pi_result _pi_queue::executeOpenCommandList() {
12071292 // If there are any commands still in the open command list for this
12081293 // queue, then close and execute that command list now.
12091294 if (hasOpenCommandList ()) {
1210- adjustBatchSizeForPartialBatch (OpenCommandList-> second . size () );
1295+ adjustBatchSizeForPartialBatch ();
12111296 auto Res = executeCommandList (OpenCommandList, false , false );
12121297 OpenCommandList = CommandListMap.end ();
12131298 return Res;
@@ -1444,10 +1529,6 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
14441529 PrintPiTrace = true ;
14451530 }
14461531
1447- static const char *DebugMode = std::getenv (" ZE_DEBUG" );
1448- static const int DebugModeValue = DebugMode ? std::stoi (DebugMode) : 0 ;
1449- ZeDebug = DebugModeValue;
1450-
14511532 if (ZeDebug & ZE_DEBUG_CALL_COUNT) {
14521533 ZeCallCount = new std::map<const char *, int >;
14531534 }
@@ -2694,8 +2775,9 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,
26942775 PI_ASSERT (Queue, PI_INVALID_QUEUE);
26952776
26962777 try {
2697- *Queue = new _pi_queue (ZeComputeCommandQueue, ZeCopyCommandQueues, Context,
2698- Device, ZeCommandListBatchSize, true , Properties);
2778+ *Queue =
2779+ new _pi_queue (ZeComputeCommandQueue, ZeCopyCommandQueues, Context,
2780+ Device, ZeCommandListBatch.startSize (), true , Properties);
26992781 } catch (const std::bad_alloc &) {
27002782 return PI_OUT_OF_HOST_MEMORY;
27012783 } catch (...) {
@@ -2879,7 +2961,7 @@ pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle,
28792961 // compute vs. copy Level-Zero queue.
28802962 std::vector<ze_command_queue_handle_t > ZeroCopyQueues;
28812963 *Queue = new _pi_queue (ZeQueue, ZeroCopyQueues, Context, Device,
2882- ZeCommandListBatchSize , OwnNativeHandle);
2964+ ZeCommandListBatch. startSize () , OwnNativeHandle);
28832965 return PI_SUCCESS;
28842966}
28852967
0 commit comments