-
Notifications
You must be signed in to change notification settings - Fork 802
[SYCL][L0] Experimental options for fine-tune of dynamic batching #4492
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -170,7 +170,10 @@ enum DebugLevel { | |
| }; | ||
|
|
||
| // Controls Level Zero calls tracing. | ||
| static int ZeDebug = ZE_DEBUG_NONE; | ||
| static const int ZeDebug = [] { | ||
| const char *DebugMode = std::getenv("ZE_DEBUG"); | ||
| return DebugMode ? std::atoi(DebugMode) : ZE_DEBUG_NONE; | ||
| }(); | ||
|
|
||
| static void zePrint(const char *Format, ...) { | ||
| if (ZeDebug & ZE_DEBUG_BASIC) { | ||
|
|
@@ -843,20 +846,100 @@ static const int ZeMaxCommandListCacheSize = [] { | |
| return CommandListCacheSizeValue; | ||
| }(); | ||
|
|
||
| static const pi_uint32 ZeCommandListBatchSize = [] { | ||
| // Configuration of the command-list batching. | ||
| typedef struct { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm trying to build on Windows today, and I'm getting this error on this struct: But the CI was able to build and pass. I tried building from scratch with our python script and no luck. MSVC 2019 I think on the Win dev machine.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you type-name the struct to avoid possible errors? (since you can verify this)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @smaslov-intel - yeah, that was it. I opened a newer PR that includes that fix: #4557 |
||
| // Default value of 0. This specifies to use dynamic batch size adjustment. | ||
| // Other values will try to collect specified amount of commands. | ||
| pi_uint32 Size{0}; | ||
|
|
||
| // If doing dynamic batching, specifies start batch size. | ||
| pi_uint32 DynamicSizeStart{4}; | ||
|
|
||
| // The maximum size for dynamic batch. | ||
| pi_uint32 DynamicSizeMax{16}; | ||
|
|
||
| // The step size for dynamic batch increases. | ||
| pi_uint32 DynamicSizeStep{1}; | ||
|
|
||
| // Thresholds for when to increase the batch size (the number of batches | ||
| // closed early is small and the number closed full is high). | ||
| pi_uint32 NumTimesClosedEarlyThreshold{2}; | ||
| pi_uint32 NumTimesClosedFullThreshold{10}; | ||
|
|
||
| // Tells the starting size of a batch. | ||
| pi_uint32 startSize() const { return Size > 0 ? Size : DynamicSizeStart; } | ||
| // Tells if we are doing dynamic batch size adjustment. | ||
| bool dynamic() const { return Size == 0; } | ||
| } zeCommandListBatchConfig; | ||
|
|
||
| static const zeCommandListBatchConfig ZeCommandListBatch = [] { | ||
| zeCommandListBatchConfig Config{}; // default initialize | ||
|
|
||
| // Default value of 0. This specifies to use dynamic batch size adjustment. | ||
| pi_uint32 BatchSizeVal = 0; | ||
| const auto BatchSizeStr = std::getenv("SYCL_PI_LEVEL_ZERO_BATCH_SIZE"); | ||
| if (BatchSizeStr) { | ||
| pi_int32 BatchSizeStrVal = std::atoi(BatchSizeStr); | ||
| // Level Zero may only support a limited number of commands per command | ||
| // list. The actual upper limit is not specified by the Level Zero | ||
| // Specification. For now we allow an arbitrary upper limit. | ||
| // Negative numbers will be silently ignored. | ||
| if (BatchSizeStrVal >= 0) | ||
| BatchSizeVal = BatchSizeStrVal; | ||
| if (BatchSizeStrVal > 0) { | ||
| Config.Size = BatchSizeStrVal; | ||
| } else if (BatchSizeStrVal == 0) { | ||
| Config.Size = 0; | ||
| // We are requested to do dynamic batching. Collect specifics, if any. | ||
romanovvlad marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // The extended format supported is ":" separated values. | ||
| // | ||
| // NOTE: these extra settings are experimental and are intended to | ||
| // be used only for finding a better default heuristic. | ||
| // | ||
| std::string BatchConfig(BatchSizeStr); | ||
| size_t Ord = 0; | ||
| size_t Pos = 0; | ||
| while (true) { | ||
| if (++Ord > 5) | ||
| break; | ||
|
|
||
| Pos = BatchConfig.find(":", Pos); | ||
| if (Pos == std::string::npos) | ||
| break; | ||
| ++Pos; // past the ":" | ||
|
|
||
| pi_uint32 Val; | ||
| try { | ||
| Val = std::stoi(BatchConfig.substr(Pos)); | ||
| } catch (...) { | ||
| zePrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: failed to parse value\n"); | ||
| break; | ||
| } | ||
| switch (Ord) { | ||
| case 1: | ||
| Config.DynamicSizeStart = Val; | ||
| break; | ||
| case 2: | ||
| Config.DynamicSizeMax = Val; | ||
| break; | ||
| case 3: | ||
| Config.DynamicSizeStep = Val; | ||
| break; | ||
| case 4: | ||
| Config.NumTimesClosedEarlyThreshold = Val; | ||
| break; | ||
| case 5: | ||
| Config.NumTimesClosedFullThreshold = Val; | ||
| break; | ||
| default: | ||
| die("Unexpected batch config"); | ||
| } | ||
| zePrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: dynamic batch param #%d: %d\n", | ||
| (int)Ord, (int)Val); | ||
| }; | ||
|
|
||
| } else { | ||
| // Negative batch sizes are silently ignored. | ||
smaslov-intel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| zePrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: ignored negative value\n"); | ||
| } | ||
| } | ||
| return BatchSizeVal; | ||
| return Config; | ||
| }(); | ||
|
|
||
| // Retrieve an available command list to be used in a PI call | ||
|
|
@@ -1000,7 +1083,7 @@ pi_result _pi_context::getAvailableCommandList( | |
|
|
||
| void _pi_queue::adjustBatchSizeForFullBatch() { | ||
| // QueueBatchSize of 0 means never allow batching. | ||
| if (QueueBatchSize == 0 || !UseDynamicBatching) | ||
| if (QueueBatchSize == 0 || !ZeCommandListBatch.dynamic()) | ||
| return; | ||
|
|
||
| NumTimesClosedFull += 1; | ||
|
|
@@ -1009,19 +1092,20 @@ void _pi_queue::adjustBatchSizeForFullBatch() { | |
| // the number of times it has been closed full is high, then raise | ||
| // the batching size slowly. Don't raise it if it is already pretty | ||
| // high. | ||
| if (NumTimesClosedEarly <= 2 && NumTimesClosedFull > 10) { | ||
| if (QueueBatchSize < 16) { | ||
| QueueBatchSize = QueueBatchSize + 1; | ||
| if (NumTimesClosedEarly <= ZeCommandListBatch.NumTimesClosedEarlyThreshold && | ||
| NumTimesClosedFull > ZeCommandListBatch.NumTimesClosedFullThreshold) { | ||
| if (QueueBatchSize < ZeCommandListBatch.DynamicSizeMax) { | ||
| QueueBatchSize += ZeCommandListBatch.DynamicSizeStep; | ||
| zePrint("Raising QueueBatchSize to %d\n", QueueBatchSize); | ||
| } | ||
| NumTimesClosedEarly = 0; | ||
| NumTimesClosedFull = 0; | ||
| } | ||
| } | ||
|
|
||
| void _pi_queue::adjustBatchSizeForPartialBatch(pi_uint32 PartialBatchSize) { | ||
| void _pi_queue::adjustBatchSizeForPartialBatch() { | ||
| // QueueBatchSize of 0 means never allow batching. | ||
| if (QueueBatchSize == 0 || !UseDynamicBatching) | ||
| if (QueueBatchSize == 0 || !ZeCommandListBatch.dynamic()) | ||
| return; | ||
|
|
||
| NumTimesClosedEarly += 1; | ||
|
|
@@ -1032,7 +1116,7 @@ void _pi_queue::adjustBatchSizeForPartialBatch(pi_uint32 PartialBatchSize) { | |
| // batch size that will be able to be closed full at least once | ||
| // in a while. | ||
| if (NumTimesClosedEarly > (NumTimesClosedFull + 1) * 3) { | ||
| QueueBatchSize = PartialBatchSize - 1; | ||
| QueueBatchSize = OpenCommandList->second.size() - 1; | ||
| if (QueueBatchSize < 1) | ||
| QueueBatchSize = 1; | ||
| zePrint("Lowering QueueBatchSize to %d\n", QueueBatchSize); | ||
|
|
@@ -1057,10 +1141,11 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, | |
| // kernels started as soon as possible when there are no kernels from this | ||
| // queue awaiting execution, while allowing batching to occur when there | ||
| // are kernels already executing. Also, if we are using fixed size batching, | ||
| // as indicated by !UseDynamicBatching, then just ignore CurrentlyEmpty | ||
| // as we want to strictly follow the batching the user specified. | ||
| // as indicated by !ZeCommandListBatch.dynamic(), then just ignore | ||
| // CurrentlyEmpty as we want to strictly follow the batching the user | ||
| // specified. | ||
| if (OKToBatchCommand && this->isBatchingAllowed() && | ||
| (!UseDynamicBatching || !CurrentlyEmpty)) { | ||
| (!ZeCommandListBatch.dynamic() || !CurrentlyEmpty)) { | ||
|
|
||
| if (hasOpenCommandList() && OpenCommandList != CommandList) | ||
| die("executeCommandList: OpenCommandList should be equal to" | ||
|
|
@@ -1207,7 +1292,7 @@ pi_result _pi_queue::executeOpenCommandList() { | |
| // If there are any commands still in the open command list for this | ||
| // queue, then close and execute that command list now. | ||
| if (hasOpenCommandList()) { | ||
| adjustBatchSizeForPartialBatch(OpenCommandList->second.size()); | ||
| adjustBatchSizeForPartialBatch(); | ||
| auto Res = executeCommandList(OpenCommandList, false, false); | ||
| OpenCommandList = CommandListMap.end(); | ||
| return Res; | ||
|
|
@@ -1444,10 +1529,6 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, | |
| PrintPiTrace = true; | ||
| } | ||
|
|
||
| static const char *DebugMode = std::getenv("ZE_DEBUG"); | ||
| static const int DebugModeValue = DebugMode ? std::stoi(DebugMode) : 0; | ||
| ZeDebug = DebugModeValue; | ||
|
|
||
| if (ZeDebug & ZE_DEBUG_CALL_COUNT) { | ||
| ZeCallCount = new std::map<const char *, int>; | ||
| } | ||
|
|
@@ -2694,8 +2775,9 @@ pi_result piQueueCreate(pi_context Context, pi_device Device, | |
| PI_ASSERT(Queue, PI_INVALID_QUEUE); | ||
|
|
||
| try { | ||
| *Queue = new _pi_queue(ZeComputeCommandQueue, ZeCopyCommandQueues, Context, | ||
| Device, ZeCommandListBatchSize, true, Properties); | ||
| *Queue = | ||
| new _pi_queue(ZeComputeCommandQueue, ZeCopyCommandQueues, Context, | ||
| Device, ZeCommandListBatch.startSize(), true, Properties); | ||
| } catch (const std::bad_alloc &) { | ||
| return PI_OUT_OF_HOST_MEMORY; | ||
| } catch (...) { | ||
|
|
@@ -2879,7 +2961,7 @@ pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, | |
| // compute vs. copy Level-Zero queue. | ||
| std::vector<ze_command_queue_handle_t> ZeroCopyQueues; | ||
| *Queue = new _pi_queue(ZeQueue, ZeroCopyQueues, Context, Device, | ||
| ZeCommandListBatchSize, OwnNativeHandle); | ||
| ZeCommandListBatch.startSize(), OwnNativeHandle); | ||
| return PI_SUCCESS; | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.