Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 107 additions & 25 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,10 @@ enum DebugLevel {
};

// Controls Level Zero calls tracing.
static int ZeDebug = ZE_DEBUG_NONE;
// Tracing level for Level Zero calls. It is computed once, when this
// variable is initialized, from the ZE_DEBUG environment variable: if the
// variable is not set the level is ZE_DEBUG_NONE, otherwise it is the
// result of std::atoi on the variable's text.
static const int ZeDebug = [] {
  if (const char *EnvSetting = std::getenv("ZE_DEBUG"))
    return std::atoi(EnvSetting);
  return static_cast<int>(ZE_DEBUG_NONE);
}();

static void zePrint(const char *Format, ...) {
if (ZeDebug & ZE_DEBUG_BASIC) {
Expand Down Expand Up @@ -843,20 +846,100 @@ static const int ZeMaxCommandListCacheSize = [] {
return CommandListCacheSizeValue;
}();

static const pi_uint32 ZeCommandListBatchSize = [] {
// Configuration of the command-list batching.
typedef struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to build on Windows today, and I'm getting this error on this struct:

C:\iusers\cperkins\sycl_workspace\llvm\sycl\plugins\level_zero\pi_level_zero.cpp(850): error C2220: the following warning is treated as an error
C:\iusers\cperkins\sycl_workspace\llvm\sycl\plugins\level_zero\pi_level_zero.cpp(850): warning C5208: unnamed class used in typedef name cannot declare members other than non-static data members, member enumerations, or member classes

But the CI was able to build and pass. I tried building from scratch with our python script and no luck. MSVC 2019 I think on the Win dev machine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you type-name the struct to avoid possible errors? (since you can verify this)

Copy link
Contributor

@cperkinsintel cperkinsintel Sep 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@smaslov-intel - yeah, that was it.

I opened a newer PR that includes that fix: #4557

// Default value of 0. This specifies to use dynamic batch size adjustment.
// Other values will try to collect specified amount of commands.
pi_uint32 Size{0};

// If doing dynamic batching, specifies start batch size.
pi_uint32 DynamicSizeStart{4};

// The maximum size for dynamic batch.
pi_uint32 DynamicSizeMax{16};

// The step size for dynamic batch increases.
pi_uint32 DynamicSizeStep{1};

// Thresholds for when to increase the batch size (the number of times closed
// early is small and the number of times closed full is high).
pi_uint32 NumTimesClosedEarlyThreshold{2};
pi_uint32 NumTimesClosedFullThreshold{10};

// Tells the starting size of a batch.
pi_uint32 startSize() const { return Size > 0 ? Size : DynamicSizeStart; }
// Tells if we are doing dynamic batch size adjustment.
bool dynamic() const { return Size == 0; }
} zeCommandListBatchConfig;

static const zeCommandListBatchConfig ZeCommandListBatch = [] {
zeCommandListBatchConfig Config{}; // default initialize

// Default value of 0. This specifies to use dynamic batch size adjustment.
pi_uint32 BatchSizeVal = 0;
const auto BatchSizeStr = std::getenv("SYCL_PI_LEVEL_ZERO_BATCH_SIZE");
if (BatchSizeStr) {
pi_int32 BatchSizeStrVal = std::atoi(BatchSizeStr);
// Level Zero may only support a limited number of commands per command
// list. The actual upper limit is not specified by the Level Zero
// Specification. For now we allow an arbitrary upper limit.
// Negative numbers will be silently ignored.
if (BatchSizeStrVal >= 0)
BatchSizeVal = BatchSizeStrVal;
if (BatchSizeStrVal > 0) {
Config.Size = BatchSizeStrVal;
} else if (BatchSizeStrVal == 0) {
Config.Size = 0;
// We are requested to do dynamic batching. Collect specifics, if any.
// The extended format supported is ":" separated values.
//
// NOTE: these extra settings are experimental and are intended to
// be used only for finding a better default heuristic.
//
std::string BatchConfig(BatchSizeStr);
size_t Ord = 0;
size_t Pos = 0;
while (true) {
if (++Ord > 5)
break;

Pos = BatchConfig.find(":", Pos);
if (Pos == std::string::npos)
break;
++Pos; // past the ":"

pi_uint32 Val;
try {
Val = std::stoi(BatchConfig.substr(Pos));
} catch (...) {
zePrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: failed to parse value\n");
break;
}
switch (Ord) {
case 1:
Config.DynamicSizeStart = Val;
break;
case 2:
Config.DynamicSizeMax = Val;
break;
case 3:
Config.DynamicSizeStep = Val;
break;
case 4:
Config.NumTimesClosedEarlyThreshold = Val;
break;
case 5:
Config.NumTimesClosedFullThreshold = Val;
break;
default:
die("Unexpected batch config");
}
zePrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: dynamic batch param #%d: %d\n",
(int)Ord, (int)Val);
};

} else {
// Negative batch sizes are silently ignored.
zePrint("SYCL_PI_LEVEL_ZERO_BATCH_SIZE: ignored negative value\n");
}
}
return BatchSizeVal;
return Config;
}();

// Retrieve an available command list to be used in a PI call
Expand Down Expand Up @@ -1000,7 +1083,7 @@ pi_result _pi_context::getAvailableCommandList(

void _pi_queue::adjustBatchSizeForFullBatch() {
// QueueBatchSize of 0 means never allow batching.
if (QueueBatchSize == 0 || !UseDynamicBatching)
if (QueueBatchSize == 0 || !ZeCommandListBatch.dynamic())
return;

NumTimesClosedFull += 1;
Expand All @@ -1009,19 +1092,20 @@ void _pi_queue::adjustBatchSizeForFullBatch() {
// the number of times it has been closed full is high, then raise
// the batching size slowly. Don't raise it if it is already pretty
// high.
if (NumTimesClosedEarly <= 2 && NumTimesClosedFull > 10) {
if (QueueBatchSize < 16) {
QueueBatchSize = QueueBatchSize + 1;
if (NumTimesClosedEarly <= ZeCommandListBatch.NumTimesClosedEarlyThreshold &&
NumTimesClosedFull > ZeCommandListBatch.NumTimesClosedFullThreshold) {
if (QueueBatchSize < ZeCommandListBatch.DynamicSizeMax) {
QueueBatchSize += ZeCommandListBatch.DynamicSizeStep;
zePrint("Raising QueueBatchSize to %d\n", QueueBatchSize);
}
NumTimesClosedEarly = 0;
NumTimesClosedFull = 0;
}
}

void _pi_queue::adjustBatchSizeForPartialBatch(pi_uint32 PartialBatchSize) {
void _pi_queue::adjustBatchSizeForPartialBatch() {
// QueueBatchSize of 0 means never allow batching.
if (QueueBatchSize == 0 || !UseDynamicBatching)
if (QueueBatchSize == 0 || !ZeCommandListBatch.dynamic())
return;

NumTimesClosedEarly += 1;
Expand All @@ -1032,7 +1116,7 @@ void _pi_queue::adjustBatchSizeForPartialBatch(pi_uint32 PartialBatchSize) {
// batch size that will be able to be closed full at least once
// in a while.
if (NumTimesClosedEarly > (NumTimesClosedFull + 1) * 3) {
QueueBatchSize = PartialBatchSize - 1;
QueueBatchSize = OpenCommandList->second.size() - 1;
if (QueueBatchSize < 1)
QueueBatchSize = 1;
zePrint("Lowering QueueBatchSize to %d\n", QueueBatchSize);
Expand All @@ -1057,10 +1141,11 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
// kernels started as soon as possible when there are no kernels from this
// queue awaiting execution, while allowing batching to occur when there
// are kernels already executing. Also, if we are using fixed size batching,
// as indicated by !UseDynamicBatching, then just ignore CurrentlyEmpty
// as we want to strictly follow the batching the user specified.
// as indicated by !ZeCommandListBatch.dynamic(), then just ignore
// CurrentlyEmpty as we want to strictly follow the batching the user
// specified.
if (OKToBatchCommand && this->isBatchingAllowed() &&
(!UseDynamicBatching || !CurrentlyEmpty)) {
(!ZeCommandListBatch.dynamic() || !CurrentlyEmpty)) {

if (hasOpenCommandList() && OpenCommandList != CommandList)
die("executeCommandList: OpenCommandList should be equal to"
Expand Down Expand Up @@ -1207,7 +1292,7 @@ pi_result _pi_queue::executeOpenCommandList() {
// If there are any commands still in the open command list for this
// queue, then close and execute that command list now.
if (hasOpenCommandList()) {
adjustBatchSizeForPartialBatch(OpenCommandList->second.size());
adjustBatchSizeForPartialBatch();
auto Res = executeCommandList(OpenCommandList, false, false);
OpenCommandList = CommandListMap.end();
return Res;
Expand Down Expand Up @@ -1444,10 +1529,6 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
PrintPiTrace = true;
}

static const char *DebugMode = std::getenv("ZE_DEBUG");
static const int DebugModeValue = DebugMode ? std::stoi(DebugMode) : 0;
ZeDebug = DebugModeValue;

if (ZeDebug & ZE_DEBUG_CALL_COUNT) {
ZeCallCount = new std::map<const char *, int>;
}
Expand Down Expand Up @@ -2694,8 +2775,9 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,
PI_ASSERT(Queue, PI_INVALID_QUEUE);

try {
*Queue = new _pi_queue(ZeComputeCommandQueue, ZeCopyCommandQueues, Context,
Device, ZeCommandListBatchSize, true, Properties);
*Queue =
new _pi_queue(ZeComputeCommandQueue, ZeCopyCommandQueues, Context,
Device, ZeCommandListBatch.startSize(), true, Properties);
} catch (const std::bad_alloc &) {
return PI_OUT_OF_HOST_MEMORY;
} catch (...) {
Expand Down Expand Up @@ -2879,7 +2961,7 @@ pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle,
// compute vs. copy Level-Zero queue.
std::vector<ze_command_queue_handle_t> ZeroCopyQueues;
*Queue = new _pi_queue(ZeQueue, ZeroCopyQueues, Context, Device,
ZeCommandListBatchSize, OwnNativeHandle);
ZeCommandListBatch.startSize(), OwnNativeHandle);
return PI_SUCCESS;
}

Expand Down
17 changes: 3 additions & 14 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -587,19 +587,14 @@ struct _pi_context : _pi_object {
std::mutex NumEventsUnreleasedInEventPoolMutex;
};

// If doing dynamic batching, start batch size at 4.
const pi_uint32 DynamicBatchStartSize = 4;

struct _pi_queue : _pi_object {
_pi_queue(ze_command_queue_handle_t Queue,
std::vector<ze_command_queue_handle_t> &CopyQueues,
pi_context Context, pi_device Device, pi_uint32 BatchSize,
bool OwnZeCommandQueue, pi_queue_properties PiQueueProperties = 0)
: ZeComputeCommandQueue{Queue},
ZeCopyCommandQueues{CopyQueues}, Context{Context}, Device{Device},
QueueBatchSize{BatchSize > 0 ? BatchSize : DynamicBatchStartSize},
OwnZeCommandQueue{OwnZeCommandQueue}, UseDynamicBatching{BatchSize ==
0},
QueueBatchSize{BatchSize}, OwnZeCommandQueue{OwnZeCommandQueue},
PiQueueProperties(PiQueueProperties) {
OpenCommandList = CommandListMap.end();
}
Expand Down Expand Up @@ -668,11 +663,6 @@ struct _pi_queue : _pi_object {
// asked to not transfer the ownership to SYCL RT.
bool OwnZeCommandQueue;

// specifies whether this queue will be using dynamic batch size adjustment
// or not. This is set only at queue creation time, and is therefore
// const for the life of the queue.
const bool UseDynamicBatching;

// These two members are used to keep track of how often the
// batching closes and executes a command list before reaching the
// QueueBatchSize limit, versus how often we reach the limit.
Expand Down Expand Up @@ -704,9 +694,8 @@ struct _pi_queue : _pi_object {
void adjustBatchSizeForFullBatch();

// adjust the queue's batch size, knowing that the current command list
// is being closed with only a partial batch of commands. How many commands
// are in this partial closure is passed as the parameter.
void adjustBatchSizeForPartialBatch(pi_uint32 PartialBatchSize);
// is being closed with only a partial batch of commands.
void adjustBatchSizeForPartialBatch();

// Resets the Command List and Associated fence in the ZeCommandListFenceMap.
// If the reset command list should be made available, then MakeAvailable
Expand Down