Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
1a46fbb
Enable immediate command lists by default.
rdeodhar Apr 6, 2023
3490eb8
Fixes to tests and to plugin.
rdeodhar Apr 7, 2023
941ee76
Adjusted test for checking default imm cmd lists.
rdeodhar Apr 7, 2023
ae37452
Minor comment change.
rdeodhar Apr 8, 2023
831d11a
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 9, 2023
692a9a3
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 10, 2023
bc60402
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 11, 2023
ef61e97
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 12, 2023
6768619
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 14, 2023
b08d311
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 17, 2023
3dc2347
Turn off copy engines as an experiment.
rdeodhar Apr 18, 2023
752e6d2
Formatting change.
rdeodhar Apr 18, 2023
d00825d
Limit immediate command lists to PVC, with main copy engine only.
rdeodhar Apr 18, 2023
7935ad9
Limit default imm cmdlists to Linux PVC.
rdeodhar Apr 19, 2023
9cac6be
Formatting change.
rdeodhar Apr 19, 2023
e0bcee0
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 19, 2023
571d30a
Rearrange device init code to allow PVC check.
rdeodhar Apr 20, 2023
1887d02
Merge branch 'sycl' of https://github.com/intel/llvm into cmdlist10
rdeodhar Apr 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6042,7 +6042,7 @@ pi_result _pi_queue::synchronize() {
// Otherwise sync all L0 queues/immediate command-lists.
for (auto &QueueMap : {ComputeQueueGroupsByTID, CopyQueueGroupsByTID}) {
for (auto &QueueGroup : QueueMap) {
if (Device->ImmCommandListUsed) {
if (UsingImmCmdLists) {
for (auto ImmCmdList : QueueGroup.second.ImmCmdLists)
syncImmCmdList(this, ImmCmdList);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) {
// used.
if (!EnvVar) {
if (Device->useImmediateCommandLists())
return std::pair<int, int>(-1, -1); // No copy engines can be used.
return std::pair<int, int>(0, 0); // Only main copy engine will be used.
return std::pair<int, int>(0, INT_MAX); // All copy engines will be used.
}
std::string CopyEngineRange = EnvVar;
Expand Down Expand Up @@ -1089,8 +1089,13 @@ _ur_device_handle_t::useImmediateCommandLists() {
}();

if (ImmediateCommandlistsSetting == -1)
// Change this to PerQueue as default after more testing.
// Immediate command lists will be used by default only on Linux PVC.
#ifdef _WIN32
return NotUsed;
#else
return isPVC() ? PerQueue : NotUsed;
#endif

switch (ImmediateCommandlistsSetting) {
case 0:
return NotUsed;
Expand Down Expand Up @@ -1128,75 +1133,6 @@ static const EventsScope DeviceEventsSetting = [] {

ur_result_t _ur_device_handle_t::initialize(int SubSubDeviceOrdinal,
int SubSubDeviceIndex) {
uint32_t numQueueGroups = 0;
ZE_CALL(zeDeviceGetCommandQueueGroupProperties,
(ZeDevice, &numQueueGroups, nullptr));
if (numQueueGroups == 0) {
return UR_RESULT_ERROR_UNKNOWN;
}
urPrint("NOTE: Number of queue groups = %d\n", numQueueGroups);
std::vector<ZeStruct<ze_command_queue_group_properties_t>>
QueueGroupProperties(numQueueGroups);
ZE_CALL(zeDeviceGetCommandQueueGroupProperties,
(ZeDevice, &numQueueGroups, QueueGroupProperties.data()));

// Initialize ordinal and compute queue group properties
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (QueueGroupProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal =
i;
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute]
.ZeProperties = QueueGroupProperties[i];
break;
}
}

// Reinitialize a sub-sub-device with its own ordinal, index.
// Our sub-sub-device representation is currently [Level-Zero sub-device
// handle + Level-Zero compute group/engine index]. Only the specified
// index queue will be used to submit work to the sub-sub-device.
if (SubSubDeviceOrdinal >= 0) {
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal =
SubSubDeviceOrdinal;
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeIndex =
SubSubDeviceIndex;
} else { // Proceed with initialization for root and sub-device
// How is it possible that there are no "compute" capabilities?
if (QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal <
0) {
return UR_RESULT_ERROR_UNKNOWN;
}

if (CopyEngineRequested((ur_device_handle_t)this)) {
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (((QueueGroupProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) &&
(QueueGroupProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) {
if (QueueGroupProperties[i].numQueues == 1) {
QueueGroup[queue_group_info_t::MainCopy].ZeOrdinal = i;
QueueGroup[queue_group_info_t::MainCopy].ZeProperties =
QueueGroupProperties[i];
} else {
QueueGroup[queue_group_info_t::LinkCopy].ZeOrdinal = i;
QueueGroup[queue_group_info_t::LinkCopy].ZeProperties =
QueueGroupProperties[i];
break;
}
}
}
if (QueueGroup[queue_group_info_t::MainCopy].ZeOrdinal < 0)
urPrint("NOTE: main blitter/copy engine is not available\n");
else
urPrint("NOTE: main blitter/copy engine is available\n");

if (QueueGroup[queue_group_info_t::LinkCopy].ZeOrdinal < 0)
urPrint("NOTE: link blitter/copy engines are not available\n");
else
urPrint("NOTE: link blitter/copy engines are available\n");
}
}

// Maintain various device properties cache.
// Note that we just describe here how to compute the data.
Expand Down Expand Up @@ -1269,6 +1205,76 @@ ur_result_t _ur_device_handle_t::initialize(int SubSubDeviceOrdinal,
ZeEventsScope = DeviceEventsSetting;
}

uint32_t numQueueGroups = 0;
ZE_CALL(zeDeviceGetCommandQueueGroupProperties,
(ZeDevice, &numQueueGroups, nullptr));
if (numQueueGroups == 0) {
return UR_RESULT_ERROR_UNKNOWN;
}
urPrint("NOTE: Number of queue groups = %d\n", numQueueGroups);
std::vector<ZeStruct<ze_command_queue_group_properties_t>>
QueueGroupProperties(numQueueGroups);
ZE_CALL(zeDeviceGetCommandQueueGroupProperties,
(ZeDevice, &numQueueGroups, QueueGroupProperties.data()));

// Initialize ordinal and compute queue group properties
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (QueueGroupProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal =
i;
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute]
.ZeProperties = QueueGroupProperties[i];
break;
}
}

// Reinitialize a sub-sub-device with its own ordinal, index.
// Our sub-sub-device representation is currently [Level-Zero sub-device
// handle + Level-Zero compute group/engine index]. Only the specified
// index queue will be used to submit work to the sub-sub-device.
if (SubSubDeviceOrdinal >= 0) {
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal =
SubSubDeviceOrdinal;
QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeIndex =
SubSubDeviceIndex;
} else { // Proceed with initialization for root and sub-device
// How is it possible that there are no "compute" capabilities?
if (QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal <
0) {
return UR_RESULT_ERROR_UNKNOWN;
}

if (CopyEngineRequested((ur_device_handle_t)this)) {
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (((QueueGroupProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) &&
(QueueGroupProperties[i].flags &
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) {
if (QueueGroupProperties[i].numQueues == 1) {
QueueGroup[queue_group_info_t::MainCopy].ZeOrdinal = i;
QueueGroup[queue_group_info_t::MainCopy].ZeProperties =
QueueGroupProperties[i];
} else {
QueueGroup[queue_group_info_t::LinkCopy].ZeOrdinal = i;
QueueGroup[queue_group_info_t::LinkCopy].ZeProperties =
QueueGroupProperties[i];
break;
}
}
}
if (QueueGroup[queue_group_info_t::MainCopy].ZeOrdinal < 0)
urPrint("NOTE: main blitter/copy engine is not available\n");
else
urPrint("NOTE: main blitter/copy engine is available\n");

if (QueueGroup[queue_group_info_t::LinkCopy].ZeOrdinal < 0)
urPrint("NOTE: link blitter/copy engines are not available\n");
else
urPrint("NOTE: link blitter/copy engines are available\n");
}
}

return UR_RESULT_SUCCESS;
}

Expand Down
8 changes: 4 additions & 4 deletions sycl/test-e2e/DiscardEvents/discard_events_l0_inorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=0 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=2 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_BATCH_SIZE=0 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_BATCH_SIZE=1 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_BATCH_SIZE=2 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_BATCH_SIZE=3 ONEAPI_DEVICE_SELECTOR="level_zero:*" %GPU_RUN_PLACEHOLDER %t.out
Comment on lines +5 to +8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix we discussed offline is merged: 88e7c55

Does this test still fail?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, but because the test depends on batching behavior which is only applicable with standard command lists, we have to explicitly set immediate command lists off.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see, thanks a lot!

//
// The test checks that the execution of different queue operations has in-order
// semantics regardless of batching.
Expand Down
4 changes: 2 additions & 2 deletions sycl/test-e2e/DiscardEvents/discard_events_l0_leak.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
//
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 ONEAPI_DEVICE_SELECTOR='level_zero:*' ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out wait 2>&1 %GPU_CHECK_PLACEHOLDER
// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 ONEAPI_DEVICE_SELECTOR='level_zero:*' ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out nowait 2>&1 %GPU_CHECK_PLACEHOLDER
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 ONEAPI_DEVICE_SELECTOR='level_zero:*' ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out wait 2>&1 %GPU_CHECK_PLACEHOLDER
// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 ONEAPI_DEVICE_SELECTOR='level_zero:*' ZE_DEBUG=4 %GPU_RUN_PLACEHOLDER %t.out nowait 2>&1 %GPU_CHECK_PLACEHOLDER
//
// CHECK-NOT: LEAK
//
Expand Down
7 changes: 4 additions & 3 deletions sycl/test-e2e/Plugin/level_zero_imm_cmdlist.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
// REQUIRES: level_zero, level_zero_dev_kit
// REQUIRES: linux, gpu-intel-pvc, level_zero, level_zero_dev_kit
//
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %level_zero_options %s -o %t.out
// RUN: env ONEAPI_DEVICE_SELECTOR="level_zero:*" ZE_DEBUG=1 SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
// RUN: env ONEAPI_DEVICE_SELECTOR="level_zero:*" ZE_DEBUG=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
//
// CHECK-NOT: zeCommandListCreate(
// CHECK: zeCommandListCreateImmediate(

// The test checks that immediate commandlists are used and not regular ones.
// This test checks that, by default on Linux PVC, immediate command lists are
// used instead of regular ones.

#include <sycl/sycl.hpp>

Expand Down