Skip to content
13 changes: 2 additions & 11 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,10 @@
// 10.13 Added new PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS queue property.
// 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for
// piDeviceGetInfo.
// 10.15 Add new PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION queue property
// 10.16 Add command-buffer extension methods
// 10.15 Add command-buffer extension methods

#define _PI_H_VERSION_MAJOR 10
#define _PI_H_VERSION_MINOR 16
#define _PI_H_VERSION_MINOR 15

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -572,14 +571,6 @@ constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE = (1 << 1);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = (1 << 2);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = (1 << 3);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS = (1 << 4);
// In a lazy queue, enqueued commands are not submitted for execution
// immediately, instead they are submitted for execution once the queue is
// flushed.
//
// This is to enable prototyping of the SYCL_EXT_ONEAPI_GRAPH extension,
// before a native command-list interface in PI can be designed and
// implemented.
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION = (1 << 5);

using pi_result = _pi_result;
using pi_platform_info = _pi_platform_info;
Expand Down
11 changes: 7 additions & 4 deletions sycl/include/sycl/ext/oneapi/experimental/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace experimental {
namespace detail {
struct node_impl;
struct graph_impl;
class exec_graph_impl;

} // namespace detail

Expand Down Expand Up @@ -125,18 +126,20 @@ template <> class __SYCL_EXPORT command_graph<graph_state::executable> {
public:
command_graph() = delete;

command_graph(const std::shared_ptr<detail::graph_impl> &g,
const sycl::context &ctx)
: MTag(rand()), MCtx(ctx), impl(g) {}
command_graph(const std::shared_ptr<detail::graph_impl> &Graph,
const sycl::context &Ctx);

private:
template <class Obj>
friend decltype(Obj::impl)
sycl::detail::getSyclObjImpl(const Obj &SyclObject);

// Creates a backend representation of the graph in impl
void finalize_impl();

int MTag;
const sycl::context &MCtx;
std::shared_ptr<detail::graph_impl> impl;
std::shared_ptr<detail::exec_graph_impl> impl;
};
} // namespace experimental
} // namespace oneapi
Expand Down
8 changes: 0 additions & 8 deletions sycl/include/sycl/properties/queue_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ namespace property {
namespace queue {
class discard_events
: public ::sycl::detail::DataLessProperty<::sycl::detail::DiscardEvents> {};
class lazy_execution
: public ::sycl::detail::DataLessProperty<::sycl::detail::LazyExecution> {};
} // namespace queue
} // namespace property

Expand Down Expand Up @@ -67,9 +65,6 @@ template <>
struct is_property<ext::oneapi::property::queue::discard_events>
: std::true_type {};
template <>
struct is_property<ext::oneapi::property::queue::lazy_execution>
: std::true_type {};
template <>
struct is_property<property::queue::cuda::use_default_stream> : std::true_type {
};
template <>
Expand All @@ -85,9 +80,6 @@ template <>
struct is_property_of<ext::oneapi::property::queue::discard_events, queue>
: std::true_type {};
template <>
struct is_property_of<ext::oneapi::property::queue::lazy_execution, queue>
: std::true_type {};
template <>
struct is_property_of<property::queue::cuda::use_default_stream, queue>
: std::true_type {};
template <>
Expand Down
65 changes: 1 addition & 64 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1307,49 +1307,6 @@ pi_result resetCommandLists(pi_queue Queue) {
pi_result _pi_context::getAvailableCommandList(
pi_queue Queue, pi_command_list_ptr_t &CommandList, bool UseCopyEngine,
bool AllowBatching, ze_command_queue_handle_t *ForcedCmdQueue) {

#if SYCL_EXT_ONEAPI_GRAPH
// This is a hack. TODO: Proper CommandList allocation per Executable Graph.
if( Queue->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) {
// TODO: Create new Command List.
if(Queue->LazyCommandListMap.empty()) {
const bool UseCopyEngine = false;
// Adding createCommandList() to LazyCommandListMap
ze_fence_handle_t ZeFence;
ZeStruct<ze_fence_desc_t> ZeFenceDesc;
ze_command_list_handle_t ZeCommandList;

uint32_t QueueGroupOrdinal;
auto &QGroup = Queue->getQueueGroup(UseCopyEngine);
auto &ZeCommandQueue =
// ForcedCmdQueue ? *ForcedCmdQueue :
QGroup.getZeQueue(&QueueGroupOrdinal);
// if (ForcedCmdQueue)
// QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue);

ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;

ZE_CALL(zeCommandListCreate,
(Queue->Context->ZeContext, Queue->Device->ZeDevice,
&ZeCommandListDesc, &ZeCommandList));

ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
std::tie(CommandList, std::ignore) = Queue->LazyCommandListMap.insert(
std::pair<ze_command_list_handle_t, pi_command_list_info_t>(
ZeCommandList,
{ZeFence, false, ZeCommandQueue, QueueGroupOrdinal}));

Queue->insertActiveBarriers(CommandList, UseCopyEngine);
//
CommandList->second.ZeFenceInUse = true;
} else {
CommandList = Queue->LazyCommandListMap.begin();
}
return PI_SUCCESS;
}
#endif

// Immediate commandlists have been pre-allocated and are always available.
if (Queue->Device->useImmediateCommandLists()) {
CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList();
Expand Down Expand Up @@ -1588,13 +1545,6 @@ void _pi_queue::CaptureIndirectAccesses() {
pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
bool IsBlocking,
bool OKToBatchCommand) {
// When executing a Graph, defer execution if this is a command
// which could be batched (i.e. likely a kernel submission)
#if SYCL_EXT_ONEAPI_GRAPH
if (this->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION && OKToBatchCommand)
return PI_SUCCESS;
#endif

bool UseCopyEngine = CommandList->second.isCopy(this);

// If the current LastCommandEvent is the nullptr, then it means
Expand Down Expand Up @@ -3560,8 +3510,7 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,
PI_ASSERT(!(Properties & ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE |
PI_QUEUE_ON_DEVICE_DEFAULT |
PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS |
PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION)),
PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS)),
PI_ERROR_INVALID_VALUE);

PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT);
Expand Down Expand Up @@ -3835,18 +3784,6 @@ pi_result piQueueFinish(pi_queue Queue) {
// Flushing cross-queue dependencies is covered by createAndRetainPiZeEventList,
// so this can be left as a no-op.
pi_result piQueueFlush(pi_queue Queue) {
#if SYCL_EXT_ONEAPI_GRAPH
if( Queue->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) {

pi_command_list_ptr_t CommandList{};
// TODO:
CommandList = Queue->LazyCommandListMap.begin();

Queue->executeCommandList(CommandList, false, false);
}
#else
(void)Queue;
#endif
return PI_SUCCESS;
}

Expand Down
Loading