diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def
index 59dab0c4721a1..41e0839eaaa4c 100644
--- a/sycl/include/CL/sycl/detail/pi.def
+++ b/sycl/include/CL/sycl/detail/pi.def
@@ -103,6 +103,7 @@ _PI_API(piSamplerGetInfo)
 _PI_API(piSamplerRetain)
 _PI_API(piSamplerRelease)
 // Queue commands
+_PI_API(piKernelLaunch)
 _PI_API(piEnqueueKernelLaunch)
 _PI_API(piEnqueueNativeKernel)
 _PI_API(piEnqueueEventsWait)
diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h
index 0e15200d19a6b..5a5ec40e2dc71 100644
--- a/sycl/include/CL/sycl/detail/pi.h
+++ b/sycl/include/CL/sycl/detail/pi.h
@@ -626,6 +626,7 @@ constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE =
 constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = CL_QUEUE_ON_DEVICE;
 constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT =
     CL_QUEUE_ON_DEVICE_DEFAULT;
+constexpr pi_queue_properties PI_QUEUE_LAZY_EXECUTION = 1 << 10;
 
 using pi_result = _pi_result;
 using pi_platform_info = _pi_platform_info;
@@ -1488,6 +1489,8 @@ __SYCL_EXPORT pi_result piSamplerRelease(pi_sampler sampler);
 //
 // Queue Commands
 //
+__SYCL_EXPORT pi_result piKernelLaunch(pi_queue queue);
+
 __SYCL_EXPORT pi_result piEnqueueKernelLaunch(
     pi_queue queue, pi_kernel kernel, pi_uint32 work_dim,
     const size_t *global_work_offset, const size_t *global_work_size,
diff --git a/sycl/include/CL/sycl/detail/property_helper.hpp b/sycl/include/CL/sycl/detail/property_helper.hpp
index e4e8eea1007a4..7cd346f8f4455 100644
--- a/sycl/include/CL/sycl/detail/property_helper.hpp
+++ b/sycl/include/CL/sycl/detail/property_helper.hpp
@@ -35,8 +35,9 @@ enum DataLessPropKind {
   UseDefaultStream = 8,
   DiscardEvents = 9,
   DeviceReadOnly = 10,
+  LazyExecution = 11,
   // Indicates the last known dataless property.
-  LastKnownDataLessPropKind = 10,
+  LastKnownDataLessPropKind = 11,
   // Exceeding 32 may cause ABI breaking change on some of OSes.
   DataLessPropKindSize = 32
 };
diff --git a/sycl/include/CL/sycl/feature_test.hpp.in b/sycl/include/CL/sycl/feature_test.hpp.in
index 94e0447a8e0e6..eb80556ed2c9f 100644
--- a/sycl/include/CL/sycl/feature_test.hpp.in
+++ b/sycl/include/CL/sycl/feature_test.hpp.in
@@ -47,6 +47,7 @@ namespace sycl {
 #define SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES 1
 #define SYCL_EXT_ONEAPI_GROUP_ALGORITHMS 1
 #define SYCL_EXT_ONEAPI_GROUP_SORT 1
+#define SYCL_EXT_ONEAPI_LAZY_QUEUE 1
 #define SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY 1
 #define SYCL_EXT_ONEAPI_ND_RANGE_REDUCTIONS 1
 #define SYCL_EXT_ONEAPI_DEFAULT_CONTEXT 1
diff --git a/sycl/include/CL/sycl/properties/queue_properties.hpp b/sycl/include/CL/sycl/properties/queue_properties.hpp
index 76a3bfaea9373..c9fb6e88d890c 100644
--- a/sycl/include/CL/sycl/properties/queue_properties.hpp
+++ b/sycl/include/CL/sycl/properties/queue_properties.hpp
@@ -28,6 +28,8 @@ namespace property {
 namespace queue {
 class discard_events : public ::cl::sycl::detail::DataLessProperty<
                            ::cl::sycl::detail::DiscardEvents> {};
+class lazy_execution : public ::cl::sycl::detail::DataLessProperty<
+                           ::cl::sycl::detail::LazyExecution> {};
 } // namespace queue
 } // namespace property
 
@@ -63,6 +65,9 @@ template <>
 struct is_property<ext::oneapi::property::queue::discard_events>
     : std::true_type {};
 template <>
+struct is_property<ext::oneapi::property::queue::lazy_execution>
+    : std::true_type {};
+template <>
 struct is_property<property::queue::cuda::use_default_stream> : std::true_type {
 };
 template <>
@@ -78,6 +83,9 @@ template <>
 struct is_property_of<ext::oneapi::property::queue::discard_events, queue>
     : std::true_type {};
 template <>
+struct is_property_of<ext::oneapi::property::queue::lazy_execution, queue>
+    : std::true_type {};
+template <>
 struct is_property_of<property::queue::cuda::use_default_stream, queue>
     : std::true_type {};
 template <>
diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp
new file mode 100644
index 0000000000000..304a928f4e7e4
--- /dev/null
+++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp
@@ -0,0 +1,220 @@
+//==--------- graph.hpp --- SYCL graph extension ---------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/detail/defines_elementary.hpp>
+
+#include <list>
+#include <set>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace ext {
+namespace oneapi {
+namespace experimental {
+namespace detail {
+
+struct node_impl;
+
+struct graph_impl;
+
+using node_ptr = std::shared_ptr<node_impl>;
+
+using graph_ptr = std::shared_ptr<graph_impl>;
+
+class wrapper {
+  using T = std::function<void(sycl::handler &)>;
+  T my_func;
+  std::vector<sycl::event> my_deps;
+
+public:
+  wrapper(T t, const std::vector<sycl::event> &deps)
+      : my_func(t), my_deps(deps){};
+
+  void operator()(sycl::handler &cgh) {
+    cgh.depends_on(my_deps);
+    std::invoke(my_func, cgh);
+  }
+};
+
+struct node_impl {
+  bool is_scheduled;
+
+  graph_ptr my_graph;
+  sycl::event my_event;
+
+  std::vector<node_ptr> my_successors;
+  std::vector<node_ptr> my_predecessors;
+
+  std::function<void(sycl::handler &)> my_body;
+
+  void exec(sycl::queue q) {
+    std::vector<sycl::event> __deps;
+    for (auto i : my_predecessors)
+      __deps.push_back(i->get_event());
+    my_event = q.submit(wrapper{my_body, __deps});
+  }
+
+  void register_successor(node_ptr n) {
+    my_successors.push_back(n);
+    n->register_predecessor(node_ptr(this));
+  }
+
+  void register_predecessor(node_ptr n) { my_predecessors.push_back(n); }
+
+  sycl::event get_event(void) { return my_event; }
+
+  template <typename T>
+  node_impl(graph_ptr g, T cgf)
+      : is_scheduled(false), my_graph(g), my_body(cgf) {}
+
+  // Recursively adding nodes to execution stack:
+  void topology_sort(std::list<node_ptr> &schedule) {
+    is_scheduled = true;
+    for (auto i : my_successors) {
+      if (!i->is_scheduled)
+        i->topology_sort(schedule);
+    }
+    schedule.push_front(node_ptr(this));
+  }
+};
+
+struct graph_impl {
+  std::set<node_ptr> my_roots;
+  std::list<node_ptr> my_schedule;
+
+  graph_ptr parent;
+
+  void exec(sycl::queue q) {
+    if (my_schedule.empty()) {
+      for (auto n : my_roots) {
+        n->topology_sort(my_schedule);
+      }
+    }
+    for (auto n : my_schedule)
+      n->exec(q);
+  }
+
+  void exec_and_wait(sycl::queue q) {
+    exec(q);
+    q.wait();
+  }
+
+  void add_root(node_ptr n) {
+    my_roots.insert(n);
+    for (auto n : my_schedule)
+      n->is_scheduled = false;
+    my_schedule.clear();
+  }
+
+  void remove_root(node_ptr n) {
+    my_roots.erase(n);
+    for (auto n : my_schedule)
+      n->is_scheduled = false;
+    my_schedule.clear();
+  }
+
+  graph_impl() {}
+};
+
+} // namespace detail
+
+struct node {
+  // TODO: add properties to distinguish between empty, host, device nodes.
+  detail::node_ptr my_node;
+  detail::graph_ptr my_graph;
+
+  template <typename T>
+  node(detail::graph_ptr g, T cgf)
+      : my_graph(g), my_node(new detail::node_impl(g, cgf)){};
+  void register_successor(node n) { my_node->register_successor(n.my_node); }
+  void exec(sycl::queue q, sycl::event = sycl::event()) { my_node->exec(q); }
+
+  void set_root() { my_graph->add_root(my_node); }
+
+  // TODO: Add query functions: is_root, ...
+};
+
+enum class graph_state{
+  modifiable,
+  executable
+};
+
+template<graph_state State=graph_state::modifiable>
+class command_graph {
+public:
+  // Adding empty node with [0..n] predecessors:
+  node add(const std::vector<node> &dep = {});
+
+  // Adding device node:
+  template <typename T>
+  node add(T cgf, const std::vector<node> &dep = {});
+
+  // Adding dependency between two nodes.
+  void make_edge(node sender, node receiver);
+
+  // TODO: Extend queue to directly submit graph
+  void exec_and_wait(sycl::queue q);
+
+  command_graph<graph_state::executable> finalize(const sycl::context &syclContext) const;
+
+  command_graph() : my_graph(new detail::graph_impl()) {}
+
+private:
+  detail::graph_ptr my_graph;
+};
+
+template<>
+class command_graph<graph_state::executable>{
+public:
+  int my_tag;
+  const sycl::context& my_ctx;
+
+  void exec_and_wait(sycl::queue q);
+
+  command_graph() = delete;
+
+  command_graph(detail::graph_ptr g, const sycl::context& ctx)
+      : my_graph(g) , my_ctx(ctx), my_tag(rand()) {}
+
+private:
+  detail::graph_ptr my_graph;
+};
+
+template<> template<typename T>
+node command_graph<graph_state::modifiable>::add(T cgf, const std::vector<node> &dep) {
+  node _node(my_graph, cgf);
+  if (!dep.empty()) {
+    for (auto n : dep)
+      this->make_edge(n, _node);
+  } else {
+    _node.set_root();
+  }
+  return _node;
+}
+
+template<>
+void command_graph<graph_state::modifiable>::make_edge(node sender, node receiver) {
+  sender.register_successor(receiver);     // register successor
+  my_graph->remove_root(receiver.my_node); // remove receiver from root node
+                                           // list
+}
+
+template<>
+command_graph<graph_state::executable> command_graph<graph_state::modifiable>::finalize(const sycl::context &ctx) const {
+  return command_graph<graph_state::executable>{ this->my_graph, ctx };
+}
+
+void command_graph<graph_state::executable>::exec_and_wait(sycl::queue q) { my_graph->exec_and_wait(q); };
+
+} // namespace experimental
+} // namespace oneapi
+} // namespace ext
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp
index 09de4b0bdaf44..058437e0285c2 100644
--- a/sycl/plugins/level_zero/pi_level_zero.cpp
+++ b/sycl/plugins/level_zero/pi_level_zero.cpp
@@ -872,6 +872,11 @@ bool _pi_queue::isInOrderQueue() const {
   return ((this->Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0);
 }
 
+bool _pi_queue::isEagerExec() const {
+  // If lazy exec queue property is not set, then it's an eager queue.
+  return ((this->Properties & PI_QUEUE_LAZY_EXECUTION) == 0);
+}
+
 pi_result _pi_queue::resetCommandList(pi_command_list_ptr_t CommandList,
                                       bool MakeAvailable) {
   bool UseCopyEngine = CommandList->second.isCopy(this);
@@ -1135,137 +1140,142 @@ _pi_queue::_pi_queue(std::vector<ze_command_queue_handle_t> &ComputeQueues,
 }
 
 // Retrieve an available command list to be used in a PI call.
-pi_result
-_pi_context::getAvailableCommandList(pi_queue Queue,
-                                     pi_command_list_ptr_t &CommandList,
-                                     bool UseCopyEngine, bool AllowBatching) {
-  // Immediate commandlists have been pre-allocated and are always available.
-  if (UseImmediateCommandLists) {
-    CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList();
-    return PI_SUCCESS;
-  }
-
-  auto &CommandBatch =
-      UseCopyEngine ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch;
-  // Handle batching of commands
-  // First see if there is an command-list open for batching commands
-  // for this queue.
-  if (Queue->hasOpenCommandList(UseCopyEngine)) {
-    if (AllowBatching) {
-      CommandList = CommandBatch.OpenCommandList;
+pi_result _pi_context::getAvailableCommandList(
+    pi_queue Queue, pi_command_list_ptr_t &CommandList, bool UseCopyEngine,
+    bool AllowBatching, bool Graph) {
+  // TODO: Do proper CommandList allocation. This is a hack!
+  if (!Graph) {
+    // Immediate commandlists have been pre-allocated and are always available.
+    if (UseImmediateCommandLists) {
+      CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList();
       return PI_SUCCESS;
     }
-    // If this command isn't allowed to be batched, then we need to
-    // go ahead and execute what is already in the batched list,
-    // and then go on to process this. On exit from executeOpenCommandList
-    // OpenCommandList will be invalidated.
-    if (auto Res = Queue->executeOpenCommandList(UseCopyEngine))
-      return Res;
-  }
 
-  // Create/Reuse the command list, because in Level Zero commands are added to
-  // the command lists, and later are then added to the command queue.
-  // Each command list is paired with an associated fence to track when the
-  // command list is available for reuse.
-  _pi_result pi_result = PI_OUT_OF_RESOURCES;
-  ZeStruct<ze_fence_desc_t> ZeFenceDesc;
-  // Initally, we need to check if a command list has already been created
-  // on this device that is available for use. If so, then reuse that
-  // Level-Zero Command List and Fence for this PI call.
-  {
-    // Make sure to acquire the lock before checking the size, or there
-    // will be a race condition.
-    std::lock_guard<std::mutex> lock(Queue->Context->ZeCommandListCacheMutex);
-    // Under mutex since operator[] does insertion on the first usage for every
-    // unique ZeDevice.
-    auto &ZeCommandListCache =
-        UseCopyEngine
-            ? Queue->Context->ZeCopyCommandListCache[Queue->Device->ZeDevice]
-            : Queue->Context
-                  ->ZeComputeCommandListCache[Queue->Device->ZeDevice];
-
-    if (ZeCommandListCache.size() > 0) {
-      auto &ZeCommandList = ZeCommandListCache.front();
-      auto it = Queue->CommandListMap.find(ZeCommandList);
-      if (it != Queue->CommandListMap.end()) {
-        CommandList = it;
-        CommandList->second.InUse = true;
-      } else {
-        // If there is a command list available on this context, but it
-        // wasn't yet used in this queue then create a new entry in this
-        // queue's map to hold the fence and other associated command
-        // list information.
-        uint32_t QueueGroupOrdinal;
-        auto &ZeCommandQueue =
-            Queue->getQueueGroup(UseCopyEngine).getZeQueue(&QueueGroupOrdinal);
-
-        ze_fence_handle_t ZeFence;
-        ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
-        CommandList =
-            Queue->CommandListMap
-                .emplace(ZeCommandList,
-                         pi_command_list_info_t{ZeFence, true, ZeCommandQueue,
-                                                QueueGroupOrdinal})
-                .first;
+    auto &CommandBatch =
+        UseCopyEngine ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch;
+    // Handle batching of commands
+    // First see if there is an command-list open for batching commands
+    // for this queue.
+    if (Queue->hasOpenCommandList(UseCopyEngine)) {
+      if (AllowBatching) {
+        CommandList = CommandBatch.OpenCommandList;
+        return PI_SUCCESS;
       }
-      ZeCommandListCache.pop_front();
-      return PI_SUCCESS;
+      // If this command isn't allowed to be batched, then we need to
+      // go ahead and execute what is already in the batched list,
+      // and then go on to process this. On exit from executeOpenCommandList
+      // OpenCommandList will be invalidated.
+      if (auto Res = Queue->executeOpenCommandList(UseCopyEngine))
+        return Res;
     }
-  }
-
-  // If there are no available command lists in the cache, then we check for
-  // command lists that have already signalled, but have not been added to the
-  // available list yet. Each command list has a fence associated which tracks
-  // if a command list has completed dispatch of its commands and is ready for
-  // reuse. If a command list is found to have been signalled, then the
-  // command list & fence are reset and we return.
-  for (auto it = Queue->CommandListMap.begin();
-       it != Queue->CommandListMap.end(); ++it) {
-    // Make sure this is the command list type needed.
-    if (UseCopyEngine != it->second.isCopy(Queue))
-      continue;
 
-    ze_result_t ZeResult =
-        ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence));
-    if (ZeResult == ZE_RESULT_SUCCESS) {
-      Queue->resetCommandList(it, false);
-      CommandList = it;
-      CommandList->second.InUse = true;
-      return PI_SUCCESS;
+    // Create/Reuse the command list, because in Level Zero commands are added
+    // to the command lists, and later are then added to the command queue. Each
+    // command list is paired with an associated fence to track when the command
+    // list is available for reuse.
+    _pi_result pi_result = PI_OUT_OF_RESOURCES;
+    ZeStruct<ze_fence_desc_t> ZeFenceDesc;
+    // Initally, we need to check if a command list has already been created
+    // on this device that is available for use. If so, then reuse that
+    // Level-Zero Command List and Fence for this PI call.
+    {
+      // Make sure to acquire the lock before checking the size, or there
+      // will be a race condition.
+      std::lock_guard<std::mutex> lock(Queue->Context->ZeCommandListCacheMutex);
+      // Under mutex since operator[] does insertion on the first usage for
+      // every unique ZeDevice.
+      auto &ZeCommandListCache =
+          UseCopyEngine
+              ? Queue->Context->ZeCopyCommandListCache[Queue->Device->ZeDevice]
+              : Queue->Context
+                    ->ZeComputeCommandListCache[Queue->Device->ZeDevice];
+
+      if (ZeCommandListCache.size() > 0) {
+        auto &ZeCommandList = ZeCommandListCache.front();
+        auto it = Queue->CommandListMap.find(ZeCommandList);
+        if (it != Queue->CommandListMap.end()) {
+          CommandList = it;
+          CommandList->second.InUse = true;
+        } else {
+          // If there is a command list available on this context, but it
+          // wasn't yet used in this queue then create a new entry in this
+          // queue's map to hold the fence and other associated command
+          // list information.
+          uint32_t QueueGroupOrdinal;
+          auto &ZeCommandQueue = Queue->getQueueGroup(UseCopyEngine)
+                                     .getZeQueue(&QueueGroupOrdinal);
+
+          ze_fence_handle_t ZeFence;
+          ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
+          CommandList =
+              Queue->CommandListMap
+                  .emplace(ZeCommandList,
+                           pi_command_list_info_t{ZeFence, true, ZeCommandQueue,
+                                                  QueueGroupOrdinal})
+                  .first;
+        }
+        ZeCommandListCache.pop_front();
+        return PI_SUCCESS;
+      }
     }
-  }
 
-  // If there are no available command lists nor signalled command lists, then
-  // we must create another command list if we have not exceed the maximum
-  // command lists we can create.
-  // Once created, this command list & fence are added to the command list fence
-  // map.
-  if (Queue->Device->Platform->ZeGlobalCommandListCount <
-      ZeMaxCommandListCacheSize) {
-    ze_command_list_handle_t ZeCommandList;
-    ze_fence_handle_t ZeFence;
-
-    uint32_t QueueGroupOrdinal;
-    auto &ZeCommandQueue =
-        Queue->getQueueGroup(UseCopyEngine).getZeQueue(&QueueGroupOrdinal);
-
-    ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
-    ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
-
-    ZE_CALL(zeCommandListCreate,
-            (Queue->Context->ZeContext, Queue->Device->ZeDevice,
-             &ZeCommandListDesc, &ZeCommandList));
-    // Increments the total number of command lists created on this platform.
-    Queue->Device->Platform->ZeGlobalCommandListCount++;
+    // If there are no available command lists in the cache, then we check for
+    // command lists that have already signalled, but have not been added to the
+    // available list yet. Each command list has a fence associated which tracks
+    // if a command list has completed dispatch of its commands and is ready for
+    // reuse. If a command list is found to have been signalled, then the
+    // command list & fence are reset and we return.
+    for (auto it = Queue->CommandListMap.begin();
+         it != Queue->CommandListMap.end(); ++it) {
+      // Make sure this is the command list type needed.
+      if (UseCopyEngine != it->second.isCopy(Queue))
+        continue;
+
+      ze_result_t ZeResult =
+          ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence));
+      if (ZeResult == ZE_RESULT_SUCCESS) {
+        Queue->resetCommandList(it, false);
+        CommandList = it;
+        CommandList->second.InUse = true;
+        return PI_SUCCESS;
+      }
+    }
 
-    ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
-    std::tie(CommandList, std::ignore) = Queue->CommandListMap.insert(
-        std::pair<ze_command_list_handle_t, pi_command_list_info_t>(
-            ZeCommandList, {ZeFence, true, ZeCommandQueue, QueueGroupOrdinal}));
-    pi_result = PI_SUCCESS;
+    // If there are no available command lists nor signalled command lists, then
+    // we must create another command list if we have not exceed the maximum
+    // command lists we can create.
+    // Once created, this command list & fence are added to the command list
+    // fence map.
+    if (Queue->Device->Platform->ZeGlobalCommandListCount <
+        ZeMaxCommandListCacheSize) {
+      ze_command_list_handle_t ZeCommandList;
+      ze_fence_handle_t ZeFence;
+
+      uint32_t QueueGroupOrdinal;
+      auto &ZeCommandQueue =
+          Queue->getQueueGroup(UseCopyEngine).getZeQueue(&QueueGroupOrdinal);
+
+      ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
+      ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
+
+      ZE_CALL(zeCommandListCreate,
+              (Queue->Context->ZeContext, Queue->Device->ZeDevice,
+               &ZeCommandListDesc, &ZeCommandList));
+      // Increments the total number of command lists created on this platform.
+      Queue->Device->Platform->ZeGlobalCommandListCount++;
+
+      ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
+      std::tie(CommandList, std::ignore) = Queue->CommandListMap.insert(
+          std::pair<ze_command_list_handle_t, pi_command_list_info_t>(
+              ZeCommandList,
+              {ZeFence, true, ZeCommandQueue, QueueGroupOrdinal}));
+      pi_result = PI_SUCCESS;
+    }
+    return pi_result;
+  } else {
+    CommandList = Queue->CommandListMap.begin();
   }
-
-  return pi_result;
+  return PI_SUCCESS;
 }
 
 void _pi_queue::adjustBatchSizeForFullBatch(bool IsCopy) {
@@ -1346,8 +1356,8 @@ void _pi_queue::CaptureIndirectAccesses() {
 }
 
 pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
-                                        bool IsBlocking,
-                                        bool OKToBatchCommand) {
+                                        bool IsBlocking, bool OKToBatchCommand,
+                                        bool Graph) {
   bool UseCopyEngine = CommandList->second.isCopy(this);
 
   // If the current LastCommandEvent is the nullptr, then it means
@@ -1466,6 +1476,8 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
     }
 
     // Close the command list and have it ready for dispatch.
+    // TODO: Close command list only once before initial execution, but works as
+    // is.
     ZE_CALL(zeCommandListClose, (CommandList->first));
     // Offload command list to the GPU for asynchronous execution
     auto ZeCommandList = CommandList->first;
@@ -2051,10 +2063,11 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
   }
 
   zePrint("Using events scope: %s\n",
-          EventsScope == AllHostVisible ? "all host-visible"
-          : EventsScope == OnDemandHostVisibleProxy
-              ? "on demand host-visible proxy"
-              : "only last command in a batch is host-visible");
+          EventsScope == AllHostVisible
+              ? "all host-visible"
+              : EventsScope == OnDemandHostVisibleProxy
+                    ? "on demand host-visible proxy"
+                    : "only last command in a batch is host-visible");
   return PI_SUCCESS;
 }
 
@@ -3203,10 +3216,11 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,
                         pi_queue_properties Properties, pi_queue *Queue) {
 
   // Check that unexpected bits are not set.
-  PI_ASSERT(!(Properties & ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
-                             PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE |
-                             PI_QUEUE_ON_DEVICE_DEFAULT)),
-            PI_INVALID_VALUE);
+  PI_ASSERT(
+      !(Properties & ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
+                       PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE |
+                       PI_QUEUE_ON_DEVICE_DEFAULT | PI_QUEUE_LAZY_EXECUTION)),
+      PI_INVALID_VALUE);
 
   PI_ASSERT(Context, PI_INVALID_CONTEXT);
   PI_ASSERT(Queue, PI_INVALID_QUEUE);
@@ -4929,12 +4943,12 @@ pi_result piKernelRelease(pi_kernel Kernel) {
   return PI_SUCCESS;
 }
 
-pi_result
-piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
-                      const size_t *GlobalWorkOffset,
-                      const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
-                      pi_uint32 NumEventsInWaitList,
-                      const pi_event *EventWaitList, pi_event *Event) {
+pi_result piEnqueueKernel(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
+                          const size_t *GlobalWorkOffset,
+                          const size_t *GlobalWorkSize,
+                          const size_t *LocalWorkSize,
+                          pi_uint32 NumEventsInWaitList,
+                          const pi_event *EventWaitList, pi_event *Event) {
   PI_ASSERT(Kernel, PI_INVALID_KERNEL);
   PI_ASSERT(Queue, PI_INVALID_QUEUE);
   PI_ASSERT(Event, PI_INVALID_EVENT);
@@ -5099,14 +5113,55 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
           pi_cast<std::uintptr_t>(ZeEvent));
   printZeEventList((*Event)->WaitList);
 
+#if 0
   // Execute command list asynchronously, as the event will be used
   // to track down its completion.
   if (auto Res = Queue->executeCommandList(CommandList, false, true))
     return Res;
+#endif
 
   return PI_SUCCESS;
 }
 
+pi_result piKernelLaunch(pi_queue Queue) {
+
+  const bool Graph = !(Queue->isEagerExec());
+  // const bool Graph = true;
+
+  // TODO: Make sure (re-)execute specific command list.
+
+  // Get a new command list to be used on this call
+  pi_command_list_ptr_t CommandList{};
+  if (auto Res = Queue->Context->getAvailableCommandList(
+          Queue, CommandList, false /* PreferCopyEngine */,
+          true /* AllowBatching */, Graph /* Shortcut for Graph */))
+    return Res;
+
+  // Execute command list asynchronously, as the event will be used
+  // to track down its completion.
+  if (auto Res = Queue->executeCommandList(CommandList, false, true, Graph))
+    return Res;
+
+  return PI_SUCCESS;
+}
+
+pi_result
+piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
+                      const size_t *GlobalWorkOffset,
+                      const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
+                      pi_uint32 NumEventsInWaitList,
+                      const pi_event *EventWaitList, pi_event *Event) {
+  auto Res =
+      piEnqueueKernel(Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize,
+                      LocalWorkSize, NumEventsInWaitList, EventWaitList, Event);
+#if 1
+  if (Res == PI_SUCCESS && Queue->isEagerExec()) {
+    return piKernelLaunch(Queue);
+  }
+#endif
+  return Res;
+}
+
 pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
                                             pi_context Context,
                                             pi_program Program,
diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp
index a0f9ed530baf3..47eac0846f43a 100644
--- a/sycl/plugins/level_zero/pi_level_zero.hpp
+++ b/sycl/plugins/level_zero/pi_level_zero.hpp
@@ -636,6 +636,10 @@ struct _pi_context : _pi_object {
   std::unordered_map<ze_device_handle_t, std::list<ze_command_list_handle_t>>
       ZeCopyCommandListCache;
 
+  // Single command list for graph api
+
+  std::list<ze_command_list_handle_t> ZeGraphCommandList;
+
   // Retrieves a command list for executing on this device along with
   // a fence to be used in tracking the execution of this command list.
   // If a command list has been created on this device which has
@@ -656,7 +660,8 @@ struct _pi_context : _pi_object {
   pi_result getAvailableCommandList(pi_queue Queue,
                                     pi_command_list_ptr_t &CommandList,
                                     bool UseCopyEngine = false,
-                                    bool AllowBatching = false);
+                                    bool AllowBatching = false,
+                                    bool Graph = false);
 
   // Get index of the free slot in the available pool. If there is no available
   // pool then create new one. The HostVisible parameter tells if we need a
@@ -872,6 +877,8 @@ struct _pi_queue : _pi_object {
   // Returns true if the queue is a in-order queue.
   bool isInOrderQueue() const;
 
+  bool isEagerExec() const;
+
   // adjust the queue's batch size, knowing that the current command list
   // is being closed with a full batch.
   // For copy commands, IsCopy is set to 'true'.
@@ -912,7 +919,8 @@ struct _pi_queue : _pi_object {
   // For immediate commandlists, no close and execute is necessary.
   pi_result executeCommandList(pi_command_list_ptr_t CommandList,
                                bool IsBlocking = false,
-                               bool OKToBatchCommand = false);
+                               bool OKToBatchCommand = false,
+                               bool Graph = false);
 
   // If there is an open command list associated with this queue,
   // close it, execute it, and reset the corresponding OpenCommandList.
diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp
index 63e4132403ac7..7aefc92a3567f 100644
--- a/sycl/source/detail/queue_impl.cpp
+++ b/sycl/source/detail/queue_impl.cpp
@@ -276,6 +276,14 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
   TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
 #endif
 
+#if 1
+  if (has_property<ext::oneapi::property::queue::lazy_execution>()) {
+    const detail::plugin &Plugin = getPlugin();
+    if (Plugin.getBackend() == backend::ext_oneapi_level_zero)
+      Plugin.call<detail::PiApiKind::piKernelLaunch>(getHandleRef());
+  }
+#endif
+
   std::vector<std::weak_ptr<event_impl>> WeakEvents;
   std::vector<event> SharedEvents;
   {
diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp
index 0c05db22d126a..504c28d1b435c 100644
--- a/sycl/source/detail/queue_impl.hpp
+++ b/sycl/source/detail/queue_impl.hpp
@@ -307,6 +307,9 @@ class queue_impl {
             ext::oneapi::cuda::property::queue::use_default_stream>()) {
       CreationFlags |= __SYCL_PI_CUDA_USE_DEFAULT_STREAM;
     }
+    if (has_property<ext::oneapi::property::queue::lazy_execution>()) {
+      CreationFlags |= PI_QUEUE_LAZY_EXECUTION;
+    }
     RT::PiQueue Queue{};
     RT::PiContext Context = MContext->getHandleRef();
     RT::PiDevice Device = MDevice->getHandleRef();