diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h
index 492e43534dcdd..2246463d17255 100644
--- a/sycl/include/sycl/detail/pi.h
+++ b/sycl/include/sycl/detail/pi.h
@@ -52,9 +52,10 @@
 // 10.13 Added new PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS queue property.
 // 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for
 // piDeviceGetInfo.
+// 10.15 Add new PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION queue property
 
 #define _PI_H_VERSION_MAJOR 10
-#define _PI_H_VERSION_MINOR 14
+#define _PI_H_VERSION_MINOR 15
 
 #define _PI_STRING_HELPER(a) #a
 #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -569,6 +570,14 @@ constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE = (1 << 1);
 constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = (1 << 2);
 constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = (1 << 3);
 constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS = (1 << 4);
+// In a lazy queue, enqueued commands are not submitted for execution
+// immediately, instead they are submitted for execution once the queue is
+// flushed.
+//
+// This is to enable prototyping of the SYCL_EXT_ONEAPI_GRAPH extension,
+// before a native command-list interface in PI can be designed and
+// implemented.
+constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION = (1 << 5);
 
 using pi_result = _pi_result;
 using pi_platform_info = _pi_platform_info;
diff --git a/sycl/include/sycl/detail/property_helper.hpp b/sycl/include/sycl/detail/property_helper.hpp
index ff0f4aa8568b0..5397da6053000 100644
--- a/sycl/include/sycl/detail/property_helper.hpp
+++ b/sycl/include/sycl/detail/property_helper.hpp
@@ -35,8 +35,9 @@ enum DataLessPropKind {
   UseDefaultStream = 8,
   DiscardEvents = 9,
   DeviceReadOnly = 10,
+  LazyExecution = 11,
   // Indicates the last known dataless property.
-  LastKnownDataLessPropKind = 10,
+  LastKnownDataLessPropKind = 11,
   // Exceeding 32 may cause ABI breaking change on some of OSes.
   DataLessPropKindSize = 32
 };
diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp
new file mode 100644
index 0000000000000..c172f23c29b8a
--- /dev/null
+++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp
@@ -0,0 +1,106 @@
+//==--------- graph.hpp --- SYCL graph extension ---------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <memory>
+#include <sycl/detail/common.hpp>
+#include <sycl/detail/defines_elementary.hpp>
+#include <vector>
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+
+class handler;
+namespace ext {
+namespace oneapi {
+namespace experimental {
+
+namespace detail {
+struct node_impl;
+struct graph_impl;
+
+using node_ptr = std::shared_ptr<node_impl>;
+using graph_ptr = std::shared_ptr<graph_impl>;
+} // namespace detail
+
+enum class graph_state {
+  modifiable,
+  executable,
+};
+
+class __SYCL_EXPORT node {
+private:
+  node(detail::node_ptr Impl) : impl(Impl) {}
+
+  template <class Obj>
+  friend decltype(Obj::impl)
+  sycl::detail::getSyclObjImpl(const Obj &SyclObject);
+  template <class T>
+  friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
+
+  detail::node_ptr impl;
+  detail::graph_ptr MGraph;
+};
+
+template <graph_state State = graph_state::modifiable>
+class __SYCL_EXPORT command_graph {
+public:
+  command_graph();
+
+  // Adding empty node with [0..n] predecessors:
+  node add(const std::vector<node> &dep = {});
+
+  // Adding device node:
+  template <typename T> node add(T cgf, const std::vector<node> &dep = {}) {
+    return add_impl(cgf, dep);
+  }
+
+  // Adding dependency between two nodes.
+  void make_edge(node sender, node receiver);
+
+  command_graph<graph_state::executable>
+  finalize(const sycl::context &syclContext) const;
+
+private:
+  command_graph(detail::graph_ptr Impl) : impl(Impl) {}
+
+  // Template-less implementation of add()
+  node add_impl(std::function<void(handler &)> cgf,
+                const std::vector<node> &dep);
+
+  template <class Obj>
+  friend decltype(Obj::impl)
+  sycl::detail::getSyclObjImpl(const Obj &SyclObject);
+  template <class T>
+  friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
+
+  detail::graph_ptr impl;
+};
+
+template <> class __SYCL_EXPORT command_graph<graph_state::executable> {
+public:
+  command_graph() = delete;
+
+  command_graph(detail::graph_ptr g, const sycl::context &ctx)
+      : MTag(rand()), MCtx(ctx), impl(g) {}
+
+private:
+  template <class Obj>
+  friend decltype(Obj::impl)
+  sycl::detail::getSyclObjImpl(const Obj &SyclObject);
+
+  int MTag;
+  const sycl::context &MCtx;
+  detail::graph_ptr impl;
+};
+} // namespace experimental
+} // namespace oneapi
+} // namespace ext
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl
diff --git a/sycl/include/sycl/feature_test.hpp.in b/sycl/include/sycl/feature_test.hpp.in
index 59d2ebed77e39..1b21acfcbf56a 100644
--- a/sycl/include/sycl/feature_test.hpp.in
+++ b/sycl/include/sycl/feature_test.hpp.in
@@ -47,6 +47,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
 #define SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES 1
 #define SYCL_EXT_ONEAPI_GROUP_ALGORITHMS 1
 #define SYCL_EXT_ONEAPI_GROUP_SORT 1
+#define SYCL_EXT_ONEAPI_LAZY_QUEUE 1
 #define SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY 1
 #define SYCL_EXT_ONEAPI_ND_RANGE_REDUCTIONS 1
 #define SYCL_EXT_ONEAPI_DEFAULT_CONTEXT 1
diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp
index 335daae9904d6..42e489c3eea1a 100644
--- a/sycl/include/sycl/handler.hpp
+++ b/sycl/include/sycl/handler.hpp
@@ -30,6 +30,8 @@
 #include <sycl/sampler.hpp>
 #include <sycl/stl.hpp>
 
+#include <sycl/ext/oneapi/experimental/graph.hpp>
+
 #include <functional>
 #include <limits>
 #include <memory>
@@ -2516,6 +2518,13 @@ class __SYCL_EXPORT handler {
   /// \param Advice is a device-defined advice for the specified allocation.
   void mem_advise(const void *Ptr, size_t Length, int Advice);
 
+  /// Executes a command_graph.
+  ///
+  /// \param Graph Executable command_graph to run
+  void exec_graph(ext::oneapi::experimental::command_graph<
+                  ext::oneapi::experimental::graph_state::executable>
+                      Graph);
+
 private:
   std::shared_ptr<detail::handler_impl> MImpl;
   std::shared_ptr<detail::queue_impl> MQueue;
diff --git a/sycl/include/sycl/properties/queue_properties.hpp b/sycl/include/sycl/properties/queue_properties.hpp
index 2457b4c561dcf..643f16fbe7cb8 100644
--- a/sycl/include/sycl/properties/queue_properties.hpp
+++ b/sycl/include/sycl/properties/queue_properties.hpp
@@ -28,6 +28,8 @@ namespace property {
 namespace queue {
 class discard_events
     : public ::sycl::detail::DataLessProperty<::sycl::detail::DiscardEvents> {};
+class lazy_execution
+    : public ::sycl::detail::DataLessProperty<::sycl::detail::LazyExecution> {};
 } // namespace queue
 } // namespace property
 
@@ -65,6 +67,9 @@ template <>
 struct is_property<ext::oneapi::property::queue::discard_events>
     : std::true_type {};
 template <>
+struct is_property<ext::oneapi::property::queue::lazy_execution>
+    : std::true_type {};
+template <>
 struct is_property<property::queue::cuda::use_default_stream> : std::true_type {
 };
 template <>
@@ -80,6 +85,9 @@ template <>
 struct is_property_of<ext::oneapi::property::queue::discard_events, queue>
     : std::true_type {};
 template <>
+struct is_property_of<ext::oneapi::property::queue::lazy_execution, queue>
+    : std::true_type {};
+template <>
 struct is_property_of<property::queue::cuda::use_default_stream, queue>
     : std::true_type {};
 template <>
diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp
index fd2cf7736dc3a..3aacc460e6d29 100644
--- a/sycl/include/sycl/queue.hpp
+++ b/sycl/include/sycl/queue.hpp
@@ -23,6 +23,7 @@
 #include <sycl/property_list.hpp>
 #include <sycl/stl.hpp>
 
+
 // Explicitly request format macros
 #ifndef __STDC_FORMAT_MACROS
 #define __STDC_FORMAT_MACROS 1
@@ -1058,6 +1059,55 @@ class __SYCL_EXPORT queue {
 // Clean KERNELFUNC macros.
 #undef _KERNELFUNCPARAM
 
+  /// Shortcut for executing a graph of commands.
+  ///
+  /// \param Graph the graph of commands to execute
+  /// \return an event representing graph execution operation.
+  event exec_graph(ext::oneapi::experimental::command_graph<
+                   ext::oneapi::experimental::graph_state::executable>
+                       Graph) {
+    const detail::code_location CodeLoc = {};
+    return submit([&](handler &CGH) { CGH.exec_graph(Graph); }, CodeLoc);
+  }
+
+  /// Shortcut for executing a graph of commands.
+  ///
+  /// \param Graph the graph of commands to execute
+  /// \param DepEvent is an event that specifies the graph execution
+  /// dependencies.
+  /// \return an event representing graph execution operation.
+  event exec_graph(ext::oneapi::experimental::command_graph<
+                       ext::oneapi::experimental::graph_state::executable>
+                       Graph,
+                   event DepEvent) {
+    const detail::code_location CodeLoc = {};
+    return submit(
+        [&](handler &CGH) {
+          CGH.depends_on(DepEvent);
+          CGH.exec_graph(Graph);
+        },
+        CodeLoc);
+  }
+
+  /// Shortcut for executing a graph of commands.
+  ///
+  /// \param Graph the graph of commands to execute
+  /// \param DepEvents is a vector of events that specifies the graph
+  /// execution dependencies.
+  /// \return an event representing graph execution operation.
+  event exec_graph(ext::oneapi::experimental::command_graph<
+                       ext::oneapi::experimental::graph_state::executable>
+                       Graph,
+                   const std::vector<event> &DepEvents) {
+    const detail::code_location CodeLoc = {};
+    return submit(
+        [&](handler &CGH) {
+          CGH.depends_on(DepEvents);
+          CGH.exec_graph(Graph);
+        },
+        CodeLoc);
+  }
+
   /// Returns whether the queue is in order or OoO
   ///
   /// Equivalent to has_property<property::queue::in_order>()
diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp
index 54ed20383d9a9..1a6937e47498d 100644
--- a/sycl/plugins/level_zero/pi_level_zero.cpp
+++ b/sycl/plugins/level_zero/pi_level_zero.cpp
@@ -1306,6 +1306,45 @@ pi_result resetCommandLists(pi_queue Queue) {
 pi_result _pi_context::getAvailableCommandList(
     pi_queue Queue, pi_command_list_ptr_t &CommandList, bool UseCopyEngine,
     bool AllowBatching, ze_command_queue_handle_t *ForcedCmdQueue) {
+    
+  // This is a hack. TODO: Proper CommandList allocation per Executable Graph.
+  if( Queue->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) {
+    // TODO: Create new Command List.
+    if(Queue->LazyCommandListMap.empty()) {
+      const bool UseCopyEngine = false;
+      // Adding createCommandList() to LazyCommandListMap
+      ze_fence_handle_t ZeFence;
+      ZeStruct<ze_fence_desc_t> ZeFenceDesc;
+      ze_command_list_handle_t ZeCommandList;
+
+      uint32_t QueueGroupOrdinal;
+      auto &QGroup = Queue->getQueueGroup(UseCopyEngine);
+      auto &ZeCommandQueue =
+        //ForcedCmdQueue ? *ForcedCmdQueue : 
+        QGroup.getZeQueue(&QueueGroupOrdinal);
+      //if (ForcedCmdQueue)
+      //  QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue);
+
+      ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
+      ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
+
+      ZE_CALL(zeCommandListCreate, (Queue->Context->ZeContext, Queue->Device->ZeDevice,
+                                    &ZeCommandListDesc, &ZeCommandList));
+
+      ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
+      std::tie(CommandList, std::ignore) = Queue->LazyCommandListMap.insert(
+        std::pair<ze_command_list_handle_t, pi_command_list_info_t>(
+          ZeCommandList, {ZeFence, false, ZeCommandQueue, QueueGroupOrdinal}));
+
+      Queue->insertActiveBarriers(CommandList, UseCopyEngine);
+      //
+      CommandList->second.ZeFenceInUse = true;
+    } else {
+        CommandList = Queue->LazyCommandListMap.begin();
+    }
+    return PI_SUCCESS;
+  }
+    
   // Immediate commandlists have been pre-allocated and are always available.
   if (Queue->Device->useImmediateCommandLists()) {
     CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList();
@@ -1544,6 +1583,9 @@ void _pi_queue::CaptureIndirectAccesses() {
 pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
                                         bool IsBlocking,
                                         bool OKToBatchCommand) {
+  // When executing a Graph, defer execution
+  if( this->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) return PI_SUCCESS;
+    
   bool UseCopyEngine = CommandList->second.isCopy(this);
 
   // If the current LastCommandEvent is the nullptr, then it means
@@ -3509,7 +3551,8 @@ pi_result piQueueCreate(pi_context Context, pi_device Device,
   PI_ASSERT(!(Properties & ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
                              PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE |
                              PI_QUEUE_ON_DEVICE_DEFAULT |
-                             PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS)),
+                             PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS |
+                             PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION)),
             PI_ERROR_INVALID_VALUE);
 
   PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT);
@@ -3783,6 +3826,31 @@ pi_result piQueueFinish(pi_queue Queue) {
 // Flushing cross-queue dependencies is covered by createAndRetainPiZeEventList,
 // so this can be left as a no-op.
 pi_result piQueueFlush(pi_queue Queue) {
+  if( Queue->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) {
+    pi_command_list_ptr_t CommandList{};
+    // TODO: 
+    CommandList = Queue->LazyCommandListMap.begin();
+      
+    auto &ZeCommandQueue = CommandList->second.ZeQueue;
+    // Scope of the lock must be till the end of the function, otherwise new mem
+    // allocs can be created between the moment when we made a snapshot and the
+    // moment when command list is closed and executed. But mutex is locked only
+    // if indirect access tracking enabled, because std::defer_lock is used.
+    // unique_lock destructor at the end of the function will unlock the mutex
+    // if it was locked (which happens only if IndirectAccessTrackingEnabled is
+    // true).
+    std::unique_lock<pi_shared_mutex> ContextsLock(
+      Queue->Device->Platform->ContextsMutex, std::defer_lock);
+      
+    // Close the command list and have it ready for dispatch.
+    ZE_CALL(zeCommandListClose, (CommandList->first));
+    
+    // Offload command list to the GPU for asynchronous execution
+    auto ZeCommandList = CommandList->first;
+    auto ZeResult = ZE_CALL_NOCHECK(
+        zeCommandQueueExecuteCommandLists,
+        (ZeCommandQueue, 1, &ZeCommandList, CommandList->second.ZeFence));
+  }
   (void)Queue;
   return PI_SUCCESS;
 }
diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp
index f6b31229f0535..25fda46b65d89 100644
--- a/sycl/plugins/level_zero/pi_level_zero.hpp
+++ b/sycl/plugins/level_zero/pi_level_zero.hpp
@@ -937,6 +937,8 @@ struct _pi_queue : _pi_object {
 
   // Map of all command lists used in this queue.
   pi_command_list_map_t CommandListMap;
+  // TODO: Assign Graph related command lists to command_graph object
+  pi_command_list_map_t LazyCommandListMap;
 
   // Helper data structure to hold all variables related to batching
   typedef struct CommandBatch {
diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt
index 23b161480b17a..1a92f6bfa5f2f 100644
--- a/sycl/source/CMakeLists.txt
+++ b/sycl/source/CMakeLists.txt
@@ -132,6 +132,7 @@ set(SYCL_SOURCES
     "detail/filter_selector_impl.cpp"
     "detail/force_device.cpp"
     "detail/global_handler.cpp"
+    "detail/graph_impl.cpp"
     "detail/helpers.cpp"
     "detail/handler_proxy.cpp"
     "detail/image_accessor_util.cpp"
diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp
new file mode 100644
index 0000000000000..d478d2e302026
--- /dev/null
+++ b/sycl/source/detail/graph_impl.cpp
@@ -0,0 +1,120 @@
+//==--------- graph_impl.cpp - SYCL graph extension -----------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <detail/graph_impl.hpp>
+#include <detail/queue_impl.hpp>
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+
+namespace detail {
+struct queue_impl;
+using queue_ptr = std::shared_ptr<queue_impl>;
+} // namespace detail
+
+namespace ext {
+namespace oneapi {
+namespace experimental {
+namespace detail {
+
+void graph_impl::exec(sycl::detail::queue_ptr q) {
+  if (MSchedule.empty()) {
+    for (auto n : MRoots) {
+      n->topology_sort(MSchedule);
+    }
+  }
+  for (auto n : MSchedule)
+    n->exec(q);
+}
+
+void graph_impl::exec_and_wait(sycl::detail::queue_ptr q) {
+  if (MFirst) {
+    exec(q);
+    MFirst = false;
+  }
+  q->wait();
+}
+
+void graph_impl::add_root(node_ptr n) {
+  MRoots.insert(n);
+  for (auto n : MSchedule)
+    n->MScheduled = false;
+  MSchedule.clear();
+}
+
+void graph_impl::remove_root(node_ptr n) {
+  MRoots.erase(n);
+  for (auto n : MSchedule)
+    n->MScheduled = false;
+  MSchedule.clear();
+}
+
+template <typename T>
+node_ptr graph_impl::add(graph_ptr impl, T cgf,
+                         const std::vector<node_ptr> &dep) {
+  node_ptr nodeImpl = std::make_shared<node_impl>(impl, cgf);
+  if (!dep.empty()) {
+    for (auto n : dep) {
+      n->register_successor(nodeImpl); // register successor
+      this->remove_root(nodeImpl);     // remove receiver from root node
+                                       // list
+    }
+  } else {
+    this->add_root(nodeImpl);
+  }
+  return nodeImpl;
+}
+
+void node_impl::exec(sycl::detail::queue_ptr q) {
+  std::vector<sycl::event> deps;
+  for (auto i : MPredecessors)
+    deps.push_back(i->get_event());
+
+  const sycl::detail::code_location CodeLoc;
+  MEvent = q->submit(wrapper{MBody, deps}, q, CodeLoc);
+}
+} // namespace detail
+
+template <>
+command_graph<graph_state::modifiable>::command_graph()
+    : impl(std::make_shared<detail::graph_impl>()) {}
+
+template <>
+node command_graph<graph_state::modifiable>::add_impl(
+    std::function<void(handler &)> cgf, const std::vector<node> &dep) {
+  std::vector<detail::node_ptr> depImpls;
+  for (auto &d : dep) {
+    depImpls.push_back(sycl::detail::getSyclObjImpl(d));
+  }
+
+  auto nodeImpl = impl->add(impl, cgf, depImpls);
+  return sycl::detail::createSyclObjFromImpl<node>(nodeImpl);
+}
+
+template <>
+void command_graph<graph_state::modifiable>::make_edge(node sender,
+                                                       node receiver) {
+  auto sender_impl = sycl::detail::getSyclObjImpl(sender);
+  auto receiver_impl = sycl::detail::getSyclObjImpl(receiver);
+
+  sender_impl->register_successor(receiver_impl); // register successor
+  impl->remove_root(receiver_impl); // remove receiver from root node list
+}
+
+template <>
+command_graph<graph_state::executable>
+command_graph<graph_state::modifiable>::finalize(
+    const sycl::context &ctx) const {
+  return command_graph<graph_state::executable>{this->impl, ctx};
+}
+
+} // namespace experimental
+} // namespace oneapi
+} // namespace ext
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl
diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp
new file mode 100644
index 0000000000000..cb4f5cb2d6e81
--- /dev/null
+++ b/sycl/source/detail/graph_impl.hpp
@@ -0,0 +1,106 @@
+//==--------- graph_impl.hpp --- SYCL graph extension ---------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <sycl/ext/oneapi/experimental/graph.hpp>
+#include <sycl/handler.hpp>
+
+#include <functional>
+#include <list>
+#include <set>
+
+namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
+
+namespace detail {
+struct queue_impl;
+using queue_ptr = std::shared_ptr<queue_impl>;
+} // namespace detail
+
+namespace ext {
+namespace oneapi {
+namespace experimental {
+namespace detail {
+
+class wrapper {
+  using T = std::function<void(sycl::handler &)>;
+  T MFunc;
+  std::vector<sycl::event> MDeps;
+
+public:
+  wrapper(T t, const std::vector<sycl::event> &deps) : MFunc(t), MDeps(deps){};
+
+  void operator()(sycl::handler &cgh) {
+    cgh.depends_on(MDeps);
+    std::invoke(MFunc, cgh);
+  }
+};
+
+struct node_impl {
+  bool MScheduled;
+
+  graph_ptr MGraph;
+  sycl::event MEvent;
+
+  std::vector<node_ptr> MSuccessors;
+  std::vector<node_ptr> MPredecessors;
+
+  std::function<void(sycl::handler &)> MBody;
+
+  void exec(sycl::detail::queue_ptr q);
+
+  void register_successor(node_ptr n) {
+    MSuccessors.push_back(n);
+    n->register_predecessor(node_ptr(this));
+  }
+
+  void register_predecessor(node_ptr n) { MPredecessors.push_back(n); }
+
+  sycl::event get_event(void) const { return MEvent; }
+
+  template <typename T>
+  node_impl(graph_ptr g, T cgf) : MScheduled(false), MGraph(g), MBody(cgf) {}
+
+  // Recursively adding nodes to execution stack:
+  void topology_sort(std::list<node_ptr> &schedule) {
+    MScheduled = true;
+    for (auto i : MSuccessors) {
+      if (!i->MScheduled)
+        i->topology_sort(schedule);
+    }
+    schedule.push_front(node_ptr(this));
+  }
+};
+
+struct graph_impl {
+  std::set<node_ptr> MRoots;
+  std::list<node_ptr> MSchedule;
+  // TODO: Change one time initialization to per executable object
+  bool MFirst;
+
+  graph_ptr MParent;
+
+  void exec(sycl::detail::queue_ptr q);
+  void exec_and_wait(sycl::detail::queue_ptr q);
+
+  void add_root(node_ptr n);
+  void remove_root(node_ptr n);
+
+  template <typename T>
+  node_ptr add(graph_ptr impl, T cgf, const std::vector<node_ptr> &dep = {});
+
+  graph_impl() : MFirst(true) {}
+};
+
+} // namespace detail
+} // namespace experimental
+} // namespace oneapi
+} // namespace ext
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
+} // namespace sycl
diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp
index f55b845b1c84e..e6caedcc66bcc 100644
--- a/sycl/source/detail/queue_impl.cpp
+++ b/sycl/source/detail/queue_impl.cpp
@@ -278,6 +278,12 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
   TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
 #endif
 
+  if (has_property<ext::oneapi::property::queue::lazy_execution>()) {
+    const detail::plugin &Plugin = getPlugin();
+    if (Plugin.getBackend() == backend::ext_oneapi_level_zero)
+      Plugin.call<detail::PiApiKind::piQueueFlush>(getHandleRef());
+  }
+
   std::vector<std::weak_ptr<event_impl>> WeakEvents;
   std::vector<event> SharedEvents;
   {
diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp
index 3e3ec40bc2fa9..2e217912ee6c1 100644
--- a/sycl/source/detail/queue_impl.hpp
+++ b/sycl/source/detail/queue_impl.hpp
@@ -316,6 +316,9 @@ class queue_impl {
       // queue property.
       CreationFlags |= PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS;
     }
+    if (MPropList.has_property<ext::oneapi::property::queue::lazy_execution>()) {
+      CreationFlags |= PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION;
+    }
     RT::PiQueue Queue{};
     RT::PiContext Context = MContext->getHandleRef();
     RT::PiDevice Device = MDevice->getHandleRef();
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index 7919031827cfd..d95ef9a8bd4d5 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -10,6 +10,7 @@
 
 #include <detail/config.hpp>
 #include <detail/global_handler.hpp>
+#include <detail/graph_impl.hpp>
 #include <detail/handler_impl.hpp>
 #include <detail/kernel_bundle_impl.hpp>
 #include <detail/kernel_impl.hpp>
@@ -698,5 +699,12 @@ void handler::depends_on(const std::vector<event> &Events) {
   }
 }
 
+void handler::exec_graph(ext::oneapi::experimental::command_graph<
+                         ext::oneapi::experimental::graph_state::executable>
+                             Graph) {
+  auto GraphImpl = detail::getSyclObjImpl(Graph);
+  GraphImpl->exec_and_wait(MQueue);
+}
+
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
diff --git a/sycl/test/graph/graph-explicit-dotp.cpp b/sycl/test/graph/graph-explicit-dotp.cpp
new file mode 100644
index 0000000000000..561057bc13f84
--- /dev/null
+++ b/sycl/test/graph/graph-explicit-dotp.cpp
@@ -0,0 +1,98 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+#include <CL/sycl.hpp>
+#include <iostream>
+
+#include <sycl/ext/oneapi/experimental/graph.hpp>
+
+const size_t n = 10;
+
+float host_gold_result() {
+  float alpha = 1.0f;
+  float beta = 2.0f;
+  float gamma = 3.0f;
+
+  float sum = 0.0f;
+
+  for (size_t i = 0; i < n; ++i) {
+    sum += (alpha * 1.0f + beta * 2.0f) * (gamma * 3.0f + beta * 2.0f);
+  }
+
+  return sum;
+}
+
+int main() {
+  float alpha = 1.0f;
+  float beta = 2.0f;
+  float gamma = 3.0f;
+
+  sycl::property_list properties{
+      sycl::property::queue::in_order{},
+      sycl::ext::oneapi::property::queue::lazy_execution{}};
+
+  sycl::queue q{sycl::gpu_selector_v, properties};
+
+  sycl::ext::oneapi::experimental::command_graph g;
+
+  float *dotp = sycl::malloc_shared<float>(1, q);
+
+  float *x = sycl::malloc_shared<float>(n, q);
+  float *y = sycl::malloc_shared<float>(n, q);
+  float *z = sycl::malloc_shared<float>(n, q);
+
+  /* init data on the device */
+  auto n_i = g.add([&](sycl::handler &h) {
+    h.parallel_for(n, [=](sycl::id<1> it) {
+      const size_t i = it[0];
+      x[i] = 1.0f;
+      y[i] = 2.0f;
+      z[i] = 3.0f;
+    });
+  });
+
+  auto node_a = g.add(
+      [&](sycl::handler &h) {
+        h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) {
+          const size_t i = it[0];
+          x[i] = alpha * x[i] + beta * y[i];
+        });
+      },
+      {n_i});
+
+  auto node_b = g.add(
+      [&](sycl::handler &h) {
+        h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) {
+          const size_t i = it[0];
+          z[i] = gamma * z[i] + beta * y[i];
+        });
+      },
+      {n_i});
+
+  auto node_c = g.add(
+      [&](sycl::handler &h) {
+        h.parallel_for(sycl::range<1>{n},
+                       sycl::reduction(dotp, 0.0f, std::plus()),
+                       [=](sycl::id<1> it, auto &sum) {
+                         const size_t i = it[0];
+                         sum += x[i] * z[i];
+                       });
+      },
+      {node_a, node_b});
+
+  auto executable_graph = g.finalize(q.get_context());
+
+  // Using shortcut for executing a graph of commands
+  q.exec_graph(executable_graph).wait();
+
+  if (*dotp != host_gold_result()) {
+    std::cout << "Error unexpected result!\n";
+  }
+
+  sycl::free(dotp, q);
+  sycl::free(x, q);
+  sycl::free(y, q);
+  sycl::free(z, q);
+
+  std::cout << "done.\n";
+
+  return 0;
+}
diff --git a/sycl/test/graph/graph-explicit-queue-shortcuts.cpp b/sycl/test/graph/graph-explicit-queue-shortcuts.cpp
new file mode 100644
index 0000000000000..19d74ae895e86
--- /dev/null
+++ b/sycl/test/graph/graph-explicit-queue-shortcuts.cpp
@@ -0,0 +1,39 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+#include <CL/sycl.hpp>
+#include <iostream>
+
+#include <sycl/ext/oneapi/experimental/graph.hpp>
+
+int main() {
+
+  sycl::property_list properties{
+      sycl::property::queue::in_order{},
+      sycl::ext::oneapi::property::queue::lazy_execution{}};
+
+  sycl::queue q{sycl::gpu_selector_v, properties};
+
+  sycl::ext::oneapi::experimental::command_graph g;
+
+  const size_t n = 10;
+  float *arr = sycl::malloc_shared<float>(n, q);
+
+  g.add([&](sycl::handler &h) {
+    h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx) {
+      size_t i = idx;
+      arr[i] = 1;
+    });
+  });
+
+  auto executable_graph = g.finalize(q.get_context());
+
+  auto e1 = q.exec_graph(executable_graph);
+  auto e2 = q.exec_graph(executable_graph, e1);
+  auto e3 = q.exec_graph(executable_graph, e1);
+  q.exec_graph(executable_graph, {e2, e3}).wait();
+
+  sycl::free(arr, q);
+
+  std::cout << "done " << arr[0] << std::endl;
+
+  return 0;
+}
diff --git a/sycl/test/graph/graph-explicit-reduction.cpp b/sycl/test/graph/graph-explicit-reduction.cpp
new file mode 100644
index 0000000000000..9a2788079570c
--- /dev/null
+++ b/sycl/test/graph/graph-explicit-reduction.cpp
@@ -0,0 +1,37 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+#include <CL/sycl.hpp>
+#include <iostream>
+
+#include <sycl/ext/oneapi/experimental/graph.hpp>
+
+int main() {
+  sycl::property_list properties{
+      sycl::property::queue::in_order{},
+      sycl::ext::oneapi::property::queue::lazy_execution{}};
+
+  sycl::queue q{sycl::gpu_selector_v, properties};
+
+  sycl::ext::oneapi::experimental::command_graph g;
+
+  const size_t n = 10;
+  float *input = sycl::malloc_shared<float>(n, q);
+  float *output = sycl::malloc_shared<float>(1, q);
+  for (size_t i = 0; i < n; i++) {
+    input[i] = i;
+  }
+
+  auto e = q.submit([&](sycl::handler &h) {
+    h.parallel_for(sycl::range<1>{n},
+                   sycl::reduction(output, 0.0f, std::plus()),
+                   [=](sycl::id<1> idx, auto &sum) { sum += input[idx]; });
+  });
+
+  e.wait();
+
+  sycl::free(input, q);
+  sycl::free(output, q);
+
+  std::cout << "done\n";
+
+  return 0;
+}
diff --git a/sycl/test/graph/graph-explicit-simple.cpp b/sycl/test/graph/graph-explicit-simple.cpp
new file mode 100644
index 0000000000000..1e7bf1fec9afb
--- /dev/null
+++ b/sycl/test/graph/graph-explicit-simple.cpp
@@ -0,0 +1,42 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+#include <CL/sycl.hpp>
+#include <iostream>
+
+#include <sycl/ext/oneapi/experimental/graph.hpp>
+
+int main() {
+
+  sycl::property_list properties{
+      sycl::property::queue::in_order{},
+      sycl::ext::oneapi::property::queue::lazy_execution{}};
+
+  sycl::queue q{sycl::gpu_selector_v, properties};
+
+  sycl::ext::oneapi::experimental::command_graph g;
+
+  const size_t n = 10;
+  float *arr = sycl::malloc_shared<float>(n, q);
+
+  g.add([&](sycl::handler &h) {
+    h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx) {
+      size_t i = idx;
+      arr[i] = 1;
+    });
+  });
+
+  auto result_before_exec1 = arr[0];
+
+  auto executable_graph = g.finalize(q.get_context());
+
+  auto result_before_exec2 = arr[0];
+
+  q.submit([&](sycl::handler &h) { h.exec_graph(executable_graph); });
+
+  auto result = arr[0];
+
+  sycl::free(arr, q);
+
+  std::cout << "done.\n";
+
+  return 0;
+}