diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 61ffe777f4c45..69254eaa8154d 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -586,7 +586,15 @@ Command *Command::addDep(DepDesc NewDep, std::vector &ToCleanUp) { ConnectionCmd = processDepEvent(NewDep.MDepCommand->getEvent(), NewDep, ToCleanUp); } - MDeps.push_back(NewDep); + // ConnectionCmd insertion builds the following dependency structure: + // this -> emptyCmd (for ConnectionCmd) -> ConnectionCmd -> NewDep + // that means that this and NewDep are already dependent + if (!ConnectionCmd) { + MDeps.push_back(NewDep); + if (NewDep.MDepCommand) + NewDep.MDepCommand->addUser(this); + } + #ifdef XPTI_ENABLE_INSTRUMENTATION emitEdgeEventForCommandDependence( NewDep.MDepCommand, (void *)NewDep.MDepRequirement->MSYCLMemObj, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index a42b5f5155aec..ed2ee3e6f78dc 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -190,9 +190,10 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( DepDesc Dep = findDepForRecord(Dependant, Record); Dep.MDepCommand = Dependency; std::vector ToCleanUp; - if (Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp)) + Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp); + if (ConnectionCmd) ToEnqueue.push_back(ConnectionCmd); - Dependency->addUser(Dependant); + --(Dependency->MLeafCounter); if (Dependency->MLeafCounter == 0 && Dependency->isSuccessfullyEnqueued() && @@ -281,7 +282,6 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd}, ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); - Dep->addUser(UpdateCommand); } updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp); addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode, ToEnqueue); @@ -397,7 +397,6 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( DepDesc{Dep, NewCmd->getRequirement(), AllocaCmdDst}, ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); - Dep->addUser(NewCmd); } updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp); addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue); @@ -437,14 +436,12 @@ Command *Scheduler::GraphBuilder::remapMemoryObject( DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd}, ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); - Dep->addUser(UnMapCmd); } Command *ConnCmd = MapCmd->addDep( DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd}, ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); - UnMapCmd->addUser(MapCmd); updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp); addNodeToLeaves(Record, MapCmd, access::mode::read_write, ToEnqueue); @@ -489,7 +486,6 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); - Dep->addUser(MemCpyCmd); } updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp); @@ -780,7 +776,6 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); - LinkedAllocaCmd->addUser(AllocaCmd); LinkedAllocaCmd->MLinkedAllocaCmd = AllocaCmd; // To ensure that the leader allocation is removed first @@ -808,7 +803,6 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( DepDesc{Dep, Req, LinkedAllocaCmd}, ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); - Dep->addUser(AllocaCmd); } updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp); addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode, ToEnqueue); @@ -848,7 +842,8 @@ typename detail::enable_if_t< Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, const QueueImplPtr &Queue, Command::BlockReason Reason, - std::vector &ToEnqueue) { + std::vector &ToEnqueue, + const bool AddDepsToLeaves) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -865,20 +860,24 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); } + // addRequirement above call addDep that already will add EmptyCmd as user for + // Cmd no Reqs size check here so assume it is possible to have no Reqs passed + if (!Reqs.size()) + Cmd->addUser(EmptyCmd); - Cmd->addUser(EmptyCmd); - - const std::vector &Deps = Cmd->MDeps; - std::vector ToCleanUp; - for (const DepDesc &Dep : Deps) { - const Requirement *Req = Dep.MDepRequirement; - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + if (AddDepsToLeaves) { + const std::vector &Deps = Cmd->MDeps; + std::vector ToCleanUp; + for (const DepDesc &Dep : Deps) { + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); + updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); + } + for (Command *Cmd : ToCleanUp) + cleanupCommand(Cmd); } - for (Command *Cmd : ToCleanUp) - cleanupCommand(Cmd); return EmptyCmd; } @@ -989,10 +988,12 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, std::set Deps = findDepsForReq(Record, Req, Queue->getContextImplPtr()); - for (Command *Dep : Deps) - if (Command *ConnCmd = - NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}, ToCleanUp)) + for (Command *Dep : Deps) { + Command *ConnCmd = + NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}, ToCleanUp); + if (ConnCmd) ToEnqueue.push_back(ConnCmd); + } } // Set new command as user for dependencies and update leaves. @@ -1001,7 +1002,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, // FIXME employ a reference here to eliminate copying of a vector std::vector Deps = NewCmd->MDeps; for (DepDesc &Dep : Deps) { - Dep.MDepCommand->addUser(NewCmd.get()); const Requirement *Req = Dep.MDepRequirement; MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode, ToCleanUp); @@ -1297,9 +1297,6 @@ Command *Scheduler::GraphBuilder::connectDepEvent( throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); } - if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) - DepCmd->addUser(ConnectCmd); - EmptyCommand *EmptyCmd = nullptr; if (Dep.MDepRequirement) { @@ -1311,21 +1308,17 @@ Command *Scheduler::GraphBuilder::connectDepEvent( Dep.MDepCommand); // add user to Dep.MDepCommand is already performed beyond this if branch - MemObjRecord *Record = getMemObjRecord(Dep.MDepRequirement->MSYCLMemObj); - updateLeaves({Dep.MDepCommand}, Record, Dep.MDepRequirement->MAccessMode, - ToCleanUp); + // ConnectCmd is added as dependency to Cmd + // We build the following structure Cmd->EmptyCmd/ConnectCmd->DepCmd + // No need to add ConnectCmd to leaves buffer since it is a dependency + // for command Cmd that will be added there std::vector ToEnqueue; - addNodeToLeaves(Record, ConnectCmd, Dep.MDepRequirement->MAccessMode, - ToEnqueue); - assert(ToEnqueue.size() == 0); - const std::vector Reqs(1, Dep.MDepRequirement); EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue(), - Command::BlockReason::HostTask, ToEnqueue); + Command::BlockReason::HostTask, ToEnqueue, false); assert(ToEnqueue.size() == 0); - // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. // Depend Cmd on empty command { @@ -1337,6 +1330,11 @@ Command *Scheduler::GraphBuilder::connectDepEvent( (void)Cmd->addDep(CmdDep, ToCleanUp); } } else { + // It is required condition in another a path and addUser will be set in + // addDep + if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) + DepCmd->addUser(ConnectCmd); + std::vector ToEnqueue; EmptyCmd = addEmptyCmd( ConnectCmd, {}, Scheduler::getInstance().getDefaultHostQueue(), @@ -1354,10 +1352,10 @@ Command *Scheduler::GraphBuilder::connectDepEvent( // Dismiss the result here as it's not a connection now, // 'cause EmptyCmd is host one (void)Cmd->addDep(EmptyCmd->getEvent(), ToCleanUp); + // Added by addDep in another path + EmptyCmd->addUser(Cmd); } - EmptyCmd->addUser(Cmd); - ConnectCmd->MEmptyCmd = EmptyCmd; return ConnectCmd; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 63e2cccaabfd2..18ed2f5004c06 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -606,7 +606,8 @@ class Scheduler { EmptyCommand *> addEmptyCmd(Command *Cmd, const std::vector &Req, const QueueImplPtr &Queue, Command::BlockReason Reason, - std::vector &ToEnqueue); + std::vector &ToEnqueue, + const bool AddDepsToLeaves = true); protected: /// Finds a command dependency corresponding to the record. diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index ceb7c8990c3f2..604f63b736297 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -18,4 +18,5 @@ add_sycl_unittest(SchedulerTests OBJECT QueueFlushing.cpp PostEnqueueCleanup.cpp utils.cpp + LeafLimitDiffContexts.cpp ) diff --git a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp new file mode 100644 index 0000000000000..2d3d728c8de57 --- /dev/null +++ b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp @@ -0,0 +1,153 @@ +//==---------------- LeafLimit.cpp --- Scheduler unit tests ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SchedulerTest.hpp" +#include "SchedulerTestUtils.hpp" + +#include +#include + +#include +#include +#include +#include + +using namespace cl::sycl; + +inline constexpr auto DisablePostEnqueueCleanupName = + "SYCL_DISABLE_POST_ENQUEUE_CLEANUP"; + +// Checks that scheduler's (or graph-builder's) addNodeToLeaves method works +// correctly with dependency tracking when leaf-limit for generic commands is +// overflowed. +// Checks that in case of different contexts for deleted leaf and a new one +// ConnectCmd will be created and scheduler will build the following dependency +// structure: NewLeaf->EmptyCmd/ConnectCmd->OldLeaf +TEST_F(SchedulerTest, LeafLimitDiffContexts) { + // All of the mock commands are owned on the test side, prevent post enqueue + // cleanup from deleting some of them. + unittest::ScopedEnvVar DisabledCleanup{ + DisablePostEnqueueCleanupName, "1", + detail::SYCLConfig::reset}; + + default_selector Selector; + device Device = Selector.select_device(); + // ConnectCmd will not be created for host contextx + if (Device.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + + struct QueueRelatedObjects { + context Context; + queue Queue; + std::unique_ptr DepCmd; + detail::MemObjRecord *Rec; + detail::AllocaCommandBase *AllocaCmd; + + QueueRelatedObjects(const device &Dev) + : Context(Dev), Queue(Context, Dev), DepCmd(), Rec(nullptr), + AllocaCmd(nullptr) {} + + void InitializeUtils(detail::Requirement &MockReq, MockScheduler &MS) { + std::vector ToEnqueue; + Rec = MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(Queue), &MockReq, + ToEnqueue); + // Creating Alloca on both - device and host contexts (will be created in + // real case in insertMemMove for example) It is done to avoid extra + // AllocCmd insertion during ConnectCmd insertion + AllocaCmd = MS.getOrCreateAllocaForReq( + Rec, &MockReq, detail::getSyclObjImpl(Queue), ToEnqueue); + std::ignore = MS.getOrCreateAllocaForReq( + Rec, &MockReq, MS.getDefaultHostQueue(), ToEnqueue); + DepCmd = + std::make_unique(detail::getSyclObjImpl(Queue), MockReq); + } + }; + + // Creating 2 queues with different context objects + QueueRelatedObjects ExtQueue1(Device), ExtQueue2(Device); + MockScheduler MS; + std::vector> AddedLeaves; + + buffer Buf(range<1>(1)); + detail::Requirement MockReq = getMockRequirement(Buf); + + ExtQueue1.InitializeUtils(MockReq, MS); + ExtQueue2.InitializeUtils(MockReq, MS); + + size_t CommandsCapacity = + ExtQueue1.Rec->MWriteLeaves.genericCommandsCapacity(); + + // Adds leaf with 1 deps to buffer + auto AddLeafWithDeps = [&AddedLeaves, &MockReq, + &MS](const QueueRelatedObjects &QueueStuff) { + auto NewLeaf = std::make_unique( + detail::getSyclObjImpl(QueueStuff.Queue), MockReq); + // Create edges: all soon-to-be leaves are direct users of MockDep + std::vector ToCleanUp; + (void)NewLeaf->addDep(detail::DepDesc{QueueStuff.DepCmd.get(), &MockReq, + QueueStuff.AllocaCmd}, + ToCleanUp); + QueueStuff.DepCmd->addUser(NewLeaf.get()); + + std::vector ToEnqueue; + MS.addNodeToLeaves(QueueStuff.Rec, NewLeaf.get(), access::mode::write, + ToEnqueue); + AddedLeaves.push_back(std::move(NewLeaf)); + }; + + // Create commands that will be added as leaves up to the limit + for (std::size_t i = 0; i < CommandsCapacity; ++i) { + AddLeafWithDeps(ExtQueue1); + } + // Adding extra command on different to exceed buffer limit + // The command #0 and command #8 must be on different queues to insert connect + // command + AddLeafWithDeps(ExtQueue2); + + // Check that the oldest leaf #0 has been removed from the leaf list + const detail::CircularBuffer &Leaves = + ExtQueue1.Rec->MWriteLeaves.getGenericCommands(); + ASSERT_TRUE(std::find(Leaves.begin(), Leaves.end(), + AddedLeaves.front().get()) == Leaves.end()); + // Check that another leaves #1...#7 that should not be removed are present in + // buffer + for (std::size_t i = 1; i < AddedLeaves.size(); ++i) { + assert(std::find(Leaves.begin(), Leaves.end(), AddedLeaves[i].get()) != + Leaves.end()); + } + + // Check NewLeaf->EmptyCmd/ConnectCmd->OldLeaf structure + MockCommand *OldestLeaf = AddedLeaves.front().get(); + MockCommand *NewestLeaf = AddedLeaves.back().get(); + // The only user for oldLeaf must be ConnectCmd + ASSERT_EQ(OldestLeaf->MUsers.size(), 1U); + // No direct connection between OldLeaf and newLeaf, only via ConnectCmd + EXPECT_EQ(OldestLeaf->MUsers.count(NewestLeaf), 0U); + // 2 deps for NewLeaf: 1 dep command and connect cmd - no OldLeaf direct + // dependency + ASSERT_EQ(NewestLeaf->MDeps.size(), 2U); + EXPECT_FALSE(std::any_of( + NewestLeaf->MDeps.begin(), NewestLeaf->MDeps.end(), + [&](const detail::DepDesc &DD) { return DD.MDepCommand == OldestLeaf; })); + // Check NewLeaf dependencies in depth by MUsers + auto ConnectCmdIt = OldestLeaf->MUsers.begin(); + ASSERT_EQ((*ConnectCmdIt)->MUsers.size(), 1U); + auto EmptyCmdIt = (*ConnectCmdIt)->MUsers.begin(); + EXPECT_TRUE(std::any_of(NewestLeaf->MDeps.begin(), NewestLeaf->MDeps.end(), + [&](const detail::DepDesc &DD) { + return DD.MDepCommand == (*EmptyCmdIt); + })); + // ConnectCmd is created internally in scheduler and not a mock object + // This fact leads to active scheduler shutdown process that deletes a + // part of commands for record we store in AddedLeaves vector. + // We abort this process by removing record to avoid double release or + // or memory leaks + MS.removeRecordForMemObj(MockReq.MSYCLMemObj); +}