Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ _PI_API(piextContextCreateWithNativeHandle)
_PI_API(piQueueCreate)
_PI_API(piQueueGetInfo)
_PI_API(piQueueFinish)
_PI_API(piQueueFlush)
_PI_API(piQueueRetain)
_PI_API(piQueueRelease)
_PI_API(piextQueueGetNativeHandle)
Expand Down
4 changes: 3 additions & 1 deletion sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
//
#include "CL/cl.h"
#define _PI_H_VERSION_MAJOR 5
#define _PI_H_VERSION_MINOR 7
#define _PI_H_VERSION_MINOR 8

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -1082,6 +1082,8 @@ __SYCL_EXPORT pi_result piQueueRelease(pi_queue command_queue);

__SYCL_EXPORT pi_result piQueueFinish(pi_queue command_queue);

__SYCL_EXPORT pi_result piQueueFlush(pi_queue command_queue);

/// Gets the native handle of a PI queue object.
///
/// \param queue is the PI queue to get the native handle of.
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2252,6 +2252,11 @@ pi_result cuda_piQueueFinish(pi_queue command_queue) {
return result;
}

// There is no CUDA counterpart for queue flushing and we don't run into the
// same problem of having to flush cross-queue dependencies as some of the
// other plugins, so it can be left as no-op.
pi_result cuda_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; }

/// Gets the native CUDA handle of a PI queue object
///
/// \param[in] queue The PI queue to get the native CUDA object of.
Expand Down Expand Up @@ -4885,6 +4890,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piQueueCreate, cuda_piQueueCreate)
_PI_CL(piQueueGetInfo, cuda_piQueueGetInfo)
_PI_CL(piQueueFinish, cuda_piQueueFinish)
_PI_CL(piQueueFlush, cuda_piQueueFlush)
_PI_CL(piQueueRetain, cuda_piQueueRetain)
_PI_CL(piQueueRelease, cuda_piQueueRelease)
_PI_CL(piextQueueGetNativeHandle, cuda_piextQueueGetNativeHandle)
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2202,6 +2202,11 @@ pi_result hip_piQueueFinish(pi_queue command_queue) {
return result;
}

// There is no HIP counterpart for queue flushing and we don't run into the
// same problem of having to flush cross-queue dependencies as some of the
// other plugins, so it can be left as no-op.
pi_result hip_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; }

/// Gets the native HIP handle of a PI queue object
///
/// \param[in] queue The PI queue to get the native HIP object of.
Expand Down Expand Up @@ -4820,6 +4825,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piQueueCreate, hip_piQueueCreate)
_PI_CL(piQueueGetInfo, hip_piQueueGetInfo)
_PI_CL(piQueueFinish, hip_piQueueFinish)
_PI_CL(piQueueFlush, hip_piQueueFlush)
_PI_CL(piQueueRetain, hip_piQueueRetain)
_PI_CL(piQueueRelease, hip_piQueueRelease)
_PI_CL(piextQueueGetNativeHandle, hip_piextQueueGetNativeHandle)
Expand Down
4 changes: 4 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2997,6 +2997,10 @@ pi_result piQueueFinish(pi_queue Queue) {
return PI_SUCCESS;
}

// Flushing cross-queue dependencies is covered by createAndRetainPiZeEventList,
// so this can be left as a no-op.
pi_result piQueueFlush(pi_queue Queue) { return PI_SUCCESS; }

pi_result piextQueueGetNativeHandle(pi_queue Queue,
pi_native_handle *NativeHandle) {
PI_ASSERT(Queue, PI_INVALID_QUEUE);
Expand Down
1 change: 1 addition & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1377,6 +1377,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piQueueCreate, piQueueCreate)
_PI_CL(piQueueGetInfo, clGetCommandQueueInfo)
_PI_CL(piQueueFinish, clFinish)
_PI_CL(piQueueFlush, clFlush)
_PI_CL(piQueueRetain, clRetainCommandQueue)
_PI_CL(piQueueRelease, clReleaseCommandQueue)
_PI_CL(piextQueueGetNativeHandle, piextQueueGetNativeHandle)
Expand Down
38 changes: 35 additions & 3 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,12 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) {
MState = HES_NotComplete;
}

event_impl::event_impl() : MState(HES_Complete) {}
event_impl::event_impl() : MIsFlushed(true), MState(HES_Complete) {}

event_impl::event_impl(RT::PiEvent Event, const context &SyclContext)
: MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)),
MOpenCLInterop(true), MHostEvent(false), MState(HES_Complete) {
MOpenCLInterop(true), MHostEvent(false), MIsFlushed(true),
MState(HES_Complete) {

if (MContext->is_host()) {
throw cl::sycl::invalid_parameter_error(
Expand All @@ -120,7 +121,7 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext)
getPlugin().call<PiApiKind::piEventRetain>(MEvent);
}

event_impl::event_impl(QueueImplPtr Queue) {
event_impl::event_impl(QueueImplPtr Queue) : MQueue{Queue} {
if (Queue->is_host()) {
MState.store(HES_NotComplete);

Expand Down Expand Up @@ -344,6 +345,37 @@ std::vector<EventImplPtr> event_impl::getWaitList() {
return Result;
}

void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) {
assert(MEvent != nullptr);
if (MIsFlushed)
return;

Command *Cmd = static_cast<Command *>(getCommand());
assert(!Cmd || Cmd->getWorkerQueue() != nullptr);
QueueImplPtr Queue = Cmd ? Cmd->getWorkerQueue() : MQueue.lock();
// If the queue has been released, all of the commands have already been
// implicitly flushed by piQueueRelease.
if (!Queue) {
MIsFlushed = true;
return;
}
if (Queue == UserQueue)
return;

// Check if the task for this event has already been submitted.
pi_event_status Status = PI_EVENT_QUEUED;
getPlugin().call<PiApiKind::piEventGetInfo>(
MEvent, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(pi_int32), &Status,
nullptr);
if (Status != PI_EVENT_QUEUED) {
MIsFlushed = true;
return;
}

getPlugin().call<PiApiKind::piQueueFlush>(Queue->getHandleRef());
MIsFlushed = true;
}

void event_impl::cleanupDependencyEvents() {
std::lock_guard<std::mutex> Lock(MMutex);
MPreparedDepsEvents.clear();
Expand Down
10 changes: 10 additions & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ class event_impl {
/// @return a vector of "immediate" dependencies for this event_impl.
std::vector<EventImplPtr> getWaitList();

/// Performs a flush on the queue associated with this event if the user queue
/// is different and the task associated with this event hasn't been submitted
/// to the device yet.
void flushIfNeeded(const QueueImplPtr &UserQueue);

/// Cleans dependencies of this event_impl
void cleanupDependencyEvents();

Expand All @@ -200,11 +205,16 @@ class event_impl {
bool MHostEvent = true;
std::unique_ptr<HostProfilingInfo> MHostProfilingInfo;
void *MCommand = nullptr;
std::weak_ptr<queue_impl> MQueue;

/// Dependency events prepared for waiting by backend.
std::vector<EventImplPtr> MPreparedDepsEvents;
std::vector<EventImplPtr> MPreparedHostDepsEvents;

/// Indicates that the task associated with this event has been submitted by
/// the queue to the device.
std::atomic<bool> MIsFlushed = false;

enum HostEventState : int { HES_NotComplete = 0, HES_Complete };

// State of host event. Employed only for host events and event with no
Expand Down
13 changes: 13 additions & 0 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,13 @@ getPiEvents(const std::vector<EventImplPtr> &EventImpls) {
return RetPiEvents;
}

static void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls,
const QueueImplPtr &Queue) {
for (auto &EventImpl : EventImpls) {
EventImpl->flushIfNeeded(Queue);
}
}

class DispatchHostTask {
ExecCGCommand *MThisCmd;
std::vector<interop_handle::ReqToMem> MReqToMem;
Expand Down Expand Up @@ -325,6 +332,7 @@ void Command::waitForEvents(QueueImplPtr Queue,
#endif

std::vector<RT::PiEvent> RawEvents = getPiEvents(EventImpls);
flushCrossQueueDeps(EventImpls, getWorkerQueue());
const detail::plugin &Plugin = Queue->getPlugin();
Plugin.call<PiApiKind::piEnqueueEventsWait>(
Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event);
Expand Down Expand Up @@ -1073,6 +1081,7 @@ cl_int MapMemObject::enqueueImp() {
waitForPreparedHostEvents();
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
std::vector<RT::PiEvent> RawEvents = getPiEvents(EventImpls);
flushCrossQueueDeps(EventImpls, getWorkerQueue());

RT::PiEvent &Event = MEvent->getHandleRef();
*MDstPtr = MemoryManager::map(
Expand Down Expand Up @@ -1150,6 +1159,7 @@ cl_int UnMapMemObject::enqueueImp() {
waitForPreparedHostEvents();
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
std::vector<RT::PiEvent> RawEvents = getPiEvents(EventImpls);
flushCrossQueueDeps(EventImpls, getWorkerQueue());

RT::PiEvent &Event = MEvent->getHandleRef();
MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(),
Expand Down Expand Up @@ -1250,6 +1260,7 @@ cl_int MemCpyCommand::enqueueImp() {
RT::PiEvent &Event = MEvent->getHandleRef();

auto RawEvents = getPiEvents(EventImpls);
flushCrossQueueDeps(EventImpls, getWorkerQueue());

MemoryManager::copy(
MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(),
Expand Down Expand Up @@ -1400,6 +1411,7 @@ cl_int MemCpyCommandHost::enqueueImp() {
return CL_SUCCESS;
}

flushCrossQueueDeps(EventImpls, getWorkerQueue());
MemoryManager::copy(
MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(),
MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange,
Expand Down Expand Up @@ -1986,6 +1998,7 @@ cl_int ExecCGCommand::enqueueImp() {
waitForPreparedHostEvents();
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
auto RawEvents = getPiEvents(EventImpls);
flushCrossQueueDeps(EventImpls, getWorkerQueue());

RT::PiEvent &Event = MEvent->getHandleRef();

Expand Down
1 change: 1 addition & 0 deletions sycl/test/abi/pi_level_zero_symbol_check.dump
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ piProgramRelease
piProgramRetain
piQueueCreate
piQueueFinish
piQueueFlush
piQueueGetInfo
piQueueRelease
piQueueRetain
Expand Down
13 changes: 13 additions & 0 deletions sycl/unittests/helpers/CommonRedefinitions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,18 @@ inline pi_result redefinedEventsWaitCommon(pi_uint32 num_events,
return PI_SUCCESS;
}

inline pi_result redefinedEventGetInfoCommon(pi_event event,
pi_event_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret) {
if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) {
auto *status = reinterpret_cast<pi_event_status *>(param_value);
*status = PI_EVENT_SUBMITTED;
}
return PI_SUCCESS;
}

inline pi_result redefinedEventReleaseCommon(pi_event event) {
if (event != nullptr)
delete reinterpret_cast<int *>(event);
Expand Down Expand Up @@ -166,6 +178,7 @@ inline void setupDefaultMockAPIs(sycl::unittest::PiMock &Mock) {
Mock.redefine<PiApiKind::piKernelSetExecInfo>(
redefinedKernelSetExecInfoCommon);
Mock.redefine<PiApiKind::piEventsWait>(redefinedEventsWaitCommon);
Mock.redefine<PiApiKind::piEventGetInfo>(redefinedEventGetInfoCommon);
Mock.redefine<PiApiKind::piEventRelease>(redefinedEventReleaseCommon);
Mock.redefine<PiApiKind::piEnqueueKernelLaunch>(
redefinedEnqueueKernelLaunchCommon);
Expand Down
1 change: 1 addition & 0 deletions sycl/unittests/scheduler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ add_sycl_unittest(SchedulerTests OBJECT
InOrderQueueHostTaskDeps.cpp
AllocaLinking.cpp
RequiredWGSize.cpp
QueueFlushing.cpp
utils.cpp
)
Loading