Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
}
case UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP: {
// L0 doesn't support enqueueing native work through urEnqueueNativeCommandExp
return ReturnValue(static_cast<ur_bool_t>(true));
return ReturnValue(static_cast<ur_bool_t>(false));
}

case UR_DEVICE_INFO_ESIMD_SUPPORT: {
Expand Down
69 changes: 4 additions & 65 deletions source/adapters/level_zero/enqueue_native.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,72 +10,11 @@

#include <ur_api.h>

#include "logger/ur_logger.hpp"
#include "queue.hpp"
#include "ur_level_zero.hpp"

ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp(
ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data,
uint32_t, const ur_mem_handle_t *,
const ur_exp_enqueue_native_command_properties_t *,
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventList,
ur_event_handle_t *phEvent) {
auto Queue = this;
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);

bool UseCopyEngine = false;

// Please note that the following code should be run before the
// subsequent getAvailableCommandList() call so that there is no
// dead-lock from waiting unsubmitted events in an open batch.
// The createAndRetainUrZeEventList() has the proper side-effect
// of submitting batches with dependent events.
//
_ur_ze_event_list_t TmpWaitList;
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
NumEventsInWaitList, phEventList, Queue, UseCopyEngine));

// Get a new command list to be used on this call
ur_command_list_ptr_t CommandList{};
// TODO: Change UseCopyEngine argument to 'true' once L0 backend
// support is added
UR_CALL(Queue->Context->getAvailableCommandList(
Queue, CommandList, UseCopyEngine, NumEventsInWaitList, phEventList));

// TODO: do we need to create a unique command type for this?
ze_event_handle_t ZeEvent = nullptr;
ur_event_handle_t InternalEvent;
bool IsInternal = phEvent == nullptr;
ur_event_handle_t *Event = phEvent ? phEvent : &InternalEvent;
UR_CALL(createEventAndAssociateQueue(Queue, Event,
UR_COMMAND_ENQUEUE_NATIVE_EXP,
CommandList, IsInternal, false));
ZeEvent = (*Event)->ZeEvent;
(*Event)->WaitList = TmpWaitList;

const auto &WaitList = (*Event)->WaitList;
if (WaitList.Length) {
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(CommandList->first, WaitList.Length, WaitList.ZeEventList));
}

UR_CALL(Queue->executeCommandList(CommandList, false, false));
UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList,
UseCopyEngine, 0, nullptr));

{
ScopedCommandList Active{Queue, CommandList->first};

// Call interop func which enqueues native async work
pfnNativeEnqueue(Queue, data);
}

UR_CALL(Queue->executeCommandList(CommandList, false, false));
UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList,
UseCopyEngine, 0, nullptr));

ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandList->first, ZeEvent));

UR_CALL(Queue->executeCommandList(CommandList, false));
return UR_RESULT_SUCCESS;
ur_exp_enqueue_native_command_function_t, void *, uint32_t,
const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *,
uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
9 changes: 0 additions & 9 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -705,15 +705,6 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle(
) {
auto Queue = this;

// Needed for EnqueueNativeCommandExp, so that the native queue 'got' in the
// interop func is the same as the native queue used to manage dependencies
// before the interop func invocation
if (Queue->getThreadLocalCommandList() != ze_command_list_handle_t{0}) {
auto ZeCmdList = ur_cast<ze_command_list_handle_t *>(NativeQueue);
*ZeCmdList = Queue->getThreadLocalCommandList();
return UR_RESULT_SUCCESS;
}

// Lock automatically releases when this goes out of scope.
std::shared_lock<ur_shared_mutex> lock(Queue->Mutex);

Expand Down
26 changes: 0 additions & 26 deletions source/adapters/level_zero/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,12 +423,6 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ {
uint32_t, const ur_event_handle_t *,
ur_event_handle_t *) override;

// Per-thread command list slot. ScopedCommandList stores a command list here
// for its lifetime and resets it to a null handle on destruction; a null
// handle therefore means no scoped command list is active on this thread.
// Returns a mutable reference so callers can both read and assign the slot.
static ze_command_list_handle_t &getThreadLocalCommandList() {
  static thread_local ze_command_list_handle_t ThreadCommandList{};
  return ThreadCommandList;
}

using queue_type = ur_device_handle_t_::queue_group_info_t::type;
// PI queue is in general a one to many mapping to L0 native queues.
struct ur_queue_group_t {
Expand Down Expand Up @@ -947,23 +941,3 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
ur_result_t CleanupEventListFromResetCmdList(
std::vector<ur_event_handle_t> &EventListToCleanup,
bool QueueLocked = false);

// RAII object to make hQueue command list getter methods all return the same
// command list within the lifetime of this object.
//
// The constructor stores CommandList in the slot returned by
// hQueue->getThreadLocalCommandList(); the destructor resets that slot to a
// null handle. The previous slot value is not restored, so these guards are
// not meant to be nested on a single thread.
//
// This is useful for urEnqueueNativeCommandExp where we want guarantees that
// the user submitted native calls will be dispatched to a known command list,
// which must be "got" within the user submitted function.
class ScopedCommandList {
  ur_queue_handle_legacy_t hQueue;

public:
  ScopedCommandList(ur_queue_handle_legacy_t hQueue,
                    ze_command_list_handle_t CommandList)
      : hQueue{hQueue} {
    hQueue->getThreadLocalCommandList() = CommandList;
  }
  ~ScopedCommandList() {
    hQueue->getThreadLocalCommandList() = ze_command_list_handle_t{0};
  }

  // Non-copyable (and therefore non-movable): the destructor of a copy would
  // clear the slot while the original guard is still in scope.
  ScopedCommandList(const ScopedCommandList &) = delete;
  ScopedCommandList &operator=(const ScopedCommandList &) = delete;
};
17 changes: 0 additions & 17 deletions test/conformance/exp_enqueue_native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,4 @@ if (UR_BUILD_ADAPTER_CUDA)
target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv)
endif()

if (UR_BUILD_ADAPTER_L0)
add_conformance_test_with_kernels_environment(
exp_enqueue_native
enqueue_native_level_zero.cpp
)
target_link_libraries(test-exp_enqueue_native PRIVATE
LevelZeroLoader
LevelZeroLoader-Headers
)

target_include_directories(test-exp_enqueue_native PRIVATE
${PROJECT_SOURCE_DIR}/source
${PROJECT_SOURCE_DIR}/source/adapters/level_zero
LevelZeroLoader-Headers
)
endif()

# TODO: Add more tests for different triples
128 changes: 0 additions & 128 deletions test/conformance/exp_enqueue_native/enqueue_native_level_zero.cpp

This file was deleted.

This file was deleted.