diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 7322d84f61..9e832bbb9a 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -877,7 +877,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } case UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP: { // L0 doesn't support enqueueing native work through the urNativeEnqueueExp - return ReturnValue(static_cast(true)); + return ReturnValue(static_cast(false)); } case UR_DEVICE_INFO_ESIMD_SUPPORT: { diff --git a/source/adapters/level_zero/enqueue_native.cpp b/source/adapters/level_zero/enqueue_native.cpp index cc646a2cc2..b67cccc4f1 100644 --- a/source/adapters/level_zero/enqueue_native.cpp +++ b/source/adapters/level_zero/enqueue_native.cpp @@ -10,72 +10,11 @@ #include -#include "logger/ur_logger.hpp" #include "queue.hpp" -#include "ur_level_zero.hpp" ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp( - ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, - uint32_t, const ur_mem_handle_t *, - const ur_exp_enqueue_native_command_properties_t *, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventList, - ur_event_handle_t *phEvent) { - auto Queue = this; - std::scoped_lock lock(Queue->Mutex); - - bool UseCopyEngine = false; - - // Please note that the following code should be run before the - // subsequent getAvailableCommandList() call so that there is no - // dead-lock from waiting unsubmitted events in an open batch. - // The createAndRetainUrZeEventList() has the proper side-effect - // of submitting batches with dependent events. - // - _ur_ze_event_list_t TmpWaitList; - UR_CALL(TmpWaitList.createAndRetainUrZeEventList( - NumEventsInWaitList, phEventList, Queue, UseCopyEngine)); - - // Get a new command list to be used on this call - ur_command_list_ptr_t CommandList{}; - // TODO: Change UseCopyEngine argument to 'true' once L0 backend - // support is added - UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, phEventList)); - - // TODO: do we need to create a unique command type for this? - ze_event_handle_t ZeEvent = nullptr; - ur_event_handle_t InternalEvent; - bool IsInternal = phEvent == nullptr; - ur_event_handle_t *Event = phEvent ? phEvent : &InternalEvent; - UR_CALL(createEventAndAssociateQueue(Queue, Event, - UR_COMMAND_ENQUEUE_NATIVE_EXP, - CommandList, IsInternal, false)); - ZeEvent = (*Event)->ZeEvent; - (*Event)->WaitList = TmpWaitList; - - const auto &WaitList = (*Event)->WaitList; - if (WaitList.Length) { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (CommandList->first, WaitList.Length, WaitList.ZeEventList)); - } - - UR_CALL(Queue->executeCommandList(CommandList, false, false)); - UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList, - UseCopyEngine, 0, nullptr)); - - { - ScopedCommandList Active{Queue, CommandList->first}; - - // Call interop func which enqueues native async work - pfnNativeEnqueue(Queue, data); - } - - UR_CALL(Queue->executeCommandList(CommandList, false, false)); - UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList, - UseCopyEngine, 0, nullptr)); - - ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandList->first, ZeEvent)); - - UR_CALL(Queue->executeCommandList(CommandList, false)); - return UR_RESULT_SUCCESS; + ur_exp_enqueue_native_command_function_t, void *, uint32_t, + const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, + uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 8eccada361..f467447753 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -705,15 +705,6 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle( ) { auto Queue = this; - // Needed for EnqueueNativeCommandExp, so that the native queue 'got' in the - // interop func is the as the native queue used to manage dependencies - // before the interop func invocation - if (Queue->getThreadLocalCommandList() != ze_command_list_handle_t{0}) { - auto ZeCmdList = ur_cast(NativeQueue); - *ZeCmdList = Queue->getThreadLocalCommandList(); - return UR_RESULT_SUCCESS; - } - // Lock automatically releases when this goes out of scope. std::shared_lock lock(Queue->Mutex); diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 4ac85d285d..97ddcf014c 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -423,12 +423,6 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { uint32_t, const ur_event_handle_t *, ur_event_handle_t *) override; - // Thread local stream will be used if ScopedStream is active - static ze_command_list_handle_t &getThreadLocalCommandList() { - static thread_local ze_command_list_handle_t CommandList{0}; - return CommandList; - } - using queue_type = ur_device_handle_t_::queue_group_info_t::type; // PI queue is in general a one to many mapping to L0 native queues. struct ur_queue_group_t { @@ -947,23 +941,3 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, ur_result_t CleanupEventListFromResetCmdList( std::vector &EventListToCleanup, bool QueueLocked = false); - -// RAII object to make hQueue command list getter methods all return the same -// command list within the lifetime of this object. -// -// This is useful for urEnqueueNativeCommandExp where we want guarantees that -// the user submitted native calls will be dispatched to a known command list, -// which must be "got" within the user submitted fuction. -class ScopedCommandList { - ur_queue_handle_legacy_t hQueue; - -public: - ScopedCommandList(ur_queue_handle_legacy_t hQueue, - ze_command_list_handle_t CommandList) - : hQueue{hQueue} { - hQueue->getThreadLocalCommandList() = CommandList; - } - ~ScopedCommandList() { - hQueue->getThreadLocalCommandList() = ze_command_list_handle_t{0}; - } -}; diff --git a/test/conformance/exp_enqueue_native/CMakeLists.txt b/test/conformance/exp_enqueue_native/CMakeLists.txt index 8769cf716b..8638fa1349 100644 --- a/test/conformance/exp_enqueue_native/CMakeLists.txt +++ b/test/conformance/exp_enqueue_native/CMakeLists.txt @@ -15,21 +15,4 @@ if (UR_BUILD_ADAPTER_CUDA) target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv) endif() -if (UR_BUILD_ADAPTER_L0) - add_conformance_test_with_kernels_environment( - exp_enqueue_native - enqueue_native_level_zero.cpp - ) - target_link_libraries(test-exp_enqueue_native PRIVATE - LevelZeroLoader - LevelZeroLoader-Headers - ) - - target_include_directories(test-exp_enqueue_native PRIVATE - ${PROJECT_SOURCE_DIR}/source - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero - LevelZeroLoader-Headers - ) -endif() - # TODO: Add more tests for different triples diff --git a/test/conformance/exp_enqueue_native/enqueue_native_level_zero.cpp b/test/conformance/exp_enqueue_native/enqueue_native_level_zero.cpp deleted file mode 100644 index 75dacebddb..0000000000 --- a/test/conformance/exp_enqueue_native/enqueue_native_level_zero.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -// See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "ze_api.h" - -#include -#include - -using T = uint32_t; - -struct urLevelZeroEnqueueNativeCommandTest : uur::urQueueTest { - void SetUp() { - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); - - host_vec = std::vector(global_size, 0); - ASSERT_EQ(host_vec.size(), global_size); - ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, - allocation_size, &device_ptr)); - ASSERT_NE(device_ptr, nullptr); - } - static constexpr T val = 42; - static constexpr uint32_t global_size = 1e7; - std::vector host_vec; - void *device_ptr = nullptr; - static constexpr size_t allocation_size = sizeof(val) * global_size; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urLevelZeroEnqueueNativeCommandTest); - -struct InteropData1 { - void *fill_ptr; -}; - -// Fill a device ptr with the pattern val -void interop_func_1(ur_queue_handle_t hQueue, void *data) { - ze_command_list_handle_t CommandList; - ASSERT_SUCCESS(urQueueGetNativeHandle(hQueue, nullptr, - (ur_native_handle_t *)&CommandList)); - InteropData1 *func_data = reinterpret_cast(data); - - // If L0 interop becomes a real use case we should make a new UR entry point - // to propagate events into and out of the the interop func. - zeCommandListAppendMemoryFill( - CommandList, func_data->fill_ptr, - &urLevelZeroEnqueueNativeCommandTest::val, - sizeof(urLevelZeroEnqueueNativeCommandTest::val), - urLevelZeroEnqueueNativeCommandTest::allocation_size, nullptr, 0, - nullptr); -} - -struct InteropData2 { - void *from, *to; -}; - -// Read from device ptr to host ptr -void interop_func_2(ur_queue_handle_t hQueue, void *data) { - ze_command_list_handle_t CommandList; - ASSERT_SUCCESS(urQueueGetNativeHandle(hQueue, nullptr, - (ur_native_handle_t *)&CommandList)); - InteropData2 *func_data = reinterpret_cast(data); - - // If L0 interop becomes a real use case we should make a new UR entry point - // to propagate events into and out of the the interop func. - zeCommandListAppendMemoryCopy( - CommandList, func_data->to, func_data->from, - urLevelZeroEnqueueNativeCommandTest::allocation_size, nullptr, 0, - nullptr); -} - -TEST_P(urLevelZeroEnqueueNativeCommandTest, Success) { - InteropData1 data_1{device_ptr}; - ur_event_handle_t event_1; - ASSERT_SUCCESS(urEnqueueNativeCommandExp( - queue, &interop_func_1, &data_1, 0, nullptr /*phMemList=*/, - nullptr /*pProperties=*/, 0, nullptr /*phEventWaitList=*/, &event_1)); -} - -TEST_P(urLevelZeroEnqueueNativeCommandTest, Dependencies) { - ur_event_handle_t event_1, event_2; - - InteropData1 data_1{device_ptr}; - ASSERT_SUCCESS(urEnqueueNativeCommandExp( - queue, &interop_func_1, &data_1, 0, nullptr /*phMemList=*/, - nullptr /*pProperties=*/, 0, nullptr /*phEventWaitList=*/, &event_1)); - - InteropData2 data_2{device_ptr, host_vec.data()}; - ASSERT_SUCCESS(urEnqueueNativeCommandExp( - queue, &interop_func_2, &data_2, 0, nullptr /*phMemList=*/, - nullptr /*pProperties=*/, 1, &event_1, &event_2)); - urQueueFinish(queue); - for (auto &i : host_vec) { - ASSERT_EQ(i, val); - } -} - -TEST_P(urLevelZeroEnqueueNativeCommandTest, DependenciesURBefore) { - ur_event_handle_t event_1, event_2; - - ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptr, sizeof(val), &val, - allocation_size, 0, - nullptr /*phEventWaitList=*/, &event_1)); - - InteropData2 data_2{device_ptr, host_vec.data()}; - ASSERT_SUCCESS(urEnqueueNativeCommandExp( - queue, &interop_func_2, &data_2, 0, nullptr /*phMemList=*/, - nullptr /*pProperties=*/, 1, &event_1, &event_2)); - urQueueFinish(queue); - for (auto &i : host_vec) { - ASSERT_EQ(i, val); - } -} - -TEST_P(urLevelZeroEnqueueNativeCommandTest, DependenciesURAfter) { - ur_event_handle_t event_1; - - InteropData1 data_1{device_ptr}; - ASSERT_SUCCESS(urEnqueueNativeCommandExp( - queue, &interop_func_1, &data_1, 0, nullptr /*phMemList=*/, - nullptr /*pProperties=*/, 0, nullptr /*phEventWaitList=*/, &event_1)); - - urEnqueueUSMMemcpy(queue, /*blocking*/ true, host_vec.data(), device_ptr, - allocation_size, 1, &event_1, nullptr); - for (auto &i : host_vec) { - ASSERT_EQ(i, val); - } -} diff --git a/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero-v2.match b/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero-v2.match deleted file mode 100644 index 2c9b3a0f8d..0000000000 --- a/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero-v2.match +++ /dev/null @@ -1,4 +0,0 @@ -urLevelZeroEnqueueNativeCommandTest.Success{{.*}} -urLevelZeroEnqueueNativeCommandTest.Dependencies{{.*}} -urLevelZeroEnqueueNativeCommandTest.DependenciesURBefore{{.*}} -urLevelZeroEnqueueNativeCommandTest.DependenciesURAfter{{.*}} diff --git a/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero.match b/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero.match deleted file mode 100644 index e69de29bb2..0000000000