From 66ea060028d99615e5be841652ae586b08d3c9a5 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 2 Oct 2024 23:43:23 +0000 Subject: [PATCH 1/2] [CTS] extract queue and device creation logic from urMultiQueueLaunchMemcpyTest to a separate fixture --- test/conformance/enqueue/helpers.h | 44 +++++++++++++ .../urEnqueueKernelLaunchAndMemcpyInOrder.cpp | 65 +++++-------------- 2 files changed, 60 insertions(+), 49 deletions(-) diff --git a/test/conformance/enqueue/helpers.h b/test/conformance/enqueue/helpers.h index ef91a385d9..2cdf383586 100644 --- a/test/conformance/enqueue/helpers.h +++ b/test/conformance/enqueue/helpers.h @@ -154,6 +154,50 @@ printFillTestString(const testing::TestParamInfo &info) { return test_name.str(); } +struct urMultiQueueMultiDeviceTest : uur::urMultiDeviceContextTestTemplate<1> { + void initQueues(std::vector srcDevices, + size_t numDuplicate) { + for (size_t i = 0; i < numDuplicate; i++) { + devices.insert(devices.end(), srcDevices.begin(), srcDevices.end()); + } + + for (auto &device : devices) { + ur_queue_handle_t queue = nullptr; + ASSERT_SUCCESS(urQueueCreate(context, device, nullptr, &queue)); + queues.push_back(queue); + } + } + + // Default implementation that uses all available devices + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + uur::urMultiDeviceContextTestTemplate<1>::SetUp()); + initQueues(uur::KernelsEnvironment::instance->devices, 1); + } + + // Specialized implementation that duplicates all devices and queues + void SetUp(std::vector srcDevices, + size_t numDuplicate) { + UUR_RETURN_ON_FATAL_FAILURE( + uur::urMultiDeviceContextTestTemplate<1>::SetUp()); + initQueues(srcDevices, numDuplicate); + } + + void TearDown() override { + for (auto &queue : queues) { + EXPECT_SUCCESS(urQueueRelease(queue)); + } + UUR_RETURN_ON_FATAL_FAILURE( + uur::urMultiDeviceContextTestTemplate<1>::TearDown()); + } + std::function, + std::vector>(void)> + makeQueues; + + std::vector devices; + std::vector queues; +}; + } // namespace uur #endif // UUR_ENQUEUE_RECT_HELPERS_H_INCLUDED diff --git a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp index f8ed6d4f42..d662abaafd 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp @@ -3,6 +3,8 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "helpers.h" + #include #include @@ -13,27 +15,23 @@ std::tuple minL0DriverVersion = {1, 3, 29534}; template -struct urMultiQueueLaunchMemcpyTest : uur::urMultiDeviceContextTestTemplate<1>, +struct urMultiQueueLaunchMemcpyTest : uur::urMultiQueueMultiDeviceTest, testing::WithParamInterface { std::string KernelName; std::vector programs; std::vector kernels; std::vector SharedMem; - std::vector queues; - std::vector devices; - - std::function createQueues; - static constexpr char ProgramName[] = "increment"; static constexpr size_t ArraySize = 100; static constexpr size_t InitialValue = 1; - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE( - uur::urMultiDeviceContextTestTemplate<1>::SetUp()); + void SetUp() override { throw std::runtime_error("Not implemented"); } - createQueues(); + void SetUp(std::vector srcDevices, + size_t duplicateDevices) { + UUR_RETURN_ON_FATAL_FAILURE(uur::urMultiQueueMultiDeviceTest::SetUp( + srcDevices, duplicateDevices)); for (auto &device : devices) { SKIP_IF_DRIVER_TOO_OLD("Level-Zero", minL0DriverVersion, platform, @@ -87,9 +85,6 @@ struct urMultiQueueLaunchMemcpyTest : uur::urMultiDeviceContextTestTemplate<1>, for (auto &Ptr : SharedMem) { urUSMFree(context, Ptr); } - for (const auto &queue : queues) { - EXPECT_SUCCESS(urQueueRelease(queue)); - } for (const auto &kernel : kernels) { urKernelRelease(kernel); } @@ -136,23 +131,8 @@ struct urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam using urMultiQueueLaunchMemcpyTest::SharedMem; void SetUp() override { - this->createQueues = [&] { - for (size_t i = 0; i < duplicateDevices; i++) { - devices.insert( - devices.end(), - uur::KernelsEnvironment::instance->devices.begin(), - uur::KernelsEnvironment::instance->devices.end()); - } - - for (auto &device : devices) { - ur_queue_handle_t queue = nullptr; - ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue)); - queues.push_back(queue); - } - }; - - UUR_RETURN_ON_FATAL_FAILURE( - urMultiQueueLaunchMemcpyTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(urMultiQueueLaunchMemcpyTest::SetUp( + uur::KernelsEnvironment::instance->devices, duplicateDevices)); } void TearDown() override { @@ -166,8 +146,6 @@ struct urEnqueueKernelLaunchIncrementTest std::tuple> { static constexpr size_t numOps = 50; - ur_queue_handle_t queue; - using Param = std::tuple; using urMultiQueueLaunchMemcpyTest::context; using urMultiQueueLaunchMemcpyTest::queues; @@ -176,26 +154,12 @@ struct urEnqueueKernelLaunchIncrementTest using urMultiQueueLaunchMemcpyTest::SharedMem; void SetUp() override { - auto device = std::get<0>(GetParam()); - - this->createQueues = [&] { - ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue)); - - // use the same queue and device for all operations - for (size_t i = 0; i < numOps; i++) { - urQueueRetain(queue); - - queues.push_back(queue); - devices.push_back(device); - } - }; - - UUR_RETURN_ON_FATAL_FAILURE( - urMultiQueueLaunchMemcpyTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(urMultiQueueLaunchMemcpyTest::SetUp( + std::vector{std::get<0>(GetParam())}, + numOps)); // Use single device, duplicated numOps times } void TearDown() override { - urQueueRelease(queue); UUR_RETURN_ON_FATAL_FAILURE( urMultiQueueLaunchMemcpyTest::TearDown()); } @@ -219,6 +183,9 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) { ur_event_handle_t *kernelEvent = nullptr; ur_event_handle_t *memcpyEvent = nullptr; + // This is a single device test + auto queue = queues[0]; + for (size_t i = 0; i < numOps; i++) { if (useEvents) { lastMemcpyEvent = memcpyEvent; From 2c60671cd435ac13c7d9f2767356703f4236bbd2 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 2 Oct 2024 23:53:14 +0000 Subject: [PATCH 2/2] [CTS] extend tests for urEnqueueEventsWait --- test/conformance/enqueue/CMakeLists.txt | 1 + .../enqueue/enqueue_adapter_native_cpu.match | 9 + .../urEnqueueEventsWaitMultiDevice.cpp | 218 ++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp diff --git a/test/conformance/enqueue/CMakeLists.txt b/test/conformance/enqueue/CMakeLists.txt index 1e19658dac..5856d24257 100644 --- a/test/conformance/enqueue/CMakeLists.txt +++ b/test/conformance/enqueue/CMakeLists.txt @@ -7,6 +7,7 @@ add_conformance_test_with_kernels_environment(enqueue urEnqueueDeviceGlobalVariableRead.cpp urEnqueueDeviceGlobalVariableWrite.cpp urEnqueueEventsWait.cpp + urEnqueueEventsWaitMultiDevice.cpp urEnqueueEventsWaitWithBarrier.cpp urEnqueueKernelLaunch.cpp urEnqueueKernelLaunchAndMemcpyInOrder.cpp diff --git a/test/conformance/enqueue/enqueue_adapter_native_cpu.match b/test/conformance/enqueue/enqueue_adapter_native_cpu.match index a7d6797f94..dff8beac66 100644 --- a/test/conformance/enqueue/enqueue_adapter_native_cpu.match +++ b/test/conformance/enqueue/enqueue_adapter_native_cpu.match @@ -1,4 +1,7 @@ {{NONDETERMINISTIC}} +{{OPT}}urEnqueueEventsWaitMultiDeviceTest.EmptyWaitList +{{OPT}}urEnqueueEventsWaitMultiDeviceTest.EmptyWaitListWithEvent +{{OPT}}urEnqueueEventsWaitMultiDeviceTest.EnqueueWaitOnADifferentQueue {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} @@ -16,6 +19,12 @@ {{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueEventsWaitTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueEventsWaitTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitSingleQueueMultiOps/MultiThread +{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitSingleQueueMultiOps/NoMultiThread +{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueues/MultiThread +{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueues/NoMultiThread +{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueuesCommonDependency/MultiThread +{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueuesCommonDependency/NoMultiThread {{OPT}}urEnqueueEventsWaitWithBarrierTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueEventsWaitWithBarrierTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urEnqueueKernelLaunchTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp b/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp new file mode 100644 index 0000000000..1e281b0632 --- /dev/null +++ b/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp @@ -0,0 +1,218 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "helpers.h" + +#include + +#include +#include + +struct urEnqueueEventsWaitMultiDeviceTest : uur::urMultiQueueMultiDeviceTest { + void SetUp() override { SetUp(2); /* we need at least 2 devices */ } + + void SetUp(size_t numDuplicateDevices) { + UUR_RETURN_ON_FATAL_FAILURE(uur::urMultiQueueMultiDeviceTest::SetUp( + uur::KernelsEnvironment::instance->devices, numDuplicateDevices)); + + for (auto device : devices) { + ur_device_usm_access_capability_flags_t shared_usm_single = 0; + EXPECT_SUCCESS(uur::GetDeviceUSMSingleSharedSupport( + device, shared_usm_single)); + if (!shared_usm_single) { + GTEST_SKIP() << "Shared USM is not supported by the device."; + } + } + + ptrs.resize(devices.size()); + for (size_t i = 0; i < devices.size(); i++) { + EXPECT_SUCCESS(urUSMSharedAlloc(context, devices[i], nullptr, + nullptr, size, &ptrs[i])); + } + } + + void TearDown() override { + for (auto ptr : ptrs) { + if (ptr) { + EXPECT_SUCCESS(urUSMFree(context, ptr)); + } + } + UUR_RETURN_ON_FATAL_FAILURE( + uur::urMultiQueueMultiDeviceTest::TearDown()); + } + + void initData() { + EXPECT_SUCCESS(urEnqueueUSMFill(queues[0], ptrs[0], sizeof(pattern), + &pattern, size, 0, nullptr, nullptr)); + EXPECT_SUCCESS(urQueueFinish(queues[0])); + } + + void verifyData(void *ptr, uint32_t pattern) { + for (size_t i = 0; i < count; i++) { + ASSERT_EQ(reinterpret_cast(ptr)[i], pattern); + } + } + + uint32_t pattern = 42; + const size_t count = 1024; + const size_t size = sizeof(uint32_t) * count; + + std::vector ptrs; +}; + +TEST_F(urEnqueueEventsWaitMultiDeviceTest, EmptyWaitList) { + initData(); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queues[0], false, ptrs[1], ptrs[0], size, + 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queues[0])); + + verifyData(ptrs[1], pattern); +} + +TEST_F(urEnqueueEventsWaitMultiDeviceTest, EmptyWaitListWithEvent) { + initData(); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queues[0], false, ptrs[1], ptrs[0], size, + 0, nullptr, nullptr)); + + uur::raii::Event event; + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, event.ptr())); + ASSERT_SUCCESS(urEventWait(1, event.ptr())); + + verifyData(ptrs[1], pattern); +} + +TEST_F(urEnqueueEventsWaitMultiDeviceTest, EnqueueWaitOnADifferentQueue) { + initData(); + + uur::raii::Event event; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queues[0], false, ptrs[1], ptrs[0], size, + 0, nullptr, event.ptr())); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 1, event.ptr(), nullptr)); + ASSERT_SUCCESS(urQueueFinish(queues[0])); + + verifyData(ptrs[1], pattern); +} + +struct urEnqueueEventsWaitMultiDeviceMTTest + : urEnqueueEventsWaitMultiDeviceTest, + testing::WithParamInterface { + void doComputation(std::function work) { + auto multiThread = GetParam().value; + std::vector threads; + for (size_t i = 0; i < devices.size(); i++) { + if (multiThread) { + threads.emplace_back(work, i); + } else { + work(i); + } + } + for (auto &thread : threads) { + thread.join(); + } + } + + void SetUp() override { + const size_t numDuplicateDevices = 8; + UUR_RETURN_ON_FATAL_FAILURE( + urEnqueueEventsWaitMultiDeviceTest::SetUp(numDuplicateDevices)); + } + + void TearDown() override { urEnqueueEventsWaitMultiDeviceTest::TearDown(); } +}; + +template +inline std::string +printParams(const testing::TestParamInfo &info) { + std::stringstream ss; + + auto param1 = info.param; + ss << (param1.value ? "" : "No") << param1.name; + + return ss.str(); +} + +INSTANTIATE_TEST_SUITE_P( + , urEnqueueEventsWaitMultiDeviceMTTest, + testing::ValuesIn(uur::BoolTestParam::makeBoolParam("MultiThread")), + printParams); + +TEST_P(urEnqueueEventsWaitMultiDeviceMTTest, EnqueueWaitSingleQueueMultiOps) { + std::vector data(count, pattern); + + auto work = [this, &data](size_t i) { + ASSERT_SUCCESS(urEnqueueUSMMemcpy( + queues[0], false, ptrs[i], data.data(), size, 0, nullptr, nullptr)); + }; + + doComputation(work); + + auto verify = [this](size_t i) { + uur::raii::Event event; + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, event.ptr())); + ASSERT_SUCCESS(urEventWait(1, event.ptr())); + + verifyData(ptrs[i], pattern); + }; + + doComputation(verify); +} + +TEST_P(urEnqueueEventsWaitMultiDeviceMTTest, EnqueueWaitOnAllQueues) { + std::vector eventsRaii(devices.size()); + std::vector events(devices.size()); + auto work = [this, &events, &eventsRaii](size_t i) { + ASSERT_SUCCESS(urEnqueueUSMFill(queues[i], ptrs[i], sizeof(pattern), + &pattern, size, 0, nullptr, + eventsRaii[i].ptr())); + events[i] = eventsRaii[i].get(); + }; + + doComputation(work); + + uur::raii::Event gatherEvent; + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], devices.size(), events.data(), + gatherEvent.ptr())); + ASSERT_SUCCESS(urEventWait(1, gatherEvent.ptr())); + + for (size_t i = 0; i < devices.size(); i++) { + verifyData(ptrs[i], pattern); + } +} + +TEST_P(urEnqueueEventsWaitMultiDeviceMTTest, + EnqueueWaitOnAllQueuesCommonDependency) { + uur::raii::Event event; + ASSERT_SUCCESS(urEnqueueUSMFill(queues[0], ptrs[0], sizeof(pattern), + &pattern, size, 0, nullptr, event.ptr())); + + std::vector perQueueEvents(devices.size()); + std::vector eventHandles(devices.size()); + auto work = [this, &event, &perQueueEvents, &eventHandles](size_t i) { + ASSERT_SUCCESS(urEnqueueEventsWait(queues[i], 1, event.ptr(), + perQueueEvents[i].ptr())); + eventHandles[i] = perQueueEvents[i].get(); + }; + + doComputation(work); + + uur::raii::Event hGatherEvent; + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], eventHandles.size(), + eventHandles.data(), + hGatherEvent.ptr())); + ASSERT_SUCCESS(urEventWait(1, hGatherEvent.ptr())); + + for (auto &event : eventHandles) { + ur_event_status_t status; + ASSERT_SUCCESS( + urEventGetInfo(event, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, + sizeof(ur_event_status_t), &status, nullptr)); + ASSERT_EQ(status, UR_EVENT_STATUS_COMPLETE); + } + + verifyData(ptrs[0], pattern); +}