Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ option(UR_BUILD_ADAPTER_CUDA "Build the CUDA adapter" OFF)
option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF)
option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF)
option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF)
Copy link
Contributor Author

@igchor igchor Aug 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we should build UR_BUILD_ADAPTER_L0_V2 if UR_BUILD_ADAPTER_ALL is set (right now we don't) - it is still quite far from fully functional at the moment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, let's leave it off.

option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ def generate_level_zero_queue_api(path, section, namespace, tags, version, specs

name = "queue_api"
filename = "queue_api.cpp"
layer_dstpath = os.path.join(path, "adapters/level_zero")
layer_dstpath = os.path.join(path, "adapters", "level_zero", "v2")
os.makedirs(layer_dstpath, exist_ok=True)
fout = os.path.join(layer_dstpath, filename)

Expand Down
85 changes: 78 additions & 7 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,6 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp
${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_factory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
Expand All @@ -136,9 +132,6 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
)

if(NOT WIN32)
Expand Down Expand Up @@ -175,3 +168,81 @@ target_include_directories(${TARGET_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/../../"
LevelZeroLoader-Headers
)

# Experimental Level-Zero v2 adapter. Built only when UR_BUILD_ADAPTER_L0_V2
# is ON; produces a separate shared library (ur_adapter_level_zero_v2) next to
# the legacy Level-Zero adapter defined earlier in this file.
if(UR_BUILD_ADAPTER_L0_V2)
    add_ur_adapter(ur_adapter_level_zero_v2
        SHARED
        # sources shared with legacy adapter
        ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/device.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
        # v2-only sources
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
    )

    # api.cpp contains NOT_SUPPORTED functions-only, so its parameters are
    # intentionally unused; silence the unused-parameter diagnostic for that
    # one file. set_property(... APPEND_STRING ...) is used because
    # set_source_files_properties() only takes plain key/value pairs and
    # would overwrite COMPILE_FLAGS rather than append to it. The leading
    # space keeps the flag separate from any flags already on the file.
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        # cl.exe does not accept GCC-style -Wno-* flags; C4100 is its
        # "unreferenced formal parameter" warning.
        set_property(SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
            APPEND_STRING PROPERTY COMPILE_FLAGS " /wd4100")
    else()
        set_property(SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
            APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-unused-parameter")
    endif()

    # The library-init translation unit is only compiled on non-Windows
    # platforms.
    if(NOT WIN32)
        target_sources(ur_adapter_level_zero_v2
            PRIVATE
            ${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp
        )
    endif()

    # TODO: fix level_zero adapter conversion warnings
    # The generator expression is quoted and uses ';' as the separator so it
    # reaches CMake as a single argument that expands to two options.
    target_compile_options(ur_adapter_level_zero_v2 PRIVATE
        "$<$<CXX_COMPILER_ID:MSVC>:/wd4805;/wd4244>"
    )

    set_target_properties(ur_adapter_level_zero_v2 PROPERTIES
        VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
        SOVERSION "${PROJECT_VERSION_MAJOR}"
    )

    if(WIN32)
        # 0x800: Search for the DLL only in the System32 folder
        target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800)
    endif()

    target_link_libraries(ur_adapter_level_zero_v2 PRIVATE
        ${PROJECT_NAME}::headers
        ${PROJECT_NAME}::common
        ${PROJECT_NAME}::umf
        LevelZeroLoader
        LevelZeroLoader-Headers
    )

    # NOTE(review): LevelZeroLoader-Headers is passed here as an include
    # directory although it is also linked as a library above; kept as-is to
    # mirror the legacy adapter - confirm whether this entry is still needed.
    target_include_directories(ur_adapter_level_zero_v2 PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/../.."
        LevelZeroLoader-Headers
    )
endif()
4 changes: 1 addition & 3 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
#include "queue.hpp"
#include "ur_level_zero.hpp"

#include "v2/context.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
uint32_t DeviceCount, ///< [in] the number of devices given in phDevices
const ur_device_handle_t
Expand All @@ -38,7 +36,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
ZE2UR_CALL(zeContextCreate, (Platform->ZeDriver, &ContextDesc, &ZeContext));
try {
ur_context_handle_t_ *Context =
new v2::ur_context_handle_t_(ZeContext, DeviceCount, Devices, true);
new ur_context_handle_t_(ZeContext, DeviceCount, Devices, true);

Context->initialize();
*RetContext = reinterpret_cast<ur_context_handle_t>(Context);
Expand Down
8 changes: 0 additions & 8 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
#include "ur_util.hpp"
#include "ze_api.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will this allow for all the queue_legacy to be reverted?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have a revert commit ready but it might be better to merge it separately: #1919


#include "v2/queue_factory.hpp"

// Hard limit for the event completion batches.
static const uint64_t CompletionBatchesMax = [] {
// Default value chosen empirically to maximize the number of asynchronous
Expand Down Expand Up @@ -501,12 +499,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(

UR_ASSERT(Context->isValidDevice(Device), UR_RESULT_ERROR_INVALID_DEVICE);

// optimized path for immediate, in-order command lists
if (v2::shouldUseQueueV2(Device, Flags)) {
*Queue = v2::createQueue(Context, Device, Props);
return UR_RESULT_SUCCESS;
}

// Create placeholder queues in the compute queue group.
// Actual L0 queues will be created at first use.
std::vector<ze_command_queue_handle_t> ZeComputeCommandQueues(
Expand Down
9 changes: 5 additions & 4 deletions source/adapters/level_zero/v2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

This is the home directory for L0 v2 adapter sources. This is a redesigned version of the L0 adapter that focuses on maximizing the performance of each queue mode individually (immediate/batched, in-order/out-of-order).

L0 v2 adapter can be enabled by setting `UR_L0_USE_QUEUE_V2=1` env variable. If the variable is not set, legacy path will be used.
The L0 v2 adapter can be enabled by passing the `UR_BUILD_ADAPTER_L0_V2=1` option to cmake. When enabled, `libur_adapter_level_zero_v2.[so|dll]` will be created.

# Code structure

v2 adapter only rewrites certain functions (mostly urEnqueue* functions) while reusing the rest. `ur_queue_handle_t` has become an abstract class and each enqueue function a virtual function.
The v2 adapter is a standalone adapter but reuses some logic from the legacy L0 adapter implementation - most notably: adapter.cpp, platform.cpp, device.cpp.

Legacy enqeue path is implemented in `ur_queue_handle_legacy_t` which inherits from `ur_queue_handle_t`. For new, optimized path, each queue mode will be implemented as a separate queue class (e.g. `v2::ur_queue_immediate_in_order_t`) inheriting from `ur_queue_handle_t`.
Each queue mode will be implemented as a separate queue class (e.g. `v2::ur_queue_immediate_in_order_t`) inheriting from `ur_queue_handle_t`, which is an abstract class
in the v2 adapter.

`ur_queue_handle_t` is auto-generated by `make generate-code` - for every API function that accepts `ur_queue_handle_t` as a first parameter, new pure virtual method is created. The API function is then
auto-implemented (see ../queue_api.cpp) by dispatching to that virtual method. Developer is only responsbile for implementing that virtual function for every queue base class.
auto-implemented (see ./queue_api.cpp) by dispatching to that virtual method. The developer is only responsible for implementing that virtual function for every queue base class.
Loading