Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_factory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/latency_tracker.hpp
${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
Expand Down
3 changes: 3 additions & 0 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ struct ur_context_handle_t_ : _ur_object {

// Stores the given Level Zero context handle in the ZeContext member.
ur_context_handle_t_(ze_context_handle_t ZeContext) : ZeContext{ZeContext} {}

// The destructor is virtual so that deleting through this base type also
// runs the destructor of a derived context (e.g. v2::context).
virtual ~ur_context_handle_t_() = default;

// A L0 context handle is primarily used during creation and management of
// resources that may be used by multiple devices.
// This field is only set at ur_context_handle_t creation time, and cannot
Expand Down
9 changes: 8 additions & 1 deletion source/adapters/level_zero/v2/command_list_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ inline size_t command_list_descriptor_hash_t::operator()(
}

command_list_cache_t::command_list_cache_t(ze_context_handle_t ZeContext)
: ZeContext{ZeContext} {}
: ZeContext{ZeContext},
immediateGetLatencyTracker(
"command_list_cache_t::getImmediateCommandList"),
regularGetLatencyTracker("command_list_cache_t::getRegularCommandList") {}

raii::ze_command_list_t
command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
Expand Down Expand Up @@ -81,6 +84,8 @@ command_list_cache_t::getImmediateCommandList(
ze_device_handle_t ZeDevice, bool IsInOrder, uint32_t Ordinal,
ze_command_queue_mode_t Mode, ze_command_queue_priority_t Priority,
std::optional<uint32_t> Index) {
rolling_latency_tracker tracker(immediateGetLatencyTracker);

immediate_command_list_descriptor_t Desc;
Desc.ZeDevice = ZeDevice;
Desc.Ordinal = Ordinal;
Expand All @@ -100,6 +105,8 @@ command_list_cache_t::getImmediateCommandList(
raii::cache_borrowed_command_list_t
command_list_cache_t::getRegularCommandList(ze_device_handle_t ZeDevice,
bool IsInOrder, uint32_t Ordinal) {
rolling_latency_tracker tracker(regularGetLatencyTracker);

regular_command_list_descriptor_t Desc;
Desc.ZeDevice = ZeDevice;
Desc.IsInOrder = IsInOrder;
Expand Down
4 changes: 4 additions & 0 deletions source/adapters/level_zero/v2/command_list_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <ze_api.h>

#include "../common.hpp"
#include "latency_tracker.hpp"

namespace v2 {
namespace raii {
Expand Down Expand Up @@ -81,5 +82,8 @@ struct command_list_cache_t {
raii::ze_command_list_t cmdList);
raii::ze_command_list_t
createCommandList(const command_list_descriptor_t &desc);

// Latency statistics fed by getImmediateCommandList().
rolling_stats immediateGetLatencyTracker;
// Latency statistics fed by getRegularCommandList().
rolling_stats regularGetLatencyTracker;
};
} // namespace v2
97 changes: 97 additions & 0 deletions source/adapters/level_zero/v2/latency_tracker.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
//===--------- latency_tracker.hpp - common ------------------------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <utility>

#include "logger/ur_logger.hpp"

namespace v2 {

// Latency tracking is switched on by the mere presence of the
// UR_ENABLE_LATENCY_TRACKING environment variable (its value is not
// inspected). The variable is read once, during static initialization.
static inline bool trackLatency =
    std::getenv("UR_ENABLE_LATENCY_TRACKING") != nullptr;

// Accumulates the arithmetic mean of every sample passed to trackValue() and,
// when latency tracking is enabled, logs that mean and the sample count on
// destruction.
//
// No synchronization is performed by this class. `name` is stored as a raw
// pointer and read again in the destructor, so the string it points to must
// remain valid for the lifetime of the object.
class rolling_stats {
public:
  rolling_stats(const char *name) : name(name) {}

  ~rolling_stats() {
    if (trackLatency) {
      logger::info("[{}] average latency: {}ns", name, estimate());
      logger::info("[{}] number of samples: {}", name, count());
    }
  }

  // Fold one sample into the running mean without storing past samples:
  //   avg' = avg * cnt / (cnt + 1) + value / (cnt + 1)
  void trackValue(double value) {
    auto ratio = static_cast<double>(cnt) / (cnt + 1);
    avg *= ratio;
    ++cnt;
    avg += value / cnt;
  }

  // Return the current average, truncated to an integer.
  uint64_t estimate() const { return static_cast<uint64_t>(avg); }

  // Number of samples tracked so far.
  uint64_t count() const { return cnt; }

private:
  const char *name; // label used in the log output; not owned
  double avg{0};
  uint64_t cnt{0};
};

// Scope guard that measures the time between its construction and its
// destruction with std::chrono::steady_clock and reports the elapsed
// nanoseconds to a rolling_stats instance.
//
// When latency tracking is disabled (or the tracker was default-constructed
// or moved from), the tracker is inert: it holds no stats pointer and neither
// reads the clock nor reports anything.
//
// The tracker keeps a raw pointer to the rolling_stats object, which must
// therefore outlive it. The type is move-only so that each measurement is
// reported exactly once.
class rolling_latency_tracker {
public:
  // Start a measurement that will be reported to `stats`.
  explicit rolling_latency_tracker(rolling_stats &stats)
      : stats_(trackLatency ? &stats : nullptr) {
    // Only touch the clock when there is somewhere to report to.
    if (stats_) {
      begin_ = std::chrono::steady_clock::now();
    }
  }

  // An inert tracker that reports nothing.
  rolling_latency_tracker() = default;

  ~rolling_latency_tracker() { finish(); }

  rolling_latency_tracker(const rolling_latency_tracker &) = delete;
  rolling_latency_tracker &operator=(const rolling_latency_tracker &) = delete;

  // Take over the measurement in progress in `rhs`; `rhs` becomes inert.
  rolling_latency_tracker(rolling_latency_tracker &&rhs) noexcept
      : stats_(std::exchange(rhs.stats_, nullptr)), begin_(rhs.begin_) {}

  // Report this tracker's own measurement (exactly as destroying it would),
  // then take over the measurement in progress in `rhs`.
  rolling_latency_tracker &operator=(rolling_latency_tracker &&rhs) noexcept {
    if (this != &rhs) {
      finish();
      stats_ = std::exchange(rhs.stats_, nullptr);
      begin_ = rhs.begin_;
    }
    return *this;
  }

private:
  // Report the elapsed time since begin_ to stats_, if any, and become inert
  // so the same measurement can never be reported twice.
  void finish() noexcept {
    if (stats_ == nullptr) {
      return;
    }
    const auto elapsed = std::chrono::steady_clock::now() - begin_;
    const auto nanos =
        std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
    stats_->trackValue(static_cast<double>(nanos));
    stats_ = nullptr;
  }

  rolling_stats *stats_{nullptr};
  std::chrono::time_point<std::chrono::steady_clock> begin_;
};

} // namespace v2