Skip to content

Commit

Permalink
Merge pull request #72 from ARM-software/protobufs
Browse files Browse the repository at this point in the history
Refactor workload tracking and message generation
  • Loading branch information
bengaineyarm authored Feb 7, 2025
2 parents 9d6cab9 + 94efee1 commit d9a1094
Show file tree
Hide file tree
Showing 27 changed files with 2,089 additions and 591 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@
[submodule "source_third_party/gtest"]
path = source_third_party/gtest
url = https://github.com/google/googletest
[submodule "source_third_party/protopuf"]
path = source_third_party/protopuf
url = https://github.com/PragmaTwice/protopuf.git
5 changes: 4 additions & 1 deletion .mypy.ini
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
[mypy]
exclude = lglpy/timeline/protos/.*\.py
exclude = lglpy/timeline/protos/
ignore_missing_imports = True
disable_error_code = annotation-unchecked

[mypy-lglpy.timeline.data.raw_trace]
disable_error_code = attr-defined

[mypy-lglpy.comms.service_gpu_timeline]
disable_error_code = attr-defined

[mypy-google.*]
ignore_missing_imports = True
2 changes: 1 addition & 1 deletion .pycodestyle.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[pycodestyle]
exclude = lglpy/timeline/protos
ignore = E402,E126,E127
ignore = E402,E126,E127,W503
max-line-length = 80
19 changes: 19 additions & 0 deletions docs/updating_protobuf_files.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Updating the generated protobuf (de)serialization code

This project uses protobufs for (de)serialization of certain data:

* In the raw GPU timeline messages sent from `layer_gpu_timeline` to the host.
* In the Perfetto data collected from the device.

Python decoders for those protocols are pre-generated and stored in the sources
under `lglpy/timeline/protos`.

To regenerate or update the timeline protocol files, run `protoc` from the root
of the repository:

```sh
protoc -I layer_gpu_timeline/ \
       --python_out=lglpy/timeline/protos/layer_driver/ \
       layer_gpu_timeline/timeline.proto
```

- - -

_Copyright © 2025, Arm Limited and contributors._
4 changes: 4 additions & 0 deletions layer_gpu_timeline/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ set(LGL_CONFIG_LOG 1)
include(../source_common/compiler_helper.cmake)
include(../cmake/clang-tools.cmake)

# TPIP
set(BUILD_TESTS OFF)
add_subdirectory(../source_third_party/protopuf "source_third_party/protopuf")

# Build steps
add_subdirectory(../source_common/comms source_common/comms)
add_subdirectory(../source_common/framework source_common/framework)
Expand Down
9 changes: 6 additions & 3 deletions layer_gpu_timeline/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ add_library(
layer_device_functions_render_pass.cpp
layer_device_functions_trace_rays.cpp
layer_device_functions_transfer.cpp
timeline_comms.cpp)
timeline_comms.cpp
timeline_protobuf_encoder.cpp)

target_include_directories(
${VK_LAYER} PRIVATE
Expand All @@ -64,7 +65,8 @@ target_include_directories(
target_include_directories(
${VK_LAYER} SYSTEM PRIVATE
../../source_third_party/
../../source_third_party/khronos/vulkan/include/)
../../source_third_party/khronos/vulkan/include/
../../source_third_party/protopuf/include/)

lgl_set_build_options(${VK_LAYER})

Expand All @@ -73,7 +75,8 @@ target_link_libraries(
lib_layer_comms
lib_layer_framework
lib_layer_trackers
$<$<PLATFORM_ID:Android>:log>)
$<$<PLATFORM_ID:Android>:log>
protopuf)

if (CMAKE_BUILD_TYPE STREQUAL "Release")
add_custom_command(
Expand Down
19 changes: 2 additions & 17 deletions layer_gpu_timeline/source/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,13 @@
#include "comms/comms_module.hpp"
#include "framework/utils.hpp"
#include "instance.hpp"
#include "timeline_protobuf_encoder.hpp"

#include <array>
#include <fstream>
#include <iostream>
#include <vector>

#include <nlohmann/json.hpp>
#include <sys/stat.h>
#include <unistd.h>

using json = nlohmann::json;

/**
* @brief The dispatch lookup for all of the created Vulkan devices.
*/
Expand Down Expand Up @@ -125,15 +120,5 @@ Device::Device(Instance* _instance,

pid_t processPID = getpid();

json deviceMetadata {
{"type", "device"},
{"pid", static_cast<uint32_t>(processPID)},
{"device", reinterpret_cast<uintptr_t>(device)},
{"deviceName", name},
{"driverMajor", major},
{"driverMinor", minor},
{"driverPatch", patch},
};

commsWrapper->txMessage(deviceMetadata.dump());
TimelineProtobufEncoder::emitMetadata(*this, processPID, major, minor, patch, std::move(name));
}
11 changes: 2 additions & 9 deletions layer_gpu_timeline/source/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,11 @@ class Device
~Device() = default;

/**
* @brief Callback for sending messages on frame boundary.
* @brief Transmit a message for this device via its comms wrapper.
*
* @param message The message to send.
*/
void onFrame(const std::string& message) { commsWrapper->txMessage(message); }

/**
* @brief Callback for sending messages on workload submit to a queue.
*
* @param message The message to send.
*/
void onWorkloadSubmit(const std::string& message) { commsWrapper->txMessage(message); }
void txMessage(Comms::MessageData&& message) { commsWrapper->txMessage(std::move(message)); }

/**
* @brief Get the cumulative stats for this device.
Expand Down
66 changes: 22 additions & 44 deletions layer_gpu_timeline/source/layer_device_functions_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@

#include "device.hpp"
#include "framework/device_dispatch_table.hpp"
#include "utils/misc.hpp"
#include "timeline_protobuf_encoder.hpp"
#include "trackers/queue.hpp"

#include <mutex>

#include <nlohmann/json.hpp>
#include <time.h>

using json = nlohmann::json;

using namespace std::placeholders;

extern std::mutex g_vulkanLock;

/**
Expand Down Expand Up @@ -66,34 +62,26 @@ static uint64_t getClockMonotonicRaw()
/**
* @brief Emit the queue submit time metadata.
*
* @param queue The queue being submitted to.
* @param callback The data emit callback.
* @param queue The queue being submitted to.
* @param workloadVisitor The encoder used to emit the submit message.
*/
static void emitQueueMetadata(VkDevice device, VkQueue queue, std::function<void(const std::string&)> callback)
static void emitQueueMetadata(VkQueue queue, TimelineProtobufEncoder& workloadVisitor)
{
// Write the queue submit metadata
json submitMetadata {
{"type", "submit"},
{"device", reinterpret_cast<uintptr_t>(device)},
{"queue", reinterpret_cast<uintptr_t>(queue)},
{"timestamp", getClockMonotonicRaw()},
};

callback(submitMetadata.dump());
workloadVisitor.emitSubmit(queue, getClockMonotonicRaw());
}

/**
* @brief Emit the command buffer submit time metadata.
*
* @param layer The layer context.
* @param queue The queue being submitted to.
* @param commandBuffer The command buffer being submitted.
* @param callback The data emit callback.
* @param layer The layer context.
* @param queue The queue being submitted to.
* @param commandBuffer The command buffer being submitted.
* @param workloadVisitor The visitor that the submit command stream is played into.
*/
static void emitCommandBufferMetadata(Device& layer,
VkQueue queue,
VkCommandBuffer commandBuffer,
std::function<void(const std::string&)> callback)
Tracker::SubmitCommandWorkloadVisitor& workloadVisitor)
{
// Fetch layer proxies for this workload
auto& tracker = layer.getStateTracker();
Expand All @@ -102,7 +90,7 @@ static void emitCommandBufferMetadata(Device& layer,

// Play the layer command stream into the queue
const auto& LCS = trackCB.getSubmitCommandStream();
trackQueue.runSubmitCommandStream(LCS, callback);
trackQueue.runSubmitCommandStream(LCS, workloadVisitor);
}

/* See Vulkan API for documentation. */
Expand All @@ -120,14 +108,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(VkQueue queue,

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially to the host tool
json frame {
{"type", "frame"},
{"device", reinterpret_cast<uintptr_t>(layer->device)},
{"fid", tracker.totalStats.getFrameCount()},
{"timestamp", getClockMonotonicRaw()},
};

layer->onFrame(frame.dump());
TimelineProtobufEncoder::emitFrame(*layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw());

// Release the lock to call into the driver
lock.unlock();
Expand All @@ -145,13 +126,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
std::unique_lock<std::mutex> lock {g_vulkanLock};
auto* layer = Device::retrieve(queue);

auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially and contiguously to the host tool
TimelineProtobufEncoder workloadVisitor {*layer};

// Add queue-level metadata
emitQueueMetadata(layer->device, queue, onSubmit);
emitQueueMetadata(queue, workloadVisitor);

// Add per-command buffer metadata
for (uint32_t i = 0; i < submitCount; i++)
Expand All @@ -160,7 +140,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
for (uint32_t j = 0; j < submit.commandBufferCount; j++)
{
VkCommandBuffer commandBuffer = submit.pCommandBuffers[j];
emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
}
}

Expand All @@ -180,13 +160,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
std::unique_lock<std::mutex> lock {g_vulkanLock};
auto* layer = Device::retrieve(queue);

auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially and contiguously to the host tool
TimelineProtobufEncoder workloadVisitor {*layer};

// Add queue-level metadata
emitQueueMetadata(layer->device, queue, onSubmit);
emitQueueMetadata(queue, workloadVisitor);

// Add per-command buffer metadata
for (uint32_t i = 0; i < submitCount; i++)
Expand All @@ -195,7 +174,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
{
VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
}
}

Expand All @@ -215,13 +194,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
std::unique_lock<std::mutex> lock {g_vulkanLock};
auto* layer = Device::retrieve(queue);

auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially and contiguously to the host tool
TimelineProtobufEncoder workloadVisitor {*layer};

// Add queue-level metadata
emitQueueMetadata(layer->device, queue, onSubmit);
emitQueueMetadata(queue, workloadVisitor);

// Add per-command buffer metadata
for (uint32_t i = 0; i < submitCount; i++)
Expand All @@ -230,7 +208,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
{
VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
}
}

Expand Down
Loading

0 comments on commit d9a1094

Please sign in to comment.