Merge pull request #72 from ARM-software/protobufs

Refactor workload tracking and message generation
ARM-software · Feb 7, 2025 · d9a1094 · d9a1094
2 parents 9d6cab9 + 94efee1
commit d9a1094
Show file tree

Hide file tree

Showing 27 changed files with 2,089 additions and 591 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -10,3 +10,6 @@
 [submodule "source_third_party/gtest"]
 	path = source_third_party/gtest
 	url = https://github.com/google/googletest
+[submodule "source_third_party/protopuf"]
+	path = source_third_party/protopuf
+	url = https://github.com/PragmaTwice/protopuf.git
diff --git a/.mypy.ini b/.mypy.ini
@@ -1,10 +1,13 @@
 [mypy]
-exclude = lglpy/timeline/protos/.*\.py
+exclude = lglpy/timeline/protos/
 ignore_missing_imports = True
 disable_error_code = annotation-unchecked
 
 [mypy-lglpy.timeline.data.raw_trace]
 disable_error_code = attr-defined
 
+[mypy-lglpy.comms.service_gpu_timeline]
+disable_error_code = attr-defined
+
 [mypy-google.*]
 ignore_missing_imports = True
diff --git a/.pycodestyle.ini b/.pycodestyle.ini
@@ -1,4 +1,4 @@
 [pycodestyle]
 exclude = lglpy/timeline/protos
-ignore = E402,E126,E127
+ignore = E402,E126,E127,W503
 max-line-length = 80
diff --git a/docs/updating_protobuf_files.md b/docs/updating_protobuf_files.md
@@ -0,0 +1,19 @@
+# Updating the generated protobuf (de)serialization code
+
+This project uses protobufs for (de)serialization of certain data:
+
+ * In the raw GPU timeline messages sent from `layer_gpu_timeline` to the host.
+ * In the Perfetto data collected from the device.
+
+Python decoders for those protocols are pre-generated and stored in the sources
+under `lglpy/timeline/protos`.
+
+To regenerate or update the timeline protocol files use:
+
+        protoc -I layer_gpu_timeline/                           \
+            --python_out=lglpy/timeline/protos/layer_driver/    \
+            layer_gpu_timeline/timeline.proto
+
+- - -
+
+_Copyright © 2025, Arm Limited and contributors._
diff --git a/layer_gpu_timeline/CMakeLists.txt b/layer_gpu_timeline/CMakeLists.txt
@@ -35,6 +35,10 @@ set(LGL_CONFIG_LOG 1)
 include(../source_common/compiler_helper.cmake)
 include(../cmake/clang-tools.cmake)
 
+# TPIP (third-party code under source_third_party), added with BUILD_TESTS off
+set(BUILD_TESTS OFF)
+add_subdirectory(../source_third_party/protopuf "source_third_party/protopuf")
+
 # Build steps
 add_subdirectory(../source_common/comms source_common/comms)
 add_subdirectory(../source_common/framework source_common/framework)

diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt
@@ -53,7 +53,8 @@ add_library(
         layer_device_functions_render_pass.cpp
         layer_device_functions_trace_rays.cpp
         layer_device_functions_transfer.cpp
-        timeline_comms.cpp)
+        timeline_comms.cpp
+        timeline_protobuf_encoder.cpp)
 
 target_include_directories(
     ${VK_LAYER} PRIVATE
@@ -64,7 +65,8 @@ target_include_directories(
 target_include_directories(
     ${VK_LAYER} SYSTEM PRIVATE
         ../../source_third_party/
-        ../../source_third_party/khronos/vulkan/include/)
+        ../../source_third_party/khronos/vulkan/include/
+        ../../source_third_party/protopuf/include/)
 
 lgl_set_build_options(${VK_LAYER})
 
@@ -73,7 +75,8 @@ target_link_libraries(
         lib_layer_comms
         lib_layer_framework
         lib_layer_trackers
-        $<$<PLATFORM_ID:Android>:log>)
+        $<$<PLATFORM_ID:Android>:log>
+        protopuf)
 
 if (CMAKE_BUILD_TYPE STREQUAL "Release")
     add_custom_command(

diff --git a/layer_gpu_timeline/source/device.cpp b/layer_gpu_timeline/source/device.cpp
@@ -28,18 +28,13 @@
 #include "comms/comms_module.hpp"
 #include "framework/utils.hpp"
 #include "instance.hpp"
+#include "timeline_protobuf_encoder.hpp"
 
-#include <array>
-#include <fstream>
-#include <iostream>
 #include <vector>
 
-#include <nlohmann/json.hpp>
 #include <sys/stat.h>
 #include <unistd.h>
 
-using json = nlohmann::json;
-
 /**
  * @brief The dispatch lookup for all of the created Vulkan devices.
  */
@@ -125,15 +120,5 @@ Device::Device(Instance* _instance,
 
     pid_t processPID = getpid();
 
-    json deviceMetadata {
-        {"type", "device"},
-        {"pid", static_cast<uint32_t>(processPID)},
-        {"device", reinterpret_cast<uintptr_t>(device)},
-        {"deviceName", name},
-        {"driverMajor", major},
-        {"driverMinor", minor},
-        {"driverPatch", patch},
-    };
-
-    commsWrapper->txMessage(deviceMetadata.dump());
+    TimelineProtobufEncoder::emitMetadata(*this, processPID, major, minor, patch, std::move(name));
 }
diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp
@@ -131,18 +131,11 @@ class Device
     ~Device() = default;
 
     /**
-     * @brief Callback for sending messages on frame boundary.
+     * @brief Send a message for this device through the comms wrapper.
      *
      * @param message   The message to send.
      */
-    void onFrame(const std::string& message) { commsWrapper->txMessage(message); }
-
-    /**
-     * @brief Callback for sending messages on workload submit to a queue.
-     *
-     * @param message   The message to send.
-     */
-    void onWorkloadSubmit(const std::string& message) { commsWrapper->txMessage(message); }
+    void txMessage(Comms::MessageData&& message) { commsWrapper->txMessage(std::move(message)); }
 
     /**
      * @brief Get the cumulative stats for this device.

diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -25,17 +25,13 @@
 
 #include "device.hpp"
 #include "framework/device_dispatch_table.hpp"
-#include "utils/misc.hpp"
+#include "timeline_protobuf_encoder.hpp"
+#include "trackers/queue.hpp"
 
 #include <mutex>
 
-#include <nlohmann/json.hpp>
 #include <time.h>
 
-using json = nlohmann::json;
-
-using namespace std::placeholders;
-
 extern std::mutex g_vulkanLock;
 
 /**
@@ -66,34 +62,26 @@ static uint64_t getClockMonotonicRaw()
 /**
  * @brief Emit the queue submit time metadata.
  *
- * @param queue      The queue being submitted to.
- * @param callback   The data emit callback.
+ * @param queue             The queue being submitted to.
+ * @param workloadVisitor   The encoder used to emit the submit message.
  */
-static void emitQueueMetadata(VkDevice device, VkQueue queue, std::function<void(const std::string&)> callback)
+static void emitQueueMetadata(VkQueue queue, TimelineProtobufEncoder& workloadVisitor)
 {
-    // Write the queue submit metadata
-    json submitMetadata {
-        {"type", "submit"},
-        {"device", reinterpret_cast<uintptr_t>(device)},
-        {"queue", reinterpret_cast<uintptr_t>(queue)},
-        {"timestamp", getClockMonotonicRaw()},
-    };
-
-    callback(submitMetadata.dump());
+    workloadVisitor.emitSubmit(queue, getClockMonotonicRaw());
 }
 
 /**
  * @brief Emit the command buffer submit time metadata.
  *
- * @param layer           The layer context.
- * @param queue           The queue being submitted to.
- * @param commandBuffer   The command buffer being submitted.
- * @param callback        The data emit callback.
+ * @param layer             The layer context.
+ * @param queue             The queue being submitted to.
+ * @param commandBuffer     The command buffer being submitted.
+ * @param workloadVisitor   The visitor passed to the submit command stream replay.
  */
 static void emitCommandBufferMetadata(Device& layer,
                                       VkQueue queue,
                                       VkCommandBuffer commandBuffer,
-                                      std::function<void(const std::string&)> callback)
+                                      Tracker::SubmitCommandWorkloadVisitor& workloadVisitor)
 {
     // Fetch layer proxies for this workload
     auto& tracker = layer.getStateTracker();
@@ -102,7 +90,7 @@ static void emitCommandBufferMetadata(Device& layer,
 
     // Play the layer command stream into the queue
     const auto& LCS = trackCB.getSubmitCommandStream();
-    trackQueue.runSubmitCommandStream(LCS, callback);
+    trackQueue.runSubmitCommandStream(LCS, workloadVisitor);
 }
 
 /* See Vulkan API for documentation. */
@@ -120,14 +108,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(VkQueue queue,
 
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially to the host tool
-    json frame {
-        {"type", "frame"},
-        {"device", reinterpret_cast<uintptr_t>(layer->device)},
-        {"fid", tracker.totalStats.getFrameCount()},
-        {"timestamp", getClockMonotonicRaw()},
-    };
-
-    layer->onFrame(frame.dump());
+    TimelineProtobufEncoder::emitFrame(*layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw());
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -145,13 +126,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
     std::unique_lock<std::mutex> lock {g_vulkanLock};
     auto* layer = Device::retrieve(queue);
 
-    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
-
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
+    TimelineProtobufEncoder workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, onSubmit);
+    emitQueueMetadata(queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -160,7 +140,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
         for (uint32_t j = 0; j < submit.commandBufferCount; j++)
         {
             VkCommandBuffer commandBuffer = submit.pCommandBuffers[j];
-            emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
+            emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
         }
     }
 
@@ -180,13 +160,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
     std::unique_lock<std::mutex> lock {g_vulkanLock};
     auto* layer = Device::retrieve(queue);
 
-    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
-
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
+    TimelineProtobufEncoder workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, onSubmit);
+    emitQueueMetadata(queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -195,7 +174,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
         for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
         {
             VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
-            emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
+            emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
         }
     }
 
@@ -215,13 +194,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
     std::unique_lock<std::mutex> lock {g_vulkanLock};
     auto* layer = Device::retrieve(queue);
 
-    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
-
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
+    TimelineProtobufEncoder workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, onSubmit);
+    emitQueueMetadata(queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -230,7 +208,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
         for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
         {
             VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
-            emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
+            emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
         }
     }