diff --git a/layer_gpu_support/README_LAYER.md b/layer_gpu_support/README_LAYER.md
index e955679..6c3c155 100644
--- a/layer_gpu_support/README_LAYER.md
+++ b/layer_gpu_support/README_LAYER.md
@@ -103,6 +103,10 @@ irrespective of other settings.
         "pre": false,   // Insert full barrier before render passes
         "post": false   // Insert full barrier after render passes
     },
+    "asbuild": {
+        "pre": false,   // Insert full barrier before acceleration structure builds
+        "post": false   // Insert full barrier after acceleration structure builds
+    },
     "tracerays": {
         "pre": false,   // Insert full barrier before trace rays
         "post": false   // Insert full barrier after trace rays
diff --git a/layer_gpu_support/layer_config.json b/layer_gpu_support/layer_config.json
index 07772ae..50f28e0 100644
--- a/layer_gpu_support/layer_config.json
+++ b/layer_gpu_support/layer_config.json
@@ -12,6 +12,10 @@
                 "pre": false,
                 "post": false
             },
+            "asbuild": {
+                "pre": false,
+                "post": false
+            },
             "tracerays": {
                 "pre": false,
                 "post": false
diff --git a/layer_gpu_support/source/layer_config.cpp b/layer_gpu_support/source/layer_config.cpp
index 4df39dd..e491be1 100644
--- a/layer_gpu_support/source/layer_config.cpp
+++ b/layer_gpu_support/source/layer_config.cpp
@@ -57,6 +57,9 @@ void LayerConfig::parse_serialization_options(const json& config)
     bool s_stream_rp_pre = s_stream.at("renderpass").at("pre");
     bool s_stream_rp_post = s_stream.at("renderpass").at("post");
 
+    bool s_stream_asb_pre = s_stream.at("asbuild").at("pre");
+    bool s_stream_asb_post = s_stream.at("asbuild").at("post");
+
     bool s_stream_rt_pre = s_stream.at("tracerays").at("pre");
     bool s_stream_rt_post = s_stream.at("tracerays").at("post");
 
@@ -65,12 +68,19 @@ void LayerConfig::parse_serialization_options(const json& config)
 
     // Write after all options read from JSON so we know it parsed correctly
     conf_serialize_queues = (!s_none) && (s_all || s_queue);
+
     conf_serialize_dispatch_pre = (!s_none) && (s_all || s_stream_c_pre);
     conf_serialize_dispatch_post = (!s_none) && (s_all || s_stream_c_post);
+
     conf_serialize_render_pass_pre = (!s_none) && (s_all || s_stream_rp_pre);
     conf_serialize_render_pass_post = (!s_none) && (s_all || s_stream_rp_post);
+
+    conf_serialize_as_build_pre = (!s_none) && (s_all || s_stream_asb_pre);
+    conf_serialize_as_build_post = (!s_none) && (s_all || s_stream_asb_post);
+
     conf_serialize_trace_rays_pre = (!s_none) && (s_all || s_stream_rt_pre);
     conf_serialize_trace_rays_post = (!s_none) && (s_all || s_stream_rt_post);
+
     conf_serialize_transfer_pre = (!s_none) && (s_all || s_stream_tx_pre);
     conf_serialize_transfer_post = (!s_none) && (s_all || s_stream_tx_post);
 
@@ -81,6 +91,8 @@
     LAYER_LOG(" - Serialize compute post: %d", conf_serialize_dispatch_post);
     LAYER_LOG(" - Serialize render pass pre: %d", conf_serialize_render_pass_pre);
     LAYER_LOG(" - Serialize render pass post: %d", conf_serialize_render_pass_post);
+    LAYER_LOG(" - Serialize acceleration structure build pre: %d", conf_serialize_as_build_pre);
+    LAYER_LOG(" - Serialize acceleration structure build post: %d", conf_serialize_as_build_post);
     LAYER_LOG(" - Serialize trace rays pre: %d", conf_serialize_trace_rays_pre);
     LAYER_LOG(" - Serialize trace rays post: %d", conf_serialize_trace_rays_post);
     LAYER_LOG(" - Serialize transfer pre: %d", conf_serialize_transfer_pre);
@@ -281,6 +293,18 @@ bool LayerConfig::serialize_cmdstream_render_pass_post() const
     return conf_serialize_render_pass_post;
 }
 
+/* See header for documentation. */
+bool LayerConfig::serialize_cmdstream_as_build_pre() const
+{
+    return conf_serialize_as_build_pre;
+}
+
+/* See header for documentation. */
+bool LayerConfig::serialize_cmdstream_as_build_post() const
+{
+    return conf_serialize_as_build_post;
+}
+
 /* See header for documentation. */
 bool LayerConfig::serialize_cmdstream_trace_rays_pre() const
 {
diff --git a/layer_gpu_support/source/layer_config.hpp b/layer_gpu_support/source/layer_config.hpp
index a065341..07eb831 100644
--- a/layer_gpu_support/source/layer_config.hpp
+++ b/layer_gpu_support/source/layer_config.hpp
@@ -96,6 +96,16 @@ class LayerConfig
      */
     bool serialize_cmdstream_transfer_post() const;
 
+    /**
+     * @brief True if config wants to serialize before acceleration structure build workloads.
+     */
+    bool serialize_cmdstream_as_build_pre() const;
+
+    /**
+     * @brief True if config wants to serialize after acceleration structure build workloads.
+     */
+    bool serialize_cmdstream_as_build_post() const;
+
     // Config queries for shaders
 
     /**
@@ -202,6 +212,16 @@
      */
     bool conf_serialize_trace_rays_post {false};
 
+    /**
+     * @brief True if we force serialize before acceleration structure build workloads.
+     */
+    bool conf_serialize_as_build_pre {false};
+
+    /**
+     * @brief True if we force serialize after acceleration structure build workloads.
+     */
+    bool conf_serialize_as_build_post {false};
+
     /**
      * @brief True if we force serialize before transfer workloads.
      */
diff --git a/layer_gpu_support/source/layer_device_functions.hpp b/layer_gpu_support/source/layer_device_functions.hpp
index a4ac236..faa98f8 100644
--- a/layer_gpu_support/source/layer_device_functions.hpp
+++ b/layer_gpu_support/source/layer_device_functions.hpp
@@ -133,6 +133,26 @@ VKAPI_ATTR void VKAPI_CALL
                                     uint32_t height,
                                     uint32_t depth);
 
+// Commands for acceleration structure builds
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR(
+    VkCommandBuffer commandBuffer,
+    uint32_t infoCount,
+    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
+    const VkDeviceAddress* pIndirectDeviceAddresses,
+    const uint32_t* pIndirectStrides,
+    const uint32_t* const* ppMaxPrimitiveCounts);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR(
+    VkCommandBuffer commandBuffer,
+    uint32_t infoCount,
+    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
+    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos);
+
 // Commands for transfers
 
 /* See Vulkan API for documentation. */
@@ -229,6 +249,24 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer(VkCommandBuffe
                                              uint32_t regionCount,
                                              const VkBufferImageCopy* pRegions);
 
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL
+    layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer,
+                                            const VkCopyAccelerationStructureInfoKHR* pInfo);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL
+    layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
+                                                    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo);
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL
+    layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
+                                                    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo);
+
 /* See Vulkan API for documentation. */
 template<>
 VKAPI_ATTR void VKAPI_CALL
diff --git a/layer_gpu_support/source/layer_device_functions_trace_rays.cpp b/layer_gpu_support/source/layer_device_functions_trace_rays.cpp
index ef76d2d..e1ccee1 100644
--- a/layer_gpu_support/source/layer_device_functions_trace_rays.cpp
+++ b/layer_gpu_support/source/layer_device_functions_trace_rays.cpp
@@ -82,6 +82,58 @@ static void postTraceRays(Device* layer, VkCommandBuffer commandBuffer)
                                        nullptr);
 }
 
+/**
+ * @brief Pre-build-acceleration-structure code injection point.
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ */
+static void preAccelerationStructureBuild(Device* layer, VkCommandBuffer commandBuffer)
+{
+    if (!layer->instance->config.serialize_cmdstream_as_build_pre())
+    {
+        return;
+    }
+
+    // Execution dependency
+    layer->driver.vkCmdPipelineBarrier(commandBuffer,
+                                       VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                       VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                       0,
+                                       0,
+                                       nullptr,
+                                       0,
+                                       nullptr,
+                                       0,
+                                       nullptr);
+}
+
+/**
+ * @brief Post-build-acceleration-structure code injection point.
+ *
+ * @param layer           The layer context for the device.
+ * @param commandBuffer   The command buffer we are recording.
+ */
+static void postAccelerationStructureBuild(Device* layer, VkCommandBuffer commandBuffer)
+{
+    if (!layer->instance->config.serialize_cmdstream_as_build_post())
+    {
+        return;
+    }
+
+    // Execution dependency
+    layer->driver.vkCmdPipelineBarrier(commandBuffer,
+                                       VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                       VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                       0,
+                                       0,
+                                       nullptr,
+                                       0,
+                                       nullptr,
+                                       0,
+                                       nullptr);
+}
+
 /* See Vulkan API for documentation. */
 template<>
 VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer,
@@ -162,3 +214,52 @@ VKAPI_ATTR void VKAPI_CALL
                                     depth);
     postTraceRays(layer, commandBuffer);
 }
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR(
+    VkCommandBuffer commandBuffer,
+    uint32_t infoCount,
+    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
+    const VkDeviceAddress* pIndirectDeviceAddresses,
+    const uint32_t* pIndirectStrides,
+    const uint32_t* const* ppMaxPrimitiveCounts)
+{
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock {g_vulkanLock};
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    preAccelerationStructureBuild(layer, commandBuffer);
+    layer->driver.vkCmdBuildAccelerationStructuresIndirectKHR(commandBuffer,
+                                                              infoCount,
+                                                              pInfos,
+                                                              pIndirectDeviceAddresses,
+                                                              pIndirectStrides,
+                                                              ppMaxPrimitiveCounts);
+    postAccelerationStructureBuild(layer, commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR(
+    VkCommandBuffer commandBuffer,
+    uint32_t infoCount,
+    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
+    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
+{
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock {g_vulkanLock};
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    preAccelerationStructureBuild(layer, commandBuffer);
+    layer->driver.vkCmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos, ppBuildRangeInfos);
+    postAccelerationStructureBuild(layer, commandBuffer);
+}
diff --git a/layer_gpu_support/source/layer_device_functions_transfer.cpp b/layer_gpu_support/source/layer_device_functions_transfer.cpp
index d442e96..bea1605 100644
--- a/layer_gpu_support/source/layer_device_functions_transfer.cpp
+++ b/layer_gpu_support/source/layer_device_functions_transfer.cpp
@@ -400,3 +400,61 @@ VKAPI_ATTR void VKAPI_CALL
     layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo);
     postTransfer(layer, commandBuffer);
 }
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL
+    layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer,
+                                            const VkCopyAccelerationStructureInfoKHR* pInfo)
+{
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock {g_vulkanLock};
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+
+    preTransfer(layer, commandBuffer);
+    layer->driver.vkCmdCopyAccelerationStructureKHR(commandBuffer, pInfo);
+    postTransfer(layer, commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL
+    layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
+                                                    const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
+{
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock {g_vulkanLock};
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    preTransfer(layer, commandBuffer);
+    layer->driver.vkCmdCopyAccelerationStructureToMemoryKHR(commandBuffer, pInfo);
+    postTransfer(layer, commandBuffer);
+}
+
+/* See Vulkan API for documentation. */
+template<>
+VKAPI_ATTR void VKAPI_CALL
+    layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
+                                                    const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
+{
+    LAYER_TRACE(__func__);
+
+    // Hold the lock to access layer-wide global store
+    std::unique_lock lock {g_vulkanLock};
+    auto* layer = Device::retrieve(commandBuffer);
+
+    // Release the lock to call into the driver
+    lock.unlock();
+    preTransfer(layer, commandBuffer);
+    layer->driver.vkCmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo);
+    postTransfer(layer, commandBuffer);
+}
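
Usage note (illustrative only, not part of the patch): to exercise the new serialization points, set the asbuild flags added above to true in layer_config.json. A minimal sketch of just that fragment, keeping the file's existing structure around it:

    "asbuild": {
        "pre": true,     // Insert full barrier before acceleration structure builds
        "post": true     // Insert full barrier after acceleration structure builds
    },

With these enabled, the layer records a full ALL_COMMANDS-to-ALL_COMMANDS vkCmdPipelineBarrier before and after each vkCmdBuildAccelerationStructures*KHR recording, as implemented in layer_device_functions_trace_rays.cpp above.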