Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AS build to support layer serialization #89

Merged
merged 2 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions layer_gpu_support/README_LAYER.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ irrespective of other settings.
"pre": false, // Insert full barrier before render passes
"post": false // Insert full barrier after render passes
},
"asbuild": {
"pre": false, // Insert full barrier before acceleration structure builds
"post": false // Insert full barrier after acceleration structure builds
},
"tracerays": {
"pre": false, // Insert full barrier before trace rays
"post": false // Insert full barrier after trace rays
Expand Down
4 changes: 4 additions & 0 deletions layer_gpu_support/layer_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
"pre": false,
"post": false
},
"asbuild": {
"pre": false,
"post": false
},
"tracerays": {
"pre": false,
"post": false
Expand Down
24 changes: 24 additions & 0 deletions layer_gpu_support/source/layer_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ void LayerConfig::parse_serialization_options(const json& config)
bool s_stream_rp_pre = s_stream.at("renderpass").at("pre");
bool s_stream_rp_post = s_stream.at("renderpass").at("post");

bool s_stream_asb_pre = s_stream.at("asbuild").at("pre");
bool s_stream_asb_post = s_stream.at("asbuild").at("post");

bool s_stream_rt_pre = s_stream.at("tracerays").at("pre");
bool s_stream_rt_post = s_stream.at("tracerays").at("post");

Expand All @@ -65,12 +68,19 @@ void LayerConfig::parse_serialization_options(const json& config)

// Write after all options read from JSON so we know it parsed correctly
conf_serialize_queues = (!s_none) && (s_all || s_queue);

conf_serialize_dispatch_pre = (!s_none) && (s_all || s_stream_c_pre);
conf_serialize_dispatch_post = (!s_none) && (s_all || s_stream_c_post);

conf_serialize_render_pass_pre = (!s_none) && (s_all || s_stream_rp_pre);
conf_serialize_render_pass_post = (!s_none) && (s_all || s_stream_rp_post);

conf_serialize_as_build_pre = (!s_none) && (s_all || s_stream_asb_pre);
conf_serialize_as_build_post = (!s_none) && (s_all || s_stream_asb_post);

conf_serialize_trace_rays_pre = (!s_none) && (s_all || s_stream_rt_pre);
conf_serialize_trace_rays_post = (!s_none) && (s_all || s_stream_rt_post);

conf_serialize_transfer_pre = (!s_none) && (s_all || s_stream_tx_pre);
conf_serialize_transfer_post = (!s_none) && (s_all || s_stream_tx_post);

Expand All @@ -81,6 +91,8 @@ void LayerConfig::parse_serialization_options(const json& config)
LAYER_LOG(" - Serialize compute post: %d", conf_serialize_dispatch_post);
LAYER_LOG(" - Serialize render pass pre: %d", conf_serialize_render_pass_pre);
LAYER_LOG(" - Serialize render pass post: %d", conf_serialize_render_pass_post);
LAYER_LOG(" - Serialize acceleration structure build pre: %d", conf_serialize_as_build_pre);
LAYER_LOG(" - Serialize acceleration structure build post: %d", conf_serialize_as_build_post);
LAYER_LOG(" - Serialize trace rays pre: %d", conf_serialize_trace_rays_pre);
LAYER_LOG(" - Serialize trace rays post: %d", conf_serialize_trace_rays_post);
LAYER_LOG(" - Serialize transfer pre: %d", conf_serialize_transfer_pre);
Expand Down Expand Up @@ -281,6 +293,18 @@ bool LayerConfig::serialize_cmdstream_render_pass_post() const
return conf_serialize_render_pass_post;
}

/* See Header for documentation. */
// Accessor for the parsed "serialize.commandstream.asbuild.pre" setting:
// true if a full barrier must be inserted before acceleration structure builds.
bool LayerConfig::serialize_cmdstream_as_build_pre() const
{
return conf_serialize_as_build_pre;
}

/* See header for documentation. */
// Accessor for the parsed "serialize.commandstream.asbuild.post" setting:
// true if a full barrier must be inserted after acceleration structure builds.
bool LayerConfig::serialize_cmdstream_as_build_post() const
{
return conf_serialize_as_build_post;
}

/* See header for documentation. */
bool LayerConfig::serialize_cmdstream_trace_rays_pre() const
{
Expand Down
20 changes: 20 additions & 0 deletions layer_gpu_support/source/layer_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ class LayerConfig
*/
bool serialize_cmdstream_transfer_post() const;

/**
* @brief True if config wants to serialize before acceleration structure build workloads.
*/
bool serialize_cmdstream_as_build_pre() const;

/**
* @brief True if config wants to serialize after acceleration structure build workloads.
*/
bool serialize_cmdstream_as_build_post() const;

// Config queries for shaders

/**
Expand Down Expand Up @@ -202,6 +212,16 @@ class LayerConfig
*/
bool conf_serialize_trace_rays_post {false};

/**
* @brief True if we force serialize before acceleration structure build workloads.
*/
bool conf_serialize_as_build_pre {false};

/**
* @brief True if we force serialize after acceleration structure build workloads.
*/
bool conf_serialize_as_build_post {false};

/**
* @brief True if we force serialize before transfer workloads.
*/
Expand Down
48 changes: 48 additions & 0 deletions layer_gpu_support/source/layer_device_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,36 @@ VKAPI_ATTR void VKAPI_CALL
uint32_t height,
uint32_t depth);

// Commands for acceleration structure builds

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR<user_tag>(
    VkCommandBuffer commandBuffer,
    uint32_t infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos);

/* See Vulkan API for documentation. */
// NOTE: declared exactly once; the original hunk declared this specialization
// twice, which is legal C++ but redundant and confusing to maintain.
template<>
VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR<user_tag>(
    VkCommandBuffer commandBuffer,
    uint32_t infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkDeviceAddress* pIndirectDeviceAddresses,
    const uint32_t* pIndirectStrides,
    const uint32_t* const* ppMaxPrimitiveCounts);

// Commands for transfers

/* See Vulkan API for documentation. */
Expand Down Expand Up @@ -229,6 +259,24 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer<user_tag>(VkCommandBuffe
uint32_t regionCount,
const VkBufferImageCopy* pRegions);

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL
layer_vkCmdCopyAccelerationStructureKHR<user_tag>(VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureInfoKHR* pInfo);

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL
layer_vkCmdCopyAccelerationStructureToMemoryKHR<user_tag>(VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo);

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL
layer_vkCmdCopyMemoryToAccelerationStructureKHR<user_tag>(VkCommandBuffer commandBuffer,
const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo);

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL
Expand Down
101 changes: 101 additions & 0 deletions layer_gpu_support/source/layer_device_functions_trace_rays.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,58 @@ static void postTraceRays(Device* layer, VkCommandBuffer commandBuffer)
nullptr);
}

/**
 * @brief Pre-build-acceleration-structure code injection point.
 *
 * If the layer config enables pre-build serialization, this records a full
 * ALL_COMMANDS-to-ALL_COMMANDS execution barrier ahead of the build.
 *
 * @param layer The layer context for the device.
 * @param commandBuffer The command buffer we are recording.
 */
static void preAccelerationStructureBuild(Device* layer, VkCommandBuffer commandBuffer)
{
    if (layer->instance->config.serialize_cmdstream_as_build_pre())
    {
        // Execution dependency only; no memory barriers are attached
        layer->driver.vkCmdPipelineBarrier(commandBuffer,
                                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                           0,
                                           0,
                                           nullptr,
                                           0,
                                           nullptr,
                                           0,
                                           nullptr);
    }
}

/**
 * @brief Post-build-acceleration-structure code injection point.
 *
 * If the layer config enables post-build serialization, this records a full
 * ALL_COMMANDS-to-ALL_COMMANDS execution barrier after the build.
 *
 * @param layer The layer context for the device.
 * @param commandBuffer The command buffer we are recording.
 */
static void postAccelerationStructureBuild(Device* layer, VkCommandBuffer commandBuffer)
{
    if (layer->instance->config.serialize_cmdstream_as_build_post())
    {
        // Execution dependency only; no memory barriers are attached
        layer->driver.vkCmdPipelineBarrier(commandBuffer,
                                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                           0,
                                           0,
                                           nullptr,
                                           0,
                                           nullptr,
                                           0,
                                           nullptr);
    }
}

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR<user_tag>(VkCommandBuffer commandBuffer,
Expand Down Expand Up @@ -162,3 +214,52 @@ VKAPI_ATTR void VKAPI_CALL
depth);
postTraceRays(layer, commandBuffer);
}

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR<user_tag>(
    VkCommandBuffer commandBuffer,
    uint32_t infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkDeviceAddress* pIndirectDeviceAddresses,
    const uint32_t* pIndirectStrides,
    const uint32_t* const* ppMaxPrimitiveCounts)
{
    LAYER_TRACE(__func__);

    // Hold the lock only while touching the layer-wide global store
    Device* layer = nullptr;
    {
        std::lock_guard<std::mutex> lock {g_vulkanLock};
        layer = Device::retrieve(commandBuffer);
    }

    // Lock is no longer held when calling into the driver
    preAccelerationStructureBuild(layer, commandBuffer);
    layer->driver.vkCmdBuildAccelerationStructuresIndirectKHR(commandBuffer,
                                                              infoCount,
                                                              pInfos,
                                                              pIndirectDeviceAddresses,
                                                              pIndirectStrides,
                                                              ppMaxPrimitiveCounts);
    postAccelerationStructureBuild(layer, commandBuffer);
}

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR<user_tag>(
    VkCommandBuffer commandBuffer,
    uint32_t infoCount,
    const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
    const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
    LAYER_TRACE(__func__);

    // Hold the lock only while touching the layer-wide global store
    Device* layer = nullptr;
    {
        std::lock_guard<std::mutex> lock {g_vulkanLock};
        layer = Device::retrieve(commandBuffer);
    }

    // Lock is no longer held when calling into the driver
    preAccelerationStructureBuild(layer, commandBuffer);
    layer->driver.vkCmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos, ppBuildRangeInfos);
    postAccelerationStructureBuild(layer, commandBuffer);
}
58 changes: 58 additions & 0 deletions layer_gpu_support/source/layer_device_functions_transfer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,3 +400,61 @@ VKAPI_ATTR void VKAPI_CALL
layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo);
postTransfer(layer, commandBuffer);
}

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL
    layer_vkCmdCopyAccelerationStructureKHR<user_tag>(VkCommandBuffer commandBuffer,
                                                      const VkCopyAccelerationStructureInfoKHR* pInfo)
{
    LAYER_TRACE(__func__);

    // Hold the lock only while touching the layer-wide global store
    Device* layer = nullptr;
    {
        std::lock_guard<std::mutex> lock {g_vulkanLock};
        layer = Device::retrieve(commandBuffer);
    }

    // Lock is no longer held when calling into the driver; AS copies are
    // wrapped with the transfer serialization injection points
    preTransfer(layer, commandBuffer);
    layer->driver.vkCmdCopyAccelerationStructureKHR(commandBuffer, pInfo);
    postTransfer(layer, commandBuffer);
}

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL
    layer_vkCmdCopyAccelerationStructureToMemoryKHR<user_tag>(VkCommandBuffer commandBuffer,
                                                              const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
{
    LAYER_TRACE(__func__);

    // Hold the lock only while touching the layer-wide global store
    Device* layer = nullptr;
    {
        std::lock_guard<std::mutex> lock {g_vulkanLock};
        layer = Device::retrieve(commandBuffer);
    }

    // Lock is no longer held when calling into the driver; AS copies are
    // wrapped with the transfer serialization injection points
    preTransfer(layer, commandBuffer);
    layer->driver.vkCmdCopyAccelerationStructureToMemoryKHR(commandBuffer, pInfo);
    postTransfer(layer, commandBuffer);
}

/* See Vulkan API for documentation. */
template<>
VKAPI_ATTR void VKAPI_CALL
    layer_vkCmdCopyMemoryToAccelerationStructureKHR<user_tag>(VkCommandBuffer commandBuffer,
                                                              const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
{
    LAYER_TRACE(__func__);

    // Hold the lock only while touching the layer-wide global store
    Device* layer = nullptr;
    {
        std::lock_guard<std::mutex> lock {g_vulkanLock};
        layer = Device::retrieve(commandBuffer);
    }

    // Lock is no longer held when calling into the driver; AS copies are
    // wrapped with the transfer serialization injection points
    preTransfer(layer, commandBuffer);
    layer->driver.vkCmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo);
    postTransfer(layer, commandBuffer);
}