Skip to content

Commit

Permalink
Traktor: Async compute support in Vulkan renderer.
Browse files Browse the repository at this point in the history
  • Loading branch information
apistol78 committed Nov 19, 2024
1 parent 0b4b2bd commit 4fdf4d6
Show file tree
Hide file tree
Showing 20 changed files with 211 additions and 121 deletions.
8 changes: 8 additions & 0 deletions code/Core/Containers/StaticVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*/
#pragma once

#include <initializer_list>
#include <iterator>
#include "Core/Config.h"
#include "Core/Containers/VectorConstructor.h"
Expand Down Expand Up @@ -270,6 +271,13 @@ class StaticVector

StaticVector() = default;

StaticVector(const std::initializer_list< ItemType >& iv) noexcept
: m_size(0)
{
for (auto it = std::begin(iv); it != std::end(iv); ++it)
push_back(*it);
}

explicit StaticVector(size_t size)
: m_size(size)
{
Expand Down
3 changes: 2 additions & 1 deletion code/Render/Context/RenderBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ void ComputeRenderBlock::render(IRenderView* renderView) const

renderView->compute(
program,
workSize
workSize,
asynchronous
);

T_CONTEXT_POP_MARKER(renderView);
Expand Down
1 change: 1 addition & 0 deletions code/Render/Context/RenderBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class T_DLLCLASS ComputeRenderBlock : public RenderBlock
IProgram* program = nullptr;
ProgramParameters* programParams = nullptr;
int32_t workSize[3] = { 1, 1, 1 };
bool asynchronous = false;

virtual void render(IRenderView* renderView) const override final;
};
Expand Down
2 changes: 1 addition & 1 deletion code/Render/IRenderView.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class T_DLLCLASS IRenderView : public Object
*
* \param workSize Work size, 3 dimensional size.
* \param asynchronous True to request asynchronous execution of the compute task.
*/
virtual void compute(IProgram* program, const int32_t* workSize) = 0;
virtual void compute(IProgram* program, const int32_t* workSize, bool asynchronous) = 0;

/*! Enqueue indirect compute task.
*
Expand Down
4 changes: 2 additions & 2 deletions code/Render/Vrfy/RenderViewVrfy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ void RenderViewVrfy::drawIndirect(const IBufferView* vertexBuffer, const IVertex
m_renderView->drawIndirect(wrappedVertexView, vl->getWrappedVertexLayout(), wrappedIndexView, indexType, programVrfy->m_program, primitiveType, wrappedDrawView, drawOffset, drawCount);
}

void RenderViewVrfy::compute(IProgram* program, const int32_t* workSize)
void RenderViewVrfy::compute(IProgram* program, const int32_t* workSize, bool asynchronous)
{
T_CAPTURE_TRACE(L"compute");
T_CAPTURE_ASSERT(m_insideFrame, L"Cannot compute outside of beginFrame/endFrame.");
Expand All @@ -340,7 +340,7 @@ void RenderViewVrfy::compute(IProgram* program, const int32_t* workSize)

programVrfy->verify();

m_renderView->compute(programVrfy->m_program, workSize);
m_renderView->compute(programVrfy->m_program, workSize, asynchronous);
}

void RenderViewVrfy::computeIndirect(IProgram* program, const IBufferView* workBuffer, uint32_t workOffset)
Expand Down
2 changes: 1 addition & 1 deletion code/Render/Vrfy/RenderViewVrfy.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class RenderViewVrfy : public IRenderView

virtual void drawIndirect(const IBufferView* vertexBuffer, const IVertexLayout* vertexLayout, const IBufferView* indexBuffer, IndexType indexType, IProgram* program, PrimitiveType primitiveType, const IBufferView* drawBuffer, uint32_t drawOffset, uint32_t drawCount) override final;

virtual void compute(IProgram* program, const int32_t* workSize) override final;
virtual void compute(IProgram* program, const int32_t* workSize, bool asynchronous) override final;

virtual void computeIndirect(IProgram* program, const IBufferView* workBuffer, uint32_t workOffset) override final;

Expand Down
2 changes: 1 addition & 1 deletion code/Render/Vulkan/AccelerationStructureVk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ Ref< AccelerationStructureVk > AccelerationStructureVk::createTopLevel(Context*

Ref< AccelerationStructureVk > AccelerationStructureVk::createBottomLevel(Context* context, const Buffer* vertexBuffer, const IVertexLayout* vertexLayout, const Buffer* indexBuffer, IndexType indexType, const AlignedVector< Primitives >& primitives)
{
auto commandBuffer = context->getGraphicsQueue()->acquireCommandBuffer(T_FILE_LINE_W);
auto commandBuffer = context->getGraphicsQueue()->acquireCommandBuffer(L"AccelerationStructureVk::createBottomLevel");

Ref< AccelerationStructureVk > as = new AccelerationStructureVk(context);
as->m_scratchAlignment = getScratchAlignment(context);
Expand Down
2 changes: 1 addition & 1 deletion code/Render/Vulkan/BufferStaticVk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ void BufferStaticVk::unlock()
{
m_stageBuffer->unlock();

auto commandBuffer = m_context->getGraphicsQueue()->acquireCommandBuffer(T_FILE_LINE_W);
auto commandBuffer = m_context->getGraphicsQueue()->acquireCommandBuffer(L"BufferStaticVk::unlock");

const VkBufferCopy bc =
{
Expand Down
13 changes: 7 additions & 6 deletions code/Render/Vulkan/Private/CommandBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ bool CommandBuffer::reset()
return true;
}

bool CommandBuffer::submit(VkSemaphore waitSemaphore, VkPipelineStageFlags waitStageFlags, VkSemaphore signalSemaphore)
bool CommandBuffer::submit(const StaticVector< VkSemaphore, 2 >& waitSemaphores, const StaticVector< VkPipelineStageFlags, 2 >& waitStageFlags, VkSemaphore signalSemaphore)
{
T_ASSERT(ThreadManager::getInstance().getCurrentThread() == m_thread);
T_ASSERT(!m_submitted);
Expand All @@ -59,11 +59,12 @@ bool CommandBuffer::submit(VkSemaphore waitSemaphore, VkPipelineStageFlags waitS
VkSubmitInfo si = {};
si.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

if (waitSemaphore != VK_NULL_HANDLE)
if (!waitSemaphores.empty())
{
si.waitSemaphoreCount = 1;
si.pWaitSemaphores = &waitSemaphore;
si.pWaitDstStageMask = &waitStageFlags;
T_ASSERT(waitSemaphores.size() == waitStageFlags.size());
si.waitSemaphoreCount = (uint32_t)waitSemaphores.size();
si.pWaitSemaphores = waitSemaphores.c_ptr();
si.pWaitDstStageMask = waitStageFlags.c_ptr();
}

si.commandBufferCount = 1;
Expand Down Expand Up @@ -101,7 +102,7 @@ bool CommandBuffer::wait()

bool CommandBuffer::submitAndWait()
{
if (!submit(VK_NULL_HANDLE, 0, VK_NULL_HANDLE))
if (!submit({}, {}, VK_NULL_HANDLE))
return false;
if (!wait())
return false;
Expand Down
5 changes: 3 additions & 2 deletions code/Render/Vulkan/Private/CommandBuffer.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* TRAKTOR
* Copyright (c) 2022 Anders Pistol.
* Copyright (c) 2022-2024 Anders Pistol.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
Expand All @@ -9,6 +9,7 @@
#pragma once

#include "Core/Object.h"
#include "Core/Containers/StaticVector.h"
#include "Render/Vulkan/Private/ApiHeader.h"

namespace traktor
Expand All @@ -34,7 +35,7 @@ class CommandBuffer : public Object

bool reset();

bool submit(VkSemaphore waitSemaphore, VkPipelineStageFlags waitStageFlags, VkSemaphore signalSemaphore);
bool submit(const StaticVector< VkSemaphore, 2 >& waitSemaphores, const StaticVector< VkPipelineStageFlags, 2 >& waitStageFlags, VkSemaphore signalSemaphore);

bool wait();

Expand Down
21 changes: 13 additions & 8 deletions code/Render/Vulkan/Private/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ Context::Context(
VkPhysicalDevice physicalDevice,
VkDevice logicalDevice,
VmaAllocator allocator,
uint32_t graphicsQueueIndex
uint32_t graphicsQueueIndex,
uint32_t computeQueueIndex
)
: m_physicalDevice(physicalDevice)
, m_logicalDevice(logicalDevice)
, m_allocator(allocator)
, m_graphicsQueueIndex(graphicsQueueIndex)
, m_computeQueueIndex(computeQueueIndex)
, m_sampledResourceIndexAllocator(0, MaxBindlessResources - 1)
, m_storageResourceIndexAllocator(0, MaxBindlessResources - 1)
, m_bufferResourceIndexAllocator(0, MaxBindlessResources - 1)
Expand Down Expand Up @@ -65,6 +67,7 @@ bool Context::create()

// Create queues.
m_graphicsQueue = Queue::create(this, m_graphicsQueueIndex);
m_computeQueue = Queue::create(this, m_computeQueueIndex);

// Create pipeline cache.
VkPipelineCacheCreateInfo pcci = {};
Expand Down Expand Up @@ -118,13 +121,15 @@ bool Context::create()
dps[5].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
dps[5].descriptorCount = MaxBindlessResources;

VkDescriptorPoolCreateInfo dpci = {};
dpci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dpci.pNext = nullptr;
dpci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT | VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT;
dpci.maxSets = 32000;
dpci.poolSizeCount = sizeof_array(dps);
dpci.pPoolSizes = dps;
const VkDescriptorPoolCreateInfo dpci =
{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT | VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT,
.maxSets = 32000,
.poolSizeCount = sizeof_array(dps),
.pPoolSizes = dps
};

vkCreateDescriptorPool(m_logicalDevice, &dpci, nullptr, &m_descriptorPool);

Expand Down
7 changes: 6 additions & 1 deletion code/Render/Vulkan/Private/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ class Context : public Object
VkPhysicalDevice physicalDevice,
VkDevice logicalDevice,
VmaAllocator allocator,
uint32_t graphicsQueueIndex
uint32_t graphicsQueueIndex,
uint32_t computeQueueIndex
);

virtual ~Context();
Expand Down Expand Up @@ -90,6 +91,8 @@ class Context : public Object

/*! Get the graphics queue held by this context. */
Queue* getGraphicsQueue() const { return m_graphicsQueue; }

/*! Get the compute queue held by this context. */
Queue* getComputeQueue() const { return m_computeQueue; }

/*! Get uniform buffer pool by index; the index is not range checked here (the pool array has 3 entries). */
UniformBufferPool* getUniformBufferPool(int32_t index) const { return m_uniformBufferPools[index]; }

/*! Get the bindless textures descriptor set layout held by this context. */
VkDescriptorSetLayout getBindlessTexturesSetLayout() const { return m_bindlessTexturesDescriptorLayout; }
Expand Down Expand Up @@ -121,10 +124,12 @@ class Context : public Object
VkDevice m_logicalDevice;
VmaAllocator m_allocator;
uint32_t m_graphicsQueueIndex;
uint32_t m_computeQueueIndex;
VkPipelineCache m_pipelineCache = 0;
VkDescriptorPool m_descriptorPool = 0;
int32_t m_views = 0;
Ref< Queue > m_graphicsQueue;
Ref< Queue > m_computeQueue;
Ref< UniformBufferPool > m_uniformBufferPools[3];
Semaphore m_cleanupLock;
Semaphore m_resourceIndexLock;
Expand Down
11 changes: 6 additions & 5 deletions code/Render/Vulkan/Private/Queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,19 @@ namespace traktor::render

T_IMPLEMENT_RTTI_CLASS(L"traktor.render.Queue", Queue, Object)

thread_local VkCommandPool Queue::ms_commandPool;
thread_local VkCommandPool Queue::ms_commandPools[32];

/*! Create a Queue wrapping a device queue.
 *
 * \param context Context supplying the logical device.
 * \param queueIndex Passed to vkGetDeviceQueue as the queue family index
 *                   (queue 0 of that family is fetched); must be within
 *                   the bounds of the ms_commandPools table.
 * \return Newly created queue.
 */
Ref< Queue > Queue::create(Context* context, uint32_t queueIndex)
{
	// Reject indices which doesn't fit the per-thread command pool table.
	T_FATAL_ASSERT(queueIndex < sizeof_array(ms_commandPools));

	VkQueue deviceQueue;
	vkGetDeviceQueue(
		context->getLogicalDevice(),
		queueIndex,
		0,
		&deviceQueue
	);

	return new Queue(context, deviceQueue, queueIndex);
}

Ref< CommandBuffer > Queue::acquireCommandBuffer(const wchar_t* const tag)
{
if (!ms_commandPool)
if (!ms_commandPools[m_queueIndex])
{
VkCommandPool commandPool;

Expand All @@ -40,14 +41,14 @@ Ref< CommandBuffer > Queue::acquireCommandBuffer(const wchar_t* const tag)
if (vkCreateCommandPool(m_context->getLogicalDevice(), &cpci, 0, &commandPool) != VK_SUCCESS)
return nullptr;

ms_commandPool = commandPool;
ms_commandPools[m_queueIndex] = commandPool;
}

VkCommandBuffer commandBuffer = 0;

VkCommandBufferAllocateInfo cbai = {};
cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
cbai.commandPool = ms_commandPool;
cbai.commandPool = ms_commandPools[m_queueIndex];
cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cbai.commandBufferCount = 1;
if (vkAllocateCommandBuffers(m_context->getLogicalDevice(), &cbai, &commandBuffer) != VK_SUCCESS)
Expand All @@ -61,7 +62,7 @@ Ref< CommandBuffer > Queue::acquireCommandBuffer(const wchar_t* const tag)
if (vkBeginCommandBuffer(commandBuffer, &cbbi) != VK_SUCCESS)
return nullptr;

return new CommandBuffer(m_context, this, ms_commandPool, commandBuffer);
return new CommandBuffer(m_context, this, ms_commandPools[m_queueIndex], commandBuffer);
}

VkResult Queue::submit(const VkSubmitInfo& si, VkFence fence)
Expand Down
4 changes: 1 addition & 3 deletions code/Render/Vulkan/Private/Queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,14 @@ class Queue : public Object

/*! Get the queue index stored in this queue. */
uint32_t getQueueIndex() const { return m_queueIndex; }

VkCommandPool getCommandPool() const { return ms_commandPool; }

private:
friend class Context;

Context* m_context;
VkQueue m_queue;
uint32_t m_queueIndex;
Semaphore m_lock;
static thread_local VkCommandPool ms_commandPool;
static thread_local VkCommandPool ms_commandPools[32];

Queue(Context* context, VkQueue queue, uint32_t queueIndex);
};
Expand Down
Loading

0 comments on commit 4fdf4d6

Please sign in to comment.