diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 6684bb34a..57773c278 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -350,6 +350,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_surface` - `VK_KHR_swapchain` - `VK_KHR_swapchain_mutable_format` +- `VK_KHR_synchronization2` - `VK_KHR_timeline_semaphore` - `VK_KHR_uniform_buffer_standard_layout` - `VK_KHR_variable_pointers` diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 14f6edfc3..d08bca1f0 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -18,6 +18,8 @@ MoltenVK 1.2.6 Released TBD +- Add support for extensions: + - `VK_KHR_synchronization2` - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. - Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h index b0e2dac7c..8e5670c9f 100644 --- a/MoltenVK/MoltenVK/API/mvk_datatypes.h +++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h @@ -414,13 +414,13 @@ MTLWinding mvkMTLWindingFromSpvExecutionMode(uint32_t spvMode); MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode(uint32_t spvMode); /** - * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPiplineStageFlags, + * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPipelineStageFlags2, * taking into consideration whether the barrier is to be placed before or after the specified pipeline stages. */ -MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, bool placeBarrierBefore); +MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore); -/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags. */ -MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess); +/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags2. */ +MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess); #pragma mark - #pragma mark Geometry conversions diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h index 8e1772566..aec8800c3 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h @@ -41,6 +41,9 @@ template <size_t N> class MVKCmdPipelineBarrier : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkDependencyInfo* pDependencyInfo); + VkResult setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -59,8 +62,6 @@ class MVKCmdPipelineBarrier : public MVKCommand { bool coversTextures(); MVKSmallVector<MVKPipelineBarrier, N> _barriers; - VkPipelineStageFlags _srcStageMask; - VkPipelineStageFlags _dstStageMask; VkDependencyFlags _dependencyFlags; }; @@ -281,34 +282,26 @@ class MVKCmdPushDescriptorSetWithTemplate : public MVKCommand { #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -/** Abstract Vulkan command to set or reset an event. */ -class MVKCmdSetResetEvent : public MVKCommand { +/** Vulkan command to set an event. 
*/ +class MVKCmdSetEvent : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff, VkEvent event, - VkPipelineStageFlags stageMask); - -protected: - MVKEvent* _mvkEvent; - -}; - - -#pragma mark - -#pragma mark MVKCmdSetEvent + const VkDependencyInfo* pDependencyInfo); -/** Vulkan command to set an event. */ -class MVKCmdSetEvent : public MVKCmdSetResetEvent { + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags stageMask); -public: void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -316,14 +309,19 @@ class MVKCmdSetEvent : public MVKCmdSetResetEvent { #pragma mark MVKCmdResetEvent /** Vulkan command to reset an event. */ -class MVKCmdResetEvent : public MVKCmdSetResetEvent { +class MVKCmdResetEvent : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask); + void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -339,6 +337,11 @@ template <size_t N> class MVKCmdWaitEvents : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos); + VkResult setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, const VkEvent* pEvents, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm index 3efcab53c..05e578f6e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm @@ -29,6 +29,29 @@ #pragma mark - #pragma mark MVKCmdPipelineBarrier +template <size_t N> +VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff, const VkDependencyInfo* pDependencyInfo) { + _dependencyFlags = pDependencyInfo->dependencyFlags; + + _barriers.clear(); // Clear for reuse + _barriers.reserve(pDependencyInfo->memoryBarrierCount + pDependencyInfo->bufferMemoryBarrierCount + pDependencyInfo->imageMemoryBarrierCount); + + for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pBufferMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pImageMemoryBarriers[i]); + } + + return VK_SUCCESS; +} + template <size_t N> VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, @@ -40,21 +63,19 @@ const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers) { - _srcStageMask = srcStageMask; - _dstStageMask = dstStageMask; _dependencyFlags = dependencyFlags; _barriers.clear(); // Clear for reuse _barriers.reserve(memoryBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount); for (uint32_t i = 0; i < memoryBarrierCount; i++) { - _barriers.emplace_back(pMemoryBarriers[i]); + _barriers.emplace_back(pMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - _barriers.emplace_back(pBufferMemoryBarriers[i]); + _barriers.emplace_back(pBufferMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) 
{ - _barriers.emplace_back(pImageMemoryBarriers[i]); + _barriers.emplace_back(pImageMemoryBarriers[i], srcStageMask, dstStageMask); } return VK_SUCCESS; @@ -67,13 +88,9 @@ // Calls below invoke MTLBlitCommandEncoder so must apply this first. // Check if pipeline barriers are available and we are in a renderpass. if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) { - MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_srcStageMask, false); - MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_dstStageMask, true); - - id<MTLResource> resources[_barriers.size()]; - uint32_t rezCnt = 0; - for (auto& b : _barriers) { + MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.srcStageMask, false); + MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.dstStageMask, true); switch (b.type) { case MVKPipelineBarrier::Memory: { MTLBarrierScope scope = (mvkMTLBarrierScopeFromVkAccessFlags(b.srcAccessMask) | @@ -84,27 +101,30 @@ break; } - case MVKPipelineBarrier::Buffer: - resources[rezCnt++] = b.mvkBuffer->getMTLBuffer(); + case MVKPipelineBarrier::Buffer: { + id<MTLResource> mtlRez = b.mvkBuffer->getMTLBuffer(); + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: &mtlRez + count: 1 + afterStages: srcStages + beforeStages: dstStages]; break; - - case MVKPipelineBarrier::Image: - for (uint8_t planeIndex = 0; planeIndex < b.mvkImage->getPlaneCount(); planeIndex++) { - resources[rezCnt++] = b.mvkImage->getMTLTexture(planeIndex); - } + } + case MVKPipelineBarrier::Image: { + uint32_t plnCnt = b.mvkImage->getPlaneCount(); + id<MTLResource> mtlRezs[plnCnt]; + for (uint8_t plnIdx = 0; plnIdx < plnCnt; plnIdx++) { + mtlRezs[plnIdx] = b.mvkImage->getMTLTexture(plnIdx); + } + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: mtlRezs + count: plnCnt + afterStages: srcStages + beforeStages: dstStages]; break; - + } default: break; } } - - if (rezCnt) { - [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: resources - count: rezCnt - afterStages: srcStages - beforeStages: dstStages]; - } } else if (cmdEncoder->getDevice()->_pMetalFeatures->textureBarriers) { #if !MVK_MACCAT if (coversTextures()) { [cmdEncoder->_mtlRenderEncoder textureBarrier]; } @@ -138,15 +158,15 @@ for (auto& b : _barriers) { switch (b.type) { case MVKPipelineBarrier::Memory: - mvkDvc->applyMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + mvkDvc->applyMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Buffer: - b.mvkBuffer->applyBufferMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkBuffer->applyBufferMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Image: - b.mvkImage->applyImageMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkImage->applyImageMemoryBarrier(b, cmdEncoder, cmdUse); break; default: @@ -493,19 +513,23 @@ #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -VkResult MVKCmdSetResetEvent::setContent(MVKCommandBuffer* cmdBuff, - VkEvent event, - VkPipelineStageFlags stageMask) { +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags stageMask) { _mvkEvent = (MVKEvent*)event; return VK_SUCCESS; } +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + const VkDependencyInfo* pDependencyInfo) { + _mvkEvent = (MVKEvent*)event; -#pragma mark - -#pragma mark MVKCmdSetEvent + return VK_SUCCESS; +} void 
MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, true); @@ -515,6 +539,14 @@ #pragma mark - #pragma mark MVKCmdResetEvent +VkResult MVKCmdResetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask) { + _mvkEvent = (MVKEvent*)event; + + return VK_SUCCESS; +} + void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, false); } @@ -523,6 +555,20 @@ #pragma mark - #pragma mark MVKCmdWaitEvents +template <size_t N> +VkResult MVKCmdWaitEvents<N>::setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos) { + _mvkEvents.clear(); // Clear for reuse + _mvkEvents.reserve(eventCount); + for (uint32_t i = 0; i < eventCount; i++) { + _mvkEvents.push_back((MVKEvent*)pEvents[i]); + } + + return VK_SUCCESS; +} + template <size_t N> VkResult MVKCmdWaitEvents<N>::setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h index baa588634..6b3686e80 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h @@ -85,7 +85,7 @@ class MVKCmdWriteTimestamp : public MVKCmdQuery { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query); @@ -94,7 +94,7 @@ class MVKCmdWriteTimestamp : public MVKCmdQuery { protected: MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override; - VkPipelineStageFlagBits _pipelineStage; + VkPipelineStageFlags2 _stage; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm index bc5ba9c6d..aac431fb1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm @@ -77,13 +77,13 @@ #pragma mark MVKCmdWriteTimestamp VkResult MVKCmdWriteTimestamp::setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query) { VkResult rslt = MVKCmdQuery::setContent(cmdBuff, queryPool, query); - _pipelineStage = pipelineStage; + _stage = stage; cmdBuff->recordTimestampCommand(); diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h index de72f06dd..3eeb7d426 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h @@ -112,8 +112,10 @@ typedef struct MVKPipelineBarrier { } MVKPipelineBarrierType; MVKPipelineBarrierType type = None; - VkAccessFlags srcAccessMask = 0; - VkAccessFlags dstAccessMask = 0; + VkPipelineStageFlags2 srcStageMask = 0; + VkAccessFlags2 srcAccessMask = 0; + VkPipelineStageFlags2 dstStageMask = 0; + VkAccessFlags2 dstAccessMask = 0; uint8_t srcQueueFamilyIndex = 0; uint8_t dstQueueFamilyIndex = 0; union { MVKBuffer* mvkBuffer = nullptr; MVKImage* mvkImage; MVKResource* mvkResource; }; @@ -136,15 +138,44 @@ typedef struct MVKPipelineBarrier { bool isBufferBarrier() { return type == Buffer; } bool isImageBarrier() { return type == Image; } - MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkMemoryBarrier2& vkBarrier) : type(Memory), + srcStageMask(vkBarrier.srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), dstAccessMask(vkBarrier.dstAccessMask) {} - MVKPipelineBarrier(const 
VkBufferMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Memory), + srcStageMask(srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask) + {} + + MVKPipelineBarrier(const VkBufferMemoryBarrier2& vkBarrier) : + type(Buffer), + srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkBuffer((MVKBuffer*)vkBarrier.buffer), + offset(vkBarrier.offset), + size(vkBarrier.size) + {} + + MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : type(Buffer), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), @@ -153,9 +184,30 @@ typedef struct MVKPipelineBarrier { size(vkBarrier.size) {} - MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkImageMemoryBarrier2& vkBarrier) : + type(Image), + srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkImage((MVKImage*)vkBarrier.image), + newLayout(vkBarrier.newLayout), + aspectMask(vkBarrier.subresourceRange.aspectMask), + baseArrayLayer(vkBarrier.subresourceRange.baseArrayLayer), + layerCount(vkBarrier.subresourceRange.layerCount), + baseMipLevel(vkBarrier.subresourceRange.baseMipLevel), + levelCount(vkBarrier.subresourceRange.levelCount) + {} + + MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : type(Image), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h index 2e338ce7a..95fdf681b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h @@ -52,16 +52,12 @@ class MVKBuffer : public MVKResource { VkResult bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo); /** Applies the specified global memory barrier. */ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; /** Applies the specified buffer memory barrier. 
*/ - void applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); @@ -95,9 +91,7 @@ class MVKBuffer : public MVKResource { friend class MVKDeviceMemory; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &overlapOffset, VkDeviceSize &overlapSize); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm index a99f4f0fc..41ee4cef1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm @@ -94,25 +94,21 @@ return bindDeviceMemory((MVKDeviceMemory*)pBindInfo->memory, pBindInfo->memoryOffset); } -void MVKBuffer::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKBuffer::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()]; } #endif } -void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()]; } #endif @@ -120,11 +116,9 @@ // Returns whether the specified buffer memory barrier requires a sync between this // buffer and host memory for the purpose of the host reading texture memory. -bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS - return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && + return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) && isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer)); #endif diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 7a04e90e0..b6b462ad7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -681,9 +681,7 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); /** Applies the specified global memory barrier to all resource issued by this device. 
*/ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 422f1b43e..acd50514e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -320,6 +320,11 @@ subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: { + auto* synch2Features = (VkPhysicalDeviceSynchronization2Features*)next; + synch2Features->synchronization2 = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: { auto* astcHDRFeatures = (VkPhysicalDeviceTextureCompressionASTCHDRFeatures*)next; astcHDRFeatures->textureCompressionASTC_HDR = _metalFeatures.astcHDRTextures; @@ -4172,16 +4177,14 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope mvkRemoveFirstOccurance(_awaitingTimelineSem4s, make_pair(sem4, value)); } -void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKDevice::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { - if (!mvkIsAnyFlagEnabled(dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || + if (!mvkIsAnyFlagEnabled(barrier.dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || !mvkIsAnyFlagEnabled(barrier.dstAccessMask, VK_ACCESS_HOST_READ_BIT) ) { return; } lock_guard<mutex> lock(_rezLock); for (auto& rez : _resources) { - rez->applyMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + rez->applyMemoryBarrier(barrier, cmdEncoder, cmdUse); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index c0bbb4816..88a3a33f1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -55,6 +55,7 @@ MVK_DEVICE_FEATURE(ShaderAtomicInt64, SHADER_ATOMIC_INT64, MVK_DEVICE_FEATURE(ShaderFloat16Int8, SHADER_FLOAT16_INT8, 2) MVK_DEVICE_FEATURE(ShaderSubgroupExtendedTypes, SHADER_SUBGROUP_EXTENDED_TYPES, 1) MVK_DEVICE_FEATURE(SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, 2) +MVK_DEVICE_FEATURE(Synchronization2, SYNCHRONIZATION_2, 1) MVK_DEVICE_FEATURE(TextureCompressionASTCHDR, TEXTURE_COMPRESSION_ASTC_HDR, 1) MVK_DEVICE_FEATURE(TimelineSemaphore, TIMELINE_SEMAPHORE, 1) MVK_DEVICE_FEATURE(UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, 1) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index ef606b035..900b10ffa 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -74,9 +74,7 @@ class MVKImagePlane : public MVKBaseObject { bool overlaps(VkSubresourceLayout& imgLayout, VkDeviceSize offset, VkDeviceSize size); void propagateDebugName(); MVKImageMemoryBinding* getMemoryBinding() const; - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); void pullFromDeviceOnCompletion(MVKCommandEncoder* cmdEncoder, @@ -119,9 +117,7 
@@ class MVKImageMemoryBinding : public MVKResource { VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset) override; /** Applies the specified global memory barrier. */ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; @@ -133,9 +129,7 @@ class MVKImageMemoryBinding : public MVKResource { friend MVKImage; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size); @@ -251,9 +245,7 @@ class MVKImage : public MVKVulkanAPIDeviceObject { virtual VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo); /** Applies the specified image memory barrier. */ - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index f09495c74..c605f45a9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -304,9 +304,7 @@ return (_image->_memoryBindings.size() > 1) ? _image->_memoryBindings[_planeIndex] : _image->_memoryBindings[0]; } -void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImagePlane::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { @@ -323,7 +321,7 @@ : (layerStart + barrier.layerCount)); MVKImageMemoryBinding* memBind = getMemoryBinding(); - bool needsSync = memBind->needsHostReadSync(srcStageMask, dstStageMask, barrier); + bool needsSync = memBind->needsHostReadSync(barrier); bool needsPull = ((!memBind->_mtlTexelBuffer || memBind->_ownsTexelBuffer) && memBind->isMemoryHostCoherent() && barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL && @@ -444,13 +442,11 @@ return _deviceMemory ? _deviceMemory->addImageMemoryBinding(this) : VK_SUCCESS; } -void MVKImageMemoryBinding::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImageMemoryBinding::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if (needsHostReadSync(barrier)) { for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: _image->_planes[planeIndex]->_mtlTexture]; } @@ -469,9 +465,7 @@ // Returns whether the specified image memory barrier requires a sync between this // texture and host memory for the purpose of the host reading texture memory. 
-bool MVKImageMemoryBinding::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) && @@ -625,15 +619,13 @@ #pragma mark Resource memory -void MVKImage::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImage::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { for (uint8_t planeIndex = 0; planeIndex < _planes.size(); planeIndex++) { if ( !_hasChromaSubsampling || mvkIsAnyFlagEnabled(barrier.aspectMask, (VK_IMAGE_ASPECT_PLANE_0_BIT << planeIndex)) ) { - _planes[planeIndex]->applyImageMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + _planes[planeIndex]->applyImageMemoryBarrier(barrier, cmdEncoder, cmdUse); } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index c3b1d242f..b4509f0b7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -92,7 +92,8 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking #pragma mark Queue submissions /** Submits the specified command buffers to the queue. */ - VkResult submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + template <typename S> + VkResult submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse); /** Submits the specified presentation command to the queue. */ VkResult submit(const VkPresentInfoKHR* pPresentInfo); @@ -164,6 +165,24 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking #pragma mark - #pragma mark MVKQueueSubmission +typedef struct MVKSemaphoreSubmitInfo { +private: + MVKSemaphore* _semaphore; +public: + uint64_t value; + VkPipelineStageFlags2 stageMask; + uint32_t deviceIndex; + + void encodeWait(id<MTLCommandBuffer> mtlCmdBuff); + void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff); + MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo); + MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, VkPipelineStageFlags stageMask); + MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other); + MVKSemaphoreSubmitInfo& operator=(const MVKSemaphoreSubmitInfo& other); + ~MVKSemaphoreSubmitInfo(); + +} MVKSemaphoreSubmitInfo; + /** This is an abstract class for an operation that can be submitted to an MVKQueue. 
*/ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { @@ -179,9 +198,14 @@ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { */ virtual VkResult execute() = 0; + MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos); + MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores); + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask); ~MVKQueueSubmission() override; @@ -192,13 +216,22 @@ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { MVKDevice* getDevice() { return _queue->getDevice(); } MVKQueue* _queue; - MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _waitSemaphores; + MVKSmallVector<MVKSemaphoreSubmitInfo> _waitSemaphores; }; #pragma mark - #pragma mark MVKQueueCommandBufferSubmission +typedef struct MVKCommandBufferSubmitInfo { + MVKCommandBuffer* commandBuffer; + uint32_t deviceMask; + + MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo); + MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer); + +} MVKCommandBufferSubmitInfo; + /** * Submits an empty set of command buffers to the queue. * Used for fence-only command submissions. */ @@ -208,7 +241,15 @@ class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { public: VkResult execute() override; - MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); + + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); ~MVKQueueCommandBufferSubmission() override; @@ -222,11 +263,11 @@ class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { virtual void submitCommandBuffers() {} MVKCommandEncodingContext _encodingContext; - MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _signalSemaphores; - MVKFence* _fence; - id<MTLCommandBuffer> _activeMTLCommandBuffer; - MVKCommandUse _commandUse; - bool _emulatedWaitDone; //Used to track if we've already waited for emulated semaphores. + MVKSmallVector<MVKSemaphoreSubmitInfo> _signalSemaphores; + MVKFence* _fence = nullptr; + id<MTLCommandBuffer> _activeMTLCommandBuffer = nil; + MVKCommandUse _commandUse = kMVKCommandUseNone; + bool _emulatedWaitDone = false; //Used to track if we've already waited for emulated semaphores. 
}; @@ -238,7 +279,12 @@ template <size_t N> class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission { public: - MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); + + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); @@ -246,7 +292,7 @@ class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmissi protected: void submitCommandBuffers() override; - MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers; + MVKSmallVector<MVKCommandBufferSubmitInfo, N> _cmdBuffers; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 401fa8b22..9b4afdf83 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -92,20 +92,24 @@ return rslt; } -VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse) { +static inline uint32_t getCommandBufferCount(const VkSubmitInfo2* pSubmitInfo) { return pSubmitInfo->commandBufferInfoCount; } +static inline uint32_t getCommandBufferCount(const VkSubmitInfo* pSubmitInfo) { return pSubmitInfo->commandBufferCount; } + +template <typename S> +VkResult MVKQueue::submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse) { // Fence-only submission if (submitCount == 0 && fence) { - return submit(new MVKQueueCommandBufferSubmission(this, nullptr, fence, cmdUse)); + return submit(new MVKQueueCommandBufferSubmission(this, (S*)nullptr, fence, cmdUse)); } VkResult rslt = VK_SUCCESS; for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) { VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? fence : VK_NULL_HANDLE; // last one gets the fence - const VkSubmitInfo* pVkSub = &pSubmits[sIdx]; + const S* pVkSub = &pSubmits[sIdx]; MVKQueueCommandBufferSubmission* mvkSub; - uint32_t cbCnt = pVkSub->commandBufferCount; + uint32_t cbCnt = getCommandBufferCount(pVkSub); if (cbCnt <= 1) { mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 16) { @@ -128,6 +132,10 @@ return rslt; } +// Concrete implementations of templated MVKQueue::submit(). 
+template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence, MVKCommandUse cmdUse); +template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) { return submit(new MVKQueuePresentSurfaceSubmission(this, pPresentInfo)); } @@ -344,23 +352,89 @@ #pragma mark - #pragma mark MVKQueueSubmission +void MVKSemaphoreSubmitInfo::encodeWait(id<MTLCommandBuffer> mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeWait(mtlCmdBuff, value); } +} + +void MVKSemaphoreSubmitInfo::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeSignal(mtlCmdBuff, value); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo) : + _semaphore((MVKSemaphore*)semaphoreSubmitInfo.semaphore), + value(semaphoreSubmitInfo.value), + stageMask(semaphoreSubmitInfo.stageMask), + deviceIndex(semaphoreSubmitInfo.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, + VkPipelineStageFlags stageMask) : + _semaphore((MVKSemaphore*)semaphore), + value(0), + stageMask(stageMask), + deviceIndex(0) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other) : + _semaphore(other._semaphore), + value(other.value), + stageMask(other.stageMask), + deviceIndex(other.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo& MVKSemaphoreSubmitInfo::operator=(const MVKSemaphoreSubmitInfo& other) { + // Retain new object first in case it's the same object + if (other._semaphore) { other._semaphore->retain(); } + if (_semaphore) { _semaphore->release(); } + _semaphore = other._semaphore; + + value = other.value; + stageMask = other.stageMask; + deviceIndex = other.deviceIndex; + return *this; +} + +MVKSemaphoreSubmitInfo::~MVKSemaphoreSubmitInfo() { + if (_semaphore) { _semaphore->release(); } +} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBufferInfo.commandBuffer)), + deviceMask(commandBufferInfo.deviceMask) {} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBuffer)), + deviceMask(0) {} + +MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) { + _queue = queue; + _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + + _waitSemaphores.reserve(waitSemaphoreInfoCount); + for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) { + _waitSemaphores.emplace_back(pWaitSemaphoreSubmitInfos[i]); + } +} + MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores) { + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask) { _queue = queue; _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). 
_waitSemaphores.reserve(waitSemaphoreCount); for (uint32_t i = 0; i < waitSemaphoreCount; i++) { - auto* sem4 = (MVKSemaphore*)pWaitSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = 0; - _waitSemaphores.emplace_back(sem4, sem4Val); + _waitSemaphores.emplace_back(pWaitSemaphores[i], pWaitDstStageMask ? pWaitDstStageMask[i] : 0); } } MVKQueueSubmission::~MVKQueueSubmission() { - for (auto s : _waitSemaphores) { s.first->release(); } _queue->release(); } @@ -373,13 +447,13 @@ _queue->_submissionCaptureScope->beginScope(); // If using encoded semaphore waiting, do so now. - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(getActiveMTLCommandBuffer(), ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); } // Submit each command buffer. submitCommandBuffers(); // If using encoded semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(getActiveMTLCommandBuffer(), ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(getActiveMTLCommandBuffer()); } // Commit the last MTLCommandBuffer. // Nothing after this because callback might destroy this instance before this function ends. @@ -417,7 +491,7 @@ // should be more performant when prefilled command buffers aren't used, because we spend time encoding commands // first, thus giving the command buffer signalling these semaphores more time to complete. if ( !_emulatedWaitDone ) { - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(nil); } _emulatedWaitDone = true; } @@ -466,7 +540,7 @@ _queue->_submissionCaptureScope->endScope(); // If using inline semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(nil, ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(nil); } // If a fence exists, signal it. if (_fence) { _fence->signal(); } @@ -474,6 +548,31 @@ this->destroy(); } +// On device loss, the fence and signal semaphores may be signalled early, and they might then +// be destroyed on the waiting thread before this submission is done with them. We therefore +// retain() each here to ensure they live long enough for this submission to finish using them. +MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) : + MVKQueueSubmission(queue, + pSubmit ? pSubmit->waitSemaphoreInfoCount : 0, + pSubmit ? pSubmit->pWaitSemaphoreInfos : nullptr), + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } + + // pSubmit can be null if just tracking the fence alone + if (pSubmit) { + uint32_t ssCnt = pSubmit->signalSemaphoreInfoCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphoreInfos[i]); + } + } +} + // On device loss, the fence and signal semaphores may be signalled early, and they might then // be destroyed on the waiting thread before this submission is done with them. We therefore // retain() each here to ensure they live long enough for this submission to finish using them. @@ -482,15 +581,24 @@ VkFence fence, MVKCommandUse cmdUse) : MVKQueueSubmission(queue, - (pSubmit ? pSubmit->waitSemaphoreCount : 0), - (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), + pSubmit ? pSubmit->waitSemaphoreCount : 0, + pSubmit ? pSubmit->pWaitSemaphores : nullptr, + pSubmit ? 
pSubmit->pWaitDstStageMask : nullptr), - _commandUse(cmdUse), - _emulatedWaitDone(false) { + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } // pSubmit can be null if just tracking the fence alone if (pSubmit) { - VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; + uint32_t ssCnt = pSubmit->signalSemaphoreCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphores[i], 0); + } + + VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; for (const auto* next = (const VkBaseInStructure*)pSubmit->pNext; next; next = next->pNext) { switch (next->sType) { case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: @@ -501,31 +609,21 @@ } } if (pTimelineSubmit) { - // Presentation doesn't support timeline semaphores, so handle wait values here. - uint32_t wsCnt = pTimelineSubmit->waitSemaphoreValueCount; - for (uint32_t i = 0; i < wsCnt; i++) { - _waitSemaphores[i].second = pTimelineSubmit->pWaitSemaphoreValues[i]; + uint32_t wsvCnt = pTimelineSubmit->waitSemaphoreValueCount; + for (uint32_t i = 0; i < wsvCnt; i++) { + _waitSemaphores[i].value = pTimelineSubmit->pWaitSemaphoreValues[i]; } + + uint32_t ssvCnt = pTimelineSubmit->signalSemaphoreValueCount; + for (uint32_t i = 0; i < ssvCnt; i++) { + _signalSemaphores[i].value = pTimelineSubmit->pSignalSemaphoreValues[i]; + } } - uint32_t ssCnt = pSubmit->signalSemaphoreCount; - _signalSemaphores.reserve(ssCnt); - for (uint32_t i = 0; i < ssCnt; i++) { - auto* sem4 = (MVKSemaphore*)pSubmit->pSignalSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = pTimelineSubmit ? pTimelineSubmit->pSignalSemaphoreValues[i] : 0; - _signalSemaphores.emplace_back(sem4, sem4Val); - } } - - _fence = (MVKFence*)fence; - if (_fence) { _fence->retain(); } - - _activeMTLCommandBuffer = nil; } MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() { if (_fence) { _fence->release(); } - for (auto s : _signalSemaphores) { s.first->release(); } } @@ -534,11 +632,28 @@ MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); - for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } + for (auto& cbInfo : _cmdBuffers) { cbInfo.commandBuffer->submit(this, &_encodingContext); } mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); } +template <size_t N> +MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { + + if (pSubmit) { + uint32_t cbCnt = pSubmit->commandBufferInfoCount; + _cmdBuffers.reserve(cbCnt); + for (uint32_t i = 0; i < cbCnt; i++) { + _cmdBuffers.emplace_back(pSubmit->pCommandBufferInfos[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); + } + } +} + template <size_t N> MVKQueueFullCommandBufferSubmission<N>::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, @@ -550,9 +665,8 @@ uint32_t cbCnt = pSubmit->commandBufferCount; _cmdBuffers.reserve(cbCnt); for (uint32_t i = 0; i < cbCnt; i++) { - MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); - _cmdBuffers.push_back(cb); - setConfigurationResult(cb->getConfigurationResult()); + _cmdBuffers.emplace_back(pSubmit->pCommandBuffers[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); } } } @@ 
-571,9 +685,8 @@ id<MTLCommandBuffer> mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true); for (auto& ws : _waitSemaphores) { - auto& sem4 = ws.first; - sem4->encodeWait(mtlCmdBuff, 0); // Encoded semaphore waits - sem4->encodeWait(nil, 0); // Inline semaphore waits + ws.encodeWait(mtlCmdBuff); // Encoded semaphore waits + ws.encodeWait(nil); // Inline semaphore waits } for (int i = 0; i < _presentInfo.size(); i++ ) { @@ -612,7 +725,7 @@ MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo) - : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) { + : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores, nullptr) { const VkPresentTimesInfoGOOGLE* pPresentTimesInfo = nullptr; const VkSwapchainPresentFenceInfoEXT* pPresentFenceInfo = nullptr; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index 534ec018f..6cbe2e4e2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -265,6 +265,22 @@ class MVKAttachmentDescription : public MVKBaseObject { #pragma mark - #pragma mark MVKRenderPass +/** Collects together VkSubpassDependency and VkMemoryBarrier2. */ +typedef struct MVKSubpassDependency { + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags2 srcStageMask; + VkPipelineStageFlags2 dstStageMask; + VkAccessFlags2 srcAccessMask; + VkAccessFlags2 dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; + + MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset); + MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar); + +} MVKSubpassDependency; + /** Represents a Vulkan render pass. */ class MVKRenderPass : public MVKVulkanAPIDeviceObject { @@ -308,7 +324,7 @@ class MVKRenderPass : public MVKVulkanAPIDeviceObject { MVKSmallVector<MVKAttachmentDescription> _attachments; MVKSmallVector<MVKRenderSubpass> _subpasses; - MVKSmallVector<VkSubpassDependency2> _subpassDependencies; + MVKSmallVector<MVKSubpassDependency> _subpassDependencies; VkRenderingFlags _renderingFlags = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 762d72d9b..3bf8a1887 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -904,6 +904,26 @@ #pragma mark - #pragma mark MVKRenderPass +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(spDep.srcStageMask), + dstStageMask(spDep.dstStageMask), + srcAccessMask(spDep.srcAccessMask), + dstAccessMask(spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(viewOffset) {} + +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(pMemBar ? pMemBar->srcStageMask : spDep.srcStageMask), + dstStageMask(pMemBar ? pMemBar->dstStageMask : spDep.dstStageMask), + srcAccessMask(pMemBar ? pMemBar->srcAccessMask : spDep.srcAccessMask), + dstAccessMask(pMemBar ? pMemBar->dstAccessMask : spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(spDep.viewOffset) {} + VkExtent2D MVKRenderPass::getRenderAreaGranularity() { if (_device->_pMetalFeatures->tileBasedDeferredRendering) { // This is the tile area. 
@@ -954,19 +974,7 @@ } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - VkSubpassDependency2 dependency = { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, - .pNext = nullptr, - .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass, - .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass, - .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask, - .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask, - .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask, - .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask, - .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags, - .viewOffset = viewOffsets ? viewOffsets[i] : 0, - }; - _subpassDependencies.push_back(dependency); + _subpassDependencies.emplace_back(pCreateInfo->pDependencies[i], viewOffsets ? viewOffsets[i] : 0); } // Link attachments to subpasses @@ -991,7 +999,19 @@ } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - _subpassDependencies.push_back(pCreateInfo->pDependencies[i]); + auto& spDep = pCreateInfo->pDependencies[i]; + + const VkMemoryBarrier2* pMemoryBarrier2 = nullptr; + for (auto* next = (const VkBaseInStructure*)spDep.pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER_2: + pMemoryBarrier2 = (const VkMemoryBarrier2*)next; + break; + default: + break; + } + } + _subpassDependencies.emplace_back(spDep, pMemoryBarrier2); } // Link attachments to subpasses diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h index a1c3da6b2..5b9c47fdd 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h @@ -60,9 +60,7 @@ class MVKResource : public MVKVulkanAPIDeviceObject { } /** Applies the specified global memory barrier. 
*/ - virtual void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + virtual void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h index 87418edd1..be4f25454 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h @@ -43,7 +43,7 @@ using namespace mvk; typedef struct MVKMTLFunction { SPIRVToMSLConversionResultInfo shaderConversionResults; MTLSize threadGroupSize; - inline id<MTLFunction> getMTLFunction() { return _mtlFunction; } + id<MTLFunction> getMTLFunction() { return _mtlFunction; } MVKMTLFunction(id<MTLFunction> mtlFunc, const SPIRVToMSLConversionResultInfo scRslts, MTLSize tgSize); MVKMTLFunction(const MVKMTLFunction& other); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index 908314989..8619a0da9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -36,10 +36,11 @@ } MVKMTLFunction& MVKMTLFunction::operator=(const MVKMTLFunction& other) { - if (_mtlFunction != other._mtlFunction) { - [_mtlFunction release]; - _mtlFunction = [other._mtlFunction retain]; // retained - } + // Retain new object first in case it's the same object + [other._mtlFunction retain]; + [_mtlFunction release]; + _mtlFunction = other._mtlFunction; + shaderConversionResults = other.shaderConversionResults; threadGroupSize = other.threadGroupSize; return *this; diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index 74a006290..f6ad3447c 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -91,6 +91,7 @@ MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE MVK_EXTENSION(KHR_surface, KHR_SURFACE, INSTANCE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain_mutable_format, KHR_SWAPCHAIN_MUTABLE_FORMAT, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(KHR_synchronization2, KHR_SYNCHRONIZATION_2, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_timeline_semaphore, KHR_TIMELINE_SEMAPHORE, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_uniform_buffer_standard_layout, KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_variable_pointers, KHR_VARIABLE_POINTERS, DEVICE, 10.11, 8.0, 1.0) diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm index caa776237..90cb72e0c 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm @@ -728,40 +728,50 @@ MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode } } -MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, +MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore) { // Although there are many combined render/compute/host stages in Vulkan, there are only two render // stages in Metal. 
If the Vulkan stage did not map ONLY to a specific Metal render stage, then if the // barrier is to be placed before the render stages, it should come before the vertex stage, otherwise // if the barrier is to be placed after the render stages, it should come after the fragment stage. if (placeBarrierBefore) { - bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); + bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)); return placeBeforeFragment ? MTLRenderStageFragment : MTLRenderStageVertex; } else { - bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT)); + bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT)); return placeAfterVertex ? MTLRenderStageVertex : MTLRenderStageFragment; } } -MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess) { +MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess) { MTLBarrierScope mtlScope = MTLBarrierScope(0); - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_2_INDEX_READ_BIT | + VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_2_UNIFORM_READ_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers; } - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_SHADER_READ_BIT | + VK_ACCESS_2_SHADER_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers | MTLBarrierScopeTextures; } #if MVK_MACOS - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeRenderTargets; } #endif diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index 44b0e5f69..d3dcbca90 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2517,8 +2517,8 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult 
 
 #pragma mark Vulkan 1.3 calls
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering(
-	VkCommandBuffer commandBuffer,
-	const VkRenderingInfo* pRenderingInfo) {
+	VkCommandBuffer commandBuffer,
+	const VkRenderingInfo* pRenderingInfo) {
 
 	MVKTraceVulkanCallStart();
 	MVKAddCmdFrom3Thresholds(BeginRendering, pRenderingInfo->colorAttachmentCount,
@@ -2527,7 +2527,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering(
 }
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering(
-	VkCommandBuffer commandBuffer) {
+	VkCommandBuffer commandBuffer) {
 
 	MVKTraceVulkanCallStart();
 	MVKAddCmd(EndRendering, commandBuffer);
@@ -2537,56 +2537,79 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering(
 MVK_PUBLIC_VULKAN_STUB(vkCmdBindVertexBuffers2, void, VkCommandBuffer, uint32_t, uint32_t, const VkBuffer*, const VkDeviceSize*, const VkDeviceSize*, const VkDeviceSize*)
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBlitImage2(
-	VkCommandBuffer commandBuffer,
-	const VkBlitImageInfo2* pBlitImageInfo) {
-	MVKTraceVulkanCallStart();
+	VkCommandBuffer commandBuffer,
+	const VkBlitImageInfo2* pBlitImageInfo) {
+
+	MVKTraceVulkanCallStart();
 	MVKAddCmdFromThreshold(BlitImage, pBlitImageInfo->regionCount, 1,
 						   commandBuffer, pBlitImageInfo);
 	MVKTraceVulkanCallEnd();
 }
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBuffer2(
-	VkCommandBuffer commandBuffer,
-	const VkCopyBufferInfo2* pCopyBufferInfo) {
-	MVKTraceVulkanCallStart();
+	VkCommandBuffer commandBuffer,
+	const VkCopyBufferInfo2* pCopyBufferInfo) {
+
+	MVKTraceVulkanCallStart();
 	MVKAddCmdFromThreshold(CopyBuffer, pCopyBufferInfo->regionCount, 1,
 						   commandBuffer, pCopyBufferInfo);
 	MVKTraceVulkanCallEnd();
 }
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBufferToImage2(
-	VkCommandBuffer commandBuffer,
-	const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) {
-	MVKTraceVulkanCallStart();
+	VkCommandBuffer commandBuffer,
+	const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) {
+
+	MVKTraceVulkanCallStart();
 	MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyBufferToImageInfo->regionCount, 1, 4, 8,
 							 commandBuffer, pCopyBufferToImageInfo);
 	MVKTraceVulkanCallEnd();
 }
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImage2(
-	VkCommandBuffer commandBuffer,
-	const VkCopyImageInfo2* pCopyImageInfo) {
-	MVKTraceVulkanCallStart();
+	VkCommandBuffer commandBuffer,
+	const VkCopyImageInfo2* pCopyImageInfo) {
+
+	MVKTraceVulkanCallStart();
 	MVKAddCmdFromThreshold(CopyImage, pCopyImageInfo->regionCount, 1,
 						   commandBuffer, pCopyImageInfo);
 	MVKTraceVulkanCallEnd();
 }
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImageToBuffer2(
-	VkCommandBuffer commandBuffer,
-	const VkCopyImageToBufferInfo2* pCopyImageInfo) {
-	MVKTraceVulkanCallStart();
+	VkCommandBuffer commandBuffer,
+	const VkCopyImageToBufferInfo2* pCopyImageInfo) {
+
+	MVKTraceVulkanCallStart();
 	MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyImageInfo->regionCount, 1, 4, 8,
 							 commandBuffer, pCopyImageInfo);
 	MVKTraceVulkanCallEnd();
 }
 
-MVK_PUBLIC_VULKAN_STUB(vkCmdPipelineBarrier2, void, VkCommandBuffer, const VkDependencyInfo*)
-MVK_PUBLIC_VULKAN_STUB(vkCmdResetEvent2, void, VkCommandBuffer, VkEvent, VkPipelineStageFlags2 stageMask)
+MVK_PUBLIC_VULKAN_SYMBOL void vkCmdPipelineBarrier2(
+	VkCommandBuffer commandBuffer,
+	const VkDependencyInfo* pDependencyInfo) {
+
+	MVKTraceVulkanCallStart();
+	uint32_t barrierCount = pDependencyInfo->memoryBarrierCount + pDependencyInfo->bufferMemoryBarrierCount + pDependencyInfo->imageMemoryBarrierCount;
+	MVKAddCmdFrom2Thresholds(PipelineBarrier, barrierCount, 1, 4, commandBuffer, pDependencyInfo);
+	MVKTraceVulkanCallEnd();
+}
+
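With synchronization2, the source and destination stage masks travel inside each `Vk*MemoryBarrier2` structure rather than as command-level arguments, which is why `applyMemoryBarrier()` above now takes only the `MVKPipelineBarrier`, and why `vkCmdPipelineBarrier2()` forwards the whole `VkDependencyInfo`. The following is a minimal sketch of the call pattern these entry points accept, assuming Vulkan 1.3 headers (or the KHR-suffixed aliases); `recordComputeToVertexBarrier()` and its parameters are illustrative placeholders, not part of this patch.

```cpp
#include <vulkan/vulkan.h>

// Hypothetical helper: make compute-shader writes to a buffer visible
// to a subsequent vertex-attribute read, using the sync2 barrier API.
void recordComputeToVertexBarrier(VkCommandBuffer cmdBuf, VkBuffer buf) {
    VkBufferMemoryBarrier2 bufBarrier = {};
    bufBarrier.sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2;
    bufBarrier.srcStageMask        = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;     // per-barrier stages,
    bufBarrier.srcAccessMask       = VK_ACCESS_2_SHADER_WRITE_BIT;               // not command-level args
    bufBarrier.dstStageMask        = VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT;
    bufBarrier.dstAccessMask       = VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT;
    bufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    bufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    bufBarrier.buffer              = buf;
    bufBarrier.offset              = 0;
    bufBarrier.size                = VK_WHOLE_SIZE;

    VkDependencyInfo depInfo = {};
    depInfo.sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
    depInfo.bufferMemoryBarrierCount = 1;
    depInfo.pBufferMemoryBarriers    = &bufBarrier;

    // MoltenVK maps the 2-suffixed stage and access flags to
    // MTLRenderStages and MTLBarrierScope via the conversions above.
    vkCmdPipelineBarrier2(cmdBuf, &depInfo);
}
```

In this model the fine-grained flags (e.g. `VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT`) are what the `mvkMTLRenderStagesFromVkPipelineStageFlags()` and `mvkMTLBarrierScopeFromVkAccessFlags()` conversions above consume.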
+MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResetEvent2(
+	VkCommandBuffer commandBuffer,
+	VkEvent event,
+	VkPipelineStageFlags2 stageMask) {
+
+	MVKTraceVulkanCallStart();
+	MVKAddCmd(ResetEvent, commandBuffer, event, stageMask);
+	MVKTraceVulkanCallEnd();
+}
 
 MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2(
-	VkCommandBuffer commandBuffer,
-	const VkResolveImageInfo2* pResolveImageInfo) {
-	MVKTraceVulkanCallStart();
+	VkCommandBuffer commandBuffer,
+	const VkResolveImageInfo2* pResolveImageInfo) {
+
+	MVKTraceVulkanCallStart();
 	MVKAddCmdFromThreshold(ResolveImage, pResolveImageInfo->regionCount, 1,
 						   commandBuffer, pResolveImageInfo);
 	MVKTraceVulkanCallEnd();
@@ -2598,7 +2621,17 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2(
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthCompareOp, void, VkCommandBuffer, VkCompareOp)
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthTestEnable, void, VkCommandBuffer, VkBool32)
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthWriteEnable, void, VkCommandBuffer, VkBool32)
-MVK_PUBLIC_VULKAN_STUB(vkCmdSetEvent2, void, VkCommandBuffer, VkEvent, const VkDependencyInfo*)
+
+MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetEvent2(
+	VkCommandBuffer commandBuffer,
+	VkEvent event,
+	const VkDependencyInfo* pDependencyInfo) {
+
+	MVKTraceVulkanCallStart();
+	MVKAddCmd(SetEvent, commandBuffer, event, pDependencyInfo);
+	MVKTraceVulkanCallEnd();
+}
+
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetFrontFace, void, VkCommandBuffer, VkFrontFace)
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveRestartEnable, void, VkCommandBuffer, VkBool32)
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveTopology, void, VkCommandBuffer, VkPrimitiveTopology)
@@ -2607,8 +2640,29 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2(
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilOp, void, VkCommandBuffer, VkStencilFaceFlags, VkStencilOp, VkStencilOp, VkStencilOp, VkCompareOp)
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilTestEnable, void, VkCommandBuffer, VkBool32)
 MVK_PUBLIC_VULKAN_STUB(vkCmdSetViewportWithCount, void, VkCommandBuffer, uint32_t, const VkViewport*)
-MVK_PUBLIC_VULKAN_STUB(vkCmdWaitEvents2, void, VkCommandBuffer, uint32_t, const VkEvent*, const VkDependencyInfo*)
-MVK_PUBLIC_VULKAN_STUB(vkCmdWriteTimestamp2, void, VkCommandBuffer, VkPipelineStageFlags2, VkQueryPool, uint32_t)
+
+MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWaitEvents2(
+	VkCommandBuffer commandBuffer,
+	uint32_t eventCount,
+	const VkEvent* pEvents,
+	const VkDependencyInfo* pDependencyInfos) {
+
+	MVKTraceVulkanCallStart();
+	MVKAddCmdFromThreshold(WaitEvents, eventCount, 1, commandBuffer, eventCount, pEvents, pDependencyInfos);
+	MVKTraceVulkanCallEnd();
+}
+
+MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWriteTimestamp2(
+	VkCommandBuffer commandBuffer,
+	VkPipelineStageFlags2 stage,
+	VkQueryPool queryPool,
+	uint32_t query) {
+
+	MVKTraceVulkanCallStart();
+	MVKAddCmd(WriteTimestamp, commandBuffer, stage, queryPool, query);
+	MVKTraceVulkanCallEnd();
+}
+
 MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkCreatePrivateDataSlot, VkDevice, const VkPrivateDataSlotCreateInfo*, const VkAllocationCallbacks*, VkPrivateDataSlot*)
 MVK_PUBLIC_VULKAN_STUB(vkDestroyPrivateDataSlot, void, VkDevice, VkPrivateDataSlot, const VkAllocationCallbacks*)
 MVK_PUBLIC_VULKAN_STUB(vkGetDeviceBufferMemoryRequirements, void, VkDevice, const VkDeviceBufferMemoryRequirements*, VkMemoryRequirements2*)
@@ -2616,7 +2670,20 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2(
 MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageSparseMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, uint32_t*, VkSparseImageMemoryRequirements2*)
MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkGetPhysicalDeviceToolProperties, VkPhysicalDevice, uint32_t*, VkPhysicalDeviceToolProperties*) MVK_PUBLIC_VULKAN_STUB(vkGetPrivateData, void, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t*) -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkQueueSubmit2, VkQueue, uint32_t, const VkSubmitInfo2*, VkFence) + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence) { + + MVKTraceVulkanCallStart(); + MVKQueue* mvkQ = MVKQueue::getMVKQueue(queue); + VkResult rslt = mvkQ->submit(submitCount, pSubmits, fence, kMVKCommandUseQueueSubmit); + MVKTraceVulkanCallEnd(); + return rslt; +} + MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkSetPrivateData, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t) #pragma mark - @@ -3102,6 +3169,17 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkGetPhysicalDeviceSurfaceFormats2KHR( } +#pragma mark - +#pragma mark VK_KHR_synchronization2 + +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdPipelineBarrier2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdResetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWaitEvents2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWriteTimestamp2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkQueueSubmit2, KHR); + + #pragma mark - #pragma mark VK_KHR_timeline_semaphore
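The `vkQueueSubmit2()` path above composes naturally with the timeline semaphores covered in this next section, since `VkSemaphoreSubmitInfo` carries both a stage mask and a timeline payload value. A minimal sketch of driving the new submission API follows, assuming Vulkan 1.3 headers; `submitWithTimeline()` and its parameters are illustrative placeholders, not part of this patch.

```cpp
#include <vulkan/vulkan.h>

// Hypothetical helper: submit one command buffer and signal a timeline
// semaphore to a given value, via the synchronization2 queue API.
VkResult submitWithTimeline(VkQueue queue, VkCommandBuffer cmdBuf,
                            VkSemaphore timeline, uint64_t signalValue) {
    VkCommandBufferSubmitInfo cbInfo = {};
    cbInfo.sType         = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO;
    cbInfo.commandBuffer = cmdBuf;

    VkSemaphoreSubmitInfo signalInfo = {};
    signalInfo.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO;
    signalInfo.semaphore = timeline;
    signalInfo.value     = signalValue;  // timeline payload; ignored for binary semaphores
    signalInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;

    VkSubmitInfo2 submit = {};
    submit.sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2;
    submit.commandBufferInfoCount   = 1;
    submit.pCommandBufferInfos      = &cbInfo;
    submit.signalSemaphoreInfoCount = 1;
    submit.pSignalSemaphoreInfos    = &signalInfo;

    // On a Vulkan 1.3 instance this resolves to the core symbol; with only
    // VK_KHR_synchronization2 enabled, call vkQueueSubmit2KHR instead, which
    // the MVK_PUBLIC_VULKAN_CORE_ALIAS entries above export.
    return vkQueueSubmit2(queue, 1, &submit, VK_NULL_HANDLE);
}
```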