diff --git a/Common/Data/Convert/SmallDataConvert.h b/Common/Data/Convert/SmallDataConvert.h
index c388eb9eb7ff..0449d3df16bb 100644
--- a/Common/Data/Convert/SmallDataConvert.h
+++ b/Common/Data/Convert/SmallDataConvert.h
@@ -21,7 +21,7 @@ extern const float one_over_255_x4[4];
 extern const float exactly_255_x4[4];
 
 // Utilities useful for filling in std140-layout uniform buffers, and similar.
-// NEON intrinsics: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491f/BABDCGGF.html
+// NEON intrinsics: https://developer.arm.com/documentation/den0018/a/NEON-Intrinsics?lang=en
 
 // LSBs in f[0], etc.
 inline void Uint8x4ToFloat4(float f[4], uint32_t u) {
diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp
index 25cb495d0b56..9b59852c8847 100644
--- a/Common/GPU/D3D11/thin3d_d3d11.cpp
+++ b/Common/GPU/D3D11/thin3d_d3d11.cpp
@@ -271,6 +271,7 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
 	caps_.fragmentShaderDepthWriteSupported = true;
 	caps_.fragmentShaderStencilWriteSupported = false;
 	caps_.blendMinMaxSupported = true;
+	caps_.multiSampleLevelsMask = 1; // More could be supported with some work.
 
 	D3D11_FEATURE_DATA_D3D11_OPTIONS options{};
 	HRESULT result = device_->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));
@@ -309,6 +310,8 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
 		dxgiDevice->Release();
 	}
 
+	caps_.isTilingGPU = false;
+
 	// Temp texture for read-back of small images. Custom textures are created on demand for larger ones.
 	// TODO: Should really benchmark if this extra complexity has any benefit.
 	D3D11_TEXTURE2D_DESC packDesc{};
diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp
index 8b4fb8031225..e50eb3b1c866 100644
--- a/Common/GPU/D3D9/thin3d_d3d9.cpp
+++ b/Common/GPU/D3D9/thin3d_d3d9.cpp
@@ -763,6 +763,8 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
 	caps_.fragmentShaderDepthWriteSupported = true;
 	caps_.fragmentShaderStencilWriteSupported = false;
 	caps_.blendMinMaxSupported = true;
+	caps_.isTilingGPU = false;
+	caps_.multiSampleLevelsMask = 1; // More could be supported with some work.
 
 	if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
 		caps_.anisoSupported = true;
diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp
index 2842e7bffdcd..b107f2f194c0 100644
--- a/Common/GPU/OpenGL/thin3d_gl.cpp
+++ b/Common/GPU/OpenGL/thin3d_gl.cpp
@@ -557,6 +557,7 @@ OpenGLContext::OpenGLContext() {
 	caps_.framebufferStencilBlitSupported = caps_.framebufferBlitSupported;
 	caps_.depthClampSupported = gl_extensions.ARB_depth_clamp || gl_extensions.EXT_depth_clamp;
 	caps_.blendMinMaxSupported = gl_extensions.EXT_blend_minmax;
+	caps_.multiSampleLevelsMask = 1; // More could be supported with some work.
 
 	if (gl_extensions.IsGLES) {
 		caps_.clipDistanceSupported = gl_extensions.EXT_clip_cull_distance || gl_extensions.APPLE_clip_distance;
@@ -601,6 +602,10 @@ OpenGLContext::OpenGLContext() {
 		caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
 		break;
 	}
+
+	// Very rough heuristic!
+	caps_.isTilingGPU = gl_extensions.IsGLES && caps_.vendor != GPUVendor::VENDOR_NVIDIA && caps_.vendor != GPUVendor::VENDOR_INTEL;
+
 	for (int i = 0; i < GLRenderManager::MAX_INFLIGHT_FRAMES; i++) {
 		frameData_[i].push = renderManager_.CreatePushBuffer(i, GL_ARRAY_BUFFER, 64 * 1024);
 	}
diff --git a/Common/GPU/Vulkan/VulkanBarrier.cpp b/Common/GPU/Vulkan/VulkanBarrier.cpp
index e4f2d0908933..d47f50684579 100644
--- a/Common/GPU/Vulkan/VulkanBarrier.cpp
+++ b/Common/GPU/Vulkan/VulkanBarrier.cpp
@@ -9,4 +9,5 @@ void VulkanBarrier::Flush(VkCommandBuffer cmd) {
 	imageBarriers_.clear();
 	srcStageMask_ = 0;
 	dstStageMask_ = 0;
+	dependencyFlags_ = 0;
 }
diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp
index 19ecb40588b6..33d27b40fb47 100644
--- a/Common/GPU/Vulkan/VulkanContext.cpp
+++ b/Common/GPU/Vulkan/VulkanContext.cpp
@@ -57,7 +57,7 @@ std::string VulkanVendorString(uint32_t vendorId) {
 	case VULKAN_VENDOR_ARM: return "ARM";
 	case VULKAN_VENDOR_QUALCOMM: return "Qualcomm";
 	case VULKAN_VENDOR_IMGTEC: return "Imagination";
-
+	case VULKAN_VENDOR_APPLE: return "Apple";
 	default:
 		return StringFromFormat("%08x", vendorId);
 	}
@@ -253,15 +253,21 @@ VkResult VulkanContext::CreateInstance(const CreateInfo &info) {
 			VkPhysicalDeviceProperties2 props2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
 			VkPhysicalDevicePushDescriptorPropertiesKHR pushProps{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR};
 			VkPhysicalDeviceExternalMemoryHostPropertiesEXT extHostMemProps{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT};
+			VkPhysicalDeviceDepthStencilResolveProperties depthStencilResolveProps{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES};
+
 			props2.pNext = &pushProps;
 			pushProps.pNext = &extHostMemProps;
+			extHostMemProps.pNext = &depthStencilResolveProps;
 			vkGetPhysicalDeviceProperties2KHR(physical_devices_[i], &props2);
 			// Don't want bad pointers sitting around.
 			props2.pNext = nullptr;
 			pushProps.pNext = nullptr;
+			extHostMemProps.pNext = nullptr;
+			depthStencilResolveProps.pNext = nullptr;
 			physicalDeviceProperties_[i].properties = props2.properties;
 			physicalDeviceProperties_[i].pushDescriptorProperties = pushProps;
 			physicalDeviceProperties_[i].externalMemoryHostProperties = extHostMemProps;
+			physicalDeviceProperties_[i].depthStencilResolve = depthStencilResolveProps;
 		}
 	} else {
 		for (uint32_t i = 0; i < gpu_count; i++) {
@@ -329,7 +335,7 @@ bool VulkanContext::MemoryTypeFromProperties(uint32_t typeBits, VkFlags requirem
 	for (uint32_t i = 0; i < 32; i++) {
 		if ((typeBits & 1) == 1) {
 			// Type is available, does it match user properties?
-			if ((memory_properties.memoryTypes[i].propertyFlags & requirements_mask) == requirements_mask) {
+			if ((memory_properties_.memoryTypes[i].propertyFlags & requirements_mask) == requirements_mask) {
 				*typeIndex = i;
 				return true;
 			}
@@ -569,17 +575,17 @@ void VulkanContext::ChooseDevice(int physical_device) {
 	}
 
 	// This is as good a place as any to do this.
-	vkGetPhysicalDeviceMemoryProperties(physical_devices_[physical_device_], &memory_properties);
-	INFO_LOG(G3D, "Memory Types (%d):", memory_properties.memoryTypeCount);
-	for (int i = 0; i < (int)memory_properties.memoryTypeCount; i++) {
+	vkGetPhysicalDeviceMemoryProperties(physical_devices_[physical_device_], &memory_properties_);
+	INFO_LOG(G3D, "Memory Types (%d):", memory_properties_.memoryTypeCount);
+	for (int i = 0; i < (int)memory_properties_.memoryTypeCount; i++) {
 		// Don't bother printing dummy memory types.
-		if (!memory_properties.memoryTypes[i].propertyFlags)
+		if (!memory_properties_.memoryTypes[i].propertyFlags)
 			continue;
-		INFO_LOG(G3D, " %d: Heap %d; Flags: %s%s%s%s ", i, memory_properties.memoryTypes[i].heapIndex,
-			(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) ? "DEVICE_LOCAL " : "",
-			(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ? "HOST_VISIBLE " : "",
-			(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) ? "HOST_CACHED " : "",
-			(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) ? "HOST_COHERENT " : "");
+		INFO_LOG(G3D, " %d: Heap %d; Flags: %s%s%s%s ", i, memory_properties_.memoryTypes[i].heapIndex,
+			(memory_properties_.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) ? "DEVICE_LOCAL " : "",
+			(memory_properties_.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ? "HOST_VISIBLE " : "",
+			(memory_properties_.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) ? "HOST_CACHED " : "",
+			(memory_properties_.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) ? "HOST_COHERENT " : "");
 	}
 
 	// Optional features
@@ -606,6 +612,7 @@ void VulkanContext::ChooseDevice(int physical_device) {
 	deviceFeatures_.enabled.standard.shaderClipDistance = deviceFeatures_.available.standard.shaderClipDistance;
 	deviceFeatures_.enabled.standard.shaderCullDistance = deviceFeatures_.available.standard.shaderCullDistance;
 	deviceFeatures_.enabled.standard.geometryShader = deviceFeatures_.available.standard.geometryShader;
+	deviceFeatures_.enabled.standard.sampleRateShading = deviceFeatures_.available.standard.sampleRateShading;
 
 	deviceFeatures_.enabled.multiview = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES };
 	deviceFeatures_.enabled.multiview.multiview = deviceFeatures_.available.multiview.multiview;
diff --git a/Common/GPU/Vulkan/VulkanContext.h b/Common/GPU/Vulkan/VulkanContext.h
index a6f568aa5f52..464b78a47d7a 100644
--- a/Common/GPU/Vulkan/VulkanContext.h
+++ b/Common/GPU/Vulkan/VulkanContext.h
@@ -35,6 +35,7 @@ enum {
 	VULKAN_VENDOR_ARM = 0x000013B5,  // Mali
 	VULKAN_VENDOR_QUALCOMM = 0x00005143,
 	VULKAN_VENDOR_IMGTEC = 0x00001010,  // PowerVR
+	VULKAN_VENDOR_APPLE = 0x0000106b,  // Apple through MoltenVK
 };

 VK_DEFINE_HANDLE(VmaAllocator);
@@ -253,6 +254,7 @@ class VulkanContext {
 		VkPhysicalDeviceProperties properties;
 		VkPhysicalDevicePushDescriptorPropertiesKHR pushDescriptorProperties;
 		VkPhysicalDeviceExternalMemoryHostPropertiesEXT externalMemoryHostProperties;
+		VkPhysicalDeviceDepthStencilResolveProperties depthStencilResolve;
 	};

 	struct AllPhysicalDeviceFeatures {
@@ -283,6 +285,10 @@ class VulkanContext {
 		return device_extensions_enabled_;
 	}

+	const VkPhysicalDeviceMemoryProperties &GetMemoryProperties() const {
+		return memory_properties_;
+	}
+
 	struct PhysicalDeviceFeatures {
 		AllPhysicalDeviceFeatures available{};
 		AllPhysicalDeviceFeatures enabled{};
@@ -401,7 +407,8 @@ class VulkanContext {
 	uint32_t graphics_queue_family_index_ = -1;
 	std::vector<PhysicalDeviceProps> physicalDeviceProperties_;
 	std::vector<VkQueueFamilyProperties> queueFamilyProperties_;
-	VkPhysicalDeviceMemoryProperties memory_properties{};
+
+	VkPhysicalDeviceMemoryProperties memory_properties_{};

 	// Custom collection of things that are good to know
 	VulkanPhysicalDeviceInfo deviceInfo_{};
diff --git a/Common/GPU/Vulkan/VulkanFramebuffer.cpp b/Common/GPU/Vulkan/VulkanFramebuffer.cpp
index 8f277cf676eb..ec50e3f7a479 100644
--- a/Common/GPU/Vulkan/VulkanFramebuffer.cpp
+++ b/Common/GPU/Vulkan/VulkanFramebuffer.cpp
@@ -2,14 +2,59 @@
 #include "Common/GPU/Vulkan/VulkanFramebuffer.h"
 #include "Common/GPU/Vulkan/VulkanQueueRunner.h"

-VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, bool createDepthStencilBuffer, const char *tag)
+VkSampleCountFlagBits MultiSampleLevelToFlagBits(int count) {
+	// TODO: Check hardware support here, or elsewhere?
+	// Some hardware only supports 4x.
+	switch (count) {
+	case 0: return VK_SAMPLE_COUNT_1_BIT;
+	case 1: return VK_SAMPLE_COUNT_2_BIT;
+	case 2: return VK_SAMPLE_COUNT_4_BIT;  // The only non-1 level supported on some mobile chips.
+	case 3: return VK_SAMPLE_COUNT_8_BIT;
+	case 4: return VK_SAMPLE_COUNT_16_BIT;  // rare but exists, on Intel for example
+	default:
+		_assert_(false);
+		return VK_SAMPLE_COUNT_1_BIT;
+	}
+}
+
+void VKRImage::Delete(VulkanContext *vulkan) {
+	// Get rid of the views first, feels cleaner (but in reality doesn't matter).
+	if (rtView)
+		vulkan->Delete().QueueDeleteImageView(rtView);
+	if (texAllLayersView)
+		vulkan->Delete().QueueDeleteImageView(texAllLayersView);
+	for (int i = 0; i < 2; i++) {
+		if (texLayerViews[i]) {
+			vulkan->Delete().QueueDeleteImageView(texLayerViews[i]);
+		}
+	}
+
+	if (image) {
+		_dbg_assert_(alloc);
+		vulkan->Delete().QueueDeleteImageAllocation(image, alloc);
+	}
+}
+
+VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
 	: vulkan_(vk), tag_(tag), width(_width), height(_height), numLayers(_numLayers) {
 	_dbg_assert_(tag);

-	CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
+	CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
 	if (createDepthStencilBuffer) {
-		CreateImage(vulkan_, initCmd, depth, width, height, numLayers, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
+		CreateImage(vulkan_, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
+	}
+
+	if (_multiSampleLevel > 0) {
+		sampleCount = MultiSampleLevelToFlagBits(_multiSampleLevel);
+
+		// TODO: Create a different tag for these?
+		CreateImage(vulkan_, initCmd, msaaColor, width, height, numLayers, sampleCount, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
+		if (createDepthStencilBuffer) {
+			CreateImage(vulkan_, initCmd, msaaDepth, width, height, numLayers, sampleCount, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
+		}
+	} else {
+		sampleCount = VK_SAMPLE_COUNT_1_BIT;
 	}

 	UpdateTag(tag);
@@ -44,19 +89,27 @@ VkFramebuffer VKRFramebuffer::Get(VKRRenderPass *compatibleRenderPass, RenderPas
 	}

 	VkFramebufferCreateInfo fbci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
-	VkImageView views[2]{};
+	VkImageView views[4]{};

 	bool hasDepth = RenderPassTypeHasDepth(rpType);
-	views[0] = color.rtView; // 2D array texture if multilayered.
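+	// NOTE: The attachment view order built below must match the attachment order used by
+	// CreateRenderPass() further down: single-sample color, then depth, then (when the
+	// MULTISAMPLE bit is set) the multisampled color and depth targets.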
+	int attachmentCount = 0;
+	views[attachmentCount++] = color.rtView; // 2D array texture if multilayered.
 	if (hasDepth) {
 		if (!depth.rtView) {
 			WARN_LOG(G3D, "depth render type to non-depth fb: %p %p fmt=%d (%s %dx%d)", depth.image, depth.texAllLayersView, depth.format, tag_.c_str(), width, height);
 			// Will probably crash, depending on driver.
 		}
-		views[1] = depth.rtView;
+		views[attachmentCount++] = depth.rtView;
 	}
-	fbci.renderPass = compatibleRenderPass->Get(vulkan_, rpType);
-	fbci.attachmentCount = hasDepth ? 2 : 1;
+	if (rpType & RenderPassType::MULTISAMPLE) {
+		views[attachmentCount++] = msaaColor.rtView;
+		if (hasDepth) {
+			views[attachmentCount++] = msaaDepth.rtView;
+		}
+	}
+
+	fbci.renderPass = compatibleRenderPass->Get(vulkan_, rpType, sampleCount);
+	fbci.attachmentCount = attachmentCount;
 	fbci.pAttachments = views;
 	fbci.width = width;
 	fbci.height = height;
@@ -73,32 +126,11 @@ VKRFramebuffer::~VKRFramebuffer() {
-	// Get rid of the views first, feels cleaner (but in reality doesn't matter).
-	if (color.rtView)
-		vulkan_->Delete().QueueDeleteImageView(color.rtView);
-	if (depth.rtView)
-		vulkan_->Delete().QueueDeleteImageView(depth.rtView);
-	if (color.texAllLayersView)
-		vulkan_->Delete().QueueDeleteImageView(color.texAllLayersView);
-	if (depth.texAllLayersView)
-		vulkan_->Delete().QueueDeleteImageView(depth.texAllLayersView);
-	for (int i = 0; i < 2; i++) {
-		if (color.texLayerViews[i]) {
-			vulkan_->Delete().QueueDeleteImageView(color.texLayerViews[i]);
-		}
-		if (depth.texLayerViews[i]) {
-			vulkan_->Delete().QueueDeleteImageView(depth.texLayerViews[i]);
-		}
-	}
+	color.Delete(vulkan_);
+	depth.Delete(vulkan_);
+	msaaColor.Delete(vulkan_);
+	msaaDepth.Delete(vulkan_);

-	if (color.image) {
-		_dbg_assert_(color.alloc);
-		vulkan_->Delete().QueueDeleteImageAllocation(color.image, color.alloc);
-	}
-	if (depth.image) {
-		_dbg_assert_(depth.alloc);
-		vulkan_->Delete().QueueDeleteImageAllocation(depth.image, depth.alloc);
-	}
 	for (auto &fb : framebuf) {
 		if (fb) {
 			vulkan_->Delete().QueueDeleteFramebuffer(fb);
@@ -108,7 +140,7 @@ VKRFramebuffer::~VKRFramebuffer() {

 // NOTE: If numLayers > 1, it will create an array texture, rather than a normal 2D texture.
 // This requires a different sampling path!
-void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
+void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
 	// We don't support more exotic layer setups for now. Mono or stereo.
 	_dbg_assert_(numLayers == 1 || numLayers == 2);

@@ -120,13 +152,18 @@ void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKR
 	ici.extent.depth = 1;
 	ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 	ici.imageType = VK_IMAGE_TYPE_2D;
-	ici.samples = VK_SAMPLE_COUNT_1_BIT;
+	ici.samples = sampleCount;
 	ici.tiling = VK_IMAGE_TILING_OPTIMAL;
 	ici.format = format;
-	// Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
-	ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+	ici.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+	if (sampleCount == VK_SAMPLE_COUNT_1_BIT) {
+		ici.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+	}
 	if (color) {
-		ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
+		ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+		if (sampleCount == VK_SAMPLE_COUNT_1_BIT) {
+			ici.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
+		}
 	} else {
 		ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
 	}
@@ -202,6 +239,7 @@ void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKR
 		0, dstAccessMask);
 	img.layout = initialLayout;
 	img.format = format;
+	img.sampleCount = sampleCount;
 	img.tag = tag ? tag : "N/A";
 	img.numLayers = numLayers;
 }
@@ -226,62 +264,101 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {

 // Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
 // Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
-VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
+VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType, VkSampleCountFlagBits sampleCount) {
 	bool selfDependency = RenderPassTypeHasInput(rpType);
 	bool isBackbuffer = rpType == RenderPassType::BACKBUFFER;
 	bool hasDepth = RenderPassTypeHasDepth(rpType);
 	bool multiview = RenderPassTypeHasMultiView(rpType);
+	bool multisample = RenderPassTypeHasMultisample(rpType);

 	if (multiview) {
 		// TODO: Assert that the device has multiview support enabled.
 	}

-	VkAttachmentDescription attachments[2] = {};
-	attachments[0].format = isBackbuffer ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
-	attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
-	attachments[0].loadOp = ConvertLoadAction(key.colorLoadAction);
-	attachments[0].storeOp = ConvertStoreAction(key.colorStoreAction);
-	attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-	attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
-	attachments[0].initialLayout = isBackbuffer ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-	attachments[0].finalLayout = isBackbuffer ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-	attachments[0].flags = 0;
+	int colorAttachmentIndex = 0;
+	int depthAttachmentIndex = 1;
+
+	int attachmentCount = 0;
+	VkAttachmentDescription attachments[4]{};
+	attachments[attachmentCount].format = isBackbuffer ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
+	attachments[attachmentCount].samples = VK_SAMPLE_COUNT_1_BIT;
+	attachments[attachmentCount].loadOp = multisample ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : ConvertLoadAction(key.colorLoadAction);
+	attachments[attachmentCount].storeOp = ConvertStoreAction(key.colorStoreAction);
+	attachments[attachmentCount].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+	attachments[attachmentCount].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+	attachments[attachmentCount].initialLayout = isBackbuffer ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+	attachments[attachmentCount].finalLayout = isBackbuffer ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+	attachmentCount++;

 	if (hasDepth) {
-		attachments[1].format = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
-		attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
-		attachments[1].loadOp = ConvertLoadAction(key.depthLoadAction);
-		attachments[1].storeOp = ConvertStoreAction(key.depthStoreAction);
-		attachments[1].stencilLoadOp = ConvertLoadAction(key.stencilLoadAction);
-		attachments[1].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
-		attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-		attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-		attachments[1].flags = 0;
+		attachments[attachmentCount].format = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
+		attachments[attachmentCount].samples = VK_SAMPLE_COUNT_1_BIT;
+		attachments[attachmentCount].loadOp = multisample ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : ConvertLoadAction(key.depthLoadAction);
+		attachments[attachmentCount].storeOp = ConvertStoreAction(key.depthStoreAction);
+		attachments[attachmentCount].stencilLoadOp = multisample ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : ConvertLoadAction(key.stencilLoadAction);
+		attachments[attachmentCount].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
+		attachments[attachmentCount].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+		attachments[attachmentCount].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+		attachmentCount++;
 	}

-	VkAttachmentReference color_reference{};
-	color_reference.attachment = 0;
-	color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+	if (multisample) {
+		colorAttachmentIndex = attachmentCount;
+		attachments[attachmentCount].format = isBackbuffer ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
+		attachments[attachmentCount].samples = sampleCount;
+		attachments[attachmentCount].loadOp = ConvertLoadAction(key.colorLoadAction);
+		attachments[attachmentCount].storeOp = ConvertStoreAction(key.colorStoreAction);
+		attachments[attachmentCount].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+		attachments[attachmentCount].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+		attachments[attachmentCount].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+		attachments[attachmentCount].finalLayout = isBackbuffer ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+		attachmentCount++;
+
+		if (hasDepth) {
+			depthAttachmentIndex = attachmentCount;
+			attachments[attachmentCount].format = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
+			attachments[attachmentCount].samples = sampleCount;
+			attachments[attachmentCount].loadOp = ConvertLoadAction(key.depthLoadAction);
+			attachments[attachmentCount].storeOp = ConvertStoreAction(key.depthStoreAction);
+			attachments[attachmentCount].stencilLoadOp = ConvertLoadAction(key.stencilLoadAction);
+			attachments[attachmentCount].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
+			attachments[attachmentCount].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+			attachments[attachmentCount].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+			attachmentCount++;
+		}
+	}
+
+	VkAttachmentReference colorReference{};
+	colorReference.attachment = colorAttachmentIndex;
+	colorReference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

-	VkAttachmentReference depth_reference{};
-	depth_reference.attachment = 1;
-	depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+	VkAttachmentReference depthReference{};
+	depthReference.attachment = depthAttachmentIndex;
+	depthReference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

 	VkSubpassDescription subpass{};
 	subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
 	subpass.flags = 0;
 	if (selfDependency) {
 		subpass.inputAttachmentCount = 1;
-		subpass.pInputAttachments = &color_reference;
+		subpass.pInputAttachments = &colorReference;
 	} else {
 		subpass.inputAttachmentCount = 0;
 		subpass.pInputAttachments = nullptr;
 	}
 	subpass.colorAttachmentCount = 1;
-	subpass.pColorAttachments = &color_reference;
-	subpass.pResolveAttachments = nullptr;
+	subpass.pColorAttachments = &colorReference;
+
+	VkAttachmentReference colorResolveReference;
+	if (multisample) {
+		colorResolveReference.attachment = 0;  // the non-msaa color buffer.
+		colorResolveReference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+		subpass.pResolveAttachments = &colorResolveReference;
+	} else {
+		subpass.pResolveAttachments = nullptr;
+	}
 	if (hasDepth) {
-		subpass.pDepthStencilAttachment = &depth_reference;
+		subpass.pDepthStencilAttachment = &depthReference;
 	}
 	subpass.preserveAttachmentCount = 0;
 	subpass.pPreserveAttachments = nullptr;
@@ -291,7 +368,7 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
 	size_t numDeps = 0;

 	VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
-	rp.attachmentCount = hasDepth ? 2 : 1;
+	rp.attachmentCount = attachmentCount;
 	rp.pAttachments = attachments;
 	rp.subpassCount = 1;
 	rp.pSubpasses = &subpass;
@@ -336,18 +413,124 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
 	}

 	VkRenderPass pass;
-	VkResult res = vkCreateRenderPass(vulkan->GetDevice(), &rp, nullptr, &pass);
+	VkResult res;
+
+	// We could always use renderpass2, but I think it'll get both paths better tested if we
+	// only use it with multisample enabled.
+	// if (vulkan->Extensions().KHR_create_renderpass2) {
+	if (multisample) {
+		// It's a bit unfortunate that we can't rely on vkCreateRenderPass2, because here we now have
+		// to do a bunch of struct conversion, just to not have to repeat the logic from above.
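+		// (VkSubpassDescriptionDepthStencilResolveKHR only exists in the *2 API, so resolving
+		// depth/stencil at the end of the pass requires VK_KHR_create_renderpass2.)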
+		VkAttachmentDescription2KHR attachments2[4]{};
+		for (int i = 0; i < attachmentCount; i++) {
+			attachments2[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
+			attachments2[i].format = attachments[i].format;
+			attachments2[i].samples = attachments[i].samples;
+			attachments2[i].loadOp = attachments[i].loadOp;
+			attachments2[i].storeOp = attachments[i].storeOp;
+			attachments2[i].stencilLoadOp = attachments[i].stencilLoadOp;
+			attachments2[i].stencilStoreOp = attachments[i].stencilStoreOp;
+			attachments2[i].initialLayout = attachments[i].initialLayout;
+			attachments2[i].finalLayout = attachments[i].finalLayout;
+		}
+
+		VkAttachmentReference2KHR colorReference2{ VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR };
+		colorReference2.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+		colorReference2.attachment = colorReference.attachment;
+		colorReference2.layout = colorReference.layout;
+
+		VkAttachmentReference2KHR depthReference2{ VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR };
+		depthReference2.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+		depthReference2.attachment = depthReference.attachment;
+		depthReference2.layout = depthReference.layout;
+
+		VkSubpassDependency2KHR deps2[2]{};
+		for (int i = 0; i < numDeps; i++) {
+			deps2[i].sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
+			deps2[i].dependencyFlags = deps[i].dependencyFlags;
+			deps2[i].srcAccessMask = deps[i].srcAccessMask;
+			deps2[i].dstAccessMask = deps[i].dstAccessMask;
+			deps2[i].srcStageMask = deps[i].srcStageMask;
+			deps2[i].dstStageMask = deps[i].dstStageMask;
+			deps2[i].srcSubpass = deps[i].srcSubpass;
+			deps2[i].dstSubpass = deps[i].dstSubpass;
+			deps2[i].viewOffset = 0;
+		}
+
+		VkAttachmentReference2KHR colorResolveReference2{ VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR };
+
+		VkSubpassDescription2KHR subpass2{ VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR };
+		subpass2.colorAttachmentCount = subpass.colorAttachmentCount;
+		subpass2.flags = subpass.flags;
+		if (selfDependency) {
+			subpass2.inputAttachmentCount = subpass.inputAttachmentCount;
+			subpass2.pInputAttachments = &colorReference2;
+		}
+		subpass2.pColorAttachments = &colorReference2;
+		if (hasDepth) {
+			subpass2.pDepthStencilAttachment = &depthReference2;
+		}
+		subpass2.pipelineBindPoint = subpass.pipelineBindPoint;
+		subpass2.viewMask = multiview ? viewMask : 0;
+		if (multisample) {
+			colorResolveReference2.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+			colorResolveReference2.attachment = colorResolveReference.attachment;  // the non-msaa color buffer.
+			colorResolveReference2.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+			subpass2.pResolveAttachments = &colorResolveReference2;
+		} else {
+			subpass2.pResolveAttachments = nullptr;
+		}
+
+		VkAttachmentReference2KHR depthResolveReference2{ VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR };
+		VkSubpassDescriptionDepthStencilResolveKHR depthStencilResolve{ VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR };
+		if (hasDepth && multisample) {
+			subpass2.pNext = &depthStencilResolve;
+			depthResolveReference2.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+			depthResolveReference2.attachment = 1;
+			depthResolveReference2.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+			// TODO: Some games might benefit from the other depth resolve modes when depth texturing.
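+			// SAMPLE_ZERO is the only depth/stencil resolve mode the spec guarantees; anything
+			// fancier should be checked against the depthStencilResolve properties queried in VulkanContext.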
+			depthStencilResolve.depthResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
+			depthStencilResolve.stencilResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
+			depthStencilResolve.pDepthStencilResolveAttachment = &depthResolveReference2;
+		}
+
+		VkRenderPassCreateInfo2KHR rp2{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR };
+		rp2.pAttachments = attachments2;
+		rp2.pDependencies = deps2;
+		rp2.attachmentCount = rp.attachmentCount;
+		rp2.dependencyCount = rp.dependencyCount;
+		rp2.correlatedViewMaskCount = multiview ? 1 : 0;
+		rp2.pCorrelatedViewMasks = multiview ? &viewMask : nullptr;
+		rp2.pSubpasses = &subpass2;
+		rp2.subpassCount = 1;
+		res = vkCreateRenderPass2KHR(vulkan->GetDevice(), &rp2, nullptr, &pass);
+	} else {
+		res = vkCreateRenderPass(vulkan->GetDevice(), &rp, nullptr, &pass);
+	}
+
 	_assert_(res == VK_SUCCESS);
 	_assert_(pass != VK_NULL_HANDLE);
 	return pass;
 }

-VkRenderPass VKRRenderPass::Get(VulkanContext *vulkan, RenderPassType rpType) {
+VkRenderPass VKRRenderPass::Get(VulkanContext *vulkan, RenderPassType rpType, VkSampleCountFlagBits sampleCount) {
 	// When we create a render pass, we create all "types" of it immediately,
 	// practical later when referring to it. Could change to on-demand if it feels motivated
 	// but I think the render pass objects are cheap.
-	if (!pass[(int)rpType]) {
-		pass[(int)rpType] = CreateRenderPass(vulkan, key_, (RenderPassType)rpType);
+
+	// WARNING: We don't include sampleCount in the key, there's only the distinction multisampled or not
+	// which comes from the rpType.
+	// So you CAN NOT mix and match different non-one sample counts.
+
+	_dbg_assert_(!((rpType & RenderPassType::MULTISAMPLE) && sampleCount == VK_SAMPLE_COUNT_1_BIT));
+
+	if (!pass[(int)rpType] || sampleCounts[(int)rpType] != sampleCount) {
+		if (pass[(int)rpType]) {
+			vulkan->Delete().QueueDeleteRenderPass(pass[(int)rpType]);
+		}
+		pass[(int)rpType] = CreateRenderPass(vulkan, key_, (RenderPassType)rpType, sampleCount);
+		sampleCounts[(int)rpType] = sampleCount;
 	}
 	return pass[(int)rpType];
 }
diff --git a/Common/GPU/Vulkan/VulkanFramebuffer.h b/Common/GPU/Vulkan/VulkanFramebuffer.h
index c52e9336406e..fc584f4b8cfa 100644
--- a/Common/GPU/Vulkan/VulkanFramebuffer.h
+++ b/Common/GPU/Vulkan/VulkanFramebuffer.h
@@ -15,12 +15,13 @@ enum class RenderPassType {
 	HAS_DEPTH = 1,
 	COLOR_INPUT = 2,  // input attachment
 	MULTIVIEW = 4,
+	MULTISAMPLE = 8,

 	// This is the odd one out, and gets special handling in MergeRPTypes.
 	// If this flag is set, none of the other flags can be set.
 	// For the backbuffer we can always use CLEAR/DONT_CARE, so bandwidth cost for a depth channel is negligible
 	// so we don't bother with a non-depth version.
-	BACKBUFFER = 8,
+	BACKBUFFER = 16,

 	TYPE_COUNT = BACKBUFFER + 1,
 };
@@ -42,6 +43,7 @@ struct VKRImage {
 	VmaAllocation alloc;
 	VkFormat format;
+	VkSampleCountFlagBits sampleCount;

 	// This one is used by QueueRunner's Perform functions to keep track. CANNOT be used anywhere else due to sync issues.
 	VkImageLayout layout;

 	// For debugging.
 	std::string tag;
+
+	void Delete(VulkanContext *vulkan);
 };

 class VKRFramebuffer {
 public:
-	VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, bool createDepthStencilBuffer, const char *tag);
+	VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
 	~VKRFramebuffer();

 	VkFramebuffer Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType);
@@ -62,10 +66,15 @@ class VKRFramebuffer {
 	int width = 0;
 	int height = 0;
 	int numLayers = 0;
+	VkSampleCountFlagBits sampleCount;

 	VKRImage color{};  // color.image is always there.
 	VKRImage depth{};  // depth.image is allowed to be VK_NULL_HANDLE.

+	// These are only initialized and used if numSamples > 1.
+	VKRImage msaaColor{};
+	VKRImage msaaDepth{};
+
 	const char *Tag() const {
 		return tag_.c_str();
 	}
@@ -76,13 +85,21 @@ class VKRFramebuffer {
 		return depth.image != VK_NULL_HANDLE;
 	}

-	// TODO: Hide.
-	VulkanContext *vulkan_;
+	VkImageView GetRTView() {
+		if (sampleCount == VK_SAMPLE_COUNT_1_BIT) {
+			return color.rtView;
+		} else {
+			return msaaColor.rtView;
+		}
+	}
+
+	VulkanContext *Vulkan() const { return vulkan_; }
 private:
-	static void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag);
+	static void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag);

 	VkFramebuffer framebuf[(size_t)RenderPassType::TYPE_COUNT]{};
+	VulkanContext *vulkan_;
 	std::string tag_;
 };

@@ -98,6 +115,12 @@ inline bool RenderPassTypeHasMultiView(RenderPassType type) {
 	return (type & RenderPassType::MULTIVIEW) != 0;
 }

+inline bool RenderPassTypeHasMultisample(RenderPassType type) {
+	return (type & RenderPassType::MULTISAMPLE) != 0;
+}
+
+VkSampleCountFlagBits MultiSampleLevelToFlagBits(int count);
+
 // Must be the same order as Draw::RPAction
 enum class VKRRenderPassLoadAction : uint8_t {
 	KEEP,  // default. avoid when possible.
@@ -124,7 +147,7 @@ class VKRRenderPass {
 public:
 	VKRRenderPass(const RPKey &key) : key_(key) {}

-	VkRenderPass Get(VulkanContext *vulkan, RenderPassType rpType);
+	VkRenderPass Get(VulkanContext *vulkan, RenderPassType rpType, VkSampleCountFlagBits sampleCount);
 	void Destroy(VulkanContext *vulkan) {
 		for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {
 			if (pass[i]) {
@@ -136,5 +159,6 @@ class VKRRenderPass {
 private:
 	// TODO: Might be better off with a hashmap once the render pass type count grows really large.
 	VkRenderPass pass[(size_t)RenderPassType::TYPE_COUNT]{};
+	VkSampleCountFlagBits sampleCounts[(size_t)RenderPassType::TYPE_COUNT];

 	RPKey key_;
 };
diff --git a/Common/GPU/Vulkan/VulkanLoader.cpp b/Common/GPU/Vulkan/VulkanLoader.cpp
index f710330fc520..94092b216674 100644
--- a/Common/GPU/Vulkan/VulkanLoader.cpp
+++ b/Common/GPU/Vulkan/VulkanLoader.cpp
@@ -223,6 +223,7 @@ PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
 PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
 PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
 PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
+PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
 } // namespace PPSSPP_VK
 using namespace PPSSPP_VK;

@@ -720,6 +721,9 @@ void VulkanLoadDeviceFunctions(VkDevice device, const VulkanExtensions &enabledE
 		LOAD_DEVICE_FUNC(device, vkGetBufferMemoryRequirements2KHR);
 		LOAD_DEVICE_FUNC(device, vkGetImageMemoryRequirements2KHR);
 	}
+	if (enabledExtensions.KHR_create_renderpass2) {
+		LOAD_DEVICE_FUNC(device, vkCreateRenderPass2KHR);
+	}
 }

 void VulkanFree() {
diff --git a/Common/GPU/Vulkan/VulkanLoader.h b/Common/GPU/Vulkan/VulkanLoader.h
index 4f2d97d045b2..caaef0d59766 100644
--- a/Common/GPU/Vulkan/VulkanLoader.h
+++ b/Common/GPU/Vulkan/VulkanLoader.h
@@ -224,6 +224,7 @@ extern PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
 extern PFN_vkGetMemoryHostPointerPropertiesEXT vkGetMemoryHostPointerPropertiesEXT;
 extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
 extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
+extern PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
 } // namespace PPSSPP_VK

 // For fast extension-enabled checks.
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
index 617c349b79f5..7b260c72e73c 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
@@ -198,7 +198,7 @@ bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
 		VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };

 		VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
-		fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RenderPassType::BACKBUFFER);
+		fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RenderPassType::BACKBUFFER, VK_SAMPLE_COUNT_1_BIT);
 		fb_info.attachmentCount = 2;
 		fb_info.pAttachments = attachments;
 		fb_info.width = width;
@@ -762,6 +762,14 @@ static const char *rpTypeDebugNames[] = {
 	"MV_RENDER_DEPTH",
 	"MV_RENDER_INPUT",
 	"MV_RENDER_DEPTH_INPUT",
+	"MS_RENDER",
+	"MS_RENDER_DEPTH",
+	"MS_RENDER_INPUT",
+	"MS_RENDER_DEPTH_INPUT",
+	"MS_MV_RENDER",
+	"MS_MV_RENDER_DEPTH",
+	"MS_MV_RENDER_INPUT",
+	"MS_MV_RENDER_DEPTH_INPUT",
 	"BACKBUF",
 };

@@ -1316,6 +1324,14 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
 		{
 			VKRGraphicsPipeline *graphicsPipeline = c.graphics_pipeline.pipeline;
 			if (graphicsPipeline != lastGraphicsPipeline) {
+				VkSampleCountFlagBits fbSampleCount = step.render.framebuffer ? step.render.framebuffer->sampleCount : VK_SAMPLE_COUNT_1_BIT;
+
+				if (RenderPassTypeHasMultisample(rpType) && fbSampleCount != graphicsPipeline->SampleCount()) {
+					// should have been invalidated.
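+					// (DestroyVariants() resets sampleCount_ to VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM,
+					// so any other mismatch here means a stale MSAA variant survived a sample count change.)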
+					_assert_msg_(graphicsPipeline->SampleCount() == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM,
+						"expected %d sample count, got %d", fbSampleCount, graphicsPipeline->SampleCount());
+				}
+
 				if (!graphicsPipeline->pipeline[(size_t)rpType]) {
 					// NOTE: If render steps got merged, it can happen that, as they ended during recording,
 					// they didn't know their final render pass type so they created the wrong pipelines in EndCurRenderStep().
 					// Maybe a middle pass. But let's try to just block and compile here for now, this doesn't
 					// happen all that much.
 					graphicsPipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();
-					graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType), rpType);
+					graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount);
 				}

 				VkPipeline pipeline = graphicsPipeline->pipeline[(size_t)rpType]->BlockUntilReady();
+
 				if (pipeline != VK_NULL_HANDLE) {
 					vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
 					pipelineLayout = c.pipeline.pipelineLayout;
@@ -1405,7 +1422,12 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
 		{
 			_assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT);
 			VulkanBarrier barrier;
-			SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
+			if (step.render.framebuffer->sampleCount != VK_SAMPLE_COUNT_1_BIT) {
+				// Rendering is happening to the multisample buffer, not the color buffer.
+				SelfDependencyBarrier(step.render.framebuffer->msaaColor, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
+			} else {
+				SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
+			}
 			barrier.Flush(cmd);
 			break;
 		}
@@ -1516,13 +1538,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
 VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step, VkCommandBuffer cmd) {
 	VKRRenderPass *renderPass;
 	int numClearVals = 0;
-	VkClearValue clearVal[2]{};
+	VkClearValue clearVal[4]{};
 	VkFramebuffer framebuf;
 	int w;
 	int h;

 	bool hasDepth = RenderPassTypeHasDepth(step.render.renderPassType);

+	VkSampleCountFlagBits sampleCount;
+
 	if (step.render.framebuffer) {
 		_dbg_assert_(step.render.finalColorLayout != VK_IMAGE_LAYOUT_UNDEFINED);
 		_dbg_assert_(step.render.finalDepthStencilLayout != VK_IMAGE_LAYOUT_UNDEFINED);
@@ -1535,6 +1559,7 @@ VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKR

 		VKRFramebuffer *fb = step.render.framebuffer;
 		framebuf = fb->Get(renderPass, step.render.renderPassType);
+		sampleCount = fb->sampleCount;
 		_dbg_assert_(framebuf != VK_NULL_HANDLE);
 		w = fb->width;
 		h = fb->height;
@@ -1560,15 +1585,22 @@ VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKR
 		// The transition from the optimal format happens after EndRenderPass, now that we don't
 		// do it as part of the renderpass itself anymore.

+		if (sampleCount != VK_SAMPLE_COUNT_1_BIT) {
+			// We don't initialize values for these.
+			numClearVals = hasDepth ? 2 : 1;  // Skip the resolve buffers, don't need to clear those.
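+			// (Clear values are indexed by attachment, and attachments 0/1 are the single-sample
+			// resolve targets loaded with DONT_CARE, so the clears below land on the MSAA attachments.)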
+		}
 		if (step.render.colorLoad == VKRRenderPassLoadAction::CLEAR) {
-			Uint8x4ToFloat4(clearVal[0].color.float32, step.render.clearColor);
-			numClearVals = 1;
+			Uint8x4ToFloat4(clearVal[numClearVals].color.float32, step.render.clearColor);
 		}
-		if (hasDepth && (step.render.depthLoad == VKRRenderPassLoadAction::CLEAR || step.render.stencilLoad == VKRRenderPassLoadAction::CLEAR)) {
-			clearVal[1].depthStencil.depth = step.render.clearDepth;
-			clearVal[1].depthStencil.stencil = step.render.clearStencil;
-			numClearVals = 2;
+		numClearVals++;
+		if (hasDepth) {
+			if (step.render.depthLoad == VKRRenderPassLoadAction::CLEAR || step.render.stencilLoad == VKRRenderPassLoadAction::CLEAR) {
+				clearVal[numClearVals].depthStencil.depth = step.render.clearDepth;
+				clearVal[numClearVals].depthStencil.stencil = step.render.clearStencil;
+			}
+			numClearVals++;
 		}
+		_dbg_assert_(numClearVals != 3);
 	} else {
 		RPKey key{
 			VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,
@@ -1590,10 +1622,11 @@ VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKR
 		numClearVals = hasDepth ? 2 : 1;  // We might do depth-less backbuffer in the future, though doubtful of the value.
 		clearVal[1].depthStencil.depth = 0.0f;
 		clearVal[1].depthStencil.stencil = 0;
+		sampleCount = VK_SAMPLE_COUNT_1_BIT;
 	}

 	VkRenderPassBeginInfo rp_begin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO };
-	rp_begin.renderPass = renderPass->Get(vulkan_, step.render.renderPassType);
+	rp_begin.renderPass = renderPass->Get(vulkan_, step.render.renderPassType, sampleCount);
 	rp_begin.framebuffer = framebuf;

 	VkRect2D rc = step.render.renderArea;
@@ -1633,30 +1666,45 @@ void VulkanQueueRunner::PerformCopy(const VKRStep &step, VkCommandBuffer cmd) {

 	// First source barriers.
 	if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
-		if (src->color.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
-			SetupTransitionToTransferSrc(src->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
-		}
-		if (dst->color.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
-			SetupTransitionToTransferDst(dst->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
-		}
+		SetupTransitionToTransferSrc(src->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
+		SetupTransitionToTransferDst(dst->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
 	}

 	// We can't copy only depth or only stencil unfortunately - or can we?.
 	if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
 		_dbg_assert_(src->depth.image != VK_NULL_HANDLE);

-		if (src->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
-			SetupTransitionToTransferSrc(src->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
-		}
+		SetupTransitionToTransferSrc(src->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
 		if (dst->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
 			SetupTransitionToTransferDst(dst->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
 			_dbg_assert_(dst->depth.layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
 		} else {
-			// Kingdom Hearts: Subsequent copies to the same depth buffer without any other use.
+			// Kingdom Hearts: Copies twice in a row to the same depth buffer without any other use.
 			// Not super sure how that happens, but we need a barrier to pass sync validation.
 			SetupTransferDstWriteAfterWrite(dst->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
 		}
 	}

+	recordBarrier_.Flush(cmd);
+
+	bool multisampled = src->sampleCount != VK_SAMPLE_COUNT_1_BIT && dst->sampleCount != VK_SAMPLE_COUNT_1_BIT;
+	if (multisampled) {
+		// If both the targets are multisampled, copy the msaa targets too.
+		// For that, we need to transition them from their normally permanent VK_*_ATTACHMENT_OPTIMAL layouts, and then back.
+		if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+			SetupTransitionToTransferSrc(src->msaaColor, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
+			recordBarrier_.Flush(cmd);
+			SetupTransitionToTransferDst(dst->msaaColor, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
+			recordBarrier_.Flush(cmd);
+		}
+		if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+			// Kingdom Hearts: Subsequent copies to the same depth buffer without any other use.
+			// Not super sure how that happens, but we need a barrier to pass sync validation.
+			SetupTransitionToTransferSrc(src->msaaDepth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
+			recordBarrier_.Flush(cmd);
+			SetupTransitionToTransferDst(dst->msaaDepth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
+			recordBarrier_.Flush(cmd);
+		}
+	}

 	recordBarrier_.Flush(cmd);
@@ -1679,6 +1727,10 @@ void VulkanQueueRunner::PerformCopy(const VKRStep &step, VkCommandBuffer cmd) {
 		copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
 		copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
 		vkCmdCopyImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &copy);
+
+		if (multisampled) {
+			vkCmdCopyImage(cmd, src->msaaColor.image, src->msaaColor.layout, dst->msaaColor.image, dst->msaaColor.layout, 1, &copy);
+		}
 	}
 	if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
 		_dbg_assert_(src->depth.image != VK_NULL_HANDLE);
@@ -1686,6 +1738,75 @@ void VulkanQueueRunner::PerformCopy(const VKRStep &step, VkCommandBuffer cmd) {
 		copy.srcSubresource.aspectMask = step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
 		copy.dstSubresource.aspectMask = step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
 		vkCmdCopyImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &copy);
+
+		if (multisampled) {
+			vkCmdCopyImage(cmd, src->msaaDepth.image, src->msaaDepth.layout, dst->msaaDepth.image, dst->msaaDepth.layout, 1, &copy);
+		}
+	}
+
+	if (multisampled) {
+		// Transition the MSAA surfaces back to optimal.
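+		// (The single-sample color/depth images track their layouts dynamically; only the
+		// MSAA images are expected to stay in *_ATTACHMENT_OPTIMAL between render passes.)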
+		if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+			recordBarrier_.TransitionImage(
+				src->msaaColor.image,
+				0,
+				1,
+				src->msaaColor.numLayers,
+				VK_IMAGE_ASPECT_COLOR_BIT,
+				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+				VK_ACCESS_TRANSFER_READ_BIT,
+				VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+				VK_PIPELINE_STAGE_TRANSFER_BIT,
+				VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
+			);
+			recordBarrier_.TransitionImage(
+				dst->msaaColor.image,
+				0,
+				1,
+				dst->msaaColor.numLayers,
+				VK_IMAGE_ASPECT_COLOR_BIT,
+				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+				VK_ACCESS_TRANSFER_WRITE_BIT,
+				VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+				VK_PIPELINE_STAGE_TRANSFER_BIT,
+				VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
+			);
+			src->msaaColor.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+			dst->msaaColor.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+		}
+		if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+			recordBarrier_.TransitionImage(
+				src->msaaDepth.image,
+				0,
+				1,
+				src->msaaDepth.numLayers,
+				VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
+				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+				VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+				VK_ACCESS_TRANSFER_READ_BIT,
+				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+				VK_PIPELINE_STAGE_TRANSFER_BIT,
+				VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
+			);
+			recordBarrier_.TransitionImage(
+				dst->msaaDepth.image,
+				0,
+				1,
+				dst->msaaDepth.numLayers,
+				VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
+				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+				VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+				VK_ACCESS_TRANSFER_WRITE_BIT,
+				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+				VK_PIPELINE_STAGE_TRANSFER_BIT,
+				VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
+			);
+			src->msaaDepth.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+			dst->msaaDepth.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+		}
+		recordBarrier_.Flush(cmd);
 	}
 }
@@ -1701,12 +1822,8 @@ void VulkanQueueRunner::PerformBlit(const VKRStep &step, VkCommandBuffer cmd) {

 	// First source barriers.
 	if (step.blit.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
-		if (src->color.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
-			SetupTransitionToTransferSrc(src->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
-		}
-		if (dst->color.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
-			SetupTransitionToTransferDst(dst->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
-		}
+		SetupTransitionToTransferSrc(src->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
+		SetupTransitionToTransferDst(dst->color, VK_IMAGE_ASPECT_COLOR_BIT, &recordBarrier_);
 	}

 	// We can't copy only depth or only stencil unfortunately.
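To make the relationship between the sample-level convention and the Vulkan flag bits concrete: MultiSampleLevelToFlagBits() above maps level N to 2^N samples, and the multiSampleLevelsMask reported by the D3D9/D3D11/GL backends earlier in this patch uses bit N for level N, so a mask of 1 advertises only level 0 (no MSAA). A minimal sketch of caller-side clamping under that assumption follows; ClampMultiSampleLevel is a hypothetical helper, not part of this patch.

#include <cstdint>

// Sketch only: clamp a requested MSAA level against a backend's reported mask,
// assuming bit N of multiSampleLevelsMask marks support for level N (2^N samples).
// Bit 0 (1x, i.e. no MSAA) is assumed to always be set.
static int ClampMultiSampleLevel(uint32_t multiSampleLevelsMask, int requestedLevel) {
	while (requestedLevel > 0 && !(multiSampleLevelsMask & (1u << requestedLevel))) {
		requestedLevel--;  // Fall back toward 1x until a supported level is found.
	}
	return requestedLevel;
}

With a mask of 1, any request collapses to level 0; a backend that also supported 4x would report (1 << 0) | (1 << 2) = 5, so a request for 8x (level 3) would clamp to 4x (level 2).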
@@ -1714,12 +1831,8 @@ void VulkanQueueRunner::PerformBlit(const VKRStep &step, VkCommandBuffer cmd) {
 		_dbg_assert_(src->depth.image != VK_NULL_HANDLE);
 		_dbg_assert_(dst->depth.image != VK_NULL_HANDLE);
 
-		if (src->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
-			SetupTransitionToTransferSrc(src->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
-		}
-		if (dst->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
-			SetupTransitionToTransferDst(dst->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
-		}
+		SetupTransitionToTransferSrc(src->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
+		SetupTransitionToTransferDst(dst->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
 	}
 
 	recordBarrier_.Flush(cmd);
@@ -1768,6 +1881,9 @@ void VulkanQueueRunner::PerformBlit(const VKRStep &step, VkCommandBuffer cmd) {
 }
 
 void VulkanQueueRunner::SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
+	if (img.layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
+		return;
+	}
 	VkImageAspectFlags imageAspect = aspect;
 	VkAccessFlags srcAccessMask = 0;
 	VkPipelineStageFlags srcStageMask = 0;
@@ -1817,6 +1933,9 @@ void VulkanQueueRunner::SetupTransitionToTransferSrc(VKRImage &img, VkImageAspec
 }
 
 void VulkanQueueRunner::SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
+	if (img.layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+		return;
+	}
 	VkImageAspectFlags imageAspect = aspect;
 	VkAccessFlags srcAccessMask = 0;
 	VkPipelineStageFlags srcStageMask = 0;
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h
index 0f844bbd8897..0d5f5053ae1b 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.h
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.h
@@ -51,6 +51,7 @@ enum class PipelineFlags : u8 {
 	USES_INPUT_ATTACHMENT = (1 << 3),
 	USES_GEOMETRY_SHADER = (1 << 4),
 	USES_MULTIVIEW = (1 << 5),  // Inherited from the render pass it was created with.
+	USES_DISCARD = (1 << 6),
 };
 ENUM_CLASS_BITOPS(PipelineFlags);
 
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp
index 98fb8a3edfac..4f95a244587e 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -27,7 +27,16 @@ using namespace PPSSPP_VK;
 
 // renderPass is an example of the "compatibility class" or RenderPassType type.
-bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType) {
+bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount) {
+	bool multisample = RenderPassTypeHasMultisample(rpType);
+	if (multisample) {
+		if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
+			_assert_(sampleCount == sampleCount_);
+		} else {
+			sampleCount_ = sampleCount;
+		}
+	}
+
 	// Fill in the last part of the desc since now it's time to block.
 	VkShaderModule vs = desc->vertexShader->BlockUntilReady();
 	VkShaderModule fs = desc->fragmentShader->BlockUntilReady();
@@ -69,13 +78,21 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
 	pipe.pDepthStencilState = &desc->dss;
 	pipe.pRasterizationState = &desc->rs;
 
+	VkPipelineMultisampleStateCreateInfo ms{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };
+	ms.rasterizationSamples = multisample ? sampleCount : VK_SAMPLE_COUNT_1_BIT;
+	if (multisample && (flags_ & PipelineFlags::USES_DISCARD)) {
+		// Extreme quality
+		ms.sampleShadingEnable = true;
+		ms.minSampleShading = 1.0f;
+	}
+
 	// We will use dynamic viewport state.
 	pipe.pVertexInputState = &desc->vis;
 	pipe.pViewportState = &desc->views;
 	pipe.pTessellationState = nullptr;
 	pipe.pDynamicState = &desc->ds;
 	pipe.pInputAssemblyState = &desc->inputAssembly;
-	pipe.pMultisampleState = &desc->ms;
+	pipe.pMultisampleState = &ms;
 	pipe.layout = desc->pipelineLayout;
 	pipe.basePipelineHandle = VK_NULL_HANDLE;
 	pipe.basePipelineIndex = 0;
@@ -104,8 +121,8 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
 		success = false;
 	} else {
 		// Success!
-		if (!tag.empty()) {
-			vulkan->SetDebugName(vkpipeline, VK_OBJECT_TYPE_PIPELINE, tag.c_str());
+		if (!tag_.empty()) {
+			vulkan->SetDebugName(vkpipeline, VK_OBJECT_TYPE_PIPELINE, tag_.c_str());
 		}
 		pipeline[(size_t)rpType]->Post(vkpipeline);
 	}
@@ -113,16 +130,25 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
 	return success;
 }
 
-void VKRGraphicsPipeline::QueueForDeletion(VulkanContext *vulkan) {
+void VKRGraphicsPipeline::DestroyVariants(VulkanContext *vulkan, bool msaaOnly) {
 	for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {
 		if (!this->pipeline[i])
 			continue;
+		if (msaaOnly && (i & (int)RenderPassType::MULTISAMPLE) == 0)
+			continue;
+
 		VkPipeline pipeline = this->pipeline[i]->BlockUntilReady();
 		// pipeline can be nullptr here, if it failed to compile before.
 		if (pipeline) {
 			vulkan->Delete().QueueDeletePipeline(pipeline);
 		}
+		this->pipeline[i] = nullptr;
 	}
+	sampleCount_ = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;
+}
+
+void VKRGraphicsPipeline::QueueForDeletion(VulkanContext *vulkan) {
+	DestroyVariants(vulkan, false);
 	vulkan->Delete().QueueCallback([](void *p) {
 		VKRGraphicsPipeline *pipeline = (VKRGraphicsPipeline *)p;
 		delete pipeline;
@@ -332,7 +358,7 @@ void VulkanRenderManager::CompileThreadFunc() {
 		for (auto &entry : toCompile) {
 			switch (entry.type) {
 			case CompileQueueEntry::Type::GRAPHICS:
-				entry.graphics->Create(vulkan_, entry.compatibleRenderPass, entry.renderPassType);
+				entry.graphics->Create(vulkan_, entry.compatibleRenderPass, entry.renderPassType, entry.sampleCount);
 				break;
 			case CompileQueueEntry::Type::COMPUTE:
 				entry.compute->Create(vulkan_);
@@ -481,12 +507,11 @@ VkCommandBuffer VulkanRenderManager::GetInitCmd() {
 	return frameData_[curFrame].GetInitCmd(vulkan_);
 }
 
-VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, const char *tag) {
-	VKRGraphicsPipeline *pipeline = new VKRGraphicsPipeline();
+VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, VkSampleCountFlagBits sampleCount, const char *tag) {
+	VKRGraphicsPipeline *pipeline = new VKRGraphicsPipeline(pipelineFlags, tag);
 	_dbg_assert_(desc->vertexShader);
 	_dbg_assert_(desc->fragmentShader);
 	pipeline->desc = desc;
-	pipeline->tag = tag;
 	if (curRenderStep_) {
 		// The common case
 		pipelinesToCheck_.push_back(pipeline);
@@ -523,7 +548,7 @@
 			}
 			pipeline->pipeline[i] = Promise<VkPipeline>::CreateEmpty();
-			compileQueue_.push_back(CompileQueueEntry(pipeline, compatibleRenderPass->Get(vulkan_, rpType), rpType));
+			compileQueue_.push_back(CompileQueueEntry(pipeline, compatibleRenderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount));
 			needsCompile = true;
 		}
 		if (needsCompile)
@@ -575,17 +600,23 @@ void VulkanRenderManager::EndCurRenderStep() {
 		if (curRenderStep_->render.framebuffer->numLayers > 1) {
 			rpType = (RenderPassType)(rpType | RenderPassType::MULTIVIEW);
 		}
+
+		if (curRenderStep_->render.framebuffer->sampleCount != VK_SAMPLE_COUNT_1_BIT) {
+			rpType = (RenderPassType)(rpType | RenderPassType::MULTISAMPLE);
+		}
 	}
 
 	VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);
 	curRenderStep_->render.renderPassType = rpType;
 
+	VkSampleCountFlagBits sampleCount = curRenderStep_->render.framebuffer ? curRenderStep_->render.framebuffer->sampleCount : VK_SAMPLE_COUNT_1_BIT;
+
 	compileMutex_.lock();
 	bool needsCompile = false;
 	for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {
 		if (!pipeline->pipeline[(size_t)rpType]) {
 			pipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();
-			compileQueue_.push_back(CompileQueueEntry(pipeline, renderPass->Get(vulkan_, rpType), rpType));
+			compileQueue_.push_back(CompileQueueEntry(pipeline, renderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount));
 			needsCompile = true;
 		}
 	}
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h
index fcc843535acb..c15d9027a277 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.h
+++ b/Common/GPU/Vulkan/VulkanRenderManager.h
@@ -82,7 +82,6 @@ struct VKRGraphicsPipelineDesc {
 	VkDynamicState dynamicStates[6]{};
 	VkPipelineDynamicStateCreateInfo ds{ VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO };
 	VkPipelineRasterizationStateCreateInfo rs{ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO };
-	VkPipelineMultisampleStateCreateInfo ms{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };
 
 	// Replaced the ShaderStageInfo with promises here so we can wait for compiles to finish.
 	Promise<VkShaderModule> *vertexShader = nullptr;
@@ -116,13 +115,16 @@ struct VKRComputePipelineDesc {
 
 // Wrapped pipeline. Doesn't own desc.
 struct VKRGraphicsPipeline {
+	VKRGraphicsPipeline(PipelineFlags flags, const char *tag) : flags_(flags), tag_(tag) {}
 	~VKRGraphicsPipeline() {
 		for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {
 			delete pipeline[i];
 		}
 	}
 
-	bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType);
+	bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount);
+
+	void DestroyVariants(VulkanContext *vulkan, bool msaaOnly);
 
 	// This deletes the whole VKRGraphicsPipeline, you must remove your last pointer to it when doing this.
 	void QueueForDeletion(VulkanContext *vulkan);
@@ -133,7 +135,12 @@ struct VKRGraphicsPipeline {
 	VKRGraphicsPipelineDesc *desc = nullptr;  // not owned!
 	Promise<VkPipeline> *pipeline[(size_t)RenderPassType::TYPE_COUNT]{};
-	std::string tag;
+
+	VkSampleCountFlagBits SampleCount() const { return sampleCount_; }
+
+private:
+	std::string tag_;
+	PipelineFlags flags_;
+	VkSampleCountFlagBits sampleCount_ = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;
 };
 
 struct VKRComputePipeline {
@@ -151,9 +158,9 @@ struct VKRComputePipeline {
 };
 
 struct CompileQueueEntry {
-	CompileQueueEntry(VKRGraphicsPipeline *p, VkRenderPass _compatibleRenderPass, RenderPassType _renderPassType)
-		: type(Type::GRAPHICS), graphics(p), compatibleRenderPass(_compatibleRenderPass), renderPassType(_renderPassType) {}
-	CompileQueueEntry(VKRComputePipeline *p) : type(Type::COMPUTE), compute(p), renderPassType(RenderPassType::HAS_DEPTH) {}  // renderpasstype here shouldn't matter
+	CompileQueueEntry(VKRGraphicsPipeline *p, VkRenderPass _compatibleRenderPass, RenderPassType _renderPassType, VkSampleCountFlagBits _sampleCount)
+		: type(Type::GRAPHICS), graphics(p), compatibleRenderPass(_compatibleRenderPass), renderPassType(_renderPassType), sampleCount(_sampleCount) {}
+	CompileQueueEntry(VKRComputePipeline *p) : type(Type::COMPUTE), compute(p), renderPassType(RenderPassType::HAS_DEPTH), sampleCount(VK_SAMPLE_COUNT_1_BIT) {}  // renderpasstype here shouldn't matter
 	enum class Type {
 		GRAPHICS,
 		COMPUTE,
@@ -163,6 +170,7 @@ struct CompileQueueEntry {
 	RenderPassType renderPassType;
 	VKRGraphicsPipeline *graphics = nullptr;
 	VKRComputePipeline *compute = nullptr;
+	VkSampleCountFlagBits sampleCount;
 };
 
 class VulkanRenderManager {
@@ -217,7 +225,7 @@ class VulkanRenderManager {
 	// We delay creating pipelines until the end of the current render pass, so we can create the right type immediately.
 	// Unless a variantBitmask is passed in, in which case we can just go ahead.
 	// WARNING: desc must stick around during the lifetime of the pipeline! It's not enough to build it on the stack and drop it.
-	VKRGraphicsPipeline *CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, const char *tag);
+	VKRGraphicsPipeline *CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, VkSampleCountFlagBits sampleCount, const char *tag);
 
 	VKRComputePipeline *CreateComputePipeline(VKRComputePipelineDesc *desc);
 
 	void NudgeCompilerThread() {
@@ -450,6 +458,8 @@ class VulkanRenderManager {
 		return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES;
 	}
 
+	void Invalidate(InvalidationFlags flags);
+
 	void ResetStats();
 
 private:
diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp
index f52cc51ab4bd..8381256640a2 100644
--- a/Common/GPU/Vulkan/thin3d_vulkan.cpp
+++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp
@@ -242,7 +242,7 @@ bool VKShaderModule::Compile(VulkanContext *vulkan, ShaderLanguage language, con
 #endif
 
 	VkShaderModule shaderModule = VK_NULL_HANDLE;
-	if (vulkan->CreateShaderModule(spirv, &shaderModule, vkstage_ == VK_SHADER_STAGE_VERTEX_BIT ? "thin3d_vs" : "thin3d_fs")) {
+	if (vulkan->CreateShaderModule(spirv, &shaderModule, tag_.c_str())) {
 		module_ = Promise<VkShaderModule>::AlreadyDone(shaderModule);
 		ok_ = true;
 	} else {
@@ -832,6 +832,9 @@ VKContext::VKContext(VulkanContext *vulkan)
 	caps_.blendMinMaxSupported = true;
 	caps_.logicOpSupported = vulkan->GetDeviceFeatures().enabled.standard.logicOp != 0;
 	caps_.multiViewSupported = vulkan->GetDeviceFeatures().enabled.multiview.multiview != 0;
+	caps_.sampleRateShadingSupported = vulkan->GetDeviceFeatures().enabled.standard.sampleRateShading != 0;
+
+	const auto &limits = vulkan->GetPhysicalDeviceProperties().properties.limits;
 
 	auto deviceProps = vulkan->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDeviceIndex()).properties;
 
@@ -842,7 +845,36 @@ VKContext::VKContext(VulkanContext *vulkan)
 	case VULKAN_VENDOR_NVIDIA: caps_.vendor = GPUVendor::VENDOR_NVIDIA; break;
 	case VULKAN_VENDOR_QUALCOMM: caps_.vendor = GPUVendor::VENDOR_QUALCOMM; break;
 	case VULKAN_VENDOR_INTEL: caps_.vendor = GPUVendor::VENDOR_INTEL; break;
-	default: caps_.vendor = GPUVendor::VENDOR_UNKNOWN; break;
+	case VULKAN_VENDOR_APPLE: caps_.vendor = GPUVendor::VENDOR_APPLE; break;
+	default:
+		WARN_LOG(G3D, "Unknown vendor ID %08x", deviceProps.vendorID);
+		caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
+		break;
+	}
+
+	bool hasLazyMemory = false;
+	for (u32 i = 0; i < vulkan->GetMemoryProperties().memoryTypeCount; i++) {
+		if (vulkan->GetMemoryProperties().memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) {
+			hasLazyMemory = true;
+		}
+	}
+	caps_.isTilingGPU = hasLazyMemory && caps_.vendor != GPUVendor::VENDOR_APPLE;
+
+	// VkSampleCountFlagBits is arranged correctly for our purposes.
+	// Only support MSAA levels that have support for all three of color, depth, stencil.
+	if (!caps_.isTilingGPU) {
+		// Check for depth stencil resolve. Without it, depth textures won't work, and we don't want that mess
+		// of compatibility reports, so we'll just disable multisampling in this case for now.
+		// There are potential workarounds for devices that don't support it, but those are nearly non-existent now.
+		const auto &resolveProperties = vulkan->GetPhysicalDeviceProperties().depthStencilResolve;
+		if (vulkan->Extensions().KHR_depth_stencil_resolve &&
+			((resolveProperties.supportedDepthResolveModes & resolveProperties.supportedStencilResolveModes) & VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) != 0) {
+			caps_.multiSampleLevelsMask = (limits.framebufferColorSampleCounts & limits.framebufferDepthSampleCounts & limits.framebufferStencilSampleCounts);
+		} else {
+			caps_.multiSampleLevelsMask = 1;
+		}
+	} else {
+		caps_.multiSampleLevelsMask = 1;
 	}
 
 	if (caps_.vendor == GPUVendor::VENDOR_QUALCOMM) {
@@ -1189,9 +1221,6 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
 	}
 	gDesc.ds.pDynamicStates = gDesc.dynamicStates;
 
-	gDesc.ms.pSampleMask = nullptr;
-	gDesc.ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
-
 	gDesc.views.viewportCount = 1;
 	gDesc.views.scissorCount = 1;
 	gDesc.views.pViewports = nullptr;  // dynamic
@@ -1202,7 +1231,7 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
 	VkPipelineRasterizationStateCreateInfo rs{ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO };
 	raster->ToVulkan(&gDesc.rs);
 
-	pipeline->pipeline = renderManager_.CreateGraphicsPipeline(&gDesc, pipelineFlags, 1 << (size_t)RenderPassType::BACKBUFFER, tag ? tag : "thin3d");
+	pipeline->pipeline = renderManager_.CreateGraphicsPipeline(&gDesc, pipelineFlags, 1 << (size_t)RenderPassType::BACKBUFFER, VK_SAMPLE_COUNT_1_BIT, tag ? tag : "thin3d");
 
 	if (desc.uniformDesc) {
 		pipeline->dynamicUniformSize = (int)desc.uniformDesc->uniformBufferSize;
@@ -1553,15 +1582,16 @@ uint32_t VKContext::GetDataFormatSupport(DataFormat fmt) const {
 // use this frame's init command buffer.
 class VKFramebuffer : public Framebuffer {
 public:
-	VKFramebuffer(VKRFramebuffer *fb) : buf_(fb) {
+	VKFramebuffer(VKRFramebuffer *fb, int multiSampleLevel) : buf_(fb) {
 		_assert_msg_(fb, "Null fb in VKFramebuffer constructor");
 		width_ = fb->width;
 		height_ = fb->height;
 		layers_ = fb->numLayers;
+		multiSampleLevel_ = multiSampleLevel;
 	}
 	~VKFramebuffer() {
 		_assert_msg_(buf_, "Null buf_ in VKFramebuffer - double delete?");
-		buf_->vulkan_->Delete().QueueCallback([](void *fb) {
+		buf_->Vulkan()->Delete().QueueCallback([](void *fb) {
			VKRFramebuffer *vfb = static_cast<VKRFramebuffer *>(fb);
 			delete vfb;
 		}, buf_);
@@ -1576,9 +1606,14 @@ class VKFramebuffer : public Framebuffer {
 };
 
 Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) {
+	_assert_(desc.multiSampleLevel >= 0);
+	_assert_(desc.numLayers > 0);
+	_assert_(desc.width > 0);
+	_assert_(desc.height > 0);
+
 	VkCommandBuffer cmd = renderManager_.GetInitCmd();
-	VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.numLayers, desc.z_stencil, desc.tag);
-	return new VKFramebuffer(vkrfb);
+	VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.numLayers, desc.multiSampleLevel, desc.z_stencil, desc.tag);
+	return new VKFramebuffer(vkrfb, desc.multiSampleLevel);
 }
 
 void VKContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y, int z, Framebuffer *dstfb, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) {
@@ -1728,7 +1763,7 @@ uint64_t VKContext::GetNativeObject(NativeObject obj, void *srcObject) {
 	case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW_ALL_LAYERS:
 		return (uint64_t)curFramebuffer_->GetFB()->color.texAllLayersView;
 	case NativeObject::BOUND_FRAMEBUFFER_COLOR_IMAGEVIEW_RT:
-		return (uint64_t)curFramebuffer_->GetFB()->color.rtView;
+		return (uint64_t)curFramebuffer_->GetFB()->GetRTView();
 	case NativeObject::FRAME_DATA_DESC_SET_LAYOUT:
 		return (uint64_t)frameDescSetLayout_;
 	case NativeObject::THIN3D_PIPELINE_LAYOUT:
diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h
index f36e0cf872f6..78ae3375b831 100644
--- a/Common/GPU/thin3d.h
+++ b/Common/GPU/thin3d.h
@@ -300,6 +300,7 @@ struct FramebufferDesc {
 	int height;
 	int depth;
 	int numLayers;
+	int multiSampleLevel;  // 0 = 1xaa, 1 = 2xaa, and so on.
bool z_stencil; const char *tag; // For graphics debuggers }; @@ -436,10 +437,11 @@ class Framebuffer : public RefCountedObject { int Width() { return width_; } int Height() { return height_; } int Layers() { return layers_; } + int MultiSampleLevel() { return multiSampleLevel_; } virtual void UpdateTag(const char *tag) {} protected: - int width_ = -1, height_ = -1, layers_ = 1; + int width_ = -1, height_ = -1, layers_ = 1, multiSampleLevel_ = 0; }; class Buffer : public RefCountedObject { @@ -576,7 +578,10 @@ struct DeviceCaps { bool textureDepthSupported; bool blendMinMaxSupported; bool multiViewSupported; + bool isTilingGPU; // This means that it benefits from correct store-ops, msaa without backing memory, etc. + bool sampleRateShadingSupported; + u32 multiSampleLevelsMask; // Bit n is set if (1 << n) is a valid multisample level. Bit 0 is always set. std::string deviceName; // The device name to use when creating the thin3d context, to get the same one. }; diff --git a/Core/Config.cpp b/Core/Config.cpp index 40e3478d3f21..b053500bf15e 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -901,6 +901,7 @@ static ConfigSetting graphicsSettings[] = { // Most low-performance (and many high performance) mobile GPUs do not support aniso anyway so defaulting to 4 is fine. ConfigSetting("AnisotropyLevel", &g_Config.iAnisotropyLevel, 4, true, true), + ConfigSetting("MultiSampleLevel", &g_Config.iMultiSampleLevel, 0, true, true), // Number of samples is 1 << iMultiSampleLevel ReportedConfigSetting("VertexDecCache", &g_Config.bVertexCache, false, true, true), ReportedConfigSetting("TextureBackoffCache", &g_Config.bTextureBackoffCache, false, true, true), diff --git a/Core/Config.h b/Core/Config.h index 6e8a6e808db7..342b5c34504c 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -210,6 +210,7 @@ struct Config { int iForceFullScreen = -1; // -1 = nope, 0 = force off, 1 = force on (not saved.) int iInternalResolution; // 0 = Auto (native), 1 = 1x (480x272), 2 = 2x, 3 = 3x, 4 = 4x and so on. 
 	int iAnisotropyLevel;  // 0 - 5, powers of 2: 0 = 1x = no aniso
+	int iMultiSampleLevel;
 	int bHighQualityDepth;
 	bool bReplaceTextures;
 	bool bSaveNewTextures;
diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp
index 93f30f3323e1..2d85308a9261 100644
--- a/GPU/Common/FragmentShaderGenerator.cpp
+++ b/GPU/Common/FragmentShaderGenerator.cpp
@@ -51,9 +51,7 @@ static const SamplerDef samplersStereo[3] = {
 
 bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLanguageDesc &compat, Draw::Bugs bugs, uint64_t *uniformMask, FragmentShaderFlags *fragmentShaderFlags, std::string *errorString) {
 	*uniformMask = 0;
-	if (fragmentShaderFlags) {
-		*fragmentShaderFlags = (FragmentShaderFlags)0;
-	}
+	*fragmentShaderFlags = (FragmentShaderFlags)0;
 	errorString->clear();
 
 	bool useStereo = id.Bit(FS_BIT_STEREO);
@@ -203,9 +201,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
 			p.F("layout (set = 1, binding = %d) uniform sampler2DArray fbotex;\n", DRAW_BINDING_2ND_TEXTURE);
 		} else if (fetchFramebuffer) {
 			p.F("layout (input_attachment_index = 0, set = 1, binding = %d) uniform subpassInput inputColor;\n", DRAW_BINDING_INPUT_ATTACHMENT);
-			if (fragmentShaderFlags) {
-				*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
-			}
+			*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
 		}
 
 		if (shaderDepalMode != ShaderDepalMode::OFF) {
@@ -922,6 +918,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
 	const char *discardStatement = testForceToZero ? "v.a = 0.0;" : "DISCARD;";
 
 	if (enableAlphaTest) {
+		*fragmentShaderFlags |= FragmentShaderFlags::USES_DISCARD;
+
 		if (alphaTestAgainstZero) {
 			// When testing against 0 (extremely common), we can avoid some math.
 			// 0.002 is approximately half of 1.0 / 255.0.
@@ -959,6 +957,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
 	}
 
 	if (enableColorTest) {
+		*fragmentShaderFlags |= FragmentShaderFlags::USES_DISCARD;
+
 		if (colorTestAgainstZero) {
 			// When testing against 0 (common), we can avoid some math.
 			// 0.002 is approximately half of 1.0 / 255.0.
diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h
index 8f358fa7ff8e..88c2c3f9d6cc 100644
--- a/GPU/Common/FragmentShaderGenerator.h
+++ b/GPU/Common/FragmentShaderGenerator.h
@@ -43,6 +43,7 @@ struct FShaderID;
 // Can technically be deduced from the fragment shader ID, but this is safer.
 enum class FragmentShaderFlags : u32 {
 	INPUT_ATTACHMENT = 1,
+	USES_DISCARD = 2,
 };
 ENUM_CLASS_BITOPS(FragmentShaderFlags);
 
diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp
index 05521c047e4b..5bf2cdc9de92 100644
--- a/GPU/Common/FramebufferManagerCommon.cpp
+++ b/GPU/Common/FramebufferManagerCommon.cpp
@@ -89,7 +89,7 @@ void FramebufferManagerCommon::Init() {
 }
 
 bool FramebufferManagerCommon::UpdateRenderSize() {
-	const bool newRender = renderWidth_ != (float)PSP_CoreParameter().renderWidth || renderHeight_ != (float)PSP_CoreParameter().renderHeight;
+	const bool newRender = renderWidth_ != (float)PSP_CoreParameter().renderWidth || renderHeight_ != (float)PSP_CoreParameter().renderHeight || msaaLevel_ != g_Config.iMultiSampleLevel;
 
 	const int effectiveBloomHack = PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn ? 3 : g_Config.iBloomHack;
@@ -99,6 +99,7 @@ bool FramebufferManagerCommon::UpdateRenderSize() {
 	renderWidth_ = (float)PSP_CoreParameter().renderWidth;
 	renderHeight_ = (float)PSP_CoreParameter().renderHeight;
 	renderScaleFactor_ = (float)PSP_CoreParameter().renderScaleFactor;
+	msaaLevel_ = g_Config.iMultiSampleLevel;
 	bloomHack_ = effectiveBloomHack;
 	useBufferedRendering_ = newBuffered;
 
@@ -946,6 +947,14 @@ void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, Vir
 	bool useRaster = draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported && draw_->GetDeviceCaps().textureDepthSupported;
 
+	if (src->fbo->MultiSampleLevel() > 0 && dst->fbo->MultiSampleLevel() > 0) {
+		// If multisampling, we want to copy depth properly so we get all the samples, to avoid aliased edges.
+		// Can be seen in the fire in Jeanne d'Arc, for example.
+		if (useRaster && useCopy) {
+			useRaster = false;
+		}
+	}
+
 	int w = std::min(src->renderWidth, dst->renderWidth);
 	int h = std::min(src->renderHeight, dst->renderHeight);
 
@@ -1652,7 +1661,10 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
 	shaderManager_->DirtyLastShader();
 	char tag[128];
 	size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
-	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), true, tag });
+
+	int msaaLevel = g_Config.iMultiSampleLevel;
+
+	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel, true, tag });
 	if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
 		NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
 	}
@@ -2019,7 +2031,7 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
 	char name[64];
 	snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
 	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
-	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), true, name });
+	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, name });
 
 	vfbs_.push_back(vfb);
 
 	u32 byteSize = ColorBufferByteSize(vfb);
@@ -2072,7 +2084,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
 		snprintf(name, sizeof(name), "download_temp");
 		// TODO: We don't have a way to create a depth-only framebuffer yet.
 		// Also, at least on Vulkan we always create both depth and color, need to rework how we handle renderpasses.
-		nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, channel == RASTER_DEPTH ? true : false, name });
+		nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, channel == RASTER_DEPTH ? true : false, name });
 		if (!nvfb->fbo) {
 			ERROR_LOG(FRAMEBUF, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
 			delete nvfb;
@@ -2466,7 +2478,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u
 	char name[128];
 	snprintf(name, sizeof(name), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_);
 
-	Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), z_stencil, name });
+	Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), 0, z_stencil, name });
 	if (!fbo) {
 		return nullptr;
 	}
@@ -2874,11 +2886,7 @@ static void DoRelease(T *&obj) {
 	obj = nullptr;
 }
 
-void FramebufferManagerCommon::DeviceLost() {
-	DestroyAllFBOs();
-
-	presentation_->DeviceLost();
-
+void FramebufferManagerCommon::ReleasePipelines() {
 	for (int i = 0; i < ARRAY_SIZE(reinterpretFromTo_); i++) {
 		for (int j = 0; j < ARRAY_SIZE(reinterpretFromTo_); j++) {
 			DoRelease(reinterpretFromTo_[i][j]);
@@ -2895,9 +2903,16 @@ void FramebufferManagerCommon::DeviceLost() {
 	DoRelease(draw2DPipelineDepth_);
 	DoRelease(draw2DPipeline565ToDepth_);
 	DoRelease(draw2DPipeline565ToDepthDeswizzle_);
+}
+
+void FramebufferManagerCommon::DeviceLost() {
+	DestroyAllFBOs();
+	presentation_->DeviceLost();
 
 	draw2D_.DeviceLost();
 
+	ReleasePipelines();
+
 	draw_ = nullptr;
 }
 
@@ -2998,9 +3013,9 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX
 	bool useBlit = channel == RASTER_COLOR ? draw_->GetDeviceCaps().framebufferBlitSupported : false;
 	bool useCopy = channel == RASTER_COLOR ? draw_->GetDeviceCaps().framebufferCopySupported : false;
-	if (dst == currentRenderVfb_) {
+	if (dst == currentRenderVfb_ || dst->fbo->MultiSampleLevel() != 0 || src->fbo->MultiSampleLevel() != 0) {
 		// If already bound, using either a blit or a copy is unlikely to be an optimization.
-		// So we're gonna use a raster draw instead.
+		// So we're gonna use a raster draw instead. Also, multisampling currently has problems with copies.
 		useBlit = false;
 		useCopy = false;
 	}
@@ -3156,7 +3171,7 @@ VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(Vi
 	char tag[128];
 	FormatFramebufferName(vfb, tag, sizeof(tag));
-	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), true, tag });
+	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, tag });
 	vfbs_.push_back(vfb);
 }
diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h
index 676c024f7c87..27380df45481 100644
--- a/GPU/Common/FramebufferManagerCommon.h
+++ b/GPU/Common/FramebufferManagerCommon.h
@@ -443,6 +443,8 @@ class FramebufferManagerCommon {
 		int scaleFactor,  // usually unused, except for swizzle...
 		Draw2DPipeline *pipeline, const char *tag);
 
+	void ReleasePipelines();
+
 protected:
 	virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
 	// Used for when a shader is required, such as GLES.
@@ -545,6 +547,7 @@ class FramebufferManagerCommon { float renderWidth_ = 0.0f; float renderHeight_ = 0.0f; + int msaaLevel_ = 0; int renderScaleFactor_ = 1; int pixelWidth_ = 0; int pixelHeight_ = 0; diff --git a/GPU/Common/PresentationCommon.cpp b/GPU/Common/PresentationCommon.cpp index 5980bbb95e3e..e07eec14999f 100644 --- a/GPU/Common/PresentationCommon.cpp +++ b/GPU/Common/PresentationCommon.cpp @@ -270,7 +270,7 @@ bool PresentationCommon::UpdatePostShader() { previousIndex_ = 0; for (int i = 0; i < FRAMES; ++i) { - previousFramebuffers_[i] = draw_->CreateFramebuffer({ w, h, 1, 1, false, "inter_presentation" }); + previousFramebuffers_[i] = draw_->CreateFramebuffer({ w, h, 1, 1, 0, false, "inter_presentation" }); if (!previousFramebuffers_[i]) { DestroyPostShader(); return false; @@ -386,7 +386,7 @@ bool PresentationCommon::AllocateFramebuffer(int w, int h) { } // No depth/stencil for post processing - Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, false, "presentation" }); + Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, 0, false, "presentation" }); if (!fbo) { return false; } diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 893772c31b3d..9aa9e25c5281 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1298,6 +1298,7 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { desc.depth = 1; desc.z_stencil = false; desc.numLayers = 1; + desc.multiSampleLevel = 0; desc.tag = "dynamic_clut"; dynamicClutFbo_ = draw_->CreateFramebuffer(desc); desc.tag = "dynamic_clut_temp"; diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index b797e78b6467..b7d430fa5927 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -130,9 +130,7 @@ static const char * const boneWeightDecl[9] = { bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguageDesc &compat, Draw::Bugs bugs, uint32_t *attrMask, uint64_t *uniformMask, VertexShaderFlags *vertexShaderFlags, std::string *errorString) { *attrMask = 0; *uniformMask = 0; - if (vertexShaderFlags) { - *vertexShaderFlags = (VertexShaderFlags)0; - } + *vertexShaderFlags = (VertexShaderFlags)0; bool highpFog = false; bool highpTexcoord = false; diff --git a/GPU/D3D11/ShaderManagerD3D11.cpp b/GPU/D3D11/ShaderManagerD3D11.cpp index f8788f6df54d..72668fc71856 100644 --- a/GPU/D3D11/ShaderManagerD3D11.cpp +++ b/GPU/D3D11/ShaderManagerD3D11.cpp @@ -212,7 +212,8 @@ void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader ** std::string genErrorString; uint32_t attrMask; uint64_t uniformMask; - GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &genErrorString); + VertexShaderFlags flags; + GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &flags, &genErrorString); _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_)); vs = new D3D11VertexShader(device_, featureLevel_, VSID, codeBuffer_, vertType, useHWTransform); vsCache_[VSID] = vs; @@ -227,7 +228,8 @@ void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader ** // Fragment shader not in cache. Let's compile it. 
std::string genErrorString; uint64_t uniformMask; - GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, nullptr, &genErrorString); + FragmentShaderFlags flags; + GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, &flags, &genErrorString); _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_)); fs = new D3D11FragmentShader(device_, featureLevel_, FSID, codeBuffer_, useHWTransform); fsCache_[FSID] = fs; diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 0b419cfe76ca..d7f31b885241 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -581,7 +581,8 @@ VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellat std::string genErrorString; uint32_t attrMask; uint64_t uniformMask; - if (GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &genErrorString)) { + VertexShaderFlags flags; + if (GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &flags, &genErrorString)) { vs = new VSShader(device_, VSID, codeBuffer_, useHWTransform); } if (!vs || vs->Failed()) { @@ -606,7 +607,7 @@ VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellat // Can still work with software transform. uint32_t attrMask; uint64_t uniformMask; - bool success = GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &genErrorString); + bool success = GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &flags, &genErrorString); _assert_(success); vs = new VSShader(device_, VSID, codeBuffer_, false); } @@ -623,7 +624,8 @@ VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellat // Fragment shader not in cache. Let's compile it. std::string errorString; uint64_t uniformMask; - bool success = GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, nullptr, &errorString); + FragmentShaderFlags flags; + bool success = GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, &flags, &errorString); // We're supposed to handle all possible cases. 
_assert_(success); fs = new PSShader(device_, FSID, codeBuffer_); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 61cf8893111b..7e4ae0f8819c 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -753,7 +753,8 @@ void ShaderManagerGLES::DirtyLastShader() { Shader *ShaderManagerGLES::CompileFragmentShader(FShaderID FSID) { uint64_t uniformMask; std::string errorString; - if (!GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, nullptr, &errorString)) { + FragmentShaderFlags flags; + if (!GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, &flags, &errorString)) { ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str()); return nullptr; } @@ -768,7 +769,8 @@ Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) { uint32_t attrMask; uint64_t uniformMask; std::string errorString; - if (!GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, nullptr, &errorString)) { + VertexShaderFlags flags; + if (!GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &flags, &errorString)) { ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str()); return nullptr; } diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 76b14bdc9eb4..7d2cde8b121b 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -267,7 +267,7 @@ class GPUCommon : public GPUInterface, public GPUDebugInterface { void DeviceLost() override; void DeviceRestore() override; - void CheckRenderResized(); + virtual void CheckRenderResized(); // Add additional common features dependent on other features, which may be backend-determined. u32 CheckGPUFeaturesLate(u32 features) const; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index aed306b4c295..f91ed4df80b7 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -269,6 +269,12 @@ u32 GPU_Vulkan::CheckGPUFeatures() const { } } + // We need to turn off framebuffer fetch through input attachments if MSAA is on for now. + // This is fixable, just needs some shader generator work (subpassInputMS). 
+	if (g_Config.iMultiSampleLevel != 0) {
+		features &= ~GPU_USE_FRAMEBUFFER_FETCH;
+	}
+
 	return CheckGPUFeaturesLate(features);
 }
 
@@ -459,6 +465,14 @@ void GPU_Vulkan::DestroyDeviceObjects() {
 	}
 }
 
+void GPU_Vulkan::CheckRenderResized() {
+	if (renderResized_) {
+		GPUCommon::CheckRenderResized();
+		pipelineManager_->InvalidateMSAAPipelines();
+		framebufferManager_->ReleasePipelines();
+	}
+}
+
 void GPU_Vulkan::DeviceLost() {
 	CancelReady();
 	while (!IsReady()) {
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index ee45b43e5bf8..7b83f4d583a4 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -71,6 +71,7 @@ class GPU_Vulkan : public GPUCommon {
 
 protected:
 	void FinishDeferred() override;
+	void CheckRenderResized() override;
 
 private:
 	void Flush() {
diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp
index d1b2da92b6af..0c081c3f78ba 100644
--- a/GPU/Vulkan/PipelineManagerVulkan.cpp
+++ b/GPU/Vulkan/PipelineManagerVulkan.cpp
@@ -8,6 +8,7 @@
 #include "Common/Log.h"
 #include "Common/StringUtils.h"
 #include "Common/GPU/Vulkan/VulkanContext.h"
+#include "Core/Config.h"
 #include "GPU/Vulkan/VulkanUtil.h"
 #include "GPU/Vulkan/PipelineManagerVulkan.h"
 #include "GPU/Vulkan/ShaderManagerVulkan.h"
@@ -48,6 +49,12 @@ void PipelineManagerVulkan::Clear() {
 	pipelines_.Clear();
 }
 
+void PipelineManagerVulkan::InvalidateMSAAPipelines() {
+	pipelines_.Iterate([&](const VulkanPipelineKey &key, VulkanPipeline *value) {
+		value->pipeline->DestroyVariants(vulkan_, true);
+	});
+}
+
 void PipelineManagerVulkan::DeviceLost() {
 	Clear();
 	if (pipelineCache_ != VK_NULL_HANDLE)
@@ -171,7 +178,7 @@ static std::string CutFromMain(std::string str) {
 }
 
 static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager, VkPipelineCache pipelineCache,
-	VkPipelineLayout layout, PipelineFlags pipelineFlags, const VulkanPipelineRasterStateKey &key,
+	VkPipelineLayout layout, PipelineFlags pipelineFlags, VkSampleCountFlagBits sampleCount, const VulkanPipelineRasterStateKey &key,
 	const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, VulkanGeometryShader *gs, bool useHwTransform, u32 variantBitmask) {
 	VulkanPipeline *vulkanPipeline = new VulkanPipeline();
 	VKRGraphicsPipelineDesc *desc = &vulkanPipeline->desc;
@@ -249,17 +256,13 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
 	rs.polygonMode = VK_POLYGON_MODE_FILL;
 	rs.depthClampEnable = key.depthClampEnable;
 
-	VkPipelineMultisampleStateCreateInfo &ms = desc->ms;
-	ms.pSampleMask = nullptr;
-	ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
-
 	desc->fragmentShader = fs->GetModule();
 	desc->vertexShader = vs->GetModule();
 	desc->geometryShader = gs ? gs->GetModule() : nullptr;
 	desc->fragmentShaderSource = fs->GetShaderString(SHADER_STRING_SOURCE_CODE);
 	desc->vertexShaderSource = vs->GetShaderString(SHADER_STRING_SOURCE_CODE);
 	if (gs) {
-		desc->geometryShaderSource = gs->GetShaderString(SHADER_STRING_SOURCE_CODE); 
+		desc->geometryShaderSource = gs->GetShaderString(SHADER_STRING_SOURCE_CODE);
 	}
 
 	VkPipelineInputAssemblyStateCreateInfo &inputAssembly = desc->inputAssembly;
@@ -307,7 +310,7 @@ static VulkanPipeline *CreateVulkanPipeline(VulkanRenderManager *renderManager,
 	tag = FragmentShaderDesc(fs->GetID()) + " VS " + VertexShaderDesc(vs->GetID());
 #endif
 
-	VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, pipelineFlags, variantBitmask, tag.c_str());
+	VKRGraphicsPipeline *pipeline = renderManager->CreateGraphicsPipeline(desc, pipelineFlags, variantBitmask, sampleCount, tag.c_str());
 
 	vulkanPipeline->pipeline = pipeline;
 	if (useBlendConstant) {
@@ -347,12 +350,17 @@ VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VulkanRenderManager *
 	if (fs->Flags() & FragmentShaderFlags::INPUT_ATTACHMENT) {
 		pipelineFlags |= PipelineFlags::USES_INPUT_ATTACHMENT;
 	}
+	if (fs->Flags() & FragmentShaderFlags::USES_DISCARD) {
+		pipelineFlags |= PipelineFlags::USES_DISCARD;
+	}
 	if (vs->Flags() & VertexShaderFlags::MULTI_VIEW) {
 		pipelineFlags |= PipelineFlags::USES_MULTIVIEW;
 	}
 
+	VkSampleCountFlagBits sampleCount = MultiSampleLevelToFlagBits(g_Config.iMultiSampleLevel);
+
 	VulkanPipeline *pipeline = CreateVulkanPipeline(
-		renderManager, pipelineCache_, layout, pipelineFlags,
+		renderManager, pipelineCache_, layout, pipelineFlags, sampleCount,
 		rasterKey, decFmt, vs, fs, gs, useHwTransform, variantBitmask);
 	pipelines_.Insert(key, pipeline);
diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h
index 32e45c3faf7d..dcd13c54c5b7 100644
--- a/GPU/Vulkan/PipelineManagerVulkan.h
+++ b/GPU/Vulkan/PipelineManagerVulkan.h
@@ -62,6 +62,7 @@ struct VulkanPipeline {
 	bool UsesDepthStencil() const { return (pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) != 0; }
 	bool UsesInputAttachment() const { return (pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT) != 0; }
 	bool UsesGeometryShader() const { return (pipelineFlags & PipelineFlags::USES_GEOMETRY_SHADER) != 0; }
+	bool UsesDiscard() const { return (pipelineFlags & PipelineFlags::USES_DISCARD) != 0; }
 
 	u32 GetVariantsBitmask() const;
 };
@@ -87,6 +88,8 @@ class PipelineManagerVulkan {
 	void DeviceLost();
 	void DeviceRestore(VulkanContext *vulkan);
 
+	void InvalidateMSAAPipelines();
+
 	std::string DebugGetObjectString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
 	std::vector<std::string> DebugGetObjectIDs(DebugShaderType type);
 
diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp
index b2ad14f16eb2..9bb4f69995ed 100644
--- a/UI/GameSettingsScreen.cpp
+++ b/UI/GameSettingsScreen.cpp
@@ -305,6 +305,21 @@ void GameSettingsScreen::CreateViews() {
 		return !g_Config.bSoftwareRendering && !g_Config.bSkipBufferEffects;
 	});
 
+	if (draw->GetDeviceCaps().multiSampleLevelsMask != 1) {
+		static const char *msaaModes[] = { "Off", "2x", "4x", "8x", "16x" };
+		auto msaaChoice = graphicsSettings->Add(new PopupMultiChoice(&g_Config.iMultiSampleLevel, gr->T("Antialiasing (MSAA)"), msaaModes, 0, ARRAY_SIZE(msaaModes), gr->GetName(), screenManager()));
+		msaaChoice->OnChoice.Add([&](UI::EventParams &) -> UI::EventReturn {
+			NativeMessageReceived("gpu_renderResized", "");
+			return UI::EVENT_DONE;
+		});
+
+		// Hide unsupported levels.
+		for (int i = 1; i < 5; i++) {
+			if ((draw->GetDeviceCaps().multiSampleLevelsMask & (1 << i)) == 0) {
+				msaaChoice->HideChoice(i);
+			}
+		}
+	}
+
 #if PPSSPP_PLATFORM(ANDROID)
 	if ((deviceType != DEVICE_TYPE_TV) && (deviceType != DEVICE_TYPE_VR)) {
 		static const char *deviceResolutions[] = { "Native device resolution", "Auto (same as Rendering)", "1x PSP", "2x PSP", "3x PSP", "4x PSP", "5x PSP" };
diff --git a/headless/Headless.cpp b/headless/Headless.cpp
index 3b2bd96dd69b..98aefd31e76e 100644
--- a/headless/Headless.cpp
+++ b/headless/Headless.cpp
@@ -426,6 +426,7 @@ int main(int argc, const char* argv[])
 	g_Config.bSkipGPUReadbacks = false;
 	g_Config.bHardwareTransform = true;
 	g_Config.iAnisotropyLevel = 0;  // When testing mipmapping we really don't want this.
+	g_Config.iMultiSampleLevel = 0;
 	g_Config.bVertexCache = false;
 	g_Config.iLanguage = PSP_SYSTEMPARAM_LANGUAGE_ENGLISH;
 	g_Config.iTimeFormat = PSP_SYSTEMPARAM_TIME_FORMAT_24HR;
diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp
index 7722c9f94756..89e0eaafec0d 100644
--- a/unittest/TestShaderGenerators.cpp
+++ b/unittest/TestShaderGenerators.cpp
@@ -30,32 +30,34 @@ static constexpr size_t CODE_BUFFER_SIZE = 32768;
 bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs bugs, std::string *errorString) {
 	buffer[0] = '\0';
 
+	FragmentShaderFlags flags;
+
 	uint64_t uniformMask;
 	switch (lang) {
 	case ShaderLanguage::GLSL_VULKAN:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::GLSL_VULKAN);
-		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, nullptr, errorString);
+		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, &flags, errorString);
 	}
 	case ShaderLanguage::GLSL_1xx:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::GLSL_1xx);
-		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, nullptr, errorString);
+		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, &flags, errorString);
 	}
 	case ShaderLanguage::GLSL_3xx:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::GLSL_3xx);
-		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, nullptr, errorString);
+		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, &flags, errorString);
 	}
 	case ShaderLanguage::HLSL_D3D9:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::HLSL_D3D9);
-		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, nullptr, errorString);
+		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, &flags, errorString);
	}
 	case ShaderLanguage::HLSL_D3D11:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::HLSL_D3D11);
-		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, nullptr, errorString);
+		return GenerateFragmentShader(id, buffer, compat, bugs, &uniformMask, &flags, errorString);
 	}
 	default:
 		return false;
@@ -65,33 +67,35 @@ bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs
 bool GenerateVShader(VShaderID id, char *buffer, ShaderLanguage lang, Draw::Bugs bugs, std::string *errorString) {
 	buffer[0] = '\0';
 
+	VertexShaderFlags flags;
+
 	uint32_t attrMask;
 	uint64_t uniformMask;
 	switch (lang) {
 	case ShaderLanguage::GLSL_VULKAN:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::GLSL_VULKAN);
-		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString);
+		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, &flags, errorString);
 	}
 	case ShaderLanguage::GLSL_1xx:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::GLSL_1xx);
-		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString);
+		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, &flags, errorString);
 	}
 	case ShaderLanguage::GLSL_3xx:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::GLSL_3xx);
-		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString);
+		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, &flags, errorString);
 	}
 	case ShaderLanguage::HLSL_D3D9:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::HLSL_D3D9);
-		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString);
+		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, &flags, errorString);
 	}
 	case ShaderLanguage::HLSL_D3D11:
 	{
 		ShaderLanguageDesc compat(ShaderLanguage::HLSL_D3D11);
-		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, nullptr, errorString);
+		return GenerateVertexShader(id, buffer, compat, bugs, &attrMask, &uniformMask, &flags, errorString);
 	}
 	default:
 		return false;
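
A follow-up note on the settings plumbing: GameSettingsScreen.cpp above hides MSAA levels whose bit is not set in `multiSampleLevelsMask`, but an ini file can still carry a stale `iMultiSampleLevel` from another device. A backend-agnostic sanitize step along these lines (hypothetical helper, not part of this patch) would clamp the value back into the mask, relying on the guarantee that bit 0 (1x) is always set:

	// Hypothetical sanitizer, not part of this patch.
	inline int SanitizeMultiSampleLevel(int level, uint32_t multiSampleLevelsMask) {
		while (level > 0 && (multiSampleLevelsMask & (1u << level)) == 0)
			level--;  // Fall back toward 1x; bit 0 is always set, so this terminates.
		return level;
	}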