Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vk: Use synchronization2 extension for events #14065

Merged
merged 2 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions rpcs3/Emu/RSX/VK/VKAsyncScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

namespace vk
{
AsyncTaskScheduler::AsyncTaskScheduler([[maybe_unused]] vk_gpu_scheduler_mode mode)
AsyncTaskScheduler::AsyncTaskScheduler(vk_gpu_scheduler_mode mode, const VkDependencyInfoKHR& queue_dependency)
{
if (g_cfg.video.renderer != video_renderer::vulkan || !g_cfg.video.vk.asynchronous_texture_streaming)
{
Expand All @@ -20,7 +20,7 @@ namespace vk
return;
}

init_config_options(mode);
init_config_options(mode, queue_dependency);
}

AsyncTaskScheduler::~AsyncTaskScheduler()
Expand All @@ -32,7 +32,7 @@ namespace vk
}
}

void AsyncTaskScheduler::init_config_options(vk_gpu_scheduler_mode mode)
void AsyncTaskScheduler::init_config_options(vk_gpu_scheduler_mode mode, const VkDependencyInfoKHR& queue_dependency)
{
std::lock_guard lock(m_config_mutex);
if (std::exchange(m_options_initialized, true))
Expand All @@ -43,6 +43,8 @@ namespace vk

m_use_host_scheduler = (mode == vk_gpu_scheduler_mode::safe) || g_cfg.video.strict_rendering_mode;
rsx_log.notice("Asynchronous task scheduler is active running in %s mode", m_use_host_scheduler? "'Safe'" : "'Fast'");

m_dependency_info = queue_dependency;
}

void AsyncTaskScheduler::delayed_init()
Expand Down Expand Up @@ -76,7 +78,7 @@ namespace vk
auto& sync_label = m_events_pool[m_next_event_id++ % events_pool_size];

sync_label->reset();
sync_label->signal(*m_current_cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0);
sync_label->signal(*m_current_cb, m_dependency_info);
m_sync_label = sync_label.get();
}

Expand Down
5 changes: 3 additions & 2 deletions rpcs3/Emu/RSX/VK/VKAsyncScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace vk
// Sync
event* m_sync_label = nullptr;
atomic_t<bool> m_sync_required = false;
VkDependencyInfoKHR m_dependency_info{};

static constexpr u32 events_pool_size = 16384;
std::vector<std::unique_ptr<vk::event>> m_events_pool;
Expand All @@ -39,12 +40,12 @@ namespace vk

shared_mutex m_submit_mutex;

void init_config_options(vk_gpu_scheduler_mode mode);
void init_config_options(vk_gpu_scheduler_mode mode, const VkDependencyInfoKHR& queue_dependency);
void delayed_init();
void insert_sync_event();

public:
AsyncTaskScheduler(vk_gpu_scheduler_mode mode); // This ctor stops default initialization by fxo
AsyncTaskScheduler(vk_gpu_scheduler_mode mode, const VkDependencyInfoKHR& queue_dependency);
~AsyncTaskScheduler();

command_buffer* get_current();
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/RSX/VK/VKDraw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ void VKGSRender::load_texture_env()
// Sync any async scheduler tasks
if (auto ev = async_task_scheduler.get_primary_sync_label())
{
ev->gpu_wait(*m_current_command_buffer);
ev->gpu_wait(*m_current_command_buffer, m_async_compute_dependency_info);
}
}
}
Expand Down
19 changes: 18 additions & 1 deletion rpcs3/Emu/RSX/VK/VKGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -822,8 +822,25 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)

if (backend_config.supports_asynchronous_compute)
{
// Global memory barrier:
//  - source scope: memory writes performed by the transfer and compute-shader stages
//  - destination scope: sampled reads performed by the vertex and fragment shader stages
m_async_compute_memory_barrier =
{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
.pNext = nullptr,
.srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
.srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR,
.dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT_KHR
};

// Dependency info carrying exactly one memory barrier.
// NOTE: pMemoryBarriers holds a raw pointer to the m_async_compute_memory_barrier member,
// so any by-value copy of this struct still refers to that member and is only valid while it is alive.
m_async_compute_dependency_info =
{
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.memoryBarrierCount = 1,
.pMemoryBarriers = &m_async_compute_memory_barrier
};

// Run only if async compute can be used.
g_fxo->init<vk::AsyncTaskScheduler>(g_cfg.video.vk.asynchronous_scheduler);
g_fxo->init<vk::AsyncTaskScheduler>(g_cfg.video.vk.asynchronous_scheduler, m_async_compute_dependency_info);
}

if (backend_config.supports_host_gpu_labels)
Expand Down
3 changes: 3 additions & 0 deletions rpcs3/Emu/RSX/VK/VKGSRender.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
std::unique_ptr<vk::buffer_view> m_volatile_attribute_storage;
std::unique_ptr<vk::buffer_view> m_vertex_layout_storage;

VkDependencyInfoKHR m_async_compute_dependency_info{};
VkMemoryBarrier2KHR m_async_compute_memory_barrier{};

public:
//vk::fbo draw_fbo;
std::unique_ptr<vk::vertex_cache> m_vertex_cache;
Expand Down
16 changes: 15 additions & 1 deletion rpcs3/Emu/RSX/VK/VKTextureCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,23 @@ namespace vk

src->pop_layout(cmd);

// Memory barrier whose source scope is the copy stage with both memory reads and writes.
// The destination scope is left empty (stage NONE, no access flags).
// NOTE(review): presumably the consumer of the event defines its own destination scope — confirm against the wait side.
VkMemoryBarrier2KHR copy_memory_barrier = {
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
.pNext = nullptr,
.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
.srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR | VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
.dstStageMask = VK_PIPELINE_STAGE_2_NONE_KHR,
.dstAccessMask = 0
};

// Create event object for this transfer and queue signal op
dma_fence = std::make_unique<vk::event>(*m_device, sync_domain::any);
dma_fence->signal(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
dma_fence->signal(cmd,
{
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.memoryBarrierCount = 1,
.pMemoryBarriers = &copy_memory_barrier
});

// Set cb flag for queued dma operations
cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
Expand Down
20 changes: 20 additions & 0 deletions rpcs3/Emu/RSX/VK/vkutils/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ namespace vk
optional_features_support.conditional_rendering = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
optional_features_support.external_memory_host = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
optional_features_support.sampler_mirror_clamped = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
optional_features_support.synchronization_2 = device_extensions.is_supported(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
optional_features_support.unrestricted_depth_range = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);

optional_features_support.debug_utils = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
Expand Down Expand Up @@ -486,6 +487,11 @@ namespace vk
requested_extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
}

if (pgpu->optional_features_support.synchronization_2)
{
requested_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
}

enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
Expand Down Expand Up @@ -670,6 +676,14 @@ namespace vk
device.pNext = &custom_border_color_features;
}

// Optionally chain the synchronization2 feature struct into the device create info.
// The struct is declared outside the 'if' so it is still alive when vkCreateDevice reads the pNext chain.
VkPhysicalDeviceSynchronization2FeaturesKHR synchronization2_info{};
if (pgpu->optional_features_support.synchronization_2)
{
	synchronization2_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES;
	// Prepend to whatever is already chained on the create info.
	synchronization2_info.pNext = const_cast<void*>(device.pNext);
	// Requesting the extension alone does not turn the feature on; the feature bit must be
	// set explicitly, otherwise the *2KHR event commands may not legally be recorded.
	synchronization2_info.synchronization2 = VK_TRUE;
	device.pNext = &synchronization2_info;
}

CHECK_RESULT_EX(vkCreateDevice(*pgpu, &device, nullptr, &dev), message_on_error);

// Initialize queues
Expand All @@ -695,6 +709,12 @@ namespace vk
_vkCmdInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkCmdInsertDebugUtilsLabelEXT>(vkGetDeviceProcAddr(dev, "vkCmdInsertDebugUtilsLabelEXT"));
}

// Resolve the synchronization2 command entry points from the created device.
// Only done when the extension was detected; otherwise both pointers are left untouched.
if (pgpu->optional_features_support.synchronization_2)
{
_vkCmdSetEvent2KHR = reinterpret_cast<PFN_vkCmdSetEvent2KHR>(vkGetDeviceProcAddr(dev, "vkCmdSetEvent2KHR"));
_vkCmdWaitEvents2KHR = reinterpret_cast<PFN_vkCmdWaitEvents2KHR>(vkGetDeviceProcAddr(dev, "vkCmdWaitEvents2KHR"));
}

memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
Expand Down
4 changes: 4 additions & 0 deletions rpcs3/Emu/RSX/VK/vkutils/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ namespace vk
bool sampler_mirror_clamped = false;
bool shader_stencil_export = false;
bool surface_capabilities_2 = false;
bool synchronization_2 = false;
bool unrestricted_depth_range = false;
} optional_features_support;

Expand Down Expand Up @@ -135,6 +136,8 @@ namespace vk
PFN_vkSetDebugUtilsObjectNameEXT _vkSetDebugUtilsObjectNameEXT = nullptr;
PFN_vkQueueInsertDebugUtilsLabelEXT _vkQueueInsertDebugUtilsLabelEXT = nullptr;
PFN_vkCmdInsertDebugUtilsLabelEXT _vkCmdInsertDebugUtilsLabelEXT = nullptr;
PFN_vkCmdSetEvent2KHR _vkCmdSetEvent2KHR = nullptr;
PFN_vkCmdWaitEvents2KHR _vkCmdWaitEvents2KHR = nullptr;

public:
render_device() = default;
Expand Down Expand Up @@ -168,6 +171,7 @@ namespace vk
bool get_framebuffer_loops_support() const { return pgpu->optional_features_support.framebuffer_loops; }
bool get_barycoords_support() const { return pgpu->optional_features_support.barycentric_coords; }
bool get_custom_border_color_support() const { return pgpu->optional_features_support.custom_border_color; }
bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; }

u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; }
u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }
Expand Down
Loading