Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proof of concept fix for Async GPU Readbacks #87850

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 44 additions & 3 deletions drivers/vulkan/vulkan_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cstdint>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include <cstdint>

Already included (even in the header, only include what's necessary)


#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
#define APP_SHORT_NAME "GodotEngine"
Expand Down Expand Up @@ -1794,6 +1795,22 @@ Error VulkanContext::_create_semaphores() {
/*pNext*/ nullptr,
/*flags*/ 0,
};
// Create compute semaphore that can be polled to determine whether
// a given compute block has finished.
// Every time a compute call finishes, we'll increment the value
// of this semaphore by one, so we can check the status
VkSemaphoreTypeCreateInfo timelineCreateInfo = {
/*sType*/ VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
/*pNext*/ nullptr,
/*semaphoreType*/ VK_SEMAPHORE_TYPE_TIMELINE,
/*initialValue*/ 0,
};
Comment on lines +1800 to +1807
Copy link
Member

@AThousandShips AThousandShips Feb 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Every time a compute call finishes, we'll increment the value
// of this semaphore by one, so we can check the status
VkSemaphoreTypeCreateInfo timelineCreateInfo = {
/*sType*/ VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
/*pNext*/ nullptr,
/*semaphoreType*/ VK_SEMAPHORE_TYPE_TIMELINE,
/*initialValue*/ 0,
};
// Every time a compute call finishes, we'll increment the value
// of this semaphore by one, so we can check the status.
VkSemaphoreTypeCreateInfo timelineCreateInfo = {
/*sType*/ VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
/*pNext*/ nullptr,
/*semaphoreType*/ VK_SEMAPHORE_TYPE_TIMELINE,
/*initialValue*/ 0,
};


VkSemaphoreCreateInfo computeSemaphoreCreateInfo = {
/*sType*/ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
/*pNext*/ &timelineCreateInfo,
/*flags*/ 0,
};

// Create fences that we can use to throttle if we get too far
// ahead of the image presents.
Expand All @@ -1816,6 +1833,9 @@ Error VulkanContext::_create_semaphores() {
}
frame_index = 0;

err = vkCreateSemaphore(device, &computeSemaphoreCreateInfo, nullptr, &compute_semaphore[0]);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
Comment on lines +1836 to +1837
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
err = vkCreateSemaphore(device, &computeSemaphoreCreateInfo, nullptr, &compute_semaphore[0]);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
err = vkCreateSemaphore(device, &computeSemaphoreCreateInfo, nullptr, &compute_semaphore[0]);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);


// Get Memory information and properties.
vkGetPhysicalDeviceMemoryProperties(gpu, &memory_properties);

Expand Down Expand Up @@ -2781,16 +2801,26 @@ void VulkanContext::local_device_push_command_buffers(RID p_local_device, const
LocalDevice *ld = local_device_owner.get_or_null(p_local_device);
ERR_FAIL_COND(ld->waiting);

compute_semaphore_signal_value += 1;

VkTimelineSemaphoreSubmitInfo signal_info;
signal_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
signal_info.pNext = nullptr;
signal_info.waitSemaphoreValueCount = 0;
signal_info.signalSemaphoreValueCount = 1;
signal_info.pSignalSemaphoreValues = &compute_semaphore_signal_value;
Comment on lines +2804 to +2811
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
compute_semaphore_signal_value += 1;
VkTimelineSemaphoreSubmitInfo signal_info;
signal_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
signal_info.pNext = nullptr;
signal_info.waitSemaphoreValueCount = 0;
signal_info.signalSemaphoreValueCount = 1;
signal_info.pSignalSemaphoreValues = &compute_semaphore_signal_value;
compute_semaphore_signal_value += 1;
VkTimelineSemaphoreSubmitInfo signal_info;
signal_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
signal_info.pNext = nullptr;
signal_info.waitSemaphoreValueCount = 0;
signal_info.signalSemaphoreValueCount = 1;
signal_info.pSignalSemaphoreValues = &compute_semaphore_signal_value;


VkSubmitInfo submit_info;
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.pNext = nullptr;
submit_info.pNext = &signal_info;
submit_info.pWaitDstStageMask = nullptr;
submit_info.waitSemaphoreCount = 0;
submit_info.pWaitSemaphores = nullptr;
submit_info.commandBufferCount = p_count;
submit_info.pCommandBuffers = (const VkCommandBuffer *)p_buffers;
submit_info.signalSemaphoreCount = 0;
submit_info.pSignalSemaphores = nullptr;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &compute_semaphore[0];

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change


VkResult err = vkQueueSubmit(ld->queue, 1, &submit_info, VK_NULL_HANDLE);
if (err == VK_ERROR_OUT_OF_HOST_MEMORY) {
Expand All @@ -2813,8 +2843,18 @@ void VulkanContext::local_device_sync(RID p_local_device) {

vkDeviceWaitIdle(ld->device);
ld->waiting = false;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

}

bool VulkanContext::local_device_check_status(RID p_local_device) {
LocalDevice *ld = local_device_owner.get_or_null(p_local_device);
ERR_FAIL_COND_V(!ld->waiting, false);
uint64_t out = 0;
vkGetSemaphoreCounterValue(ld->device, compute_semaphore[0], &out);
return out >= compute_semaphore_signal_value;
}

Comment on lines +2852 to +2856
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
uint64_t out = 0;
vkGetSemaphoreCounterValue(ld->device, compute_semaphore[0], &out);
return out >= compute_semaphore_signal_value;
}
uint64_t out = 0;
vkGetSemaphoreCounterValue(ld->device, compute_semaphore[0], &out);
return out >= compute_semaphore_signal_value;
}


void VulkanContext::local_device_free(RID p_local_device) {
LocalDevice *ld = local_device_owner.get_or_null(p_local_device);
memdelete(ld->driver);
Expand Down Expand Up @@ -2901,6 +2941,7 @@ VulkanContext::~VulkanContext() {
vkDestroySemaphore(device, image_ownership_semaphores[i], nullptr);
}
}
vkDestroySemaphore(device, compute_semaphore[0], nullptr);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
vkDestroySemaphore(device, compute_semaphore[0], nullptr);
vkDestroySemaphore(device, compute_semaphore[0], nullptr);

if (inst_initialized && is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) {
DestroyDebugUtilsMessengerEXT(inst, dbg_messenger, nullptr);
}
Expand Down
4 changes: 4 additions & 0 deletions drivers/vulkan/vulkan_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "rendering_device_driver_vulkan.h"
#include "servers/display_server.h"
#include "servers/rendering/renderer_rd/api_context_rd.h"
#include <cstdint>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include <cstdint>


#ifdef USE_VOLK
#include <volk.h>
Expand Down Expand Up @@ -133,6 +134,8 @@ class VulkanContext : public ApiContextRD {
VkFormat format;
VkSemaphore draw_complete_semaphores[FRAME_LAG];
VkSemaphore image_ownership_semaphores[FRAME_LAG];
VkSemaphore compute_semaphore[1];
uint64_t compute_semaphore_signal_value = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
uint64_t compute_semaphore_signal_value = 0;
uint64_t compute_semaphore_signal_value = 0;

int frame_index = 0;
VkFence fences[FRAME_LAG];
VkPhysicalDeviceMemoryProperties memory_properties;
Expand Down Expand Up @@ -313,6 +316,7 @@ class VulkanContext : public ApiContextRD {
virtual RID local_device_create() override final;
virtual void local_device_push_command_buffers(RID p_local_device, const RDD::CommandBufferID *p_buffers, int p_count) override final;
virtual void local_device_sync(RID p_local_device) override final;
virtual bool local_device_check_status(RID p_local_device) override final;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
virtual bool local_device_check_status(RID p_local_device) override final;
virtual bool local_device_check_status(RID p_local_device) override final;

virtual void local_device_free(RID p_local_device) override final;

VkFormat get_screen_format() const;
Expand Down
1 change: 1 addition & 0 deletions servers/rendering/renderer_rd/api_context_rd.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class ApiContextRD {
virtual RID local_device_create() = 0;
virtual void local_device_push_command_buffers(RID p_local_device, const RDD::CommandBufferID *p_buffers, int p_count) = 0;
virtual void local_device_sync(RID p_local_device) = 0;
virtual bool local_device_check_status(RID p_local_device) = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
virtual bool local_device_check_status(RID p_local_device) = 0;
virtual bool local_device_check_status(RID p_local_device) = 0;

virtual void local_device_free(RID p_local_device) = 0;

virtual void set_setup_buffer(RDD::CommandBufferID p_command_buffer) = 0;
Expand Down
8 changes: 8 additions & 0 deletions servers/rendering/rendering_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
/**************************************************************************/

#include "rendering_device.h"
#include "core/os/thread_safe.h"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include "core/os/thread_safe.h"

#include "rendering_device.compat.inc"

#include "rendering_device_binds.h"
Expand Down Expand Up @@ -4659,6 +4660,12 @@ void RenderingDevice::sync() {
local_device_processing = false;
}

bool RenderingDevice::check_status(){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool RenderingDevice::check_status(){
bool RenderingDevice::check_status() {

_THREAD_SAFE_METHOD_

return context->local_device_check_status(local_device);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return context->local_device_check_status(local_device);
return context->local_device_check_status(local_device);

}

void RenderingDevice::_free_pending_resources(int p_frame) {
// Free in dependency usage order, so nothing weird happens.
// Pipelines.
Expand Down Expand Up @@ -5340,6 +5347,7 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_frame_delay"), &RenderingDevice::get_frame_delay);
ClassDB::bind_method(D_METHOD("submit"), &RenderingDevice::submit);
ClassDB::bind_method(D_METHOD("sync"), &RenderingDevice::sync);
ClassDB::bind_method(D_METHOD("check_status"), &RenderingDevice::check_status);

#ifndef DISABLE_DEPRECATED
ClassDB::bind_method(D_METHOD("barrier", "from", "to"), &RenderingDevice::barrier, DEFVAL(BARRIER_MASK_ALL_BARRIERS), DEFVAL(BARRIER_MASK_ALL_BARRIERS));
Expand Down
1 change: 1 addition & 0 deletions servers/rendering/rendering_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,7 @@ class RenderingDevice : public RenderingDeviceCommons {

void submit();
void sync();
bool check_status();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool check_status();
bool check_status();


enum MemoryType {
MEMORY_TEXTURES,
Expand Down
Loading