From e02f73d6b7bd9f8d2e9105285a2525789a585639 Mon Sep 17 00:00:00 2001 From: Dario Date: Tue, 8 Aug 2023 09:46:06 -0300 Subject: [PATCH] Improve handling of motion vectors for multimesh instances. Fixes #67287. There was a subtle error where due to how enabling motion vectors for multi-meshes was handled, only the first instance would have a valid transforms buffer and the rest would point to an invalid buffer. This change moves over the responsibility of enabling motion vectors only when changes happen to the individual 3D transforms or the entire buffer itself. It also fixes an unnecessary download of the existing buffer that'd get overwritten by the current cache if it exists. Another fix is handling the case where the buffer was not set, and enabling motion vectors would not cause the buffer to be recreated correctly. --- .../render_forward_clustered.cpp | 7 --- .../renderer_rd/storage_rd/mesh_storage.cpp | 48 ++++++++++++------- .../renderer_rd/storage_rd/mesh_storage.h | 2 +- servers/rendering/renderer_viewport.cpp | 9 ++++ servers/rendering/renderer_viewport.h | 3 ++ 5 files changed, 45 insertions(+), 24 deletions(-) diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 9c7990d679d2..2974e9c4a316 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -1010,13 +1010,6 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DEPTH_TEXTURE) { scene_state.used_depth_texture = true; } - - if (p_color_pass_flags & COLOR_PASS_FLAG_MOTION_VECTORS) { - if ((flags & (INSTANCE_DATA_FLAG_MULTIMESH | INSTANCE_DATA_FLAG_PARTICLES)) == INSTANCE_DATA_FLAG_MULTIMESH && RendererRD::MeshStorage::get_singleton()->_multimesh_enable_motion_vectors(inst->data->base)) { - inst->transforms_uniform_set = mesh_storage->multimesh_get_3d_uniform_set(inst->data->base, scene_shader.default_shader_rd, TRANSFORMS_UNIFORM_SET); - } - } - } else if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) { if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW) { rl->add_element(surf); diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp index 67b5cdd291f7..4bfec8ae8dce 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp @@ -1289,12 +1289,9 @@ void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS:: multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_MULTIMESH); } -bool MeshStorage::_multimesh_enable_motion_vectors(RID p_multimesh) { - MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); - ERR_FAIL_COND_V(!multimesh, false); - +void MeshStorage::_multimesh_enable_motion_vectors(MultiMesh *multimesh) { if (multimesh->motion_vectors_enabled) { - return false; + return; } multimesh->motion_vectors_enabled = true; @@ -1307,22 +1304,30 @@ bool MeshStorage::_multimesh_enable_motion_vectors(RID p_multimesh) { multimesh->data_cache.append_array(multimesh->data_cache); } - if (multimesh->buffer_set) { + uint32_t buffer_size = multimesh->instances * multimesh->stride_cache * sizeof(float); + uint32_t new_buffer_size = buffer_size * 2; + RID new_buffer = RD::get_singleton()->storage_buffer_create(new_buffer_size); + + if (multimesh->buffer_set && multimesh->data_cache.is_empty()) { + // If the buffer was set but there's no data cached in the CPU, we must download it from the GPU and + // upload it because RD does not provide a way to copy the buffer directly yet. RD::get_singleton()->barrier(); Vector buffer_data = RD::get_singleton()->buffer_get_data(multimesh->buffer); - if (!multimesh->data_cache.is_empty()) { - memcpy(buffer_data.ptrw(), multimesh->data_cache.ptr(), buffer_data.size()); - } + ERR_FAIL_COND(buffer_data.size() != int(buffer_size)); + RD::get_singleton()->buffer_update(new_buffer, 0, buffer_size, buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER); + RD::get_singleton()->buffer_update(new_buffer, buffer_size, buffer_size, buffer_data.ptr()); + } else if (!multimesh->data_cache.is_empty()) { + // Simply upload the data cached in the CPU, which should already be doubled in size. + ERR_FAIL_COND(multimesh->data_cache.size() != int(new_buffer_size)); + RD::get_singleton()->buffer_update(new_buffer, 0, new_buffer_size, multimesh->data_cache.ptr()); + } + if (multimesh->buffer.is_valid()) { RD::get_singleton()->free(multimesh->buffer); - uint32_t buffer_size = multimesh->instances * multimesh->stride_cache * sizeof(float) * 2; - multimesh->buffer = RD::get_singleton()->storage_buffer_create(buffer_size); - RD::get_singleton()->buffer_update(multimesh->buffer, 0, buffer_data.size(), buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER); - RD::get_singleton()->buffer_update(multimesh->buffer, buffer_data.size(), buffer_data.size(), buffer_data.ptr()); - multimesh->uniform_set_3d = RID(); // Cleared by dependency - return true; } - return false; // Update the transforms uniform set cache + + multimesh->buffer = new_buffer; + multimesh->uniform_set_3d = RID(); // Cleared by dependency. } void MeshStorage::_multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_t &r_current_offset, uint32_t &r_prev_offset) { @@ -1531,6 +1536,12 @@ void MeshStorage::multimesh_instance_set_transform(RID p_multimesh, int p_index, ERR_FAIL_COND(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_3D); _multimesh_make_local(multimesh); + + bool uses_motion_vectors = (RSG::viewport->get_num_viewports_with_motion_vectors() > 0); + if (uses_motion_vectors) { + _multimesh_enable_motion_vectors(multimesh); + } + _multimesh_update_motion_vectors_data_cache(multimesh); { @@ -1749,6 +1760,11 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector &p_b ERR_FAIL_COND(!multimesh); ERR_FAIL_COND(p_buffer.size() != (multimesh->instances * (int)multimesh->stride_cache)); + bool uses_motion_vectors = (RSG::viewport->get_num_viewports_with_motion_vectors() > 0); + if (uses_motion_vectors) { + _multimesh_enable_motion_vectors(multimesh); + } + if (multimesh->motion_vectors_enabled) { uint32_t frame = RSG::rasterizer->get_frame_number(); diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.h b/servers/rendering/renderer_rd/storage_rd/mesh_storage.h index 4d46a62a76cf..c23a5b1449c8 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.h @@ -234,6 +234,7 @@ class MeshStorage : public RendererMeshStorage { MultiMesh *multimesh_dirty_list = nullptr; _FORCE_INLINE_ void _multimesh_make_local(MultiMesh *multimesh) const; + _FORCE_INLINE_ void _multimesh_enable_motion_vectors(MultiMesh *multimesh); _FORCE_INLINE_ void _multimesh_update_motion_vectors_data_cache(MultiMesh *multimesh); _FORCE_INLINE_ void _multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb); _FORCE_INLINE_ void _multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, bool p_aabb); @@ -593,7 +594,6 @@ class MeshStorage : public RendererMeshStorage { virtual AABB multimesh_get_aabb(RID p_multimesh) const override; void _update_dirty_multimeshes(); - bool _multimesh_enable_motion_vectors(RID p_multimesh); void _multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_t &r_current_offset, uint32_t &r_prev_offset); _FORCE_INLINE_ RS::MultimeshTransformFormat multimesh_get_transform_format(RID p_multimesh) const { diff --git a/servers/rendering/renderer_viewport.cpp b/servers/rendering/renderer_viewport.cpp index 75371de814db..5880bf395123 100644 --- a/servers/rendering/renderer_viewport.cpp +++ b/servers/rendering/renderer_viewport.cpp @@ -1194,6 +1194,7 @@ void RendererViewport::viewport_set_use_taa(RID p_viewport, bool p_use_taa) { return; } viewport->use_taa = p_use_taa; + num_viewports_with_motion_vectors += p_use_taa ? 1 : -1; _configure_3d_render_buffers(viewport); } @@ -1378,6 +1379,10 @@ bool RendererViewport::free(RID p_rid) { RendererSceneOcclusionCull::get_singleton()->remove_buffer(p_rid); } + if (viewport->use_taa) { + num_viewports_with_motion_vectors--; + } + viewport_owner.free(p_rid); return true; @@ -1433,6 +1438,10 @@ int RendererViewport::get_total_draw_calls_used() const { return total_draw_calls_used; } +int RendererViewport::get_num_viewports_with_motion_vectors() const { + return num_viewports_with_motion_vectors; +} + RendererViewport::RendererViewport() { occlusion_rays_per_thread = GLOBAL_GET("rendering/occlusion_culling/occlusion_rays_per_thread"); } diff --git a/servers/rendering/renderer_viewport.h b/servers/rendering/renderer_viewport.h index 92c5929796b3..3bfb1afd51c0 100644 --- a/servers/rendering/renderer_viewport.h +++ b/servers/rendering/renderer_viewport.h @@ -198,6 +198,8 @@ class RendererViewport { int total_vertices_drawn = 0; int total_draw_calls_used = 0; + int num_viewports_with_motion_vectors = 0; + private: Vector _sort_active_viewports(); void _viewport_set_size(Viewport *p_viewport, int p_width, int p_height, uint32_t p_view_count); @@ -302,6 +304,7 @@ class RendererViewport { int get_total_objects_drawn() const; int get_total_primitives_drawn() const; int get_total_draw_calls_used() const; + int get_num_viewports_with_motion_vectors() const; // Workaround for setting this on thread. void call_set_vsync_mode(DisplayServer::VSyncMode p_mode, DisplayServer::WindowID p_window);