From 8a1c91f55677b429782f1f22094cc06ec32a1f01 Mon Sep 17 00:00:00 2001 From: marc0246 <40955683+marc0246@users.noreply.github.com> Date: Thu, 27 Oct 2022 20:59:47 +0200 Subject: [PATCH] Make command buffer/descriptor set allocators `Sync` again (#2046) * Make cmd buffer/desc set allocators `Sync` again * Ingles --- .../deferred/frame/ambient_lighting_system.rs | 10 +- .../frame/directional_lighting_system.rs | 10 +- .../deferred/frame/point_lighting_system.rs | 10 +- examples/src/bin/deferred/frame/system.rs | 8 +- examples/src/bin/deferred/main.rs | 4 +- .../src/bin/deferred/triangle_draw_system.rs | 6 +- examples/src/bin/interactive_fractal/app.rs | 6 +- .../fractal_compute_pipeline.rs | 10 +- .../pixels_draw_pipeline.rs | 10 +- .../interactive_fractal/place_over_frame.rs | 8 +- .../src/bin/multi_window_game_of_life/app.rs | 6 +- .../multi_window_game_of_life/game_of_life.rs | 10 +- .../multi_window_game_of_life/pixels_draw.rs | 10 +- .../multi_window_game_of_life/render_pass.rs | 8 +- vulkano/Cargo.toml | 1 + vulkano/src/command_buffer/allocator.rs | 120 ++++++++++++++---- vulkano/src/descriptor_set/allocator.rs | 88 ++++++++++++- .../src/descriptor_set/single_layout_pool.rs | 18 ++- 18 files changed, 250 insertions(+), 93 deletions(-) diff --git a/examples/src/bin/deferred/frame/ambient_lighting_system.rs b/examples/src/bin/deferred/frame/ambient_lighting_system.rs index 1dca0b558d..ab5ef46c4d 100644 --- a/examples/src/bin/deferred/frame/ambient_lighting_system.rs +++ b/examples/src/bin/deferred/frame/ambient_lighting_system.rs @@ -8,7 +8,7 @@ // according to those terms. use bytemuck::{Pod, Zeroable}; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess}, command_buffer::{ @@ -40,8 +40,8 @@ pub struct AmbientLightingSystem { vertex_buffer: Arc>, subpass: Subpass, pipeline: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, } impl AmbientLightingSystem { @@ -50,8 +50,8 @@ impl AmbientLightingSystem { gfx_queue: Arc, subpass: Subpass, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, ) -> AmbientLightingSystem { // TODO: vulkano doesn't allow us to draw without a vertex buffer, otherwise we could // hard-code these values in the shader diff --git a/examples/src/bin/deferred/frame/directional_lighting_system.rs b/examples/src/bin/deferred/frame/directional_lighting_system.rs index efd2fd9007..49e39331fd 100644 --- a/examples/src/bin/deferred/frame/directional_lighting_system.rs +++ b/examples/src/bin/deferred/frame/directional_lighting_system.rs @@ -9,7 +9,7 @@ use bytemuck::{Pod, Zeroable}; use cgmath::Vector3; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess}, command_buffer::{ @@ -41,8 +41,8 @@ pub struct DirectionalLightingSystem { vertex_buffer: Arc>, subpass: Subpass, pipeline: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, } impl DirectionalLightingSystem { @@ -51,8 +51,8 @@ impl DirectionalLightingSystem { gfx_queue: Arc, subpass: Subpass, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, ) -> DirectionalLightingSystem { // TODO: vulkano doesn't allow us to draw without a vertex buffer, otherwise we could // hard-code these values in the shader diff --git a/examples/src/bin/deferred/frame/point_lighting_system.rs b/examples/src/bin/deferred/frame/point_lighting_system.rs index 5337f19ca4..5ca7f1f72f 100644 --- a/examples/src/bin/deferred/frame/point_lighting_system.rs +++ b/examples/src/bin/deferred/frame/point_lighting_system.rs @@ -9,7 +9,7 @@ use bytemuck::{Pod, Zeroable}; use cgmath::{Matrix4, Vector3}; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess}, command_buffer::{ @@ -40,8 +40,8 @@ pub struct PointLightingSystem { vertex_buffer: Arc>, subpass: Subpass, pipeline: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, } impl PointLightingSystem { @@ -50,8 +50,8 @@ impl PointLightingSystem { gfx_queue: Arc, subpass: Subpass, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, ) -> PointLightingSystem { // TODO: vulkano doesn't allow us to draw without a vertex buffer, otherwise we could // hard-code these values in the shader diff --git a/examples/src/bin/deferred/frame/system.rs b/examples/src/bin/deferred/frame/system.rs index d09811c18f..df864f7a99 100644 --- a/examples/src/bin/deferred/frame/system.rs +++ b/examples/src/bin/deferred/frame/system.rs @@ -13,7 +13,7 @@ use super::{ point_lighting_system::PointLightingSystem, }; use cgmath::{Matrix4, SquareMatrix, Vector3}; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, @@ -40,7 +40,7 @@ pub struct FrameSystem { render_pass: Arc, memory_allocator: Arc, - command_buffer_allocator: Rc, + command_buffer_allocator: Arc, // Intermediate render target that will contain the albedo of each pixel of the scene. diffuse_buffer: Arc>, @@ -74,7 +74,7 @@ impl FrameSystem { gfx_queue: Arc, final_output_format: Format, memory_allocator: Arc, - command_buffer_allocator: Rc, + command_buffer_allocator: Arc, ) -> FrameSystem { // Creating the render pass. // @@ -196,7 +196,7 @@ impl FrameSystem { ) .unwrap(); - let descriptor_set_allocator = Rc::new(StandardDescriptorSetAllocator::new( + let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new( gfx_queue.device().clone(), )); diff --git a/examples/src/bin/deferred/main.rs b/examples/src/bin/deferred/main.rs index 782310857e..97d712c00e 100644 --- a/examples/src/bin/deferred/main.rs +++ b/examples/src/bin/deferred/main.rs @@ -30,7 +30,7 @@ use crate::{ triangle_draw_system::TriangleDrawSystem, }; use cgmath::{Matrix4, SquareMatrix, Vector3}; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ command_buffer::allocator::StandardCommandBufferAllocator, device::{ @@ -166,7 +166,7 @@ fn main() { }; let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); - let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new(device.clone())); + let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new(device.clone())); // Here is the basic initialization for the deferred system. let mut frame_system = FrameSystem::new( diff --git a/examples/src/bin/deferred/triangle_draw_system.rs b/examples/src/bin/deferred/triangle_draw_system.rs index 25e53892dd..9a001dc957 100644 --- a/examples/src/bin/deferred/triangle_draw_system.rs +++ b/examples/src/bin/deferred/triangle_draw_system.rs @@ -8,7 +8,7 @@ // according to those terms. use bytemuck::{Pod, Zeroable}; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess}, command_buffer::{ @@ -35,7 +35,7 @@ pub struct TriangleDrawSystem { vertex_buffer: Arc>, subpass: Subpass, pipeline: Arc, - command_buffer_allocator: Rc, + command_buffer_allocator: Arc, } impl TriangleDrawSystem { @@ -44,7 +44,7 @@ impl TriangleDrawSystem { gfx_queue: Arc, subpass: Subpass, memory_allocator: &StandardMemoryAllocator, - command_buffer_allocator: Rc, + command_buffer_allocator: Arc, ) -> TriangleDrawSystem { let vertices = [ Vertex { diff --git a/examples/src/bin/interactive_fractal/app.rs b/examples/src/bin/interactive_fractal/app.rs index c30f230f02..42ab41537d 100644 --- a/examples/src/bin/interactive_fractal/app.rs +++ b/examples/src/bin/interactive_fractal/app.rs @@ -10,8 +10,8 @@ use crate::fractal_compute_pipeline::FractalComputePipeline; use crate::place_over_frame::RenderPassPlaceOverFrame; use cgmath::Vector2; +use std::sync::Arc; use std::time::Instant; -use std::{rc::Rc, sync::Arc}; use vulkano::command_buffer::allocator::StandardCommandBufferAllocator; use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator; use vulkano::device::Queue; @@ -64,10 +64,10 @@ impl FractalApp { let memory_allocator = Arc::new(StandardMemoryAllocator::new_default( gfx_queue.device().clone(), )); - let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new( + let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new( gfx_queue.device().clone(), )); - let descriptor_set_allocator = Rc::new(StandardDescriptorSetAllocator::new( + let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new( gfx_queue.device().clone(), )); diff --git a/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs b/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs index 1d4c951938..360ae2a706 100644 --- a/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs +++ b/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs @@ -9,7 +9,7 @@ use cgmath::Vector2; use rand::Rng; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer}, command_buffer::{ @@ -31,8 +31,8 @@ pub struct FractalComputePipeline { queue: Arc, pipeline: Arc, memory_allocator: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, palette: Arc>, palette_size: i32, end_color: [f32; 4], @@ -42,8 +42,8 @@ impl FractalComputePipeline { pub fn new( queue: Arc, memory_allocator: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, ) -> FractalComputePipeline { // Initial colors let colors = vec![ diff --git a/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs b/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs index d3f3701c45..421b8201c9 100644 --- a/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs +++ b/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs @@ -8,7 +8,7 @@ // according to those terms. use bytemuck::{Pod, Zeroable}; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess}, command_buffer::{ @@ -72,8 +72,8 @@ pub struct PixelsDrawPipeline { gfx_queue: Arc, subpass: Subpass, pipeline: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, vertices: Arc>, indices: Arc>, } @@ -83,8 +83,8 @@ impl PixelsDrawPipeline { gfx_queue: Arc, subpass: Subpass, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, ) -> PixelsDrawPipeline { let (vertices, indices) = textured_quad(2.0, 2.0); let vertex_buffer = CpuAccessibleBuffer::<[TexturedVertex]>::from_iter( diff --git a/examples/src/bin/interactive_fractal/place_over_frame.rs b/examples/src/bin/interactive_fractal/place_over_frame.rs index 7432a91118..f343a3ea2f 100644 --- a/examples/src/bin/interactive_fractal/place_over_frame.rs +++ b/examples/src/bin/interactive_fractal/place_over_frame.rs @@ -8,7 +8,7 @@ // according to those terms. use crate::pixels_draw_pipeline::PixelsDrawPipeline; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, @@ -29,15 +29,15 @@ pub struct RenderPassPlaceOverFrame { gfx_queue: Arc, render_pass: Arc, pixels_draw_pipeline: PixelsDrawPipeline, - command_buffer_allocator: Rc, + command_buffer_allocator: Arc, } impl RenderPassPlaceOverFrame { pub fn new( gfx_queue: Arc, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, output_format: Format, ) -> RenderPassPlaceOverFrame { let render_pass = vulkano::single_pass_renderpass!(gfx_queue.device().clone(), diff --git a/examples/src/bin/multi_window_game_of_life/app.rs b/examples/src/bin/multi_window_game_of_life/app.rs index 73c9d7e4c2..bc6be6bf41 100644 --- a/examples/src/bin/multi_window_game_of_life/app.rs +++ b/examples/src/bin/multi_window_game_of_life/app.rs @@ -11,7 +11,7 @@ use crate::{ game_of_life::GameOfLifeComputePipeline, render_pass::RenderPassPlaceOverFrame, SCALING, WINDOW2_HEIGHT, WINDOW2_WIDTH, WINDOW_HEIGHT, WINDOW_WIDTH, }; -use std::{collections::HashMap, rc::Rc, sync::Arc}; +use std::{collections::HashMap, sync::Arc}; use vulkano::command_buffer::allocator::StandardCommandBufferAllocator; use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator; use vulkano::memory::allocator::StandardMemoryAllocator; @@ -33,10 +33,10 @@ impl RenderPipeline { swapchain_format: Format, ) -> RenderPipeline { let memory_allocator = StandardMemoryAllocator::new_default(gfx_queue.device().clone()); - let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new( + let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new( gfx_queue.device().clone(), )); - let descriptor_set_allocator = Rc::new(StandardDescriptorSetAllocator::new( + let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new( gfx_queue.device().clone(), )); diff --git a/examples/src/bin/multi_window_game_of_life/game_of_life.rs b/examples/src/bin/multi_window_game_of_life/game_of_life.rs index afcc7cfa2c..71c9cde673 100644 --- a/examples/src/bin/multi_window_game_of_life/game_of_life.rs +++ b/examples/src/bin/multi_window_game_of_life/game_of_life.rs @@ -9,7 +9,7 @@ use cgmath::Vector2; use rand::Rng; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::command_buffer::allocator::StandardCommandBufferAllocator; use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator; use vulkano::image::{ImageUsage, StorageImage}; @@ -34,8 +34,8 @@ use vulkano_util::renderer::DeviceImageView; pub struct GameOfLifeComputePipeline { compute_queue: Arc, compute_life_pipeline: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, life_in: Arc>, life_out: Arc>, image: DeviceImageView, @@ -63,8 +63,8 @@ impl GameOfLifeComputePipeline { pub fn new( compute_queue: Arc, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, size: [u32; 2], ) -> GameOfLifeComputePipeline { let life_in = rand_grid(memory_allocator, size); diff --git a/examples/src/bin/multi_window_game_of_life/pixels_draw.rs b/examples/src/bin/multi_window_game_of_life/pixels_draw.rs index 2a18bfe23a..31c558e583 100644 --- a/examples/src/bin/multi_window_game_of_life/pixels_draw.rs +++ b/examples/src/bin/multi_window_game_of_life/pixels_draw.rs @@ -8,7 +8,7 @@ // according to those terms. use bytemuck::{Pod, Zeroable}; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess}, command_buffer::{ @@ -72,8 +72,8 @@ pub struct PixelsDrawPipeline { gfx_queue: Arc, subpass: Subpass, pipeline: Arc, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, vertices: Arc>, indices: Arc>, } @@ -83,8 +83,8 @@ impl PixelsDrawPipeline { gfx_queue: Arc, subpass: Subpass, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, ) -> PixelsDrawPipeline { let (vertices, indices) = textured_quad(2.0, 2.0); let vertex_buffer = CpuAccessibleBuffer::<[TexturedVertex]>::from_iter( diff --git a/examples/src/bin/multi_window_game_of_life/render_pass.rs b/examples/src/bin/multi_window_game_of_life/render_pass.rs index 6e634e6c40..f0f53dfaad 100644 --- a/examples/src/bin/multi_window_game_of_life/render_pass.rs +++ b/examples/src/bin/multi_window_game_of_life/render_pass.rs @@ -8,7 +8,7 @@ // according to those terms. use crate::pixels_draw::PixelsDrawPipeline; -use std::{rc::Rc, sync::Arc}; +use std::sync::Arc; use vulkano::{ command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, @@ -29,15 +29,15 @@ pub struct RenderPassPlaceOverFrame { gfx_queue: Arc, render_pass: Arc, pixels_draw_pipeline: PixelsDrawPipeline, - command_buffer_allocator: Rc, + command_buffer_allocator: Arc, } impl RenderPassPlaceOverFrame { pub fn new( gfx_queue: Arc, memory_allocator: &impl MemoryAllocator, - command_buffer_allocator: Rc, - descriptor_set_allocator: Rc, + command_buffer_allocator: Arc, + descriptor_set_allocator: Arc, output_format: Format, ) -> RenderPassPlaceOverFrame { let render_pass = vulkano::single_pass_renderpass!(gfx_queue.device().clone(), diff --git a/vulkano/Cargo.toml b/vulkano/Cargo.toml index 403b0ea1bd..f8f4b4a95c 100644 --- a/vulkano/Cargo.toml +++ b/vulkano/Cargo.toml @@ -25,6 +25,7 @@ libloading = "0.7" nalgebra = { version = "0.31.0", optional = true } parking_lot = { version = "0.12", features = ["send_guard"] } smallvec = "1.8" +thread_local = "1.1" [target.'cfg(target_os = "ios")'.dependencies] objc = "0.2.5" diff --git a/vulkano/src/command_buffer/allocator.rs b/vulkano/src/command_buffer/allocator.rs index b15be52b36..823882c705 100644 --- a/vulkano/src/command_buffer/allocator.rs +++ b/vulkano/src/command_buffer/allocator.rs @@ -30,6 +30,7 @@ use crate::{ use crossbeam_queue::SegQueue; use smallvec::SmallVec; use std::{cell::UnsafeCell, marker::PhantomData, mem::ManuallyDrop, sync::Arc, vec::IntoIter}; +use thread_local::ThreadLocal; /// Types that manage the memory of command buffers. /// @@ -100,9 +101,20 @@ pub unsafe trait CommandBufferAlloc: DeviceOwned + Send + Sync + 'static { /// Standard implementation of a command buffer allocator. /// -/// A thread can have as many `StandardCommandBufferAllocator`s as needed, but they can't be shared -/// between threads. This is done so that there are no locks involved when creating command -/// buffers. You are encouraged to create one allocator per frame in flight per thread. +/// The intended way to use this allocator is to have one that is used globally for the duration of +/// the program, in order to avoid creating and destroying [`CommandPool`]s, as that is expensive. +/// +/// Internally, this allocator keeps one `CommandPool` per queue family index per thread, using +/// Thread-Local Storage. When a thread first allocates, an entry is reserved for it in the TLS. +/// After a thread exits and the allocator wasn't dropped yet, its entry is freed, but the pools +/// it used are not dropped. The next time a new thread allocates for the first time, the entry is +/// reused along with the pools. If all threads drop their reference to the allocator, all entries +/// along with the allocator are dropped, even if the threads didn't exit yet, which is why you +/// should keep the allocator alive for as long as you need to allocate so that the pools can keep +/// being reused. +/// +/// This allocator only needs to lock when a thread first allocates or when a thread that +/// previously allocated exits. In all other cases, allocation is lock-free. /// /// Command buffers can't be moved between threads during the building process, but finished command /// buffers can. When a command buffer is dropped, it is returned back to the pool for reuse. @@ -110,21 +122,28 @@ pub unsafe trait CommandBufferAlloc: DeviceOwned + Send + Sync + 'static { pub struct StandardCommandBufferAllocator { device: Arc, /// Each queue family index points directly to its pool. - pools: SmallVec<[UnsafeCell>>; 8]>, + pools: ThreadLocal>; 8]>>, +} + +#[derive(Debug)] +struct Pool { + inner: Arc, } +// This is needed because of the blanket impl of `Send` on `Arc`, which requires that `T` is +// `Send + Sync`. `PoolInner` is `Send + !Sync` because `CommandPool` is `!Sync`. That's fine +// however because we never access the `CommandPool` concurrently, only drop it once the `Arc` +// containing it is dropped. +unsafe impl Send for Pool {} + impl StandardCommandBufferAllocator { /// Creates a new `StandardCommandBufferAllocator`. #[inline] pub fn new(device: Arc) -> Self { - let pools = device - .physical_device() - .queue_family_properties() - .iter() - .map(|_| UnsafeCell::new(None)) - .collect(); - - StandardCommandBufferAllocator { device, pools } + StandardCommandBufferAllocator { + device, + pools: ThreadLocal::new(), + } } } @@ -135,6 +154,10 @@ unsafe impl CommandBufferAllocator for StandardCommandBufferAllocator { type Alloc = StandardCommandBufferAlloc; + /// Allocates command buffers. + /// + /// Returns an iterator that contains the requested amount of allocated command buffers. + /// /// # Panics /// /// - Panics if the queue family index is not active on the device. @@ -151,12 +174,26 @@ unsafe impl CommandBufferAllocator for StandardCommandBufferAllocator { .active_queue_family_indices() .contains(&queue_family_index)); - let pool = unsafe { &mut *self.pools[queue_family_index as usize].get() }; + let pools = self.pools.get_or(|| { + self.device + .physical_device() + .queue_family_properties() + .iter() + .map(|_| UnsafeCell::new(None)) + .collect() + }); + + let pool = unsafe { &mut *pools[queue_family_index as usize].get() }; if pool.is_none() { - *pool = Some(Pool::new(self.device.clone(), queue_family_index)?); + *pool = Some(Pool { + inner: PoolInner::new(self.device.clone(), queue_family_index)?, + }); } - pool.as_ref().unwrap().allocate(level, command_buffer_count) + pool.as_ref() + .unwrap() + .inner + .allocate(level, command_buffer_count) } } @@ -168,7 +205,7 @@ unsafe impl DeviceOwned for StandardCommandBufferAllocator { } #[derive(Debug)] -struct Pool { +struct PoolInner { // The Vulkan pool specific to a device's queue family. inner: CommandPool, // List of existing primary command buffers that are available for reuse. @@ -177,7 +214,7 @@ struct Pool { secondary_pool: SegQueue, } -impl Pool { +impl PoolInner { fn new(device: Arc, queue_family_index: u32) -> Result, OomError> { CommandPool::new( device, @@ -188,7 +225,7 @@ impl Pool { }, ) .map(|inner| { - Arc::new(Pool { + Arc::new(PoolInner { inner, primary_pool: Default::default(), secondary_pool: Default::default(), @@ -224,7 +261,7 @@ impl Pool { cmd: ManuallyDrop::new(cmd), pool: self.clone(), }, - dummy_avoid_send_sync: PhantomData, + _marker: PhantomData, }); } else { break; @@ -249,7 +286,7 @@ impl Pool { cmd: ManuallyDrop::new(cmd), pool: self.clone(), }, - dummy_avoid_send_sync: PhantomData, + _marker: PhantomData, }); } } @@ -267,7 +304,7 @@ pub struct StandardCommandBufferBuilderAlloc { // Therefore we just share the structs. inner: StandardCommandBufferAlloc, // Unimplemented `Send` and `Sync` from the builder. - dummy_avoid_send_sync: PhantomData<*const u8>, + _marker: PhantomData<*const ()>, } unsafe impl CommandBufferBuilderAlloc for StandardCommandBufferBuilderAlloc { @@ -301,7 +338,7 @@ pub struct StandardCommandBufferAlloc { // The actual command buffer. Extracted in the `Drop` implementation. cmd: ManuallyDrop, // We hold a reference to the command pool for our destructor. - pool: Arc, + pool: Arc, } unsafe impl Send for StandardCommandBufferAlloc {} @@ -340,10 +377,9 @@ impl Drop for StandardCommandBufferAlloc { #[cfg(test)] mod tests { - use super::{ - CommandBufferAllocator, CommandBufferBuilderAlloc, StandardCommandBufferAllocator, - }; - use crate::{command_buffer::CommandBufferLevel, VulkanObject}; + use super::*; + use crate::VulkanObject; + use std::thread; #[test] fn reuse_command_buffers() { @@ -366,4 +402,36 @@ mod tests { .unwrap(); assert_eq!(raw, cb2.inner().handle()); } + + #[test] + fn threads_use_different_pools() { + let (device, queue) = gfx_dev_and_queue!(); + + let allocator = StandardCommandBufferAllocator::new(device); + + let pool1 = allocator + .allocate(queue.queue_family_index(), CommandBufferLevel::Primary, 1) + .unwrap() + .next() + .unwrap() + .into_alloc() + .pool + .inner + .handle(); + + thread::spawn(move || { + let pool2 = allocator + .allocate(queue.queue_family_index(), CommandBufferLevel::Primary, 1) + .unwrap() + .next() + .unwrap() + .into_alloc() + .pool + .inner + .handle(); + assert_ne!(pool1, pool2); + }) + .join() + .unwrap(); + } } diff --git a/vulkano/src/descriptor_set/allocator.rs b/vulkano/src/descriptor_set/allocator.rs index 4039d23a9e..0e6db11e9f 100644 --- a/vulkano/src/descriptor_set/allocator.rs +++ b/vulkano/src/descriptor_set/allocator.rs @@ -30,6 +30,7 @@ use crate::{ }; use ahash::HashMap; use std::{cell::UnsafeCell, sync::Arc}; +use thread_local::ThreadLocal; /// Types that manage the memory of descriptor sets. /// @@ -69,12 +70,26 @@ pub trait DescriptorSetAlloc: Send + Sync { /// Standard implementation of a descriptor set allocator. /// -/// Internally, this implementation uses one [`SingleLayoutDescriptorSetPool`] / -/// [`SingleLayoutVariableDescriptorSetPool`] per descriptor set layout. +/// The intended way to use this allocator is to have one that is used globally for the duration of +/// the program, in order to avoid creating and destroying [`DescriptorPool`]s, as that is +/// expensive. +/// +/// Internally, this allocator uses one [`SingleLayoutDescriptorSetPool`] / +/// [`SingleLayoutVariableDescriptorSetPool`] per descriptor set layout per thread, using +/// Thread-Local Storage. When a thread first allocates, an entry is reserved for it in the TLS. +/// After a thread exits and the allocator wasn't dropped yet, its entry is freed, but the pools +/// it used are not dropped. The next time a new thread allocates for the first time, the entry is +/// reused along with the pools. If all threads drop their reference to the allocator, all entries +/// along with the allocator are dropped, even if the threads didn't exit yet, which is why you +/// should keep the allocator alive for as long as you need to allocate so that the pools can keep +/// being reused. +/// +/// This allocator only needs to lock when a thread first allocates or when a thread that +/// previously allocated exits. In all other cases, allocation is lock-free. #[derive(Debug)] pub struct StandardDescriptorSetAllocator { device: Arc, - pools: UnsafeCell, Pool>>, + pools: ThreadLocal, Pool>>>, } #[derive(Debug)] @@ -89,7 +104,7 @@ impl StandardDescriptorSetAllocator { pub fn new(device: Arc) -> StandardDescriptorSetAllocator { StandardDescriptorSetAllocator { device, - pools: UnsafeCell::new(HashMap::default()), + pools: ThreadLocal::new(), } } } @@ -97,6 +112,14 @@ impl StandardDescriptorSetAllocator { unsafe impl DescriptorSetAllocator for StandardDescriptorSetAllocator { type Alloc = StandardDescriptorSetAlloc; + /// Allocates a descriptor set. + /// + /// # Panics + /// + /// - Panics if the provided `layout` is for push descriptors rather than regular descriptor + /// sets. + /// - Panics if the provided `variable_descriptor_count` is greater than the maximum number of + /// variable count descriptors in the set. #[inline] fn allocate( &self, @@ -119,7 +142,8 @@ unsafe impl DescriptorSetAllocator for StandardDescriptorSetAllocator { max_count, ); - let pools = unsafe { &mut *self.pools.get() }; + let pools = self.pools.get_or(|| UnsafeCell::new(HashMap::default())); + let pools = unsafe { &mut *pools.get() }; // We do this instead of using `HashMap::entry` directly because that would involve cloning // an `Arc` every time. `hash_raw_entry` is still not stabilized >:( @@ -181,3 +205,57 @@ impl DescriptorSetAlloc for StandardDescriptorSetAlloc { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + descriptor_set::layout::{ + DescriptorSetLayoutBinding, DescriptorSetLayoutCreateInfo, DescriptorType, + }, + shader::ShaderStages, + VulkanObject, + }; + use std::thread; + + #[test] + fn threads_use_different_pools() { + let (device, _) = gfx_dev_and_queue!(); + + let layout = DescriptorSetLayout::new( + device.clone(), + DescriptorSetLayoutCreateInfo { + bindings: [( + 0, + DescriptorSetLayoutBinding { + stages: ShaderStages::all_graphics(), + ..DescriptorSetLayoutBinding::descriptor_type(DescriptorType::UniformBuffer) + }, + )] + .into(), + ..Default::default() + }, + ) + .unwrap(); + + let allocator = StandardDescriptorSetAllocator::new(device); + + let pool1 = if let PoolAlloc::Fixed(alloc) = allocator.allocate(&layout, 0).unwrap().inner { + alloc.pool().handle() + } else { + unreachable!() + }; + + thread::spawn(move || { + let pool2 = + if let PoolAlloc::Fixed(alloc) = allocator.allocate(&layout, 0).unwrap().inner { + alloc.pool().handle() + } else { + unreachable!() + }; + assert_ne!(pool1, pool2); + }) + .join() + .unwrap(); + } +} diff --git a/vulkano/src/descriptor_set/single_layout_pool.rs b/vulkano/src/descriptor_set/single_layout_pool.rs index 6396c2c7a4..515ab97b3f 100644 --- a/vulkano/src/descriptor_set/single_layout_pool.rs +++ b/vulkano/src/descriptor_set/single_layout_pool.rs @@ -52,8 +52,9 @@ pub struct SingleLayoutDescriptorSetPool { layout: Arc, } -// This is needed because of the blanket impl on `Arc`, which requires that `T` is `Send + Sync`. -// `SingleLayoutPool` is `Send + !Sync`. +// This is needed because of the blanket impl of `Send` on `Arc`, which requires that `T` is +// `Send + Sync`. `SingleLayoutPool` is `Send + !Sync` because `DescriptorPool` is `!Sync`. That's +// fine however because we never access the `DescriptorPool`. unsafe impl Send for SingleLayoutDescriptorSetPool {} impl SingleLayoutDescriptorSetPool { @@ -191,6 +192,13 @@ pub(crate) struct SingleLayoutPoolAlloc { pool: Arc, } +impl SingleLayoutPoolAlloc { + #[cfg(test)] + pub(crate) fn pool(&self) -> &DescriptorPool { + &self.pool._inner + } +} + // This is required for the same reason as for `SingleLayoutDescriptorSetPool`. unsafe impl Send for SingleLayoutPoolAlloc {} // `DescriptorPool` is `!Sync`, but we never access it, only keep it alive. @@ -278,8 +286,10 @@ pub struct SingleLayoutVariableDescriptorSetPool { allocated_sets: Cell, } -// This is needed because of the blanket impl on `Arc`, which requires that `T` is `Send + Sync`. -// `SingleLayoutVariablePool` is `Send + !Sync`. +// This is needed because of the blanket impl of `Send` on `Arc`, which requires that `T` is +// `Send + Sync`. `SingleLayoutVariablePool` is `Send + !Sync` because `DescriptorPool` is `!Sync`. +// That's fine however because we never access the `DescriptorPool` concurrently, only drop it once +// the `Arc` containing it is dropped. unsafe impl Send for SingleLayoutVariableDescriptorSetPool {} impl SingleLayoutVariableDescriptorSetPool {