From 8c02ef119b8abf8e8240b003f2bfa2f0b2c7920e Mon Sep 17 00:00:00 2001 From: marc0246 <40955683+marc0246@users.noreply.github.com> Date: Sun, 25 Aug 2024 02:13:57 +0200 Subject: [PATCH] Task graph [4/10]: compilation --- Cargo.lock | 11 +- Cargo.toml | 6 +- examples/async-update/Cargo.toml | 1 + examples/async-update/main.rs | 942 +++++---- vulkano-taskgraph/Cargo.toml | 2 +- vulkano-taskgraph/src/graph/compile.rs | 2622 ++++++++++++++++++++++++ vulkano-taskgraph/src/graph/execute.rs | 1249 ++++++----- vulkano-taskgraph/src/graph/mod.rs | 738 +++---- vulkano-taskgraph/src/lib.rs | 833 ++++---- vulkano-taskgraph/src/resource.rs | 1197 ++++------- vulkano/src/sync/pipeline.rs | 44 +- 11 files changed, 5107 insertions(+), 2538 deletions(-) create mode 100644 vulkano-taskgraph/src/graph/compile.rs diff --git a/Cargo.lock b/Cargo.lock index 60d44b8131..1378b3c8cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,6 +114,7 @@ dependencies = [ "rand", "vulkano", "vulkano-shaders", + "vulkano-taskgraph", "winit 0.29.15", ] @@ -402,7 +403,7 @@ dependencies = [ [[package]] name = "concurrent-slotmap" version = "0.1.0" -source = "git+https://github.com/vulkano-rs/concurrent-slotmap?rev=bf52f0a55713bb29dde3e38bc3497b03473d1628#bf52f0a55713bb29dde3e38bc3497b03473d1628" +source = "git+https://github.com/vulkano-rs/concurrent-slotmap?rev=fa906d916d8d126d3cc3a2b4ab9a29fa27bee62d#fa906d916d8d126d3cc3a2b4ab9a29fa27bee62d" dependencies = [ "virtual-buffer", ] @@ -1684,12 +1685,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "rangemap" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60fcc7d6849342eff22c4350c8b9a989ee8ceabc4b481253e8946b9fe83d684" - [[package]] name = "raw-window-handle" version = "0.4.3" @@ -2408,10 +2403,10 @@ dependencies = [ name = "vulkano-taskgraph" version = "0.34.0" dependencies = [ + "ahash", "ash", "concurrent-slotmap", "parking_lot", - "rangemap", "smallvec", "thread_local", "vulkano", diff --git a/Cargo.toml b/Cargo.toml index 1667baccce..8b466edbee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,10 @@ path = "vulkano-macros" version = "0.34" path = "vulkano-shaders" +[workspace.dependencies.vulkano-taskgraph] +version = "0.34" +path = "vulkano-taskgraph" + [workspace.dependencies.vulkano-util] version = "0.34" path = "vulkano-util" @@ -42,7 +46,7 @@ ahash = "0.8" # https://github.com/KhronosGroup/Vulkan-Headers/commits/main/registry/vk.xml ash = "0.38.0" bytemuck = "1.9" -concurrent-slotmap = { git = "https://github.com/vulkano-rs/concurrent-slotmap", rev = "bf52f0a55713bb29dde3e38bc3497b03473d1628" } +concurrent-slotmap = { git = "https://github.com/vulkano-rs/concurrent-slotmap", rev = "fa906d916d8d126d3cc3a2b4ab9a29fa27bee62d" } core-graphics-types = "0.1" crossbeam-queue = "0.3" half = "2.0" diff --git a/examples/async-update/Cargo.toml b/examples/async-update/Cargo.toml index c7da2e7b46..7b9fb935a2 100644 --- a/examples/async-update/Cargo.toml +++ b/examples/async-update/Cargo.toml @@ -16,4 +16,5 @@ glam = { workspace = true } rand = { workspace = true } vulkano = { workspace = true, features = ["macros"] } vulkano-shaders = { workspace = true } +vulkano-taskgraph = { workspace = true } winit = { workspace = true, default-features = true } diff --git a/examples/async-update/main.rs b/examples/async-update/main.rs index d11636ac45..39875a2615 100644 --- a/examples/async-update/main.rs +++ b/examples/async-update/main.rs @@ -30,21 +30,21 @@ use glam::f32::Mat4; use rand::Rng; use std::{ + alloc::Layout, 
error::Error, - hint, + slice, sync::{ - atomic::{AtomicBool, AtomicU64, Ordering}, + atomic::{AtomicBool, Ordering}, mpsc, Arc, }, thread, time::{SystemTime, UNIX_EPOCH}, }; use vulkano::{ - buffer::{Buffer, BufferContents, BufferCreateInfo, BufferUsage}, + buffer::{Buffer, BufferContents, BufferCreateInfo, BufferUsage, Subbuffer}, command_buffer::{ - allocator::StandardCommandBufferAllocator, BufferImageCopy, ClearColorImageInfo, - CommandBufferBeginInfo, CommandBufferLevel, CommandBufferUsage, CopyBufferToImageInfo, - RecordingCommandBuffer, RenderPassBeginInfo, + sys::RawRecordingCommandBuffer, BufferImageCopy, ClearColorImageInfo, + CopyBufferToImageInfo, RenderPassBeginInfo, }, descriptor_set::{ allocator::StandardDescriptorSetAllocator, DescriptorSet, WriteDescriptorSet, @@ -60,7 +60,7 @@ use vulkano::{ Image, ImageCreateInfo, ImageType, ImageUsage, }, instance::{Instance, InstanceCreateFlags, InstanceCreateInfo}, - memory::allocator::{AllocationCreateInfo, MemoryTypeFilter, StandardMemoryAllocator}, + memory::allocator::{AllocationCreateInfo, DeviceLayout, MemoryTypeFilter}, pipeline::{ graphics::{ color_blend::{ColorBlendAttachmentState, ColorBlendState}, @@ -76,11 +76,14 @@ use vulkano::{ PipelineShaderStageCreateInfo, }, render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, - swapchain::{ - acquire_next_image, Surface, Swapchain, SwapchainCreateInfo, SwapchainPresentInfo, - }, - sync::{self, GpuFuture}, - Validated, VulkanError, VulkanLibrary, + swapchain::{Surface, Swapchain, SwapchainCreateInfo}, + sync::Sharing, + DeviceSize, Validated, VulkanError, VulkanLibrary, +}; +use vulkano_taskgraph::{ + graph::{CompileInfo, ExecuteError, TaskGraph}, + resource::{AccessType, Flight, HostAccessType, ImageLayoutType, Resources}, + resource_map, Id, QueueFamilyType, Task, TaskContext, TaskResult, }; use winit::{ event::{ElementState, Event, KeyEvent, WindowEvent}, @@ -90,6 +93,7 @@ use winit::{ }; const TRANSFER_GRANULARITY: u32 = 4096; +const MAX_FRAMES_IN_FLIGHT: u32 = 2; fn main() -> Result<(), impl Error> { let event_loop = EventLoop::new().unwrap(); @@ -190,6 +194,16 @@ fn main() -> Result<(), impl Error> { queue_family_index: transfer_family_index, ..Default::default() }); + } else { + let queue_family_properties = + &physical_device.queue_family_properties()[graphics_family_index as usize]; + + // Even if we can't get an async transfer queue family, it's still better to use + // different queues on the same queue family. This way, at least the threads on the + // host don't have lock the same queue when submitting. 
+ if queue_family_properties.queue_count > 1 { + queue_create_infos[0].queues.push(0.5); + } } Device::new( @@ -213,45 +227,42 @@ fn main() -> Result<(), impl Error> { {transfer_family_index} for transfers", ); - let (mut swapchain, images) = { + let resources = Resources::new(device.clone(), Default::default()); + + let graphics_flight_id = resources.create_flight(MAX_FRAMES_IN_FLIGHT).unwrap(); + let transfer_flight_id = resources.create_flight(1).unwrap(); + + let swapchain_format = device + .physical_device() + .surface_formats(&surface, Default::default()) + .unwrap()[0] + .0; + let mut swapchain_id = { let surface_capabilities = device .physical_device() .surface_capabilities(&surface, Default::default()) .unwrap(); - let image_format = device - .physical_device() - .surface_formats(&surface, Default::default()) - .unwrap()[0] - .0; - Swapchain::new( - device.clone(), - surface, - SwapchainCreateInfo { - min_image_count: surface_capabilities.min_image_count.max(2), - image_format, - image_extent: window.inner_size().into(), - image_usage: ImageUsage::COLOR_ATTACHMENT, - composite_alpha: surface_capabilities - .supported_composite_alpha - .into_iter() - .next() - .unwrap(), - ..Default::default() - }, - ) - .unwrap() + resources + .create_swapchain( + graphics_flight_id, + surface, + SwapchainCreateInfo { + min_image_count: surface_capabilities.min_image_count.max(3), + image_format: swapchain_format, + image_extent: window.inner_size().into(), + image_usage: ImageUsage::COLOR_ATTACHMENT, + composite_alpha: surface_capabilities + .supported_composite_alpha + .into_iter() + .next() + .unwrap(), + ..Default::default() + }, + ) + .unwrap() }; - let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); - - #[derive(BufferContents, Vertex)] - #[repr(C)] - struct MyVertex { - #[format(R32G32_SFLOAT)] - position: [f32; 2], - } - let vertices = [ MyVertex { position: [-0.5, -0.5], @@ -266,27 +277,26 @@ fn main() -> Result<(), impl Error> { position: [0.5, 0.5], }, ]; - let vertex_buffer = Buffer::from_iter( - memory_allocator.clone(), - BufferCreateInfo { - usage: BufferUsage::VERTEX_BUFFER, - ..Default::default() - }, - AllocationCreateInfo { - memory_type_filter: MemoryTypeFilter::PREFER_DEVICE - | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, - ..Default::default() - }, - vertices, - ) - .unwrap(); + let vertex_buffer_id = resources + .create_buffer( + BufferCreateInfo { + usage: BufferUsage::VERTEX_BUFFER, + ..Default::default() + }, + AllocationCreateInfo { + memory_type_filter: MemoryTypeFilter::PREFER_DEVICE + | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, + ..Default::default() + }, + DeviceLayout::from_layout(Layout::for_value(&vertices)).unwrap(), + ) + .unwrap(); // Create a pool of uniform buffers, one per frame in flight. This way we always have an // available buffer to write during each frame while reusing them as much as possible. 
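The per-frame pooling described in the comment above can be pictured without any Vulkan involved. A minimal sketch, assuming a hypothetical `UniformPool` type; the patch itself selects the slot with the flight's `current_frame_index()` rather than a modulo, but the indexing idea is the same:

```rust
// Hypothetical, self-contained model of "one uniform buffer per frame in flight".
const MAX_FRAMES_IN_FLIGHT: u64 = 2;

struct UniformPool {
    // Stand-ins for the real host-visible uniform buffers.
    buffers: [[f32; 16]; MAX_FRAMES_IN_FLIGHT as usize],
}

impl UniformPool {
    // The slot for a frame is the frame counter modulo the number of frames in flight; by
    // the time the counter wraps back to a slot, the flight's fence guarantees the GPU has
    // finished the frame that last wrote it, so the slot is free to reuse.
    fn slot_for_frame(&mut self, frame: u64) -> &mut [f32; 16] {
        &mut self.buffers[(frame % MAX_FRAMES_IN_FLIGHT) as usize]
    }
}

fn main() {
    let mut pool = UniformPool {
        buffers: [[0.0; 16]; MAX_FRAMES_IN_FLIGHT as usize],
    };

    for frame in 0..6u64 {
        pool.slot_for_frame(frame)[0] = frame as f32;
        println!("frame {frame} writes uniform buffer {}", frame % MAX_FRAMES_IN_FLIGHT);
    }
}
```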
- let uniform_buffers = (0..swapchain.image_count()) - .map(|_| { - Buffer::new_sized( - memory_allocator.clone(), + let uniform_buffer_ids = [(); MAX_FRAMES_IN_FLIGHT as usize].map(|_| { + resources + .create_buffer( BufferCreateInfo { usage: BufferUsage::UNIFORM_BUFFER, ..Default::default() @@ -296,121 +306,93 @@ fn main() -> Result<(), impl Error> { | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, + DeviceLayout::from_layout(Layout::new::()).unwrap(), ) .unwrap() - }) - .collect::>(); + }); + + let texture_create_info = ImageCreateInfo { + image_type: ImageType::Dim2d, + format: Format::R8G8B8A8_UNORM, + extent: [TRANSFER_GRANULARITY * 2, TRANSFER_GRANULARITY * 2, 1], + usage: ImageUsage::TRANSFER_DST | ImageUsage::SAMPLED, + sharing: if graphics_family_index != transfer_family_index { + Sharing::Concurrent( + [graphics_family_index, transfer_family_index] + .into_iter() + .collect(), + ) + } else { + Sharing::Exclusive + }, + ..Default::default() + }; // Create two textures, where at any point in time one is used exclusively for reading and one // is used exclusively for writing, swapping the two after each update. - let textures = [(); 2].map(|_| { - Image::new( - memory_allocator.clone(), - ImageCreateInfo { - image_type: ImageType::Dim2d, - format: Format::R8G8B8A8_UNORM, - extent: [TRANSFER_GRANULARITY * 2, TRANSFER_GRANULARITY * 2, 1], - usage: ImageUsage::TRANSFER_DST | ImageUsage::SAMPLED, - ..Default::default() - }, - AllocationCreateInfo::default(), - ) - .unwrap() + let texture_ids = [(); 2].map(|_| { + resources + .create_image(texture_create_info.clone(), AllocationCreateInfo::default()) + .unwrap() }); // The index of the currently most up-to-date texture. The worker thread swaps the index after // every finished write, which is always done to the, at that point in time, unused texture. let current_texture_index = Arc::new(AtomicBool::new(false)); - // Current generation, used to notify the worker thread of when a texture is no longer read. - let current_generation = Arc::new(AtomicU64::new(0)); - - let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new( - device.clone(), - Default::default(), - )); + // Initialize the resources. + unsafe { + vulkano_taskgraph::execute( + graphics_queue.clone(), + resources.clone(), + graphics_flight_id, + |cbf, tcx| { + tcx.write_buffer::<[MyVertex]>(vertex_buffer_id, ..)? + .copy_from_slice(&vertices); + + for &texture_id in &texture_ids { + let texture = tcx.image(texture_id)?.image(); + cbf.clear_color_image(&ClearColorImageInfo::image(texture.clone()))?; + } - // Initialize the textures. - { - let mut builder = RecordingCommandBuffer::new( - command_buffer_allocator.clone(), - graphics_queue.queue_family_index(), - CommandBufferLevel::Primary, - CommandBufferBeginInfo { - usage: CommandBufferUsage::OneTimeSubmit, - ..Default::default() + Ok(()) }, + [(vertex_buffer_id, HostAccessType::Write)], + [], + [ + ( + texture_ids[0], + AccessType::ClearTransferWrite, + ImageLayoutType::Optimal, + ), + ( + texture_ids[1], + AccessType::ClearTransferWrite, + ImageLayoutType::Optimal, + ), + ], ) - .unwrap(); - for texture in &textures { - builder - .clear_color_image(ClearColorImageInfo::image(texture.clone())) - .unwrap(); - } - let command_buffer = builder.end().unwrap(); - - // This waits for the queue to become idle, which is fine for startup initializations. - let _ = command_buffer.execute(graphics_queue.clone()).unwrap(); } + .unwrap(); // Start the worker thread. 
let (channel, receiver) = mpsc::channel(); run_worker( receiver, transfer_queue, - textures.clone(), + resources.clone(), + graphics_flight_id, + transfer_flight_id, + &texture_create_info, + texture_ids, current_texture_index.clone(), - current_generation.clone(), - swapchain.image_count(), - memory_allocator, - command_buffer_allocator.clone(), ); - mod vs { - vulkano_shaders::shader! { - ty: "vertex", - src: r" - #version 450 - - layout(location = 0) in vec2 position; - layout(location = 0) out vec2 tex_coords; - - layout(set = 0, binding = 0) uniform Data { - mat4 transform; - }; - - void main() { - gl_Position = vec4(transform * vec4(position, 0.0, 1.0)); - tex_coords = position + vec2(0.5); - } - ", - } - } - - mod fs { - vulkano_shaders::shader! { - ty: "fragment", - src: r" - #version 450 - - layout(location = 0) in vec2 tex_coords; - layout(location = 0) out vec4 f_color; - - layout(set = 1, binding = 0) uniform sampler s; - layout(set = 1, binding = 1) uniform texture2D tex; - - void main() { - f_color = texture(sampler2D(tex, s), tex_coords); - } - ", - } - } - let render_pass = vulkano::single_pass_renderpass!( device.clone(), attachments: { color: { - format: swapchain.image_format(), + format: swapchain_format, samples: 1, load_op: Clear, store_op: Store, @@ -476,7 +458,8 @@ fn main() -> Result<(), impl Error> { extent: [0.0, 0.0], depth_range: 0.0..=1.0, }; - let mut framebuffers = window_size_dependent_setup(&images, render_pass.clone(), &mut viewport); + let framebuffers = + window_size_dependent_setup(&resources, swapchain_id, &render_pass, &mut viewport); let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new( device.clone(), @@ -485,36 +468,91 @@ fn main() -> Result<(), impl Error> { // A byproduct of always using the same set of uniform buffers is that we can also create one // descriptor set for each, reusing them in the same way as the buffers. - let uniform_buffer_sets = uniform_buffers - .iter() - .map(|buffer| { - DescriptorSet::new( - descriptor_set_allocator.clone(), - pipeline.layout().set_layouts()[0].clone(), - [WriteDescriptorSet::buffer(0, buffer.clone())], - [], - ) - .unwrap() - }) - .collect::>(); + let uniform_buffer_sets = uniform_buffer_ids.map(|buffer_id| { + let buffer_state = resources.buffer(buffer_id).unwrap(); + let buffer = buffer_state.buffer(); + + DescriptorSet::new( + descriptor_set_allocator.clone(), + pipeline.layout().set_layouts()[0].clone(), + [WriteDescriptorSet::buffer(0, buffer.clone().into())], + [], + ) + .unwrap() + }); // Create the descriptor sets for sampling the textures. 
let sampler = Sampler::new(device.clone(), SamplerCreateInfo::simple_repeat_linear()).unwrap(); - let sampler_sets = textures.map(|texture| { + let sampler_sets = texture_ids.map(|texture_id| { + let texture_state = resources.image(texture_id).unwrap(); + let texture = texture_state.image(); + DescriptorSet::new( descriptor_set_allocator.clone(), pipeline.layout().set_layouts()[1].clone(), [ WriteDescriptorSet::sampler(0, sampler.clone()), - WriteDescriptorSet::image_view(1, ImageView::new_default(texture).unwrap()), + WriteDescriptorSet::image_view(1, ImageView::new_default(texture.clone()).unwrap()), ], [], ) .unwrap() }); + let mut rcx = RenderContext { + viewport, + framebuffers, + }; + + let mut task_graph = TaskGraph::new(resources.clone(), 1, 4); + + let virtual_swapchain_id = task_graph.add_swapchain(&SwapchainCreateInfo::default()); + let virtual_texture_id = task_graph.add_image(&texture_create_info); + let virtual_uniform_buffer_id = task_graph.add_buffer(&BufferCreateInfo::default()); + + task_graph.add_host_buffer_access(virtual_uniform_buffer_id, HostAccessType::Write); + + task_graph + .create_task_node( + "Render", + QueueFamilyType::Graphics, + RenderTask { + swapchain_id: virtual_swapchain_id, + vertex_buffer_id, + current_texture_index: current_texture_index.clone(), + pipeline: pipeline.clone(), + uniform_buffer_id: virtual_uniform_buffer_id, + uniform_buffer_sets: uniform_buffer_sets.clone(), + sampler_sets: sampler_sets.clone(), + }, + ) + .image_access( + virtual_swapchain_id.current_image_id(), + AccessType::ColorAttachmentWrite, + ImageLayoutType::Optimal, + ) + .buffer_access(vertex_buffer_id, AccessType::VertexAttributeRead) + .image_access( + virtual_texture_id, + AccessType::FragmentShaderSampledRead, + ImageLayoutType::Optimal, + ) + .buffer_access( + virtual_uniform_buffer_id, + AccessType::VertexShaderUniformRead, + ); + + let task_graph = unsafe { + task_graph.compile(CompileInfo { + queues: vec![graphics_queue.clone()], + present_queue: Some(graphics_queue.clone()), + flight_id: graphics_flight_id, + ..Default::default() + }) + } + .unwrap(); + let mut recreate_swapchain = false; - let mut previous_frame_end = Some(sync::now(device.clone()).boxed()); println!("\nPress space to update part of the texture"); @@ -559,178 +597,222 @@ fn main() -> Result<(), impl Error> { return; } + let flight = resources.flight(graphics_flight_id).unwrap(); + if recreate_swapchain { - let (new_swapchain, new_images) = swapchain - .recreate(SwapchainCreateInfo { + swapchain_id = resources + .recreate_swapchain(swapchain_id, |create_info| SwapchainCreateInfo { image_extent, - ..swapchain.create_info() + ..create_info }) .expect("failed to recreate swapchain"); - swapchain = new_swapchain; - framebuffers = window_size_dependent_setup( - &new_images, - render_pass.clone(), - &mut viewport, + flight.destroy_objects(rcx.framebuffers.drain(..)); + + rcx.framebuffers = window_size_dependent_setup( + &resources, + swapchain_id, + &render_pass, + &mut rcx.viewport, ); + recreate_swapchain = false; } - let (image_index, suboptimal, acquire_future) = - match acquire_next_image(swapchain.clone(), None).map_err(Validated::unwrap) { - Ok(r) => r, - Err(VulkanError::OutOfDate) => { - recreate_swapchain = true; - return; - } - Err(e) => panic!("failed to acquire next image: {e}"), - }; - - if suboptimal { - recreate_swapchain = true; - } + let frame_index = flight.current_frame_index(); + let texture_index = current_texture_index.load(Ordering::Relaxed); - let mut builder = 
RecordingCommandBuffer::new( - command_buffer_allocator.clone(), - graphics_queue.queue_family_index(), - CommandBufferLevel::Primary, - CommandBufferBeginInfo { - usage: CommandBufferUsage::OneTimeSubmit, - ..Default::default() - }, + let resource_map = resource_map!( + &task_graph, + virtual_swapchain_id => swapchain_id, + virtual_texture_id => texture_ids[texture_index as usize], + virtual_uniform_buffer_id => uniform_buffer_ids[frame_index as usize], ) .unwrap(); - builder - .begin_render_pass( - RenderPassBeginInfo { - clear_values: vec![Some([0.0, 0.0, 0.0, 1.0].into())], - ..RenderPassBeginInfo::framebuffer( - framebuffers[image_index as usize].clone(), - ) - }, - Default::default(), - ) - .unwrap() - .set_viewport(0, [viewport.clone()].into_iter().collect()) - .unwrap() - .bind_pipeline_graphics(pipeline.clone()) - .unwrap() - .bind_descriptor_sets( - PipelineBindPoint::Graphics, - pipeline.layout().clone(), - 0, - ( - // Bind the uniform buffer designated for this frame. - uniform_buffer_sets[image_index as usize].clone(), - // Bind the currently most up-to-date texture. - sampler_sets[current_texture_index.load(Ordering::Acquire) as usize] - .clone(), - ), - ) - .unwrap() - .bind_vertex_buffers(0, vertex_buffer.clone()) - .unwrap(); - - unsafe { - builder.draw(vertex_buffer.len() as u32, 1, 0, 0).unwrap(); - } - - builder.end_render_pass(Default::default()).unwrap(); - - let command_buffer = builder.end().unwrap(); - acquire_future.wait(None).unwrap(); - previous_frame_end.as_mut().unwrap().cleanup_finished(); - - // Write to the uniform buffer designated for this frame. This must happen after - // waiting for the acquire future and cleaning up, otherwise the buffer is still - // going to be marked as in use by the device. - *uniform_buffers[image_index as usize].write().unwrap() = vs::Data { - transform: { - const DURATION: f64 = 5.0; - - let elapsed = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs_f64(); - let remainder = elapsed.rem_euclid(DURATION); - let delta = (remainder / DURATION) as f32; - let angle = delta * std::f32::consts::PI * 2.0; - - Mat4::from_rotation_z(angle).to_cols_array_2d() - }, - }; + flight.wait(None).unwrap(); - // Increment the generation, signalling that the previous frame has finished. This - // must be done after waiting on the acquire future, otherwise the oldest frame - // would still be in flight. - // - // NOTE: We are relying on the fact that this thread is the only one doing stores. - current_generation.fetch_add(1, Ordering::Release); - - let future = previous_frame_end - .take() - .unwrap() - .join(acquire_future) - .then_execute(graphics_queue.clone(), command_buffer) - .unwrap() - .then_swapchain_present( - graphics_queue.clone(), - SwapchainPresentInfo::swapchain_image_index(swapchain.clone(), image_index), - ) - .then_signal_fence_and_flush(); - - match future.map_err(Validated::unwrap) { - Ok(future) => { - previous_frame_end = Some(future.boxed()); - } - Err(VulkanError::OutOfDate) => { + match unsafe { + task_graph.execute(resource_map, &rcx, || window.pre_present_notify()) + } { + Ok(()) => {} + Err(ExecuteError::Swapchain { + error: Validated::Error(VulkanError::OutOfDate), + .. 
+ }) => { recreate_swapchain = true; - previous_frame_end = Some(sync::now(device.clone()).boxed()); } Err(e) => { - println!("failed to flush future: {e}"); - // previous_frame_end = Some(sync::now(device.clone()).boxed()); + panic!("failed to execute next frame: {e:?}"); } } } - Event::AboutToWait => window.request_redraw(), + Event::AboutToWait => { + window.request_redraw(); + } + Event::LoopExiting => { + let flight = resources.flight(graphics_flight_id).unwrap(); + flight.destroy_object(pipeline.clone()); + flight.destroy_objects(rcx.framebuffers.drain(..)); + flight.destroy_objects(uniform_buffer_sets.clone()); + flight.destroy_objects(sampler_sets.clone()); + } _ => (), } }) } +#[derive(Clone, Copy, BufferContents, Vertex)] +#[repr(C)] +struct MyVertex { + #[format(R32G32_SFLOAT)] + position: [f32; 2], +} + +mod vs { + vulkano_shaders::shader! { + ty: "vertex", + src: r" + #version 450 + + layout(location = 0) in vec2 position; + layout(location = 0) out vec2 tex_coords; + + layout(set = 0, binding = 0) uniform Data { + mat4 transform; + }; + + void main() { + gl_Position = vec4(transform * vec4(position, 0.0, 1.0)); + tex_coords = position + vec2(0.5); + } + ", + } +} + +mod fs { + vulkano_shaders::shader! { + ty: "fragment", + src: r" + #version 450 + + layout(location = 0) in vec2 tex_coords; + layout(location = 0) out vec4 f_color; + + layout(set = 1, binding = 0) uniform sampler s; + layout(set = 1, binding = 1) uniform texture2D tex; + + void main() { + f_color = texture(sampler2D(tex, s), tex_coords); + } + ", + } +} + +struct RenderContext { + viewport: Viewport, + framebuffers: Vec>, +} + +struct RenderTask { + swapchain_id: Id, + vertex_buffer_id: Id, + current_texture_index: Arc, + pipeline: Arc, + uniform_buffer_id: Id, + uniform_buffer_sets: [Arc; MAX_FRAMES_IN_FLIGHT as usize], + sampler_sets: [Arc; 2], +} + +impl Task for RenderTask { + type World = RenderContext; + + unsafe fn execute( + &self, + cbf: &mut RawRecordingCommandBuffer, + tcx: &mut TaskContext<'_>, + rcx: &Self::World, + ) -> TaskResult { + let frame_index = tcx.current_frame_index(); + let swapchain_state = tcx.swapchain(self.swapchain_id)?; + let image_index = swapchain_state.current_image_index().unwrap(); + let vertex_buffer = Subbuffer::from(tcx.buffer(self.vertex_buffer_id)?.buffer().clone()); + + // Write to the uniform buffer designated for this frame. + *tcx.write_buffer(self.uniform_buffer_id, ..)? = vs::Data { + transform: { + const DURATION: f64 = 5.0; + + let elapsed = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + let remainder = elapsed.rem_euclid(DURATION); + let delta = (remainder / DURATION) as f32; + let angle = delta * std::f32::consts::PI * 2.0; + + Mat4::from_rotation_z(angle).to_cols_array_2d() + }, + }; + + cbf.begin_render_pass( + &RenderPassBeginInfo { + clear_values: vec![Some([0.0, 0.0, 0.0, 1.0].into())], + ..RenderPassBeginInfo::framebuffer(rcx.framebuffers[image_index as usize].clone()) + }, + &Default::default(), + )? + .set_viewport(0, slice::from_ref(&rcx.viewport))? + .bind_pipeline_graphics(&self.pipeline)? + .bind_descriptor_sets( + PipelineBindPoint::Graphics, + self.pipeline.layout(), + 0, + &[ + // Bind the uniform buffer designated for this frame. + self.uniform_buffer_sets[frame_index as usize] + .clone() + .into(), + // Bind the currently most up-to-date texture. + self.sampler_sets[self.current_texture_index.load(Ordering::Relaxed) as usize] + .clone() + .into(), + ], + )? 
+        .bind_vertex_buffers(0, slice::from_ref(&vertex_buffer))?;
+
+        let vertex_count = vertex_buffer.reinterpret_ref::<[MyVertex]>().len();
+        unsafe { cbf.draw(vertex_count as u32, 1, 0, 0) }?;
+
+        cbf.end_render_pass(&Default::default())?;
+
+        Ok(())
+    }
+}
+
 #[allow(clippy::too_many_arguments)]
 fn run_worker(
     channel: mpsc::Receiver<()>,
     transfer_queue: Arc<Queue>,
-    textures: [Arc<Image>; 2],
+    resources: Arc<Resources>,
+    graphics_flight_id: Id<Flight>,
+    transfer_flight_id: Id<Flight>,
+    texture_create_info: &ImageCreateInfo,
+    texture_ids: [Id<Image>; 2],
     current_texture_index: Arc<AtomicBool>,
-    current_generation: Arc<AtomicU64>,
-    swapchain_image_count: u32,
-    memory_allocator: Arc<StandardMemoryAllocator>,
-    command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
 ) {
-    thread::spawn(move || {
-        const CORNER_OFFSETS: [[u32; 3]; 4] = [
-            [0, 0, 0],
-            [TRANSFER_GRANULARITY, 0, 0],
-            [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 0],
-            [0, TRANSFER_GRANULARITY, 0],
-        ];
-
-        // We are going to be updating one of 4 corners of the texture at any point in time. For
-        // that, we will use a staging buffer and initiate a copy. However, since our texture is
-        // eventually consistent and there are 2 replicas, that means that every time we update one
-        // of the replicas the other replica is going to be behind by one update. Therefore we
-        // actually need 2 staging buffers as well: one for the update that happened to the
-        // currently up-to-date texture (at `current_index`) and one for the update that is about
-        // to happen to the currently out-of-date texture (at `!current_index`), so that we can
-        // apply both the current and the upcoming update to the out-of-date texture. Then the
-        // out-of-date texture is the current up-to-date texture and vice-versa, cycle repeating.
-        let staging_buffers = [(); 2].map(|_| {
-            Buffer::from_iter(
-                memory_allocator.clone(),
+    // We are going to be updating one of 4 corners of the texture at any point in time. For that,
+    // we will use a staging buffer and initiate a copy. However, since our texture is eventually
+    // consistent and there are 2 replicas, that means that every time we update one of the
+    // replicas the other replica is going to be behind by one update. Therefore we actually need 2
+    // staging buffers as well: one for the update that happened to the currently up-to-date
+    // texture (at `current_index`) and one for the update that is about to happen to the currently
+    // out-of-date texture (at `!current_index`), so that we can apply both the current and the
+    // upcoming update to the out-of-date texture. Then the out-of-date texture is the current
+    // up-to-date texture and vice-versa, cycle repeating.
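Purely as an illustration of that rotation (plain Rust, no Vulkan, all names hypothetical), each update can be written down as: fill one staging buffer, replay the other, copy both into the replica that is not being rendered from, then swap:

```rust
fn main() {
    // Which of the two texture replicas the renderer is currently sampling from.
    let mut current_index = false;

    for update in 0..4u32 {
        let front = current_index as usize; // staging buffer that receives the new corner
        let back = !current_index as usize; // staging buffer still holding the previous corner
        let target = !current_index as usize; // the replica the renderer is *not* using

        // The first update has no previous corner to replay; every later one copies both,
        // which is what brings the lagging replica back up to date before the swap.
        if update == 0 {
            println!("update 0: fill staging[{front}], copy it into texture[{target}], swap");
        } else {
            println!(
                "update {update}: fill staging[{front}], copy staging[{front}] and \
                 staging[{back}] into texture[{target}], swap"
            );
        }

        current_index = !current_index;
    }
}
```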
+ let staging_buffer_ids = [(); 2].map(|_| { + resources + .create_buffer( BufferCreateInfo { usage: BufferUsage::TRANSFER_SRC, ..Default::default() @@ -740,77 +822,62 @@ fn run_worker( | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, - (0..TRANSFER_GRANULARITY * TRANSFER_GRANULARITY).map(|_| [0u8; 4]), + DeviceLayout::from_size_alignment( + TRANSFER_GRANULARITY as DeviceSize * TRANSFER_GRANULARITY as DeviceSize * 4, + 1, + ) + .unwrap(), ) .unwrap() - }); + }); + + let mut task_graph = TaskGraph::new(resources.clone(), 1, 3); + + let virtual_front_staging_buffer_id = task_graph.add_buffer(&BufferCreateInfo::default()); + let virtual_back_staging_buffer_id = task_graph.add_buffer(&BufferCreateInfo::default()); + let virtual_texture_id = task_graph.add_image(texture_create_info); + task_graph.add_host_buffer_access(virtual_front_staging_buffer_id, HostAccessType::Write); + + task_graph + .create_task_node( + "Image Upload", + QueueFamilyType::Transfer, + UploadTask { + front_staging_buffer_id: virtual_front_staging_buffer_id, + back_staging_buffer_id: virtual_back_staging_buffer_id, + texture_id: virtual_texture_id, + }, + ) + .buffer_access( + virtual_front_staging_buffer_id, + AccessType::CopyTransferRead, + ) + .buffer_access(virtual_back_staging_buffer_id, AccessType::CopyTransferRead) + .image_access( + virtual_texture_id, + AccessType::CopyTransferWrite, + ImageLayoutType::Optimal, + ); + + let task_graph = unsafe { + task_graph.compile(CompileInfo { + queues: vec![transfer_queue], + flight_id: transfer_flight_id, + ..Default::default() + }) + } + .unwrap(); + + thread::spawn(move || { let mut current_corner = 0; - let mut rng = rand::thread_rng(); - let mut last_generation = 0; + let mut last_frame = 0; // The worker thread is awakened by sending a signal through the channel. In a real program // you would likely send some actual data over the channel, instructing the worker what to // do, but our work is hard-coded. while let Ok(()) = channel.recv() { - let current_index = current_texture_index.load(Ordering::Acquire); - - // We simulate some work for the worker to indulge in. In a real program this would - // likely be some kind of I/O, for example reading from disk (think loading the next - // level in a level-based game, loading the next chunk of terrain in an open-world - // game, etc.) or downloading images or other data from the internet. - // - // NOTE: The size of these textures is exceedingly large on purpose, so that you can - // feel that the update is in fact asynchronous due to the latency of the updates while - // the rendering continues without any. - let color = [rng.gen(), rng.gen(), rng.gen(), u8::MAX]; - for texel in &mut *staging_buffers[!current_index as usize].write().unwrap() { - *texel = color; - } - - // Write to the texture that's currently not in use for rendering. 
- let texture = textures[!current_index as usize].clone(); - - let mut builder = RecordingCommandBuffer::new( - command_buffer_allocator.clone(), - transfer_queue.queue_family_index(), - CommandBufferLevel::Primary, - CommandBufferBeginInfo { - usage: CommandBufferUsage::OneTimeSubmit, - ..Default::default() - }, - ) - .unwrap(); - builder - .copy_buffer_to_image(CopyBufferToImageInfo { - regions: [BufferImageCopy { - image_subresource: texture.subresource_layers(), - image_offset: CORNER_OFFSETS[current_corner % 4], - image_extent: [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 1], - ..Default::default() - }] - .into(), - ..CopyBufferToImageInfo::buffer_image( - staging_buffers[current_index as usize].clone(), - texture.clone(), - ) - }) - .unwrap() - .copy_buffer_to_image(CopyBufferToImageInfo { - regions: [BufferImageCopy { - image_subresource: texture.subresource_layers(), - image_offset: CORNER_OFFSETS[(current_corner + 1) % 4], - image_extent: [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 1], - ..Default::default() - }] - .into(), - ..CopyBufferToImageInfo::buffer_image( - staging_buffers[!current_index as usize].clone(), - texture, - ) - }) - .unwrap(); - let command_buffer = builder.end().unwrap(); + let graphics_flight = resources.flight(graphics_flight_id).unwrap(); // We swap the texture index to use after a write, but there is no guarantee that other // tasks have actually moved on to using the new texture. What could happen then, if @@ -822,56 +889,135 @@ fn run_worker( // 3. Task B writes texture 0, swapping the index // 4. Task A stops reading texture 0 // - // This is known as the A/B/A problem. In this case it results in a race condition, - // since task A (rendering, in our case) is still reading texture 0 while task B (our - // worker) has already started writing the very same texture. - // - // The most common way to solve this issue is using *generations*, also known as - // *epochs*. A generation is simply a monotonically increasing integer. What exactly - // one generation represents depends on the application. In our case, one generation - // passed represents one frame that finished rendering. Knowing this, we can keep track - // of the generation at the time of swapping the texture index, and ensure that any - // further write only happens after a generation was reached which makes it impossible - // for any readers to be stuck on the old index. Here we are simply spinning. + // This is known as the A/B/A problem. In this case it results in a data race, since + // task A (rendering, in our case) is still reading texture 0 while task B (our worker) + // has already started writing the very same texture. // - // NOTE: You could also use the thread for other things in the meantime. Since frames - // are typically very short though, it would make no sense to do that in this case. - while current_generation.load(Ordering::Acquire) - last_generation - < swapchain_image_count as u64 - { - hint::spin_loop(); - } + // To solve this issue, we keep track of the frame counter before swapping the texture + // index and ensure that any further write only happens after a frame was reached which + // makes it impossible for any readers to be stuck on the old index, by waiting on the + // frame to finish on the rendering thread. 
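The rule the comment arrives at can be modelled with nothing more than an atomic counter. A minimal sketch, assuming a hypothetical `FrameClock` standing in for the `Flight`/`wait_for_frame` machinery used immediately below in the patch:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Stand-in for the flight's frame counter: how many frames the rendering thread has
// fully finished executing on the GPU.
struct FrameClock {
    finished: AtomicU64,
}

impl FrameClock {
    // Rough model of waiting on a particular frame: don't proceed until it has finished.
    fn wait_for_frame(&self, frame: u64) {
        while self.finished.load(Ordering::Acquire) < frame {
            std::hint::spin_loop();
        }
    }
}

fn main() {
    let clock = FrameClock {
        finished: AtomicU64::new(0),
    };

    // Renderer side: three frames have completed since the worker last swapped the index.
    clock.finished.store(3, Ordering::Release);

    // Worker side: the index was swapped while frame 2 was the current frame, so any frame
    // numbered 2 or earlier might still have been sampling the old replica. Waiting for
    // frame 2 to finish rules that out before the replica is overwritten.
    let last_frame = 2;
    clock.wait_for_frame(last_frame);
    println!("frame {last_frame} finished; safe to overwrite the previously visible texture");
}
```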
+ graphics_flight.wait_for_frame(last_frame, None).unwrap(); + + let current_index = current_texture_index.load(Ordering::Relaxed); + + let resource_map = resource_map!( + &task_graph, + virtual_front_staging_buffer_id => staging_buffer_ids[current_index as usize], + virtual_back_staging_buffer_id => staging_buffer_ids[!current_index as usize], + // Write to the texture that's currently not in use for rendering. + virtual_texture_id => texture_ids[!current_index as usize], + ) + .unwrap(); - // Execute the transfer, blocking the thread until it finishes. - // - // NOTE: You could also use the thread for other things in the meantime. - command_buffer - .execute(transfer_queue.clone()) - .unwrap() - .then_signal_fence_and_flush() + unsafe { task_graph.execute(resource_map, ¤t_corner, || {}) }.unwrap(); + + // Block the thread until the transfer finishes. + resources + .flight(transfer_flight_id) .unwrap() .wait(None) .unwrap(); - // Remember the latest generation. - last_generation = current_generation.load(Ordering::Acquire); + last_frame = graphics_flight.current_frame(); // Swap the texture used for rendering to the newly updated one. // // NOTE: We are relying on the fact that this thread is the only one doing stores. - current_texture_index.store(!current_index, Ordering::Release); + current_texture_index.store(!current_index, Ordering::Relaxed); current_corner += 1; } }); } +struct UploadTask { + front_staging_buffer_id: Id, + back_staging_buffer_id: Id, + texture_id: Id, +} + +impl Task for UploadTask { + type World = usize; + + unsafe fn execute( + &self, + cbf: &mut RawRecordingCommandBuffer, + tcx: &mut TaskContext<'_>, + ¤t_corner: &Self::World, + ) -> TaskResult { + const CORNER_OFFSETS: [[u32; 3]; 4] = [ + [0, 0, 0], + [TRANSFER_GRANULARITY, 0, 0], + [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 0], + [0, TRANSFER_GRANULARITY, 0], + ]; + + let mut rng = rand::thread_rng(); + + // We simulate some work for the worker to indulge in. In a real program this would likely + // be some kind of I/O, for example reading from disk (think loading the next level in a + // level-based game, loading the next chunk of terrain in an open-world game, etc.) or + // downloading images or other data from the internet. + // + // NOTE: The size of these textures is exceedingly large on purpose, so that you can feel + // that the update is in fact asynchronous due to the latency of the updates while the + // rendering continues without any. + let color = [rng.gen(), rng.gen(), rng.gen(), u8::MAX]; + tcx.write_buffer::<[_]>(self.front_staging_buffer_id, ..)? + .fill(color); + + let texture = tcx.image(self.texture_id)?.image(); + + cbf.copy_buffer_to_image(&CopyBufferToImageInfo { + regions: [BufferImageCopy { + image_subresource: texture.subresource_layers(), + image_offset: CORNER_OFFSETS[current_corner % 4], + image_extent: [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 1], + ..Default::default() + }] + .into(), + ..CopyBufferToImageInfo::buffer_image( + tcx.buffer(self.front_staging_buffer_id)? + .buffer() + .clone() + .into(), + texture.clone(), + ) + })?; + + if current_corner > 0 { + cbf.copy_buffer_to_image(&CopyBufferToImageInfo { + regions: [BufferImageCopy { + image_subresource: texture.subresource_layers(), + image_offset: CORNER_OFFSETS[(current_corner - 1) % 4], + image_extent: [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 1], + ..Default::default() + }] + .into(), + ..CopyBufferToImageInfo::buffer_image( + tcx.buffer(self.back_staging_buffer_id)? 
+ .buffer() + .clone() + .into(), + texture.clone(), + ) + })?; + } + + Ok(()) + } +} + /// This function is called once during initialization, then again whenever the window is resized. fn window_size_dependent_setup( - images: &[Arc], - render_pass: Arc, + resources: &Resources, + swapchain_id: Id, + render_pass: &Arc, viewport: &mut Viewport, ) -> Vec> { + let swapchain_state = resources.swapchain(swapchain_id).unwrap(); + let images = swapchain_state.images(); let extent = images[0].extent(); viewport.extent = [extent[0] as f32, extent[1] as f32]; diff --git a/vulkano-taskgraph/Cargo.toml b/vulkano-taskgraph/Cargo.toml index 3667a8d14d..8af6fb8488 100644 --- a/vulkano-taskgraph/Cargo.toml +++ b/vulkano-taskgraph/Cargo.toml @@ -14,10 +14,10 @@ keywords = { workspace = true } categories = { workspace = true } [dependencies] +ahash = { workspace = true } ash = { workspace = true } concurrent-slotmap = { workspace = true } parking_lot = { workspace = true } -rangemap = { workspace = true } smallvec = { workspace = true } thread_local = { workspace = true } vulkano = { workspace = true } diff --git a/vulkano-taskgraph/src/graph/compile.rs b/vulkano-taskgraph/src/graph/compile.rs new file mode 100644 index 0000000000..2025e1318d --- /dev/null +++ b/vulkano-taskgraph/src/graph/compile.rs @@ -0,0 +1,2622 @@ +// FIXME: host read barriers + +use self::linear_map::LinearMap; +use super::{ + BarrierIndex, ExecutableTaskGraph, Instruction, NodeIndex, NodeInner, ResourceAccess, + SemaphoreIndex, Submission, TaskGraph, +}; +use crate::{resource::Flight, Id, ObjectType, QueueFamilyType}; +use ash::vk; +use smallvec::{smallvec, SmallVec}; +use std::{cell::RefCell, cmp, error::Error, fmt, mem, ops::Range, sync::Arc}; +use vulkano::{ + device::{Device, DeviceOwned, Queue, QueueFlags}, + image::{Image, ImageLayout}, + swapchain::Swapchain, + sync::{semaphore::Semaphore, AccessFlags, PipelineStages}, + VulkanError, +}; + +impl TaskGraph { + /// Compiles the task graph into an executable form. + /// + /// # Safety + /// + /// - There must be no conflicting device accesses in task nodes with no path between them. + /// - There must be no accesses that are incompatible with the queue family type of the task + /// node. + /// - There must be no accesses that are unsupported by the device. + /// + /// # Panics + /// + /// - Panics if `compile_info.queues` is empty. + /// - Panics if the device of any queue in `compile_info.queues` or + /// `compile_info.present_queue` is not the same as that of `self`. + /// - Panics if `compile_info.queues` contains duplicate queue families. + /// - Panics if `compile_info.present_queue` is `None` and the task graph uses any swapchains. + /// + /// # Errors + /// + /// In order to get a successful compilation, the graph must satisfy the following conditions: + /// - It must be [weakly connected]: every node must be able to reach every other node when + /// disregarding the direction of the edges. + /// - It must have no [directed cycles]: if you were to walk starting from any node following + /// the direction of the edges, there must be no way to end up at the node you started at. 
+ /// + /// [weakly connected]: https://en.wikipedia.org/wiki/Connectivity_(graph_theory)#Connected_vertices_and_graphs + /// [directed cycles]: https://en.wikipedia.org/wiki/Cycle_(graph_theory)#Directed_circuit_and_directed_cycle + pub unsafe fn compile( + mut self, + compile_info: CompileInfo, + ) -> Result, CompileError> { + let CompileInfo { + queues, + present_queue, + flight_id, + _ne: _, + } = compile_info; + + assert_ne!(queues.len(), 0, "expected to be given at least one queue"); + + let device = &self.device().clone(); + + for queue in &queues { + assert_eq!(queue.device(), device); + assert_eq!( + queues + .iter() + .filter(|q| q.queue_family_index() == queue.queue_family_index()) + .count(), + 1, + "expected each queue in `compile_info.queues` to be from a unique queue family", + ); + } + + if let Some(present_queue) = &present_queue { + assert_eq!(present_queue.device(), device); + } + + if !self.is_weakly_connected() { + return Err(CompileError::new(self, CompileErrorKind::Unconnected)); + } + + let topological_order = match self.topological_sort() { + Ok(topological_order) => topological_order, + Err(kind) => return Err(CompileError::new(self, kind)), + }; + unsafe { self.dependency_levels(&topological_order) }; + let queue_family_indices = + match unsafe { self.queue_family_indices(device, &queues, &topological_order) } { + Ok(queue_family_indices) => queue_family_indices, + Err(kind) => return Err(CompileError::new(self, kind)), + }; + let mut queues_by_queue_family_index: SmallVec<[_; 8]> = + smallvec![None; *queue_family_indices.iter().max().unwrap() as usize + 1]; + + for queue in &queues { + if let Some(x) = + queues_by_queue_family_index.get_mut(queue.queue_family_index() as usize) + { + *x = Some(queue); + } + } + + let mut prev_accesses = vec![ResourceAccess::default(); self.resources.capacity() as usize]; + let mut barrier_stages = vec![BarrierStage::Stage0; self.resources.capacity() as usize]; + + let (node_meta, semaphore_count, last_swapchain_accesses) = unsafe { + self.node_metadata(&topological_order, &mut prev_accesses, &mut barrier_stages) + }; + + prev_accesses.fill(ResourceAccess::default()); + barrier_stages.fill(BarrierStage::Stage0); + + let mut state = CompileState::new(&mut prev_accesses, present_queue); + let mut prev_submission_end = 0; + + while prev_submission_end < topological_order.len() { + // First per-submission pass: compute the initial barriers for the submission. 
+ for (i, &node_index) in + (prev_submission_end..).zip(&topological_order[prev_submission_end..]) + { + let node = unsafe { self.nodes.node_unchecked(node_index) }; + let NodeInner::Task(task_node) = &node.inner else { + unreachable!(); + }; + + for (id, access) in task_node.accesses.iter() { + let access = ResourceAccess { + queue_family_index: task_node.queue_family_index, + ..*access + }; + + let barrier_stage = &mut barrier_stages[id.index() as usize]; + + if *barrier_stage == BarrierStage::Stage0 { + if !id.is::() { + if id.is::() { + state.transition_image(id, access); + } else if access.access_mask.contains_reads() { + state.memory_barrier(id, access); + } else { + state.execution_barrier(id, access); + } + } + + *barrier_stage = BarrierStage::Stage1; + } + } + + let should_submit = if let Some(&next_node_index) = topological_order.get(i + 1) { + let next_node = unsafe { self.nodes.node_unchecked(next_node_index) }; + let NodeInner::Task(next_task_node) = &next_node.inner else { + unreachable!() + }; + + next_task_node.queue_family_index != task_node.queue_family_index + } else { + true + }; + + if should_submit { + break; + } + } + + state.flush_initial_barriers(); + + // Second per-submission pass: add instructions and barriers for the submission. + for (i, &node_index) in + (prev_submission_end..).zip(&topological_order[prev_submission_end..]) + { + let node = unsafe { self.nodes.node_unchecked(node_index) }; + let NodeInner::Task(task_node) = &node.inner else { + unreachable!(); + }; + + for &semaphore_index in &node_meta[node_index as usize].wait_semaphores { + state.wait_semaphore(semaphore_index); + } + + for (id, access) in task_node.accesses.iter() { + let prev_access = state.prev_accesses[id.index() as usize]; + let access = ResourceAccess { + queue_family_index: task_node.queue_family_index, + ..*access + }; + + let barrier_stage = &mut barrier_stages[id.index() as usize]; + + if *barrier_stage == BarrierStage::Stage1 { + if id.is::() { + state.wait_acquire(unsafe { id.parametrize() }, access); + } + + *barrier_stage = BarrierStage::Stage2; + } else if prev_access.queue_family_index != access.queue_family_index { + let prev_access = &mut state.prev_accesses[id.index() as usize]; + prev_access.stage_mask = PipelineStages::empty(); + prev_access.access_mask = AccessFlags::empty(); + + if id.is_exclusive() { + state.acquire_queue_family_ownership(id, access); + } else if prev_access.image_layout != access.image_layout { + state.transition_image(id, access); + } else { + state.prev_accesses[id.index() as usize] = access; + } + } else if prev_access.image_layout != access.image_layout { + state.transition_image(id, access); + } else if prev_access.access_mask.contains_writes() + && access.access_mask.contains_reads() + { + state.memory_barrier(id, access); + } else if access.access_mask.contains_writes() { + state.execution_barrier(id, access); + } else { + // TODO: Could there be use cases for read-after-read execution barriers? 
+ let prev_access = &mut state.prev_accesses[id.index() as usize]; + prev_access.stage_mask |= access.stage_mask; + prev_access.access_mask |= access.access_mask; + } + } + + state.execute_task(node_index); + + for (id, _) in task_node.accesses.iter() { + if let Some((_, next_access)) = node_meta[node_index as usize] + .release_queue_family_ownership + .iter() + .find(|(x, _)| *x == id) + { + state.release_queue_family_ownership(id, *next_access); + } + } + + for &semaphore_index in &node_meta[node_index as usize].signal_semaphores { + state.signal_semaphore(semaphore_index); + } + + for (&swapchain_id, _) in last_swapchain_accesses + .iter() + .filter(|(_, &i)| i == node_index) + { + state.signal_present(swapchain_id); + } + + let should_submit = if let Some(&next_node_index) = topological_order.get(i + 1) { + let next_node = unsafe { self.nodes.node_unchecked(next_node_index) }; + let NodeInner::Task(next_task_node) = &next_node.inner else { + unreachable!() + }; + + next_task_node.queue_family_index != task_node.queue_family_index + } else { + true + }; + + if state.should_flush_submit || should_submit { + state.flush_submit(); + } + + if should_submit { + let queue = queues_by_queue_family_index[task_node.queue_family_index as usize] + .unwrap(); + state.submit(queue.clone()); + prev_submission_end = i + 1; + break; + } + } + } + + if !state + .pre_present_queue_family_ownership_transfers + .is_empty() + { + for swapchain_id in mem::take(&mut state.pre_present_queue_family_ownership_transfers) { + state.pre_present_acquire_queue_family_ownership(swapchain_id); + } + + state.flush_submit(); + state.submit(state.present_queue.clone().unwrap()); + } + + let semaphores = match (0..semaphore_count) + .map(|_| { + // SAFETY: The parameters are valid. + unsafe { Semaphore::new_unchecked(device.clone(), Default::default()) } + .map(Arc::new) + }) + .collect::>() + { + Ok(semaphores) => semaphores, + Err(err) => return Err(CompileError::new(self, CompileErrorKind::VulkanError(err))), + }; + + let swapchains = last_swapchain_accesses.iter().map(|(&id, _)| id).collect(); + + Ok(ExecutableTaskGraph { + graph: self, + flight_id, + instructions: state.instructions, + submissions: state.submissions, + buffer_barriers: state.buffer_barriers, + image_barriers: state.image_barriers, + semaphores: RefCell::new(semaphores), + swapchains, + present_queue: state.present_queue, + last_accesses: prev_accesses, + }) + } + + /// Performs [depth-first search] on the equivalent undirected graph to determine if every node + /// is visited, meaning the undirected graph is [connected]. If it is, then the directed graph + /// is [weakly connected]. This property is required because otherwise it could happen that we + /// end up with a submit that is in no way synchronized with the host. 
+ /// + /// [depth-first search]: https://en.wikipedia.org/wiki/Depth-first_search + /// [connected]: https://en.wikipedia.org/wiki/Connectivity_(graph_theory)#Connected_vertices_and_graphs + /// [weakly connected]: https://en.wikipedia.org/wiki/Connectivity_(graph_theory)#Connected_vertices_and_graphs + fn is_weakly_connected(&self) -> bool { + unsafe fn dfs( + graph: &TaskGraph, + node_index: NodeIndex, + visited: &mut [bool], + visited_count: &mut u32, + ) { + let is_visited = &mut visited[node_index as usize]; + + if *is_visited { + return; + } + + *is_visited = true; + *visited_count += 1; + + let node = unsafe { graph.nodes.node_unchecked(node_index) }; + + for &node_index in node.in_edges.iter().chain(&node.out_edges) { + unsafe { dfs(graph, node_index, visited, visited_count) }; + } + } + + let mut visited = vec![false; self.nodes.capacity() as usize]; + let mut visited_count = 0; + + if let Some((id, _)) = self.nodes.nodes().next() { + unsafe { dfs(self, id.index(), &mut visited, &mut visited_count) }; + } + + visited_count == self.nodes.len() + } + + /// Performs [topological sort using depth-first search]. Returns a vector of node indices in + /// topological order. + /// + /// [topological sort using depth-first search]: https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search + fn topological_sort(&self) -> Result, CompileErrorKind> { + type NodeState = u8; + + const VISITED_BIT: NodeState = 1 << 0; + const ON_STACK_BIT: NodeState = 1 << 1; + + unsafe fn dfs( + graph: &TaskGraph, + node_index: NodeIndex, + state: &mut [NodeState], + output: &mut [NodeIndex], + mut output_index: u32, + ) -> Result { + let node_state = &mut state[node_index as usize]; + + if *node_state == VISITED_BIT { + return Ok(output_index); + } + + if *node_state == ON_STACK_BIT { + return Err(CompileErrorKind::Cycle); + } + + *node_state = ON_STACK_BIT; + + let node = unsafe { graph.nodes.node_unchecked(node_index) }; + + for &node_index in &node.out_edges { + output_index = unsafe { dfs(graph, node_index, state, output, output_index) }?; + } + + state[node_index as usize] = VISITED_BIT; + output[output_index as usize] = node_index; + + Ok(output_index.wrapping_sub(1)) + } + + let mut state = vec![0; self.nodes.capacity() as usize]; + let mut output = vec![0; self.nodes.len() as usize]; + let mut output_index = self.nodes.len().wrapping_sub(1); + + for (id, _) in self.nodes.nodes() { + output_index = unsafe { dfs(self, id.index(), &mut state, &mut output, output_index) }?; + } + + debug_assert_eq!(output_index, u32::MAX); + + Ok(output) + } + + /// Performs [longest path search] to assign the dependency level index to each task node. + /// Tasks in the same dependency level don't depend on eachother and can therefore be run in + /// parallel. Returns a vector of dependency levels in topological order indexed by the node's + /// dependency level index. 
+ /// + /// [longest path search]: https://en.wikipedia.org/wiki/Longest_path_problem#Acyclic_graphs + unsafe fn dependency_levels(&mut self, topological_order: &[NodeIndex]) -> Vec> { + let mut distances = vec![0; self.nodes.capacity() as usize]; + let mut max_level = 0; + + for &node_index in topological_order { + let node = unsafe { self.nodes.node_unchecked(node_index) }; + + for &out_node_index in &node.out_edges { + let new_distance = distances[node_index as usize] + 1; + + if distances[out_node_index as usize] < new_distance { + distances[out_node_index as usize] = new_distance; + max_level = cmp::max(max_level, new_distance); + } + } + } + + let mut levels = vec![Vec::new(); max_level as usize + 1]; + + for (id, node) in self.nodes.nodes_mut() { + let NodeInner::Task(task_node) = &mut node.inner else { + unreachable!(); + }; + + let level_index = distances[id.index() as usize]; + levels[level_index as usize].push(id.index()); + task_node.dependency_level_index = level_index; + } + + levels + } + + /// Assigns a queue family index to each task node. Returns a vector of the used queue family + /// indices in topological order. + unsafe fn queue_family_indices( + &mut self, + device: &Device, + queues: &[Arc], + topological_order: &[NodeIndex], + ) -> Result, CompileErrorKind> { + let queue_family_properties = device.physical_device().queue_family_properties(); + let graphics_queue_family_index = queues + .iter() + .find(|q| { + queue_family_properties[q.queue_family_index() as usize] + .queue_flags + .contains(QueueFlags::GRAPHICS) + }) + .map(|q| q.queue_family_index()); + let compute_queue_family_index = queues + .iter() + .filter(|q| { + queue_family_properties[q.queue_family_index() as usize] + .queue_flags + .contains(QueueFlags::COMPUTE) + }) + .min_by_key(|q| { + queue_family_properties[q.queue_family_index() as usize] + .queue_flags + .count() + }) + .map(|q| q.queue_family_index()); + let transfer_queue_family_index = queues + .iter() + .filter(|q| { + queue_family_properties[q.queue_family_index() as usize] + .queue_flags + .contains(QueueFlags::TRANSFER) + }) + .min_by_key(|q| { + queue_family_properties[q.queue_family_index() as usize] + .queue_flags + .count() + }) + .map(|q| q.queue_family_index()) + .or(compute_queue_family_index) + .or(graphics_queue_family_index); + + let mut queue_family_indices = SmallVec::new(); + + for &node_index in topological_order { + let node = unsafe { self.nodes.node_unchecked_mut(node_index) }; + let NodeInner::Task(task_node) = &mut node.inner else { + unreachable!(); + }; + + let queue_family_index = match task_node.queue_family_type() { + QueueFamilyType::Graphics => graphics_queue_family_index, + QueueFamilyType::Compute => compute_queue_family_index, + QueueFamilyType::Transfer => transfer_queue_family_index, + QueueFamilyType::Specific { index } => queues + .iter() + .any(|q| q.queue_family_index() == index) + .then_some(index), + } + .ok_or(CompileErrorKind::InsufficientQueues)?; + + task_node.queue_family_index = queue_family_index; + + if !queue_family_indices.contains(&queue_family_index) { + queue_family_indices.push(queue_family_index); + } + } + + Ok(queue_family_indices) + } + + /// Does a preliminary pass over all nodes in the graph to collect information needed before + /// the actual compilation pass. Returns a vector of metadata indexed by the node index, the + /// current semaphore count, and a map from the swapchain ID to the last node that accessed the + /// swapchain. + // TODO: Cull redundant semaphores. 
+ unsafe fn node_metadata( + &self, + topological_order: &[NodeIndex], + prev_accesses: &mut [ResourceAccess], + barrier_stages: &mut [BarrierStage], + ) -> (Vec, usize, LinearMap, NodeIndex, 1>) { + let mut node_meta = vec![NodeMeta::default(); self.nodes.capacity() as usize]; + let mut prev_node_indices = vec![0; self.resources.capacity() as usize]; + let mut semaphore_count = 0; + let mut last_swapchain_accesses = LinearMap::new(); + + for &node_index in topological_order { + let node = unsafe { self.nodes.node_unchecked(node_index) }; + let NodeInner::Task(task_node) = &node.inner else { + unreachable!(); + }; + + for &out_node_index in &node.out_edges { + let out_node = unsafe { self.nodes.node_unchecked(out_node_index) }; + let NodeInner::Task(out_task_node) = &out_node.inner else { + unreachable!(); + }; + + if task_node.queue_family_index != out_task_node.queue_family_index { + let semaphore_index = semaphore_count; + node_meta[node_index as usize] + .signal_semaphores + .push(semaphore_index); + node_meta[out_node_index as usize] + .wait_semaphores + .push(semaphore_index); + semaphore_count += 1; + } + } + + for (id, access) in task_node.accesses.iter() { + let prev_access = &mut prev_accesses[id.index() as usize]; + let access = ResourceAccess { + queue_family_index: task_node.queue_family_index, + ..*access + }; + let prev_node_index = &mut prev_node_indices[id.index() as usize]; + + let barrier_stage = &mut barrier_stages[id.index() as usize]; + + if *barrier_stage == BarrierStage::Stage0 { + *prev_access = access; + *prev_node_index = node_index; + *barrier_stage = BarrierStage::Stage1; + } else { + if id.is_exclusive() + && prev_access.queue_family_index != access.queue_family_index + { + node_meta[*prev_node_index as usize] + .release_queue_family_ownership + .push((id, access)); + } + + if prev_access.queue_family_index != access.queue_family_index + || prev_access.image_layout != access.image_layout + || prev_access.access_mask.contains_writes() + || access.access_mask.contains_writes() + { + *prev_access = access; + } else { + prev_access.stage_mask |= access.stage_mask; + prev_access.access_mask |= access.access_mask; + } + + *prev_node_index = node_index; + } + + if id.is::() { + *last_swapchain_accesses + .get_or_insert(unsafe { id.parametrize() }, node_index) = node_index; + } + } + } + + (node_meta, semaphore_count, last_swapchain_accesses) + } +} + +#[derive(Clone, Default)] +struct NodeMeta { + wait_semaphores: Vec, + signal_semaphores: Vec, + release_queue_family_ownership: Vec<(Id, ResourceAccess)>, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum BarrierStage { + Stage0, + Stage1, + Stage2, +} + +struct CompileState<'a> { + prev_accesses: &'a mut [ResourceAccess], + instructions: Vec, + submissions: Vec, + buffer_barriers: Vec, + image_barriers: Vec, + present_queue: Option>, + initial_buffer_barrier_range: Range, + initial_image_barrier_range: Range, + has_flushed_submit: bool, + should_flush_submit: bool, + prev_buffer_barrier_index: usize, + prev_image_barrier_index: usize, + pre_present_queue_family_ownership_transfers: Vec>, +} + +impl<'a> CompileState<'a> { + fn new(prev_accesses: &'a mut [ResourceAccess], present_queue: Option>) -> Self { + CompileState { + prev_accesses, + instructions: Vec::new(), + submissions: Vec::new(), + buffer_barriers: Vec::new(), + image_barriers: Vec::new(), + present_queue, + initial_buffer_barrier_range: 0..0, + initial_image_barrier_range: 0..0, + has_flushed_submit: true, + should_flush_submit: false, + 
prev_buffer_barrier_index: 0, + prev_image_barrier_index: 0, + pre_present_queue_family_ownership_transfers: Vec::new(), + } + } + + fn release_queue_family_ownership(&mut self, id: Id, access: ResourceAccess) { + debug_assert!(id.is_exclusive()); + + let prev_access = &mut self.prev_accesses[id.index() as usize]; + let mut src = *prev_access; + let dst = ResourceAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + ..access + }; + + if !prev_access.access_mask.contains_writes() { + src.access_mask = AccessFlags::empty(); + } + + debug_assert_ne!(src.queue_family_index, dst.queue_family_index); + + self.memory_barrier_inner(id, src, dst); + } + + fn acquire_queue_family_ownership(&mut self, id: Id, access: ResourceAccess) { + debug_assert!(id.is_exclusive()); + + let prev_access = &mut self.prev_accesses[id.index() as usize]; + let src = ResourceAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + ..*prev_access + }; + let dst = access; + + debug_assert_ne!(src.queue_family_index, dst.queue_family_index); + + *prev_access = access; + + self.memory_barrier_inner(id, src, dst); + } + + fn transition_image(&mut self, id: Id, access: ResourceAccess) { + debug_assert_ne!( + self.prev_accesses[id.index() as usize].image_layout, + access.image_layout, + ); + + self.memory_barrier(id, access); + } + + fn memory_barrier(&mut self, id: Id, access: ResourceAccess) { + let prev_access = &mut self.prev_accesses[id.index() as usize]; + let src = ResourceAccess { + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + ..*prev_access + }; + let dst = ResourceAccess { + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + ..access + }; + + *prev_access = access; + + self.memory_barrier_inner(id, src, dst); + } + + fn execution_barrier(&mut self, id: Id, access: ResourceAccess) { + let prev_access = &mut self.prev_accesses[id.index() as usize]; + let src = ResourceAccess { + access_mask: AccessFlags::empty(), + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + ..*prev_access + }; + let dst = ResourceAccess { + access_mask: AccessFlags::empty(), + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + ..access + }; + + debug_assert_eq!(prev_access.image_layout, access.image_layout); + + *prev_access = access; + + self.memory_barrier_inner(id, src, dst); + } + + fn memory_barrier_inner(&mut self, id: Id, src: ResourceAccess, dst: ResourceAccess) { + match id.object_type() { + ObjectType::Buffer => { + self.buffer_barriers.push(super::BufferMemoryBarrier { + src_stage_mask: src.stage_mask, + src_access_mask: src.access_mask, + dst_stage_mask: dst.stage_mask, + dst_access_mask: dst.access_mask, + src_queue_family_index: src.queue_family_index, + dst_queue_family_index: dst.queue_family_index, + buffer: unsafe { id.parametrize() }, + }); + } + ObjectType::Image | ObjectType::Swapchain => { + self.image_barriers.push(super::ImageMemoryBarrier { + src_stage_mask: src.stage_mask, + src_access_mask: src.access_mask, + dst_stage_mask: dst.stage_mask, + dst_access_mask: dst.access_mask, + old_layout: src.image_layout, + new_layout: dst.image_layout, + src_queue_family_index: src.queue_family_index, + dst_queue_family_index: dst.queue_family_index, + image: id, + }); + } + _ => unreachable!(), + } + } + + fn flush_initial_barriers(&mut self) { + self.initial_buffer_barrier_range = self.prev_buffer_barrier_index as BarrierIndex + ..self.buffer_barriers.len() as BarrierIndex; + self.initial_image_barrier_range = self.prev_image_barrier_index as BarrierIndex + 
..self.image_barriers.len() as BarrierIndex; + self.prev_buffer_barrier_index = self.buffer_barriers.len(); + self.prev_image_barrier_index = self.image_barriers.len(); + } + + fn wait_acquire(&mut self, swapchain_id: Id, access: ResourceAccess) { + if !self.has_flushed_submit { + self.flush_submit(); + } + + self.image_barriers.push(super::ImageMemoryBarrier { + src_stage_mask: access.stage_mask, + src_access_mask: AccessFlags::empty(), + dst_stage_mask: access.stage_mask, + dst_access_mask: access.access_mask, + old_layout: ImageLayout::Undefined, + new_layout: access.image_layout, + src_queue_family_index: vk::QUEUE_FAMILY_IGNORED, + dst_queue_family_index: vk::QUEUE_FAMILY_IGNORED, + image: swapchain_id.erase(), + }); + + self.prev_accesses[swapchain_id.index() as usize] = access; + + self.instructions.push(Instruction::WaitAcquire { + swapchain_id, + stage_mask: access.stage_mask, + }); + } + + fn wait_semaphore(&mut self, semaphore_index: SemaphoreIndex) { + if !self.has_flushed_submit { + self.flush_submit(); + } + + self.instructions.push(Instruction::WaitSemaphore { + semaphore_index, + stage_mask: PipelineStages::ALL_COMMANDS, + }); + } + + fn execute_task(&mut self, node_index: NodeIndex) { + self.flush_barriers(); + + self.instructions + .push(Instruction::ExecuteTask { node_index }); + + self.has_flushed_submit = false; + } + + fn signal_semaphore(&mut self, semaphore_index: SemaphoreIndex) { + self.instructions.push(Instruction::SignalSemaphore { + semaphore_index, + stage_mask: PipelineStages::ALL_COMMANDS, + }); + + self.should_flush_submit = true; + } + + fn signal_present(&mut self, swapchain_id: Id) { + let present_queue = self + .present_queue + .as_ref() + .expect("expected to be given a present queue"); + + let prev_access = self.prev_accesses[swapchain_id.index() as usize]; + + if prev_access.queue_family_index == present_queue.queue_family_index() + || !swapchain_id.is_exclusive() + { + self.memory_barrier( + swapchain_id.erase(), + ResourceAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + image_layout: ImageLayout::PresentSrc, + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + }, + ); + + self.instructions.push(Instruction::SignalPresent { + swapchain_id, + stage_mask: prev_access.stage_mask, + }); + } else { + self.pre_present_release_queue_family_ownership(swapchain_id); + } + + self.should_flush_submit = true; + } + + fn pre_present_release_queue_family_ownership(&mut self, swapchain_id: Id) { + let prev_access = self.prev_accesses[swapchain_id.index() as usize]; + + self.release_queue_family_ownership( + swapchain_id.erase(), + ResourceAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + image_layout: ImageLayout::PresentSrc, + queue_family_index: self.present_queue.as_ref().unwrap().queue_family_index(), + }, + ); + + self.instructions.push(Instruction::SignalPrePresent { + swapchain_id, + stage_mask: prev_access.stage_mask, + }); + + self.pre_present_queue_family_ownership_transfers + .push(swapchain_id); + } + + fn pre_present_acquire_queue_family_ownership(&mut self, swapchain_id: Id) { + if !self.has_flushed_submit { + self.flush_submit(); + } + + self.instructions.push(Instruction::WaitPrePresent { + swapchain_id, + stage_mask: PipelineStages::ALL_COMMANDS, + }); + + self.acquire_queue_family_ownership( + swapchain_id.erase(), + ResourceAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + image_layout: ImageLayout::PresentSrc, + queue_family_index: 
self.present_queue.as_ref().unwrap().queue_family_index(), + }, + ); + + self.instructions.push(Instruction::SignalPresent { + swapchain_id, + stage_mask: PipelineStages::ALL_COMMANDS, + }); + } + + fn flush_barriers(&mut self) { + if self.prev_buffer_barrier_index != self.buffer_barriers.len() + || self.prev_image_barrier_index != self.image_barriers.len() + { + self.instructions.push(Instruction::PipelineBarrier { + buffer_barrier_range: self.prev_buffer_barrier_index as BarrierIndex + ..self.buffer_barriers.len() as BarrierIndex, + image_barrier_range: self.prev_image_barrier_index as BarrierIndex + ..self.image_barriers.len() as BarrierIndex, + }); + self.prev_buffer_barrier_index = self.buffer_barriers.len(); + self.prev_image_barrier_index = self.image_barriers.len(); + } + } + + fn flush_submit(&mut self) { + self.flush_barriers(); + self.instructions.push(Instruction::FlushSubmit); + self.has_flushed_submit = true; + self.should_flush_submit = false; + } + + fn submit(&mut self, queue: Arc) { + self.instructions.push(Instruction::Submit); + + let prev_instruction_range_end = self + .submissions + .last() + .map(|s| s.instruction_range.end) + .unwrap_or(0); + self.submissions.push(Submission { + queue, + initial_buffer_barrier_range: self.initial_buffer_barrier_range.clone(), + initial_image_barrier_range: self.initial_image_barrier_range.clone(), + instruction_range: prev_instruction_range_end..self.instructions.len(), + }); + } +} + +impl ExecutableTaskGraph { + /// Decompiles the graph back into a modifiable form. + #[inline] + pub fn decompile(self) -> TaskGraph { + self.graph + } +} + +/// Parameters to [compile] a [`TaskGraph`]. +/// +/// [compile]: TaskGraph::compile +#[derive(Clone, Debug)] +pub struct CompileInfo { + /// The queues to work with. + /// + /// You must supply at least one queue and all queues must be from unique queue families. + /// + /// The default value is empty, which must be overridden. + pub queues: Vec>, + + /// The queue to use for swapchain presentation, if any. + /// + /// You must supply this queue if the task graph uses any swapchains. It can be the same queue + /// as one in the [`queues`] field, or a different one. + /// + /// The default value is `None`. + /// + /// [`queues`]: Self::queues + pub present_queue: Option>, + + /// The flight which will be executed. + /// + /// The default value is `Id::INVALID`, which must be overridden. + pub flight_id: Id, + + pub _ne: vulkano::NonExhaustive, +} + +impl Default for CompileInfo { + #[inline] + fn default() -> Self { + CompileInfo { + queues: Vec::new(), + present_queue: None, + flight_id: Id::INVALID, + _ne: crate::NE, + } + } +} + +/// Error that can happen when [compiling] a [`TaskGraph`]. +/// +/// [compiling]: TaskGraph::compile +pub struct CompileError { + pub graph: TaskGraph, + pub kind: CompileErrorKind, +} + +/// The kind of [`CompileError`] that occurred. 
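As a hedged orientation sketch (not part of the original change), the parameters documented above might be used roughly as follows. Here `graph`, `queues`, `present_queue` and `flight_id` are assumed to have been created elsewhere, mirroring the tests at the end of this module; the `CompileErrorKind` variants are defined just below.

let compile_info = CompileInfo {
    // One queue per distinct queue family; at least one is required.
    queues,
    // Only needed if the task graph uses any swapchains.
    present_queue: Some(present_queue),
    // The flight the executable graph will be executed with.
    flight_id,
    ..Default::default()
};

// Compiling consumes the graph; on failure it is handed back inside the error.
let executable = match unsafe { graph.compile(compile_info) } {
    Ok(executable) => executable,
    Err(CompileError { graph: _, kind }) => panic!("compilation failed: {kind:?}"),
};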
+#[derive(Debug)] +pub enum CompileErrorKind { + Unconnected, + Cycle, + InsufficientQueues, + VulkanError(VulkanError), +} + +impl CompileError { + fn new(graph: TaskGraph, kind: CompileErrorKind) -> Self { + CompileError { graph, kind } + } +} + +impl fmt::Debug for CompileError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.kind, f) + } +} + +impl fmt::Display for CompileError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.kind { + CompileErrorKind::Unconnected => f.write_str("the graph is not weakly connected"), + CompileErrorKind::Cycle => f.write_str("the graph contains a directed cycle"), + CompileErrorKind::InsufficientQueues => { + f.write_str("the given queues are not sufficient for the requirements of a task") + } + CompileErrorKind::VulkanError(_) => f.write_str("a runtime error occurred"), + } + } +} + +impl Error for CompileError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match &self.kind { + CompileErrorKind::VulkanError(err) => Some(err), + _ => None, + } + } +} + +mod linear_map { + use smallvec::{Array, SmallVec}; + + pub struct LinearMap + where + [(K, V); N]: Array, + { + inner: SmallVec<[(K, V); N]>, + } + + impl LinearMap + where + [(K, V); N]: Array, + { + #[inline] + pub fn new() -> Self { + LinearMap { + inner: SmallVec::new(), + } + } + + #[inline] + pub fn get_or_insert(&mut self, key: K, value: V) -> &mut V + where + K: Eq, + { + let index = if let Some(index) = self.inner.iter().position(|(k, _)| k == &key) { + index + } else { + let index = self.inner.len(); + self.inner.push((key, value)); + + index + }; + + &mut unsafe { self.inner.get_unchecked_mut(index) }.1 + } + + #[inline] + pub fn iter(&self) -> impl Iterator { + self.inner.iter().map(|(k, v)| (k, v)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + resource::{AccessType, ImageLayoutType}, + tests::test_queues, + }; + use std::marker::PhantomData; + use vulkano::{ + buffer::BufferCreateInfo, image::ImageCreateInfo, swapchain::SwapchainCreateInfo, + sync::Sharing, + }; + + #[test] + fn unconnected() { + let (resources, queues) = test_queues!(); + let compile_info = CompileInfo { + queues, + ..Default::default() + }; + + { + // ┌───┐ + // │ A │ + // └───┘ + // ┄┄┄┄┄ + // ┌───┐ + // │ B │ + // └───┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 0); + graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + graph + .create_task_node("B", QueueFamilyType::Compute, PhantomData) + .build(); + + assert!(matches!( + unsafe { graph.compile(compile_info.clone()) }, + Err(CompileError { + kind: CompileErrorKind::Unconnected, + .. + }), + )); + } + + { + // ┌───┐ + // │ A ├─┐ + // └───┘ │ + // ┌───┐ │ + // │ B ├┐│ + // └───┘││ + // ┄┄┄┄┄││┄┄┄┄┄┄ + // ││ ┌───┐ + // │└►│ C │ + // │ └───┘ + // │ ┌───┐ + // └─►│ D │ + // └───┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 0); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Graphics, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Compute, PhantomData) + .build(); + let d = graph + .create_task_node("D", QueueFamilyType::Compute, PhantomData) + .build(); + graph.add_edge(a, c).unwrap(); + graph.add_edge(b, d).unwrap(); + + assert!(matches!( + unsafe { graph.compile(compile_info.clone()) }, + Err(CompileError { + kind: CompileErrorKind::Unconnected, + .. 
+ }), + )); + } + + { + // ┌───┐ ┌───┐ ┌───┐ + // │ A ├─►│ B ├─►│ C │ + // └───┘ └───┘ └───┘ + // ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ + // ┌───┐ ┌───┐ ┌───┐ ┌───┐ + // │ D ├┬►│ E ├┬►│ F ├──►│ │ + // └───┘│ └───┘│ └───┘┌─►│ G │ + // │ └──────┘┌►│ │ + // └──────────────┘ └───┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 0); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Graphics, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Graphics, PhantomData) + .build(); + graph.add_edge(a, b).unwrap(); + graph.add_edge(b, c).unwrap(); + let d = graph + .create_task_node("D", QueueFamilyType::Compute, PhantomData) + .build(); + let e = graph + .create_task_node("E", QueueFamilyType::Compute, PhantomData) + .build(); + let f = graph + .create_task_node("F", QueueFamilyType::Compute, PhantomData) + .build(); + let g = graph + .create_task_node("G", QueueFamilyType::Compute, PhantomData) + .build(); + graph.add_edge(d, e).unwrap(); + graph.add_edge(d, g).unwrap(); + graph.add_edge(e, f).unwrap(); + graph.add_edge(e, g).unwrap(); + graph.add_edge(f, g).unwrap(); + + assert!(matches!( + unsafe { graph.compile(compile_info) }, + Err(CompileError { + kind: CompileErrorKind::Unconnected, + .. + }), + )); + } + } + + #[test] + fn cycle() { + let (resources, queues) = test_queues!(); + let compile_info = CompileInfo { + queues, + ..Default::default() + }; + + { + // ┌───┐ ┌───┐ ┌───┐ + // ┌►│ A ├─►│ B ├─►│ C ├┐ + // │ └───┘ └───┘ └───┘│ + // └────────────────────┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 0); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Graphics, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Graphics, PhantomData) + .build(); + graph.add_edge(a, b).unwrap(); + graph.add_edge(b, c).unwrap(); + graph.add_edge(c, a).unwrap(); + + assert!(matches!( + unsafe { graph.compile(compile_info.clone()) }, + Err(CompileError { + kind: CompileErrorKind::Cycle, + .. + }), + )); + } + + { + // ┌───┐ ┌───┐ ┌───┐ + // ┌►│ A ├┬►│ B ├─►│ C ├┐ + // │ └───┘│ └───┘ └───┘│ + // │┄┄┄┄┄┄│┄┄┄┄┄┄┄┄┄┄┄┄┄│┄┄┄┄┄┄┄ + // │ │ ┌───┐ ┌───┐│ ┌───┐ + // │ └►│ D ├─►│ E ├┴►│ F ├┐ + // │ └───┘ └───┘ └───┘│ + // └───────────────────────────┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 0); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Graphics, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Graphics, PhantomData) + .build(); + let d = graph + .create_task_node("D", QueueFamilyType::Compute, PhantomData) + .build(); + let e = graph + .create_task_node("E", QueueFamilyType::Compute, PhantomData) + .build(); + let f = graph + .create_task_node("F", QueueFamilyType::Compute, PhantomData) + .build(); + graph.add_edge(a, b).unwrap(); + graph.add_edge(a, d).unwrap(); + graph.add_edge(b, c).unwrap(); + graph.add_edge(c, f).unwrap(); + graph.add_edge(d, e).unwrap(); + graph.add_edge(e, f).unwrap(); + graph.add_edge(f, a).unwrap(); + + assert!(matches!( + unsafe { graph.compile(compile_info.clone()) }, + Err(CompileError { + kind: CompileErrorKind::Cycle, + .. 
+ }), + )); + } + + { + // ┌─────┐ + // │┌───┐└►┌───┐ ┌───┐ + // ││ A ├┬►│ B ├─►│ C ├┬──────┐ + // │└───┘│ └───┘┌►└───┘│ │ + // │┄┄┄┄┄│┄┄┄┄┄┄│┄┄┄┄┄┄│┄┄┄┄┄┄│┄ + // │ │ ┌───┐│ ┌───┐│ ┌───┐│ + // │ └►│ D ├┴►│ E │└►│ F ├│┐ + // │ ┌►└───┘ └───┘ └───┘││ + // │ └────────────────────┘│ + // └───────────────────────────┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 0); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Graphics, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Graphics, PhantomData) + .build(); + let d = graph + .create_task_node("D", QueueFamilyType::Compute, PhantomData) + .build(); + let e = graph + .create_task_node("E", QueueFamilyType::Compute, PhantomData) + .build(); + let f = graph + .create_task_node("F", QueueFamilyType::Compute, PhantomData) + .build(); + graph.add_edge(a, b).unwrap(); + graph.add_edge(a, d).unwrap(); + graph.add_edge(b, c).unwrap(); + graph.add_edge(c, d).unwrap(); + graph.add_edge(c, f).unwrap(); + graph.add_edge(d, c).unwrap(); + graph.add_edge(d, e).unwrap(); + graph.add_edge(f, b).unwrap(); + + assert!(matches!( + unsafe { graph.compile(compile_info) }, + Err(CompileError { + kind: CompileErrorKind::Cycle, + .. + }), + )); + } + } + + #[test] + fn initial_pipeline_barrier() { + let (resources, queues) = test_queues!(); + let compile_info = CompileInfo { + queues, + ..Default::default() + }; + + { + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let buffer = graph.add_buffer(&BufferCreateInfo::default()); + let image = graph.add_image(&ImageCreateInfo::default()); + let node = graph + .create_task_node("", QueueFamilyType::Graphics, PhantomData) + .buffer_access(buffer, AccessType::VertexShaderUniformRead) + .image_access( + image, + AccessType::FragmentShaderSampledRead, + ImageLayoutType::Optimal, + ) + .build(); + + let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + assert_matches_instructions!( + graph, + InitialPipelineBarrier { + buffer_barriers: [ + { + dst_stage_mask: VERTEX_SHADER, + dst_access_mask: UNIFORM_READ, + buffer: buffer, + }, + ], + image_barriers: [ + { + dst_stage_mask: FRAGMENT_SHADER, + dst_access_mask: SHADER_SAMPLED_READ, + new_layout: ShaderReadOnlyOptimal, + image: image, + }, + ], + }, + ExecuteTask { node: node }, + FlushSubmit, + Submit, + ); + } + } + + #[test] + fn semaphore() { + let (resources, queues) = test_queues!(); + + let queue_family_properties = resources + .device() + .physical_device() + .queue_family_properties(); + let has_compute_only_queue = queues.iter().any(|q| { + let queue_flags = queue_family_properties[q.queue_family_index() as usize].queue_flags; + + queue_flags.contains(QueueFlags::COMPUTE) && !queue_flags.contains(QueueFlags::GRAPHICS) + }); + + if !has_compute_only_queue { + return; + } + + let compile_info = CompileInfo { + queues, + ..Default::default() + }; + + { + // ┌───┐ + // │ A ├─┐ + // └───┘ │ + // ┌───┐ │ + // │ B ├┐│ + // └───┘││ + // ┄┄┄┄┄││┄┄┄┄┄┄ + // │└►┌───┐ + // └─►│ C │ + // └───┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Graphics, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Compute, PhantomData) + .build(); + graph.add_edge(a, c).unwrap(); + graph.add_edge(b, c).unwrap(); + 
+ let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + assert_matches_instructions!( + graph, + ExecuteTask { node: b }, + // TODO: This semaphore is redundant. + SignalSemaphore { + semaphore_index: semaphore1, + stage_mask: ALL_COMMANDS, + }, + FlushSubmit, + ExecuteTask { node: a }, + SignalSemaphore { + semaphore_index: semaphore2, + stage_mask: ALL_COMMANDS, + }, + FlushSubmit, + Submit, + WaitSemaphore { + semaphore_index: semaphore1, + stage_mask: ALL_COMMANDS, + }, + WaitSemaphore { + semaphore_index: semaphore2, + stage_mask: ALL_COMMANDS, + }, + ExecuteTask { node: c }, + FlushSubmit, + Submit, + ); + } + + { + // ┌───┐ + // │ A ├┐ + // └───┘│ + // ┄┄┄┄┄│┄┄┄┄┄┄ + // │ ┌───┐ + // ├►│ B │ + // │ └───┘ + // │ ┌───┐ + // └►│ C │ + // └───┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Compute, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Compute, PhantomData) + .build(); + graph.add_edge(a, b).unwrap(); + graph.add_edge(a, c).unwrap(); + + let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + assert_matches_instructions!( + graph, + ExecuteTask { node: a }, + // TODO: This semaphore is redundant. + SignalSemaphore { + semaphore_index: semaphore1, + stage_mask: ALL_COMMANDS, + }, + SignalSemaphore { + semaphore_index: semaphore2, + stage_mask: ALL_COMMANDS, + }, + FlushSubmit, + Submit, + WaitSemaphore { + semaphore_index: semaphore2, + stage_mask: ALL_COMMANDS, + }, + ExecuteTask { node: c }, + FlushSubmit, + WaitSemaphore { + semaphore_index: semaphore1, + stage_mask: ALL_COMMANDS, + }, + ExecuteTask { node: b }, + FlushSubmit, + Submit, + ); + } + + { + // ┌───┐ ┌───┐ + // │ A ├───────┬───────►│ E │ + // └───┘ │ ┌►└───┘ + // ┌───┐ │ │ + // │ B ├┐ │ │ + // └───┘│ │ │ + // ┄┄┄┄┄│┄┄┄┄┄┄│┄┄┄┄┄┄│┄┄ + // │ ┌───┐└►┌───┐│ + // └►│ C ├─►│ D ├┘ + // └───┘ └───┘ + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let a = graph + .create_task_node("A", QueueFamilyType::Graphics, PhantomData) + .build(); + let b = graph + .create_task_node("B", QueueFamilyType::Graphics, PhantomData) + .build(); + let c = graph + .create_task_node("C", QueueFamilyType::Compute, PhantomData) + .build(); + let d = graph + .create_task_node("D", QueueFamilyType::Compute, PhantomData) + .build(); + let e = graph + .create_task_node("E", QueueFamilyType::Graphics, PhantomData) + .build(); + graph.add_edge(a, d).unwrap(); + graph.add_edge(a, e).unwrap(); + graph.add_edge(b, c).unwrap(); + graph.add_edge(c, d).unwrap(); + graph.add_edge(d, e).unwrap(); + + let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + // TODO: This could be brought down to 3 submissions with task reordering. 
+ assert_matches_instructions!( + graph, + ExecuteTask { node: b }, + SignalSemaphore { + semaphore_index: semaphore1, + stage_mask: ALL_COMMANDS, + }, + FlushSubmit, + Submit, + WaitSemaphore { + semaphore_index: semaphore1, + stage_mask: ALL_COMMANDS, + }, + ExecuteTask { node: c }, + FlushSubmit, + Submit, + ExecuteTask { node: a }, + SignalSemaphore { + semaphore_index: semaphore2, + stage_mask: ALL_COMMANDS, + }, + FlushSubmit, + Submit, + WaitSemaphore { + semaphore_index: semaphore2, + stage_mask: ALL_COMMANDS, + }, + ExecuteTask { node: d }, + SignalSemaphore { + semaphore_index: semaphore3, + stage_mask: ALL_COMMANDS, + }, + FlushSubmit, + Submit, + WaitSemaphore { + semaphore_index: semaphore3, + stage_mask: ALL_COMMANDS, + }, + ExecuteTask { node: e }, + FlushSubmit, + Submit, + ); + } + } + + #[test] + fn queue_family_ownership_transfer() { + let (resources, queues) = test_queues!(); + + let queue_family_properties = resources + .device() + .physical_device() + .queue_family_properties(); + let has_compute_only_queue = queues.iter().any(|q| { + let queue_flags = queue_family_properties[q.queue_family_index() as usize].queue_flags; + + queue_flags.contains(QueueFlags::COMPUTE) && !queue_flags.contains(QueueFlags::GRAPHICS) + }); + + if !has_compute_only_queue { + return; + } + + let compile_info = CompileInfo { + queues, + ..Default::default() + }; + + { + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let buffer1 = graph.add_buffer(&BufferCreateInfo::default()); + let buffer2 = graph.add_buffer(&BufferCreateInfo::default()); + let image1 = graph.add_image(&ImageCreateInfo::default()); + let image2 = graph.add_image(&ImageCreateInfo::default()); + let compute_node = graph + .create_task_node("", QueueFamilyType::Compute, PhantomData) + .buffer_access(buffer1, AccessType::ComputeShaderStorageWrite) + .buffer_access(buffer2, AccessType::ComputeShaderStorageRead) + .image_access( + image1, + AccessType::ComputeShaderStorageWrite, + ImageLayoutType::Optimal, + ) + .image_access( + image2, + AccessType::ComputeShaderSampledRead, + ImageLayoutType::Optimal, + ) + .build(); + let graphics_node = graph + .create_task_node("", QueueFamilyType::Graphics, PhantomData) + .buffer_access(buffer1, AccessType::IndexRead) + .buffer_access(buffer2, AccessType::VertexShaderSampledRead) + .image_access( + image1, + AccessType::VertexShaderSampledRead, + ImageLayoutType::General, + ) + .image_access( + image2, + AccessType::FragmentShaderSampledRead, + ImageLayoutType::General, + ) + .build(); + graph.add_edge(compute_node, graphics_node).unwrap(); + + let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + assert_matches_instructions!( + graph, + ExecuteTask { + node: compute_node, + }, + SignalSemaphore { + semaphore_index: semaphore, + stage_mask: ALL_COMMANDS, + }, + PipelineBarrier { + buffer_barriers: [ + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: SHADER_STORAGE_WRITE, + dst_stage_mask: , + dst_access_mask: , + buffer: buffer1, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: , + dst_stage_mask: , + dst_access_mask: , + buffer: buffer2, + }, + ], + image_barriers: [ + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: SHADER_STORAGE_WRITE, + dst_stage_mask: , + dst_access_mask: , + old_layout: General, + new_layout: General, + image: image1, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: , + dst_stage_mask: , + dst_access_mask: , + old_layout: ShaderReadOnlyOptimal, + new_layout: General, + image: image2, + }, + 
], + }, + FlushSubmit, + Submit, + WaitSemaphore { + semaphore_index: semaphore, + stage_mask: ALL_COMMANDS, + }, + PipelineBarrier { + buffer_barriers: [ + { + src_stage_mask: , + src_access_mask: , + dst_stage_mask: INDEX_INPUT, + dst_access_mask: INDEX_READ, + buffer: buffer1, + }, + { + src_stage_mask: , + src_access_mask: , + dst_stage_mask: VERTEX_SHADER, + dst_access_mask: SHADER_SAMPLED_READ, + buffer: buffer2, + }, + ], + image_barriers: [ + { + src_stage_mask: , + src_access_mask: , + dst_stage_mask: VERTEX_SHADER, + dst_access_mask: SHADER_SAMPLED_READ, + old_layout: General, + new_layout: General, + image: image1, + }, + { + src_stage_mask: , + src_access_mask: , + dst_stage_mask: FRAGMENT_SHADER, + dst_access_mask: SHADER_SAMPLED_READ, + old_layout: ShaderReadOnlyOptimal, + new_layout: General, + image: image2, + }, + ], + }, + ExecuteTask { + node: graphics_node, + }, + FlushSubmit, + Submit, + ); + } + + { + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let sharing = Sharing::Concurrent( + compile_info + .queues + .iter() + .map(|q| q.queue_family_index()) + .collect(), + ); + let buffer1 = graph.add_buffer(&BufferCreateInfo { + sharing: sharing.clone(), + ..Default::default() + }); + let buffer2 = graph.add_buffer(&BufferCreateInfo { + sharing: sharing.clone(), + ..Default::default() + }); + let image1 = graph.add_image(&ImageCreateInfo { + sharing: sharing.clone(), + ..Default::default() + }); + let image2 = graph.add_image(&ImageCreateInfo { + sharing: sharing.clone(), + ..Default::default() + }); + let compute_node = graph + .create_task_node("", QueueFamilyType::Compute, PhantomData) + .buffer_access(buffer1, AccessType::ComputeShaderStorageWrite) + .buffer_access(buffer2, AccessType::ComputeShaderStorageRead) + .image_access( + image1, + AccessType::ComputeShaderStorageWrite, + ImageLayoutType::Optimal, + ) + .image_access( + image2, + AccessType::ComputeShaderSampledRead, + ImageLayoutType::Optimal, + ) + .build(); + let graphics_node = graph + .create_task_node("", QueueFamilyType::Graphics, PhantomData) + .buffer_access(buffer1, AccessType::IndexRead) + .buffer_access(buffer2, AccessType::VertexShaderSampledRead) + .image_access( + image1, + AccessType::VertexShaderSampledRead, + ImageLayoutType::General, + ) + .image_access( + image2, + AccessType::FragmentShaderSampledRead, + ImageLayoutType::General, + ) + .build(); + graph.add_edge(compute_node, graphics_node).unwrap(); + + let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + assert_matches_instructions!( + graph, + ExecuteTask { + node: compute_node, + }, + SignalSemaphore { + semaphore_index: semaphore, + stage_mask: ALL_COMMANDS, + }, + FlushSubmit, + Submit, + WaitSemaphore { + semaphore_index: semaphore, + stage_mask: ALL_COMMANDS, + }, + PipelineBarrier { + buffer_barriers: [], + image_barriers: [ + { + src_stage_mask: , + src_access_mask: , + dst_stage_mask: FRAGMENT_SHADER, + dst_access_mask: SHADER_SAMPLED_READ, + old_layout: ShaderReadOnlyOptimal, + new_layout: General, + image: image2, + }, + ], + }, + ExecuteTask { + node: graphics_node, + }, + FlushSubmit, + Submit, + ); + } + } + + #[test] + fn swapchain() { + let (resources, queues) = test_queues!(); + + let queue_family_properties = resources + .device() + .physical_device() + .queue_family_properties(); + + let present_queue = queues.iter().find(|q| { + let queue_flags = queue_family_properties[q.queue_family_index() as usize].queue_flags; + + queue_flags.contains(QueueFlags::GRAPHICS) + }); + let 
compile_info = CompileInfo { + queues: queues.clone(), + present_queue: Some(present_queue.unwrap().clone()), + ..Default::default() + }; + + { + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let swapchain1 = graph.add_swapchain(&SwapchainCreateInfo::default()); + let swapchain2 = graph.add_swapchain(&SwapchainCreateInfo::default()); + let node = graph + .create_task_node("", QueueFamilyType::Graphics, PhantomData) + .image_access( + swapchain1.current_image_id(), + AccessType::ColorAttachmentWrite, + ImageLayoutType::Optimal, + ) + .image_access( + swapchain2.current_image_id(), + AccessType::ComputeShaderStorageWrite, + ImageLayoutType::Optimal, + ) + .build(); + + let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + assert_matches_instructions!( + graph, + WaitAcquire { + swapchain_id: swapchain1, + stage_mask: COLOR_ATTACHMENT_OUTPUT, + }, + WaitAcquire { + swapchain_id: swapchain2, + stage_mask: COMPUTE_SHADER, + }, + PipelineBarrier { + buffer_barriers: [], + image_barriers: [ + { + src_stage_mask: COLOR_ATTACHMENT_OUTPUT, + src_access_mask: , + dst_stage_mask: COLOR_ATTACHMENT_OUTPUT, + dst_access_mask: COLOR_ATTACHMENT_WRITE, + old_layout: Undefined, + new_layout: ColorAttachmentOptimal, + image: swapchain1, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: , + dst_stage_mask: COMPUTE_SHADER, + dst_access_mask: SHADER_STORAGE_WRITE, + old_layout: Undefined, + new_layout: General, + image: swapchain2, + }, + ], + }, + ExecuteTask { node: node }, + SignalPresent { + swapchain_id: swapchain1, + stage_mask: COLOR_ATTACHMENT_OUTPUT, + }, + SignalPresent { + swapchain_id: swapchain2, + stage_mask: COMPUTE_SHADER, + }, + PipelineBarrier { + buffer_barriers: [], + image_barriers: [ + { + src_stage_mask: COLOR_ATTACHMENT_OUTPUT, + src_access_mask: COLOR_ATTACHMENT_WRITE, + dst_stage_mask: , + dst_access_mask: , + old_layout: ColorAttachmentOptimal, + new_layout: PresentSrc, + image: swapchain1, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: SHADER_STORAGE_WRITE, + dst_stage_mask: , + dst_access_mask: , + old_layout: General, + new_layout: PresentSrc, + image: swapchain2, + }, + ], + }, + FlushSubmit, + Submit, + ); + } + + let present_queue = queues.iter().find(|q| { + let queue_flags = queue_family_properties[q.queue_family_index() as usize].queue_flags; + + queue_flags.contains(QueueFlags::COMPUTE) && !queue_flags.contains(QueueFlags::GRAPHICS) + }); + + if !present_queue.is_some() { + return; + } + + let compile_info = CompileInfo { + queues: queues.clone(), + present_queue: Some(present_queue.unwrap().clone()), + ..Default::default() + }; + + { + let mut graph = TaskGraph::<()>::new(resources.clone(), 10, 10); + let concurrent_sharing = Sharing::Concurrent( + compile_info + .queues + .iter() + .map(|q| q.queue_family_index()) + .collect(), + ); + let swapchain1 = graph.add_swapchain(&SwapchainCreateInfo::default()); + let swapchain2 = graph.add_swapchain(&SwapchainCreateInfo { + image_sharing: concurrent_sharing.clone(), + ..Default::default() + }); + let swapchain3 = graph.add_swapchain(&SwapchainCreateInfo::default()); + let swapchain4 = graph.add_swapchain(&SwapchainCreateInfo { + image_sharing: concurrent_sharing.clone(), + ..Default::default() + }); + let node = graph + .create_task_node("", QueueFamilyType::Graphics, PhantomData) + .image_access( + swapchain1.current_image_id(), + AccessType::ColorAttachmentWrite, + ImageLayoutType::Optimal, + ) + .image_access( + swapchain2.current_image_id(), + 
AccessType::ColorAttachmentWrite, + ImageLayoutType::Optimal, + ) + .image_access( + swapchain3.current_image_id(), + AccessType::ComputeShaderStorageWrite, + ImageLayoutType::Optimal, + ) + .image_access( + swapchain4.current_image_id(), + AccessType::ComputeShaderStorageWrite, + ImageLayoutType::Optimal, + ) + .build(); + + let graph = unsafe { graph.compile(compile_info.clone()) }.unwrap(); + + assert_matches_instructions!( + graph, + WaitAcquire { + swapchain_id: swapchain1, + stage_mask: COLOR_ATTACHMENT_OUTPUT, + }, + WaitAcquire { + swapchain_id: swapchain2, + stage_mask: COLOR_ATTACHMENT_OUTPUT, + }, + WaitAcquire { + swapchain_id: swapchain3, + stage_mask: COMPUTE_SHADER, + }, + WaitAcquire { + swapchain_id: swapchain4, + stage_mask: COMPUTE_SHADER, + }, + PipelineBarrier { + buffer_barriers: [], + image_barriers: [ + { + src_stage_mask: COLOR_ATTACHMENT_OUTPUT, + src_access_mask: , + dst_stage_mask: COLOR_ATTACHMENT_OUTPUT, + dst_access_mask: COLOR_ATTACHMENT_WRITE, + old_layout: Undefined, + new_layout: ColorAttachmentOptimal, + image: swapchain1, + }, + { + src_stage_mask: COLOR_ATTACHMENT_OUTPUT, + src_access_mask: , + dst_stage_mask: COLOR_ATTACHMENT_OUTPUT, + dst_access_mask: COLOR_ATTACHMENT_WRITE, + old_layout: Undefined, + new_layout: ColorAttachmentOptimal, + image: swapchain2, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: , + dst_stage_mask: COMPUTE_SHADER, + dst_access_mask: SHADER_STORAGE_WRITE, + old_layout: Undefined, + new_layout: General, + image: swapchain3, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: , + dst_stage_mask: COMPUTE_SHADER, + dst_access_mask: SHADER_STORAGE_WRITE, + old_layout: Undefined, + new_layout: General, + image: swapchain4, + }, + ], + }, + ExecuteTask { node: node }, + SignalPrePresent { + swapchain_id: swapchain1, + stage_mask: COLOR_ATTACHMENT_OUTPUT, + }, + SignalPresent { + swapchain_id: swapchain2, + stage_mask: COLOR_ATTACHMENT_OUTPUT, + }, + SignalPrePresent { + swapchain_id: swapchain3, + stage_mask: COMPUTE_SHADER, + }, + SignalPresent { + swapchain_id: swapchain4, + stage_mask: COMPUTE_SHADER, + }, + PipelineBarrier { + buffer_barriers: [], + image_barriers: [ + { + src_stage_mask: COLOR_ATTACHMENT_OUTPUT, + src_access_mask: COLOR_ATTACHMENT_WRITE, + dst_stage_mask: , + dst_access_mask: , + old_layout: ColorAttachmentOptimal, + new_layout: PresentSrc, + image: swapchain1, + }, + { + src_stage_mask: COLOR_ATTACHMENT_OUTPUT, + src_access_mask: COLOR_ATTACHMENT_WRITE, + dst_stage_mask: , + dst_access_mask: , + old_layout: ColorAttachmentOptimal, + new_layout: PresentSrc, + image: swapchain2, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: SHADER_STORAGE_WRITE, + dst_stage_mask: , + dst_access_mask: , + old_layout: General, + new_layout: PresentSrc, + image: swapchain3, + }, + { + src_stage_mask: COMPUTE_SHADER, + src_access_mask: SHADER_STORAGE_WRITE, + dst_stage_mask: , + dst_access_mask: , + old_layout: General, + new_layout: PresentSrc, + image: swapchain4, + }, + ], + }, + FlushSubmit, + Submit, + WaitPrePresent { + swapchain_id: swapchain1, + stage_mask: ALL_COMMANDS, + }, + SignalPresent { + swapchain_id: swapchain1, + stage_mask: ALL_COMMANDS, + }, + WaitPrePresent { + swapchain_id: swapchain3, + stage_mask: ALL_COMMANDS, + }, + SignalPresent { + swapchain_id: swapchain3, + stage_mask: ALL_COMMANDS, + }, + PipelineBarrier { + buffer_barriers: [], + image_barriers: [ + { + src_stage_mask: , + src_access_mask: , + dst_stage_mask: , + dst_access_mask: , + old_layout: 
ColorAttachmentOptimal, + new_layout: PresentSrc, + image: swapchain1, + }, + { + src_stage_mask: , + src_access_mask: , + dst_stage_mask: , + dst_access_mask: , + old_layout: General, + new_layout: PresentSrc, + image: swapchain3, + }, + ], + }, + FlushSubmit, + Submit, + ); + } + } + + struct MatchingState { + submission_index: usize, + instruction_index: usize, + semaphores: ahash::HashMap<&'static str, SemaphoreIndex>, + } + + macro_rules! assert_matches_instructions { + ( + $graph:ident, + $($arg:tt)+ + ) => { + let mut state = MatchingState { + submission_index: 0, + instruction_index: 0, + semaphores: Default::default(), + }; + assert_matches_instructions!(@ $graph, state, $($arg)+); + }; + ( + @ + $graph:ident, + $state:ident, + InitialPipelineBarrier { + buffer_barriers: [ + $({ + dst_stage_mask: $($buffer_dst_stage:ident)|*, + dst_access_mask: $($buffer_dst_access:ident)|*, + buffer: $buffer:ident, + },)* + ], + image_barriers: [ + $({ + dst_stage_mask: $($image_dst_stage:ident)|*, + dst_access_mask: $($image_dst_access:ident)|*, + new_layout: $image_new_layout:ident, + image: $image:ident, + },)* + ], + }, + $($arg:tt)* + ) => { + let submission = &$graph.submissions[$state.submission_index]; + let buffer_barrier_range = &submission.initial_buffer_barrier_range; + let image_barrier_range = &submission.initial_image_barrier_range; + + let buffer_barrier_range = + buffer_barrier_range.start as usize..buffer_barrier_range.end as usize; + let buffer_barriers = &$graph.buffer_barriers[buffer_barrier_range]; + #[allow(unused_mut)] + let mut buffer_barrier_count = 0; + $( + let barrier = buffer_barriers + .iter() + .find(|barrier| barrier.buffer == $buffer) + .unwrap(); + assert_eq!(barrier.src_stage_mask, PipelineStages::empty()); + assert_eq!(barrier.src_access_mask, AccessFlags::empty()); + assert_eq!( + barrier.dst_stage_mask, + PipelineStages::empty() $(| PipelineStages::$buffer_dst_stage)*, + ); + assert_eq!( + barrier.dst_access_mask, + AccessFlags::empty() $(| AccessFlags::$buffer_dst_access)*, + ); + buffer_barrier_count += 1; + )* + assert_eq!(buffer_barriers.len(), buffer_barrier_count); + + let image_barrier_range = + image_barrier_range.start as usize..image_barrier_range.end as usize; + let image_barriers = &$graph.image_barriers[image_barrier_range]; + #[allow(unused_mut)] + let mut image_barrier_count = 0; + $( + let barrier = image_barriers + .iter() + .find(|barrier| barrier.image == $image.erase()) + .unwrap(); + assert_eq!(barrier.src_stage_mask, PipelineStages::empty()); + assert_eq!(barrier.src_access_mask, AccessFlags::empty()); + assert_eq!( + barrier.dst_stage_mask, + PipelineStages::empty() $(| PipelineStages::$image_dst_stage)*, + ); + assert_eq!( + barrier.dst_access_mask, + AccessFlags::empty() $(| AccessFlags::$image_dst_access)*, + ); + assert_eq!(barrier.old_layout, ImageLayout::Undefined); + assert_eq!(barrier.new_layout, ImageLayout::$image_new_layout); + image_barrier_count += 1; + )* + assert_eq!(image_barriers.len(), image_barrier_count); + + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + WaitAcquire { + swapchain_id: $swapchain_id:expr, + stage_mask: $($stage:ident)|*, + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::WaitAcquire { + swapchain_id, + stage_mask, + } if swapchain_id == $swapchain_id + && stage_mask == PipelineStages::empty() $(| PipelineStages::$stage)*, + )); + $state.instruction_index += 1; + 
assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + WaitSemaphore { + semaphore_index: $semaphore_index:ident, + stage_mask: $($stage:ident)|*, + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::WaitSemaphore { + stage_mask, + .. + } if stage_mask == PipelineStages::empty() $(| PipelineStages::$stage)*, + )); + let Instruction::WaitSemaphore { semaphore_index, .. } = + &$graph.instructions[$state.instruction_index] + else { + unreachable!(); + }; + + assert_eq!( + semaphore_index, + $state.semaphores.get(stringify!($semaphore_index)).unwrap(), + ); + + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + ExecuteTask { + node: $node:expr $(,)? + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::ExecuteTask { node_index } if node_index == $node.index(), + )); + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + PipelineBarrier { + buffer_barriers: [ + $({ + src_stage_mask: $($buffer_src_stage:ident)|*, + src_access_mask: $($buffer_src_access:ident)|*, + dst_stage_mask: $($buffer_dst_stage:ident)|*, + dst_access_mask: $($buffer_dst_access:ident)|*, + buffer: $buffer:ident, + },)* + ], + image_barriers: [ + $({ + src_stage_mask: $($image_src_stage:ident)|*, + src_access_mask: $($image_src_access:ident)|*, + dst_stage_mask: $($image_dst_stage:ident)|*, + dst_access_mask: $($image_dst_access:ident)|*, + old_layout: $image_old_layout:ident, + new_layout: $image_new_layout:ident, + image: $image:ident, + },)* + ], + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::PipelineBarrier { .. 
}, + )); + let Instruction::PipelineBarrier { buffer_barrier_range, image_barrier_range } = + &$graph.instructions[$state.instruction_index] + else { + unreachable!(); + }; + + let buffer_barrier_range = + buffer_barrier_range.start as usize..buffer_barrier_range.end as usize; + let buffer_barriers = &$graph.buffer_barriers[buffer_barrier_range]; + #[allow(unused_mut)] + let mut buffer_barrier_count = 0; + $( + let barrier = buffer_barriers + .iter() + .find(|barrier| barrier.buffer == $buffer) + .unwrap(); + assert_eq!( + barrier.src_stage_mask, + PipelineStages::empty() $(| PipelineStages::$buffer_src_stage)*, + ); + assert_eq!( + barrier.src_access_mask, + AccessFlags::empty() $(| AccessFlags::$buffer_src_access)*, + ); + assert_eq!( + barrier.dst_stage_mask, + PipelineStages::empty() $(| PipelineStages::$buffer_dst_stage)*, + ); + assert_eq!( + barrier.dst_access_mask, + AccessFlags::empty() $(| AccessFlags::$buffer_dst_access)*, + ); + buffer_barrier_count += 1; + )* + assert_eq!(buffer_barriers.len(), buffer_barrier_count); + + let image_barrier_range = + image_barrier_range.start as usize..image_barrier_range.end as usize; + let image_barriers = &$graph.image_barriers[image_barrier_range]; + #[allow(unused_mut)] + let mut image_barrier_count = 0; + $( + let barrier = image_barriers + .iter() + .find(|barrier| barrier.image == $image.erase()) + .unwrap(); + assert_eq!( + barrier.src_stage_mask, + PipelineStages::empty() $(| PipelineStages::$image_src_stage)*, + ); + assert_eq!( + barrier.src_access_mask, + AccessFlags::empty() $(| AccessFlags::$image_src_access)*, + ); + assert_eq!( + barrier.dst_stage_mask, + PipelineStages::empty() $(| PipelineStages::$image_dst_stage)*, + ); + assert_eq!( + barrier.dst_access_mask, + AccessFlags::empty() $(| AccessFlags::$image_dst_access)*, + ); + assert_eq!(barrier.old_layout, ImageLayout::$image_old_layout); + assert_eq!(barrier.new_layout, ImageLayout::$image_new_layout); + image_barrier_count += 1; + )* + assert_eq!(image_barriers.len(), image_barrier_count); + + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + SignalSemaphore { + semaphore_index: $semaphore_index:ident, + stage_mask: $($stage:ident)|*, + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::SignalSemaphore { + stage_mask, + .. + } if stage_mask == PipelineStages::empty() $(| PipelineStages::$stage)*, + )); + let Instruction::SignalSemaphore { semaphore_index, .. 
} = + &$graph.instructions[$state.instruction_index] + else { + unreachable!(); + }; + + assert!($state.semaphores.get(&stringify!($semaphore_index)).is_none()); + $state.semaphores.insert(stringify!($semaphore_index), *semaphore_index); + + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + SignalPrePresent { + swapchain_id: $swapchain_id:expr, + stage_mask: $($stage:ident)|*, + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::SignalPrePresent { + swapchain_id, + stage_mask, + } if swapchain_id == $swapchain_id + && stage_mask == PipelineStages::empty() $(| PipelineStages::$stage)*, + )); + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + WaitPrePresent { + swapchain_id: $swapchain_id:expr, + stage_mask: $($stage:ident)|*, + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::WaitPrePresent { + swapchain_id, + stage_mask, + } if swapchain_id == $swapchain_id + && stage_mask == PipelineStages::empty() $(| PipelineStages::$stage)*, + )); + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + SignalPresent { + swapchain_id: $swapchain_id:expr, + stage_mask: $($stage:ident)|*, + }, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::SignalPresent { + swapchain_id, + stage_mask, + } if swapchain_id == $swapchain_id + && stage_mask == PipelineStages::empty() $(| PipelineStages::$stage)*, + )); + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + FlushSubmit, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::FlushSubmit, + )); + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + Submit, + $($arg:tt)* + ) => { + assert!(matches!( + $graph.instructions[$state.instruction_index], + Instruction::Submit, + )); + $state.submission_index += 1; + $state.instruction_index += 1; + assert_matches_instructions!(@ $graph, $state, $($arg)*); + }; + ( + @ + $graph:ident, + $state:ident, + ) => { + assert_eq!($graph.submissions.len(), $state.submission_index); + assert_eq!($graph.instructions.len(), $state.instruction_index); + }; + } + use assert_matches_instructions; +} diff --git a/vulkano-taskgraph/src/graph/execute.rs b/vulkano-taskgraph/src/graph/execute.rs index 88565bd735..602fd27afe 100644 --- a/vulkano-taskgraph/src/graph/execute.rs +++ b/vulkano-taskgraph/src/graph/execute.rs @@ -1,13 +1,11 @@ use super::{ - BarrierIndex, ExecutableTaskGraph, ImageReference, Instruction, InstructionIndex, NodeIndex, - ResourceAccess, SemaphoreIndex, EXCLUSIVE_BIT, + BarrierIndex, ExecutableTaskGraph, Instruction, NodeIndex, ResourceAccess, SemaphoreIndex, }; use crate::{ resource::{ - BufferAccess, BufferState, DeathRow, Flight, FlightState, ImageAccess, ImageState, - Resources, SwapchainState, + BufferAccess, BufferState, DeathRow, ImageAccess, ImageState, Resources, SwapchainState, }, - Id, InvalidSlotError, TaskContext, TaskError, + Id, InvalidSlotError, ObjectType, TaskContext, TaskError, }; use ash::vk; use concurrent_slotmap::epoch; @@ -21,14 +19,14 @@ use 
std::{ sync::{atomic::Ordering, Arc}, }; use vulkano::{ - buffer::Buffer, + buffer::{Buffer, BufferMemory}, command_buffer::{ sys::{RawCommandBuffer, RawRecordingCommandBuffer}, CommandBufferBeginInfo, CommandBufferLevel, CommandBufferUsage, }, device::{Device, DeviceOwned, Queue}, image::Image, - swapchain::Swapchain, + swapchain::{AcquireNextImageInfo, AcquiredImage, Swapchain}, sync::{fence::Fence, semaphore::Semaphore, AccessFlags, PipelineStages}, Validated, Version, VulkanError, VulkanObject, }; @@ -45,17 +43,17 @@ impl ExecutableTaskGraph { /// # Panics /// /// - Panics if `resource_map` doesn't map the virtual resources of `self` exhaustively. - /// - Panics if `flight_id` is invalid. + /// - Panics if `self.flight_id()` is invalid. /// - Panics if another thread is already executing a task graph using the flight. - /// - Panics if the [current fence] of the flight wasn't waited on. /// - Panics if `resource_map` maps to any swapchain that isn't owned by the flight. + /// - Panics if the oldest frame of the flight wasn't [waited] on. /// - /// [current fence]: Flight::current_fence + /// [waited]: Flight::wait pub unsafe fn execute( &self, resource_map: ResourceMap<'_>, - flight_id: Id, world: &W, + pre_present_notify: impl FnOnce(), ) -> Result { assert!(ptr::eq( resource_map.virtual_resources, @@ -63,9 +61,15 @@ impl ExecutableTaskGraph { )); assert!(resource_map.is_exhaustive()); + let flight_id = self.flight_id; + // SAFETY: `resource_map` owns an `epoch::Guard`. - let flight = unsafe { resource_map.resources.flight_unprotected(flight_id) } - .expect("invalid flight"); + let flight = unsafe { + resource_map + .physical_resources + .flight_unprotected(flight_id) + } + .expect("invalid flight"); let mut flight_state = flight.state.try_lock().unwrap_or_else(|| { panic!( @@ -73,10 +77,9 @@ impl ExecutableTaskGraph { ); }); - let current_fence = flight.current_fence(); - + // TODO: This call is quite expensive. assert!( - current_fence.is_signaled()?, + flight.current_fence().read().is_signaled()?, "you must wait on the fence for the current frame before submitting more work", ); @@ -92,26 +95,32 @@ impl ExecutableTaskGraph { ); } - let current_frame = flight.current_frame(); + let current_frame_index = flight.current_frame_index(); + let death_row = &mut flight_state.death_rows[current_frame_index as usize]; - for object in flight_state.death_rows[current_frame as usize].drain(..) { + for object in death_row.drain(..) { // FIXME: drop(object); } // SAFETY: We checked that `resource_map` maps the virtual IDs exhaustively. - unsafe { self.acquire_images_khr(&resource_map, current_frame) }?; + unsafe { self.acquire_images_khr(&resource_map, current_frame_index) }?; + + let current_fence = flight.current_fence().write(); // SAFETY: We checked that the fence has been signalled. unsafe { current_fence.reset_unchecked() }?; + // SAFETY: We checked that `resource_map` maps the virtual IDs exhaustively. 
+ unsafe { self.invalidate_mapped_memory_ranges(&resource_map) }?; + let mut state_guard = StateGuard { executable: self, resource_map: &resource_map, submission_count: 0, }; - let execute_instructions = if resource_map.device().enabled_features().synchronization2 { + let execute_instructions = if self.device().enabled_features().synchronization2 { Self::execute_instructions2 } else { Self::execute_instructions @@ -122,9 +131,9 @@ impl ExecutableTaskGraph { execute_instructions( self, &resource_map, - &mut flight_state, - current_frame, - current_fence, + death_row, + current_frame_index, + ¤t_fence, &mut state_guard.submission_count, world, ) @@ -132,16 +141,22 @@ impl ExecutableTaskGraph { mem::forget(state_guard); + for semaphore in self.semaphores.borrow().iter() { + death_row.push(semaphore.clone()); + } + unsafe { flight.next_frame() }; + pre_present_notify(); + // SAFETY: We checked that `resource_map` maps the virtual IDs exhaustively. - let res = unsafe { self.present_images_khr(&resource_map, current_frame) }; + let res = unsafe { self.present_images_khr(&resource_map, current_frame_index) }; // SAFETY: We checked that `resource_map` maps the virtual IDs exhaustively. - unsafe { self.update_resource_state(&resource_map, 0..self.instructions.len()) }; + unsafe { self.update_resource_state(&resource_map, &self.last_accesses) }; resource_map - .resources + .physical_resources .try_advance_global_and_collect(&resource_map.guard); res @@ -150,16 +165,13 @@ impl ExecutableTaskGraph { unsafe fn acquire_images_khr( &self, resource_map: &ResourceMap<'_>, - current_frame: u32, + current_frame_index: u32, ) -> Result { - let fns = resource_map.device().fns(); - let acquire_next_image_khr = fns.khr_swapchain.acquire_next_image_khr; - for &swapchain_id in &self.swapchains { // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs exhaustively. let swapchain_state = unsafe { resource_map.swapchain_unchecked(swapchain_id) }; let semaphore = - &swapchain_state.semaphores[current_frame as usize].image_available_semaphore; + &swapchain_state.semaphores[current_frame_index as usize].image_available_semaphore; // Make sure to not acquire another image index if we already acquired one. This can // happen when using multiple swapchains, if one acquire succeeds and another fails, or @@ -168,30 +180,79 @@ impl ExecutableTaskGraph { continue; } - let mut current_image_index = u32::MAX; - let result = unsafe { - acquire_next_image_khr( - resource_map.device().handle(), - swapchain_state.swapchain().handle(), - u64::MAX, - semaphore.handle(), - vk::Fence::null(), - &mut current_image_index, - ) + let res = unsafe { + swapchain_state + .swapchain() + .acquire_next_image(&AcquireNextImageInfo { + semaphore: Some(semaphore.clone()), + ..Default::default() + }) }; - // If an error occurred, this will set the index to `u32::MAX`. - swapchain_state - .current_image_index - .store(current_image_index, Ordering::Relaxed); + match res { + Ok(AcquiredImage { image_index, .. }) => { + swapchain_state + .current_image_index + .store(image_index, Ordering::Relaxed); + } + Err(error) => { + swapchain_state + .current_image_index + .store(u32::MAX, Ordering::Relaxed); + return Err(ExecuteError::Swapchain { + swapchain_id, + error, + }); + } + } + } - // These are the only possible success codes because we set the timeout to `u64::MAX`. 
- if !matches!(result, vk::Result::SUCCESS | vk::Result::SUBOPTIMAL_KHR) { - return Err(ExecuteError::Swapchain { - swapchain_id, - error: result.into(), - }); + Ok(()) + } + + unsafe fn invalidate_mapped_memory_ranges(&self, resource_map: &ResourceMap<'_>) -> Result { + let mut mapped_memory_ranges = Vec::new(); + + for &buffer_id in &self.graph.resources.host_reads { + // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs exhaustively. + let buffer = unsafe { resource_map.buffer_unchecked(buffer_id) }.buffer(); + + let allocation = match buffer.memory() { + BufferMemory::Normal(a) => a, + BufferMemory::Sparse => todo!("`TaskGraph` doesn't support sparse binding yet"), + BufferMemory::External => continue, + _ => unreachable!(), + }; + + if allocation.atom_size().is_none() { + continue; + } + + if unsafe { allocation.mapped_slice_unchecked(..) }.is_err() { + continue; } + + // This works because the memory allocator must align allocations to the non-coherent + // atom size when the memory is host-visible but not host-coherent. + mapped_memory_ranges.push( + vk::MappedMemoryRange::default() + .memory(allocation.device_memory().handle()) + .offset(allocation.offset()) + .size(allocation.size()), + ); + } + + if !mapped_memory_ranges.is_empty() { + let fns = self.device().fns(); + unsafe { + (fns.v1_0.invalidate_mapped_memory_ranges)( + self.device().handle(), + mapped_memory_ranges.len() as u32, + mapped_memory_ranges.as_ptr(), + ) + } + .result() + .map_err(VulkanError::from)?; } Ok(()) @@ -200,18 +261,17 @@ impl ExecutableTaskGraph { unsafe fn execute_instructions2( &self, resource_map: &ResourceMap<'_>, - flight_state: &mut FlightState, - current_frame: u32, + death_row: &mut DeathRow, + current_frame_index: u32, current_fence: &Fence, submission_count: &mut usize, world: &W, ) -> Result { - let death_row = &mut flight_state.death_rows[current_frame as usize]; let mut state = ExecuteState2::new( self, resource_map, death_row, - current_frame, + current_frame_index, current_fence, submission_count, world, @@ -220,7 +280,7 @@ impl ExecutableTaskGraph { for instruction in self.instructions.iter().cloned() { if execute_initial_barriers { - let submission = &state.executable.submissions[*state.submission_count]; + let submission = state.current_submission(); state.initial_pipeline_barrier( submission.initial_buffer_barrier_range.clone(), submission.initial_image_barrier_range.clone(), @@ -256,6 +316,18 @@ impl ExecutableTaskGraph { } => { state.signal_semaphore(semaphore_index, stage_mask); } + Instruction::SignalPrePresent { + swapchain_id, + stage_mask, + } => { + state.signal_pre_present(swapchain_id, stage_mask); + } + Instruction::WaitPrePresent { + swapchain_id, + stage_mask, + } => { + state.wait_pre_present(swapchain_id, stage_mask); + } Instruction::SignalPresent { swapchain_id, stage_mask, @@ -278,18 +350,17 @@ impl ExecutableTaskGraph { unsafe fn execute_instructions( &self, resource_map: &ResourceMap<'_>, - flight_state: &mut FlightState, - current_frame: u32, + death_row: &mut DeathRow, + current_frame_index: u32, current_fence: &Fence, submission_count: &mut usize, world: &W, ) -> Result { - let death_row = &mut flight_state.death_rows[current_frame as usize]; let mut state = ExecuteState::new( self, resource_map, death_row, - current_frame, + current_frame_index, current_fence, submission_count, world, @@ -298,7 +369,7 @@ impl ExecutableTaskGraph { for instruction in self.instructions.iter().cloned() { if execute_initial_barriers { - let submission = 
&state.executable.submissions[*state.submission_count]; + let submission = state.current_submission(); state.initial_pipeline_barrier( submission.initial_buffer_barrier_range.clone(), submission.initial_image_barrier_range.clone(), @@ -334,6 +405,18 @@ impl ExecutableTaskGraph { } => { state.signal_semaphore(semaphore_index, stage_mask); } + Instruction::SignalPrePresent { + swapchain_id, + stage_mask, + } => { + state.signal_pre_present(swapchain_id, stage_mask); + } + Instruction::WaitPrePresent { + swapchain_id, + stage_mask, + } => { + state.wait_pre_present(swapchain_id, stage_mask); + } Instruction::SignalPresent { swapchain_id, stage_mask, @@ -353,10 +436,58 @@ impl ExecutableTaskGraph { Ok(()) } + unsafe fn flush_mapped_memory_ranges(&self, resource_map: &ResourceMap<'_>) -> Result { + let mut mapped_memory_ranges = Vec::new(); + + for &buffer_id in &self.graph.resources.host_writes { + // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs exhaustively. + let buffer = unsafe { resource_map.buffer_unchecked(buffer_id) }.buffer(); + + let allocation = match buffer.memory() { + BufferMemory::Normal(a) => a, + BufferMemory::Sparse => todo!("`TaskGraph` doesn't support sparse binding yet"), + BufferMemory::External => continue, + _ => unreachable!(), + }; + + if allocation.atom_size().is_none() { + continue; + } + + if unsafe { allocation.mapped_slice_unchecked(..) }.is_err() { + continue; + } + + // This works because the memory allocator must align allocations to the non-coherent + // atom size when the memory is host-visible but not host-coherent. + mapped_memory_ranges.push( + vk::MappedMemoryRange::default() + .memory(allocation.device_memory().handle()) + .offset(allocation.offset()) + .size(allocation.size()), + ); + } + + if !mapped_memory_ranges.is_empty() { + let fns = self.device().fns(); + unsafe { + (fns.v1_0.flush_mapped_memory_ranges)( + self.device().handle(), + mapped_memory_ranges.len() as u32, + mapped_memory_ranges.as_ptr(), + ) + } + .result() + .map_err(VulkanError::from)?; + } + + Ok(()) + } + unsafe fn present_images_khr( &self, resource_map: &ResourceMap<'_>, - current_frame: u32, + current_frame_index: u32, ) -> Result { let Some(present_queue) = &self.present_queue else { return Ok(()); @@ -372,7 +503,7 @@ impl ExecutableTaskGraph { // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs exhaustively. let swapchain_state = unsafe { resource_map.swapchain_unchecked(swapchain_id) }; semaphores.push( - swapchain_state.semaphores[current_frame as usize] + swapchain_state.semaphores[current_frame_index as usize] .tasks_complete_semaphore .handle(), ); @@ -387,7 +518,7 @@ impl ExecutableTaskGraph { .image_indices(&image_indices) .results(&mut results); - let fns = resource_map.device().fns(); + let fns = self.device().fns(); let queue_present_khr = fns.khr_swapchain.queue_present_khr; let _ = unsafe { queue_present_khr(present_queue.handle(), &present_info) }; @@ -397,13 +528,8 @@ impl ExecutableTaskGraph { // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs exhaustively. let swapchain_state = unsafe { resource_map.swapchain_unchecked(swapchain_id) }; - unsafe { - swapchain_state.set_access( - 0..swapchain_state.swapchain().image_array_layers(), - // TODO: Could there be a use case for keeping the old image contents? - ImageAccess::NONE, - ) - }; + // TODO: Could there be a use case for keeping the old image contents? 
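+            // Clearing the tracked access means that the next execution won't synchronize with,
+            // or try to preserve, whatever contents were presented.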
+ unsafe { swapchain_state.set_access(ImageAccess::NONE) }; // In case of these error codes, the semaphore wait operation is not executed. if !matches!( @@ -422,7 +548,7 @@ impl ExecutableTaskGraph { if res.is_ok() { res = Err(ExecuteError::Swapchain { swapchain_id, - error: result.into(), + error: Validated::Error(result.into()), }); } } @@ -434,42 +560,48 @@ impl ExecutableTaskGraph { unsafe fn update_resource_state( &self, resource_map: &ResourceMap<'_>, - instruction_range: Range, + last_accesses: &[ResourceAccess], ) { - // TODO: This isn't particularly efficient. - for instruction in &self.instructions[instruction_range] { - let Instruction::ExecuteTask { node_index } = instruction else { - continue; - }; - let task_node = unsafe { self.graph.nodes.task_node_unchecked(*node_index) }; - let queue_family_index = task_node.queue_family_index; - - for resource_access in task_node.accesses.iter().cloned() { - match resource_access { - ResourceAccess::Buffer(a) => { - // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs - // exhaustively. - let state = unsafe { resource_map.buffer_unchecked(a.id) }; - let access = BufferAccess::new(a.access_type, queue_family_index); - unsafe { state.set_access(a.range, access) }; - } - ResourceAccess::Image(a) => { - // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs - // exhaustively. - let state = unsafe { resource_map.image_unchecked(a.id) }; - let access = - ImageAccess::new(a.access_type, a.layout_type, queue_family_index); - unsafe { state.set_access(a.subresource_range, access) }; - } - ResourceAccess::Swapchain(a) => { - // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs - // exhaustively. - let state = unsafe { resource_map.swapchain_unchecked(a.id) }; - let access = - ImageAccess::new(a.access_type, a.layout_type, queue_family_index); - unsafe { state.set_access(a.array_layers, access) }; - } + for (id, _) in self.graph.resources.iter() { + let access = last_accesses[id.index() as usize]; + + match id.object_type() { + ObjectType::Buffer => { + // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs + // exhaustively. + let state = unsafe { resource_map.buffer_unchecked(id.parametrize()) }; + let access = BufferAccess::from_masks( + access.stage_mask, + access.access_mask, + access.queue_family_index, + ); + unsafe { state.set_access(access) }; } + ObjectType::Image => { + // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs + // exhaustively. + let state = unsafe { resource_map.image_unchecked(id.parametrize()) }; + let access = ImageAccess::from_masks( + access.stage_mask, + access.access_mask, + access.image_layout, + access.queue_family_index, + ); + unsafe { state.set_access(access) }; + } + ObjectType::Swapchain => { + // SAFETY: The caller must ensure that `resource_map` maps the virtual IDs + // exhaustively. 
+ let state = unsafe { resource_map.swapchain_unchecked(id.parametrize()) }; + let access = ImageAccess::from_masks( + access.stage_mask, + access.access_mask, + access.image_layout, + access.queue_family_index, + ); + unsafe { state.set_access(access) }; + } + _ => unreachable!(), } } } @@ -479,7 +611,7 @@ struct ExecuteState2<'a, W: ?Sized + 'static> { executable: &'a ExecutableTaskGraph, resource_map: &'a ResourceMap<'a>, death_row: &'a mut DeathRow, - current_frame: u32, + current_frame_index: u32, current_fence: &'a Fence, submission_count: &'a mut usize, world: &'a W, @@ -505,15 +637,15 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { executable: &'a ExecutableTaskGraph, resource_map: &'a ResourceMap<'a>, death_row: &'a mut DeathRow, - current_frame: u32, + current_frame_index: u32, current_fence: &'a Fence, submission_count: &'a mut usize, world: &'a W, ) -> Result { - let fns = resource_map.device().fns(); + let fns = executable.device().fns(); let (cmd_pipeline_barrier2, queue_submit2); - if resource_map.device().api_version() >= Version::V1_3 { + if executable.device().api_version() >= Version::V1_3 { cmd_pipeline_barrier2 = fns.v1_3.cmd_pipeline_barrier2; queue_submit2 = fns.v1_3.queue_submit2; } else { @@ -528,7 +660,7 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { executable, resource_map, death_row, - current_frame, + current_frame_index, current_fence, submission_count, world, @@ -543,6 +675,10 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { }) } + fn current_submission(&self) -> &super::Submission { + &self.executable.submissions[*self.submission_count] + } + fn initial_pipeline_barrier( &mut self, buffer_barrier_range: Range, @@ -550,101 +686,112 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { ) { self.convert_initial_buffer_barriers(buffer_barrier_range); self.convert_initial_image_barriers(image_barrier_range); - - unsafe { - (self.cmd_pipeline_barrier2)( - self.current_command_buffer.as_mut().unwrap().handle(), - &vk::DependencyInfo::default() - .buffer_memory_barriers(&self.current_buffer_barriers) - .image_memory_barriers(&self.current_image_barriers), - ) - }; - - self.current_buffer_barriers.clear(); - self.current_image_barriers.clear(); } fn convert_initial_buffer_barriers(&mut self, barrier_range: Range) { let barrier_range = barrier_range.start as usize..barrier_range.end as usize; + let queue_family_index = self.current_submission().queue.queue_family_index(); for barrier in &self.executable.buffer_barriers[barrier_range] { let state = unsafe { self.resource_map.buffer_unchecked(barrier.buffer) }; + let buffer = state.buffer(); + let access = state.access(); + let mut src_stage_mask = PipelineStages::empty(); + let mut src_access_mask = AccessFlags::empty(); + let dst_stage_mask = barrier.dst_stage_mask; + let mut dst_access_mask = barrier.dst_access_mask; + + if access.queue_family_index() == queue_family_index { + src_stage_mask = access.stage_mask(); + src_access_mask = access.access_mask(); + } - for (range, access) in state.accesses(barrier.range.clone()) { - self.current_buffer_barriers.push( - vk::BufferMemoryBarrier2::default() - .src_stage_mask(access.stage_mask().into()) - .src_access_mask(access.access_mask().into()) - .dst_stage_mask(barrier.dst_stage_mask.into()) - .dst_access_mask(barrier.dst_access_mask.into()) - // FIXME: - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .buffer(state.buffer().handle()) - .offset(range.start) - .size(range.end - range.start), 
- ); + if src_access_mask.contains_writes() && dst_access_mask.contains_reads() { + } else if dst_access_mask.contains_writes() { + src_access_mask = AccessFlags::empty(); + dst_access_mask = AccessFlags::empty(); + } else { + continue; } + + self.current_buffer_barriers.push( + vk::BufferMemoryBarrier2::default() + .src_stage_mask(src_stage_mask.into()) + .src_access_mask(src_access_mask.into()) + .dst_stage_mask(dst_stage_mask.into()) + .dst_access_mask(dst_access_mask.into()) + // FIXME: + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .buffer(buffer.handle()) + .offset(0) + .size(buffer.size()), + ); } } fn convert_initial_image_barriers(&mut self, barrier_range: Range) { let barrier_range = barrier_range.start as usize..barrier_range.end as usize; + let queue_family_index = self.current_submission().queue.queue_family_index(); for barrier in &self.executable.image_barriers[barrier_range] { - match barrier.image { - ImageReference::Normal(image) => { - let state = unsafe { self.resource_map.image_unchecked(image) }; - - for (subresource_range, access) in - state.accesses(barrier.subresource_range.clone()) - { - self.current_image_barriers.push( - vk::ImageMemoryBarrier2::default() - .src_stage_mask(access.stage_mask().into()) - .src_access_mask(access.access_mask().into()) - .dst_stage_mask(barrier.dst_stage_mask.into()) - .dst_access_mask(barrier.dst_access_mask.into()) - .old_layout(access.image_layout().into()) - .new_layout(barrier.new_layout.into()) - // FIXME: - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .image(state.image().handle()) - .subresource_range(subresource_range.into()), - ); - } + let (image, access) = match barrier.image.object_type() { + ObjectType::Image => { + let image_id = unsafe { barrier.image.parametrize() }; + let state = unsafe { self.resource_map.image_unchecked(image_id) }; + + (state.image(), state.access()) } - ImageReference::Swapchain(swapchain) => { - let state = unsafe { self.resource_map.swapchain_unchecked(swapchain) }; - - for (subresource_range, access) in - state.accesses(barrier.subresource_range.array_layers.clone()) - { - self.current_image_barriers.push( - vk::ImageMemoryBarrier2::default() - .src_stage_mask(access.stage_mask().into()) - .src_access_mask(access.access_mask().into()) - .dst_stage_mask(barrier.dst_stage_mask.into()) - .dst_access_mask(barrier.dst_access_mask.into()) - .old_layout(access.image_layout().into()) - .new_layout(barrier.new_layout.into()) - // FIXME: - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .image(state.current_image().handle()) - .subresource_range(subresource_range.into()), - ); - } + ObjectType::Swapchain => { + let swapchain_id = unsafe { barrier.image.parametrize() }; + let state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; + + (state.current_image(), state.access()) } + _ => unreachable!(), + }; + + let mut src_stage_mask = PipelineStages::empty(); + let mut src_access_mask = AccessFlags::empty(); + let dst_stage_mask = barrier.dst_stage_mask; + let mut dst_access_mask = barrier.dst_access_mask; + + if access.queue_family_index() == queue_family_index { + src_stage_mask = access.stage_mask(); + src_access_mask = access.access_mask(); + } + + #[allow(clippy::if_same_then_else)] + if access.image_layout() != barrier.new_layout { + } else if src_access_mask.contains_writes() && dst_access_mask.contains_reads() { 
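+                // The previous access wrote and this one reads, so the barrier is kept as is to
+                // make the write available and visible.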
+ } else if dst_access_mask.contains_writes() { + src_access_mask = AccessFlags::empty(); + dst_access_mask = AccessFlags::empty(); + } else { + continue; } + + self.current_image_barriers.push( + vk::ImageMemoryBarrier2::default() + .src_stage_mask(src_stage_mask.into()) + .src_access_mask(src_access_mask.into()) + .dst_stage_mask(dst_stage_mask.into()) + .dst_access_mask(dst_access_mask.into()) + .old_layout(access.image_layout().into()) + .new_layout(barrier.new_layout.into()) + // FIXME: + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(image.handle()) + .subresource_range(image.subresource_range().into()), + ); } } fn wait_acquire(&mut self, swapchain_id: Id, stage_mask: PipelineStages) { let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; - let semaphore = - &swapchain_state.semaphores[self.current_frame as usize].image_available_semaphore; + let semaphore = &swapchain_state.semaphores[self.current_frame_index as usize] + .image_available_semaphore; self.current_per_submit.wait_semaphore_infos.push( vk::SemaphoreSubmitInfo::default() @@ -662,16 +809,21 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { } fn execute_task(&mut self, node_index: NodeIndex) -> Result { + if !self.current_buffer_barriers.is_empty() || !self.current_image_barriers.is_empty() { + self.flush_barriers(); + } + let task_node = unsafe { self.executable.graph.nodes.task_node_unchecked(node_index) }; + let task = &task_node.task; + let current_command_buffer = self.current_command_buffer.as_mut().unwrap(); let mut context = TaskContext { resource_map: self.resource_map, death_row: Cell::new(Some(self.death_row)), - current_command_buffer: Cell::new(Some(self.current_command_buffer.as_mut().unwrap())), + current_frame_index: self.current_frame_index, command_buffers: Cell::new(Some(&mut self.command_buffers)), - accesses: &task_node.accesses, }; - unsafe { task_node.task.execute(&mut context, self.world) } + unsafe { task.execute(current_command_buffer, &mut context, self.world) } .map_err(|error| ExecuteError::Task { node_index, error })?; if !self.command_buffers.is_empty() { @@ -696,17 +848,7 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { self.convert_buffer_barriers(buffer_barrier_range); self.convert_image_barriers(image_barrier_range); - unsafe { - (self.cmd_pipeline_barrier2)( - self.current_command_buffer.as_mut().unwrap().handle(), - &vk::DependencyInfo::default() - .buffer_memory_barriers(&self.current_buffer_barriers) - .image_memory_barriers(&self.current_image_barriers), - ) - }; - - self.current_buffer_barriers.clear(); - self.current_image_barriers.clear(); + self.flush_barriers(); } fn convert_buffer_barriers(&mut self, barrier_range: Range) { @@ -714,6 +856,7 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { for barrier in &self.executable.buffer_barriers[barrier_range] { let state = unsafe { self.resource_map.buffer_unchecked(barrier.buffer) }; + let buffer = state.buffer(); self.current_buffer_barriers.push( vk::BufferMemoryBarrier2::default() @@ -723,9 +866,9 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { .dst_access_mask(barrier.dst_access_mask.into()) .src_queue_family_index(barrier.src_queue_family_index) .dst_queue_family_index(barrier.dst_queue_family_index) - .buffer(state.buffer().handle()) - .offset(barrier.range.start) - .size(barrier.range.end - barrier.range.start), + .buffer(buffer.handle()) + .offset(0) + .size(buffer.size()), ); } } @@ -734,13 +877,18 @@ 
impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { let barrier_range = barrier_range.start as usize..barrier_range.end as usize; for barrier in &self.executable.image_barriers[barrier_range] { - let image = match barrier.image { - ImageReference::Normal(image) => { - unsafe { self.resource_map.image_unchecked(image) }.image() + let image = match barrier.image.object_type() { + ObjectType::Image => { + let image_id = unsafe { barrier.image.parametrize() }; + + unsafe { self.resource_map.image_unchecked(image_id) }.image() } - ImageReference::Swapchain(swapchain) => { - unsafe { self.resource_map.swapchain_unchecked(swapchain) }.current_image() + ObjectType::Swapchain => { + let swapchain_id = unsafe { barrier.image.parametrize() }; + + unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }.current_image() } + _ => unreachable!(), }; self.current_image_barriers.push( @@ -754,7 +902,7 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { .src_queue_family_index(barrier.src_queue_family_index) .dst_queue_family_index(barrier.dst_queue_family_index) .image(image.handle()) - .subresource_range(barrier.subresource_range.clone().into()), + .subresource_range(image.subresource_range().into()), ); } } @@ -767,10 +915,34 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { ); } + fn signal_pre_present(&mut self, swapchain_id: Id, stage_mask: PipelineStages) { + let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; + let semaphore = &swapchain_state.semaphores[self.current_frame_index as usize] + .pre_present_complete_semaphore; + + self.current_per_submit.signal_semaphore_infos.push( + vk::SemaphoreSubmitInfo::default() + .semaphore(semaphore.handle()) + .stage_mask(stage_mask.into()), + ); + } + + fn wait_pre_present(&mut self, swapchain_id: Id, stage_mask: PipelineStages) { + let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; + let semaphore = &swapchain_state.semaphores[self.current_frame_index as usize] + .pre_present_complete_semaphore; + + self.current_per_submit.wait_semaphore_infos.push( + vk::SemaphoreSubmitInfo::default() + .semaphore(semaphore.handle()) + .stage_mask(stage_mask.into()), + ); + } + fn signal_present(&mut self, swapchain_id: Id, stage_mask: PipelineStages) { let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; let semaphore = - &swapchain_state.semaphores[self.current_frame as usize].tasks_complete_semaphore; + &swapchain_state.semaphores[self.current_frame_index as usize].tasks_complete_semaphore; self.current_per_submit.signal_semaphore_infos.push( vk::SemaphoreSubmitInfo::default() @@ -779,6 +951,20 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { ); } + fn flush_barriers(&mut self) { + unsafe { + (self.cmd_pipeline_barrier2)( + self.current_command_buffer.as_ref().unwrap().handle(), + &vk::DependencyInfo::default() + .buffer_memory_barriers(&self.current_buffer_barriers) + .image_memory_barriers(&self.current_image_barriers), + ) + }; + + self.current_buffer_barriers.clear(); + self.current_image_barriers.clear(); + } + fn flush_submit(&mut self) -> Result { unsafe { self.flush_current_command_buffer() }?; @@ -789,7 +975,12 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { } fn submit(&mut self) -> Result { - let submission = &self.executable.submissions[*self.submission_count]; + unsafe { + self.executable + .flush_mapped_memory_ranges(self.resource_map) + }?; + + let submission = self.current_submission(); let mut submit_infos = SmallVec::<[_; 
4]>::with_capacity(self.per_submits.len()); submit_infos.extend(self.per_submits.iter().map(|per_submit| { @@ -806,16 +997,18 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { vk::Fence::null() }; - unsafe { - (self.queue_submit2)( - submission.queue.handle(), - submit_infos.len() as u32, - submit_infos.as_ptr(), - fence_handle, - ) - } - .result() - .map_err(VulkanError::from)?; + submission.queue.with(|_guard| { + unsafe { + (self.queue_submit2)( + submission.queue.handle(), + submit_infos.len() as u32, + submit_infos.as_ptr(), + fence_handle, + ) + } + .result() + .map_err(VulkanError::from) + })?; *self.submission_count += 1; @@ -831,7 +1024,7 @@ impl<'a, W: ?Sized + 'static> ExecuteState2<'a, W> { self.death_row.push(Arc::new(command_buffer)); self.current_command_buffer = Some(create_command_buffer( self.resource_map, - &self.executable.submissions[*self.submission_count].queue, + &self.current_submission().queue, )?); } @@ -843,7 +1036,7 @@ struct ExecuteState<'a, W: ?Sized + 'static> { executable: &'a ExecutableTaskGraph, resource_map: &'a ResourceMap<'a>, death_row: &'a mut DeathRow, - current_frame: u32, + current_frame_index: u32, current_fence: &'a Fence, submission_count: &'a mut usize, world: &'a W, @@ -855,6 +1048,8 @@ struct ExecuteState<'a, W: ?Sized + 'static> { command_buffers: Vec>, current_buffer_barriers: Vec>, current_image_barriers: Vec>, + current_src_stage_mask: vk::PipelineStageFlags, + current_dst_stage_mask: vk::PipelineStageFlags, } #[derive(Default)] @@ -870,12 +1065,12 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { executable: &'a ExecutableTaskGraph, resource_map: &'a ResourceMap<'a>, death_row: &'a mut DeathRow, - current_frame: u32, + current_frame_index: u32, current_fence: &'a Fence, submission_count: &'a mut usize, world: &'a W, ) -> Result { - let fns = resource_map.device().fns(); + let fns = executable.device().fns(); let cmd_pipeline_barrier = fns.v1_0.cmd_pipeline_barrier; let queue_submit = fns.v1_0.queue_submit; @@ -886,7 +1081,7 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { executable, resource_map, death_row, - current_frame, + current_frame_index, current_fence, submission_count, world, @@ -898,150 +1093,130 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { command_buffers: Vec::new(), current_buffer_barriers: Vec::new(), current_image_barriers: Vec::new(), + current_src_stage_mask: vk::PipelineStageFlags::empty(), + current_dst_stage_mask: vk::PipelineStageFlags::empty(), }) } + fn current_submission(&self) -> &super::Submission { + &self.executable.submissions[*self.submission_count] + } + fn initial_pipeline_barrier( &mut self, buffer_barrier_range: Range, image_barrier_range: Range, ) { - let mut src_stage_mask = vk::PipelineStageFlags::empty(); - let mut dst_stage_mask = vk::PipelineStageFlags::empty(); - - self.convert_initial_buffer_barriers( - buffer_barrier_range, - &mut src_stage_mask, - &mut dst_stage_mask, - ); - self.convert_initial_image_barriers( - image_barrier_range, - &mut src_stage_mask, - &mut dst_stage_mask, - ); - - if src_stage_mask.is_empty() { - src_stage_mask = vk::PipelineStageFlags::TOP_OF_PIPE; - } - - if dst_stage_mask.is_empty() { - dst_stage_mask = vk::PipelineStageFlags::BOTTOM_OF_PIPE; - } - - unsafe { - (self.cmd_pipeline_barrier)( - self.current_command_buffer.as_mut().unwrap().handle(), - src_stage_mask, - dst_stage_mask, - vk::DependencyFlags::empty(), - 0, - ptr::null(), - self.current_buffer_barriers.len() as u32, - self.current_buffer_barriers.as_ptr(), - 
self.current_image_barriers.len() as u32, - self.current_image_barriers.as_ptr(), - ) - }; - - self.current_buffer_barriers.clear(); - self.current_image_barriers.clear(); + self.convert_initial_buffer_barriers(buffer_barrier_range); + self.convert_initial_image_barriers(image_barrier_range); } - fn convert_initial_buffer_barriers( - &mut self, - barrier_range: Range, - src_stage_mask: &mut vk::PipelineStageFlags, - dst_stage_mask: &mut vk::PipelineStageFlags, - ) { + fn convert_initial_buffer_barriers(&mut self, barrier_range: Range) { let barrier_range = barrier_range.start as usize..barrier_range.end as usize; + let queue_family_index = self.current_submission().queue.queue_family_index(); for barrier in &self.executable.buffer_barriers[barrier_range] { let state = unsafe { self.resource_map.buffer_unchecked(barrier.buffer) }; + let buffer = state.buffer(); + let access = state.access(); + let mut src_stage_mask = PipelineStages::empty(); + let mut src_access_mask = AccessFlags::empty(); + let dst_stage_mask = barrier.dst_stage_mask; + let mut dst_access_mask = barrier.dst_access_mask; + + if access.queue_family_index() == queue_family_index { + src_stage_mask = access.stage_mask(); + src_access_mask = access.access_mask(); + } - for (range, access) in state.accesses(barrier.range.clone()) { - self.current_buffer_barriers.push( - vk::BufferMemoryBarrier::default() - .src_access_mask(convert_access_mask(access.access_mask())) - .dst_access_mask(convert_access_mask(barrier.dst_access_mask)) - // FIXME: - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .buffer(state.buffer().handle()) - .offset(range.start) - .size(range.end - range.start), - ); - - *src_stage_mask |= convert_stage_mask(access.stage_mask()); + if src_access_mask.contains_writes() && dst_access_mask.contains_reads() { + } else if dst_access_mask.contains_writes() { + src_access_mask = AccessFlags::empty(); + dst_access_mask = AccessFlags::empty(); + } else { + continue; } - *dst_stage_mask |= convert_stage_mask(barrier.dst_stage_mask); + self.current_buffer_barriers.push( + vk::BufferMemoryBarrier::default() + .src_access_mask(convert_access_mask(src_access_mask)) + .dst_access_mask(convert_access_mask(dst_access_mask)) + // FIXME: + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .buffer(buffer.handle()) + .offset(0) + .size(buffer.size()), + ); + + self.current_src_stage_mask |= convert_stage_mask(src_stage_mask); + self.current_dst_stage_mask |= convert_stage_mask(dst_stage_mask); } } - fn convert_initial_image_barriers( - &mut self, - barrier_range: Range, - src_stage_mask: &mut vk::PipelineStageFlags, - dst_stage_mask: &mut vk::PipelineStageFlags, - ) { + fn convert_initial_image_barriers(&mut self, barrier_range: Range) { let barrier_range = barrier_range.start as usize..barrier_range.end as usize; + let queue_family_index = self.current_submission().queue.queue_family_index(); for barrier in &self.executable.image_barriers[barrier_range] { - match barrier.image { - ImageReference::Normal(image) => { - let state = unsafe { self.resource_map.image_unchecked(image) }; - - for (subresource_range, access) in - state.accesses(barrier.subresource_range.clone()) - { - self.current_image_barriers.push( - vk::ImageMemoryBarrier::default() - .src_access_mask(convert_access_mask(access.access_mask())) - .dst_access_mask(convert_access_mask(barrier.dst_access_mask)) - .old_layout(access.image_layout().into()) - 
.new_layout(barrier.new_layout.into()) - // FIXME: - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .image(state.image().handle()) - .subresource_range(subresource_range.into()), - ); - - *src_stage_mask |= convert_stage_mask(access.stage_mask()); - } + let (image, access) = match barrier.image.object_type() { + ObjectType::Image => { + let image_id = unsafe { barrier.image.parametrize() }; + let state = unsafe { self.resource_map.image_unchecked(image_id) }; + + (state.image(), state.access()) } - ImageReference::Swapchain(swapchain) => { - let state = unsafe { self.resource_map.swapchain_unchecked(swapchain) }; - - for (subresource_range, access) in - state.accesses(barrier.subresource_range.array_layers.clone()) - { - self.current_image_barriers.push( - vk::ImageMemoryBarrier::default() - .src_access_mask(convert_access_mask(access.access_mask())) - .dst_access_mask(convert_access_mask(barrier.dst_access_mask)) - .old_layout(access.image_layout().into()) - .new_layout(barrier.new_layout.into()) - // FIXME: - .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) - .image(state.current_image().handle()) - .subresource_range(subresource_range.into()), - ); - - *src_stage_mask |= convert_stage_mask(access.stage_mask()); - } + ObjectType::Swapchain => { + let swapchain_id = unsafe { barrier.image.parametrize() }; + let state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; + + (state.current_image(), state.access()) } + _ => unreachable!(), + }; + + let mut src_stage_mask = PipelineStages::empty(); + let mut src_access_mask = AccessFlags::empty(); + let dst_stage_mask = barrier.dst_stage_mask; + let mut dst_access_mask = barrier.dst_access_mask; + + if access.queue_family_index() == queue_family_index { + src_stage_mask = access.stage_mask(); + src_access_mask = access.access_mask(); + } + + #[allow(clippy::if_same_then_else)] + if access.image_layout() != barrier.new_layout { + } else if src_access_mask.contains_writes() && dst_access_mask.contains_reads() { + } else if dst_access_mask.contains_writes() { + src_access_mask = AccessFlags::empty(); + dst_access_mask = AccessFlags::empty(); + } else { + continue; } - *dst_stage_mask |= convert_stage_mask(barrier.dst_stage_mask); + self.current_image_barriers.push( + vk::ImageMemoryBarrier::default() + .src_access_mask(convert_access_mask(src_access_mask)) + .dst_access_mask(convert_access_mask(dst_access_mask)) + .old_layout(access.image_layout().into()) + .new_layout(barrier.new_layout.into()) + // FIXME: + .src_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .dst_queue_family_index(vk::QUEUE_FAMILY_IGNORED) + .image(image.handle()) + .subresource_range(image.subresource_range().into()), + ); + + self.current_src_stage_mask |= convert_stage_mask(src_stage_mask); + self.current_dst_stage_mask |= convert_stage_mask(dst_stage_mask); } } fn wait_acquire(&mut self, swapchain_id: Id, stage_mask: PipelineStages) { let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; - let semaphore = - &swapchain_state.semaphores[self.current_frame as usize].image_available_semaphore; + let semaphore = &swapchain_state.semaphores[self.current_frame_index as usize] + .image_available_semaphore; self.current_per_submit .wait_semaphores @@ -1061,16 +1236,21 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { } fn execute_task(&mut self, node_index: NodeIndex) -> Result { + if 
!self.current_buffer_barriers.is_empty() || !self.current_image_barriers.is_empty() { + self.flush_barriers(); + } + let task_node = unsafe { self.executable.graph.nodes.task_node_unchecked(node_index) }; + let task = &task_node.task; + let current_command_buffer = self.current_command_buffer.as_mut().unwrap(); let mut context = TaskContext { resource_map: self.resource_map, death_row: Cell::new(Some(self.death_row)), - current_command_buffer: Cell::new(Some(self.current_command_buffer.as_mut().unwrap())), + current_frame_index: self.current_frame_index, command_buffers: Cell::new(Some(&mut self.command_buffers)), - accesses: &task_node.accesses, }; - unsafe { task_node.task.execute(&mut context, self.world) } + unsafe { task.execute(current_command_buffer, &mut context, self.world) } .map_err(|error| ExecuteError::Task { node_index, error })?; if !self.command_buffers.is_empty() { @@ -1092,57 +1272,18 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { buffer_barrier_range: Range, image_barrier_range: Range, ) { - let mut src_stage_mask = vk::PipelineStageFlags::empty(); - let mut dst_stage_mask = vk::PipelineStageFlags::empty(); - - self.convert_buffer_barriers( - buffer_barrier_range, - &mut src_stage_mask, - &mut dst_stage_mask, - ); - self.convert_image_barriers( - image_barrier_range, - &mut src_stage_mask, - &mut dst_stage_mask, - ); - - if src_stage_mask.is_empty() { - src_stage_mask = vk::PipelineStageFlags::TOP_OF_PIPE; - } - - if dst_stage_mask.is_empty() { - dst_stage_mask = vk::PipelineStageFlags::BOTTOM_OF_PIPE; - } - - unsafe { - (self.cmd_pipeline_barrier)( - self.current_command_buffer.as_mut().unwrap().handle(), - src_stage_mask, - dst_stage_mask, - vk::DependencyFlags::empty(), - 0, - ptr::null(), - self.current_buffer_barriers.len() as u32, - self.current_buffer_barriers.as_ptr(), - self.current_image_barriers.len() as u32, - self.current_image_barriers.as_ptr(), - ) - }; + self.convert_buffer_barriers(buffer_barrier_range); + self.convert_image_barriers(image_barrier_range); - self.current_buffer_barriers.clear(); - self.current_image_barriers.clear(); + self.flush_barriers(); } - fn convert_buffer_barriers( - &mut self, - barrier_range: Range, - src_stage_mask: &mut vk::PipelineStageFlags, - dst_stage_mask: &mut vk::PipelineStageFlags, - ) { + fn convert_buffer_barriers(&mut self, barrier_range: Range) { let barrier_range = barrier_range.start as usize..barrier_range.end as usize; for barrier in &self.executable.buffer_barriers[barrier_range] { let state = unsafe { self.resource_map.buffer_unchecked(barrier.buffer) }; + let buffer = state.buffer(); self.current_buffer_barriers.push( vk::BufferMemoryBarrier::default() @@ -1151,31 +1292,31 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { .src_queue_family_index(barrier.src_queue_family_index) .dst_queue_family_index(barrier.dst_queue_family_index) .buffer(state.buffer().handle()) - .offset(barrier.range.start) - .size(barrier.range.end - barrier.range.start), + .offset(0) + .size(buffer.size()), ); - *src_stage_mask |= convert_stage_mask(barrier.src_stage_mask); - *dst_stage_mask |= convert_stage_mask(barrier.dst_stage_mask); + self.current_src_stage_mask |= convert_stage_mask(barrier.src_stage_mask); + self.current_dst_stage_mask |= convert_stage_mask(barrier.dst_stage_mask); } } - fn convert_image_barriers( - &mut self, - barrier_range: Range, - src_stage_mask: &mut vk::PipelineStageFlags, - dst_stage_mask: &mut vk::PipelineStageFlags, - ) { + fn convert_image_barriers(&mut self, barrier_range: Range) { let 
barrier_range = barrier_range.start as usize..barrier_range.end as usize; for barrier in &self.executable.image_barriers[barrier_range] { - let image = match barrier.image { - ImageReference::Normal(image) => { - unsafe { self.resource_map.image_unchecked(image) }.image() + let image = match barrier.image.object_type() { + ObjectType::Image => { + let image_id = unsafe { barrier.image.parametrize() }; + + unsafe { self.resource_map.image_unchecked(image_id) }.image() } - ImageReference::Swapchain(swapchain) => { - unsafe { self.resource_map.swapchain_unchecked(swapchain) }.current_image() + ObjectType::Swapchain => { + let swapchain_id = unsafe { barrier.image.parametrize() }; + + unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }.current_image() } + _ => unreachable!(), }; self.current_image_barriers.push( @@ -1187,11 +1328,11 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { .src_queue_family_index(barrier.src_queue_family_index) .dst_queue_family_index(barrier.dst_queue_family_index) .image(image.handle()) - .subresource_range(barrier.subresource_range.clone().into()), + .subresource_range(image.subresource_range().into()), ); - *src_stage_mask |= convert_stage_mask(barrier.src_stage_mask); - *dst_stage_mask |= convert_stage_mask(barrier.dst_stage_mask); + self.current_src_stage_mask |= convert_stage_mask(barrier.src_stage_mask); + self.current_dst_stage_mask |= convert_stage_mask(barrier.dst_stage_mask); } } @@ -1201,10 +1342,33 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { .push(self.executable.semaphores.borrow()[semaphore_index].handle()); } + fn signal_pre_present(&mut self, swapchain_id: Id, _stage_mask: PipelineStages) { + let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; + let semaphore = &swapchain_state.semaphores[self.current_frame_index as usize] + .pre_present_complete_semaphore; + + self.current_per_submit + .signal_semaphores + .push(semaphore.handle()); + } + + fn wait_pre_present(&mut self, swapchain_id: Id, stage_mask: PipelineStages) { + let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; + let semaphore = &swapchain_state.semaphores[self.current_frame_index as usize] + .pre_present_complete_semaphore; + + self.current_per_submit + .wait_semaphores + .push(semaphore.handle()); + self.current_per_submit + .wait_dst_stage_mask + .push(convert_stage_mask(stage_mask)); + } + fn signal_present(&mut self, swapchain_id: Id, _stage_mask: PipelineStages) { let swapchain_state = unsafe { self.resource_map.swapchain_unchecked(swapchain_id) }; let semaphore = - &swapchain_state.semaphores[self.current_frame as usize].tasks_complete_semaphore; + &swapchain_state.semaphores[self.current_frame_index as usize].tasks_complete_semaphore; self.current_per_submit .signal_semaphores @@ -1220,8 +1384,43 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { Ok(()) } + fn flush_barriers(&mut self) { + if self.current_src_stage_mask.is_empty() { + self.current_src_stage_mask = vk::PipelineStageFlags::TOP_OF_PIPE; + } + + if self.current_dst_stage_mask.is_empty() { + self.current_dst_stage_mask = vk::PipelineStageFlags::BOTTOM_OF_PIPE; + } + + unsafe { + (self.cmd_pipeline_barrier)( + self.current_command_buffer.as_ref().unwrap().handle(), + self.current_src_stage_mask, + self.current_dst_stage_mask, + vk::DependencyFlags::empty(), + 0, + ptr::null(), + self.current_buffer_barriers.len() as u32, + self.current_buffer_barriers.as_ptr(), + self.current_image_barriers.len() as u32, + 
self.current_image_barriers.as_ptr(), + ) + }; + + self.current_buffer_barriers.clear(); + self.current_image_barriers.clear(); + self.current_src_stage_mask = vk::PipelineStageFlags::empty(); + self.current_dst_stage_mask = vk::PipelineStageFlags::empty(); + } + fn submit(&mut self) -> Result { - let submission = &self.executable.submissions[*self.submission_count]; + unsafe { + self.executable + .flush_mapped_memory_ranges(self.resource_map) + }?; + + let submission = self.current_submission(); let mut submit_infos = SmallVec::<[_; 4]>::with_capacity(self.per_submits.len()); submit_infos.extend(self.per_submits.iter().map(|per_submit| { @@ -1239,16 +1438,18 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { vk::Fence::null() }; - unsafe { - (self.queue_submit)( - submission.queue.handle(), - submit_infos.len() as u32, - submit_infos.as_ptr(), - fence_handle, - ) - } - .result() - .map_err(VulkanError::from)?; + submission.queue.with(|_guard| { + unsafe { + (self.queue_submit)( + submission.queue.handle(), + submit_infos.len() as u32, + submit_infos.as_ptr(), + fence_handle, + ) + } + .result() + .map_err(VulkanError::from) + })?; *self.submission_count += 1; @@ -1264,7 +1465,7 @@ impl<'a, W: ?Sized + 'static> ExecuteState<'a, W> { self.death_row.push(Arc::new(command_buffer)); self.current_command_buffer = Some(create_command_buffer( self.resource_map, - &self.executable.submissions[*self.submission_count].queue, + &self.current_submission().queue, )?); } @@ -1279,7 +1480,10 @@ fn create_command_buffer( // SAFETY: The parameters are valid. unsafe { RawRecordingCommandBuffer::new_unchecked( - resource_map.resources.command_buffer_allocator().clone(), + resource_map + .physical_resources + .command_buffer_allocator() + .clone(), queue.queue_family_index(), CommandBufferLevel::Primary, CommandBufferBeginInfo { @@ -1367,7 +1571,7 @@ impl Drop for StateGuard<'_, W> { } } - let device = submissions[0].queue.device(); + let device = self.executable.device(); // But even after waiting for idle, the state of the graph is invalid because some // semaphores are still signalled, so we have to recreate them. @@ -1375,7 +1579,7 @@ impl Drop for StateGuard<'_, W> { // SAFETY: The parameters are valid. match unsafe { Semaphore::new_unchecked(device.clone(), Default::default()) } { Ok(new_semaphore) => { - let _ = mem::replace(semaphore, new_semaphore); + let _ = mem::replace(semaphore, Arc::new(new_semaphore)); } Err(err) => { if err == VulkanError::DeviceLost { @@ -1391,11 +1595,41 @@ impl Drop for StateGuard<'_, W> { } } + let mut last_accesses = + vec![ResourceAccess::default(); self.executable.graph.resources.capacity() as usize]; + let instruction_range = 0..submissions[self.submission_count - 1].instruction_range.end; + + // Determine the last accesses of resources up until before the failed submission. 
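+        // Accesses by the same queue family, in the same image layout, where neither access
+        // writes are merged by OR-ing their stage and access masks; any other access replaces
+        // the previous one.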
+ for instruction in &self.executable.instructions[instruction_range] { + let Instruction::ExecuteTask { node_index } = instruction else { + continue; + }; + let task_node = unsafe { self.executable.graph.nodes.task_node_unchecked(*node_index) }; + + for (id, access) in task_node.accesses.iter() { + let prev_access = &mut last_accesses[id.index() as usize]; + let access = ResourceAccess { + queue_family_index: task_node.queue_family_index, + ..*access + }; + + if prev_access.queue_family_index != access.queue_family_index + || prev_access.image_layout != access.image_layout + || prev_access.access_mask.contains_writes() + || access.access_mask.contains_writes() + { + *prev_access = access; + } else { + prev_access.stage_mask |= access.stage_mask; + prev_access.access_mask |= access.access_mask; + } + } + } + + // Update the resource state with the correct last accesses. unsafe { - self.executable.update_resource_state( - self.resource_map, - 0..submissions[self.submission_count - 1].instruction_range.end, - ) + self.executable + .update_resource_state(self.resource_map, &last_accesses) }; } } @@ -1403,32 +1637,60 @@ impl Drop for StateGuard<'_, W> { /// Maps [virtual resources] to physical resources. pub struct ResourceMap<'a> { virtual_resources: &'a super::Resources, - resources: &'a Resources, + physical_resources: Arc, map: Vec<*const ()>, len: u32, guard: epoch::Guard<'a>, } impl<'a> ResourceMap<'a> { - /// Creates a new `ResourceMap` mapping the virtual resources of the given `executable` to - /// physical resources from the given `resources` collection. - /// - /// # Panics - /// - /// - Panics if the device of `executable` is not the same as that of `resources`. - pub fn new(executable: &'a ExecutableTaskGraph, resources: &'a Resources) -> Self { - assert_eq!(executable.device(), resources.device()); - + /// Creates a new `ResourceMap` mapping the virtual resources of the given `executable`. + pub fn new(executable: &'a ExecutableTaskGraph) -> Result { let virtual_resources = &executable.graph.resources; - let map = vec![ptr::null(); virtual_resources.capacity() as usize]; + let physical_resources = virtual_resources.physical_resources.clone(); + let mut map = vec![ptr::null(); virtual_resources.capacity() as usize]; + let guard = virtual_resources.physical_resources.pin(); + + for (&physical_id, &virtual_id) in &virtual_resources.physical_map { + // SAFETY: Virtual IDs inside the `physical_map` are always valid. + let slot = unsafe { map.get_unchecked_mut(virtual_id.index() as usize) }; + + *slot = match physical_id.object_type() { + // SAFETY: We own an `epoch::Guard`. + ObjectType::Buffer => <*const _>::cast(unsafe { + physical_resources.buffer_unprotected(physical_id.parametrize()) + }?), + // SAFETY: We own an `epoch::Guard`. + ObjectType::Image => <*const _>::cast(unsafe { + physical_resources.image_unprotected(physical_id.parametrize()) + }?), + // SAFETY: We own an `epoch::Guard`. 
+ ObjectType::Swapchain => <*const _>::cast(unsafe { + physical_resources.swapchain_unprotected(physical_id.parametrize()) + }?), + _ => unreachable!(), + }; + } + + let len = virtual_resources.physical_map.len() as u32; - ResourceMap { + Ok(ResourceMap { virtual_resources, - resources, + physical_resources, map, - len: 0, - guard: resources.pin(), - } + len, + guard, + }) + } + + #[doc(hidden)] + #[inline] + pub fn insert( + &mut self, + virtual_id: Id, + physical_id: Id, + ) -> Result<(), InvalidSlotError> { + R::insert(self, virtual_id, physical_id) } /// Inserts a mapping from the [virtual buffer resource] corresponding to `virtual_id` to the @@ -1444,15 +1706,14 @@ impl<'a> ResourceMap<'a> { virtual_id: Id, physical_id: Id, ) -> Result<(), InvalidSlotError> { - let virtual_buffer = self.virtual_resources.buffer(virtual_id)?; + self.virtual_resources.get(virtual_id.erase())?; // SAFETY: We own an `epoch::Guard`. - let state = unsafe { self.resources.buffer_unprotected(physical_id) }?; + let state = unsafe { self.physical_resources.buffer_unprotected(physical_id) }?; - assert!(state.buffer().size() >= virtual_buffer.size); assert_eq!( state.buffer().sharing().is_exclusive(), - virtual_id.tag() & EXCLUSIVE_BIT != 0, + virtual_id.is_exclusive(), ); let ptr = <*const _>::cast(state); @@ -1494,7 +1755,10 @@ impl<'a> ResourceMap<'a> { // SAFETY: // * The caller must ensure that `physical_id` is a valid ID. // * We own an `epoch::Guard`. - let state = unsafe { self.resources.buffer_unchecked_unprotected(physical_id) }; + let state = unsafe { + self.physical_resources + .buffer_unchecked_unprotected(physical_id) + }; // SAFETY: The caller must ensure that `virtual_id` is a valid virtual ID, and since we // initialized `self.map` with a length at least that of `self.virtual_resources`, the @@ -1515,24 +1779,23 @@ impl<'a> ResourceMap<'a> { /// /// - Panics if the physical resource doesn't match the virtual resource. /// - Panics if the physical resource already has a mapping from another virtual resource. + /// - Panics if `virtual_id` refers to a swapchain image. #[inline] pub fn insert_image( &mut self, virtual_id: Id, physical_id: Id, ) -> Result<(), InvalidSlotError> { - let virtual_image = self.virtual_resources.image(virtual_id)?; + assert_ne!(virtual_id.object_type(), ObjectType::Swapchain); + + self.virtual_resources.get(virtual_id.erase())?; // SAFETY: We own an `epoch::Guard`. - let state = unsafe { self.resources.image_unprotected(physical_id) }?; + let state = unsafe { self.physical_resources.image_unprotected(physical_id) }?; - assert_eq!(state.image().flags(), virtual_image.flags); - assert_eq!(state.image().format(), virtual_image.format); - assert!(state.image().array_layers() >= virtual_image.array_layers); - assert!(state.image().mip_levels() >= virtual_image.mip_levels); assert_eq!( state.image().sharing().is_exclusive(), - virtual_id.tag() & EXCLUSIVE_BIT != 0, + virtual_id.is_exclusive(), ); let ptr = <*const _>::cast(state); @@ -1570,7 +1833,10 @@ impl<'a> ResourceMap<'a> { // SAFETY: // * The caller must ensure that `physical_id` is a valid ID. // * We own an `epoch::Guard`. 
- let state = unsafe { self.resources.image_unchecked_unprotected(physical_id) }; + let state = unsafe { + self.physical_resources + .image_unchecked_unprotected(physical_id) + }; // SAFETY: The caller must ensure that `virtual_id` is a valid virtual ID, and since we // initialized `self.map` with a length at least that of `self.virtual_resources`, the @@ -1597,12 +1863,15 @@ impl<'a> ResourceMap<'a> { virtual_id: Id, physical_id: Id, ) -> Result<(), InvalidSlotError> { - let virtual_swapchain = self.virtual_resources.swapchain(virtual_id)?; + self.virtual_resources.get(virtual_id.erase())?; // SAFETY: We own an `epoch::Guard`. - let state = unsafe { self.resources.swapchain_unprotected(physical_id) }?; + let state = unsafe { self.physical_resources.swapchain_unprotected(physical_id) }?; - assert!(state.swapchain().image_array_layers() >= virtual_swapchain.image_array_layers); + assert_eq!( + state.swapchain().image_sharing().is_exclusive(), + virtual_id.is_exclusive(), + ); let ptr = <*const _>::cast(state); let is_duplicate = self.map.iter().any(|&p| p == ptr); @@ -1643,7 +1912,10 @@ impl<'a> ResourceMap<'a> { // SAFETY: // * The caller must ensure that `physical_id` is a valid ID. // * We own an `epoch::Guard`. - let state = unsafe { self.resources.swapchain_unchecked_unprotected(physical_id) }; + let state = unsafe { + self.physical_resources + .swapchain_unchecked_unprotected(physical_id) + }; // SAFETY: The caller must ensure that `virtual_id` is a valid virtual ID, and since we // initialized `self.map` with a length at least that of `self.virtual_resources`, the @@ -1657,11 +1929,15 @@ impl<'a> ResourceMap<'a> { *slot = <*const _>::cast(state); } + pub(crate) fn virtual_resources(&self) -> &super::Resources { + self.virtual_resources + } + /// Returns the `Resources` collection. #[inline] #[must_use] - pub fn resources(&self) -> &'a Resources { - self.resources + pub fn resources(&self) -> &Arc { + &self.physical_resources } /// Returns the number of mappings in the map. @@ -1682,7 +1958,7 @@ impl<'a> ResourceMap<'a> { } pub(crate) unsafe fn buffer(&self, id: Id) -> Result<&BufferState, InvalidSlotError> { - self.virtual_resources.buffer(id)?; + self.virtual_resources.get(id.erase())?; // SAFETY: The caller must ensure that a mapping for `id` has been inserted. Ok(unsafe { self.buffer_unchecked(id) }) @@ -1697,7 +1973,7 @@ impl<'a> ResourceMap<'a> { } pub(crate) unsafe fn image(&self, id: Id) -> Result<&ImageState, InvalidSlotError> { - self.virtual_resources.image(id)?; + self.virtual_resources.get(id.erase())?; // SAFETY: The caller must ensure that a mapping for `id` has been inserted. Ok(unsafe { self.image_unchecked(id) }) @@ -1715,7 +1991,7 @@ impl<'a> ResourceMap<'a> { &self, id: Id, ) -> Result<&SwapchainState, InvalidSlotError> { - self.virtual_resources.swapchain(id)?; + self.virtual_resources.get(id.erase())?; // SAFETY: The caller must ensure that a mapping for `id` has been inserted. 
Ok(unsafe { self.swapchain_unchecked(id) }) @@ -1733,10 +2009,65 @@ impl<'a> ResourceMap<'a> { unsafe impl DeviceOwned for ResourceMap<'_> { #[inline] fn device(&self) -> &Arc { - self.resources.device() + self.physical_resources.device() + } +} + +pub trait Resource: Sized { + fn insert( + map: &mut ResourceMap<'_>, + virtual_id: Id, + physical_id: Id, + ) -> Result<(), InvalidSlotError>; +} + +impl Resource for Buffer { + fn insert( + map: &mut ResourceMap<'_>, + virtual_id: Id, + physical_id: Id, + ) -> Result<(), InvalidSlotError> { + map.insert_buffer(virtual_id, physical_id) } } +impl Resource for Image { + fn insert( + map: &mut ResourceMap<'_>, + virtual_id: Id, + physical_id: Id, + ) -> Result<(), InvalidSlotError> { + map.insert_image(virtual_id, physical_id) + } +} + +impl Resource for Swapchain { + fn insert( + map: &mut ResourceMap<'_>, + virtual_id: Id, + physical_id: Id, + ) -> Result<(), InvalidSlotError> { + map.insert_swapchain(virtual_id, physical_id) + } +} + +/// Creates a [`ResourceMap`] containing the given mappings. +#[macro_export] +macro_rules! resource_map { + ($executable:expr $(, $virtual_id:expr => $physical_id:expr)* $(,)?) => { + match $crate::graph::ResourceMap::new($executable) { + ::std::result::Result::Ok(mut map) => { + $(if let ::std::result::Result::Err(err) = map.insert($virtual_id, $physical_id) { + ::std::result::Result::Err(err) + } else)* { + ::std::result::Result::Ok::<_, $crate::InvalidSlotError>(map) + } + } + ::std::result::Result::Err(err) => ::std::result::Result::Err(err), + } + }; +} + type Result = ::std::result::Result; /// Error that can happen when [executing] an [`ExecutableTaskGraph`]. @@ -1750,7 +2081,7 @@ pub enum ExecuteError { }, Swapchain { swapchain_id: Id, - error: VulkanError, + error: Validated, }, VulkanError(VulkanError), } diff --git a/vulkano-taskgraph/src/graph/mod.rs b/vulkano-taskgraph/src/graph/mod.rs index e315a21a96..d5c707381f 100644 --- a/vulkano-taskgraph/src/graph/mod.rs +++ b/vulkano-taskgraph/src/graph/mod.rs @@ -1,33 +1,31 @@ //! The task graph data structure and associated types. -pub use self::execute::{ExecuteError, ResourceMap}; +pub use self::{ + compile::{CompileError, CompileErrorKind, CompileInfo}, + execute::{ExecuteError, ResourceMap}, +}; use crate::{ - resource::{AccessType, BufferRange, ImageLayoutType}, - Id, InvalidSlotError, QueueFamilyType, Task, BUFFER_TAG, IMAGE_TAG, SWAPCHAIN_TAG, + resource::{self, AccessType, Flight, HostAccessType, ImageLayoutType}, + Id, InvalidSlotError, Object, ObjectType, QueueFamilyType, Task, }; +use ahash::HashMap; +use ash::vk; use concurrent_slotmap::{IterMut, IterUnprotected, SlotId, SlotMap}; use smallvec::SmallVec; use std::{ - borrow::Cow, cell::RefCell, error::Error, fmt, hint, iter::FusedIterator, ops::Range, slice, - sync::Arc, + borrow::Cow, cell::RefCell, error::Error, fmt, hint, iter::FusedIterator, ops::Range, sync::Arc, }; use vulkano::{ buffer::{Buffer, BufferCreateInfo}, device::{Device, DeviceOwned, Queue}, - format::Format, - image::{ - Image, ImageAspects, ImageCreateFlags, ImageCreateInfo, ImageLayout, ImageSubresourceRange, - }, + image::{Image, ImageCreateInfo, ImageLayout}, swapchain::{Swapchain, SwapchainCreateInfo}, sync::{semaphore::Semaphore, AccessFlags, PipelineStages}, - DeviceSize, }; +mod compile; mod execute; -const EXCLUSIVE_BIT: u32 = 1 << 6; -const VIRTUAL_BIT: u32 = 1 << 7; - /// The task graph is a [directed acyclic graph] consisting of [`Task`] nodes, with edges /// representing happens-before relations. 
/// @@ -42,6 +40,8 @@ struct Nodes { } struct Node { + // TODO: + #[allow(unused)] name: Cow<'static, str>, inner: NodeInner, in_edges: Vec, @@ -51,53 +51,41 @@ struct Node { enum NodeInner { Task(TaskNode), // TODO: + #[allow(unused)] Semaphore, } type NodeIndex = u32; -struct Resources { - inner: SlotMap, -} - -#[derive(Clone, Copy)] -enum ResourceInfo { - Buffer(BufferInfo), - Image(ImageInfo), - Swapchain(SwapchainInfo), -} - -#[derive(Clone, Copy)] -struct BufferInfo { - size: DeviceSize, -} - -#[derive(Clone, Copy)] -struct ImageInfo { - flags: ImageCreateFlags, - format: Format, - array_layers: u32, - mip_levels: u32, -} - -#[derive(Clone, Copy)] -struct SwapchainInfo { - image_array_layers: u32, +pub(crate) struct Resources { + inner: SlotMap<()>, + physical_resources: Arc, + physical_map: HashMap, + host_reads: Vec>, + host_writes: Vec>, } impl TaskGraph { /// Creates a new `TaskGraph`. /// /// `max_nodes` is the maximum number of nodes the graph can ever have. `max_resources` is the - /// maximum number of resources the graph can ever have. + /// maximum number of virtual resources the graph can ever have. #[must_use] - pub fn new(max_nodes: u32, max_resources: u32) -> Self { + pub fn new( + physical_resources: Arc, + max_nodes: u32, + max_resources: u32, + ) -> Self { TaskGraph { nodes: Nodes { inner: SlotMap::new(max_nodes), }, resources: Resources { inner: SlotMap::new(max_resources), + physical_resources, + physical_map: HashMap::default(), + host_reads: Vec::new(), + host_writes: Vec::new(), }, } } @@ -124,7 +112,7 @@ impl TaskGraph { TaskNodeBuilder { id, task_node, - resources: &self.resources, + resources: &mut self.resources, } } @@ -221,6 +209,15 @@ impl TaskGraph { pub fn add_swapchain(&mut self, create_info: &SwapchainCreateInfo) -> Id { self.resources.add_swapchain(create_info) } + + /// Adds a host buffer access to this task graph. + /// + /// # Panics + /// + /// - Panics if `id` is not a valid virtual resource ID nor a valid physical ID. 
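+    ///
+    /// If `id` is a physical buffer ID that doesn't have a virtual mapping in this graph yet,
+    /// one is created for it automatically.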
+ pub fn add_host_buffer_access(&mut self, id: Id, access_type: HostAccessType) { + self.resources.add_host_buffer_access(id, access_type) + } } impl Nodes { @@ -363,147 +360,161 @@ impl Nodes { impl Resources { fn add_buffer(&mut self, create_info: &BufferCreateInfo) -> Id { - let resource_info = ResourceInfo::Buffer(BufferInfo { - size: create_info.size, - }); - let mut tag = BUFFER_TAG | VIRTUAL_BIT; + let mut tag = Buffer::TAG | Id::VIRTUAL_BIT; if create_info.sharing.is_exclusive() { - tag |= EXCLUSIVE_BIT; + tag |= Id::EXCLUSIVE_BIT; } - let slot = self.inner.insert_with_tag_mut(resource_info, tag); + let slot = self.inner.insert_with_tag_mut((), tag); - Id::new(slot) + unsafe { Id::new(slot) } } fn add_image(&mut self, create_info: &ImageCreateInfo) -> Id { - let resource_info = ResourceInfo::Image(ImageInfo { - flags: create_info.flags, - format: create_info.format, - array_layers: create_info.array_layers, - mip_levels: create_info.mip_levels, - }); - let mut tag = IMAGE_TAG | VIRTUAL_BIT; + let mut tag = Image::TAG | Id::VIRTUAL_BIT; if create_info.sharing.is_exclusive() { - tag |= EXCLUSIVE_BIT; + tag |= Id::EXCLUSIVE_BIT; } - let slot = self.inner.insert_with_tag_mut(resource_info, tag); + let slot = self.inner.insert_with_tag_mut((), tag); - Id::new(slot) + unsafe { Id::new(slot) } } fn add_swapchain(&mut self, create_info: &SwapchainCreateInfo) -> Id { - let resource_info = ResourceInfo::Swapchain(SwapchainInfo { - image_array_layers: create_info.image_array_layers, - }); - let tag = SWAPCHAIN_TAG | VIRTUAL_BIT; + let mut tag = Swapchain::TAG | Id::VIRTUAL_BIT; - let slot = self.inner.insert_with_tag_mut(resource_info, tag); + if create_info.image_sharing.is_exclusive() { + tag |= Id::EXCLUSIVE_BIT; + } - Id::new(slot) - } + let slot = self.inner.insert_with_tag_mut((), tag); - fn capacity(&self) -> u32 { - self.inner.capacity() + unsafe { Id::new(slot) } } - fn len(&self) -> u32 { - self.inner.len() + fn add_physical_buffer( + &mut self, + physical_id: Id, + ) -> Result, InvalidSlotError> { + let physical_resources = self.physical_resources.clone(); + let buffer_state = physical_resources.buffer(physical_id)?; + let buffer = buffer_state.buffer(); + let virtual_id = self.add_buffer(&BufferCreateInfo { + sharing: buffer.sharing().clone(), + ..Default::default() + }); + self.physical_map + .insert(physical_id.erase(), virtual_id.erase()); + + Ok(virtual_id) } - fn buffer(&self, id: Id) -> Result<&BufferInfo, InvalidSlotError> { - // SAFETY: We never modify the map concurrently. - let resource_info = - unsafe { self.inner.get_unprotected(id.slot) }.ok_or(InvalidSlotError::new(id))?; + fn add_physical_image( + &mut self, + physical_id: Id, + ) -> Result, InvalidSlotError> { + let physical_resources = self.physical_resources.clone(); + let image_state = physical_resources.image(physical_id)?; + let image = image_state.image(); + let virtual_id = self.add_image(&ImageCreateInfo { + sharing: image.sharing().clone(), + ..Default::default() + }); + self.physical_map + .insert(physical_id.erase(), virtual_id.erase()); - if let ResourceInfo::Buffer(buffer) = resource_info { - Ok(buffer) - } else { - // SAFETY: The `get_unprotected` call above already successfully compared the tag, so - // there is no need to check it again. We always ensure that buffer IDs get tagged with - // the `BUFFER_TAG`. - unsafe { hint::unreachable_unchecked() } - } + Ok(virtual_id) } - unsafe fn buffer_unchecked(&self, id: Id) -> &BufferInfo { - // SAFETY: - // * The caller must ensure that the `id` is valid. 
- // * We never modify the map concurrently. - let resource_info = unsafe { self.inner.index_unchecked_unprotected(id.index()) }; + fn add_physical_swapchain( + &mut self, + id: Id, + ) -> Result, InvalidSlotError> { + let physical_resources = self.physical_resources.clone(); + let swapchain_state = physical_resources.swapchain(id)?; + let swapchain = swapchain_state.swapchain(); + let virtual_id = self.add_swapchain(&SwapchainCreateInfo { + image_sharing: swapchain.image_sharing().clone(), + ..Default::default() + }); + self.physical_map.insert(id.erase(), virtual_id.erase()); - if let ResourceInfo::Buffer(buffer) = resource_info { - buffer + Ok(virtual_id) + } + + fn add_host_buffer_access(&mut self, mut id: Id, access_type: HostAccessType) { + if id.is_virtual() { + self.get(id.erase()).expect("invalid buffer"); + } else if let Some(&virtual_id) = self.physical_map.get(&id.erase()) { + id = unsafe { virtual_id.parametrize() }; } else { - // SAFETY: The caller must ensure that the `id` is valid. - unsafe { hint::unreachable_unchecked() } + id = self.add_physical_buffer(id).expect("invalid buffer"); } - } - fn image(&self, id: Id) -> Result<&ImageInfo, InvalidSlotError> { - // SAFETY: We never modify the map concurrently. - let resource_info = - unsafe { self.inner.get_unprotected(id.slot) }.ok_or(InvalidSlotError::new(id))?; + let host_accesses = match access_type { + HostAccessType::Read => &mut self.host_reads, + HostAccessType::Write => &mut self.host_writes, + }; - if let ResourceInfo::Image(image) = resource_info { - Ok(image) - } else { - // SAFETY: The `get_unprotected` call above already successfully compared the tag, so - // there is no need to check it again. We always ensure that image IDs get tagged with - // the `IMAGE_TAG`. - unsafe { hint::unreachable_unchecked() } + if !host_accesses.contains(&id) { + host_accesses.push(id); } } - unsafe fn image_unchecked(&self, id: Id) -> &ImageInfo { - // SAFETY: - // * The caller must ensure that the `index` is valid. - // * We never modify the map concurrently. - let resource_info = unsafe { self.inner.index_unchecked_unprotected(id.index()) }; + fn capacity(&self) -> u32 { + self.inner.capacity() + } - if let ResourceInfo::Image(image) = resource_info { - image - } else { - // SAFETY: The caller must ensure that the `id` is valid. - unsafe { hint::unreachable_unchecked() } - } + fn len(&self) -> u32 { + self.inner.len() } - fn swapchain(&self, id: Id) -> Result<&SwapchainInfo, InvalidSlotError> { + fn get(&self, id: Id) -> Result<&(), InvalidSlotError> { // SAFETY: We never modify the map concurrently. - let resource_info = - unsafe { self.inner.get_unprotected(id.slot) }.ok_or(InvalidSlotError::new(id))?; - - if let ResourceInfo::Swapchain(swapchain) = resource_info { - Ok(swapchain) - } else { - // SAFETY: The `get_unprotected` call above already successfully compared the tag, so - // there is no need to check it again. We always ensure that swapchain IDs get tagged - // with the `SWAPCHAIN_TAG`. - unsafe { hint::unreachable_unchecked() } - } + unsafe { self.inner.get_unprotected(id.slot) }.ok_or(InvalidSlotError::new(id)) } - unsafe fn swapchain_unchecked(&self, id: Id) -> &SwapchainInfo { - // SAFETY: - // * The caller must ensure that the `index` is valid. - // * We never modify the map concurrently. - let resource_info = unsafe { self.inner.index_unchecked_unprotected(id.index()) }; + fn iter(&self) -> impl Iterator { + // SAFETY: We never modify the map concurrently. 
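// A minimal sketch of the host-access declaration wrapped above, assuming
// `graph` is a mutable `TaskGraph` and `staging_buffer_id` is an existing
// buffer `Id`; this declaration is what `TaskContext::read_buffer` and
// `TaskContext::write_buffer` later validate against:

graph.add_host_buffer_access(staging_buffer_id, HostAccessType::Write);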
+ unsafe { self.inner.iter_unprotected() }.map(|(slot, v)| (unsafe { Id::new(slot) }, v)) + } - if let ResourceInfo::Swapchain(swapchain) = resource_info { - swapchain - } else { - // SAFETY: The caller must ensure that the `id` is valid. - unsafe { hint::unreachable_unchecked() } + pub(crate) fn contains_host_buffer_access( + &self, + mut id: Id, + access_type: HostAccessType, + ) -> bool { + if !id.is_virtual() { + if let Some(&virtual_id) = self.physical_map.get(&id.erase()) { + id = unsafe { virtual_id.parametrize() }; + } else { + return false; + } } + + let host_accesses = match access_type { + HostAccessType::Read => &self.host_reads, + HostAccessType::Write => &self.host_writes, + }; + + host_accesses.contains(&id) } +} - fn iter(&self) -> IterUnprotected<'_, ResourceInfo> { - // SAFETY: We never modify the map concurrently. - unsafe { self.inner.iter_unprotected() } +impl fmt::Debug for TaskGraph { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // FIXME: + f.debug_struct("TaskGraph").finish_non_exhaustive() + } +} + +unsafe impl DeviceOwned for TaskGraph { + #[inline] + fn device(&self) -> &Arc { + self.resources.physical_resources.device() } } @@ -540,38 +551,15 @@ pub struct TaskNode { } pub(crate) struct ResourceAccesses { - inner: Vec, + inner: Vec<(Id, ResourceAccess)>, } -// TODO: Literally anything else -#[derive(Clone)] -enum ResourceAccess { - Buffer(BufferAccess), - Image(ImageAccess), - Swapchain(SwapchainAccess), -} - -#[derive(Clone)] -struct BufferAccess { - id: Id, - range: BufferRange, - access_type: AccessType, -} - -#[derive(Clone)] -struct ImageAccess { - id: Id, - subresource_range: ImageSubresourceRange, - access_type: AccessType, - layout_type: ImageLayoutType, -} - -#[derive(Clone)] -struct SwapchainAccess { - id: Id, - array_layers: Range, - access_type: AccessType, - layout_type: ImageLayoutType, +#[derive(Clone, Copy, Default)] +struct ResourceAccess { + stage_mask: PipelineStages, + access_mask: AccessFlags, + image_layout: ImageLayout, + queue_family_index: u32, } impl TaskNode { @@ -605,113 +593,46 @@ impl TaskNode { pub fn task_mut(&mut self) -> &mut dyn Task { &mut *self.task } - - /// Returns `true` if the task node has access of the given `access_type` to the buffer - /// corresponding to `id` where the given `range` is contained within the access's range. - #[inline] - #[must_use] - pub fn contains_buffer_access( - &self, - id: Id, - range: BufferRange, - access_type: AccessType, - ) -> bool { - self.accesses.contains_buffer_access(id, range, access_type) - } - - /// Returns `true` if the task node has access of the given `access_type` and `layout_type` to - /// the image corresponding to `id` where the given `subresource_range` is contained within - /// the access's subresource range. - #[inline] - #[must_use] - pub fn contains_image_access( - &self, - id: Id, - subresource_range: ImageSubresourceRange, - access_type: AccessType, - layout_type: ImageLayoutType, - ) -> bool { - self.accesses - .contains_image_access(id, subresource_range, access_type, layout_type) - } - - /// Returns `true` if the task node has access of the given `access_type` and `layout_type` to - /// the swapchain corresponding to `id` where the given `array_layers` are contained within - /// the access's array layers. 
- #[inline] - #[must_use] - pub fn contains_swapchain_access( - &self, - id: Id, - array_layers: Range, - access_type: AccessType, - layout_type: ImageLayoutType, - ) -> bool { - self.accesses - .contains_swapchain_access(id, array_layers, access_type, layout_type) - } } impl ResourceAccesses { - fn iter(&self) -> slice::Iter<'_, ResourceAccess> { - self.inner.iter() - } + fn get_mut( + &mut self, + resources: &mut Resources, + mut id: Id, + ) -> Result<(Id, Option<&mut ResourceAccess>), InvalidSlotError> { + if id.is_virtual() { + resources.get(id)?; + } else if let Some(&virtual_id) = resources.physical_map.get(&id) { + id = virtual_id; + } else { + id = match id.object_type() { + ObjectType::Buffer => resources + .add_physical_buffer(unsafe { id.parametrize() })? + .erase(), + ObjectType::Image => resources + .add_physical_image(unsafe { id.parametrize() })? + .erase(), + ObjectType::Swapchain => resources + .add_physical_swapchain(unsafe { id.parametrize() })? + .erase(), + _ => unreachable!(), + }; + } - pub(crate) fn contains_buffer_access( - &self, - id: Id, - range: BufferRange, - access_type: AccessType, - ) -> bool { - debug_assert!(!range.is_empty()); + let access = self + .iter_mut() + .find_map(|(x, access)| (x == id).then_some(access)); - self.iter().any(|resource_access| { - matches!(resource_access, ResourceAccess::Buffer(a) if a.id == id - && a.access_type == access_type - && a.range.start <= range.start - && range.end <= a.range.end) - }) + Ok((id, access)) } - pub(crate) fn contains_image_access( - &self, - id: Id, - subresource_range: ImageSubresourceRange, - access_type: AccessType, - layout_type: ImageLayoutType, - ) -> bool { - debug_assert!(!subresource_range.aspects.is_empty()); - debug_assert!(!subresource_range.mip_levels.is_empty()); - debug_assert!(!subresource_range.array_layers.is_empty()); - - self.iter().any(|resource_access| { - matches!(resource_access, ResourceAccess::Image(a) if a.id == id - && a.access_type == access_type - && a.layout_type == layout_type - && a.subresource_range.aspects.contains(subresource_range.aspects) - && a.subresource_range.mip_levels.start <= subresource_range.mip_levels.start - && subresource_range.mip_levels.end <= a.subresource_range.mip_levels.end - && a.subresource_range.array_layers.start <= subresource_range.array_layers.start - && subresource_range.array_layers.end <= a.subresource_range.array_layers.end) - }) - } - - pub(crate) fn contains_swapchain_access( - &self, - id: Id, - array_layers: Range, - access_type: AccessType, - layout_type: ImageLayoutType, - ) -> bool { - debug_assert!(!array_layers.is_empty()); + fn iter(&self) -> impl Iterator { + self.inner.iter().map(|(id, access)| (*id, access)) + } - self.iter().any(|resource_access| { - matches!(resource_access, ResourceAccess::Swapchain(a) if a.id == id - && a.access_type == access_type - && a.layout_type == layout_type - && a.array_layers.start <= array_layers.start - && array_layers.end <= a.array_layers.end) - }) + fn iter_mut(&mut self) -> impl Iterator { + self.inner.iter_mut().map(|(id, access)| (*id, access)) } } @@ -719,7 +640,7 @@ impl ResourceAccesses { pub struct TaskNodeBuilder<'a, W: ?Sized> { id: NodeId, task_node: &'a mut TaskNode, - resources: &'a Resources, + resources: &'a mut Resources, } impl TaskNodeBuilder<'_, W> { @@ -727,48 +648,27 @@ impl TaskNodeBuilder<'_, W> { /// /// # Panics /// - /// - Panics if `id` is not a valid virtual resource ID. - /// - Panics if `range` doesn't denote a valid range of the buffer. 
+ /// - Panics if `id` is not a valid virtual resource ID nor a valid physical ID. /// - Panics if `access_type` isn't a valid buffer access type. - pub fn buffer_access( - &mut self, - id: Id, - range: BufferRange, - access_type: AccessType, - ) -> &mut Self { - let buffer = self.resources.buffer(id).expect("invalid buffer"); - - assert!(range.end <= buffer.size); - assert!(!range.is_empty()); + pub fn buffer_access(&mut self, id: Id, access_type: AccessType) -> &mut Self { + let (id, access) = self.access_mut(id.erase()).expect("invalid buffer"); assert!(access_type.is_valid_buffer_access_type()); - // SAFETY: We checked the safety preconditions above. - unsafe { self.buffer_access_unchecked(id, range, access_type) } - } - - /// Adds a buffer access to this task node without doing any checks. - /// - /// # Safety - /// - /// - `id` must be a valid virtual resource ID. - /// - `range` must denote a valid range of the buffer. - /// - `access_type` must be a valid buffer access type. - #[inline] - pub unsafe fn buffer_access_unchecked( - &mut self, - id: Id, - range: BufferRange, - access_type: AccessType, - ) -> &mut Self { - self.task_node - .accesses - .inner - .push(ResourceAccess::Buffer(BufferAccess { - id, - range, - access_type, - })); + if let Some(access) = access { + access.stage_mask |= access_type.stage_mask(); + access.access_mask |= access_type.access_mask(); + } else { + self.task_node.accesses.inner.push(( + id.erase(), + ResourceAccess { + stage_mask: access_type.stage_mask(), + access_mask: access_type.access_mask(), + image_layout: ImageLayout::Undefined, + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + }, + )); + } self } @@ -777,136 +677,52 @@ impl TaskNodeBuilder<'_, W> { /// /// # Panics /// - /// - Panics if `id` is not a valid virtual resource ID. - /// - Panics if `subresource_range` doesn't denote a valid subresource range of the image. + /// - Panics if `id` is not a valid virtual resource ID nor a valid physical ID. /// - Panics if `access_type` isn't a valid image access type. + /// - Panics if an access for `id` was already added and its image layout doesn't equal + /// `access_type.image_layout(layout_type)`. pub fn image_access( &mut self, id: Id, - mut subresource_range: ImageSubresourceRange, access_type: AccessType, layout_type: ImageLayoutType, ) -> &mut Self { - let image = self.resources.image(id).expect("invalid image"); - - if image.flags.contains(ImageCreateFlags::DISJOINT) { - subresource_range.aspects -= ImageAspects::COLOR; - subresource_range.aspects |= match image.format.planes().len() { - 2 => ImageAspects::PLANE_0 | ImageAspects::PLANE_1, - 3 => ImageAspects::PLANE_0 | ImageAspects::PLANE_1 | ImageAspects::PLANE_2, - _ => unreachable!(), - }; - } - - assert!(image.format.aspects().contains(subresource_range.aspects)); - assert!(subresource_range.mip_levels.end <= image.mip_levels); - assert!(subresource_range.array_layers.end <= image.array_layers); - assert!(!subresource_range.aspects.is_empty()); - assert!(!subresource_range.mip_levels.is_empty()); - assert!(!subresource_range.array_layers.is_empty()); + let (id, access) = self.access_mut(id.erase()).expect("invalid image"); assert!(access_type.is_valid_image_access_type()); - // SAFETY: We checked the safety preconditions above. - unsafe { self.image_access_unchecked(id, subresource_range, access_type, layout_type) } - } + let image_layout = access_type.image_layout(layout_type); - /// Adds an image access to this task node without doing any checks. 
- /// - /// # Safety - /// - /// - `id` must be a valid virtual resource ID. - /// - `subresource_range` must denote a valid subresource range of the image. If the image - /// flags contain `ImageCreateFlags::DISJOINT`, then the color aspect is not considered - /// valid. - /// - `access_type` must be a valid image access type. - #[inline] - pub unsafe fn image_access_unchecked( - &mut self, - id: Id, - subresource_range: ImageSubresourceRange, - access_type: AccessType, - mut layout_type: ImageLayoutType, - ) -> &mut Self { - // Normalize the layout type so that comparisons of accesses are predictable. - if access_type.image_layout() == ImageLayout::General { - layout_type = ImageLayoutType::Optimal; - } + if let Some(access) = access { + assert_eq!(access.image_layout, image_layout); - self.task_node - .accesses - .inner - .push(ResourceAccess::Image(ImageAccess { - id, - subresource_range, - access_type, - layout_type, - })); + access.stage_mask |= access_type.stage_mask(); + access.access_mask |= access_type.access_mask(); + } else { + self.task_node.accesses.inner.push(( + id.erase(), + ResourceAccess { + stage_mask: access_type.stage_mask(), + access_mask: access_type.access_mask(), + image_layout, + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + }, + )); + } self } - /// Adds a swapchain image access to this task node. - /// - /// # Panics - /// - /// - Panics if `id` is not a valid virtual resource ID. - /// - Panics if `array_layers` doesn't denote a valid range of array layers of the swapchain. - /// - Panics if `access_type` isn't a valid image access type. - pub fn swapchain_access( + fn access_mut( &mut self, - id: Id, - array_layers: Range, - access_type: AccessType, - layout_type: ImageLayoutType, - ) -> &mut Self { - let swapchain = self.resources.swapchain(id).expect("invalid swapchain"); - - assert!(array_layers.end <= swapchain.image_array_layers); - assert!(!array_layers.is_empty()); - - assert!(access_type.is_valid_image_access_type()); - - // SAFETY: We checked the safety preconditions above. - unsafe { self.swapchain_access_unchecked(id, array_layers, access_type, layout_type) } - } - - /// Adds a swapchain image access to this task node without doing any checks. - /// - /// # Safety - /// - /// - `id` must be a valid virtual resource ID. - /// - `array_layers` must denote a valid range of array layers of the swapchain. - /// - `access_type` must be a valid image access type. - #[inline] - pub unsafe fn swapchain_access_unchecked( - &mut self, - id: Id, - array_layers: Range, - access_type: AccessType, - mut layout_type: ImageLayoutType, - ) -> &mut Self { - // Normalize the layout type so that comparisons of accesses are predictable. - if access_type.image_layout() == ImageLayout::General { - layout_type = ImageLayoutType::Optimal; - } - - self.task_node - .accesses - .inner - .push(ResourceAccess::Swapchain(SwapchainAccess { - id, - access_type, - layout_type, - array_layers, - })); - - self + id: Id, + ) -> Result<(Id, Option<&mut ResourceAccess>), InvalidSlotError> { + self.task_node.accesses.get_mut(self.resources, id) } /// Finishes building the task node and returns the ID of the built node. #[inline] - pub fn build(self) -> NodeId { + pub fn build(&mut self) -> NodeId { self.id } } @@ -914,16 +730,19 @@ impl TaskNodeBuilder<'_, W> { /// A [`TaskGraph`] that has been compiled into an executable form. 
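// A minimal sketch of the builder methods above, assuming `graph` is a mutable
// `TaskGraph`, `my_task` implements `Task`, and `src_buffer_id`/`dst_image_id`
// are existing IDs; the access types used here are illustrative:

let copy_node_id = graph
    .create_task_node("copy upload", QueueFamilyType::Graphics, my_task)
    .buffer_access(src_buffer_id, AccessType::CopyTransferRead)
    .image_access(
        dst_image_id,
        AccessType::CopyTransferWrite,
        ImageLayoutType::Optimal,
    )
    .build();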
pub struct ExecutableTaskGraph { graph: TaskGraph, + flight_id: Id, instructions: Vec, submissions: Vec, buffer_barriers: Vec, image_barriers: Vec, - semaphores: RefCell>, + semaphores: RefCell>>, swapchains: SmallVec<[Id; 1]>, present_queue: Option>, + last_accesses: Vec, } // FIXME: Initial queue family ownership transfers +#[derive(Debug)] struct Submission { queue: Arc, initial_buffer_barrier_range: Range, @@ -933,7 +752,7 @@ struct Submission { type InstructionIndex = usize; -#[derive(Clone)] +#[derive(Clone, Debug)] enum Instruction { WaitAcquire { swapchain_id: Id, @@ -965,6 +784,14 @@ enum Instruction { semaphore_index: SemaphoreIndex, stage_mask: PipelineStages, }, + SignalPrePresent { + swapchain_id: Id, + stage_mask: PipelineStages, + }, + WaitPrePresent { + swapchain_id: Id, + stage_mask: PipelineStages, + }, SignalPresent { swapchain_id: Id, stage_mask: PipelineStages, @@ -977,6 +804,7 @@ type SemaphoreIndex = usize; type BarrierIndex = u32; +#[derive(Clone, Debug)] struct BufferMemoryBarrier { src_stage_mask: PipelineStages, src_access_mask: AccessFlags, @@ -985,9 +813,9 @@ struct BufferMemoryBarrier { src_queue_family_index: u32, dst_queue_family_index: u32, buffer: Id, - range: BufferRange, } +#[derive(Clone, Debug)] struct ImageMemoryBarrier { src_stage_mask: PipelineStages, src_access_mask: AccessFlags, @@ -997,15 +825,7 @@ struct ImageMemoryBarrier { new_layout: ImageLayout, src_queue_family_index: u32, dst_queue_family_index: u32, - image: ImageReference, - subresource_range: ImageSubresourceRange, -} - -// TODO: This really ought not to be necessary. -#[derive(Clone, Copy)] -enum ImageReference { - Normal(Id), - Swapchain(Id), + image: Id, } impl ExecutableTaskGraph { @@ -1032,12 +852,36 @@ impl ExecutableTaskGraph { pub fn task_nodes_mut(&mut self) -> TaskNodesMut<'_, W> { self.graph.task_nodes_mut() } + + /// Returns the flight ID that the task graph was compiled with. 
+ #[inline] + pub fn flight_id(&self) -> Id { + self.flight_id + } +} + +impl fmt::Debug for ExecutableTaskGraph { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut debug = f.debug_struct("ExecutableTaskGraph"); + + debug + .field("graph", &self.graph) + .field("flight_id", &self.flight_id) + .field("instructions", &self.instructions) + .field("submissions", &self.submissions) + .field("buffer_barriers", &self.buffer_barriers) + .field("image_barriers", &self.image_barriers) + .field("semaphores", &self.semaphores) + .field("swapchains", &self.swapchains) + .field("present_queue", &self.present_queue) + .finish_non_exhaustive() + } } unsafe impl DeviceOwned for ExecutableTaskGraph { #[inline] fn device(&self) -> &Arc { - self.submissions[0].queue.device() + self.graph.device() } } @@ -1172,27 +1016,19 @@ impl Error for TaskGraphError {} #[cfg(test)] mod tests { use super::*; - use crate::{TaskContext, TaskResult}; - - struct DummyTask; - - impl Task for DummyTask { - type World = (); - - unsafe fn execute(&self, _tcx: &mut TaskContext<'_>, _world: &Self::World) -> TaskResult { - Ok(()) - } - } + use crate::tests::test_queues; + use std::marker::PhantomData; #[test] fn basic_usage1() { - let mut graph = TaskGraph::new(10, 0); + let (resources, _) = test_queues!(); + let mut graph = TaskGraph::<()>::new(resources, 10, 0); let x = graph - .create_task_node("", QueueFamilyType::Graphics, DummyTask) + .create_task_node("X", QueueFamilyType::Graphics, PhantomData) .build(); let y = graph - .create_task_node("", QueueFamilyType::Graphics, DummyTask) + .create_task_node("Y", QueueFamilyType::Graphics, PhantomData) .build(); graph.add_edge(x, y).unwrap(); @@ -1220,16 +1056,17 @@ mod tests { #[test] fn basic_usage2() { - let mut graph = TaskGraph::new(10, 0); + let (resources, _) = test_queues!(); + let mut graph = TaskGraph::<()>::new(resources, 10, 0); let x = graph - .create_task_node("", QueueFamilyType::Graphics, DummyTask) + .create_task_node("X", QueueFamilyType::Graphics, PhantomData) .build(); let y = graph - .create_task_node("", QueueFamilyType::Graphics, DummyTask) + .create_task_node("Y", QueueFamilyType::Graphics, PhantomData) .build(); let z = graph - .create_task_node("", QueueFamilyType::Graphics, DummyTask) + .create_task_node("Z", QueueFamilyType::Graphics, PhantomData) .build(); assert!(graph.task_node(x).is_ok()); @@ -1256,10 +1093,11 @@ mod tests { #[test] fn self_referential_node() { - let mut graph = TaskGraph::new(10, 0); + let (resources, _) = test_queues!(); + let mut graph = TaskGraph::<()>::new(resources, 10, 0); let x = graph - .create_task_node("", QueueFamilyType::Graphics, DummyTask) + .create_task_node("X", QueueFamilyType::Graphics, PhantomData) .build(); assert_eq!(graph.add_edge(x, x), Err(TaskGraphError::InvalidNode)); diff --git a/vulkano-taskgraph/src/lib.rs b/vulkano-taskgraph/src/lib.rs index c3b4fdb76d..7edf77375c 100644 --- a/vulkano-taskgraph/src/lib.rs +++ b/vulkano-taskgraph/src/lib.rs @@ -1,10 +1,11 @@ -// FIXME: -#![allow(unused)] #![forbid(unsafe_op_in_unsafe_fn)] use concurrent_slotmap::SlotId; -use graph::{ResourceAccesses, ResourceMap}; -use resource::{AccessType, BufferRange, BufferState, DeathRow, ImageState, SwapchainState}; +use graph::{CompileInfo, ExecuteError, ResourceMap, TaskGraph}; +use resource::{ + AccessType, BufferState, DeathRow, Flight, HostAccessType, ImageLayoutType, ImageState, + Resources, SwapchainState, +}; use std::{ any::{Any, TypeId}, cell::Cell, @@ -13,25 +14,109 @@ use std::{ fmt, hash::{Hash, 
Hasher}, marker::PhantomData, - ops::{Deref, DerefMut, Range, RangeBounds}, + mem, + ops::{Deref, RangeBounds}, sync::Arc, - thread, }; use vulkano::{ buffer::{Buffer, BufferContents, BufferMemory, Subbuffer}, command_buffer::sys::{RawCommandBuffer, RawRecordingCommandBuffer}, + device::Queue, image::Image, - memory::{ - allocator::{align_down, align_up}, - DeviceAlignment, MappedMemoryRange, ResourceMemory, - }, swapchain::Swapchain, - DeviceSize, ValidationError, VulkanError, + DeviceSize, ValidationError, }; pub mod graph; pub mod resource; +/// Creates a [`TaskGraph`] with one task node, compiles it, and executes it. +pub unsafe fn execute( + queue: Arc, + resources: Arc, + flight_id: Id, + task: impl FnOnce(&mut RawRecordingCommandBuffer, &mut TaskContext<'_>) -> TaskResult, + host_buffer_accesses: impl IntoIterator, HostAccessType)>, + buffer_accesses: impl IntoIterator, AccessType)>, + image_accesses: impl IntoIterator, AccessType, ImageLayoutType)>, +) -> Result<(), ExecuteError> { + #[repr(transparent)] + struct OnceTask<'a>( + &'a dyn Fn(&mut RawRecordingCommandBuffer, &mut TaskContext<'_>) -> TaskResult, + ); + + // SAFETY: The task is constructed inside this function and never leaves its scope, so there is + // no way it could be sent to another thread. + unsafe impl Send for OnceTask<'_> {} + + // SAFETY: The task is constructed inside this function and never leaves its scope, so there is + // no way it could be shared with another thread. + unsafe impl Sync for OnceTask<'_> {} + + impl Task for OnceTask<'static> { + type World = (); + + unsafe fn execute( + &self, + cbf: &mut RawRecordingCommandBuffer, + tcx: &mut TaskContext<'_>, + _: &Self::World, + ) -> TaskResult { + (self.0)(cbf, tcx) + } + } + + let task = Cell::new(Some(task)); + let trampoline = move |cbf: &mut RawRecordingCommandBuffer, tcx: &mut TaskContext<'_>| { + // `ExecutableTaskGraph::execute` calls each task exactly once, and we only execute the + // task graph once. + (Cell::take(&task).unwrap())(cbf, tcx) + }; + + let mut task_graph = TaskGraph::new(resources, 1, 64 * 1024); + + for (id, access_type) in host_buffer_accesses { + task_graph.add_host_buffer_access(id, access_type); + } + + let mut node = task_graph.create_task_node( + "", + QueueFamilyType::Specific { + index: queue.queue_family_index(), + }, + // SAFETY: The task never leaves this function scope, so it is safe to pretend that the + // local `trampoline` and its captures from the outer scope live forever. + unsafe { mem::transmute::, OnceTask<'static>>(OnceTask(&trampoline)) }, + ); + + for (id, access_type) in buffer_accesses { + node.buffer_access(id, access_type); + } + + for (id, access_type, layout_type) in image_accesses { + node.image_access(id, access_type, layout_type); + } + + // SAFETY: + // * The user must ensure that there are no accesses that are incompatible with the queue. + // * The user must ensure that there are no accesses incompatible with the device. + let task_graph = unsafe { + task_graph.compile(CompileInfo { + queues: vec![queue], + present_queue: None, + flight_id, + _ne: crate::NE, + }) + } + .unwrap(); + + let resource_map = ResourceMap::new(&task_graph).unwrap(); + + // SAFETY: The user must ensure that there are no other task graphs executing that access any + // of the same subresources. + unsafe { task_graph.execute(resource_map, &(), || {}) } +} + /// A task represents a unit of work to be recorded to a command buffer. 
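// A sketch of using the one-shot helper above for a host upload, assuming
// `queue`, `resources`, and `flight_id` already exist, `staging_buffer_id`
// refers to a host-mapped buffer of `u32`s, and the declared accesses cover
// everything the closure does:

unsafe {
    vulkano_taskgraph::execute(
        queue.clone(),
        resources.clone(),
        flight_id,
        |_cbf, tcx| {
            // Write zeros into the whole buffer through the mapped pointer.
            tcx.write_buffer::<[u32]>(staging_buffer_id, ..)?.fill(0);

            Ok(())
        },
        [(staging_buffer_id, HostAccessType::Write)],
        [],
        [],
    )
}
.unwrap();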
pub trait Task: Any + Send + Sync { type World: ?Sized; @@ -39,19 +124,25 @@ pub trait Task: Any + Send + Sync { // Potentially TODO: // fn update(&mut self, ...) {} - /// Executes the task, which should record its commands using the provided context. + /// Executes the task, which should record its commands using the provided command buffer and + /// context. /// /// # Safety /// - /// - Every subresource in the [task's input/output interface] must not be written to - /// concurrently in any other tasks during execution on the device. - /// - Every subresource in the task's input/output interface, if it's a [host access], must not - /// be written to concurrently in any other tasks during execution on the host. - /// - Every subresource in the task's input interface, if it's an [image access], must have had - /// its layout transitioned to the layout specified in the interface. - /// - Every subresource in the task's input interface, if the resource's [sharing mode] is - /// exclusive, must be currently owned by the queue family the task is executing on. - unsafe fn execute(&self, tcx: &mut TaskContext<'_>, world: &Self::World) -> TaskResult; + /// - Every resource in the [task's access set] must not be written to concurrently in any + /// other tasks during execution on the device. + /// - Every resource in the task's access set, if it's an [image access], must have had its + /// layout transitioned to the layout specified in the access. + /// - Every resource in the task's access set, if the resource's [sharing mode] is exclusive, + /// must be currently owned by the queue family the task is executing on. + /// + /// [sharing mode]: vulkano::sync::Sharing + unsafe fn execute( + &self, + cbf: &mut RawRecordingCommandBuffer, + tcx: &mut TaskContext<'_>, + world: &Self::World, + ) -> TaskResult; } impl dyn Task { @@ -112,101 +203,78 @@ impl fmt::Debug for dyn Task { } } +/// An implementation of a phantom task, which is zero-sized and doesn't do anything. +/// +/// You may want to use this if all you're interested in is the automatic synchronization and don't +/// have any other commands to execute. A common example would be doing a queue family ownership +/// transfer after doing an upload. +impl Task for PhantomData W> { + type World = W; + + unsafe fn execute( + &self, + _cbf: &mut RawRecordingCommandBuffer, + _tcx: &mut TaskContext<'_>, + _world: &Self::World, + ) -> TaskResult { + Ok(()) + } +} + /// The context of a task. /// /// This gives you access to the current command buffer, resources, as well as resource cleanup. pub struct TaskContext<'a> { resource_map: &'a ResourceMap<'a>, death_row: Cell>, - current_command_buffer: Cell>, + current_frame_index: u32, command_buffers: Cell>>>, - accesses: &'a ResourceAccesses, } impl<'a> TaskContext<'a> { - /// Returns the current raw command buffer for the task. - /// - /// While this method is safe, using the command buffer isn't. You must guarantee that any - /// subresources you use while recording commands are either accounted for in the [task's - /// input/output interface], or that those subresources don't require any synchronization - /// (including layout transitions and queue family ownership transfers), or that no other task - /// is accessing the subresources at the same time without appropriate synchronization. - /// - /// # Panics - /// - /// - Panics if called more than once. - // TODO: We could alternatively to ^ pass two parameters to `Task::execute`. 
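// A sketch of a task type using the new `execute` signature, assuming
// `ClearTask` and its `clear_image_id` field are defined by the application:

struct ClearTask {
    clear_image_id: Id<Image>,
}

impl Task for ClearTask {
    type World = ();

    unsafe fn execute(
        &self,
        cbf: &mut RawRecordingCommandBuffer,
        tcx: &mut TaskContext<'_>,
        _world: &Self::World,
    ) -> TaskResult {
        // Look up the image bound to the ID for this execution, then record
        // commands for it using `cbf`.
        let _image = tcx.image(self.clear_image_id)?.image();

        Ok(())
    }
}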
- #[inline] - pub fn raw_command_buffer(&self) -> &'a mut RawRecordingCommandBuffer { - self.current_command_buffer - .take() - .expect("`TaskContext::raw_command_buffer` can only be called once") - } - - /// Pushes a command buffer into the list of command buffers to be executed on the queue. - /// - /// All command buffers will be executed in the order in which they are pushed after the task - /// has finished execution. That means in particular, that commands recorded by the task will - /// start execution before execution of any pushed command buffers starts. - /// - /// # Safety - /// - /// The same safety preconditions apply as outlined in the [`raw_command_buffer`] method. Since - /// the command buffer will be executed on the same queue right after the current command - /// buffer, without any added synchronization, it must be safe to do so. The given command - /// buffer must not do any accesses not accounted for in the [task's input/output interface], - /// or ensure that such accesses are appropriately synchronized. - /// - /// [`raw_command_buffer`]: Self::raw_command_buffer - #[inline] - pub unsafe fn push_command_buffer(&self, command_buffer: Arc) { - let vec = self.command_buffers.take().unwrap(); - vec.push(command_buffer); - self.command_buffers.set(Some(vec)); - } - - /// Extends the list of command buffers to be executed on the queue. - /// - /// This function behaves identically to the [`push_command_buffer`] method, except that it - /// pushes all command buffers from the given iterator in order. - /// - /// # Safety - /// - /// See the [`push_command_buffer`] method for the safety preconditions. - /// - /// [`push_command_buffer`]: Self::push_command_buffer - #[inline] - pub unsafe fn extend_command_buffers( - &self, - command_buffers: impl IntoIterator>, - ) { - let vec = self.command_buffers.take().unwrap(); - vec.extend(command_buffers); - self.command_buffers.set(Some(vec)); - } - /// Returns the buffer corresponding to `id`, or returns an error if it isn't present. #[inline] pub fn buffer(&self, id: Id) -> TaskResult<&'a BufferState> { - // SAFETY: The caller of `Task::execute` must ensure that `self.resource_map` maps the - // virtual IDs of the graph exhaustively. - Ok(unsafe { self.resource_map.buffer(id) }?) + if id.is_virtual() { + // SAFETY: The caller of `Task::execute` must ensure that `self.resource_map` maps the + // virtual IDs of the graph exhaustively. + Ok(unsafe { self.resource_map.buffer(id) }?) + } else { + // SAFETY: `ResourceMap` owns an `epoch::Guard`. + Ok(unsafe { self.resource_map.resources().buffer_unprotected(id) }?) + } } /// Returns the image corresponding to `id`, or returns an error if it isn't present. + /// + /// # Panics + /// + /// - Panics if `id` refers to a swapchain image. #[inline] pub fn image(&self, id: Id) -> TaskResult<&'a ImageState> { - // SAFETY: The caller of `Task::execute` must ensure that `self.resource_map` maps the - // virtual IDs of the graph exhaustively. - Ok(unsafe { self.resource_map.image(id) }?) + assert_ne!(id.object_type(), ObjectType::Swapchain); + + if id.is_virtual() { + // SAFETY: The caller of `Task::execute` must ensure that `self.resource_map` maps the + // virtual IDs of the graph exhaustively. + Ok(unsafe { self.resource_map.image(id) }?) + } else { + // SAFETY: `ResourceMap` owns an `epoch::Guard`. + Ok(unsafe { self.resource_map.resources().image_unprotected(id) }?) + } } /// Returns the swapchain corresponding to `id`, or returns an error if it isn't present. 
#[inline] pub fn swapchain(&self, id: Id) -> TaskResult<&'a SwapchainState> { - // SAFETY: The caller of `Task::execute` must ensure that `self.resource_map` maps the - // virtual IDs of the graph exhaustively. - Ok(unsafe { self.resource_map.swapchain(id) }?) + if id.is_virtual() { + // SAFETY: The caller of `Task::execute` must ensure that `self.resource_map` maps the + // virtual IDs of the graph exhaustively. + Ok(unsafe { self.resource_map.swapchain(id) }?) + } else { + // SAFETY: `ResourceMap` owns an `epoch::Guard`. + Ok(unsafe { self.resource_map.resources().swapchain_unprotected(id) }?) + } } /// Returns the `ResourceMap`. @@ -215,21 +283,17 @@ impl<'a> TaskContext<'a> { self.resource_map } + /// Returns the index of the current [frame] in [flight]. + #[inline] + #[must_use] + pub fn current_frame_index(&self) -> u32 { + self.current_frame_index + } + /// Tries to get read access to a portion of the buffer corresponding to `id`. /// - /// If host read access of the portion of the buffer is not accounted for in the [task's - /// input/output interface], this method will return an error. - /// - /// If the memory backing the buffer is not [host-coherent], then this method will check a - /// range that is potentially larger than the given range, because the range given to - /// [`invalidate_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for - /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2 - /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the - /// buffer is backed by non-coherent memory, then invalidating one subbuffer would also - /// invalidate the other subbuffer. This can lead to data races and is therefore not allowed. - /// What you should do in that case is ensure that each subbuffer is aligned to the - /// non-coherent atom size, so in this case one would be at offset 0 and the other at offset - /// 64. + /// If host read access for the buffer is not accounted for in the [task graph's host access + /// set], this method will return an error. /// /// If the memory backing the buffer is not managed by vulkano (i.e. the buffer was created /// by [`RawBuffer::assume_bound`]), then it can't be read using this method and an error will @@ -241,111 +305,31 @@ impl<'a> TaskContext<'a> { /// - Panics if [`Subbuffer::slice`] with the given `range` panics. /// - Panics if [`Subbuffer::reinterpret`] to the given `T` panics. /// - /// [host-coherent]: vulkano::memory::MemoryPropertyFlags::HOST_COHERENT - /// [`invalidate_range`]: vulkano::memory::ResourceMemory::invalidate_range - /// [`non_coherent_atom_size`]: vulkano::device::DeviceProperties::non_coherent_atom_size /// [`RawBuffer::assume_bound`]: vulkano::buffer::sys::RawBuffer::assume_bound pub fn read_buffer( &self, id: Id, range: impl RangeBounds, - ) -> TaskResult> { - #[cold] - unsafe fn invalidate_subbuffer( - tcx: &TaskContext<'_>, - id: Id, - subbuffer: &Subbuffer<[u8]>, - allocation: &ResourceMemory, - atom_size: DeviceAlignment, - ) -> TaskResult { - // This works because the memory allocator must align allocations to the non-coherent - // atom size when the memory is host-visible but not host-coherent. 
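// A sketch of per-frame indexing inside a task's `execute`, assuming the task
// owns `uniform_buffer_ids`, one buffer `Id` per frame in flight:

let buffer_id = self.uniform_buffer_ids[tcx.current_frame_index() as usize];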
- let start = align_down(subbuffer.offset(), atom_size); - let end = cmp::min( - align_up(subbuffer.offset() + subbuffer.size(), atom_size), - allocation.size(), - ); - let range = Range { start, end }; - - tcx.validate_read_buffer(id, range.clone())?; - - let memory_range = MappedMemoryRange { - offset: range.start, - size: range.end - range.start, - _ne: crate::NE, - }; - - // SAFETY: - // - We checked that the task has read access to the subbuffer above. - // - The caller must guarantee that the subbuffer falls within the mapped range of - // memory. - // - We ensure that memory mappings are always aligned to the non-coherent atom size for - // non-host-coherent memory, therefore the subbuffer's range aligned to the - // non-coherent atom size must fall within the mapped range of the memory. - unsafe { allocation.invalidate_range_unchecked(memory_range) } - .map_err(HostAccessError::Invalidate)?; - - Ok(()) - } - - assert!(T::LAYOUT.alignment().as_devicesize() <= 64); - - let buffer = self.buffer(id)?.buffer(); - let subbuffer = Subbuffer::from(buffer.clone()) - .slice(range) - .reinterpret::(); - - let allocation = match buffer.memory() { - BufferMemory::Normal(a) => a, - BufferMemory::Sparse => { - todo!("`TaskContext::read_buffer` doesn't support sparse binding yet") - } - BufferMemory::External => { - return Err(TaskError::HostAccess(HostAccessError::Unmanaged)) - } - _ => unreachable!(), - }; - - let mapped_slice = subbuffer.mapped_slice().map_err(|err| match err { - vulkano::sync::HostAccessError::NotHostMapped => HostAccessError::NotHostMapped, - vulkano::sync::HostAccessError::OutOfMappedRange => HostAccessError::OutOfMappedRange, - _ => unreachable!(), - })?; - - let atom_size = allocation.atom_size(); - - if let Some(atom_size) = atom_size { - // SAFETY: - // `subbuffer.mapped_slice()` didn't return an error, which means that the subbuffer - // falls within the mapped range of the memory. - unsafe { invalidate_subbuffer(self, id, subbuffer.as_bytes(), allocation, atom_size) }?; - } else { - let range = subbuffer.offset()..subbuffer.offset() + subbuffer.size(); - self.validate_write_buffer(id, range)?; - } + ) -> TaskResult<&T> { + self.validate_read_buffer(id)?; - // SAFETY: We checked that the task has read access to the subbuffer above, which also + // SAFETY: We checked that the task has read access to the buffer above, which also // includes the guarantee that no other tasks can be writing the subbuffer on neither the - // host nor the device. The same task cannot obtain another `BufferWriteGuard` to the - // subbuffer because `TaskContext::write_buffer` requires a mutable reference. - let data = unsafe { &*T::ptr_from_slice(mapped_slice) }; - - Ok(BufferReadGuard { data }) + // host nor the device. The same task cannot obtain another mutable reference to the buffer + // because `TaskContext::write_buffer` requires a mutable reference. 
+ unsafe { self.read_buffer_unchecked(id, range) } } - fn validate_read_buffer( - &self, - id: Id, - range: BufferRange, - ) -> Result<(), Box> { + fn validate_read_buffer(&self, id: Id) -> Result<(), Box> { if !self - .accesses - .contains_buffer_access(id, range, AccessType::HostRead) + .resource_map + .virtual_resources() + .contains_host_buffer_access(id, HostAccessType::Read) { return Err(Box::new(ValidationError { context: "TaskContext::read_buffer".into(), - problem: "the task node does not have an access of type `AccessType::HostRead` \ - for the range of the buffer" + problem: "the task graph does not have an access of type `HostAccessType::Read` \ + for the buffer" .into(), ..Default::default() })); @@ -355,13 +339,7 @@ impl<'a> TaskContext<'a> { } /// Gets read access to a portion of the buffer corresponding to `id` without checking if this - /// access is accounted for in the [task's input/output interface]. - /// - /// This method doesn't do any host cache control. If the memory backing the buffer is not - /// [host-coherent], you must call [`invalidate_range`] in order for any device writes to be - /// visible to the host, and must not forget that such flushes must be aligned to the - /// [`non_coherent_atom_size`] and hence the aligned range must be accounted for in the task's - /// input/output interface. + /// access is accounted for in the [task graph's host access set]. /// /// If the memory backing the buffer is not managed by vulkano (i.e. the buffer was created /// by [`RawBuffer::assume_bound`]), then it can't be read using this method and an error will @@ -369,7 +347,7 @@ impl<'a> TaskContext<'a> { /// /// # Safety /// - /// This access must be accounted for in the task's input/output interface. + /// This access must be accounted for in the task graph's host access set. /// /// # Panics /// @@ -377,9 +355,6 @@ impl<'a> TaskContext<'a> { /// - Panics if [`Subbuffer::slice`] with the given `range` panics. /// - Panics if [`Subbuffer::reinterpret`] to the given `T` panics. /// - /// [host-coherent]: vulkano::memory::MemoryPropertyFlags::HOST_COHERENT - /// [`invalidate_range`]: vulkano::memory::ResourceMemory::invalidate_range - /// [`non_coherent_atom_size`]: vulkano::device::DeviceProperties::non_coherent_atom_size /// [`RawBuffer::assume_bound`]: vulkano::buffer::sys::RawBuffer::assume_bound pub unsafe fn read_buffer_unchecked( &self, @@ -393,10 +368,10 @@ impl<'a> TaskContext<'a> { .slice(range) .reinterpret::(); - match buffer.memory() { + let allocation = match buffer.memory() { BufferMemory::Normal(a) => a, BufferMemory::Sparse => { - todo!("`TaskContext::read_buffer_unchecked` doesn't support sparse binding yet"); + todo!("`TaskContext::read_buffer` doesn't support sparse binding yet"); } BufferMemory::External => { return Err(TaskError::HostAccess(HostAccessError::Unmanaged)); @@ -404,12 +379,14 @@ impl<'a> TaskContext<'a> { _ => unreachable!(), }; - let mapped_slice = subbuffer.mapped_slice().map_err(|err| match err { + unsafe { allocation.mapped_slice_unchecked(..) }.map_err(|err| match err { vulkano::sync::HostAccessError::NotHostMapped => HostAccessError::NotHostMapped, vulkano::sync::HostAccessError::OutOfMappedRange => HostAccessError::OutOfMappedRange, _ => unreachable!(), })?; + let mapped_slice = subbuffer.mapped_slice().unwrap(); + // SAFETY: The caller must ensure that access to the data is synchronized. 
let data = unsafe { &*T::ptr_from_slice(mapped_slice) }; @@ -418,19 +395,8 @@ impl<'a> TaskContext<'a> { /// Tries to get write access to a portion of the buffer corresponding to `id`. /// - /// If host write access of the portion of the buffer is not accounted for in the [task's - /// input/output interface], this method will return an error. - /// - /// If the memory backing the buffer is not [host-coherent], then this method will check a - /// range that is potentially larger than the given range, because the range given to - /// [`flush_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for - /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2 - /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the - /// buffer is backed by non-coherent memory, then invalidating one subbuffer would also - /// invalidate the other subbuffer. This can lead to data races and is therefore not allowed. - /// What you should do in that case is ensure that each subbuffer is aligned to the - /// non-coherent atom size, so in this case one would be at offset 0 and the other at offset - /// 64. + /// If host write access for the buffer is not accounted for in the [task graph's host access + /// set], this method will return an error. /// /// If the memory backing the buffer is not managed by vulkano (i.e. the buffer was created /// by [`RawBuffer::assume_bound`]), then it can't be written using this method and an error @@ -442,115 +408,31 @@ impl<'a> TaskContext<'a> { /// - Panics if [`Subbuffer::slice`] with the given `range` panics. /// - Panics if [`Subbuffer::reinterpret`] to the given `T` panics. /// - /// [host-coherent]: vulkano::memory::MemoryPropertyFlags::HOST_COHERENT - /// [`flush_range`]: vulkano::memory::ResourceMemory::flush_range - /// [`non_coherent_atom_size`]: vulkano::device::DeviceProperties::non_coherent_atom_size /// [`RawBuffer::assume_bound`]: vulkano::buffer::sys::RawBuffer::assume_bound pub fn write_buffer( &mut self, id: Id, range: impl RangeBounds, - ) -> TaskResult> { - #[cold] - unsafe fn invalidate_subbuffer( - tcx: &TaskContext<'_>, - id: Id, - subbuffer: &Subbuffer<[u8]>, - allocation: &ResourceMemory, - atom_size: DeviceAlignment, - ) -> TaskResult { - // This works because the memory allocator must align allocations to the non-coherent - // atom size when the memory is host-visible but not host-coherent. - let start = align_down(subbuffer.offset(), atom_size); - let end = cmp::min( - align_up(subbuffer.offset() + subbuffer.size(), atom_size), - allocation.size(), - ); - let range = Range { start, end }; - - tcx.validate_write_buffer(id, range.clone())?; - - let memory_range = MappedMemoryRange { - offset: range.start, - size: range.end - range.start, - _ne: crate::NE, - }; - - // SAFETY: - // - We checked that the task has write access to the subbuffer above. - // - The caller must guarantee that the subbuffer falls within the mapped range of - // memory. - // - We ensure that memory mappings are always aligned to the non-coherent atom size for - // non-host-coherent memory, therefore the subbuffer's range aligned to the - // non-coherent atom size must fall within the mapped range of the memory. 
- unsafe { allocation.invalidate_range_unchecked(memory_range) } - .map_err(HostAccessError::Invalidate)?; - - Ok(()) - } - - assert!(T::LAYOUT.alignment().as_devicesize() <= 64); - - let buffer = self.buffer(id)?.buffer(); - let subbuffer = Subbuffer::from(buffer.clone()) - .slice(range) - .reinterpret::(); - - let allocation = match buffer.memory() { - BufferMemory::Normal(a) => a, - BufferMemory::Sparse => { - todo!("`TaskContext::write_buffer` doesn't support sparse binding yet"); - } - BufferMemory::External => { - return Err(TaskError::HostAccess(HostAccessError::Unmanaged)); - } - _ => unreachable!(), - }; - - let mapped_slice = subbuffer.mapped_slice().map_err(|err| match err { - vulkano::sync::HostAccessError::NotHostMapped => HostAccessError::NotHostMapped, - vulkano::sync::HostAccessError::OutOfMappedRange => HostAccessError::OutOfMappedRange, - _ => unreachable!(), - })?; - - let atom_size = allocation.atom_size(); - - if let Some(atom_size) = atom_size { - // SAFETY: - // `subbuffer.mapped_slice()` didn't return an error, which means that the subbuffer - // falls within the mapped range of the memory. - unsafe { invalidate_subbuffer(self, id, subbuffer.as_bytes(), allocation, atom_size) }?; - } else { - let range = subbuffer.offset()..subbuffer.offset() + subbuffer.size(); - self.validate_write_buffer(id, range)?; - } - - // SAFETY: We checked that the task has write access to the subbuffer above, which also - // includes the guarantee that no other tasks can be accessing the subbuffer on neither the - // host nor the device. The same task cannot obtain another `BufferWriteGuard` to the - // subbuffer because `TaskContext::write_buffer` requires a mutable reference. - let data = unsafe { &mut *T::ptr_from_slice(mapped_slice) }; + ) -> TaskResult<&mut T> { + self.validate_write_buffer(id)?; - Ok(BufferWriteGuard { - subbuffer: subbuffer.into_bytes(), - data, - atom_size, - }) + // SAFETY: We checked that the task has write access to the buffer above, which also + // includes the guarantee that no other tasks can be accessing the buffer on neither the + // host nor the device. The same task cannot obtain another mutable reference to the buffer + // because `TaskContext::write_buffer` requires a mutable reference. + unsafe { self.write_buffer_unchecked(id, range) } } - fn validate_write_buffer( - &self, - id: Id, - range: BufferRange, - ) -> Result<(), Box> { + fn validate_write_buffer(&self, id: Id) -> Result<(), Box> { if !self - .accesses - .contains_buffer_access(id, range, AccessType::HostWrite) + .resource_map + .virtual_resources() + .contains_host_buffer_access(id, HostAccessType::Write) { return Err(Box::new(ValidationError { context: "TaskContext::write_buffer".into(), - problem: "the task node does not have an access of type `AccessType::HostWrite` \ - for the range of the buffer" + problem: "the task graph does not have an access of type `HostAccessType::Write` \ + for the buffer" .into(), ..Default::default() })); @@ -560,13 +442,7 @@ impl<'a> TaskContext<'a> { } /// Gets write access to a portion of the buffer corresponding to `id` without checking if this - /// access is accounted for in the [task's input/output interface]. - /// - /// This method doesn't do any host cache control. 
If the memory backing the buffer is not - /// [host-coherent], you must call [`flush_range`] in order for any writes to be available to - /// the host memory domain, and must not forget that such flushes must be aligned to the - /// [`non_coherent_atom_size`] and hence the aligned range must be accounted for in the task's - /// input/output interface. + /// access is accounted for in the [task graph's host access set]. /// /// If the memory backing the buffer is not managed by vulkano (i.e. the buffer was created /// by [`RawBuffer::assume_bound`]), then it can't be written using this method and an error @@ -574,7 +450,7 @@ impl<'a> TaskContext<'a> { /// /// # Safety /// - /// This access must be accounted for in the task's input/output interface. + /// This access must be accounted for in the task graph's host access set. /// /// # Panics /// @@ -582,9 +458,6 @@ impl<'a> TaskContext<'a> { /// - Panics if [`Subbuffer::slice`] with the given `range` panics. /// - Panics if [`Subbuffer::reinterpret`] to the given `T` panics. /// - /// [host-coherent]: vulkano::memory::MemoryPropertyFlags::HOST_COHERENT - /// [`flush_range`]: vulkano::memory::ResourceMemory::flush_range - /// [`non_coherent_atom_size`]: vulkano::device::DeviceProperties::non_coherent_atom_size /// [`RawBuffer::assume_bound`]: vulkano::buffer::sys::RawBuffer::assume_bound pub unsafe fn write_buffer_unchecked( &mut self, @@ -598,10 +471,10 @@ impl<'a> TaskContext<'a> { .slice(range) .reinterpret::(); - match buffer.memory() { + let allocation = match buffer.memory() { BufferMemory::Normal(a) => a, BufferMemory::Sparse => { - todo!("`TaskContext::write_buffer_unchecked` doesn't support sparse binding yet"); + todo!("`TaskContext::write_buffer` doesn't support sparse binding yet"); } BufferMemory::External => { return Err(TaskError::HostAccess(HostAccessError::Unmanaged)); @@ -609,12 +482,14 @@ impl<'a> TaskContext<'a> { _ => unreachable!(), }; - let mapped_slice = subbuffer.mapped_slice().map_err(|err| match err { + unsafe { allocation.mapped_slice_unchecked(..) }.map_err(|err| match err { vulkano::sync::HostAccessError::NotHostMapped => HostAccessError::NotHostMapped, vulkano::sync::HostAccessError::OutOfMappedRange => HostAccessError::OutOfMappedRange, _ => unreachable!(), })?; + let mapped_slice = subbuffer.mapped_slice().unwrap(); + // SAFETY: The caller must ensure that access to the data is synchronized. let data = unsafe { &mut *T::ptr_from_slice(mapped_slice) }; @@ -662,86 +537,44 @@ impl<'a> TaskContext<'a> { Ok(()) } -} - -/// Allows you to read a subbuffer from the host. -/// -/// This type is created by the [`read_buffer`] method on [`TaskContext`]. -/// -/// [`read_buffer`]: TaskContext::read_buffer -// NOTE(Marc): This type doesn't actually do anything, but exists for forward-compatibility. -#[derive(Debug)] -pub struct BufferReadGuard<'a, T: ?Sized> { - data: &'a T, -} -impl Deref for BufferReadGuard<'_, T> { - type Target = T; - - #[inline] - fn deref(&self) -> &Self::Target { - self.data - } -} - -/// Allows you to write a subbuffer from the host. -/// -/// This type is created by the [`write_buffer`] method on [`TaskContext`]. 
-/// -/// [`write_buffer`]: TaskContext::write_buffer -pub struct BufferWriteGuard<'a, T: ?Sized> { - subbuffer: Subbuffer<[u8]>, - data: &'a mut T, - atom_size: Option, -} - -impl Deref for BufferWriteGuard<'_, T> { - type Target = T; - - #[inline] - fn deref(&self) -> &Self::Target { - self.data - } -} - -impl DerefMut for BufferWriteGuard<'_, T> { + /// Pushes a command buffer into the list of command buffers to be executed on the queue. + /// + /// All command buffers will be executed in the order in which they are pushed after the task + /// has finished execution. That means in particular, that commands recorded by the task will + /// start execution before execution of any pushed command buffers starts. + /// + /// # Safety + /// + /// Since the command buffer will be executed on the same queue right after the current command + /// buffer, without any added synchronization, it must be safe to do so. The given command + /// buffer must not do any accesses not accounted for in the [task's access set], or ensure + /// that such accesses are appropriately synchronized. #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - self.data + pub unsafe fn push_command_buffer(&self, command_buffer: Arc) { + let vec = self.command_buffers.take().unwrap(); + vec.push(command_buffer); + self.command_buffers.set(Some(vec)); } -} -impl Drop for BufferWriteGuard<'_, T> { + /// Extends the list of command buffers to be executed on the queue. + /// + /// This function behaves identically to the [`push_command_buffer`] method, except that it + /// pushes all command buffers from the given iterator in order. + /// + /// # Safety + /// + /// See the [`push_command_buffer`] method for the safety preconditions. + /// + /// [`push_command_buffer`]: Self::push_command_buffer #[inline] - fn drop(&mut self) { - #[cold] - fn flush_subbuffer(subbuffer: &Subbuffer<[u8]>, atom_size: DeviceAlignment) { - let allocation = match subbuffer.buffer().memory() { - BufferMemory::Normal(a) => a, - _ => unreachable!(), - }; - - let memory_range = MappedMemoryRange { - offset: align_down(subbuffer.offset(), atom_size), - size: cmp::min( - align_up(subbuffer.offset() + subbuffer.size(), atom_size), - allocation.size(), - ) - subbuffer.offset(), - _ne: crate::NE, - }; - - // SAFETY: `TaskContext::write_buffer` ensures that the task has write access to this - // subbuffer aligned to the non-coherent atom size. - if let Err(err) = unsafe { allocation.flush_range_unchecked(memory_range) } { - if !thread::panicking() { - panic!("failed to flush buffer write: {err:?}"); - } - } - } - - if let Some(atom_size) = self.atom_size { - flush_subbuffer(&self.subbuffer, atom_size); - } + pub unsafe fn extend_command_buffers( + &self, + command_buffers: impl IntoIterator>, + ) { + let vec = self.command_buffers.take().unwrap(); + vec.extend(command_buffers); + self.command_buffers.set(Some(vec)); } } @@ -799,27 +632,21 @@ impl Error for TaskError { /// Error that can happen when trying to retrieve a Vulkan object or state by [`Id`]. 
#[derive(Debug)] pub struct InvalidSlotError { - slot: SlotId, + id: Id, } impl InvalidSlotError { fn new(id: Id) -> Self { - InvalidSlotError { slot: id.slot } + InvalidSlotError { id: id.erase() } } } impl fmt::Display for InvalidSlotError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let &InvalidSlotError { slot } = self; - let object_type = match slot.tag() & OBJECT_TYPE_MASK { - 0 => ObjectType::Buffer, - 1 => ObjectType::Image, - 2 => ObjectType::Swapchain, - 3 => ObjectType::Flight, - _ => unreachable!(), - }; + let &InvalidSlotError { id } = self; + let object_type = id.object_type(); - write!(f, "invalid slot for object type {object_type:?}: {slot:?}") + write!(f, "invalid slot for object type `{object_type:?}`: {id:?}") } } @@ -828,7 +655,6 @@ impl Error for InvalidSlotError {} /// Error that can happen when attempting to read or write a resource from the host. #[derive(Debug)] pub enum HostAccessError { - Invalidate(VulkanError), Unmanaged, NotHostMapped, OutOfMappedRange, @@ -837,7 +663,6 @@ pub enum HostAccessError { impl fmt::Display for HostAccessError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let msg = match self { - Self::Invalidate(_) => "invalidating the device memory failed", Self::Unmanaged => "the resource is not managed by vulkano", Self::NotHostMapped => "the device memory is not current host-mapped", Self::OutOfMappedRange => { @@ -849,14 +674,7 @@ impl fmt::Display for HostAccessError { } } -impl Error for HostAccessError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::Invalidate(err) => Some(err), - _ => None, - } - } -} +impl Error for HostAccessError {} /// Specifies the type of queue family that a task can be executed on. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -891,13 +709,19 @@ pub enum QueueFamilyType { /// /// Note that this ID **is not** globally unique. It is unique in the scope of a logical device. #[repr(transparent)] -pub struct Id { +pub struct Id { slot: SlotId, marker: PhantomData T>, } impl Id { - fn new(slot: SlotId) -> Self { + /// An ID that's guaranteed to be invalid. + pub const INVALID: Self = Id { + slot: SlotId::INVALID, + marker: PhantomData, + }; + + const unsafe fn new(slot: SlotId) -> Self { Id { slot, marker: PhantomData, @@ -908,8 +732,53 @@ impl Id { self.slot.index() } - fn tag(self) -> u32 { - self.slot.tag() + /// Returns `true` if this ID represents a [virtual resource]. + #[inline] + pub const fn is_virtual(self) -> bool { + self.slot.tag() & Id::VIRTUAL_BIT != 0 + } + + /// Returns `true` if this ID represents a resource with the exclusive sharing mode. + fn is_exclusive(self) -> bool { + self.slot.tag() & Id::EXCLUSIVE_BIT != 0 + } + + fn erase(self) -> Id { + unsafe { Id::new(self.slot) } + } + + fn object_type(self) -> ObjectType { + match self.slot.tag() & Id::OBJECT_TYPE_MASK { + Buffer::TAG => ObjectType::Buffer, + Image::TAG => ObjectType::Image, + Swapchain::TAG => ObjectType::Swapchain, + Flight::TAG => ObjectType::Flight, + _ => unreachable!(), + } + } +} + +impl Id { + /// Returns the ID that always refers to the swapchain image that's currently acquired from the + /// swapchain. 
+ #[inline] + pub const fn current_image_id(self) -> Id { + unsafe { Id::new(self.slot) } + } +} + +impl Id { + const OBJECT_TYPE_MASK: u32 = 0b11; + + const VIRTUAL_BIT: u32 = 1 << 7; + const EXCLUSIVE_BIT: u32 = 1 << 6; + + fn is(self) -> bool { + self.object_type() == O::TYPE + } + + unsafe fn parametrize(self) -> Id { + unsafe { Id::new(self.slot) } } } @@ -924,10 +793,14 @@ impl Copy for Id {} impl fmt::Debug for Id { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Id") - .field("index", &self.slot.index()) - .field("generation", &self.slot.generation()) - .finish() + if *self == Id::INVALID { + f.pad("Id::INVALID") + } else { + f.debug_struct("Id") + .field("index", &self.slot.index()) + .field("generation", &self.slot.generation()) + .finish() + } } } @@ -983,7 +856,29 @@ impl fmt::Debug for Ref<'_, T> { } } -#[derive(Debug, Clone, Copy)] +trait Object { + const TYPE: ObjectType; + + const TAG: u32 = Self::TYPE as u32; +} + +impl Object for Buffer { + const TYPE: ObjectType = ObjectType::Buffer; +} + +impl Object for Image { + const TYPE: ObjectType = ObjectType::Image; +} + +impl Object for Swapchain { + const TYPE: ObjectType = ObjectType::Swapchain; +} + +impl Object for Flight { + const TYPE: ObjectType = ObjectType::Flight; +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] enum ObjectType { Buffer = 0, Image = 1, @@ -991,14 +886,56 @@ enum ObjectType { Flight = 3, } -const BUFFER_TAG: u32 = ObjectType::Buffer as u32; -const IMAGE_TAG: u32 = ObjectType::Image as u32; -const SWAPCHAIN_TAG: u32 = ObjectType::Swapchain as u32; -const FLIGHT_TAG: u32 = ObjectType::Flight as u32; - -const OBJECT_TYPE_MASK: u32 = 0b11; - // SAFETY: ZSTs can always be safely produced out of thin air, barring any safety invariants they // might impose, which in the case of `NonExhaustive` are none. const NE: vulkano::NonExhaustive = unsafe { ::std::mem::transmute::<(), ::vulkano::NonExhaustive>(()) }; + +#[cfg(test)] +mod tests { + macro_rules! test_queues { + () => {{ + let Ok(library) = vulkano::VulkanLibrary::new() else { + return; + }; + let Ok(instance) = vulkano::instance::Instance::new(library, Default::default()) else { + return; + }; + let Ok(mut physical_devices) = instance.enumerate_physical_devices() else { + return; + }; + let Some(physical_device) = physical_devices.find(|p| { + p.queue_family_properties().iter().any(|q| { + q.queue_flags + .contains(vulkano::device::QueueFlags::GRAPHICS) + }) + }) else { + return; + }; + let queue_create_infos = physical_device + .queue_family_properties() + .iter() + .enumerate() + .map(|(i, _)| vulkano::device::QueueCreateInfo { + queue_family_index: i as u32, + ..Default::default() + }) + .collect(); + let Ok((device, queues)) = vulkano::device::Device::new( + physical_device, + vulkano::device::DeviceCreateInfo { + queue_create_infos, + ..Default::default() + }, + ) else { + return; + }; + + ( + $crate::resource::Resources::new(device, Default::default()), + queues.collect::>(), + ) + }}; + } + pub(crate) use test_queues; +} diff --git a/vulkano-taskgraph/src/resource.rs b/vulkano-taskgraph/src/resource.rs index f6b78bba58..42a9a6fa79 100644 --- a/vulkano-taskgraph/src/resource.rs +++ b/vulkano-taskgraph/src/resource.rs @@ -1,41 +1,34 @@ //! Synchronization state tracking of all resources. 
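For orientation, the tag bits used by `Id` above can be restated in a small standalone sketch: the two low bits carry the object type, while bits 6 and 7 carry the per-resource flags. The constants here only mirror the private ones from this file for illustration.

// Illustrative restatement of the slot-tag layout:
//   bits 0..=1  object type (0 = buffer, 1 = image, 2 = swapchain, 3 = flight)
//   bit  6      exclusive sharing mode
//   bit  7      virtual resource
const OBJECT_TYPE_MASK: u32 = 0b11;
const EXCLUSIVE_BIT: u32 = 1 << 6;
const VIRTUAL_BIT: u32 = 1 << 7;

fn describe_tag(tag: u32) -> (u32, bool, bool) {
    (
        tag & OBJECT_TYPE_MASK,
        tag & EXCLUSIVE_BIT != 0,
        tag & VIRTUAL_BIT != 0,
    )
}

#[test]
fn tag_layout() {
    // A virtual swapchain resource decodes as object type 2 with the virtual
    // bit set and the exclusive bit clear.
    assert_eq!(describe_tag(2 | VIRTUAL_BIT), (2, false, true));
}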
-use crate::{Id, InvalidSlotError, Ref, BUFFER_TAG, FLIGHT_TAG, IMAGE_TAG, SWAPCHAIN_TAG}; +use crate::{Id, InvalidSlotError, Object, Ref}; use ash::vk; use concurrent_slotmap::{epoch, SlotMap}; -use parking_lot::{Mutex, MutexGuard}; -use rangemap::RangeMap; +use parking_lot::{Mutex, RwLock}; use smallvec::SmallVec; use std::{ any::Any, - cmp, hash::Hash, - iter::FusedIterator, - mem, - num::NonZeroU32, - ops::Range, + num::{NonZeroU32, NonZeroU64}, sync::{ - atomic::{AtomicU32, Ordering}, + atomic::{AtomicU32, AtomicU64, Ordering}, Arc, }, + time::Duration, }; use thread_local::ThreadLocal; use vulkano::{ buffer::{AllocateBufferError, Buffer, BufferCreateInfo}, command_buffer::allocator::StandardCommandBufferAllocator, device::{Device, DeviceOwned}, - image::{ - AllocateImageError, Image, ImageAspects, ImageCreateFlags, ImageCreateInfo, ImageLayout, - ImageMemory, ImageSubresourceRange, - }, - memory::allocator::{AllocationCreateInfo, DeviceLayout, MemoryAllocator}, + image::{AllocateImageError, Image, ImageCreateInfo, ImageLayout, ImageMemory}, + memory::allocator::{AllocationCreateInfo, DeviceLayout, StandardMemoryAllocator}, swapchain::{Surface, Swapchain, SwapchainCreateInfo}, sync::{ fence::{Fence, FenceCreateFlags, FenceCreateInfo}, semaphore::Semaphore, AccessFlags, PipelineStages, }, - DeviceSize, Validated, VulkanError, + Validated, VulkanError, }; static REGISTERED_DEVICES: Mutex> = Mutex::new(Vec::new()); @@ -46,10 +39,10 @@ static REGISTERED_DEVICES: Mutex> = Mutex::new(Vec::new()); /// source of truth in regards to the synchronization state of a resource. In a similar vein, each /// resource in the collection must be unique. // FIXME: Custom collector -// FIXME: Swapchain recreation #[derive(Debug)] pub struct Resources { - memory_allocator: Arc, + device: Arc, + memory_allocator: Arc, command_buffer_allocator: Arc, global: epoch::GlobalHandle, @@ -63,27 +56,27 @@ pub struct Resources { #[derive(Debug)] pub struct BufferState { buffer: Arc, - // FIXME: This is terribly inefficient. - last_accesses: Mutex>, + last_access: Mutex, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct BufferAccess { - access_type: AccessType, + stage_mask: PipelineStages, + access_mask: AccessFlags, queue_family_index: u32, } #[derive(Debug)] pub struct ImageState { image: Arc, - // FIXME: This is terribly inefficient. 
- last_accesses: Mutex>, + last_access: Mutex, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct ImageAccess { - access_type: AccessType, - layout_type: ImageLayoutType, + stage_mask: PipelineStages, + access_mask: AccessFlags, + image_layout: ImageLayout, queue_family_index: u32, } @@ -95,21 +88,22 @@ pub struct SwapchainState { pub(crate) semaphores: SmallVec<[SwapchainSemaphoreState; 3]>, flight_id: Id, pub(crate) current_image_index: AtomicU32, - last_accesses: Mutex>, + last_access: Mutex, } -#[derive(Debug)] +#[derive(Clone, Debug)] pub(crate) struct SwapchainSemaphoreState { - pub(crate) image_available_semaphore: Semaphore, - pub(crate) tasks_complete_semaphore: Semaphore, + pub(crate) image_available_semaphore: Arc, + pub(crate) pre_present_complete_semaphore: Arc, + pub(crate) tasks_complete_semaphore: Arc, } // FIXME: imported/exported fences #[derive(Debug)] pub struct Flight { frame_count: NonZeroU32, - current_frame: AtomicU32, - fences: SmallVec<[Fence; 3]>, + current_frame: AtomicU64, + fences: SmallVec<[RwLock; 3]>, pub(crate) state: Mutex, } @@ -125,16 +119,11 @@ impl Resources { /// /// # Panics /// - /// - Panics if `memory_allocator.device()` already has a `Resources` collection associated - /// with it. + /// - Panics if `device` already has a `Resources` collection associated with it. #[must_use] - pub fn new( - memory_allocator: Arc, - create_info: ResourcesCreateInfo, - ) -> Self { - let device = memory_allocator.device(); + pub fn new(device: Arc, create_info: ResourcesCreateInfo) -> Arc { let mut registered_devices = REGISTERED_DEVICES.lock(); - let device_addr = Arc::as_ptr(device) as usize; + let device_addr = Arc::as_ptr(&device) as usize; assert!( !registered_devices.contains(&device_addr), @@ -143,6 +132,7 @@ impl Resources { registered_devices.push(device_addr); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new( device.clone(), Default::default(), @@ -150,7 +140,8 @@ impl Resources { let global = epoch::GlobalHandle::new(); - Resources { + Arc::new(Resources { + device, memory_allocator, command_buffer_allocator, locals: ThreadLocal::new(), @@ -159,13 +150,13 @@ impl Resources { swapchains: SlotMap::with_global(create_info.max_swapchains, global.clone()), flights: SlotMap::with_global(create_info.max_flights, global.clone()), global, - } + }) } - /// Returns the memory allocator that the collection was created with. + /// Returns the standard memory allocator. 
#[inline] #[must_use] - pub fn memory_allocator(&self) -> &Arc { + pub fn memory_allocator(&self) -> &Arc { &self.memory_allocator } @@ -233,11 +224,7 @@ impl Resources { surface: Arc, create_info: SwapchainCreateInfo, ) -> Result, Validated> { - let frames_in_flight = self - .flights - .get(flight_id.slot, self.pin()) - .unwrap() - .frame_count(); + let frames_in_flight = self.flight(flight_id).unwrap().frame_count(); assert!(create_info.min_image_count >= frames_in_flight); @@ -272,21 +259,24 @@ impl Resources { }, ) } + .map(RwLock::new) }) .collect::>()?; let flight = Flight { frame_count, - current_frame: AtomicU32::new(0), + current_frame: AtomicU64::new(0), fences, state: Mutex::new(FlightState { death_rows: (0..frame_count.get()).map(|_| Vec::new()).collect(), }), }; - let slot = self.flights.insert_with_tag(flight, FLIGHT_TAG, self.pin()); + let slot = self + .flights + .insert_with_tag(flight, Flight::TAG, self.pin()); - Ok(Id::new(slot)) + Ok(unsafe { Id::new(slot) }) } /// Adds a buffer to the collection. @@ -306,14 +296,12 @@ impl Resources { unsafe fn add_buffer_unchecked(&self, buffer: Arc) -> Id { let state = BufferState { buffer, - last_accesses: Mutex::new(RangeMap::new()), + last_access: Mutex::new(BufferAccess::NONE), }; - unsafe { state.set_access(0..state.buffer.size(), BufferAccess::NONE) }; - - let slot = self.buffers.insert_with_tag(state, BUFFER_TAG, self.pin()); + let slot = self.buffers.insert_with_tag(state, Buffer::TAG, self.pin()); - Id::new(slot) + unsafe { Id::new(slot) } } /// Adds an image to the collection. @@ -340,14 +328,12 @@ impl Resources { unsafe fn add_image_unchecked(&self, image: Arc) -> Id { let state = ImageState { image, - last_accesses: Mutex::new(RangeMap::new()), + last_access: Mutex::new(ImageAccess::NONE), }; - unsafe { state.set_access(state.image.subresource_range(), ImageAccess::NONE) }; + let slot = self.images.insert_with_tag(state, Image::TAG, self.pin()); - let slot = self.images.insert_with_tag(state, IMAGE_TAG, self.pin()); - - Id::new(slot) + unsafe { Id::new(slot) } } /// Adds a swapchain to the collection. `(swapchain, images)` must correspond to the value @@ -375,11 +361,7 @@ impl Resources { assert_eq!(swapchain.device(), self.device()); assert_eq!(images.len(), swapchain.image_count() as usize); - let frames_in_flight = self - .flights - .get(flight_id.slot, self.pin()) - .unwrap() - .frame_count(); + let frames_in_flight = self.flight(flight_id).unwrap().frame_count(); assert!(swapchain.image_count() >= frames_in_flight); @@ -435,9 +417,7 @@ impl Resources { ) -> Result, VulkanError> { let guard = &self.pin(); - let frames_in_flight = self - .flights - .get(flight_id.slot, guard) + let frames_in_flight = unsafe { self.flight_unprotected(flight_id) } .unwrap() .frame_count(); @@ -445,13 +425,17 @@ impl Resources { .map(|_| { Ok(SwapchainSemaphoreState { // SAFETY: The parameters are valid. - image_available_semaphore: unsafe { + image_available_semaphore: Arc::new(unsafe { Semaphore::new_unchecked(self.device().clone(), Default::default()) - }?, + }?), // SAFETY: The parameters are valid. - tasks_complete_semaphore: unsafe { + pre_present_complete_semaphore: Arc::new(unsafe { Semaphore::new_unchecked(self.device().clone(), Default::default()) - }?, + }?), + // SAFETY: The parameters are valid. 
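Tying the collection plumbing above together, a minimal setup sketch: create the `Resources` collection for a device and register one flight. `Resources::new` and its `Arc<Resources>` return type are visible in this hunk; the `create_flight` name and its error handling are assumptions about the surrounding crate.

use std::sync::Arc;

use vulkano::device::Device;
use vulkano_taskgraph::{
    resource::{Flight, Resources},
    Id,
};

// Hedged sketch: one resource collection per device, plus a flight with two
// frames in flight for the usual double-buffering setup.
fn setup(device: Arc<Device>) -> (Arc<Resources>, Id<Flight>) {
    let resources = Resources::new(device, Default::default());
    let flight_id = resources
        .create_flight(2) // assumed API: the frame count must be non-zero
        .expect("failed to create flight");

    (resources, flight_id)
}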
+ tasks_complete_semaphore: Arc::new(unsafe { + Semaphore::new_unchecked(self.device().clone(), Default::default()) + }?), }) }) .collect::>()?; @@ -462,14 +446,69 @@ impl Resources { semaphores, flight_id, current_image_index: AtomicU32::new(u32::MAX), - last_accesses: Mutex::new(RangeMap::new()), + last_access: Mutex::new(ImageAccess::NONE), }; - unsafe { state.set_access(0..state.swapchain.image_array_layers(), ImageAccess::NONE) }; + let slot = self + .swapchains + .insert_with_tag(state, Swapchain::TAG, guard); + + Ok(unsafe { Id::new(slot) }) + } + + /// Calls [`Swapchain::recreate`] on the swapchain corresponding to `id` and adds the new + /// swapchain to the collection. The old swapchain will be cleaned up as soon as possible. + /// + /// # Panics + /// + /// - Panics if called from multiple threads at the same time. + /// - Panics if the flight is currently being executed. + /// - Panics if `f` panics. + /// - Panics if [`Swapchain::recreate`] panics. + /// - Panics if `new_swapchain.image_count()` is not greater than or equal to the number of + /// [frames] of the flight that owns the swapchain. + /// + /// # Errors + /// + /// - Returns an error when [`Swapchain::recreate`] returns an error. + pub fn recreate_swapchain( + &self, + id: Id, + f: impl FnOnce(SwapchainCreateInfo) -> SwapchainCreateInfo, + ) -> Result, Validated> { + let guard = self.pin(); + + let state = unsafe { self.swapchain_unprotected(id) }.unwrap(); + let swapchain = state.swapchain(); + let flight_id = state.flight_id; + let flight = unsafe { self.flight_unprotected_unchecked(flight_id) }; + let mut flight_state = flight.state.try_lock().unwrap(); + + let (new_swapchain, new_images) = swapchain.recreate(f(swapchain.create_info()))?; + + let frames_in_flight = flight.frame_count(); + + assert!(new_swapchain.image_count() >= frames_in_flight); + + let death_row = &mut flight_state.death_rows[flight.previous_frame_index() as usize]; + death_row.push(swapchain.clone()); + + let new_state = SwapchainState { + swapchain: new_swapchain, + images: new_images.into(), + semaphores: state.semaphores.clone(), + flight_id, + current_image_index: AtomicU32::new(u32::MAX), + last_access: Mutex::new(ImageAccess::NONE), + }; - let slot = self.swapchains.insert_with_tag(state, SWAPCHAIN_TAG, guard); + let slot = self + .swapchains + .insert_with_tag(new_state, Swapchain::TAG, guard); - Ok(Id::new(slot)) + let _ = unsafe { self.remove_swapchain(id) }; + + Ok(unsafe { Id::new(slot) }) } /// Removes the buffer corresponding to `id`. @@ -598,6 +637,12 @@ impl Resources { unsafe { self.flights.get_unprotected(id.slot) }.ok_or(InvalidSlotError::new(id)) } + #[inline] + pub(crate) unsafe fn flight_unprotected_unchecked(&self, id: Id) -> &Flight { + // SAFETY: Enforced by the caller. 
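The new `recreate_swapchain` above replaces manual swapchain recreation; a hedged usage sketch for a window resize follows. The `Id<Swapchain>` parameterization is an assumption, since generic arguments are not visible in this hunk.

use vulkano::swapchain::{Swapchain, SwapchainCreateInfo};
use vulkano_taskgraph::{resource::Resources, Id};

// Hedged sketch: recreate the swapchain with a new extent. The method itself
// queues the old swapchain on the flight's death row, so no manual cleanup
// is needed here.
fn handle_resize(
    resources: &Resources,
    swapchain_id: Id<Swapchain>,
    new_extent: [u32; 2],
) -> Id<Swapchain> {
    resources
        .recreate_swapchain(swapchain_id, |create_info| SwapchainCreateInfo {
            image_extent: new_extent,
            ..create_info
        })
        .expect("failed to recreate swapchain")
}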
+ unsafe { self.flights.index_unchecked_unprotected(id.slot.index()) } + } + #[inline] pub(crate) fn pin(&self) -> epoch::Guard<'_> { self.locals.get_or(|| self.global.register_local()).pin() @@ -619,6 +664,23 @@ impl Resources { impl Drop for Resources { fn drop(&mut self) { + for (flight_id, flight) in &mut self.flights { + let prev_frame_index = flight.previous_frame_index(); + let fence = flight.fences[prev_frame_index as usize].get_mut(); + + if let Err(err) = fence.wait(None) { + if err == VulkanError::DeviceLost { + break; + } + + eprintln!( + "failed to wait for flight {flight_id:?} to finish rendering graceful shutdown \ + impossible: {err}; aborting", + ); + std::process::abort(); + } + } + // FIXME: let _ = unsafe { self.device().wait_idle() }; @@ -638,7 +700,7 @@ impl Drop for Resources { unsafe impl DeviceOwned for Resources { #[inline] fn device(&self) -> &Arc { - self.memory_allocator.device() + &self.device } } @@ -650,68 +712,89 @@ impl BufferState { &self.buffer } - /// Returns all last accesses that overlap the given `range` of the buffer. - /// - /// # Panics - /// - /// - Panics if `range` doesn't denote a valid range of the buffer. + /// Returns the last access that was performed on the buffer. #[inline] - pub fn accesses(&self, range: BufferRange) -> BufferAccesses<'_> { - assert!(range.end <= self.buffer.size()); - assert!(!range.is_empty()); - - BufferAccesses { - range: range.clone(), - overlapping: MutexGuard::leak(self.last_accesses.lock()).overlapping(range), - // SAFETY: We locked the mutex above. - _guard: unsafe { AccessesGuard::new(&self.last_accesses) }, - } + pub fn access(&self) -> BufferAccess { + *self.last_access.lock() } - /// Sets the last access of the given `range` of the buffer. + /// Sets the last access that was performed on the buffer. /// /// # Safety /// - /// - `access` must constitute the correct access that was last performed on the `range` of the - /// buffer. - /// - /// # Panics - /// - /// - Panics if `range` is empty. + /// - `access` must constitute the correct access that was last performed on the buffer. #[inline] - pub unsafe fn set_access(&self, range: BufferRange, access: BufferAccess) { - self.last_accesses.lock().insert(range, access); + pub unsafe fn set_access(&self, access: BufferAccess) { + *self.last_access.lock() = access; } } impl BufferAccess { /// A `BufferAccess` that signifies the lack thereof, for instance because the resource was /// never accessed. - pub const NONE: Self = BufferAccess::new(AccessType::None, vk::QUEUE_FAMILY_IGNORED); + pub const NONE: Self = BufferAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + }; /// Creates a new `BufferAccess`. + /// + /// # Panics + /// + /// - Panics if `access_types` contains any access type that's not valid for buffers. 
#[inline] - pub const fn new(access_type: AccessType, queue_family_index: u32) -> Self { + #[must_use] + pub const fn new(access_types: &[AccessType], queue_family_index: u32) -> Self { + let mut access = BufferAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + queue_family_index, + }; + let mut i = 0; + + while i < access_types.len() { + let access_type = access_types[i]; + + assert!(access_type.is_valid_buffer_access_type()); + + access.stage_mask = access.stage_mask.union(access_type.stage_mask()); + access.access_mask = access.access_mask.union(access_type.access_mask()); + i += 1; + } + + access + } + + pub(crate) const fn from_masks( + stage_mask: PipelineStages, + access_mask: AccessFlags, + queue_family_index: u32, + ) -> Self { BufferAccess { - access_type, + stage_mask, + access_mask, queue_family_index, } } /// Returns the stage mask of this access. #[inline] + #[must_use] pub const fn stage_mask(&self) -> PipelineStages { - self.access_type.stage_mask() + self.stage_mask } /// Returns the access mask of this access. #[inline] + #[must_use] pub const fn access_mask(&self) -> AccessFlags { - self.access_type.access_mask() + self.access_mask } /// Returns the queue family index of this access. #[inline] + #[must_use] pub const fn queue_family_index(&self) -> u32 { self.queue_family_index } @@ -725,78 +808,85 @@ impl ImageState { &self.image } - /// Returns all last accesses that overlap the given `subresource_range` of the image. - /// - /// # Panics - /// - /// - Panics if `subresource_range` doesn't denote a valid subresource range of the image. + /// Returns the last access that was performed on the image. #[inline] - pub fn accesses(&self, subresource_range: ImageSubresourceRange) -> ImageAccesses<'_> { - let subresource_ranges = SubresourceRanges::from_image(&self.image, subresource_range); - let last_accesses = MutexGuard::leak(self.last_accesses.lock()); - - ImageAccesses { - mip_levels: self.image.mip_levels(), - array_layers: self.image.array_layers(), - subresource_ranges, - range: 0..0, - overlapping: last_accesses.overlapping(0..0), - last_accesses, - // SAFETY: We locked the mutex above. - _guard: unsafe { AccessesGuard::new(&self.last_accesses) }, - } + pub fn access(&self) -> ImageAccess { + *self.last_access.lock() } - /// Sets the last access of the given `subresource_range` of the image. + /// Sets the last access that was performed on the image. /// /// # Safety /// - /// - `access` must constitute the correct access that was last performed on the - /// `subresource_range` of the image. - /// - /// # Panics - /// - /// - Panics if `range` is empty. + /// - `access` must constitute the correct access that was last performed on the image. #[inline] - pub unsafe fn set_access(&self, subresource_range: ImageSubresourceRange, access: ImageAccess) { - let mut last_accesses = self.last_accesses.lock(); - - for range in SubresourceRanges::from_image(&self.image, subresource_range) { - last_accesses.insert(range, access); - } + pub unsafe fn set_access(&self, access: ImageAccess) { + *self.last_access.lock() = access; } } impl ImageAccess { /// An `ImageAccess` that signifies the lack thereof, for instance because the resource was /// never accessed. 
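The const `new` above folds a slice of access types into a single stage/access mask pair; a short sketch with example access types:

use vulkano_taskgraph::resource::{AccessType, BufferAccess};

// Hedged sketch: a storage buffer that a compute shader both reads and
// writes. The resulting access has COMPUTE_SHADER as its stage mask and the
// union of the storage read and write bits as its access mask.
fn compute_read_write_access(queue_family_index: u32) -> BufferAccess {
    BufferAccess::new(
        &[
            AccessType::ComputeShaderStorageRead,
            AccessType::ComputeShaderStorageWrite,
        ],
        queue_family_index,
    )
}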
- pub const NONE: Self = ImageAccess::new( - AccessType::None, - ImageLayoutType::Optimal, - vk::QUEUE_FAMILY_IGNORED, - ); + pub const NONE: Self = ImageAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + image_layout: ImageLayout::Undefined, + queue_family_index: vk::QUEUE_FAMILY_IGNORED, + }; /// Creates a new `ImageAccess`. + /// + /// # Panics + /// + /// - Panics if `access_types` contains any access type that's not valid for images. #[inline] + #[must_use] pub const fn new( - access_type: AccessType, - mut layout_type: ImageLayoutType, + access_types: &[AccessType], + layout_type: ImageLayoutType, queue_family_index: u32, ) -> Self { - // Make sure that entries in the tree always compare equal if the effective access is the - // same, so that they can be combined for easier pipeline barrier batching. - if matches!(access_type.image_layout(), ImageLayout::General) { - layout_type = ImageLayoutType::Optimal; - } + let mut access = ImageAccess { + stage_mask: PipelineStages::empty(), + access_mask: AccessFlags::empty(), + image_layout: ImageLayout::Undefined, + queue_family_index, + }; + let mut i = 0; + + while i < access_types.len() { + let access_type = access_types[i]; + + assert!(access_type.is_valid_image_access_type()); - // Presentation must be done in the optimal image layout. - if matches!(access_type, AccessType::Present) { - layout_type = ImageLayoutType::Optimal; + let image_layout = access_type.image_layout(layout_type); + + access.stage_mask = access.stage_mask.union(access_type.stage_mask()); + access.access_mask = access.access_mask.union(access_type.access_mask()); + access.image_layout = if matches!(access.image_layout, ImageLayout::Undefined) + || access.image_layout as i32 == image_layout as i32 + { + image_layout + } else { + ImageLayout::General + }; + i += 1; } + access + } + + pub(crate) const fn from_masks( + stage_mask: PipelineStages, + access_mask: AccessFlags, + image_layout: ImageLayout, + queue_family_index: u32, + ) -> Self { ImageAccess { - access_type, - layout_type, + stage_mask, + access_mask, + image_layout, queue_family_index, } } @@ -805,29 +895,26 @@ impl ImageAccess { #[inline] #[must_use] pub const fn stage_mask(&self) -> PipelineStages { - self.access_type.stage_mask() + self.stage_mask } /// Returns the access mask of this access. #[inline] #[must_use] pub const fn access_mask(&self) -> AccessFlags { - self.access_type.access_mask() + self.access_mask } /// Returns the image layout of this access. #[inline] #[must_use] pub const fn image_layout(&self) -> ImageLayout { - if self.layout_type.is_general() { - ImageLayout::General - } else { - self.access_type.image_layout() - } + self.image_layout } /// Returns the queue family index of this access. 
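The layout-combining rule in `ImageAccess::new` above (identical layouts are kept, differing ones collapse to `General`) is easiest to see with two example access types:

use vulkano::image::ImageLayout;
use vulkano_taskgraph::resource::{AccessType, ImageAccess, ImageLayoutType};

// Hedged sketch: sampling alone would keep ShaderReadOnlyOptimal, but
// combining it with a storage write (whose layout is General) collapses the
// combined layout to General.
fn sampled_and_written(queue_family_index: u32) -> ImageAccess {
    let access = ImageAccess::new(
        &[
            AccessType::FragmentShaderSampledRead,
            AccessType::FragmentShaderStorageWrite,
        ],
        ImageLayoutType::Optimal,
        queue_family_index,
    );
    debug_assert_eq!(access.image_layout(), ImageLayout::General);

    access
}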
#[inline] + #[must_use] pub const fn queue_family_index(&self) -> u32 { self.queue_family_index } @@ -873,41 +960,12 @@ impl SwapchainState { &self.images[self.current_image_index.load(Ordering::Relaxed) as usize] } - pub(crate) fn accesses(&self, array_layers: Range) -> ImageAccesses<'_> { - let subresource_range = ImageSubresourceRange { - aspects: ImageAspects::COLOR, - mip_levels: 0..1, - array_layers, - }; - let subresource_ranges = - SubresourceRanges::new(subresource_range, 1, self.swapchain.image_array_layers()); - let last_accesses = MutexGuard::leak(self.last_accesses.lock()); - - ImageAccesses { - mip_levels: 1, - array_layers: self.swapchain.image_array_layers(), - subresource_ranges, - range: 0..0, - overlapping: last_accesses.overlapping(0..0), - last_accesses, - // SAFETY: We locked the mutex above. - _guard: unsafe { AccessesGuard::new(&self.last_accesses) }, - } + pub(crate) fn access(&self) -> ImageAccess { + *self.last_access.lock() } - pub(crate) unsafe fn set_access(&self, array_layers: Range, access: ImageAccess) { - let subresource_range = ImageSubresourceRange { - aspects: ImageAspects::COLOR, - mip_levels: 0..1, - array_layers, - }; - let mut last_accesses = self.last_accesses.lock(); - - for range in - SubresourceRanges::new(subresource_range, 1, self.swapchain.image_array_layers()) - { - last_accesses.insert(range, access); - } + pub(crate) unsafe fn set_access(&self, access: ImageAccess) { + *self.last_access.lock() = access; } } @@ -919,18 +977,85 @@ impl Flight { self.frame_count.get() } - /// Returns the index of the current [frame] in [flight]. + /// Returns the current frame counter value. This always starts out at 0 and is monotonically + /// increasing with each passing [frame]. #[inline] #[must_use] - pub fn current_frame(&self) -> u32 { - self.current_frame.load(Ordering::Relaxed) % self.frame_count + pub fn current_frame(&self) -> u64 { + self.current_frame.load(Ordering::Relaxed) } - /// Returns the fence for the current [frame] in [flight]. + /// Returns the index of the current [frame] in [flight]. #[inline] #[must_use] - pub fn current_fence(&self) -> &Fence { - &self.fences[self.current_frame() as usize] + pub fn current_frame_index(&self) -> u32 { + (self.current_frame() % NonZeroU64::from(self.frame_count)) as u32 + } + + fn previous_frame_index(&self) -> u32 { + (self.current_frame().wrapping_sub(1) % NonZeroU64::from(self.frame_count)) as u32 + } + + pub(crate) fn current_fence(&self) -> &RwLock { + &self.fences[self.current_frame_index() as usize] + } + + /// Waits for the oldest [frame] in [flight] to finish. + #[inline] + pub fn wait(&self, timeout: Option) -> Result<(), VulkanError> { + self.fences[self.current_frame_index() as usize] + .read() + .wait(timeout) + } + + /// Waits for the given [frame] to finish. `frame` must have been previously obtained using + /// [`current_frame`] on `self`. + /// + /// # Panics + /// + /// - Panics if `frame` is greater than the current frame. + /// + /// [`current_frame`]: Self::current_frame + #[inline] + pub fn wait_for_frame(&self, frame: u64, timeout: Option) -> Result<(), VulkanError> { + let current_frame = self.current_frame(); + + assert!(frame <= current_frame); + + if current_frame - frame > u64::from(self.frame_count()) { + return Ok(()); + } + + self.fences[(frame % NonZeroU64::from(self.frame_count)) as usize] + .read() + .wait(timeout) + } + + /// Queues the destruction of the given `object` after the destruction of the command buffer(s) + /// for the previous [frame] in [flight]. 
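The frame bookkeeping above separates a monotonically increasing frame counter from the wrapped frame index; a hedged sketch of the typical begin-of-frame throttle:

use vulkano::VulkanError;
use vulkano_taskgraph::resource::Flight;

// Hedged sketch: block until the oldest frame in flight has finished, so at
// most `frame_count()` frames are ever in flight at once.
fn begin_frame(flight: &Flight) -> Result<u64, VulkanError> {
    flight.wait(None)?;

    // Monotonic counter (0, 1, 2, ...) versus its wrapped index into the
    // per-frame resources (0, 1, 0, 1, ... for a frame count of 2).
    let frame = flight.current_frame();
    let _frame_index = flight.current_frame_index();

    // Another thread could later wait for this specific frame to finish with
    // `flight.wait_for_frame(frame, None)`.
    Ok(frame)
}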
+ /// + /// # Panics + /// + /// - Panics if called from multiple threads at the same time. + /// - Panics if the flight is currently being executed. + #[inline] + pub fn destroy_object(&self, object: Arc) { + let mut state = self.state.try_lock().unwrap(); + state.death_rows[self.previous_frame_index() as usize].push(object); + } + + /// Queues the destruction of the given `objects` after the destruction of the command + /// buffer(s) for the previous [frame] in [flight]. + /// + /// # Panics + /// + /// - Panics if called from multiple threads at the same time. + /// - Panics if the flight is currently being executed. + #[inline] + pub fn destroy_objects(&self, objects: impl IntoIterator>) { + let mut state = self.state.try_lock().unwrap(); + state.death_rows[self.previous_frame_index() as usize] + .extend(objects.into_iter().map(|object| object as _)); } pub(crate) unsafe fn next_frame(&self) { @@ -969,302 +1094,6 @@ impl Default for ResourcesCreateInfo { } } -/// A subresource of a buffer that should be accessed. -pub type BufferRange = Range; - -/// An iterator over the last accesses of a buffer subresource. -/// -/// This type is created by the [`accesses`] method on [`BufferState`]. -/// -/// [`accesses`]: BufferState::accesses -pub struct BufferAccesses<'a> { - range: BufferRange, - overlapping: rangemap::map::Overlapping<'a, DeviceSize, BufferAccess, Range>, - _guard: AccessesGuard<'a, BufferAccess>, -} - -impl<'a> Iterator for BufferAccesses<'a> { - type Item = (BufferRange, &'a BufferAccess); - - #[inline] - fn next(&mut self) -> Option { - self.overlapping.next().map(|(range, access)| { - let start = cmp::max(range.start, self.range.start); - let end = cmp::min(range.end, self.range.end); - - (start..end, access) - }) - } -} - -impl FusedIterator for BufferAccesses<'_> {} - -/// An iterator over the last accesses of an image subresource. -/// -/// This type is created by the [`accesses`] method on [`ImageState`]. -/// -/// [`accesses`]: ImageState::accesses -pub struct ImageAccesses<'a> { - mip_levels: u32, - array_layers: u32, - subresource_ranges: SubresourceRanges, - range: Range, - overlapping: rangemap::map::Overlapping<'a, DeviceSize, ImageAccess, Range>, - last_accesses: &'a RangeMap, - _guard: AccessesGuard<'a, ImageAccess>, -} - -impl<'a> Iterator for ImageAccesses<'a> { - type Item = (ImageSubresourceRange, &'a ImageAccess); - - #[inline] - fn next(&mut self) -> Option { - loop { - if let Some((range, access)) = self.overlapping.next() { - let start = cmp::max(range.start, self.range.start); - let end = cmp::min(range.end, self.range.end); - let subresource_range = - range_to_subresources(start..end, self.mip_levels, self.array_layers); - - break Some((subresource_range, access)); - } else if let Some(range) = self.subresource_ranges.next() { - self.range = range.clone(); - self.overlapping = self.last_accesses.overlapping(range); - } else { - break None; - } - } - } -} - -impl FusedIterator for ImageAccesses<'_> {} - -struct AccessesGuard<'a, V> { - mutex: &'a Mutex>, -} - -impl<'a, V> AccessesGuard<'a, V> { - unsafe fn new(mutex: &'a Mutex>) -> Self { - AccessesGuard { mutex } - } -} - -impl Drop for AccessesGuard<'_, V> { - fn drop(&mut self) { - // SAFETY: Enforced by the caller of `AccessesGuard::new`. 
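The death-row methods above defer destruction until the previous frame's command buffers are gone. A hedged sketch of retiring a no-longer-needed buffer follows; the exact trait-object bound of the `object` parameter is not visible in this hunk.

use std::sync::Arc;

use vulkano::buffer::Buffer;
use vulkano_taskgraph::resource::Flight;

// Hedged sketch: hand an old buffer to the flight instead of dropping it,
// since the previous frame's command buffers may still reference it. This
// must not be called while the flight is being executed, because the state
// lock is taken with `try_lock`.
fn retire_buffer(flight: &Flight, old_buffer: Arc<Buffer>) {
    flight.destroy_object(old_buffer);
}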
- unsafe { self.mutex.force_unlock() } - } -} - -const _: () = assert!(mem::size_of::() == mem::size_of::()); - -#[derive(Clone)] -struct SubresourceRanges { - aspects: u32, - mip_levels: Range, - array_layers: Range, - aspect_size: DeviceSize, - mip_level_size: DeviceSize, - aspect_offset: DeviceSize, - mip_level_offset: DeviceSize, - granularity: SubresourceRangeGranularity, -} - -#[derive(Clone, Copy)] -enum SubresourceRangeGranularity { - Aspect, - MipLevel, - ArrayLayer, -} - -impl SubresourceRanges { - fn from_image(image: &Image, mut subresource_range: ImageSubresourceRange) -> Self { - assert!(image.format().aspects().contains(subresource_range.aspects)); - - if image.flags().intersects(ImageCreateFlags::DISJOINT) - && subresource_range.aspects.intersects(ImageAspects::COLOR) - { - subresource_range.aspects -= ImageAspects::COLOR; - subresource_range.aspects |= match image.format().planes().len() { - 2 => ImageAspects::PLANE_0 | ImageAspects::PLANE_1, - 3 => ImageAspects::PLANE_0 | ImageAspects::PLANE_1 | ImageAspects::PLANE_2, - _ => unreachable!(), - }; - } - - SubresourceRanges::new(subresource_range, image.mip_levels(), image.array_layers()) - } - - fn new( - subresource_range: ImageSubresourceRange, - image_mip_levels: u32, - image_array_layers: u32, - ) -> Self { - assert!(subresource_range.mip_levels.end <= image_mip_levels); - assert!(subresource_range.array_layers.end <= image_array_layers); - assert!(!subresource_range.aspects.is_empty()); - assert!(!subresource_range.mip_levels.is_empty()); - assert!(!subresource_range.array_layers.is_empty()); - - let mip_level_size = DeviceSize::from(image_array_layers); - let mip_levels = DeviceSize::from(subresource_range.mip_levels.start) * mip_level_size - ..DeviceSize::from(subresource_range.mip_levels.end) * mip_level_size; - let aspect_size = mip_level_size * DeviceSize::from(image_mip_levels); - let aspect_offset = 0; - let mip_level_offset = mip_levels.end - mip_level_size; - - let granularity = if subresource_range.array_layers != (0..image_array_layers) { - SubresourceRangeGranularity::ArrayLayer - } else if subresource_range.mip_levels != (0..image_mip_levels) { - SubresourceRangeGranularity::MipLevel - } else { - SubresourceRangeGranularity::Aspect - }; - - SubresourceRanges { - aspects: vk::ImageAspectFlags::from(subresource_range.aspects).as_raw(), - mip_levels, - array_layers: subresource_range.array_layers, - aspect_size, - mip_level_size, - aspect_offset, - mip_level_offset, - granularity, - } - } - - fn skip_unset_aspects(&mut self) { - let aspect_count = self.aspects.trailing_zeros(); - self.aspects >>= aspect_count; - self.aspect_offset += DeviceSize::from(aspect_count) * self.aspect_size; - } - - fn next_aspect(&mut self) { - self.aspects >>= 1; - self.aspect_offset += self.aspect_size; - } -} - -impl Iterator for SubresourceRanges { - type Item = Range; - - fn next(&mut self) -> Option { - if self.aspects != 0 { - match self.granularity { - SubresourceRangeGranularity::Aspect => { - self.skip_unset_aspects(); - - let aspect_count = DeviceSize::from(self.aspects.trailing_ones()); - let start = self.aspect_offset; - let end = self.aspect_offset + aspect_count * self.aspect_size; - - self.aspects >>= aspect_count; - self.aspect_offset += aspect_count * self.aspect_size; - - Some(Range { start, end }) - } - SubresourceRangeGranularity::MipLevel => { - self.skip_unset_aspects(); - - let start = self.aspect_offset + self.mip_levels.start; - let end = self.aspect_offset + self.mip_levels.end; - - self.next_aspect(); - - 
Some(Range { start, end }) - } - SubresourceRangeGranularity::ArrayLayer => { - self.mip_level_offset += self.mip_level_size; - - if self.mip_level_offset == self.mip_levels.end { - self.mip_level_offset = self.mip_levels.start; - self.skip_unset_aspects(); - } - - let offset = self.aspect_offset + self.mip_level_offset; - let start = offset + DeviceSize::from(self.array_layers.start); - let end = offset + DeviceSize::from(self.array_layers.end); - - if self.mip_level_offset == self.mip_levels.end - self.mip_level_size { - self.next_aspect(); - } - - Some(Range { start, end }) - } - } - } else { - None - } - } -} - -fn range_to_subresources( - mut range: Range, - image_mip_levels: u32, - image_array_layers: u32, -) -> ImageSubresourceRange { - debug_assert!(!range.is_empty()); - - let aspect_size = DeviceSize::from(image_mip_levels) * DeviceSize::from(image_array_layers); - let mip_level_size = DeviceSize::from(image_array_layers); - - if range.end - range.start > aspect_size { - debug_assert!(range.start % aspect_size == 0); - debug_assert!(range.end % aspect_size == 0); - - let aspect_start = (range.start / aspect_size) as u32; - let aspect_end = (range.end / aspect_size) as u32; - let aspects = u32::MAX >> (u32::BITS - (aspect_end - aspect_start)) << aspect_start; - - ImageSubresourceRange { - aspects: vk::ImageAspectFlags::from_raw(aspects).into(), - mip_levels: 0..image_mip_levels, - array_layers: 0..image_array_layers, - } - } else { - let aspect_index = (range.start / aspect_size) as u32; - range.start %= aspect_size; - range.end %= aspect_size; - - // Wraparound - if range.end == 0 { - range.end = aspect_size; - } - - if range.end - range.start > mip_level_size { - debug_assert!(range.start % mip_level_size == 0); - debug_assert!(range.end % mip_level_size == 0); - - let mip_level_start = (range.start / mip_level_size) as u32; - let mip_level_end = (range.end / mip_level_size) as u32; - - ImageSubresourceRange { - aspects: vk::ImageAspectFlags::from_raw(1 << aspect_index).into(), - mip_levels: mip_level_start..mip_level_end, - array_layers: 0..image_array_layers, - } - } else { - let mip_level = (range.start / mip_level_size) as u32; - range.start %= mip_level_size; - range.end %= mip_level_size; - - // Wraparound - if range.end == 0 { - range.end = mip_level_size; - } - - let array_layer_start = range.start as u32; - let array_layer_end = range.end as u32; - - ImageSubresourceRange { - aspects: vk::ImageAspectFlags::from_raw(1 << aspect_index).into(), - mip_levels: mip_level..mip_level + 1, - array_layers: array_layer_start..array_layer_end, - } - } - } -} - macro_rules! access_types { ( $( @@ -1273,19 +1102,18 @@ macro_rules! access_types { stage_mask: $($stage_flag:ident)|+, access_mask: $($access_flag:ident)|+, image_layout: $image_layout:ident, + valid_for: $($valid_for:ident)|+, } )* ) => { - /// Specifies which type of access is performed on a subresource. + /// Specifies which type of access is performed on a resource. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum AccessType { - None, $( $(#[$meta])* $name, )* - Present, } impl AccessType { @@ -1294,12 +1122,10 @@ macro_rules! access_types { #[must_use] pub const fn stage_mask(self) -> PipelineStages { match self { - Self::None => PipelineStages::empty(), $( Self::$name => PipelineStages::empty() $(.union(PipelineStages::$stage_flag))+, )* - Self::Present => PipelineStages::empty(), } } @@ -1308,348 +1134,391 @@ macro_rules! 
access_types { #[must_use] pub const fn access_mask(self) -> AccessFlags { match self { - Self::None => AccessFlags::empty(), $( Self::$name => AccessFlags::empty() $(.union(AccessFlags::$access_flag))+, )* - Self::Present => AccessFlags::empty(), } } - /// Returns the optimal image layout for this type of access, if any. + /// Returns the image layout for this type of access. #[inline] #[must_use] - pub const fn image_layout(self) -> ImageLayout { + pub const fn image_layout(self, layout_type: ImageLayoutType) -> ImageLayout { + if layout_type.is_general() { + return ImageLayout::General; + } + match self { - Self::None => ImageLayout::Undefined, $( Self::$name => ImageLayout::$image_layout, )* - Self::Present => ImageLayout::PresentSrc, + } + } + + const fn valid_for(self) -> u8 { + match self { + $( + Self::$name => $($valid_for)|+, + )* } } } }; } +const BUFFER: u8 = 1 << 0; +const IMAGE: u8 = 1 << 1; + access_types! { IndirectCommandRead { stage_mask: DRAW_INDIRECT, access_mask: INDIRECT_COMMAND_READ, image_layout: Undefined, + valid_for: BUFFER, } IndexRead { stage_mask: INDEX_INPUT, access_mask: INDEX_READ, image_layout: Undefined, + valid_for: BUFFER, } VertexAttributeRead { stage_mask: VERTEX_ATTRIBUTE_INPUT, access_mask: VERTEX_ATTRIBUTE_READ, image_layout: Undefined, + valid_for: BUFFER, } VertexShaderUniformRead { stage_mask: VERTEX_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, + valid_for: BUFFER, } VertexShaderSampledRead { stage_mask: VERTEX_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } VertexShaderStorageRead { stage_mask: VERTEX_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } VertexShaderStorageWrite { stage_mask: VERTEX_SHADER, access_mask: SHADER_STORAGE_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } VertexShaderAccelerationStructureRead { stage_mask: VERTEX_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } TessellationControlShaderUniformRead { stage_mask: TESSELLATION_CONTROL_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, + valid_for: BUFFER, } TessellationControlShaderSampledRead { stage_mask: TESSELLATION_CONTROL_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } TessellationControlShaderStorageRead { stage_mask: TESSELLATION_CONTROL_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } TessellationControlShaderStorageWrite { stage_mask: TESSELLATION_CONTROL_SHADER, access_mask: SHADER_STORAGE_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } TessellationControlShaderAccelerationStructureRead { stage_mask: TESSELLATION_CONTROL_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } TessellationEvaluationShaderUniformRead { stage_mask: TESSELLATION_EVALUATION_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, + valid_for: BUFFER, } TessellationEvaluationShaderSampledRead { stage_mask: TESSELLATION_EVALUATION_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } TessellationEvaluationShaderStorageRead { stage_mask: TESSELLATION_EVALUATION_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } TessellationEvaluationShaderStorageWrite { stage_mask: TESSELLATION_EVALUATION_SHADER, access_mask: SHADER_STORAGE_WRITE, 
image_layout: General, + valid_for: BUFFER | IMAGE, } TessellationEvaluationShaderAccelerationStructureRead { stage_mask: TESSELLATION_EVALUATION_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } GeometryShaderUniformRead { stage_mask: GEOMETRY_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, + valid_for: BUFFER, } GeometryShaderSampledRead { stage_mask: GEOMETRY_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } GeometryShaderStorageRead { stage_mask: GEOMETRY_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } GeometryShaderStorageWrite { stage_mask: GEOMETRY_SHADER, access_mask: SHADER_STORAGE_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } GeometryShaderAccelerationStructureRead { stage_mask: GEOMETRY_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } FragmentShaderUniformRead { stage_mask: FRAGMENT_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, + valid_for: BUFFER, } FragmentShaderColorInputAttachmentRead { stage_mask: FRAGMENT_SHADER, access_mask: INPUT_ATTACHMENT_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: IMAGE, } FragmentShaderDepthStencilInputAttachmentRead { stage_mask: FRAGMENT_SHADER, access_mask: INPUT_ATTACHMENT_READ, image_layout: DepthStencilReadOnlyOptimal, + valid_for: IMAGE, } FragmentShaderSampledRead { stage_mask: FRAGMENT_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } FragmentShaderStorageRead { stage_mask: FRAGMENT_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } FragmentShaderStorageWrite { stage_mask: FRAGMENT_SHADER, access_mask: SHADER_STORAGE_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } FragmentShaderAccelerationStructureRead { stage_mask: FRAGMENT_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } DepthStencilAttachmentRead { stage_mask: EARLY_FRAGMENT_TESTS | LATE_FRAGMENT_TESTS, access_mask: DEPTH_STENCIL_ATTACHMENT_READ, image_layout: DepthStencilReadOnlyOptimal, + valid_for: IMAGE, } DepthStencilAttachmentWrite { stage_mask: EARLY_FRAGMENT_TESTS | LATE_FRAGMENT_TESTS, access_mask: DEPTH_STENCIL_ATTACHMENT_WRITE, image_layout: DepthStencilAttachmentOptimal, + valid_for: IMAGE, } DepthAttachmentWriteStencilReadOnly { stage_mask: EARLY_FRAGMENT_TESTS | LATE_FRAGMENT_TESTS, access_mask: DEPTH_STENCIL_ATTACHMENT_READ | DEPTH_STENCIL_ATTACHMENT_WRITE, image_layout: DepthAttachmentStencilReadOnlyOptimal, + valid_for: IMAGE, } DepthReadOnlyStencilAttachmentWrite { stage_mask: EARLY_FRAGMENT_TESTS | LATE_FRAGMENT_TESTS, access_mask: DEPTH_STENCIL_ATTACHMENT_READ | DEPTH_STENCIL_ATTACHMENT_WRITE, image_layout: DepthReadOnlyStencilAttachmentOptimal, + valid_for: IMAGE, } ColorAttachmentRead { stage_mask: COLOR_ATTACHMENT_OUTPUT, access_mask: COLOR_ATTACHMENT_READ, image_layout: ColorAttachmentOptimal, + valid_for: IMAGE, } ColorAttachmentWrite { stage_mask: COLOR_ATTACHMENT_OUTPUT, access_mask: COLOR_ATTACHMENT_WRITE, image_layout: ColorAttachmentOptimal, - } - - ColorAttachmentReadWrite { - stage_mask: COLOR_ATTACHMENT_OUTPUT, - access_mask: COLOR_ATTACHMENT_READ | COLOR_ATTACHMENT_WRITE, - image_layout: ColorAttachmentOptimal, + valid_for: IMAGE, } ComputeShaderUniformRead { stage_mask: COMPUTE_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, 
+ valid_for: BUFFER, } ComputeShaderSampledRead { stage_mask: COMPUTE_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } ComputeShaderStorageRead { stage_mask: COMPUTE_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } ComputeShaderStorageWrite { stage_mask: COMPUTE_SHADER, access_mask: SHADER_STORAGE_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } ComputeShaderAccelerationStructureRead { stage_mask: COMPUTE_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, - } - - HostRead { - stage_mask: HOST, - access_mask: HOST_READ, - image_layout: General, - } - - HostWrite { - stage_mask: HOST, - access_mask: HOST_WRITE, - image_layout: General, + valid_for: BUFFER, } CopyTransferRead { stage_mask: COPY, access_mask: TRANSFER_READ, image_layout: TransferSrcOptimal, + valid_for: BUFFER | IMAGE, } CopyTransferWrite { stage_mask: COPY, access_mask: TRANSFER_WRITE, image_layout: TransferDstOptimal, + valid_for: BUFFER | IMAGE, } BlitTransferRead { stage_mask: BLIT, access_mask: TRANSFER_READ, image_layout: TransferSrcOptimal, + valid_for: IMAGE, } BlitTransferWrite { stage_mask: BLIT, access_mask: TRANSFER_WRITE, image_layout: TransferDstOptimal, + valid_for: IMAGE, } ResolveTransferRead { stage_mask: RESOLVE, access_mask: TRANSFER_READ, image_layout: TransferSrcOptimal, + valid_for: IMAGE, } ResolveTransferWrite { stage_mask: RESOLVE, access_mask: TRANSFER_WRITE, image_layout: TransferDstOptimal, + valid_for: IMAGE, } ClearTransferWrite { stage_mask: CLEAR, access_mask: TRANSFER_WRITE, image_layout: TransferDstOptimal, + valid_for: IMAGE, } AccelerationStructureCopyTransferRead { stage_mask: ACCELERATION_STRUCTURE_COPY, access_mask: TRANSFER_READ, image_layout: Undefined, + valid_for: BUFFER, } AccelerationStructureCopyTransferWrite { stage_mask: ACCELERATION_STRUCTURE_COPY, access_mask: TRANSFER_WRITE, image_layout: Undefined, + valid_for: BUFFER, } // TODO: @@ -1657,6 +1526,7 @@ access_types! { // stage_mask: VIDEO_DECODE, // access_mask: VIDEO_DECODE_READ, // image_layout: Undefined, + // valid_for: BUFFER, // } // TODO: @@ -1664,6 +1534,7 @@ access_types! { // stage_mask: VIDEO_DECODE, // access_mask: VIDEO_DECODE_WRITE, // image_layout: VideoDecodeDst, + // valid_for: IMAGE, // } // TODO: @@ -1671,6 +1542,7 @@ access_types! { // stage_mask: VIDEO_DECODE, // access_mask: VIDEO_DECODE_READ, // image_layout: VideoDecodeDpb, + // valid_for: IMAGE, // } // TODO: @@ -1678,6 +1550,7 @@ access_types! { // stage_mask: VIDEO_DECODE, // access_mask: VIDEO_DECODE_WRITE, // image_layout: VideoDecodeDpb, + // valid_for: IMAGE, // } // TODO: @@ -1685,6 +1558,7 @@ access_types! { // stage_mask: VIDEO_ENCODE, // access_mask: VIDEO_ENCODE_READ, // image_layout: VideoEncodeSrc, + // valid_for: IMAGE, // } // TODO: @@ -1692,6 +1566,7 @@ access_types! { // stage_mask: VIDEO_ENCODE, // access_mask: VIDEO_ENCODE_WRITE, // image_layout: Undefined, + // valid_for: BUFFER, // } // TODO: @@ -1699,6 +1574,7 @@ access_types! { // stage_mask: VIDEO_ENCODE, // access_mask: VIDEO_ENCODE_READ, // image_layout: VideoEncodeDpb, + // valid_for: IMAGE, // } // TODO: @@ -1706,6 +1582,7 @@ access_types! { // stage_mask: VIDEO_ENCODE, // access_mask: VIDEO_ENCODE_WRITE, // image_layout: VideoEncodeDpb, + // valid_for: IMAGE, // } // TODO: @@ -1713,6 +1590,7 @@ access_types! 
{ // stage_mask: RAY_TRACING_SHADER, // access_mask: UNIFORM_READ, // image_layout: Undefined, + // valid_for: BUFFER, // } // TODO: @@ -1720,6 +1598,7 @@ access_types! { // stage_mask: RAY_TRACING_SHADER, // access_mask: INPUT_ATTACHMENT_READ, // image_layout: ShaderReadOnlyOptimal, + // valid_for: IMAGE, // } // TODO: @@ -1727,6 +1606,7 @@ access_types! { // stage_mask: RAY_TRACING_SHADER, // access_mask: INPUT_ATTACHMENT_READ, // image_layout: DepthStencilReadOnlyOptimal, + // valid_for: IMAGE, // } // TODO: @@ -1734,6 +1614,7 @@ access_types! { // stage_mask: RAY_TRACING_SHADER, // access_mask: SHADER_SAMPLED_READ, // image_layout: ShaderReadOnlyOptimal, + // valid_for: BUFFER | IMAGE, // } // TODO: @@ -1741,6 +1622,7 @@ access_types! { // stage_mask: RAY_TRACING_SHADER, // access_mask: SHADER_STORAGE_READ, // image_layout: General, + // valid_for: BUFFER | IMAGE, // } // TODO: @@ -1748,6 +1630,7 @@ access_types! { // stage_mask: RAY_TRACING_SHADER, // access_mask: SHADER_STORAGE_WRITE, // image_layout: General, + // valid_for: BUFFER | IMAGE, // } // TODO: @@ -1755,6 +1638,7 @@ access_types! { // stage_mask: RAY_TRACING_SHADER, // access_mask: SHADER_BINDING_TABLE_READ, // image_layout: Undefined, + // valid_for: BUFFER, // } // TODO: @@ -1762,96 +1646,119 @@ access_types! { // stage_mask: RAY_TRACING_SHADER, // access_mask: ACCELERATION_STRUCTURE_READ, // image_layout: Undefined, + // valid_for: BUFFER, // } TaskShaderUniformRead { stage_mask: TASK_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, + valid_for: BUFFER, } TaskShaderSampledRead { stage_mask: TASK_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } TaskShaderStorageRead { stage_mask: TASK_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } TaskShaderStorageWrite { stage_mask: TASK_SHADER, access_mask: SHADER_STORAGE_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } TaskShaderAccelerationStructureRead { stage_mask: TASK_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } MeshShaderUniformRead { stage_mask: MESH_SHADER, access_mask: UNIFORM_READ, image_layout: Undefined, + valid_for: BUFFER, } MeshShaderSampledRead { stage_mask: MESH_SHADER, access_mask: SHADER_SAMPLED_READ, image_layout: ShaderReadOnlyOptimal, + valid_for: BUFFER | IMAGE, } MeshShaderStorageRead { stage_mask: MESH_SHADER, access_mask: SHADER_STORAGE_READ, image_layout: General, + valid_for: BUFFER | IMAGE, } MeshShaderStorageWrite { stage_mask: MESH_SHADER, access_mask: SHADER_STORAGE_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } MeshShaderAccelerationStructureRead { stage_mask: MESH_SHADER, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, + } + + AccelerationStructureBuildIndirectCommandRead { + stage_mask: ACCELERATION_STRUCTURE_BUILD, + access_mask: INDIRECT_COMMAND_READ, + image_layout: Undefined, + valid_for: BUFFER, } AccelerationStructureBuildShaderRead { stage_mask: ACCELERATION_STRUCTURE_BUILD, access_mask: SHADER_READ, image_layout: Undefined, + valid_for: BUFFER, } AccelerationStructureBuildAccelerationStructureRead { stage_mask: ACCELERATION_STRUCTURE_BUILD, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } AccelerationStructureBuildAccelerationStructureWrite { stage_mask: ACCELERATION_STRUCTURE_BUILD, access_mask: ACCELERATION_STRUCTURE_WRITE, image_layout: Undefined, + valid_for: 
BUFFER, } AccelerationStructureCopyAccelerationStructureRead { stage_mask: ACCELERATION_STRUCTURE_COPY, access_mask: ACCELERATION_STRUCTURE_READ, image_layout: Undefined, + valid_for: BUFFER, } AccelerationStructureCopyAccelerationStructureWrite { stage_mask: ACCELERATION_STRUCTURE_COPY, access_mask: ACCELERATION_STRUCTURE_WRITE, image_layout: Undefined, + valid_for: BUFFER, } /// Only use this for prototyping or debugging please. 🔫 Please. 🔫 @@ -1859,28 +1766,21 @@ access_types! { stage_mask: ALL_COMMANDS, access_mask: MEMORY_READ | MEMORY_WRITE, image_layout: General, + valid_for: BUFFER | IMAGE, } } impl AccessType { pub(crate) const fn is_valid_buffer_access_type(self) -> bool { - // Let's reuse the image layout lookup table, since it already exists. - let image_layout = self.image_layout(); - - matches!(image_layout, ImageLayout::Undefined) && !matches!(self, AccessType::None) + self.valid_for() & BUFFER != 0 } pub(crate) const fn is_valid_image_access_type(self) -> bool { - let image_layout = self.image_layout(); - - !matches!( - image_layout, - ImageLayout::Undefined | ImageLayout::PresentSrc, - ) + self.valid_for() & IMAGE != 0 } } -/// Specifies which type of layout an image subresource is accessed in. +/// Specifies which type of layout an image resource is accessed in. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum ImageLayoutType { @@ -1922,219 +1822,14 @@ impl ImageLayoutType { } } -type Result = ::std::result::Result; - -#[allow(clippy::erasing_op, clippy::identity_op)] -#[cfg(test)] -mod tests { - use super::*; - use vulkano::image::ImageAspects; - - #[test] - fn subresource_ranges_aspect_granularity() { - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::COLOR, - mip_levels: 0..4, - array_layers: 0..8, - }, - 4, - 8, - ); - - assert_eq!(iter.next(), Some(0 * 32..1 * 32)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::DEPTH | ImageAspects::STENCIL, - mip_levels: 0..2, - array_layers: 0..12, - }, - 2, - 12, - ); - - assert_eq!(iter.next(), Some(1 * 24..3 * 24)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::COLOR | ImageAspects::METADATA | ImageAspects::PLANE_0, - mip_levels: 0..5, - array_layers: 0..10, - }, - 5, - 10, - ); - - assert_eq!(iter.next(), Some(0 * 50..1 * 50)); - assert_eq!(iter.next(), Some(3 * 50..5 * 50)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::COLOR - | ImageAspects::DEPTH - | ImageAspects::STENCIL - | ImageAspects::PLANE_0 - | ImageAspects::PLANE_2 - | ImageAspects::MEMORY_PLANE_2 - | ImageAspects::MEMORY_PLANE_3, - mip_levels: 0..3, - array_layers: 0..20, - }, - 3, - 20, - ); - - assert_eq!(iter.next(), Some(0 * 60..3 * 60)); - assert_eq!(iter.next(), Some(4 * 60..5 * 60)); - assert_eq!(iter.next(), Some(6 * 60..7 * 60)); - assert_eq!(iter.next(), Some(9 * 60..11 * 60)); - assert_eq!(iter.next(), None); - } - - #[test] - fn subresource_ranges_mip_level_granularity() { - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::DEPTH, - mip_levels: 1..3, - array_layers: 0..8, - }, - 5, - 8, - ); - - assert_eq!(iter.next(), Some(1 * 40 + 1 * 8..1 * 40 + 3 * 8)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::PLANE_0 | 
ImageAspects::PLANE_1 | ImageAspects::PLANE_2, - mip_levels: 1..3, - array_layers: 0..12, - }, - 3, - 12, - ); - - assert_eq!(iter.next(), Some(4 * 36 + 1 * 12..4 * 36 + 3 * 12)); - assert_eq!(iter.next(), Some(5 * 36 + 1 * 12..5 * 36 + 3 * 12)); - assert_eq!(iter.next(), Some(6 * 36 + 1 * 12..6 * 36 + 3 * 12)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::DEPTH - | ImageAspects::STENCIL - | ImageAspects::PLANE_0 - | ImageAspects::PLANE_1 - | ImageAspects::PLANE_2, - mip_levels: 1..3, - array_layers: 0..10, - }, - 4, - 10, - ); - - dbg!(iter.clone().collect::>()); - - assert_eq!(iter.next(), Some(1 * 40 + 1 * 10..1 * 40 + 3 * 10)); - assert_eq!(iter.next(), Some(2 * 40 + 1 * 10..2 * 40 + 3 * 10)); - assert_eq!(iter.next(), Some(4 * 40 + 1 * 10..4 * 40 + 3 * 10)); - assert_eq!(iter.next(), Some(5 * 40 + 1 * 10..5 * 40 + 3 * 10)); - assert_eq!(iter.next(), Some(6 * 40 + 1 * 10..6 * 40 + 3 * 10)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::METADATA - | ImageAspects::PLANE_2 - | ImageAspects::MEMORY_PLANE_1, - mip_levels: 2..4, - array_layers: 0..6, - }, - 4, - 6, - ); - - assert_eq!(iter.next(), Some(3 * 24 + 2 * 6..3 * 24 + 4 * 6)); - assert_eq!(iter.next(), Some(6 * 24 + 2 * 6..6 * 24 + 4 * 6)); - assert_eq!(iter.next(), Some(8 * 24 + 2 * 6..8 * 24 + 4 * 6)); - assert_eq!(iter.next(), None); - } - - #[test] - fn subresource_ranges_array_layer_granularity() { - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::STENCIL, - mip_levels: 0..4, - array_layers: 2..9, - }, - 4, - 10, - ); - - assert_eq!(iter.next(), Some(2 * 40 + 0 * 10 + 2..2 * 40 + 0 * 10 + 9)); - assert_eq!(iter.next(), Some(2 * 40 + 1 * 10 + 2..2 * 40 + 1 * 10 + 9)); - assert_eq!(iter.next(), Some(2 * 40 + 2 * 10 + 2..2 * 40 + 2 * 10 + 9)); - assert_eq!(iter.next(), Some(2 * 40 + 3 * 10 + 2..2 * 40 + 3 * 10 + 9)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::COLOR | ImageAspects::METADATA, - mip_levels: 1..3, - array_layers: 3..8, - }, - 3, - 8, - ); - - assert_eq!(iter.next(), Some(0 * 24 + 1 * 8 + 3..0 * 24 + 1 * 8 + 8)); - assert_eq!(iter.next(), Some(0 * 24 + 2 * 8 + 3..0 * 24 + 2 * 8 + 8)); - assert_eq!(iter.next(), Some(3 * 24 + 1 * 8 + 3..3 * 24 + 1 * 8 + 8)); - assert_eq!(iter.next(), Some(3 * 24 + 2 * 8 + 3..3 * 24 + 2 * 8 + 8)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::DEPTH | ImageAspects::PLANE_0 | ImageAspects::PLANE_1, - mip_levels: 1..3, - array_layers: 2..4, - }, - 5, - 6, - ); - - assert_eq!(iter.next(), Some(1 * 30 + 1 * 6 + 2..1 * 30 + 1 * 6 + 4)); - assert_eq!(iter.next(), Some(1 * 30 + 2 * 6 + 2..1 * 30 + 2 * 6 + 4)); - assert_eq!(iter.next(), Some(4 * 30 + 1 * 6 + 2..4 * 30 + 1 * 6 + 4)); - assert_eq!(iter.next(), Some(4 * 30 + 2 * 6 + 2..4 * 30 + 2 * 6 + 4)); - assert_eq!(iter.next(), Some(5 * 30 + 1 * 6 + 2..5 * 30 + 1 * 6 + 4)); - assert_eq!(iter.next(), Some(5 * 30 + 2 * 6 + 2..5 * 30 + 2 * 6 + 4)); - assert_eq!(iter.next(), None); - - let mut iter = SubresourceRanges::new( - ImageSubresourceRange { - aspects: ImageAspects::PLANE_2 - | ImageAspects::MEMORY_PLANE_0 - | ImageAspects::MEMORY_PLANE_1 - | ImageAspects::MEMORY_PLANE_2, - mip_levels: 5..6, - array_layers: 0..3, - }, - 8, - 4, - ); +/// Specifies which type of host access is 
performed on a resource. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum HostAccessType { + /// The resource is read on the host. + Read, - assert_eq!(iter.next(), Some(6 * 32 + 5 * 4 + 0..6 * 32 + 5 * 4 + 3)); - assert_eq!(iter.next(), Some(7 * 32 + 5 * 4 + 0..7 * 32 + 5 * 4 + 3)); - assert_eq!(iter.next(), Some(8 * 32 + 5 * 4 + 0..8 * 32 + 5 * 4 + 3)); - assert_eq!(iter.next(), Some(9 * 32 + 5 * 4 + 0..9 * 32 + 5 * 4 + 3)); - assert_eq!(iter.next(), None); - } + /// The resource is written on the host. + Write, } + +type Result = ::std::result::Result; diff --git a/vulkano/src/sync/pipeline.rs b/vulkano/src/sync/pipeline.rs index 411d505a08..9a2bf431c2 100644 --- a/vulkano/src/sync/pipeline.rs +++ b/vulkano/src/sync/pipeline.rs @@ -512,33 +512,33 @@ vulkan_bitflags! { /// A set of memory access types that are included in a memory dependency. AccessFlags impl { - // TODO: use the Vulkano associated constants once | becomes const for custom types. - const WRITES: AccessFlags = AccessFlags( - ash::vk::AccessFlags2::SHADER_WRITE.as_raw() - | ash::vk::AccessFlags2::COLOR_ATTACHMENT_WRITE.as_raw() - | ash::vk::AccessFlags2::DEPTH_STENCIL_ATTACHMENT_WRITE.as_raw() - | ash::vk::AccessFlags2::TRANSFER_WRITE.as_raw() - | ash::vk::AccessFlags2::HOST_WRITE.as_raw() - | ash::vk::AccessFlags2::MEMORY_WRITE.as_raw() - | ash::vk::AccessFlags2::SHADER_STORAGE_WRITE.as_raw() - | ash::vk::AccessFlags2::VIDEO_DECODE_WRITE_KHR.as_raw() - | ash::vk::AccessFlags2::VIDEO_ENCODE_WRITE_KHR.as_raw() - | ash::vk::AccessFlags2::TRANSFORM_FEEDBACK_WRITE_EXT.as_raw() - | ash::vk::AccessFlags2::TRANSFORM_FEEDBACK_COUNTER_WRITE_EXT.as_raw() - | ash::vk::AccessFlags2::COMMAND_PREPROCESS_WRITE_NV.as_raw() - | ash::vk::AccessFlags2::ACCELERATION_STRUCTURE_WRITE_KHR.as_raw() - ); - - pub(crate) fn contains_reads(self) -> bool { - !(self - Self::WRITES).is_empty() + const WRITES: AccessFlags = AccessFlags::SHADER_WRITE + .union(AccessFlags::COLOR_ATTACHMENT_WRITE) + .union(AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE) + .union(AccessFlags::TRANSFER_WRITE) + .union(AccessFlags::HOST_WRITE) + .union(AccessFlags::MEMORY_WRITE) + .union(AccessFlags::SHADER_STORAGE_WRITE) + .union(AccessFlags::VIDEO_DECODE_WRITE) + .union(AccessFlags::VIDEO_ENCODE_WRITE) + .union(AccessFlags::TRANSFORM_FEEDBACK_WRITE) + .union(AccessFlags::TRANSFORM_FEEDBACK_COUNTER_WRITE) + .union(AccessFlags::COMMAND_PREPROCESS_WRITE) + .union(AccessFlags::ACCELERATION_STRUCTURE_WRITE); + + /// Returns whether `self` contains any read flags. + #[inline] + pub const fn contains_reads(self) -> bool { + !self.difference(Self::WRITES).is_empty() } - pub(crate) fn contains_writes(self) -> bool { + /// Returns whether `self` contains any write flags. + #[inline] + pub const fn contains_writes(self) -> bool { self.intersects(Self::WRITES) } - /// Returns whether `self` contains stages that are only available in - /// `VkAccessFlagBits2`. + /// Returns whether `self` contains flags that are only available in `VkAccessFlagBits2`. pub(crate) fn contains_flags2(self) -> bool { !(self - (AccessFlags::INDIRECT_COMMAND_READ
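As a closing illustration of the reworked access classification (the `access_types!` table and the now-public `AccessFlags` helpers), here is a hedged sketch; the chosen flags and access type are examples only, and the `ImageLayoutType` variant names are taken from their use earlier in this patch.

use vulkano::image::ImageLayout;
use vulkano::sync::{AccessFlags, PipelineStages};
use vulkano_taskgraph::resource::{AccessType, ImageLayoutType};

fn classification_examples() {
    // An access type resolves to its table layout unless the general layout
    // type is requested.
    let ty = AccessType::FragmentShaderSampledRead;
    assert_eq!(ty.stage_mask(), PipelineStages::FRAGMENT_SHADER);
    assert_eq!(ty.access_mask(), AccessFlags::SHADER_SAMPLED_READ);
    assert_eq!(
        ty.image_layout(ImageLayoutType::Optimal),
        ImageLayout::ShaderReadOnlyOptimal,
    );
    assert_eq!(ty.image_layout(ImageLayoutType::General), ImageLayout::General);

    // Anything outside the WRITES set counts as a read.
    assert!(AccessFlags::SHADER_SAMPLED_READ.contains_reads());
    assert!(!AccessFlags::SHADER_SAMPLED_READ.contains_writes());
    assert!(AccessFlags::TRANSFER_WRITE.contains_writes());
}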