diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5a9d07ad27..3d02995134 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -74,6 +74,7 @@ By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402)
 #### General
 - Derive storage bindings via `naga::StorageAccess` instead of `naga::GlobalUse`. By @teoxoy in [#3985](https://github.com/gfx-rs/wgpu/pull/3985).
+- `Queue::on_submitted_work_done` callbacks will now always be called after all previous `BufferSlice::map_async` callbacks, even when there are no active submissions. By @cwfitzgerald in [#4036](https://github.com/gfx-rs/wgpu/pull/4036).
 
 #### Vulkan
 
 - Fix enabling `wgpu::Features::PARTIALLY_BOUND_BINDING_ARRAY` not being actually enabled in vulkan backend. By @39ali in [#3772](https://github.com/gfx-rs/wgpu/pull/3772).
diff --git a/Cargo.lock b/Cargo.lock
index 9117ea2316..aa4e692c89 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3459,6 +3459,7 @@ dependencies = [
  "log",
  "naga",
  "nv-flip",
+ "parking_lot",
  "png",
  "pollster",
  "raw-window-handle 0.5.2",
diff --git a/tests/Cargo.toml b/tests/Cargo.toml
index 85dae4959c..5738ed1bdb 100644
--- a/tests/Cargo.toml
+++ b/tests/Cargo.toml
@@ -24,6 +24,7 @@ bytemuck.workspace = true
 cfg-if.workspace = true
 env_logger.workspace = true
 log.workspace = true
+parking_lot.workspace = true
 png.workspace = true
 pollster.workspace = true
 wgpu.workspace = true
diff --git a/tests/tests/regression/issue_4024.rs b/tests/tests/regression/issue_4024.rs
new file mode 100644
index 0000000000..959f58ffa5
--- /dev/null
+++ b/tests/tests/regression/issue_4024.rs
@@ -0,0 +1,91 @@
+use std::sync::Arc;
+
+use parking_lot::Mutex;
+use wgpu_test::{initialize_test, TestParameters};
+
+use wasm_bindgen_test::wasm_bindgen_test;
+use wgpu::*;
+
+/// The WebGPU specification has very specific requirements about the ordering of map_async
+/// and on_submitted_work_done callbacks. Specifically, all map_async callbacks that are initiated
+/// before a given on_submitted_work_done callback must be invoked before the on_submitted_work_done
+/// callback is invoked.
+///
+/// We previously immediately invoked on_submitted_work_done callbacks if there was no active submission
+/// to add them to. This is incorrect, as we do not immediately invoke map_async callbacks.
+#[wasm_bindgen_test]
+#[test]
+fn queue_submitted_callback_ordering() {
+    initialize_test(TestParameters::default(), |ctx| {
+        // Create a mappable buffer.
+        let buffer = ctx.device.create_buffer(&BufferDescriptor {
+            label: Some("mappable buffer"),
+            size: 4,
+            usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+
+        // Encode some work using it. The specifics of this work don't matter, just
+        // that the buffer is used.
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&CommandEncoderDescriptor {
+                label: Some("encoder"),
+            });
+
+        encoder.clear_buffer(&buffer, 0, None);
+
+        // Submit the work.
+        ctx.queue.submit(Some(encoder.finish()));
+        // Ensure the work is finished.
+        ctx.device.poll(MaintainBase::Wait);
+
+        #[derive(Debug)]
+        struct OrderingContext {
+            /// Incremented every time a callback is invoked.
+            /// This allows the callbacks to know their ordering.
+            counter: u8,
+            /// The value of the counter when the map_async callback was invoked.
+            value_read_map_async: Option<u8>,
+            /// The value of the counter when the queue submitted work done callback was invoked.
+            value_read_queue_submitted: Option<u8>,
+        }
+
+        // Create shared ownership of the ordering context, and clone 2 copies.
+        let ordering = Arc::new(Mutex::new(OrderingContext {
+            counter: 0,
+            value_read_map_async: None,
+            value_read_queue_submitted: None,
+        }));
+        let ordering_clone_map_async = Arc::clone(&ordering);
+        let ordering_clone_queue_submitted = Arc::clone(&ordering);
+
+        // Register the callbacks.
+        buffer.slice(..).map_async(MapMode::Read, move |_| {
+            let mut guard = ordering_clone_map_async.lock();
+            guard.value_read_map_async = Some(guard.counter);
+            guard.counter += 1;
+        });
+
+        // If the bug is present, this callback will be invoked immediately inside this function,
+        // despite the fact that there is an outstanding map_async callback.
+        ctx.queue.on_submitted_work_done(move || {
+            let mut guard = ordering_clone_queue_submitted.lock();
+            guard.value_read_queue_submitted = Some(guard.counter);
+            guard.counter += 1;
+        });
+
+        // No GPU work is happening at this point, but we want to process callbacks.
+        ctx.device.poll(MaintainBase::Poll);
+
+        // Extract the ordering out of the arc.
+        let ordering = Arc::try_unwrap(ordering).unwrap().into_inner();
+
+        // There were two callbacks invoked.
+        assert_eq!(ordering.counter, 2);
+        // The map_async callback was invoked first.
+        assert_eq!(ordering.value_read_map_async, Some(0));
+        // The queue submitted work done callback was invoked second.
+        assert_eq!(ordering.value_read_queue_submitted, Some(1));
+    })
+}
diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index a75367f827..86139c9bd2 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -2,6 +2,7 @@ use wasm_bindgen_test::wasm_bindgen_test_configure;
 
 mod regression {
     mod issue_3457;
+    mod issue_4024;
 }
 
 mod buffer;
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 75491d10cb..a12d864104 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -219,6 +219,9 @@ struct ActiveSubmission<A: HalApi> {
     mapped: Vec<id::Valid<id::BufferId>>,
 
     encoders: Vec<EncoderInFlight<A>>,
+
+    /// List of queue "on_submitted_work_done" closures to be called once this
+    /// submission has completed.
     work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
 }
 
@@ -304,6 +307,12 @@ pub(super) struct LifetimeTracker<A: HalApi> {
     /// Buffers the user has asked us to map, and which are not used by any
     /// queue submission still in flight.
    ready_to_map: Vec<id::Valid<id::BufferId>>,
+
+    /// Queue "on_submitted_work_done" closures that were initiated while there
+    /// were no pending submissions. These cannot be invoked immediately, as they
+    /// must fire _after_ all previously registered map_async callbacks, so we
+    /// defer them here until the next time the device is maintained.
+    work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
 }
 
 impl<A: HalApi> LifetimeTracker<A> {
@@ -316,6 +325,7 @@ impl<A: HalApi> LifetimeTracker<A> {
             active: Vec::new(),
             free_resources: NonReferencedResources::new(),
             ready_to_map: Vec::new(),
+            work_done_closures: SmallVec::new(),
         }
     }
 
@@ -405,7 +415,7 @@
             .position(|a| a.index > last_done)
             .unwrap_or(self.active.len());
 
-        let mut work_done_closures = SmallVec::new();
+        let mut work_done_closures: SmallVec<_> = self.work_done_closures.drain(..).collect();
         for a in self.active.drain(..done_count) {
             log::trace!("Active submission {} is done", a.index);
             self.free_resources.extend(a.last_resources);
@@ -445,18 +455,16 @@
         }
     }
 
-    pub fn add_work_done_closure(
-        &mut self,
-        closure: SubmittedWorkDoneClosure,
-    ) -> Option<SubmittedWorkDoneClosure> {
+    pub fn add_work_done_closure(&mut self, closure: SubmittedWorkDoneClosure) {
         match self.active.last_mut() {
             Some(active) => {
                 active.work_done_closures.push(closure);
-                None
             }
-            // Note: we can't immediately invoke the closure, since it assumes
-            // nothing is currently locked in the hubs.
-            None => Some(closure),
+            // We must defer the closure until all previously occurring map_async
+            // closures have fired. This is required by the spec.
+            None => {
+                self.work_done_closures.push(closure);
+            }
         }
     }
 }
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 5792791abe..0ae6d7a2dd 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -181,6 +181,9 @@ impl UserClosures {
     fn fire(self) {
         // Note: this logic is specifically moved out of `handle_mapping()` in order to
         // have nothing locked by the time we execute the user's callback code.
+
+        // Mappings _must_ be fired before submissions, as the spec requires all mapping callbacks
+        // that are registered before an on_submitted_work_done callback to be fired before it.
         for (operation, status) in self.mappings {
             operation.callback.call(status);
         }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 6e0be3b297..73fa5de3b0 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1435,17 +1435,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         closure: SubmittedWorkDoneClosure,
     ) -> Result<(), InvalidQueue> {
         //TODO: flush pending writes
-        let closure_opt = {
-            let hub = A::hub(self);
-            let mut token = Token::root();
-            let (device_guard, mut token) = hub.devices.read(&mut token);
-            match device_guard.get(queue_id) {
-                Ok(device) => device.lock_life(&mut token).add_work_done_closure(closure),
-                Err(_) => return Err(InvalidQueue),
-            }
-        };
-        if let Some(closure) = closure_opt {
-            closure.call();
+        let hub = A::hub(self);
+        let mut token = Token::root();
+        let (device_guard, mut token) = hub.devices.read(&mut token);
+        match device_guard.get(queue_id) {
+            Ok(device) => device.lock_life(&mut token).add_work_done_closure(closure),
+            Err(_) => return Err(InvalidQueue),
         }
         Ok(())
     }
diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index 04f535dd22..0138051a10 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -4580,8 +4580,8 @@ impl Queue {
     }
 
     /// Registers a callback when the previous call to submit finishes running on the gpu. This callback
-    /// being called implies that all mapped buffer callbacks attached to the same submission have also
-    /// been called.
+    /// being called implies that all mapped buffer callbacks that were registered before this call will
+    /// have been called.
     ///
     /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
     /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
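
Below is a minimal user-side sketch (not part of the patch) of what the new ordering guarantee enables: `on_submitted_work_done` can act as a barrier behind which every previously registered `map_async` callback has already fired. The helper name `read_after_barrier` and the busy-poll loop are illustrative assumptions, not wgpu API; the wgpu calls themselves follow the same 0.17-era API used in the test above, and the loop is a native-only simplification (on the web you would yield to the browser's event loop instead).

```rust
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};

/// Hypothetical helper: block until the buffer's pending `map_async` has
/// completed, using the work-done callback as the barrier, then copy the
/// mapped bytes out. `buffer` must have been created with
/// `BufferUsages::MAP_READ`.
fn read_after_barrier(device: &wgpu::Device, queue: &wgpu::Queue, buffer: &wgpu::Buffer) -> Vec<u8> {
    let mapped = Arc::new(AtomicBool::new(false));
    let mapped_in_cb = Arc::clone(&mapped);

    // Registered first: asks for the buffer to be mapped.
    buffer.slice(..).map_async(wgpu::MapMode::Read, move |result| {
        result.expect("buffer mapping failed");
        mapped_in_cb.store(true, Ordering::Release);
    });

    let done = Arc::new(AtomicBool::new(false));
    let done_in_cb = Arc::clone(&done);

    // Registered second: with this patch, this callback is guaranteed to fire
    // after the map_async callback above, even with no submission in flight.
    queue.on_submitted_work_done(move || {
        done_in_cb.store(true, Ordering::Release);
    });

    // Drive callbacks; on native, poll() is what invokes them.
    while !done.load(Ordering::Acquire) {
        device.poll(wgpu::Maintain::Poll);
    }

    // The barrier has fired, so the map_async callback must have fired too.
    assert!(mapped.load(Ordering::Acquire));

    let bytes = buffer.slice(..).get_mapped_range().to_vec();
    buffer.unmap();
    bytes
}
```

Before this change, when no submissions were in flight, `on_submitted_work_done` ran its closure immediately inside the call, so `done` could be observed as true while `mapped` was still false and the assertion above could fail.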