diff --git a/CHANGELOG.md b/CHANGELOG.md index 8847f8edb0..735ca04393 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ Wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal` - Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759) - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851) - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886) +- `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency under the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`, on all other backends except OpenGL the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) - DeviceLostClosure is guaranteed to be invoked exactly once. 
By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862) #### OpenGL diff --git a/examples/src/framework.rs b/examples/src/framework.rs index a014495171..d61ada0a16 100644 --- a/examples/src/framework.rs +++ b/examples/src/framework.rs @@ -571,6 +571,7 @@ impl From> format, width: params.width, height: params.height, + desired_maximum_frame_latency: 2, present_mode: wgpu::PresentMode::Fifo, alpha_mode: wgpu::CompositeAlphaMode::Auto, view_formats: vec![format], diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 40cb805c28..faa1db8f8b 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -72,16 +72,9 @@ async fn run(event_loop: EventLoop<()>, window: Window) { multiview: None, }); - let mut config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: swapchain_format, - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: swapchain_capabilities.alpha_modes[0], - view_formats: vec![], - }; - + let mut config = surface + .get_default_config(&adapter, size.width, size.height) + .unwrap(); surface.configure(&device, &config); let window = &window; diff --git a/examples/src/hello_windows/mod.rs b/examples/src/hello_windows/mod.rs index 9a42b9afbd..7d81dbef7b 100644 --- a/examples/src/hello_windows/mod.rs +++ b/examples/src/hello_windows/mod.rs @@ -30,20 +30,11 @@ impl ViewportDesc { fn build(self, adapter: &wgpu::Adapter, device: &wgpu::Device) -> Viewport { let size = self.window.inner_size(); - - let caps = self.surface.get_capabilities(adapter); - let config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: caps.formats[0], - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: caps.alpha_modes[0], - view_formats: vec![], - }; - + let config = self + .surface + .get_default_config(adapter, size.width, size.height) + 
.unwrap(); self.surface.configure(device, &config); - Viewport { desc: self, config } } } diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index de71ce5067..4a31ddc069 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -192,15 +192,9 @@ impl WgpuContext { multiview: None, }); - let surface_config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: swapchain_format, - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: swapchain_capabilities.alpha_modes[0], - view_formats: vec![], - }; + let surface_config = surface + .get_default_config(&adapter, size.width, size.height) + .unwrap(); surface.configure(&device, &surface_config); // (5) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 9575979c8d..fe31de7722 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2000,10 +2000,12 @@ impl Global { } } - let num_frames = present::DESIRED_NUM_FRAMES - .clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end()); + let maximum_frame_latency = config.desired_maximum_frame_latency.clamp( + *caps.maximum_frame_latency.start(), + *caps.maximum_frame_latency.end(), + ); let mut hal_config = hal::SurfaceConfiguration { - swap_chain_size: num_frames, + maximum_frame_latency, present_mode: config.present_mode, composite_alpha_mode: config.alpha_mode, format: config.format, @@ -2074,7 +2076,6 @@ impl Global { *presentation = Some(present::Presentation { device: super::any_device::AnyDevice::new(device.clone()), config: config.clone(), - num_frames, acquired_texture: None, }); } diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 4c2a042b15..efc2bc5c7b 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -36,14 +36,11 @@ use thiserror::Error; use wgt::SurfaceStatus as Status; const FRAME_TIMEOUT_MS: u32 = 1000; -pub 
const DESIRED_NUM_FRAMES: u32 = 3; #[derive(Debug)] pub(crate) struct Presentation { pub(crate) device: AnyDevice, pub(crate) config: wgt::SurfaceConfiguration>, - #[allow(unused)] - pub(crate) num_frames: u32, pub(crate) acquired_texture: Option, } diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 18f283d8e7..7bc8013415 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -23,7 +23,7 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0; const GRAVITY: f32 = -9.8 * 100.0; const MAX_VELOCITY: f32 = 750.0; const COMMAND_BUFFER_PER_CONTEXT: usize = 100; -const DESIRED_FRAMES: u32 = 3; +const DESIRED_MAX_LATENCY: u32 = 2; #[repr(C)] #[derive(Clone, Copy)] @@ -132,9 +132,9 @@ impl Example { let window_size: (u32, u32) = window.inner_size().into(); let surface_config = hal::SurfaceConfiguration { - swap_chain_size: DESIRED_FRAMES.clamp( - *surface_caps.swap_chain_sizes.start(), - *surface_caps.swap_chain_sizes.end(), + maximum_frame_latency: DESIRED_MAX_LATENCY.clamp( + *surface_caps.maximum_frame_latency.start(), + *surface_caps.maximum_frame_latency.end(), ), present_mode: wgt::PresentMode::Fifo, composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 6454cb8998..01a0968f3d 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -14,7 +14,7 @@ use std::{ use winit::window::WindowButtons; const COMMAND_BUFFER_PER_CONTEXT: usize = 100; -const DESIRED_FRAMES: u32 = 3; +const DESIRED_MAX_LATENCY: u32 = 2; /// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc) /// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html) @@ -264,9 +264,9 @@ impl Example { 
*surface_caps.formats.first().unwrap() }; let surface_config = hal::SurfaceConfiguration { - swap_chain_size: DESIRED_FRAMES - .max(*surface_caps.swap_chain_sizes.start()) - .min(*surface_caps.swap_chain_sizes.end()), + maximum_frame_latency: DESIRED_MAX_LATENCY + .max(*surface_caps.maximum_frame_latency.start()) + .min(*surface_caps.maximum_frame_latency.end()), present_mode: wgt::PresentMode::Fifo, composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, format: surface_format, diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 1db9b0877d..f6027014d2 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -626,8 +626,8 @@ impl crate::Adapter for super::Adapter { wgt::TextureFormat::Rgb10a2Unorm, wgt::TextureFormat::Rgba16Float, ], - // we currently use a flip effect which supports 2..=16 buffers - swap_chain_sizes: 2..=16, + // See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency + maximum_frame_latency: 1..=16, current_extent, usage: crate::TextureUses::COLOR_TARGET | crate::TextureUses::COPY_SRC diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index e0cd1c15cf..af8d5a8c01 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -660,13 +660,18 @@ impl crate::Surface for Surface { let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); + // Nvidia recommends to use 1-2 more buffers than the maximum latency + // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/ + // For high latency extra buffers seems excessive, so go with a minimum of 3 and beyond that add 1. 
+ let swap_chain_buffer = (config.maximum_frame_latency + 1).clamp(3, 16); // at least 3 as described above; DXGI flip-model swap chains support at most 16 buffers + let swap_chain = match self.swap_chain.write().take() { //Note: this path doesn't properly re-initialize all of the things Some(sc) => { let raw = unsafe { sc.release_resources() }; let result = unsafe { raw.ResizeBuffers( - config.swap_chain_size, + swap_chain_buffer, config.extent.width, config.extent.height, non_srgb_format, @@ -693,7 +698,7 @@ impl crate::Surface for Surface { quality: 0, }, buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, - buffer_count: config.swap_chain_size, + buffer_count: swap_chain_buffer, scaling: d3d12::Scaling::Stretch, swap_effect: d3d12::SwapEffect::FlipDiscard, flags, @@ -797,11 +802,11 @@ impl crate::Surface for Surface { | SurfaceTarget::SwapChainPanel(_) => {} } - unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) }; + unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) }; let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; - let mut resources = Vec::with_capacity(config.swap_chain_size as usize); - for i in 0..config.swap_chain_size { + let mut resources = Vec::with_capacity(swap_chain_buffer as usize); + for i in 0..swap_chain_buffer { let mut resource = d3d12::Resource::null(); unsafe { swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void()) diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index e46c91ab9c..9099c82829 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -1141,7 +1141,7 @@ impl crate::Adapter for super::Adapter { vec![wgt::PresentMode::Fifo] //TODO }, composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO - swap_chain_sizes: 2..=2, + maximum_frame_latency: 2..=2, //TODO, unused currently current_extent: None, usage: crate::TextureUses::COLOR_TARGET, }) diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 39037e895c..e51bf2cd52 100644 --- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs @@ -922,11 +922,14 @@ pub struct SurfaceCapabilities { /// Must be at least one. pub formats: Vec, - /// Range for the swap chain sizes. + /// Range for the number of queued frames. /// - /// - `swap_chain_sizes.start` must be at least 1. - /// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`. - pub swap_chain_sizes: RangeInclusive, + /// This adjusts either the swapchain frame count to value + 1 - or sets SetMaximumFrameLatency to the value given, + /// or uses a wait-for-present in the acquire method to limit rendering such that it acts like it's a value + 1 swapchain frame set. + /// + /// - `maximum_frame_latency.start` must be at least 1. + /// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`. + pub maximum_frame_latency: RangeInclusive, /// Current extent of the surface, if known. pub current_extent: Option, @@ -1252,9 +1255,9 @@ pub struct RenderPipelineDescriptor<'a, A: Api> { #[derive(Debug, Clone)] pub struct SurfaceConfiguration { - /// Number of textures in the swap chain. Must be in - /// `SurfaceCapabilities::swap_chain_size` range. - pub swap_chain_size: u32, + /// Maximum number of queued frames. Must be in + /// `SurfaceCapabilities::maximum_frame_latency` range. + pub maximum_frame_latency: u32, /// Vertical synchronization mode. pub present_mode: wgt::PresentMode, /// Alpha composition mode. diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 3d8f6f3e57..a946ce5819 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -320,13 +320,14 @@ impl crate::Adapter for super::Adapter { let pc = &self.shared.private_caps; Some(crate::SurfaceCapabilities { formats, - //Note: this is hardcoded in `CAMetalLayer` documentation - swap_chain_sizes: if pc.can_set_maximum_drawables_count { - 2..=3 + // We use this here to govern the maximum number of drawables + 1. 
+ // See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount + maximum_frame_latency: if pc.can_set_maximum_drawables_count { + 1..=2 } else { - // 3 is the default in `CAMetalLayer` documentation + // 3 is the default value for maximum drawables in `CAMetalLayer` documentation // iOS 10.3 was tested to use 3 on iphone5s - 3..=3 + 2..=2 }, present_modes: if pc.can_set_display_sync { vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs index e54a176da5..a97eff0aae 100644 --- a/wgpu-hal/src/metal/surface.rs +++ b/wgpu-hal/src/metal/surface.rs @@ -221,7 +221,7 @@ impl crate::Surface for super::Surface { } // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) - render_layer.set_maximum_drawable_count(config.swap_chain_size as _); + render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1); render_layer.set_drawable_size(drawable_size); if caps.can_set_next_drawable_timeout { let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 737615215d..589200964f 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1838,7 +1838,11 @@ impl crate::Adapter for super::Adapter { .collect(); Some(crate::SurfaceCapabilities { formats, - swap_chain_sizes: caps.min_image_count..=max_image_count, + // TODO: Right now we're always truncating the swap chain + // (presumably - we're actually setting the min image count which isn't necessarily the swap chain size) + // Instead, we should use extensions when available to wait in present. + // See https://github.com/gfx-rs/wgpu/issues/2869 + maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since `min_image_count` is at least one and we already patched `max_image_count`.
current_extent, usage: conv::map_vk_image_usage(caps.supported_usage_flags), present_modes: raw_present_modes diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a37017a9e6..23182b440c 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -579,7 +579,7 @@ impl super::Device { let mut info = vk::SwapchainCreateInfoKHR::builder() .flags(raw_flags) .surface(surface.raw) - .min_image_count(config.swap_chain_size) + .min_image_count(config.maximum_frame_latency + 1) // TODO: https://github.com/gfx-rs/wgpu/issues/2869 .image_format(original_format) .image_color_space(color_space) .image_extent(vk::Extent2D { diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index e76bebe625..fcec3d62b1 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5107,6 +5107,26 @@ pub struct SurfaceConfiguration { /// AutoNoVsync will gracefully do a designed sets of fallbacks if their primary modes are /// unsupported. pub present_mode: PresentMode, + /// Desired maximum number of frames that the presentation engine should queue in advance. + /// + /// This is a hint to the backend implementation and will always be clamped to the supported range. + /// As a consequence, either the maximum frame latency is set directly on the swap chain, + /// or waits on present are scheduled to avoid exceeding the maximum frame latency if supported, + /// or the swap chain size is set to (max-latency + 1). + /// + /// Defaults to 2 when created via `wgpu::Surface::get_default_config`. + /// + /// Typical values range from 1 to 3, but higher values are possible: + /// * Choose 2 or higher for potentially smoother frame display, as it allows at least one frame + /// to be queued up. This typically avoids starving the GPU's work queue. + /// Higher values are useful for achieving a constant flow of frames to the display under varying load. + /// * Choose 1 for low latency from frame recording to frame display.
+ /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU + /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, + /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). + /// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>. + /// * A value of 0 is generally not supported and always clamped to a higher value. + pub desired_maximum_frame_latency: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. pub alpha_mode: CompositeAlphaMode, /// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture(). @@ -5126,6 +5146,7 @@ impl SurfaceConfiguration { width: self.width, height: self.height, present_mode: self.present_mode, + desired_maximum_frame_latency: self.desired_maximum_frame_latency, alpha_mode: self.alpha_mode, view_formats: fun(self.view_formats.clone()), } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index c7266d9bfb..37e53f0108 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -5102,6 +5102,7 @@ impl Surface<'_> { format: *caps.formats.get(0)?, width, height, + desired_maximum_frame_latency: 2, present_mode: *caps.present_modes.get(0)?, alpha_mode: wgt::CompositeAlphaMode::Auto, view_formats: vec![],