From 06b75fe53fe5bb25809a9a7ca0f0a9548cc5132c Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 19 Dec 2023 11:50:56 +0100 Subject: [PATCH 01/13] Expose `desired_swap_chain_size` --- wgpu-core/src/device/global.rs | 3 ++- wgpu-core/src/present.rs | 1 - wgpu-types/src/lib.rs | 10 ++++++++++ wgpu/src/lib.rs | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index ded8c41da8..b1fd77ce8a 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1991,7 +1991,8 @@ impl Global { } } - let num_frames = present::DESIRED_NUM_FRAMES + let num_frames = config + .desired_swap_chain_size .clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end()); let mut hal_config = hal::SurfaceConfiguration { swap_chain_size: num_frames, diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 4c2a042b15..05946677f3 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -36,7 +36,6 @@ use thiserror::Error; use wgt::SurfaceStatus as Status; const FRAME_TIMEOUT_MS: u32 = 1000; -pub const DESIRED_NUM_FRAMES: u32 = 3; #[derive(Debug)] pub(crate) struct Presentation { diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index e76bebe625..5492bb3397 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5107,6 +5107,15 @@ pub struct SurfaceConfiguration { /// AutoNoVsync will gracefully do a designed sets of fallbacks if their primary modes are /// unsupported. pub present_mode: PresentMode, + /// Desired number of buffers in the swap chain. + /// + /// Defaults to 3 when created via [`Surface::get_default_config`]. + /// + /// Recommended to use 3 (or higher) for high throughput, 2 for low latency. + /// This is a hint to the backend implementation and will be clamped to the supported range. 
+ /// A number of one (which is rarely supported) would means that CPU and GPU will be fighting over the same texture, + /// this never executing work in parallel. + pub desired_swap_chain_size: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. pub alpha_mode: CompositeAlphaMode, /// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture(). @@ -5126,6 +5135,7 @@ impl SurfaceConfiguration { width: self.width, height: self.height, present_mode: self.present_mode, + desired_swap_chain_size: self.desired_swap_chain_size, alpha_mode: self.alpha_mode, view_formats: fun(self.view_formats.clone()), } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index b02434e47d..3b4c183941 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -5057,6 +5057,7 @@ impl Surface<'_> { format: *caps.formats.get(0)?, width, height, + desired_swap_chain_size: 3, present_mode: *caps.present_modes.get(0)?, alpha_mode: wgt::CompositeAlphaMode::Auto, view_formats: vec![], From 15269571c13cca148bcfd0260bf65a88f8c5dc77 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 19 Dec 2023 11:53:50 +0100 Subject: [PATCH 02/13] update examples --- examples/src/hello_triangle/mod.rs | 11 +---------- examples/src/hello_windows/mod.rs | 17 ++++------------- examples/src/uniform_values/mod.rs | 12 +++--------- 3 files changed, 8 insertions(+), 32 deletions(-) diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 40cb805c28..31b7473a91 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -72,16 +72,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { multiview: None, }); - let mut config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: swapchain_format, - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: 
swapchain_capabilities.alpha_modes[0], - view_formats: vec![], - }; - + let mut config = surface.get_default_config(&adapter, size.width, size.height).unwrap(); surface.configure(&device, &config); let window = &window; diff --git a/examples/src/hello_windows/mod.rs b/examples/src/hello_windows/mod.rs index 9a42b9afbd..7d81dbef7b 100644 --- a/examples/src/hello_windows/mod.rs +++ b/examples/src/hello_windows/mod.rs @@ -30,20 +30,11 @@ impl ViewportDesc { fn build(self, adapter: &wgpu::Adapter, device: &wgpu::Device) -> Viewport { let size = self.window.inner_size(); - - let caps = self.surface.get_capabilities(adapter); - let config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: caps.formats[0], - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: caps.alpha_modes[0], - view_formats: vec![], - }; - + let config = self + .surface + .get_default_config(adapter, size.width, size.height) + .unwrap(); self.surface.configure(device, &config); - Viewport { desc: self, config } } } diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index de71ce5067..4a31ddc069 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -192,15 +192,9 @@ impl WgpuContext { multiview: None, }); - let surface_config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: swapchain_format, - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: swapchain_capabilities.alpha_modes[0], - view_formats: vec![], - }; + let surface_config = surface + .get_default_config(&adapter, size.width, size.height) + .unwrap(); surface.configure(&device, &surface_config); // (5) From c9017ad1bab554279e29af7f49aa463bf42352d9 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 19 Dec 2023 11:54:55 +0100 Subject: [PATCH 03/13] update changelog --- CHANGELOG.md | 1 + 1 file changed, 
1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3b731daf6..d146886017 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ Wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal` - No longer validate surfaces against their allowed extent range on configure. This caused warnings that were almost impossible to avoid. As before, the resulting behavior depends on the compositor. By @wumpf in [#4796](https://github.com/gfx-rs/wgpu/pull/4796) - Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759) - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886) +- `SurfaceConfiguration` now exposes `desired_swap_chain_size` which was previously hard-coded to 3. By setting it to 2 you can reduce latency. By picking a large value you can ensure higher throughput. By @emilk & @wumpf in [#????](https://github.com/gfx-rs/wgpu/pull/????) #### OpenGL - `@builtin(instance_index)` now properly reflects the range provided in the draw call instead of always counting from 0. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). 
From f62112ce0c8f1e90fcb9a3b00dbbb74f68e0501f Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 19 Dec 2023 12:07:02 +0100 Subject: [PATCH 04/13] fix missing fixup --- examples/src/framework.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/src/framework.rs b/examples/src/framework.rs index a014495171..76ea77a51d 100644 --- a/examples/src/framework.rs +++ b/examples/src/framework.rs @@ -571,6 +571,7 @@ impl From> format, width: params.width, height: params.height, + desired_swap_chain_size: 3, present_mode: wgpu::PresentMode::Fifo, alpha_mode: wgpu::CompositeAlphaMode::Auto, view_formats: vec![format], From a2a6a0a9754f7a8137973198ac4629f0e6b6c675 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 19 Dec 2023 12:13:28 +0100 Subject: [PATCH 05/13] doc fix --- wgpu-types/src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 5492bb3397..3136371c87 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5109,12 +5109,10 @@ pub struct SurfaceConfiguration { pub present_mode: PresentMode, /// Desired number of buffers in the swap chain. /// - /// Defaults to 3 when created via [`Surface::get_default_config`]. + /// Defaults to 3 when created via `wgpu::Surface::get_default_config`. /// /// Recommended to use 3 (or higher) for high throughput, 2 for low latency. /// This is a hint to the backend implementation and will be clamped to the supported range. - /// A number of one (which is rarely supported) would means that CPU and GPU will be fighting over the same texture, - /// this never executing work in parallel. pub desired_swap_chain_size: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. 
pub alpha_mode: CompositeAlphaMode, From 35f813924a449b4662bc2573a0cf5dd7f1c186c3 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 19 Dec 2023 12:15:40 +0100 Subject: [PATCH 06/13] update PR number --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d146886017..ec778d4732 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,7 +66,7 @@ Wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal` - No longer validate surfaces against their allowed extent range on configure. This caused warnings that were almost impossible to avoid. As before, the resulting behavior depends on the compositor. By @wumpf in [#4796](https://github.com/gfx-rs/wgpu/pull/4796) - Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759) - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886) -- `SurfaceConfiguration` now exposes `desired_swap_chain_size` which was previously hard-coded to 3. By setting it to 2 you can reduce latency. By picking a large value you can ensure higher throughput. By @emilk & @wumpf in [#????](https://github.com/gfx-rs/wgpu/pull/????) +- `SurfaceConfiguration` now exposes `desired_swap_chain_size` which was previously hard-coded to 3. By setting it to 2 you can reduce latency. By picking a large value you can ensure higher throughput. By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) #### OpenGL - `@builtin(instance_index)` now properly reflects the range provided in the draw call instead of always counting from 0. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). 
From c39dbc035db040f93c08ec136e8c43d3d3887e2f Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Tue, 19 Dec 2023 13:57:28 +0100 Subject: [PATCH 07/13] formatting --- examples/src/hello_triangle/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 31b7473a91..faa1db8f8b 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -72,7 +72,9 @@ async fn run(event_loop: EventLoop<()>, window: Window) { multiview: None, }); - let mut config = surface.get_default_config(&adapter, size.width, size.height).unwrap(); + let mut config = surface + .get_default_config(&adapter, size.width, size.height) + .unwrap(); surface.configure(&device, &config); let window = &window; From 2157b53ce874796394a6d38987d4ec97f2d4c98a Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Fri, 22 Dec 2023 12:28:22 +0100 Subject: [PATCH 08/13] Change swap_chain_size to maximum_frame_latency On most platforms this is merely a change in narrative, on DX12 it actually has a practical effect already since we use it to directly set frame latency --- examples/src/framework.rs | 2 +- wgpu-core/src/device/global.rs | 9 +++++---- wgpu-hal/examples/halmark/main.rs | 8 ++++---- wgpu-hal/examples/ray-traced-triangle/main.rs | 8 ++++---- wgpu-hal/src/dx12/adapter.rs | 4 ++-- wgpu-hal/src/dx12/mod.rs | 17 ++++++++++++----- wgpu-hal/src/gles/adapter.rs | 2 +- wgpu-hal/src/lib.rs | 17 ++++++++++------- wgpu-hal/src/metal/adapter.rs | 11 ++++++----- wgpu-hal/src/metal/surface.rs | 2 +- wgpu-hal/src/vulkan/adapter.rs | 7 ++++++- wgpu-hal/src/vulkan/device.rs | 2 +- wgpu-types/src/lib.rs | 13 ++++++++----- wgpu/src/lib.rs | 2 +- 14 files changed, 62 insertions(+), 42 deletions(-) diff --git a/examples/src/framework.rs b/examples/src/framework.rs index 76ea77a51d..d61ada0a16 100644 --- a/examples/src/framework.rs +++ b/examples/src/framework.rs @@ -571,7 +571,7 @@ impl From> format, 
width: params.width, height: params.height, - desired_swap_chain_size: 3, + desired_maximum_frame_latency: 2, present_mode: wgpu::PresentMode::Fifo, alpha_mode: wgpu::CompositeAlphaMode::Auto, view_formats: vec![format], diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index b1fd77ce8a..892ec8e1be 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1991,11 +1991,12 @@ impl Global { } } - let num_frames = config - .desired_swap_chain_size - .clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end()); + let num_frames = config.desired_maximum_frame_latency.clamp( + *caps.maximum_frame_latency.start(), + *caps.maximum_frame_latency.end(), + ); let mut hal_config = hal::SurfaceConfiguration { - swap_chain_size: num_frames, + maximum_frame_latency: num_frames, present_mode: config.present_mode, composite_alpha_mode: config.alpha_mode, format: config.format, diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 18f283d8e7..7bc8013415 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -23,7 +23,7 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0; const GRAVITY: f32 = -9.8 * 100.0; const MAX_VELOCITY: f32 = 750.0; const COMMAND_BUFFER_PER_CONTEXT: usize = 100; -const DESIRED_FRAMES: u32 = 3; +const DESIRED_MAX_LATENCY: u32 = 2; #[repr(C)] #[derive(Clone, Copy)] @@ -132,9 +132,9 @@ impl Example { let window_size: (u32, u32) = window.inner_size().into(); let surface_config = hal::SurfaceConfiguration { - swap_chain_size: DESIRED_FRAMES.clamp( - *surface_caps.swap_chain_sizes.start(), - *surface_caps.swap_chain_sizes.end(), + maximum_frame_latency: DESIRED_MAX_LATENCY.clamp( + *surface_caps.maximum_frame_latency.start(), + *surface_caps.maximum_frame_latency.end(), ), present_mode: wgt::PresentMode::Fifo, composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs 
b/wgpu-hal/examples/ray-traced-triangle/main.rs index 6454cb8998..01a0968f3d 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -14,7 +14,7 @@ use std::{ use winit::window::WindowButtons; const COMMAND_BUFFER_PER_CONTEXT: usize = 100; -const DESIRED_FRAMES: u32 = 3; +const DESIRED_MAX_LATENCY: u32 = 2; /// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc) /// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html) @@ -264,9 +264,9 @@ impl Example { *surface_caps.formats.first().unwrap() }; let surface_config = hal::SurfaceConfiguration { - swap_chain_size: DESIRED_FRAMES - .max(*surface_caps.swap_chain_sizes.start()) - .min(*surface_caps.swap_chain_sizes.end()), + maximum_frame_latency: DESIRED_MAX_LATENCY + .max(*surface_caps.maximum_frame_latency.start()) + .min(*surface_caps.maximum_frame_latency.end()), present_mode: wgt::PresentMode::Fifo, composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, format: surface_format, diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 1db9b0877d..f6027014d2 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -626,8 +626,8 @@ impl crate::Adapter for super::Adapter { wgt::TextureFormat::Rgb10a2Unorm, wgt::TextureFormat::Rgba16Float, ], - // we currently use a flip effect which supports 2..=16 buffers - swap_chain_sizes: 2..=16, + // See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency + maximum_frame_latency: 1..=16, current_extent, usage: crate::TextureUses::COLOR_TARGET | crate::TextureUses::COPY_SRC diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 2d42d5faa1..f6ed972b3c 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -654,13 +654,18 @@ impl 
crate::Surface for Surface { let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); + // Nvidia recommends to use 1-2 more buffers than the maximum latency + // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/ + // For high latency extra buffers seems excessive, so go with a minimum of 3 and beyond that add 1. + let swap_chain_buffer = (config.maximum_frame_latency + 1).min(3); + let swap_chain = match self.swap_chain.write().take() { //Note: this path doesn't properly re-initialize all of the things Some(sc) => { let raw = unsafe { sc.release_resources() }; let result = unsafe { raw.ResizeBuffers( - config.swap_chain_size, + swap_chain_buffer, config.extent.width, config.extent.height, non_srgb_format, @@ -687,7 +692,9 @@ impl crate::Surface for Surface { quality: 0, }, buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, - buffer_count: config.swap_chain_size, + // Nvidia recommends to use 1-2 more buffers than the maximum latency + // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/ + buffer_count: swap_chain_buffer, scaling: d3d12::Scaling::Stretch, swap_effect: d3d12::SwapEffect::FlipDiscard, flags, @@ -791,11 +798,11 @@ impl crate::Surface for Surface { | SurfaceTarget::SwapChainPanel(_) => {} } - unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) }; + unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) }; let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; - let mut resources = Vec::with_capacity(config.swap_chain_size as usize); - for i in 0..config.swap_chain_size { + let mut resources = Vec::with_capacity(config.maximum_frame_latency as usize); + for i in 0..config.maximum_frame_latency { let mut resource = d3d12::Resource::null(); unsafe { swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void()) diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index e46c91ab9c..9099c82829 
100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -1141,7 +1141,7 @@ impl crate::Adapter for super::Adapter { vec![wgt::PresentMode::Fifo] //TODO }, composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO - swap_chain_sizes: 2..=2, + maximum_frame_latency: 2..=2, //TODO, unused currently current_extent: None, usage: crate::TextureUses::COLOR_TARGET, }) diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 39037e895c..e51bf2cd52 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -922,11 +922,14 @@ pub struct SurfaceCapabilities { /// Must be at least one. pub formats: Vec, - /// Range for the swap chain sizes. + /// Range for the number of queued frames. /// - /// - `swap_chain_sizes.start` must be at least 1. - /// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`. - pub swap_chain_sizes: RangeInclusive, + /// This adjusts either the swapchain frame count to value + 1 - or sets SetMaximumFrameLatency to the value given, + /// or uses a wait-for-present in the acquire method to limit rendering such that it acts like it's a value + 1 swapchain frame set. + /// + /// - `maximum_frame_latency.start` must be at least 1. + /// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`. + pub maximum_frame_latency: RangeInclusive, /// Current extent of the surface, if known. pub current_extent: Option, @@ -1252,9 +1255,9 @@ pub struct RenderPipelineDescriptor<'a, A: Api> { #[derive(Debug, Clone)] pub struct SurfaceConfiguration { - /// Number of textures in the swap chain. Must be in - /// `SurfaceCapabilities::swap_chain_size` range. - pub swap_chain_size: u32, + /// Maximum number of queued frames. Must be in + /// `SurfaceCapabilities::maximum_frame_latency` range. + pub maximum_frame_latency: u32, /// Vertical synchronization mode. pub present_mode: wgt::PresentMode, /// Alpha composition mode. 
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 3d8f6f3e57..a946ce5819 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -320,13 +320,14 @@ impl crate::Adapter for super::Adapter { let pc = &self.shared.private_caps; Some(crate::SurfaceCapabilities { formats, - //Note: this is hardcoded in `CAMetalLayer` documentation - swap_chain_sizes: if pc.can_set_maximum_drawables_count { - 2..=3 + // We use this here to govern the maximum number of drawables + 1. + // See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount + maximum_frame_latency: if pc.can_set_maximum_drawables_count { + 1..=2 } else { - // 3 is the default in `CAMetalLayer` documentation + // 3 is the default value for maximum drawables in `CAMetalLayer` documentation // iOS 10.3 was tested to use 3 on iphone5s - 3..=3 + 2..=2 }, present_modes: if pc.can_set_display_sync { vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs index e54a176da5..811f66e71e 100644 --- a/wgpu-hal/src/metal/surface.rs +++ b/wgpu-hal/src/metal/surface.rs @@ -221,7 +221,7 @@ impl crate::Surface for super::Surface { } // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) - render_layer.set_maximum_drawable_count(config.swap_chain_size as _); + render_layer.set_maximum_drawable_count(config.maximum_frame_latency as _ + 1); render_layer.set_drawable_size(drawable_size); if caps.can_set_next_drawable_timeout { let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 737615215d..ecbd491e2a 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1838,7 +1838,12 @@ impl crate::Adapter for super::Adapter { .collect(); Some(crate::SurfaceCapabilities { formats, - swap_chain_sizes: 
caps.min_image_count..=max_image_count, + // TODO: Right now we're always truncating the swap chain + // (presumably - we're actually setting the min image count which isn't necessarily the swap chain size) + // Instead, we should use extensions when available to wait in present. + // See https://github.com/gfx-rs/wgpu/issues/2869 + maximum_frame_latency: caps.min_image_count.saturating_sub(1) + ..=max_image_count.saturating_sub(1), current_extent, usage: conv::map_vk_image_usage(caps.supported_usage_flags), present_modes: raw_present_modes diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a37017a9e6..23182b440c 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -579,7 +579,7 @@ impl super::Device { let mut info = vk::SwapchainCreateInfoKHR::builder() .flags(raw_flags) .surface(surface.raw) - .min_image_count(config.swap_chain_size) + .min_image_count(config.maximum_frame_latency + 1) // TODO: https://github.com/gfx-rs/wgpu/issues/2869 .image_format(original_format) .image_color_space(color_space) .image_extent(vk::Extent2D { diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 3136371c87..d5194297f4 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5107,13 +5107,16 @@ pub struct SurfaceConfiguration { /// AutoNoVsync will gracefully do a designed sets of fallbacks if their primary modes are /// unsupported. pub present_mode: PresentMode, - /// Desired number of buffers in the swap chain. + /// Desired maximum number of frames that the presentation engine should queue in advance. /// - /// Defaults to 3 when created via `wgpu::Surface::get_default_config`. + /// Defaults to 2 when created via `wgpu::Surface::get_default_config`. + /// Recommended to use 2 (or higher) for high throughput, 1 for low latency. /// - /// Recommended to use 3 (or higher) for high throughput, 2 for low latency. 
/// This is a hint to the backend implementation and will be clamped to the supported range. - pub desired_swap_chain_size: u32, + /// As a consequence either the maximum frame latency is set directly on the swap chain, + /// or waits on present to avoid exceeding the maximum frame latency, + /// or the swap chain size is set to max-latency + 1. + pub desired_maximum_frame_latency: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. pub alpha_mode: CompositeAlphaMode, /// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture(). @@ -5133,7 +5136,7 @@ impl SurfaceConfiguration { width: self.width, height: self.height, present_mode: self.present_mode, - desired_swap_chain_size: self.desired_swap_chain_size, + desired_maximum_frame_latency: self.desired_maximum_frame_latency, alpha_mode: self.alpha_mode, view_formats: fun(self.view_formats.clone()), } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 3b4c183941..e73afd47ca 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -5057,7 +5057,7 @@ impl Surface<'_> { format: *caps.formats.get(0)?, width, height, - desired_swap_chain_size: 3, + desired_maximum_frame_latency: 2, present_mode: *caps.present_modes.get(0)?, alpha_mode: wgt::CompositeAlphaMode::Auto, view_formats: vec![], From 91c2c55de9c1e6ae8db0203507c3b3eebfdeefd6 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 23 Dec 2023 14:20:02 +0100 Subject: [PATCH 09/13] update changelog, improve comments --- CHANGELOG.md | 2 +- wgpu-hal/src/dx12/mod.rs | 2 -- wgpu-hal/src/vulkan/adapter.rs | 3 +-- wgpu-types/src/lib.rs | 12 ++++++++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac9a73491c..f055076a82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,7 +67,7 @@ Wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal` - Added support for the float32-filterable feature. 
By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759) - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851) - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886) -- `SurfaceConfiguration` now exposes `desired_swap_chain_size` which was previously hard-coded to 3. By setting it to 2 you can reduce latency. By picking a large value you can ensure higher throughput. By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) +- `SurfaceConfiguration` now exposes `desired_swap_chain_size` which was previously hard-coded to 2. By setting it to 1 you can reduce latency. By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) - DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862) #### OpenGL diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index f02e5a26d9..af8d5a8c01 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -698,8 +698,6 @@ impl crate::Surface for Surface { quality: 0, }, buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, - // Nvidia recommends to use 1-2 more buffers than the maximum latency - // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/ buffer_count: swap_chain_buffer, scaling: d3d12::Scaling::Stretch, swap_effect: d3d12::SwapEffect::FlipDiscard, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index ecbd491e2a..589200964f 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1842,8 +1842,7 @@ impl crate::Adapter for super::Adapter { // (presumably - we're actually setting the min image count which isn't necessarily the swap chain size) // Instead, we should use extensions when available to wait in present. 
// See https://github.com/gfx-rs/wgpu/issues/2869 - maximum_frame_latency: caps.min_image_count.saturating_sub(1) - ..=max_image_count.saturating_sub(1), + maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since both `min_image_count` is at least one and we already patched `max_image_count`. current_extent, usage: conv::map_vk_image_usage(caps.supported_usage_flags), present_modes: raw_present_modes diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index d5194297f4..72c32f9425 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5110,9 +5110,17 @@ pub struct SurfaceConfiguration { /// Desired maximum number of frames that the presentation engine should queue in advance. /// /// Defaults to 2 when created via `wgpu::Surface::get_default_config`. - /// Recommended to use 2 (or higher) for high throughput, 1 for low latency. /// - /// This is a hint to the backend implementation and will be clamped to the supported range. + /// Typical values are either 2 or 1, but higher values are possible. + /// Choose 1 for low latency from frame recording to frame display. + /// Choose 2 or higher for potentially smoother frame display - by having more frames in flight, it's more + /// likely that the next refresh interval of the display will be able to display a new frame + /// under varying GPU & CPU frame timings. + /// A value of 0 is typically not supported (i.e. clamped to a higher value) and would mean that GPU and CPU + /// won't be able to work in parallel, use this only if you expect the combined GPU & CPU workload + /// to be below below your screen refresh rate. + /// + /// This is a hint to the backend implementation and will always be clamped to the supported range. /// As a consequence either the maximum frame latency is set directly on the swap chain, /// or waits on present to avoid exceeding the maximum frame latency, /// or the swap chain size is set to max-latency + 1. 
From de1640a87c5348277a57ebb6232eb806111c7078 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 23 Dec 2023 14:42:38 +0100 Subject: [PATCH 10/13] fix missing rename, remove unused and incorrectly named variable --- wgpu-core/src/device/global.rs | 5 ++--- wgpu-core/src/present.rs | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 6dfab2be01..fe31de7722 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2000,12 +2000,12 @@ impl Global { } } - let num_frames = config.desired_maximum_frame_latency.clamp( + let maximum_frame_latency = config.desired_maximum_frame_latency.clamp( *caps.maximum_frame_latency.start(), *caps.maximum_frame_latency.end(), ); let mut hal_config = hal::SurfaceConfiguration { - maximum_frame_latency: num_frames, + maximum_frame_latency, present_mode: config.present_mode, composite_alpha_mode: config.alpha_mode, format: config.format, @@ -2076,7 +2076,6 @@ impl Global { *presentation = Some(present::Presentation { device: super::any_device::AnyDevice::new(device.clone()), config: config.clone(), - num_frames, acquired_texture: None, }); } diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 05946677f3..efc2bc5c7b 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -41,8 +41,6 @@ const FRAME_TIMEOUT_MS: u32 = 1000; pub(crate) struct Presentation { pub(crate) device: AnyDevice, pub(crate) config: wgt::SurfaceConfiguration>, - #[allow(unused)] - pub(crate) num_frames: u32, pub(crate) acquired_texture: Option, } From d64de0d68b2f74a11581e14e0faa0a307e28bde6 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 23 Dec 2023 17:05:16 +0100 Subject: [PATCH 11/13] iterating on desired_maximum_frame_latency doc --- wgpu-types/src/lib.rs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 
72c32f9425..0041d17ecb 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5109,21 +5109,24 @@ pub struct SurfaceConfiguration { pub present_mode: PresentMode, /// Desired maximum number of frames that the presentation engine should queue in advance. /// + /// This is a hint to the backend implementation and will always be clamped to the supported range. + /// As a consequence either the maximum frame latency is set directly on the swap chain, + /// or waits on present to avoid exceeding the maximum frame latency if supported, + /// or the swap chain size is set to max-latency + 1. + /// /// Defaults to 2 when created via `wgpu::Surface::get_default_config`. /// - /// Typical values are either 2 or 1, but higher values are possible. + /// Typical values range from 3 to 1, but higher values are possible. /// Choose 1 for low latency from frame recording to frame display. - /// Choose 2 or higher for potentially smoother frame display - by having more frames in flight, it's more - /// likely that the next refresh interval of the display will be able to display a new frame - /// under varying GPU & CPU frame timings. + /// If the backend does not support waiting on present, this will cause the CPU to wait for the GPU + /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, + /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). + /// Choose 2 or higher for potentially smoother frame display, as it allows to be at least one frame + /// to be queued up. This typically avoids starving the GPU's work queue. + /// Higher values are useful for achieving a constant flow of frames to the display under varying load. /// A value of 0 is typically not supported (i.e. 
clamped to a higher value) and would mean that GPU and CPU - /// won't be able to work in parallel, use this only if you expect the combined GPU & CPU workload + /// won't be able to work in parallel at all, use this only if you expect the combined GPU & CPU workload /// to be below below your screen refresh rate. - /// - /// This is a hint to the backend implementation and will always be clamped to the supported range. - /// As a consequence either the maximum frame latency is set directly on the swap chain, - /// or waits on present to avoid exceeding the maximum frame latency, - /// or the swap chain size is set to max-latency + 1. pub desired_maximum_frame_latency: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. pub alpha_mode: CompositeAlphaMode, From b6ceb5159a199ee2c1f70e1798f1c53b874735ad Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 23 Dec 2023 17:07:29 +0100 Subject: [PATCH 12/13] fix macos build --- wgpu-hal/src/metal/surface.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs index 811f66e71e..a97eff0aae 100644 --- a/wgpu-hal/src/metal/surface.rs +++ b/wgpu-hal/src/metal/surface.rs @@ -221,7 +221,7 @@ impl crate::Surface for super::Surface { } // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) - render_layer.set_maximum_drawable_count(config.maximum_frame_latency as _ + 1); + render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1); render_layer.set_drawable_size(drawable_size); if caps.can_set_next_drawable_timeout { let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; From e10df8fb8a0bd474b9496e28cee451ea9af4bf71 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 17 Jan 2024 11:24:28 +0100 Subject: [PATCH 13/13] iterate on documentation, update changelog --- CHANGELOG.md | 2 +- wgpu-types/src/lib.rs | 23 +++++++++++------------ 2 files 
changed, 12 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f055076a82..735ca04393 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,7 +67,7 @@ Wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal` - Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759) - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851) - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886) -- `SurfaceConfiguration` now exposes `desired_swap_chain_size` which was previously hard-coded to 2. By setting it to 1 you can reduce latency. By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) +- `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency at the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`, on all other backends except OpenGL the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) - DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862) #### OpenGL diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 0041d17ecb..fcec3d62b1 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5110,23 +5110,22 @@ pub struct SurfaceConfiguration { /// Desired maximum number of frames that the presentation engine should queue in advance. /// /// This is a hint to the backend implementation and will always be clamped to the supported range. 
- /// As a consequence either the maximum frame latency is set directly on the swap chain, - /// or waits on present to avoid exceeding the maximum frame latency if supported, - /// or the swap chain size is set to max-latency + 1. + /// As a consequence, either the maximum frame latency is set directly on the swap chain, + /// or waits on present are scheduled to avoid exceeding the maximum frame latency if supported, + /// or the swap chain size is set to (max-latency + 1). /// /// Defaults to 2 when created via `wgpu::Surface::get_default_config`. /// - /// Typical values range from 3 to 1, but higher values are possible. - /// Choose 1 for low latency from frame recording to frame display. - /// If the backend does not support waiting on present, this will cause the CPU to wait for the GPU - /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, - /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). - /// Choose 2 or higher for potentially smoother frame display, as it allows to be at least one frame + /// Typical values range from 3 to 1, but higher values are possible: + /// * Choose 2 or higher for potentially smoother frame display, as it allows at least one frame /// to be queued up. This typically avoids starving the GPU's work queue. /// Higher values are useful for achieving a constant flow of frames to the display under varying load. - /// A value of 0 is typically not supported (i.e. clamped to a higher value) and would mean that GPU and CPU - /// won't be able to work in parallel at all, use this only if you expect the combined GPU & CPU workload - /// to be below below your screen refresh rate. + /// * Choose 1 for low latency from frame recording to frame display. 
+ /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU + /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, + /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). + /// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>. + /// * A value of 0 is generally not supported and always clamped to a higher value. pub desired_maximum_frame_latency: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. pub alpha_mode: CompositeAlphaMode,