From be899d4fbf81090eb55a278ea598e220646fdb64 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Sat, 8 Jan 2022 11:39:22 +0100 Subject: [PATCH 1/2] bevy_spite: Cache the previous image and atlas queries. These two getters are not cheap. Caching the previous handle and result allow skipping pricy lookups when the same handle is requested multiple times in a row. This makes the getters disappear from synthetic benchmarks such as bevymark making it a pretty large win there, although in a more realistic setup the wins will likely be more nucanced. In some ways it's a form of batching, but at the handle querying level. I don't know whether this would be better done at a higher level in the Assets logic directly or via some sort of generic helper, I suspect that other parts of bevy may benefit from similar tricks. --- crates/bevy_sprite/src/render/mod.rs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs index 3e5477f4b9f11..799acb5feeca1 100644 --- a/crates/bevy_sprite/src/render/mod.rs +++ b/crates/bevy_sprite/src/render/mod.rs @@ -218,11 +218,22 @@ pub fn extract_sprites( ) { let mut extracted_sprites = render_world.get_resource_mut::().unwrap(); extracted_sprites.sprites.clear(); - for (computed_visibility, sprite, transform, handle) in sprite_query.iter() { + + // `get` is not cheap. Cache the previous image and handle and bypass `get` when requesting the + // same handle multiple times in a row. + let mut current_image_handle = None; + let mut current_image = None; + for (computed_visibility, sprite, transform, image_handle) in sprite_query.iter() { if !computed_visibility.is_visible { continue; } - if let Some(image) = images.get(handle) { + + if current_image_handle != Some(image_handle) { + current_image = images.get(image_handle); + current_image_handle = Some(image_handle); + }; + + if let Some(image) = current_image { let size = image.texture_descriptor.size; extracted_sprites.sprites.push(ExtractedSprite { @@ -237,15 +248,24 @@ pub fn extract_sprites( }, flip_x: sprite.flip_x, flip_y: sprite.flip_y, - handle: handle.clone_weak(), + handle: image_handle.clone_weak(), }); }; } + + let mut current_atlas_handle = None; + let mut current_atlas = None; for (computed_visibility, atlas_sprite, transform, texture_atlas_handle) in atlas_query.iter() { if !computed_visibility.is_visible { continue; } - if let Some(texture_atlas) = texture_atlases.get(texture_atlas_handle) { + + if current_atlas_handle != Some(texture_atlas_handle) { + current_atlas = texture_atlases.get(texture_atlas_handle); + current_atlas_handle = Some(texture_atlas_handle); + }; + + if let Some(texture_atlas) = current_atlas { if images.contains(&texture_atlas.texture) { let rect = texture_atlas.textures[atlas_sprite.index as usize]; extracted_sprites.sprites.push(ExtractedSprite { From 8ba8af4801e55bd6079df8142331eaaa0dae31fb Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Sat, 8 Jan 2022 12:21:22 +0100 Subject: [PATCH 2/2] bevy_sprite: avoid unnecessary data movement when extractign sprites. ExtractedSprite is a rather large struct. Large enough to be moved via memcpy. Pushing into a vector by default forces the value to exist on the stack before it is moved into the vector storage. This is because Vec::push may have to attempt a memory allocation which could panic before writing the data into the stoage. The initialization of the extracted sprite cannot be reordered with the potentially panicking operation, preventing rustc/llvm to initialize the ExtractedSprite struct directly into the vector's storage. Fortunately there is a very simple crate to work around this: copyless. What it does is to split the push operation into the allocation and the write, so that the payload can be initialized after the allocation. This lets llvm optimize away the move. With this change, ExtractedSprite is initialized directly into the vector storage and the memcpy disappears from profiles. --- crates/bevy_sprite/Cargo.toml | 1 + crates/bevy_sprite/src/render/mod.rs | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/bevy_sprite/Cargo.toml b/crates/bevy_sprite/Cargo.toml index 53b70abeb8d9f..6b81e4cff2f9e 100644 --- a/crates/bevy_sprite/Cargo.toml +++ b/crates/bevy_sprite/Cargo.toml @@ -30,3 +30,4 @@ guillotiere = "0.6.0" thiserror = "1.0" rectangle-pack = "0.4" serde = { version = "1", features = ["derive"] } +copyless = "0.1.5" diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs index 799acb5feeca1..68cd750516525 100644 --- a/crates/bevy_sprite/src/render/mod.rs +++ b/crates/bevy_sprite/src/render/mod.rs @@ -1,5 +1,7 @@ use std::{cmp::Ordering, ops::Range}; +use copyless::VecHelper; + use crate::{ texture_atlas::{TextureAtlas, TextureAtlasSprite}, Rect, Sprite, SPRITE_SHADER_HANDLE, @@ -236,7 +238,7 @@ pub fn extract_sprites( if let Some(image) = current_image { let size = image.texture_descriptor.size; - extracted_sprites.sprites.push(ExtractedSprite { + extracted_sprites.sprites.alloc().init(ExtractedSprite { atlas_size: None, color: sprite.color, transform: transform.compute_matrix(), @@ -268,7 +270,7 @@ pub fn extract_sprites( if let Some(texture_atlas) = current_atlas { if images.contains(&texture_atlas.texture) { let rect = texture_atlas.textures[atlas_sprite.index as usize]; - extracted_sprites.sprites.push(ExtractedSprite { + extracted_sprites.sprites.alloc().init(ExtractedSprite { atlas_size: Some(texture_atlas.size), color: atlas_sprite.color, transform: transform.compute_matrix(),