Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GpuArrayBuffer and BatchedUniformBuffer #8204

Merged
merged 27 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
40e9985
Add GpuList and BatchedUniformBuffer
JMS55 Mar 25, 2023
c11d4f2
Clippy lint
JMS55 Mar 25, 2023
8688472
Add GpuList and BatchedUniformBuffer
superdump Mar 25, 2023
1c94dcd
Merge branch 'gpu-list' of https://github.com/JMS55/bevy into gpu-list
JMS55 Mar 31, 2023
dc39eb5
Update crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
JMS55 Apr 24, 2023
f88ccc4
Update crates/bevy_render/src/render_resource/gpu_list.rs
JMS55 Apr 24, 2023
cdbbad2
Update crates/bevy_render/src/render_resource/gpu_list.rs
JMS55 Apr 24, 2023
94a58ec
Update crates/bevy_render/src/render_resource/gpu_list.rs
JMS55 Apr 24, 2023
c5357de
Update crates/bevy_render/src/render_resource/gpu_list.rs
JMS55 Apr 24, 2023
70254d4
Update crates/bevy_render/src/render_resource/gpu_list.rs
JMS55 Apr 24, 2023
7f7101d
Update crates/bevy_render/src/render_resource/gpu_list.rs
JMS55 Apr 24, 2023
66e48d7
Update crates/bevy_render/src/render_resource/storage_buffer.rs
JMS55 Apr 24, 2023
00fb9b5
Update crates/bevy_render/src/render_resource/storage_buffer.rs
JMS55 Apr 24, 2023
3144b42
Update crates/bevy_render/src/render_resource/uniform_buffer.rs
JMS55 Apr 24, 2023
658568a
Update crates/bevy_render/src/render_resource/gpu_list.rs
superdump May 1, 2023
eb93067
Update crates/bevy_render/src/render_resource/gpu_list.rs
superdump May 1, 2023
1d10195
Merge branch 'main' into gpu-list-main
superdump May 1, 2023
68f7a8e
Fixes to buffer sizes
superdump May 1, 2023
05c723f
Fix after merge from main
superdump May 1, 2023
37fdbd4
Add credit to Teoxoy for MaxCapacityArray
superdump May 1, 2023
f5ca55d
Clarify logic around max_storage_buffers_per_shader_stage
superdump May 1, 2023
7765c86
Merge commit '1e73312e49fc90479d8c9c645ffd85a59233067c' into gpu-list
JMS55 Jun 26, 2023
9f4f027
Lower MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE on WebGL2
JMS55 Jun 26, 2023
973b8bb
Rename GpuList -> GpuArrayBuffer
JMS55 Jun 26, 2023
d65cab4
Update crates/bevy_render/src/render_resource/gpu_array_buffer.rs
JMS55 Jun 26, 2023
499e3a2
Add internal documentation of BatchedUniformBuffer members
superdump Jul 21, 2023
df66b85
BatchedUniformBuffer: Optimize rounding code
konsolas Jul 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions crates/bevy_render/src/gpu_component_array_buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use crate::{
render_resource::{GpuArrayBuffer, GpuArrayBufferable},
renderer::{RenderDevice, RenderQueue},
Render, RenderApp, RenderSet,
};
use bevy_app::{App, Plugin};
use bevy_ecs::{
prelude::{Component, Entity},
schedule::IntoSystemConfigs,
system::{Commands, Query, Res, ResMut},
};
use std::marker::PhantomData;

/// Plugin that uploads every component of type `C` to the GPU each frame by
/// collecting them into a [`GpuArrayBuffer`].
///
/// The type parameter is only a marker; the plugin itself stores no data.
pub struct GpuComponentArrayBufferPlugin<C>(PhantomData<C>)
where
    C: Component + GpuArrayBufferable;

impl<C: Component + GpuArrayBufferable> Plugin for GpuComponentArrayBufferPlugin<C> {
    /// Inserts a `GpuArrayBuffer<C>` resource into the render sub-app and
    /// schedules the system that refills it in `RenderSet::Prepare`.
    fn build(&self, app: &mut App) {
        // Skip all setup when the `RenderApp` sub-app cannot be fetched.
        let render_app = match app.get_sub_app_mut(RenderApp) {
            Ok(render_app) => render_app,
            Err(_) => return,
        };

        // The buffer flavour (uniform vs. storage) is chosen from the device limits.
        let buffer = GpuArrayBuffer::<C>::new(render_app.world.resource::<RenderDevice>());
        render_app.insert_resource(buffer).add_systems(
            Render,
            prepare_gpu_component_array_buffers::<C>.in_set(RenderSet::Prepare),
        );
    }
}

impl<C: Component + GpuArrayBufferable> Default for GpuComponentArrayBufferPlugin<C> {
fn default() -> Self {
Self(PhantomData::<C>)
}
}

/// Rebuilds the `GpuArrayBuffer<C>` from scratch: clears it, pushes a clone of
/// every `C` component, attaches the resulting [`GpuArrayBufferIndex`] to the
/// owning entity, and finally writes the buffer to the GPU.
fn prepare_gpu_component_array_buffers<C: Component + GpuArrayBufferable>(
    mut commands: Commands,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut gpu_array_buffer: ResMut<GpuArrayBuffer<C>>,
    components: Query<(Entity, &C)>,
) {
    gpu_array_buffer.clear();

    // Pair each entity with the index its component received in the buffer.
    let mut indexed_entities = Vec::new();
    for (entity, component) in components.iter() {
        let buffer_index = gpu_array_buffer.push(component.clone());
        indexed_entities.push((entity, buffer_index));
    }
    commands.insert_or_spawn_batch(indexed_entities);

    gpu_array_buffer.write_buffer(&render_device, &render_queue);
}
1 change: 1 addition & 0 deletions crates/bevy_render/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub mod extract_component;
mod extract_param;
pub mod extract_resource;
pub mod globals;
pub mod gpu_component_array_buffer;
pub mod mesh;
pub mod pipelined_rendering;
pub mod primitives;
Expand Down
152 changes: 152 additions & 0 deletions crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
use super::{GpuArrayBufferIndex, GpuArrayBufferable};
use crate::{
render_resource::DynamicUniformBuffer,
renderer::{RenderDevice, RenderQueue},
};
use encase::{
private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer},
ShaderType,
};
use std::{marker::PhantomData, num::NonZeroU64};
use wgpu::{BindingResource, Limits};

// Upper bound (1 MiB) applied to the device's `max_uniform_buffer_binding_size`.
// Without this cap we would make really large arrays on macOS, which reports a
// very large `max_uniform_buffer_binding_size`. On macOS this ends up being the
// minimum size of the uniform buffer as well as the size of each chunk of data
// at a dynamic offset.
#[cfg(any(not(feature = "webgl"), not(target_arch = "wasm32")))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1 << 20;

// WebGL2 quirk: using uniform buffers larger than 4 KiB will cause extremely
// long shader compilation times, so the cap needs to be lower on WebGL2.
// This is due to older shader compilers/GPUs that don't support dynamically
// indexing uniform buffers, and instead emulate it with large switch statements
// over buffer indices that take a long time to compile.
#[cfg(all(feature = "webgl", target_arch = "wasm32"))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1 << 12;

/// Similar to [`DynamicUniformBuffer`], except every N elements (depending on size)
/// are grouped into a batch as an `array<T, N>` in WGSL.
JMS55 marked this conversation as resolved.
Show resolved Hide resolved
///
/// This reduces the number of rebindings required due to having to pass dynamic
/// offsets to bind group commands, and if indices into the array can be passed
/// in via other means, it enables batching of draw commands.
pub struct BatchedUniformBuffer<T: GpuArrayBufferable> {
// Batches of fixed-size arrays of T are written to this buffer so that
// each batch in a fixed-size array can be bound at a dynamic offset.
uniforms: DynamicUniformBuffer<MaxCapacityArray<Vec<T>>>,
// A batch of T are gathered into this `MaxCapacityArray` until it is full,
// then it is written into the `DynamicUniformBuffer`, cleared, and new T
// are gathered here, and so on for each batch.
temp: MaxCapacityArray<Vec<T>>,
superdump marked this conversation as resolved.
Show resolved Hide resolved
current_offset: u32,
dynamic_offset_alignment: u32,
}

impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> {
    /// Number of `T` elements that go into one batch: the device's
    /// `max_uniform_buffer_binding_size`, capped at
    /// `MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE`, divided by `T::min_size()`.
    pub fn batch_size(limits: &Limits) -> usize {
        let binding_size = limits
            .max_uniform_buffer_binding_size
            .min(MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE);
        let element_size = T::min_size().get();
        (binding_size as u64 / element_size) as usize
    }

    /// Creates an empty buffer whose batch capacity and dynamic offset
    /// alignment are derived from `limits`.
    pub fn new(limits: &Limits) -> Self {
        let batch_capacity = Self::batch_size(limits);
        let offset_alignment = limits.min_uniform_buffer_offset_alignment;
        let uniforms = DynamicUniformBuffer::new_with_alignment(offset_alignment as u64);
        let temp = MaxCapacityArray(Vec::with_capacity(batch_capacity), batch_capacity);

        Self {
            uniforms,
            temp,
            current_offset: 0,
            dynamic_offset_alignment: offset_alignment,
        }
    }

    /// Size of one batch as reported by the staging `MaxCapacityArray`, which
    /// is based on its capacity rather than its current length.
    #[inline]
    pub fn size(&self) -> NonZeroU64 {
        self.temp.size()
    }

    /// Drops all written batches and any partially gathered batch, and resets
    /// the running dynamic offset to zero.
    pub fn clear(&mut self) {
        self.uniforms.clear();
        self.current_offset = 0;
        self.temp.0.clear();
    }

    /// Appends `component` to the batch being gathered and returns where a
    /// shader can find it: its position inside the batch plus the dynamic
    /// offset of that batch. A batch that becomes full is flushed immediately.
    pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> {
        // Capture the location before the push (and a possible flush) changes it.
        let index = self.temp.0.len() as u32;
        let dynamic_offset = Some(self.current_offset);

        self.temp.0.push(component);
        // NOTE(review): if the batch capacity were 0 this equality could never
        // hold after a push — confirm that `batch_size` cannot return 0 in practice.
        let batch_is_full = self.temp.0.len() == self.temp.1;
        if batch_is_full {
            self.flush();
        }

        GpuArrayBufferIndex {
            index,
            dynamic_offset,
            element_type: PhantomData,
        }
    }

    /// Moves the gathered batch into the dynamic uniform buffer, advances the
    /// running offset by the batch size rounded up to the offset alignment,
    /// and empties the staging batch.
    pub fn flush(&mut self) {
        let batch_size = self.temp.size().get();
        let alignment = self.dynamic_offset_alignment as u64;

        self.uniforms.push(self.temp.clone());
        self.current_offset += align_to_next(batch_size, alignment) as u32;
        self.temp.0.clear();
    }

    /// Flushes a partially filled batch, if any, then writes the underlying
    /// dynamic uniform buffer via `device` and `queue`.
    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
        let has_pending_elements = !self.temp.0.is_empty();
        if has_pending_elements {
            self.flush();
        }
        self.uniforms.write_buffer(device, queue);
    }

    /// Returns the binding of the underlying buffer, with a buffer binding's
    /// size overridden to the size of one batch.
    #[inline]
    pub fn binding(&self) -> Option<BindingResource> {
        self.uniforms.binding().map(|resource| match resource {
            BindingResource::Buffer(mut buffer_binding) => {
                // MaxCapacityArray is runtime-sized so can't use T::min_size()
                buffer_binding.size = Some(self.size());
                BindingResource::Buffer(buffer_binding)
            }
            other => other,
        })
    }
}

/// Rounds `value` up to the nearest multiple of `alignment`.
///
/// `alignment` must be a power of two; this is checked in debug builds only.
/// A `value` that is already a multiple of `alignment` — including zero — is
/// returned unchanged.
#[inline]
fn align_to_next(value: u64, alignment: u64) -> u64 {
    // `is_power_of_two` rejects 0 without underflowing, unlike `a & (a - 1) == 0`.
    debug_assert!(alignment.is_power_of_two());
    let mask = alignment - 1;
    // Adding the mask before clearing the low bits avoids the `value - 1`
    // underflow of the decrement-or-increment form when `value` is 0.
    (value + mask) & !mask
}

// ----------------------------------------------------------------------------
// MaxCapacityArray was implemented by Teodor Tanasoaia for encase. It was
// copied here as it was not yet included in an encase release and it is
// unclear if it is the correct long-term solution for encase.

/// Pairs a runtime-sized array (field `0`) with a maximum element count
/// (field `1`).
///
/// The reported `ShaderType::size` is derived from the maximum element count
/// rather than from the current length, and writing asserts (in debug builds)
/// that the length does not exceed that maximum.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
struct MaxCapacityArray<T>(T, usize);

impl<T> ShaderType for MaxCapacityArray<T>
where
    T: ShaderType<ExtraMetadata = ArrayMetadata>,
{
    type ExtraMetadata = ArrayMetadata;

    // Layout metadata is taken unchanged from the wrapped array type.
    const METADATA: Metadata<Self::ExtraMetadata> = T::METADATA;

    /// Size of the array at full capacity: the element stride multiplied by
    /// the maximum element count, where a maximum of zero is treated as one.
    fn size(&self) -> ::core::num::NonZeroU64 {
        let element_count = self.1.max(1) as u64;
        let full_size = Self::METADATA.stride().mul(element_count);
        full_size.0
    }
}

impl<T> WriteInto for MaxCapacityArray<T>
where
    T: WriteInto + RuntimeSizedArray,
{
    /// Writes the wrapped array as-is; only the elements currently present
    /// are handed to the writer.
    fn write_into<B: BufferMut>(&self, writer: &mut Writer<B>) {
        let Self(elements, max_len) = self;
        // Holding more elements than the declared maximum is a logic error.
        debug_assert!(elements.len() <= *max_len);
        elements.write_into(writer);
    }
}
Comment on lines +128 to +152
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code was written by @teoxoy so we need to add credit for them to the commit that introduces it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! If this is ready for production I can merge the branch in encase and do a release.
Let me know!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It works fine for us. :) There is that other aspect of being able to start the next dynamic offset binding of a uniform buffer at the next dynamic offset alignment if not all space is used, and ensure that the final binding is full-size. I don't know if that would clash with this and basically immediately deprecate this approach. If so maybe you'd prefer that we use a solution in bevy for what we need and add the long-term and more flexible solution to encase when someone gets to it. What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I won't block the PR on this. We can figure it out over time. :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to rebase to give credit on the original commit but due to merges it was a pain. I instead added a comment and a co-authored-by so that when the squash merge is done, the credit will follow along with it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, we can further iterate and see what we come up with. Thanks for the credit!

2 changes: 2 additions & 0 deletions crates/bevy_render/src/render_resource/buffer_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ use wgpu::BufferUsages;
/// from system RAM to VRAM.
///
/// Other options for storing GPU-accessible data are:
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
pub struct BufferVec<T: Pod> {
Expand Down
129 changes: 129 additions & 0 deletions crates/bevy_render/src/render_resource/gpu_array_buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
use super::StorageBuffer;
use crate::{
render_resource::batched_uniform_buffer::BatchedUniformBuffer,
renderer::{RenderDevice, RenderQueue},
};
use bevy_ecs::{prelude::Component, system::Resource};
use encase::{private::WriteInto, ShaderSize, ShaderType};
use std::{marker::PhantomData, mem};
use wgpu::{BindGroupLayoutEntry, BindingResource, BindingType, BufferBindingType, ShaderStages};

/// Marker trait for element types that can be stored in a [`GpuArrayBuffer`].
///
/// It has no methods of its own and is implemented automatically for every
/// type that satisfies the supertrait bounds.
pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {}

impl<T> GpuArrayBufferable for T where T: ShaderType + ShaderSize + WriteInto + Clone {}

/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array.
///
/// On platforms that support storage buffers, this is equivalent to [`StorageBuffer<Vec<T>>`].
/// Otherwise, this falls back to a dynamic offset uniform buffer with the largest
/// array of T that fits within a uniform buffer binding (within reasonable limits).
///
/// Other options for storing GPU-accessible data are:
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
#[derive(Resource)]
pub enum GpuArrayBuffer<T: GpuArrayBufferable> {
    /// Fallback chosen when the device reports
    /// `max_storage_buffers_per_shader_stage == 0`: elements are grouped into
    /// batches that are bound at dynamic offsets.
    Uniform(BatchedUniformBuffer<T>),
    /// Storage-buffer path: the `Vec<T>` collects pushed elements and is moved
    /// into the `StorageBuffer` when the buffer is written.
    Storage((StorageBuffer<Vec<T>>, Vec<T>)),
}

impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
    /// Creates an empty buffer, picking the uniform-buffer fallback when the
    /// device reports no storage buffers per shader stage and the storage
    /// buffer variant otherwise.
    pub fn new(device: &RenderDevice) -> Self {
        let limits = device.limits();
        if limits.max_storage_buffers_per_shader_stage == 0 {
            GpuArrayBuffer::Uniform(BatchedUniformBuffer::new(&limits))
        } else {
            GpuArrayBuffer::Storage((StorageBuffer::default(), Vec::new()))
        }
    }

    /// Removes all elements gathered so far.
    ///
    /// For the storage variant only the staging `Vec` is cleared; the
    /// `StorageBuffer` itself is left untouched until the next
    /// [`write_buffer`](Self::write_buffer).
    pub fn clear(&mut self) {
        match self {
            GpuArrayBuffer::Uniform(buffer) => buffer.clear(),
            GpuArrayBuffer::Storage((_, buffer)) => buffer.clear(),
        }
    }

    /// Appends `value` and returns the index a shader uses to find it.
    ///
    /// The storage variant returns the element's position in the array with
    /// no dynamic offset; the uniform variant delegates to
    /// `BatchedUniformBuffer::push`.
    pub fn push(&mut self, value: T) -> GpuArrayBufferIndex<T> {
        match self {
            GpuArrayBuffer::Uniform(buffer) => buffer.push(value),
            GpuArrayBuffer::Storage((_, buffer)) => {
                let index = buffer.len() as u32;
                buffer.push(value);
                GpuArrayBufferIndex {
                    index,
                    dynamic_offset: None,
                    element_type: PhantomData,
                }
            }
        }
    }

    /// Writes the gathered elements via `device` and `queue`.
    ///
    /// For the storage variant the staged elements are moved into the
    /// `StorageBuffer`, which leaves the staging `Vec` empty afterwards.
    pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
        match self {
            GpuArrayBuffer::Uniform(buffer) => buffer.write_buffer(device, queue),
            GpuArrayBuffer::Storage((buffer, vec)) => {
                buffer.set(mem::take(vec));
                buffer.write_buffer(device, queue);
            }
        }
    }

    /// Builds the bind group layout entry matching the variant that
    /// [`new`](Self::new) would pick for `device`: a dynamic-offset uniform
    /// buffer, or a read-only storage buffer.
    pub fn binding_layout(
        binding: u32,
        visibility: ShaderStages,
        device: &RenderDevice,
    ) -> BindGroupLayoutEntry {
        BindGroupLayoutEntry {
            binding,
            visibility,
            ty: if device.limits().max_storage_buffers_per_shader_stage == 0 {
                BindingType::Buffer {
                    ty: BufferBindingType::Uniform,
                    has_dynamic_offset: true,
                    // BatchedUniformBuffer uses a MaxCapacityArray that is runtime-sized, so we use
                    // None here and let wgpu figure out the size.
                    min_binding_size: None,
                }
            } else {
                BindingType::Buffer {
                    ty: BufferBindingType::Storage { read_only: true },
                    has_dynamic_offset: false,
                    min_binding_size: Some(T::min_size()),
                }
            },
            count: None,
        }
    }

    /// Returns the binding resource of the underlying buffer, as reported by
    /// the active variant.
    pub fn binding(&self) -> Option<BindingResource> {
        match self {
            GpuArrayBuffer::Uniform(buffer) => buffer.binding(),
            GpuArrayBuffer::Storage((buffer, _)) => buffer.binding(),
        }
    }

    /// Returns the number of elements per batch when the uniform-buffer
    /// fallback applies to `device`, or `None` when storage buffers are used.
    pub fn batch_size(device: &RenderDevice) -> Option<u32> {
        let limits = device.limits();
        if limits.max_storage_buffers_per_shader_stage == 0 {
            Some(BatchedUniformBuffer::<T>::batch_size(&limits) as u32)
        } else {
            None
        }
    }
}

/// An index into a [`GpuArrayBuffer`] for a given element.
#[derive(Component)]
pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
    /// The index to use in a shader into the array.
    pub index: u32,
    /// The dynamic offset to use when setting the bind group in a pass.
    /// Only used on platforms that don't support storage buffers.
    pub dynamic_offset: Option<u32>,
    /// Marker tying this index to the element type `T`; it holds no data.
    pub element_type: PhantomData<T>,
}
3 changes: 3 additions & 0 deletions crates/bevy_render/src/render_resource/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
mod batched_uniform_buffer;
mod bind_group;
mod bind_group_layout;
mod buffer;
mod buffer_vec;
mod gpu_array_buffer;
mod pipeline;
mod pipeline_cache;
mod pipeline_specializer;
Expand All @@ -15,6 +17,7 @@ pub use bind_group::*;
pub use bind_group_layout::*;
pub use buffer::*;
pub use buffer_vec::*;
pub use gpu_array_buffer::*;
pub use pipeline::*;
pub use pipeline_cache::*;
pub use pipeline_specializer::*;
Expand Down
2 changes: 2 additions & 0 deletions crates/bevy_render/src/render_resource/storage_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsa
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
Expand Down Expand Up @@ -154,6 +155,7 @@ impl<T: ShaderType + WriteInto> StorageBuffer<T> {
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
/// * [`BufferVec`](crate::render_resource::BufferVec)
/// * [`Texture`](crate::render_resource::Texture)
///
Expand Down
Loading