Skip to content

Commit

Permalink
ensure safety of indirect dispatch
Browse files Browse the repository at this point in the history
by injecting a compute shader that validates the content of the indirect buffer

also adds missing indirect buffer offset validation
  • Loading branch information
teoxoy committed Jun 11, 2024
1 parent be4eabc commit abeb863
Show file tree
Hide file tree
Showing 20 changed files with 861 additions and 21 deletions.
2 changes: 2 additions & 0 deletions deno_webgpu/binding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ pub fn op_webgpu_create_pipeline_layout(
label: Some(label),
bind_group_layouts: Cow::from(bind_group_layouts),
push_constant_ranges: Default::default(),
ignore_push_constant_check: false,
};

gfx_put!(device => instance.device_create_pipeline_layout(
Expand Down Expand Up @@ -288,6 +289,7 @@ pub fn op_webgpu_create_bind_group(
buffer_id: buffer_resource.1,
offset: entry.offset.unwrap_or(0),
size: std::num::NonZeroU64::new(entry.size.unwrap_or(0)),
allow_indirect_as_storage: false,
},
)
}
Expand Down
1 change: 1 addition & 0 deletions deno_webgpu/shader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub fn op_webgpu_create_shader_module(
let descriptor = wgpu_core::pipeline::ShaderModuleDescriptor {
label: Some(label),
shader_bound_checks: wgpu_types::ShaderBoundChecks::default(),
ignore_push_constant_check: false,
};

gfx_put!(device => instance.device_create_shader_module(
Expand Down
197 changes: 197 additions & 0 deletions tests/tests/dispatch_workgroups_indirect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext};

/// Make sure that the num_workgroups builtin works properly (it requires a workaround on D3D12).
#[gpu_test]
static NUM_WORKGROUPS_BUILTIN: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .downlevel_flags(
                wgpu::DownlevelFlags::COMPUTE_SHADERS | wgpu::DownlevelFlags::INDIRECT_EXECUTION,
            )
            .limits(wgpu::Limits::downlevel_defaults())
            // NOTE(review): the `num_workgroups` builtin is expected to be
            // broken for indirect dispatches on the DX12 backend, hence the
            // expected failure. Use the `wgpu::` facade path (`wgpu::Backends`
            // re-exports `wgt::Backends`) for consistency with the rest of
            // this file, which never references `wgt::` directly.
            .expect_fail(FailureCase::backend(wgpu::Backends::DX12)),
    )
    .run_async(|ctx| async move {
        // The shader reports the builtin's value; it must round-trip exactly.
        let num_workgroups = [1, 2, 3];
        let res = run_test(&ctx, &num_workgroups, false).await;
        assert_eq!(res, num_workgroups);
    });

/// Make sure that we discard (don't run) the dispatch if its size exceeds the device limit.
#[gpu_test]
static DISCARD_DISPATCH: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .downlevel_flags(
                wgpu::DownlevelFlags::COMPUTE_SHADERS | wgpu::DownlevelFlags::INDIRECT_EXECUTION,
            )
            .limits(wgpu::Limits {
                max_compute_workgroups_per_dimension: 10,
                ..wgpu::Limits::downlevel_defaults()
            }),
    )
    .run_async(|ctx| async move {
        let limit = ctx.device.limits().max_compute_workgroups_per_dimension;

        // A dispatch exactly at the limit in every dimension must run.
        let res = run_test(&ctx, &[limit, limit, limit], false).await;
        assert_eq!(res, [limit; 3]);

        // Exceeding the limit in any single dimension must discard the whole
        // dispatch: the shader never runs, so the output stays all-zero.
        for axis in 0..3 {
            let mut workgroups = [1u32; 3];
            workgroups[axis] = limit + 1;
            let res = run_test(&ctx, &workgroups, false).await;
            assert_eq!(res, [0; 3]);
        }
    });

/// Make sure that unsetting the bind group set by the validation code works properly.
#[gpu_test]
static UNSET_INTERNAL_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .downlevel_flags(
                wgpu::DownlevelFlags::COMPUTE_SHADERS | wgpu::DownlevelFlags::INDIRECT_EXECUTION,
            )
            .limits(wgpu::Limits::downlevel_defaults()),
    )
    .run_async(|ctx| async move {
        ctx.device.push_error_scope(wgpu::ErrorFilter::Validation);

        // Deliberately skip the user's `set_bind_group`. If the internally
        // injected validation bind group were left bound, the dispatch would
        // silently use it; instead we must get a "missing bind group" error.
        let _ = run_test(&ctx, &[0, 0, 0], true).await;

        // `.await` the error-scope future directly: calling
        // `pollster::block_on` inside an async task blocks the test executor's
        // thread and can deadlock single-threaded runtimes.
        let error = ctx.device.pop_error_scope().await;
        assert!(error.map_or(false, |error| format!("{error}")
            .contains("Expected bind group is missing")));
    });

/// Dispatches the test shader via `dispatch_workgroups_indirect` and returns
/// the values of the `num_workgroups` builtin observed by the shader.
///
/// The dispatch is repeated for a set of (indirect offset, indirect buffer
/// size) combinations so the indirect-validation machinery sees source
/// bindings of different sizes and alignments; every combination must yield
/// the same result, which is then returned.
///
/// When `forget_to_set_bind_group` is true, group 0 is deliberately left
/// unbound so callers can verify that the internally injected validation bind
/// group does not leak into the user dispatch.
async fn run_test(
    ctx: &TestingContext,
    num_workgroups: &[u32; 3],
    forget_to_set_bind_group: bool,
) -> [u32; 3] {
    // The shader copies the `num_workgroups` builtin into a storage buffer.
    // Only the (0, 0, 0) workgroup writes, so multiple workgroups don't race.
    const SHADER_SRC: &str = "
@group(0) @binding(0)
var<storage, read_write> out: array<u32, 3>;
@compute @workgroup_size(1)
fn main(@builtin(num_workgroups) num_workgroups: vec3<u32>, @builtin(workgroup_id) workgroup_id: vec3<u32>) {
if (all(workgroup_id == vec3<u32>())) {
out[0] = num_workgroups.x;
out[1] = num_workgroups.y;
out[2] = num_workgroups.z;
}
}
";

    let module = ctx
        .device
        .create_shader_module(wgpu::ShaderModuleDescriptor {
            label: None,
            source: wgpu::ShaderSource::Wgsl(SHADER_SRC.into()),
        });

    // `layout: None` derives the pipeline layout from the shader module.
    let pipeline = ctx
        .device
        .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: None,
            layout: None,
            module: &module,
            entry_point: "main",
            compilation_options: Default::default(),
            cache: None,
        });

    // GPU-side output: three u32s (12 bytes) written by the shader.
    let out_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: None,
        size: 12,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    });

    // Mappable staging buffer used to read `out_buffer` back on the CPU.
    let readback_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: None,
        size: 12,
        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
        mapped_at_creation: false,
    });

    let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &pipeline.get_bind_group_layout(0),
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: out_buffer.as_entire_binding(),
        }],
    });

    // Result of the first iteration; every later iteration must match it.
    let mut res = None;

    // Offsets/sizes chosen around 256-byte boundaries (presumably the
    // backend's binding-offset alignment — confirm against the validation
    // pass) so the internal source-buffer binding takes different sizes; the
    // inline comments note the expected internal binding size.
    for (indirect_offset, indirect_buffer_size) in [
        // internal src buffer binding size will be buffer.size
        (0, 12),
        (4, 4 + 12),
        (4, 8 + 12),
        (256 * 2 - 4 - 12, 256 * 2 - 4),
        // internal src buffer binding size will be 256 * 2 + x
        (0, 256 * 2 * 2 + 4),
        (256, 256 * 2 * 2 + 8),
        (256 + 4, 256 * 2 * 2 + 12),
        (256 * 2 + 16, 256 * 2 * 2 + 16),
        (256 * 2 * 2, 256 * 2 * 2 + 32),
        (256 + 12, 256 * 2 * 2 + 64),
    ] {
        let indirect_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
            label: None,
            size: indirect_buffer_size,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::INDIRECT,
            mapped_at_creation: false,
        });

        // Place the three dispatch arguments at the offset under test.
        ctx.queue.write_buffer(
            &indirect_buffer,
            indirect_offset,
            bytemuck::bytes_of(num_workgroups),
        );

        let mut encoder = ctx
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
        {
            let mut compute_pass =
                encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
            compute_pass.set_pipeline(&pipeline);
            if !forget_to_set_bind_group {
                compute_pass.set_bind_group(0, &bind_group, &[]);
            }
            compute_pass.dispatch_workgroups_indirect(&indirect_buffer, indirect_offset);
        }

        // Copy the shader's output into the mappable staging buffer.
        encoder.copy_buffer_to_buffer(&out_buffer, 0, &readback_buffer, 0, 12);

        ctx.queue.submit(Some(encoder.finish()));

        // Request the map, then poll until the GPU work completes.
        readback_buffer
            .slice(..)
            .map_async(wgpu::MapMode::Read, |_| {});

        ctx.async_poll(wgpu::Maintain::wait())
            .await
            .panic_on_timeout();

        let view = readback_buffer.slice(..).get_mapped_range();

        let current_res = *bytemuck::from_bytes(&view);
        // The mapped view must be dropped before `unmap`.
        drop(view);
        readback_buffer.unmap();

        // All offset/size combinations must observe identical builtin values.
        if let Some(past_res) = res {
            assert_eq!(past_res, current_res);
        } else {
            res = Some(current_res);
        }
    }

    res.unwrap()
}
1 change: 1 addition & 0 deletions tests/tests/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod clear_texture;
mod compute_pass_ownership;
mod create_surface_error;
mod device;
mod dispatch_workgroups_indirect;
mod encoder;
mod external_texture;
mod float32_filterable;
Expand Down
4 changes: 4 additions & 0 deletions wgpu-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ renderdoc = ["hal/renderdoc"]
## to the validation carried out at public APIs in all builds.
strict_asserts = ["wgt/strict_asserts"]

## Validates indirect draw/dispatch calls. This will also enable naga's
## WGSL frontend since we use a WGSL compute shader to do the validation.
indirect-validation = ["naga/wgsl-in"]

## Enables serialization via `serde` on common wgpu types.
serde = ["dep:serde", "wgt/serde", "arrayvec/serde"]

Expand Down
7 changes: 7 additions & 0 deletions wgpu-core/src/binding_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,10 @@ pub struct PipelineLayoutDescriptor<'a> {
/// [`Features::PUSH_CONSTANTS`](wgt::Features::PUSH_CONSTANTS) feature must
/// be enabled.
pub push_constant_ranges: Cow<'a, [wgt::PushConstantRange]>,
/// This is an internal flag used by indirect validation.
/// It allows usage of push constants without having the
/// [`Features::PUSH_CONSTANTS`](wgt::Features::PUSH_CONSTANTS) feature enabled.
pub ignore_push_constant_check: bool,
}

#[derive(Debug)]
Expand Down Expand Up @@ -758,6 +762,9 @@ pub struct BufferBinding {
pub buffer_id: BufferId,
pub offset: wgt::BufferAddress,
pub size: Option<wgt::BufferSize>,
/// This is an internal flag used by indirect validation.
/// It allows indirect buffers to be bound as storage buffers.
pub allow_indirect_as_storage: bool,
}

// Note: Duplicated in `wgpu-rs` as `BindingResource`
Expand Down
14 changes: 13 additions & 1 deletion wgpu-core/src/command/bind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ mod compat {
diff.push(format!("Expected {expected_bgl_type} bind group layout, got {assigned_bgl_type}"))
}
} else {
diff.push("Assigned bind group layout not found (internal error)".to_owned());
diff.push("Expected bind group is missing".to_owned());
}
} else {
diff.push("Expected bind group layout not found (internal error)".to_owned());
Expand Down Expand Up @@ -191,6 +191,10 @@ mod compat {
self.make_range(index)
}

pub fn unassign(&mut self, index: usize) {
self.entries[index].assigned = None;
}

pub fn list_active(&self) -> impl Iterator<Item = usize> + '_ {
self.entries
.iter()
Expand Down Expand Up @@ -358,6 +362,14 @@ impl<A: HalApi> Binder<A> {
&self.payloads[bind_range]
}

pub(super) fn unassign_group(&mut self, index: usize) {
log::trace!("\tBinding [{}] = null", index);

self.payloads[index].reset();

self.manager.unassign(index);
}

pub(super) fn list_active<'a>(&'a self) -> impl Iterator<Item = &'a Arc<BindGroup<A>>> + '_ {
let payloads = &self.payloads;
self.manager
Expand Down
31 changes: 31 additions & 0 deletions wgpu-core/src/command/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ pub enum ComputePassErrorInner {
InvalidQuerySet(id::QuerySetId),
#[error("Indirect buffer {0:?} is invalid or destroyed")]
InvalidIndirectBuffer(id::BufferId),
#[error("Indirect buffer offset {0:?} is not a multiple of 4")]
UnalignedIndirectBufferOffset(BufferAddress),
#[error("Indirect buffer uses bytes {offset}..{end_offset} which overruns indirect buffer of size {buffer_size}")]
IndirectBufferOverrun {
offset: u64,
Expand Down Expand Up @@ -473,6 +475,16 @@ impl Global {
.map_pass_err(pass_scope);
}

#[cfg(feature = "indirect-validation")]
let mut base = base;
#[cfg(feature = "indirect-validation")]
device
.indirect_validation
.get()
.unwrap()
.inject_dispatch_indirect_validation(device, &mut base)
.map_pass_err(pass_scope)?;

let mut cmd_buf_data = cmd_buf.data.lock();
let cmd_buf_data = cmd_buf_data.as_mut().unwrap();

Expand Down Expand Up @@ -654,6 +666,20 @@ impl Global {
}
}
}
ArcComputeCommand::UnsetBindGroup { index } => {
let scope = PassErrorScope::UnsetBindGroup(index);

let max_bind_groups = cmd_buf.limits.max_bind_groups;
if index >= max_bind_groups {
return Err(ComputePassErrorInner::BindGroupIndexOutOfRange {
index,
max: max_bind_groups,
})
.map_pass_err(scope);
}

state.binder.unassign_group(index as usize);
}
ArcComputeCommand::SetPipeline(pipeline) => {
let pipeline_id = pipeline.as_info().id();
let scope = PassErrorScope::SetPipelineCompute(pipeline_id);
Expand Down Expand Up @@ -811,6 +837,11 @@ impl Global {
check_buffer_usage(buffer_id, buffer.usage, wgt::BufferUsages::INDIRECT)
.map_pass_err(scope)?;

if offset % 4 != 0 {
return Err(ComputePassErrorInner::UnalignedIndirectBufferOffset(offset))
.map_pass_err(scope);
}

let end_offset = offset + mem::size_of::<wgt::DispatchIndirectArgs>() as u64;
if end_offset > buffer.size {
return Err(ComputePassErrorInner::IndirectBufferOverrun {
Expand Down
16 changes: 16 additions & 0 deletions wgpu-core/src/command/compute_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ pub enum ComputeCommand {
bind_group_id: id::BindGroupId,
},

UnsetBindGroup {
index: u32,
},

SetPipeline(id::ComputePipelineId),

/// Set a range of push constants to values stored in `push_constant_data`.
Expand Down Expand Up @@ -103,6 +107,10 @@ impl ComputeCommand {
})?,
},

ComputeCommand::UnsetBindGroup { index } => {
ArcComputeCommand::UnsetBindGroup { index }
}

ComputeCommand::SetPipeline(pipeline_id) => ArcComputeCommand::SetPipeline(
pipelines_guard
.get_owned(pipeline_id)
Expand Down Expand Up @@ -194,6 +202,10 @@ pub enum ArcComputeCommand<A: HalApi> {
bind_group: Arc<BindGroup<A>>,
},

UnsetBindGroup {
index: u32,
},

SetPipeline(Arc<ComputePipeline<A>>),

/// Set a range of push constants to values stored in `push_constant_data`.
Expand Down Expand Up @@ -261,6 +273,10 @@ impl<A: HalApi> From<&ArcComputeCommand<A>> for ComputeCommand {
bind_group_id: bind_group.as_info().id(),
},

ArcComputeCommand::UnsetBindGroup { index } => {
ComputeCommand::UnsetBindGroup { index: *index }
}

ArcComputeCommand::SetPipeline(pipeline) => {
ComputeCommand::SetPipeline(pipeline.as_info().id())
}
Expand Down
Loading

0 comments on commit abeb863

Please sign in to comment.