Allow "auto" layout args for the create_compute_pipeline #423

Merged: 19 commits, Nov 23, 2023
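As context for the diff below, here is a minimal sketch of the usage this PR enables: passing "auto" as the pipeline layout and querying the generated bind group layouts back from the pipeline. The shader, buffer, and variable names are illustrative and not part of the PR itself.

import wgpu
import wgpu.utils

shader_source = """
@group(0) @binding(0) var<storage,read_write> data: array<i32>;

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) index: vec3<u32>) {
    data[index.x] = data[index.x] + 1;
}
"""

device = wgpu.utils.get_default_device()
cshader = device.create_shader_module(code=shader_source)
buffer = device.create_buffer(size=16 * 4, usage=wgpu.BufferUsage.STORAGE)

# New in this PR: pass "auto" instead of an explicit GPUPipelineLayout.
pipeline = device.create_compute_pipeline(
    layout=wgpu.enums.AutoLayoutMode.auto,
    compute={"module": cshader, "entry_point": "main"},
)

# The layout is derived from the shader and can be queried per group index.
bind_group_layout = pipeline.get_bind_group_layout(0)
bind_group = device.create_bind_group(
    layout=bind_group_layout,
    entries=[
        {"binding": 0, "resource": {"buffer": buffer, "offset": 0, "size": buffer.size}},
    ],
)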
170 changes: 170 additions & 0 deletions tests/test_util_compute.py
@@ -285,6 +285,176 @@ def test_compute_indirect():
    assert out2[-2:] == [-1, -1]


def test_compute_default_layout1():
    compute_shader = """
        @group(0)
        @binding(0)
        var<storage,read> data1: array<i32>;

        @group(0)
        @binding(1)
        var<storage,read_write> data2: array<i32>;

        @compute
        @workgroup_size(1)
        fn main(@builtin(global_invocation_id) index: vec3<u32>) {
            let i = i32(index.x);
            data2[i] = data1[i] + 1;
        }
    """

    # Create an array of 100 random int32
    n = 100
    in1 = [int(random.uniform(0, 100)) for i in range(n)]
    in1 = (c_int32 * n)(*in1)

    # Create device and shader object
    device = wgpu.utils.get_default_device()
    cshader = device.create_shader_module(code=compute_shader)

    # Create input buffer and upload data to it
    buffer1 = device.create_buffer_with_data(data=in1, usage=wgpu.BufferUsage.STORAGE)

    # Create output buffer
    buffer2 = device.create_buffer(
        size=ctypes.sizeof(in1),
        usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_SRC,
    )

    # Create buffer to hold the dispatch parameters for the indirect call
    params = (ctypes.c_int32 * 3)(n - 2, 1, 1)  # note the minus 2!
    buffer3 = device.create_buffer_with_data(
        data=params,
        usage=wgpu.BufferUsage.INDIRECT,
    )

    # Setup bindings info
    bindings = [
        {
            "binding": 0,
            "resource": {"buffer": buffer1, "offset": 0, "size": buffer1.size},
        },
        {
            "binding": 1,
            "resource": {"buffer": buffer2, "offset": 0, "size": buffer2.size},
        },
    ]

    # Create a pipeline using "auto" layout mode
    compute_pipeline = device.create_compute_pipeline(
        layout=wgpu.enums.AutoLayoutMode.auto,
        compute={"module": cshader, "entry_point": "main"},
    )
    bind_group_layout = compute_pipeline.get_bind_group_layout(0)
    bind_group = device.create_bind_group(layout=bind_group_layout, entries=bindings)

    # Run the pipeline
    command_encoder = device.create_command_encoder()
    compute_pass = command_encoder.begin_compute_pass()
    compute_pass.set_pipeline(compute_pipeline)
    compute_pass.set_bind_group(0, bind_group, [], 0, 999999)  # last 2 args not used
    compute_pass.dispatch_workgroups_indirect(buffer3, 0)
    compute_pass.end()
    device.queue.submit([command_encoder.finish()])

    # Read result
    out1 = in1.__class__.from_buffer(device.queue.read_buffer(buffer2))
    in2 = list(in1)[:]
    out2 = [i - 1 for i in out1]
    # The shader was applied to all but the last two elements
    assert in2[:-2] == out2[:-2]
    assert out2[-2:] == [-1, -1]


def test_compute_default_layout2():
    # Default layout with multiple bind groups

    compute_shader = """
        @group(0)
        @binding(0)
        var<storage,read> data1: array<i32>;

        @group(1)
        @binding(0)
        var<storage,read_write> data2: array<i32>;

        @compute
        @workgroup_size(1)
        fn main(@builtin(global_invocation_id) index: vec3<u32>) {
            let i = i32(index.x);
            data2[i] = data1[i] + 1;
        }
    """

    # Create an array of 100 random int32
    n = 100
    in1 = [int(random.uniform(0, 100)) for i in range(n)]
    in1 = (c_int32 * n)(*in1)

    # Create device and shader object
    device = wgpu.utils.get_default_device()
    cshader = device.create_shader_module(code=compute_shader)

    # Create input buffer and upload data to it
    buffer1 = device.create_buffer_with_data(data=in1, usage=wgpu.BufferUsage.STORAGE)

    # Create output buffer
    buffer2 = device.create_buffer(
        size=ctypes.sizeof(in1),
        usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_SRC,
    )

    # Create buffer to hold the dispatch parameters for the indirect call
    params = (ctypes.c_int32 * 3)(n - 2, 1, 1)  # note the minus 2!
    buffer3 = device.create_buffer_with_data(
        data=params,
        usage=wgpu.BufferUsage.INDIRECT,
    )

    # Setup bindings info
    bindings0 = [
        {
            "binding": 0,
            "resource": {"buffer": buffer1, "offset": 0, "size": buffer1.size},
        },
    ]
    bindings1 = [
        {
            "binding": 0,
            "resource": {"buffer": buffer2, "offset": 0, "size": buffer2.size},
        },
    ]

    # Create a pipeline using "auto" layout mode
    compute_pipeline = device.create_compute_pipeline(
        layout=wgpu.enums.AutoLayoutMode.auto,
        compute={"module": cshader, "entry_point": "main"},
    )
    bind_group_layout0 = compute_pipeline.get_bind_group_layout(0)
    bind_group0 = device.create_bind_group(layout=bind_group_layout0, entries=bindings0)

    bind_group_layout1 = compute_pipeline.get_bind_group_layout(1)
    bind_group1 = device.create_bind_group(layout=bind_group_layout1, entries=bindings1)

    # Run the pipeline
    command_encoder = device.create_command_encoder()
    compute_pass = command_encoder.begin_compute_pass()
    compute_pass.set_pipeline(compute_pipeline)
    compute_pass.set_bind_group(0, bind_group0, [], 0, 999999)
    compute_pass.set_bind_group(1, bind_group1, [], 0, 999999)
    compute_pass.dispatch_workgroups_indirect(buffer3, 0)
    compute_pass.end()
    device.queue.submit([command_encoder.finish()])

    # Read result
    out1 = in1.__class__.from_buffer(device.queue.read_buffer(buffer2))
    in2 = list(in1)[:]
    out2 = [i - 1 for i in out1]
    # The shader was applied to all but the last two elements
    assert in2[:-2] == out2[:-2]
    assert out2[-2:] == [-1, -1]

def test_compute_fails():
    compute_shader = """
        @group(0)
1 change: 0 additions & 1 deletion tests/test_wgpu_native_compute_tex.py
@@ -526,7 +526,6 @@ def _compute_texture(compute_shader, texture_format, texture_dim, texture_size,
        layout=pipeline_layout,
        compute={"module": cshader, "entry_point": "main"},
    )
    assert compute_pipeline.get_bind_group_layout(0) is bind_group_layout
    command_encoder = device.create_command_encoder()

    if False:  # Upload via alt route (that does not have 256 alignment constraint)
5 changes: 2 additions & 3 deletions wgpu/_classes.py
@@ -1330,14 +1330,13 @@ def get_compilation_info(self):
class GPUPipelineBase:
    """A mixin class for render and compute pipelines."""

    def __init__(self, label, internal, device, layout):
    def __init__(self, label, internal, device):
        super().__init__(label, internal, device)
        self._layout = layout

    # IDL: [NewObject] GPUBindGroupLayout getBindGroupLayout(unsigned long index);
    def get_bind_group_layout(self, index):
        """Get the bind group layout at the given index."""
        return self._layout._layouts[index]
        raise NotImplementedError()


class GPUComputePipeline(GPUPipelineBase, GPUObjectBase):
26 changes: 20 additions & 6 deletions wgpu/backends/wgpu_native/_api.py
@@ -1167,19 +1167,26 @@ def create_compute_pipeline(
            # not used: constants
        )

        if isinstance(layout, GPUPipelineLayout):
            layout_id = layout._internal
        elif layout == enums.AutoLayoutMode.auto:
            layout_id = ffi.NULL
        else:
            raise TypeError(
                "create_compute_pipeline() 'layout' arg must be a GPUPipelineLayout or 'auto'"
            )

        # H: nextInChain: WGPUChainedStruct *, label: char *, layout: WGPUPipelineLayout, compute: WGPUProgrammableStageDescriptor
        struct = new_struct_p(
            "WGPUComputePipelineDescriptor *",
            label=to_c_label(label),
            layout=layout._internal,
            layout=layout_id,
            compute=c_compute_stage,
            # not used: nextInChain
            # not used: compute
        )

        # H: WGPUComputePipeline f(WGPUDevice device, WGPUComputePipelineDescriptor const * descriptor)
        id = libf.wgpuDeviceCreateComputePipeline(self._internal, struct)
        return GPUComputePipeline(label, id, self, layout)
        return GPUComputePipeline(label, id, self)

    async def create_compute_pipeline_async(
        self,
@@ -1381,7 +1388,7 @@ def create_render_pipeline(

        # H: WGPURenderPipeline f(WGPUDevice device, WGPURenderPipelineDescriptor const * descriptor)
        id = libf.wgpuDeviceCreateRenderPipeline(self._internal, struct)
        return GPURenderPipeline(label, id, self, layout)
        return GPURenderPipeline(label, id, self)

    async def create_render_pipeline_async(
        self,
@@ -1784,7 +1791,7 @@ def _destroy(self):


class GPUPipelineBase(classes.GPUPipelineBase):
    pass
    def get_bind_group_layout(self, index):
        """Get the bind group layout at the given index.

        Note that current wgpu-native aborts immediately if the index is out of range.
        """
        # H: WGPUBindGroupLayout f(WGPUComputePipeline computePipeline, uint32_t groupIndex)
        layout_id = libf.wgpuComputePipelineGetBindGroupLayout(self._internal, index)
        return GPUBindGroupLayout("", layout_id, self._device, [])


class GPUComputePipeline(classes.GPUComputePipeline, GPUPipelineBase, GPUObjectBase):
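For reference, a hedged sketch of how the layout-argument validation added in create_compute_pipeline() behaves from the caller's side. It reuses the device and cshader names from the sketch near the top of this page (those names are assumptions, not part of the diff), and the bind group layout entry shown for the explicit case is illustrative.

# Accepted: the new "auto" mode; wgpu derives the pipeline layout from the shader.
pipeline_auto = device.create_compute_pipeline(
    layout=wgpu.enums.AutoLayoutMode.auto,
    compute={"module": cshader, "entry_point": "main"},
)

# Accepted: an explicit GPUPipelineLayout (pre-existing behavior, unchanged).
bgl = device.create_bind_group_layout(
    entries=[
        {
            "binding": 0,
            "visibility": wgpu.ShaderStage.COMPUTE,
            "buffer": {"type": wgpu.BufferBindingType.storage},
        },
    ]
)
pipeline_layout = device.create_pipeline_layout(bind_group_layouts=[bgl])
pipeline_explicit = device.create_compute_pipeline(
    layout=pipeline_layout,
    compute={"module": cshader, "entry_point": "main"},
)

# Rejected: any other value for 'layout' raises a TypeError.
try:
    device.create_compute_pipeline(
        layout=None,
        compute={"module": cshader, "entry_point": "main"},
    )
except TypeError as err:
    print(err)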
6 changes: 3 additions & 3 deletions wgpu/resources/codegen_report.md
@@ -18,7 +18,7 @@
* Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture
* Validated 37 classes, 111 methods, 43 properties
### Patching API for backends/wgpu_native/_api.py
* Validated 37 classes, 100 methods, 0 properties
* Validated 37 classes, 101 methods, 0 properties
## Validating backends/wgpu_native/_api.py
* Enum field TextureFormat.rgb10a2uint missing in wgpu.h
* Enum field StorageTextureAccess.read-only missing in wgpu.h
@@ -29,6 +29,6 @@
* Enum CanvasAlphaMode missing in wgpu.h
* Enum field DeviceLostReason.unknown missing in wgpu.h
* Wrote 232 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py
* Validated 89 C function calls
* Not using 113 C functions
* Validated 90 C function calls
* Not using 112 C functions
* Validated 71 C structs