Skip to content

Commit

Permalink
GPU: Simultaneous compute pass read-write
Browse files Browse the repository at this point in the history
  • Loading branch information
thatcosmonaut committed Sep 27, 2024
1 parent 5ff6e8d commit cc51095
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 149 deletions.
59 changes: 31 additions & 28 deletions include/SDL3/SDL_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,18 +464,19 @@ typedef enum SDL_GPUTextureFormat
* A texture must have at least one usage flag. Note that some usage flag
* combinations are invalid.
*
* \since This enum is available since SDL 3.0.0
* \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUTexture
*/
typedef Uint32 SDL_GPUTextureUsageFlags;

#define SDL_GPU_TEXTUREUSAGE_SAMPLER (1u << 0) /**< Texture supports sampling. */
#define SDL_GPU_TEXTUREUSAGE_COLOR_TARGET (1u << 1) /**< Texture is a color render target. */
#define SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET (1u << 2) /**< Texture is a depth stencil target. */
#define SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Texture supports storage reads in graphics stages. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Texture supports storage reads in the compute stage. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Texture supports storage writes in the compute stage. */
#define SDL_GPU_TEXTUREUSAGE_SAMPLER (1u << 0) /**< Texture supports sampling. */
#define SDL_GPU_TEXTUREUSAGE_COLOR_TARGET (1u << 1) /**< Texture is a color render target. */
#define SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET (1u << 2) /**< Texture is a depth stencil target. */
#define SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Texture supports storage reads in graphics stages. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Texture supports storage reads in the compute stage. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Texture supports storage writes in the compute stage. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE (1u << 6) /**< Texture supports reads and writes in the same compute shader. This is NOT equivalent to READ | WRITE. */

/**
* Specifies the type of a texture.
Expand Down Expand Up @@ -536,18 +537,18 @@ typedef enum SDL_GPUCubeMapFace
* A buffer must have at least one usage flag. Note that some usage flag
* combinations are invalid.
*
* \since This enum is available since SDL 3.0.0
* \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUBuffer
*/
typedef Uint32 SDL_GPUBufferUsageFlags;

#define SDL_GPU_BUFFERUSAGE_VERTEX (1u << 0) /**< Buffer is a vertex buffer. */
#define SDL_GPU_BUFFERUSAGE_INDEX (1u << 1) /**< Buffer is an index buffer. */
#define SDL_GPU_BUFFERUSAGE_INDIRECT (1u << 2) /**< Buffer is an indirect buffer. */
#define SDL_GPU_BUFFERUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Buffer supports storage reads in graphics stages. */
#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Buffer supports storage reads in the compute stage. */
#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Buffer supports storage writes in the compute stage. */
#define SDL_GPU_BUFFERUSAGE_VERTEX (1u << 0) /**< Buffer is a vertex buffer. */
#define SDL_GPU_BUFFERUSAGE_INDEX (1u << 1) /**< Buffer is an index buffer. */
#define SDL_GPU_BUFFERUSAGE_INDIRECT (1u << 2) /**< Buffer is an indirect buffer. */
#define SDL_GPU_BUFFERUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Buffer supports storage reads in graphics stages. */
#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Buffer supports storage reads in the compute stage. */
#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Buffer supports storage writes in the compute stage. */

/**
* Specifies how a transfer buffer is intended to be used by the client.
Expand Down Expand Up @@ -811,7 +812,7 @@ typedef enum SDL_GPUBlendFactor
/**
* Specifies which color components are written in a graphics pipeline.
*
* \since This enum is available since SDL 3.0.0
* \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUGraphicsPipeline
*/
Expand Down Expand Up @@ -1489,8 +1490,8 @@ typedef struct SDL_GPUComputePipelineCreateInfo
Uint32 num_samplers; /**< The number of samplers defined in the shader. */
Uint32 num_readonly_storage_textures; /**< The number of readonly storage textures defined in the shader. */
Uint32 num_readonly_storage_buffers; /**< The number of readonly storage buffers defined in the shader. */
Uint32 num_writeonly_storage_textures; /**< The number of writeonly storage textures defined in the shader. */
Uint32 num_writeonly_storage_buffers; /**< The number of writeonly storage buffers defined in the shader. */
Uint32 num_readwrite_storage_textures; /**< The number of read-write storage textures defined in the shader. */
Uint32 num_readwrite_storage_buffers; /**< The number of read-write storage buffers defined in the shader. */
Uint32 num_uniform_buffers; /**< The number of uniform buffers defined in the shader. */
Uint32 threadcount_x; /**< The number of threads in the X dimension. This should match the value in the shader. */
Uint32 threadcount_y; /**< The number of threads in the Y dimension. This should match the value in the shader. */
Expand Down Expand Up @@ -1667,14 +1668,14 @@ typedef struct SDL_GPUTextureSamplerBinding
*
* \sa SDL_BeginGPUComputePass
*/
typedef struct SDL_GPUStorageBufferWriteOnlyBinding
typedef struct SDL_GPUStorageBufferReadWriteBinding
{
SDL_GPUBuffer *buffer; /**< The buffer to bind. Must have been created with SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE. */
bool cycle; /**< true cycles the buffer if it is already bound. */
Uint8 padding1;
Uint8 padding2;
Uint8 padding3;
} SDL_GPUStorageBufferWriteOnlyBinding;
} SDL_GPUStorageBufferReadWriteBinding;

/**
* A structure specifying parameters related to binding textures in a compute
Expand All @@ -1684,7 +1685,7 @@ typedef struct SDL_GPUStorageBufferWriteOnlyBinding
*
* \sa SDL_BeginGPUComputePass
*/
typedef struct SDL_GPUStorageTextureWriteOnlyBinding
typedef struct SDL_GPUStorageTextureReadWriteBinding
{
SDL_GPUTexture *texture; /**< The texture to bind. Must have been created with SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE or SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE. */
Uint32 mip_level; /**< The mip level index to bind. */
Expand All @@ -1693,7 +1694,7 @@ typedef struct SDL_GPUStorageTextureWriteOnlyBinding
Uint8 padding1;
Uint8 padding2;
Uint8 padding3;
} SDL_GPUStorageTextureWriteOnlyBinding;
} SDL_GPUStorageTextureReadWriteBinding;

/* Functions */

Expand Down Expand Up @@ -2807,15 +2808,17 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPURenderPass(
* Begins a compute pass on a command buffer.
*
* A compute pass is defined by a set of texture subresources and buffers that
* will be written to by compute pipelines. These textures and buffers must
* have been created with the COMPUTE_STORAGE_WRITE bit. All operations
* may be written to by compute pipelines. Textures must have been created
* with the COMPUTE_STORAGE_WRITE bit or the
* COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE bit; buffers must have been created
* with the COMPUTE_STORAGE_WRITE bit. All operations
* related to compute pipelines must take place inside of a compute pass. You
* must not begin another compute pass, or a render pass or copy pass before
* ending the compute pass.
*
* A VERY IMPORTANT NOTE Textures and buffers bound as write-only MUST NOT be
* read from during the compute pass. Doing so will result in undefined
* behavior. If your compute work requires reading the output from a previous
* A VERY IMPORTANT NOTE - Reads and writes in compute shaders are NOT implicitly synchronized.
* This means you may cause data races by both reading and writing a resource in a compute pass.
* Reading and writing a texture in the same compute shader is only supported by specific texture formats.
* Make sure you check the format support!
* If your compute work requires reading the completed output from a previous
* dispatch, you MUST end the current compute pass and begin a new one before
* you can safely access the data.
*
Expand All @@ -2836,9 +2839,9 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPURenderPass(
*/
extern SDL_DECLSPEC SDL_GPUComputePass *SDLCALL SDL_BeginGPUComputePass(
SDL_GPUCommandBuffer *command_buffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storage_texture_bindings,
const SDL_GPUStorageTextureReadWriteBinding *storage_texture_bindings,
Uint32 num_storage_texture_bindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storage_buffer_bindings,
const SDL_GPUStorageBufferReadWriteBinding *storage_buffer_bindings,
Uint32 num_storage_buffer_bindings);

/**
Expand Down
2 changes: 1 addition & 1 deletion src/dynapi/SDL_dynapi_procs.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ SDL_DYNAPI_PROC(SDL_TimerID,SDL_AddTimerNS,(Uint64 a, SDL_NSTimerCallback b, voi
SDL_DYNAPI_PROC(bool,SDL_AddVulkanRenderSemaphores,(SDL_Renderer *a, Uint32 b, Sint64 c, Sint64 d),(a,b,c,d),return)
SDL_DYNAPI_PROC(SDL_JoystickID,SDL_AttachVirtualJoystick,(const SDL_VirtualJoystickDesc *a),(a),return)
SDL_DYNAPI_PROC(bool,SDL_AudioDevicePaused,(SDL_AudioDeviceID a),(a),return)
SDL_DYNAPI_PROC(SDL_GPUComputePass*,SDL_BeginGPUComputePass,(SDL_GPUCommandBuffer *a, const SDL_GPUStorageTextureWriteOnlyBinding *b, Uint32 c, const SDL_GPUStorageBufferWriteOnlyBinding *d, Uint32 e),(a,b,c,d,e),return)
SDL_DYNAPI_PROC(SDL_GPUComputePass*,SDL_BeginGPUComputePass,(SDL_GPUCommandBuffer *a, const SDL_GPUStorageTextureReadWriteBinding *b, Uint32 c, const SDL_GPUStorageBufferReadWriteBinding *d, Uint32 e),(a,b,c,d,e),return)
SDL_DYNAPI_PROC(SDL_GPUCopyPass*,SDL_BeginGPUCopyPass,(SDL_GPUCommandBuffer *a),(a),return)
SDL_DYNAPI_PROC(SDL_GPURenderPass*,SDL_BeginGPURenderPass,(SDL_GPUCommandBuffer *a, const SDL_GPUColorTargetInfo *b, Uint32 c, const SDL_GPUDepthStencilTargetInfo *d),(a,b,c,d),return)
SDL_DYNAPI_PROC(bool,SDL_BindAudioStream,(SDL_AudioDeviceID a, SDL_AudioStream *b),(a,b),return)
Expand Down
18 changes: 14 additions & 4 deletions src/gpu/SDL_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -705,11 +705,11 @@ SDL_GPUComputePipeline *SDL_CreateGPUComputePipeline(
SDL_assert_release(!"Incompatible shader format for GPU backend");
return NULL;
}
if (createinfo->num_writeonly_storage_textures > MAX_COMPUTE_WRITE_TEXTURES) {
if (createinfo->num_readwrite_storage_textures > MAX_COMPUTE_WRITE_TEXTURES) {
SDL_assert_release(!"Compute pipeline write-only texture count cannot be higher than 8!");
return NULL;
}
if (createinfo->num_writeonly_storage_buffers > MAX_COMPUTE_WRITE_BUFFERS) {
if (createinfo->num_readwrite_storage_buffers > MAX_COMPUTE_WRITE_BUFFERS) {
SDL_assert_release(!"Compute pipeline write-only buffer count cannot be higher than 8!");
return NULL;
}
Expand Down Expand Up @@ -1868,9 +1868,9 @@ void SDL_EndGPURenderPass(

SDL_GPUComputePass *SDL_BeginGPUComputePass(
SDL_GPUCommandBuffer *command_buffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storage_texture_bindings,
const SDL_GPUStorageTextureReadWriteBinding *storage_texture_bindings,
Uint32 num_storage_texture_bindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storage_buffer_bindings,
const SDL_GPUStorageBufferReadWriteBinding *storage_buffer_bindings,
Uint32 num_storage_buffer_bindings)
{
CommandBufferCommonHeader *commandBufferHeader;
Expand Down Expand Up @@ -1898,6 +1898,16 @@ SDL_GPUComputePass *SDL_BeginGPUComputePass(
if (COMMAND_BUFFER_DEVICE->debug_mode) {
CHECK_COMMAND_BUFFER_RETURN_NULL
CHECK_ANY_PASS_IN_PROGRESS("Cannot begin compute pass during another pass!", NULL)

for (Uint32 i = 0; i < num_storage_texture_bindings; i += 1) {
TextureCommonHeader *header = (TextureCommonHeader *)storage_texture_bindings[i].texture;
if (!(header->info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) && !(header->info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) {
SDL_assert_release(!"Texture must be created with COMPUTE_STORAGE_WRITE or COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE flag");
return NULL;
}
}

// TODO: validate buffer usage?
}

COMMAND_BUFFER_DEVICE->BeginComputePass(
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/SDL_sysgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -503,9 +503,9 @@ struct SDL_GPUDevice

void (*BeginComputePass)(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings);

void (*BindComputePipeline)(
Expand Down
49 changes: 33 additions & 16 deletions src/gpu/d3d11/SDL_gpu_d3d11.c
Original file line number Diff line number Diff line change
Expand Up @@ -529,9 +529,9 @@ typedef struct D3D11ComputePipeline

Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
Uint32 numWriteonlyStorageTextures;
Uint32 numReadWriteStorageTextures;
Uint32 numReadonlyStorageBuffers;
Uint32 numWriteonlyStorageBuffers;
Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
} D3D11ComputePipeline;

Expand Down Expand Up @@ -687,8 +687,8 @@ typedef struct D3D11CommandBuffer
D3D11Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
D3D11TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
D3D11Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
D3D11TextureSubresource *computeReadWriteStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
D3D11Buffer *computeReadWriteStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];

// Uniform buffers
D3D11UniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
Expand Down Expand Up @@ -1524,9 +1524,9 @@ static SDL_GPUComputePipeline *D3D11_CreateComputePipeline(
pipeline->computeShader = shader;
pipeline->numSamplers = createinfo->num_samplers;
pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
pipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
pipeline->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
pipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
pipeline->numUniformBuffers = createinfo->num_uniform_buffers;
// thread counts are ignored in d3d11

Expand Down Expand Up @@ -3224,8 +3224,8 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->computeSamplerTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeReadWriteStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeReadWriteStorageBuffers);

bool acquireFenceResult = D3D11_INTERNAL_AcquireFence(commandBuffer);
commandBuffer->autoReleaseFence = 1;
Expand Down Expand Up @@ -4284,9 +4284,9 @@ static void D3D11_Blit(

static void D3D11_BeginComputePass(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings)
{
D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
Expand All @@ -4310,7 +4310,7 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
textureSubresource->parent);

d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = textureSubresource;
d3d11CommandBuffer->computeReadWriteStorageTextureSubresources[i] = textureSubresource;
}

for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
Expand All @@ -4325,15 +4325,15 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
buffer);

d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer;
d3d11CommandBuffer->computeReadWriteStorageBuffers[i] = buffer;
}

for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
uavs[i] = d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uav;
uavs[i] = d3d11CommandBuffer->computeReadWriteStorageTextureSubresources[i]->uav;
}

for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i]->uav;
uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeReadWriteStorageBuffers[i]->uav;
}

ID3D11DeviceContext_CSSetUnorderedAccessViews(
Expand Down Expand Up @@ -4622,8 +4622,8 @@ static void D3D11_EndComputePass(
SDL_zeroa(d3d11CommandBuffer->computeSamplerTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageBuffers);
SDL_zeroa(d3d11CommandBuffer->computeReadWriteStorageTextureSubresources);
SDL_zeroa(d3d11CommandBuffer->computeReadWriteStorageBuffers);
}

// Fence Cleanup
Expand Down Expand Up @@ -5763,6 +5763,7 @@ static bool D3D11_SupportsTextureFormat(
DXGI_FORMAT dxgiFormat = SDLToD3D11_TextureFormat[format];
DXGI_FORMAT typelessFormat = D3D11_INTERNAL_GetTypelessFormat(dxgiFormat);
UINT formatSupport, sampleableFormatSupport;
D3D11_FEATURE_DATA_FORMAT_SUPPORT2 formatSupport2 = { dxgiFormat, 0 };
HRESULT res;

res = ID3D11Device_CheckFormatSupport(
Expand All @@ -5787,6 +5788,19 @@ static bool D3D11_SupportsTextureFormat(
}
}

// Checks for SIMULTANEOUS_READ_WRITE support
if (usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) {
res = ID3D11Device_CheckFeatureSupport(
renderer->device,
D3D11_FEATURE_FORMAT_SUPPORT2,
&formatSupport2,
sizeof(formatSupport2));
if (FAILED(res)) {
// Format is apparently unknown
return false;
}
}

// Is the texture type supported?
if (type == SDL_GPU_TEXTURETYPE_2D && !(formatSupport & D3D11_FORMAT_SUPPORT_TEXTURE2D)) {
return false;
Expand Down Expand Up @@ -5815,6 +5829,9 @@ static bool D3D11_SupportsTextureFormat(
// TYPED_UNORDERED_ACCESS_VIEW implies support for typed UAV stores
return false;
}
if ((usage & (SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) && !(formatSupport2.OutFormatSupport2 & D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD))) {
return false;
}
if ((usage & SDL_GPU_TEXTUREUSAGE_COLOR_TARGET) && !(formatSupport & D3D11_FORMAT_SUPPORT_RENDER_TARGET)) {
return false;
}
Expand Down
Loading

0 comments on commit cc51095

Please sign in to comment.