gl: Avoid UBO/SSBO binding index collisions #12676

kd-11 · 2022-09-18T00:09:28Z

Some drivers don't like this. Actually only Radeonsi. For some reason their slots aren't duplicated across different targets, so writing slot 0 UBO erases slot 0 SSBO and vice-versa 🤦‍♂️
Almost all GPUs going back 15 years have a large number of UBO slots but limited SSBO slots. Move UBO slots up, as we have far more headroom there. For example, the 6600M has around 75 UBO slots but only 8 SSBO slots.

Darkhost1999 · 2022-09-18T00:47:49Z

Just testing NFS Rivals. Haven't tried anything else yet
Master
RPCS3.log
PR
RPCS3.zip

Really long error

·F 0:00:50.146410 {RSX [0x001fd40]} RSX: Compilation failed: 0(8) : error C3012: invalid value '8' for layout qualifier 'binding'
0(25) : error C1154: non constant expression in layout value

source:
#version 450
// Compute shader header: GLSL 4.50, each work group is 32x1x1 invocations.
layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;

// Binding-slot helpers: image bindings are shifted up by 8, SSBO bindings by 2.
#define IMAGE_LOCATION(x) (x + 8)
#define SSBO_LOCATION(x) (x + 2)

// Write-only destination image. IMAGE_LOCATION(0) expands to (0 + 8), i.e. binding 8,
// which is the value the compiler rejects in the log line preceding this dump
// ("invalid value '8' for layout qualifier 'binding'").
layout(binding=IMAGE_LOCATION(0)) uniform writeonly restrict image2D output2D;

// Format identifiers compared against the `format` uniform by the switch in write_output().
// NOTE(review): the values look like the matching OpenGL sized internal-format enums - confirm against the GL headers.
#define FMT_GL_RGBA8 0x8058
#define FMT_GL_BGRA8 0x80E1
#define FMT_GL_R8 0x8229
#define FMT_GL_R16 0x822A
#define FMT_GL_R32F 0x822E
#define FMT_GL_RG8 0x822B
#define FMT_GL_RG8_SNORM 0x8F95
#define FMT_GL_RG16 0x822C
#define FMT_GL_RG16F 0x822F
#define FMT_GL_RGBA16F 0x881A
#define FMT_GL_RGBA32F 0x8814

// Read-only source buffer (std430), exposed as an unsized array of 32-bit words.
// SSBO_LOCATION(0) expands to (0 + 2), i.e. binding 2.
layout(binding=SSBO_LOCATION(0), std430) readonly restrict buffer RawDataBlock
{
uint data[];
};

// Unpack parameters. With USE_UBO they arrive in one uniform block, otherwise as loose
// uniforms (the loose path has no `reserved` member).
//   swap_bytes    - non-zero makes readUint16/readUint32 byte-swap what they read
//   src_pitch     - row length, in elements, used to turn a linear index into (x, y)
//   format        - one of the FMT_GL_* values; selects the decoder in write_output()
//   reserved      - not referenced anywhere in this dump
//   region_offset - added to the computed coordinate before the image store
//   region_size   - upper bound used by the bounds check in write_output()
// `%push_block` is a placeholder token exactly as it appears in this dump; it is not a
// GLSL layout qualifier by itself.
// NOTE(review): presumably substituted by the host before compilation - confirm.
#if USE_UBO
layout(%push_block) uniform UnpackConfiguration
{
uint swap_bytes;
uint src_pitch;
uint format;
uint reserved;
ivec2 region_offset;
ivec2 region_size;
};
#else
uniform uint swap_bytes;
uniform uint src_pitch;
uniform uint format;
uniform ivec2 region_offset;
uniform ivec2 region_size;
#endif

// Flattens the global invocation ID (x, y) into a single linear index:
// y * (total invocations along X) + x. The Z component is not used.
uint linear_invocation_id()
{
// Total invocations along X = work groups along X times the work-group width.
uint size_in_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
return (gl_GlobalInvocationID.y * size_in_x) + gl_GlobalInvocationID.x;
}

// Converts a linear element index into an (x, y) coordinate, using src_pitch as the
// number of elements per row: x = index % src_pitch, y = index / src_pitch.
// NOTE(review): divides by src_pitch; assumes the host never sets it to 0 - confirm.
ivec2 linear_id_to_output_coord(uint index)
{
return ivec2(int(index % src_pitch), int(index / src_pitch));
}

// Decoders. Beware of multi-wide swapped types (e.g swap(16x2) != swap(32x1))

// Reads one byte from data[]. `address` is a byte index; byte 0 of each 32-bit word
// is its least significant 8 bits. swap_bytes is not consulted here.
uint readUint8(const in uint address)
{
// Word that holds the byte, and the byte's position inside that word.
const uint block = address / 4;
const uint offset = address % 4;
return bitfieldExtract(data[block], int(offset) * 8, 8);
}

// Reads one 16-bit value from data[]. `address` is an index in 16-bit units; half 0 of
// each 32-bit word is its low 16 bits. When swap_bytes is non-zero the value is passed
// through bswap_u16 before being returned.
// Note: bswap_u16 is not defined anywhere in this dump.
uint readUint16(const in uint address)
{
// Word that holds the halfword, and which half of that word it is.
const uint block = address / 2;
const uint offset = address % 2;
const uint value = bitfieldExtract(data[block], int(offset) * 16, 16);

if (swap_bytes != 0)
{
	return bswap_u16(value);
}

return value;

}

// Reads one 32-bit word from data[]. `address` is a word index. When swap_bytes is
// non-zero the word is passed through bswap_u32 before being returned.
// Note: bswap_u32 is not defined anywhere in this dump.
uint readUint32(const in uint address)
{
const uint value = data[address];
return (swap_bytes != 0) ? bswap_u32(value) : value;
}

// Reads the 16-bit element at `address` via readUint16 (so swap_bytes applies) and
// splits it into two bytes: x = low 8 bits, y = high 8 bits.
uvec2 readUint8x2(const in uint address)
{
const uint raw = readUint16(address);
return uvec2(bitfieldExtract(raw, 0, 8), bitfieldExtract(raw, 8, 8));
}

// Signed variant of readUint8x2: each component greater than 127 has 256 subtracted,
// mapping the unsigned range 0..255 onto -128..127.
ivec2 readInt8x2(const in uint address)
{
const ivec2 raw = ivec2(readUint8x2(address));
// ivec2(bvec2) yields 1 for true and 0 for false, so 256 is subtracted only where raw > 127.
return raw - (ivec2(greaterThan(raw, ivec2(127))) * 256);
}

// Normalised 8-bit readers. The unsigned forms divide by 255; the signed form divides
// by 127, so a raw value of -128 comes out slightly below -1.0.
// The expansions are not wrapped in parentheses.
#define readFixed8(address) readUint8(address) / 255.f
#define readFixed8x2(address) readUint8x2(address) / 255.f
#define readFixed8x2Snorm(address) readInt8x2(address) / 127.f

// Reads the 32-bit word at `address` via readUint32 (so swap_bytes applies) and returns
// its four bytes, lowest byte first (x..w), each divided by 255.
vec4 readFixed8x4(const in uint address)
{
const uint raw = readUint32(address);
return uvec4(
bitfieldExtract(raw, 0, 8),
bitfieldExtract(raw, 8, 8),
bitfieldExtract(raw, 16, 8),
bitfieldExtract(raw, 24, 8)
) / 255.f;
}

// 16-bit unsigned normalised readers: readUint16 result divided by 65535. The x2/x4
// forms read consecutive 16-bit elements starting at address * 2 or address * 4.
// The `address` argument is not parenthesised in the multi-component expansions;
// the call sites in write_output() pass a plain identifier.
#define readFixed16(address) readUint16(uint(address)) / 65535.f
#define readFixed16x2(address) vec2(readFixed16(address * 2 + 0), readFixed16(address * 2 + 1))
#define readFixed16x4(address) vec4(readFixed16(address * 4 + 0), readFixed16(address * 4 + 1), readFixed16(address * 4 + 2), readFixed16(address * 4 + 3))

// Half-float readers: the 16-bit value is unpacked with unpackHalf2x16 and the .x
// component (taken from the low 16 bits) is used.
#define readFloat16(address) unpackHalf2x16(readUint16(uint(address))).x
#define readFloat16x2(address) vec2(readFloat16(address * 2 + 0), readFloat16(address * 2 + 1))
#define readFloat16x4(address) vec4(readFloat16(address * 4 + 0), readFloat16(address * 4 + 1), readFloat16(address * 4 + 2), readFloat16(address * 4 + 3))

// 32-bit float readers: the bits returned by readUint32 are reinterpreted as float.
#define readFloat32(address) uintBitsToFloat(readUint32(address))
#define readFloat32x4(address) uintBitsToFloat(uvec4(readUint32(address * 4 + 0), readUint32(address * 4 + 1), readUint32(address * 4 + 2), readUint32(address * 4 + 3)))

// Number of consecutive elements each invocation processes in main().
#define KERNEL_SIZE 8

// Decodes the element with linear index `invocation_id` according to `format` and
// stores it into output2D at (coordinate derived from the index) + region_offset.
// Elements whose coordinate fails the region_size check are skipped.
//
// Review notes on the code as dumped:
//  - `utmp` is declared but never used.
//  - Formats that assign only .r or .rg leave the remaining outColor components
//    uninitialised, and the switch has no default case, so an unmatched `format`
//    stores a fully uninitialised colour.
//  - The source buffer is read before the bounds check runs.
void write_output(const in uint invocation_id)
{
vec4 outColor;
uint utmp;

switch (format)
{
// Simple color
case FMT_GL_RGBA8:
	outColor = readFixed8x4(invocation_id);
	break;
case FMT_GL_BGRA8:
	outColor = readFixed8x4(invocation_id).bgra;
	break;
case FMT_GL_R8:
	outColor.r = readFixed8(invocation_id);
	break;
case FMT_GL_R16:
	outColor.r = readFixed16(invocation_id);
	break;
case FMT_GL_R32F:
	outColor.r = readFloat32(invocation_id);
	break;
case FMT_GL_RG8:
	outColor.rg = readFixed8x2(invocation_id);
	break;
case FMT_GL_RG8_SNORM:
	outColor.rg = readFixed8x2Snorm(invocation_id);
	break;
case FMT_GL_RG16:
	outColor.rg = readFixed16x2(invocation_id);
	break;
case FMT_GL_RG16F:
	outColor.rg = readFloat16x2(invocation_id);
	break;
case FMT_GL_RGBA16F:
	outColor = readFloat16x4(invocation_id);
	break;
case FMT_GL_RGBA32F:
	outColor = readFloat32x4(invocation_id);
	break;
}

// Skip elements that fall outside the target region.
// NOTE(review): greaterThan lets a coordinate equal to region_size through; if
// region_size is an element count this is off by one (greaterThanEqual) - confirm.
const ivec2 coord = linear_id_to_output_coord(invocation_id);
if (any(greaterThan(coord, region_size)))
{
	return;
}

imageStore(output2D, coord + region_offset, outColor);

}

// Entry point: each invocation handles KERNEL_SIZE consecutive elements, starting at
// linear_invocation_id() * KERNEL_SIZE, calling write_output() once per element.
void main()
{
uint index = linear_invocation_id() * KERNEL_SIZE;

// `loop` only counts iterations; `index` advances alongside it and is the element processed.
for (int loop = 0; loop < KERNEL_SIZE; ++loop, ++index)
{
	write_output(index);
}

}

·W 0:00:50.146754 {RSX [0x001fd40]} SYS: Emulation has been frozen! You can either use debugger tools to inspect current emulation state or terminate it.
·F 0:00:50.147796 {RSX [0x001fd40]} RSX: Linkage failed: Compute info

Darkhost1999 · 2022-09-18T00:59:57Z

OK, everything with OpenGL is outputting that error right after the SPU cache, before anything related to the game.

- Some drivers don't like this. Actually only RADV.
- Almost all GPUs going back 15 years have a large number of UBO slots but limited SSBO slots. Move UBO slots up as we have tons more headroom there.

NVIDIA only supports 8 compute image slots even on modern GPUs.

kd-11 · 2022-09-18T21:32:55Z

NVIDIA only supports 8 compute image slots. Fixed now.

Darkhost1999 · 2022-09-18T21:47:26Z

I was reading about it only having 8. However, what I read was actually pointing towards that not being true, and I don't know where to confirm it.

kd-11 · 2022-09-18T22:34:41Z

https://opengl.gpuinfo.org/displayreport.php?id=7899 GL_MAX_COMPUTE_IMAGE_UNIFORMS = 8

kd-11 · 2022-09-18T22:36:54Z

AMD always has 32 since GCN1 so the limit of 8 for new NVIDIA cards was unexpected.

kd-11 mentioned this pull request Sep 18, 2022

Regression: Broken graphics in OpenGL on NieR Replicant (#12454) #12476

Closed

Megamouse added the Render: OpenGL label Sep 18, 2022

kd-11 added 2 commits September 19, 2022 00:32

gl: Avoid UBO/SSBO binding index collisions

89ef57b

- Some drivers don't like this. Actually only RADV.
- Almost all GPUs going back 15 years have a large number of UBO slots but limited SSBO slots. Move UBO slots up as we have tons more headroom there.

gl: Restrict compute image bindings to [0-8]

68e8ee4

NVIDIA only supports 8 compute image slots even on modern GPUs.

kd-11 force-pushed the ogl-regression-fix branch from 446e9fd to 68e8ee4 Compare September 18, 2022 21:32

kd-11 merged commit 79f2c21 into RPCS3:master Sep 18, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

gl: Avoid UBO/SSBO binding index collisions #12676

gl: Avoid UBO/SSBO binding index collisions #12676

kd-11 commented Sep 18, 2022 •

edited

Loading

Darkhost1999 commented Sep 18, 2022

Darkhost1999 commented Sep 18, 2022 •

edited

Loading

kd-11 commented Sep 18, 2022

Darkhost1999 commented Sep 18, 2022

kd-11 commented Sep 18, 2022

kd-11 commented Sep 18, 2022

gl: Avoid UBO/SSBO binding index collisions #12676

gl: Avoid UBO/SSBO binding index collisions #12676

Conversation

kd-11 commented Sep 18, 2022 • edited Loading

Darkhost1999 commented Sep 18, 2022

Darkhost1999 commented Sep 18, 2022 • edited Loading

kd-11 commented Sep 18, 2022

Darkhost1999 commented Sep 18, 2022

kd-11 commented Sep 18, 2022

kd-11 commented Sep 18, 2022

kd-11 commented Sep 18, 2022 •

edited

Loading

Darkhost1999 commented Sep 18, 2022 •

edited

Loading