Skip to content

Commit

Permalink
shader_recompiler: Clean up swizzle handling and handle ImageRead sto…
Browse files Browse the repository at this point in the history
…rage swizzle.
  • Loading branch information
squidbus committed Dec 25, 2024
1 parent 4e6a7e5 commit 495e968
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 55 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/post_order.h
src/shader_recompiler/ir/program.cpp
src/shader_recompiler/ir/program.h
src/shader_recompiler/ir/reinterpret.h
src/shader_recompiler/ir/reg.h
src/shader_recompiler/ir/type.cpp
src/shader_recompiler/ir/type.h
Expand Down
56 changes: 56 additions & 0 deletions src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,20 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
}

Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.U32[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
}
Expand Down Expand Up @@ -78,6 +92,20 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
}

Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
}
Expand Down Expand Up @@ -114,6 +142,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
}

Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

void EmitCompositeConstructF64x2(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
}
Expand Down Expand Up @@ -150,4 +192,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
}

Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

} // namespace Shader::Backend::SPIRV
20 changes: 20 additions & 0 deletions src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Expand All @@ -136,6 +141,11 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Expand All @@ -145,6 +155,11 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
void EmitCompositeConstructF64x2(EmitContext& ctx);
void EmitCompositeConstructF64x3(EmitContext& ctx);
void EmitCompositeConstructF64x4(EmitContext& ctx);
Expand All @@ -154,6 +169,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx);
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Expand Down
12 changes: 7 additions & 5 deletions src/shader_recompiler/frontend/translate/translate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
Expand Down Expand Up @@ -475,11 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) {

// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const std::array components = buffer.DstSelect().Apply<IR::F32>(
[&](const u32 index) { return ir.GetAttribute(attr, index); },
[&](const u32 imm) { return ir.Imm32(float(imm)); });
for (u32 i = 0; i < components.size(); i++) {
ir.SetVectorReg(dst_reg++, components[i]);
const auto values =
ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));
const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect());
for (u32 i = 0; i < 4; i++) {
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
}

// In case of programmable step rates we need to fallback to instance data pulling in
Expand Down
80 changes: 80 additions & 0 deletions src/shader_recompiler/ir/ir_emitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,86 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_
}
}

Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1) {
if (vector1.Type() != vector2.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
}
if (comp0 >= 4 || comp1 >= 4) {
UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1);
}
const auto shuffle{[&](Opcode opcode) -> Value {
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
Value{static_cast<u32>(comp1)});
}};
switch (vector1.Type()) {
case Type::U32x4:
return shuffle(Opcode::CompositeShuffleU32x2);
case Type::F16x4:
return shuffle(Opcode::CompositeShuffleF16x2);
case Type::F32x4:
return shuffle(Opcode::CompositeShuffleF32x2);
case Type::F64x4:
return shuffle(Opcode::CompositeShuffleF64x2);
default:
ThrowInvalidType(vector1.Type());
}
}

Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2) {
if (vector1.Type() != vector2.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
}
if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) {
UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}", comp0, comp1, comp2);
}
const auto shuffle{[&](Opcode opcode) -> Value {
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)});
}};
switch (vector1.Type()) {
case Type::U32x4:
return shuffle(Opcode::CompositeShuffleU32x3);
case Type::F16x4:
return shuffle(Opcode::CompositeShuffleF16x3);
case Type::F32x4:
return shuffle(Opcode::CompositeShuffleF32x3);
case Type::F64x4:
return shuffle(Opcode::CompositeShuffleF64x3);
default:
ThrowInvalidType(vector1.Type());
}
}

Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2, size_t comp3) {
if (vector1.Type() != vector2.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
}
if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) {
UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2,
comp3);
}
const auto shuffle{[&](Opcode opcode) -> Value {
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)},
Value{static_cast<u32>(comp3)});
}};
switch (vector1.Type()) {
case Type::U32x4:
return shuffle(Opcode::CompositeShuffleU32x4);
case Type::F16x4:
return shuffle(Opcode::CompositeShuffleF16x4);
case Type::F32x4:
return shuffle(Opcode::CompositeShuffleF32x4);
case Type::F64x4:
return shuffle(Opcode::CompositeShuffleF64x4);
default:
ThrowInvalidType(vector1.Type());
}
}

Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
if (true_value.Type() != false_value.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type());
Expand Down
7 changes: 7 additions & 0 deletions src/shader_recompiler/ir/ir_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ class IREmitter {
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1);
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2);
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2, size_t comp3);

[[nodiscard]] Value Select(const U1& condition, const Value& true_value,
const Value& false_value);

Expand Down
12 changes: 12 additions & 0 deletions src/shader_recompiler/ir/opcodes.inc
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, )
OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, )
OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, )
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
Expand All @@ -131,6 +134,9 @@ OPCODE(CompositeExtractF16x4, F16, F16x
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, )
OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, )
OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, )
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
Expand All @@ -140,6 +146,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, )
OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, )
OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, )
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
Expand All @@ -149,6 +158,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, )
OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, )
OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, )

// Select operations
OPCODE(SelectU1, U1, U1, U1, U1, )
Expand Down
Loading

0 comments on commit 495e968

Please sign in to comment.