Skip to content

Commit

Permalink
WIP Incomplete tessellation implementation. Squash commits
Browse files Browse the repository at this point in the history
  • Loading branch information
baggins183 committed Nov 13, 2024
1 parent 072714c commit 09acb68
Show file tree
Hide file tree
Showing 43 changed files with 1,679 additions and 163 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

# Define _DEBUG for Debug configurations.
# A generator expression is used instead of testing CMAKE_BUILD_TYPE at configure time so that
# multi-config generators (Visual Studio, Xcode, Ninja Multi-Config), which pick the
# configuration at build time, also receive the definition. Single-config generators behave as
# before: the expression is true when CMAKE_BUILD_TYPE is Debug.
add_compile_definitions($<$<CONFIG:Debug>:_DEBUG>)

project(shadPS4)

# Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory.
Expand Down
2 changes: 1 addition & 1 deletion externals/ext-boost
Submodule ext-boost updated 1563 files
11 changes: 9 additions & 2 deletions src/core/libraries/gnmdriver/gnmdriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1599,7 +1599,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) {

s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
LOG_TRACE(Lib_GnmDriver, "called");

if (!cmdbuf || size < 0x1E) {
return -1;
}
Expand All @@ -1617,11 +1616,19 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
// This is wrong but just stash them here for now
// Should read the tess constants buffer instead, which is bound as V#, into runtime_info.
// HsConstants member of HsProgram is used to derive TessellationDataConstantBuffer, its members
// don't correspond to real registers
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x11cu, hs_regs[4], hs_regs[5], hs_regs[6], hs_regs[7],
hs_regs[8], hs_regs[9], hs_regs[10], hs_regs[11], hs_regs[12],
hs_regs[13]); // TODO comment
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL
hs_regs[6]); // VGT_HOS_MAX_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG

// right padding?
WriteTrailingNop<11>(cmdbuf);
return ORBIS_OK;
}
Expand Down
9 changes: 9 additions & 0 deletions src/core/libraries/kernel/thread_management.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,16 @@ ScePthread PThreadPool::Create(const char* name) {
}
}

#ifdef _WIN64
auto* ret = new PthreadInternal{};
#else
// TODO: Linux specific hack
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
auto* ret = reinterpret_cast<PthreadInternal*>(
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
#endif
ret->is_free = false;
ret->is_detached = false;
ret->is_almost_done = false;
Expand Down
7 changes: 5 additions & 2 deletions src/shader_recompiler/backend/spirv/emit_spirv.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off

#include <span>
#include <type_traits>
#include <utility>
Expand All @@ -13,6 +13,7 @@
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"

namespace Shader::Backend::SPIRV {
Expand Down Expand Up @@ -281,6 +282,9 @@ void SetupCapabilities(const Info& info, EmitContext& ctx) {
if (stage == LogicalStage::Geometry) {
ctx.AddCapability(spv::Capability::Geometry);
}
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation);
}
}

void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
Expand All @@ -305,7 +309,6 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;
ctx.AddCapability(spv::Capability::Tessellation);
const auto& vs_info = ctx.runtime_info.vs_info;
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
Expand Down
9 changes: 9 additions & 0 deletions src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,13 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) {
MemoryBarrier(ctx, spv::Scope::Device);
}

/// Emits the control barrier used for tessellation control shader outputs.
/// The barrier uses Workgroup execution scope, Invocation memory scope and no memory
/// semantics (MaskNone).
void EmitTcsOutputBarrier(EmitContext& ctx) {
    // Each barrier operand is passed to SPIR-V as a 32-bit constant id.
    const auto execution_scope = ctx.ConstU32(static_cast<u32>(spv::Scope::Workgroup));
    const auto memory_scope = ctx.ConstU32(static_cast<u32>(spv::Scope::Invocation));
    const auto semantics = ctx.ConstU32(static_cast<u32>(spv::MemorySemanticsMask::MaskNone));
    ctx.OpControlBarrier(execution_scope, memory_scope, semantics);
}

} // namespace Shader::Backend::SPIRV
70 changes: 57 additions & 13 deletions src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/runtime_info.h"
#pragma clang optimize off
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/patch.h"
Expand Down Expand Up @@ -44,15 +47,24 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
}
}

Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, Id array_index, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
ASSERT(info.num_components > 0);
if (info.num_components == 1) {
Id base = info.id;
boost::container::small_vector<Id, 2> indices;
if (ctx.l_stage == LogicalStage::TessellationControl) {
indices.push_back(array_index);
}
if (info.num_components > 1) {
indices.push_back(ctx.ConstU32(element));
}

if (indices.empty()) {
return info.id;
} else {
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
return ctx.OpAccessChain(info.pointer_type, info.id, indices);
}
}
if (IR::IsMrt(attr)) {
Expand Down Expand Up @@ -81,6 +93,10 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
}
}

/// Convenience overload that takes no array index.
/// Forwards to the four-argument OutputAttrPointer with a default-constructed Id as the
/// array index.
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
    const Id no_array_index{};
    return OutputAttrPointer(ctx, attr, no_array_index, element);
}

std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
Expand Down Expand Up @@ -172,23 +188,31 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
}

Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (ctx.info.stage == Stage::Geometry) {
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval) {
if (IR::IsPosition(attr)) {
ASSERT(ctx.info.l_stage != LogicalStage::TessellationControl &&
ctx.info.l_stage != LogicalStage::TessellationEval);
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index),
ctx.ConstU32(0u))};
const auto pointer{
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}

if (IR::IsParam(attr)) {
} else if (IR::IsTessCoord(attr)) {
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
const auto pointer{
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
return ctx.OpLoad(ctx.F32[1], pointer);
} else if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
Expand Down Expand Up @@ -270,8 +294,22 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
ctx.u32_zero_value);
case IR::Attribute::PrimitiveId:
ASSERT(ctx.info.stage == Stage::Geometry);
case IR::Attribute::TessPatchIdInVgt: // TODO see why this isnt DCEd
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval);
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
case IR::Attribute::InvocationId:
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
case IR::Attribute::PatchVertices:
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
case IR::Attribute::PackedHullInvocationInfo:
// TODO figure out what to do with this
// should be dead code, but otherwise return 0 or concat PrimitiveId and InvocationId
return ctx.u32_zero_value;
default:
UNREACHABLE_MSG("Read U32 attribute {}", attr);
}
Expand All @@ -282,7 +320,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};

Id pointer;
if (ctx.l_stage == LogicalStage::TessellationControl) {
pointer = OutputAttrPointer(ctx, attr, ctx.OpLoad(ctx.U32[1], ctx.invocation_id), element);
} else {
pointer = OutputAttrPointer(ctx, attr, element);
}
const auto component_type{OutputAttrComponentType(ctx, attr)};
if (component_type.second) {
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
Expand Down
5 changes: 2 additions & 3 deletions src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
void EmitReference(EmitContext&);
void EmitPhiMove(EmitContext&);
void EmitJoin(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx);
Expand All @@ -53,6 +51,7 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2,
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitTcsOutputBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);
Expand Down Expand Up @@ -86,7 +85,7 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
Expand Down
98 changes: 96 additions & 2 deletions src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,53 @@ void EmitContext::DefineInputs() {
}
break;
}
case LogicalStage::TessellationControl: {
invocation_id =
DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
patch_vertices =
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);

for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.loads.GetAny(param)) {
continue;
}
const u32 num_components = info.loads.NumComponents(param);
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id type{TypeArray(F32[4], ConstU32(32u))};
const Id id{DefineInput(type, i)};
Name(id, fmt::format("in_attr{}", i));
input_params[i] = {id, input_f32, F32[1], 4};
}
break;
}
case LogicalStage::TessellationEval: {
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);

for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.loads.GetAny(param)) {
continue;
}
const u32 num_components = info.loads.NumComponents(param);
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id type{TypeArray(F32[4], ConstU32(32u))};
const Id id{DefineInput(type, i)};
Name(id, fmt::format("in_attr{}", i));
input_params[i] = {id, input_f32, F32[1], 4};
}

for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineInput(F32[4], index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_in{}", index));
patches[index] = id;
}
break;
}
default:
Expand All @@ -353,6 +398,9 @@ void EmitContext::DefineInputs() {
void EmitContext::DefineOutputs() {
switch (l_stage) {
case LogicalStage::Vertex: {
// No point in defining builtin outputs (i.e. position) unless next stage is fragment?
// Might cause problems linking with tcs

output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
Expand Down Expand Up @@ -380,24 +428,70 @@ void EmitContext::DefineOutputs() {
case LogicalStage::TessellationControl: {
if (info.stores_tess_level_outer) {
const Id type{TypeArray(F32[1], ConstU32(4U))};
output_tess_level_outer = DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
output_tess_level_outer =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
Decorate(output_tess_level_outer, spv::Decoration::Patch);
}
if (info.stores_tess_level_inner) {
const Id type{TypeArray(F32[1], ConstU32(2U))};
output_tess_level_inner = DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
output_tess_level_inner =
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
Decorate(output_tess_level_inner, spv::Decoration::Patch);
}

for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
// Per-vertex outputs are arrayed by the number of output control points in the patch.
const Id type{TypeArray(F32[4], ConstU32(runtime_info.hs_info.output_control_points))};
const Id id{DefineOutput(type, i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], 4};
}

// TODO is it ok to share output locations between patch consts and
// per-vertex output attrs?
// spirv-val doesn't complain so idk
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineOutput(F32[4], index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_out{}", index));
patches[index] = id;
}
break;
}
case LogicalStage::TessellationEval: {
// TODO copied from logical vertex, figure this out
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
info.stores.Get(IR::Attribute::Position3);
if (has_extra_pos_stores) {
const Id type{TypeArray(F32[1], ConstU32(8U))};
clip_distances =
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
}
break;
}
case LogicalStage::Fragment:
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
Expand Down
Loading

0 comments on commit 09acb68

Please sign in to comment.