Skip to content

Commit

Permalink
D3D11-only experimental way to cull out-of-bounds triangles. Helps #5001
Browse files Browse the repository at this point in the history
, at least for Toca.

I'm really not sure how big the guard band should be. The right way to
do this would be to use a geometry shader instead of killing triangles
by setting w = NaN, which I'm not sure will do sane things on all
hardware. Unfortunately, geometry shaders are not available everywhere.
  • Loading branch information
hrydgard committed Apr 1, 2017
1 parent f7d6d7d commit 7a20652
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 26 deletions.
2 changes: 1 addition & 1 deletion GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
}
if (dirtyUniforms & DIRTY_FOGCOLOR) {
Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
Uint8x3ToFloat4_Alpha(ub->fogColor, gstate.fogcolor, NAN);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
Expand Down
28 changes: 14 additions & 14 deletions GPU/Common/ShaderUniforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ struct UB_VS_FS_Base {
float depthRange[4];
float fogCoef_stencil[4];
float matAmbient[4];
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
// Fragment data
float fogColor[4];
float texEnvColor[4];
Expand All @@ -43,6 +39,10 @@ struct UB_VS_FS_Base {
float blendFixB[4];
float texClamp[4];
float texClampOffset[4];
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
};

static const char *ub_baseStr =
Expand All @@ -55,18 +55,18 @@ R"( mat4 proj_mtx;
vec4 depthRange;
vec3 fogcoef_stencilreplace;
vec4 matambientalpha;
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
vec3 fogcolor;
vec3 fogcolor; float nanValue;
vec3 texenv;
ivec4 alphacolorref;
ivec4 alphacolormask;
vec3 blendFixA;
vec3 blendFixB;
vec4 texclamp;
vec2 texclampoff;
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
)";

// HLSL code is shared so these names are changed to match those in DX9.
Expand All @@ -80,18 +80,18 @@ R"( float4x4 u_proj;
float4 u_depthRange;
float3 u_fogcoef_stencilreplace;
float4 u_matambientalpha;
int u_spline_count_u;
int u_spline_count_v;
int u_spline_type_u;
int u_spline_type_v;
float3 u_fogcolor;
float3 u_fogcolor; float nanValue;
float3 u_texenv;
uint4 u_alphacolorref;
uint4 u_alphacolormask;
float3 u_blendFixA;
float3 u_blendFixB;
float4 u_texclamp;
float2 u_texclampoff;
int u_spline_count_u;
int u_spline_count_v;
int u_spline_type_u;
int u_spline_type_v;
)";

// 576 bytes. Can we get down to 512?
Expand Down
44 changes: 33 additions & 11 deletions GPU/Directx9/VertexShaderGeneratorDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@

namespace DX9 {

// The PSP does not have a proper triangle clipper, but it does have a guard band and can rasterize rather large
// triangles that go outside the viewport. However, there are limits, and it will drop triangles that are very
// large. Some games (including Parappa The Rapper in an obscure case, and Outrun) appear to draw broken geometry,
// probably due to game bugs that were never discovered because the PSP would drop that geometry. Try to get rid
// of those triangles by setting the W of any offending vertex to NaN.
// Compile-time switch: the generated shader only contains the check when this is true and lang != HLSL_DX9.
const bool guardBandCulling = true;
// Threshold compared against abs(outPos.x / outPos.w) and abs(outPos.y / outPos.w) in the generated vertex
// shader, i.e. it is measured after the perspective divide.
// Not sure what a good value for this is, it should probably depend on the framebuffer size.
// Let's be conservative.
const float guardBand = 64.0f;

static const char * const boneWeightAttrDecl[9] = {
"#ERROR#",
"float a_w1:TEXCOORD1;\n",
Expand Down Expand Up @@ -344,6 +354,7 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
}
}


WRITE(p, "VS_OUT main(VS_IN In) {\n");
WRITE(p, " VS_OUT Out;\n");
if (!useHWTransform) {
Expand All @@ -369,26 +380,32 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
}
if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) {
if (gstate.isModeThrough()) {
WRITE(p, " Out.gl_Position = mul(u_proj_through, float4(In.position.xyz, 1.0));\n");
WRITE(p, " float4 outPos = mul(u_proj_through, float4(In.position.xyz, 1.0));\n");
} else {
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(u_proj, float4(In.position.xyz, 1.0)));\n");
WRITE(p, " float4 outPos = depthRoundZVP(mul(u_proj, float4(In.position.xyz, 1.0)));\n");
} else {
WRITE(p, " Out.gl_Position = mul(u_proj, float4(In.position.xyz, 1.0));\n");
WRITE(p, " float4 outPos = mul(u_proj, float4(In.position.xyz, 1.0));\n");
}
}
} else {
if (gstate.isModeThrough()) {
WRITE(p, " Out.gl_Position = mul(float4(In.position.xyz, 1.0), u_proj_through);\n");
WRITE(p, " float4 outPos = mul(float4(In.position.xyz, 1.0), u_proj_through);\n");
} else {
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(float4(In.position.xyz, 1.0), u_proj));\n");
WRITE(p, " float4 outPos = depthRoundZVP(mul(float4(In.position.xyz, 1.0), u_proj));\n");
} else {
WRITE(p, " Out.gl_Position = mul(float4(In.position.xyz, 1.0), u_proj);\n");
WRITE(p, " float4 outPos = mul(float4(In.position.xyz, 1.0), u_proj);\n");
}
}
}
} else {
if (lang != HLSL_DX9 && guardBandCulling) {
// Guard band culling
WRITE(p, " float2 projPos = outPos.xy / outPos.w;\n");
WRITE(p, " if (abs(projPos.x) > %f || abs(projPos.y) > %f) outPos.w = nanValue;\n", guardBand, guardBand);
}
WRITE(p, " Out.gl_Position = outPos;\n");
} else {
// Step 1: World Transform / Skinning
if (!enableBones) {
// Hardware tessellation
Expand Down Expand Up @@ -574,18 +591,23 @@ void GenerateVertexShaderHLSL(const ShaderID &id, char *buffer, ShaderLanguage l
if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) {
// Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(u_proj, viewPos));\n");
WRITE(p, " float4 outPos = depthRoundZVP(mul(u_proj, viewPos));\n");
} else {
WRITE(p, " Out.gl_Position = mul(u_proj, viewPos);\n");
WRITE(p, " float4 outPos = mul(u_proj, viewPos);\n");
}
} else {
// Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(viewPos, u_proj));\n");
WRITE(p, " float4 outPos = depthRoundZVP(mul(viewPos, u_proj));\n");
} else {
WRITE(p, " Out.gl_Position = mul(viewPos, u_proj);\n");
WRITE(p, " float4 outPos = mul(viewPos, u_proj);\n");
}
}
if (lang != HLSL_DX9 && guardBandCulling) {
WRITE(p, " float2 projPos = outPos.xy / outPos.w;\n");
WRITE(p, " if (abs(projPos.x) > %f || abs(projPos.y) > %f) outPos.w = nanValue;\n", guardBand, guardBand);
}
WRITE(p, " Out.gl_Position = outPos;\n");

// TODO: Declare variables for dots for shade mapping if needed.

Expand Down

0 comments on commit 7a20652

Please sign in to comment.