Skip to content

Commit

Permalink
[d3d8] Add hardware shadow buffer filtering
Browse files Browse the repository at this point in the history
Closes #65
  • Loading branch information
AlpyneDreams committed Dec 6, 2022
1 parent b15771a commit e3009f6
Show file tree
Hide file tree
Showing 10 changed files with 98 additions and 17 deletions.
5 changes: 3 additions & 2 deletions dxvk.conf
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,8 @@

# Use NVIDIA Shadow Buffers
#
# Vendor extension for GeForce3 and GeForce4 cards that allows
# sampling depth textures with non-normalized Z coordinates.
# Vendor behavior for GeForce3 and GeForce4 cards that allows
# sampling depth textures with non-normalized Z coordinates
# and applies hardware shadow filtering.

# d3d8.useShadowBuffers = False
5 changes: 3 additions & 2 deletions src/d3d8/d3d8_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ namespace dxvk {
m_bridge->SetAPIName("D3D8");
}

// Shadow buffers are implemented by scaling depth test reference values
m_bridge->SetDrefScalingEnabled(m_d3d8Options.useShadowBuffers);
// Shadow buffers are implemented by scaling
// depth test reference values and applying a 2x2 PCF.
m_bridge->SetShadowBuffersEnabled(m_d3d8Options.useShadowBuffers);

// D3D8 Render states that aren't remapped
// but should still be recorded by D3D9
Expand Down
1 change: 1 addition & 0 deletions src/d3d8/d3d8_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ namespace dxvk {
struct D3D8Options {
/// Treat D24S8 and D16 as early NVIDIA shadow buffers that test
/// reference values in the range [0..2^N - 1] where N is bit depth.
/// Also emulates hardware shadow filtering using a bilinear 2x2 PCF.
bool useShadowBuffers = false;

D3D8Options() {}
Expand Down
5 changes: 3 additions & 2 deletions src/d3d9/d3d9_bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ namespace dxvk {
m_device->m_implicitSwapchain->SetApiName(name);
}

void D3D9Bridge::SetDrefScalingEnabled(bool enabled) {
m_device->m_dxsoOptions.drefScaling = enabled;
// Switches both halves of the shadow buffer emulation on or off at once:
// the depth reference scaling and the shadow filter shader options.
void D3D9Bridge::SetShadowBuffersEnabled(bool enabled) {
  auto& shaderOptions = m_device->m_dxsoOptions;

  shaderOptions.shadowFilter = enabled;
  shaderOptions.drefScaling  = enabled;
}

HRESULT D3D9Bridge::UpdateTextureFromBuffer(
Expand Down
2 changes: 1 addition & 1 deletion src/d3d9/d3d9_bridge.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ namespace dxvk {

virtual void SetAPIName(const char* name);

virtual void SetDrefScalingEnabled(bool enabled);
virtual void SetShadowBuffersEnabled(bool enabled);

virtual HRESULT UpdateTextureFromBuffer(
IDirect3DSurface9* pDestSurface,
Expand Down
7 changes: 5 additions & 2 deletions src/d3d9/d3d9_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3843,7 +3843,7 @@ namespace dxvk {

const uint32_t offset = StateSampler * 2;
m_drefScale &= ~(0b11u << offset);
if (m_dxsoOptions.drefScaling)
if (m_dxsoOptions.drefScaling || m_dxsoOptions.shadowFilter)
m_drefScale |= GetDepthBufferDrefScale(newTexture->Desc()->Format) << offset;

const bool oldCube = m_cubeTextures & (1u << StateSampler);
Expand Down Expand Up @@ -6889,7 +6889,10 @@ namespace dxvk {
stage.Projected = (ttff & D3DTTFF_PROJECTED) ? 1 : 0;
stage.ProjectedCount = (ttff & D3DTTFF_PROJECTED) ? count : 0;

stage.DrefScale = D3D9DrefScale((m_drefScale >> samplerOffset) & 0b11u);
if (m_dxsoOptions.drefScaling)
stage.DrefScale = D3D9DrefScale((m_drefScale >> samplerOffset) & 0b11u);
if (m_dxsoOptions.shadowFilter)
stage.ShadowFilter = (m_depthTextures & (1 << idx)) != 0;
}

auto& stage0 = key.Stages[0].Contents;
Expand Down
64 changes: 58 additions & 6 deletions src/d3d9/d3d9_fixed_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,49 @@ namespace dxvk {
spvModule.opLabel(atestSkipLabel);
}

// Emits a filtered depth-compare lookup around `coordinates`.
//
// Four extra depth-compare samples are taken at constant texel offsets
// forming a cross (+v, -u, +u, -v) and blended with equal weights.
//
//   module       SPIR-V module that receives the generated instructions
//   inSample     id of an already emitted center sample, or 0 if there is none
//   sampledImage id of the sampled image to read from
//   coordinates  id of the texture coordinates
//   reference    id of the depth comparison reference value
//   operands     image operands of the original lookup; a constant offset
//                is added on a private copy for each tap
//
// Returns the id of the scalar float result. Without a center sample every
// tap is weighted 1/4; with a center sample the taps and the center sample
// are each weighted 1/5.
uint32_t DoFixedFunctionShadowFilter(
        SpirvModule&        module,
        uint32_t            inSample,
        uint32_t            sampledImage,
        uint32_t            coordinates,
        uint32_t            reference,
  const SpirvImageOperands& operands) {
  // (du, dv) texel offsets, listed in the order the taps are emitted.
  // Tap N ends up in component N of the accumulated vector.
  static constexpr int kTapOffsets[4][2] = {
    {  0,  1 },
    { -1,  0 },
    {  1,  0 },
    {  0, -1 },
  };

  const uint32_t scalarType = module.defFloatType(32);
  const uint32_t vectorType = module.defVectorType(scalarType, 4);

  uint32_t taps = module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f);

  // Work on a copy so the caller's operands stay untouched.
  SpirvImageOperands tapOperands = operands;
  tapOperands.flags |= spv::ImageOperandsConstOffsetMask;

  for (uint32_t component = 0; component < 4; component++) {
    const int du = kTapOffsets[component][0];
    const int dv = kTapOffsets[component][1];

    tapOperands.sConstOffset = module.constvec4i32(du, dv, 0, 0);

    const uint32_t tap = module.opImageSampleDrefImplicitLod(
      scalarType, sampledImage, coordinates, reference, tapOperands);

    taps = module.opCompositeInsert(vectorType, tap, taps, 1, &component);
  }

  const bool hasCenterSample = inSample != 0;

  // Per-tap weight: 1/5 when the center sample takes part, 1/4 otherwise.
  uint32_t weights;
  if (hasCenterSample)
    weights = module.constvec4f32(0.20, 0.20, 0.20, 0.20);
  else
    weights = module.constvec4f32(0.25, 0.25, 0.25, 0.25);

  // The dot product yields the weighted sum of the four taps.
  uint32_t filtered = module.opDot(scalarType, taps, weights);

  // Fold in the center sample with the same 1/5 weight, if there is one.
  if (hasCenterSample) {
    const uint32_t centerWeight = module.constf32(0.20);
    filtered = module.opFFma(scalarType, inSample, centerWeight, filtered);
  }

  return filtered;
}

uint32_t SetupRenderStateBlock(SpirvModule& spvModule, uint32_t count) {
uint32_t floatType = spvModule.defFloatType(32);
Expand Down Expand Up @@ -1780,11 +1823,13 @@ namespace dxvk {

uint32_t texcoordCnt = m_ps.samplers[i].texcoordCnt;

D3D9DrefScale drefScale = D3D9DrefScale(m_fsKey.Stages[i].Contents.DrefScale);
D3D9DrefScale drefScale = D3D9DrefScale(stage.DrefScale);
bool drefScaled = drefScale != DrefScale_None;
bool shadowFilter = stage.ShadowFilter;

// Add one for the texcoord count
// if we need to include the divider
if (m_fsKey.Stages[i].Contents.Projected || drefScale != DrefScale_None)
if (m_fsKey.Stages[i].Contents.Projected || drefScaled || shadowFilter)
texcoordCnt++;

std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
Expand Down Expand Up @@ -1823,14 +1868,21 @@ namespace dxvk {
shouldProject = false;
}

if (unlikely(drefScale != DrefScale_None)) {
if (unlikely(shadowFilter || drefScaled)) {
uint32_t component = 2;
uint32_t reference = m_module.opCompositeExtract(m_floatType, texcoord, 1, &component);

uint32_t maxDref = m_module.constf32(GetDrefScaleFactor(drefScale));
reference = m_module.opFMul(m_floatType, reference, maxDref);

if (drefScaled) {
uint32_t maxDref = m_module.constf32(GetDrefScaleFactor(drefScale));
reference = m_module.opFMul(m_floatType, reference, maxDref);
}

texture = m_module.opImageSampleDrefImplicitLod(m_floatType, imageVarId, texcoord, reference, imageOperands);

if (shadowFilter) {
texture = DoFixedFunctionShadowFilter(m_module, texture, imageVarId, texcoord, reference, imageOperands);
}

texture = ScalarReplicate(texture);
} else {
if (shouldProject)
Expand Down
11 changes: 10 additions & 1 deletion src/d3d9/d3d9_fixed_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ namespace dxvk {

void DoFixedFunctionAlphaTest(SpirvModule& spvModule, const D3D9AlphaTestContext& ctx);

// Emits a filtered depth-compare lookup and returns the id of the
// resulting scalar float. Four additional depth-compare samples are
// taken at constant texel offsets around `coordinates` and blended
// with equal weights.
//
// inSample is the id of an already emitted center sample that is
// blended in as a fifth tap; pass 0 to filter the four offset taps only.
// sampledImage, coordinates and reference are the ids used for each
// lookup, and operands are the image operands of the original lookup
// (a constant offset is added to a copy of them for every tap).
uint32_t DoFixedFunctionShadowFilter(
SpirvModule& module,
uint32_t inSample,
uint32_t sampledImage,
uint32_t coordinates,
uint32_t reference,
const SpirvImageOperands& operands);

// Returns a render state block
uint32_t SetupRenderStateBlock(SpirvModule& spvModule, uint32_t count);

Expand Down Expand Up @@ -189,7 +197,8 @@ namespace dxvk {

uint32_t TextureBound : 1;

uint32_t DrefScale : 2;
uint32_t DrefScale : 2;
uint32_t ShadowFilter : 1;

// Included in here, read from Stage 0 for packing reasons
// Affects all stages.
Expand Down
6 changes: 6 additions & 0 deletions src/dxso/dxso_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2913,6 +2913,12 @@ void DxsoCompiler::emitControlFlowGenericLoop(
fetch4,
imageOperands);

// Emulate hardware shadow filtering for 2D depth texture lookups.
if (depth && m_moduleInfo.options.shadowFilter && samplerType == SamplerTypeTexture2D) {
const uint32_t sampledImage = m_module.opLoad(sampler.typeId, sampler.varId);
result.id = DoFixedFunctionShadowFilter(m_module, result.id, sampledImage, texcoordVar.id, reference, imageOperands);
}

// If we are sampling depth we've already specc'ed this!
// This path is always size 4 because it only hits on color.
if (isNull != 0) {
Expand Down
9 changes: 8 additions & 1 deletion src/dxso/dxso_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,15 @@ namespace dxvk {
/// Whether or not we can rely on robustness2 to handle oob constant access
bool robustness2Supported;

/// Whether we should check SpecDrefScale at runtime to apply Dref scaling for depth buffers
/// Whether we should check SpecDrefScale at runtime to apply Dref scaling for depth
/// textures (D24S8 and D16). This allows compatibility with games that expect a
/// different depth test range, which was typically a D3D8 quirk on early NVIDIA hardware.
bool drefScaling = false;

/// Whether to perform 2x2 PCF when linearly sampling certain depth texture formats,
/// as done by early NVIDIA GPUs. The possibility of this behavior is also implied by
/// the spec for GL_ARB_shadow and various NVIDIA publications.
bool shadowFilter = false;
};

}

0 comments on commit e3009f6

Please sign in to comment.