[d3d8] Add hardware shadow buffer filtering

Closes #65
AlpyneDreams · Dec 6, 2022 · e3009f6 · e3009f6
1 parent b15771a
commit e3009f6
Show file tree

Hide file tree

Showing 10 changed files with 98 additions and 17 deletions.
diff --git a/dxvk.conf b/dxvk.conf
@@ -585,7 +585,8 @@
 
 # Use NVIDIA Shadow Buffers
 # 
-# Vendor extension for GeForce3 and GeForce4 cards that allows
-# sampling depth textures with non-normalized Z coordinates.
+# Vendor behavior for GeForce3 and GeForce4 cards that allows
+# sampling depth textures with non-normalized Z coordinates
+# and applies hardware shadow filtering.
 
 # d3d8.useShadowBuffers = False
diff --git a/src/d3d8/d3d8_device.cpp b/src/d3d8/d3d8_device.cpp
@@ -58,8 +58,9 @@ namespace dxvk {
       m_bridge->SetAPIName("D3D8");
     }
 
-    // Shadow buffers are implemented by scaling depth test reference values
-    m_bridge->SetDrefScalingEnabled(m_d3d8Options.useShadowBuffers);
+    // Shadow buffers are implemented by scaling
+    // depth test reference values and applying a 2x2 PCF.
+    m_bridge->SetShadowBuffersEnabled(m_d3d8Options.useShadowBuffers);
 
     // D3D8 Render states that aren't remapped
     // but should still be recorded by D3D9

diff --git a/src/d3d8/d3d8_options.h b/src/d3d8/d3d8_options.h
@@ -8,6 +8,7 @@ namespace dxvk {
   struct D3D8Options {
     /// Treat D24S8 and D16 as early NVIDIA shadow buffers that test
     /// reference values in the range [0..2^N - 1] where N is bit depth.
+    /// Also emulates hardware shadow filtering using a bilinear 2x2 PCF.
     bool useShadowBuffers = false;
 
     D3D8Options() {}

diff --git a/src/d3d9/d3d9_bridge.cpp b/src/d3d9/d3d9_bridge.cpp
@@ -19,8 +19,9 @@ namespace dxvk {
     m_device->m_implicitSwapchain->SetApiName(name);
   }
 
-  void D3D9Bridge::SetDrefScalingEnabled(bool enabled) {
-    m_device->m_dxsoOptions.drefScaling = enabled;
+  void D3D9Bridge::SetShadowBuffersEnabled(bool enabled) { // one switch drives both DXSO options below
+    m_device->m_dxsoOptions.drefScaling  = enabled; // depth reference scaling
+    m_device->m_dxsoOptions.shadowFilter = enabled; // shadow filtering
   }
 
   HRESULT D3D9Bridge::UpdateTextureFromBuffer(

diff --git a/src/d3d9/d3d9_bridge.h b/src/d3d9/d3d9_bridge.h
@@ -44,7 +44,7 @@ namespace dxvk {
 
     virtual void SetAPIName(const char* name);
 
-    virtual void SetDrefScalingEnabled(bool enabled);
+    virtual void SetShadowBuffersEnabled(bool enabled);
 
     virtual HRESULT UpdateTextureFromBuffer(
         IDirect3DSurface9*        pDestSurface,

diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
@@ -3843,7 +3843,7 @@ namespace dxvk {
 
       const uint32_t offset = StateSampler * 2;
       m_drefScale &= ~(0b11u << offset);
-      if (m_dxsoOptions.drefScaling)
+      if (m_dxsoOptions.drefScaling || m_dxsoOptions.shadowFilter)
         m_drefScale |= GetDepthBufferDrefScale(newTexture->Desc()->Format) << offset;
 
       const bool oldCube = m_cubeTextures & (1u << StateSampler);
@@ -6889,7 +6889,10 @@ namespace dxvk {
         stage.Projected      = (ttff & D3DTTFF_PROJECTED) ? 1      : 0;
         stage.ProjectedCount = (ttff & D3DTTFF_PROJECTED) ? count  : 0;
 
-        stage.DrefScale = D3D9DrefScale((m_drefScale >> samplerOffset) & 0b11u);
+        if (m_dxsoOptions.drefScaling)
+          stage.DrefScale = D3D9DrefScale((m_drefScale >> samplerOffset) & 0b11u);
+        if (m_dxsoOptions.shadowFilter)
+          stage.ShadowFilter = (m_depthTextures & (1 << idx)) != 0;
       }
 
       auto& stage0 = key.Stages[0].Contents;

diff --git a/src/d3d9/d3d9_fixed_function.cpp b/src/d3d9/d3d9_fixed_function.cpp
@@ -334,6 +334,49 @@ namespace dxvk {
     spvModule.opLabel(atestSkipLabel);
   }
 
+  uint32_t DoFixedFunctionShadowFilter( // emits SPIR-V blending four offset depth-compare taps (plus an optional center sample); returns the result id
+      SpirvModule&            module,
+      uint32_t                inSample, // id of an already-emitted center sample, or 0 when there is none
+      uint32_t                sampledImage,
+      uint32_t                coordinates,
+      uint32_t                reference,
+  const SpirvImageOperands&   operands) {
+
+    SpirvImageOperands imageOperands = operands; // work on a copy; the caller's operands stay unmodified
+    imageOperands.flags             |= spv::ImageOperandsConstOffsetMask;
+
+    uint32_t f32  = module.defFloatType(32);
+    uint32_t vec4 = module.defVectorType(f32, 4);
+    uint32_t val  = module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f); // accumulates one tap result per component
+
+    uint32_t index = 0; // component of val that the next tap writes to
+    auto Tap = [&](int du, int dv) { // samples at constant offset (du, dv) and stores the result in val[index]
+      imageOperands.sConstOffset = module.constvec4i32(du, dv, 0, 0);
+      uint32_t sample            = module.opImageSampleDrefImplicitLod(f32, sampledImage, coordinates, reference, imageOperands);
+      val                        = module.opCompositeInsert(vec4, sample, val, 1, &index);
+
+      if (index < 3) index++; else index = 0; // advance to the next component, wrapping after the fourth
+    };
+
+    Tap(0, 1); // the four taps are the axis-aligned neighbours of the sample position
+    Tap(-1, 0);
+    Tap(1, 0);
+    Tap(0, -1);
+
+    uint32_t denom = inSample == 0 // per-tap weight: 1/4 without a center sample, 1/5 with one
+                   ? module.constvec4f32(0.25, 0.25, 0.25, 0.25)
+                   : module.constvec4f32(0.20, 0.20, 0.20, 0.20);
+
+    // Weighted sum of the 4 taps: their mean without a center sample, otherwise 1/5 of their sum
+    val = module.opDot(f32, val, denom);
+
+    // Add the center sample, if any, at weight 1/5 so the result is the mean of all 5 samples
+    if (inSample != 0) {
+      val = module.opFFma(f32, inSample, module.constf32(0.20), val);
+    }
+
+    return val;
+  }
 
   uint32_t SetupRenderStateBlock(SpirvModule& spvModule, uint32_t count) {
     uint32_t floatType = spvModule.defFloatType(32);
@@ -1780,11 +1823,13 @@ namespace dxvk {
 
           uint32_t texcoordCnt = m_ps.samplers[i].texcoordCnt;
 
-          D3D9DrefScale drefScale = D3D9DrefScale(m_fsKey.Stages[i].Contents.DrefScale);
+          D3D9DrefScale drefScale = D3D9DrefScale(stage.DrefScale);
+          bool drefScaled         = drefScale != DrefScale_None;
+          bool shadowFilter       = stage.ShadowFilter;
 
           // Add one for the texcoord count
           // if we need to include the divider
-          if (m_fsKey.Stages[i].Contents.Projected || drefScale != DrefScale_None)
+          if (m_fsKey.Stages[i].Contents.Projected || drefScaled || shadowFilter)
             texcoordCnt++;
 
           std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
@@ -1823,14 +1868,21 @@ namespace dxvk {
             shouldProject = false;
           }
 
-          if (unlikely(drefScale != DrefScale_None)) {
+          if (unlikely(shadowFilter || drefScaled)) {
             uint32_t component = 2;
             uint32_t reference = m_module.opCompositeExtract(m_floatType, texcoord, 1, &component);
 
-            uint32_t maxDref = m_module.constf32(GetDrefScaleFactor(drefScale));
-            reference        = m_module.opFMul(m_floatType, reference, maxDref);
-
+            if (drefScaled) {
+              uint32_t maxDref = m_module.constf32(GetDrefScaleFactor(drefScale));
+              reference        = m_module.opFMul(m_floatType, reference, maxDref);
+            }
+
             texture = m_module.opImageSampleDrefImplicitLod(m_floatType, imageVarId, texcoord, reference, imageOperands);
+
+            if (shadowFilter) {
+              texture = DoFixedFunctionShadowFilter(m_module, texture, imageVarId, texcoord, reference, imageOperands);
+            }
+
             texture = ScalarReplicate(texture);
           } else {
             if (shouldProject)

diff --git a/src/d3d9/d3d9_fixed_function.h b/src/d3d9/d3d9_fixed_function.h
@@ -58,6 +58,14 @@ namespace dxvk {
 
   void DoFixedFunctionAlphaTest(SpirvModule& spvModule, const D3D9AlphaTestContext& ctx);
 
+  uint32_t DoFixedFunctionShadowFilter(
+        SpirvModule&            module,
+        uint32_t                inSample,
+        uint32_t                sampledImage,
+        uint32_t                coordinates,
+        uint32_t                reference,
+  const SpirvImageOperands&     operands);
+
   // Returns a render state block
   uint32_t SetupRenderStateBlock(SpirvModule& spvModule, uint32_t count);
 
@@ -189,7 +197,8 @@ namespace dxvk {
 
         uint32_t     TextureBound : 1;
 
-        uint32_t      DrefScale : 2;
+        uint32_t     DrefScale      : 2;
+        uint32_t     ShadowFilter   : 1;
 
         // Included in here, read from Stage 0 for packing reasons
         // Affects all stages.

diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp
@@ -2913,6 +2913,12 @@ void DxsoCompiler::emitControlFlowGenericLoop(
         fetch4,
         imageOperands);
 
+      // Emulate hardware shadow filtering for 2D depth texture lookups.
+      if (depth && m_moduleInfo.options.shadowFilter && samplerType == SamplerTypeTexture2D) {
+        const uint32_t sampledImage = m_module.opLoad(sampler.typeId, sampler.varId);
+        result.id = DoFixedFunctionShadowFilter(m_module, result.id, sampledImage, texcoordVar.id, reference, imageOperands);
+      }
+
       // If we are sampling depth we've already specc'ed this!
       // This path is always size 4 because it only hits on color.
       if (isNull != 0) {

diff --git a/src/dxso/dxso_options.h b/src/dxso/dxso_options.h
@@ -47,8 +47,15 @@ namespace dxvk {
     /// Whether or not we can rely on robustness2 to handle oob constant access
     bool robustness2Supported;
 
-    /// Whether we should check SpecDrefScale at runtime to apply Dref scaling for depth buffers
+    /// Whether we should check SpecDrefScale at runtime to apply Dref scaling for depth
+    /// textures (D24S8 and D16). This allows compatibility with games that expect a
+    /// different depth test range, which was typically a D3D8 quirk on early NVIDIA hardware.
     bool drefScaling = false;
+
+    /// Whether to perform 2x2 PCF when linearly sampling certain depth texture formats,
+    /// as done by early NVIDIA GPUs. The possibility of this behavior is also implied by
+    /// the spec for GL_ARB_shadow and various NVIDIA publications.
+    bool shadowFilter = false;
   };
 
 }