AtlasEngine: Fix custom shader time imprecision (#17104)

Since floats are imprecise, we need to constrain the time value to a range that can be represented accurately. Assuming a monitor refresh rate of 1000 Hz, we can still easily represent 1000 seconds accurately (roughly 16 minutes). So to solve this, we simply treat the shader time modulo 1000 seconds. This may lead to some unexpected jank every 16 minutes, but it otherwise keeps any ongoing animation smooth. (cherry picked from commit daffb2d) Service-Card-Id: 92409332 Service-Version: 1.20
microsoft · Apr 23, 2024 · f442109 · f442109
1 parent 8c1316c
commit f442109
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 3 deletions.
diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp
@@ -82,6 +82,20 @@ struct std::hash<BackendD3D::AtlasFontFaceEntry>
     }
 };
 
+static u64 queryPerfFreq() noexcept // Returns the value written by QueryPerformanceFrequency(), as a u64.
+{
+    LARGE_INTEGER li;
+    QueryPerformanceFrequency(&li); // The call's return value is not checked.
+    return std::bit_cast<u64>(li.QuadPart); // Reinterprets the bits of QuadPart as u64 (no numeric conversion).
+}
+
+static u64 queryPerfCount() noexcept // Returns the value written by QueryPerformanceCounter(), as a u64.
+{
+    LARGE_INTEGER li;
+    QueryPerformanceCounter(&li); // The call's return value is not checked.
+    return std::bit_cast<u64>(li.QuadPart); // Reinterprets the bits of QuadPart as u64 (no numeric conversion).
+}
+
 BackendD3D::BackendD3D(const RenderingPayload& p)
 {
     THROW_IF_FAILED(p.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _vertexShader.addressof()));
@@ -501,7 +515,14 @@ void BackendD3D::_recreateCustomShader(const RenderingPayload& p)
             THROW_IF_FAILED(p.device->CreateSamplerState(&desc, _customShaderSamplerState.put()));
         }
 
-        _customShaderStartTime = std::chrono::steady_clock::now();
+        // Since floats are imprecise we need to constrain the time value into a range that can be accurately represented.
+        // Assuming a monitor refresh rate of 1000 Hz, we can still easily represent 1000 seconds accurately (roughly 16 minutes).
+        // 10000 seconds would already result in a 50% error. So to avoid this, we use queryPerfCount() modulo _customShaderPerfTickMod.
+        // The use of a power of 10 is intentional, because shaders are often periodic and this makes any decimal multiplier up to 3 fractional
+        // digits not break the periodicity. For instance, with a wraparound of 1000 seconds sin(1.234*x) is still perfectly periodic.
+        const auto freq = queryPerfFreq();
+        _customShaderPerfTickMod = freq * 1000;
+        _customShaderSecsPerPerfTick = 1.0f / freq;
     }
 }
 
@@ -2188,8 +2209,12 @@ void BackendD3D::_debugDumpRenderTarget(const RenderingPayload& p)
 void BackendD3D::_executeCustomShader(RenderingPayload& p)
 {
     {
+        // See the comment in _recreateCustomShader() which initializes the two members below and explains what they do.
+        const auto now = queryPerfCount();
+        const auto time = static_cast<int>(now % _customShaderPerfTickMod) * _customShaderSecsPerPerfTick;
+
         const CustomConstBuffer data{
-            .time = std::chrono::duration<f32>(std::chrono::steady_clock::now() - _customShaderStartTime).count(),
+            .time = time,
             .scale = static_cast<f32>(p.s->font->dpi) / static_cast<f32>(USER_DEFAULT_SCREEN_DPI),
             .resolution = {
                 static_cast<f32>(_viewportCellCount.x * p.s->font->cellSize.x),

diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h
@@ -250,7 +250,8 @@ namespace Microsoft::Console::Render::Atlas
         wil::com_ptr<ID3D11PixelShader> _customPixelShader;
         wil::com_ptr<ID3D11Buffer> _customShaderConstantBuffer;
         wil::com_ptr<ID3D11SamplerState> _customShaderSamplerState;
-        std::chrono::steady_clock::time_point _customShaderStartTime;
+        u64 _customShaderPerfTickMod = 0;
+        f32 _customShaderSecsPerPerfTick = 0;
 
         wil::com_ptr<ID3D11Texture2D> _backgroundBitmap;
         wil::com_ptr<ID3D11ShaderResourceView> _backgroundBitmapView;

diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h
@@ -146,6 +146,8 @@ namespace Microsoft::Console::Render::Atlas
     using i32x4 = vec4<i32>;
     using i32r = rect<i32>;
 
+    using u64 = uint64_t;
+
     using f32 = float;
     using f32x2 = vec2<f32>;
     using f32x4 = vec4<f32>;