From 1ebb3ce6b5e00c2ee6c5f9597858830aca306262 Mon Sep 17 00:00:00 2001 From: dzhdan Date: Fri, 14 Jun 2024 13:54:38 +0800 Subject: [PATCH] v4.8.2: HIGHLIGHTS: - SIGMA improvements DETAILS: - SIGMA: fixed 1 pixel wide blur on shadows with penumbra size < 1 pixel - SIGMA: better behavior for multi-layered shadows (a narrow penumbra inside a wide penumbra) - SIGMA: reduced potential flickering - SIGMA: fixed suboptimal output of the blur pass affecting TS pass - SIGMA: improved weights for moments calculations in TS pass - NRD: resolved some TODOs - updated deps - updated docs --- CMakeLists.txt | 5 +- External/MathLib | 2 +- Include/NRD.h | 4 +- Include/NRDSettings.h | 2 +- README.md | 17 ++- Resources/Version.h | 2 +- Shaders/Include/Common.hlsli | 6 +- Shaders/Include/NRD.hlsli | 108 +++++++++--------- Shaders/Include/REBLUR_Common.hlsli | 4 +- Shaders/Include/REBLUR_Config.hlsli | 3 +- Shaders/Include/REBLUR_HistoryFix.hlsli | 6 +- .../Include/REBLUR_TemporalAccumulation.hlsli | 2 +- .../REBLUR_TemporalStabilization.hlsli | 4 +- Shaders/Include/RELAX_Config.hlsli | 1 - .../Include/RELAX_TemporalAccumulation.hlsli | 6 +- Shaders/Include/SIGMA_Blur.hlsli | 72 +++++++----- Shaders/Include/SIGMA_Config.hlsli | 14 ++- .../Include/SIGMA_TemporalStabilization.hlsli | 92 +++++++-------- Shaders/Source/RELAX_Validation.cs.hlsl | 7 +- Source/Sigma.cpp | 1 + 20 files changed, 187 insertions(+), 171 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8553cd1..9c627c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,10 +195,7 @@ if (NOT NRD_DISABLE_SHADER_COMPILATION) if (NRD_EMBEDS_SPIRV_SHADERS) set (SHADERMAKE_COMMANDS ${SHADERMAKE_COMMANDS} COMMAND ShaderMake -p SPIRV --compiler "${DXC_SPIRV_PATH}" ${SHADERMAKE_GENERAL_ARGS} - --sRegShift 100 - --tRegShift 200 - --bRegShift 300 - --uRegShift 400 + --sRegShift 100 --tRegShift 200 --bRegShift 300 --uRegShift 400 ) endif () diff --git a/External/MathLib b/External/MathLib index 903f7ac..63c68ad 160000 --- 
a/External/MathLib +++ b/External/MathLib @@ -1 +1 @@ -Subproject commit 903f7ac918e63e3704de7a621deae6139575b887 +Subproject commit 63c68ad9811c069fde848922df5e5b5475750a1a diff --git a/Include/NRD.h b/Include/NRD.h index dffec3d..bc888b5 100644 --- a/Include/NRD.h +++ b/Include/NRD.h @@ -29,8 +29,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define NRD_VERSION_MAJOR 4 #define NRD_VERSION_MINOR 8 -#define NRD_VERSION_BUILD 1 -#define NRD_VERSION_DATE "16 May 2024" +#define NRD_VERSION_BUILD 2 +#define NRD_VERSION_DATE "14 June 2024" #if defined(_MSC_VER) #define NRD_CALL __fastcall diff --git a/Include/NRDSettings.h b/Include/NRDSettings.h index 83c11db..9fdd960 100644 --- a/Include/NRDSettings.h +++ b/Include/NRDSettings.h @@ -115,7 +115,7 @@ namespace nrd // (ms) - user provided if > 0, otherwise - tracked internally float timeDeltaBetweenFrames = 0.0f; - // (units) > 0 - use TLAS or tracing range (max value = NRD_FP16_MAX / NRD_FP16_VIEWZ_SCALE - 1 = 524031) + // (units) > 0 - use TLAS or tracing range float denoisingRange = 500000.0f; // (normalized %) - if relative distance difference is greater than threshold, history gets reset (0.5-2.5% works well) diff --git a/README.md b/README.md index e99058d..294703b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# NVIDIA REAL-TIME DENOISERS v4.8.1 (NRD) +# NVIDIA REAL-TIME DENOISERS v4.8.2 (NRD) [![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml) @@ -168,15 +168,18 @@ IN_NORMAL_ROUGHNESS = GetNormalAndRoughnessAt( A ); IN_MV = GetMotionAt( A ); ``` -See `NRDDescs.h` for more details and descriptions of other inputs and outputs. +See `NRDDescs.h` and `NRD.hlsli` for more details and descriptions of other inputs and outputs. 
# NOISY INPUTS NRD sample is a good start to familiarize yourself with input requirements and best practices, but main requirements can be summarized to: +Radiance: - Since *NRD* denoisers accumulate signals for a limited number of frames, the input signal must converge *reasonably* well for this number of frames. `REFERENCE` denoiser can be used to estimate temporal signal quality - Since *NRD* denoisers process signals spatially, high-energy fireflies in the input signal should be avoided. Most of them can be removed by enabling anti-firefly filter in *NRD*, but it will only work if the "background" signal is confident. The worst case is having a single pixel with high energy divided by a very small PDF to represent the lack of energy in neighboring non-representative (black) pixels - Radiance must be separated into diffuse and specular at primary hit (or secondary hit in case of *PSR*) + +Hit distance: - `hitT` can't be negative - `hitT` must not include primary hit distance - `hitT` for the first bounce after the primary hit or *PSR* must be provided "as is" @@ -191,9 +194,15 @@ NRD sample is a good start to familiarize yourself with input requirements and b - `hitDistanceReconstructionMode` must be set to something other than `OFF`, but bear in mind that the search area is limited to 3x3 or 5x5. In other words, it's the application's responsibility to guarantee a valid sample in this area. It can be achieved by clamping probabilities and using Bayer-like dithering (see the sample for more details) - Pre-pass must be enabled (i.e. 
`diffusePrepassBlurRadius` and `specularPrepassBlurRadius` must be set to 20-70 pixels) to compensate entropy increase, since radiance in valid samples is divided by probability to compensate 0 values in some neighbors - Probabilistic sampling for 2nd+ bounces is absolutely acceptable -- in case of many paths per pixel `hitT` for specular must be "averaged" by `NRD_FrontEnd_SpecHitDistAveraging_*` functions from `NRD.hlsli` +- In case of many paths per pixel `hitT` for specular must be "averaged" by `NRD_FrontEnd_SpecHitDistAveraging_*` functions from `NRD.hlsli` +- For *REBLUR* hit distance must be normalized using `REBLUR_FrontEnd_GetNormHitDist` + +Distance to occluder: +- `NoL <= 0` - 0 (it's very important!) +- `NoL > 0, hit` - hit distance +- `NoL > 0, miss` - >= NRD_FP16_MAX -See `NRDDescs.h` for more details and descriptions of other inputs and outputs. +See `NRDDescs.h` and `NRD.hlsli` for more details and descriptions of other inputs and outputs. # IMPROVING OUTPUT QUALITY diff --git a/Resources/Version.h b/Resources/Version.h index 38c0b3e..60206da 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -23,6 +23,6 @@ Versioning rules: #define VERSION_MAJOR 4 #define VERSION_MINOR 8 -#define VERSION_BUILD 1 +#define VERSION_BUILD 2 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING) diff --git a/Shaders/Include/Common.hlsli b/Shaders/Include/Common.hlsli index 6c1063e..0093c2e 100644 --- a/Shaders/Include/Common.hlsli +++ b/Shaders/Include/Common.hlsli @@ -12,9 +12,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
// Constants -#define NRD_NONE 0 -#define NRD_FRAME 1 -#define NRD_PIXEL 2 +#define NRD_NONE 0 // bad +#define NRD_FRAME 1 // good +#define NRD_PIXEL 2 // better, but leads to divergence #define NRD_RANDOM 3 // for experiments only // FP16 diff --git a/Shaders/Include/NRD.hlsli b/Shaders/Include/NRD.hlsli index 0140b9a..3624c32 100644 --- a/Shaders/Include/NRD.hlsli +++ b/Shaders/Include/NRD.hlsli @@ -16,54 +16,56 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. // INPUT PARAMETERS //================================================================================================================================= /* -float3 radiance: - - radiance should not include material information ( use material de-modulation to decouple materials ) - - radiance should not be premultiplied by "exposure" - - for Primary Surface Replacements ( PSR ) throughput should be de-modulated as much as possible ( see test 184 from the sample and TraceOpaque.hlsl ) - - for diffuse rays - - use COS-distribution ( or custom importance sampling ) - - if radiance is the result of path tracing, pass normalized hit distance as the sum of 1-all hits (always ignore primary hit!) - - for specular - - use VNDF sampling ( or custom importance sampling ) - - most advanced v3 version: https://gpuopen.com/download/publications/Bounded_VNDF_Sampling_for_Smith-GGX_Reflections.pdf - - if radiance is the result of path tracing, pass hit distance for the 1st bounce for the first time (always ignore primary hit!) 
- -float hitDist: - - can't be negative - - must not include primary hit distance - - for the first bounce after the primary hit or PSR must be provided "as is" - - for susequent bounces must be adjusted by curvature and lobe energy dissipation on the application side - - must be explicitly set to 0 for rays pointing inside the surface ( better to nopt cast such rays ) - -float normHitDist: - - normalized hit distance, gotten by using "REBLUR_FrontEnd_GetNormHitDist" - - REBLUR must be aware of the normalization function via "nrd::HitDistanceParameters" - - by definition, normalized hit distance is AO ( ambient occlusion ) for diffuse and SO ( specular occlusion ) for specular - - AO can be used to emulate 2nd+ diffuse bounces - - SO can be used to adjust IBL lighting - - ".w" channel of diffuse / specular output is AO / SO - - if you don't know which normalization function to choose use default values of "nrd::HitDistanceParameters" - -float roughness: - - "linear roughness" = sqrt( "m" ), where "m" = "alpha" - GGX roughness - - usage: "isDiffuse ? 1.0 : roughness" - -float normal: - - world-space normal - -float viewZ: - - linear view space Z for primary rays ( linearized camera depth ) - -float distanceToOccluder: - - distance to occluder, must follow the rules: - - NoL <= 0 - 0 ( it's very important ) - - NoL > 0 ( hit ) - hit distance - - NoL > 0 ( miss ) - >= NRD_FP16_MAX - -float tanOfLightAngularRadius: - - tan( lightAngularSize * 0.5 ) - - angular size is computed from the shadow receiving point - - in other words, tanOfLightAngularRadius = lightRadius / distanceToLight +NON-NOISY INPUTS: + float viewZ: + - linear view space Z for primary rays ( linearized camera depth ) + + float normal: + - world-space normal + + float roughness: + - "linear roughness" = sqrt( "m" ), where "m" = "alpha" - GGX roughness + - usage: "isDiffuse ? 
1.0 : roughness" + + float tanOfLightAngularRadius: + - tan( lightAngularSize * 0.5 ) + - angular size is computed from the shadow receiving point + - in other words, tanOfLightAngularRadius = lightRadius / distanceToLight + +NOISY INPUTS: + float3 radiance: + - radiance should not include material information ( use material de-modulation to decouple materials ) + - radiance should not be premultiplied by "exposure" + - for Primary Surface Replacements ( PSR ) throughput should be de-modulated as much as possible ( see test 184 from the sample and TraceOpaque.hlsl ) + - for diffuse rays + - use COS-distribution ( or custom importance sampling ) + - if radiance is the result of path tracing, pass normalized hit distance as the sum of 1-all hits (always ignore primary hit!) + - for specular + - use VNDF sampling ( or custom importance sampling ) + - most advanced v3 version: https://gpuopen.com/download/publications/Bounded_VNDF_Sampling_for_Smith-GGX_Reflections.pdf + - if radiance is the result of path tracing, pass hit distance for the 1st bounce for the first time (always ignore primary hit!) 
+ + float hitDist: + - can't be negative + - must not include primary hit distance + - for the first bounce after the primary hit or PSR must be provided "as is" + - for susequent bounces must be adjusted by curvature and lobe energy dissipation on the application side + - must be explicitly set to 0 for rays pointing inside the surface ( better to nopt cast such rays ) + + float normHitDist: + - logically same as "hitDist", but normalized to [0; 1] range using "REBLUR_FrontEnd_GetNormHitDist" + - REBLUR must be aware of the normalization function via "nrd::HitDistanceParameters" + - by definition, normalized hit distance is AO ( ambient occlusion ) for diffuse and SO ( specular occlusion ) for specular + - AO can be used to emulate 2nd+ diffuse bounces + - SO can be used to adjust IBL lighting + - ".w" channel of diffuse / specular output is AO / SO + - if you don't know which normalization function to choose use default values of "nrd::HitDistanceParameters" + + float distanceToOccluder: + - distance to occluder, must follow the rules: + - NoL <= 0 - 0 ( it's very important ) + - NoL > 0 ( hit ) - hit distance + - NoL > 0 ( miss ) - >= NRD_FP16_MAX */ #ifndef NRD_INCLUDED @@ -275,7 +277,6 @@ float tanOfLightAngularRadius: #define NRD_ROUGHNESS_ENCODING_SQRT_LINEAR 2 // sqrt( linearRoughness ) #define NRD_FP16_MAX 65504.0 -#define NRD_FP16_VIEWZ_SCALE 0.125 // TODO: tuned for meters, needs to be scaled down for cm and mm #define NRD_PI 3.14159265358979323846 #define NRD_EPS 1e-6 #define NRD_REJITTER_VIEWZ_THRESHOLD 0.01 // normalized % @@ -627,11 +628,12 @@ void NRD_FrontEnd_SpecHitDistAveraging_End( inout float accumulatedSpecHitDist ) //================================================================================================================================= // This function returns AO / SO which REBLUR can decode back to "hit distance" internally -float REBLUR_FrontEnd_GetNormHitDist( float hitDist, float viewZ, float4 hitDistParams, float roughness ) 
+float REBLUR_FrontEnd_GetNormHitDist( float hitDist, float viewZ, float4 hitDistParams, float roughness, float trimmingThreshold = 0.0 ) { - // TODO: Sampling can produce rays pointing inside surface, i.e. "hitDist = 0". But due to ray offsetting - // actual "hitDist" can be a very small value in this case. Since NRD handles "hitDist = 0" case, should be - // small "hitDist" values trimmed to 0? + // Sampling can produce rays pointing inside surface, i.e. "hitDist = 0". But due to ray offsetting actual "hitDist" can be a + // very small value in this case. Since NRD has been designed to handle "hitDist = 0" case, accidentally small "hitDist" values + // better trim to 0 + hitDist = hitDist < trimmingThreshold ? 0.0 : hitDist; float f = _REBLUR_GetHitDistanceNormalization( viewZ, hitDistParams, roughness ); diff --git a/Shaders/Include/REBLUR_Common.hlsli b/Shaders/Include/REBLUR_Common.hlsli index 0864f69..b2ad6d5 100644 --- a/Shaders/Include/REBLUR_Common.hlsli +++ b/Shaders/Include/REBLUR_Common.hlsli @@ -22,8 +22,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. // Internal data ( from the previous frame ) -#define REBLUR_PackViewZ( p ) min( p * NRD_FP16_VIEWZ_SCALE, NRD_FP16_MAX ) -#define REBLUR_UnpackViewZ( p ) ( p / NRD_FP16_VIEWZ_SCALE ) +#define REBLUR_PackViewZ( p ) min( p * REBLUR_FP16_VIEWZ_SCALE, NRD_FP16_MAX ) +#define REBLUR_UnpackViewZ( p ) ( p / REBLUR_FP16_VIEWZ_SCALE ) float4 PackNormalRoughness( float4 p ) { diff --git a/Shaders/Include/REBLUR_Config.hlsli b/Shaders/Include/REBLUR_Config.hlsli index 351c260..391ba92 100644 --- a/Shaders/Include/REBLUR_Config.hlsli +++ b/Shaders/Include/REBLUR_Config.hlsli @@ -54,7 +54,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#define REBLUR_POISSON_SAMPLE_NUM 8 #define REBLUR_POISSON_SAMPLES( i ) g_Special8[ i ] -#define REBLUR_PRE_BLUR_ROTATOR_MODE NRD_FRAME // TODO: others are expensive, but work better +#define REBLUR_PRE_BLUR_ROTATOR_MODE NRD_FRAME #define REBLUR_PRE_BLUR_FRACTION_SCALE 2.0 #define REBLUR_PRE_BLUR_NON_LINEAR_ACCUM_SPEED ( 1.0 / ( 1.0 + 10.0 ) ) @@ -67,6 +67,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define REBLUR_HIT_DIST_MIN_WEIGHT( smc ) ( 0.1 * smc ) // was 0.1 +#define REBLUR_FP16_VIEWZ_SCALE ( gViewZScale * 0.125) // TODO: tuned for meters, i.e. gViewZScale = 1.0 #define REBLUR_MAX_PERCENT_OF_LOBE_VOLUME 0.75 #define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM 1 #define REBLUR_COLOR_CLAMPING_SIGMA_SCALE 2.0 // using smaller values leads to bias if camera rotates slowly due to reprojection instabilities diff --git a/Shaders/Include/REBLUR_HistoryFix.hlsli b/Shaders/Include/REBLUR_HistoryFix.hlsli index 7f0518d..8e78efb 100644 --- a/Shaders/Include/REBLUR_HistoryFix.hlsli +++ b/Shaders/Include/REBLUR_HistoryFix.hlsli @@ -293,16 +293,12 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Stride between taps float smc = GetSpecMagicCurve( roughness ); float specStride = stride.y * float( frameNum.y < gHistoryFixFrameNum ); - specStride *= lerp( 0.5, 1.0, smc ); // TODO: seems to work better than "minBlurRadius" + specStride *= lerp( 0.5, 1.0, smc ); // hand tuned specStride = floor( specStride ); // History reconstruction if( specStride != 0 ) { - // TODO: introduce IN_SECONDARY_ROUGHNESS: - // - to allow blur on diffuse-like surfaces in reflection - // - use "hitDistanceWeight" only for very low primary roughness to avoid color bleeding from one surface to another - int specStridei = int( specStride + 0.5 ); // Parameters diff --git a/Shaders/Include/REBLUR_TemporalAccumulation.hlsli b/Shaders/Include/REBLUR_TemporalAccumulation.hlsli index 270d527..3ace181 100644 --- 
a/Shaders/Include/REBLUR_TemporalAccumulation.hlsli +++ b/Shaders/Include/REBLUR_TemporalAccumulation.hlsli @@ -76,7 +76,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float4 normalAndRoughness = s_Normal_Roughness[ pos.y ][ pos.x ]; // Average normal - if( i < 2 && j < 2 ) // TODO: is backward 2x2 OK? + if( i < 2 && j < 2 ) Navg += normalAndRoughness.xyz; #ifdef REBLUR_SPECULAR diff --git a/Shaders/Include/REBLUR_TemporalStabilization.hlsli b/Shaders/Include/REBLUR_TemporalStabilization.hlsli index fc5063e..b43d294 100644 --- a/Shaders/Include/REBLUR_TemporalStabilization.hlsli +++ b/Shaders/Include/REBLUR_TemporalStabilization.hlsli @@ -8,8 +8,6 @@ distribution of this software and related documentation without an express license agreement from NVIDIA CORPORATION is strictly prohibited. */ -// TODO: add REBLUR_OCCLUSION support to TemporalStabilization? - groupshared float4 s_Diff[ BUFFER_Y ][ BUFFER_X ]; groupshared float4 s_Spec[ BUFFER_Y ][ BUFFER_X ]; @@ -321,7 +319,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float f = STL::Math::SmoothStep( gSpecProbabilityThresholdsForMvModification.x, gSpecProbabilityThresholdsForMvModification.y, specProb ); if( STL::Rng::Hash::GetFloat( ) < f ) { - float3 specMv = Xvirtual - X; // TODO: world-space delta fits badly into FP16 + float3 specMv = Xvirtual - X; // world-space delta fits badly into FP16! Prefer 2.5D motion! if( gMvScale.w == 0.0 ) { specMv.xy = vmbPixelUv - pixelUv; diff --git a/Shaders/Include/RELAX_Config.hlsli b/Shaders/Include/RELAX_Config.hlsli index d7ec060..1e0019e 100644 --- a/Shaders/Include/RELAX_Config.hlsli +++ b/Shaders/Include/RELAX_Config.hlsli @@ -12,7 +12,6 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
// Settings #define RELAX_MAX_ACCUM_FRAME_NUM 255 -#define RELAX_SPEC_DOMINANT_DIRECTION STL_SPECULAR_DOMINANT_DIRECTION_G2 // TODO: move to Common, change REBLUR too #define RELAX_HIT_DIST_MIN_WEIGHT 0.2 // Sacrifices spatial fidelity to improve temporal stability. Should be set to 0 for relatively clean input signals like RTXDI and 0.1 .. 0.2 for lower quality input signals #define RELAX_ANTILAG_ACCELERATION_AMOUNT_SCALE 10.0 // Multiplier used to put RelaxAntilagSettings::accelerationAmount to convenient [0; 1] range diff --git a/Shaders/Include/RELAX_TemporalAccumulation.hlsli b/Shaders/Include/RELAX_TemporalAccumulation.hlsli index bdaf2a5..07b7860 100644 --- a/Shaders/Include/RELAX_TemporalAccumulation.hlsli +++ b/Shaders/Include/RELAX_TemporalAccumulation.hlsli @@ -725,10 +725,6 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo // Thin lens equation for adjusting reflection HitT float hitDistFocused = ApplyThinLensEquation(hitDist, curvature); - [flatten] - if (abs(hitDistFocused) < 0.001) // TODO: why? 
- hitDistFocused = 0.001; - // Loading specular data based on virtual motion float4 prevSpecularIlluminationAnd2ndMomentVMB; float4 prevSpecularIlluminationAnd2ndMomentVMBResponsive; @@ -770,7 +766,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo ); // Amount of virtual motion - dominant factor - float4 D = STL::ImportanceSampling::GetSpecularDominantDirection(currentNormal, V, currentRoughnessModified, RELAX_SPEC_DOMINANT_DIRECTION); + float4 D = STL::ImportanceSampling::GetSpecularDominantDirection(currentNormal, V, currentRoughnessModified, STL_SPECULAR_DOMINANT_DIRECTION_G2); float virtualHistoryAmount = VMBReprojectionFound * D.w; // Decreasing virtual history amount for ortho case diff --git a/Shaders/Include/SIGMA_Blur.hlsli b/Shaders/Include/SIGMA_Blur.hlsli index 86592b2..90cb22d 100644 --- a/Shaders/Include/SIGMA_Blur.hlsli +++ b/Shaders/Include/SIGMA_Blur.hlsli @@ -50,7 +50,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : int2 smemPos = threadPos + BORDER; float2 centerData = s_Penumbra_ViewZ[ smemPos.y ][ smemPos.x ]; float centerPenumbra = centerData.x; - float centerSignNoL = float( centerData.x != 0.0 ); + float centerSignNoL = float( centerPenumbra != 0.0 ); float viewZ = centerData.y; // Early out @@ -72,7 +72,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : if( ( tileValue == 0.0 && NRD_USE_TILE_CHECK ) || centerPenumbra == 0.0 ) { - gOut_Penumbra[ pixelPos ] = 0; + gOut_Penumbra[ pixelPos ] = centerPenumbra; gOut_Shadow_Translucency[ pixelPos ] = PackShadow( s_Shadow_Translucency[ smemPos.y ][ smemPos.x ] ); return; @@ -87,13 +87,15 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float3 Nv = STL::Geometry::RotateVector( gWorldToView, N ); // Parameters - float frustumSize = PixelRadiusToWorld( gUnproject, gOrthoMode, min( gRectSize.x, gRectSize.y ), viewZ ); // TODO: use GetFrustumSize + float unprojectZ 
= PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); + float frustumSize = GetFrustumSize( gMinRectDimMulUnproject, gOrthoMode, viewZ ); float2 geometryWeightParams = GetGeometryWeightParams( gPlaneDistSensitivity, frustumSize, Xv, Nv, 1.0 ); - // Estimate average distance to occluder + // Estimate penumbra size and filter shadow ( pass 1: dense 3x3 or 5x5 ) float2 sum = 0; float penumbra = 0; SIGMA_TYPE result = 0; + SIGMA_TYPE centerTap; [unroll] for( j = 0; j <= BORDER * 2; j++ ) @@ -104,12 +106,19 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : int2 pos = threadPos + int2( i, j ); float2 data = s_Penumbra_ViewZ[ pos.y ][ pos.x ]; - float p = data.x; - float signNoL = float( p != 0.0 ); + float penum = data.x; float z = data.y; + float signNoL = float( penum != 0.0 ); + + SIGMA_TYPE s = s_Shadow_Translucency[ pos.y ][ pos.x ]; - float w = 1.0; - if( !( i == BORDER && j == BORDER ) ) + float w; + if( i == BORDER && j == BORDER ) + { + centerTap = s; + w = 1.0; + } + else { float2 uv = pixelUv + float2( i - BORDER, j - BORDER ) * gRectSizeInv; float3 Xvs = STL::Geometry::ReconstructViewPosition( uv, gFrustum, z, gOrthoMode ); @@ -119,25 +128,32 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : w *= GetGaussianWeight( length( float2( i - BORDER, j - BORDER ) / BORDER ) ); w *= float( z < gDenoisingRange ); w *= float( centerSignNoL == signNoL ); - } - SIGMA_TYPE s = s_Shadow_Translucency[ pos.y ][ pos.x ]; - s = Denanify( w, s ); + s = Denanify( w, s ); + } float2 ww = w; - ww.y *= !IsLit( p ); - ww.y *= 1.0 / ( 1.0 + p * SIGMA_PENUMBRA_WEIGHT_SCALE ); // prefer smaller penumbra + ww.y *= !IsLit( penum ); + + float penumInPixels = penum / unprojectZ; + ww.y /= 1.0 + penumInPixels; // prefer smaller penumbra result += s * ww.x; - penumbra += p * ww.y; + penumbra += penum * ww.y; sum += ww; } } - result /= sum.x; // TODO: lerp to center if blur radius < BORDER + result /= sum.x; + sum.x = 1.0; 
+ penumbra /= max( sum.y, NRD_EPS ); // yes, without patching + sum.y = float( sum.y != 0.0 ); - float invHitDist = 1.0 / max( penumbra, NRD_EPS ); + // Avoid 1-pixel wide blur if penumbra size < 1 pixel + float penumbraInPixels = penumbra / unprojectZ; + float f = STL::Math::LinearStep( 0.75, 1.25, penumbraInPixels ); + result = lerp( centerTap, result, f ); // Tangent basis with anisotropy float3x3 mWorldToLocal = STL::Geometry::GetBasis( Nv ); @@ -158,7 +174,6 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : } // Blur radius - float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); float worldRadius = GetKernelRadiusInPixels( penumbra, unprojectZ, tileValue ) * unprojectZ; Tv *= worldRadius; @@ -167,9 +182,8 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Random rotation float4 rotator = GetBlurKernelRotation( SIGMA_ROTATOR_MODE, pixelPos, gRotator, gFrameIndex ); - // Denoising - sum.x = 1.0; - sum.y = float( sum.y != 0.0 ); + // Estimate penumbra size and filter shadow ( pass 2: sparse 8-taps ) + float invEstimatedPenumbra = 1.0 / max( penumbra, NRD_EPS ); [unroll] for( uint n = 0; n < SIGMA_POISSON_SAMPLE_NUM; n++ ) @@ -185,9 +199,9 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float2 uvScaled = ClampUvToViewport( uv ); // Fetch data - float p = gIn_Penumbra.SampleLevel( gNearestClamp, uvScaled, 0 ); - float signNoL = float( p != 0.0 ); + float penum = gIn_Penumbra.SampleLevel( gNearestClamp, uvScaled, 0 ); float z = UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, WithRectOffset( uvScaled ), 0 ) ); + float signNoL = float( penum != 0.0 ); // Sample weight float3 Xvs = STL::Geometry::ReconstructViewPosition( uv, gFrustum, z, gOrthoMode ); @@ -200,15 +214,15 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : w *= float( centerSignNoL == signNoL ); // Avoid umbra leaking inside wide penumbra - float 
t = saturate( p * invHitDist ); - w *= STL::Math::LinearStep( 0.0, 0.1, t ); + float t = saturate( penum * invEstimatedPenumbra ); + w *= STL::Math::SmoothStep( 0.0, 1.0, t ); // TODO: it works surprisingly well, keep an eye on it! // Fetch shadow SIGMA_TYPE s; #if( !defined SIGMA_FIRST_PASS || defined SIGMA_TRANSLUCENT ) s = gIn_Shadow_Translucency.SampleLevel( gNearestClamp, uvScaled, 0 ); #else - s = IsLit( p ); + s = IsLit( penum ); #endif s = Denanify( w, s ); @@ -218,11 +232,13 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Accumulate float2 ww = w; - ww.y *= !IsLit( p ); - ww.y *= 1.0 / ( 1.0 + p * SIGMA_PENUMBRA_WEIGHT_SCALE ); // prefer smaller penumbra + ww.y *= !IsLit( penum ); + + float penumInPixels = penum / unprojectZ; + ww.y /= 1.0 + penumInPixels; // prefer smaller penumbra result += s * ww.x; - penumbra += p * ww.y; + penumbra += penum * ww.y; sum += ww; } diff --git a/Shaders/Include/SIGMA_Config.hlsli b/Shaders/Include/SIGMA_Config.hlsli index 8b61452..f542398 100644 --- a/Shaders/Include/SIGMA_Config.hlsli +++ b/Shaders/Include/SIGMA_Config.hlsli @@ -16,17 +16,22 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#define SIGMA_5X5_BLUR_RADIUS_ESTIMATION_KERNEL 1 // helps to improve stability, but adds 10% of overhead // Switches ( default 0 ) -#define SIGMA_SHOW_TILES 0 +#define SIGMA_SHOW 0 // 1 - tiles, 2 - history weight #define SIGMA_SHOW_PENUMBRA_SIZE 0 // Settings #define SIGMA_ROTATOR_MODE NRD_FRAME #define SIGMA_POISSON_SAMPLE_NUM 8 #define SIGMA_POISSON_SAMPLES g_Special8 -#define SIGMA_MAX_PIXEL_RADIUS 16.0 // TODO: at least 32 needed for test 200 -#define SIGMA_PENUMBRA_WEIGHT_SCALE 10.0 -#define SIGMA_MAX_SIGMA_SCALE 3.0 +#define SIGMA_MAX_PIXEL_RADIUS 32.0 +#define SIGMA_TS_SIGMA_SCALE 3.0 +#define SIGMA_TS_MAX_HISTORY_WEIGHT 0.95 +#define SIGMA_TS_Z_FALLOFF 1.0 // exp2( -SIGMA_TS_Z_FALLOFF * dz ) #define SIGMA_TS_MOTION_MAX_REUSE 0.11 +#define SIGMA_TS_EARLY_OUT_THRESHOLD 0.25 +#define SIGMA_ANTILAG_SIGMA_SCALE 0.25 +#define SIGMA_ANTILAG_POWER 1.0 +#define SIGMA_ANTILAG_EPS 0.05 // Data type #ifdef SIGMA_TRANSLUCENT @@ -62,4 +67,5 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
NRD_CONSTANT( float, gDebug ) \ NRD_CONSTANT( float, gSplitScreen ) \ NRD_CONSTANT( float, gViewZScale ) \ + NRD_CONSTANT( float, gMinRectDimMulUnproject ) \ NRD_CONSTANT( uint, gFrameIndex ) diff --git a/Shaders/Include/SIGMA_TemporalStabilization.hlsli b/Shaders/Include/SIGMA_TemporalStabilization.hlsli index e7db0b8..598795d 100644 --- a/Shaders/Include/SIGMA_TemporalStabilization.hlsli +++ b/Shaders/Include/SIGMA_TemporalStabilization.hlsli @@ -50,7 +50,10 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : return; // Early out - if( centerPenumbra == 0.0 && SIGMA_SHOW_TILES == 0 ) + float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); + float penumbraInPixels = centerPenumbra / unprojectZ; + + if( penumbraInPixels <= SIGMA_TS_EARLY_OUT_THRESHOLD && SIGMA_SHOW == 0 ) { gOut_Shadow_Translucency[ pixelPos ] = PackShadow( s_Shadow_Translucency[ smemPos.y ][ smemPos.x ] ); @@ -76,16 +79,18 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float2 data = s_Penumbra_ViewZ[ pos.y ][ pos.x ]; SIGMA_TYPE s = s_Shadow_Translucency[ pos.y ][ pos.x ]; - float signNoL = float( data.x != 0.0 ); + float penum = data.x; float z = data.y; + float signNoL = float( penum != 0.0 ); float w = 1.0; if( i == BORDER && j == BORDER ) input = s; else { - w = GetBilateralWeight( z, viewZ ); - w *= saturate( 1.0 - abs( centerSignNoL - signNoL ) ); + w = exp2( -SIGMA_TS_Z_FALLOFF * abs( z - viewZ ) ); + w *= float( z < gDenoisingRange ); + w *= float( centerSignNoL == signNoL ); if( z < viewZnearest ) { @@ -123,49 +128,31 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : history = max( history, 0.0 ); history = SIGMA_BackEnd_UnpackShadow( history ); - // Clamp history - float2 a = m1.xx; - float2 b = history.xx; - - #ifdef SIGMA_TRANSLUCENT - a.y = STL::Color::Luminance( m1.yzw ); - b.y = STL::Color::Luminance( history.yzw ); - #endif + // Antilag + float fast = m1.x; + 
float slow = history.x; - float2 ratio = abs( a - b ) / ( min( a, b ) + 0.05 ); - float2 ratioNorm = ratio / ( 1.0 + ratio ); - float2 scale = lerp( SIGMA_MAX_SIGMA_SCALE, 1.0, STL::Math::Sqrt01( ratioNorm ) ); + float a = abs( slow - fast ) - SIGMA_ANTILAG_SIGMA_SCALE * sigma.x - SIGMA_ANTILAG_EPS; + float b = max( slow, fast ) + SIGMA_ANTILAG_SIGMA_SCALE * sigma.x + SIGMA_ANTILAG_EPS; + float antilag = a / b; - #ifdef SIGMA_TRANSLUCENT - sigma *= scale.xyyy; - #else - sigma *= scale.x; - #endif + antilag = STL::Math::SmoothStep01( 1.0 - antilag ); + antilag = STL::Math::Pow01( antilag, SIGMA_ANTILAG_POWER ); - SIGMA_TYPE inputMin = m1 - sigma; - SIGMA_TYPE inputMax = m1 + sigma; + // Clamp history + SIGMA_TYPE inputMin = m1 - sigma * SIGMA_TS_SIGMA_SCALE; + SIGMA_TYPE inputMax = m1 + sigma * SIGMA_TS_SIGMA_SCALE; SIGMA_TYPE historyClamped = clamp( history, inputMin, inputMax ); // History weight - float isInScreen = IsInScreenNearest( pixelUvPrev ); - float motionLength = length( pixelUvPrev - pixelUv ); - float2 historyWeight = 0.93 * lerp( 1.0, 0.7, ratioNorm ); - historyWeight = lerp( historyWeight, 0.1, saturate( motionLength / SIGMA_TS_MOTION_MAX_REUSE ) ); - historyWeight *= isInScreen; + float historyWeight = SIGMA_TS_MAX_HISTORY_WEIGHT; + historyWeight *= IsInScreenNearest( pixelUvPrev ); + historyWeight *= antilag; + historyWeight *= STL::Math::SmoothStep( SIGMA_TS_EARLY_OUT_THRESHOLD, 1.0, penumbraInPixels ); historyWeight *= gStabilizationStrength; - // Reduce history in regions with hard shadows - float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); - float pixelRadius = GetKernelRadiusInPixels( centerPenumbra, unprojectZ ); - historyWeight *= STL::Math::LinearStep( 0.0, 0.5, pixelRadius ); - // Combine with current frame - SIGMA_TYPE result; - result.x = lerp( input.x, historyClamped.x, historyWeight.x ); - - #ifdef SIGMA_TRANSLUCENT - result.yzw = lerp( input.yzw, historyClamped.yzw, historyWeight.y ); - #endif + SIGMA_TYPE 
result = lerp( input, historyClamped, historyWeight ); // Reference #if( SIGMA_REFERENCE == 1 ) @@ -173,18 +160,25 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : #endif // Debug - #if( SIGMA_SHOW_TILES == 1 ) - float tileValue = gIn_Tiles[ pixelPos >> 4 ].x; - tileValue = float( tileValue != 0.0 ); // optional, just to show fully discarded tiles - - #ifdef SIGMA_TRANSLUCENT - result = lerp( float4( 0, 0, 1, 0 ), result, tileValue ); - #else - result = tileValue; + #if( SIGMA_SHOW != 0 ) + #if( SIGMA_SHOW == 1 ) + float tileValue = gIn_Tiles[ pixelPos >> 4 ].x; + tileValue = float( tileValue != 0.0 ); // optional, just to show fully discarded tiles + + #ifdef SIGMA_TRANSLUCENT + result = lerp( float4( 0, 0, 1, 0 ), result, tileValue ); + #else + result = tileValue; + #endif + + // Show grid ( works badly with TAA ) + result *= all( ( pixelPos & 15 ) != 0 ); + #elif( SIGMA_SHOW == 2 ) + // .x - is used in antilag computations! + #ifdef SIGMA_TRANSLUCENT + result.yzw = SIGMA_BackEnd_UnpackShadow( historyWeight ); + #endif #endif - - // Show grid (works badly with TAA) - result *= all( ( pixelPos & 15 ) != 0 ); #endif // Output diff --git a/Shaders/Source/RELAX_Validation.cs.hlsl b/Shaders/Source/RELAX_Validation.cs.hlsl index f012e59..bc46853 100644 --- a/Shaders/Source/RELAX_Validation.cs.hlsl +++ b/Shaders/Source/RELAX_Validation.cs.hlsl @@ -45,10 +45,11 @@ NRD_EXPORT void NRD_CS_MAIN( uint2 pixelPos : SV_DispatchThreadId ) float2 viewportUvScaled = viewportUv * gResolutionScale; + float4 normalAndRoughness = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness.SampleLevel( gNearestClamp, WithRectOffset( viewportUvScaled ), 0 ) ); + float viewZ = UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, WithRectOffset( viewportUvScaled ), 0 ) ); + float3 mv = gIn_Mv.SampleLevel( gNearestClamp, WithRectOffset( viewportUvScaled ), 0 ) * gMvScale.xyz; + float historyLength = 255.0 * gIn_HistoryLength.SampleLevel( gNearestClamp, 
viewportUvScaled, 0 ) - 1.0; - float4 normalAndRoughness = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness.SampleLevel( gNearestClamp, viewportUvScaled, 0 ) ); - float viewZ = gIn_ViewZ.SampleLevel( gNearestClamp, viewportUvScaled, 0 ); - float3 mv = gIn_Mv.SampleLevel( gNearestClamp, viewportUvScaled, 0 ) * gMvScale.xyz; float3 N = normalAndRoughness.xyz; float roughness = normalAndRoughness.w; diff --git a/Source/Sigma.cpp b/Source/Sigma.cpp index 9f073f0..5e6810b 100644 --- a/Source/Sigma.cpp +++ b/Source/Sigma.cpp @@ -123,6 +123,7 @@ void nrd::InstanceImpl::AddSharedConstants_Sigma(const SigmaSettings& settings, consts->gDebug = m_CommonSettings.debug; consts->gSplitScreen = m_CommonSettings.splitScreen; consts->gViewZScale = m_CommonSettings.viewZScale; + consts->gMinRectDimMulUnproject = (float)Min(rectW, rectH) * unproject; consts->gFrameIndex = m_CommonSettings.frameIndex; }