From d6a4f9022f4658ac72f9a2ffc14298c7bc0ab80a Mon Sep 17 00:00:00 2001 From: Pentalimbed Date: Sun, 21 Apr 2024 23:44:32 +0100 Subject: [PATCH] fix: add ssgi dynamic resolution support --- Review note (v2): srcScale and outScale are declared float2 in every shader hunk so that the x and y frame to texture ratios are kept separately; a float declaration implicitly truncates the float2 products SrcFrameDim * RcpTexDim and OutFrameDim * RcpTexDim to their x component. .../Shaders/ScreenSpaceGI/blur.cs.hlsl | 19 ++++++----- .../Shaders/ScreenSpaceGI/common.hlsli | 20 +++++++---- .../Shaders/ScreenSpaceGI/gi.cs.hlsl | 34 +++++++++++-------- .../ScreenSpaceGI/prefilterDepths.cs.hlsl | 8 +++-- .../ScreenSpaceGI/radianceDisocc.cs.hlsl | 23 +++++++------ .../Shaders/ScreenSpaceGI/upsample.cs.hlsl | 2 +- src/Features/ScreenSpaceGI.cpp | 21 ++++++------ src/Features/ScreenSpaceGI.h | 9 +++-- 8 files changed, 79 insertions(+), 57 deletions(-) diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl index 40613e279..a7e1c4eb8 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl @@ -26,13 +26,16 @@ static const float3 g_Poisson8[8] = { }; [numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) { + const float2 srcScale = SrcFrameDim * RcpTexDim; + const float2 outScale = OutFrameDim * RcpTexDim; + float radius = BlurRadius; #ifdef TEMPORAL_DENOISER radius /= (srcAccumFrames[dtid] * 255); #endif const uint numSamples = 8; - const float2 uv = (dtid + .5) * RcpFrameDim; + const float2 uv = (dtid + .5) * RcpOutFrameDim; uint eyeIndex = GET_EYE_IDX(uv); const float2 screenPos = ConvertToStereoUV(uv, eyeIndex); @@ -46,24 +49,24 @@ static const float3 g_Poisson8[8] = { float w = g_Poisson8[i].z; float2 pxOffset = radius * g_Poisson8[i].xy; - float2 uvOffset = pxOffset * RcpFrameDim; - float2 uvSample = uv + uvOffset; + float2 pxSample = dtid + .5 + pxOffset; + float2 uvSample = pxSample * RcpOutFrameDim; + float2 screenPosSample = ConvertToStereoUV(uvSample, eyeIndex); - if (eyeIndex != GET_EYE_IDX(uvSample)) + if (any(screenPosSample < 0) || any(screenPosSample > 1)) continue; - const 
float2 screenPosSample = ConvertToStereoUV(uvSample, eyeIndex); - float depthSample = srcDepth.SampleLevel(samplerLinearClamp, uvSample, 0); + float depthSample = srcDepth.SampleLevel(samplerLinearClamp, uvSample * srcScale, 0); float3 posSample = ScreenToViewPosition(screenPosSample, depthSample, eyeIndex); - float3 normalSample = DecodeNormal(srcNormal.SampleLevel(samplerLinearClamp, uvSample, 0).xy); + float3 normalSample = DecodeNormal(srcNormal.SampleLevel(samplerLinearClamp, uvSample * srcScale, 0).xy); // geometry weight w *= saturate(1 - abs(dot(normal, posSample - pos)) * DistanceNormalisation); // normal weight w *= 1 - saturate(acosFast4(saturate(dot(normalSample, normal))) / fsl_HALF_PI * 2); - lpfloat4 gi = srcGI.SampleLevel(samplerLinearClamp, uvSample * res_scale, 0); + lpfloat4 gi = srcGI.SampleLevel(samplerLinearClamp, uvSample * outScale, 0); sum += gi * w; wsum += w; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli b/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli index cbcea0d37..042b93b6f 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli @@ -53,10 +53,14 @@ cbuffer SSGICB : register(b1) float4x4 PrevInvViewMat[2]; float4 NDCToViewMul; float4 NDCToViewAdd; - float4 NDCToViewMul_x_PixelSize; - float2 FrameDim; - float2 RcpFrameDim; + float2 TexDim; + float2 RcpTexDim; + float2 SrcFrameDim; + float2 RcpSrcFrameDim; + float2 OutFrameDim; + float2 RcpOutFrameDim; + uint FrameIndex; uint NumSlices; @@ -92,14 +96,16 @@ SamplerState samplerLinearClamp : register(s1); /////////////////////////////////////////////////////////////////////////////// +// screenPos - normalised position in FrameDim, one eye only +// uv - normalised position in FrameDim, both eyes +// texCoord - texture coordinate + #ifdef HALF_RES -const static float res_scale = .5; # define READ_DEPTH(tex, px) tex.Load(int3(px, 1)) -# define FULLRES_LOAD(tex, px, uv, 
samp) tex.SampleLevel(samp, uv, 0) +# define FULLRES_LOAD(tex, px, texCoord, samp) tex.SampleLevel(samp, texCoord, 0) #else -const static float res_scale = 1.; # define READ_DEPTH(tex, px) tex[px] -# define FULLRES_LOAD(tex, px, uv, samp) tex[px] +# define FULLRES_LOAD(tex, px, texCoord, samp) tex[px] #endif #ifdef VR diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl index ee7218ac8..097c906c3 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl @@ -79,6 +79,9 @@ void CalculateGI( uint2 dtid, float2 uv, float viewspaceZ, lpfloat3 viewspaceNormal, out lpfloat4 o_currGIAO, out lpfloat3 o_bentNormal) { + const float2 srcScale = SrcFrameDim * RcpTexDim; + const float2 outScale = OutFrameDim * RcpTexDim; + uint eyeIndex = GET_EYE_IDX(uv); float2 normalizedScreenPos = ConvertToStereoUV(uv, eyeIndex); @@ -95,7 +98,7 @@ void CalculateGI( // if the offset is under approx pixel size (pixelTooCloseThreshold), push it out to the minimum distance const lpfloat pixelTooCloseThreshold = 1.3; // approx viewspace pixel size at pixCoord; approximation of NDCToViewspace( uv.xy + ViewportSize.xy, pixCenterPos.z ).xy - pixCenterPos.xy; - const float2 pixelDirRBViewspaceSizeAtCenterZ = viewspaceZ.xx * (eyeIndex == 0 ? NDCToViewMul_x_PixelSize.xy : NDCToViewMul_x_PixelSize.zw); + const float2 pixelDirRBViewspaceSizeAtCenterZ = viewspaceZ.xx * (eyeIndex == 0 ? 
NDCToViewMul.xy : NDCToViewMul.zw) * RcpOutFrameDim; lpfloat screenspaceRadius = (lpfloat)EffectRadius / (lpfloat)pixelDirRBViewspaceSizeAtCenterZ.x; // this is the min distance to start sampling from to avoid sampling from the center pixel (no useful data obtained from sampling center pixel) @@ -121,11 +124,8 @@ void CalculateGI( lpfloat3 directionVec = 0; sincos(phi, directionVec.y, directionVec.x); - // convert to screen units for later use - lpfloat2 omega = lpfloat2(directionVec.x, -directionVec.y) * screenspaceRadius * RcpFrameDim; -#ifdef VR - omega.x *= 2; -#endif + // convert to px units for later use + lpfloat2 omega = lpfloat2(directionVec.x, -directionVec.y) * screenspaceRadius; const lpfloat3 orthoDirectionVec = directionVec - (dot(directionVec, viewVec) * viewVec); const lpfloat3 axisVec = normalize(cross(orthoDirectionVec, viewVec)); @@ -163,11 +163,12 @@ void CalculateGI( s *= s; // default 2 is fine s += minS; // avoid sampling center pixel - lpfloat2 sampleOffset = s * omega; // no pixel alignment from original xegtao + lpfloat2 sampleOffset = s * omega; - float2 sampleScreenPos = normalizedScreenPos + sampleOffset * sideSign; + float2 samplePxCoord = dtid + .5 + sampleOffset * sideSign; + float2 sampleUV = samplePxCoord * RcpOutFrameDim; + float2 sampleScreenPos = ConvertToStereoUV(sampleUV, eyeIndex); [branch] if (any(sampleScreenPos > 1.0) || any(sampleScreenPos < 0.0)) break; - float2 sampleUV = ConvertFromStereoUV(sampleScreenPos, eyeIndex); lpfloat sampleOffsetLength = length(sampleOffset); lpfloat mipLevel = (lpfloat)clamp(log2(sampleOffsetLength) - DepthMIPSamplingOffset, 0, 5); @@ -175,7 +176,7 @@ void CalculateGI( mipLevel = max(mipLevel, 1); #endif - float SZ = srcWorkingDepth.SampleLevel(samplerPointClamp, sampleUV, mipLevel); + float SZ = srcWorkingDepth.SampleLevel(samplerPointClamp, sampleUV * srcScale, mipLevel); [branch] if (SZ > DepthFadeRange.y) continue; float3 samplePos = ScreenToViewPosition(sampleScreenPos, SZ, eyeIndex); @@ 
-221,13 +222,13 @@ void CalculateGI( // IL lpfloat frontBackMult = 1.f; # ifdef BACKFACE - if (dot(DecodeNormal(srcNormal.SampleLevel(samplerPointClamp, sampleUV, 0).xy), sampleHorizonVec) > 0) // backface + if (dot(DecodeNormal(srcNormal.SampleLevel(samplerPointClamp, sampleUV * srcScale, 0).xy), sampleHorizonVec) > 0) // backface frontBackMult = BackfaceStrength; # endif if (frontBackMult > 0.f) { # ifdef BITMASK - lpfloat3 sampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * res_scale, mipLevel).rgb * frontBackMult * giBoost; + lpfloat3 sampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * outScale, mipLevel).rgb * frontBackMult * giBoost; sampleRadiance *= countbits(maskedBits & ~bitmask) * (lpfloat)0.03125; // 1/32 sampleRadiance *= dot(viewspaceNormal, sampleHorizonVec); @@ -236,7 +237,7 @@ void CalculateGI( radiance += sampleRadiance; # else lpfloat3 newSampleRadiance = 0; - newSampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * res_scale, mipLevel).rgb * frontBackMult * giBoost; + newSampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * outScale, mipLevel).rgb * frontBackMult * giBoost; lpfloat anglePrev = n + sideSign * HALF_PI - FastACos(horizonCos); // lpfloat version is closest acos lpfloat angleCurr = n + sideSign * HALF_PI - FastACos(shc); @@ -340,12 +341,15 @@ void CalculateGI( [numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) { - float2 uv = (dtid + .5f) * RcpFrameDim; + const float2 srcScale = SrcFrameDim * RcpTexDim; + const float2 outScale = OutFrameDim * RcpTexDim; + + float2 uv = (dtid + .5f) * RcpOutFrameDim; uint eyeIndex = GET_EYE_IDX(uv); float viewspaceZ = READ_DEPTH(srcWorkingDepth, dtid); - lpfloat2 normalSample = FULLRES_LOAD(srcNormal, dtid, uv, samplerLinearClamp).xy; + lpfloat2 normalSample = FULLRES_LOAD(srcNormal, dtid, uv * srcScale, samplerLinearClamp).xy; lpfloat3 viewspaceNormal = (lpfloat3)DecodeNormal(normalSample); half2 
encodedWorldNormal = EncodeNormal(ViewToWorldVector(viewspaceNormal, InvViewMatrix[eyeIndex])); diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl index 930d33884..d6668f525 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl @@ -60,13 +60,15 @@ groupshared lpfloat g_scratchDepths[8][8]; [numthreads(8, 8, 1)] void main(uint2 dispatchThreadID : SV_DispatchThreadID, uint2 groupThreadID : SV_GroupThreadID) { + const float2 srcScale = SrcFrameDim * RcpTexDim; + const float2 outScale = OutFrameDim * RcpTexDim; + // MIP 0 const uint2 baseCoord = dispatchThreadID; const uint2 pixCoord = baseCoord * 2; - const float2 uv = (pixCoord + .5) * RcpFrameDim * res_scale; - const uint eyeIndex = GET_EYE_IDX(uv); + const float2 uv = (pixCoord + .5) * RcpSrcFrameDim; - float4 depths4 = srcNDCDepth.GatherRed(samplerPointClamp, uv, int2(1, 1)); + float4 depths4 = srcNDCDepth.GatherRed(samplerPointClamp, uv * srcScale, int2(1, 1)); lpfloat depth0 = ClampDepth(ScreenToViewDepth(depths4.w)); lpfloat depth1 = ClampDepth(ScreenToViewDepth(depths4.z)); lpfloat depth2 = ClampDepth(ScreenToViewDepth(depths4.x)); diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl index ad502fef6..40671d30d 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl @@ -8,8 +8,8 @@ Texture2D srcCurrDepth : register(t2); Texture2D srcCurrNormal : register(t3); Texture2D srcPrevGeo : register(t4); // maybe half-res Texture2D srcMotionVec : register(t5); -Texture2D srcPrevGIAlbedo : register(t6); -Texture2D srcAccumFrames : register(t7); +Texture2D srcPrevGIAlbedo : register(t6); // maybe half-res 
+Texture2D srcAccumFrames : register(t7); // maybe half-res RWTexture2D outRadianceDisocc : register(u0); RWTexture2D outAccumFrames : register(u1); @@ -21,13 +21,16 @@ RWTexture2D outRemappedPrevGI : register(u2); [numthreads(8, 8, 1)] void main(const uint2 pixCoord : SV_DispatchThreadID) { - const float2 uv = (pixCoord + .5) * RcpFrameDim; + const float2 srcScale = SrcFrameDim * RcpTexDim; + const float2 outScale = OutFrameDim * RcpTexDim; + + const float2 uv = (pixCoord + .5) * RcpOutFrameDim; uint eyeIndex = GET_EYE_IDX(uv); const float2 screen_pos = ConvertToStereoUV(uv, eyeIndex); float2 prev_uv = uv; #ifdef REPROJECTION - prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv, samplerLinearClamp).xy; + prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv * srcScale, samplerLinearClamp).xy; #endif float2 prev_screen_pos = ConvertToStereoUV(prev_uv, eyeIndex); @@ -37,12 +40,12 @@ RWTexture2D outRemappedPrevGI : register(u2); #ifdef REPROJECTION if ((curr_depth <= DepthFadeRange.y) && !(any(prev_screen_pos < 0) || any(prev_screen_pos > 1))) { - float3 curr_normal = DecodeNormal(FULLRES_LOAD(srcCurrNormal, pixCoord, uv, samplerLinearClamp).xy); + float3 curr_normal = DecodeNormal(FULLRES_LOAD(srcCurrNormal, pixCoord, uv * srcScale, samplerLinearClamp).xy); curr_normal = ViewToWorldVector(curr_normal, InvViewMatrix[eyeIndex]); float3 curr_pos = ScreenToViewPosition(screen_pos, curr_depth, eyeIndex); curr_pos = ViewToWorldPosition(curr_pos, InvViewMatrix[eyeIndex]); - const half3 prev_geo = srcPrevGeo.SampleLevel(samplerPointClamp, prev_uv * res_scale, 0); + const half3 prev_geo = srcPrevGeo.SampleLevel(samplerPointClamp, prev_uv * outScale, 0); const float prev_depth = prev_geo.x; const float3 prev_normal = DecodeNormal(prev_geo.yz); // prev normal is already world float3 prev_pos = ScreenToViewPosition(prev_screen_pos, prev_depth, eyeIndex); @@ -64,17 +67,17 @@ RWTexture2D outRemappedPrevGI : register(u2); [branch] if (valid_history) { # if defined(GI) && 
defined(GI_BOUNCE) - prev_gi_albedo = srcPrevGIAlbedo.SampleLevel(samplerLinearClamp, prev_uv, 0); + prev_gi_albedo = srcPrevGIAlbedo.SampleLevel(samplerLinearClamp, prev_uv * outScale, 0); # endif # ifdef TEMPORAL_DENOISER - prev_gi = srcPrevGI.SampleLevel(samplerLinearClamp, prev_uv * res_scale, 0); + prev_gi = srcPrevGI.SampleLevel(samplerLinearClamp, prev_uv * outScale, 0); # endif } #endif half3 radiance = 0; #ifdef GI - radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv, samplerLinearClamp).rgb; + radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv * srcScale, samplerLinearClamp).rgb; # ifdef GI_BOUNCE radiance += prev_gi_albedo.rgb * GIBounceFade; # endif @@ -84,7 +87,7 @@ RWTexture2D outRemappedPrevGI : register(u2); #ifdef TEMPORAL_DENOISER uint accum_frames = 0; [branch] if (valid_history) - accum_frames = srcAccumFrames.SampleLevel(samplerLinearClamp, prev_uv * res_scale, 0) * 255; + accum_frames = srcAccumFrames.SampleLevel(samplerLinearClamp, prev_uv * outScale, 0) * 255; accum_frames = min(accum_frames + 1, MaxAccumFrames); outAccumFrames[pixCoord] = accum_frames / 255.0; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl index dad60b043..86a5f6321 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl @@ -52,7 +52,7 @@ RWTexture2D outGI : register(u0); } else { - atten = srcGI.SampleLevel(samplerLinearClamp, (dtid + .5) * RcpFrameDim * .25, 0); + atten = srcGI.SampleLevel(samplerLinearClamp, (dtid + .5) * RcpSrcFrameDim * OutFrameDim * RcpTexDim, 0); } outGI[dtid] = atten; diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp index a53d9e92a..00d966b0e 100644 --- a/src/Features/ScreenSpaceGI.cpp +++ b/src/Features/ScreenSpaceGI.cpp @@ -530,13 +530,11 @@ void ScreenSpaceGI::UpdateSB() auto viewport = RE::BSGraphics::State::GetSingleton(); auto& state = 
State::GetSingleton()->shadowState; - uint resolution[2] = { - (uint)(State::GetSingleton()->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale), - (uint)(State::GetSingleton()->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale) - }; - uint halfRes[2] = { (resolution[0] + 1) >> 1, (resolution[1] + 1) >> 1 }; - - float2 res = settings.HalfRes ? float2{ (float)halfRes[0], (float)halfRes[1] } : float2{ (float)resolution[0], (float)resolution[1] }; + float2 res = { (float)texRadiance->desc.Width, (float)texRadiance->desc.Height }; + float2 dynres = res * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; + dynres = { floor(dynres.x), floor(dynres.y) }; + float2 halfres = dynres * 0.5; + halfres = { floor(halfres.x), floor(halfres.y) }; static float4x4 prevInvView[2] = {}; @@ -548,15 +546,18 @@ void ScreenSpaceGI::UpdateSB() data.PrevInvViewMat[eyeIndex] = prevInvView[eyeIndex]; data.NDCToViewMul[eyeIndex] = { 2.0f / eye.projMat(0, 0), -2.0f / eye.projMat(1, 1) }; data.NDCToViewAdd[eyeIndex] = { -1.0f / eye.projMat(0, 0), 1.0f / eye.projMat(1, 1) }; - data.NDCToViewMul_x_PixelSize[eyeIndex] = data.NDCToViewMul[eyeIndex] / res; if (REL::Module::IsVR()) data.NDCToViewMul[eyeIndex].x *= 2; prevInvView[eyeIndex] = eye.viewMat.Invert(); } - data.FrameDim = res; - data.RcpFrameDim = float2(1.0f) / res; + data.TexDim = res; + data.RcpTexDim = float2(1.0f) / res; + data.SrcFrameDim = dynres; + data.RcpSrcFrameDim = float2(1.0f) / dynres; + data.OutFrameDim = settings.HalfRes ? halfres : dynres; + data.RcpOutFrameDim = float2(1.0f) / (settings.HalfRes ? 
halfres : dynres); data.FrameIndex = viewport->uiFrameCount; data.NumSlices = settings.NumSlices; diff --git a/src/Features/ScreenSpaceGI.h b/src/Features/ScreenSpaceGI.h index 62e041b07..377b9a71b 100644 --- a/src/Features/ScreenSpaceGI.h +++ b/src/Features/ScreenSpaceGI.h @@ -80,10 +80,13 @@ struct ScreenSpaceGI : Feature float4x4 PrevInvViewMat[2]; float2 NDCToViewMul[2]; float2 NDCToViewAdd[2]; - float2 NDCToViewMul_x_PixelSize[2]; - float2 FrameDim; - float2 RcpFrameDim; // + float2 TexDim; + float2 RcpTexDim; // + float2 SrcFrameDim; + float2 RcpSrcFrameDim; // + float2 OutFrameDim; + float2 RcpOutFrameDim; // uint FrameIndex; uint NumSlices;