diff --git a/features/Cloud Shadows/Shaders/CloudShadows/CloudShadows.hlsli b/features/Cloud Shadows/Shaders/CloudShadows/output.cs.hlsl
similarity index 52%
rename from features/Cloud Shadows/Shaders/CloudShadows/CloudShadows.hlsli
rename to features/Cloud Shadows/Shaders/CloudShadows/output.cs.hlsl
index c5229ae66..4b4167bc4 100644
--- a/features/Cloud Shadows/Shaders/CloudShadows/CloudShadows.hlsli
+++ b/features/Cloud Shadows/Shaders/CloudShadows/output.cs.hlsl
@@ -1,3 +1,6 @@
+#include "../Common/DeferredShared.hlsl"
+#include "../Common/VR.hlsl"
+
 struct PerPassCloudShadow
 {
 	uint EnableCloudShadows;
@@ -8,13 +11,17 @@ struct PerPassCloudShadow
 	float EffectMix;
 
 	float TransparencyPower;
-	float AbsorptionAmbient;
 
 	float RcpHPlusR;
 };
 
-StructuredBuffer perPassCloudShadow : register(t23);
-TextureCube cloudShadows : register(t40);
+StructuredBuffer perPassCloudShadow : register(t0);
+TextureCube cloudShadows : register(t1);
+Texture2D TexDepth : register(t2);
+
+RWTexture2D RWTexShadowMask : register(u0);
+
+SamplerState defaultSampler;
 
 float3 getCloudShadowSampleDir(float3 rel_pos, float3 eye_to_sun)
 {
@@ -38,13 +45,39 @@ float3 getCloudShadowSampleDirFlatEarth(float3 rel_pos, float3 eye_to_sun)
 	return v;
 }
 
-float3 getCloudShadowMult(float3 rel_pos, float3 eye_to_sun, SamplerState samp)
+float3 getCloudShadowMult(float3 rel_pos, float3 eye_to_sun)
 {
 	// float3 cloudSampleDir = getCloudShadowSampleDirFlatEarth(rel_pos, eye_to_sun).xyz;
 	float3 cloudSampleDir = getCloudShadowSampleDir(rel_pos, eye_to_sun).xyz;
 
-	float4 cloudCubeSample = cloudShadows.Sample(samp, cloudSampleDir);
+	float4 cloudCubeSample = cloudShadows.SampleLevel(defaultSampler, cloudSampleDir, 0);  // TODO Sample in pixel shader
 	float alpha = pow(saturate(cloudCubeSample.w), perPassCloudShadow[0].TransparencyPower);
 
 	return lerp(1.0, 1.0 - alpha, perPassCloudShadow[0].EffectMix);
+}
+
+[numthreads(32, 32, 1)] void main(uint2 dtid : SV_DispatchThreadID) {  // one thread per pixel: multiplies the cloud shadow factor into RWTexShadowMask
+	float2 uv = (dtid + .5) * RcpBufferDim;  // pixel centre scaled by RcpBufferDim
+#ifdef VR
+	const uint eyeIndex = uv.x > .5;  // compare .x explicitly; `uv > .5` is a float2 comparison that only compiled through implicit vector truncation
+#else
+	const uint eyeIndex = 0;
+#endif
+
+	float3 ndc = float3(ConvertToStereoUV(uv, eyeIndex), 1);
+	ndc = ndc * 2 - 1;  // [0, 1] -> [-1, 1]
+	ndc.y = -ndc.y;
+	ndc.z = TexDepth[dtid];  // z is taken straight from the depth texture, replacing the remapped placeholder
+
+	if (ndc.z > 0.9999)  // pixels with depth this close to 1 are left untouched (presumably sky / far plane - confirm)
+		return;
+
+	float4 worldPos = mul(InvViewMatrix[eyeIndex], mul(InvProjMatrix[eyeIndex], float4(ndc, 1)));
+	worldPos.xyz /= worldPos.w;  // perspective divide
+
+	float3 dirLightDirWS = mul((float3x3)InvViewMatrix[eyeIndex], DirLightDirectionVS[eyeIndex].xyz);  // rotate the view-space light direction by the inverse view matrix
+	float cloudShadow = getCloudShadowMult(worldPos.xyz, dirLightDirWS);
+
+	half shadow = RWTexShadowMask[dtid];
+	RWTexShadowMask[dtid] = shadow * cloudShadow;  // attenuate the existing mask value in place
 }
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/Features/ScreenSpaceGI.ini b/features/Screen Space GI/Shaders/Features/ScreenSpaceGI.ini
new file mode 100644
index 000000000..eb1a462ce
--- /dev/null
+++ b/features/Screen Space GI/Shaders/Features/ScreenSpaceGI.ini
@@ -0,0 +1,2 @@
+[Info]
+Version = 2-9-0
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli b/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli
new file mode 100644
index 000000000..6e2d608d7
--- /dev/null
+++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli
@@ -0,0 +1,204 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2016-2021, Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// XeGTAO is based on GTAO/GTSO "Jimenez et al. 
/ Practical Real-Time Strategies for Accurate Indirect Occlusion",
+// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf
+//
+// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/)
+// Version: (see XeGTAO.h) (='.'=)
+// Details: https://github.com/GameTechDev/XeGTAO (")_(")
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// with additional edits by FiveLimbedCat/ProfJack
+
+#ifndef SSGI_COMMON
+#define SSGI_COMMON
+
+#ifndef USE_HALF_FLOAT_PRECISION
+#	define USE_HALF_FLOAT_PRECISION 1
+#endif
+
+#if (USE_HALF_FLOAT_PRECISION != 0)
+#	if 1  // old fp16 approach ( 0.5)
+#else
+#	define GET_EYE_IDX(uv) (0)
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
+#define ISNAN(x) (!((x) < 0.f || (x) > 0.f || (x) == 0.f))  // true only when all three comparisons fail; argument parenthesised so compound expressions expand safely
+
+// http://h14s.p5r.org/2012/09/0x5f3759df.html, [Drobot2014a] Low Level Optimizations for GCN, https://blog.selfshadow.com/publications/s2016-shading-course/activision/s2016_pbs_activision_occlusion.pdf slide 63
+lpfloat FastSqrt(float x)
+{
+	return (lpfloat)(asfloat(0x1fbd1df5 + (asint(x) >> 1)));  // approximate sqrt via integer manipulation of the float bit pattern
+}
+
+// input [-1, 1] and output [0, PI], from https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
+lpfloat FastACos(lpfloat inX)
+{
+	const lpfloat PI = 3.141593;
+	const lpfloat HALF_PI = 1.570796;
+	lpfloat x = abs(inX);
+	lpfloat res = -0.156583 * x + HALF_PI;
+	res *= FastSqrt(1.0 - x);
+	return (inX >= 0) ? res : PI - res;  // mirror the result for negative inputs
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Inputs are screen XY and viewspace depth, output is viewspace position
+float3 ScreenToViewPosition(const float2 screenPos, const float viewspaceDepth, const uint eyeIndex)
+{
+	const float2 _mul = eyeIndex == 0 ? NDCToViewMul.xy : NDCToViewMul.zw;  // .xy is used for eye 0, .zw otherwise
+	const float2 _add = eyeIndex == 0 ? NDCToViewAdd.xy : NDCToViewAdd.zw;
+
+	float3 ret;
+	ret.xy = (_mul * screenPos.xy + _add) * viewspaceDepth;
+	ret.z = viewspaceDepth;
+	return ret;
+}
+
+float ScreenToViewDepth(const float screenDepth, const uint eyeIndex)
+{
+	const float2 consts = eyeIndex == 0 ? DepthUnpackConsts.xy : DepthUnpackConsts.zw;  // .xy is used for eye 0, .zw otherwise
+
+	float depthLinearizeMul = consts.x;
+	float depthLinearizeAdd = consts.y;
+	// Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
+	return depthLinearizeMul / (depthLinearizeAdd - screenDepth);
+}
+
+float3 ViewToWorldPosition(const float3 pos, const float4x4 invView)
+{
+	float4 worldpos = mul(invView, float4(pos, 1));  // transformed as a point (w = 1)
+	return worldpos.xyz / worldpos.w;
+}
+
+float3 ViewToWorldVector(const float3 vec, const float4x4 invView)
+{
+	return mul((float3x3)invView, vec);  // upper 3x3 only, so translation is ignored
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// "Efficiently building a matrix to rotate one vector to another"
+// http://cs.brown.edu/research/pubs/pdfs/1999/Moller-1999-EBA.pdf / https://dl.acm.org/doi/10.1080/10867651.1999.10487509
+// (using https://github.com/assimp/assimp/blob/master/include/assimp/matrix3x3.inl#L275 as a code reference as it seems to be best)
+lpfloat3x3 RotFromToMatrix(lpfloat3 from, lpfloat3 to)
+{
+	const lpfloat e = dot(from, to);
+	const lpfloat f = abs(e);  //(e < 0)? -e:e;
+
+	// WARNING: This has not been tested/worked through, especially not for 16bit floats; seems to work in our special use case (from is always {0, 0, -1}) but wouldn't use it in general
+	if (f > lpfloat(1.0 - 0.0003))
+		return lpfloat3x3(1, 0, 0, 0, 1, 0, 0, 0, 1);
+
+	const lpfloat3 v = cross(from, to);
+	/* ... use this hand optimized version (9 mults less) */
+	const lpfloat h = (1.0) / (1.0 + e); /* optimization by Gottfried Chen */
+	const lpfloat hvx = h * v.x;
+	const lpfloat hvz = h * v.z;
+	const lpfloat hvxy = hvx * v.y;
+	const lpfloat hvxz = hvx * v.z;
+	const lpfloat hvyz = hvz * v.y;
+
+	lpfloat3x3 mtx;
+	mtx[0][0] = e + hvx * v.x;
+	mtx[0][1] = hvxy - v.z;
+	mtx[0][2] = hvxz + v.y;
+
+	mtx[1][0] = hvxy + v.z;
+	mtx[1][1] = e + h * v.y * v.y;
+	mtx[1][2] = hvyz - v.x;
+
+	mtx[2][0] = hvxz - v.y;
+	mtx[2][1] = hvyz + v.x;
+	mtx[2][2] = e + hvz * v.z;
+
+	return mtx;
+}
+
+#endif
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl
new file mode 100644
index 000000000..a5fe0a8e1
--- /dev/null
+++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl
@@ -0,0 +1,380 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2016-2021, Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// XeGTAO is based on GTAO/GTSO "Jimenez et al. 
/ Practical Real-Time Strategies for Accurate Indirect Occlusion",
+// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf
+//
+// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/)
+// Version: (see XeGTAO.h) (='.'=)
+// Details: https://github.com/GameTechDev/XeGTAO (")_(")
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// with additional edits by FiveLimbedCat/ProfJack
+//
+// More references:
+//
+// Screen Space Indirect Lighting with Visibility Bitmask
+// https://arxiv.org/abs/2301.11376
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "../Common/FastMath.hlsl"
+#include "../Common/GBuffer.hlsl"
+#include "../Common/VR.hlsl"
+#include "common.hlsli"
+
+#if USE_HALF_FLOAT_PRECISION == 0
+#	define PI (3.1415926535897932384626433832795)
+#	define HALF_PI (1.5707963267948966192313216916398)
+#	define RCP_PI (0.31830988618)
+#else
+#	define PI ((lpfloat)3.1415926535897932384626433832795)
+#	define HALF_PI ((lpfloat)1.5707963267948966192313216916398)
+#	define RCP_PI ((lpfloat)0.31830988618)
+#endif
+
+Texture2D srcWorkingDepth : register(t0);
+Texture2D srcNormal : register(t1);
+Texture2D srcRadiance : register(t2);  // maybe half-res
+Texture2D srcHilbertLUT : register(t3);
+Texture2D srcAccumFrames : register(t4);  // maybe half-res
+Texture2D srcPrevGI : register(t5);       // maybe half-res
+
+RWTexture2D outGI : register(u0);
+RWTexture2D outBentNormal : register(u1);
+RWTexture2D outPrevDepth : register(u2);
+
+lpfloat GetDepthFade(lpfloat depth)
+{
+	return (lpfloat)saturate((depth - DepthFadeRange.x) * DepthFadeScaleConst);  // 0 at or before DepthFadeRange.x, saturating at 1
+}
+
+// Engine-specific screen & temporal noise loader
+lpfloat2 SpatioTemporalNoise(uint2 pixCoord, uint temporalIndex)  // without TAA, temporalIndex is always 0
+{
+	float2 noise;  // never read
+	uint index = srcHilbertLUT.Load(uint3(pixCoord % 64, 0)).x;
+	index += 288 * (temporalIndex % 64);  // why 288? tried out a few and that's the best so far (with XE_HILBERT_LEVEL 6U) - but there's probably better :)
+	// R2 sequence - see http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/
+	return lpfloat2(frac(0.5 + index * float2(0.75487766624669276005, 0.5698402909980532659114)));
+}
+
+// HBIL pp.29
+lpfloat IlIntegral(lpfloat2 integral_factor, lpfloat angle_prev, lpfloat angle_new)
+{
+	lpfloat sin_prev, cos_prev, sin_new, cos_new;
+	sincos(angle_prev, sin_prev, cos_prev);
+	sincos(angle_new, sin_new, cos_new);
+
+	lpfloat delta_angle = angle_new - angle_prev;
+	return max(0, integral_factor.x * (delta_angle + sin_prev * cos_prev - sin_new * cos_new) + integral_factor.y * (cos_prev * cos_prev - cos_new * cos_new));
+}
+
+void CalculateGI(  // accumulates visibility (and radiance when GI is defined) over NumSlices slices, two sides and NumSteps steps each
+	uint2 dtid, float2 uv, float viewspaceZ, lpfloat3 viewspaceNormal,
+	out lpfloat4 o_currGIAO, out lpfloat3 o_bentNormal)  // o_currGIAO = (radiance.rgb, visibility)
+{
+	uint eyeIndex = GET_EYE_IDX(uv);
+	float2 normalizedScreenPos = ConvertToStereoUV(uv, eyeIndex);
+
+	const lpfloat rcpNumSlices = rcp(NumSlices);
+	const lpfloat rcpNumSteps = rcp(NumSteps);
+
+	const lpfloat falloffRange = (lpfloat)EffectFalloffRange * (lpfloat)EffectRadius;
+	const lpfloat rcpFalloffRange = rcp(falloffRange);
+	const lpfloat falloffFrom = (lpfloat)EffectRadius * ((lpfloat)1 - (lpfloat)EffectFalloffRange);
+	const lpfloat falloffMul = -rcpFalloffRange;
+	const lpfloat falloffAdd = falloffFrom * rcpFalloffRange + (lpfloat)1.0;
+
+	// quality settings / tweaks / hacks
+	// if the offset is under approx pixel size (pixelTooCloseThreshold), push it out to the minimum distance
+	const lpfloat pixelTooCloseThreshold = 1.3;
+	// approx viewspace pixel size at pixCoord; approximation of NDCToViewspace( uv.xy + ViewportSize.xy, pixCenterPos.z ).xy - pixCenterPos.xy;
+	const float2 pixelDirRBViewspaceSizeAtCenterZ = viewspaceZ.xx * (eyeIndex == 0 ? NDCToViewMul_x_PixelSize.xy : NDCToViewMul_x_PixelSize.zw);
+
+	lpfloat screenspaceRadius = (lpfloat)EffectRadius / (lpfloat)pixelDirRBViewspaceSizeAtCenterZ.x;
+	// this is the min distance to start sampling from to avoid sampling from the center pixel (no useful data obtained from sampling center pixel)
+	const lpfloat minS = (lpfloat)pixelTooCloseThreshold / screenspaceRadius;
+
+	//////////////////////////////////////////////////////////////////
+
+	const lpfloat2 localNoise = SpatioTemporalNoise(dtid, FrameIndex);
+	const lpfloat noiseSlice = localNoise.x;
+	const lpfloat noiseStep = localNoise.y;
+
+	//////////////////////////////////////////////////////////////////
+
+	const float3 pixCenterPos = ScreenToViewPosition(normalizedScreenPos, viewspaceZ, eyeIndex);
+	const lpfloat3 viewVec = (lpfloat3)normalize(-pixCenterPos);
+
+	lpfloat visibility = 0;
+	lpfloat3 radiance = 0;
+	lpfloat3 bentNormal = viewspaceNormal;
+
+	for (uint slice = 0; slice < NumSlices; slice++) {
+		lpfloat phi = (PI * rcpNumSlices) * (slice + noiseSlice);
+		lpfloat3 directionVec = 0;
+		sincos(phi, directionVec.y, directionVec.x);
+
+		// convert to screen units for later use
+		lpfloat2 omega = lpfloat2(directionVec.x, -directionVec.y) * screenspaceRadius * RcpFrameDim;
+#ifdef VR
+		omega.x *= 2;
+#endif
+
+		const lpfloat3 orthoDirectionVec = directionVec - (dot(directionVec, viewVec) * viewVec);
+		const lpfloat3 axisVec = normalize(cross(orthoDirectionVec, viewVec));
+
+		lpfloat3 projectedNormalVec = viewspaceNormal - axisVec * dot(viewspaceNormal, axisVec);
+		lpfloat projectedNormalVecLength = length(projectedNormalVec);
+		lpfloat signNorm = (lpfloat)sign(dot(orthoDirectionVec, projectedNormalVec));
+		lpfloat cosNorm = saturate(dot(projectedNormalVec, viewVec) / projectedNormalVecLength);
+
+		lpfloat n = signNorm * ACos(cosNorm);
+
+#ifdef BITMASK
+		uint bitmask = 0;
+#else
+		// this is a lower weight target; not using -1 as in the original paper because it is under horizon, so a 'weight' has different meaning based on the normal
+		lpfloat2 sincos_n;
+		sincos(n, sincos_n.x, sincos_n.y);
+		lpfloat lowHorizonCos1 = sincos_n.x;
+		const lpfloat lowHorizonCos0 = -lowHorizonCos1;
+
+		lpfloat horizonCos0 = lowHorizonCos0;  //-1;
+		lpfloat horizonCos1 = lowHorizonCos1;  //-1;
+
+		lpfloat3 sampleRadiance = 0;
+#endif  // BITMASK
+
+		// R1 sequence (http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/)
+		lpfloat stepNoise = frac(noiseStep + slice * 0.6180339887498948482);
+
+		[unroll] for (int sideSign = -1; sideSign <= 1; sideSign += 2)
+		{
+			[loop] for (uint step = 0; step < NumSteps; step++)
+			{
+				lpfloat s = (step + stepNoise) * rcpNumSteps;
+				s *= s;     // default 2 is fine
+				s += minS;  // avoid sampling center pixel
+
+				lpfloat2 sampleOffset = s * omega;  // no pixel alignment from original xegtao
+
+				float2 sampleScreenPos = normalizedScreenPos + sampleOffset * sideSign;
+				[branch] if (any(sampleScreenPos > 1.0) || any(sampleScreenPos < 0.0)) break;
+				float2 sampleUV = ConvertFromStereoUV(sampleScreenPos, eyeIndex);
+
+				lpfloat sampleOffsetLength = s * screenspaceRadius;  // offset length in pixels; sampleOffset itself is in UV units (omega is scaled by RcpFrameDim), whose log2 is never positive and pinned the MIP to 0
+				lpfloat mipLevel = (lpfloat)clamp(log2(sampleOffsetLength) - DepthMIPSamplingOffset, 0, 5);
+#ifdef HALF_RES
+				mipLevel = max(mipLevel, 1);
+#endif
+
+				float SZ = srcWorkingDepth.SampleLevel(samplerPointClamp, sampleUV, mipLevel);
+				[branch] if (SZ > DepthFadeRange.y) continue;
+
+				float3 samplePos = ScreenToViewPosition(sampleScreenPos, SZ, eyeIndex);
+				float3 sampleDelta = samplePos - float3(pixCenterPos);
+				lpfloat3 sampleHorizonVec = (lpfloat3)normalize(sampleDelta);
+
+#ifdef BITMASK
+				float3 sampleBackPos = samplePos - viewVec * Thickness;
+				lpfloat3 sampleBackHorizonVec = normalize(sampleBackPos - pixCenterPos);
+
+				lpfloat angleFront = FastACos(dot(sampleHorizonVec, viewVec));  // either clamp or use lpfloat version for whatever reason
+				lpfloat angleBack = FastACos(dot(sampleBackHorizonVec, viewVec));
+				lpfloat2 angleRange = -sideSign * (sideSign == -1 ? lpfloat2(angleFront, angleBack) : lpfloat2(angleBack, angleFront));
+				angleRange = smoothstep(0, 1, (angleRange + n) * RCP_PI + .5);  // https://discord.com/channels/586242553746030596/586245736413528082/1102228968247144570
+
+				uint2 bitsRange = uint2(floor(angleRange.x * 32u), round((angleRange.y - angleRange.x) * 32u));  // ceil gets too gray for flat ground
+				uint maskedBits = (bitsRange.y < 32u ? ((1u << bitsRange.y) - 1u) : 0xffffffffu) << bitsRange.x;  // a 32-bit span needs an explicit full mask: shifts use only the low 5 bits of the count, so `1 << 32` would give an empty mask
+
+#else
+
+				// this is our own thickness heuristic that relies on sooner discarding samples behind the center
+				lpfloat falloffBase = length(lpfloat3(sampleDelta) * lpfloat3(1, 1, 1 + ThinOccluderCompensation));
+				lpfloat weight = saturate(falloffBase * falloffMul + falloffAdd);
+
+				// sample horizon cos
+				lpfloat shc = (lpfloat)dot(sampleHorizonVec, viewVec);
+
+				// discard unwanted samples
+				shc = lerp(sideSign == -1 ? lowHorizonCos1 : lowHorizonCos0, shc, weight);
+				lpfloat horizonCos = sideSign == -1 ? horizonCos1 : horizonCos0;
+#endif
+
+#ifdef GI
+				float giBoost = 1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius);
+
+#	ifdef BITMASK
+				bool checkGI = maskedBits;
+#	else
+				bool checkGI = shc > horizonCos;
+#	endif
+
+				if (checkGI) {
+					// IL
+					lpfloat frontBackMult = 1.f;
+#	ifdef BACKFACE
+					if (dot(DecodeNormal(srcNormal.SampleLevel(samplerPointClamp, sampleUV, 0).xy), sampleHorizonVec) > 0)  // backface
+						frontBackMult = BackfaceStrength;
+#	endif
+
+					if (frontBackMult > 0.f) {
+#	ifdef BITMASK
+						lpfloat3 sampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * res_scale, mipLevel).rgb * frontBackMult * giBoost;
+
+						sampleRadiance *= countbits(maskedBits & ~bitmask) * (lpfloat)0.03125;  // 1/32
+						sampleRadiance *= dot(viewspaceNormal, sampleHorizonVec);
+						sampleRadiance = max(0, sampleRadiance);
+
+						radiance += sampleRadiance;
+#	else
+						lpfloat3 newSampleRadiance = 0;
+						newSampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * res_scale, mipLevel).rgb * frontBackMult * giBoost;
+
+						lpfloat anglePrev = n + sideSign * HALF_PI - FastACos(horizonCos);  // lpfloat version is closest acos
+						lpfloat angleCurr = n + sideSign * HALF_PI - FastACos(shc);
+						lpfloat2 integralFactor = 0.5 * lpfloat2(dot(directionVec.xy, viewspaceNormal.xy) * sideSign, viewspaceNormal.z);
+						newSampleRadiance *= IlIntegral(integralFactor, anglePrev, angleCurr);
+
+						// depth filtering. HBIL pp.38
+						lpfloat t = smoothstep(0, 1, dot(viewspaceNormal, sampleHorizonVec));
+						sampleRadiance = lerp(sampleRadiance, newSampleRadiance, t);
+
+						radiance += max(0, sampleRadiance);
+#	endif
+					}
+#	ifndef BITMASK
+					horizonCos = shc;
+#	endif
+				}
+#else
+#	ifndef BITMASK
+				// // thickness heuristic - see "4.3 Implementation details, Height-field assumption considerations"
+				// #if 0 // (disabled, not used) this should match the paper
+				// lpfloat newhorizonCos = max( horizonCos, shc );
+
+				// horizonCos = (horizonCos > shc)? lerp( newhorizonCos, shc, ThinOccluderCompensation ) :newhorizonCos ;
+				// #elif 0 // (disabled, not used) this is slightly different from the paper but cheaper and provides very similar results
+				// horizonCos = lerp(max(horizonCos, shc), shc, ThinOccluderCompensation);
+				// #else // this is a version where thicknessHeuristic is completely disabled
+				horizonCos = max(horizonCos, shc);
+// #endif
+#	endif
+#endif  // GI
+
+#ifdef BITMASK
+				bitmask |= maskedBits;
+#else
+				if (sideSign == -1)
+					horizonCos1 = horizonCos;
+				else
+					horizonCos0 = horizonCos;
+#endif
+			}
+		}
+
+#ifdef BITMASK
+		visibility += (lpfloat)1.0 - countbits(bitmask) * (lpfloat)0.03125;
+
+		// TODO: bent normal for bitmask?
+#else
+#	if 1  // I can't figure out the slight overdarkening on high slopes, so I'm adding this fudge - in the training set, 0.05 is close (PSNR 21.34) to disabled (PSNR 21.45)
+		projectedNormalVecLength = lerp(projectedNormalVecLength, 1, 0.05);
+#	endif
+
+		// line ~27, unrolled
+		lpfloat h0 = -FastACos(horizonCos1);  // same, breaks stuff
+		lpfloat h1 = FastACos(horizonCos0);
+#	if 0  // we can skip clamping for a tiny little bit more performance
+		h0 = n + clamp( h0-n, (lpfloat)-HALF_PI, (lpfloat)HALF_PI );
+		h1 = n + clamp( h1-n, (lpfloat)-HALF_PI, (lpfloat)HALF_PI );
+#	endif
+		lpfloat iarc0 = ((lpfloat)cosNorm + (lpfloat)2 * (lpfloat)h0 * (lpfloat)sincos_n.x - (lpfloat)cos((lpfloat)2 * (lpfloat)h0 - n));
+		lpfloat iarc1 = ((lpfloat)cosNorm + (lpfloat)2 * (lpfloat)h1 * (lpfloat)sincos_n.x - (lpfloat)cos((lpfloat)2 * (lpfloat)h1 - n));
+		lpfloat localVisibility = (lpfloat)projectedNormalVecLength * (lpfloat)(iarc0 + iarc1) * (lpfloat).25;
+		visibility += localVisibility;
+
+#	ifdef BENT_NORMAL
+		// see "Algorithm 2 Extension that computes bent normals b."
+		lpfloat2 sincos_3h0mn, sincos_3h1mn, sincos_h0pn, sincos_h1pn;
+		sincos(3 * h0 - n, sincos_3h0mn.x, sincos_3h0mn.y);
+		sincos(3 * h1 - n, sincos_3h1mn.x, sincos_3h1mn.y);
+		sincos(h0 + n, sincos_h0pn.x, sincos_h0pn.y);
+		sincos(h1 + n, sincos_h1pn.x, sincos_h1pn.y);
+
+		lpfloat t0 = (6 * sin(h0 - n) - sincos_3h0mn.x + 6 * sin(h1 - n) - sincos_3h1mn.x + 16 * sincos_n.x - 3 * (sincos_h0pn.x + sincos_h1pn.x)) * 0.08333333333;  // 1/12
+		lpfloat t1 = (-sincos_3h0mn.y - sincos_3h1mn.y + 8 * sincos_n.y - 3 * (sincos_h0pn.y + sincos_h1pn.y)) * 0.08333333333;
+		lpfloat3 localBentNormal = lpfloat3(directionVec.x * t0, directionVec.y * t0, -t1);
+		localBentNormal = (lpfloat3)mul(RotFromToMatrix(lpfloat3(0, 0, -1), viewVec), localBentNormal) * projectedNormalVecLength;
+		bentNormal += localBentNormal;
+#	endif
+#endif  // BITMASK
+	}
+
+	lpfloat depthFade = GetDepthFade(viewspaceZ);
+
+	visibility *= rcpNumSlices;
+	visibility = lerp(saturate(visibility), 1, depthFade);
+	visibility = pow(visibility, AOPower);
+
+#ifdef GI
+	radiance *= rcpNumSlices;
+	radiance = lerp(radiance, 0, depthFade);
+	radiance *= GIStrength;
+#endif
+
+#ifdef BENT_NORMAL
+	bentNormal = normalize(bentNormal);
+#endif
+
+	o_currGIAO = lpfloat4(radiance, visibility);
+	o_bentNormal = bentNormal;
+}
+
+[numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) {
+	float2 uv = (dtid + .5f) * RcpFrameDim;
+
+	float viewspaceZ = READ_DEPTH(srcWorkingDepth, dtid);
+
+	outPrevDepth[dtid] = viewspaceZ;  // stored before the bias below is applied
+
+	lpfloat2 normalSample = FULLRES_LOAD(srcNormal, dtid, uv, samplerLinearClamp).xy;
+	lpfloat3 viewspaceNormal = (lpfloat3)DecodeNormal(normalSample);
+
+// Move center pixel slightly towards camera to avoid imprecision artifacts due to depth buffer imprecision; offset depends on depth texture format used
+#if USE_HALF_FLOAT_PRECISION == 1
+	viewspaceZ *= 0.99920h;  // this is good for FP16 depth buffer
+#else
+	viewspaceZ *= 0.99999;  // this is good for FP32 depth buffer
+#endif
+
+	lpfloat4 currGIAO = lpfloat4(0, 0, 0, 1);  // default: no radiance, full visibility
+	lpfloat3 bentNormal = viewspaceNormal;
+	[branch] if (viewspaceZ < DepthFadeRange.y)
+		CalculateGI(
+			dtid, uv, viewspaceZ, viewspaceNormal,
+			currGIAO, bentNormal);
+
+#ifdef BENT_NORMAL
+	outBentNormal[dtid] = EncodeNormal(bentNormal);
+#endif
+
+#ifdef TEMPORAL_DENOISER
+	if (viewspaceZ < DepthFadeRange.y) {
+		lpfloat4 prevGIAO = srcPrevGI[dtid];
+		uint accumFrames = srcAccumFrames[dtid];
+
+		currGIAO = lerp(prevGIAO, currGIAO, fastRcpNR0(accumFrames));  // blend towards the new sample with weight fastRcpNR0(accumFrames)
+	}
+#endif
+
+	currGIAO = any(ISNAN(currGIAO)) ? lpfloat4(0, 0, 0, 1) : currGIAO;  // fall back to the default if any channel is NaN
+
+	outGI[dtid] = currGIAO;
+}
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/hilbert.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/hilbert.cs.hlsl
new file mode 100644
index 000000000..d70c99f23
--- /dev/null
+++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/hilbert.cs.hlsl
@@ -0,0 +1,45 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2016-2021, Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// XeGTAO is based on GTAO/GTSO "Jimenez et al. 
/ Practical Real-Time Strategies for Accurate Indirect Occlusion",
+// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf
+//
+// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/)
+// Version: (see XeGTAO.h) (='.'=)
+// Details: https://github.com/GameTechDev/XeGTAO (")_(")
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+RWTexture2D outHilbertLUT : register(u0);  // receives one HilbertIndex value per texel
+
+// From https://www.shadertoy.com/view/3tB3z3 - except we're using R2 here
+#define XE_HILBERT_LEVEL 6U
+#define XE_HILBERT_WIDTH ((1U << XE_HILBERT_LEVEL))  // 64
+#define XE_HILBERT_AREA (XE_HILBERT_WIDTH * XE_HILBERT_WIDTH)  // 4096
+inline uint HilbertIndex(uint posX, uint posY)  // converts a 2D position into a 1D index, one bit of each coordinate per iteration
+{
+	uint index = 0U;
+	for (uint curLevel = XE_HILBERT_WIDTH / 2U; curLevel > 0U; curLevel /= 2U) {  // curLevel takes the values 32, 16, 8, 4, 2, 1
+		uint regionX = (posX & curLevel) > 0U;  // 1 when the curLevel bit of posX is set
+		uint regionY = (posY & curLevel) > 0U;
+		index += curLevel * curLevel * ((3U * regionX) ^ regionY);
+		if (regionY == 0U) {
+			if (regionX == 1U) {
+				posX = uint((XE_HILBERT_WIDTH - 1U)) - posX;  // mirror both coordinates within the 64-wide square
+				posY = uint((XE_HILBERT_WIDTH - 1U)) - posY;
+			}
+
+			uint temp = posX;  // swap posX and posY
+			posX = posY;
+			posY = temp;
+		}
+	}
+	return index;
+}
+
+[numthreads(32, 32, 1)] void main(uint2 tid : SV_DispatchThreadID) {
+	outHilbertLUT[tid] = HilbertIndex(tid.x, tid.y);  // each thread fills the texel at its own dispatch coordinate
+}
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/output.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/output.cs.hlsl
new file mode 100644
index 000000000..20f1c8f37
--- /dev/null
+++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/output.cs.hlsl
@@ -0,0 +1,17 @@
+Texture2D srcGI : register(t0);
+Texture2D srcAlbedo : register(t1);
+
+RWTexture2D outGI : register(u0);  // read and then overwritten by main
+RWTexture2D outGIAlbedo : register(u1);  // written only when GI_BOUNCE is defined
+
+[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) {
+	half4 o = outGI[dtid];  // existing contents of the output target
+	half4 i = srcGI[dtid];
+	half3 gi = i.rgb * srcAlbedo[dtid].rgb;  // GI colour multiplied by albedo
+	o.rgb += gi;  // colour channels are added to the existing value
+	o.w *= i.w;   // w channels are multiplied together
+	outGI[dtid] = o;
+#ifdef GI_BOUNCE
+	outGIAlbedo[dtid] = gi;
+#endif
+}
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl
new file mode 100644
index 000000000..ad726f1be
--- /dev/null
+++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl
@@ -0,0 +1,126 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2016-2021, Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// XeGTAO is based on GTAO/GTSO "Jimenez et al. / Practical Real-Time Strategies for Accurate Indirect Occlusion",
+// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf
+//
+// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/)
+// Version: (see XeGTAO.h) (='.'=)
+// Details: https://github.com/GameTechDev/XeGTAO (")_(")
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "common.hlsli"
+
+Texture2D srcNDCDepth : register(t0);
+
+RWTexture2D outDepth0 : register(u0);
+RWTexture2D outDepth1 : register(u1);
+RWTexture2D outDepth2 : register(u2);
+RWTexture2D outDepth3 : register(u3);
+RWTexture2D outDepth4 : register(u4);
+
+// This is also a good place to do non-linear depth conversion for cases where one wants the 'radius' (effectively the threshold between near-field and far-field GI),
+// is required to be non-linear (i.e. very large outdoors environments). 
+lpfloat ClampDepth(float depth)
+{
+#if USE_HALF_FLOAT_PRECISION != 0  // test the value: common.hlsli always defines the macro, so #ifdef took the FP16 branch even when it was set to 0
+	return (lpfloat)clamp(depth, 0.0h, 65504.0h);
+#else
+	return clamp(depth, 0.0, 3.402823466e+38);
+#endif
+}
+
+// weighted average depth filter
+lpfloat DepthMIPFilter(lpfloat depth0, lpfloat depth1, lpfloat depth2, lpfloat depth3)
+{
+	lpfloat maxDepth = max(max(depth0, depth1), max(depth2, depth3));
+
+	const lpfloat depthRangeScaleFactor = 0.75;  // found empirically :)
+	const lpfloat effectRadius = depthRangeScaleFactor * (lpfloat)EffectRadius;
+	const lpfloat falloffRange = (lpfloat)EffectFalloffRange * effectRadius;
+	const lpfloat rcpFalloffRange = rcp(falloffRange);
+	const lpfloat falloffFrom = effectRadius * ((lpfloat)1 - (lpfloat)EffectFalloffRange);  // uses the scaled radius like falloffRange, so falloffFrom + falloffRange == effectRadius
+	const lpfloat falloffMul = -rcpFalloffRange;
+	const lpfloat falloffAdd = falloffFrom * rcpFalloffRange + (lpfloat)1.0;
+
+	lpfloat weight0 = saturate((maxDepth - depth0) * falloffMul + falloffAdd);  // weight shrinks as a sample lies further in front of the farthest one
+	lpfloat weight1 = saturate((maxDepth - depth1) * falloffMul + falloffAdd);
+	lpfloat weight2 = saturate((maxDepth - depth2) * falloffMul + falloffAdd);
+	lpfloat weight3 = saturate((maxDepth - depth3) * falloffMul + falloffAdd);
+
+	lpfloat weightSum = weight0 + weight1 + weight2 + weight3;
+	return (weight0 * depth0 + weight1 * depth1 + weight2 * depth2 + weight3 * depth3) / weightSum;
+}
+
+groupshared lpfloat g_scratchDepths[8][8];  // one slot per thread of the 8x8 group, reused between MIP levels
+[numthreads(8, 8, 1)] void main(uint2 dispatchThreadID : SV_DispatchThreadID, uint2 groupThreadID : SV_GroupThreadID) {
+	// MIP 0
+	const uint2 baseCoord = dispatchThreadID;
+	const uint2 pixCoord = baseCoord * 2;  // each thread handles a 2x2 block of outDepth0
+	const float2 uv = (pixCoord + .5) * RcpFrameDim * res_scale;
+	const uint eyeIndex = GET_EYE_IDX(uv);
+
+	float4 depths4 = srcNDCDepth.GatherRed(samplerPointClamp, uv, int2(1, 1));
+	lpfloat depth0 = ClampDepth(ScreenToViewDepth(depths4.w, eyeIndex));
+	lpfloat depth1 = ClampDepth(ScreenToViewDepth(depths4.z, eyeIndex));
+	lpfloat depth2 = ClampDepth(ScreenToViewDepth(depths4.x, eyeIndex));
+	lpfloat depth3 = ClampDepth(ScreenToViewDepth(depths4.y, eyeIndex));
+	outDepth0[pixCoord + uint2(0, 0)] = (lpfloat)depth0;
+	outDepth0[pixCoord + uint2(1, 0)] = (lpfloat)depth1;
+	outDepth0[pixCoord + uint2(0, 1)] = (lpfloat)depth2;
+	outDepth0[pixCoord + uint2(1, 1)] = (lpfloat)depth3;
+
+	// MIP 1
+	lpfloat dm1 = DepthMIPFilter(depth0, depth1, depth2, depth3);
+	outDepth1[baseCoord] = (lpfloat)dm1;
+	g_scratchDepths[groupThreadID.x][groupThreadID.y] = dm1;
+
+	GroupMemoryBarrierWithGroupSync();  // all MIP 1 values must be in shared memory before neighbours read them
+
+	// MIP 2
+	[branch] if (all((groupThreadID.xy % 2) == 0))
+	{
+		lpfloat inTL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 0];
+		lpfloat inTR = g_scratchDepths[groupThreadID.x + 1][groupThreadID.y + 0];
+		lpfloat inBL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 1];
+		lpfloat inBR = g_scratchDepths[groupThreadID.x + 1][groupThreadID.y + 1];
+
+		lpfloat dm2 = DepthMIPFilter(inTL, inTR, inBL, inBR);
+		outDepth2[baseCoord / 2] = (lpfloat)dm2;
+		g_scratchDepths[groupThreadID.x][groupThreadID.y] = dm2;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// MIP 3
+	[branch] if (all((groupThreadID.xy % 4) == 0))
+	{
+		lpfloat inTL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 0];
+		lpfloat inTR = g_scratchDepths[groupThreadID.x + 2][groupThreadID.y + 0];
+		lpfloat inBL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 2];
+		lpfloat inBR = g_scratchDepths[groupThreadID.x + 2][groupThreadID.y + 2];
+
+		lpfloat dm3 = DepthMIPFilter(inTL, inTR, inBL, inBR);
+		outDepth3[baseCoord / 4] = (lpfloat)dm3;
+		g_scratchDepths[groupThreadID.x][groupThreadID.y] = dm3;
+	}
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// MIP 4
+	[branch] if (all((groupThreadID.xy % 8) == 0))
+	{
+		lpfloat inTL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 0];
+		lpfloat inTR = g_scratchDepths[groupThreadID.x + 4][groupThreadID.y + 0];
+		lpfloat inBL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 4];
+		lpfloat inBR = g_scratchDepths[groupThreadID.x + 4][groupThreadID.y + 4];
+
+		lpfloat dm4 = DepthMIPFilter(inTL, inTR, inBL, inBR);
+		outDepth4[baseCoord / 8] = (lpfloat)dm4;
+		//g_scratchDepths[ groupThreadID.x ][ groupThreadID.y ] = dm4;
+	}
+}
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl
new file mode 100644
index 000000000..1649b10b0
--- /dev/null
+++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl
@@ -0,0 +1,84 @@
+#include "../Common/GBuffer.hlsl"
+#include "../Common/VR.hlsl"
+#include "common.hlsli"
+
+Texture2D srcDiffuse : register(t0);
+Texture2D srcPrevGI : register(t1);  // maybe half-res
+Texture2D srcCurrDepth : register(t2);
+Texture2D srcCurrNormal : register(t3);
+Texture2D srcPrevDepth : register(t4);  // maybe half-res
+Texture2D srcMotionVec : register(t5);
+Texture2D srcPrevGIAlbedo : register(t6);
+
+RWTexture2D outRadianceDisocc : register(u0);
+RWTexture2D outAccumFrames : register(u1);
+RWTexture2D outRemappedPrevGI : register(u2);
+
+#if (defined(GI) && defined(GI_BOUNCE)) || defined(TEMPORAL_DENOISER)
+#	define REPROJECTION
+#endif
+
+[numthreads(8, 8, 1)] void main(const uint2 pixCoord : SV_DispatchThreadID) {
+	const float2 uv = (pixCoord + .5) * RcpFrameDim;
+	uint eyeIndex = GET_EYE_IDX(uv);
+	const float2 screen_pos = ConvertToStereoUV(uv, eyeIndex);
+
+	float2 prev_uv = uv;
+#ifdef REPROJECTION
+	prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv, samplerLinearClamp);  // offset by the motion vector to get the previous-frame UV
+#endif
+	float2 prev_screen_pos = ConvertToStereoUV(prev_uv, eyeIndex);
+
+	const float curr_depth = READ_DEPTH(srcCurrDepth, pixCoord);
+
+	bool valid_history = false;
+
+#ifdef REPROJECTION
+	if ((curr_depth <= DepthFadeRange.y) && !(any(prev_screen_pos < 0) || any(prev_screen_pos > 1))) {  // within depth range and reprojected position still on screen
+		float3 curr_pos = ScreenToViewPosition(screen_pos, curr_depth, eyeIndex);
+		curr_pos = ViewToWorldPosition(curr_pos, InvViewMatrix[eyeIndex]);
+
+		const float prev_depth = srcPrevDepth.SampleLevel(samplerPointClamp, prev_uv * res_scale, 0);
+		float3 prev_pos = ScreenToViewPosition(prev_screen_pos, prev_depth, eyeIndex);
+		prev_pos = ViewToWorldPosition(prev_pos, PrevInvViewMat[eyeIndex]);
+
+		float3 delta_pos = curr_pos - prev_pos;
+		bool depth_pass = dot(delta_pos, delta_pos) < DepthDisocclusion * DepthDisocclusion;  // squared-distance comparison against DepthDisocclusion
+		valid_history = depth_pass;
+	}
+#endif
+
+	half4 prev_gi_albedo = 0;
+	half4 prev_gi = 0;
+
+#ifdef REPROJECTION
+	[branch] if (valid_history)
+	{
+#	if defined(GI) && defined(GI_BOUNCE)
+		prev_gi_albedo = srcPrevGIAlbedo.SampleLevel(samplerLinearClamp, prev_uv, 0);
+#	endif
+#	ifdef TEMPORAL_DENOISER
+		prev_gi = srcPrevGI.SampleLevel(samplerLinearClamp, prev_uv * res_scale, 0);
+#	endif
+	}
+#endif
+
+	half3 radiance = 0;
+#ifdef GI
+	radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv, samplerLinearClamp);
+#	ifdef GI_BOUNCE
+	radiance += prev_gi_albedo.rgb * GIBounceFade;  // add the reprojected previous GI, scaled by GIBounceFade
+#	endif
+	outRadianceDisocc[pixCoord] = radiance;
+#endif
+
+#ifdef TEMPORAL_DENOISER
+	uint accum_frames = 0;  // stays 0 when history is rejected, so the count restarts at 1
+	[branch] if (valid_history)
+		accum_frames = outAccumFrames[pixCoord];
+	accum_frames = min(accum_frames + 1, MaxAccumFrames);
+
+	outAccumFrames[pixCoord] = accum_frames;
+	outRemappedPrevGI[pixCoord] = prev_gi;
+#endif
+}
\ No newline at end of file
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl
new file mode 100644
index 000000000..b9251648a
--- /dev/null
+++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl
@@ -0,0 +1,58 @@
+// depth-aware upsampling: https://gist.github.com/pixelmager/a4364ea18305ed5ca707d89ddc5f8743
+// blue noise texture from http://momentsingraphics.de/BlueNoise.html
+
+#include "../Common/FastMath.hlsl"
+#include "common.hlsli"
+
+Texture2D srcDepth : register(t0);
+Texture2D srcGI : register(t1);  // half-res
+
+RWTexture2D outGI : register(u0);
+
+#define min4(v) min(min(v.x, v.y), min(v.z, v.w)) +#define max4(v) max(max(v.x, v.y), max(v.z, v.w)) + +[numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) { + int2 px00 = (dtid >> 1) + (dtid & 1) - 1; + int2 px10 = px00 + int2(1, 0); + int2 px01 = px00 + int2(0, 1); + int2 px11 = px00 + int2(1, 1); + + float4 d = float4( + srcDepth.Load(int3(px00, 1)), + srcDepth.Load(int3(px01, 1)), + srcDepth.Load(int3(px10, 1)), + srcDepth.Load(int3(px11, 1))); + + // note: edge-detection + float mind = min4(d); + float maxd = max4(d); + float diffd = maxd - mind; + float avg = dot(d, 0.25.xxxx); + bool d_edge = (diffd / avg) < 0.1; + + float4 atten; + + [branch] if (d_edge) + { + float4 gisample0 = srcGI[px00]; + float4 gisample1 = srcGI[px01]; + float4 gisample2 = srcGI[px10]; + float4 gisample3 = srcGI[px11]; + + float bgdepth = srcDepth[dtid]; + + //note: depth weighing from https://www.ppsloan.org/publications/ProxyPG.pdf#page=5 + float4 dd = abs(d - bgdepth); + float4 w = 1.0 / (dd + 0.00001); + float sumw = w.x + w.y + w.z + w.w; + + atten = (gisample0 * w.x + gisample1 * w.y + gisample2 * w.z + gisample3 * w.w) / (sumw + 0.00001); + } + else + { + atten = srcGI.SampleLevel(samplerLinearClamp, (dtid + .5) * RcpFrameDim * .25, 0); + } + + outGI[dtid] = atten; +} \ No newline at end of file diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/NormalMappingShadowsCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/NormalMappingShadowsCS.hlsl index a0bf1c716..13d0fcb18 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/NormalMappingShadowsCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/NormalMappingShadowsCS.hlsl @@ -1,27 +1,10 @@ - +#include "../Common/DeferredShared.hlsl" #include "../Common/GBuffer.hlsl" -Texture2D NormalRoughnessTexture : register(t0); -Texture2D DepthTexture : register(t1); - -RWTexture2D ShadowMaskTextureRW : register(u0); +Texture2D NormalRoughnessTexture : 
register(t0); +Texture2D DepthTexture : register(t1); -cbuffer PerFrame : register(b0) -{ - float4 DirLightDirectionVS[2]; - float4 DirLightColor; - float4 CameraData; - float2 BufferDim; - float2 RcpBufferDim; - float4x4 ViewMatrix[2]; - float4x4 ProjMatrix[2]; - float4x4 ViewProjMatrix[2]; - float4x4 InvViewMatrix[2]; - float4x4 InvProjMatrix[2]; - row_major float3x4 DirectionalAmbient; - uint FrameCount; - uint pad0[3]; -}; +RWTexture2D ShadowMaskTextureRW : register(u0); half GetScreenDepth(half depth) { @@ -61,18 +44,16 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) return (uv.xy / uv.w) * half2(0.5f, -0.5f) + 0.5f; } -[numthreads(32, 32, 1)] void main(uint3 globalId : SV_DispatchThreadID, uint3 localId : SV_GroupThreadID, uint3 groupId : SV_GroupID) -{ +[numthreads(32, 32, 1)] void main(uint3 globalId : SV_DispatchThreadID, uint3 localId : SV_GroupThreadID, uint3 groupId : SV_GroupID) { half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim; half3 normalVS = DecodeNormal(NormalRoughnessTexture[globalId.xy].xy); half skinMask = NormalRoughnessTexture[globalId.xy].w; - + half shadowMask = ShadowMaskTextureRW[globalId.xy].x; - if (skinMask != 0) - { + if (skinMask != 0) { ShadowMaskTextureRW[globalId.xy] = shadowMask; return; } @@ -89,7 +70,7 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) half3 viewPosition = DepthToView(uv, rawDepth, 0); viewPosition.z = depth; - + half3 endPosVS = viewPosition + DirLightDirectionVS[0].xyz * 5; half2 endPosUV = ViewToUV(endPosVS, false, eyeIndex); @@ -97,30 +78,28 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) half2 endPosPixel = clamp(endPosUV * BufferDim, 0, BufferDim); half NdotL = dot(normalVS, DirLightDirectionVS[0].xyz); - + half shadow = 0; half3 viewDirectionVS = normalize(viewPosition); - + // Fade based on perceivable difference half fade = smoothstep(4, 5, length(startPosPixel - endPosPixel)); // Only march for: not shadowed, not self-shadowed, march 
distance greater than 1 pixel bool validMarchPixel = NdotL > 0.0 && shadowMask != 0.0 && fade > 0.0; - if (validMarchPixel) - { + if (validMarchPixel) { half step = 1.0 / 5.0; half pos = step + step * (InterleavedGradientNoise(globalId.xy) * 2.0 - 1.0); half slope = -NdotL; - for(int i = 0; i < 5; i++) - { - uint2 tmpCoords = lerp(startPosPixel, endPosPixel, pos); - half3 tmpNormal = DecodeNormal(NormalRoughnessTexture[tmpCoords]); - half tmpDepth = GetScreenDepth(DepthTexture[tmpCoords]); - half tmpNdotL = dot(tmpNormal, DirLightDirectionVS[0].xyz); + for (int i = 0; i < 5; i++) { + uint2 tmpCoords = lerp(startPosPixel, endPosPixel, pos); + half3 tmpNormal = DecodeNormal(NormalRoughnessTexture[tmpCoords]); + half tmpDepth = GetScreenDepth(DepthTexture[tmpCoords]); + half tmpNdotL = dot(tmpNormal, DirLightDirectionVS[0].xyz); - half shadowed = -tmpNdotL; + half shadowed = -tmpNdotL; shadowed += NdotL * pos; shadowed += max(0, dot(tmpNormal, viewDirectionVS)); shadowed *= 1 - min(1, abs(depth - tmpDepth) * 0.1); @@ -137,4 +116,3 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) ShadowMaskTextureRW[globalId.xy] = min(shadowMask, shadow); } - diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl index f4606ca11..1904c84c1 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl @@ -1,19 +1,5 @@ -cbuffer PerFrameShared : register(b0) -{ - float4 DirLightDirectionVS[2]; - float4 DirLightColor; - float4 CameraData; - float2 BufferDim; - float2 RcpBufferDim; - float4x4 ViewMatrix[2]; - float4x4 ProjMatrix[2]; - float4x4 ViewProjMatrix[2]; - float4x4 InvViewMatrix[2]; - float4x4 InvProjMatrix[2]; - row_major float3x4 DirectionalAmbient; - uint FrameCount; - uint pad0[3]; -}; + +#include "../Common/DeferredShared.hlsl" half GetScreenDepth(half 
depth) { @@ -22,38 +8,36 @@ half GetScreenDepth(half depth) #include "bend_sss_gpu.hlsl" -Texture2D DepthTexture : register(t0); // Depth Buffer Texture (rasterized non-linear depth) -RWTexture2D OutputTexture : register(u0); // Output screen-space shadow buffer (typically single-channel, 8bit) -SamplerState PointBorderSampler : register(s0); // A point sampler, with Wrap Mode set to Clamp-To-Border-Color (D3D12_TEXTURE_ADDRESS_MODE_BORDER), and Border Color set to "FarDepthValue" (typically zero), or some other far-depth value out of DepthBounds. - // If you have issues where invalid shadows are appearing from off-screen, it is likely that this sampler is not correctly setup +Texture2D DepthTexture : register(t0); // Depth Buffer Texture (rasterized non-linear depth) +RWTexture2D OutputTexture : register(u0); // Output screen-space shadow buffer (typically single-channel, 8bit) +SamplerState PointBorderSampler : register(s0); // A point sampler, with Wrap Mode set to Clamp-To-Border-Color (D3D12_TEXTURE_ADDRESS_MODE_BORDER), and Border Color set to "FarDepthValue" (typically zero), or some other far-depth value out of DepthBounds. + // If you have issues where invalid shadows are appearing from off-screen, it is likely that this sampler is not correctly setup cbuffer PerFrame : register(b1) { // Runtime data returned from BuildDispatchList(): - float4 LightCoordinate; // Values stored in DispatchList::LightCoordinate_Shader by BuildDispatchList() - int2 WaveOffset; // Values stored in DispatchData::WaveOffset_Shader by BuildDispatchList() + float4 LightCoordinate; // Values stored in DispatchList::LightCoordinate_Shader by BuildDispatchList() + int2 WaveOffset; // Values stored in DispatchData::WaveOffset_Shader by BuildDispatchList() // Renderer Specific Values: - float FarDepthValue; // Set to the Depth Buffer Value for the far clip plane, as determined by renderer projection matrix setup (typically 0). 
- float NearDepthValue; // Set to the Depth Buffer Value for the near clip plane, as determined by renderer projection matrix setup (typically 1). + float FarDepthValue; // Set to the Depth Buffer Value for the far clip plane, as determined by renderer projection matrix setup (typically 0). + float NearDepthValue; // Set to the Depth Buffer Value for the near clip plane, as determined by renderer projection matrix setup (typically 1). // Sampling data: - float2 InvDepthTextureSize; // Inverse of the texture dimensions for 'DepthTexture' (used to convert from pixel coordinates to UVs) - // If 'PointBorderSampler' is an Unnormalized sampler, then this value can be hard-coded to 1. - // The 'USE_HALF_PIXEL_OFFSET' macro might need to be defined if sampling at exact pixel coordinates isn't precise (e.g., if odd patterns appear in the shadow). + float2 InvDepthTextureSize; // Inverse of the texture dimensions for 'DepthTexture' (used to convert from pixel coordinates to UVs) + // If 'PointBorderSampler' is an Unnormalized sampler, then this value can be hard-coded to 1. + // The 'USE_HALF_PIXEL_OFFSET' macro might need to be defined if sampling at exact pixel coordinates isn't precise (e.g., if odd patterns appear in the shadow). 
float SurfaceThickness; float BilinearThreshold; float ShadowContrast; }; -[numthreads(WAVE_SIZE, 1, 1)] void main -( - int3 groupID : SV_GroupID, - int groupThreadID : SV_GroupThreadID -) { +[numthreads(WAVE_SIZE, 1, 1)] void main( + int3 groupID : SV_GroupID, + int groupThreadID : SV_GroupThreadID) { DispatchParameters parameters; parameters.SetDefaults(); - + parameters.LightCoordinate = LightCoordinate; parameters.WaveOffset = WaveOffset; parameters.FarDepthValue = 1; diff --git a/features/Terrain Occlusion/Shaders/Features/TerrainOcclusion.ini b/features/Terrain Occlusion/Shaders/Features/TerrainOcclusion.ini new file mode 100644 index 000000000..19f01444d --- /dev/null +++ b/features/Terrain Occlusion/Shaders/Features/TerrainOcclusion.ini @@ -0,0 +1,2 @@ +[Info] +Version = 1-0-0 \ No newline at end of file diff --git a/features/Terrain Occlusion/Shaders/TerrainOcclusion/AOGen.cs.hlsl b/features/Terrain Occlusion/Shaders/TerrainOcclusion/AOGen.cs.hlsl new file mode 100644 index 000000000..849eafeaa --- /dev/null +++ b/features/Terrain Occlusion/Shaders/TerrainOcclusion/AOGen.cs.hlsl @@ -0,0 +1,114 @@ +#define PI 3.1415926535 +#define HALF_PI 1.570796327 + +struct AOGenBuffer +{ + float aoDistance; + uint sliceCount; + uint sampleCount; + + float3 pos0; + float3 pos1; + float2 zRange; +}; + +RWTexture2D RWTexOcclusion : register(u0); +RWTexture2D RWTexNormalisedHeight : register(u1); + +StructuredBuffer aoGen : register(t0); +Texture2D TexHeightmap : register(t1); + +SamplerState HeightmapSampler +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Clamp; + AddressV = Clamp; + AddressW = Clamp; +}; + +float3 getPos(float2 uv) +{ + float3 pos = float3(uv, TexHeightmap.SampleLevel(HeightmapSampler, uv, 0).x); + pos = lerp(aoGen[0].pos0.xyz, aoGen[0].pos1.xyz, pos); + return pos; +} + +// https://gist.github.com/bgolus/a07ed65602c009d5e2f753826e8078a0 +float3 ReconstructNormal(float2 uv, float2 texelSize) +{ + // get current pixel's view space position + float3 
viewSpacePos_c = getPos(uv + float2(0.0, 0.0) * texelSize); + + // get view space position at 1 pixel offsets in each major direction + float3 viewSpacePos_l = getPos(uv + float2(-1.0, 0.0) * texelSize); + float3 viewSpacePos_r = getPos(uv + float2(1.0, 0.0) * texelSize); + float3 viewSpacePos_d = getPos(uv + float2(0.0, -1.0) * texelSize); + float3 viewSpacePos_u = getPos(uv + float2(0.0, 1.0) * texelSize); + + // get the difference between the current and each offset position + float3 l = viewSpacePos_c - viewSpacePos_l; + float3 r = viewSpacePos_r - viewSpacePos_c; + float3 d = viewSpacePos_c - viewSpacePos_d; + float3 u = viewSpacePos_u - viewSpacePos_c; + + // pick horizontal and vertical diff with the smallest z difference + float3 hDeriv = abs(l.z) < abs(r.z) ? l : r; + float3 vDeriv = abs(d.z) < abs(u.z) ? d : u; + + // get view space normal from the cross product of the two smallest offsets + float3 viewNormal = normalize(cross(hDeriv, vDeriv)); + + return viewNormal; +} + +[numthreads(32, 32, 1)] void main(const uint2 tid + : SV_DispatchThreadID) { + uint2 dims; + TexHeightmap.GetDimensions(dims.x, dims.y); + float2 texelSize = rcp(dims); + + uint2 px_coord = tid.xy; + float2 uv = (px_coord + 0.5) * texelSize; + + float3 normal = -ReconstructNormal(uv, texelSize); + float3 pos = getPos(uv); + float3 view = float3(0, 0, 1); + + // helpful constants + float rcp_sample_count = rcp(aoGen[0].sampleCount); + float2 world_uv_scale = rcp(aoGen[0].pos1.xy - aoGen[0].pos0.xy); // delta world pos * world_uv_scale = delta uv; + + float cos_cone = 0; + float visibility = 0; + for (uint slice = 0; slice < aoGen[0].sliceCount; slice++) { + float theta = (PI / aoGen[0].sliceCount) * slice; + float3 slice_dir = 0; + sincos(theta, slice_dir.y, slice_dir.x); + + float3 axis_dir = cross(slice_dir, view); + float3 proj_normal = normal - axis_dir * dot(normal, axis_dir); + float proj_normal_len = length(proj_normal); + + float sgn_n = sign(dot(slice_dir, proj_normal)); + float 
cos_n = saturate(dot(proj_normal, view) / proj_normal_len); + float n = sgn_n * acos(cos_n); + + for (int side = 0; side <= 1; side++) { + float horizon_cos = -1; + for (uint samp = 0; samp < aoGen[0].sampleCount; samp++) { + float dist_ratio = (samp + 1) * rcp_sample_count; + float2 curr_uv = uv + (2 * side - 1) * dist_ratio * aoGen[0].aoDistance * slice_dir.xy * world_uv_scale; + float3 curr_pos = getPos(curr_uv); + float3 horizon_dir = normalize(curr_pos - pos); + horizon_cos = max(horizon_cos, dot(horizon_dir, view)); + } + float h = n + clamp((-1 + 2 * side) * acos(horizon_cos) - n, -HALF_PI, HALF_PI); + visibility += saturate(proj_normal_len * (cos_n + 2 * h * sin(n) - cos(2 * h - n)) * .25); + } + } + visibility /= aoGen[0].sliceCount; + + float norm_z = (pos.z - aoGen[0].zRange.x) / (aoGen[0].zRange.y - aoGen[0].zRange.x); + RWTexOcclusion[tid] = visibility; + RWTexNormalisedHeight[tid] = norm_z; +} \ No newline at end of file diff --git a/features/Terrain Occlusion/Shaders/TerrainOcclusion/Output.cs.hlsl b/features/Terrain Occlusion/Shaders/TerrainOcclusion/Output.cs.hlsl new file mode 100644 index 000000000..2108547a1 --- /dev/null +++ b/features/Terrain Occlusion/Shaders/TerrainOcclusion/Output.cs.hlsl @@ -0,0 +1,111 @@ +#include "../Common/DeferredShared.hlsl" +#include "../Common/VR.hlsl" + +struct PerPassTerraOcc +{ + uint EnableTerrainShadow; + uint EnableTerrainAO; + + float HeightBias; + + float ShadowSofteningRadiusAngle; + float2 ShadowFadeDistance; + + float AOMix; + float AOPower; + float AOFadeOutHeightRcp; + + float3 scale; + float3 invScale; + float3 offset; + float2 zRange; +}; + +Texture2D TexDepth : register(t0); +StructuredBuffer perPassTerraOcc : register(t1); +Texture2D TexTerraOcc : register(t2); +Texture2D TexNormalisedHeight : register(t3); +Texture2D TexShadowHeight : register(t4); + +RWTexture2D RWTexShadowMask : register(u0); +RWTexture2D RWTexGI : register(u1); + +SamplerState SamplerDefault; + +float2 
GetTerrainOcclusionUV(float2 xy) +{ + return xy * perPassTerraOcc[0].scale.xy + perPassTerraOcc[0].offset.xy; +} + +float2 GetTerrainOcclusionXY(float2 uv) +{ + return (uv - perPassTerraOcc[0].offset.xy) * perPassTerraOcc[0].invScale.xy; +} + +float GetTerrainZ(float norm_z) +{ + return lerp(perPassTerraOcc[0].zRange.x, perPassTerraOcc[0].zRange.y, norm_z) + perPassTerraOcc[0].HeightBias; +} + +float2 GetTerrainZ(float2 norm_z) +{ + return float2(GetTerrainZ(norm_z.x), GetTerrainZ(norm_z.y)); +} + +[numthreads(32, 32, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + float2 uv = (dtid + .5) * RcpBufferDim; +#ifdef VR + const uint eyeIndex = uv.x > .5; +#else + const uint eyeIndex = 0; +#endif + + float3 ndc = float3(ConvertToStereoUV(uv, eyeIndex), 1); + ndc = ndc * 2 - 1; + ndc.y = -ndc.y; + ndc.z = TexDepth[dtid]; + + float4 worldPos = mul(InvViewMatrix[eyeIndex], mul(InvProjMatrix[eyeIndex], float4(ndc, 1))); + worldPos.xyz /= worldPos.w; + float viewDistance = length(worldPos.xyz); + + // if (viewDistance > 1e7) + // return; + + worldPos.xyz += CamPosAdjust[0].xyz; + + float2 terraOccUV = GetTerrainOcclusionUV(worldPos.xy); + + if (any(terraOccUV < 0) || any(terraOccUV > 1)) // either axis outside [0, 1]: leave shadow mask and GI untouched + return; + + float terrainShadow = 1; + float terrainAo = 1; + + if (perPassTerraOcc[0].EnableTerrainShadow && (viewDistance > perPassTerraOcc[0].ShadowFadeDistance.x)) { + float fadeFactor = saturate((viewDistance - perPassTerraOcc[0].ShadowFadeDistance.x) / (perPassTerraOcc[0].ShadowFadeDistance.y - perPassTerraOcc[0].ShadowFadeDistance.x)); + float2 shadowHeight = GetTerrainZ(TexShadowHeight.SampleLevel(SamplerDefault, terraOccUV, 0)); + float shadowFraction = saturate((worldPos.z - shadowHeight.y) / (shadowHeight.x - shadowHeight.y)); + terrainShadow = lerp(1, shadowFraction, fadeFactor); + } + if (perPassTerraOcc[0].EnableTerrainAO) { + float terrainHeight = GetTerrainZ(TexNormalisedHeight.SampleLevel(SamplerDefault, terraOccUV, 0).x); + terrainAo = TexTerraOcc.SampleLevel(SamplerDefault, 
terraOccUV, 0).x; + + // power + terrainAo = pow(terrainAo, perPassTerraOcc[0].AOPower); + + // height fadeout + float fadeOut = saturate((worldPos.z - terrainHeight) * perPassTerraOcc[0].AOFadeOutHeightRcp); + terrainAo = lerp(terrainAo, 1, fadeOut); + + terrainAo = lerp(1, terrainAo, perPassTerraOcc[0].AOMix); + } + + half shadow = RWTexShadowMask[dtid]; + RWTexShadowMask[dtid] = min(shadow, terrainShadow); + + float4 gi = RWTexGI[dtid]; + gi.w *= terrainAo; + RWTexGI[dtid] = gi; +} \ No newline at end of file diff --git a/features/Terrain Occlusion/Shaders/TerrainOcclusion/ShadowUpdate.cs.hlsl b/features/Terrain Occlusion/Shaders/TerrainOcclusion/ShadowUpdate.cs.hlsl new file mode 100644 index 000000000..f8e0090c2 --- /dev/null +++ b/features/Terrain Occlusion/Shaders/TerrainOcclusion/ShadowUpdate.cs.hlsl @@ -0,0 +1,106 @@ +Texture2D TexHeight : register(t0); +RWTexture2D RWTexShadowHeights : register(u0); + +cbuffer ShadowUpdateCB : register(b1) +{ + float2 LightPxDir : packoffset(c0.x); // direction on which light descends, from one pixel to next via dda + float2 LightDeltaZ : packoffset(c0.z); // per lightUVDir, normalised, [upper, lower] penumbra, should be negative + uint StartPxCoord : packoffset(c1.x); + float2 PxSize : packoffset(c1.y); +} + +float GetInterpolatedHeight(float2 pxCoord, bool isVertical) +{ + uint2 dims; + TexHeight.GetDimensions(dims.x, dims.y); + + int2 lerpPxCoordA = int2(pxCoord - .5 * float2(isVertical, !isVertical)); + int2 lerpPxCoordB = int2(pxCoord + .5 * float2(isVertical, !isVertical)); + float heightA = TexHeight[lerpPxCoordA]; + float heightB = TexHeight[lerpPxCoordB]; + + bool inBoundA = all(lerpPxCoordA > 0); + bool inBoundB = all(lerpPxCoordB < dims); + if (inBoundA && inBoundB) + return lerp(heightA, heightB, frac(pxCoord - .5)); + else if (!inBoundA) + return heightB; + else + return heightA; +} + +float2 GetInterpolatedHeightRW(float2 pxCoord, bool isVertical) +{ + uint2 dims; + RWTexShadowHeights.GetDimensions(dims.x, 
dims.y); + + int2 lerpPxCoordA = int2(pxCoord - .5 * float2(isVertical, !isVertical)); + int2 lerpPxCoordB = int2(pxCoord + .5 * float2(isVertical, !isVertical)); + float2 heightA = RWTexShadowHeights[lerpPxCoordA]; + float2 heightB = RWTexShadowHeights[lerpPxCoordB]; + + bool inBoundA = all(lerpPxCoordA > 0); + bool inBoundB = all(lerpPxCoordB < dims); + if (inBoundA && inBoundB) + return lerp(heightA, heightB, frac(pxCoord - .5)); + else if (!inBoundA) + return heightB; + else + return heightA; +} + +groupshared float2 g_shadowHeight[1024]; + +[numthreads(1024, 1, 1)] void main(const uint gtid + : SV_GroupThreadID, const uint gid + : SV_GroupID) { + uint2 dims; + TexHeight.GetDimensions(dims.x, dims.y); + + bool isVertical = abs(LightPxDir.y) > abs(LightPxDir.x); + float2 lightUVDir = LightPxDir * PxSize; + + uint2 rayStartPxCoord = isVertical ? uint2(gid, StartPxCoord) : uint2(StartPxCoord, gid); + float2 rayStartUV = (rayStartPxCoord + .5) * PxSize; + float2 rawThreadUV = rayStartUV + gtid * lightUVDir; + + bool2 isUVinRange = (rawThreadUV > 0) && (rawThreadUV < 1); + bool isValid = isVertical ? 
isUVinRange.y : isUVinRange.x; + + float2 threadUV = rawThreadUV - floor(rawThreadUV); // wraparound: keep only the fractional part so the UV lands in [0, 1) + float2 threadPxCoord = threadUV * dims; + + float2 pastHeights; + if (isValid) { + pastHeights = RWTexShadowHeights[uint2(threadPxCoord)]; + + // bifilter: height from GetInterpolatedHeight at this pixel, duplicated into both channels (.xx) + float2 heights = GetInterpolatedHeight(threadPxCoord, isVertical).xx; + + // fetch last dispatch: thread 0 continues from the heights already stored one light step back (plus LightDeltaZ), unless that step crosses the UV wrap boundary + if (gtid == 0 && all(floor(rawThreadUV - lightUVDir) == floor(rawThreadUV))) { + heights = max(heights, GetInterpolatedHeightRW(threadPxCoord - LightPxDir, isVertical) + LightDeltaZ); + } + + g_shadowHeight[gtid] = heights; + } + + GroupMemoryBarrierWithGroupSync(); + + // simple parallel scan: offset doubles each step (1, 2, ..., 512); each thread takes the max with the value `offset` threads behind it plus offset * LightDeltaZ + [unroll] for (uint offset = 1; offset < 1024; offset <<= 1) + { + if (isValid && gtid >= offset) { + if (all(floor(rawThreadUV - lightUVDir * offset) == floor(rawThreadUV))) // no wraparound happened + g_shadowHeight[gtid] = max(g_shadowHeight[gtid], g_shadowHeight[gtid - offset] + LightDeltaZ * offset); + } + GroupMemoryBarrierWithGroupSync(); + } + + // save: move the stored heights 20% of the way towards the new result + if (isValid) { + RWTexShadowHeights[uint2(threadPxCoord)] = lerp(pastHeights, g_shadowHeight[gtid], .2f); + // RWTexShadowHeights[uint2(threadPxCoord)] = gtid / 1024.f; + // RWTexShadowHeights[uint2(gtid, gid)] = threadUV; + } +} \ No newline at end of file diff --git a/features/Terrain Occlusion/textures/heightmaps/Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds b/features/Terrain Occlusion/textures/heightmaps/Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds new file mode 100644 index 000000000..8037dea8f Binary files /dev/null and b/features/Terrain Occlusion/textures/heightmaps/Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds differ diff --git a/features/Terrain Occlusion/textures/heightmaps/readme.txt b/features/Terrain Occlusion/textures/heightmaps/readme.txt new file mode 100644 index 000000000..0a58a39e9 --- /dev/null +++ b/features/Terrain Occlusion/textures/heightmaps/readme.txt @@ -0,0 +1,11 @@ +[worldspace 
editorID].HeightMap.[West cell].[South cell].[East cell].[North cell].[z min].[z max].[Terrain z min].[Terrain z max].dds +The min/max cell coordinates are the actual cells that contain terrain height data. +All z values are actual z values divided by 8. +z min/max correspond to pixel value zero/pure black and one/pure white. +Terrain z min/max correspond to the lowest/highest point of the terrain, or bounding box. + +Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds +native Skyrim.esm data + +Terrain heightmap for Tamriel +Each cell has 32x32 heightmap data points, so max resolution is 32x32 pixels per cell. \ No newline at end of file diff --git a/package/Shaders/Common/DeferredShared.hlsl b/package/Shaders/Common/DeferredShared.hlsl new file mode 100644 index 000000000..72d581e99 --- /dev/null +++ b/package/Shaders/Common/DeferredShared.hlsl @@ -0,0 +1,18 @@ +cbuffer PerFrameDeferredShared : register(b0) +{ + float4 CamPosAdjust[2]; + float4 DirLightDirectionVS[2]; + float4 DirLightColor; + float4 CameraData; + float2 BufferDim; + float2 RcpBufferDim; + float4x4 ViewMatrix[2]; + float4x4 ProjMatrix[2]; + float4x4 ViewProjMatrix[2]; + float4x4 InvViewMatrix[2]; + float4x4 InvProjMatrix[2]; + float4x4 InvViewProjMatrix[2]; + row_major float3x4 DirectionalAmbient; + uint FrameCount; + uint pad0[3]; +}; \ No newline at end of file diff --git a/package/Shaders/Common/FastMath.hlsl b/package/Shaders/Common/FastMath.hlsl new file mode 100644 index 000000000..480034dc5 --- /dev/null +++ b/package/Shaders/Common/FastMath.hlsl @@ -0,0 +1,341 @@ +/****************************************************************************** + Shader Fast Math Lib (v0.41) + A shader math library for optimized approximate transcendental functions. + Optimized and tested on AMD GCN architecture. 
+ Release notes: + v0.41 minor bug fixes, missing references + + v0.4 new constants calculated for new ranges, minor optimization and precision improvements + Developed during production of : Far Cry 4, Ubisoft Montreal + v0.3 added Newton Raphson 1 and 2 iterations + Newton Raphson methods provided for reference purpose (some code / architectures might still benefit from single NR). + v0.2 fast IEEE float math sqrt() rsqrt() rcp() + v0.1 4th order polynomial approximations for atan() asin() acos() + Developed during production of : Killzone : Shadow Fall, Guerrilla Games, SCEE + Ubisoft and Guerrilla Games granted permission for open source distribution. + Contact information: + Michal Drobot - @MichalDrobot + hello@drobot.org + Presented publicly part of a course: + Low Level Optimizations for AMD GCN + (available @ http://michaldrobot.com/publications/) +********************************************************************************/ + +/****************************************************************************** + The MIT License (MIT) + Copyright (c) <2014> + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +********************************************************************************/ + +#ifndef SHADER_FAST_MATH_INC_FX +#define SHADER_FAST_MATH_INC_FX + +// Define switch for PC compilation +#ifdef _PC +# define asint(_x) *reinterpret_cast(&_x); +# define asfloat(_x) *reinterpret_cast(&_x); +# include +#endif + +// Derived from batch testing +// TODO : Should be improved +#define IEEE_INT_RCP_CONST_NR0 0x7EF311C2 +#define IEEE_INT_RCP_CONST_NR1 0x7EF311C3 +#define IEEE_INT_RCP_CONST_NR2 0x7EF312AC + +// Derived from batch testing +#define IEEE_INT_SQRT_CONST_NR0 0x1FBD1DF5 + +// Biases for global ranges +// 0-1 or 1-2 specific ranges might improve from different bias +// Derived from batch testing +// TODO : Should be improved +#define IEEE_INT_RCP_SQRT_CONST_NR0 0x5f3759df +#define IEEE_INT_RCP_SQRT_CONST_NR1 0x5F375A86 +#define IEEE_INT_RCP_SQRT_CONST_NR2 0x5F375A86 + +// +// Normalized range [0,1] Constants +// +#define IEEE_INT_RCP_CONST_NR0_SNORM 0x7EEF370B +#define IEEE_INT_SQRT_CONST_NR0_SNORM 0x1FBD1DF5 +#define IEEE_INT_RCP_SQRT_CONST_NR0_SNORM 0x5F341A43 + +// +// Distance [0,1000] based constants +// +//#define IEEE_INT_RCP_CONST_NR0_SNORM 0x7EF3210C +//#define IEEE_INT_SQRT_CONST_NR0_SNORM 0x1FBD22DF +//#define IEEE_INT_RCP_SQRT_CONST_NR0_SNORM 0x5F33E79F + +// +// RCP SQRT +// + +// Approximate guess using integer float arithmetics based on IEEE floating point standard +float rcpSqrtIEEEIntApproximation(float inX, const int inRcpSqrtConst) +{ + int x = asint(inX); + x = inRcpSqrtConst - (x >> 1); + return asfloat(x); +} + +float rcpSqrtNewtonRaphson(float inXHalf, float inRcpX) +{ + return inRcpX * (-inXHalf * (inRcpX * inRcpX) + 1.5f); +} + +// +// Using 0 Newton Raphson iterations +// 
Relative error : ~3.4% over full +// Precise format : ~small float +// 2 ALU +// +float fastRcpSqrtNR0(float inX) +{ + float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR0); + return xRcpSqrt; +} + +// +// Using 1 Newton Raphson iterations +// Relative error : ~0.2% over full +// Precise format : ~half float +// 6 ALU +// +float fastRcpSqrtNR1(float inX) +{ + float xhalf = 0.5f * inX; + float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR1); + xRcpSqrt = rcpSqrtNewtonRaphson(xhalf, xRcpSqrt); + return xRcpSqrt; +} + +// +// Using 2 Newton Raphson iterations +// Relative error : ~4.6e-004% over full +// Precise format : ~full float +// 9 ALU +// +float fastRcpSqrtNR2(float inX) +{ + float xhalf = 0.5f * inX; + float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR2); + xRcpSqrt = rcpSqrtNewtonRaphson(xhalf, xRcpSqrt); + xRcpSqrt = rcpSqrtNewtonRaphson(xhalf, xRcpSqrt); + return xRcpSqrt; +} + +// +// SQRT +// +float sqrtIEEEIntApproximation(float inX, const int inSqrtConst) +{ + int x = asint(inX); + x = inSqrtConst + (x >> 1); + return asfloat(x); +} + +// +// Using 0 Newton Raphson iterations +// Relative error : < 0.7% over full +// Precise format : ~small float +// 1 ALU +// +float fastSqrtNR0(float inX) +{ + float xRcp = sqrtIEEEIntApproximation(inX, IEEE_INT_SQRT_CONST_NR0); + return xRcp; +} + +// +// Use inverse Rcp Sqrt +// Using 1 Newton Raphson iterations +// Relative error : ~0.2% over full +// Precise format : ~half float +// 6 ALU +// +float fastSqrtNR1(float inX) +{ + // Inverse Rcp Sqrt + return inX * fastRcpSqrtNR1(inX); +} + +// +// Use inverse Rcp Sqrt +// Using 2 Newton Raphson iterations +// Relative error : ~4.6e-004% over full +// Precise format : ~full float +// 9 ALU +// +float fastSqrtNR2(float inX) +{ + // Inverse Rcp Sqrt + return inX * fastRcpSqrtNR2(inX); +} + +// +// RCP +// + +float rcpIEEEIntApproximation(float inX, const int inRcpConst) +{ + int x = asint(inX); + 
x = inRcpConst - x; + return asfloat(x); +} + +float rcpNewtonRaphson(float inX, float inRcpX) +{ + return inRcpX * (-inRcpX * inX + 2.0f); +} + +// +// Using 0 Newton Raphson iterations +// Relative error : < 0.4% over full +// Precise format : ~small float +// 1 ALU +// +float fastRcpNR0(float inX) +{ + float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR0); + return xRcp; +} + +// +// Using 1 Newton Raphson iterations +// Relative error : < 0.02% over full +// Precise format : ~half float +// 3 ALU +// +float fastRcpNR1(float inX) +{ + float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR1); + xRcp = rcpNewtonRaphson(inX, xRcp); + return xRcp; +} + +// +// Using 2 Newton Raphson iterations +// Relative error : < 5.0e-005% over full +// Precise format : ~full float +// 5 ALU +// +float fastRcpNR2(float inX) +{ + float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR2); + xRcp = rcpNewtonRaphson(inX, xRcp); + xRcp = rcpNewtonRaphson(inX, xRcp); + return xRcp; +} + +// +// Trigonometric functions +// +static const float fsl_PI = 3.1415926535897932384626433f; +static const float fsl_HALF_PI = 0.5f * fsl_PI; + +// 4th order polynomial approximation +// 4 VGRP, 16 ALU Full Rate +// 7 * 10^-5 radians precision +// Reference : Handbook of Mathematical Functions (chapter : Elementary Transcendental Functions), M. Abramowitz and I.A. Stegun, Ed. +float acosFast4(float inX) +{ + float x1 = abs(inX); + float x2 = x1 * x1; + float x3 = x2 * x1; + float s; + + s = -0.2121144f * x1 + 1.5707288f; + s = 0.0742610f * x2 + s; + s = -0.0187293f * x3 + s; + s = sqrt(1.0f - x1) * s; + + // acos function mirroring + // check per platform if compiles to a selector - no branch neeeded + return inX >= 0.0f ? 
s : fsl_PI - s; +} + +// 4th order polynomial approximation +// 4 VGRP, 16 ALU Full Rate +// 7 * 10^-5 radians precision +float asinFast4(float inX) +{ + float x = inX; + + // asin is offset of acos + return fsl_HALF_PI - acosFast4(x); +} + +// 4th order hyperbolical approximation +// 4 VGRP, 12 ALU Full Rate +// 7 * 10^-5 radians precision +// Reference : Efficient approximations for the arctangent function, Rajan, S. Sichun Wang Inkol, R. Joyal, A., May 2006 +float atanFast4(float inX) +{ + float x = inX; + return x * (-0.1784f * abs(x) - 0.0663f * x * x + 1.0301f); +} + +//////////////////////////////////////////////////////////////////////////////////////////////// + +// https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/ + +// max absolute error 9.0x10^-3 +// Eberly's polynomial degree 1 - respect bounds +// 4 VGPR, 12 FR (8 FR, 1 QR), 1 scalar +// input [-1, 1] and output [0, PI] +float ACos(float inX) +{ + float x = abs(inX); + float res = -0.156583f * x + fsl_HALF_PI; + res *= fastSqrtNR0(1.0f - x); + return (inX >= 0) ? res : fsl_PI - res; +} + +// Same cost as Acos + 1 FR +// Same error +// input [-1, 1] and output [-PI/2, PI/2] +float ASin(float x) +{ + return fsl_HALF_PI - ACos(x); +} + +// max absolute error 1.3x10^-3 +// Eberly's odd polynomial degree 5 - respect bounds +// 4 VGPR, 14 FR (10 FR, 1 QR), 2 scalar +// input [0, infinity] and output [0, PI/2] +float ATanPos(float x) +{ + float t0 = (x < 1.0f) ? x : 1.0f / x; + float t1 = t0 * t0; + float poly = 0.0872929f; + poly = -0.301895f + poly * t1; + poly = 1.0f + poly * t1; + poly = poly * t0; + return (x < 1.0f) ? poly : fsl_HALF_PI - poly; +} + +// 4 VGPR, 16 FR (12 FR, 1 QR), 2 scalar +// input [-infinity, infinity] and output [-PI/2, PI/2] +float ATan(float x) +{ + float t0 = ATanPos(abs(x)); + return (x < 0.0f) ? 
-t0 : t0; +} + +#endif //SHADER_FAST_MATH_INC_FX \ No newline at end of file diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl index 67634b99b..a9965a9cf 100644 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -1,36 +1,20 @@ +#include "Common/DeferredShared.hlsl" #include "Common/GBuffer.hlsl" -Texture2D SpecularTexture : register(t0); -Texture2D AlbedoTexture : register(t1); -Texture2D ReflectanceTexture : register(t2); -Texture2D NormalRoughnessTexture : register(t3); -Texture2D ShadowMaskTexture : register(t4); -Texture2D DepthTexture : register(t5); +Texture2D SpecularTexture : register(t0); +Texture2D AlbedoTexture : register(t1); +Texture2D ReflectanceTexture : register(t2); +Texture2D NormalRoughnessTexture : register(t3); +Texture2D ShadowMaskTexture : register(t4); +Texture2D DepthTexture : register(t5); +Texture2D GITexture : register(t6); -RWTexture2D MainRW : register(u0); -RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); -RWTexture2D FilteredShadowMaskRW : register(u2); +RWTexture2D MainRW : register(u0); +RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); SamplerState LinearSampler : register(s0); -cbuffer PerFrame : register(b0) -{ - float4 DirLightDirectionVS[2]; - float4 DirLightColor; - float4 CameraData; - float2 BufferDim; - float2 RcpBufferDim; - float4x4 ViewMatrix[2]; - float4x4 ProjMatrix[2]; - float4x4 ViewProjMatrix[2]; - float4x4 InvViewMatrix[2]; - float4x4 InvProjMatrix[2]; - row_major float3x4 DirectionalAmbient; - uint FrameCount; - uint pad0[3]; -}; - // # define DEBUG half GetScreenDepth(half depth) @@ -71,8 +55,7 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) return (uv.xy / uv.w) * half2(0.5f, -0.5f) + 0.5f; } -[numthreads(32, 32, 1)] void main(uint3 globalId : SV_DispatchThreadID, uint3 localId : SV_GroupThreadID, uint3 groupId : SV_GroupID) -{ +[numthreads(32, 32, 1)] void DirectionalShadowPass(uint3 
globalId : SV_DispatchThreadID, uint3 localId : SV_GroupThreadID, uint3 groupId : SV_GroupID) { half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim.xy; half3 normalGlossiness = NormalRoughnessTexture[globalId.xy]; @@ -81,20 +64,15 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) half rawDepth = DepthTexture[globalId.xy]; half depth = GetScreenDepth(rawDepth); - uint eyeIndex = 0; - half shadow = ShadowMaskTexture[globalId.xy]; half weight = 1.0; - + half NdotL = dot(normalVS, DirLightDirectionVS[0].xyz); - if (NdotL > 0.0) - { + if (NdotL > 0.0) { // Approximation of PCF in screen-space - for(int i = -1; i < 1; i++) - { - for(int k = -1; k < 1; k++) - { + for (int i = -1; i < 1; i++) { + for (int k = -1; k < 1; k++) { if (i == 0 && k == 0) continue; float2 offset = float2(i, k) * RcpBufferDim.xy * 1.5; @@ -105,8 +83,27 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) } } shadow /= weight; - } - + } + + half4 albedo = AlbedoTexture[globalId.xy]; + + half3 color = MainRW[globalId.xy].rgb; + color += albedo * lerp(max(0, NdotL), 1.0, albedo.w) * DirLightColor.xyz * shadow; + + MainRW[globalId.xy] = half4(color.xyz, 1.0); +}; + +[numthreads(32, 32, 1)] void MainCompositePass(uint3 globalId : SV_DispatchThreadID, uint3 localId : SV_GroupThreadID, uint3 groupId : SV_GroupID) { + half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim.xy; + + half3 normalGlossiness = NormalRoughnessTexture[globalId.xy]; + half3 normalVS = DecodeNormal(normalGlossiness.xyz); + + half rawDepth = DepthTexture[globalId.xy]; + half depth = GetScreenDepth(rawDepth); + + uint eyeIndex = 0; + half4 diffuseColor = MainRW[globalId.xy]; half3 specularColor = SpecularTexture[globalId.xy]; @@ -117,30 +114,28 @@ half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) half4 albedo = AlbedoTexture[globalId.xy]; half3 color = diffuseColor + specularColor; - - color += albedo * lerp(max(0, NdotL), 1.0, albedo.w) * DirLightColor.xyz * shadow; + + half4 giAo = 
GITexture[globalId.xy]; + half3 gi = giAo.rgb; + half ao = giAo.w; half3 directionalAmbientColor = mul(DirectionalAmbient, half4(normalWS, 1.0)); - color += albedo * directionalAmbientColor; + color += albedo * directionalAmbientColor * ao + gi; -# if defined(DEBUG) +#if defined(DEBUG) half2 texCoord = half2(globalId.xy) / BufferDim.xy; - if (texCoord.x < 0.5 && texCoord.y < 0.5) - { + if (texCoord.x < 0.5 && texCoord.y < 0.5) { color = color; - } else if (texCoord.x < 0.5) - { + } else if (texCoord.x < 0.5) { color = albedo; - } else if (texCoord.y < 0.5) - { - color = normalWS; + } else if (texCoord.y < 0.5) { + color = normalWS; } else { - color = glossiness; + color = glossiness; } -# endif +#endif MainRW[globalId.xy] = half4(color.xyz, 1.0); NormalTAAMaskSpecularMaskRW[globalId.xy] = half4(EncodeNormalVanilla(normalVS), 0.0, glossiness); -} - +} \ No newline at end of file diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index a0ca56cf5..a545e15e8 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -1,9 +1,9 @@ #include "Common/Color.hlsl" #include "Common/FrameBuffer.hlsl" +#include "Common/GBuffer.hlsl" #include "Common/LightingData.hlsl" #include "Common/MotionBlur.hlsl" #include "Common/Permutation.hlsl" -#include "Common/GBuffer.hlsl" #define PI 3.1415927 @@ -488,7 +488,7 @@ VS_OUTPUT main(VS_INPUT input) typedef VS_OUTPUT PS_INPUT; -# if defined(DEFERRED) +#if defined(DEFERRED) struct PS_OUTPUT { float4 Diffuse : SV_Target0; @@ -497,21 +497,21 @@ struct PS_OUTPUT float4 Albedo : SV_Target3; float4 Specular : SV_Target4; float4 Reflectance : SV_Target5; -#if defined(SNOW) +# if defined(SNOW) float4 SnowParameters : SV_Target6; -#endif +# endif }; -# else +#else struct PS_OUTPUT { float4 Diffuse : SV_Target0; float4 MotionVectors : SV_Target1; float4 ScreenSpaceNormals : SV_Target2; -#if defined(SNOW) +# if defined(SNOW) float4 SnowParameters : SV_Target3; -#endif -}; # endif +}; +#endif #ifdef 
PSHADER @@ -1020,10 +1020,6 @@ float GetSnowParameterY(float texProjTmp, float alpha) # include "WetnessEffects/WetnessEffects.hlsli" # endif -# if defined(CLOUD_SHADOWS) -# include "CloudShadows/CloudShadows.hlsli" -# endif - # if !defined(LANDSCAPE) # undef TERRAIN_BLENDING # endif @@ -1510,14 +1506,6 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace normalizedDirLightDirectionWS = normalize(mul(input.World[eyeIndex], float4(DirLightDirection.xyz, 0))); # endif -# if defined(CLOUD_SHADOWS) - float3 cloudShadowMult = 1.0; - if (perPassCloudShadow[0].EnableCloudShadows) { - cloudShadowMult = getCloudShadowMult(input.WorldPosition.xyz, normalizedDirLightDirectionWS, SampColorSampler); - dirLightColor *= cloudShadowMult; - } -# endif - float3 nsDirLightColor = dirLightColor; if ((shaderDescriptors[0].PixelShaderDescriptor & _DefShadow) && (shaderDescriptors[0].PixelShaderDescriptor & _ShadowDir)) @@ -1889,14 +1877,6 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace float3 directionalAmbientColor = mul(DirectionalAmbient, modelNormal); -# if !defined(DEFERRED) -# if defined(CLOUD_SHADOWS) - if (perPassCloudShadow[0].EnableCloudShadows) - directionalAmbientColor *= lerp(1.0, cloudShadowMult, perPassCloudShadow[0].AbsorptionAmbient); -# endif - diffuseColor += directionalAmbientColor; -# endif - diffuseColor += emitColor.xyz; # if defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE) @@ -2168,8 +2148,8 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace psout.MotionVectors.xy = SSRParams.z > 1e-5 ? 
float2(1, 0) : screenMotionVector.xy; psout.MotionVectors.zw = float2(0, 1); - -#if !defined(DEFERRED) + +# if !defined(DEFERRED) float tmp = -1e-5 + SSRParams.x; float tmp3 = (SSRParams.y - tmp); float tmp2 = (glossiness - tmp); @@ -2178,7 +2158,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace tmp *= tmp * (3 + -2 * tmp); psout.ScreenSpaceNormals.w = tmp * SSRParams.w; -# if defined(WATER_BLENDING) +# if defined(WATER_BLENDING) if (perPassWaterBlending[0].EnableWaterBlendingSSR) { // Compute distance to water surface float distToWater = max(0, input.WorldPosition.z - waterHeight); @@ -2186,17 +2166,17 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace // Reduce SSR amount psout.ScreenSpaceNormals.w *= blendFactor; } -# endif // WATER_BLENDING +# endif // WATER_BLENDING -# if (defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE)) -# if defined(DYNAMIC_CUBEMAPS) +# if (defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE)) +# if defined(DYNAMIC_CUBEMAPS) psout.ScreenSpaceNormals.w = saturate(sqrt(envMask)); +# endif # endif -# endif -# if defined(WETNESS_EFFECTS) +# if defined(WETNESS_EFFECTS) psout.ScreenSpaceNormals.w = max(psout.ScreenSpaceNormals.w, flatnessAmount); -# endif +# endif // Green reflections fix if (FrameParams.z) @@ -2207,9 +2187,9 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace psout.ScreenSpaceNormals.xy = screenSpaceNormal.xy + 0.5.xx; psout.ScreenSpaceNormals.z = 0; -# if defined(TERRAIN_BLENDING) +# if defined(TERRAIN_BLENDING) // Pixel Depth Offset -# if defined(COMPLEX_PARALLAX_MATERIALS) +# if defined(COMPLEX_PARALLAX_MATERIALS) if (perPassParallax[0].EnableTerrainParallax) { float height = 0; if (input.LandBlendWeights1.x > 0) @@ -2237,37 +2217,37 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace clip(blendFactorTerrain); blendFactorTerrain = saturate(blendFactorTerrain); } -# endif +# endif psout.Albedo.w = blendFactorTerrain; -# if defined(SNOW) +# if defined(SNOW) psout.SnowParameters.w = blendFactorTerrain; +# 
endif # endif -# endif -# if defined(SSS) && defined(SKIN) +# if defined(SSS) && defined(SKIN) if (perPassSSS[0].ValidMaterial) { float sssAmount = saturate(baseColor.a) * 0.5; psout.ScreenSpaceNormals.z = perPassSSS[0].IsBeastRace ? sssAmount : sssAmount + 0.5; } -# endif +# endif # else psout.MotionVectors.zw = float2(0.0, psout.Diffuse.w); psout.Specular = float4(specularColor.xyz, psout.Diffuse.w); psout.Albedo = float4(baseColor.xyz * realVertexColor, psout.Diffuse.w); psout.Reflectance = float4(0.0.xxx, psout.Diffuse.w); - + float outGlossiness = saturate(glossiness * SSRParams.w); psout.NormalGlossiness = float4(EncodeNormal(screenSpaceNormal), outGlossiness, psout.Diffuse.w); - if (lightingData[0].Opaque){ + if (lightingData[0].Opaque) { psout.Albedo.w = 0; psout.NormalGlossiness.w = 0; -#if defined(SKIN) +# if defined(SKIN) psout.NormalGlossiness.w = 1; -#endif +# endif } # endif diff --git a/src/Bindings.cpp b/src/Bindings.cpp index 429e2f671..4d7324c4c 100644 --- a/src/Bindings.cpp +++ b/src/Bindings.cpp @@ -1,8 +1,11 @@ #include "Bindings.h" #include "State.h" #include "Util.h" -#include +#include +#include #include +#include +#include void Bindings::DepthStencilStateSetDepthMode(RE::BSGraphics::DepthStencilDepthMode a_mode) { @@ -151,6 +154,40 @@ void Bindings::SetupResources() samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, &linearSampler)); } + + { + D3D11_TEXTURE2D_DESC texDesc; + auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + mainTex.texture->GetDesc(&texDesc); + + texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = 1 } + }; + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = { + .Format = 
texDesc.Format, + .ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + + { + giTexture = new Texture2D(texDesc); + giTexture->CreateSRV(srvDesc); + giTexture->CreateRTV(rtvDesc); + giTexture->CreateUAV(uavDesc); + } + } } void Bindings::Reset() @@ -167,6 +204,11 @@ void Bindings::UpdateConstantBuffer() auto shadowState = RE::BSGraphics::RendererShadowState::GetSingleton(); if (REL::Module::IsVR()) { + auto posAdjust = shadowState->GetVRRuntimeData().posAdjust.getEye(0); + data.CamPosAdjust[0] = { posAdjust.x, posAdjust.y, posAdjust.z, 0 }; + posAdjust = shadowState->GetVRRuntimeData().posAdjust.getEye(1); + data.CamPosAdjust[1] = { posAdjust.x, posAdjust.y, posAdjust.z, 0 }; + data.ViewMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).viewMat; data.ViewMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).viewMat; data.ProjMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).projMat; @@ -176,13 +218,18 @@ void Bindings::UpdateConstantBuffer() data.InvViewMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).viewMat.Invert(); data.InvViewMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).viewMat.Invert(); data.InvProjMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).projMat.Invert(); - data.InvProjMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(1).projMat.Invert(); + data.InvProjMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).projMat.Invert(); + data.InvViewProjMatrix[0] = data.InvViewMatrix[0] * data.InvProjMatrix[0]; + data.InvViewProjMatrix[1] = data.InvViewMatrix[1] * data.InvProjMatrix[1]; } else { + auto posAdjust = shadowState->GetRuntimeData().posAdjust.getEye(0); + data.CamPosAdjust[0] = { posAdjust.x, posAdjust.y, posAdjust.z, 0 }; data.ViewMatrix[0] = 
shadowState->GetRuntimeData().cameraData.getEye(0).viewMat; data.ProjMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).projMat; data.ViewProjMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).viewProjMat; data.InvViewMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).viewMat.Invert(); data.InvProjMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).projMat.Invert(); + data.InvViewProjMatrix[0] = data.InvViewMatrix[0] * data.InvProjMatrix[0]; } auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); @@ -346,6 +393,8 @@ void Bindings::DeferredPasses() { auto renderer = RE::BSGraphics::Renderer::GetSingleton(); auto context = renderer->GetRuntimeData().context; + auto state = State::GetSingleton(); + auto viewport = RE::BSGraphics::State::GetSingleton(); UpdateConstantBuffer(); @@ -354,19 +403,35 @@ void Bindings::DeferredPasses() context->CSSetConstantBuffers(0, 1, &buffer); } + { + FLOAT clr[4] = { 0., 0., 0., 1. }; + context->ClearUnorderedAccessViewFloat(giTexture->uav.get(), clr); + } + if (ScreenSpaceShadows::GetSingleton()->loaded) { ScreenSpaceShadows::GetSingleton()->DrawShadows(); } - { - auto specular = renderer->GetRuntimeData().renderTargets[SPECULAR]; - auto albedo = renderer->GetRuntimeData().renderTargets[ALBEDO]; - auto reflectance = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; - auto normalRoughness = renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS]; - auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; - auto shadowMask = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK]; + if (TerrainOcclusion::GetSingleton()->loaded) { + TerrainOcclusion::GetSingleton()->DrawTerrainOcclusion(); + } - ID3D11ShaderResourceView* srvs[7]{ + if (CloudShadows::GetSingleton()->loaded) { + CloudShadows::GetSingleton()->DrawShadows(); + } + + auto specular = 
renderer->GetRuntimeData().renderTargets[SPECULAR]; + auto albedo = renderer->GetRuntimeData().renderTargets[ALBEDO]; + auto reflectance = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; + auto normalRoughness = renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS]; + auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; + auto shadowMask = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK]; + + auto main = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[0]]; + auto normals = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[2]]; + + { + ID3D11ShaderResourceView* srvs[6]{ specular.SRV, albedo.SRV, reflectance.SRV, @@ -375,22 +440,16 @@ void Bindings::DeferredPasses() depth.depthSRV }; - context->CSSetShaderResources(0, 7, srvs); - - auto main = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[0]]; - auto normals = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[2]]; + context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); ID3D11UnorderedAccessView* uavs[2]{ main.UAV, normals.UAV }; - context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); context->CSSetSamplers(0, 1, &linearSampler); - auto shader = GetComputeDeferredComposite(); + auto shader = GetComputeDirectionalShadow(); context->CSSetShader(shader, nullptr, 0); - auto state = State::GetSingleton(); - auto viewport = RE::BSGraphics::State::GetSingleton(); - float resolutionX = state->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; float resolutionY = state->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; @@ -398,16 +457,48 @@ void Bindings::DeferredPasses() uint32_t dispatchY = (uint32_t)std::ceil(resolutionY / 32.0f); context->Dispatch(dispatchX, dispatchY, 1); + } - shader = GetComputeDeferredComposite(); + // features that 
require full diffuse lighting should be put here + if (ScreenSpaceGI::GetSingleton()->loaded) { + ScreenSpaceGI::GetSingleton()->DrawSSGI(giTexture); + } + + { + ID3D11ShaderResourceView* srvs[7]{ + specular.SRV, + albedo.SRV, + reflectance.SRV, + normalRoughness.SRV, + shadowMask.SRV, + depth.depthSRV, + giTexture->srv.get(), + }; + + context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, normals.UAV }; + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); + + context->CSSetSamplers(0, 1, &linearSampler); + + auto shader = GetComputeDeferredComposite(); context->CSSetShader(shader, nullptr, 0); + + float resolutionX = state->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; + float resolutionY = state->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; + + uint32_t dispatchX = (uint32_t)std::ceil(resolutionX / 32.0f); + uint32_t dispatchY = (uint32_t)std::ceil(resolutionY / 32.0f); + + context->Dispatch(dispatchX, dispatchY, 1); } - ID3D11ShaderResourceView* views[7]{ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; - context->CSSetShaderResources(0, 7, views); + ID3D11ShaderResourceView* views[8]{ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; + context->CSSetShaderResources(0, ARRAYSIZE(views), views); ID3D11UnorderedAccessView* uavs[2]{ nullptr, nullptr }; - context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); ID3D11Buffer* buffer = nullptr; context->CSSetConstantBuffers(0, 1, &buffer); @@ -474,13 +565,26 @@ void Bindings::ClearShaderCache() deferredCompositeCS->Release(); deferredCompositeCS = nullptr; } + if (directionalShadowCS) { + directionalShadowCS->Release(); + directionalShadowCS = nullptr; + } } ID3D11ComputeShader* Bindings::GetComputeDeferredComposite() { if (!deferredCompositeCS) { logger::debug("Compiling 
DeferredCompositeCS"); - deferredCompositeCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", {}, "cs_5_0"); + deferredCompositeCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", {}, "cs_5_0", "MainCompositePass"); } return deferredCompositeCS; +} + +ID3D11ComputeShader* Bindings::GetComputeDirectionalShadow() +{ + if (!directionalShadowCS) { + logger::debug("Compiling DirectionalShadowCS"); + directionalShadowCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", {}, "cs_5_0", "DirectionalShadowPass"); + } + return directionalShadowCS; } \ No newline at end of file diff --git a/src/Bindings.h b/src/Bindings.h index d4ffa9169..7629917dd 100644 --- a/src/Bindings.h +++ b/src/Bindings.h @@ -37,16 +37,19 @@ class Bindings ID3D11BlendState* forwardBlendStates[4]; RE::RENDER_TARGET forwardRenderTargets[4]; + ID3D11ComputeShader* directionalShadowCS = nullptr; ID3D11ComputeShader* deferredCompositeCS = nullptr; void ClearShaderCache(); ID3D11ComputeShader* GetComputeDeferredComposite(); + ID3D11ComputeShader* GetComputeDirectionalShadow(); bool inWorld = false; bool deferredPass = false; struct alignas(16) DeferredCB { + float4 CamPosAdjust[2]; float4 DirLightDirectionVS[2]; float4 DirLightColor; float4 CameraData; @@ -57,6 +60,7 @@ class Bindings DirectX::XMFLOAT4X4 ViewProjMatrix[2]; DirectX::XMFLOAT4X4 InvViewMatrix[2]; DirectX::XMFLOAT4X4 InvProjMatrix[2]; + DirectX::XMFLOAT4X4 InvViewProjMatrix[2]; DirectX::XMFLOAT3X4 DirectionalAmbient; uint FrameCount; uint pad0[3]; @@ -66,6 +70,8 @@ class Bindings ID3D11SamplerState* linearSampler = nullptr; + Texture2D* giTexture = nullptr; // RGB - GI/IL, A - AO + void UpdateConstantBuffer(); struct Hooks diff --git a/src/Feature.cpp b/src/Feature.cpp index 600f89ce6..36b5c7616 100644 --- a/src/Feature.cpp +++ b/src/Feature.cpp @@ -8,9 +8,11 @@ #include "Features/GrassCollision.h" #include 
"Features/GrassLighting.h" #include "Features/LightLimitFix.h" +#include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" #include "Features/SubsurfaceScattering.h" #include "Features/TerrainBlending.h" +#include "Features/TerrainOcclusion.h" #include "Features/WaterBlending.h" #include "Features/WaterCaustics.h" #include "Features/WaterParallax.h" @@ -115,7 +117,9 @@ const std::vector& Feature::GetFeatureList() TerrainBlending::GetSingleton(), WaterParallax::GetSingleton(), WaterCaustics::GetSingleton(), - SubsurfaceScattering::GetSingleton() + SubsurfaceScattering::GetSingleton(), + TerrainOcclusion::GetSingleton(), + ScreenSpaceGI::GetSingleton() }; static std::vector featuresVR = { @@ -128,7 +132,8 @@ const std::vector& Feature::GetFeatureList() LightLimitFix::GetSingleton(), TerrainBlending::GetSingleton(), WaterCaustics::GetSingleton(), - SubsurfaceScattering::GetSingleton() + SubsurfaceScattering::GetSingleton(), + ScreenSpaceGI::GetSingleton() }; return REL::Module::IsVR() ? featuresVR : features; diff --git a/src/Features/CloudShadows.cpp b/src/Features/CloudShadows.cpp index 35c5a47c7..559281898 100644 --- a/src/Features/CloudShadows.cpp +++ b/src/Features/CloudShadows.cpp @@ -2,18 +2,16 @@ #include "State.h" +#include "Bindings.h" #include "Util.h" -#include "magic_enum_flags.hpp" - NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( CloudShadows::Settings, EnableCloudShadows, CloudHeight, PlanetRadius, EffectMix, - TransparencyPower, - AbsorptionAmbient) + TransparencyPower) enum class SkyShaderTechniques { @@ -41,13 +39,6 @@ void CloudShadows::DrawSettings() "The amount of light absorbed by the cloud is determined by the alpha of the cloud. 
" "Negative value will result in more light absorbed, and more contrast between lit and occluded areas."); - ImGui::SliderFloat("Ambient Absorption", &settings.AbsorptionAmbient, 0.f, 1.f, "%.2f"); - if (auto _tt = Util::HoverTooltipWrapper()) - ImGui::Text( - "By default, ambient light is not affected by cloud, as it is an approximation of reflected light. " - "However, if you want darker ambient, you may turn it up a bit. " - "Not entirely physical, nonetheless helpful."); - ImGui::TreePop(); } @@ -80,6 +71,21 @@ void CloudShadows::CheckResourcesSide(int side) context->ClearRenderTargetView(cubemapCloudOccRTVs[side], black); } +void CloudShadows::CompileComputeShaders() +{ + logger::debug("Compiling shaders..."); + { + outputProgram = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\CloudShadows\\output.cs.hlsl", { {} }, "cs_5_0")); + } +} + +void CloudShadows::ClearShaderCache() +{ + if (outputProgram) + outputProgram->Release(); + CompileComputeShaders(); +} + void CloudShadows::ModifySky(const RE::BSShader*, const uint32_t descriptor) { if (!settings.EnableCloudShadows) @@ -163,59 +169,72 @@ void CloudShadows::ModifySky(const RE::BSShader*, const uint32_t descriptor) } } -void CloudShadows::ModifyLighting() +void CloudShadows::DrawShadows() { - auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + if (!settings.EnableCloudShadows || + (RE::Sky::GetSingleton()->mode.get() != RE::Sky::Mode::kFull) || + !RE::Sky::GetSingleton()->currentClimate) + return; auto shadowState = RE::BSGraphics::RendererShadowState::GetSingleton(); auto cubeMapRenderTarget = !REL::Module::IsVR() ? 
shadowState->GetRuntimeData().cubeMapRenderTarget : shadowState->GetVRRuntimeData().cubeMapRenderTarget; + if (cubeMapRenderTarget != RE::RENDER_TARGETS_CUBEMAP::kREFLECTIONS) { static Util::FrameChecker frame_checker; + + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto context = renderer->GetRuntimeData().context; + auto bindings = Bindings::GetSingleton(); + if (frame_checker.isNewFrame()) context->GenerateMips(texCubemapCloudOcc->srv.get()); - auto srv = texCubemapCloudOcc->srv.get(); - context->PSSetShaderResources(40, 1, &srv); - } else { - ID3D11ShaderResourceView* srv = nullptr; - context->PSSetShaderResources(40, 1, &srv); - } + std::array srvs = { nullptr }; + std::array uavs = { nullptr }; + + srvs.at(0) = perPass->SRV(); + srvs.at(1) = texCubemapCloudOcc->srv.get(); + srvs.at(2) = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY].depthSRV; + + uavs.at(0) = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK].UAV; + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(outputProgram, nullptr, 0); + context->Dispatch((bindings->giTexture->desc.Width + 31u) >> 5, (bindings->giTexture->desc.Height + 31u) >> 5, 1); - ID3D11ShaderResourceView* views[1]{}; - views[0] = perPass->srv.get(); - context->PSSetShaderResources(23, ARRAYSIZE(views), views); + // clean up + srvs.fill(nullptr); + uavs.fill(nullptr); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + } } void CloudShadows::Draw(const RE::BSShader* shader, const uint32_t descriptor) { + if (!settings.EnableCloudShadows || + (RE::Sky::GetSingleton()->mode.get() != RE::Sky::Mode::kFull) || + !RE::Sky::GetSingleton()->currentClimate) + return; + static Util::FrameChecker frame_checker; if 
(frame_checker.isNewFrame()) { // update settings buffer - auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; - PerPass perPassData{}; perPassData.Settings = settings; perPassData.Settings.TransparencyPower = exp2(perPassData.Settings.TransparencyPower); perPassData.RcpHPlusR = 1.f / (settings.CloudHeight + settings.PlanetRadius); - D3D11_MAPPED_SUBRESOURCE mapped; - DX::ThrowIfFailed(context->Map(perPass->resource.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - size_t bytes = sizeof(PerPass); - memcpy_s(mapped.pData, bytes, &perPassData, bytes); - context->Unmap(perPass->resource.get(), 0); + perPass->Update(&perPassData, sizeof(perPassData)); } switch (shader->shaderType.get()) { case RE::BSShader::Type::Sky: ModifySky(shader, descriptor); break; - case RE::BSShader::Type::Lighting: - case RE::BSShader::Type::DistantTree: - case RE::BSShader::Type::Grass: - // case RE::BSShader::Type::Water: - ModifyLighting(); - break; default: break; } @@ -264,22 +283,11 @@ void CloudShadows::SetupResources() } { - D3D11_BUFFER_DESC sbDesc{}; - sbDesc.Usage = D3D11_USAGE_DYNAMIC; - sbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - sbDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - sbDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - sbDesc.StructureByteStride = sizeof(PerPass); - sbDesc.ByteWidth = sizeof(PerPass); - perPass = std::make_unique(sbDesc); - - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; - srvDesc.Format = DXGI_FORMAT_UNKNOWN; - srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; - srvDesc.Buffer.FirstElement = 0; - srvDesc.Buffer.NumElements = 1; - perPass->CreateSRV(srvDesc); + perPass = std::make_unique(StructuredBufferDesc(), 1); + perPass->CreateSRV(); } + + CompileComputeShaders(); } void CloudShadows::RestoreDefaultSettings() diff --git a/src/Features/CloudShadows.h b/src/Features/CloudShadows.h index 03ac74bf1..0645df188 100644 --- a/src/Features/CloudShadows.h +++ b/src/Features/CloudShadows.h @@ -26,10 +26,9 @@ struct 
CloudShadows : Feature float EffectMix = 1.f; float TransparencyPower = 0.1f; - float AbsorptionAmbient = 0.2f; } settings; - struct alignas(16) PerPass + struct PerPass { Settings Settings; @@ -37,7 +36,7 @@ struct CloudShadows : Feature float padding; }; - std::unique_ptr perPass = nullptr; + std::unique_ptr perPass = nullptr; bool isCubemapPass = false; ID3D11BlendState* resetBlendState = nullptr; @@ -48,15 +47,20 @@ struct CloudShadows : Feature ID3D11RenderTargetView* cubemapCloudOccRTVs[6] = { nullptr }; ID3D11ShaderResourceView* cubemapCloudOccDebugSRV = nullptr; + ID3D11ComputeShader* outputProgram = nullptr; + virtual void SetupResources() override; + void CompileComputeShaders(); + virtual inline void Reset() override {} + virtual void ClearShaderCache() override; virtual void DrawSettings() override; void CheckResourcesSide(int side); void ModifySky(const RE::BSShader* shader, const uint32_t descriptor); - void ModifyLighting(); virtual void Draw(const RE::BSShader* shader, const uint32_t descriptor) override; + void DrawShadows(); virtual void Load(json& o_json) override; virtual void Save(json& o_json) override; diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp new file mode 100644 index 000000000..26332dec0 --- /dev/null +++ b/src/Features/ScreenSpaceGI.cpp @@ -0,0 +1,686 @@ +#include "ScreenSpaceGI.h" + +#include "Bindings.h" +#include "State.h" +#include "Util.h" + +#include "DirectXTex.h" + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + ScreenSpaceGI::Settings, + Enabled, + UseBitmask, + EnableGI, + EnableTemporalDenoiser, + NumSlices, + NumSteps, + DepthMIPSamplingOffset, + EffectRadius, + EffectFalloffRange, + ThinOccluderCompensation, + Thickness, + DepthFadeRange, + CheckBackface, + BackfaceStrength, + EnableGIBounce, + GIBounceFade, + GIDistanceCompensation, + GICompensationMaxDist, + AOPower, + GIStrength, + DepthDisocclusion, + MaxAccumFrames) + +class DisableGuard +{ +private: + bool disable; + +public: + 
DisableGuard(bool disable) : + disable(disable) + { + if (disable) + ImGui::BeginDisabled(); + } + ~DisableGuard() + { + if (disable) + ImGui::EndDisabled(); + } +}; + +bool percentageSlider(const char* label, float* data, const char* format = "%.1f %%") +{ + float percentageData = (*data) * 1e2f; + bool retval = ImGui::SliderFloat(label, &percentageData, 0.f, 100.f, format); + (*data) = percentageData * 1e-2f; + return retval; +} + +//////////////////////////////////////////////////////////////////////////////////// + +void ScreenSpaceGI::RestoreDefaultSettings() +{ + settings = {}; +} + +void ScreenSpaceGI::DrawSettings() +{ + /////////////////////////////// + ImGui::SeparatorText("Toggles"); + + if (ImGui::BeginTable("Toggles", 3)) { + ImGui::TableNextColumn(); + ImGui::Checkbox("Enabled", &settings.Enabled); + ImGui::TableNextColumn(); + recompileFlag |= ImGui::Checkbox("GI", &settings.EnableGI); + ImGui::TableNextColumn(); + recompileFlag |= ImGui::Checkbox("Bitmask", &settings.UseBitmask); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("An alternative way to calculate AO/GI"); + + ImGui::EndTable(); + } + + /////////////////////////////// + ImGui::SeparatorText("Quality/Performance"); + + ImGui::SliderInt("Slices", (int*)&settings.NumSlices, 1, 10); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How many directions do the samples take. A greater value reduces noise but is more expensive."); + + ImGui::SliderInt("Steps Per Slice", (int*)&settings.NumSteps, 1, 20); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How many samples does it take in one direction. 
A greater value enhances the effects but is more expensive."); + + ImGui::SliderFloat("MIP Sampling Offset", &settings.DepthMIPSamplingOffset, 2.f, 6.f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Mainly performance (texture memory bandwidth) setting but as a side-effect reduces overshadowing by thin objects and increases temporal instability."); + + if (ImGui::BeginTable("Quality Toggles", 2)) { + ImGui::TableNextColumn(); + recompileFlag |= ImGui::Checkbox("Half Resolution", &settings.HalfRes); + + ImGui::EndTable(); + } + + /////////////////////////////// + ImGui::SeparatorText("Visual"); + + ImGui::SliderFloat("AO Power", &settings.AOPower, 0.f, 3.f, "%.2f"); + + { + auto _ = DisableGuard(!settings.EnableGI); + ImGui::SliderFloat("GI Strength", &settings.GIStrength, 0.f, 20.f, "%.2f"); + // percentageSlider("GI Saturation", &settings.GISaturation); + } + + ImGui::Separator(); + + ImGui::SliderFloat("Effect radius", &settings.EffectRadius, 10.f, 300.0f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("World (viewspace) effect radius. 
Depends on the scene & requirements"); + + ImGui::SliderFloat2("Depth Fade Range", &settings.DepthFadeRange.x, 1e4, 5e4, "%.0f game units"); + + ImGui::Separator(); + + { + auto _ = DisableGuard(settings.UseBitmask); + + ImGui::SliderFloat("Falloff Range", &settings.EffectFalloffRange, 0.05, 1.0, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Gently reduce sample impact as it gets out of 'Effect radius' bounds"); + + ImGui::SliderFloat("Thin Occluder Compensation", &settings.ThinOccluderCompensation, 0.f, 0.7f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Slightly reduce impact of samples further back to counter the bias from depth-based (incomplete) input scene geometry data"); + } + { + auto _ = DisableGuard(!settings.UseBitmask); + + ImGui::SliderFloat("Thickness", &settings.Thickness, 0.f, 500.0f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How thick the occluders are. 20 to 30 percent of effect radius is recommended."); + } + + /////////////////////////////// + ImGui::SeparatorText("Visual - GI"); + + { + auto _ = DisableGuard(!settings.EnableGI); + + ImGui::SliderFloat("GI Distance Compensation", &settings.GIDistanceCompensation, 0.0f, 9.0f, "%.1f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text( + "Brighten up further radiance samples that are otherwise too weak. Creates a wider GI look.\n" + "If using bitmask, this value should be roughly inverse to thickness."); + + ImGui::SliderFloat("GI Compensation Distance", &settings.GICompensationMaxDist, 10.0f, 500.0f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("The distance of maximal compensation/brightening."); + + ImGui::Separator(); + + recompileFlag |= ImGui::Checkbox("GI Bounce", &settings.EnableGIBounce); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Simulates multiple light bounces. 
Better with denoiser on."); + + { + auto __ = DisableGuard(!settings.EnableGIBounce); + ImGui::Indent(); + percentageSlider("GI Bounce Strength", &settings.GIBounceFade); + ImGui::Unindent(); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How much of this frame's GI gets carried to the next frame."); + } + + ImGui::Separator(); + + recompileFlag |= ImGui::Checkbox("Backface Checks", &settings.CheckBackface); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Disable to get some frames, IF you don't care about light emitting from the back of objects."); + { + auto __ = DisableGuard(!settings.CheckBackface); + ImGui::Indent(); + percentageSlider("Backface Lighting Mix", &settings.BackfaceStrength); + ImGui::Unindent(); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How bright at the back of objects is compared to the front. A small value to make up for foliage translucency."); + } + } + + /////////////////////////////// + ImGui::SeparatorText("Denoising"); + + ImGui::TextWrapped("At full resolution, you can try disabling denoisers and let TAA handle the noise."); + + recompileFlag |= ImGui::Checkbox("Temporal Denoiser", &settings.EnableTemporalDenoiser); + + { + auto _ = DisableGuard(!settings.EnableTemporalDenoiser); + ImGui::Indent(); + ImGui::SliderInt("Max Frame Accumulation", (int*)&settings.MaxAccumFrames, 1, 64, "%d", ImGuiSliderFlags_AlwaysClamp); + ImGui::Unindent(); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How many past frames to accumulate results with. Higher values are less noisy but potentially cause ghosting."); + } + + // ImGui::SliderInt("Passes", (int*)&settings.DenoisePasses, 0, 10); + // if (auto _tt = Util::HoverTooltipWrapper()) + // ImGui::Text("How many denoising passes to go through. 
The more the blurrier."); + + { + auto _ = DisableGuard(!settings.EnableTemporalDenoiser && !(settings.EnableGI || settings.EnableGIBounce)); + + ImGui::SliderFloat("Movement Disocclusion", &settings.DepthDisocclusion, 0.f, 100.f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text( + "If a pixel has moved this far from the last frame, its radiance will not be carried to this frame.\n" + "Lower values are stricter."); + } + + /////////////////////////////// + ImGui::SeparatorText("Debug"); + + if (ImGui::TreeNode("Buffer Viewer")) { + static float debugRescale = .3f; + ImGui::SliderFloat("View Resize", &debugRescale, 0.f, 1.f); + + // ImGui doesn't support U32 + // if (ImGui::TreeNode("texHilbertLUT")) { + // ImGui::Image(texHilbertLUT->srv.get(), { (float)texHilbertLUT->desc.Width, (float)texHilbertLUT->desc.Height }); + // ImGui::TreePop(); + // } + if (ImGui::TreeNode("texWorkingDepth")) { + ImGui::Image(texWorkingDepth->srv.get(), { texWorkingDepth->desc.Width * debugRescale, texWorkingDepth->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texPrevDepth")) { + ImGui::Image(texPrevDepth->srv.get(), { texPrevDepth->desc.Width * debugRescale, texPrevDepth->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texRadiance")) { + ImGui::Image(texRadiance->srv.get(), { texRadiance->desc.Width * debugRescale, texRadiance->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texGI0")) { + ImGui::Image(texGI0->srv.get(), { texGI0->desc.Width * debugRescale, texGI0->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texGI1")) { + ImGui::Image(texGI1->srv.get(), { texGI1->desc.Width * debugRescale, texGI1->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texPrevGIAlbedo")) { + ImGui::Image(texPrevGIAlbedo->srv.get(), { texPrevGIAlbedo->desc.Width * debugRescale, texPrevGIAlbedo->desc.Height * debugRescale }); + 
ImGui::TreePop(); + } + + ImGui::TreePop(); + } +} + +void ScreenSpaceGI::Load(json& o_json) +{ + if (o_json[GetName()].is_object()) + settings = o_json[GetName()]; + + Feature::Load(o_json); +} + +void ScreenSpaceGI::Save([[maybe_unused]] json& o_json) +{ + o_json[GetName()] = settings; +} + +void ScreenSpaceGI::SetupResources() +{ + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto device = renderer->GetRuntimeData().forwarder; + + logger::debug("Creating buffers..."); + { + ssgiCB = eastl::make_unique(ConstantBufferDesc()); + } + + logger::debug("Creating textures..."); + { + D3D11_TEXTURE2D_DESC texDesc{ + .Width = 64, + .Height = 64, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_R32_UINT, + .SampleDesc = { 1, 0 }, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS, + .CPUAccessFlags = 0, + .MiscFlags = 0 + }; + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = texDesc.MipLevels } + }; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + + { + texHilbertLUT = eastl::make_unique(texDesc); + texHilbertLUT->CreateSRV(srvDesc); + texHilbertLUT->CreateUAV(uavDesc); + } + + auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + mainTex.texture->GetDesc(&texDesc); + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R11G11B10_FLOAT; + texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + texDesc.MipLevels = srvDesc.Texture2D.MipLevels = 5; + texDesc.MiscFlags |= D3D11_RESOURCE_MISC_GENERATE_MIPS; + + { + texRadiance = eastl::make_unique(texDesc); + texRadiance->CreateSRV(srvDesc); + texRadiance->CreateUAV(uavDesc); + } + + texDesc.BindFlags &= 
~D3D11_BIND_RENDER_TARGET; + texDesc.MiscFlags &= ~D3D11_RESOURCE_MISC_GENERATE_MIPS; + texDesc.Format = srvDesc.Format = uavDesc.Format = DXGI_FORMAT_R16_FLOAT; + + { + texWorkingDepth = eastl::make_unique(texDesc); + texWorkingDepth->CreateSRV(srvDesc); + for (int i = 0; i < 5; ++i) { + uavDesc.Texture2D.MipSlice = i; + DX::ThrowIfFailed(device->CreateUnorderedAccessView(texWorkingDepth->resource.get(), &uavDesc, uavWorkingDepth[i].put())); + } + } + + uavDesc.Texture2D.MipSlice = 0; + texDesc.MipLevels = srvDesc.Texture2D.MipLevels = 1; + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + { + texGI0 = eastl::make_unique(texDesc); + texGI0->CreateSRV(srvDesc); + texGI0->CreateUAV(uavDesc); + + texGI1 = eastl::make_unique(texDesc); + texGI1->CreateSRV(srvDesc); + texGI1->CreateUAV(uavDesc); + } + + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R11G11B10_FLOAT; + { + texPrevGIAlbedo = eastl::make_unique(texDesc); + texPrevGIAlbedo->CreateSRV(srvDesc); + texPrevGIAlbedo->CreateUAV(uavDesc); + } + + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R8_UINT; + { + texAccumFrames = eastl::make_unique(texDesc); + texAccumFrames->CreateSRV(srvDesc); + texAccumFrames->CreateUAV(uavDesc); + } + + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R16_FLOAT; + { + texPrevDepth = eastl::make_unique(texDesc); + texPrevDepth->CreateSRV(srvDesc); + texPrevDepth->CreateUAV(uavDesc); + } + } + + logger::debug("Creating samplers..."); + { + D3D11_SAMPLER_DESC samplerDesc = { + .Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR, + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .MaxAnisotropy = 1, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX + }; + DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, linearClampSampler.put())); + + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + 
DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, pointClampSampler.put())); + } + + CompileComputeShaders(); +} + +void ScreenSpaceGI::ClearShaderCache() +{ + static const std::vector*> shaderPtrs = { + &hilbertLutCompute, &prefilterDepthsCompute, &radianceDisoccCompute, &giCompute, &upsampleCompute, &outputCompute + }; + + for (auto shader : shaderPtrs) + if ((*shader)) { + (*shader)->Release(); + shader->detach(); + } + + CompileComputeShaders(); +} + +void ScreenSpaceGI::CompileComputeShaders() +{ + struct ShaderCompileInfo + { + winrt::com_ptr* programPtr; + std::string_view filename; + std::vector> defines; + }; + + std::vector + shaderInfos = { + { &hilbertLutCompute, "hilbert.cs.hlsl", {} }, + { &prefilterDepthsCompute, "prefilterDepths.cs.hlsl", {} }, + { &radianceDisoccCompute, "radianceDisocc.cs.hlsl", {} }, + { &giCompute, "gi.cs.hlsl", {} }, + { &upsampleCompute, "upsample.cs.hlsl", {} }, + { &outputCompute, "output.cs.hlsl", {} } + }; + for (auto& info : shaderInfos) { + if (REL::Module::IsVR()) + info.defines.push_back({ "VR", "" }); + if (settings.HalfRes) + info.defines.push_back({ "HALF_RES", "" }); + if (settings.EnableTemporalDenoiser) + info.defines.push_back({ "TEMPORAL_DENOISER", "" }); + if (settings.UseBitmask) + info.defines.push_back({ "BITMASK", "" }); + if (settings.EnableGI) + info.defines.push_back({ "GI", "" }); + if (settings.EnableGIBounce) + info.defines.push_back({ "GI_BOUNCE", "" }); + if (settings.CheckBackface) + info.defines.push_back({ "BACKFACE", "" }); + } + + for (auto& info : shaderInfos) { + auto path = std::filesystem::path("Data\\Shaders\\ScreenSpaceGI") / info.filename; + if (auto rawPtr = reinterpret_cast(Util::CompileShader(path.c_str(), info.defines, "cs_5_0"))) + info.programPtr->attach(rawPtr); + } + + hilbertLutGenFlag = true; + recompileFlag = false; +} + +bool ScreenSpaceGI::ShadersOK() +{ + return hilbertLutCompute && prefilterDepthsCompute && radianceDisoccCompute && giCompute && upsampleCompute && 
outputCompute; +} + +void ScreenSpaceGI::GenerateHilbertLUT() +{ + auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + + ID3D11UnorderedAccessView* uav = texHilbertLUT->uav.get(); + context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + context->CSSetShader(hilbertLutCompute.get(), nullptr, 0); + + context->Dispatch(2, 2, 1); + + uav = nullptr; + context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + + hilbertLutGenFlag = false; +} + +void ScreenSpaceGI::UpdateSB() +{ + auto viewport = RE::BSGraphics::State::GetSingleton(); + auto state = RE::BSGraphics::RendererShadowState::GetSingleton(); + + uint resolution[2] = { + (uint)(State::GetSingleton()->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale), + (uint)(State::GetSingleton()->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale) + }; + uint halfRes[2] = { (resolution[0] + 1) >> 1, (resolution[1] + 1) >> 1 }; + + float2 res = settings.HalfRes ? float2{ (float)halfRes[0], (float)halfRes[1] } : float2{ (float)resolution[0], (float)resolution[1] }; + + static float4x4 prevInvView[2] = {}; + + SSGICB data; + { + for (int eyeIndex = 0; eyeIndex < (1 + REL::Module::IsVR()); ++eyeIndex) { + auto eye = (!REL::Module::IsVR()) ? 
state->GetRuntimeData().cameraData.getEye(eyeIndex) : state->GetVRRuntimeData().cameraData.getEye(eyeIndex); + + data.PrevInvViewMat[eyeIndex] = prevInvView[eyeIndex]; + data.DepthUnpackConsts[eyeIndex] = { -eye.projMat(3, 2), eye.projMat(2, 2) }; + data.NDCToViewMul[eyeIndex] = { 2.0f / eye.projMat(0, 0), -2.0f / eye.projMat(1, 1) }; + data.NDCToViewAdd[eyeIndex] = { -1.0f / eye.projMat(0, 0), 1.0f / eye.projMat(1, 1) }; + data.NDCToViewMul_x_PixelSize[eyeIndex] = data.NDCToViewMul[eyeIndex] / res; + if (REL::Module::IsVR()) + data.NDCToViewMul[eyeIndex].x *= 2; + + prevInvView[eyeIndex] = eye.viewMat.Invert(); + } + + data.FrameDim = res; + data.RcpFrameDim = float2(1.0f) / res; + data.FrameIndex = viewport->uiFrameCount; + + data.NumSlices = settings.NumSlices; + data.NumSteps = settings.NumSteps; + data.DepthMIPSamplingOffset = settings.DepthMIPSamplingOffset; + + data.EffectRadius = settings.EffectRadius; + data.EffectFalloffRange = settings.EffectFalloffRange; + data.ThinOccluderCompensation = settings.ThinOccluderCompensation; + data.Thickness = settings.Thickness; + data.DepthFadeRange = settings.DepthFadeRange; + data.DepthFadeScaleConst = 1 / (settings.DepthFadeRange.y - settings.DepthFadeRange.x); + + data.BackfaceStrength = settings.BackfaceStrength; + data.GIBounceFade = settings.GIBounceFade; + data.GIDistanceCompensation = settings.GIDistanceCompensation; + data.GICompensationMaxDist = settings.GICompensationMaxDist; + + data.AOPower = settings.AOPower; + data.GIStrength = settings.GIStrength; + + data.DepthDisocclusion = settings.DepthDisocclusion; + data.MaxAccumFrames = settings.MaxAccumFrames; + } + + ssgiCB->Update(data); +} + +void ScreenSpaceGI::DrawSSGI(Texture2D* outGI) +{ + if (!(settings.Enabled && ShadersOK())) + return; + + ////////////////////////////////////////////////////// + + if (recompileFlag) + ClearShaderCache(); + + if (hilbertLutGenFlag) + GenerateHilbertLUT(); + + UpdateSB(); + + 
////////////////////////////////////////////////////// + + auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + auto viewport = RE::BSGraphics::State::GetSingleton(); + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto rts = renderer->GetRuntimeData().renderTargets; + auto bindings = Bindings::GetSingleton(); + + uint resolution[2] = { + (uint)(State::GetSingleton()->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale), + (uint)(State::GetSingleton()->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale) + }; + uint halfRes[2] = { resolution[0] >> 1, resolution[1] >> 1 }; + auto targetRes = settings.HalfRes ? halfRes : resolution; + + std::array srvs = { nullptr }; + std::array uavs = { nullptr }; + std::array samplers = { pointClampSampler.get(), linearClampSampler.get() }; + auto cb = ssgiCB->CB(); + + auto resetViews = [&]() { + srvs.fill(nullptr); + uavs.fill(nullptr); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + }; + + ////////////////////////////////////////////////////// + + context->CSSetConstantBuffers(1, 1, &cb); + context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); + + // prefilter depths + { + srvs[0] = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY].depthSRV; + for (int i = 0; i < 5; ++i) + uavs[i] = uavWorkingDepth[i].get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(prefilterDepthsCompute.get(), nullptr, 0); + context->Dispatch((resolution[0] + 15) >> 4, (resolution[1] + 15) >> 4, 1); + } + + // fetch radiance and disocclusion + { + resetViews(); + srvs[0] = rts[bindings->forwardRenderTargets[0]].SRV; + srvs[1] = texGI0->srv.get(); + srvs[2] = 
texWorkingDepth->srv.get(); + srvs[3] = rts[NORMALROUGHNESS].SRV; + srvs[4] = texPrevDepth->srv.get(); + srvs[5] = rts[RE::RENDER_TARGET::kMOTION_VECTOR].SRV; + srvs[6] = texPrevGIAlbedo->srv.get(); + + uavs[0] = texRadiance->uav.get(); + uavs[1] = texAccumFrames->uav.get(); + uavs[2] = texGI1->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(radianceDisoccCompute.get(), nullptr, 0); + context->Dispatch((targetRes[0] + 7u) >> 3, (targetRes[1] + 7u) >> 3, 1); + + context->GenerateMips(texRadiance->srv.get()); + } + + // GI + { + resetViews(); + srvs[0] = texWorkingDepth->srv.get(); + srvs[1] = rts[NORMALROUGHNESS].SRV; + srvs[2] = texRadiance->srv.get(); + srvs[3] = texHilbertLUT->srv.get(); + srvs[4] = texAccumFrames->srv.get(); + srvs[5] = texGI1->srv.get(); + + uavs[0] = texGI0->uav.get(); + uavs[1] = nullptr; + uavs[2] = texPrevDepth->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(giCompute.get(), nullptr, 0); + context->Dispatch((targetRes[0] + 7u) >> 3, (targetRes[1] + 7u) >> 3, 1); + } + + // upsasmple + if (settings.HalfRes) { + resetViews(); + srvs[0] = texWorkingDepth->srv.get(); + srvs[1] = texGI0->srv.get(); + + uavs[0] = texGI1->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(upsampleCompute.get(), nullptr, 0); + context->Dispatch((resolution[0] + 7u) >> 3, (resolution[1] + 7u) >> 3, 1); + } + + // output + { + resetViews(); + srvs[0] = settings.HalfRes ? 
texGI1->srv.get() : texGI0->srv.get(); + srvs[1] = rts[ALBEDO].SRV; + + uavs[0] = outGI->uav.get(); + uavs[1] = texPrevGIAlbedo->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(outputCompute.get(), nullptr, 0); + context->Dispatch((resolution[0] + 7u) >> 3, (resolution[1] + 7u) >> 3, 1); + } + + // cleanup + resetViews(); + + samplers.fill(nullptr); + cb = nullptr; + + context->CSSetConstantBuffers(1, 1, &cb); + context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); + context->CSSetShader(nullptr, nullptr, 0); +} \ No newline at end of file diff --git a/src/Features/ScreenSpaceGI.h b/src/Features/ScreenSpaceGI.h new file mode 100644 index 000000000..fb8aec41a --- /dev/null +++ b/src/Features/ScreenSpaceGI.h @@ -0,0 +1,130 @@ +#pragma once + +#include "Buffer.h" +#include "Feature.h" + +struct ScreenSpaceGI : Feature +{ + static ScreenSpaceGI* GetSingleton() + { + static ScreenSpaceGI singleton; + return &singleton; + } + + virtual inline std::string GetName() override { return "Screen Space GI"; } + virtual inline std::string GetShortName() override { return "ScreenSpaceGI"; } + + virtual void RestoreDefaultSettings() override; + virtual void DrawSettings() override; + + virtual void Load(json& o_json) override; + virtual void Save(json& o_json) override; + + virtual inline void Reset() override{}; + virtual void SetupResources() override; + virtual void ClearShaderCache() override; + void CompileComputeShaders(); + bool ShadersOK(); + + virtual inline void Draw(const RE::BSShader*, const uint32_t) override{}; + + void DrawSSGI(Texture2D* outGI); + void GenerateHilbertLUT(); + void UpdateSB(); + + ////////////////////////////////////////////////////////////////////////////////// + + bool hilbertLutGenFlag = false; + bool recompileFlag = false; + + struct Settings + { + bool Enabled = true; + bool UseBitmask = true; + 
bool EnableGI = true; + // performance/quality + uint NumSlices = 2; + uint NumSteps = 5; + bool HalfRes = true; + // float SampleDistributionPower = 1.f; + float DepthMIPSamplingOffset = 3.3f; + // visual + float EffectRadius = 200.f; // world (viewspace) maximum size of the shadow + float EffectFalloffRange = .615f; + float ThinOccluderCompensation = 0.f; + float Thickness = 50.f; + float2 DepthFadeRange = { 2e4, 3e4 }; + // gi + bool CheckBackface = true; + float BackfaceStrength = 0.1f; + bool EnableGIBounce = true; + float GIBounceFade = 0.8f; + float GIDistanceCompensation = 1; + float GICompensationMaxDist = 200; + // mix + float AOPower = 1.f; + float GIStrength = 8.f; + // denoise + bool EnableTemporalDenoiser = true; + float DepthDisocclusion = 50.f; + uint MaxAccumFrames = 16; + } settings; + + struct alignas(16) SSGICB + { + float4x4 PrevInvViewMat[2]; + float2 DepthUnpackConsts[2]; + float2 NDCToViewMul[2]; + float2 NDCToViewAdd[2]; + float2 NDCToViewMul_x_PixelSize[2]; + + float2 FrameDim; + float2 RcpFrameDim; // + uint FrameIndex; + + uint NumSlices; + uint NumSteps; + float DepthMIPSamplingOffset; // + + float EffectRadius; + float EffectFalloffRange; + float ThinOccluderCompensation; + float Thickness; // + float2 DepthFadeRange; + float DepthFadeScaleConst; + + float BackfaceStrength; // + float GIBounceFade; + float GIDistanceCompensation; + float GICompensationMaxDist; + + float AOPower; // + float GIStrength; + + float DepthDisocclusion; + uint MaxAccumFrames; + + float pad[1]; + }; + eastl::unique_ptr ssgiCB; + + eastl::unique_ptr texHilbertLUT = nullptr; + eastl::unique_ptr texWorkingDepth = nullptr; + winrt::com_ptr uavWorkingDepth[5] = { nullptr }; + eastl::unique_ptr texPrevDepth = nullptr; + eastl::unique_ptr texRadiance = nullptr; + eastl::unique_ptr texAccumFrames = nullptr; + eastl::unique_ptr texGI0 = { nullptr }; + eastl::unique_ptr texGI1 = nullptr; + eastl::unique_ptr texPrevGIAlbedo = { nullptr }; + + winrt::com_ptr 
linearClampSampler = nullptr; + winrt::com_ptr pointClampSampler = nullptr; + + winrt::com_ptr hilbertLutCompute = nullptr; + winrt::com_ptr prefilterDepthsCompute = nullptr; + winrt::com_ptr radianceDisoccCompute = nullptr; + winrt::com_ptr giCompute = nullptr; + winrt::com_ptr upsampleCompute = nullptr; + winrt::com_ptr outputCompute = nullptr; +}; \ No newline at end of file diff --git a/src/Features/TerrainOcclusion.cpp b/src/Features/TerrainOcclusion.cpp new file mode 100644 index 000000000..ac1d73ff3 --- /dev/null +++ b/src/Features/TerrainOcclusion.cpp @@ -0,0 +1,594 @@ +#include "TerrainOcclusion.h" + +#include "Bindings.h" +#include "Util.h" + +#include + +#include +#include + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + TerrainOcclusion::Settings::AOGenSettings, + AoDistance, + SliceCount, + SampleCount) + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + TerrainOcclusion::Settings::EffectSettings, + EnableTerrainShadow, + EnableTerrainAO, + HeightBias, + ShadowSofteningRadiusAngle, + AOPower, + AOFadeOutHeight) + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + TerrainOcclusion::Settings, + AoGen, + Effect) + +void TerrainOcclusion::Load(json& o_json) +{ + if (o_json[GetName()].is_object()) + settings = o_json[GetName()]; + + Feature::Load(o_json); +} + +void TerrainOcclusion::Save(json& o_json) +{ + o_json[GetName()] = settings; +} + +void TerrainOcclusion::DrawSettings() +{ + ImGui::Checkbox("Enable Terrain Shadow", (bool*)&settings.Effect.EnableTerrainShadow); + ImGui::Checkbox("Enable Terrain AO", (bool*)&settings.Effect.EnableTerrainAO); + + ImGui::SliderFloat("Height Map Bias", &settings.Effect.HeightBias, -2000.f, 0.f, "%.0f units"); + + ImGui::SeparatorText("Shadow"); + { + // ImGui::SliderAngle("Softening", &settings.Effect.ShadowSofteningRadiusAngle, .1f, 10.f, "%.2f deg", ImGuiSliderFlags_AlwaysClamp); + // if (auto _tt = Util::HoverTooltipWrapper()) + // ImGui::Text("Controls the solid angle of sunlight, making terrain shadows 
softer."); + + ImGui::SliderFloat2("Fade Distance", &settings.Effect.ShadowFadeDistance.x, 0, 10000.f, "%.0f units"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Shadows around you are and should be handled by vanilla shadow maps."); + if (auto settingCollection = RE::INIPrefSettingCollection::GetSingleton()) { + auto gameShadowDist = settingCollection->GetSetting("fShadowDistance:Display")->GetFloat(); + ImGui::Text("Your fShadowDistance setting: %f", gameShadowDist); + } + } + } + + ImGui::SeparatorText("AO"); + { + ImGui::SliderFloat("Mix", &settings.Effect.AOMix, 0, 1, "%.2f", ImGuiSliderFlags_AlwaysClamp); + ImGui::SliderFloat("Power", &settings.Effect.AOPower, 0.2f, 5, "%.2f"); + ImGui::SliderFloat("Fadeout Height", &settings.Effect.AOFadeOutHeight, 500, 5000, "%.0f units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("On the ground AO is the most prominent. Up to a certain height it will gradually fade out."); + + if (ImGui::TreeNodeEx("Precomputation", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::SliderFloat("Distance", &settings.AoGen.AoDistance, 1.f / 32, 32, "%.2f cells"); + ImGui::InputScalar("Slices", ImGuiDataType_U32, &settings.AoGen.SliceCount); + ImGui::InputScalar("Samples", ImGuiDataType_U32, &settings.AoGen.SampleCount); + if (ImGui::Button("Force Regenerate AO", { -1, 0 })) + needPrecompute = true; + + ImGui::TreePop(); + } + } + + if (ImGui::CollapsingHeader("Debug")) { + std::string curr_worldspace = "N/A"; + std::string curr_worldspace_name = "N/A"; + auto tes = RE::TES::GetSingleton(); + if (tes) { + auto worldspace = tes->GetRuntimeData2().worldSpace; + if (worldspace) { + curr_worldspace = worldspace->GetFormEditorID(); + curr_worldspace_name = worldspace->GetName(); + } + } + ImGui::Text(fmt::format("Current worldspace: {} ({})", curr_worldspace, curr_worldspace_name).c_str()); + ImGui::Text(fmt::format("Has height map: {}", heightmaps.contains(curr_worldspace)).c_str()); + + ImGui::Separator(); + + if 
(texOcclusion) { + ImGui::BulletText("shadowUpdateCBData"); + ImGui::Indent(); + { + ImGui::Text(fmt::format("LightPxDir: ({}, {})", shadowUpdateCBData.LightPxDir.x, shadowUpdateCBData.LightPxDir.y).c_str()); + ImGui::Text(fmt::format("LightDeltaZ: ({}, {})", shadowUpdateCBData.LightDeltaZ.x, shadowUpdateCBData.LightDeltaZ.y).c_str()); + ImGui::Text(fmt::format("StartPxCoord: {}", shadowUpdateCBData.StartPxCoord).c_str()); + ImGui::Text(fmt::format("PxSize: ({}, {})", shadowUpdateCBData.PxSize.x, shadowUpdateCBData.PxSize.y).c_str()); + } + ImGui::Unindent(); + + ImGui::BulletText("texOcclusion"); + ImGui::Image(texOcclusion->srv.get(), { texOcclusion->desc.Width * .1f, texOcclusion->desc.Height * .1f }); + ImGui::BulletText("texNormalisedHeight"); + ImGui::Image(texNormalisedHeight->srv.get(), { texNormalisedHeight->desc.Width * .1f, texNormalisedHeight->desc.Height * .1f }); + ImGui::BulletText("texShadowHeight"); + ImGui::Image(texShadowHeight->srv.get(), { texShadowHeight->desc.Width * .1f, texShadowHeight->desc.Height * .1f }); + } + } +} + +/* Releases the three compute shader programs that are currently set and then recompiles them via CompileComputeShaders(). NOTE(review): the header declares these members as winrt::com_ptr; if assigning nullptr to such a pointer also releases the held reference, the explicit Release() before it would release twice - confirm. */ void TerrainOcclusion::ClearShaderCache() +{ + if (occlusionProgram) { + occlusionProgram->Release(); + occlusionProgram = nullptr; + } + if (shadowUpdateProgram) { + shadowUpdateProgram->Release(); + shadowUpdateProgram = nullptr; + } + if (outputProgram) { + outputProgram->Release(); + outputProgram = nullptr; + } + + CompileComputeShaders(); +} + +/* Lists the .dds files in the heightmaps texture directory, parses the dot-separated fields of every HeightMap file name into a HeightMapMetadata entry keyed by worldspace, then creates the structured buffers and the constant buffer and compiles the compute shaders. */ void TerrainOcclusion::SetupResources() +{ + auto device = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().forwarder; + + logger::debug("Listing height maps..."); + { + std::filesystem::path texture_dir{ L"Data\\textures\\heightmaps\\" }; + /* NOTE(review): the iterator constructor presumably throws when the directory does not exist - confirm that is acceptable here. */ for (auto const& dir_entry : std::filesystem::directory_iterator{ texture_dir }) { + auto filename = dir_entry.path().filename(); + if (filename.extension() != ".dds") + continue; + + logger::debug("Found dds: {}", filename.string()); + + auto splitstr = pystring::split(filename.stem().string(), "."); + + if (splitstr.size() !=
10) + logger::warn("{} has incorrect number ({} instead of 10) of fields", filename.string(), splitstr.size()); + + /* NOTE(review): the size check above only logs a warning, so the indexed accesses below still run for a name with too few fields; splitstr[1] is then out of range - consider skipping the file. */ if (splitstr[1] == "HeightMap") { + HeightMapMetadata metadata; + try { + metadata.worldspace = splitstr[0]; + metadata.pos0.x = std::stoi(splitstr[2]) * 4096.f; + metadata.pos1.y = std::stoi(splitstr[3]) * 4096.f; + metadata.pos1.x = (std::stoi(splitstr[4]) + 1) * 4096.f; + metadata.pos0.y = (std::stoi(splitstr[5]) + 1) * 4096.f; + metadata.pos0.z = std::stoi(splitstr[6]) * 8.f; + metadata.pos1.z = std::stoi(splitstr[7]) * 8.f; + metadata.zRange.x = std::stoi(splitstr[8]) * 8.f; + metadata.zRange.y = std::stoi(splitstr[9]) * 8.f; + } catch (std::exception& e) { + logger::warn("Failed to parse {}. Error: {}", filename.string(), e.what()); + continue; + } + + metadata.dir = dir_entry.path().parent_path().wstring(); + metadata.filename = filename.string(); + + /* the first heightmap found for a worldspace wins; later ones are only reported */ if (heightmaps.contains(metadata.worldspace)) { + logger::warn("{} has more than one height maps!", metadata.worldspace); + } else { + heightmaps[metadata.worldspace] = metadata; + } + } else if (splitstr[1] != "AO" && splitstr[1] != "Cone") + logger::warn("{} has unknown type ({})", filename.string(), splitstr[1]); + } + } + + logger::debug("Creating structured buffers..."); + { + D3D11_BUFFER_DESC sbDesc{}; + sbDesc.Usage = D3D11_USAGE_DYNAMIC; + sbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + sbDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + sbDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + sbDesc.StructureByteStride = sizeof(AOGenBuffer); + sbDesc.ByteWidth = sizeof(AOGenBuffer); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = 1; + + aoGenBuffer = std::make_unique(sbDesc); + aoGenBuffer->CreateSRV(srvDesc); + + /* the same descriptors are reused for perPass with only the element size changed */ sbDesc.StructureByteStride = sizeof(PerPass); + sbDesc.ByteWidth = sizeof(PerPass); + + perPass =
std::make_unique(sbDesc); + perPass->CreateSRV(srvDesc); + } + + logger::debug("Creating constant buffers..."); + { + shadowUpdateCB = std::make_unique(ConstantBufferDesc()); + } + + CompileComputeShaders(); +} + +/* Compiles the AOGen, ShadowUpdate and Output compute shaders as cs_5_0 and attaches every program pointer that is non-null to its member; a null result leaves that member unchanged. */ void TerrainOcclusion::CompileComputeShaders() +{ + logger::debug("Compiling shaders..."); + { + auto program_ptr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\TerrainOcclusion\\AOGen.cs.hlsl", { {} }, "cs_5_0")); + if (program_ptr) + occlusionProgram.attach(program_ptr); + + program_ptr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\TerrainOcclusion\\ShadowUpdate.cs.hlsl", { {} }, "cs_5_0")); + if (program_ptr) + shadowUpdateProgram.attach(program_ptr); + + program_ptr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\TerrainOcclusion\\Output.cs.hlsl", { {} }, "cs_5_0")); + if (program_ptr) + outputProgram.attach(program_ptr); + } +} + +/* Returns true when a heightmap is cached and its worldspace equals the editor ID of the current worldspace; returns false when the TES singleton or the worldspace is null. */ bool TerrainOcclusion::IsHeightMapReady() +{ + if (auto tes = RE::TES::GetSingleton()) + if (auto worldspace = tes->GetRuntimeData2().worldSpace) + return cachedHeightmap && cachedHeightmap->worldspace == worldspace->GetFormEditorID(); + return false; +} + +/* Empty override; the body does nothing. */ void TerrainOcclusion::Draw(const RE::BSShader*, const uint32_t) +{ +} + +/* Builds the PerPass data from settings.Effect, turns both effects off while no matching heightmap is ready, and copies the result into the perPass buffer through a write-discard map. */ void TerrainOcclusion::UpdateBuffer() +{ + auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + + bool isHeightmapReady = IsHeightMapReady(); + + PerPass data = { + .effect = settings.Effect, + }; + data.effect.EnableTerrainAO = data.effect.EnableTerrainAO && isHeightmapReady; + data.effect.EnableTerrainShadow = data.effect.EnableTerrainShadow && isHeightmapReady; + + if (isHeightmapReady) { + /* the buffer receives the reciprocal of the fade-out height */ data.effect.AOFadeOutHeight = 1.f / data.effect.AOFadeOutHeight; + + data.invScale = cachedHeightmap->pos1 - cachedHeightmap->pos0; + data.scale = float3(1.f, 1.f, 1.f) / data.invScale; + data.offset = -cachedHeightmap->pos0 * data.scale; + data.zRange = cachedHeightmap->zRange; + } + + D3D11_MAPPED_SUBRESOURCE mapped; +
DX::ThrowIfFailed(context->Map(perPass->resource.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + size_t bytes = sizeof(PerPass); + memcpy_s(mapped.pData, bytes, &data, bytes); + context->Unmap(perPass->resource.get(), 0); +} + +/* Loads the DDS heightmap registered for the current worldspace into texHeightMap and creates its SRV. Returns early when TES or the worldspace is null, when no heightmap is registered for the worldspace, when that heightmap is already cached, or when loading or texture creation fails. On success it points cachedHeightmap at the registry entry, resets shadowUpdateIdx and requests a precompute. */ void TerrainOcclusion::LoadHeightmap() +{ + auto tes = RE::TES::GetSingleton(); + if (!tes) + return; + auto worldspace = tes->GetRuntimeData2().worldSpace; + if (!worldspace) + return; + std::string worldspace_name = worldspace->GetFormEditorID(); + if (!heightmaps.contains(worldspace_name)) // no height map for that, but we don't remove cache + return; + if (cachedHeightmap && cachedHeightmap->worldspace == worldspace_name) // already cached + return; + + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto device = renderer->GetRuntimeData().forwarder; + + logger::debug("Loading height map..."); + { + auto& target_heightmap = heightmaps[worldspace_name]; + + DirectX::ScratchImage image; + try { + std::filesystem::path path{ target_heightmap.dir }; + path /= target_heightmap.filename; + + DX::ThrowIfFailed(LoadFromDDSFile(path.c_str(), DirectX::DDS_FLAGS_NONE, nullptr, image)); + } catch (const DX::com_exception& e) { + logger::error("{}", e.what()); + return; + } + + ID3D11Resource* pResource = nullptr; + try { + DX::ThrowIfFailed(CreateTexture(device, + image.GetImages(), image.GetImageCount(), + image.GetMetadata(), &pResource)); + } catch (const DX::com_exception& e) { + logger::error("{}", e.what()); + return; + } + + /* NOTE(review): the header declares texHeightMap as std::unique_ptr; release() gives up ownership without destroying the previous object, and the assignment on the next line replaces the pointer anyway - reset() was presumably intended, confirm. */ texHeightMap.release(); + texHeightMap = std::make_unique(reinterpret_cast(pResource)); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texHeightMap->desc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = 1 } + }; + texHeightMap->CreateSRV(srvDesc); + + cachedHeightmap = &heightmaps[worldspace_name]; + } + + shadowUpdateIdx = 0; + needPrecompute = true; +} + +/* Recreates the occlusion, normalised height and shadow height textures at the heightmap resolution, uploads the AO generation settings, and dispatches the occlusion compute shader with ceil(width / 32) by ceil(height / 32) groups while saving and restoring the compute stage bindings it touches. Does nothing while no heightmap is cached. Clears needPrecompute at the end. */ void TerrainOcclusion::Precompute() +{ + if (!cachedHeightmap) + return;
+ + auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + + logger::info("Creating occlusion texture..."); + { + /* NOTE(review): release() on these members, which the header declares as std::unique_ptr, gives up ownership without destroying the old textures - presumably reset() was meant, confirm. */ texOcclusion.release(); + texNormalisedHeight.release(); + texShadowHeight.release(); + + D3D11_TEXTURE2D_DESC texDesc = { + .Width = texHeightMap->desc.Width, + .Height = texHeightMap->desc.Height, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_R8_UNORM, + .SampleDesc = { .Count = 1 }, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS + }; + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = 1 } + }; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + + texOcclusion = std::make_unique(texDesc); + texOcclusion->CreateSRV(srvDesc); + texOcclusion->CreateUAV(uavDesc); + + /* the three descriptors are reused below with only the format switched */ texDesc.Format = srvDesc.Format = uavDesc.Format = DXGI_FORMAT_R16_FLOAT; + texNormalisedHeight = std::make_unique(texDesc); + texNormalisedHeight->CreateSRV(srvDesc); + texNormalisedHeight->CreateUAV(uavDesc); + + texDesc.Format = srvDesc.Format = uavDesc.Format = DXGI_FORMAT_R16G16_FLOAT; + texShadowHeight = std::make_unique(texDesc); + texShadowHeight->CreateSRV(srvDesc); + texShadowHeight->CreateUAV(uavDesc); + } + + { + AOGenBuffer data = { + .settings = settings.AoGen, + .pos0 = cachedHeightmap->pos0, + .pos1 = cachedHeightmap->pos1, + .zRange = cachedHeightmap->zRange + }; + + /* presumably converts the distance from cells (the unit shown by the settings slider) to game units - confirm */ data.settings.AoDistance *= 4096.f; + + D3D11_MAPPED_SUBRESOURCE mapped; + DX::ThrowIfFailed(context->Map(aoGenBuffer->resource.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + size_t bytes = sizeof(AOGenBuffer); + memcpy_s(mapped.pData, bytes, &data, bytes); + context->Unmap(aoGenBuffer->resource.get(), 0); + } + + /* ---- BACKUP ---- */ + struct ShaderState + { +
ID3D11ShaderResourceView* srvs[2] = { nullptr }; + ID3D11ComputeShader* shader = nullptr; + ID3D11UnorderedAccessView* uavs[2] = { nullptr }; + ID3D11ClassInstance* instance = nullptr; + ID3D11SamplerState* samplers[1] = { nullptr }; + UINT numInstances; + } old, newer; + /* NOTE(review): per the D3D11 documentation the CSGetXxx calls hand back interfaces with an added reference, and nothing visible below releases the pointers stored in old after the restore - confirm whether references leak here. old.numInstances is also passed to CSGetShader without being initialised although that parameter is documented as in-out - confirm. */ context->CSGetShaderResources(0, ARRAYSIZE(old.srvs), old.srvs); + context->CSGetShader(&old.shader, &old.instance, &old.numInstances); + context->CSGetUnorderedAccessViews(0, ARRAYSIZE(old.uavs), old.uavs); + context->CSGetSamplers(0, ARRAYSIZE(old.samplers), old.samplers); + + /* ---- DISPATCH ---- */ + logger::info("Precomputation..."); + newer.srvs[0] = aoGenBuffer->srv.get(); + newer.srvs[1] = texHeightMap->srv.get(); + newer.uavs[0] = texOcclusion->uav.get(); + newer.uavs[1] = texNormalisedHeight->uav.get(); + + context->CSSetSamplers(0, ARRAYSIZE(newer.samplers), newer.samplers); + context->CSSetShaderResources(0, ARRAYSIZE(newer.srvs), newer.srvs); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(newer.uavs), newer.uavs, nullptr); + context->CSSetShader(occlusionProgram.get(), nullptr, 0); + context->Dispatch(((texOcclusion->desc.Width - 1) >> 5) + 1, ((texOcclusion->desc.Height - 1) >> 5) + 1, 1); + + /* ---- RESTORE ---- */ + context->CSSetShaderResources(0, ARRAYSIZE(old.srvs), old.srvs); + context->CSSetShader(old.shader, &old.instance, old.numInstances); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(old.uavs), old.uavs, nullptr); + context->CSSetSamplers(0, ARRAYSIZE(old.samplers), old.samplers); + + needPrecompute = false; +} + +/* Advances the incremental terrain shadow update by one step: refreshes the cached light direction data at the start of each cycle, uploads the shadow update constant buffer, and dispatches the shadow update compute shader. Returns early when no matching heightmap is ready or when the sun light cast yields null. */ void TerrainOcclusion::UpdateShadow() +{ + if (!IsHeightMapReady()) + return; + + auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + /* NOTE(review): accumulator, activeShadowSceneNode and sunLight are dereferenced without null checks; only the cast result is tested - confirm they cannot be null here. */ auto sunLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); + if (!sunLight) + return; + + /* ---- UPDATE CB ---- */ + uint width =
texNormalisedHeight->desc.Width; + uint height = texNormalisedHeight->desc.Height; + + // only update direction at the start of each cycle + static float2 cachedDirLightPxDir; + static float2 cachedDirLightDZRange; + static uint edgePxCoord; + static int signDir; + static uint maxUpdates; + if (shadowUpdateIdx == 0) { + auto direction = sunLight->GetWorldDirection(); + float3 dirLightDir = { direction.x, direction.y, direction.z }; + /* flip the vector when z is positive so that z is never above zero */ if (dirLightDir.z > 0) + dirLightDir = -dirLightDir; + + // in UV + float3 invScale = cachedHeightmap->pos1 - cachedHeightmap->pos0; + invScale.z = cachedHeightmap->zRange.y - cachedHeightmap->zRange.x; + float3 dirLightPxDir = dirLightDir / invScale; + dirLightPxDir.x *= width; + dirLightPxDir.y *= height; + + /* scale so that the dominant pixel axis advances by exactly one pixel per step; the cycle length is the dominant dimension split into chunks of 1024 pixels. NOTE(review): a light pointing straight down makes both x and y zero, so stepMult would divide by zero - confirm this cannot happen. */ float stepMult; + if (abs(dirLightPxDir.x) >= abs(dirLightPxDir.y)) { + stepMult = 1.f / abs(dirLightPxDir.x); + edgePxCoord = dirLightPxDir.x > 0 ? 0 : (width - 1); + signDir = dirLightPxDir.x > 0 ? 1 : -1; + maxUpdates = ((width - 1) >> 10) + 1; + } else { + stepMult = 1.f / abs(dirLightPxDir.y); + edgePxCoord = dirLightPxDir.y > 0 ? 0 : height - 1; + signDir = dirLightPxDir.y > 0 ?
1 : -1; + maxUpdates = ((height - 1) >> 10) + 1; + } + dirLightPxDir *= stepMult; + + cachedDirLightPxDir = { dirLightPxDir.x, dirLightPxDir.y }; + + // soft shadow angles + float lenUV = float2{ dirLightDir.x, dirLightDir.y }.Length(); + float dirLightAngle = atan2(-dirLightDir.z, lenUV); + float upperAngle = std::max(0.f, dirLightAngle - settings.Effect.ShadowSofteningRadiusAngle); + float lowerAngle = std::min(RE::NI_HALF_PI - 1e-2f, dirLightAngle + settings.Effect.ShadowSofteningRadiusAngle); + + cachedDirLightDZRange = -(lenUV / invScale.z * stepMult) * float2{ std::tan(upperAngle), std::tan(lowerAngle) }; + } + + /* StartPxCoord mixes int and unsigned operands, so a signDir of -1 is converted to unsigned and the subtraction relies on modular wrap-around */ shadowUpdateCBData = { + .LightPxDir = cachedDirLightPxDir, + .LightDeltaZ = cachedDirLightDZRange, + .StartPxCoord = edgePxCoord + signDir * shadowUpdateIdx * 1024u, + .PxSize = { 1.f / texNormalisedHeight->desc.Width, 1.f / texNormalisedHeight->desc.Height } + }; + shadowUpdateCB->Update(shadowUpdateCBData); + + shadowUpdateIdx = (shadowUpdateIdx + 1) % maxUpdates; + + /* ---- BACKUP ---- */ + struct ShaderState + { + ID3D11ShaderResourceView* srvs[1] = { nullptr }; + ID3D11ComputeShader* shader = nullptr; + ID3D11UnorderedAccessView* uavs[1] = { nullptr }; + ID3D11Buffer* buffer = nullptr; + } old, newer; + + /* NOTE(review): unlike Precompute, no CSGetXxx call fills old here, so the RESTORE section below binds null views, a null shader and a null constant buffer rather than the previous state - confirm this is intended. */ /* ---- DISPATCH ---- */ + newer.srvs[0] = texNormalisedHeight->srv.get(); + newer.uavs[0] = texShadowHeight->uav.get(); + newer.buffer = shadowUpdateCB->CB(); + + context->CSSetShaderResources(0, ARRAYSIZE(newer.srvs), newer.srvs); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(newer.uavs), newer.uavs, nullptr); + context->CSSetConstantBuffers(1, 1, &newer.buffer); + context->CSSetShader(shadowUpdateProgram.get(), nullptr, 0); + context->Dispatch(abs(cachedDirLightPxDir.x) >= abs(cachedDirLightPxDir.y) ?
height : width, 1, 1); + + /* ---- RESTORE ---- */ + context->CSSetShaderResources(0, ARRAYSIZE(old.srvs), old.srvs); + context->CSSetShader(old.shader, nullptr, 0); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(old.uavs), old.uavs, nullptr); + context->CSSetConstantBuffers(1, 1, &old.buffer); +} + +/* Entry point for the terrain occlusion pass: loads the heightmap if needed, refreshes the per-pass buffer, and returns early when both effects are disabled in the settings. Otherwise it runs the pending precompute and the shadow update step, then dispatches the output compute shader over the GI texture with ceil(width / 32) by ceil(height / 32) groups and finally unbinds the resources. */ void TerrainOcclusion::DrawTerrainOcclusion() +{ + LoadHeightmap(); + UpdateBuffer(); + + if (!settings.Effect.EnableTerrainShadow && !settings.Effect.EnableTerrainAO) + return; + + if (needPrecompute) + Precompute(); + if (settings.Effect.EnableTerrainShadow) + UpdateShadow(); + + //////////////////////////////////////////////////////////////////////////////// + + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto context = renderer->GetRuntimeData().context; + auto bindings = Bindings::GetSingleton(); + + std::array srvs = { nullptr }; + std::array uavs = { nullptr }; + + { + srvs.at(0) = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY].depthSRV; + srvs.at(1) = perPass->srv.get(); + /* the three textures only exist after Precompute has run, so their slots stay null until then */ if (texOcclusion) + srvs.at(2) = texOcclusion->srv.get(); + if (texNormalisedHeight) + srvs.at(3) = texNormalisedHeight->srv.get(); + if (texShadowHeight) + srvs.at(4) = texShadowHeight->srv.get(); + + uavs.at(0) = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK].UAV; + uavs.at(1) = bindings->giTexture->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(outputProgram.get(), nullptr, 0); + context->Dispatch((bindings->giTexture->desc.Width + 31u) >> 5, (bindings->giTexture->desc.Height + 31u) >> 5, 1); + } + + // clean up + + srvs.fill(nullptr); + uavs.fill(nullptr); + /* NOTE(review): no declaration of samplers is visible in this function or in the header - confirm where it comes from. */ samplers.fill(nullptr); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); +
context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); +} diff --git a/src/Features/TerrainOcclusion.h b/src/Features/TerrainOcclusion.h new file mode 100644 index 000000000..5db365da0 --- /dev/null +++ b/src/Features/TerrainOcclusion.h @@ -0,0 +1,122 @@ +#pragma once + +#include "Buffer.h" +#include "Feature.h" + +/* Feature holding the settings, heightmap metadata, GPU resources and compute passes for terrain occlusion; accessed through GetSingleton(). */ struct TerrainOcclusion : public Feature +{ + static TerrainOcclusion* GetSingleton() + { + static TerrainOcclusion singleton; + return std::addressof(singleton); + } + + virtual inline std::string GetName() { return "Terrain Occlusion"; } + virtual inline std::string GetShortName() { return "TerrainOcclusion"; } + inline std::string_view GetShaderDefineName() override { return "TERRA_OCC"; } + inline bool HasShaderDefine(RE::BSShader::Type) override { return true; }; + + /* index of the current step within the incremental shadow update cycle */ uint shadowUpdateIdx = 0; + + struct Settings + { + struct AOGenSettings + { + float AoDistance = 12; + uint SliceCount = 60; + uint SampleCount = 60; + } AoGen; + + struct EffectSettings + { + uint EnableTerrainShadow = true; + uint EnableTerrainAO = true; + + float HeightBias = -1000.f; // in game unit + + float ShadowSofteningRadiusAngle = 1.f * RE::NI_PI / 180.f; + float2 ShadowFadeDistance = { 1000.f, 2000.f }; + + float AOMix = 1.f; + float AOPower = 1.f; + float AOFadeOutHeight = 2000; + } Effect; + } settings; + + /* set when a new heightmap is loaded or regeneration is forced; cleared by Precompute() */ bool needPrecompute = false; + + struct HeightMapMetadata + { + std::wstring dir; + std::string filename; + std::string worldspace; + float3 pos0, pos1; // left-top-z=0 vs right-bottom-z=1 + float2 zRange; + }; + std::unordered_map heightmaps; + /* points at the entry of heightmaps that is currently loaded. NOTE(review): declared without an initializer although other code null-checks it - presumably relies on zero-initialisation of the static singleton, consider = nullptr. */ HeightMapMetadata* cachedHeightmap; + + struct AOGenBuffer + { + Settings::AOGenSettings settings; + + float3 pos0; + float3 pos1; + float2 zRange; + }; + std::unique_ptr aoGenBuffer = nullptr; + + struct ShadowUpdateCB + { + float2 LightPxDir; // direction on which light descends, from one pixel to next via dda + float2 LightDeltaZ; // per LightUVDir, upper penumbra and lower, should be negative + uint
StartPxCoord; + float2 PxSize; + + /* padding member; the static_assert below checks that the struct size is a multiple of 16 bytes */ float pad; + } shadowUpdateCBData; + static_assert(sizeof(ShadowUpdateCB) % 16 == 0); + std::unique_ptr shadowUpdateCB = nullptr; + + /* data copied into the perPass buffer by UpdateBuffer() */ struct PerPass + { + Settings::EffectSettings effect; + + float3 scale; + float3 invScale; + float3 offset; + float2 zRange; + }; + std::unique_ptr perPass = nullptr; + + winrt::com_ptr occlusionProgram = nullptr; + winrt::com_ptr shadowUpdateProgram = nullptr; + winrt::com_ptr outputProgram = nullptr; + + std::unique_ptr texHeightMap = nullptr; + std::unique_ptr texOcclusion = nullptr; + std::unique_ptr texNormalisedHeight = nullptr; + std::unique_ptr texShadowHeight = nullptr; + + bool IsHeightMapReady(); + + virtual void SetupResources() override; + void CompileComputeShaders(); + + virtual void DrawSettings() override; + + virtual inline void Reset() override{}; + + virtual void Draw(const RE::BSShader*, const uint32_t) override; + void UpdateBuffer(); + void DrawTerrainOcclusion(); + void LoadHeightmap(); + void Precompute(); + void UpdateShadow(); + + virtual void Load(json& o_json) override; + virtual void Save(json&) override; + + virtual inline void RestoreDefaultSettings() override { settings = {}; } + virtual void ClearShaderCache() override; +}; \ No newline at end of file