Skip to content

Commit

Permalink
feat: add ssgi and terrain occlusion (#255)
Browse files Browse the repository at this point in the history
  • Loading branch information
Pentalimbed authored Apr 8, 2024
1 parent 00ddda5 commit 35e6bd3
Show file tree
Hide file tree
Showing 30 changed files with 3,507 additions and 259 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#include "../Common/DeferredShared.hlsl"
#include "../Common/VR.hlsl"

struct PerPassCloudShadow
{
uint EnableCloudShadows;
Expand All @@ -8,13 +11,17 @@ struct PerPassCloudShadow
float EffectMix;

float TransparencyPower;
float AbsorptionAmbient;

float RcpHPlusR;
};

StructuredBuffer<PerPassCloudShadow> perPassCloudShadow : register(t23);
TextureCube<float4> cloudShadows : register(t40);
StructuredBuffer<PerPassCloudShadow> perPassCloudShadow : register(t0);
TextureCube<float4> cloudShadows : register(t1);
Texture2D<unorm half> TexDepth : register(t2);

RWTexture2D<unorm float> RWTexShadowMask : register(u0);

SamplerState defaultSampler;

float3 getCloudShadowSampleDir(float3 rel_pos, float3 eye_to_sun)
{
Expand All @@ -38,13 +45,39 @@ float3 getCloudShadowSampleDirFlatEarth(float3 rel_pos, float3 eye_to_sun)
return v;
}

float3 getCloudShadowMult(float3 rel_pos, float3 eye_to_sun, SamplerState samp)
float3 getCloudShadowMult(float3 rel_pos, float3 eye_to_sun)
{
// float3 cloudSampleDir = getCloudShadowSampleDirFlatEarth(rel_pos, eye_to_sun).xyz;
float3 cloudSampleDir = getCloudShadowSampleDir(rel_pos, eye_to_sun).xyz;

float4 cloudCubeSample = cloudShadows.Sample(samp, cloudSampleDir);
float4 cloudCubeSample = cloudShadows.SampleLevel(defaultSampler, cloudSampleDir, 0); // TODO Sample in pixel shader
float alpha = pow(saturate(cloudCubeSample.w), perPassCloudShadow[0].TransparencyPower);

return lerp(1.0, 1.0 - alpha, perPassCloudShadow[0].EffectMix);
}

[numthreads(32, 32, 1)] void main(uint2 dtid : SV_DispatchThreadID) {
float2 uv = (dtid + .5) * RcpBufferDim;
#ifdef VR
const uint eyeIndex = uv > .5;
#else
const uint eyeIndex = 0;
#endif

float3 ndc = float3(ConvertToStereoUV(uv, eyeIndex), 1);
ndc = ndc * 2 - 1;
ndc.y = -ndc.y;
ndc.z = TexDepth[dtid];

if (ndc.z > 0.9999)
return;

float4 worldPos = mul(InvViewMatrix[eyeIndex], mul(InvProjMatrix[eyeIndex], float4(ndc, 1)));
worldPos.xyz /= worldPos.w;

float3 dirLightDirWS = mul((float3x3)InvViewMatrix[eyeIndex], DirLightDirectionVS[eyeIndex].xyz);
float cloudShadow = getCloudShadowMult(worldPos.xyz, dirLightDirWS);

half shadow = RWTexShadowMask[dtid];
RWTexShadowMask[dtid] = shadow * cloudShadow;
}
2 changes: 2 additions & 0 deletions features/Screen Space GI/Shaders/Features/ScreenSpaceGI.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[Info]
Version = 2-9-0
204 changes: 204 additions & 0 deletions features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2016-2021, Intel Corporation
//
// SPDX-License-Identifier: MIT
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// XeGTAO is based on GTAO/GTSO "Jimenez et al. / Practical Real-Time Strategies for Accurate Indirect Occlusion",
// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf
//
// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla <stephen.mccalla@intel.com> (\_/)
// Version: (see XeGTAO.h) (='.'=)
// Details: https://github.com/GameTechDev/XeGTAO (")_(")
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// with additional edits by FiveLimbedCat/ProfJack

#ifndef SSGI_COMMON
#define SSGI_COMMON

#ifndef USE_HALF_FLOAT_PRECISION
# define USE_HALF_FLOAT_PRECISION 1
#endif

#if (USE_HALF_FLOAT_PRECISION != 0)
# if 1 // old fp16 approach (<SM6.2)
typedef min16float lpfloat;
typedef min16float2 lpfloat2;
typedef min16float3 lpfloat3;
typedef min16float4 lpfloat4;
typedef min16float3x3 lpfloat3x3;
# else // new fp16 approach (requires SM6.2 and -enable-16bit-types) - WARNING: perf degradation noticed on some HW, while the old (min16float) path is mostly at least a minor perf gain so this is more useful for quality testing
typedef float16_t lpfloat;
typedef float16_t2 lpfloat2;
typedef float16_t3 lpfloat3;
typedef float16_t4 lpfloat4;
typedef float16_t3x3 lpfloat3x3;
# endif
#else
typedef float lpfloat;
typedef float2 lpfloat2;
typedef float3 lpfloat3;
typedef float4 lpfloat4;
typedef float3x3 lpfloat3x3;
#endif

///////////////////////////////////////////////////////////////////////////////

#include "../Common/DeferredShared.hlsl"

cbuffer SSGICB : register(b1)
{
float4x4 PrevInvViewMat[2];
float4 DepthUnpackConsts;
float4 NDCToViewMul;
float4 NDCToViewAdd;
float4 NDCToViewMul_x_PixelSize;

float2 FrameDim;
float2 RcpFrameDim;
uint FrameIndex;

uint NumSlices;
uint NumSteps;
float DepthMIPSamplingOffset;

float EffectRadius;
float EffectFalloffRange;
float ThinOccluderCompensation;
float Thickness;
float2 DepthFadeRange;
float DepthFadeScaleConst;

float BackfaceStrength;
float GIBounceFade;
float GIDistanceCompensation;
float GICompensationMaxDist;

float AOPower;
float GIStrength;

float DepthDisocclusion;
uint MaxAccumFrames;

float pad;
};

SamplerState samplerPointClamp : register(s0);
SamplerState samplerLinearClamp : register(s1);

///////////////////////////////////////////////////////////////////////////////

#ifdef HALF_RES
const static float res_scale = .5;
# define READ_DEPTH(tex, px) tex.Load(int3(px, 1))
# define FULLRES_LOAD(tex, px, uv, samp) tex.SampleLevel(samp, uv, 0)
#else
const static float res_scale = 1.;
# define READ_DEPTH(tex, px) tex[px]
# define FULLRES_LOAD(tex, px, uv, samp) tex[px]
#endif

#ifdef VR
# define GET_EYE_IDX(uv) (uv.x > 0.5)
#else
# define GET_EYE_IDX(uv) (0)
#endif

///////////////////////////////////////////////////////////////////////////////

#define ISNAN(x) (!(x < 0.f || x > 0.f || x == 0.f))

// http://h14s.p5r.org/2012/09/0x5f3759df.html, [Drobot2014a] Low Level Optimizations for GCN, https://blog.selfshadow.com/publications/s2016-shading-course/activision/s2016_pbs_activision_occlusion.pdf slide 63
lpfloat FastSqrt(float x)
{
return (lpfloat)(asfloat(0x1fbd1df5 + (asint(x) >> 1)));
}

// input [-1, 1] and output [0, PI], from https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
lpfloat FastACos(lpfloat inX)
{
const lpfloat PI = 3.141593;
const lpfloat HALF_PI = 1.570796;
lpfloat x = abs(inX);
lpfloat res = -0.156583 * x + HALF_PI;
res *= FastSqrt(1.0 - x);
return (inX >= 0) ? res : PI - res;
}

///////////////////////////////////////////////////////////////////////////////

// Inputs are screen XY and viewspace depth, output is viewspace position
float3 ScreenToViewPosition(const float2 screenPos, const float viewspaceDepth, const uint eyeIndex)
{
const float2 _mul = eyeIndex == 0 ? NDCToViewMul.xy : NDCToViewMul.zw;
const float2 _add = eyeIndex == 0 ? NDCToViewAdd.xy : NDCToViewAdd.zw;

float3 ret;
ret.xy = (_mul * screenPos.xy + _add) * viewspaceDepth;
ret.z = viewspaceDepth;
return ret;
}

float ScreenToViewDepth(const float screenDepth, const uint eyeIndex)
{
const float2 consts = eyeIndex == 0 ? DepthUnpackConsts.xy : DepthUnpackConsts.zw;

float depthLinearizeMul = consts.x;
float depthLinearizeAdd = consts.y;
// Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar"
return depthLinearizeMul / (depthLinearizeAdd - screenDepth);
}

float3 ViewToWorldPosition(const float3 pos, const float4x4 invView)
{
float4 worldpos = mul(invView, float4(pos, 1));
return worldpos.xyz / worldpos.w;
}

float3 ViewToWorldVector(const float3 vec, const float4x4 invView)
{
return mul((float3x3)invView, vec);
}

///////////////////////////////////////////////////////////////////////////////

// "Efficiently building a matrix to rotate one vector to another"
// http://cs.brown.edu/research/pubs/pdfs/1999/Moller-1999-EBA.pdf / https://dl.acm.org/doi/10.1080/10867651.1999.10487509
// (using https://github.com/assimp/assimp/blob/master/include/assimp/matrix3x3.inl#L275 as a code reference as it seems to be best)
lpfloat3x3 RotFromToMatrix(lpfloat3 from, lpfloat3 to)
{
const lpfloat e = dot(from, to);
const lpfloat f = abs(e); //(e < 0)? -e:e;

// WARNING: This has not been tested/worked through, especially not for 16bit floats; seems to work in our special use case (from is always {0, 0, -1}) but wouldn't use it in general
if (f > lpfloat(1.0 - 0.0003))
return lpfloat3x3(1, 0, 0, 0, 1, 0, 0, 0, 1);

const lpfloat3 v = cross(from, to);
/* ... use this hand optimized version (9 mults less) */
const lpfloat h = (1.0) / (1.0 + e); /* optimization by Gottfried Chen */
const lpfloat hvx = h * v.x;
const lpfloat hvz = h * v.z;
const lpfloat hvxy = hvx * v.y;
const lpfloat hvxz = hvx * v.z;
const lpfloat hvyz = hvz * v.y;

lpfloat3x3 mtx;
mtx[0][0] = e + hvx * v.x;
mtx[0][1] = hvxy - v.z;
mtx[0][2] = hvxz + v.y;

mtx[1][0] = hvxy + v.z;
mtx[1][1] = e + h * v.y * v.y;
mtx[1][2] = hvyz - v.x;

mtx[2][0] = hvxz - v.y;
mtx[2][1] = hvyz + v.x;
mtx[2][2] = e + hvz * v.z;

return mtx;
}

#endif
Loading

0 comments on commit 35e6bd3

Please sign in to comment.