Skip to content

Commit

Permalink
FidelityFX Parallel Sort v1.1.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
rys committed Oct 8, 2021
1 parent 40b1a9c commit 0c53994
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions ffx-parallelsort/FFX_ParallelSort.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,9 @@
void FFX_ParallelSort_ScanPrefix(uint numValuesToScan, uint localID, uint groupID, uint BinOffset, uint BaseIndex, bool AddPartialSums,
FFX_ParallelSortCB CBuffer, RWStructuredBuffer<uint> ScanSrc, RWStructuredBuffer<uint> ScanDst, RWStructuredBuffer<uint> ScanScratch)
{
+ uint i;
// Perform coalesced loads into LDS
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
{
uint DataIndex = BaseIndex + (i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID;

Expand All @@ -263,7 +264,7 @@

uint threadgroupSum = 0;
// Calculate the local scan-prefix for current thread
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
{
uint tmp = gs_FFX_PARALLELSORT_LDS[i][localID];
gs_FFX_PARALLELSORT_LDS[i][localID] = threadgroupSum;
Expand All @@ -283,14 +284,14 @@
}

// Add the block scanned-prefixes back in
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
gs_FFX_PARALLELSORT_LDS[i][localID] += threadgroupSum;

// Wait for everyone to catch up
GroupMemoryBarrierWithGroupSync();

// Perform coalesced writes to scan dst
- for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
+ for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++)
{
uint DataIndex = BaseIndex + (i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID;

Expand Down Expand Up @@ -378,7 +379,7 @@
uint bitKey = (keyIndex >> bitShift) & 0x3;

// Create a packed histogram
- uint packedHistogram = 1 << (bitKey * 8);
+ uint packedHistogram = 1U << (bitKey * 8);

// Sum up all the packed keys (generates counted offsets up to current thread group)
uint localSum = FFX_ParallelSort_BlockScanPrefix(packedHistogram, localID);
Expand Down

0 comments on commit 0c53994

Please sign in to comment.