-
Notifications
You must be signed in to change notification settings - Fork 15
Description
Hello dear stranger,
I have created and implemented an efficient method for sorting images that does not limit span length.
On my old laptop this code takes about 12ms to execute:
I essentially recreated the method proposed in the beginning of the pixel sorting video that uses the Bitonic Merge Sort. However, I added some efficient code to detect whether to pixels lie in the same span/run/sequence/etc..
This is my first shader, so I am honestly really curious about your opinion of my code, as you are a far more experienced person than me. Anyhow, I am also aware from reading the other issues on this repository that you do not have much spare time, so I won't be expecting an answer to that request.
Since I do not have access to any other computers, I am also very curious about is the performance of my shader on your device, which is - I assume - a lot faster than mine. Again though, I do not expect a response.
Lastly I would just like to thank you for your interesting, educational and entertaining videos; I would have never (or not as soon) gotten into shader programming without them. Thanks! :)
Always remember that your videos are amazing
Jonathan
The following code is mostly just a copy of this repository.
The C# file:
using System.Collections.Generic;
using UnityEngine;
using static UnityEngine.Mathf;
public class Sorting : MonoBehaviour
{
public enum SortBy
{
Red = 0, Green = 1, Blue = 2, Hue = 4, Saturation = 5, Lightness = 6
}
public enum ViewSource
{
Sorted, /*Synonymous to sortBy*/ SortValue, SequenceIds
}
public enum Direction
{
Up, Right, Down, Left
}
public ComputeShader computeShader;
public SortBy sortBy = SortBy.Lightness;
public ViewSource viewSource = ViewSource.Sorted;
// In which direction the smallest value will lie in each sequence/run
public Direction smallestValue = Direction.Left;
[Range(0, 1)]
public float lowThreshold = 0.25f;
[Range(0, 1)]
public float highThreshold = 0.75f;
int screenWidth;
int screenHeight;
RenderTexture sortByTex;
RenderTexture sequenceIds;
RenderTexture sorted;
// These are the kernels; They are all explained in further detail in the compute shader code
int initialiseSortBy, sortHoriz, sortVert, fillSequenceIds, connectSequencesHoriz, connectSequencesVert, viewOtherPass;
void Start()
{
Debug.Log("Shader is being initialised");
if (computeShader == null)
{
Debug.LogError("The compute shader has not been set in the 'Sorting' Component");
return;
}
initialiseSortBy = computeShader.FindKernel("InitialiseSortBy");
sortHoriz = computeShader.FindKernel("SortHoriz");
sortVert = computeShader.FindKernel("SortVert");
fillSequenceIds = computeShader.FindKernel("FillSequenceIds");
connectSequencesHoriz = computeShader.FindKernel("ConnectSequencesHoriz");
connectSequencesVert = computeShader.FindKernel("ConnectSequencesVert");
viewOtherPass = computeShader.FindKernel("ViewOther");
RegenerateTextures();
}
// Creates a new render texture with the width and height of the screen
RenderTexture createNewTexture(RenderTextureFormat format)
{
if (!SystemInfo.SupportsRenderTextureFormat(format)) Debug.LogError("The texture format " + format + " is not supported on this system");
var output = new RenderTexture(screenWidth, screenHeight, 0, format, RenderTextureReadWrite.Linear);
output.enableRandomWrite = true;
output.Create();
return output;
}
// This method is probably necessary to avoid memory leaks
void ReleaseTexture(RenderTexture tex)
{
if (tex != null) tex.Release();
}
// Regenerates the textures; Does the same as in Acerola's code
void RegenerateTextures(int width = -1, int height = -1)
{
Debug.Log("Regenerating textures");
screenWidth = width == -1 ? Screen.width : width;
screenHeight = height == -1 ? Screen.height : height;
ReleaseTexture(sortByTex);
ReleaseTexture(sequenceIds);
ReleaseTexture(sorted);
sortByTex = createNewTexture(RenderTextureFormat.R16);
sequenceIds = createNewTexture(RenderTextureFormat.RInt);
sorted = createNewTexture(RenderTextureFormat.ARGB64);
}
// Just a simple check
void EnsureCorrectTextureSize(int width = -1, int height = -1)
{
if (width == -1) width = Screen.width;
if (height == -1) height = Screen.height;
if (screenWidth != width || screenHeight != height)
RegenerateTextures(width, height);
}
private void Update()
{
if (computeShader == null) return;
EnsureCorrectTextureSize();
}
void OnRenderImage(RenderTexture source, RenderTexture destination)
{
if (computeShader == null) return;
// We call this method with the width and height of the source texture
// This feature exists to enable the sorting of images outside of the whole-screen effect
EnsureCorrectTextureSize(source.width, source.height);
// Map a direction to these two booleans
bool sortVertically;
bool reverseSortingOrder;
switch (smallestValue)
{
case Direction.Left:
sortVertically = false;
reverseSortingOrder = false;
break;
case Direction.Up:
sortVertically = true;
reverseSortingOrder = true;
break;
case Direction.Right:
sortVertically = false;
reverseSortingOrder = true;
break;
case Direction.Down:
sortVertically = true;
reverseSortingOrder = false;
break;
default:
return;
}
// Sorted is the main work texture
Graphics.Blit(source, sorted);
// Setting variables
computeShader.SetInt("sortingMode", (int)sortBy);
computeShader.SetInt("width", sortVertically ? screenHeight : screenWidth); // This code is extremely sensible
computeShader.SetBool("reverseSortingOrder", reverseSortingOrder);
computeShader.SetBool("sortVertically", sortVertically);
if (highThreshold < lowThreshold) highThreshold = lowThreshold;
computeShader.SetFloat("lowThreshold", lowThreshold);
computeShader.SetFloat("highThreshold", highThreshold);
// Initialising the sortBy texture with values generated from sorted
// Sorted currently just contains the source texture
computeShader.SetTexture(initialiseSortBy, "sorted", sorted);
computeShader.SetTexture(initialiseSortBy, "sortBy", sortByTex);
computeShader.Dispatch(initialiseSortBy, CeilToInt(screenWidth / 8f), CeilToInt(screenHeight / 8f), 1);
// This is where the fun begins
// Give each pixel that should be sorted a unique sequence id
computeShader.SetTexture(fillSequenceIds, "sortBy", sortByTex);
computeShader.SetTexture(fillSequenceIds, "sequenceIds", sequenceIds);
computeShader.Dispatch(fillSequenceIds, CeilToInt(screenWidth / 8f), CeilToInt(screenHeight / 8f), 1);
// Select the correct kernel
int connectSequences = sortVertically ? connectSequencesVert : connectSequencesHoriz;
// "Connect adjacent pixels" which means to that adjacent pixels will have the same id
computeShader.SetTexture(connectSequences, "sequenceIds", sequenceIds);
computeShader.Dispatch(connectSequences, sortVertically ? screenWidth : 1, sortVertically ? 1 : screenHeight, 1);
if (viewSource == ViewSource.Sorted)
{
// Sort
// Here we select the correct pass
int pass = sortVertically ? sortVert : sortHoriz;
computeShader.SetTexture(pass, "sortBy", sortByTex);
computeShader.SetTexture(pass, "sorted", sorted);
computeShader.SetTexture(pass, "sequenceIds", sequenceIds);
computeShader.Dispatch(pass,
sortVertically ? screenWidth : 1,
sortVertically ? 1 : screenHeight,
1
);
}
else
{
// View other source
computeShader.SetInt("alternateSource", (int)viewSource - 1);
computeShader.SetTexture(viewOtherPass, "sorted", sorted);
computeShader.SetTexture(viewOtherPass, "sortBy", sortByTex);
computeShader.SetTexture(viewOtherPass, "sequenceIds", sequenceIds);
computeShader.Dispatch(viewOtherPass, CeilToInt(screenWidth / 8f), CeilToInt(screenHeight / 8f), 1);
}
Graphics.Blit(sorted, destination);
}
// This class is used for testing
static class Profiler
{
private static Dictionary<string, long> segments = new Dictionary<string, long>();
// Returns the time in milliseconds
private static long time()
{
return (long)(Time.realtimeSinceStartup * 1000);
}
public static void StartSegment(string name)
{
segments[name] = time();
}
public static void EndSegment(string name)
{
long currentTime = time();
long timeElapsed = currentTime - segments[name];
Debug.Log(name + ": " + timeElapsed + "ms");
}
}
}The HLSL file:
// The kernels are listed in order of execution
#pragma kernel InitialiseSortBy
#pragma kernel FillSequenceIds
#pragma kernel ConnectSequencesHoriz
#pragma kernel ConnectSequencesVert
#pragma kernel SortHoriz
#pragma kernel SortVert
#pragma kernel ViewOther
// The following two functions have been copied from Acerola's implementation
// Conversions from https://www.chilliant.com/rgb2hsv.html
float Epsilon = 1e-10;
float3 RGBtoHCV(in float3 RGB) {
// Based on work by Sam Hocevar and Emil Persson (apparently)
float4 P = (RGB.g < RGB.b) ? float4(RGB.bg, -1.0, 2.0 / 3.0) : float4(RGB.gb, 0.0, -1.0 / 3.0);
float4 Q = (RGB.r < P.x) ? float4(P.xyw, RGB.r) : float4(RGB.r, P.yzx);
float C = Q.x - min(Q.w, Q.y);
float H = abs((Q.w - Q.y) / (6 * C + Epsilon) + Q.z);
return float3(H, C, Q.x);
}
float3 RGBtoHSL(in float3 RGB) {
float3 HCV = RGBtoHCV(RGB);
float L = HCV.z - HCV.y * 0.5;
float S = HCV.y / (1 - abs(L * 2 - 1) + Epsilon);
return float3(HCV.x, S, L);
}
/////////////////////////////////////////////////// MY CODE //////////////////////////////////////////////////////////////////
// The input Textures
globallycoherent RWTexture2D<half> sortBy;
globallycoherent RWTexture2D<uint> sequenceIds; // This texture stores the id of a sequence that a pixel is in or 0 which means the pixel is not part of a sequence (very wide screen required for this very long comment)
globallycoherent RWTexture2D<float4> sorted;
// The threshold variables
half lowThreshold;
half highThreshold;
// The width of the in- and output textures
// When sorting vertically this is actually the height
int width;
bool reverseSortingOrder;
bool sortVertically;
// 0: red
// 1: green
// 2: blue
// 4: hue Note the jump in this line
// 5: saturation
// 6: lightness
int sortingMode;
// 0: sortBy
// 1: mask
// 2: sequenceIds
int alternateSource;
// Stores the value to sort by and the index at which the colour data can be retrieved
struct PixelInfo {
half sortBy;
uint pixelIndex;
};
// I got tired of typing that long function name
#define SyncGroup AllMemoryBarrierWithGroupSync();
#define CalculateSmall(AXIS, INC) id.AXIS & ((1 << (INC - 1)) - 1)
#define CalculateLarge(AXIS, INC) id.AXIS >> (INC - 1) << INC
// These constants are used in two processes in this shader:
// - The bitonic merge sort
// - The sequence generation algorithm
// The Bitonic pattern will only sort 2048 pixels in this implementation, because the maximum threads per thread group is 1024
// In theory there is an easy fix: Each thread does double the work
#define BITONIC_ITERATIONS 12
#define BITONIC_SIZE 1024 // This is actually half of the actual width that this algorithm can process
groupshared PixelInfo pixelLine[BITONIC_SIZE * 2];
groupshared uint idLine[BITONIC_SIZE * 2]; // Sequence ids for one line
#define GroupsharedInitForConnecting(ID, AXIS) /*
*/ idLine[(ID).AXIS] = sequenceIds[(ID).xy];
#define GroupsharedInitForSorting(ID, AXIS) /*
*/ pixelLine[(ID).AXIS].sortBy = sortBy[(ID).xy]; /*
*/ pixelLine[(ID).AXIS].pixelIndex = (ID).AXIS; /*
*/ idLine[(ID).AXIS] = sequenceIds[(ID).xy];
// Kernel implementations
// Write the values by which we will sort into a texture
[numthreads(8, 8, 1)]
void InitialiseSortBy(uint3 id: SV_DispatchThreadID) {
float3 input = sorted[id.xy].rgb;
// This code just maps the 'sortingMode' integer into rgb/hsl
if (sortingMode & 4) {
input = RGBtoHSL(input);
}
switch (sortingMode & 3) {
case 0:
sortBy[id.xy] = input.r;
break;
case 1:
sortBy[id.xy] = input.g;
break;
case 2:
sortBy[id.xy] = input.b;
break;
}
}
// Stores either the pixel's x coordinate or 0 as its id in 'sequenceIds'
[numthreads(8, 8, 1)]
void FillSequenceIds(uint3 id: SV_DispatchThreadID) {
uint result = 0;
half value = sortBy[id.xy];
if (value <= highThreshold && value >= lowThreshold)
result = (sortVertically ? id.y : id.x) + 1;
sequenceIds[id.xy] = result;
}
// This Kernel connects adjacent pixels
// Here is some pseudo-code:
// for (height = 2; height < BTIONIC_SIZE; height *= 2) {
// Divide the current line of the texture into blocks of size 'height'
// (Several threads will end up working on each of these blocks)
//
// Select one of these blocks for the current using the 'id' field
//
// If the left and right half of the current block are connected, i.e. the end of the right half and the start of the left half both are 1 in the mask texture {
// Every pixel in the right half that is in the same sequence as the first pixel in the right half
// is marked as part of the same sequence as the last pixel in the left half
// }
// }
[numthreads(BITONIC_SIZE,1,1)]
void ConnectSequencesHoriz(uint3 id: SV_DispatchThreadID) {
GroupsharedInitForConnecting(id, x);
GroupsharedInitForConnecting(id.xy + int2(BITONIC_SIZE, 0), x);
int index = id.x * 2;
SyncGroup;
if (idLine[index] * idLine[index + 1] != 0)
idLine[index + 1] = idLine[index];
SyncGroup;
int small;
int large;
[unroll]
for (int i = 2; i < BITONIC_ITERATIONS; i++) {
small = CalculateSmall(x, i);
large = (CalculateLarge(x, i)) + (1 << (i - 1));
uint lastIdLeftHalf = idLine[large - 1];
uint firstIdRightHalf = idLine[large];
uint newId = idLine[large + small];
if (small + large < width && lastIdLeftHalf * firstIdRightHalf != 0 && firstIdRightHalf == idLine[large + small]) {
newId = lastIdLeftHalf;
}
SyncGroup;
idLine[large + small] = newId;
SyncGroup;
}
uint a = idLine[id.x];
uint b = idLine[id.x + BITONIC_SIZE];
SyncGroup;
sequenceIds[id.xy] = a;
sequenceIds[id.xy + int2(BITONIC_SIZE, 0)] = b;
}
// Same code as previous function, but connects sequences vertically
[numthreads(1, BITONIC_SIZE, 1)]
void ConnectSequencesVert(uint3 id: SV_DispatchThreadID) {
GroupsharedInitForConnecting(id, y);
GroupsharedInitForConnecting(id.xy + int2(0, BITONIC_SIZE), y);
int index = id.y * 2;
SyncGroup;
if (idLine[index] * idLine[index + 1] != 0)
idLine[index + 1] = idLine[index];
SyncGroup;
int small;
int large;
[unroll]
for (int i = 2; i < BITONIC_ITERATIONS; i++) {
small = CalculateSmall(x, i);
large = (CalculateLarge(x, i)) + (1 << (i - 1));
uint lastIdLeftHalf = idLine[large - 1];
uint firstIdRightHalf = idLine[large];
uint newId = idLine[large + small];
if (small + large < width && lastIdLeftHalf * firstIdRightHalf != 0 && firstIdRightHalf == idLine[large + small]) {
newId = lastIdLeftHalf;
}
SyncGroup;
idLine[large + small] = newId;
SyncGroup;
}
uint a = idLine[id.y];
uint b = idLine[id.y + BITONIC_SIZE];
SyncGroup;
sequenceIds[id.xy] = a;
sequenceIds[id.xy + int2(0, BITONIC_SIZE)] = b;
}
void swap(int a, int b) {
PixelInfo temp = pixelLine[a];
pixelLine[a] = pixelLine[b];
pixelLine[b] = temp;
}
void compareAndSwap(int small, int large) {
uint small_id = idLine[small];
uint large_id = idLine[large];
if (
(large < width) && (small_id != 0) // If we are in bounds and the sequence and in a run/sequence
&& (small_id == large_id) // If we are in the same sequence
&& ((pixelLine[small].sortBy > pixelLine[large].sortBy) ^ reverseSortingOrder) // If we should swap
)
{
swap(small, large);
}
}
// Long live the bitonic merge sort!!
[numthreads(BITONIC_SIZE, 1, 1)]
void SortHoriz(uint3 id: SV_DispatchThreadID) {
GroupsharedInitForSorting(id, x);
GroupsharedInitForSorting((id.xy + int2(BITONIC_SIZE, 0)), x);
int small;
int large;
[unroll]
for (int i = 1; i < BITONIC_ITERATIONS; i++) {
SyncGroup
//flip
small = CalculateSmall(x, i);
large = CalculateLarge(x, i);
int c = (1 << i) - 1 - small;
small += large;
large += c;
compareAndSwap(small, large);
[unroll]
for (int j = i - 1; j > 0; j--) {
SyncGroup
//disperse
small = CalculateSmall(x, j);
large = CalculateLarge(x, j);
small += large;
large = small + (1 << (j - 1));
compareAndSwap(small, large);
}
}
SyncGroup
float4 a = sorted[int2(pixelLine[id.x].pixelIndex, id.y)];
float4 b = sorted[int2(pixelLine[id.x + BITONIC_SIZE].pixelIndex, id.y)];
SyncGroup
sorted[id.xy] = a;
sorted[id.xy + int2(BITONIC_SIZE, 0)] = b;
}
// Just a copy of SortHoriz, but of course with vertical sorting
[numthreads(1, BITONIC_SIZE, 1)]
void SortVert(uint3 id: SV_DispatchThreadID) {
GroupsharedInitForSorting(id, y);
GroupsharedInitForSorting((id.xy + int2(0, BITONIC_SIZE)), y);
int small;
int large;
[unroll]
for (int i = 1; i < BITONIC_ITERATIONS; i++) {
SyncGroup
//flip
small = CalculateSmall(y, i);
large = CalculateLarge(y, i);
int c = (1 << i) - 1 - small;
small += large;
large += c;
compareAndSwap(small, large);
[unroll]
for (int j = i - 1; j > 0; j--) {
SyncGroup
//disperse
small = CalculateSmall(y, j);
large = CalculateLarge(y, j);
small += large;
large = small + (1 << (j - 1));
compareAndSwap(small, large);
}
}
SyncGroup
float4 a = sorted[int2(id.x, pixelLine[id.y].pixelIndex)];
float4 b = sorted[int2(id.x, pixelLine[id.y + BITONIC_SIZE].pixelIndex)];
SyncGroup
sorted[id.xy] = a;
sorted[id.xy + int2(0, BITONIC_SIZE)] = b;
}
// Displays content from an alternate source like e.g. the mask texture
[numthreads(8,8,1)]
void ViewOther(uint3 id: SV_DispatchThreadID) {
float value;
switch (alternateSource) {
case 0:
value = sortBy[id.xy];
break;
case 1:
uint s_id /* and Manny*/ = sequenceIds[id.xy];
value = 0.0;
if (s_id != 0) {
value = s_id / 2.0 / width + 0.5;
}
break;
default:
value = length(id.xy) / width; // Just to show that an invalid value was sent to the gpu
break;
}
sorted[id.xy] = float4(value, value, value, 1.0);
}