Skip to content

An efficient method of sorting pixels #6

@Askhor

Description

@Askhor

Hello dear stranger,

I have created and implemented an efficient method for sorting images that does not limit span length.

On my old laptop this code takes about 12ms to execute:

image

I essentially recreated the method proposed in the beginning of the pixel sorting video that uses the Bitonic Merge Sort. However, I added some efficient code to detect whether to pixels lie in the same span/run/sequence/etc..

This is my first shader, so I am honestly really curious about your opinion of my code, as you are a far more experienced person than me. Anyhow, I am also aware from reading the other issues on this repository that you do not have much spare time, so I won't be expecting an answer to that request.

Since I do not have access to any other computers, I am also very curious about is the performance of my shader on your device, which is - I assume - a lot faster than mine. Again though, I do not expect a response.

Lastly I would just like to thank you for your interesting, educational and entertaining videos; I would have never (or not as soon) gotten into shader programming without them. Thanks! :)

Always remember that your videos are amazing
Jonathan

The following code is mostly just a copy of this repository.

The C# file:

using System.Collections.Generic;
using UnityEngine;
using static UnityEngine.Mathf;

public class Sorting : MonoBehaviour
{
    public enum SortBy
    {
        Red = 0, Green = 1, Blue = 2, Hue = 4, Saturation = 5, Lightness = 6
    }

    public enum ViewSource
    {
        Sorted, /*Synonymous to sortBy*/ SortValue, SequenceIds
    }

    public enum Direction
    {
        Up, Right, Down, Left
    }

    public ComputeShader computeShader;

    public SortBy sortBy = SortBy.Lightness;
    public ViewSource viewSource = ViewSource.Sorted;
    // In which direction the smallest value will lie in each sequence/run
    public Direction smallestValue = Direction.Left;

    [Range(0, 1)]
    public float lowThreshold = 0.25f;
    [Range(0, 1)]
    public float highThreshold = 0.75f;

    int screenWidth;
    int screenHeight;

    RenderTexture sortByTex;
    RenderTexture sequenceIds;
    RenderTexture sorted;

    // These are the kernels; They are all explained in further detail in the compute shader code
    int initialiseSortBy, sortHoriz, sortVert, fillSequenceIds, connectSequencesHoriz, connectSequencesVert, viewOtherPass;

    void Start()
    {
        Debug.Log("Shader is being initialised");

        if (computeShader == null)
        {
            Debug.LogError("The compute shader has not been set in the 'Sorting' Component");
            return;
        }

        initialiseSortBy = computeShader.FindKernel("InitialiseSortBy");
        sortHoriz = computeShader.FindKernel("SortHoriz");
        sortVert = computeShader.FindKernel("SortVert");
        fillSequenceIds = computeShader.FindKernel("FillSequenceIds");
        connectSequencesHoriz = computeShader.FindKernel("ConnectSequencesHoriz");
        connectSequencesVert = computeShader.FindKernel("ConnectSequencesVert");
        viewOtherPass = computeShader.FindKernel("ViewOther");

        RegenerateTextures();
    }

    // Creates a new render texture with the width and height of the screen
    RenderTexture createNewTexture(RenderTextureFormat format)
    {
        if (!SystemInfo.SupportsRenderTextureFormat(format)) Debug.LogError("The texture format " + format + " is not supported on this system");
        var output = new RenderTexture(screenWidth, screenHeight, 0, format, RenderTextureReadWrite.Linear);
        output.enableRandomWrite = true;
        output.Create();
        return output;
    }

    // This method is probably necessary to avoid memory leaks
    void ReleaseTexture(RenderTexture tex)
    {
        if (tex != null) tex.Release();
    }

    // Regenerates the textures; Does the same as in Acerola's code
    void RegenerateTextures(int width = -1, int height = -1)
    {
        Debug.Log("Regenerating textures");

        screenWidth = width == -1 ? Screen.width : width;
        screenHeight = height == -1 ? Screen.height : height;

        ReleaseTexture(sortByTex);
        ReleaseTexture(sequenceIds);
        ReleaseTexture(sorted);
        sortByTex = createNewTexture(RenderTextureFormat.R16);
        sequenceIds = createNewTexture(RenderTextureFormat.RInt);
        sorted = createNewTexture(RenderTextureFormat.ARGB64);
    }

    // Just a simple check
    void EnsureCorrectTextureSize(int width = -1, int height = -1)
    {
        if (width == -1) width = Screen.width;
        if (height == -1) height = Screen.height;

        if (screenWidth != width || screenHeight != height)
            RegenerateTextures(width, height);
    }

    private void Update()
    {
        if (computeShader == null) return;

        EnsureCorrectTextureSize();
    }

    void OnRenderImage(RenderTexture source, RenderTexture destination)
    {
        if (computeShader == null) return;

        // We call this method with the width and height of the source texture
        // This feature exists to enable the sorting of images outside of the whole-screen effect
        EnsureCorrectTextureSize(source.width, source.height);

        // Map a direction to these two booleans
        bool sortVertically;
        bool reverseSortingOrder;
        switch (smallestValue)
        {
            case Direction.Left:
                sortVertically = false;
                reverseSortingOrder = false;
                break;
            case Direction.Up:
                sortVertically = true;
                reverseSortingOrder = true;
                break;
            case Direction.Right:
                sortVertically = false;
                reverseSortingOrder = true;
                break;
            case Direction.Down:
                sortVertically = true;
                reverseSortingOrder = false;
                break;
            default:
                return;
        }

        // Sorted is the main work texture
        Graphics.Blit(source, sorted);

        // Setting variables
        computeShader.SetInt("sortingMode", (int)sortBy);
        computeShader.SetInt("width", sortVertically ? screenHeight : screenWidth); // This code is extremely sensible
        computeShader.SetBool("reverseSortingOrder", reverseSortingOrder);
        computeShader.SetBool("sortVertically", sortVertically);

        if (highThreshold < lowThreshold) highThreshold = lowThreshold;
        computeShader.SetFloat("lowThreshold", lowThreshold);
        computeShader.SetFloat("highThreshold", highThreshold);

        // Initialising the sortBy texture with values generated from sorted
        // Sorted currently just contains the source texture
        computeShader.SetTexture(initialiseSortBy, "sorted", sorted);
        computeShader.SetTexture(initialiseSortBy, "sortBy", sortByTex);
        computeShader.Dispatch(initialiseSortBy, CeilToInt(screenWidth / 8f), CeilToInt(screenHeight / 8f), 1);

        // This is where the fun begins

        // Give each pixel that should be sorted a unique sequence id
        computeShader.SetTexture(fillSequenceIds, "sortBy", sortByTex);
        computeShader.SetTexture(fillSequenceIds, "sequenceIds", sequenceIds);
        computeShader.Dispatch(fillSequenceIds, CeilToInt(screenWidth / 8f), CeilToInt(screenHeight / 8f), 1);

        // Select the correct kernel
        int connectSequences = sortVertically ? connectSequencesVert : connectSequencesHoriz;

        // "Connect adjacent pixels" which means to that adjacent pixels will have the same id
        computeShader.SetTexture(connectSequences, "sequenceIds", sequenceIds);
        computeShader.Dispatch(connectSequences, sortVertically ? screenWidth : 1, sortVertically ? 1 : screenHeight, 1);

        if (viewSource == ViewSource.Sorted)
        {
            // Sort

            // Here we select the correct pass
            int pass = sortVertically ? sortVert : sortHoriz;

            computeShader.SetTexture(pass, "sortBy", sortByTex);
            computeShader.SetTexture(pass, "sorted", sorted);
            computeShader.SetTexture(pass, "sequenceIds", sequenceIds);
            computeShader.Dispatch(pass,
                sortVertically ? screenWidth : 1,
                sortVertically ? 1 : screenHeight,
                1
                );
        }
        else
        {
            // View other source
            computeShader.SetInt("alternateSource", (int)viewSource - 1);
            computeShader.SetTexture(viewOtherPass, "sorted", sorted);
            computeShader.SetTexture(viewOtherPass, "sortBy", sortByTex);
            computeShader.SetTexture(viewOtherPass, "sequenceIds", sequenceIds);
            computeShader.Dispatch(viewOtherPass, CeilToInt(screenWidth / 8f), CeilToInt(screenHeight / 8f), 1);
        }

        Graphics.Blit(sorted, destination);
    }

    // This class is used for testing
    static class Profiler
    {
        private static Dictionary<string, long> segments = new Dictionary<string, long>();

        // Returns the time in milliseconds
        private static long time()
        {
            return (long)(Time.realtimeSinceStartup * 1000);
        }

        public static void StartSegment(string name)
        {
            segments[name] = time();
        }

        public static void EndSegment(string name)
        {
            long currentTime = time();
            long timeElapsed = currentTime - segments[name];

            Debug.Log(name + ": " + timeElapsed + "ms");
        }
    }
}

The HLSL file:

// The kernels are listed in order of execution

#pragma kernel InitialiseSortBy
#pragma kernel FillSequenceIds
#pragma kernel ConnectSequencesHoriz
#pragma kernel ConnectSequencesVert
#pragma kernel SortHoriz
#pragma kernel SortVert
#pragma kernel ViewOther


//                      The following two functions have been copied from Acerola's implementation

// Conversions from https://www.chilliant.com/rgb2hsv.html
float Epsilon = 1e-10;

float3 RGBtoHCV(in float3 RGB) {
    // Based on work by Sam Hocevar and Emil Persson (apparently)
    float4 P = (RGB.g < RGB.b) ? float4(RGB.bg, -1.0, 2.0 / 3.0) : float4(RGB.gb, 0.0, -1.0 / 3.0);
    float4 Q = (RGB.r < P.x) ? float4(P.xyw, RGB.r) : float4(RGB.r, P.yzx);
    float C = Q.x - min(Q.w, Q.y);
    float H = abs((Q.w - Q.y) / (6 * C + Epsilon) + Q.z);
    return float3(H, C, Q.x);
}

float3 RGBtoHSL(in float3 RGB) {
    float3 HCV = RGBtoHCV(RGB);
    float L = HCV.z - HCV.y * 0.5;
    float S = HCV.y / (1 - abs(L * 2 - 1) + Epsilon);
    return float3(HCV.x, S, L);
}

/////////////////////////////////////////////////// MY CODE //////////////////////////////////////////////////////////////////

// The input Textures
globallycoherent RWTexture2D<half> sortBy;
globallycoherent RWTexture2D<uint> sequenceIds; // This texture stores the id of a sequence that a pixel is in or 0 which means the pixel is not part of a sequence (very wide screen required for this very long comment)
globallycoherent RWTexture2D<float4> sorted;

// The threshold variables

half lowThreshold;
half highThreshold;

// The width of the in- and output textures
// When sorting vertically this is actually the height
int width;

bool reverseSortingOrder;

bool sortVertically;




// 0: red
// 1: green
// 2: blue
// 4: hue         Note the jump in this line
// 5: saturation
// 6: lightness
int sortingMode;

// 0: sortBy
// 1: mask
// 2: sequenceIds
int alternateSource;

// Stores the value to sort by and the index at which the colour data can be retrieved
struct PixelInfo {
    half sortBy;
    uint pixelIndex;
};

// I got tired of typing that long function name
#define SyncGroup AllMemoryBarrierWithGroupSync();
#define CalculateSmall(AXIS, INC) id.AXIS & ((1 << (INC - 1)) - 1)
#define CalculateLarge(AXIS, INC) id.AXIS >> (INC - 1) << INC

// These constants are used in two processes in this shader:
// - The bitonic merge sort
// - The sequence generation algorithm
// The Bitonic pattern will only sort 2048 pixels in this implementation, because the maximum threads per thread group is 1024
// In theory there is an easy fix: Each thread does double the work
#define BITONIC_ITERATIONS 12
#define BITONIC_SIZE 1024 // This is actually half of the actual width that this algorithm can process

groupshared PixelInfo pixelLine[BITONIC_SIZE * 2];
groupshared uint idLine[BITONIC_SIZE * 2]; // Sequence ids for one line

#define GroupsharedInitForConnecting(ID, AXIS) /*
*/    idLine[(ID).AXIS] = sequenceIds[(ID).xy];

#define GroupsharedInitForSorting(ID, AXIS) /*
*/    pixelLine[(ID).AXIS].sortBy = sortBy[(ID).xy]; /*
*/    pixelLine[(ID).AXIS].pixelIndex = (ID).AXIS;      /*
*/    idLine[(ID).AXIS] = sequenceIds[(ID).xy];


//                                              Kernel implementations


// Write the values by which we will sort into a texture
[numthreads(8, 8, 1)]
void InitialiseSortBy(uint3 id: SV_DispatchThreadID) {

    float3 input = sorted[id.xy].rgb;

    // This code just maps the 'sortingMode' integer into rgb/hsl
    if (sortingMode & 4) {
        input = RGBtoHSL(input);
    }

    switch (sortingMode & 3) {
        case 0:
            sortBy[id.xy] = input.r;
            break;
        case 1:
            sortBy[id.xy] = input.g;
            break;
        case 2:
            sortBy[id.xy] = input.b;
            break;
    }
}

// Stores either the pixel's x coordinate or 0 as its id in 'sequenceIds'
[numthreads(8, 8, 1)]
void FillSequenceIds(uint3 id: SV_DispatchThreadID) {
    uint result = 0;
    half value = sortBy[id.xy];

    if (value <= highThreshold && value >= lowThreshold)
        result = (sortVertically ? id.y : id.x) + 1;
    sequenceIds[id.xy] = result;
}

// This Kernel connects adjacent pixels
// Here is some pseudo-code:
// for (height = 2; height < BTIONIC_SIZE; height *= 2) {
//     Divide the current line of the texture into blocks of size 'height'
//     (Several threads will end up working on each of these blocks)
// 
//     Select one of these blocks for the current using the 'id' field
//     
//     If the left and right half of the current block are connected, i.e. the end of the right half and the start of the left half both are 1 in the mask texture {
//         Every pixel in the right half that is in the same sequence as the first pixel in the right half
//             is marked as part of the same sequence as the last pixel in the left half
//     }
// }
[numthreads(BITONIC_SIZE,1,1)]
void ConnectSequencesHoriz(uint3 id: SV_DispatchThreadID) {
    GroupsharedInitForConnecting(id, x);
    GroupsharedInitForConnecting(id.xy + int2(BITONIC_SIZE, 0), x);

    int index = id.x * 2;

    SyncGroup;

    if (idLine[index] * idLine[index + 1] != 0)
        idLine[index + 1] = idLine[index];

    SyncGroup;

    int small;
    int large;

    [unroll]
    for (int i = 2; i < BITONIC_ITERATIONS; i++) {
        small = CalculateSmall(x, i);
        large = (CalculateLarge(x, i)) + (1 << (i - 1));

        uint lastIdLeftHalf = idLine[large - 1];
        uint firstIdRightHalf = idLine[large];

        uint newId = idLine[large + small];

        if (small + large < width && lastIdLeftHalf * firstIdRightHalf != 0 && firstIdRightHalf == idLine[large + small]) {
            newId = lastIdLeftHalf;
        }

        SyncGroup;

        idLine[large + small] = newId;

        SyncGroup;
    }

    uint a = idLine[id.x];
    uint b = idLine[id.x + BITONIC_SIZE];

    SyncGroup;

    sequenceIds[id.xy] = a;
    sequenceIds[id.xy + int2(BITONIC_SIZE, 0)] = b;
}

// Same code as previous function, but connects sequences vertically
[numthreads(1, BITONIC_SIZE, 1)]
void ConnectSequencesVert(uint3 id: SV_DispatchThreadID) {
    GroupsharedInitForConnecting(id, y);
    GroupsharedInitForConnecting(id.xy + int2(0, BITONIC_SIZE), y);

    int index = id.y * 2;

    SyncGroup;

    if (idLine[index] * idLine[index + 1] != 0)
        idLine[index + 1] = idLine[index];

    SyncGroup;

    int small;
    int large;

    [unroll]
    for (int i = 2; i < BITONIC_ITERATIONS; i++) {
        small = CalculateSmall(x, i);
        large = (CalculateLarge(x, i)) + (1 << (i - 1));

        uint lastIdLeftHalf = idLine[large - 1];
        uint firstIdRightHalf = idLine[large];

        uint newId = idLine[large + small];

        if (small + large < width && lastIdLeftHalf * firstIdRightHalf != 0 && firstIdRightHalf == idLine[large + small]) {
            newId = lastIdLeftHalf;
        }

        SyncGroup;

        idLine[large + small] = newId;

        SyncGroup;
    }

    uint a = idLine[id.y];
    uint b = idLine[id.y + BITONIC_SIZE];

    SyncGroup;

    sequenceIds[id.xy] = a;
    sequenceIds[id.xy + int2(0, BITONIC_SIZE)] = b;
}


void swap(int a, int b) {
    PixelInfo temp = pixelLine[a];
    pixelLine[a] = pixelLine[b];
    pixelLine[b] = temp;
}

void compareAndSwap(int small, int large) {
    uint small_id = idLine[small];
    uint large_id = idLine[large];

    if (
        (large < width) && (small_id != 0) // If we are in bounds and the sequence and in a run/sequence
        &&  (small_id == large_id) // If we are in the same sequence
        && ((pixelLine[small].sortBy > pixelLine[large].sortBy) ^ reverseSortingOrder) // If we should swap
        )
    {
        swap(small, large);
    }
}

// Long live the bitonic merge sort!!
[numthreads(BITONIC_SIZE, 1, 1)]
void SortHoriz(uint3 id: SV_DispatchThreadID) {
    GroupsharedInitForSorting(id, x);
    GroupsharedInitForSorting((id.xy + int2(BITONIC_SIZE, 0)), x);

    int small;
    int large;

  
    [unroll]
    for (int i = 1; i < BITONIC_ITERATIONS; i++) {

        SyncGroup

        //flip
        small = CalculateSmall(x, i);
        large = CalculateLarge(x, i);
        int c = (1 << i) - 1 - small;
        small += large;
        large += c;

        compareAndSwap(small, large);

        [unroll]
        for (int j = i - 1; j > 0; j--) {

            SyncGroup

            //disperse
            small = CalculateSmall(x, j);
            large = CalculateLarge(x, j);
            small += large;
            large = small + (1 << (j - 1));

            compareAndSwap(small, large);
        }
    }

    SyncGroup

    float4 a = sorted[int2(pixelLine[id.x].pixelIndex, id.y)];
    float4 b = sorted[int2(pixelLine[id.x + BITONIC_SIZE].pixelIndex, id.y)];

    SyncGroup

    sorted[id.xy] = a;
    sorted[id.xy + int2(BITONIC_SIZE, 0)] = b;
}

// Just a copy of SortHoriz, but of course with vertical sorting
[numthreads(1, BITONIC_SIZE, 1)]
void SortVert(uint3 id: SV_DispatchThreadID) {
    GroupsharedInitForSorting(id, y);
    GroupsharedInitForSorting((id.xy + int2(0, BITONIC_SIZE)), y);

    int small;
    int large;

    [unroll]
    for (int i = 1; i < BITONIC_ITERATIONS; i++) {

        SyncGroup

        //flip
        small = CalculateSmall(y, i);
        large = CalculateLarge(y, i);
        int c = (1 << i) - 1 - small;
        small += large;
        large += c;

        compareAndSwap(small, large);

        [unroll]
        for (int j = i - 1; j > 0; j--) {

            SyncGroup

            //disperse
            small = CalculateSmall(y, j);
            large = CalculateLarge(y, j);
            small += large;
            large = small + (1 << (j - 1));

            compareAndSwap(small, large);
        }
    }

    SyncGroup

    float4 a = sorted[int2(id.x, pixelLine[id.y].pixelIndex)];
    float4 b = sorted[int2(id.x, pixelLine[id.y + BITONIC_SIZE].pixelIndex)];

    SyncGroup

    sorted[id.xy] = a;
    sorted[id.xy + int2(0, BITONIC_SIZE)] = b;
}

// Displays content from an alternate source like e.g. the mask texture
[numthreads(8,8,1)]
void ViewOther(uint3 id: SV_DispatchThreadID) {
    float value;

    switch (alternateSource) {
        case 0:
            value = sortBy[id.xy];
            break;
        case 1:
            uint s_id /* and Manny*/ = sequenceIds[id.xy];
            value = 0.0;
            if (s_id != 0) {
                value = s_id / 2.0 / width + 0.5;
            }
            break;
        default:
            value = length(id.xy) / width; // Just to show that an invalid value was sent to the gpu
            break;
    }

    sorted[id.xy] = float4(value, value, value, 1.0);
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions