Skip to content

EgorBot for hamarb123 in #99596 #274

@EgorBot

Description

@EgorBot

Processing dotnet/runtime#99596 (comment) command:

Command

-intel -amd

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using BenchmarkDotNet.Attributes;

/// <summary>
/// Micro-benchmarks for <c>Vector256.Shuffle</c> over bytes with constant index vectors.
/// </summary>
/// <remarks>
/// Tests whether the new codegen using shuf over perm is meaningfully faster.
/// The main loop issues eight independent shuffles per iteration, so it should be
/// more pipelineable than a single dependent shuffle chain.
/// Every index vector only selects elements from its own 128-bit half
/// (indices 0-15 in the lower half, 16-31 in the upper half).
/// The index constants are deliberately repeated inline at every call site rather
/// than shared through a field or parameter, so each shuffle sees a literal constant.
/// </remarks>
public class Benchmarks
{
    private byte[] source = null!;

    /// <summary>Size, in bytes, of the buffer processed by each benchmark invocation.</summary>
    [Params(64, 128, 256, 1024, 4096, 16384, 65536, 1 << 20)]
    public int N;

    /// <summary>Allocates the input buffer and fills it with reproducible pseudo-random bytes.</summary>
    [GlobalSetup]
    public void Setup()
    {
        source = new byte[N];
        // Fixed seed so every run (and every machine) shuffles the same data.
        new Random(42).NextBytes(source);
    }

    /// <summary>Runs <see cref="Work"/> over the whole buffer.</summary>
    [Benchmark]
    public byte TestWork() => Work(ref MemoryMarshal.GetArrayDataReference(source), (uint)source.Length);

    /// <summary>Runs <see cref="WorkWithZeroing"/> over the whole buffer.</summary>
    [Benchmark]
    public byte TestWorkWithZeroing() => WorkWithZeroing(ref MemoryMarshal.GetArrayDataReference(source), (uint)source.Length);

    /// <summary>
    /// Slides a 32-byte window over the buffer, shuffles each window with an
    /// all-in-range index vector, and folds the shuffled vectors into an accumulator.
    /// </summary>
    /// <param name="b">Reference to the first byte of the buffer; at least <paramref name="size"/> bytes must be readable.</param>
    /// <param name="size">Number of readable bytes starting at <paramref name="b"/>.</param>
    /// <returns>The wrapping byte sum of all accumulator elements (0 when fewer than 32 bytes are supplied).</returns>
    public static byte Work(ref byte b, nuint size)
    {
        Vector256<byte> result = Vector256<byte>.Zero;

        // Main loop: eight overlapping windows at byte offsets 0..7. The furthest load
        // touches bytes [7, Count + 7), hence the "+ 7" in the guard. The explicit cast
        // keeps the comparison in nuint instead of mixing nuint with int.
        while (size >= (nuint)Vector256<byte>.Count + 7)
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            Vector256<byte> v1 = Vector256.LoadUnsafe(ref b, 1);
            Vector256<byte> v2 = Vector256.LoadUnsafe(ref b, 2);
            Vector256<byte> v3 = Vector256.LoadUnsafe(ref b, 3);
            Vector256<byte> v4 = Vector256.LoadUnsafe(ref b, 4);
            Vector256<byte> v5 = Vector256.LoadUnsafe(ref b, 5);
            Vector256<byte> v6 = Vector256.LoadUnsafe(ref b, 6);
            Vector256<byte> v7 = Vector256.LoadUnsafe(ref b, 7);

            // The index vector has to be materialised with Vector256.Create;
            // Vector256.Shuffle only accepts (vector, indices).
            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v1 = Vector256.Shuffle(v1, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v2 = Vector256.Shuffle(v2, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v3 = Vector256.Shuffle(v3, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v4 = Vector256.Shuffle(v4, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v5 = Vector256.Shuffle(v5, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v6 = Vector256.Shuffle(v6, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v7 = Vector256.Shuffle(v7, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));

            // Mix all eight shuffled vectors so none of the shuffles can be dropped as dead code.
            result += ((v0 ^ v1) - (v2 + v3)) ^ ((v4 + ~v5) + (v6 + v7));
            b = ref Unsafe.Add(ref b, 8);
            size -= 8;
        }

        // Tail loop: one window at a time, advancing a single byte, while a full vector still fits.
        while (size >= (nuint)Vector256<byte>.Count)
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            result += v0;
            b = ref Unsafe.Add(ref b, 1);
            size--;
        }

        return Vector256.Sum(result);
    }

    /// <summary>
    /// Same access pattern as <see cref="Work"/>, but the first shuffle index is 99,
    /// which is out of range for a 32-element vector, so the first output element of
    /// every shuffle is zero.
    /// </summary>
    /// <param name="b">Reference to the first byte of the buffer; at least <paramref name="size"/> bytes must be readable.</param>
    /// <param name="size">Number of readable bytes starting at <paramref name="b"/>.</param>
    /// <returns>The wrapping byte sum of all accumulator elements (0 when fewer than 32 bytes are supplied).</returns>
    public static byte WorkWithZeroing(ref byte b, nuint size)
    {
        Vector256<byte> result = Vector256<byte>.Zero;

        // Main loop: eight overlapping windows at byte offsets 0..7. The furthest load
        // touches bytes [7, Count + 7), hence the "+ 7" in the guard. The explicit cast
        // keeps the comparison in nuint instead of mixing nuint with int.
        while (size >= (nuint)Vector256<byte>.Count + 7)
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            Vector256<byte> v1 = Vector256.LoadUnsafe(ref b, 1);
            Vector256<byte> v2 = Vector256.LoadUnsafe(ref b, 2);
            Vector256<byte> v3 = Vector256.LoadUnsafe(ref b, 3);
            Vector256<byte> v4 = Vector256.LoadUnsafe(ref b, 4);
            Vector256<byte> v5 = Vector256.LoadUnsafe(ref b, 5);
            Vector256<byte> v6 = Vector256.LoadUnsafe(ref b, 6);
            Vector256<byte> v7 = Vector256.LoadUnsafe(ref b, 7);

            // The index vector has to be materialised with Vector256.Create;
            // Vector256.Shuffle only accepts (vector, indices). Index 99 selects zero.
            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v1 = Vector256.Shuffle(v1, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v2 = Vector256.Shuffle(v2, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v3 = Vector256.Shuffle(v3, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v4 = Vector256.Shuffle(v4, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v5 = Vector256.Shuffle(v5, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v6 = Vector256.Shuffle(v6, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v7 = Vector256.Shuffle(v7, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));

            // Mix all eight shuffled vectors so none of the shuffles can be dropped as dead code.
            result += ((v0 ^ v1) - (v2 + v3)) ^ ((v4 + ~v5) + (v6 + v7));
            b = ref Unsafe.Add(ref b, 8);
            size -= 8;
        }

        // Tail loop: one window at a time, advancing a single byte, while a full vector still fits.
        while (size >= (nuint)Vector256<byte>.Count)
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            result += v0;
            b = ref Unsafe.Add(ref b, 1);
            size--;
        }

        return Vector256.Sum(result);
    }
}

(EgorBot will reply in this issue)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions