-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Description
Processing dotnet/runtime#99596 (comment) command:
Command
-intel -amd
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using BenchmarkDotNet.Attributes;
/// <summary>
/// Micro-benchmarks for <c>Vector256.Shuffle</c> on bytes with constant index vectors.
/// Written to see whether the new codegen (shuf over perm) is meaningfully faster; the
/// eight independent shuffles per iteration are meant to pipeline better than a single
/// dependent chain.
/// </summary>
public class Benchmarks
{
    // Input buffer; allocated and filled in Setup() once N is known.
    private byte[] _source = null!;

    /// <summary>Size of the input buffer in bytes.</summary>
    [Params(64, 128, 256, 1024, 4096, 16384, 65536, 1 << 20)]
    public int N;

    /// <summary>Allocates the input buffer and fills it from a fixed-seed generator.</summary>
    [GlobalSetup]
    public void Setup()
    {
        _source = new byte[N];
        // Fixed seed so every run (and every machine) processes the same bytes.
        new Random(42).NextBytes(_source);
    }

    /// <summary>Runs <see cref="Work"/> over the whole input buffer.</summary>
    [Benchmark]
    public byte TestWork() => Work(ref MemoryMarshal.GetArrayDataReference(_source), (uint)_source.Length);

    /// <summary>Runs <see cref="WorkWithZeroing"/> over the whole input buffer.</summary>
    [Benchmark]
    public byte TestWorkWithZeroing() => WorkWithZeroing(ref MemoryMarshal.GetArrayDataReference(_source), (uint)_source.Length);

    /// <summary>
    /// Shuffles overlapping 32-byte windows of the input with an index vector whose
    /// entries are all in range (0..31) and folds the results into one checksum byte.
    /// </summary>
    /// <param name="b">Reference to the first byte of the input.</param>
    /// <param name="size">Number of readable bytes starting at <paramref name="b"/>.</param>
    /// <returns>The wrapping byte sum of all lanes of the accumulated vector.</returns>
    public static byte Work(ref byte b, nuint size)
    {
        Vector256<byte> result = Vector256<byte>.Zero;

        // Main loop: eight windows at byte offsets 0..7, so Count + 7 bytes must be readable.
        // NOTE(review): the index vector is deliberately spelled out as a constant at every
        // call site rather than hoisted into a local — presumably so the JIT sees constant
        // indices for the codegen being measured; confirm before refactoring.
        while (size >= (nuint)(Vector256<byte>.Count + 7))
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            Vector256<byte> v1 = Vector256.LoadUnsafe(ref b, 1);
            Vector256<byte> v2 = Vector256.LoadUnsafe(ref b, 2);
            Vector256<byte> v3 = Vector256.LoadUnsafe(ref b, 3);
            Vector256<byte> v4 = Vector256.LoadUnsafe(ref b, 4);
            Vector256<byte> v5 = Vector256.LoadUnsafe(ref b, 5);
            Vector256<byte> v6 = Vector256.LoadUnsafe(ref b, 6);
            Vector256<byte> v7 = Vector256.LoadUnsafe(ref b, 7);

            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v1 = Vector256.Shuffle(v1, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v2 = Vector256.Shuffle(v2, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v3 = Vector256.Shuffle(v3, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v4 = Vector256.Shuffle(v4, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v5 = Vector256.Shuffle(v5, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v6 = Vector256.Shuffle(v6, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v7 = Vector256.Shuffle(v7, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));

            // Mix all eight shuffled vectors into the accumulator so none of them is dead code.
            result += ((v0 ^ v1) - (v2 + v3)) ^ ((v4 + ~v5) + (v6 + v7));

            b = ref Unsafe.Add(ref b, 8);
            size -= 8;
        }

        // Tail: one window per iteration, sliding forward a single byte at a time.
        while (size >= (nuint)Vector256<byte>.Count)
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)15, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            result += v0;

            b = ref Unsafe.Add(ref b, 1);
            size--;
        }

        return Vector256.Sum(result);
    }

    /// <summary>
    /// Same workload as <see cref="Work"/>, except the first shuffle index is 99, which is
    /// outside the 0..31 range of a 32-byte vector. Per the <c>Vector256.Shuffle</c>
    /// documentation an out-of-range index produces a zero element, so this variant
    /// exercises the shuffle path that has to zero a lane.
    /// </summary>
    /// <param name="b">Reference to the first byte of the input.</param>
    /// <param name="size">Number of readable bytes starting at <paramref name="b"/>.</param>
    /// <returns>The wrapping byte sum of all lanes of the accumulated vector.</returns>
    public static byte WorkWithZeroing(ref byte b, nuint size)
    {
        Vector256<byte> result = Vector256<byte>.Zero;

        // Main loop: eight windows at byte offsets 0..7, so Count + 7 bytes must be readable.
        // NOTE(review): the index vector is deliberately spelled out as a constant at every
        // call site rather than hoisted into a local — presumably so the JIT sees constant
        // indices for the codegen being measured; confirm before refactoring.
        while (size >= (nuint)(Vector256<byte>.Count + 7))
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            Vector256<byte> v1 = Vector256.LoadUnsafe(ref b, 1);
            Vector256<byte> v2 = Vector256.LoadUnsafe(ref b, 2);
            Vector256<byte> v3 = Vector256.LoadUnsafe(ref b, 3);
            Vector256<byte> v4 = Vector256.LoadUnsafe(ref b, 4);
            Vector256<byte> v5 = Vector256.LoadUnsafe(ref b, 5);
            Vector256<byte> v6 = Vector256.LoadUnsafe(ref b, 6);
            Vector256<byte> v7 = Vector256.LoadUnsafe(ref b, 7);

            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v1 = Vector256.Shuffle(v1, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v2 = Vector256.Shuffle(v2, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v3 = Vector256.Shuffle(v3, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v4 = Vector256.Shuffle(v4, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v5 = Vector256.Shuffle(v5, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v6 = Vector256.Shuffle(v6, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            v7 = Vector256.Shuffle(v7, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));

            // Mix all eight shuffled vectors into the accumulator so none of them is dead code.
            result += ((v0 ^ v1) - (v2 + v3)) ^ ((v4 + ~v5) + (v6 + v7));

            b = ref Unsafe.Add(ref b, 8);
            size -= 8;
        }

        // Tail: one window per iteration, sliding forward a single byte at a time.
        while (size >= (nuint)Vector256<byte>.Count)
        {
            Vector256<byte> v0 = Vector256.LoadUnsafe(ref b);
            v0 = Vector256.Shuffle(v0, Vector256.Create((byte)99, 0, 14, 1, 13, 2, 12, 3, 11, 4, 10, 5, 9, 6, 8, 7, 31, 16, 30, 17, 29, 18, 28, 19, 27, 20, 26, 21, 25, 22, 24, 23));
            result += v0;

            b = ref Unsafe.Add(ref b, 1);
            size--;
        }

        return Vector256.Sum(result);
    }
}
(EgorBot will reply in this issue)
Metadata
Metadata
Assignees
Labels
No labels