-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[API Proposal]: GFNI Intrinsics #96170
Comments
Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics
Issue Details
Background and motivation
API Proposal
namespace System.Runtime.Intrinsics.X86
{
/// <summary>
/// Proposed API shape (declarations only, API-review form) for the GFNI operations on 512-bit vectors.
/// </summary>
public abstract class Avx512Gfni : Avx512F
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Avx512F"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector512<byte> GaloisFieldAffineTransformInverse(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector512<byte> GaloisFieldAffineTransform(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector512<byte> GaloisFieldMultiply(Vector512<byte> left, Vector512<byte> right);

    /// <summary>
    /// 128-bit and 256-bit overloads of the same operations.
    /// Marked <c>new</c> because this nested class hides the inherited <c>Avx512F.VL</c>.
    /// </summary>
    public new abstract class VL : Avx512F.VL
    {
        /// <summary>Whether the 128/256-bit forms are supported; hides <c>Avx512F.VL.IsSupported</c>.</summary>
        public static new bool IsSupported { get; }

        /// <summary>256-bit inverse-form Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransformInverse(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

        /// <summary>128-bit inverse-form Galois Field affine transform.</summary>
        public static Vector128<byte> GaloisFieldAffineTransformInverse(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

        /// <summary>256-bit Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransform(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

        /// <summary>128-bit Galois Field affine transform.</summary>
        public static Vector128<byte> GaloisFieldAffineTransform(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

        /// <summary>256-bit Galois Field multiplication.</summary>
        public static Vector256<byte> GaloisFieldMultiply(Vector256<byte> left, Vector256<byte> right);

        /// <summary>128-bit Galois Field multiplication.</summary>
        public static Vector128<byte> GaloisFieldMultiply(Vector128<byte> left, Vector128<byte> right);
    }
}
/// <summary>
/// Proposed API shape (declarations only) for the GFNI operations on 256-bit vectors.
/// </summary>
public abstract class AvxGfni : Avx
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Avx"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector256<byte> GaloisFieldAffineTransformInverse(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector256<byte> GaloisFieldAffineTransform(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector256<byte> GaloisFieldMultiply(Vector256<byte> left, Vector256<byte> right);
}
/// <summary>
/// Proposed API shape (declarations only) for the GFNI operations on 128-bit vectors.
/// </summary>
public abstract class Gfni : Sse41
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Sse41"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransformInverse(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransform(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector128<byte> GaloisFieldMultiply(Vector128<byte> left, Vector128<byte> right);
}
}
### API Usage
```csharp
// https://wunkolo.github.io/post/2020/11/gf2p8affineqb-bit-reversal/
public static Vector128<byte> ReverseBits128(Vector128<byte> value)
{
var xmm0 = Gfni.GaloisFieldAffineTransform(value, Vector128.Create(0b10000000_01000000_00100000_00010000_00001000_00000100_00000010_00000001ul).AsByte(), 0);
return Ssse3.Shuffle(xmm0, Vector128.Create(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, byte.MinValue));
}
```
### Alternative Designs
No response
### Risks
No response
|
Here are more unexpected uses for the Galois Field Affine Transformation instruction, collected by animetosho. 👍 |
Should these be named Same thing with |
namespace System.Runtime.Intrinsics.X86;
/// <summary>
/// Proposed API shape (declarations only, API-review form) for the GFNI operations on 512-bit vectors.
/// </summary>
public abstract class Avx512Gfni : Avx512F
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Avx512F"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector512<byte> GaloisFieldAffineTransformInverse(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector512<byte> GaloisFieldAffineTransform(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector512<byte> GaloisFieldMultiply(Vector512<byte> left, Vector512<byte> right);

    /// <summary>
    /// 128-bit and 256-bit overloads of the same operations.
    /// Marked <c>new</c> because this nested class hides the inherited <c>Avx512F.VL</c>.
    /// </summary>
    public new abstract class VL : Avx512F.VL
    {
        /// <summary>Whether the 128/256-bit forms are supported; hides <c>Avx512F.VL.IsSupported</c>.</summary>
        public static new bool IsSupported { get; }

        /// <summary>256-bit inverse-form Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransformInverse(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

        /// <summary>128-bit inverse-form Galois Field affine transform.</summary>
        public static Vector128<byte> GaloisFieldAffineTransformInverse(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

        /// <summary>256-bit Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransform(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

        /// <summary>128-bit Galois Field affine transform.</summary>
        public static Vector128<byte> GaloisFieldAffineTransform(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

        /// <summary>256-bit Galois Field multiplication.</summary>
        public static Vector256<byte> GaloisFieldMultiply(Vector256<byte> left, Vector256<byte> right);

        /// <summary>128-bit Galois Field multiplication.</summary>
        public static Vector128<byte> GaloisFieldMultiply(Vector128<byte> left, Vector128<byte> right);
    }
}
/// <summary>
/// Proposed API shape (declarations only) for the GFNI operations on 256-bit vectors.
/// </summary>
public abstract class AvxGfni : Avx
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Avx"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector256<byte> GaloisFieldAffineTransformInverse(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector256<byte> GaloisFieldAffineTransform(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector256<byte> GaloisFieldMultiply(Vector256<byte> left, Vector256<byte> right);
}
/// <summary>
/// Proposed API shape (declarations only) for the GFNI operations on 128-bit vectors.
/// </summary>
public abstract class Gfni : Sse41
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Sse41"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransformInverse(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransform(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector128<byte> GaloisFieldMultiply(Vector128<byte> left, Vector128<byte> right);
} |
For consistency with the AVX10 surface (and #86952), this should probably be revised to:
namespace System.Runtime.Intrinsics.X86;
public abstract class Gfni : Sse41
{
public static bool IsSupported { get; }
public static Vector128<byte> GaloisFieldAffineTransformInverse(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);
public static Vector128<byte> GaloisFieldAffineTransform(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte b);
public static Vector128<byte> GaloisFieldMultiply(Vector128<byte> left, Vector128<byte> right);
/// <summary>
/// 64-bit-only portion of the proposal; it declares nothing beyond the support check.
/// Marked <c>new</c> because, nested in a class deriving from <c>Sse41</c>, it hides the inherited <c>Sse41.X64</c>.
/// </summary>
public new abstract class X64 : Sse41.X64
{
    /// <summary>Whether the 64-bit forms are supported; hides <c>Sse41.X64.IsSupported</c>, hence <c>new</c>.</summary>
    public static new bool IsSupported { get; }
}
/// <summary>
/// 256-bit forms of the GFNI operations in the proposed shape (declarations only).
/// The class declares no base type, so its members hide nothing inherited and take no <c>new</c> modifier.
/// </summary>
public abstract class V256
{
    /// <summary>Whether the 256-bit forms are supported.</summary>
    public static bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector256<byte> GaloisFieldAffineTransformInverse(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector256<byte> GaloisFieldAffineTransform(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector256<byte> GaloisFieldMultiply(Vector256<byte> left, Vector256<byte> right);
}
/// <summary>
/// 512-bit forms of the GFNI operations in the proposed shape (declarations only).
/// The class declares no base type, so its members hide nothing inherited and take no <c>new</c> modifier.
/// </summary>
public abstract class V512
{
    /// <summary>Whether the 512-bit forms are supported.</summary>
    public static bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector512<byte> GaloisFieldAffineTransformInverse(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="b"/>.</summary>
    public static Vector512<byte> GaloisFieldAffineTransform(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector512<byte> GaloisFieldMultiply(Vector512<byte> left, Vector512<byte> right);
}
}
Also, the affine transform ops treat the second operand as an 8x8-bit matrix and are named in the C intrinsics to indicate that one operand is a vector of 64-bit values (e.g. `_mm_gf2p8affine_epi64_epi8`). It might make more sense to define those as taking a vector of `ulong` (e.g. `Vector128<ulong>`) for that operand. |
namespace System.Runtime.Intrinsics.X86;
/// <summary>
/// Revised API shape (declarations only) for the GFNI operations: 128-bit forms on this class,
/// wider forms on the nested <c>V256</c> and <c>V512</c> classes.
/// </summary>
public abstract class Gfni : Sse41
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Sse41"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="control"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransformInverse(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte control);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with matrix operand <paramref name="a"/> and constant byte <paramref name="control"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransform(Vector128<byte> x, Vector128<byte> a, [ConstantExpected] byte control);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector128<byte> GaloisFieldMultiply(Vector128<byte> left, Vector128<byte> right);

    /// <summary>64-bit-only portion; declares nothing beyond the support check. <c>new</c> because it hides the inherited <c>Sse41.X64</c>.</summary>
    public new abstract class X64 : Sse41.X64
    {
        /// <summary>Whether the 64-bit forms are supported; hides <c>Sse41.X64.IsSupported</c>.</summary>
        public static new bool IsSupported { get; }
    }

    /// <summary>256-bit forms. No base type is declared, so nothing inherited is hidden and no <c>new</c> is needed.</summary>
    public abstract class V256
    {
        /// <summary>Whether the 256-bit forms are supported.</summary>
        public static bool IsSupported { get; }

        /// <summary>256-bit inverse-form Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransformInverse(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte control);

        /// <summary>256-bit Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransform(Vector256<byte> x, Vector256<byte> a, [ConstantExpected] byte control);

        /// <summary>256-bit Galois Field multiplication.</summary>
        public static Vector256<byte> GaloisFieldMultiply(Vector256<byte> left, Vector256<byte> right);
    }

    /// <summary>512-bit forms. No base type is declared, so nothing inherited is hidden and no <c>new</c> is needed.</summary>
    public abstract class V512
    {
        /// <summary>Whether the 512-bit forms are supported.</summary>
        public static bool IsSupported { get; }

        /// <summary>512-bit inverse-form Galois Field affine transform.</summary>
        public static Vector512<byte> GaloisFieldAffineTransformInverse(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte control);

        /// <summary>512-bit Galois Field affine transform.</summary>
        public static Vector512<byte> GaloisFieldAffineTransform(Vector512<byte> x, Vector512<byte> a, [ConstantExpected] byte control);

        /// <summary>512-bit Galois Field multiplication.</summary>
        public static Vector512<byte> GaloisFieldMultiply(Vector512<byte> left, Vector512<byte> right);
    }
} |
I'll implement this one |
I just got a chance to watch the API review video. It sounds like there was some confusion around the immediate operand for the affine instructions. The documentation defines the affine transform as producing each output byte from the formula $A \cdot x + b$, where $A$ is the 8x8 bit matrix, $x$ is the input byte, and $b$ is the immediate byte.
This doesn't fit the pattern of what we typically call a 'control' byte, which might select a lane for processing or give a permute order. Since it's an actual operand used in the mathematical definition in this case, it would be clearer if the name matched the documentation. It should be noted that this discussion was part of the API review for the original shape, when it was decided to keep the name […].
I also didn't hear any mention of the 8x8 matrix operand's type in the discussion. Typical use, as in the sample given in the top issue, would have the same matrix for each 64-bit lane. Example repeated here:
// https://wunkolo.github.io/post/2020/11/gf2p8affineqb-bit-reversal/
public static Vector128<byte> ReverseBits128(Vector128<byte> value)
{
var xmm0 = Gfni.GaloisFieldAffineTransform(value, Vector128.Create(0b10000000_01000000_00100000_00010000_00001000_00000100_00000010_00000001ul).AsByte(), 0);
return Ssse3.Shuffle(xmm0, Vector128.Create(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, byte.MinValue));
}
Note that the sample creates the matrix vector by broadcast of a `ulong` value. Proposed shape would be:
namespace System.Runtime.Intrinsics.X86;
/// <summary>
/// Proposed API shape (declarations only) for the GFNI operations in which the affine-transform
/// matrix operand is typed as a vector of <c>ulong</c> rather than <c>byte</c>.
/// 128-bit forms live on this class; wider forms on the nested <c>V256</c> and <c>V512</c> classes.
/// </summary>
public abstract class Gfni : Sse2
{
    /// <summary>Whether these APIs are supported. Marked <c>new</c> because it hides the property inherited from <see cref="Sse2"/>.</summary>
    public static new bool IsSupported { get; }

    /// <summary>Inverse-form Galois Field affine transform of the bytes in <paramref name="x"/>, with the matrix operand <paramref name="a"/> given as 64-bit elements and constant byte <paramref name="b"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransformInverse(Vector128<byte> x, Vector128<ulong> a, [ConstantExpected] byte b);

    /// <summary>Galois Field affine transform of the bytes in <paramref name="x"/>, with the matrix operand <paramref name="a"/> given as 64-bit elements and constant byte <paramref name="b"/>.</summary>
    public static Vector128<byte> GaloisFieldAffineTransform(Vector128<byte> x, Vector128<ulong> a, [ConstantExpected] byte b);

    /// <summary>Galois Field multiplication of the bytes in <paramref name="left"/> and <paramref name="right"/>.</summary>
    public static Vector128<byte> GaloisFieldMultiply(Vector128<byte> left, Vector128<byte> right);

    /// <summary>64-bit-only portion; declares nothing beyond the support check. <c>new</c> because it hides the inherited <c>Sse2.X64</c>.</summary>
    public new abstract class X64 : Sse2.X64
    {
        /// <summary>Whether the 64-bit forms are supported; hides <c>Sse2.X64.IsSupported</c>.</summary>
        public static new bool IsSupported { get; }
    }

    /// <summary>256-bit forms. No base type is declared, so nothing inherited is hidden and no <c>new</c> is needed.</summary>
    public abstract class V256
    {
        /// <summary>Whether the 256-bit forms are supported.</summary>
        public static bool IsSupported { get; }

        /// <summary>256-bit inverse-form Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransformInverse(Vector256<byte> x, Vector256<ulong> a, [ConstantExpected] byte b);

        /// <summary>256-bit Galois Field affine transform.</summary>
        public static Vector256<byte> GaloisFieldAffineTransform(Vector256<byte> x, Vector256<ulong> a, [ConstantExpected] byte b);

        /// <summary>256-bit Galois Field multiplication.</summary>
        public static Vector256<byte> GaloisFieldMultiply(Vector256<byte> left, Vector256<byte> right);
    }

    /// <summary>512-bit forms. No base type is declared, so nothing inherited is hidden and no <c>new</c> is needed.</summary>
    public abstract class V512
    {
        /// <summary>Whether the 512-bit forms are supported.</summary>
        public static bool IsSupported { get; }

        /// <summary>512-bit inverse-form Galois Field affine transform.</summary>
        public static Vector512<byte> GaloisFieldAffineTransformInverse(Vector512<byte> x, Vector512<ulong> a, [ConstantExpected] byte b);

        /// <summary>512-bit Galois Field affine transform.</summary>
        public static Vector512<byte> GaloisFieldAffineTransform(Vector512<byte> x, Vector512<ulong> a, [ConstantExpected] byte b);

        /// <summary>512-bit Galois Field multiplication.</summary>
        public static Vector512<byte> GaloisFieldMultiply(Vector512<byte> left, Vector512<byte> right);
    }
} |
Background and motivation
GFNI
is supported by Intel in the Ice Lake and newer architectures, and by AMD in Zen 4. These instructions are known to be useful for cryptography and bit manipulations.
An efficient bit-reversal can be implemented with it.
API Proposal
API Usage
Alternative Designs
No response
Risks
No response
The text was updated successfully, but these errors were encountered: