Skip to content
This repository was archived by the owner on Nov 1, 2020. It is now read-only.

Commit

Permalink
Improve Intel hardware intrinsic APIs (dotnet/coreclr#17637)
Browse files Browse the repository at this point in the history
* Improve Intel hardware intrinsic APIs

* Simplify Avx.Extract non-const fallback

Signed-off-by: dotnet-bot <dotnet-bot@microsoft.com>
  • Loading branch information
FeiPengIntel authored and jkotas committed Jun 18, 2018
1 parent d966f73 commit 1329682
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 120 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,6 @@ public static class Avx
/// </summary>
public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
/// HELPER
/// </summary>
public static sbyte Extract(Vector256<sbyte> value, byte index) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
/// HELPER
Expand All @@ -248,11 +243,6 @@ public static class Avx
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
/// HELPER
/// </summary>
public static short Extract(Vector256<short> value, byte index) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
/// HELPER
/// </summary>
public static ushort Extract(Vector256<ushort> value, byte index) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __int32 _mm256_extract_epi32 (__m256i a, const int index)
Expand Down Expand Up @@ -638,45 +628,45 @@ public static class Avx
/// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
/// VMASKMOVPS xmm, xmm, m128
/// </summary>
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<uint> mask) { throw new PlatformNotSupportedException(); }
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
/// VMASKMOVPD xmm, xmm, m128
/// </summary>
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<ulong> mask) { throw new PlatformNotSupportedException(); }
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
/// VMASKMOVPS ymm, ymm, m256
/// </summary>
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<uint> mask) { throw new PlatformNotSupportedException(); }
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
/// VMASKMOVPD ymm, ymm, m256
/// </summary>
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<ulong> mask) { throw new PlatformNotSupportedException(); }
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
/// VMASKMOVPS m128, xmm, xmm
/// </summary>
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) { throw new PlatformNotSupportedException(); }
/// <summary>
/// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
/// VMASKMOVPD m128, xmm, xmm
/// </summary>
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
/// VMASKMOVPS m256, ymm, ymm
/// </summary>
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) { throw new PlatformNotSupportedException(); }
/// <summary>
/// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
/// VMASKMOVPD m256, ymm, ymm
/// </summary>
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256 _mm256_max_ps (__m256 a, __m256 b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

using System;
using System.Runtime.Intrinsics;
using System.Runtime.CompilerServices;
using Internal.Runtime.CompilerServices;

namespace System.Runtime.Intrinsics.X86
{
Expand Down Expand Up @@ -235,64 +235,23 @@ public static class Avx
/// </summary>
public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) => DuplicateOddIndexed(value);

/// <summary>
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
/// HELPER
/// </summary>
public static sbyte Extract(Vector256<sbyte> value, byte index)
{
unsafe
{
index &= 0x1F;
sbyte* buffer = stackalloc sbyte[32];
Store(buffer, value);
return buffer[index];
}
}

/// <summary>
/// __int8 _mm256_extract_epi8 (__m256i a, const int index)
/// HELPER
/// </summary>
public static byte Extract(Vector256<byte> value, byte index)
{
unsafe
{
index &= 0x1F;
byte* buffer = stackalloc byte[32];
Store(buffer, value);
return buffer[index];
}
return Unsafe.Add<byte>(ref Unsafe.As<Vector256<byte>, byte>(ref value), index & 0x1F);
}

/// <summary>
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
/// HELPER
/// </summary>
public static short Extract(Vector256<short> value, byte index)
{
unsafe
{
index &= 0xF;
short* buffer = stackalloc short[16];
Store(buffer, value);
return buffer[index];
}
}

/// <summary>
/// __int16 _mm256_extract_epi16 (__m256i a, const int index)
/// HELPER
/// </summary>
public static ushort Extract(Vector256<ushort> value, byte index)
{
unsafe
{
index &= 0xF;
ushort* buffer = stackalloc ushort[16];
Store(buffer, value);
return buffer[index];
}
return Unsafe.Add<ushort>(ref Unsafe.As<Vector256<ushort>, ushort>(ref value), index & 0xF);
}

/// <summary>
Expand All @@ -301,13 +260,7 @@ public static ushort Extract(Vector256<ushort> value, byte index)
/// </summary>
public static int Extract(Vector256<int> value, byte index)
{
unsafe
{
index &= 0x7;
int* buffer = stackalloc int[8];
Store(buffer, value);
return buffer[index];
}
return Unsafe.Add<int>(ref Unsafe.As<Vector256<int>, int>(ref value), index & 0x7);
}

/// <summary>
Expand All @@ -316,13 +269,7 @@ public static int Extract(Vector256<int> value, byte index)
/// </summary>
public static uint Extract(Vector256<uint> value, byte index)
{
unsafe
{
index &= 0x7;
uint* buffer = stackalloc uint[8];
Store(buffer, value);
return buffer[index];
}
return Unsafe.Add<uint>(ref Unsafe.As<Vector256<uint>, uint>(ref value), index & 0x7);
}

/// <summary>
Expand All @@ -335,13 +282,7 @@ public static long Extract(Vector256<long> value, byte index)
{
throw new PlatformNotSupportedException();
}
unsafe
{
index &= 0x3;
long* buffer = stackalloc long[4];
Store(buffer, value);
return buffer[index];
}
return Unsafe.Add<long>(ref Unsafe.As<Vector256<long>, long>(ref value), index & 0x3);
}

/// <summary>
Expand All @@ -354,13 +295,7 @@ public static ulong Extract(Vector256<ulong> value, byte index)
{
throw new PlatformNotSupportedException();
}
unsafe
{
index &= 0x3;
ulong* buffer = stackalloc ulong[4];
Store(buffer, value);
return buffer[index];
}
return Unsafe.Add<ulong>(ref Unsafe.As<Vector256<ulong>, ulong>(ref value), index & 0x3);
}

/// <summary>
Expand Down Expand Up @@ -825,45 +760,45 @@ public static Vector256<T> InsertVector128<T>(Vector256<T> value, Vector128<T> d
/// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
/// VMASKMOVPS xmm, xmm, m128
/// </summary>
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<uint> mask) => MaskLoad(address, mask);
public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) => MaskLoad(address, mask);
/// <summary>
/// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
/// VMASKMOVPD xmm, xmm, m128
/// </summary>
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<ulong> mask) => MaskLoad(address, mask);
public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) => MaskLoad(address, mask);

/// <summary>
/// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
/// VMASKMOVPS ymm, ymm, m256
/// </summary>
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<uint> mask) => MaskLoad(address, mask);
public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) => MaskLoad(address, mask);
/// <summary>
/// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
/// VMASKMOVPD ymm, ymm, m256
/// </summary>
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<ulong> mask) => MaskLoad(address, mask);
public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) => MaskLoad(address, mask);

/// <summary>
/// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
/// VMASKMOVPS m128, xmm, xmm
/// </summary>
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<uint> source) => MaskStore(address, mask, source);
public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) => MaskStore(address, mask, source);
/// <summary>
/// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
/// VMASKMOVPD m128, xmm, xmm
/// </summary>
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<ulong> source) => MaskStore(address, mask, source);
public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) => MaskStore(address, mask, source);

/// <summary>
/// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
/// VMASKMOVPS m256, ymm, ymm
/// </summary>
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<uint> source) => MaskStore(address, mask, source);
public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) => MaskStore(address, mask, source);
/// <summary>
/// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
/// VMASKMOVPD m256, ymm, ymm
/// </summary>
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<ulong> source) => MaskStore(address, mask, source);
public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) => MaskStore(address, mask, source);

/// <summary>
/// __m256 _mm256_max_ps (__m256 a, __m256 b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -609,11 +609,6 @@ public static class Sse2
/// </summary>
public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// int _mm_extract_epi16 (__m128i a, int immediate)
/// PEXTRW reg, xmm, imm8
/// </summary>
public static short Extract(Vector128<short> value, byte index) { throw new PlatformNotSupportedException(); }
/// <summary>
/// int _mm_extract_epi16 (__m128i a, int immediate)
/// PEXTRW reg, xmm, imm8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,11 +610,6 @@ public static class Sse2
/// </summary>
public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) => DivideScalar(left, right);

/// <summary>
/// int _mm_extract_epi16 (__m128i a, int immediate)
/// PEXTRW reg, xmm, imm8
/// </summary>
public static short Extract(Vector128<short> value, byte index) => Extract(value, index);
/// <summary>
/// int _mm_extract_epi16 (__m128i a, int immediate)
/// PEXTRW reg, xmm, imm8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,6 @@ public static class Sse41
/// </summary>
public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, byte control) { throw new PlatformNotSupportedException(); }

/// <summary>
/// int _mm_extract_epi8 (__m128i a, const int imm8)
/// PEXTRB reg/m8, xmm, imm8
/// </summary>
public static sbyte Extract(Vector128<sbyte> value, byte index) { throw new PlatformNotSupportedException(); }
/// <summary>
/// int _mm_extract_epi8 (__m128i a, const int imm8)
/// PEXTRB reg/m8, xmm, imm8
Expand Down Expand Up @@ -283,7 +278,7 @@ public static class Sse41
/// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
/// INSERTPS xmm, xmm/m32, imm8
/// </summary>
public static Vector128<float> Insert(Vector128<float> value, float data, byte index) { throw new PlatformNotSupportedException(); }
public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, byte index) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m128i _mm_max_epi8 (__m128i a, __m128i b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,6 @@ public static class Sse41
/// </summary>
public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, byte control) => DotProduct(left, right, control);

/// <summary>
/// int _mm_extract_epi8 (__m128i a, const int imm8)
/// PEXTRB reg/m8, xmm, imm8
/// </summary>
public static sbyte Extract(Vector128<sbyte> value, byte index) => Extract(value, index);
/// <summary>
/// int _mm_extract_epi8 (__m128i a, const int imm8)
/// PEXTRB reg/m8, xmm, imm8
Expand Down Expand Up @@ -283,7 +278,7 @@ public static class Sse41
/// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
/// INSERTPS xmm, xmm/m32, imm8
/// </summary>
public static Vector128<float> Insert(Vector128<float> value, float data, byte index) => Insert(value, data, index);
public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, byte index) => Insert(value, data, index);

/// <summary>
/// __m128i _mm_max_epi8 (__m128i a, __m128i b)
Expand Down

0 comments on commit 1329682

Please sign in to comment.