Improve Intel hardware intrinsic APIs (dotnet/coreclr#17637)

* Improve Intel hardware intrinsic APIs * Simplify Avx.Extract non-const fallback Signed-off-by: dotnet-bot <dotnet-bot@microsoft.com>
dotnet · Jun 18, 2018 · 1329682 · 1329682
1 parent d966f73
commit 1329682
Show file tree

Hide file tree

Showing 6 changed files with 25 additions and 120 deletions.
diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs
@@ -234,11 +234,6 @@ public static class Avx
         /// </summary>
         public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) { throw new PlatformNotSupportedException(); }
 
-        /// <summary>
-        /// __int8 _mm256_extract_epi8 (__m256i a, const int index)
-        ///   HELPER
-        /// </summary>
-        public static sbyte Extract(Vector256<sbyte> value, byte index) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __int8 _mm256_extract_epi8 (__m256i a, const int index)
         ///   HELPER
@@ -248,11 +243,6 @@ public static class Avx
         /// __int16 _mm256_extract_epi16 (__m256i a, const int index)
         ///   HELPER
         /// </summary>
-        public static short Extract(Vector256<short> value, byte index) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __int16 _mm256_extract_epi16 (__m256i a, const int index)
-        ///   HELPER
-        /// </summary>
         public static ushort Extract(Vector256<ushort> value, byte index) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __int32 _mm256_extract_epi32 (__m256i a, const int index)
@@ -638,45 +628,45 @@ public static class Avx
         /// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
         ///   VMASKMOVPS xmm, xmm, m128
         /// </summary>
-        public static unsafe Vector128<float> MaskLoad(float* address, Vector128<uint> mask) { throw new PlatformNotSupportedException(); }
+        public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
         ///   VMASKMOVPD xmm, xmm, m128
         /// </summary>
-        public static unsafe Vector128<double> MaskLoad(double* address, Vector128<ulong> mask) { throw new PlatformNotSupportedException(); }
+        public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
         ///   VMASKMOVPS ymm, ymm, m256
         /// </summary>
-        public static unsafe Vector256<float> MaskLoad(float* address, Vector256<uint> mask) { throw new PlatformNotSupportedException(); }
+        public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
         ///   VMASKMOVPD ymm, ymm, m256
         /// </summary>
-        public static unsafe Vector256<double> MaskLoad(double* address, Vector256<ulong> mask) { throw new PlatformNotSupportedException(); }
+        public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
         ///   VMASKMOVPS m128, xmm, xmm
         /// </summary>
-        public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
         ///   VMASKMOVPD m128, xmm, xmm
         /// </summary>
-        public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
         ///   VMASKMOVPS m256, ymm, ymm
         /// </summary>
-        public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
         ///   VMASKMOVPD m256, ymm, ymm
         /// </summary>
-        public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m256 _mm256_max_ps (__m256 a, __m256 b)

diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Avx.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Avx.cs
@@ -4,7 +4,7 @@
 
 using System;
 using System.Runtime.Intrinsics;
-using System.Runtime.CompilerServices;
+using Internal.Runtime.CompilerServices;
 
 namespace System.Runtime.Intrinsics.X86
 {
@@ -235,64 +235,23 @@ public static class Avx
         /// </summary>
         public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) => DuplicateOddIndexed(value);
 
-        /// <summary>
-        /// __int8 _mm256_extract_epi8 (__m256i a, const int index)
-        ///   HELPER
-        /// </summary>
-        public static sbyte Extract(Vector256<sbyte> value, byte index)
-        {
-            unsafe
-            {
-                index &= 0x1F;
-                sbyte* buffer = stackalloc sbyte[32];
-                Store(buffer, value);
-                return buffer[index];
-            }
-        }
-
         /// <summary>
         /// __int8 _mm256_extract_epi8 (__m256i a, const int index)
         ///   HELPER
         /// </summary>
         public static byte Extract(Vector256<byte> value, byte index)
         {
-            unsafe
-            {
-                index &= 0x1F;
-                byte* buffer = stackalloc byte[32];
-                Store(buffer, value);
-                return buffer[index];   
-            }
+            return Unsafe.Add<byte>(ref Unsafe.As<Vector256<byte>, byte>(ref value), index & 0x1F);
         }
 
-        /// <summary>
-        /// __int16 _mm256_extract_epi16 (__m256i a, const int index)
-        ///   HELPER
-        /// </summary>
-        public static short Extract(Vector256<short> value, byte index)
-        {
-            unsafe
-            {
-                index &= 0xF;
-                short* buffer = stackalloc short[16];
-                Store(buffer, value);
-                return buffer[index];
-            }   
-        }
 
         /// <summary>
         /// __int16 _mm256_extract_epi16 (__m256i a, const int index)
         ///   HELPER
         /// </summary>
         public static ushort Extract(Vector256<ushort> value, byte index)
         {
-            unsafe
-            {
-                index &= 0xF;
-                ushort* buffer = stackalloc ushort[16];
-                Store(buffer, value);
-                return buffer[index];
-            }
+            return Unsafe.Add<ushort>(ref Unsafe.As<Vector256<ushort>, ushort>(ref value), index & 0xF);
         }
 
         /// <summary>
@@ -301,13 +260,7 @@ public static ushort Extract(Vector256<ushort> value, byte index)
         /// </summary>
         public static int Extract(Vector256<int> value, byte index)
         {
-            unsafe
-            {
-                index &= 0x7;
-                int* buffer = stackalloc int[8];
-                Store(buffer, value);
-                return buffer[index];
-            }
+            return Unsafe.Add<int>(ref Unsafe.As<Vector256<int>, int>(ref value), index & 0x7);
         }
 
         /// <summary>
@@ -316,13 +269,7 @@ public static int Extract(Vector256<int> value, byte index)
         /// </summary>
         public static uint Extract(Vector256<uint> value, byte index)
         {
-            unsafe
-            {
-                index &= 0x7;
-                uint* buffer = stackalloc uint[8];
-                Store(buffer, value);
-                return buffer[index];
-            }
+            return Unsafe.Add<uint>(ref Unsafe.As<Vector256<uint>, uint>(ref value), index & 0x7);
         }
 
         /// <summary>
@@ -335,13 +282,7 @@ public static long Extract(Vector256<long> value, byte index)
             {
                 throw new PlatformNotSupportedException();
             }
-            unsafe
-            {
-                index &= 0x3;
-                long* buffer = stackalloc long[4];
-                Store(buffer, value);
-                return buffer[index];
-            }
+            return Unsafe.Add<long>(ref Unsafe.As<Vector256<long>, long>(ref value), index & 0x3);
         }
 
         /// <summary>
@@ -354,13 +295,7 @@ public static ulong Extract(Vector256<ulong> value, byte index)
             {
                 throw new PlatformNotSupportedException();
             }
-            unsafe
-            {
-                index &= 0x3;
-                ulong* buffer = stackalloc ulong[4];
-                Store(buffer, value);
-                return buffer[index];
-            }
+            return Unsafe.Add<ulong>(ref Unsafe.As<Vector256<ulong>, ulong>(ref value), index & 0x3);
         }
 
         /// <summary>
@@ -825,45 +760,45 @@ public static Vector256<T> InsertVector128<T>(Vector256<T> value, Vector128<T> d
         /// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
         ///   VMASKMOVPS xmm, xmm, m128
         /// </summary>
-        public static unsafe Vector128<float> MaskLoad(float* address, Vector128<uint> mask) => MaskLoad(address, mask);
+        public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) => MaskLoad(address, mask);
         /// <summary>
         /// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
         ///   VMASKMOVPD xmm, xmm, m128
         /// </summary>
-        public static unsafe Vector128<double> MaskLoad(double* address, Vector128<ulong> mask) => MaskLoad(address, mask);
+        public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) => MaskLoad(address, mask);
 
         /// <summary>
         /// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
         ///   VMASKMOVPS ymm, ymm, m256
         /// </summary>
-        public static unsafe Vector256<float> MaskLoad(float* address, Vector256<uint> mask) => MaskLoad(address, mask);
+        public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) => MaskLoad(address, mask);
         /// <summary>
         /// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
         ///   VMASKMOVPD ymm, ymm, m256
         /// </summary>
-        public static unsafe Vector256<double> MaskLoad(double* address, Vector256<ulong> mask) => MaskLoad(address, mask);
+        public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) => MaskLoad(address, mask);
 
         /// <summary>
         /// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
         ///   VMASKMOVPS m128, xmm, xmm
         /// </summary>
-        public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<uint> source) => MaskStore(address, mask, source);
+        public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) => MaskStore(address, mask, source);
         /// <summary>
         /// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
         ///   VMASKMOVPD m128, xmm, xmm
         /// </summary>
-        public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<ulong> source) => MaskStore(address, mask, source);
+        public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) => MaskStore(address, mask, source);
 
         /// <summary>
         /// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
         ///   VMASKMOVPS m256, ymm, ymm
         /// </summary>
-        public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<uint> source) => MaskStore(address, mask, source);
+        public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) => MaskStore(address, mask, source);
         /// <summary>
         /// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
         ///   VMASKMOVPD m256, ymm, ymm
         /// </summary>
-        public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<ulong> source) => MaskStore(address, mask, source);
+        public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) => MaskStore(address, mask, source);
 
         /// <summary>
         /// __m256 _mm256_max_ps (__m256 a, __m256 b)

diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs
@@ -609,11 +609,6 @@ public static class Sse2
         /// </summary>
         public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
 
-        /// <summary>
-        /// int _mm_extract_epi16 (__m128i a,  int immediate)
-        ///   PEXTRW reg, xmm, imm8
-        /// </summary>
-        public static short Extract(Vector128<short> value, byte index) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// int _mm_extract_epi16 (__m128i a,  int immediate)
         ///   PEXTRW reg, xmm, imm8

diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse2.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse2.cs
@@ -610,11 +610,6 @@ public static class Sse2
         /// </summary>
         public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) => DivideScalar(left, right);
 
-        /// <summary>
-        /// int _mm_extract_epi16 (__m128i a,  int immediate)
-        ///   PEXTRW reg, xmm, imm8
-        /// </summary>
-        public static short Extract(Vector128<short> value, byte index) => Extract(value, index);
         /// <summary>
         /// int _mm_extract_epi16 (__m128i a,  int immediate)
         ///   PEXTRW reg, xmm, imm8

diff --git a/...System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs b/...System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs
@@ -178,11 +178,6 @@ public static class Sse41
         /// </summary>
         public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, byte control) { throw new PlatformNotSupportedException(); }
 
-        /// <summary>
-        /// int _mm_extract_epi8 (__m128i a, const int imm8)
-        ///   PEXTRB reg/m8, xmm, imm8
-        /// </summary>
-        public static sbyte Extract(Vector128<sbyte> value, byte index) { throw new PlatformNotSupportedException(); }
         /// <summary>
         /// int _mm_extract_epi8 (__m128i a, const int imm8)
         ///   PEXTRB reg/m8, xmm, imm8
@@ -283,7 +278,7 @@ public static class Sse41
         /// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
         ///   INSERTPS xmm, xmm/m32, imm8
         /// </summary>
-        public static Vector128<float> Insert(Vector128<float> value, float data, byte index) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, byte index) { throw new PlatformNotSupportedException(); }
 
         /// <summary>
         /// __m128i _mm_max_epi8 (__m128i a, __m128i b)

diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse41.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse41.cs
@@ -178,11 +178,6 @@ public static class Sse41
         /// </summary>
         public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, byte control) => DotProduct(left, right, control);
 
-        /// <summary>
-        /// int _mm_extract_epi8 (__m128i a, const int imm8)
-        ///   PEXTRB reg/m8, xmm, imm8
-        /// </summary>
-        public static sbyte Extract(Vector128<sbyte> value, byte index) => Extract(value, index);
         /// <summary>
         /// int _mm_extract_epi8 (__m128i a, const int imm8)
         ///   PEXTRB reg/m8, xmm, imm8
@@ -283,7 +278,7 @@ public static class Sse41
         /// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
         ///   INSERTPS xmm, xmm/m32, imm8
         /// </summary>
-        public static Vector128<float> Insert(Vector128<float> value, float data, byte index) => Insert(value, data, index);
+        public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, byte index) => Insert(value, data, index);
 
         /// <summary>
         /// __m128i _mm_max_epi8 (__m128i a, __m128i b)