diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/BitArray.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/BitArray.cs index 5158e033a9bef2..6ee0ecac6ebe63 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Collections/BitArray.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/BitArray.cs @@ -738,7 +738,7 @@ public unsafe void CopyTo(Array array, int index) Vector128 lowerShuffleMask_CopyToBoolArray = Vector128.Create(0, 0x01010101_01010101).AsByte(); Vector128 upperShuffleMask_CopyToBoolArray = Vector128.Create(0x02020202_02020202, 0x03030303_03030303).AsByte(); - if (Avx512BW.IsSupported && (uint)_bitLength >= Vector512.Count) + if (Vector512.IsHardwareAccelerated && (uint)_bitLength >= Vector512.Count) { Vector256 upperShuffleMask_CopyToBoolArray256 = Vector256.Create(0x04040404_04040404, 0x05050505_05050505, 0x06060606_06060606, 0x07070707_07070707).AsByte(); @@ -753,17 +753,17 @@ public unsafe void CopyTo(Array array, int index) { ulong bits = (ulong)(uint)in32Span[(int)(i / (uint)BitsPerInt32)] + ((ulong)in32Span[(int)(i / (uint)BitsPerInt32) + 1] << BitsPerInt32); Vector512 scalar = Vector512.Create(bits); - Vector512 shuffled = Avx512BW.Shuffle(scalar.AsByte(), shuffleMask); - Vector512 extracted = Avx512F.And(shuffled, bitMask); + Vector512 shuffled = Vector512.Shuffle(scalar.AsByte(), shuffleMask); + Vector512 extracted = shuffled & bitMask; // The extracted bits can be anywhere between 0 and 255, so we normalise the value to either 0 or 1 // to ensure compatibility with "C# bool" (0 for false, 1 for true, rest undefined) - Vector512 normalized = Avx512BW.Min(extracted, ones); - Avx512F.Store((byte*)destination + i, normalized); + Vector512 normalized = Vector512.Min(extracted, ones); + normalized.Store((byte*)destination + i); } } } - else if (Avx2.IsSupported && (uint)_bitLength >= Vector256.Count) + else if (Vector256.IsHardwareAccelerated && (uint)_bitLength >= Vector256.Count) { Vector256 shuffleMask = Vector256.Create(lowerShuffleMask_CopyToBoolArray, upperShuffleMask_CopyToBoolArray); Vector256 bitMask = Vector256.Create(0x80402010_08040201).AsByte(); @@ -775,17 +775,17 @@ public unsafe void CopyTo(Array array, int index) { int bits = in32Span[(int)(i / (uint)BitsPerInt32)]; Vector256 scalar = Vector256.Create(bits); - Vector256 shuffled = Avx2.Shuffle(scalar.AsByte(), shuffleMask); - Vector256 extracted = Avx2.And(shuffled, bitMask); + Vector256 shuffled = Vector256.Shuffle(scalar.AsByte(), shuffleMask); + Vector256 extracted = shuffled & bitMask; // The extracted bits can be anywhere between 0 and 255, so we normalise the value to either 0 or 1 // to ensure compatibility with "C# bool" (0 for false, 1 for true, rest undefined) - Vector256 normalized = Avx2.Min(extracted, ones); - Avx.Store((byte*)destination + i, normalized); + Vector256 normalized = Vector256.Min(extracted, ones); + normalized.Store((byte*)destination + i); } } } - else if (Ssse3.IsSupported && ((uint)_bitLength >= Vector128.Count * 2u)) + else if (Vector128.IsHardwareAccelerated && ((uint)_bitLength >= Vector128.Count * 2u)) { Vector128 lowerShuffleMask = lowerShuffleMask_CopyToBoolArray; Vector128 upperShuffleMask = upperShuffleMask_CopyToBoolArray; @@ -799,82 +799,19 @@ public unsafe void CopyTo(Array array, int index) int bits = in32Span[(int)(i / (uint)BitsPerInt32)]; Vector128 scalar = Vector128.CreateScalarUnsafe(bits); - Vector128 shuffledLower = Ssse3.Shuffle(scalar.AsByte(), lowerShuffleMask); - Vector128 extractedLower = Sse2.And(shuffledLower, bitMask128); - Vector128 normalizedLower = Sse2.Min(extractedLower, ones); - Sse2.Store((byte*)destination + i, normalizedLower); + Vector128 shuffledLower = Vector128.Shuffle(scalar.AsByte(), lowerShuffleMask); + Vector128 extractedLower = shuffledLower & bitMask128; + Vector128 normalizedLower = Vector128.Min(extractedLower, ones); + normalizedLower.Store((byte*)destination + i); - Vector128 shuffledHigher = Ssse3.Shuffle(scalar.AsByte(), upperShuffleMask); - Vector128 extractedHigher = Sse2.And(shuffledHigher, bitMask128); - Vector128 normalizedHigher = Sse2.Min(extractedHigher, ones); - Sse2.Store((byte*)destination + i + Vector128.Count, normalizedHigher); + Vector128 shuffledHigher = Vector128.Shuffle(scalar.AsByte(), upperShuffleMask); + Vector128 extractedHigher = shuffledHigher & bitMask128; + Vector128 normalizedHigher = Vector128.Min(extractedHigher, ones); + normalizedHigher.Store((byte*)destination + i + Vector128.Count); } } } - else if (PackedSimd.IsSupported && ((uint)_bitLength >= Vector128.Count * 2u)) - { - Vector128 lowerShuffleMask = lowerShuffleMask_CopyToBoolArray; - Vector128 upperShuffleMask = upperShuffleMask_CopyToBoolArray; - Vector128 ones = Vector128.One; - Vector128 bitMask128 = Vector128.Create(0x80402010_08040201).AsByte(); - fixed (bool* destination = &boolArray[index]) - { - for (; (i + Vector128.Count * 2u) <= (uint)_bitLength; i += (uint)Vector128.Count * 2u) - { - int bits = in32Span[(int)(i / (uint)BitsPerInt32)]; - Vector128 scalar = Vector128.CreateScalarUnsafe(bits); - - Vector128 shuffledLower = PackedSimd.Swizzle(scalar.AsByte(), lowerShuffleMask); - Vector128 extractedLower = PackedSimd.And(shuffledLower, bitMask128); - Vector128 normalizedLower = PackedSimd.Min(extractedLower, ones); - PackedSimd.Store((byte*)destination + i, normalizedLower); - - Vector128 shuffledHigher = PackedSimd.Swizzle(scalar.AsByte(), upperShuffleMask); - Vector128 extractedHigher = PackedSimd.And(shuffledHigher, bitMask128); - Vector128 normalizedHigher = PackedSimd.Min(extractedHigher, ones); - PackedSimd.Store((byte*)destination + i + Vector128.Count, normalizedHigher); - } - } - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 ones = Vector128.One; - Vector128 bitMask128 = Vector128.Create(0x80402010_08040201).AsByte(); - - fixed (bool* destination = &boolArray[index]) - { - for (; (i + Vector128.Count * 2u) <= (uint)_bitLength; i += (uint)Vector128.Count * 2u) - { - int bits = in32Span[(int)(i / (uint)BitsPerInt32)]; - - // Same logic as SSSE3 path, except we do not have Shuffle instruction. - // (TableVectorLookup could be an alternative - dotnet/runtime#1277) - // Instead we use chained ZIP1/2 instructions: - // (A0 is the byte containing LSB, A3 is the byte containing MSB) - // bits - A0 A1 A2 A3 - // v1 = Vector128.Create - A0 A1 A2 A3 A0 A1 A2 A3 A0 A1 A2 A3 A0 A1 A2 A3 - // v2 = ZipLow(v1, v1) - A0 A0 A1 A1 A2 A2 A3 A3 A0 A0 A1 A1 A2 A2 A3 A3 - // v3 = ZipLow(v2, v2) - A0 A0 A0 A0 A1 A1 A1 A1 A2 A2 A2 A2 A3 A3 A3 A3 - // shuffledLower = ZipLow(v3, v3) - A0 A0 A0 A0 A0 A0 A0 A0 A1 A1 A1 A1 A1 A1 A1 A1 - // shuffledHigher = ZipHigh(v3, v3) - A2 A2 A2 A2 A2 A2 A2 A2 A3 A3 A3 A3 A3 A3 A3 A3 - - Vector128 vector = Vector128.Create(bits).AsByte(); - vector = AdvSimd.Arm64.ZipLow(vector, vector); - vector = AdvSimd.Arm64.ZipLow(vector, vector); - - Vector128 shuffledLower = AdvSimd.Arm64.ZipLow(vector, vector); - Vector128 extractedLower = AdvSimd.And(shuffledLower, bitMask128); - Vector128 normalizedLower = AdvSimd.Min(extractedLower, ones); - - Vector128 shuffledHigher = AdvSimd.Arm64.ZipHigh(vector, vector); - Vector128 extractedHigher = AdvSimd.And(shuffledHigher, bitMask128); - Vector128 normalizedHigher = AdvSimd.Min(extractedHigher, ones); - - AdvSimd.Arm64.StorePair((byte*)destination + i, normalizedLower, normalizedHigher); - } - } - } Remainder: for (; i < (uint)_bitLength; i++)