Skip to content

Commit 363ae94

Browse files
committed
Ensure ShuffleNative's behaviour with reflection/function pointers/etc. is the same as a normal call
- Not a particularly pretty solution to the problem, but it should work correctly at least
1 parent d87cec8 commit 363ae94

File tree

12 files changed

+321
-47
lines changed

12 files changed

+321
-47
lines changed

src/coreclr/jit/gentree.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -30962,14 +30962,18 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec
3096230962

3096330963
case NI_Vector128_Shuffle:
3096430964
case NI_Vector128_ShuffleNative:
30965+
case NI_Vector128_ShuffleNativeFallback:
3096530966
#if defined(TARGET_XARCH)
3096630967
case NI_Vector256_Shuffle:
3096730968
case NI_Vector256_ShuffleNative:
30969+
case NI_Vector256_ShuffleNativeFallback:
3096830970
case NI_Vector512_Shuffle:
3096930971
case NI_Vector512_ShuffleNative:
30972+
case NI_Vector512_ShuffleNativeFallback:
3097030973
#elif defined(TARGET_ARM64)
3097130974
case NI_Vector64_Shuffle:
3097230975
case NI_Vector64_ShuffleNative:
30976+
case NI_Vector64_ShuffleNativeFallback:
3097330977
#endif
3097430978
{
3097530979
// The shuffle indices ideally are constant so we can get the best

src/coreclr/jit/hwintrinsicarm64.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -2253,12 +2253,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
22532253
case NI_Vector128_Shuffle:
22542254
case NI_Vector64_ShuffleNative:
22552255
case NI_Vector128_ShuffleNative:
2256+
case NI_Vector64_ShuffleNativeFallback:
2257+
case NI_Vector128_ShuffleNativeFallback:
22562258
{
22572259
assert((sig->numArgs == 2) || (sig->numArgs == 3));
22582260
assert((simdSize == 8) || (simdSize == 16));
22592261

22602262
// The Native variants are non-deterministic on arm64 (for element size > 1)
2261-
bool isShuffleNative = (intrinsic == NI_Vector64_ShuffleNative) || (intrinsic == NI_Vector128_ShuffleNative);
2263+
bool isShuffleNative = (intrinsic != NI_Vector64_Shuffle) && (intrinsic != NI_Vector128_Shuffle);
22622264
if (isShuffleNative && (genTypeSize(simdBaseType) > 1) && BlockNonDeterministicIntrinsics(mustExpand))
22632265
{
22642266
break;

src/coreclr/jit/hwintrinsiclistarm64.h

+2
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ HARDWARE_INTRINSIC(Vector64, Round,
9292
HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
9393
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
9494
HARDWARE_INTRINSIC(Vector64, ShuffleNative, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
95+
HARDWARE_INTRINSIC(Vector64, ShuffleNativeFallback, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
9596
HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
9697
HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
9798
HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -215,6 +216,7 @@ HARDWARE_INTRINSIC(Vector128, Round,
215216
HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
216217
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
217218
HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
219+
HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
218220
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
219221
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
220222
HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)

src/coreclr/jit/hwintrinsiclistxarch.h

+3
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ HARDWARE_INTRINSIC(Vector128, Round,
110110
HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
111111
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
112112
HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
113+
HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
113114
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
114115
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
115116
HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -230,6 +231,7 @@ HARDWARE_INTRINSIC(Vector256, Round,
230231
HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
231232
HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
232233
HARDWARE_INTRINSIC(Vector256, ShuffleNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
234+
HARDWARE_INTRINSIC(Vector256, ShuffleNativeFallback, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
233235
HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
234236
HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
235237
HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
@@ -351,6 +353,7 @@ HARDWARE_INTRINSIC(Vector512, Round,
351353
HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
352354
HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
353355
HARDWARE_INTRINSIC(Vector512, ShuffleNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
356+
HARDWARE_INTRINSIC(Vector512, ShuffleNativeFallback, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp)
354357
HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
355358
HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
356359
HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)

src/coreclr/jit/hwintrinsicxarch.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -3644,12 +3644,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
36443644
case NI_Vector128_ShuffleNative:
36453645
case NI_Vector256_ShuffleNative:
36463646
case NI_Vector512_ShuffleNative:
3647+
case NI_Vector128_ShuffleNativeFallback:
3648+
case NI_Vector256_ShuffleNativeFallback:
3649+
case NI_Vector512_ShuffleNativeFallback:
36473650
{
36483651
assert((sig->numArgs == 2) || (sig->numArgs == 3));
36493652

36503653
// The Native variants are non-deterministic on xarch
3651-
bool isShuffleNative = (intrinsic == NI_Vector128_ShuffleNative) || (intrinsic == NI_Vector256_ShuffleNative) ||
3652-
(intrinsic == NI_Vector512_ShuffleNative);
3654+
bool isShuffleNative = (intrinsic != NI_Vector128_Shuffle) && (intrinsic != NI_Vector256_Shuffle) &&
3655+
(intrinsic == NI_Vector512_Shuffle);
36533656
if (isShuffleNative && BlockNonDeterministicIntrinsics(mustExpand))
36543657
{
36553658
break;

src/coreclr/jit/rationalize.cpp

+8-4
Original file line numberDiff line numberDiff line change
@@ -342,14 +342,18 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTre
342342
{
343343
case NI_Vector128_Shuffle:
344344
case NI_Vector128_ShuffleNative:
345+
case NI_Vector128_ShuffleNativeFallback:
345346
#if defined(TARGET_XARCH)
346347
case NI_Vector256_Shuffle:
347348
case NI_Vector256_ShuffleNative:
349+
case NI_Vector256_ShuffleNativeFallback:
348350
case NI_Vector512_Shuffle:
349351
case NI_Vector512_ShuffleNative:
352+
case NI_Vector512_ShuffleNativeFallback:
350353
#elif defined(TARGET_ARM64)
351354
case NI_Vector64_Shuffle:
352355
case NI_Vector64_ShuffleNative:
356+
case NI_Vector64_ShuffleNativeFallback:
353357
#endif
354358
{
355359
assert(operandCount == 2);
@@ -363,12 +367,12 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTre
363367
GenTree* op1 = operands[0];
364368
GenTree* op2 = operands[1];
365369

366-
bool isShuffleNative = intrinsicId == NI_Vector128_ShuffleNative;
370+
bool isShuffleNative = intrinsicId != NI_Vector128_Shuffle;
367371
#if defined(TARGET_XARCH)
368-
isShuffleNative = isShuffleNative || (intrinsicId == NI_Vector256_ShuffleNative) ||
369-
(intrinsicId == NI_Vector512_ShuffleNative);
372+
isShuffleNative = isShuffleNative && (intrinsicId != NI_Vector256_Shuffle) &&
373+
(intrinsicId != NI_Vector512_Shuffle);
370374
#elif defined(TARGET_ARM64)
371-
isShuffleNative = isShuffleNative || (intrinsicId == NI_Vector64_ShuffleNative);
375+
isShuffleNative = isShuffleNative && (intrinsicId != NI_Vector64_Shuffle);
372376
#endif
373377

374378
// Check if the required intrinsics to emit are available.

0 commit comments

Comments
 (0)