diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 7bee3bfcdd86..20f71b701ac3 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -3107,7 +3107,7 @@ class Compiler bool isScalarISA(InstructionSet isa); static int ivalOfHWIntrinsic(NamedIntrinsic intrinsic); unsigned simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig); - static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic); + static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node = nullptr); static GenTree* lastOpOfHWIntrinsic(GenTreeHWIntrinsic* node, int numArgs); static instruction insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type); static HWIntrinsicCategory categoryOfHWIntrinsic(NamedIntrinsic intrinsic); diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 4ffe342c99af..0bd85f256b21 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -5535,15 +5535,28 @@ static bool isSseShift(instruction ins) } } -static bool isSSEExtract(instruction ins) +//------------------------------------------------------------------------ +// IsDstSrcImmAvxInstruction: check if instruction has RM R I format +// for all encodings: EVEX, VEX and legacy SSE +// +// Arguments: +// ins -- processor instruction to check +// +// Return Value: +// true if instruction has RRI format +// +static bool IsDstSrcImmAvxInstruction(instruction ins) { switch (ins) { + case INS_extractps: case INS_pextrb: case INS_pextrw: case INS_pextrd: case INS_pextrq: - case INS_extractps: + case INS_pshufd: + case INS_pshufhw: + case INS_pshuflw: return true; default: return false; @@ -5554,7 +5567,7 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg, { // TODO-XARCH refactoring emitIns_R_R_I to handle SSE2/AVX2 shift as well as emitIns_R_I bool isShift = isSseShift(ins); - if (isSSEExtract(ins) || (UseVEXEncoding() && !isShift)) + if (IsDstSrcImmAvxInstruction(ins) || (UseVEXEncoding() && !isShift)) { emitIns_R_R_I(ins, 
attr, reg, reg1, ival); } diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 2f0e5f8c4170..c4d554662f0e 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -56,7 +56,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID); HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID); int ival = Compiler::ivalOfHWIntrinsic(intrinsicID); - int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID); + int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, node); assert((flags & HW_Flag_NoCodeGen) == 0); diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 5365c6e3d015..c426071c7cbc 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -194,7 +194,7 @@ HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAligne HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_LoadVector128, "LoadVector128", SSE2, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_MaskMove, "MaskMove", SSE2, -1, 16, 3, {INS_maskmovdqu,INS_maskmovdqu,INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_MaskMove, 
"MaskMove", SSE2, -1, 16, 3, {INS_maskmovdqu,INS_maskmovdqu,INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Max, "Max", SSE2, -1, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_MaxScalar, "MaxScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -217,14 +217,17 @@ HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane, "ShiftLeftL HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic, "ShiftRightArithmetic", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE2_ShiftRightLogical, "ShiftRightLogical", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE2_ShiftRightLogical128BitLane, "ShiftRightLogical128BitLane", SSE2, -1, 16, 2, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2_Shuffle, "Shuffle", SSE2, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2_ShuffleHigh, "ShuffleHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2_ShuffleLow, "ShuffleLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE2_Sqrt, "Sqrt", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_SqrtScalar, "SqrtScalar", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Store, "Store", SSE2, -1, 16, 2, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAligned", SSE2, -1, 16, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_SubtractScalar, "SubtractScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -320,7 +323,7 @@ HARDWARE_INTRINSIC(SSE42_CompareGreaterThan, "CompareGre // AVX Intrinsics HARDWARE_INTRINSIC(AVX_IsSupported, "get_IsSupported", AVX, -1, 0, 0, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_Add, "Add", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX_AddSubtract, "AddSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX_AddSubtract, "AddSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_And, "And", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_AndNot, "AndNot", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_Blend, "Blend", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 86a9489be0a7..ef64c4e35ca0 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -219,19 +219,64 @@ unsigned Compiler::simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_I } //------------------------------------------------------------------------ -// numArgsOfHWIntrinsic: get the number of arguments +// numArgsOfHWIntrinsic: get the number of 
arguments based on table and +// if the table entry is -1, count the operands of the GenTreeHWIntrinsic +// node, which must then be non-null // // Arguments: -// intrinsic -- id of the intrinsic function. +// intrinsic -- id of the intrinsic function +// node -- GenTreeHWIntrinsic* node; may be nullptr only when the table entry is >= 0 // // Return Value: // number of arguments // -int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic) +int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node) { assert(intrinsic != NI_Illegal); assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END); - return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs; + + int numArgs = hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs; + if (numArgs >= 0) + { + return numArgs; + } + + noway_assert(node != nullptr); + assert(numArgs == -1); + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + if (op2 != nullptr) + { + return 2; + } + + if (op1 != nullptr) + { + if (op1->OperIsList()) + { + numArgs = 0; + GenTreeArgList* list = op1->AsArgList(); + + while (list != nullptr) + { + numArgs++; + list = list->Rest(); + } + + assert(numArgs > 0); + return numArgs; + } + else + { + return 1; + } + } + else + { + return 0; + } } //------------------------------------------------------------------------ diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index d448afc97746..71da64716380 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -369,8 +369,10 @@ INST3( pcmpeqb, "pcmpeqb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, INST3( pcmpgtb, "pcmpgtb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x64)) // Packed compare 8-bit signed integers for greater than INST3( pshufd, "pshufd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x70)) // Packed shuffle of 32-bit integers +INST3( pshufhw, "pshufhw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x70)) // Shuffle the high words in xmm2/m128 based on the 
encoding in imm8 and store the result in xmm1. +INST3( pshuflw, "pshuflw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x70)) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. INST3( pextrw, "pextrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC5)) // Extract 16-bit value into a r32 with zero extended to 32-bits -INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // packed insert word +INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // Insert word at index INST3( punpckhbw, "punpckhbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x68)) // Packed logical (unsigned) widen ubyte to ushort (hi) INST3( punpcklbw, "punpcklbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x60)) // Packed logical (unsigned) widen ubyte to ushort (lo) @@ -448,7 +450,7 @@ INST3( phsubsw, "phsubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( lddqu, "lddqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xF0)) // Load Unaligned integer INST3( movntdqa, "movntdqa" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2A)) // Load Double Quadword Non-Temporal Aligned Hint INST3( movddup, "movddup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x12)) // Replicate Double FP Values -INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values +INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values INST3( movshdup, "movshdup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x16)) // Replicate odd-indexed Single FP Values INST3( phminposuw, "phminposuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x41)) // Packed Horizontal Word Minimum INST3( mpsadbw, "mpsadbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x42)) // Compute Multiple Packed Sums of Absolute Difference diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 3f1deb825fb6..45799e326709 100644 
--- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -2367,7 +2367,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID); HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID); - int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID); + int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, node); GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index fdb875a827af..296d558fba36 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -2259,7 +2259,7 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID); HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID); HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID); - int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID); + int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, intrinsicTree); if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2) { @@ -2321,11 +2321,9 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert((flags & HW_Flag_NoCodeGen) == 0); - assert(numArgs != 0); - assert(numArgs != 1); - if (info->srcCount >= 2) { + assert(numArgs >= 2); LocationInfoListNode* op2Info = useList.Begin()->Next(); op2Info->info.isDelayFree = true; info->hasDelayFreeSrc = true; diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.cs new file mode 100644 index 000000000000..59ac72312e3c --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.cs @@ -0,0 +1,297 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace IntelHardwareIntrinsicTest +{ + internal static partial class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + int testCount = 16; + string methodUnderTestName = nameof(Sse2.Shuffle); + + + if (Sse2.IsSupported) + { + + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + + string[] permuteDouble = new string[] + { + "0b00", + "0b01", + "0b10", + "0b11", + }; + + using (var doubleTable = TestTableTuvImmSse2.Create(permuteDouble.Length)) + using (var intTable = TestTableTuvImmSse2.Create(permuteData.Length)) + using (var uintTable = TestTableTuvImmSse2.Create(permuteData.Length)) + { + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + doubleTable.inArray1, 16, (int i, int elNo) => + { + return (uint) i % 2; + }); + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + doubleTable.inArray2, 16, (int i, int elNo) => + { + return (uint) i % 2 + 10; + }); + + (Vector128, Vector128) valueDouble_0 = doubleTable[0]; + Vector128 resultDouble_0 = Sse2.Shuffle(valueDouble_0.Item1, valueDouble_0.Item2, (byte) 0b00); + 
doubleTable.SetOutArray(resultDouble_0, 0, (byte) 0b00); + + (Vector128, Vector128) valueDouble_1 = doubleTable[1]; + Vector128 resultDouble_1 = Sse2.Shuffle(valueDouble_1.Item1, valueDouble_1.Item2, (byte) 0b01); + doubleTable.SetOutArray(resultDouble_1, 1, (byte) 0b01); + + (Vector128, Vector128) valueDouble_2 = doubleTable[2]; + Vector128 resultDouble_2 = Sse2.Shuffle(valueDouble_2.Item1, valueDouble_2.Item2, (byte) 0b10); + doubleTable.SetOutArray(resultDouble_2, 2, (byte) 0b10); + + (Vector128, Vector128) valueDouble_3 = doubleTable[3]; + Vector128 resultDouble_3 = Sse2.Shuffle(valueDouble_3.Item1, valueDouble_3.Item2, (byte) 0b11); + doubleTable.SetOutArray(resultDouble_3, 3, (byte) 0b11); + + + // Vector128<int> tests: populate intTable's own input (uintTable is initialized separately below) + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + intTable.inArray1, 16, (int i, int elNo) => + { + return i % 4; + }); + + (Vector128, Vector128) valueInt32_0 = intTable[0]; + Vector128 resultInt32_0 = Sse2.Shuffle(valueInt32_0.Item1, (byte) 0b11100100); + intTable.SetOutArray(resultInt32_0, 0, (byte) 0b11100100); + + (Vector128, Vector128) valueInt32_1 = intTable[1]; + Vector128 resultInt32_1 = Sse2.Shuffle(valueInt32_1.Item1, (byte) 0b00011011); + intTable.SetOutArray(resultInt32_1, 1, (byte) 0b00011011); + + (Vector128, Vector128) valueInt32_2 = intTable[2]; + Vector128 resultInt32_2 = Sse2.Shuffle(valueInt32_2.Item1, (byte) 0b00000000); + intTable.SetOutArray(resultInt32_2, 2, (byte) 0b00000000); + + (Vector128, Vector128) valueInt32_3 = intTable[3]; + Vector128 resultInt32_3 = Sse2.Shuffle(valueInt32_3.Item1, (byte) 0b11111111); + intTable.SetOutArray(resultInt32_3, 3, (byte) 0b11111111); + + (Vector128, Vector128) valueInt32_4 = intTable[4]; + Vector128 resultInt32_4 = Sse2.Shuffle(valueInt32_4.Item1, (byte) 0b01010101); + intTable.SetOutArray(resultInt32_4, 4, (byte) 0b01010101); + + (Vector128, Vector128) valueInt32_5 = intTable[5]; + Vector128 resultInt32_5 = Sse2.Shuffle(valueInt32_5.Item1, (byte) 0b10101010); 
+ intTable.SetOutArray(resultInt32_5, 5, (byte) 0b10101010); + + (Vector128, Vector128) valueInt32_6 = intTable[6]; + Vector128 resultInt32_6 = Sse2.Shuffle(valueInt32_6.Item1, (byte) 0b11011000); + intTable.SetOutArray(resultInt32_6, 6, (byte) 0b11011000); + + (Vector128, Vector128) valueInt32_7 = intTable[7]; + Vector128 resultInt32_7 = Sse2.Shuffle(valueInt32_7.Item1, (byte) 0b00100111); + intTable.SetOutArray(resultInt32_7, 7, (byte) 0b00100111); + + (Vector128, Vector128) valueInt32_8 = intTable[8]; + Vector128 resultInt32_8 = Sse2.Shuffle(valueInt32_8.Item1, (byte) 0b10110001); + intTable.SetOutArray(resultInt32_8, 8, (byte) 0b10110001); + + (Vector128, Vector128) valueInt32_9 = intTable[9]; + Vector128 resultInt32_9 = Sse2.Shuffle(valueInt32_9.Item1, (byte) 0b11110000); + intTable.SetOutArray(resultInt32_9, 9, (byte) 0b11110000); + + (Vector128, Vector128) valueInt32_10 = intTable[10]; + Vector128 resultInt32_10 = Sse2.Shuffle(valueInt32_10.Item1, (byte) 0b10100101); + intTable.SetOutArray(resultInt32_10, 10, (byte) 0b10100101); + + (Vector128, Vector128) valueInt32_11 = intTable[11]; + Vector128 resultInt32_11 = Sse2.Shuffle(valueInt32_11.Item1, (byte) 0b00010100); + intTable.SetOutArray(resultInt32_11, 11, (byte) 0b00010100); + + (Vector128, Vector128) valueInt32_12 = intTable[12]; + Vector128 resultInt32_12 = Sse2.Shuffle(valueInt32_12.Item1, (byte) 0b10000010); + intTable.SetOutArray(resultInt32_12, 12, (byte) 0b10000010); + + (Vector128, Vector128) valueInt32_13 = intTable[13]; + Vector128 resultInt32_13 = Sse2.Shuffle(valueInt32_13.Item1, (byte) 0b11001100); + intTable.SetOutArray(resultInt32_13, 13, (byte) 0b11001100); + + (Vector128, Vector128) valueInt32_14 = intTable[14]; + Vector128 resultInt32_14 = Sse2.Shuffle(valueInt32_14.Item1, (byte) 0b01100110); + intTable.SetOutArray(resultInt32_14, 14, (byte) 0b01100110); + + (Vector128, Vector128) valueInt32_15 = intTable[15]; + Vector128 resultInt32_15 = Sse2.Shuffle(valueInt32_15.Item1, (byte) 
0b10011001); + intTable.SetOutArray(resultInt32_15, 15, (byte) 0b10011001); + + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + uintTable.inArray1, 16, (int i, int elNo) => + { + return (uint) i % 4; + }); + + (Vector128, Vector128) valueUInt32_0 = uintTable[0]; + Vector128 resultUInt32_0 = Sse2.Shuffle(valueUInt32_0.Item1, (byte) 0b11100100); + uintTable.SetOutArray(resultUInt32_0, 0, (byte) 0b11100100); + + (Vector128, Vector128) valueUInt32_1 = uintTable[1]; + Vector128 resultUInt32_1 = Sse2.Shuffle(valueUInt32_1.Item1, (byte) 0b00011011); + uintTable.SetOutArray(resultUInt32_1, 1, (byte) 0b00011011); + + (Vector128, Vector128) valueUInt32_2 = uintTable[2]; + Vector128 resultUInt32_2 = Sse2.Shuffle(valueUInt32_2.Item1, (byte) 0b00000000); + uintTable.SetOutArray(resultUInt32_2, 2, (byte) 0b00000000); + + (Vector128, Vector128) valueUInt32_3 = uintTable[3]; + Vector128 resultUInt32_3 = Sse2.Shuffle(valueUInt32_3.Item1, (byte) 0b11111111); + uintTable.SetOutArray(resultUInt32_3, 3, (byte) 0b11111111); + + (Vector128, Vector128) valueUInt32_4 = uintTable[4]; + Vector128 resultUInt32_4 = Sse2.Shuffle(valueUInt32_4.Item1, (byte) 0b01010101); + uintTable.SetOutArray(resultUInt32_4, 4, (byte) 0b01010101); + + (Vector128, Vector128) valueUInt32_5 = uintTable[5]; + Vector128 resultUInt32_5 = Sse2.Shuffle(valueUInt32_5.Item1, (byte) 0b10101010); + uintTable.SetOutArray(resultUInt32_5, 5, (byte) 0b10101010); + + (Vector128, Vector128) valueUInt32_6 = uintTable[6]; + Vector128 resultUInt32_6 = Sse2.Shuffle(valueUInt32_6.Item1, (byte) 0b11011000); + uintTable.SetOutArray(resultUInt32_6, 6, (byte) 0b11011000); + + (Vector128, Vector128) valueUInt32_7 = uintTable[7]; + Vector128 resultUInt32_7 = Sse2.Shuffle(valueUInt32_7.Item1, (byte) 0b00100111); + uintTable.SetOutArray(resultUInt32_7, 7, (byte) 0b00100111); + + (Vector128, Vector128) valueUInt32_8 = uintTable[8]; + Vector128 resultUInt32_8 = Sse2.Shuffle(valueUInt32_8.Item1, (byte) 
0b10110001); + uintTable.SetOutArray(resultUInt32_8, 8, (byte) 0b10110001); + + (Vector128, Vector128) valueUInt32_9 = uintTable[9]; + Vector128 resultUInt32_9 = Sse2.Shuffle(valueUInt32_9.Item1, (byte) 0b11110000); + uintTable.SetOutArray(resultUInt32_9, 9, (byte) 0b11110000); + + (Vector128, Vector128) valueUInt32_10 = uintTable[10]; + Vector128 resultUInt32_10 = Sse2.Shuffle(valueUInt32_10.Item1, (byte) 0b10100101); + uintTable.SetOutArray(resultUInt32_10, 10, (byte) 0b10100101); + + (Vector128, Vector128) valueUInt32_11 = uintTable[11]; + Vector128 resultUInt32_11 = Sse2.Shuffle(valueUInt32_11.Item1, (byte) 0b00010100); + uintTable.SetOutArray(resultUInt32_11, 11, (byte) 0b00010100); + + (Vector128, Vector128) valueUInt32_12 = uintTable[12]; + Vector128 resultUInt32_12 = Sse2.Shuffle(valueUInt32_12.Item1, (byte) 0b10000010); + uintTable.SetOutArray(resultUInt32_12, 12, (byte) 0b10000010); + + (Vector128, Vector128) valueUInt32_13 = uintTable[13]; + Vector128 resultUInt32_13 = Sse2.Shuffle(valueUInt32_13.Item1, (byte) 0b11001100); + uintTable.SetOutArray(resultUInt32_13, 13, (byte) 0b11001100); + + (Vector128, Vector128) valueUInt32_14 = uintTable[14]; + Vector128 resultUInt32_14 = Sse2.Shuffle(valueUInt32_14.Item1, (byte) 0b01100110); + uintTable.SetOutArray(resultUInt32_14, 14, (byte) 0b01100110); + + (Vector128, Vector128) valueUInt32_15 = uintTable[15]; + Vector128 resultUInt32_15 = Sse2.Shuffle(valueUInt32_15.Item1, (byte) 0b10011001); + uintTable.SetOutArray(resultUInt32_15, 15, (byte) 0b10011001); + + + CheckMethodFiveDouble checkDouble = + (Span x, Span y, byte imm, Span z, Span a) => + { + a[0] = (0x01 & imm) > 0 ? x[1] : x[0]; + a[1] = (0x02 & imm) > 0 ? 
y[1] : y[0]; + return a[0] == z[0] && a[1] == z[1]; + }; + + if (!doubleTable.CheckResultShuffle(checkDouble)) + { + PrintError8(doubleTable, methodUnderTestName, "(double x, byte y, double z, ref double a) => (a = x * y) == z", checkDouble); + testResult = Fail; + } + + CheckMethodFive checkInt32 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + for (int i = 0; i < x.Length; i++) + { + a[i] = x[imm & 0x03]; + if (z[i] != a[i]) + result = false; + imm = (byte) (imm >> 2); + } + return result; + }; + + if (!intTable.CheckResultShuffle(checkInt32)) + { + PrintError(intTable, methodUnderTestName, "(int x, byte y, int z, ref int a) => (a = x << y) == z", checkInt32); + testResult = Fail; + } + + CheckMethodFive checkUInt32 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + for (int i = 0; i < x.Length; i++) + { + a[i] = x[imm & 0x03]; + if (z[i] != a[i]) + result = false; + imm = (byte) (imm >> 2); + } + return result; + }; + + if (!uintTable.CheckResultShuffle(checkUInt32)) + { + PrintError(uintTable, methodUnderTestName, "(uint x, byte y, uint z, ref uint a) => (a = x << y) == z", checkUInt32); + testResult = Fail; + } + } + } + else + { + Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}"); + } + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.tt b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.tt new file mode 100644 index 000000000000..8922e3ff48bc --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.tt @@ -0,0 +1,225 @@ +<#@ template debug="false" hostspecific="false" language="C#" #> +<#@ assembly name="System.Core" #> +<#@ import namespace="System.Linq" #> +<#@ import namespace="System.Text" #> +<#@ import namespace="System.Collections.Generic" #> +<#@ output extension=".cs" encoding="utf-8" #> +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace IntelHardwareIntrinsicTest +{ + internal static partial class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + int testCount = 16; + string methodUnderTestName = nameof(Sse2.Shuffle); + +<# + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + + string[] permuteDouble = new string[] + { + "0b00", + "0b01", + "0b10", + "0b11", + }; + +#> + + if (Sse2.IsSupported) + { + + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 
0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + + string[] permuteDouble = new string[] + { + "0b00", + "0b01", + "0b10", + "0b11", + }; + + using (var doubleTable = TestTableTuvImmSse2.Create(permuteDouble.Length)) + using (var intTable = TestTableTuvImmSse2.Create(permuteData.Length)) + using (var uintTable = TestTableTuvImmSse2.Create(permuteData.Length)) + { + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + doubleTable.inArray1, 16, (int i, int elNo) => + { + return (uint) i % 2; + }); + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + doubleTable.inArray2, 16, (int i, int elNo) => + { + return (uint) i % 2 + 10; + }); + +<# + for (int i = 0; i < permuteDouble.Length; i++) + { +#> + (Vector128, Vector128) valueDouble_<#= i #> = doubleTable[<#= i #>]; + Vector128 resultDouble_<#= i #> = Sse2.Shuffle(valueDouble_<#= i #>.Item1, valueDouble_<#= i #>.Item2, (byte) <#= permuteDouble[i] #>); + doubleTable.SetOutArray(resultDouble_<#= i #>, <#= i #>, (byte) <#= permuteDouble[i] #>); + +<# + + } +#> + + // Vector128<int> tests: populate intTable's own input (uintTable is initialized separately below) + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + intTable.inArray1, 16, (int i, int elNo) => + { + return i % 4; + }); + +<# + for (int i = 0; i < permuteData.Length; i++) + { +#> + (Vector128, Vector128) valueInt32_<#= i #> = intTable[<#= i #>]; + Vector128 resultInt32_<#= i #> = Sse2.Shuffle(valueInt32_<#= i #>.Item1, (byte) <#= permuteData[i] #>); + intTable.SetOutArray(resultInt32_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>); + +<# + + } +#> + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + uintTable.inArray1, 16, (int i, int elNo) => + { + return (uint) i % 4; + }); + +<# + for (int i = 0; i < permuteData.Length; i++) + { +#> + (Vector128, Vector128) valueUInt32_<#= i #> = uintTable[<#= i #>]; + Vector128 resultUInt32_<#= i #> = Sse2.Shuffle(valueUInt32_<#= i #>.Item1, 
(byte) <#= permuteData[i] #>); + uintTable.SetOutArray(resultUInt32_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>); + +<# + + } +#> + + CheckMethodFiveDouble checkDouble = + (Span x, Span y, byte imm, Span z, Span a) => + { + a[0] = (0x01 & imm) > 0 ? x[1] : x[0]; + a[1] = (0x02 & imm) > 0 ? y[1] : y[0]; + return a[0] == z[0] && a[1] == z[1]; + }; + + if (!doubleTable.CheckResultShuffle(checkDouble)) + { + PrintError8(doubleTable, methodUnderTestName, "(double x, byte y, double z, ref double a) => (a = x * y) == z", checkDouble); + testResult = Fail; + } + + CheckMethodFive checkInt32 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + for (int i = 0; i < x.Length; i++) + { + a[i] = x[imm & 0x03]; + if (z[i] != a[i]) + result = false; + imm = (byte) (imm >> 2); + } + return result; + }; + + if (!intTable.CheckResultShuffle(checkInt32)) + { + PrintError(intTable, methodUnderTestName, "(int x, byte y, int z, ref int a) => (a = x << y) == z", checkInt32); + testResult = Fail; + } + + CheckMethodFive checkUInt32 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + for (int i = 0; i < x.Length; i++) + { + a[i] = x[imm & 0x03]; + if (z[i] != a[i]) + result = false; + imm = (byte) (imm >> 2); + } + return result; + }; + + if (!uintTable.CheckResultShuffle(checkUInt32)) + { + PrintError(uintTable, methodUnderTestName, "(uint x, byte y, uint z, ref uint a) => (a = x << y) == z", checkUInt32); + testResult = Fail; + } + } + } + else + { + Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}"); + } + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.cs new file mode 100644 index 000000000000..2bc3d0f9923e --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.cs @@ -0,0 +1,266 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace IntelHardwareIntrinsicTest +{ + internal static partial class Program + { + const short Pass = 100; + const short Fail = 0; + + static unsafe int Main(string[] args) + { + short testResult = Pass; + short testsCount = 16; + string methodUnderTestName = nameof(Sse2.ShuffleHigh); + + + + if (Sse2.IsSupported) + { + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + + using (var shortTable = TestTableTuvImmSse2.Create(testsCount)) + using (var ushortTable = TestTableTuvImmSse2.Create(testsCount)) + { + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + shortTable.inArray1, 16, (int i, int elNo) => + { + return (short)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, shortTable.inArray2); + + (Vector128, Vector128) valueInt16_0 = shortTable[0]; + Vector128 resultInt16_0 = Sse2.ShuffleHigh(valueInt16_0.Item1, (byte) 0b11100100); + shortTable.SetOutArray(resultInt16_0, 0, (byte) 0b11100100); + + (Vector128, Vector128) valueInt16_1 = shortTable[1]; + Vector128 resultInt16_1 = Sse2.ShuffleHigh(valueInt16_1.Item1, (byte) 
0b00011011); + shortTable.SetOutArray(resultInt16_1, 1, (byte) 0b00011011); + + (Vector128, Vector128) valueInt16_2 = shortTable[2]; + Vector128 resultInt16_2 = Sse2.ShuffleHigh(valueInt16_2.Item1, (byte) 0b00000000); + shortTable.SetOutArray(resultInt16_2, 2, (byte) 0b00000000); + + (Vector128, Vector128) valueInt16_3 = shortTable[3]; + Vector128 resultInt16_3 = Sse2.ShuffleHigh(valueInt16_3.Item1, (byte) 0b11111111); + shortTable.SetOutArray(resultInt16_3, 3, (byte) 0b11111111); + + (Vector128, Vector128) valueInt16_4 = shortTable[4]; + Vector128 resultInt16_4 = Sse2.ShuffleHigh(valueInt16_4.Item1, (byte) 0b01010101); + shortTable.SetOutArray(resultInt16_4, 4, (byte) 0b01010101); + + (Vector128, Vector128) valueInt16_5 = shortTable[5]; + Vector128 resultInt16_5 = Sse2.ShuffleHigh(valueInt16_5.Item1, (byte) 0b10101010); + shortTable.SetOutArray(resultInt16_5, 5, (byte) 0b10101010); + + (Vector128, Vector128) valueInt16_6 = shortTable[6]; + Vector128 resultInt16_6 = Sse2.ShuffleHigh(valueInt16_6.Item1, (byte) 0b11011000); + shortTable.SetOutArray(resultInt16_6, 6, (byte) 0b11011000); + + (Vector128, Vector128) valueInt16_7 = shortTable[7]; + Vector128 resultInt16_7 = Sse2.ShuffleHigh(valueInt16_7.Item1, (byte) 0b00100111); + shortTable.SetOutArray(resultInt16_7, 7, (byte) 0b00100111); + + (Vector128, Vector128) valueInt16_8 = shortTable[8]; + Vector128 resultInt16_8 = Sse2.ShuffleHigh(valueInt16_8.Item1, (byte) 0b10110001); + shortTable.SetOutArray(resultInt16_8, 8, (byte) 0b10110001); + + (Vector128, Vector128) valueInt16_9 = shortTable[9]; + Vector128 resultInt16_9 = Sse2.ShuffleHigh(valueInt16_9.Item1, (byte) 0b11110000); + shortTable.SetOutArray(resultInt16_9, 9, (byte) 0b11110000); + + (Vector128, Vector128) valueInt16_10 = shortTable[10]; + Vector128 resultInt16_10 = Sse2.ShuffleHigh(valueInt16_10.Item1, (byte) 0b10100101); + shortTable.SetOutArray(resultInt16_10, 10, (byte) 0b10100101); + + (Vector128, Vector128) valueInt16_11 = shortTable[11]; + Vector128 
resultInt16_11 = Sse2.ShuffleHigh(valueInt16_11.Item1, (byte) 0b00010100); + shortTable.SetOutArray(resultInt16_11, 11, (byte) 0b00010100); + + (Vector128, Vector128) valueInt16_12 = shortTable[12]; + Vector128 resultInt16_12 = Sse2.ShuffleHigh(valueInt16_12.Item1, (byte) 0b10000010); + shortTable.SetOutArray(resultInt16_12, 12, (byte) 0b10000010); + + (Vector128, Vector128) valueInt16_13 = shortTable[13]; + Vector128 resultInt16_13 = Sse2.ShuffleHigh(valueInt16_13.Item1, (byte) 0b11001100); + shortTable.SetOutArray(resultInt16_13, 13, (byte) 0b11001100); + + (Vector128, Vector128) valueInt16_14 = shortTable[14]; + Vector128 resultInt16_14 = Sse2.ShuffleHigh(valueInt16_14.Item1, (byte) 0b01100110); + shortTable.SetOutArray(resultInt16_14, 14, (byte) 0b01100110); + + (Vector128, Vector128) valueInt16_15 = shortTable[15]; + Vector128 resultInt16_15 = Sse2.ShuffleHigh(valueInt16_15.Item1, (byte) 0b10011001); + shortTable.SetOutArray(resultInt16_15, 15, (byte) 0b10011001); + + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + ushortTable.inArray1, 16, (int i, int elNo) => + { + return (ushort)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, ushortTable.inArray2); + + + (Vector128, Vector128) valueUInt16_0 = ushortTable[0]; + Vector128 resultUInt16_0 = Sse2.ShuffleHigh(valueUInt16_0.Item1, (byte) 0b11100100); + ushortTable.SetOutArray(resultUInt16_0, 0, (byte) 0b11100100); + + (Vector128, Vector128) valueUInt16_1 = ushortTable[1]; + Vector128 resultUInt16_1 = Sse2.ShuffleHigh(valueUInt16_1.Item1, (byte) 0b00011011); + ushortTable.SetOutArray(resultUInt16_1, 1, (byte) 0b00011011); + + (Vector128, Vector128) valueUInt16_2 = ushortTable[2]; + Vector128 resultUInt16_2 = Sse2.ShuffleHigh(valueUInt16_2.Item1, (byte) 0b00000000); + ushortTable.SetOutArray(resultUInt16_2, 2, (byte) 0b00000000); + + (Vector128, Vector128) valueUInt16_3 = ushortTable[3]; + Vector128 resultUInt16_3 = Sse2.ShuffleHigh(valueUInt16_3.Item1, (byte) 
0b11111111); + ushortTable.SetOutArray(resultUInt16_3, 3, (byte) 0b11111111); + + (Vector128, Vector128) valueUInt16_4 = ushortTable[4]; + Vector128 resultUInt16_4 = Sse2.ShuffleHigh(valueUInt16_4.Item1, (byte) 0b01010101); + ushortTable.SetOutArray(resultUInt16_4, 4, (byte) 0b01010101); + + (Vector128, Vector128) valueUInt16_5 = ushortTable[5]; + Vector128 resultUInt16_5 = Sse2.ShuffleHigh(valueUInt16_5.Item1, (byte) 0b10101010); + ushortTable.SetOutArray(resultUInt16_5, 5, (byte) 0b10101010); + + (Vector128, Vector128) valueUInt16_6 = ushortTable[6]; + Vector128 resultUInt16_6 = Sse2.ShuffleHigh(valueUInt16_6.Item1, (byte) 0b11011000); + ushortTable.SetOutArray(resultUInt16_6, 6, (byte) 0b11011000); + + (Vector128, Vector128) valueUInt16_7 = ushortTable[7]; + Vector128 resultUInt16_7 = Sse2.ShuffleHigh(valueUInt16_7.Item1, (byte) 0b00100111); + ushortTable.SetOutArray(resultUInt16_7, 7, (byte) 0b00100111); + + (Vector128, Vector128) valueUInt16_8 = ushortTable[8]; + Vector128 resultUInt16_8 = Sse2.ShuffleHigh(valueUInt16_8.Item1, (byte) 0b10110001); + ushortTable.SetOutArray(resultUInt16_8, 8, (byte) 0b10110001); + + (Vector128, Vector128) valueUInt16_9 = ushortTable[9]; + Vector128 resultUInt16_9 = Sse2.ShuffleHigh(valueUInt16_9.Item1, (byte) 0b11110000); + ushortTable.SetOutArray(resultUInt16_9, 9, (byte) 0b11110000); + + (Vector128, Vector128) valueUInt16_10 = ushortTable[10]; + Vector128 resultUInt16_10 = Sse2.ShuffleHigh(valueUInt16_10.Item1, (byte) 0b10100101); + ushortTable.SetOutArray(resultUInt16_10, 10, (byte) 0b10100101); + + (Vector128, Vector128) valueUInt16_11 = ushortTable[11]; + Vector128 resultUInt16_11 = Sse2.ShuffleHigh(valueUInt16_11.Item1, (byte) 0b00010100); + ushortTable.SetOutArray(resultUInt16_11, 11, (byte) 0b00010100); + + (Vector128, Vector128) valueUInt16_12 = ushortTable[12]; + Vector128 resultUInt16_12 = Sse2.ShuffleHigh(valueUInt16_12.Item1, (byte) 0b10000010); + ushortTable.SetOutArray(resultUInt16_12, 12, (byte) 0b10000010); + + 
(Vector128, Vector128) valueUInt16_13 = ushortTable[13]; + Vector128 resultUInt16_13 = Sse2.ShuffleHigh(valueUInt16_13.Item1, (byte) 0b11001100); + ushortTable.SetOutArray(resultUInt16_13, 13, (byte) 0b11001100); + + (Vector128, Vector128) valueUInt16_14 = ushortTable[14]; + Vector128 resultUInt16_14 = Sse2.ShuffleHigh(valueUInt16_14.Item1, (byte) 0b01100110); + ushortTable.SetOutArray(resultUInt16_14, 14, (byte) 0b01100110); + + (Vector128, Vector128) valueUInt16_15 = ushortTable[15]; + Vector128 resultUInt16_15 = Sse2.ShuffleHigh(valueUInt16_15.Item1, (byte) 0b10011001); + ushortTable.SetOutArray(resultUInt16_15, 15, (byte) 0b10011001); + + + CheckMethodFive checkInt16 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) + { + if (i < halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03) + 4]; + imm = (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!shortTable.CheckResultShuffle(checkInt16)) + { + PrintError8(shortTable, methodUnderTestName, "CheckResultShuffleHigh", checkInt16); + testResult = Fail; + } + + CheckMethodFive checkUInt16 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) + { + if (i < halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03) + 4]; + imm = (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!ushortTable.CheckResultShuffle(checkUInt16)) + { + PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffleHigh", checkUInt16); + testResult = Fail; + } + } + } + else + { + Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}"); + } + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.tt b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.tt new 
file mode 100644 index 000000000000..b94c24c242b8 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.tt @@ -0,0 +1,190 @@ +<#@ template debug="false" hostspecific="false" language="C#" #> +<#@ assembly name="System.Core" #> +<#@ import namespace="System.Linq" #> +<#@ import namespace="System.Text" #> +<#@ import namespace="System.Collections.Generic" #> +<#@ output extension=".cs" encoding="utf-8" #> +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace IntelHardwareIntrinsicTest +{ + internal static partial class Program + { + const short Pass = 100; + const short Fail = 0; + + static unsafe int Main(string[] args) + { + short testResult = Pass; + short testsCount = 16; + string methodUnderTestName = nameof(Sse2.ShuffleHigh); + +<# + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + +#> + + + if (Sse2.IsSupported) + { + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + 
"0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + + using (var shortTable = TestTableTuvImmSse2.Create(testsCount)) + using (var ushortTable = TestTableTuvImmSse2.Create(testsCount)) + { + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + shortTable.inArray1, 16, (int i, int elNo) => + { + return (short)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, shortTable.inArray2); + +<# + for (int i = 0; i < permuteData.Length; i++) + { +#> + (Vector128, Vector128) valueInt16_<#= i #> = shortTable[<#= i #>]; + Vector128 resultInt16_<#= i #> = Sse2.ShuffleHigh(valueInt16_<#= i #>.Item1, (byte) <#= permuteData[i] #>); + shortTable.SetOutArray(resultInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>); + +<# + + } +#> + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + ushortTable.inArray1, 16, (int i, int elNo) => + { + return (ushort)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, ushortTable.inArray2); + + +<# + for (int i = 0; i < permuteData.Length; i++) + { +#> + (Vector128, Vector128) valueUInt16_<#= i #> = ushortTable[<#= i #>]; + Vector128 resultUInt16_<#= i #> = Sse2.ShuffleHigh(valueUInt16_<#= i #>.Item1, (byte) <#= permuteData[i] #>); + ushortTable.SetOutArray(resultUInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>); + +<# + + } +#> + + CheckMethodFive checkInt16 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) + { + 
if (i < halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03) + 4]; + imm = (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!shortTable.CheckResultShuffle(checkInt16)) + { + PrintError8(shortTable, methodUnderTestName, "CheckResultShuffleHigh", checkInt16); + testResult = Fail; + } + + CheckMethodFive checkUInt16 = (Span x, byte imm, Span z, Span a) => + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) + { + if (i < halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03) + 4]; + imm = (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!ushortTable.CheckResultShuffle(checkUInt16)) + { + PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffleHigh", checkUInt16); + testResult = Fail; + } + } + } + else + { + Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}"); + } + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_r.csproj new file mode 100644 index 000000000000..208548cd4577 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_r.csproj @@ -0,0 +1,49 @@ + + + + + Debug + AnyCPU + 2.0 + {5B060A38-515A-4C2C-9DFB-2EC23B20FCA1} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + False + + + + None + + + + + + + + + + True + True + ShuffleHigh.tt + + + + + + TextTemplatingFileGenerator + ShuffleHigh.cs + + + + + + \ No newline at end of file diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_ro.csproj new file mode 100644 index 000000000000..a9a3a47937f0 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_ro.csproj @@ -0,0 +1,48 @@ + + + + 
+ Debug + AnyCPU + 2.0 + {47111150-B83B-44A1-AB94-807BEF42E5E0} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + False + + + + None + True + + + + + + + + True + True + ShuffleHigh.tt + + + + + + TextTemplatingFileGenerator + ShuffleHigh.cs + + + + + + \ No newline at end of file diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.cs new file mode 100644 index 000000000000..cfbf02b0526f --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.cs @@ -0,0 +1,266 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace IntelHardwareIntrinsicTest +{ + internal static partial class Program + { + const short Pass = 100; + const short Fail = 0; + + static unsafe int Main(string[] args) + { + short testResult = Pass; + short testsCount = 16; + string methodUnderTestName = nameof(Sse2.ShuffleLow); + + + + if (Sse2.IsSupported) + { + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + + using (var shortTable = 
TestTableTuvImmSse2.Create(testsCount)) + using (var ushortTable = TestTableTuvImmSse2.Create(testsCount)) + { + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + shortTable.inArray1, 16, (int i, int elNo) => + { + return (short)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, shortTable.inArray2); + + (Vector128, Vector128) valueInt16_0 = shortTable[0]; + Vector128 resultInt16_0 = Sse2.ShuffleLow(valueInt16_0.Item1, (byte) 0b11100100); + shortTable.SetOutArray(resultInt16_0, 0, (byte) 0b11100100); + + (Vector128, Vector128) valueInt16_1 = shortTable[1]; + Vector128 resultInt16_1 = Sse2.ShuffleLow(valueInt16_1.Item1, (byte) 0b00011011); + shortTable.SetOutArray(resultInt16_1, 1, (byte) 0b00011011); + + (Vector128, Vector128) valueInt16_2 = shortTable[2]; + Vector128 resultInt16_2 = Sse2.ShuffleLow(valueInt16_2.Item1, (byte) 0b00000000); + shortTable.SetOutArray(resultInt16_2, 2, (byte) 0b00000000); + + (Vector128, Vector128) valueInt16_3 = shortTable[3]; + Vector128 resultInt16_3 = Sse2.ShuffleLow(valueInt16_3.Item1, (byte) 0b11111111); + shortTable.SetOutArray(resultInt16_3, 3, (byte) 0b11111111); + + (Vector128, Vector128) valueInt16_4 = shortTable[4]; + Vector128 resultInt16_4 = Sse2.ShuffleLow(valueInt16_4.Item1, (byte) 0b01010101); + shortTable.SetOutArray(resultInt16_4, 4, (byte) 0b01010101); + + (Vector128, Vector128) valueInt16_5 = shortTable[5]; + Vector128 resultInt16_5 = Sse2.ShuffleLow(valueInt16_5.Item1, (byte) 0b10101010); + shortTable.SetOutArray(resultInt16_5, 5, (byte) 0b10101010); + + (Vector128, Vector128) valueInt16_6 = shortTable[6]; + Vector128 resultInt16_6 = Sse2.ShuffleLow(valueInt16_6.Item1, (byte) 0b11011000); + shortTable.SetOutArray(resultInt16_6, 6, (byte) 0b11011000); + + (Vector128, Vector128) valueInt16_7 = shortTable[7]; + Vector128 resultInt16_7 = Sse2.ShuffleLow(valueInt16_7.Item1, (byte) 0b00100111); + shortTable.SetOutArray(resultInt16_7, 7, (byte) 0b00100111); + + 
(Vector128, Vector128) valueInt16_8 = shortTable[8]; + Vector128 resultInt16_8 = Sse2.ShuffleLow(valueInt16_8.Item1, (byte) 0b10110001); + shortTable.SetOutArray(resultInt16_8, 8, (byte) 0b10110001); + + (Vector128, Vector128) valueInt16_9 = shortTable[9]; + Vector128 resultInt16_9 = Sse2.ShuffleLow(valueInt16_9.Item1, (byte) 0b11110000); + shortTable.SetOutArray(resultInt16_9, 9, (byte) 0b11110000); + + (Vector128, Vector128) valueInt16_10 = shortTable[10]; + Vector128 resultInt16_10 = Sse2.ShuffleLow(valueInt16_10.Item1, (byte) 0b10100101); + shortTable.SetOutArray(resultInt16_10, 10, (byte) 0b10100101); + + (Vector128, Vector128) valueInt16_11 = shortTable[11]; + Vector128 resultInt16_11 = Sse2.ShuffleLow(valueInt16_11.Item1, (byte) 0b00010100); + shortTable.SetOutArray(resultInt16_11, 11, (byte) 0b00010100); + + (Vector128, Vector128) valueInt16_12 = shortTable[12]; + Vector128 resultInt16_12 = Sse2.ShuffleLow(valueInt16_12.Item1, (byte) 0b10000010); + shortTable.SetOutArray(resultInt16_12, 12, (byte) 0b10000010); + + (Vector128, Vector128) valueInt16_13 = shortTable[13]; + Vector128 resultInt16_13 = Sse2.ShuffleLow(valueInt16_13.Item1, (byte) 0b11001100); + shortTable.SetOutArray(resultInt16_13, 13, (byte) 0b11001100); + + (Vector128, Vector128) valueInt16_14 = shortTable[14]; + Vector128 resultInt16_14 = Sse2.ShuffleLow(valueInt16_14.Item1, (byte) 0b01100110); + shortTable.SetOutArray(resultInt16_14, 14, (byte) 0b01100110); + + (Vector128, Vector128) valueInt16_15 = shortTable[15]; + Vector128 resultInt16_15 = Sse2.ShuffleLow(valueInt16_15.Item1, (byte) 0b10011001); + shortTable.SetOutArray(resultInt16_15, 15, (byte) 0b10011001); + + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + ushortTable.inArray1, 16, (int i, int elNo) => + { + return (ushort)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, ushortTable.inArray2); + + + (Vector128, Vector128) valueUInt16_0 = ushortTable[0]; + Vector128 resultUInt16_0 
= Sse2.ShuffleLow(valueUInt16_0.Item1, (byte) 0b11100100); + ushortTable.SetOutArray(resultUInt16_0, 0, (byte) 0b11100100); + + (Vector128, Vector128) valueUInt16_1 = ushortTable[1]; + Vector128 resultUInt16_1 = Sse2.ShuffleLow(valueUInt16_1.Item1, (byte) 0b00011011); + ushortTable.SetOutArray(resultUInt16_1, 1, (byte) 0b00011011); + + (Vector128, Vector128) valueUInt16_2 = ushortTable[2]; + Vector128 resultUInt16_2 = Sse2.ShuffleLow(valueUInt16_2.Item1, (byte) 0b00000000); + ushortTable.SetOutArray(resultUInt16_2, 2, (byte) 0b00000000); + + (Vector128, Vector128) valueUInt16_3 = ushortTable[3]; + Vector128 resultUInt16_3 = Sse2.ShuffleLow(valueUInt16_3.Item1, (byte) 0b11111111); + ushortTable.SetOutArray(resultUInt16_3, 3, (byte) 0b11111111); + + (Vector128, Vector128) valueUInt16_4 = ushortTable[4]; + Vector128 resultUInt16_4 = Sse2.ShuffleLow(valueUInt16_4.Item1, (byte) 0b01010101); + ushortTable.SetOutArray(resultUInt16_4, 4, (byte) 0b01010101); + + (Vector128, Vector128) valueUInt16_5 = ushortTable[5]; + Vector128 resultUInt16_5 = Sse2.ShuffleLow(valueUInt16_5.Item1, (byte) 0b10101010); + ushortTable.SetOutArray(resultUInt16_5, 5, (byte) 0b10101010); + + (Vector128, Vector128) valueUInt16_6 = ushortTable[6]; + Vector128 resultUInt16_6 = Sse2.ShuffleLow(valueUInt16_6.Item1, (byte) 0b11011000); + ushortTable.SetOutArray(resultUInt16_6, 6, (byte) 0b11011000); + + (Vector128, Vector128) valueUInt16_7 = ushortTable[7]; + Vector128 resultUInt16_7 = Sse2.ShuffleLow(valueUInt16_7.Item1, (byte) 0b00100111); + ushortTable.SetOutArray(resultUInt16_7, 7, (byte) 0b00100111); + + (Vector128, Vector128) valueUInt16_8 = ushortTable[8]; + Vector128 resultUInt16_8 = Sse2.ShuffleLow(valueUInt16_8.Item1, (byte) 0b10110001); + ushortTable.SetOutArray(resultUInt16_8, 8, (byte) 0b10110001); + + (Vector128, Vector128) valueUInt16_9 = ushortTable[9]; + Vector128 resultUInt16_9 = Sse2.ShuffleLow(valueUInt16_9.Item1, (byte) 0b11110000); + ushortTable.SetOutArray(resultUInt16_9, 9, 
(byte) 0b11110000); + + (Vector128, Vector128) valueUInt16_10 = ushortTable[10]; + Vector128 resultUInt16_10 = Sse2.ShuffleLow(valueUInt16_10.Item1, (byte) 0b10100101); + ushortTable.SetOutArray(resultUInt16_10, 10, (byte) 0b10100101); + + (Vector128, Vector128) valueUInt16_11 = ushortTable[11]; + Vector128 resultUInt16_11 = Sse2.ShuffleLow(valueUInt16_11.Item1, (byte) 0b00010100); + ushortTable.SetOutArray(resultUInt16_11, 11, (byte) 0b00010100); + + (Vector128, Vector128) valueUInt16_12 = ushortTable[12]; + Vector128 resultUInt16_12 = Sse2.ShuffleLow(valueUInt16_12.Item1, (byte) 0b10000010); + ushortTable.SetOutArray(resultUInt16_12, 12, (byte) 0b10000010); + + (Vector128, Vector128) valueUInt16_13 = ushortTable[13]; + Vector128 resultUInt16_13 = Sse2.ShuffleLow(valueUInt16_13.Item1, (byte) 0b11001100); + ushortTable.SetOutArray(resultUInt16_13, 13, (byte) 0b11001100); + + (Vector128, Vector128) valueUInt16_14 = ushortTable[14]; + Vector128 resultUInt16_14 = Sse2.ShuffleLow(valueUInt16_14.Item1, (byte) 0b01100110); + ushortTable.SetOutArray(resultUInt16_14, 14, (byte) 0b01100110); + + (Vector128, Vector128) valueUInt16_15 = ushortTable[15]; + Vector128 resultUInt16_15 = Sse2.ShuffleLow(valueUInt16_15.Item1, (byte) 0b10011001); + ushortTable.SetOutArray(resultUInt16_15, 15, (byte) 0b10011001); + + + CheckMethodFive checkInt16 = (Span x, byte imm, Span z, Span a) => /* reference result: each low-half element takes x[imm & 3], consuming 2 bits of imm per element; the high half is copied unchanged */ + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) + { + if (i >= halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03)]; + imm = (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!shortTable.CheckResultShuffle(checkInt16)) + { + PrintError8(shortTable, methodUnderTestName, "CheckResultShuffleLow", checkInt16); /* label names this test (ShuffleLow), not ShuffleHigh */ + testResult = Fail; + } + + CheckMethodFive checkUInt16 = (Span x, byte imm, Span z, Span a) => /* same reference computation as checkInt16 */ + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) 
+ { + if (i >= halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03)]; + imm = (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!ushortTable.CheckResultShuffle(checkUInt16)) + { + PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffleLow", checkUInt16); /* label names this test (ShuffleLow), not ShuffleHigh */ + testResult = Fail; + } + } + } + else + { + Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}"); + } + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.tt b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.tt new file mode 100644 index 000000000000..04327da50e82 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.tt @@ -0,0 +1,190 @@ +<#@ template debug="false" hostspecific="false" language="C#" #> +<#@ assembly name="System.Core" #> +<#@ import namespace="System.Linq" #> +<#@ import namespace="System.Text" #> +<#@ import namespace="System.Collections.Generic" #> +<#@ output extension=".cs" encoding="utf-8" #> +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace IntelHardwareIntrinsicTest +{ + internal static partial class Program + { + const short Pass = 100; + const short Fail = 0; + + static unsafe int Main(string[] args) + { + short testResult = Pass; + short testsCount = 16; + string methodUnderTestName = nameof(Sse2.ShuffleLow); + +<# + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + +#> + + + if (Sse2.IsSupported) + { + string[] permuteData = new string[] + { + "0b11100100", // identity + "0b00011011", // invert + "0b00000000", // broadcast element 0 + "0b11111111", // broadcast element 3 + "0b01010101", // broadcast element 1 + "0b10101010", // broadcast element 2 + "0b11011000", // swap middle elements + "0b00100111", // swap external elements + "0b10110001", // swap internal with external elements + "0b11110000", // divide everything between external elements + "0b10100101", // divide everything between internal elements + "0b00010100", // pattern (0, 1, 1, 0) + "0b10000010", // pattern (2, 0, 0, 2) + "0b11001100", // pattern (3, 0, 3, 0) + "0b01100110", // pattern (1, 2, 1, 2) + "0b10011001" // pattern (2, 1, 2, 1) + }; + + using (var shortTable = TestTableTuvImmSse2.Create(testsCount)) + using (var ushortTable = 
TestTableTuvImmSse2.Create(testsCount)) + { + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + shortTable.inArray1, 16, (int i, int elNo) => + { + return (short)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, shortTable.inArray2); + +<# + for (int i = 0; i < permuteData.Length; i++) + { +#> + (Vector128, Vector128) valueInt16_<#= i #> = shortTable[<#= i #>]; + Vector128 resultInt16_<#= i #> = Sse2.ShuffleLow(valueInt16_<#= i #>.Item1, (byte) <#= permuteData[i] #>); + shortTable.SetOutArray(resultInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>); + +<# + + } +#> + + // Vector128 tests + + TestUtilities.InitializeWithElementNumberingModuloVectorLength( + ushortTable.inArray1, 16, (int i, int elNo) => + { + return (ushort)(i % 8); + }); + + TestUtilities.InitializeWithConstValue(0, ushortTable.inArray2); + + +<# + for (int i = 0; i < permuteData.Length; i++) + { +#> + (Vector128, Vector128) valueUInt16_<#= i #> = ushortTable[<#= i #>]; + Vector128 resultUInt16_<#= i #> = Sse2.ShuffleLow(valueUInt16_<#= i #>.Item1, (byte) <#= permuteData[i] #>); + ushortTable.SetOutArray(resultUInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>); + +<# + + } +#> + + CheckMethodFive checkInt16 = (Span x, byte imm, Span z, Span a) => /* reference result: each low-half element takes x[imm & 3], consuming 2 bits of imm per element; the high half is copied unchanged */ + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) + { + if (i >= halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03)]; + imm = (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!shortTable.CheckResultShuffle(checkInt16)) + { + PrintError8(shortTable, methodUnderTestName, "CheckResultShuffleLow", checkInt16); /* label names this test (ShuffleLow), not ShuffleHigh */ + testResult = Fail; + } + + CheckMethodFive checkUInt16 = (Span x, byte imm, Span z, Span a) => /* same reference computation as checkInt16 */ + { + bool result = true; + int halfLength = x.Length/2; + for (int i = 0; i < x.Length; i++) + { + if (i >= halfLength) + { + a[i] = x[i]; + } + else + { + a[i] = x[(imm & 0x03)]; + imm 
= (byte) (imm >> 2); + } + + if (z[i] != a[i]) + result = false; + } + return result; + }; + + if (!ushortTable.CheckResultShuffle(checkUInt16)) + { + PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffleLow", checkUInt16); /* label names this test (ShuffleLow), not ShuffleHigh */ + testResult = Fail; + } + } + } + else + { + Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}"); + } + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_r.csproj new file mode 100644 index 000000000000..8369d6ac11ec --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_r.csproj @@ -0,0 +1,49 @@ + + + + + Debug + AnyCPU + 2.0 + {3DCFB777-8A32-443E-ABD9-4636600D2B4F} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + False + + + + None + + + + + + + + + + True + True + ShuffleLow.tt + + + + + + TextTemplatingFileGenerator + ShuffleLow.cs + + + + + + \ No newline at end of file diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_ro.csproj new file mode 100644 index 000000000000..721cd0532726 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_ro.csproj @@ -0,0 +1,48 @@ + + + + + Debug + AnyCPU + 2.0 + {A9DAC473-F5CB-4DA9-ADE4-2F9EB53FC4A8} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + False + + + + None + True + + + + + + + + True + True + ShuffleLow.tt + + + + + + TextTemplatingFileGenerator + ShuffleLow.cs + + + + + + \ No newline at end of file diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_r.csproj new file mode 100644 index 000000000000..6cfebb744634 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_r.csproj @@ 
-0,0 +1,49 @@ + + + + + Debug + AnyCPU + 2.0 + {D25DF7E1-96B0-454A-A5BE-70C26BE49559} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + False + + + + None + + + + + + + + + + True + True + Shuffle.tt + + + + + + TextTemplatingFileGenerator + Shuffle.cs + + + + + + \ No newline at end of file diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_ro.csproj new file mode 100644 index 000000000000..46f64bc68810 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_ro.csproj @@ -0,0 +1,48 @@ + + + + + Debug + AnyCPU + 2.0 + {695760F3-DA13-4227-9ED6-AD8C5E5D88C6} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + False + + + + None + True + + + + + + + + True + True + Shuffle.tt + + + + + + TextTemplatingFileGenerator + Shuffle.cs + + + + + + \ No newline at end of file diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/TestTableSse2.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/TestTableSse2.cs index e3fc3e8b8f4d..00b7e009abdf 100644 --- a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/TestTableSse2.cs +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/TestTableSse2.cs @@ -14,16 +14,24 @@ namespace IntelHardwareIntrinsicTest { public delegate bool CheckMethod(T x, T y, T z, ref T c); + public delegate bool CheckMethodSpan(Span x, Span y, Span z, Span a); + public delegate bool CheckMethodTwo(T x, T y, U z, ref U c); public delegate bool CheckMethodTwo(T x, V y, U z, ref U c); + public delegate bool CheckMethodTwoSpan(Span x, V y, U z, ref U c); + public delegate bool CheckMethodThree(T x1, T x2, T y1, T y2, U z, ref U c); public delegate bool CheckMethodFour(T x1, T x2, U z1, U z2, ref U c1, ref U c2); public delegate bool CheckMethodFive(T x1, T x2, T y1, T y2, U z1, U z2, ref U c1, ref U c2); + public delegate bool CheckMethodFive(Span x, V imm, Span z, 
Span a); + + public delegate bool CheckMethodFiveDouble(Span x, Span y, V imm, Span z, Span a); + public delegate bool CheckMethodFourTFourU( ValueTuple x, ValueTuple y, @@ -51,6 +59,9 @@ public delegate bool CheckMethodEightOfTEightOfU( public delegate bool CheckMethodEightImm( Span x, T value, V i, Span z, Span a); + public delegate bool CheckMethodEightInsert( + Span x, T value, V i, Span z, Span a); + public delegate bool CheckMethodSixteen( ValueTuple> x, ValueTuple> y, @@ -73,7 +84,9 @@ public delegate bool CheckMethodSixteenOfAll( public enum InitMode { Undefined = 0, - NumberFirstVectors = 0b00000001 + NumberFirstVectors = 0b00000001, + NumberAllVectors = 0b00000010, + UseConstValue = 0b00000100, } public unsafe struct TestTableSse2 : IDisposable where T : struct @@ -129,6 +142,18 @@ public ValueTuple GetDataPoint(int index) return (inArray1[index], inArray2[index], outArray[index], checkArray[index]); } + public Memory GetAssignmentData(int index) + { + _index = index; + return new Memory(inArray2, index * _stepSize, _stepSize); + } + + public ValueTuple, Memory, Memory, Memory> GetAssignmentDataPoint(int index) + { + return (new Memory(inArray1, index, _stepSize), new Memory(inArray2, index, _stepSize), + new Memory(outArray, index, _stepSize), new Memory(outArray, index, _stepSize)); + } + public static TestTableSse2 Create(int lengthInVectors) { int length = _stepSize / Marshal.SizeOf() * lengthInVectors; @@ -195,6 +220,24 @@ public bool CheckResult(CheckMethod check) return result; } + public bool CheckResult(CheckMethodSpan check) + { + bool result = true; + for (int i = 0; i < inArray1.Length; i += _stepSize) + { + var x = new Span(inArray1, i, _stepSize); + var y = new Span(inArray2, i, _stepSize); + var z = new Span(inArray2, i, _stepSize); + var a = new Span(inArray2, i, _stepSize); + + if (!check(x, y, z, a)) + { + result = false; + } + } + return result; + } + public void Dispose() { _inHandle1.Free(); @@ -802,6 +845,12 @@ public void 
SetOutArray(Vector128 value, int index = -1) Unsafe.Write((byte*)OutArrayPtr + (_index * _stepSize), value); } + public void SetOutArray(U value, int index = -1) + { + index = index < 0 ? _index : index; + outArray[_index] = value; + } + public Vector128 this[int index] { get @@ -1050,7 +1099,8 @@ public unsafe struct TestTableImmSse2 : IDisposable where T : struct wh { private const int _stepSize = 16; private static int s_tSize; - private static int s_ElementCount; + public static int ElementCount; + private int _lengthInVectors; private GCHandle _inHandle1; private GCHandle _inHandle2; @@ -1073,12 +1123,26 @@ public unsafe struct TestTableImmSse2 : IDisposable where T : struct wh public void* CheckArrayPtr => _checkHandle.AddrOfPinnedObject().ToPointer(); public Vector128 Vector1 => Unsafe.Read>((byte*)InArray1Ptr + (_index * _stepSize)); - public T Value => Unsafe.Read((byte*)InArray2Ptr + (_index)); - public V Immediate => Unsafe.Read((byte*)ImmArrayPtr + (_index)); + public T Value => inArray2[_index]; + public V Immediate => immArray[_index]; public Vector128 Vector3 => Unsafe.Read>((byte*)OutArrayPtr + (_index * _stepSize)); public Vector128 Vector4 => Unsafe.Read>((byte*)CheckArrayPtr + (_index * _stepSize)); - public int Index { get => _index; set => _index = value; } + public int Index + { + get => _index; + set + { + if (value < 0 || value >= _lengthInVectors) + { + throw new IndexOutOfRangeException(); + } + else + { + _index = value; + } + } + } public void SetOutArray(Vector128 value, int index = -1) { @@ -1086,12 +1150,26 @@ public void SetOutArray(Vector128 value, int index = -1) Unsafe.Write((byte*)OutArrayPtr + (_index * _stepSize), value); } + public void SetOutArray(U value, V imm, int index = -1) + { + index = index < 0 ? 
_index : index; + outArray[_index] = value; + immArray[_index] = imm; + } + + public void SetOutArray(Vector128 value1, int index, V value2) + { + Index = index; + Unsafe.Write((byte*)OutArrayPtr + (index * _stepSize), value1); + immArray[index] = value2; + } + public (Vector128, T) this[int index] { get { - _index = index; - return (Vector1, Value); + Index = index; + return (Vector1, inArray2[index]); } } @@ -1103,6 +1181,12 @@ public void SetOutArray(Vector128 value, int index = -1) (checkArray[index], checkArray[index + 1], checkArray[index + 2], checkArray[index + 3], checkArray[index + 4], checkArray[index + 5], checkArray[index + 6], checkArray[index + 7])); } + public (Memory, V, U, U) GetExtractDataPoint(int index) + { + int ii = index / ElementCount; + return (new Memory(inArray1, index, ElementCount), immArray[ii], outArray[ii], checkArray[ii]); + } + public static TestTableImmSse2 Create(int lengthInVectors, double tSizeMultiplier = 1.0) { return new TestTableImmSse2(lengthInVectors, tSizeMultiplier); @@ -1111,8 +1195,9 @@ public static TestTableImmSse2 Create(int lengthInVectors, double tSize public TestTableImmSse2(int lengthInVectors, double tSizeMultiplier = 1.0, bool initialize = true) { s_tSize = Marshal.SizeOf(); - s_ElementCount = _stepSize / s_tSize; - int length = s_ElementCount * lengthInVectors; + ElementCount = _stepSize / s_tSize; + _lengthInVectors = lengthInVectors; + int length = ElementCount * lengthInVectors; inArray1 = new T[length]; inArray2 = new T[lengthInVectors]; immArray = new V[lengthInVectors]; @@ -1243,12 +1328,46 @@ private void InitializeWithVectorNumbering() } } + public bool CheckResultExtract(CheckMethodTwoSpan check) + { + int topIndex = inArray1.Length - ElementCount + 1; + bool result = true; + for (int i = 0; i < topIndex; i += ElementCount) + { + int ii = i / ElementCount; + var x = new Span(inArray1, i, ElementCount); + if (!check(x, immArray[ii], outArray[ii], ref checkArray[ii])) + { + result = false; + } + } + 
return result; + } + + public bool CheckResultInsert(CheckMethodEightInsert check) + { + bool result = true; + for (int i = 0; i < inArray1.Length - 1; i+= ElementCount) + { + var x = new Span(inArray1, i, ElementCount); + var z = new Span(outArray, i, ElementCount); + var a = new Span(checkArray, i, ElementCount); + int ii = i / ElementCount; + + if (!check(x, inArray2[ii], immArray[ii], z, a)) + { + result = false; + } + } + return result; + } + public bool CheckResultImm(CheckMethodEightImm check) { bool result = true; + int elNo = _stepSize / s_tSize; for (int i = 0; i < inArray1.Length; i++) { - int elNo = _stepSize / s_tSize; if (!check( new Span(inArray1, Index * elNo, elNo), inArray2[i], immArray[i], @@ -1522,6 +1641,398 @@ public void Dispose() } } + public unsafe struct TestTableTuvImmSse2 : IDisposable + where T : struct + where U : struct + where V : struct + { + private const int _vectorSize = 16; + private static int _tSize; + private static int _elementsNo; + private static int _lengthInVectors; + + private GCHandle _inHandle1; + private GCHandle _inHandle2; + private GCHandle _immHandle; + private GCHandle _outHandle; + private GCHandle _checkHandle; + + private int _index; + + public T[] inArray1; + public T[] inArray2; + public V[] immArray; + public U[] outArray; + public U[] checkArray; + + public void* InArray1Ptr => _inHandle1.AddrOfPinnedObject().ToPointer(); + public void* InArray2Ptr => _inHandle2.AddrOfPinnedObject().ToPointer(); + public void* ImmArrayPtr => _inHandle2.AddrOfPinnedObject().ToPointer(); + public void* OutArrayPtr => _outHandle.AddrOfPinnedObject().ToPointer(); + public void* CheckArrayPtr => _checkHandle.AddrOfPinnedObject().ToPointer(); + + public Vector128 Vector1 => Unsafe.Read>((byte*)InArray1Ptr + (_index * _vectorSize)); + public Vector128 Vector2 => Unsafe.Read>((byte*)InArray2Ptr + (_index * _vectorSize)); + public V Immediate => Unsafe.Read((byte*)ImmArrayPtr + (_index)); + public Vector128 Vector3 => 
Unsafe.Read>((byte*)OutArrayPtr + (_index * _vectorSize)); + public Vector128 Vector4 => Unsafe.Read>((byte*)CheckArrayPtr + (_index * _vectorSize)); + + public int Index + { + get => _index; + set + { + if (value < 0 || value >= _lengthInVectors) + { + throw new IndexOutOfRangeException(); + } + else + { + _index = value; + } + } + } + + public void SetOutArray(Vector128 value, int index = -1) + { + index = index < 0 ? _index : index; + Unsafe.Write((byte*)OutArrayPtr + (_index * _vectorSize), value); + } + + public void SetOutArray(Vector128 value1, int index, V value2) + { + Index = index; + Unsafe.Write((byte*)OutArrayPtr + (Index * _vectorSize), value1); + immArray[Index] = value2; + } + + public (Vector128, Vector128) this[int index] + { + get + { + _index = index; + return (Vector1, Vector2); + } + } + + public unsafe ValueTuple GetQuad22DataPoint(int index) + { + return (inArray1[index], immArray[index / (_vectorSize / _tSize)], outArray[index], checkArray[index]); + } + + public ((T, T), (T, T), V, (U, U), (U, U)) GetDoubleImmDataPoint(int index) + { + return ((inArray1[index], inArray1[index + 1]), + (inArray2[index], inArray2[index + 1]), + immArray[index / 2], + (outArray[index], outArray[index + 1]), + (checkArray[index], checkArray[index + 1])); + } + + public ((T, T, T, T), V, (U, U, U, U), (U, U, U, U)) GetQuadImmDataPoint(int index) + { + return ((inArray1[index], inArray1[index + 1], inArray1[index + 2], inArray1[index + 3]), + immArray[index / 4], + (outArray[index], outArray[index + 1], outArray[index + 2], outArray[index + 3]), + (checkArray[index], checkArray[index + 1], checkArray[index + 2], checkArray[index + 3])); + + } + + public ((T, T, T, T, T, T, T, T), T, V, (U, U, U, U, U, U, U, U), (U, U, U, U, U, U, U, U)) GetOctaImmDataPoint(int index) + { + return ((inArray1[index], inArray1[index + 1], inArray1[index + 2], inArray1[index + 3], inArray1[index + 4], inArray1[index + 5], inArray1[index + 6], inArray1[index + 7]), + inArray2[index / 
8], immArray[index / 8], + (outArray[index], outArray[index + 1], outArray[index + 2], outArray[index + 3], outArray[index + 4], outArray[index + 5], outArray[index + 6], outArray[index + 7]), + (checkArray[index], checkArray[index + 1], checkArray[index + 2], checkArray[index + 3], checkArray[index + 4], checkArray[index + 5], checkArray[index + 6], checkArray[index + 7])); + } + + public static TestTableTuvImmSse2 Create(int lengthInVectors, double tSizeMultiplier = 1.0) + { + return new TestTableTuvImmSse2(lengthInVectors, tSizeMultiplier); + } + + public TestTableTuvImmSse2(int lengthInVectors, double tSizeMultiplier = 1.0, bool initialize = true) + { + _lengthInVectors = lengthInVectors; + _tSize = Marshal.SizeOf(); + _elementsNo = _vectorSize / _tSize; + int length = _elementsNo * lengthInVectors; + inArray1 = new T[length]; + inArray2 = new T[length]; + immArray = new V[lengthInVectors]; + outArray = new U[(int)(length * (1 / tSizeMultiplier))]; + checkArray = new U[(int)(length * (1 / tSizeMultiplier))]; + _index = 0; + _inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned); + _inHandle2 = GCHandle.Alloc(inArray2, GCHandleType.Pinned); + _immHandle = GCHandle.Alloc(inArray2, GCHandleType.Pinned); + _outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned); + _checkHandle = GCHandle.Alloc(checkArray, GCHandleType.Pinned); + if (initialize) + { + Initialize(); + } + } + + public void Initialize(InitMode mode = InitMode.Undefined) + { + TestUtilities.InitializeData(inArray1, inArray2, mode, InArray1Ptr, InArray2Ptr); + } + + public bool CheckResultImm(CheckMethodTwo check) + { + bool result = true; + for (int i = 0; i < inArray1.Length; i++) + { + int elNo = _vectorSize / _tSize; + if (!check( + inArray1[i], immArray[i / elNo], + outArray[i], ref checkArray[i])) + { + result = false; + } + } + return result; + } + + public bool CheckResultImm(CheckMethodEightImm check) + { + bool result = true; + for (int i = 0; i < inArray1.Length; i++) + { + if (!check( 
+ new Span(inArray1, Index * _elementsNo, _elementsNo), + inArray2[i], immArray[i], + new Span(outArray, Index * _elementsNo, _elementsNo), + new Span(checkArray, Index * _elementsNo, _elementsNo))) + { + result = false; + } + } + return result; + } + + public bool CheckResultShuffle(CheckMethodFive check) + { + bool result = true; + for (int i = 0; i < inArray1.Length; i += _elementsNo) + { + if (!check( + new Span(inArray1, i, _elementsNo), + immArray[i / _elementsNo], + new Span(outArray, i, _elementsNo), + new Span(checkArray, i, _elementsNo))) + { + result = false; + } + } + return result; + } + + public bool CheckResultShuffle(CheckMethodFiveDouble check) + { + bool result = true; + for (int i = 0; i < inArray1.Length; i += _elementsNo) + { + if (!check( + new Span(inArray1, i, _elementsNo), + new Span(inArray2, i, _elementsNo), + immArray[i / _elementsNo], + new Span(outArray, i, _elementsNo), + new Span(checkArray, i, _elementsNo))) + { + result = false; + } + } + return result; + } + + public void Dispose() + { + _inHandle1.Free(); + _inHandle2.Free(); + _immHandle.Free(); + _outHandle.Free(); + _checkHandle.Free(); + } + } + + public static class TestUtilities + { + public static unsafe void InitializeData( + T[] inArray1, T[] inArray2, InitMode mode = InitMode.Undefined, + void* InArray1Ptr = null, void* InArray2Ptr = null, T value = default(T)) + { + if (mode == InitMode.Undefined) + { + InitializeWithRandomData(inArray1, inArray2, InArray1Ptr, InArray2Ptr); + } + else if (mode == InitMode.NumberFirstVectors) + { + InitializeWithContinuosIndependentNumbering(inArray1, inArray2); + } + else if (mode == InitMode.UseConstValue) + { + InitializeWithConstValue(value, inArray1); + } + else if (mode == InitMode.NumberAllVectors) + { + } + } + + public static unsafe void InitializeWithRandomData( + T[] inArray1, T[] inArray2, void* InArray1Ptr = null, void* InArray2Ptr = null) + { + InitializeWithRandomData(inArray1, InArray1Ptr); + 
InitializeWithRandomData(inArray2, InArray2Ptr); + } + + public static unsafe void InitializeWithRandomData( + T[] array, void* arrayPtr = null) + { + Random random = new Random(unchecked((int)(DateTime.UtcNow.Ticks & 0x00000000ffffffffl))); + if (array is double[]) + { + var array1 = array as double[]; + for (int i = 0; i < array1.Length; i++) + { + array1[i] = random.NextDouble() * random.Next(); + } + } + else if (array is float[]) + { + var arrayFloat1 = array as float[]; + for (int i = 0; i < arrayFloat1.Length; i++) + { + arrayFloat1[i] = (float)(random.NextDouble() * random.Next(ushort.MaxValue)); + } + } + else + { + if (arrayPtr == null) + throw new ArgumentNullException(nameof(arrayPtr)); + + int tSize = Marshal.SizeOf(); + random.NextBytes(new Span(((byte*)arrayPtr), array.Length * tSize)); + } + } + + public static void InitializeWithContinuosIndependentNumbering(T[] array1, T[] array2) + { + if (array1 is double[] doubleArray1) + { + double[] doubleArray2 = array2 as double[]; + for (double i = 0.0, j = 10000.0; i < doubleArray1.Length; i++, j++) + { + doubleArray1[(int)i] = i; + doubleArray2[(int)i] = j; + } + } + else if (array1 is float[] floatArray1) + { + float[] floatArray2 = array2 as float[]; + for (float i = 0.0f, j = 10000.0f; i < floatArray1.Length; i++, j++) + { + floatArray1[(int)i] = i; + floatArray2[(int)i] = j; + } + } + else if (array1 is byte[] byteArray1) + { + byte[] byteArray2 = array2 as byte[]; + for (byte i = 0, j = 100; i < byteArray1.Length; i++, j++) + { + byteArray1[i] = i; + byteArray2[i] = j; + } + } + else if (array1 is sbyte[] sbyteArray1) + { + sbyte[] sbyteArray2 = array2 as sbyte[]; + for (sbyte i = 0, j = 100; i < sbyteArray1.Length; i++, j++) + { + sbyteArray1[i] = i; + sbyteArray2[i] = j; + } + } + else if (array1 is short[] shortArray1) + { + short[] shortArray2 = array2 as short[]; + for (short i = 0, j = 10000; i < shortArray1.Length; i++, j++) + { + shortArray1[i] = i; + shortArray2[i] = j; + } + + } + else if 
(array1 is ushort[] ushortArray1) + { + ushort[] ushortArray2 = array2 as ushort[]; + for (ushort i = 0, j = 10000; i < ushortArray1.Length; i++, j++) + { + ushortArray1[i] = i; + ushortArray2[i] = j; + } + } + else if (array1 is int[] intArray1) + { + int[] intArray2 = array2 as int[]; + for (int i = 0, j = 10000; i < intArray1.Length; i++, j++) + { + intArray1[i] = i; + intArray2[i] = j; + } + } + else if (array1 is uint[] uintArray1) + { + uint[] uintArray2 = array2 as uint[]; + for (uint i = 0, j = 10000; i < uintArray1.Length; i++, j++) + { + uintArray1[i] = i; + uintArray2[i] = j; + } + } + else if (array1 is long[] longArray1) + { + long[] longArray2 = array2 as long[]; + for (long i = 0, j = 10000; i < longArray1.Length; i++, j++) + { + longArray1[i] = i; + longArray2[i] = j; + } + } + else if (array1 is ulong[] ulongArray1) + { + ulong[] ulongArray2 = array2 as ulong[]; + for (uint i = 0, j = 10000; i < ulongArray1.Length; i++, j++) + { + ulongArray1[i] = i; + ulongArray2[i] = j; + } + } + } + + public static void InitializeWithConstValue(T value, T[] array) + { + if (array == null) + throw new ArgumentNullException(nameof(array)); + + for (int i = 0; i < array.Length; i++) + { + array[i] = value; + } + } + + public static void InitializeWithElementNumberingModuloVectorLength(T[] array, int vectorSize, Func function) + { + int elNo = vectorSize / Marshal.SizeOf(); + for (int i = 0; i < array.Length; i++) + { + array[i] = function(i, elNo); + } + } + } + public enum SpecialCheck { Undefined = 0, @@ -1571,6 +2082,20 @@ private static void PrintError(TestTableSse2 testTable, string functionNam Console.WriteLine("\n"); } + private static void PrintError(TestTableSse2 testTable, string functionName = "", string testFuncString = "", + CheckMethodSpan check = null) where T : struct + { + PrintErrorHeaderTu(functionName, testFuncString); + for (int i = 0; i < testTable.outArray.Length; i++) + { + (Memory, Memory, Memory, Memory) item = 
testTable.GetAssignmentDataPoint(i); + Console.Write( + $"({(PrintMemory(item.Item1), PrintMemory(item.Item2), PrintMemory(item.Item3), PrintMemory(item.Item4))})" + + (check != null ? $"->{check(item.Item1.Span, item.Item2.Span, item.Item3.Span, item.Item4.Span)}, " : ", ")); + } + Console.WriteLine("\n"); + } + private static void PrintError(TestTableSse2 testTable, string functionName = "", string testFuncString = "", CheckMethodTwo check = null) where T : struct where U : struct { @@ -1595,6 +2120,18 @@ private static void PrintError(TestTableTuvSse2 testTable, str Console.WriteLine(); } + private static void PrintError(TestTableImmSse2 testTable, string functionName = "", string testFuncString = "", + CheckMethodTwoSpan check = null) where T : struct where U : struct where V : struct + { + PrintErrorHeaderTuv(functionName, testFuncString); + for (int i = 0; i < testTable.inArray1.Length; i+= TestTableImmSse2.ElementCount) + { + (Memory, V, U, U) item = testTable.GetExtractDataPoint(i); + Console.Write($"({item})" + (check != null ? 
$"->{check(item.Item1.Span, item.Item2, item.Item3, ref item.Item4)}, " : ", ")); + } + Console.WriteLine(); + } + private static void PrintError(TestTableSse2 testTable, string functionName = "", string testFuncString = "", CheckMethodThree check = null) where T : struct where U : struct { @@ -1653,6 +2190,61 @@ private static void PrintError(TestTableSse2 testTable, string funct Console.WriteLine(); } + private static void PrintError(TestTableTuvImmSse2 testTable, string functionName = "", string testFuncString = "", + CheckMethodFive check = null) where T : struct where U : struct where V : struct + { + PrintErrorHeaderTu(functionName, testFuncString); + for (int i = 0; i < testTable.inArray1.Length - 3; i += 4) + { + // (T, T, T, T, U, U, U, U) + var item = testTable.GetQuadImmDataPoint(i); + Console.Write($"({item}), "); + } + Console.WriteLine(); + } + + private static void PrintError8(TestTableTuvImmSse2 testTable, string functionName = "", string testFuncString = "", + CheckMethodFive check = null) where T : struct where U : struct where V : struct + { + byte ConvertToByte(V value) + { + if (value is byte result) + return result; + else + return 0; + } + + PrintErrorHeaderTu(functionName, testFuncString); + for (int i = 0; i < testTable.inArray1.Length - 7; i += 8) + { + // ((T, T, T, T, T, T, T, T), T, V, (U, U, U, U, U, U, U, U), (U, U, U, U, U, U, U, U)) + var item = testTable.GetOctaImmDataPoint(i); + Console.Write($"((x{item.Item1}, y({item.Item2}), imm({Convert.ToString(ConvertToByte(item.Item3), 2)}), z{item.Item4}, a{item.Item5})), "); + } + Console.WriteLine(); + } + + private static void PrintError8(TestTableTuvImmSse2 testTable, string functionName = "", string testFuncString = "", + CheckMethodFiveDouble check = null) where T : struct where U : struct where V : struct + { + byte ConvertToByte(V value) + { + if (value is byte result) + return result; + else + return 0; + } + + PrintErrorHeaderTu(functionName, testFuncString); + for (int i = 0; i 
< testTable.inArray1.Length - 7; i += 8) + { + // ((T, T), (T, T), V, (U, U), (U, U)) + var item = testTable.GetDoubleImmDataPoint(i); + Console.Write($"((x{item.Item1}, y({item.Item2}), imm({Convert.ToString(ConvertToByte(item.Item3), 2)}), z{item.Item4}, a{item.Item5})), "); + } + Console.WriteLine(); + } + private static void PrintError(TestTableSse2 testTable, string functionName = "", string testFuncString = "", CheckMethodSix check = null) where T : struct where U : struct { @@ -1716,6 +2308,19 @@ private static void PrintError(TestTableImmSse2 testTable, str Console.WriteLine(); } + private static void PrintError(TestTableImmSse2 testTable, string functionName = "", string testFuncString = "", + CheckMethodEightInsert check = null) where T : struct where U : struct where V : struct + { + PrintErrorHeaderTu(functionName, testFuncString); + for (int i = 0, j = 0; i < testTable.inArray1.Length - 7 && j < testTable.inArray2.Length; i += 8, j += 1) + { + // ((T, T, T, T, T, T, T, T), T, V, (U, U, U, U, U, U, U, U), (U, U, U, U, U, U, U, U)) + var item = testTable.GetOctaImmDataPoint(i); + Console.Write($"({item})"); + } + Console.WriteLine(); + } + private static void PrintError(TestTableSse2 testTable, string functionName = "", string testFuncString = "", CheckMethodEightOfTEightOfU check = null) where T : struct where U : struct {