diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 56d8a1bca6c1f..49a2cdaa9b34b 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1403,7 +1403,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed case NI_Vector128_Create: case NI_Vector128_CreateScalar: case NI_Vector128_CreateScalarUnsafe: - case NI_VectorT_CreateBroadcast: #if defined(TARGET_XARCH) case NI_BMI1_TrailingZeroCount: case NI_BMI1_X64_TrailingZeroCount: @@ -1647,8 +1646,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed #endif // TARGET_ARM64 case NI_Vector128_get_AllBitsSet: case NI_Vector128_get_One: - case NI_VectorT_get_AllBitsSet: - case NI_VectorT_get_One: #if defined(TARGET_XARCH) case NI_Vector256_get_AllBitsSet: case NI_Vector256_get_One: diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 949d611519e4c..000def1e8f1eb 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25510,20 +25510,48 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(IsBaselineVector512IsaSupportedDebugOnly()); GenTree* op1Dup = fgMakeMultiUse(&op1); - op1 = gtNewSimdGetUpperNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); - op1Dup = gtNewSimdGetLowerNode(TYP_SIMD32, op1Dup, simdBaseJitType, simdSize); - simdSize = simdSize / 2; - op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD32, op1, op1Dup, simdBaseJitType, simdSize); + + op1 = gtNewSimdGetLowerNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); + op1Dup = gtNewSimdGetUpperNode(TYP_SIMD32, op1Dup, simdBaseJitType, simdSize); + + if (varTypeIsFloating(simdBaseType)) + { + // We need to ensure deterministic results which requires + // consistently adding values together. Since many operations + // end up operating on 128-bit lanes, we break the sum the same way.
+ + op1 = gtNewSimdSumNode(type, op1, simdBaseJitType, 32); + op1Dup = gtNewSimdSumNode(type, op1Dup, simdBaseJitType, 32); + + return gtNewOperNode(GT_ADD, type, op1, op1Dup); + } + + simdSize = 32; + op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD32, op1, op1Dup, simdBaseJitType, 32); } if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); GenTree* op1Dup = fgMakeMultiUse(&op1); - op1 = gtNewSimdGetUpperNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); - op1Dup = gtNewSimdGetLowerNode(TYP_SIMD16, op1Dup, simdBaseJitType, simdSize); - simdSize = simdSize / 2; - op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Dup, simdBaseJitType, simdSize); + + op1 = gtNewSimdGetLowerNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); + op1Dup = gtNewSimdGetUpperNode(TYP_SIMD16, op1Dup, simdBaseJitType, simdSize); + + if (varTypeIsFloating(simdBaseType)) + { + // We need to ensure deterministic results which requires + // consistently adding values together. Since many operations + // end up operating on 128-bit lanes, we break the sum the same way.
+ + op1 = gtNewSimdSumNode(type, op1, simdBaseJitType, 16); + op1Dup = gtNewSimdSumNode(type, op1Dup, simdBaseJitType, 16); + + return gtNewOperNode(GT_ADD, type, op1, op1Dup); + } + + simdSize = 16; + op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Dup, simdBaseJitType, 16); } assert(simdSize == 16); @@ -25534,6 +25562,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); + if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -25571,6 +25600,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); + if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 095f31246d0c6..b4eb52ab53410 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -534,39 +534,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_Vector64_Abs: - case NI_Vector128_Abs: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_Addition: - case NI_Vector128_op_Addition: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_AndNot: - case NI_Vector128_AndNot: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND_NOT, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case 
NI_Vector128_AsVector: { assert(!sig->hasThis()); @@ -682,30 +649,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_BitwiseAnd: - case NI_Vector128_op_BitwiseAnd: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_BitwiseOr: - case NI_Vector128_op_BitwiseOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_Ceiling: case NI_Vector128_Ceiling: { @@ -1062,32 +1005,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_Division: - case NI_Vector128_op_Division: - { - assert(sig->numArgs == 2); - - if (!varTypeIsFloating(simdBaseType)) - { - // We can't trivially handle division for integral types using SIMD - break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_Dot: case NI_Vector128_Dot: { @@ -1653,32 +1570,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_Multiply: - case NI_Vector128_op_Multiply: - { - assert(sig->numArgs == 2); - - if (varTypeIsLong(simdBaseType)) - { - // TODO-ARM64-CQ: We should support long/ulong multiplication. 
- break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_MultiplyAddEstimate: case NI_Vector128_MultiplyAddEstimate: { @@ -1725,15 +1616,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_OnesComplement: - case NI_Vector128_op_OnesComplement: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_op_Inequality: case NI_Vector128_op_Inequality: { @@ -1747,55 +1629,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_Subtraction: - case NI_Vector128_op_Subtraction: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_LeftShift: - case NI_Vector128_op_LeftShift: - { - assert(sig->numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_RightShift: - case NI_Vector128_op_RightShift: - { - assert(sig->numArgs == 2); - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? 
GT_RSZ : GT_RSH; - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_UnsignedRightShift: - case NI_Vector128_op_UnsignedRightShift: - { - assert(sig->numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_Shuffle: case NI_Vector128_Shuffle: { @@ -2205,18 +2038,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_ExclusiveOr: - case NI_Vector128_op_ExclusiveOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_AdvSimd_LoadVector64x2AndUnzip: case NI_AdvSimd_LoadVector64x3AndUnzip: case NI_AdvSimd_LoadVector64x4AndUnzip: diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index d3c880f38e0fd..69e1abf8f1eae 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -15,8 +15,6 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector64 Intrinsics -HARDWARE_INTRINSIC(Vector64, Abs, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, AndNot, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Ceiling, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ConditionalSelect, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ConvertToDouble, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -62,18 +60,10 @@ HARDWARE_INTRINSIC(Vector64, Max, HARDWARE_INTRINSIC(Vector64, Min, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Addition, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_BitwiseAnd, 8, 2, 
false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector64, op_BitwiseOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector64, op_Division, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector64, op_ExclusiveOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector64, op_LeftShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Multiply, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector64, op_OnesComplement, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_RightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Subtraction, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_UnaryNegation, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) @@ -95,8 +85,6 @@ HARDWARE_INTRINSIC(Vector64, WithElement, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector128 Intrinsics -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) @@ -148,19 +136,11 @@ HARDWARE_INTRINSIC(Vector128, Max, HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, 
false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h 
b/src/coreclr/jit/hwintrinsiclistxarch.h index 578e50f76821c..3f508d9b2d0be 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -29,8 +29,6 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector128 Intrinsics -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) @@ -80,18 +78,10 @@ HARDWARE_INTRINSIC(Vector128, Max, HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) @@ -113,8 +103,6 @@ HARDWARE_INTRINSIC(Vector128, WithElement, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector256 Intrinsics -HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, false, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) @@ -164,18 +152,10 @@ HARDWARE_INTRINSIC(Vector256, Max, HARDWARE_INTRINSIC(Vector256, Min, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) 
-HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) @@ -198,8 +178,6 @@ HARDWARE_INTRINSIC(Vector256, WithUpper, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector512 Intrinsics -HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ConditionalSelect, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -249,18 +227,10 @@ HARDWARE_INTRINSIC(Vector512, Max, HARDWARE_INTRINSIC(Vector512, Min, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 917c0c9d95641..9bf7ab94a62db 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -996,54 +996,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_Vector128_Abs: - case NI_Vector256_Abs: - case NI_Vector512_Abs: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || varTypeIsUnsigned(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = 
gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_Addition: - case NI_Vector256_op_Addition: - case NI_Vector512_op_Addition: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_AndNot: - case NI_Vector256_AndNot: - case NI_Vector512_AndNot: - { - assert(sig->numArgs == 2); - - impSpillSideEffect(true, - verCurrentState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND_NOT, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_AsVector: { assert(sig->numArgs == 1); @@ -1276,32 +1228,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_BitwiseAnd: - case NI_Vector256_op_BitwiseAnd: - case NI_Vector512_op_BitwiseAnd: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector128_op_BitwiseOr: - case NI_Vector256_op_BitwiseOr: - case NI_Vector512_op_BitwiseOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_Ceiling: case NI_Vector256_Ceiling: case NI_Vector512_Ceiling: @@ -1842,33 +1768,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: - case NI_Vector512_op_Division: - { - assert(sig->numArgs == 2); - - if (!varTypeIsFloating(simdBaseType)) - { - // 
We can't trivially handle division for integral types using SIMD - break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_Dot: case NI_Vector256_Dot: { @@ -2540,51 +2439,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_Multiply: - case NI_Vector256_op_Multiply: - case NI_Vector512_op_Multiply: - { - assert(sig->numArgs == 2); - - if ((simdSize == 32) && !varTypeIsFloating(simdBaseType) && - !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - - assert(simdSize != 64 || IsBaselineVector512IsaSupportedDebugOnly()); - - if (varTypeIsLong(simdBaseType)) - { - if (simdSize != 64 && !compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong multiplication - break; - } - // else if simdSize == 64 then above assert would check if baseline isa supported - -#if defined(TARGET_X86) - // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast - break; -#endif // TARGET_X86 - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - 
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_MultiplyAddEstimate: case NI_Vector256_MultiplyAddEstimate: case NI_Vector512_MultiplyAddEstimate: @@ -2647,16 +2501,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_OnesComplement: - case NI_Vector256_op_OnesComplement: - case NI_Vector512_op_OnesComplement: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_op_Inequality: case NI_Vector256_op_Inequality: { @@ -2692,94 +2536,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_Subtraction: - case NI_Vector256_op_Subtraction: - case NI_Vector512_op_Subtraction: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_LeftShift: - case NI_Vector256_op_LeftShift: - case NI_Vector512_op_LeftShift: - { - assert(sig->numArgs == 2); - - if (varTypeIsByte(simdBaseType)) - { - // byte and sbyte would require more work to support - break; - } - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_RightShift: - case NI_Vector256_op_RightShift: - case NI_Vector512_op_RightShift: - { - assert(sig->numArgs == 2); - - if (varTypeIsByte(simdBaseType)) - { - // byte and 
sbyte would require more work to support - break; - } - - if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - // long, ulong, and double would require more work to support - break; - } - } - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH; - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_UnsignedRightShift: - case NI_Vector256_op_UnsignedRightShift: - case NI_Vector512_op_UnsignedRightShift: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - case NI_Vector128_Shuffle: case NI_Vector256_Shuffle: case NI_Vector512_Shuffle: @@ -3219,19 +2975,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_ExclusiveOr: - case NI_Vector256_op_ExclusiveOr: - case NI_Vector512_op_ExclusiveOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_X86Base_Pause: case NI_X86Serialize_Serialize: { diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index c7ae69f4ca1e2..2b88668399a1a 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -514,37 +514,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_VectorT_ConvertToInt32Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - return nullptr; - } - break; 
- } - - case NI_VectorT_ConvertToInt64Native: - case NI_VectorT_ConvertToUInt32Native: - case NI_VectorT_ConvertToUInt64Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - return nullptr; - } - -#if defined(TARGET_XARCH) - if (!IsBaselineVector512IsaSupportedOpportunistically()) - { - return nullptr; - } -#endif // TARGET_XARCH - - break; - } - case NI_Vector2_MultiplyAddEstimate: case NI_Vector3_MultiplyAddEstimate: - case NI_VectorT_MultiplyAddEstimate: { if (BlockNonDeterministicIntrinsics(mustExpand)) { @@ -554,149 +525,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } #if defined(TARGET_XARCH) - case NI_VectorT_ConvertToDouble: - { - if (IsBaselineVector512IsaSupportedOpportunistically()) - { - break; - } - return nullptr; - } - - case NI_VectorT_ConvertToInt32: - { - if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - break; - } - return nullptr; - } - - case NI_VectorT_ConvertToInt64: - case NI_VectorT_ConvertToUInt32: - case NI_VectorT_ConvertToUInt64: - { - if (IsBaselineVector512IsaSupportedOpportunistically()) - { - break; - } - return nullptr; - } - - case NI_VectorT_ConvertToSingle: - { - if ((simdBaseType == TYP_INT) || - (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())) - { - break; - } - return nullptr; - } -#endif // TARGET_XARCH - -#if defined(TARGET_X86) - case NI_VectorT_CreateBroadcast: - { - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. 
- return nullptr; - } - break; - } -#endif // TARGET_X86 - - case NI_VectorT_CreateSequence: - { - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->OperIsConst()) - { -#if defined(TARGET_XARCH) - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong multiplication - return nullptr; - } -#endif // TARGET_XARCH - -#if defined(TARGET_X86) || defined(TARGET_ARM64) - // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast - // TODO-ARM64-CQ: We should support long/ulong multiplication. - return nullptr; -#endif // TARGET_X86 || TARGET_ARM64 - } - break; - } - -#if defined(TARGET_XARCH) - case NI_VectorT_GetElement: - { - op2 = impStackTop(0).val; - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - case TYP_LONG: - case TYP_ULONG: - { - bool useToScalar = op2->IsIntegralConst(0); - -#if defined(TARGET_X86) - useToScalar &= !varTypeIsLong(simdBaseType); -#endif // TARGET_X86 - - if (!useToScalar && !compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // Using software fallback if simdBaseType is not supported by hardware - return nullptr; - } - break; - } - - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - { - // short/ushort/float/double is supported by SSE2 - break; - } - - default: - { - unreached(); - } - } - break; - } -#endif // TARGET_XARCH - -#if defined(TARGET_XARCH) - case NI_VectorT_Dot: - { - if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // TODO-XARCH-CQ: We can support 32-bit integers if we updating multiplication - // to be lowered rather than imported as the relevant operations. 
- return nullptr; - } - } - else - { - assert(varTypeIsShort(simdBaseType) || varTypeIsFloating(simdBaseType)); - } - break; - } - case NI_Vector2_WithElement: case NI_Vector3_WithElement: - case NI_VectorT_WithElement: { assert(sig->numArgs == 3); GenTree* indexOp = impStackTop(1).val; @@ -761,24 +591,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, #endif // TARGET_XARCH #if defined(TARGET_ARM64) - case NI_VectorT_LoadAligned: - case NI_VectorT_LoadAlignedNonTemporal: - case NI_VectorT_StoreAligned: - case NI_VectorT_StoreAlignedNonTemporal: - { - if (opts.OptimizationDisabled()) - { - // ARM64 doesn't have aligned loads/stores, but aligned simd ops are only validated - // to be aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - return nullptr; - } - break; - } - case NI_Vector2_WithElement: case NI_Vector3_WithElement: - case NI_VectorT_WithElement: { assert(numArgs == 3); GenTree* indexOp = impStackTop(1).val; @@ -803,21 +617,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } #endif -#if defined(TARGET_XARCH) - case NI_VectorT_Floor: - case NI_VectorT_Ceiling: - { - if (!compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - return nullptr; - } - break; - } -#endif // TARGET_XARCH - case NI_Vector2_FusedMultiplyAdd: case NI_Vector3_FusedMultiplyAdd: - case NI_VectorT_FusedMultiplyAdd: { bool isFmaAccelerated = false; @@ -834,41 +635,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } -#if defined(TARGET_XARCH) - case NI_VectorT_op_Multiply: - { - if (varTypeIsLong(simdBaseType)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong multiplication - return nullptr; - } - -#if defined(TARGET_X86) - // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast - return nullptr; -#endif // TARGET_X86 - } - break; - } -#endif // TARGET_XARCH - 
-#if defined(TARGET_XARCH) - case NI_VectorT_op_RightShift: - { - if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong arithmetic shift - return nullptr; - } - } - break; - } -#endif // TARGET_XARCH - default: { // Most intrinsics have some path that works even if only SSE2/AdvSimd is available @@ -881,38 +647,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (numArgs) { - case 0: - { - assert(newobjThis == nullptr); - - switch (intrinsic) - { - case NI_VectorT_get_AllBitsSet: - { - return gtNewAllBitsSetConNode(retType); - } - - case NI_VectorT_get_Indices: - { - assert(sig->numArgs == 0); - return gtNewSimdGetIndicesNode(retType, simdBaseJitType, simdSize); - } - - case NI_VectorT_get_One: - { - return gtNewOneConNode(retType, simdBaseType); - } - - default: - { - // Some platforms warn about unhandled switch cases - // We handle it more generally via the assert and nullptr return below. - break; - } - } - break; - } - case 1: { assert(newobjThis == nullptr); @@ -925,238 +659,22 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { case NI_Vector2_Abs: case NI_Vector3_Abs: - case NI_VectorT_Abs: { return gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize); } - case NI_VectorT_Ceiling: - { - return gtNewSimdCeilNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_Floor: - { - return gtNewSimdFloorNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadUnsafe: - { - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - return gtNewSimdLoadNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadAligned: - { - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op1 = op1->gtGetOp1(); - } - - return gtNewSimdLoadAlignedNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadAlignedNonTemporal: - { - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op1 = op1->gtGetOp1(); - } - - return gtNewSimdLoadNonTemporalNode(retType, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_op_UnaryNegation: case NI_Vector3_op_UnaryNegation: - case NI_VectorT_op_UnaryNegation: { return gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize); } - case NI_VectorT_op_OnesComplement: - { - return gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_Sqrt: case NI_Vector3_Sqrt: - case NI_VectorT_Sqrt: { return gtNewSimdSqrtNode(retType, op1, simdBaseJitType, simdSize); } - case NI_VectorT_Sum: - { - return gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_ToScalar: - { -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType)) - { - op2 = gtNewIconNode(0); - return gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize); - } -#endif // TARGET_X86 - - return gtNewSimdToScalarNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_WidenLower: - { - return gtNewSimdWidenLowerNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_WidenUpper: - { - return gtNewSimdWidenUpperNode(retType, op1, simdBaseJitType, simdSize); - } - -#if defined(TARGET_XARCH) - case NI_VectorT_ConvertToDouble: - { - assert(sig->numArgs == 1); - assert(varTypeIsLong(simdBaseType)); - NamedIntrinsic intrinsic = NI_Illegal; - if 
(simdSize == 64) - { - intrinsic = NI_AVX512DQ_ConvertToVector512Double; - } - else if (simdSize == 32) - { - intrinsic = NI_AVX512DQ_VL_ConvertToVector256Double; - } - else - { - assert(simdSize == 16); - intrinsic = NI_AVX512DQ_VL_ConvertToVector128Double; - } - return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToSingle: - { - assert(varTypeIsInt(simdBaseType)); - NamedIntrinsic intrinsic = NI_Illegal; - if (simdBaseType == TYP_INT) - { - switch (simdSize) - { - case 16: - intrinsic = NI_SSE2_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX_ConvertToVector256Single; - break; - case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); - } - } - else if (simdBaseType == TYP_UINT) - { - switch (simdSize) - { - case 16: - intrinsic = NI_AVX512F_VL_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX512F_VL_ConvertToVector256Single; - break; - case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); - } - } - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } -#elif defined(TARGET_ARM64) - case NI_VectorT_ConvertToDouble: - { - assert((simdBaseType == TYP_LONG) || (simdBaseType == TYP_ULONG)); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_Arm64_ConvertToDouble, simdBaseJitType, - simdSize); - } - - case NI_VectorT_ConvertToSingle: - { - assert((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToSingle, simdBaseJitType, - simdSize); - } -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - case NI_VectorT_ConvertToInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize); - } - - case 
NI_VectorT_ConvertToInt32Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToInt64Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt32Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt64Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); - } - default: { // Some platforms warn about unhandled switch cases @@ -1201,29 +719,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { case NI_Vector2_op_Addition: case NI_Vector3_op_Addition: - case NI_VectorT_op_Addition: { return gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_AndNot: - { - return gtNewSimdBinOpNode(GT_AND_NOT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_BitwiseAnd: - { - return gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, 
simdSize); - } - - case NI_VectorT_op_BitwiseOr: - { - return gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_CreateBroadcast: case NI_Vector3_CreateBroadcast: - case NI_VectorT_CreateBroadcast: { assert(retType == TYP_VOID); @@ -1232,223 +733,57 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } - case NI_VectorT_CreateSequence: - { - return gtNewSimdCreateSequenceNode(simdType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Division: case NI_Vector3_op_Division: - case NI_VectorT_op_Division: { return gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); } case NI_Vector2_Dot: case NI_Vector3_Dot: - case NI_VectorT_Dot: { op1 = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseJitType, simdSize); return gtNewSimdGetElementNode(retType, op1, gtNewIconNode(0), simdBaseJitType, simdSize); } - case NI_VectorT_Equals: - { - return gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Equality: case NI_Vector3_op_Equality: - case NI_VectorT_op_Equality: { return gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_EqualsAny: - { - return gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_ExclusiveOr: - { - return gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_GetElement: case NI_Vector3_GetElement: - case NI_VectorT_GetElement: { return gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_GreaterThan: - { - return gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanAll: - { - return gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanAny: - { - return gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseJitType, 
simdSize); - } - - case NI_VectorT_GreaterThanOrEqual: - { - return gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanOrEqualAll: - { - return gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanOrEqualAny: - { - return gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Inequality: case NI_Vector3_op_Inequality: - case NI_VectorT_op_Inequality: { return gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_LessThan: - { - return gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanAll: - { - return gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanAny: - { - return gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanOrEqual: - { - return gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanOrEqualAll: - { - return gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanOrEqualAny: - { - return gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadUnsafeIndex: - { - GenTree* tmp; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - tmp = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet()); - op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, tmp); - op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2); - - return gtNewSimdLoadNode(retType, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_Max: case NI_Vector3_Max: - case NI_VectorT_Max: { return gtNewSimdMaxNode(retType, op1, op2, simdBaseJitType, simdSize); } case NI_Vector2_Min: case NI_Vector3_Min: - case NI_VectorT_Min: { return gtNewSimdMinNode(retType, op1, op2, simdBaseJitType, simdSize); } case NI_Vector2_op_Multiply: case NI_Vector3_op_Multiply: - case NI_VectorT_op_Multiply: { return gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_Narrow: - { - return gtNewSimdNarrowNode(retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_LeftShift: - { - return gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_RightShift: - { - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH; - return gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_UnsignedRightShift: - { - return gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_StoreUnsafe: - { - assert(retType == TYP_VOID); - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op2 = op2->gtGetOp1(); - } - - return gtNewSimdStoreNode(op2, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_StoreAligned: - { - assert(retType == TYP_VOID); - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op2 = op2->gtGetOp1(); - } - - return gtNewSimdStoreAlignedNode(op2, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_StoreAlignedNonTemporal: - { - assert(retType == TYP_VOID); - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op2 = op2->gtGetOp1(); - } - - return gtNewSimdStoreNonTemporalNode(op2, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Subtraction: case NI_Vector3_op_Subtraction: - case NI_VectorT_op_Subtraction: { return gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); } @@ -1504,14 +839,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_VectorT_ConditionalSelect: - { - return gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize); - } - case NI_Vector2_FusedMultiplyAdd: case NI_Vector3_FusedMultiplyAdd: - case NI_VectorT_FusedMultiplyAdd: { return gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseJitType, simdSize); } @@ -1558,7 +887,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case NI_Vector2_MultiplyAddEstimate: case NI_Vector3_MultiplyAddEstimate: - case NI_VectorT_MultiplyAddEstimate: { bool isFmaAccelerated = false; @@ -1577,24 +905,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseJitType, simdSize); } - case NI_VectorT_StoreUnsafeIndex: - { - assert(retType == TYP_VOID); - GenTree* tmp; - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op2 = op2->gtGetOp1(); - } - - tmp = gtNewIconNode(genTypeSize(simdBaseType), op3->TypeGet()); - op3 = gtNewOperNode(GT_MUL, op3->TypeGet(), op3, tmp); - op2 = gtNewOperNode(GT_ADD, op2->TypeGet(), op2, op3); - - return gtNewSimdStoreNode(op2, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_Create: { assert(retType == TYP_VOID); @@ -1678,7 +988,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case NI_Vector2_WithElement: case NI_Vector3_WithElement: - case NI_VectorT_WithElement: { return gtNewSimdWithElementNode(retType, op1, op2, op3, simdBaseJitType, simdSize); } @@ -1779,6 +1088,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } break; } + + default: + { + break; + } } if (copyBlkDst != nullptr) diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index 3f73df38f13dd..b10b15d347220 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.h @@ -83,81 +83,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA ID Name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// Vector Intrinsics -SIMD_AS_HWINTRINSIC_ID(VectorT, Abs, 1, {NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, AndNot, 2, {NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Ceiling, 
NI_VectorT_Ceiling}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConditionalSelect, 3, {NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToDouble, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToDouble, NI_VectorT_ConvertToDouble, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToSingle, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToSingle, NI_VectorT_ConvertToSingle, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, CreateBroadcast, ".ctor", 2, {NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, CreateSequence, 2, {NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Dot, 2, {NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_Illegal, NI_Illegal, NI_VectorT_Dot, NI_VectorT_Dot}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Equals, 2, {NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC_ID(VectorT, EqualsAny, 2, {NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Floor, NI_VectorT_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, FusedMultiplyAdd, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_FusedMultiplyAdd, NI_VectorT_FusedMultiplyAdd}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_AllBitsSet, 0, {NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_Indices, 0, {NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_One, 0, {NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GetElement, 2, {NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, 
NI_VectorT_GetElement, NI_VectorT_GetElement}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThan, 2, {NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAll, 2, {NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAny, 2, {NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqual, 2, {NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAll, 2, {NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAny, 2, 
{NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThan, 2, {NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAll, 2, {NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAny, 2, {NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqual, 2, {NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAll, 2, {NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, 
NI_VectorT_LessThanOrEqualAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAny, 2, {NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAligned, 1, {NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAlignedNonTemporal, 1, {NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadUnsafe, 1, {NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_NM(VectorT, LoadUnsafeIndex, "LoadUnsafe", 2, {NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, 
Max, 2, {NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Min, 2, {NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, MultiplyAddEstimate, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_MultiplyAddEstimate, NI_VectorT_MultiplyAddEstimate}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Narrow, 2, {NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Addition, 2, {NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseAnd, 2, {NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseOr, 2, {NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Division, 
2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_op_Division, NI_VectorT_op_Division}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Equality, 2, {NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_ExclusiveOr, 2, {NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Inequality, 2, {NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_LeftShift, 2, {NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Multiply, 2, {NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_Illegal, NI_Illegal, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_OnesComplement, 1, {NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, 
NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_RightShift, 2, {NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Subtraction, 2, {NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnaryNegation, 1, {NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnsignedRightShift, 2, {NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Sqrt, NI_VectorT_Sqrt}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAligned, 2, 
{NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAlignedNonTemporal, 2, {NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreUnsafe, 2, {NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_NM(VectorT, StoreUnsafeIndex, "StoreUnsafe", 3, {NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Sum, 1, {NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ToScalar, 1, {NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenLower, 1, {NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenUpper, 1, {NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WithElement, 3, {NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement}, SimdAsHWIntrinsicFlag::None) - #undef SIMD_AS_HWINTRINSIC_NM #undef SIMD_AS_HWINTRINSIC_ID diff --git a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h index f119a4e051d59..a70045bbd8f7f 100644 --- a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h @@ -83,81 +83,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA ID Name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// Vector Intrinsics -SIMD_AS_HWINTRINSIC_ID(VectorT, Abs, 1, {NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, AndNot, 2, {NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Ceiling, NI_VectorT_Ceiling}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConditionalSelect, 3, {NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToDouble, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToDouble, NI_VectorT_ConvertToDouble, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToSingle, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToSingle, NI_VectorT_ConvertToSingle, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, CreateBroadcast, ".ctor", 2, {NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, CreateSequence, 2, {NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Dot, 2, {NI_Illegal, NI_Illegal, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_Illegal, NI_Illegal, NI_VectorT_Dot, NI_VectorT_Dot}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Equals, 2, {NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC_ID(VectorT, EqualsAny, 2, {NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Floor, NI_VectorT_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, FusedMultiplyAdd, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_FusedMultiplyAdd, NI_VectorT_FusedMultiplyAdd}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_AllBitsSet, 0, {NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_Indices, 0, {NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_One, 0, {NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GetElement, 2, {NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThan, 2, {NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAll, 2, {NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAny, 2, {NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqual, 2, {NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAll, 2, {NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAny, 2, {NI_VectorT_GreaterThanOrEqualAny, 
NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThan, 2, {NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAll, 2, {NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAny, 2, {NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqual, 2, {NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAll, 2, {NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAny, 2, {NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAligned, 1, {NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAlignedNonTemporal, 1, {NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadUnsafe, 1, {NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_NM(VectorT, LoadUnsafeIndex, "LoadUnsafe", 2, {NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, Max, 2, {NI_VectorT_Max, 
NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Min, 2, {NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, MultiplyAddEstimate, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_MultiplyAddEstimate, NI_VectorT_MultiplyAddEstimate}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Narrow, 2, {NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Addition, 2, {NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseAnd, 2, {NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseOr, 2, {NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Division, 2, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_op_Division, NI_VectorT_op_Division}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Equality, 2, {NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_ExclusiveOr, 2, {NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Inequality, 2, {NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_LeftShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_Illegal, NI_Illegal, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_OnesComplement, 2, {NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, 
NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_RightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Subtraction, 2, {NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnaryNegation, 1, {NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnsignedRightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Sqrt, NI_VectorT_Sqrt}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAligned, 2, {NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, 
NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAlignedNonTemporal, 2, {NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreUnsafe, -1, {NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_NM(VectorT, StoreUnsafeIndex, "StoreUnsafe", 3, {NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Sum, 1, {NI_Illegal, NI_Illegal, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_Illegal, NI_Illegal, NI_VectorT_Sum, NI_VectorT_Sum}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ToScalar, 1, {NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenLower, 1, {NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, 
NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenUpper, 1, {NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WithElement, 3, {NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement}, SimdAsHWIntrinsicFlag::None) - #undef SIMD_AS_HWINTRINSIC_NM #undef SIMD_AS_HWINTRINSIC_ID diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 392cd58611e6e..f230a7299dc2c 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -180,13 +180,13 @@ ValueNumFuncDef(SimdType, 2, false, false, false, false) // A value number func #define HARDWARE_INTRINSIC(isa, name, size, argCount, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ ValueNumFuncDef(HWI_##isa##_##name, argCount, ((flag) & HW_Flag_Commutative) >> 0, false, false, extra) // All of the HARDWARE_INTRINSICS for x86/x64 #include "hwintrinsiclistxarch.h" -#define VNF_HWI_FIRST VNF_HWI_Vector128_Abs +#define VNF_HWI_FIRST VNF_HWI_Vector128_AsVector #elif defined (TARGET_ARM64) #define HARDWARE_INTRINSIC(isa, name, size, argCount, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ ValueNumFuncDef(HWI_##isa##_##name, argCount, ((flag) & HW_Flag_Commutative) >> 0, false, false, extra) // All of the HARDWARE_INTRINSICS for arm64 #include "hwintrinsiclistarm64.h" -#define VNF_HWI_FIRST VNF_HWI_Vector64_Abs +#define VNF_HWI_FIRST VNF_HWI_Vector64_Ceiling #elif defined 
(TARGET_ARM) // No Hardware Intrinsics on ARM32 diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index 41961bbb54c79..2111de2ab68b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -4,8 +4,8 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static Interop; + +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') namespace System.Numerics { @@ -32,25 +32,18 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Abs(Vector value) { - if ((typeof(T) == typeof(byte)) - || (typeof(T) == typeof(ushort)) - || (typeof(T) == typeof(uint)) - || (typeof(T) == typeof(ulong)) - || (typeof(T) == typeof(nuint))) + if (sizeof(Vector) == 64) + { + return Vector512.Abs(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - return value; + return Vector256.Abs(value.AsVector256()).AsVector(); } else { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) - { - T element = Scalar.Abs(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); - } - - return result; + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Abs(value.AsVector128()).AsVector(); } } @@ -68,7 +61,23 @@ public static Vector Abs(Vector value) /// The type of the elements in the vector. /// The bitwise-and of and the ones-complement of . 
[Intrinsic] - public static Vector AndNot(Vector left, Vector right) => left & ~right; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector AndNot(Vector left, Vector right) + { + if (sizeof(Vector) == 64) + { + return Vector512.AndNot(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.AndNot(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.AndNot(left.AsVector128(), right.AsVector128()).AsVector(); + } + } /// Reinterprets a as a new . /// The type of the input vector. @@ -215,15 +224,19 @@ public static Vector As(this Vector vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Ceiling(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double element = Scalar.Ceiling(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Ceiling(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Ceiling(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Ceiling(value.AsVector128()).AsVector(); } - - return result; } /// Computes the ceiling of each element in a vector. 
@@ -234,15 +247,19 @@ public static Vector Ceiling(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Ceiling(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float element = Scalar.Ceiling(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Ceiling(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Ceiling(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Ceiling(value.AsVector128()).AsVector(); } - - return result; } /// Conditionally selects a value from two vectors on a bitwise basis. @@ -253,7 +270,22 @@ public static Vector Ceiling(Vector value) /// A vector whose bits come from or based on the value of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector ConditionalSelect(Vector condition, Vector left, Vector right) => (left & condition) | (right & ~condition); + public static Vector ConditionalSelect(Vector condition, Vector left, Vector right) + { + if (sizeof(Vector) == 64) + { + return Vector512.ConditionalSelect(condition.AsVector512(), left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConditionalSelect(condition.AsVector256(), left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConditionalSelect(condition.AsVector128(), left.AsVector128(), right.AsVector128()).AsVector(); + } + } /// Conditionally selects a value from two vectors on a bitwise basis. /// The mask that is used to select a value from or . 
@@ -278,14 +310,17 @@ public static Vector Ceiling(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToDouble(Vector value) { - if (Avx2.IsSupported) + if (sizeof(Vector) == 64) + { + return Vector512.ConvertToDouble(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToDouble(value.AsVector256()).AsVector(); } else { - Debug.Assert(Vector.Count == Vector128.Count); + Debug.Assert(sizeof(Vector) == 16); return Vector128.ConvertToDouble(value.AsVector128()).AsVector(); } } @@ -298,14 +333,17 @@ public static Vector ConvertToDouble(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToDouble(Vector value) { - if (Avx2.IsSupported) + if (sizeof(Vector) == 64) + { + return Vector512.ConvertToDouble(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToDouble(value.AsVector256()).AsVector(); } else { - Debug.Assert(Vector.Count == Vector128.Count); + Debug.Assert(sizeof(Vector) == 16); return Vector128.ConvertToDouble(value.AsVector128()).AsVector(); } } @@ -314,85 +352,110 @@ public static Vector ConvertToDouble(Vector value) /// The vector to convert. /// The converted vector. 
[Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt32(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int element = float.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt32(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt32(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt32(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. /// The vector to convert. /// The converted vector. [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt32Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int element = float.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt32Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt32Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt32Native(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using saturation on overflow. /// The vector to convert. /// The converted vector. 
[Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt64(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long element = double.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt64(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt64(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt64(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. /// The vector to convert. /// The converted vector. [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt64Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long element = double.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt64Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt64Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt64Native(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a . /// The vector to convert. /// The converted vector. 
[Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToSingle(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - float element = value.GetElementUnsafe(i); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToSingle(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToSingle(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToSingle(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a . @@ -403,14 +466,17 @@ public static Vector ConvertToSingle(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToSingle(Vector value) { - if (Avx2.IsSupported) + if (sizeof(Vector) == 64) + { + return Vector512.ConvertToSingle(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToSingle(value.AsVector256()).AsVector(); } else { - Debug.Assert(Vector.Count == Vector128.Count); + Debug.Assert(sizeof(Vector) == 16); return Vector128.ConvertToSingle(value.AsVector128()).AsVector(); } } @@ -420,17 +486,22 @@ public static Vector ConvertToSingle(Vector value) /// The converted vector. 
[Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt32(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint element = float.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt32(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt32(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt32(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. @@ -438,17 +509,22 @@ public static Vector ConvertToUInt32(Vector value) /// The converted vector. [Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt32Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint element = float.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt32Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt32Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt32Native(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using saturation on overflow. @@ -456,17 +532,22 @@ public static Vector ConvertToUInt32Native(Vector value) /// The converted vector. 
[Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt64(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong element = double.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt64(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt64(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt64(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. @@ -474,17 +555,22 @@ public static Vector ConvertToUInt64(Vector value) /// The converted vector. [Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt64Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong element = double.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt64Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt64Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt64Native(value.AsVector128()).AsVector(); } - - return result; } /// Creates a new instance where the elements begin at a specified value and which are spaced apart according to another specified value. @@ -494,7 +580,22 @@ public static Vector ConvertToUInt64Native(Vector value) /// A new instance with the first element initialized to and each subsequent element initialized to the the value of the previous element plus . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector CreateSequence(T start, T step) => (Vector.Indices * step) + new Vector(start); + public static Vector CreateSequence(T start, T step) + { + if (sizeof(Vector) == 64) + { + return Vector512.CreateSequence(start, step).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.CreateSequence(start, step).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.CreateSequence(start, step).AsVector(); + } + } /// Divides two vectors to compute their quotient. /// The vector that will be divided by . @@ -518,7 +619,23 @@ public static Vector ConvertToUInt64Native(Vector value) /// The type of the elements in the vector. /// The dot product of and . [Intrinsic] - public static T Dot(Vector left, Vector right) => Sum(left * right); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static T Dot(Vector left, Vector right) + { + if (sizeof(Vector) == 64) + { + return Vector512.Dot(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Dot(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Dot(left.AsVector128(), right.AsVector128()); + } + } /// Compares two vectors to determine if they are equal on a per-element basis. /// The vector to compare with . @@ -529,15 +646,19 @@ public static Vector ConvertToUInt64Native(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Equals(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? 
Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.Equals(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Equals(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Equals(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine if they are equal on a per-element basis. @@ -585,15 +706,19 @@ public static Vector Equals(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool EqualsAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.EqualsAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.EqualsAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.EqualsAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Computes the floor of each element in a vector. 
@@ -604,15 +729,19 @@ public static bool EqualsAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Floor(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double element = Scalar.Floor(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Floor(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Floor(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Floor(value.AsVector128()).AsVector(); } - - return result; } /// Computes the floor of each element in a vector. @@ -623,15 +752,19 @@ public static Vector Floor(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Floor(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float element = Scalar.Floor(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Floor(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Floor(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Floor(value.AsVector128()).AsVector(); } - - return result; } /// Computes ( * ) + , rounded as one ternary operation. 
@@ -647,15 +780,19 @@ public static Vector Floor(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector FusedMultiplyAdd(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double value = double.FusedMultiplyAdd(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return Vector512.FusedMultiplyAdd(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.FusedMultiplyAdd(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.FusedMultiplyAdd(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// Computes ( * ) + , rounded as one ternary operation. @@ -671,15 +808,19 @@ public static Vector FusedMultiplyAdd(Vector left, Vector FusedMultiplyAdd(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float value = float.FusedMultiplyAdd(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return Vector512.FusedMultiplyAdd(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.FusedMultiplyAdd(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.FusedMultiplyAdd(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// Gets the element at the specified index. 
@@ -693,12 +834,19 @@ public static Vector FusedMultiplyAdd(Vector left, Vector r [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T GetElement(this Vector vector, int index) { - if ((uint)(index) >= (uint)(Vector.Count)) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); + return vector.AsVector512().GetElement(index); + } + else if (sizeof(Vector) == 32) + { + return vector.AsVector256().GetElement(index); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return vector.AsVector128().GetElement(index); } - - return vector.GetElementUnsafe(index); } /// Compares two vectors to determine which is greater on a per-element basis. @@ -710,15 +858,19 @@ public static T GetElement(this Vector vector, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector GreaterThan(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.GreaterThan(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThan(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThan(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is greater on a per-element basis. 
@@ -758,15 +910,19 @@ public static Vector GreaterThan(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.GreaterThanAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are greater. @@ -778,15 +934,19 @@ public static bool GreaterThanAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.GreaterThanAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Compares two vectors to determine which is greater or equal on a per-element basis. 
@@ -798,15 +958,19 @@ public static bool GreaterThanAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector GreaterThanOrEqual(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.GreaterThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.GreaterThanOrEqual(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanOrEqual(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanOrEqual(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is greater or equal on a per-element basis. @@ -846,15 +1010,19 @@ public static Vector GreaterThanOrEqual(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanOrEqualAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.GreaterThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.GreaterThanOrEqualAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanOrEqualAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanOrEqualAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are greater or equal. 
@@ -866,15 +1034,19 @@ public static bool GreaterThanOrEqualAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanOrEqualAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.GreaterThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.GreaterThanOrEqualAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanOrEqualAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanOrEqualAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Compares two vectors to determine which is less on a per-element basis. @@ -886,15 +1058,19 @@ public static bool GreaterThanOrEqualAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LessThan(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.LessThan(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThan(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThan(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is less on a per-element basis. 
@@ -934,15 +1110,19 @@ public static Vector LessThan(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.LessThanAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are less. @@ -954,15 +1134,19 @@ public static bool LessThanAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.LessThanAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Compares two vectors to determine which is less or equal on a per-element basis. 
@@ -974,15 +1158,19 @@ public static bool LessThanAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LessThanOrEqual(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.LessThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.LessThanOrEqual(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanOrEqual(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanOrEqual(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is less or equal on a per-element basis. @@ -1022,15 +1210,19 @@ public static Vector LessThanOrEqual(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanOrEqualAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.LessThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.LessThanOrEqualAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanOrEqualAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanOrEqualAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are less or equal. 
@@ -1042,18 +1234,21 @@ public static bool LessThanOrEqualAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanOrEqualAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.LessThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.LessThanOrEqualAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanOrEqualAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanOrEqualAny(left.AsVector128(), right.AsVector128()); } - - return false; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1061,7 +1256,7 @@ public static bool LessThanOrEqualAny(Vector left, Vector right) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static Vector Load(T* source) => LoadUnsafe(ref *source); + public static Vector Load(T* source) => LoadUnsafe(in *source); /// Loads a vector from the given aligned source. /// The type of the elements in the vector. 
@@ -1073,14 +1268,19 @@ public static bool LessThanOrEqualAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LoadAligned(T* source) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (((nuint)(source) % Alignment) != 0) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowAccessViolationException(); + return Vector512.LoadAligned(source).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadAligned(source).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadAligned(source).AsVector(); } - - return *(Vector*)source; } /// Loads a vector from the given aligned source. @@ -1091,8 +1291,22 @@ public static Vector LoadAligned(T* source) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static Vector LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static Vector LoadAlignedNonTemporal(T* source) + { + if (sizeof(Vector) == 64) + { + return Vector512.LoadAlignedNonTemporal(source).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadAlignedNonTemporal(source).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadAlignedNonTemporal(source).AsVector(); + } + } /// Loads a vector from the given source. /// The type of the elements in the vector. 
@@ -1103,9 +1317,19 @@ public static Vector LoadAligned(T* source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LoadUnsafe(ref readonly T source) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - ref readonly byte address = ref Unsafe.As(ref Unsafe.AsRef(in source)); - return Unsafe.ReadUnaligned>(in address); + if (sizeof(Vector) == 64) + { + return Vector512.LoadUnsafe(in source).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadUnsafe(in source).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadUnsafe(in source).AsVector(); + } } /// Loads a vector from the given source and element offset. @@ -1119,9 +1343,19 @@ public static Vector LoadUnsafe(ref readonly T source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LoadUnsafe(ref readonly T source, nuint elementOffset) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - ref readonly byte address = ref Unsafe.As(ref Unsafe.Add(ref Unsafe.AsRef(in source), (nint)elementOffset)); - return Unsafe.ReadUnaligned>(in address); + if (sizeof(Vector) == 64) + { + return Vector512.LoadUnsafe(in source, elementOffset).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadUnsafe(in source, elementOffset).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadUnsafe(in source, elementOffset).AsVector(); + } } /// Computes the maximum of two vectors on a per-element basis. @@ -1133,15 +1367,19 @@ public static Vector LoadUnsafe(ref readonly T source, nuint elementOffset [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Max(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? 
left.GetElementUnsafe(index) : right.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return Vector512.Max(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Max(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Max(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Computes the minimum of two vectors on a per-element basis. @@ -1153,15 +1391,19 @@ public static Vector Max(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Min(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? left.GetElementUnsafe(index) : right.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return Vector512.Min(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Min(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Min(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Multiplies two vectors to compute their element-wise product. @@ -1186,22 +1428,26 @@ public static Vector Min(Vector left, Vector right) /// The type of the elements in the vector. /// The product of and . 
[Intrinsic] - public static Vector Multiply(T left, Vector right) => left * right; + public static Vector Multiply(T left, Vector right) => right * left; /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector MultiplyAddEstimate(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double element = double.MultiplyAddEstimate(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.MultiplyAddEstimate(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.MultiplyAddEstimate(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.MultiplyAddEstimate(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// @@ -1209,15 +1455,19 @@ public static Vector MultiplyAddEstimate(Vector left, Vector MultiplyAddEstimate(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float element = float.MultiplyAddEstimate(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.MultiplyAddEstimate(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.MultiplyAddEstimate(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.MultiplyAddEstimate(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// Narrows 
two instances into one . @@ -1228,21 +1478,19 @@ public static Vector MultiplyAddEstimate(Vector left, Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - float value = (float)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - float value = (float)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . @@ -1254,21 +1502,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - sbyte value = (sbyte)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - sbyte value = (sbyte)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . 
@@ -1279,21 +1525,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - short value = (short)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - short value = (short)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . @@ -1304,21 +1548,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int value = (int)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - int value = (int)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . 
@@ -1330,21 +1572,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - byte value = (byte)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - byte value = (byte)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . @@ -1356,21 +1596,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ushort value = (ushort)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - ushort value = (ushort)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . 
@@ -1382,21 +1620,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint value = (uint)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - uint value = (uint)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Computes the unary negation of a vector. @@ -1607,18 +1843,21 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector SquareRoot(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.Sqrt(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Sqrt(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Sqrt(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Sqrt(value.AsVector128()).AsVector(); } - - return result; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. 
@@ -1638,14 +1877,19 @@ public static Vector SquareRoot(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void StoreAligned(this Vector source, T* destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (((nuint)destination % Alignment) != 0) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowAccessViolationException(); + source.AsVector512().StoreAligned(destination); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreAligned(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreAligned(destination); } - - *(Vector*)destination = source; } /// Stores a vector at the given aligned destination. @@ -1656,8 +1900,22 @@ public static void StoreAligned(this Vector source, T* destination) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static void StoreAlignedNonTemporal(this Vector source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static void StoreAlignedNonTemporal(this Vector source, T* destination) + { + if (sizeof(Vector) == 64) + { + source.AsVector512().StoreAlignedNonTemporal(destination); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreAlignedNonTemporal(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreAlignedNonTemporal(destination); + } + } /// Stores a vector at the given destination. /// The type of the elements in the vector. 
@@ -1668,9 +1926,19 @@ public static void StoreAligned(this Vector source, T* destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void StoreUnsafe(this Vector source, ref T destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - ref byte address = ref Unsafe.As(ref destination); - Unsafe.WriteUnaligned(ref address, source); + if (sizeof(Vector) == 64) + { + source.AsVector512().StoreUnsafe(ref destination); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreUnsafe(ref destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreUnsafe(ref destination); + } } /// Stores a vector at the given destination. @@ -1684,9 +1952,19 @@ public static void StoreUnsafe(this Vector source, ref T destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void StoreUnsafe(this Vector source, ref T destination, nuint elementOffset) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - destination = ref Unsafe.Add(ref destination, (nint)elementOffset); - Unsafe.WriteUnaligned(ref Unsafe.As(ref destination), source); + if (sizeof(Vector) == 64) + { + source.AsVector512().StoreUnsafe(ref destination, elementOffset); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreUnsafe(ref destination, elementOffset); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreUnsafe(ref destination, elementOffset); + } } /// Subtracts two vectors to compute their difference. 
@@ -1704,14 +1982,19 @@ public static void StoreUnsafe(this Vector source, ref T destination, nuin [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Sum(Vector value) { - T sum = default!; - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - sum = Scalar.Add(sum, value.GetElementUnsafe(index)); + return Vector512.Sum(value.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Sum(value.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Sum(value.AsVector128()); } - - return sum; } /// Converts the given vector to a scalar containing the value of the first element. @@ -1720,10 +2003,22 @@ public static T Sum(Vector value) /// A scalar containing the value of the first element. /// The type of () is not supported. [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T ToScalar(this Vector vector) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - return vector.GetElementUnsafe(0); + if (sizeof(Vector) == 64) + { + return vector.AsVector512().ToScalar(); + } + else if (sizeof(Vector) == 32) + { + return vector.AsVector256().ToScalar(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return vector.AsVector128().ToScalar(); + } } /// Widens a into two . 
@@ -1815,15 +2110,19 @@ public static void Widen(Vector source, out Vector low, out Vector< [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ushort value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . @@ -1833,15 +2132,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . 
@@ -1851,15 +2154,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . @@ -1870,15 +2177,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - short value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . 
@@ -1888,15 +2199,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - double value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . @@ -1907,15 +2222,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . 
@@ -1926,15 +2245,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the upper half of a into a . @@ -1945,15 +2268,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ushort value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . 
@@ -1963,15 +2290,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . @@ -1981,15 +2312,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . 
@@ -2000,15 +2335,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - short value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . @@ -2018,15 +2357,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - double value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . 
@@ -2037,15 +2380,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . @@ -2056,15 +2403,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Creates a new with the element at the specified index set to the specified value and the remaining elements set to the same value as that in the given vector. 
@@ -2078,14 +2429,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WithElement(this Vector vector, int index, T value) { - if ((uint)(index) >= (uint)(Vector.Count)) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); + return vector.AsVector512().WithElement(index, value).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return vector.AsVector256().WithElement(index, value).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return vector.AsVector128().WithElement(index, value).AsVector(); } - - Vector result = vector; - result.SetElementUnsafe(index, value); - return result; } /// Computes the exclusive-or of two vectors. @@ -2095,21 +2451,5 @@ public static Vector WithElement(this Vector vector, int index, T value /// The exclusive-or of and . [Intrinsic] public static Vector Xor(Vector left, Vector right) => left ^ right; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static T GetElementUnsafe(in this Vector vector, int index) - { - Debug.Assert((index >= 0) && (index < Vector.Count)); - ref T address = ref Unsafe.As, T>(ref Unsafe.AsRef(in vector)); - return Unsafe.Add(ref address, index); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void SetElementUnsafe(in this Vector vector, int index, T value) - { - Debug.Assert((index >= 0) && (index < Vector.Count)); - ref T address = ref Unsafe.As, T>(ref Unsafe.AsRef(in vector)); - Unsafe.Add(ref address, index) = value; - } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index 4009387a4f796..4d1c62cf4876e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -5,9 +5,9 @@ using System.Diagnostics.CodeAnalysis; 
using System.Globalization; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Runtime.Intrinsics; -using System.Text; + +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') namespace System.Numerics { @@ -28,7 +28,7 @@ namespace System.Numerics [Intrinsic] [DebuggerDisplay("{DisplayString,nq}")] [DebuggerTypeProxy(typeof(VectorDebugView<>))] - public readonly struct Vector : IEquatable>, IFormattable + public readonly unsafe struct Vector : IEquatable>, IFormattable { // These fields exist to ensure the alignment is 8, rather than 1. internal readonly ulong _00; @@ -38,13 +38,21 @@ namespace System.Numerics /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(T value) { - Unsafe.SkipInit(out this); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - this.SetElementUnsafe(index, value); + this = Vector512.Create(value).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(value).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(value).AsVector(); } } @@ -56,14 +64,19 @@ public Vector(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(T[] values) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if (values.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); + this = Vector512.Create(values).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values).AsVector(); } - - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[0])); } /// Creates a new from 
a given array. @@ -75,14 +88,19 @@ public Vector(T[] values) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(T[] values, int index) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if ((index < 0) || ((values.Length - index) < Count)) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); + this = Vector512.Create(values, index).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values, index).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values, index).AsVector(); } - - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[index])); } /// Creates a new from a given readonly span. @@ -92,14 +110,19 @@ public Vector(T[] values, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(ReadOnlySpan values) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if (values.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); + this = Vector512.Create(values).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values).AsVector(); } - - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(values))); } /// Creates a new from a given readonly span. @@ -107,17 +130,21 @@ public Vector(ReadOnlySpan values) /// A new with its elements set to the first sizeof() elements from . /// The length of is less than sizeof(). 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe Vector(ReadOnlySpan values) + public Vector(ReadOnlySpan values) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (values.Length < Vector.Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); + this = Vector512.Create(values).AsVector().As(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values).AsVector().As(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values).AsVector().As(); } - - this = Unsafe.ReadUnaligned>(ref MemoryMarshal.GetReference(values)); } /// Creates a new from a given span. @@ -136,19 +163,28 @@ public static Vector AllBitsSet get => new Vector(Scalar.AllBitsSet); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets the number of that are in a . /// The type of the current instance () is not supported. - public static unsafe int Count + public static int Count { [Intrinsic] get { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - return sizeof(Vector) / sizeof(T); + if (sizeof(Vector) == 64) + { + return Vector512.Count; + } + else if (sizeof(Vector) == 32) + { + return Vector256.Count; + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Count; + } } } -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets a new with the elements set to their index. /// The type of the vector () is not supported. 
@@ -158,15 +194,19 @@ public static Vector Indices [MethodImpl(MethodImplOptions.AggressiveInlining)] get { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Count; i++) + if (sizeof(Vector) == 64) { - result.SetElementUnsafe(i, Scalar.Convert(i)); + return Vector512.Indices.AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Indices.AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Indices.AsVector(); } - - return result; } } @@ -231,15 +271,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator +(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Add(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() + right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() + right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() + right.AsVector128()).AsVector(); } - - return result; } /// Computes the bitwise-and of two vectors. 
@@ -250,19 +294,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator &(Vector left, Vector right) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vleft = left.As(); - Vector vright = right.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong value = vleft.GetElementUnsafe(index) & vright.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return (left.AsVector512() & right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() & right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() & right.AsVector128()).AsVector(); } - - return result.As(); } /// Computes the bitwise-or of two vectors. @@ -273,19 +317,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator |(Vector left, Vector right) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vleft = left.As(); - Vector vright = right.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong value = vleft.GetElementUnsafe(index) | vright.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return (left.AsVector512() | right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() | right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() | right.AsVector128()).AsVector(); } - - return result.As(); } /// Divides two vectors to compute their quotient. 
@@ -296,15 +340,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator /(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Divide(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() / right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() / right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() / right.AsVector128()).AsVector(); } - - return result; } /// Divides a vector by a scalar to compute the per-element quotient. @@ -313,18 +361,7 @@ public T this[int index] /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector operator /(Vector left, T right) - { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) - { - T value = Scalar.Divide(left.GetElementUnsafe(index), right); - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector operator /(Vector left, T right) => left / new Vector(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . 
@@ -334,14 +371,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector left, Vector right) { - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return left.AsVector512() == right.AsVector512(); + } + else if (sizeof(Vector) == 32) + { + return left.AsVector256() == right.AsVector256(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return left.AsVector128() == right.AsVector128(); } - return true; } /// Computes the exclusive-or of two vectors. @@ -352,19 +394,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ^(Vector left, Vector right) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vleft = left.As(); - Vector vright = right.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong value = vleft.GetElementUnsafe(index) ^ vright.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return (left.AsVector512() ^ right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() ^ right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() ^ right.AsVector128()).AsVector(); } - - return result.As(); } /// Reinterprets a as a new . 
@@ -471,15 +513,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator <<(Vector value, int shiftCount) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.ShiftLeft(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + return (value.AsVector512() << shiftCount).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (value.AsVector256() << shiftCount).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (value.AsVector128() << shiftCount).AsVector(); } - - return result; } /// Multiplies two vectors to compute their element-wise product. @@ -490,15 +536,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator *(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Multiply(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() * right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() * right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() * right.AsVector128()).AsVector(); } - - return result; } /// Multiplies a vector by a scalar to compute their product. @@ -513,7 +563,7 @@ public T this[int index] /// The vector to multiply with . /// The product of and . [Intrinsic] - public static Vector operator *(T factor, Vector value) => value * factor; + public static Vector operator *(T factor, Vector value) => value * new Vector(factor); /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. 
@@ -522,18 +572,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ~(Vector value) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vector = value.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong element = ~vector.GetElementUnsafe(index); - result.SetElementUnsafe(index, element); + return (~(value.AsVector512())).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (~(value.AsVector256())).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (~(value.AsVector128())).AsVector(); } - - return result.As(); } /// Shifts (signed) each element of a vector right by the specified amount. @@ -544,15 +595,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator >>(Vector value, int shiftCount) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + return (value.AsVector512() >> shiftCount).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (value.AsVector256() >> shiftCount).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (value.AsVector128() >> shiftCount).AsVector(); } - - return result; } /// Subtracts two vectors to compute their difference. 
@@ -563,15 +618,19 @@ public static Vector operator >>(Vector value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator -(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Subtract(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() - right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() - right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() - right.AsVector128()).AsVector(); } - - return result; } /// Computes the unary negation of a vector. @@ -599,15 +658,19 @@ public static Vector operator >>(Vector value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator >>>(Vector value, int shiftCount) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + return (value.AsVector512() >>> shiftCount).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (value.AsVector256() >>> shiftCount).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (value.AsVector128() >>> shiftCount).AsVector(); } - - return result; } /// Copies a to a given array. 
@@ -617,14 +680,19 @@ public static Vector operator >>>(Vector value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(T[] destination) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if (destination.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector512().CopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + this.AsVector256().CopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().CopyTo(destination); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[0]), this); } /// Copies a to a given array starting at the specified index. @@ -636,35 +704,40 @@ public void CopyTo(T[] destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(T[] destination, int startIndex) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if ((uint)startIndex >= (uint)destination.Length) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess(); + this.AsVector512().CopyTo(destination, startIndex); } - - if ((destination.Length - startIndex) < Count) + else if (sizeof(Vector) == 32) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector256().CopyTo(destination, startIndex); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().CopyTo(destination, startIndex); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[startIndex]), this); } /// Copies a to a given span. /// The span to which the current instance is copied. /// The length of is less than sizeof(). 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void CopyTo(Span destination) + public void CopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (destination.Length < Vector.Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector512().As().CopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + this.AsVector256().As().CopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().As().CopyTo(destination); } - - Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this); } /// Copies a to a given span. @@ -673,12 +746,19 @@ public unsafe void CopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(Span destination) { - if (destination.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector512().CopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + this.AsVector256().CopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().CopyTo(destination); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); } /// Returns a boolean indicating whether the given Object is equal to this vector instance. 
@@ -692,34 +772,18 @@ public void CopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Equals(Vector other) { - // This function needs to account for floating-point equality around NaN - // and so must behave equivalently to the underlying float/double.Equals - - if (Vector.IsHardwareAccelerated) + if (sizeof(Vector) == 64) { - if ((typeof(T) == typeof(double)) || (typeof(T) == typeof(float))) - { - Vector result = Vector.Equals(this, other) | ~(Vector.Equals(this, this) | Vector.Equals(other, other)); - return result.As() == Vector.AllBitsSet; - } - else - { - return this == other; - } + return this.AsVector512().Equals(other.AsVector512()); } - - return SoftwareFallback(in this, other); - - static bool SoftwareFallback(in Vector self, Vector other) + else if (sizeof(Vector) == 32) { - for (int index = 0; index < Count; index++) - { - if (!Scalar.ObjectEquals(self.GetElementUnsafe(index), other.GetElementUnsafe(index))) - { - return false; - } - } - return true; + return this.AsVector256().Equals(other.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().Equals(other.AsVector128()); } } @@ -727,15 +791,19 @@ static bool SoftwareFallback(in Vector self, Vector other) /// The hash code. public override int GetHashCode() { - HashCode hashCode = default; - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = this.GetElementUnsafe(index); - hashCode.Add(value); + return this.AsVector512().GetHashCode(); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().GetHashCode(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().GetHashCode(); } - - return hashCode.ToHashCode(); } /// Returns a String representing this vector. @@ -753,40 +821,40 @@ public override int GetHashCode() /// The string representation. public string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? 
formatProvider) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - var sb = new ValueStringBuilder(stackalloc char[64]); - string separator = NumberFormatInfo.GetInstance(formatProvider).NumberGroupSeparator; - - sb.Append('<'); - sb.Append(((IFormattable)this.GetElementUnsafe(0)).ToString(format, formatProvider)); - - for (int i = 1; i < Count; i++) + if (sizeof(Vector) == 64) { - sb.Append(separator); - sb.Append(' '); - sb.Append(((IFormattable)this.GetElementUnsafe(i)).ToString(format, formatProvider)); + return this.AsVector512().ToString(format, formatProvider); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().ToString(format, formatProvider); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().ToString(format, formatProvider); } - sb.Append('>'); - - return sb.ToString(); } /// Tries to copy a to a given span. /// The span to which the current instance is copied. /// true if the current instance was successfully copied to ; otherwise, false if the length of is less than sizeof(). [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe bool TryCopyTo(Span destination) + public bool TryCopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (destination.Length < Vector.Count) + if (sizeof(Vector) == 64) { - return false; + return this.AsVector512().As().TryCopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().As().TryCopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().As().TryCopyTo(destination); } - - Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this); - return true; } /// Tries to copy a to a given span. 
@@ -795,13 +863,19 @@ public unsafe bool TryCopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryCopyTo(Span destination) { - if (destination.Length < Count) + if (sizeof(Vector) == 64) { - return false; + return this.AsVector512().TryCopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().TryCopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().TryCopyTo(destination); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); - return true; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 7f83b8c09d79e..cce0407615b60 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -9,6 +9,8 @@ using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -30,7 +32,7 @@ namespace System.Runtime.Intrinsics // the internal inlining limits of the JIT. /// Provides a collection of static methods for creating, manipulating, and otherwise operating on 128-bit vectors. 
- public static class Vector128 + public static unsafe class Vector128 { internal const int Size = 16; @@ -58,10 +60,149 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Abs(Vector128 vector) { - return Create( - Vector64.Abs(vector._lower), - Vector64.Abs(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return vector; + } + else if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(vector); + } + else if (Sse.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Abs(vector.AsSingle()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Abs(vector.AsInt32()).As(); + } + else if (AdvSimd.Arm64.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return AdvSimd.Arm64.Abs(vector.AsDouble()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Arm64.Abs(vector.AsInt64()).As(); + } + } + return SoftwareImpl(vector); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Abs(vector.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Abs(vector.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return PackedSimd.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) 
== 2) + { + return PackedSimd.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Abs(vector.AsInt64()).As(); + } + return SoftwareImpl(vector); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return Sse.AndNot(Vector128.Create(-0.0f), vector.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.AndNot(Vector128.Create(-0.0), vector.AsDouble()).As(); + } + else if (Sse3.IsSupported) + { + if (sizeof(T) == 1) + { + return Ssse3.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return Ssse3.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Ssse3.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.Abs(vector.AsInt64()).As(); + } + } + } + + if (sizeof(T) is 1 or 2 or 4 or 8) + { + return ConditionalSelect(LessThan(vector, Vector128.Zero), Vector128.Zero - vector, vector); + } + } + return SoftwareImpl(vector); + } + + static Vector128 SoftwareImpl(Vector128 vector) + { + return Create( + Vector64.Abs(vector._lower), + Vector64.Abs(vector._upper) + ); + } } /// Adds two vectors to compute their sum. @@ -80,13 +221,7 @@ public static Vector128 Abs(Vector128 vector) /// The bitwise-and of and the ones-complement of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 AndNot(Vector128 left, Vector128 right) - { - return Create( - Vector64.AndNot(left._lower, right._lower), - Vector64.AndNot(left._upper, right._upper) - ); - } + public static Vector128 AndNot(Vector128 left, Vector128 right) => left & ~right; /// Reinterprets a as a new . /// The type of the elements in the input vector. @@ -377,10 +512,26 @@ public static Vector AsVector(this Vector128 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector128 Ceiling(Vector128 vector) { - return Create( - Vector64.Ceiling(vector._lower), - Vector64.Ceiling(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector64.Ceiling(vector._lower), + Vector64.Ceiling(vector._upper) + ); + } } /// Computes the ceiling of each element in a vector. @@ -406,20 +557,14 @@ internal static Vector128 Ceiling(Vector128 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 ConditionalSelect(Vector128 condition, Vector128 left, Vector128 right) - { - return Create( - Vector64.ConditionalSelect(condition._lower, left._lower, right._lower), - Vector64.ConditionalSelect(condition._upper, left._upper, right._upper) - ); - } + public static Vector128 ConditionalSelect(Vector128 condition, Vector128 left, Vector128 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToDouble(Vector128 vector) + public static Vector128 ConvertToDouble(Vector128 vector) { if (Sse2.IsSupported) { @@ -460,7 +605,7 @@ public static unsafe Vector128 ConvertToDouble(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToDouble(Vector128 vector) + public static Vector128 ConvertToDouble(Vector128 vector) { if (Sse2.IsSupported) { @@ -500,7 +645,7 @@ public static unsafe Vector128 ConvertToDouble(Vector128 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt32(Vector128 vector) + public static Vector128 ConvertToInt32(Vector128 vector) { return Create( Vector64.ConvertToInt32(vector._lower), @@ -513,7 +658,7 @@ public static unsafe Vector128 ConvertToInt32(Vector128 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt32Native(Vector128 vector) + public static Vector128 ConvertToInt32Native(Vector128 vector) { return Create( Vector64.ConvertToInt32Native(vector._lower), @@ -526,7 +671,7 @@ public static unsafe Vector128 ConvertToInt32Native(Vector128 vector /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt64(Vector128 vector) + public static Vector128 ConvertToInt64(Vector128 vector) { return Create( Vector64.ConvertToInt64(vector._lower), @@ -539,7 +684,7 @@ public static unsafe Vector128 ConvertToInt64(Vector128 vector) /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt64Native(Vector128 vector) + public static Vector128 ConvertToInt64Native(Vector128 vector) { return Create( Vector64.ConvertToInt64Native(vector._lower), @@ -552,7 +697,7 @@ public static unsafe Vector128 ConvertToInt64Native(Vector128 vect /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToSingle(Vector128 vector) + public static Vector128 ConvertToSingle(Vector128 vector) { return Create( Vector64.ConvertToSingle(vector._lower), @@ -566,7 +711,7 @@ public static unsafe Vector128 ConvertToSingle(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToSingle(Vector128 vector) + public static Vector128 ConvertToSingle(Vector128 vector) { if (Sse2.IsSupported) { @@ -623,7 +768,7 @@ static Vector128 SoftwareFallback(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt32(Vector128 vector) + public static Vector128 ConvertToUInt32(Vector128 vector) { return Create( Vector64.ConvertToUInt32(vector._lower), @@ -637,7 +782,7 @@ public static unsafe Vector128 ConvertToUInt32(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt32Native(Vector128 vector) + public static Vector128 ConvertToUInt32Native(Vector128 vector) { return Create( Vector64.ConvertToUInt32Native(vector._lower), @@ -651,7 +796,7 @@ public static unsafe Vector128 ConvertToUInt32Native(Vector128 vect [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt64(Vector128 vector) + public static Vector128 ConvertToUInt64(Vector128 vector) { return Create( 
Vector64.ConvertToUInt64(vector._lower), @@ -665,7 +810,7 @@ public static unsafe Vector128 ConvertToUInt64(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt64Native(Vector128 vector) + public static Vector128 ConvertToUInt64Native(Vector128 vector) { return Create( Vector64.ConvertToUInt64Native(vector._lower), @@ -703,7 +848,7 @@ public static void CopyTo(this Vector128 vector, T[] destination) /// The type of and () is not supported. /// is null. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void CopyTo(this Vector128 vector, T[] destination, int startIndex) + public static void CopyTo(this Vector128 vector, T[] destination, int startIndex) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons @@ -743,7 +888,7 @@ public static void CopyTo(this Vector128 vector, Span destination) /// A new with all elements initialized to . /// The type of () is not supported. [Intrinsic] - public static unsafe Vector128 Create(T value) + public static Vector128 Create(T value) { Vector64 vector = Vector64.Create(value); return Create(vector, vector); @@ -754,48 +899,48 @@ public static unsafe Vector128 Create(T value) /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi8 [Intrinsic] - public static unsafe Vector128 Create(byte value) => Create(value); + public static Vector128 Create(byte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
/// On x86, this method corresponds to __m128d _mm_set1_pd [Intrinsic] - public static unsafe Vector128 Create(double value) => Create(value); + public static Vector128 Create(double value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi16 [Intrinsic] - public static unsafe Vector128 Create(short value) => Create(value); + public static Vector128 Create(short value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi32 [Intrinsic] - public static unsafe Vector128 Create(int value) => Create(value); + public static Vector128 Create(int value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi64x [Intrinsic] - public static unsafe Vector128 Create(long value) => Create(value); + public static Vector128 Create(long value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector128 Create(nint value) => Create(value); + public static Vector128 Create(nint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(nuint value) => Create(value); + public static Vector128 Create(nuint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -803,14 +948,14 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi8 [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(sbyte value) => Create(value); + public static Vector128 Create(sbyte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128 _mm_set1_ps [Intrinsic] - public static unsafe Vector128 Create(float value) => Create(value); + public static Vector128 Create(float value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -818,7 +963,7 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi16 [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(ushort value) => Create(value); + public static Vector128 Create(ushort value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -826,7 +971,7 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi32 [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(uint value) => Create(value); + public static Vector128 Create(uint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. 
@@ -834,7 +979,7 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi64x [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(ulong value) => Create(value); + public static Vector128 Create(ulong value) => Create(value); /// Creates a new from a given array. /// The type of the elements in the vector. @@ -915,7 +1060,7 @@ public static Vector128 Create(ReadOnlySpan values) /// On x86, this method corresponds to __m128i _mm_setr_epi8 [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15) + public static Vector128 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15) { return Create( Vector64.Create(e0, e1, e2, e3, e4, e5, e6, e7), @@ -930,7 +1075,7 @@ public static unsafe Vector128 Create(byte e0, byte e1, byte e2, byte e3, /// On x86, this method corresponds to __m128d _mm_setr_pd [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(double e0, double e1) + public static Vector128 Create(double e0, double e1) { return Create( Vector64.Create(e0), @@ -951,7 +1096,7 @@ public static unsafe Vector128 Create(double e0, double e1) /// On x86, this method corresponds to __m128i _mm_setr_epi16 [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7) + public static Vector128 Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7) { return Create( Vector64.Create(e0, e1, e2, e3), @@ -968,7 +1113,7 @@ public static unsafe Vector128 Create(short e0, short e1, short e2, short /// On x86, this method 
corresponds to __m128i _mm_setr_epi32 [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(int e0, int e1, int e2, int e3) + public static Vector128 Create(int e0, int e1, int e2, int e3) { return Create( Vector64.Create(e0, e1), @@ -983,7 +1128,7 @@ public static unsafe Vector128 Create(int e0, int e1, int e2, int e3) /// On x86, this method corresponds to __m128i _mm_setr_epi64x [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(long e0, long e1) + public static Vector128 Create(long e0, long e1) { return Create( Vector64.Create(e0), @@ -1013,7 +1158,7 @@ public static unsafe Vector128 Create(long e0, long e1) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15) + public static Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15) { return Create( Vector64.Create(e0, e1, e2, e3, e4, e5, e6, e7), @@ -1030,7 +1175,7 @@ public static unsafe Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte /// On x86, this method corresponds to __m128 _mm_setr_ps [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(float e0, float e1, float e2, float e3) + public static Vector128 Create(float e0, float e1, float e2, float e3) { return Create( Vector64.Create(e0, e1), @@ -1052,7 +1197,7 @@ public static unsafe Vector128 Create(float e0, float e1, float e2, float [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7) 
+ public static Vector128 Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7) { return Create( Vector64.Create(e0, e1, e2, e3), @@ -1070,7 +1215,7 @@ public static unsafe Vector128 Create(ushort e0, ushort e1, ushort e2, u [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(uint e0, uint e1, uint e2, uint e3) + public static Vector128 Create(uint e0, uint e1, uint e2, uint e3) { return Create( Vector64.Create(e0, e1), @@ -1086,7 +1231,7 @@ public static unsafe Vector128 Create(uint e0, uint e1, uint e2, uint e3) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(ulong e0, ulong e1) + public static Vector128 Create(ulong e0, ulong e1) { return Create( Vector64.Create(e0), @@ -1123,65 +1268,65 @@ public static Vector128 Create(Vector64 lower, Vector64 upper) /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . 
- public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// On x86, this method corresponds to __m128i _mm_setr_epi64 /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. 
/// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. @@ -1189,14 +1334,14 @@ public static Vector128 Create(Vector64 lower, Vector64 upper) /// On x86, this method corresponds to __m128i _mm_setr_epi64 /// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . 
[CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The type of the elements in the vector. @@ -1204,84 +1349,84 @@ public static Vector128 Create(Vector64 lower, Vector64 upper) /// A new instance with the first element initialized to and the remaining elements initialized to zero. /// The type of () is not supported. [Intrinsic] - public static unsafe Vector128 CreateScalar(T value) => Vector64.CreateScalar(value).ToVector128(); + public static Vector128 CreateScalar(T value) => Vector64.CreateScalar(value).ToVector128(); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(byte value) => CreateScalar(value); + public static Vector128 CreateScalar(byte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(double value) => CreateScalar(value); + public static Vector128 CreateScalar(double value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(short value) => CreateScalar(value); + public static Vector128 CreateScalar(short value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(int value) => CreateScalar(value); + public static Vector128 CreateScalar(int value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(long value) => CreateScalar(value); + public static Vector128 CreateScalar(long value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(nint value) => CreateScalar(value); + public static Vector128 CreateScalar(nint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(nuint value) => CreateScalar(value); + public static Vector128 CreateScalar(nuint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(sbyte value) => CreateScalar(value); + public static Vector128 CreateScalar(sbyte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(float value) => CreateScalar(value); + public static Vector128 CreateScalar(float value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(ushort value) => CreateScalar(value); + public static Vector128 CreateScalar(ushort value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(uint value) => CreateScalar(value); + public static Vector128 CreateScalar(uint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(ulong value) => CreateScalar(value); + public static Vector128 CreateScalar(ulong value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The type of the elements in the vector. @@ -1306,78 +1451,78 @@ public static Vector128 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. 
/// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); /// Creates a new instance where the elements begin at a specified value and which are spaced apart according to another specified value. /// The type of the elements in the vector. @@ -1413,16 +1558,7 @@ public static Vector128 CreateScalarUnsafe(T value) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static T Dot(Vector128 left, Vector128 right) - { - // Doing this as Dot(lower) + Dot(upper) is important for floating-point determinism - // This is because the underlying dpps instruction on x86/x64 will do this equivalently - // and otherwise the software vs accelerated implementations may differ in returned result. - - T result = Vector64.Dot(left._lower, right._lower); - result = Scalar.Add(result, Vector64.Dot(left._upper, right._upper)); - return result; - } + public static T Dot(Vector128 left, Vector128 right) => Sum(left * right); /// Compares two vectors to determine if they are equal on a per-element basis. /// The type of the elements in the vector. 
@@ -1519,10 +1655,26 @@ public static uint ExtractMostSignificantBits(this Vector128 vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector128 Floor(Vector128 vector) { - return Create( - Vector64.Floor(vector._lower), - Vector64.Floor(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector64.Floor(vector._lower), + Vector64.Floor(vector._upper) + ); + } } /// Computes the floor of each element in a vector. @@ -1782,7 +1934,6 @@ public static bool LessThanOrEqualAny(Vector128 left, Vector128 right) || Vector64.LessThanOrEqualAny(left._upper, right._upper); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1790,7 +1941,7 @@ public static bool LessThanOrEqualAny(Vector128 left, Vector128 right) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Load(T* source) => LoadUnsafe(ref *source); + public static Vector128 Load(T* source) => LoadUnsafe(ref *source); /// Loads a vector from the given aligned source. /// The type of the elements in the vector. 
@@ -1800,7 +1951,7 @@ public static bool LessThanOrEqualAny(Vector128 left, Vector128 right) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 LoadAligned(T* source) + public static Vector128 LoadAligned(T* source) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -1820,8 +1971,7 @@ public static unsafe Vector128 LoadAligned(T* source) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static Vector128 LoadAlignedNonTemporal(T* source) => LoadAligned(source); /// Loads a vector from the given source. /// The type of the elements in the vector. @@ -1989,7 +2139,7 @@ public static Vector128 Min(Vector128 left, Vector128 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector128 Multiply(T left, Vector128 right) => left * right; + public static Vector128 Multiply(T left, Vector128 right) => right * left; /// [Intrinsic] @@ -2019,7 +2169,7 @@ public static Vector128 MultiplyAddEstimate(Vector128 left, Vector /// A containing elements narrowed from and . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2034,7 +2184,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128< [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2048,7 +2198,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128A containing elements narrowed from and . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2062,7 +2212,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128A containing elements narrowed from and . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2077,7 +2227,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2092,7 +2242,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2107,7 +2257,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2629,7 +2779,6 @@ public static Vector128 Sqrt(Vector128 vector) ); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. @@ -2637,7 +2786,7 @@ public static Vector128 Sqrt(Vector128 vector) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void Store(this Vector128 source, T* destination) => source.StoreUnsafe(ref *destination); + public static void Store(this Vector128 source, T* destination) => source.StoreUnsafe(ref *destination); /// Stores a vector at the given aligned destination. /// The type of the elements in the vector. 
@@ -2647,7 +2796,7 @@ public static Vector128 Sqrt(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void StoreAligned(this Vector128 source, T* destination) + public static void StoreAligned(this Vector128 source, T* destination) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -2667,8 +2816,7 @@ public static unsafe void StoreAligned(this Vector128 source, T* destinati /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void StoreAlignedNonTemporal(this Vector128 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static void StoreAlignedNonTemporal(this Vector128 source, T* destination) => source.StoreAligned(destination); /// /// Stores to lower 64 bits of to memory destination of [] @@ -2735,14 +2883,13 @@ public static void StoreUnsafe(this Vector128 source, ref T destination, n [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Sum(Vector128 vector) { - T sum = default!; - - for (int index = 0; index < Vector128.Count; index++) - { - sum = Scalar.Add(sum, vector.GetElementUnsafe(index)); - } + // Doing this as Sum(lower) + Sum(upper) is important for floating-point determinism + // This is because the underlying dpps instruction on x86/x64 will do this equivalently + // and otherwise the software vs accelerated implementations may differ in returned result. - return sum; + T result = Vector64.Sum(vector._lower); + result = Scalar.Add(result, Vector64.Sum(vector._upper)); + return result; } /// Converts the given vector to a scalar containing the value of the first element. @@ -2780,7 +2927,7 @@ public static Vector256 ToVector256(this Vector128 vector) /// The type of () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector256 ToVector256Unsafe(this Vector128 vector) + public static Vector256 ToVector256Unsafe(this Vector128 vector) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -2815,46 +2962,46 @@ public static bool TryCopyTo(this Vector128 vector, Span destination) /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . 
[CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens the lower half of a into a . /// The vector whose elements are to be widened. @@ -2877,7 +3024,7 @@ public static Vector128 WidenLower(Vector128 source) /// A vector that contain the widened lower half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2892,7 +3039,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2908,7 +3055,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2923,7 +3070,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2939,7 +3086,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2955,7 +3102,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2986,7 +3133,7 @@ public static Vector128 WidenUpper(Vector128 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3001,7 +3148,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) /// A vector that contain the widened upper half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3017,7 +3164,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3032,7 +3179,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3048,7 +3195,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3064,7 +3211,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index c9f1b6e3f21ef..7b0f847fd37bf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -7,8 +7,13 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; +using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -22,7 +27,6 @@ namespace System.Runtime.Intrinsics // This ensures we get good codegen for the "fast-path" and allows the JIT to // determine inline profitability of the other paths as it would normally. 
- /// Represents a 128-bit vector of a specified numeric type that is suitable for low-level optimization of parallel algorithms. /// The type of the elements in the vector. [Intrinsic] @@ -39,11 +43,7 @@ namespace System.Runtime.Intrinsics public static Vector128 AllBitsSet { [Intrinsic] - get - { - Vector64 vector = Vector64.AllBitsSet; - return Vector128.Create(vector, vector); - } + get => Vector128.Create(Scalar.AllBitsSet); } /// Gets the number of that are in a . @@ -51,7 +51,11 @@ public static Vector128 AllBitsSet public static int Count { [Intrinsic] - get => Vector64.Count * 2; + get + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); + return Vector128.Size / sizeof(T); + } } /// Gets a new with the elements set to their index. @@ -102,11 +106,7 @@ public static bool IsSupported public static Vector128 One { [Intrinsic] - get - { - Vector64 vector = Vector64.One; - return Vector128.Create(vector, vector); - } + get => Vector128.Create(Scalar.One); } /// Gets a new with all elements initialized to zero. 
@@ -139,10 +139,128 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator +(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower + right._lower, - left._upper + right._upper - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Add(left.AsDouble(), right.AsDouble()).As(); + } + } + else if (sizeof(T) == 1) + { + return AdvSimd.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower + right._lower, + left._upper + right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return 
PackedSimd.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-and of two vectors. @@ -154,10 +272,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator &(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower & right._lower, - left._upper & right._upper - ); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.And(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower & right._lower, + left._upper & right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return PackedSimd.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.And(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.And(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.And(left.AsDouble(), 
right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.And(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-or of two vectors. @@ -169,10 +389,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator |(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower | right._lower, - left._upper | right._upper - ); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower | right._lower, + left._upper | right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return PackedSimd.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Or(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Or(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Divides two vectors to compute their quotient. 
@@ -184,10 +506,76 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator /(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower / right._lower, - left._upper / right._upper - ); + if (AdvSimd.Arm64.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Arm64.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.Arm64.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower / right._lower, + left._upper / right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Divide(left.AsDouble(), right.AsDouble()).As(); + } + } + 
return SoftwareImpl(left, right); + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -196,13 +584,7 @@ public static Vector128 Zero /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator /(Vector128 left, T right) - { - return Vector128.Create( - left._lower / right, - left._upper / right - ); - } + public static Vector128 operator /(Vector128 left, T right) => left / Vector128.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -226,10 +608,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator ^(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower ^ right._lower, - left._upper ^ right._upper - ); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower ^ right._lower, + left._upper ^ right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return PackedSimd.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Xor(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Xor(left.AsDouble(), 
right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Compares two vectors to determine if any elements are not equal. @@ -248,10 +732,98 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator <<(Vector128 value, int shiftCount) { - return Vector128.Create( - value._lower << shiftCount, - value._upper << shiftCount - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(value, shiftCount); + } + else if (Sse2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsByte(), Vector128.Create((sbyte)(shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsUInt16(), Vector128.Create((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsUInt32(), Vector128.Create(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogical(value.AsUInt64(), Vector128.Create(shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); + } + + static Vector128 SoftwareImpl(Vector128 value, int shiftCount) + { + return Vector128.Create( + value._lower << shiftCount, + value._upper << shiftCount + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return PackedSimd.ShiftLeft(value.AsByte(), shiftCount & 0x7).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.ShiftLeft(value.AsUInt16(), shiftCount & 0xF).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.ShiftLeft(value.AsUInt32(), shiftCount & 0x1F).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.ShiftLeft(value.AsUInt64(), shiftCount & 0x3F).As(); + } + return SoftwareImpl(value, shiftCount); + } + + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector128 tmp = Sse2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Sse2.And(tmp, Vector128.Create((ushort)(0xFF << maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.ShiftLeftLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.ShiftLeftLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -263,10 +835,190 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator *(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower * right._lower, - left._upper * right._upper - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + } + else if (sizeof(T) == 1) + { + return AdvSimd.Multiply(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Multiply(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Multiply(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + // TODO-ARM64-CQ: We should support long/ulong multiplication. 
+ } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower * right._lower, + left._upper * right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + (Vector128 al, Vector128 ah) = Vector128.Widen(left.AsByte()); + (Vector128 bl, Vector128 bh) = Vector128.Widen(right.AsByte()); + + Vector128 rl = PackedSimd.Multiply(al, bl); + Vector128 rh = PackedSimd.Multiply(ah, bh); + + return Vector128.Narrow(rl, rh).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Multiply(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Multiply(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Multiply(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Sse41))] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(Avx512DQ.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + if (Avx2.IsSupported) + { + Vector256 a = Avx2.ConvertToVector256Int16(left.AsByte()); + 
Vector256 b = Avx2.ConvertToVector256Int16(right.AsByte()); + + Vector256 r = Avx2.MultiplyLow(a, b); + + if (Avx512BW.VL.IsSupported) + { + return Avx512BW.VL.ConvertToVector128Byte(r).As(); + } + else + { + r = Avx2.And(r, Vector256.Create(0x00FF)); + return Avx2.Permute4x64(Avx2.PackUnsignedSaturate(r, r).AsUInt64(), 0b11_01_10_00).GetLower().As(); + } + } + else + { + (Vector128 al, Vector128 ah) = Vector128.Widen(left.AsByte()); + (Vector128 bl, Vector128 bh) = Vector128.Widen(right.AsByte()); + + Vector128 rl = Sse2.MultiplyLow(al, bl); + Vector128 rh = Sse2.MultiplyLow(ah, bh); + + return Vector128.Narrow(rl, rh).As(); + } + } + else if (sizeof(T) == 2) + { + return Sse2.MultiplyLow(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + if (Sse41.IsSupported) + { + return Sse41.MultiplyLow(left.AsUInt32(), right.AsUInt32()).As(); + } + else + { + Vector128 al = left.AsUInt32(); + Vector128 bl = right.AsUInt32(); + + Vector128 rl = Sse2.Multiply(al, bl).AsUInt32(); + rl = Sse2.Shuffle(rl, 0b00_00_10_00); + + Vector128 ah = Sse2.ShiftRightLogical128BitLane(al, 4); + Vector128 bh = Sse2.ShiftRightLogical128BitLane(bl, 4); + + Vector128 rh = Sse2.Multiply(ah, bh).AsUInt32(); + rh = Sse2.Shuffle(rh, 0b00_00_10_00); + + return Sse2.UnpackLow(rl, rh).As(); + } + } + else if (sizeof(T) == 8) + { + if (Avx512DQ.VL.IsSupported) + { + return Avx512DQ.VL.MultiplyLow(left.AsUInt64(), right.AsUInt64()).As(); + } + else + { + // TODO-XARCH-CQ: We should support long/ulong multiplication. + } + } + } + return SoftwareImpl(left, right); + } } /// Multiplies a vector by a scalar to compute their product. @@ -276,13 +1028,7 @@ public static Vector128 Zero /// The type of the vector () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator *(Vector128 left, T right) - { - return Vector128.Create( - left._lower * right, - left._upper * right - ); - } + public static Vector128 operator *(Vector128 left, T right) => left * Vector128.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -291,7 +1037,7 @@ public static Vector128 Zero /// The type of the vector () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator *(T left, Vector128 right) => right * left; + public static Vector128 operator *(T left, Vector128 right) => Vector128.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -301,10 +1047,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator ~(Vector128 vector) { - return Vector128.Create( - ~vector._lower, - ~vector._upper - ); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(vector); + } + else if (Sse.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 vector) + { + if (sizeof(T) == 1) + { + return AdvSimd.Not(vector.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Not(vector.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Not(vector.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Not(vector.AsUInt64()).As(); + } + return SoftwareImpl(vector); + } + + static Vector128 SoftwareImpl(Vector128 vector) + { + return Vector128.Create( + ~vector._lower, + ~vector._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 vector) + { + if (sizeof(T) == 1) + { + return PackedSimd.Not(vector.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Not(vector.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Not(vector.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Not(vector.AsUInt64()).As(); + } + return SoftwareImpl(vector); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return Sse.Xor(vector.AsSingle(), Vector128.AllBitsSet).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Xor(vector.AsDouble(), Vector128.AllBitsSet).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Xor(vector.AsByte(), Vector128.AllBitsSet).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Xor(vector.AsUInt16(), Vector128.AllBitsSet).As(); + } + 
else if (sizeof(T) == 4) + { + return Sse2.Xor(vector.AsUInt32(), Vector128.AllBitsSet).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Xor(vector.AsUInt64(), Vector128.AllBitsSet).As(); + } + } + return SoftwareImpl(vector); + } } /// Shifts (signed) each element of a vector right by the specified amount. @@ -315,10 +1163,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator >>(Vector128 value, int shiftCount) { - return Vector128.Create( - value._lower >> shiftCount, - value._upper >> shiftCount - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(value, shiftCount); + } + else if (Sse2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return AdvSimd.ShiftArithmetic(value.AsSByte(), Vector128.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftArithmetic(value.AsInt16(), Vector128.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftArithmetic(value.AsInt32(), Vector128.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftArithmetic(value.AsInt64(), Vector128.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); + } + + static Vector128 SoftwareImpl(Vector128 value, int shiftCount) + { + return Vector128.Create( + value._lower >> shiftCount, + value._upper >> shiftCount + ); + } + + 
[CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return PackedSimd.ShiftRightArithmetic(value.AsSByte(), shiftCount & 0x7).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.ShiftRightArithmetic(value.AsInt16(), shiftCount & 0xF).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.ShiftRightArithmetic(value.AsInt32(), shiftCount & 0x1F).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.ShiftRightArithmetic(value.AsInt64(), shiftCount & 0x3F).As(); + } + return SoftwareImpl(value, shiftCount); + } + + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + // TODO-XARCH-CQ: We should support sbyte arithmetic shift. + } + else if (sizeof(T) == 2) + { + return Sse2.ShiftRightArithmetic(value.AsInt16(), Vector128.CreateScalar((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.ShiftRightArithmetic(value.AsInt32(), Vector128.CreateScalar(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.ShiftRightArithmetic(value.AsInt64(), Vector128.CreateScalar(shiftCount & 0x3F)).As(); + } + else + { + // TODO-XARCH-CQ: We should support double/long arithmetic shift. + } + } + return SoftwareImpl(value, shiftCount); + } } /// Subtracts two vectors to compute their difference. 
@@ -330,10 +1280,128 @@ public static Vector128 operator >>(Vector128 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator -(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower - right._lower, - left._upper - right._upper - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + } + else if (sizeof(T) == 1) + { + return AdvSimd.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower - right._lower, + left._upper - right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return 
PackedSimd.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return PackedSimd.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the unary negation of a vector. @@ -341,14 +1409,7 @@ public static Vector128 operator >>(Vector128 value, int shiftCount) /// A vector whose elements are the unary negation of the corresponding elements in . /// The type of the vector () is not supported. [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator -(Vector128 vector) - { - return Vector128.Create( - -vector._lower, - -vector._upper - ); - } + public static Vector128 operator -(Vector128 vector) => Zero - vector; /// Returns a given vector unchanged. /// The vector. 
@@ -369,10 +1430,98 @@ public static Vector128 operator >>(Vector128 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator >>>(Vector128 value, int shiftCount) { - return Vector128.Create( - value._lower >>> shiftCount, - value._upper >>> shiftCount - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(value, shiftCount); + } + else if (Sse2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsByte(), Vector128.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsUInt16(), Vector128.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsUInt32(), Vector128.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogical(value.AsUInt64(), Vector128.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); + } + + static Vector128 SoftwareImpl(Vector128 value, int shiftCount) + { + return Vector128.Create( + value._lower >>> shiftCount, + value._upper >>> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return PackedSimd.ShiftRightLogical(value.AsByte(), shiftCount & 0x7).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.ShiftRightLogical(value.AsUInt16(), shiftCount & 0xF).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.ShiftRightLogical(value.AsUInt32(), shiftCount & 0x1F).As(); + } + else if (sizeof(T) == 
8) + { + return PackedSimd.ShiftRightLogical(value.AsUInt64(), shiftCount & 0x3F).As(); + } + return SoftwareImpl(value, shiftCount); + } + + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector128 tmp = Sse2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Sse2.And(tmp, Vector128.Create((ushort)(0xFF >>> maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.ShiftRightLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.ShiftRightLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Determines whether the specified object is equal to the current instance. @@ -439,7 +1588,7 @@ public override int GetHashCode() /// The type of the vector () is not supported. public override string ToString() => ToString("G", CultureInfo.InvariantCulture); - private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) + internal string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? 
formatProvider) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -474,16 +1623,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Abs(Vector128 vector) => Vector128.Abs(vector); /// - static Vector128 ISimdVector, T>.Add(Vector128 left, Vector128 right) => Vector128.Add(left, right); + static Vector128 ISimdVector, T>.Add(Vector128 left, Vector128 right) => left + right; /// static Vector128 ISimdVector, T>.AndNot(Vector128 left, Vector128 right) => Vector128.AndNot(left, right); /// - static Vector128 ISimdVector, T>.BitwiseAnd(Vector128 left, Vector128 right) => Vector128.BitwiseAnd(left, right); + static Vector128 ISimdVector, T>.BitwiseAnd(Vector128 left, Vector128 right) => left & right; /// - static Vector128 ISimdVector, T>.BitwiseOr(Vector128 left, Vector128 right) => Vector128.BitwiseOr(left, right); + static Vector128 ISimdVector, T>.BitwiseOr(Vector128 left, Vector128 right) => left | right; /// static Vector128 ISimdVector, T>.Ceiling(Vector128 vector) => Vector128.Ceiling(vector); @@ -519,10 +1668,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector128.CreateScalarUnsafe(value); /// - static Vector128 ISimdVector, T>.Divide(Vector128 left, Vector128 right) => Vector128.Divide(left, right); + static Vector128 ISimdVector, T>.Divide(Vector128 left, Vector128 right) => left / right; /// - static Vector128 ISimdVector, T>.Divide(Vector128 left, T right) => Vector128.Divide(left, right); + static Vector128 ISimdVector, T>.Divide(Vector128 left, T right) => left / right; /// static T ISimdVector, T>.Dot(Vector128 left, Vector128 right) => Vector128.Dot(left, right); @@ -531,7 +1680,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Equals(Vector128 left, Vector128 right) => Vector128.Equals(left, 
right); /// - static bool ISimdVector, T>.EqualsAll(Vector128 left, Vector128 right) => Vector128.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector128 left, Vector128 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector128 left, Vector128 right) => Vector128.EqualsAny(left, right); @@ -540,7 +1689,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Floor(Vector128 vector) => Vector128.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector128 vector, int index) => Vector128.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector128 vector, int index) => vector.GetElement(index); /// static Vector128 ISimdVector, T>.GreaterThan(Vector128 left, Vector128 right) => Vector128.GreaterThan(left, right); @@ -578,7 +1727,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector128 left, Vector128 right) => Vector128.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector128 ISimdVector, T>.Load(T* source) => Vector128.Load(source); @@ -587,7 +1735,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector128 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector128.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector128 ISimdVector, T>.LoadUnsafe(ref readonly T source) => Vector128.LoadUnsafe(in source); @@ -602,63 +1749,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Min(Vector128 left, Vector128 right) => Vector128.Min(left, right); /// - static Vector128 ISimdVector, T>.Multiply(Vector128 left, 
Vector128 right) => Vector128.Multiply(left, right); + static Vector128 ISimdVector, T>.Multiply(Vector128 left, Vector128 right) => left * right; /// - static Vector128 ISimdVector, T>.Multiply(Vector128 left, T right) => Vector128.Multiply(left, right); + static Vector128 ISimdVector, T>.Multiply(Vector128 left, T right) => left * right; /// - static Vector128 ISimdVector, T>.Negate(Vector128 vector) => Vector128.Negate(vector); + static Vector128 ISimdVector, T>.Negate(Vector128 vector) => -vector; /// - static Vector128 ISimdVector, T>.OnesComplement(Vector128 vector) => Vector128.OnesComplement(vector); + static Vector128 ISimdVector, T>.OnesComplement(Vector128 vector) => ~vector; /// - static Vector128 ISimdVector, T>.ShiftLeft(Vector128 vector, int shiftCount) => Vector128.ShiftLeft(vector, shiftCount); + static Vector128 ISimdVector, T>.ShiftLeft(Vector128 vector, int shiftCount) => vector << shiftCount; /// - static Vector128 ISimdVector, T>.ShiftRightArithmetic(Vector128 vector, int shiftCount) => Vector128.ShiftRightArithmetic(vector, shiftCount); + static Vector128 ISimdVector, T>.ShiftRightArithmetic(Vector128 vector, int shiftCount) => vector >> shiftCount; /// - static Vector128 ISimdVector, T>.ShiftRightLogical(Vector128 vector, int shiftCount) => Vector128.ShiftRightLogical(vector, shiftCount); + static Vector128 ISimdVector, T>.ShiftRightLogical(Vector128 vector, int shiftCount) => vector >>> shiftCount; /// static Vector128 ISimdVector, T>.Sqrt(Vector128 vector) => Vector128.Sqrt(vector); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void ISimdVector, T>.Store(Vector128 source, T* destination) => Vector128.Store(source, destination); + static void ISimdVector, T>.Store(Vector128 source, T* destination) => source.Store(destination); /// - static void ISimdVector, T>.StoreAligned(Vector128 source, T* destination) => Vector128.StoreAligned(source, 
destination); + static void ISimdVector, T>.StoreAligned(Vector128 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector128 source, T* destination) => Vector128.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector128 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination) => Vector128.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination, nuint elementOffset) => Vector128.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector128 ISimdVector, T>.Subtract(Vector128 left, Vector128 right) => Vector128.Subtract(left, right); + static Vector128 ISimdVector, T>.Subtract(Vector128 left, Vector128 right) => left - right; /// static T ISimdVector, T>.Sum(Vector128 vector) => Vector128.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector128 vector) => Vector128.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector128 vector) => vector.ToScalar(); /// - static bool ISimdVector, T>.TryCopyTo(Vector128 vector, Span destination) => Vector128.TryCopyTo(vector, destination); + static bool ISimdVector, T>.TryCopyTo(Vector128 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector128 ISimdVector, T>.WithElement(Vector128 vector, int index, T value) => Vector128.WithElement(vector, index, value); + static Vector128 ISimdVector, 
T>.WithElement(Vector128 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector128 ISimdVector, T>.Xor(Vector128 left, Vector128 right) => Vector128.Xor(left, right); + static Vector128 ISimdVector, T>.Xor(Vector128 left, Vector128 right) => left ^ right; // // New Surface Area diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index e8f16472b3695..34abfc4c8984f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -6,8 +6,11 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -62,10 +65,70 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Abs(Vector256 vector) { - return Create( - Vector128.Abs(vector._lower), - Vector128.Abs(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return vector; + } + else if (Avx.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 vector) + { + if (typeof(T) == typeof(float)) + { + return Avx.AndNot(Vector256.Create(-0.0f), 
vector.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.AndNot(Vector256.Create(-0.0), vector.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.Abs(vector.AsInt64()).As(); + } + else + { + return ConditionalSelect(LessThan(vector, Vector256.Zero), Vector256.Zero - vector, vector); + } + } + } + return SoftwareImpl(vector); + } + + static Vector256 SoftwareImpl(Vector256 vector) + { + return Create( + Vector128.Abs(vector._lower), + Vector128.Abs(vector._upper) + ); + } } /// Adds two vectors to compute their sum. @@ -85,13 +148,7 @@ public static Vector256 Abs(Vector256 vector) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 AndNot(Vector256 left, Vector256 right) - { - return Create( - Vector128.AndNot(left._lower, right._lower), - Vector128.AndNot(left._upper, right._upper) - ); - } + public static Vector256 AndNot(Vector256 left, Vector256 right) => left & ~right; /// Reinterprets a as a new . /// The type of the elements in the input vector. 
@@ -272,10 +329,26 @@ public static Vector AsVector(this Vector256 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector256 Ceiling(Vector256 vector) { - return Create( - Vector128.Ceiling(vector._lower), - Vector128.Ceiling(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector128.Ceiling(vector._lower), + Vector128.Ceiling(vector._upper) + ); + } } /// Computes the ceiling of each element in a vector. @@ -301,13 +374,7 @@ internal static Vector256 Ceiling(Vector256 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ConditionalSelect(Vector256 condition, Vector256 left, Vector256 right) - { - return Create( - Vector128.ConditionalSelect(condition._lower, left._lower, right._lower), - Vector128.ConditionalSelect(condition._upper, left._upper, right._upper) - ); - } + public static Vector256 ConditionalSelect(Vector256 condition, Vector256 left, Vector256 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. @@ -1353,16 +1420,7 @@ public static Vector256 CreateScalarUnsafe(T value) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static T Dot(Vector256 left, Vector256 right) - { - // Doing this as Dot(lower) + Dot(upper) is important for floating-point determinism - // This is because the underlying dpps instruction on x86/x64 will do this equivalently - // and otherwise the software vs accelerated implementations may differ in returned result. 
- - T result = Vector128.Dot(left._lower, right._lower); - result = Scalar.Add(result, Vector128.Dot(left._upper, right._upper)); - return result; - } + public static T Dot(Vector256 left, Vector256 right) => Sum(left * right); /// Compares two vectors to determine if they are equal on a per-element basis. /// The type of the elements in the vector. @@ -1459,10 +1517,26 @@ public static uint ExtractMostSignificantBits(this Vector256 vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector256 Floor(Vector256 vector) { - return Create( - Vector128.Floor(vector._lower), - Vector128.Floor(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector128.Floor(vector._lower), + Vector128.Floor(vector._upper) + ); + } } /// Computes the floor of each element in a vector. @@ -1720,7 +1794,6 @@ public static bool LessThanOrEqualAny(Vector256 left, Vector256 right) || Vector128.LessThanOrEqualAny(left._upper, right._upper); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1759,7 +1832,6 @@ public static Vector256 LoadAligned(T* source) [Intrinsic] [CLSCompliant(false)] public static Vector256 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. 
@@ -1927,7 +1999,7 @@ public static Vector256 Min(Vector256 left, Vector256 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector256 Multiply(T left, Vector256 right) => left * right; + public static Vector256 Multiply(T left, Vector256 right) => right * left; /// [Intrinsic] @@ -2067,13 +2139,7 @@ public static Vector256 Narrow(Vector256 lower, Vector256 up /// A vector whose elements are the ones-complement of the corresponding elements in . /// The type of () is not supported. [Intrinsic] - public static Vector256 OnesComplement(Vector256 vector) - { - return Create( - Vector128.OnesComplement(vector._lower), - Vector128.OnesComplement(vector._upper) - ); - } + public static Vector256 OnesComplement(Vector256 vector) => ~vector; /// Shifts each element of a vector left by the specified amount. /// The vector whose elements are to be shifted. @@ -2541,7 +2607,6 @@ public static Vector256 Sqrt(Vector256 vector) ); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. @@ -2580,7 +2645,6 @@ public static void StoreAligned(this Vector256 source, T* destination) [Intrinsic] [CLSCompliant(false)] public static void StoreAlignedNonTemporal(this Vector256 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. @@ -2672,7 +2736,7 @@ public static Vector512 ToVector512(this Vector256 vector) /// A new with the lower 256-bits set to the value of and the upper 256-bits left uninitialized. /// The type of () is not supported. 
[Intrinsic] - public static unsafe Vector512 ToVector512Unsafe(this Vector256 vector) + public static Vector512 ToVector512Unsafe(this Vector256 vector) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType(); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs index 36b704682d320..46d09538cb432 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs @@ -7,8 +7,13 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; +using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -38,11 +43,7 @@ namespace System.Runtime.Intrinsics public static Vector256 AllBitsSet { [Intrinsic] - get - { - Vector128 vector = Vector128.AllBitsSet; - return Vector256.Create(vector, vector); - } + get => Vector256.Create(Scalar.AllBitsSet); } /// Gets the number of that are in a . @@ -50,7 +51,11 @@ public static Vector256 AllBitsSet public static int Count { [Intrinsic] - get => Vector128.Count * 2; + get + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); + return Vector256.Size / sizeof(T); + } } /// Gets a new with the elements set to their index. @@ -101,11 +106,7 @@ public static bool IsSupported public static Vector256 One { [Intrinsic] - get - { - Vector128 vector = Vector128.One; - return Vector256.Create(vector, vector); - } + get => Vector256.Create(Scalar.One); } /// Gets a new with all elements initialized to zero. 
@@ -138,10 +139,54 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator +(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower + right._lower, - left._upper + right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower + right._lower, + left._upper + right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-and of two vectors. @@ -153,10 +198,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator &(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower & right._lower, - left._upper & right._upper - ); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower & right._lower, + left._upper & right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.And(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.And(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.And(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-or of two vectors. @@ -168,10 +261,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator |(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower | right._lower, - left._upper | right._upper - ); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower | right._lower, + left._upper | right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Or(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Or(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Divides two vectors to compute their quotient. 
@@ -183,10 +324,34 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator /(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower / right._lower, - left._upper / right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower / right._lower, + left._upper / right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -195,13 +360,7 @@ public static Vector256 Zero /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 operator /(Vector256 left, T right) - { - return Vector256.Create( - left._lower / right, - left._upper / right - ); - } + public static Vector256 operator /(Vector256 left, T right) => left / Vector256.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -225,10 +384,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator ^(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower ^ right._lower, - left._upper ^ right._upper - ); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower ^ right._lower, + left._upper ^ right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Xor(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Xor(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Compares two vectors to determine if any elements are not equal. 
@@ -247,10 +454,44 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator <<(Vector256 value, int shiftCount) { - return Vector256.Create( - value._lower << shiftCount, - value._upper << shiftCount - ); + if (Avx2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector256 SoftwareImpl(Vector256 value, int shiftCount) + { + return Vector256.Create( + value._lower << shiftCount, + value._upper << shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector256 tmp = Avx2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx2.And(tmp, Vector256.Create((ushort)(0xFF << maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.ShiftLeftLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.ShiftLeftLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -262,10 +503,81 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator *(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower * right._lower, - left._upper * right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower * right._lower, + left._upper * right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(Avx512DQ.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + if (Avx512BW.VL.IsSupported) + { + Vector512 a = Avx512BW.ConvertToVector512UInt16(left.AsByte()); + Vector512 b = Avx512BW.ConvertToVector512UInt16(right.AsByte()); + + Vector512 r = Avx512BW.MultiplyLow(a, b); + + return Avx512BW.ConvertToVector256Byte(r).As(); + } + else + { + (Vector256 al, Vector256 ah) = Vector256.Widen(left.AsByte()); + (Vector256 bl, Vector256 bh) = Vector256.Widen(right.AsByte()); + + Vector256 rl = Avx2.MultiplyLow(al, bl); + Vector256 rh = Avx2.MultiplyLow(ah, bh); + + return Vector256.Narrow(rl, rh).As(); + } + } + else if (sizeof(T) == 2) + { + return Avx2.MultiplyLow(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.MultiplyLow(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512DQ.VL.IsSupported) + { + return Avx512DQ.VL.MultiplyLow(left.AsUInt64(), right.AsUInt64()).As(); + } + else 
+ { + // TODO-XARCH-CQ: We should support long/ulong multiplication. + } + } + } + return SoftwareImpl(left, right); + } } /// Multiplies a vector by a scalar to compute their product. @@ -275,13 +587,7 @@ public static Vector256 Zero /// The type of the vector () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 operator *(Vector256 left, T right) - { - return Vector256.Create( - left._lower * right, - left._upper * right - ); - } + public static Vector256 operator *(Vector256 left, T right) => left * Vector256.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -289,7 +595,7 @@ public static Vector256 Zero /// The product of and . /// The type of the vector () is not supported. [Intrinsic] - public static Vector256 operator *(T left, Vector256 right) => right * left; + public static Vector256 operator *(T left, Vector256 right) => Vector256.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -299,10 +605,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator ~(Vector256 vector) { - return Vector256.Create( - ~vector._lower, - ~vector._upper - ); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + static Vector256 SoftwareImpl(Vector256 vector) + { + return Vector256.Create( + ~vector._lower, + ~vector._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 vector) + { + if (typeof(T) == typeof(float)) + { + return Avx.Xor(vector.AsSingle(), Vector256.AllBitsSet).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Xor(vector.AsDouble(), Vector256.AllBitsSet).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Xor(vector.AsByte(), Vector256.AllBitsSet).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Xor(vector.AsUInt16(), Vector256.AllBitsSet).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Xor(vector.AsUInt32(), Vector256.AllBitsSet).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Xor(vector.AsUInt64(), Vector256.AllBitsSet).As(); + } + } + return SoftwareImpl(vector); + } } /// Shifts (signed) each element of a vector right by the specified amount. 
@@ -313,10 +667,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator >>(Vector256 value, int shiftCount) { - return Vector256.Create( - value._lower >> shiftCount, - value._upper >> shiftCount - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (Avx2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector256 SoftwareImpl(Vector256 value, int shiftCount) + { + return Vector256.Create( + value._lower >> shiftCount, + value._upper >> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 value, int shiftCount) + { + if (sizeof(T) == 1) + { + // TODO-XARCH-CQ: We should support sbyte arithmetic shift. + } + else if (sizeof(T) == 2) + { + return Avx2.ShiftRightArithmetic(value.AsInt16(), Vector128.CreateScalar((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.ShiftRightArithmetic(value.AsInt32(), Vector128.CreateScalar(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.ShiftRightArithmetic(value.AsInt64(), Vector128.CreateScalar(shiftCount & 0x3F)).As(); + } + else + { + // TODO-XARCH-CQ: We should support double/long arithmetic shift. + } + } + return SoftwareImpl(value, shiftCount); + } } /// Subtracts two vectors to compute their difference. 
@@ -328,10 +730,54 @@ public static Vector256 operator >>(Vector256 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator -(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower - right._lower, - left._upper - right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower - right._lower, + left._upper - right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the unary negation of a vector. @@ -339,14 +785,7 @@ public static Vector256 operator >>(Vector256 value, int shiftCount) /// A vector whose elements are the unary negation of the corresponding elements in . /// The type of the vector () is not supported. 
[Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 operator -(Vector256 vector) - { - return Vector256.Create( - -vector._lower, - -vector._upper - ); - } + public static Vector256 operator -(Vector256 vector) => Zero - vector; /// Returns a given vector unchanged. /// The vector. @@ -367,10 +806,44 @@ public static Vector256 operator >>(Vector256 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator >>>(Vector256 value, int shiftCount) { - return Vector256.Create( - value._lower >>> shiftCount, - value._upper >>> shiftCount - ); + if (Avx2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector256 SoftwareImpl(Vector256 value, int shiftCount) + { + return Vector256.Create( + value._lower >>> shiftCount, + value._upper >>> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector256 tmp = Avx2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx2.And(tmp, Vector256.Create((ushort)(0xFF >>> maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.ShiftRightLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.ShiftRightLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Determines whether the specified object is equal to the current instance. @@ -428,7 +901,7 @@ public override int GetHashCode() /// The type of the vector () is not supported. 
public override string ToString() => ToString("G", CultureInfo.InvariantCulture); - private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) + internal string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType(); @@ -463,16 +936,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Abs(Vector256 vector) => Vector256.Abs(vector); /// - static Vector256 ISimdVector, T>.Add(Vector256 left, Vector256 right) => Vector256.Add(left, right); + static Vector256 ISimdVector, T>.Add(Vector256 left, Vector256 right) => left + right; /// static Vector256 ISimdVector, T>.AndNot(Vector256 left, Vector256 right) => Vector256.AndNot(left, right); /// - static Vector256 ISimdVector, T>.BitwiseAnd(Vector256 left, Vector256 right) => Vector256.BitwiseAnd(left, right); + static Vector256 ISimdVector, T>.BitwiseAnd(Vector256 left, Vector256 right) => left & right; /// - static Vector256 ISimdVector, T>.BitwiseOr(Vector256 left, Vector256 right) => Vector256.BitwiseOr(left, right); + static Vector256 ISimdVector, T>.BitwiseOr(Vector256 left, Vector256 right) => left | right; /// static Vector256 ISimdVector, T>.Ceiling(Vector256 vector) => Vector256.Ceiling(vector); @@ -508,10 +981,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector256.CreateScalarUnsafe(value); /// - static Vector256 ISimdVector, T>.Divide(Vector256 left, Vector256 right) => Vector256.Divide(left, right); + static Vector256 ISimdVector, T>.Divide(Vector256 left, Vector256 right) => left / right; /// - static Vector256 ISimdVector, T>.Divide(Vector256 left, T right) => Vector256.Divide(left, right); + static Vector256 ISimdVector, T>.Divide(Vector256 left, T right) => 
left / right; /// static T ISimdVector, T>.Dot(Vector256 left, Vector256 right) => Vector256.Dot(left, right); @@ -520,7 +993,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Equals(Vector256 left, Vector256 right) => Vector256.Equals(left, right); /// - static bool ISimdVector, T>.EqualsAll(Vector256 left, Vector256 right) => Vector256.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector256 left, Vector256 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector256 left, Vector256 right) => Vector256.EqualsAny(left, right); @@ -529,7 +1002,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Floor(Vector256 vector) => Vector256.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector256 vector, int index) => Vector256.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector256 vector, int index) => vector.GetElement(index); /// static Vector256 ISimdVector, T>.GreaterThan(Vector256 left, Vector256 right) => Vector256.GreaterThan(left, right); @@ -567,7 +1040,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector256 left, Vector256 right) => Vector256.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector256 ISimdVector, T>.Load(T* source) => Vector256.Load(source); @@ -576,7 +1048,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector256 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector256.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector256 ISimdVector, T>.LoadUnsafe(ref readonly T 
source) => Vector256.LoadUnsafe(in source); @@ -591,63 +1062,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Min(Vector256 left, Vector256 right) => Vector256.Min(left, right); /// - static Vector256 ISimdVector, T>.Multiply(Vector256 left, Vector256 right) => Vector256.Multiply(left, right); + static Vector256 ISimdVector, T>.Multiply(Vector256 left, Vector256 right) => left * right; /// - static Vector256 ISimdVector, T>.Multiply(Vector256 left, T right) => Vector256.Multiply(left, right); + static Vector256 ISimdVector, T>.Multiply(Vector256 left, T right) => left * right; /// - static Vector256 ISimdVector, T>.Negate(Vector256 vector) => Vector256.Negate(vector); + static Vector256 ISimdVector, T>.Negate(Vector256 vector) => -vector; /// - static Vector256 ISimdVector, T>.OnesComplement(Vector256 vector) => Vector256.OnesComplement(vector); + static Vector256 ISimdVector, T>.OnesComplement(Vector256 vector) => ~vector; /// - static Vector256 ISimdVector, T>.ShiftLeft(Vector256 vector, int shiftCount) => Vector256.ShiftLeft(vector, shiftCount); + static Vector256 ISimdVector, T>.ShiftLeft(Vector256 vector, int shiftCount) => vector << shiftCount; /// - static Vector256 ISimdVector, T>.ShiftRightArithmetic(Vector256 vector, int shiftCount) => Vector256.ShiftRightArithmetic(vector, shiftCount); + static Vector256 ISimdVector, T>.ShiftRightArithmetic(Vector256 vector, int shiftCount) => vector >> shiftCount; /// - static Vector256 ISimdVector, T>.ShiftRightLogical(Vector256 vector, int shiftCount) => Vector256.ShiftRightLogical(vector, shiftCount); + static Vector256 ISimdVector, T>.ShiftRightLogical(Vector256 vector, int shiftCount) => vector >>> shiftCount; /// static Vector256 ISimdVector, T>.Sqrt(Vector256 vector) => Vector256.Sqrt(vector); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void 
ISimdVector, T>.Store(Vector256 source, T* destination) => Vector256.Store(source, destination); + static void ISimdVector, T>.Store(Vector256 source, T* destination) => source.Store(destination); /// - static void ISimdVector, T>.StoreAligned(Vector256 source, T* destination) => Vector256.StoreAligned(source, destination); + static void ISimdVector, T>.StoreAligned(Vector256 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector256 source, T* destination) => Vector256.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector256 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination) => Vector256.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination, nuint elementOffset) => Vector256.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector256 ISimdVector, T>.Subtract(Vector256 left, Vector256 right) => Vector256.Subtract(left, right); + static Vector256 ISimdVector, T>.Subtract(Vector256 left, Vector256 right) => left - right; /// static T ISimdVector, T>.Sum(Vector256 vector) => Vector256.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector256 vector) => Vector256.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector256 vector) => vector.ToScalar(); /// - static bool ISimdVector, T>.TryCopyTo(Vector256 vector, Span destination) => Vector256.TryCopyTo(vector, 
destination); + static bool ISimdVector, T>.TryCopyTo(Vector256 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector256 ISimdVector, T>.WithElement(Vector256 vector, int index, T value) => Vector256.WithElement(vector, index, value); + static Vector256 ISimdVector, T>.WithElement(Vector256 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector256 ISimdVector, T>.Xor(Vector256 left, Vector256 right) => Vector256.Xor(left, right); + static Vector256 ISimdVector, T>.Xor(Vector256 left, Vector256 right) => left ^ right; // // New Surface Area diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 8d44bf2899841..7fb23ba1d3b9f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -8,6 +8,8 @@ using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -62,10 +64,69 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Abs(Vector512 vector) { - return Create( - Vector256.Abs(vector._lower), - Vector256.Abs(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return vector; + } + else if (Avx512F.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 vector) + { + if (typeof(T) == typeof(float)) + { + if (Avx512DQ.IsSupported) + { + return Avx512DQ.AndNot(Vector512.Create(-0.0f), vector.AsSingle()).As(); + } + } + else if (typeof(T) == typeof(double)) + { + if (Avx512DQ.IsSupported) + { + return Avx512DQ.AndNot(Vector512.Create(-0.0), vector.AsDouble()).As(); + } + } + else if (sizeof(T) == 4) + { + return Avx512F.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Abs(vector.AsInt64()).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx512BW.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.Abs(vector.AsInt16()).As(); + } + } + return SoftwareImpl(vector); + } + + static Vector512 SoftwareImpl(Vector512 vector) + { + return Create( + Vector256.Abs(vector._lower), + Vector256.Abs(vector._upper) + ); + } } /// Adds two vectors to compute their sum. @@ -85,13 +146,7 @@ public static Vector512 Abs(Vector512 vector) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 AndNot(Vector512 left, Vector512 right) - { - return Create( - Vector256.AndNot(left._lower, right._lower), - Vector256.AndNot(left._upper, right._upper) - ); - } + public static Vector512 AndNot(Vector512 left, Vector512 right) => left & ~right; /// Reinterprets a as a new . /// The type of the elements in the input vector. 
@@ -272,10 +327,26 @@ public static Vector AsVector(this Vector512 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector512 Ceiling(Vector512 vector) { - return Create( - Vector256.Ceiling(vector._lower), - Vector256.Ceiling(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector256.Ceiling(vector._lower), + Vector256.Ceiling(vector._upper) + ); + } } /// Computes the ceiling of each element in a vector. @@ -301,13 +372,7 @@ internal static Vector512 Ceiling(Vector512 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ConditionalSelect(Vector512 condition, Vector512 left, Vector512 right) - { - return Create( - Vector256.ConditionalSelect(condition._lower, left._lower, right._lower), - Vector256.ConditionalSelect(condition._upper, left._upper, right._upper) - ); - } + public static Vector512 ConditionalSelect(Vector512 condition, Vector512 left, Vector512 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. @@ -1390,13 +1455,7 @@ public static Vector512 CreateScalarUnsafe(T value) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 Divide(Vector512 left, Vector512 right) - { - return Create( - Vector256.Divide(left._lower, right._lower), - Vector256.Divide(left._upper, right._upper) - ); - } + public static Vector512 Divide(Vector512 left, Vector512 right) => left / right; /// Divides a vector by a scalar to compute the per-element quotient. 
/// The vector that will be divided by . @@ -1414,16 +1473,7 @@ public static Vector512 Divide(Vector512 left, Vector512 right) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static T Dot(Vector512 left, Vector512 right) - { - // Doing this as Dot(lower) + Dot(upper) is important for floating-point determinism - // This is because the underlying dpps instruction on x86/x64 will do this equivalently - // and otherwise the software vs accelerated implementations may differ in returned result. - - T result = Vector256.Dot(left._lower, right._lower); - result = Scalar.Add(result, Vector256.Dot(left._upper, right._upper)); - return result; - } + public static T Dot(Vector512 left, Vector512 right) => Sum(left * right); /// Compares two vectors to determine if they are equal on a per-element basis. /// The type of the elements in the vector. @@ -1520,10 +1570,26 @@ public static ulong ExtractMostSignificantBits(this Vector512 vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector512 Floor(Vector512 vector) { - return Create( - Vector256.Floor(vector._lower), - Vector256.Floor(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector256.Floor(vector._lower), + Vector256.Floor(vector._upper) + ); + } } /// Computes the floor of each element in a vector. 
@@ -1781,7 +1847,6 @@ public static bool LessThanOrEqualAny(Vector512 left, Vector512 right) || Vector256.LessThanOrEqualAny(left._upper, right._upper); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1820,7 +1885,6 @@ public static Vector512 LoadAligned(T* source) [Intrinsic] [CLSCompliant(false)] public static Vector512 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. @@ -1988,7 +2052,7 @@ public static Vector512 Min(Vector512 left, Vector512 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector512 Multiply(T left, Vector512 right) => left * right; + public static Vector512 Multiply(T left, Vector512 right) => right * left; /// [Intrinsic] @@ -2127,13 +2191,7 @@ public static Vector512 Narrow(Vector512 lower, Vector512 up /// A vector whose elements are the ones-complement of the corresponding elements in . /// The type of () is not supported. [Intrinsic] - public static Vector512 OnesComplement(Vector512 vector) - { - return Create( - Vector256.OnesComplement(vector._lower), - Vector256.OnesComplement(vector._upper) - ); - } + public static Vector512 OnesComplement(Vector512 vector) => ~vector; /// Shifts each element of a vector left by the specified amount. /// The vector whose elements are to be shifted. @@ -2601,7 +2659,6 @@ public static Vector512 Sqrt(Vector512 vector) ); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. 
/// The type of the elements in the vector. /// The vector that will be stored. @@ -2640,7 +2697,6 @@ public static void StoreAligned(this Vector512 source, T* destination) [Intrinsic] [CLSCompliant(false)] public static void StoreAlignedNonTemporal(this Vector512 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs index 235871dbe6069..9e117e67eeb47 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs @@ -7,8 +7,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -38,11 +41,7 @@ namespace System.Runtime.Intrinsics public static Vector512 AllBitsSet { [Intrinsic] - get - { - Vector256 vector = Vector256.AllBitsSet; - return Vector512.Create(vector, vector); - } + get => Vector512.Create(Scalar.AllBitsSet); } /// Gets the number of that are in a . @@ -50,7 +49,11 @@ public static Vector512 AllBitsSet public static int Count { [Intrinsic] - get => Vector256.Count * 2; + get + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); + return Vector512.Size / sizeof(T); + } } /// Gets a new with the elements set to their index. 
@@ -101,11 +104,7 @@ public static bool IsSupported public static Vector512 One { [Intrinsic] - get - { - Vector256 vector = Vector256.One; - return Vector512.Create(vector, vector); - } + get => Vector512.Create(Scalar.One); } /// Gets a new with all elements initialized to zero. @@ -138,10 +137,54 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator +(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower + right._lower, - left._upper + right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower + right._lower, + left._upper + right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx512BW.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-and of two vectors. 
@@ -153,10 +196,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator &(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower & right._lower, - left._upper & right._upper - ); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower & right._lower, + left._upper & right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (Avx512DQ.IsSupported) + { + if ((typeof(T) == typeof(float))) + { + return Avx512DQ.And(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.And(left.AsDouble(), right.AsDouble()).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.And(left.AsUInt64(), right.AsUInt64()).As(); + } + + return SoftwareImpl(left, right); + } } /// Computes the bitwise-or of two vectors. 
@@ -168,10 +261,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator |(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower | right._lower, - left._upper | right._upper - ); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower | right._lower, + left._upper | right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (Avx512DQ.IsSupported) + { + if ((typeof(T) == typeof(float))) + { + return Avx512DQ.Or(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.Or(left.AsDouble(), right.AsDouble()).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + + return SoftwareImpl(left, right); + } } /// Divides two vectors to compute their quotient. 
@@ -183,10 +326,34 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator /(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower / right._lower, - left._upper / right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower / right._lower, + left._upper / right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -195,13 +362,7 @@ public static Vector512 Zero /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 operator /(Vector512 left, T right) - { - return Vector512.Create( - left._lower / right, - left._upper / right - ); - } + public static Vector512 operator /(Vector512 left, T right) => left / Vector512.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -225,10 +386,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator ^(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower ^ right._lower, - left._upper ^ right._upper - ); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower ^ right._lower, + left._upper ^ right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (Avx512DQ.IsSupported) + { + if ((typeof(T) == typeof(float))) + { + return Avx512DQ.Xor(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.Xor(left.AsDouble(), right.AsDouble()).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + + return SoftwareImpl(left, right); + } } /// Compares two vectors to determine if any elements are not equal. 
@@ -247,10 +458,48 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator <<(Vector512 value, int shiftCount) { - return Vector512.Create( - value._lower << shiftCount, - value._upper << shiftCount - ); + if (Avx512F.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector512 SoftwareImpl(Vector512 value, int shiftCount) + { + return Vector512.Create( + value._lower << shiftCount, + value._upper << shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 value, int shiftCount) + { + if (sizeof(T) == 4) + { + return Avx512F.ShiftLeftLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.ShiftLeftLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector512 tmp = Avx512BW.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx512F.And(tmp, Vector512.Create((ushort)(0xFF << maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + } + return SoftwareImpl(value, shiftCount); + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -262,10 +511,64 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator *(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower * right._lower, - left._upper * right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower * right._lower, + left._upper * right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.MultiplyLow(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512DQ.IsSupported) + { + return Avx512DQ.MultiplyLow(left.AsUInt64(), right.AsUInt64()).As(); + } + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + (Vector512 al, Vector512 ah) = Vector512.Widen(left.AsByte()); + (Vector512 bl, Vector512 bh) = Vector512.Widen(right.AsByte()); + + Vector512 rl = Avx512BW.MultiplyLow(al, bl); + Vector512 rh = Avx512BW.MultiplyLow(ah, bh); + + return Vector512.Narrow(rl, rh).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.MultiplyLow(left.AsUInt16(), right.AsUInt16()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Multiplies a vector by a scalar to compute their product. @@ -275,13 +578,7 @@ public static Vector512 Zero /// The type of the vector () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 operator *(Vector512 left, T right) - { - return Vector512.Create( - left._lower * right, - left._upper * right - ); - } + public static Vector512 operator *(Vector512 left, T right) => left * Vector512.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -289,7 +586,7 @@ public static Vector512 Zero /// The product of and . /// The type of the vector () is not supported. [Intrinsic] - public static Vector512 operator *(T left, Vector512 right) => right * left; + public static Vector512 operator *(T left, Vector512 right) => Vector512.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -299,10 +596,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator ~(Vector512 vector) { - return Vector512.Create( - ~vector._lower, - ~vector._upper - ); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx512F.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + static Vector512 SoftwareImpl(Vector512 vector) + { + return Vector512.Create( + ~vector._lower, + ~vector._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 vector) + { + if (Avx512DQ.IsSupported) + { + if (typeof(T) == typeof(float)) + { + return Avx512DQ.Xor(vector.AsSingle(), Vector512.AllBitsSet).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.Xor(vector.AsDouble(), Vector512.AllBitsSet).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.Xor(vector.AsByte(), Vector512.AllBitsSet).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.Xor(vector.AsUInt16(), Vector512.AllBitsSet).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Xor(vector.AsUInt32(), Vector512.AllBitsSet).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Xor(vector.AsUInt64(), Vector512.AllBitsSet).As(); + } + + return SoftwareImpl(vector); + } } /// Shifts (signed) each element of a vector right by the specified amount. 
@@ -313,10 +660,54 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator >>(Vector512 value, int shiftCount) { - return Vector512.Create( - value._lower >> shiftCount, - value._upper >> shiftCount - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (Avx512F.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector512 SoftwareImpl(Vector512 value, int shiftCount) + { + return Vector512.Create( + value._lower >> shiftCount, + value._upper >> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 value, int shiftCount) + { + if (sizeof(T) == 4) + { + return Avx512F.ShiftRightArithmetic(value.AsInt32(), Vector128.CreateScalar(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.ShiftRightArithmetic(value.AsInt64(), Vector128.CreateScalar(shiftCount & 0x3F)).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + // TODO-XARCH-CQ: We should support sbyte arithmetic shift. + } + else if (sizeof(T) == 2) + { + return Avx512BW.ShiftRightArithmetic(value.AsInt16(), Vector128.CreateScalar((short)(shiftCount & 0xF))).As(); + } + } + return SoftwareImpl(value, shiftCount); + } } /// Subtracts two vectors to compute their difference. 
@@ -328,10 +719,54 @@ public static Vector512 operator >>(Vector512 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator -(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower - right._lower, - left._upper - right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower - right._lower, + left._upper - right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx512BW.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the unary negation of a vector. @@ -339,14 +774,7 @@ public static Vector512 operator >>(Vector512 value, int shiftCount) /// A vector whose elements are the unary negation of the corresponding elements in . /// The type of the vector () is not supported. 
[Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 operator -(Vector512 vector) - { - return Vector512.Create( - -vector._lower, - -vector._upper - ); - } + public static Vector512 operator -(Vector512 vector) => Zero - vector; /// Returns a given vector unchanged. /// The vector. @@ -367,10 +795,48 @@ public static Vector512 operator >>(Vector512 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator >>>(Vector512 value, int shiftCount) { - return Vector512.Create( - value._lower >>> shiftCount, - value._upper >>> shiftCount - ); + if (Avx512F.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector512 SoftwareImpl(Vector512 value, int shiftCount) + { + return Vector512.Create( + value._lower >>> shiftCount, + value._upper >>> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 value, int shiftCount) + { + if (sizeof(T) == 4) + { + return Avx512F.ShiftRightLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.ShiftRightLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector512 tmp = Avx512BW.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx512F.And(tmp, Vector512.Create((ushort)(0xFF >>> maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + } + return SoftwareImpl(value, shiftCount); + } } /// Determines whether the specified object is equal to the current instance. 
@@ -428,7 +894,7 @@ public override int GetHashCode() /// The type of the vector () is not supported. public override string ToString() => ToString("G", CultureInfo.InvariantCulture); - private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) + internal string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector512BaseType(); @@ -463,16 +929,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Abs(Vector512 vector) => Vector512.Abs(vector); /// - static Vector512 ISimdVector, T>.Add(Vector512 left, Vector512 right) => Vector512.Add(left, right); + static Vector512 ISimdVector, T>.Add(Vector512 left, Vector512 right) => left + right; /// static Vector512 ISimdVector, T>.AndNot(Vector512 left, Vector512 right) => Vector512.AndNot(left, right); /// - static Vector512 ISimdVector, T>.BitwiseAnd(Vector512 left, Vector512 right) => Vector512.BitwiseAnd(left, right); + static Vector512 ISimdVector, T>.BitwiseAnd(Vector512 left, Vector512 right) => left & right; /// - static Vector512 ISimdVector, T>.BitwiseOr(Vector512 left, Vector512 right) => Vector512.BitwiseOr(left, right); + static Vector512 ISimdVector, T>.BitwiseOr(Vector512 left, Vector512 right) => left | right; /// static Vector512 ISimdVector, T>.Ceiling(Vector512 vector) => Vector512.Ceiling(vector); @@ -508,10 +974,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector512.CreateScalarUnsafe(value); /// - static Vector512 ISimdVector, T>.Divide(Vector512 left, Vector512 right) => Vector512.Divide(left, right); + static Vector512 ISimdVector, T>.Divide(Vector512 left, Vector512 right) => left / right; /// - static Vector512 ISimdVector, T>.Divide(Vector512 left, T right) 
=> Vector512.Divide(left, right); + static Vector512 ISimdVector, T>.Divide(Vector512 left, T right) => left / right; /// static T ISimdVector, T>.Dot(Vector512 left, Vector512 right) => Vector512.Dot(left, right); @@ -520,7 +986,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Equals(Vector512 left, Vector512 right) => Vector512.Equals(left, right); /// - static bool ISimdVector, T>.EqualsAll(Vector512 left, Vector512 right) => Vector512.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector512 left, Vector512 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector512 left, Vector512 right) => Vector512.EqualsAny(left, right); @@ -529,7 +995,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Floor(Vector512 vector) => Vector512.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector512 vector, int index) => Vector512.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector512 vector, int index) => vector.GetElement(index); /// static Vector512 ISimdVector, T>.GreaterThan(Vector512 left, Vector512 right) => Vector512.GreaterThan(left, right); @@ -567,7 +1033,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector512 left, Vector512 right) => Vector512.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector512 ISimdVector, T>.Load(T* source) => Vector512.Load(source); @@ -576,7 +1041,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector512 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector512.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or 
declares a pointer to a managed type ('T') /// static Vector512 ISimdVector, T>.LoadUnsafe(ref readonly T source) => Vector512.LoadUnsafe(in source); @@ -591,63 +1055,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Min(Vector512 left, Vector512 right) => Vector512.Min(left, right); /// - static Vector512 ISimdVector, T>.Multiply(Vector512 left, Vector512 right) => Vector512.Multiply(left, right); + static Vector512 ISimdVector, T>.Multiply(Vector512 left, Vector512 right) => left * right; /// - static Vector512 ISimdVector, T>.Multiply(Vector512 left, T right) => Vector512.Multiply(left, right); + static Vector512 ISimdVector, T>.Multiply(Vector512 left, T right) => left * right; /// - static Vector512 ISimdVector, T>.Negate(Vector512 vector) => Vector512.Negate(vector); + static Vector512 ISimdVector, T>.Negate(Vector512 vector) => -vector; /// - static Vector512 ISimdVector, T>.OnesComplement(Vector512 vector) => Vector512.OnesComplement(vector); + static Vector512 ISimdVector, T>.OnesComplement(Vector512 vector) => ~vector; /// - static Vector512 ISimdVector, T>.ShiftLeft(Vector512 vector, int shiftCount) => Vector512.ShiftLeft(vector, shiftCount); + static Vector512 ISimdVector, T>.ShiftLeft(Vector512 vector, int shiftCount) => vector << shiftCount; /// - static Vector512 ISimdVector, T>.ShiftRightArithmetic(Vector512 vector, int shiftCount) => Vector512.ShiftRightArithmetic(vector, shiftCount); + static Vector512 ISimdVector, T>.ShiftRightArithmetic(Vector512 vector, int shiftCount) => vector >> shiftCount; /// - static Vector512 ISimdVector, T>.ShiftRightLogical(Vector512 vector, int shiftCount) => Vector512.ShiftRightLogical(vector, shiftCount); + static Vector512 ISimdVector, T>.ShiftRightLogical(Vector512 vector, int shiftCount) => vector >>> shiftCount; /// static Vector512 ISimdVector, T>.Sqrt(Vector512 vector) => Vector512.Sqrt(vector); -#pragma warning disable CS8500 // This takes 
the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void ISimdVector, T>.Store(Vector512 source, T* destination) => Vector512.Store(source, destination); + static void ISimdVector, T>.Store(Vector512 source, T* destination) => source.Store(destination); /// - static void ISimdVector, T>.StoreAligned(Vector512 source, T* destination) => Vector512.StoreAligned(source, destination); + static void ISimdVector, T>.StoreAligned(Vector512 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector512 source, T* destination) => Vector512.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector512 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination) => Vector512.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination, nuint elementOffset) => Vector512.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector512 ISimdVector, T>.Subtract(Vector512 left, Vector512 right) => Vector512.Subtract(left, right); + static Vector512 ISimdVector, T>.Subtract(Vector512 left, Vector512 right) => left - right; /// static T ISimdVector, T>.Sum(Vector512 vector) => Vector512.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector512 vector) => Vector512.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector512 vector) => vector.ToScalar(); /// - static bool 
ISimdVector, T>.TryCopyTo(Vector512 vector, Span destination) => Vector512.TryCopyTo(vector, destination); + static bool ISimdVector, T>.TryCopyTo(Vector512 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector512 ISimdVector, T>.WithElement(Vector512 vector, int index, T value) => Vector512.WithElement(vector, index, value); + static Vector512 ISimdVector, T>.WithElement(Vector512 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector512 ISimdVector, T>.Xor(Vector512 left, Vector512 right) => Vector512.Xor(left, right); + static Vector512 ISimdVector, T>.Xor(Vector512 left, Vector512 right) => left ^ right; // // New Surface Area diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 743aad0a6503f..908d28f9ef153 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -9,10 +9,12 @@ using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { /// Provides a collection of static methods for creating, manipulating, and otherwise operating on 64-bit vectors. 
- public static class Vector64 + public static unsafe class Vector64 { internal const int Size = 8; @@ -44,7 +46,48 @@ public static Vector64 Abs(Vector64 vector) { return vector; } - else + else if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 vector) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Abs(vector.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.AbsScalar(vector.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.AbsScalar(vector.AsInt64()).As(); + } + } + return SoftwareImpl(vector); + } + + static Vector64 SoftwareImpl(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -272,14 +315,14 @@ internal static Vector64 Ceiling(Vector64 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector64 ConditionalSelect(Vector64 condition, Vector64 left, Vector64 right) => (left & condition) | (right & ~condition); + public static Vector64 ConditionalSelect(Vector64 condition, Vector64 left, Vector64 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToDouble(Vector64 vector) + public static Vector64 ConvertToDouble(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -298,7 +341,7 @@ public static unsafe Vector64 ConvertToDouble(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToDouble(Vector64 vector) + public static Vector64 ConvertToDouble(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -316,7 +359,7 @@ public static unsafe Vector64 ConvertToDouble(Vector64 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt32(Vector64 vector) + public static Vector64 ConvertToInt32(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -334,7 +377,7 @@ public static unsafe Vector64 ConvertToInt32(Vector64 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt32Native(Vector64 vector) + public static Vector64 ConvertToInt32Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -352,7 +395,7 @@ public static unsafe Vector64 ConvertToInt32Native(Vector64 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt64(Vector64 vector) + public static Vector64 ConvertToInt64(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -370,7 +413,7 @@ public static unsafe Vector64 ConvertToInt64(Vector64 vector) /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt64Native(Vector64 vector) + public static Vector64 ConvertToInt64Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -388,7 +431,7 @@ public static unsafe Vector64 ConvertToInt64Native(Vector64 vector /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToSingle(Vector64 vector) + public static Vector64 ConvertToSingle(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -407,7 +450,7 @@ public static unsafe Vector64 ConvertToSingle(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToSingle(Vector64 vector) + public static Vector64 ConvertToSingle(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -426,7 +469,7 @@ public static unsafe Vector64 ConvertToSingle(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt32(Vector64 vector) + public static Vector64 ConvertToUInt32(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -445,7 +488,7 @@ public static unsafe Vector64 ConvertToUInt32(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt32Native(Vector64 vector) + public static Vector64 ConvertToUInt32Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -464,7 +507,7 @@ public static unsafe Vector64 ConvertToUInt32Native(Vector64 vector [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt64(Vector64 vector) + public static Vector64 ConvertToUInt64(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -483,7 +526,7 @@ public static unsafe Vector64 ConvertToUInt64(Vector64 vector) 
[Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt64Native(Vector64 vector) + public static Vector64 ConvertToUInt64Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -526,7 +569,7 @@ public static void CopyTo(this Vector64 vector, T[] destination) /// The type of and () is not supported. /// is null. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void CopyTo(this Vector64 vector, T[] destination, int startIndex) + public static void CopyTo(this Vector64 vector, T[] destination, int startIndex) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons @@ -566,7 +609,7 @@ public static void CopyTo(this Vector64 vector, Span destination) /// A new with all elements initialized to . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(T value) + public static Vector64 Create(T value) { Unsafe.SkipInit(out Vector64 result); @@ -583,46 +626,46 @@ public static unsafe Vector64 Create(T value) /// On x86, this method corresponds to __m64 _mm_set1_pi8 /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(byte value) => Create(value); + public static Vector64 Create(byte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(double value) => Create(value); + public static Vector64 Create(double value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi16 /// A new with all elements initialized to . 
[Intrinsic] - public static unsafe Vector64 Create(short value) => Create(value); + public static Vector64 Create(short value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi32 /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(int value) => Create(value); + public static Vector64 Create(int value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(long value) => Create(value); + public static Vector64 Create(long value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(nint value) => Create(value); + public static Vector64 Create(nint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(nuint value) => Create(value); + public static Vector64 Create(nuint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -630,13 +673,13 @@ public static unsafe Vector64 Create(T value) /// A new with all elements initialized to . 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(sbyte value) => Create(value); + public static Vector64 Create(sbyte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(float value) => Create(value); + public static Vector64 Create(float value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -644,7 +687,7 @@ public static unsafe Vector64 Create(T value) /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(ushort value) => Create(value); + public static Vector64 Create(ushort value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -652,14 +695,14 @@ public static unsafe Vector64 Create(T value) /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(uint value) => Create(value); + public static Vector64 Create(uint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(ulong value) => Create(value); + public static Vector64 Create(ulong value) => Create(value); /// Creates a new from a given array. /// The type of the elements in the vector. @@ -732,7 +775,7 @@ public static Vector64 Create(ReadOnlySpan values) /// A new with each element initialized to corresponding specified value. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) + public static Vector64 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -755,7 +798,7 @@ public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, b /// A new with each element initialized to corresponding specified value. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(short e0, short e1, short e2, short e3) + public static Vector64 Create(short e0, short e1, short e2, short e3) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -772,7 +815,7 @@ public static unsafe Vector64 Create(short e0, short e1, short e2, short /// A new with each element initialized to corresponding specified value. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(int e0, int e1) + public static Vector64 Create(int e0, int e1) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -794,7 +837,7 @@ public static unsafe Vector64 Create(int e0, int e1) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7) + public static Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -814,7 +857,7 @@ public static unsafe Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte /// A new with each element initialized to corresponding specified value. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(float e0, float e1) + public static Vector64 Create(float e0, float e1) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -832,7 +875,7 @@ public static unsafe Vector64 Create(float e0, float e1) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, ushort e3) + public static Vector64 Create(ushort e0, ushort e1, ushort e2, ushort e3) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -850,7 +893,7 @@ public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, us [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(uint e0, uint e1) + public static Vector64 Create(uint e0, uint e1) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -865,7 +908,7 @@ public static unsafe Vector64 Create(uint e0, uint e1) /// The type of () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(T value) + public static Vector64 CreateScalar(T value) { Vector64 result = Vector64.Zero; result.SetElementUnsafe(0, value); @@ -876,78 +919,78 @@ public static unsafe Vector64 CreateScalar(T value) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(byte value) => CreateScalar(value); + public static Vector64 CreateScalar(byte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(double value) => CreateScalar(value); + public static Vector64 CreateScalar(double value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(short value) => CreateScalar(value); + public static Vector64 CreateScalar(short value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(int value) => CreateScalar(value); + public static Vector64 CreateScalar(int value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(long value) => CreateScalar(value); + public static Vector64 CreateScalar(long value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] - public static unsafe Vector64 CreateScalar(nint value) => CreateScalar(value); + public static Vector64 CreateScalar(nint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(nuint value) => CreateScalar(value); + public static Vector64 CreateScalar(nuint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(sbyte value) => CreateScalar(value); + public static Vector64 CreateScalar(sbyte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(float value) => CreateScalar(value); + public static Vector64 CreateScalar(float value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(ushort value) => CreateScalar(value); + public static Vector64 CreateScalar(ushort value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(uint value) => CreateScalar(value); + public static Vector64 CreateScalar(uint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(ulong value) => CreateScalar(value); + public static Vector64 CreateScalar(ulong value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The type of the elements in the vector. @@ -972,78 +1015,78 @@ public static Vector64 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); /// Creates a new instance where the elements begin at a specified value and which are spaced apart according to another specified value. /// The type of the elements in the vector. @@ -1565,7 +1608,6 @@ public static bool LessThanOrEqualAny(Vector64 left, Vector64 right) return false; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. 
@@ -1573,7 +1615,7 @@ public static bool LessThanOrEqualAny(Vector64 left, Vector64 right) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Load(T* source) => LoadUnsafe(ref *source); + public static Vector64 Load(T* source) => LoadUnsafe(ref *source); /// Loads a vector from the given aligned source. /// The type of the elements in the vector. @@ -1583,7 +1625,7 @@ public static bool LessThanOrEqualAny(Vector64 left, Vector64 right) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 LoadAligned(T* source) + public static Vector64 LoadAligned(T* source) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); @@ -1603,8 +1645,7 @@ public static unsafe Vector64 LoadAligned(T* source) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static Vector64 LoadAlignedNonTemporal(T* source) => LoadAligned(source); /// Loads a vector from the given source. /// The type of the elements in the vector. @@ -1795,7 +1836,7 @@ public static Vector64 Min(Vector64 left, Vector64 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector64 Multiply(T left, Vector64 right) => left * right; + public static Vector64 Multiply(T left, Vector64 right) => right * left; /// Computes an estimate of ( * ) + . /// The vector to be multiplied with . @@ -1851,7 +1892,7 @@ public static Vector64 MultiplyAddEstimate(Vector64 left, Vector64 /// A containing elements narrowed from and . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1877,7 +1918,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1902,7 +1943,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64A containing elements narrowed from and . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1927,7 +1968,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 u /// A containing elements narrowed from and . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1953,7 +1994,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 u [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1979,7 +2020,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -2005,7 +2046,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); 
@@ -2438,7 +2479,6 @@ public static Vector64 Sqrt(Vector64 vector) return result; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. @@ -2446,7 +2486,7 @@ public static Vector64 Sqrt(Vector64 vector) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void Store(this Vector64 source, T* destination) => source.StoreUnsafe(ref *destination); + public static void Store(this Vector64 source, T* destination) => source.StoreUnsafe(ref *destination); /// Stores a vector at the given aligned destination. /// The type of the elements in the vector. @@ -2456,7 +2496,7 @@ public static Vector64 Sqrt(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void StoreAligned(this Vector64 source, T* destination) + public static void StoreAligned(this Vector64 source, T* destination) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); @@ -2476,8 +2516,7 @@ public static unsafe void StoreAligned(this Vector64 source, T* destinatio /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void StoreAlignedNonTemporal(this Vector64 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static void StoreAlignedNonTemporal(this Vector64 source, T* destination) => source.StoreAligned(destination); /// Stores a vector at the given destination. /// The type of the elements in the vector. @@ -2572,7 +2611,7 @@ public static Vector128 ToVector128(this Vector64 vector) /// The type of () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ToVector128Unsafe(this Vector64 vector) + public static Vector128 ToVector128Unsafe(this Vector64 vector) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); @@ -2607,46 +2646,46 @@ public static bool TryCopyTo(this Vector64 vector, Span destination) /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . 
[CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens the lower half of a into a . /// The vector whose elements are to be widened. @@ -2672,7 +2711,7 @@ public static Vector64 WidenLower(Vector64 source) /// A vector that contain the widened lower half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2690,7 +2729,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2709,7 +2748,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2727,7 +2766,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2746,7 +2785,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2765,7 +2804,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2802,7 +2841,7 @@ public static Vector64 WidenUpper(Vector64 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2820,7 +2859,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) /// A vector that contain the widened upper half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2839,7 +2878,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2857,7 +2896,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2876,7 +2915,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2895,7 +2934,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs index 23730bb5b0090..bd87e547eb712 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs @@ -7,8 +7,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -38,22 +41,12 @@ namespace System.Runtime.Intrinsics public static Vector64 AllBitsSet { [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get - { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); - - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = ulong.MaxValue; - - return result; 
- } + get => Vector64.Create(Scalar.AllBitsSet); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets the number of that are in a . /// The type of the vector () is not supported. - public static unsafe int Count + public static int Count { [Intrinsic] get @@ -62,7 +55,6 @@ public static unsafe int Count return Vector64.Size / sizeof(T); } } -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets a new with the elements set to their index. /// The type of the vector () is not supported. @@ -145,15 +137,55 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator +(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Add(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + return AdvSimd.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.AddScalar(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.AddScalar(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 
left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Add(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Computes the bitwise-and of two vectors. @@ -165,12 +197,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator &(Vector64 left, Vector64 right) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.And(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = left._00 & right._00; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = left._00 & right._00; - return result; + return result; + } } /// Computes the bitwise-or of two vectors. 
@@ -182,12 +250,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator |(Vector64 left, Vector64 right) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = left._00 | right._00; + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = left._00 | right._00; - return result; + return result; + } } /// Divides two vectors to compute their quotient. 
@@ -199,15 +303,43 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator /(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Divide(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Divide(left.AsSingle(), right.AsSingle()).As(); + } + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.DivideScalar(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Divide(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -215,19 +347,7 @@ public static Vector64 Zero /// The scalar that will divide . /// The quotient of divided by . 
[Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector64 operator /(Vector64 left, T right) - { - Unsafe.SkipInit(out Vector64 result); - - for (int index = 0; index < Count; index++) - { - T value = Scalar.Divide(left.GetElementUnsafe(index), right); - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector64 operator /(Vector64 left, T right) => left / Vector64.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -257,12 +377,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator ^(Vector64 left, Vector64 right) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = left._00 ^ right._00; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); - return result; + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = left._00 ^ right._00; + + 
return result; + } } /// Compares two vectors to determine if any elements are not equal. @@ -281,15 +437,47 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator <<(Vector64 value, int shiftCount) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 value, int shiftCount) { - T element = Scalar.ShiftLeft(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsByte(), Vector64.Create((sbyte)(shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsUInt16(), Vector64.Create((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsUInt32(), Vector64.Create(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogicalScalar(value.AsUInt64(), Vector64.Create(shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); } - return result; + static Vector64 SoftwareImpl(Vector64 value, int shiftCount) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T element = Scalar.ShiftLeft(value.GetElementUnsafe(index), shiftCount); + result.SetElementUnsafe(index, element); + } + + return result; + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -301,15 +489,55 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator *(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Multiply(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + return AdvSimd.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.MultiplyScalar(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Multiply(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Multiply(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Multiply(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + // TODO-ARM64-CQ: We should support long/ulong multiplication. + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Multiply(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Multiplies a vector by a scalar to compute their product. @@ -319,18 +547,7 @@ public static Vector64 Zero /// The type of the vector () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector64 operator *(Vector64 left, T right) - { - Unsafe.SkipInit(out Vector64 result); - - for (int index = 0; index < Count; index++) - { - T value = Scalar.Multiply(left.GetElementUnsafe(index), right); - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector64 operator *(Vector64 left, T right) => left * Vector64.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -338,7 +555,7 @@ public static Vector64 Zero /// The product of and . /// The type of the vector () is not supported. [Intrinsic] - public static Vector64 operator *(T left, Vector64 right) => right * left; + public static Vector64 operator *(T left, Vector64 right) => Vector64.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -348,12 +565,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator ~(Vector64 vector) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 vector) + { + if (sizeof(T) == 1) + { + return AdvSimd.Not(vector.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Not(vector.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Not(vector.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Not(vector.AsUInt64()).As(); + } + return SoftwareImpl(vector); + } - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = ~vector._00; + static Vector64 SoftwareImpl(Vector64 vector) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = ~vector._00; - return result; + return result; + } } /// Shifts (signed) each element of a vector right by the specified amount. @@ -364,15 +617,55 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator >>(Vector64 value, int shiftCount) { - Unsafe.SkipInit(out Vector64 result); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 value, int shiftCount) { - T element = Scalar.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + if (sizeof(T) == 1) + { + return AdvSimd.ShiftArithmetic(value.AsSByte(), Vector64.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else 
if (sizeof(T) == 2) + { + return AdvSimd.ShiftArithmetic(value.AsInt16(), Vector64.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftArithmetic(value.AsInt32(), Vector64.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftArithmeticScalar(value.AsInt64(), Vector64.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); } - return result; + static Vector64 SoftwareImpl(Vector64 left, int shiftCount) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.ShiftRightArithmetic(left.GetElementUnsafe(index), shiftCount); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Subtracts two vectors to compute their difference. @@ -384,15 +677,55 @@ public static Vector64 operator >>(Vector64 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator -(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Subtract(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + return AdvSimd.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.SubtractScalar(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return 
AdvSimd.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.SubtractScalar(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Subtract(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Computes the unary negation of a vector. @@ -421,15 +754,47 @@ public static Vector64 operator >>(Vector64 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator >>>(Vector64 value, int shiftCount) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 value, int shiftCount) { - T element = Scalar.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsSByte(), Vector64.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsInt16(), Vector64.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsInt32(), Vector64.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogicalScalar(value.AsInt64(), Vector64.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); } - return result; + static Vector64 SoftwareImpl(Vector64 left, int shiftCount) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < 
Count; index++) + { + T value = Scalar.ShiftRightLogical(left.GetElementUnsafe(index), shiftCount); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Determines whether the specified object is equal to the current instance. @@ -531,16 +896,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.Abs(Vector64 vector) => Vector64.Abs(vector); /// - static Vector64 ISimdVector, T>.Add(Vector64 left, Vector64 right) => Vector64.Add(left, right); + static Vector64 ISimdVector, T>.Add(Vector64 left, Vector64 right) => left + right; /// static Vector64 ISimdVector, T>.AndNot(Vector64 left, Vector64 right) => Vector64.AndNot(left, right); /// - static Vector64 ISimdVector, T>.BitwiseAnd(Vector64 left, Vector64 right) => Vector64.BitwiseAnd(left, right); + static Vector64 ISimdVector, T>.BitwiseAnd(Vector64 left, Vector64 right) => left & right; /// - static Vector64 ISimdVector, T>.BitwiseOr(Vector64 left, Vector64 right) => Vector64.BitwiseOr(left, right); + static Vector64 ISimdVector, T>.BitwiseOr(Vector64 left, Vector64 right) => left | right; /// static Vector64 ISimdVector, T>.Ceiling(Vector64 vector) => Vector64.Ceiling(vector); @@ -576,10 +941,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector64.CreateScalarUnsafe(value); /// - static Vector64 ISimdVector, T>.Divide(Vector64 left, Vector64 right) => Vector64.Divide(left, right); + static Vector64 ISimdVector, T>.Divide(Vector64 left, Vector64 right) => left / right; /// - static Vector64 ISimdVector, T>.Divide(Vector64 left, T right) => Vector64.Divide(left, right); + static Vector64 ISimdVector, T>.Divide(Vector64 left, T right) => left / right; /// static T ISimdVector, T>.Dot(Vector64 left, Vector64 right) => Vector64.Dot(left, right); @@ -588,7 +953,7 @@ private string 
ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.Equals(Vector64 left, Vector64 right) => Vector64.Equals(left, right); /// - static bool ISimdVector, T>.EqualsAll(Vector64 left, Vector64 right) => Vector64.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector64 left, Vector64 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector64 left, Vector64 right) => Vector64.EqualsAny(left, right); @@ -597,7 +962,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.Floor(Vector64 vector) => Vector64.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector64 vector, int index) => Vector64.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector64 vector, int index) => vector.GetElement(index); /// static Vector64 ISimdVector, T>.GreaterThan(Vector64 left, Vector64 right) => Vector64.GreaterThan(left, right); @@ -635,7 +1000,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector64 left, Vector64 right) => Vector64.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector64 ISimdVector, T>.Load(T* source) => Vector64.Load(source); @@ -644,7 +1008,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector64 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector64.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector64 ISimdVector, T>.LoadUnsafe(ref readonly T source) => Vector64.LoadUnsafe(in source); @@ -659,63 +1022,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, 
T>.Min(Vector64 left, Vector64 right) => Vector64.Min(left, right); /// - static Vector64 ISimdVector, T>.Multiply(Vector64 left, Vector64 right) => Vector64.Multiply(left, right); + static Vector64 ISimdVector, T>.Multiply(Vector64 left, Vector64 right) => left * right; /// - static Vector64 ISimdVector, T>.Multiply(Vector64 left, T right) => Vector64.Multiply(left, right); + static Vector64 ISimdVector, T>.Multiply(Vector64 left, T right) => left * right; /// - static Vector64 ISimdVector, T>.Negate(Vector64 vector) => Vector64.Negate(vector); + static Vector64 ISimdVector, T>.Negate(Vector64 vector) => -vector; /// - static Vector64 ISimdVector, T>.OnesComplement(Vector64 vector) => Vector64.OnesComplement(vector); + static Vector64 ISimdVector, T>.OnesComplement(Vector64 vector) => ~vector; /// - static Vector64 ISimdVector, T>.ShiftLeft(Vector64 vector, int shiftCount) => Vector64.ShiftLeft(vector, shiftCount); + static Vector64 ISimdVector, T>.ShiftLeft(Vector64 vector, int shiftCount) => vector << shiftCount; /// - static Vector64 ISimdVector, T>.ShiftRightArithmetic(Vector64 vector, int shiftCount) => Vector64.ShiftRightArithmetic(vector, shiftCount); + static Vector64 ISimdVector, T>.ShiftRightArithmetic(Vector64 vector, int shiftCount) => vector >> shiftCount; /// - static Vector64 ISimdVector, T>.ShiftRightLogical(Vector64 vector, int shiftCount) => Vector64.ShiftRightLogical(vector, shiftCount); + static Vector64 ISimdVector, T>.ShiftRightLogical(Vector64 vector, int shiftCount) => vector >>> shiftCount; /// static Vector64 ISimdVector, T>.Sqrt(Vector64 vector) => Vector64.Sqrt(vector); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void ISimdVector, T>.Store(Vector64 source, T* destination) => Vector64.Store(source, destination); + static void ISimdVector, T>.Store(Vector64 source, T* destination) => source.Store(destination); /// - static void ISimdVector, 
T>.StoreAligned(Vector64 source, T* destination) => Vector64.StoreAligned(source, destination); + static void ISimdVector, T>.StoreAligned(Vector64 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector64 source, T* destination) => Vector64.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector64 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination) => Vector64.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination, nuint elementOffset) => Vector64.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector64 ISimdVector, T>.Subtract(Vector64 left, Vector64 right) => Vector64.Subtract(left, right); + static Vector64 ISimdVector, T>.Subtract(Vector64 left, Vector64 right) => left - right; /// static T ISimdVector, T>.Sum(Vector64 vector) => Vector64.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector64 vector) => Vector64.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector64 vector) => vector.ToScalar(); /// - static bool ISimdVector, T>.TryCopyTo(Vector64 vector, Span destination) => Vector64.TryCopyTo(vector, destination); + static bool ISimdVector, T>.TryCopyTo(Vector64 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector64 ISimdVector, T>.WithElement(Vector64 vector, int index, T value) => Vector64.WithElement(vector, index, value); + 
static Vector64 ISimdVector, T>.WithElement(Vector64 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector64 ISimdVector, T>.Xor(Vector64 left, Vector64 right) => Vector64.Xor(left, right); + static Vector64 ISimdVector, T>.Xor(Vector64 left, Vector64 right) => left ^ right; // // New Surface Area