diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 30abcb301f6b4..8b43c8f72354e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -17321,32 +17321,48 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp) // true if node represents a constant; otherwise, false bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val) { - var_types simdType = node->TypeGet(); - var_types simdBaseType = node->GetSimdBaseType(); - unsigned simdSize = node->GetSimdSize(); + NamedIntrinsic intrinsic = node->GetHWIntrinsicId(); + var_types simdType = node->TypeGet(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); size_t argCnt = node->GetOperandCount(); size_t cnsArgCnt = 0; - switch (node->GetHWIntrinsicId()) + switch (intrinsic) { case NI_Vector128_Create: + case NI_Vector128_CreateScalar: case NI_Vector128_CreateScalarUnsafe: #if defined(TARGET_XARCH) case NI_Vector256_Create: + case NI_Vector256_CreateScalar: case NI_Vector256_CreateScalarUnsafe: #elif defined(TARGET_ARM64) case NI_Vector64_Create: + case NI_Vector64_CreateScalar: case NI_Vector64_CreateScalarUnsafe: #endif { + // Zero out the simd32Val + simd32Val = {}; + // These intrinsics are meant to set the same value to every element. if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType)) { - // Now assign the rest of the arguments. - for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) +// CreateScalar leaves the upper bits as zero + +#if defined(TARGET_XARCH) + if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar)) +#elif defined(TARGET_ARM64) + if ((intrinsic != NI_Vector64_CreateScalar) && (intrinsic != NI_Vector128_CreateScalar)) +#endif { - HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType); + // Now assign the rest of the arguments. 
+ for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) + { + HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType); + } } cnsArgCnt = 1; @@ -18933,6 +18949,13 @@ bool GenTree::isContainableHWIntrinsic() const return true; } + case NI_Vector128_get_Zero: + case NI_Vector256_get_Zero: + { + // These HWIntrinsic operations are contained as part of Sse41.Insert + return true; + } + default: { return false; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 7b966f0a56dba..e4653df3e6d6b 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -545,6 +545,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_CreateScalar: case NI_Vector64_CreateScalarUnsafe: { if (genTypeSize(simdBaseType) == 8) @@ -556,12 +557,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector64_Create: case NI_Vector128_Create: + case NI_Vector128_CreateScalar: case NI_Vector128_CreateScalarUnsafe: { uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); assert((sig->numArgs == 1) || (sig->numArgs == simdLength)); - bool isConstant = true; + bool isConstant = true; + bool isCreateScalar = (intrinsic == NI_Vector64_CreateScalar) || (intrinsic == NI_Vector128_CreateScalar); if (varTypeIsFloating(simdBaseType)) { @@ -620,7 +623,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u8[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < simdLength - 1; index++) { @@ -641,7 +649,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u16[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { @@ -662,7 +675,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u32[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { @@ -683,7 +701,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u64[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { @@ -703,7 +726,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.f32[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { @@ -723,7 +751,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.f64[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); 
index++) { diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 8bd12f54d8d1d..b2b08c8d828c8 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -596,6 +596,13 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, regNumber op1Reg = op1->GetRegNum(); + if ((ins == INS_insertps) && (op1Reg == REG_NA)) + { + // insertps is special and can contain op1 when it is zero + assert(op1->isContained() && op1->IsVectorZero()); + op1Reg = targetReg; + } + assert(targetReg != REG_NA); assert(op1Reg != REG_NA); diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 92c1c0848b6fc..cbe956299d166 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -39,7 +39,8 @@ HARDWARE_INTRINSIC(Vector64, ConvertToInt64, HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Vector64, Divide, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) @@ -143,6 +144,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToSingle, HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 37738201789ab..cb2610315eca2 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -57,6 +57,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToSingle, HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) @@ -158,6 +159,7 @@ HARDWARE_INTRINSIC(Vector256, ConvertToSingle, HARDWARE_INTRINSIC(Vector256, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, Divide, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 04bab108a2e5a..8eea6cb6c1af2 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -899,13 +899,16 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_Create: case NI_Vector256_Create: + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: case NI_Vector128_CreateScalarUnsafe: case NI_Vector256_CreateScalarUnsafe: { uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); assert((sig->numArgs == 1) || (sig->numArgs == simdLength)); - bool isConstant = true; + bool isConstant = true; + bool isCreateScalar = (intrinsic == NI_Vector128_CreateScalar) || (intrinsic == NI_Vector256_CreateScalar); if (varTypeIsFloating(simdBaseType)) { @@ -964,7 +967,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd32Val.u8[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u8[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < simdLength - 1; index++) { @@ -985,7 +993,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd32Val.u16[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u16[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t 
index = 0; index < (simdLength - 1); index++) { @@ -1006,7 +1019,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd32Val.u32[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u32[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { @@ -1027,7 +1045,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd32Val.u64[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.u64[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { @@ -1047,7 +1070,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd32Val.f32[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.f32[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { @@ -1067,7 +1095,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, vecCon->gtSimd32Val.f64[simdLength - 1 - index] = cnsVal; } - if (sig->numArgs == 1) + if (isCreateScalar) + { + vecCon->gtSimd32Val = {}; + vecCon->gtSimd32Val.f64[0] = cnsVal; + } + else if (sig->numArgs == 1) { for (uint32_t index = 0; index < (simdLength - 1); index++) { diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 1401de10b4705..7f988dabad669 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1100,6 +1100,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { case NI_Vector64_Create: case NI_Vector128_Create: + case NI_Vector64_CreateScalar: + case NI_Vector128_CreateScalar: { // We don't directly support the Vector64.Create or Vector128.Create methods in codegen // and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect @@ -1439,8 +1441,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val); - size_t argCnt = node->GetOperandCount(); + bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val); + bool isCreateScalar = (intrinsicId == NI_Vector64_CreateScalar) || (intrinsicId == NI_Vector128_CreateScalar); + size_t argCnt = node->GetOperandCount(); // Check if we have a cast that we can remove. Note that "IsValidConstForMovImm" // will reset Op(1) if it finds such a cast, so we do not need to handle it here. 
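Note: the Arm64 lowering hunk that follows rewrites a non-constant Vector64.CreateScalar / Vector128.CreateScalar into an AdvSimd.Insert of the value into a zero vector at element 0. A minimal C# sketch of the equivalent managed expression (it mirrors the fallback bodies this change removes from Vector64.cs/Vector128.cs later in the diff); the method name CreateScalarEquivalent is illustrative only, not part of the change:

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

static class CreateScalarArm64Sketch
{
    // Roughly what the lowered IR computes for a non-constant int on Arm64:
    // insert the value into element 0 of a zero vector, leaving the other
    // elements zero.
    public static Vector128<int> CreateScalarEquivalent(int value)
    {
        return AdvSimd.IsSupported
            ? AdvSimd.Insert(Vector128<int>.Zero, 0, value)
            : Vector128.CreateScalar(value); // software fallback
    }
}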
@@ -1482,6 +1485,22 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } else if (argCnt == 1) { + if (isCreateScalar) + { + GenTree* op1 = node->Op(1); + + GenTree* tmp = comp->gtNewZeroConNode(simdType); + BlockRange().InsertBefore(op1, tmp); + LowerNode(tmp); + + GenTree* idx = comp->gtNewIconNode(0); + BlockRange().InsertAfter(tmp, idx); + LowerNode(idx); + + node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp, idx, op1); + return LowerNode(node); + } + // We have the following (where simd is simd8 or simd16): // /--* op1 T // node = * HWINTRINSIC simd T Create diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 44d6781d081fc..40ccaebec0f93 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1023,6 +1023,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector128_Create: case NI_Vector256_Create: + case NI_Vector128_CreateScalar: + case NI_Vector256_CreateScalar: { // We don't directly support the Vector128.Create or Vector256.Create methods in codegen // and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect @@ -1705,8 +1707,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* tmp2 = nullptr; GenTree* tmp3 = nullptr; - bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val); - size_t argCnt = node->GetOperandCount(); + bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val); + bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar); + size_t argCnt = node->GetOperandCount(); if (isConstant) { @@ -1745,6 +1748,156 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } else if (argCnt == 1) { + if (isCreateScalar) + { + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + // Types need to be explicitly zero-extended to ensure upper-bits are zero + // + // We need to explicitly use TYP_UBYTE since unsigned is ignored for small types + // Explicitly handle both BYTE and UBYTE to account for reinterpret casts and the like + // + // The from type is INT since that is the input type tracked by IR, where-as the target + // type needs to be UBYTE so it implicitly zero-extends back to TYP_INT + + tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_UBYTE); + BlockRange().InsertAfter(op1, tmp1); + LowerNode(tmp1); + + node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1); + node->SetSimdBaseJitType(CORINFO_TYPE_INT); + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + // Types need to be explicitly zero-extended to ensure upper-bits are zero + // + // We need to explicitly use TYP_USHORT since unsigned is ignored for small types + // Explicitly handle both SHORT and USHORT to account for reinterpret casts and the like + // + // The from type is INT since that is the input type tracked by IR, where-as the target + // type needs to be USHORT so it implicitly zero-extends back to TYP_INT + + tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_USHORT); + BlockRange().InsertAfter(op1, tmp1); + LowerNode(tmp1); + + node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1); + node->SetSimdBaseJitType(CORINFO_TYPE_INT); + break; + } + + case TYP_INT: + { + node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32); + break; + } + + case TYP_UINT: + { + node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128UInt32); + break; + } + +#if 
defined(TARGET_AMD64) + case TYP_LONG: + { + node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128Int64); + break; + } + + case TYP_ULONG: + { + node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128UInt64); + break; + } +#endif // TARGET_AMD64 + + case TYP_FLOAT: + { + tmp1 = comp->gtNewZeroConNode(simdType); + BlockRange().InsertBefore(op1, tmp1); + LowerNode(tmp1); + + if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + // Sse41.Insert has: + // * Bits 0-3: zmask + // * Bits 4-5: count_d + // * Bits 6-7: count_s (register form only) + // + // We want zmask 0b1110 (0xE) to zero elements 1/2/3 + // We want count_d 0b00 (0x0) to insert the value to element 0 + // We want count_s 0b00 (0x0) as we're just taking element 0 of the source + + idx = comp->gtNewIconNode(0x0E); + BlockRange().InsertAfter(op1, idx); + LowerNode(idx); + + node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, op1, idx); + } + else + { + node->ResetHWIntrinsicId(NI_SSE_MoveScalar, comp, tmp1, op1); + } + break; + } + + case TYP_DOUBLE: + { + tmp1 = comp->gtNewZeroConNode(simdType); + BlockRange().InsertBefore(op1, tmp1); + LowerNode(tmp1); + + node->ResetHWIntrinsicId(NI_SSE2_MoveScalar, comp, tmp1, op1); + break; + } + + default: + { + unreached(); + } + } + + if (simdSize == 32) + { + // We're creating a Vector256 scalar so we need to treat the original op as Vector128, + // we need to unsafely extend up to Vector256 (which is actually safe since the 128-bit + // op will zero extend up to 256-bits), and then we need to replace the original use + // with the new TYP_SIMD32 node. + + node->ChangeType(TYP_SIMD16); + node->SetSimdSize(16); + LowerNode(node); + + tmp2 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node, NI_Vector128_ToVector256Unsafe, simdBaseJitType, + 16); + + LIR::Use use; + bool foundUse = BlockRange().TryGetUse(node, &use); + BlockRange().InsertAfter(node, tmp2); + + if (foundUse) + { + use.ReplaceWith(tmp2); + } + else + { + node->ClearUnusedValue(); + tmp2->SetUnusedValue(); + } + + node = tmp2->AsHWIntrinsic(); + } + + return LowerNode(node); + } + // We have the following (where simd is simd16 or simd32): // /--* op1 T // node = * HWINTRINSIC simd T Create @@ -1822,6 +1975,8 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) return LowerNode(node); } + assert(intrinsicId == NI_Vector128_Create); + // We will be constructing the following parts: // /--* op1 T // tmp1 = * HWINTRINSIC simd16 T CreateScalarUnsafe @@ -6940,7 +7095,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSSE3_AlignRight: case NI_SSE41_Blend: case NI_SSE41_DotProduct: - case NI_SSE41_Insert: case NI_SSE41_X64_Insert: case NI_SSE41_MultipleSumAbsoluteDifferences: case NI_AVX_Blend: @@ -6968,6 +7122,53 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_SSE41_Insert: + { + GenTree* lastOp = node->Op(numArgs); + + if ((simdBaseType == TYP_FLOAT) && lastOp->IsCnsIntOrI()) + { + // Sse41.Insert has: + // * Bits 0-3: zmask + // * Bits 4-5: count_d + // * Bits 6-7: count_s (register form only) + // + // Where zmask specifies which elements to zero + // Where count_d specifies the destination index the value is being inserted to + // Where count_s specifies the source index of the value being inserted + + ssize_t ival = lastOp->AsIntConCommon()->IconValue(); + + ssize_t zmask = (ival & 0x0F); + ssize_t count_d = (ival & 0x30) >> 4; + ssize_t count_s = (ival & 0xC0) >> 6; + + if (op1->IsVectorZero()) + { + // When op1 
is zero, we can contain op1 and modify the mask + // to zero everything except for the element we're inserting to + + MakeSrcContained(node, op1); + + zmask |= ~(1 << count_d); + zmask &= 0x0F; + + ival = (count_s << 6) | (count_d << 4) | (zmask); + lastOp->AsIntConCommon()->SetIconValue(ival); + } + } + + if (TryGetContainableHWIntrinsicOp(node, &op2, &supportsRegOptional)) + { + MakeSrcContained(node, op2); + } + else if (supportsRegOptional) + { + op2->SetRegOptional(); + } + break; + } + default: { assert(!"Unhandled containment for ternary hardware intrinsic with immediate operand"); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 98d9964570942..a1d433731dffe 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1156,6 +1156,7 @@ public static Vector128 Create(Vector64 lower, Vector64 upper) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. /// The type of () is not supported. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 CreateScalar(T value) where T : struct => Vector64.CreateScalar(value).ToVector128(); @@ -1163,241 +1164,91 @@ public static unsafe Vector128 CreateScalar(T value) /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(byte value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.IsSupported) - { - // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we call - // the UInt32 overload to ensure zero extension. We can then just treat the result as byte and return. - return Sse2.ConvertScalarToVector128UInt32(value).AsByte(); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(byte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(double value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.IsSupported) - { - return Sse2.MoveScalar(Vector128.Zero, CreateScalarUnsafe(value)); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(double value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
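Note: the NI_SSE41_Insert containment change above folds an all-zero first operand into insertps by widening the zmask so every element except the destination slot is zeroed. A small C# sketch of that immediate adjustment, under the same bit layout described in the diff (bits 0-3 zmask, bits 4-5 count_d, bits 6-7 count_s); the helper name is illustrative only:

// Recompute the insertps immediate when op1 is known to be all-zero.
static byte AdjustInsertpsImmediateForZeroOp1(byte ival)
{
    int zmask   = ival & 0x0F;         // bits 0-3: elements to zero
    int count_d = (ival & 0x30) >> 4;  // bits 4-5: destination element
    int count_s = (ival & 0xC0) >> 6;  // bits 6-7: source element

    // Zero every element except the one being written.
    zmask |= ~(1 << count_d);
    zmask &= 0x0F;

    return (byte)((count_s << 6) | (count_d << 4) | zmask);
}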
+ [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(short value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.IsSupported) - { - // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we cast - // to ushort and call the UInt32 overload to ensure zero extension. We can then just treat the result as short and return. - return Sse2.ConvertScalarToVector128UInt32((ushort)(value)).AsInt16(); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(short value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(int value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.IsSupported) - { - return Sse2.ConvertScalarToVector128Int32(value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(int value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(long value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.X64.IsSupported) - { - return Sse2.X64.ConvertScalarToVector128Int64(value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(long value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(nint value) - { -#if TARGET_64BIT - return CreateScalar((long)(value)).AsNInt(); -#else - return CreateScalar((int)(value)).AsNInt(); -#endif - } + public static unsafe Vector128 CreateScalar(nint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(nuint value) - { -#if TARGET_64BIT - return CreateScalar((ulong)(value)).AsNUInt(); -#else - return CreateScalar((uint)(value)).AsNUInt(); -#endif - } + public static unsafe Vector128 CreateScalar(nuint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(sbyte value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.IsSupported) - { - // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we cast - // to byte and call the UInt32 overload to ensure zero extension. We can then just treat the result as sbyte and return. - return Sse2.ConvertScalarToVector128UInt32((byte)(value)).AsSByte(); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(sbyte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(float value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse.IsSupported) - { - return Sse.MoveScalar(Vector128.Zero, CreateScalarUnsafe(value)); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(float value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(ushort value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.IsSupported) - { - // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we call - // the UInt32 overload to ensure zero extension. We can then just treat the result as ushort and return. - return Sse2.ConvertScalarToVector128UInt32(value).AsUInt16(); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(ushort value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(uint value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.IsSupported) - { - return Sse2.ConvertScalarToVector128UInt32(value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(uint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 CreateScalar(ulong value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector128.Zero, 0, value); - } - else if (Sse2.X64.IsSupported) - { - return Sse2.X64.ConvertScalarToVector128UInt64(value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector128 CreateScalar(ulong value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The type of the elements in the vector. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 8a6f11b1015b2..552236ddc4e35 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1151,6 +1151,7 @@ public static Vector256 Create(Vector128 lower, Vector128 upper) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. /// The type of () is not supported. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(T value) where T : struct => Vector128.CreateScalar(value).ToVector256(); @@ -1158,42 +1159,49 @@ public static Vector256 CreateScalar(T value) /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(byte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(double value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(short value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(int value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(long value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(nint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(nuint value) => CreateScalar(value); @@ -1201,6 +1209,7 @@ public static Vector256 CreateScalar(T value) /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(sbyte value) => CreateScalar(value); @@ -1208,12 +1217,14 @@ public static Vector256 CreateScalar(T value) /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(float value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(ushort value) => CreateScalar(value); @@ -1221,6 +1232,7 @@ public static Vector256 CreateScalar(T value) /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. 
/// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(uint value) => CreateScalar(value); @@ -1228,6 +1240,7 @@ public static Vector256 CreateScalar(T value) /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateScalar(ulong value) => CreateScalar(value); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index dad208aeb2999..6a897cd90364a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -859,6 +859,7 @@ public static unsafe Vector64 Create(uint e0, uint e1) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. /// The type of () is not supported. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector64 CreateScalar(T value) where T : struct @@ -871,160 +872,88 @@ public static unsafe Vector64 CreateScalar(T value) /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(byte value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector64.Zero, 0, value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector64 CreateScalar(byte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector64 CreateScalar(double value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(short value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector64.Zero, 0, value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector64 CreateScalar(short value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. 
/// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(int value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector64.Zero, 0, value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector64 CreateScalar(int value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector64 CreateScalar(long value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(nint value) - { -#if TARGET_64BIT - return CreateScalar((long)(value)).AsNInt(); -#else - return CreateScalar((int)(value)).AsNInt(); -#endif - } + public static unsafe Vector64 CreateScalar(nint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(nuint value) - { -#if TARGET_64BIT - return CreateScalar((ulong)(value)).AsNUInt(); -#else - return CreateScalar((uint)(value)).AsNUInt(); -#endif - } + public static unsafe Vector64 CreateScalar(nuint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(sbyte value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector64.Zero, 0, value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector64 CreateScalar(sbyte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(float value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector64.Zero, 0, value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector64 CreateScalar(float value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(ushort value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector64.Zero, 0, value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector64 CreateScalar(ushort value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(uint value) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.Insert(Vector64.Zero, 0, value); - } - else - { - return CreateScalar(value); - } - } + public static unsafe Vector64 CreateScalar(uint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector64 CreateScalar(ulong value) => CreateScalar(value);
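Note: for reference, a short C# example of the contract these overloads keep after becoming JIT intrinsics: CreateScalar guarantees the upper elements are zero, whereas CreateScalarUnsafe only defines element 0. The Main wrapper is illustrative only:

using System;
using System.Runtime.Intrinsics;

class CreateScalarSemanticsSketch
{
    static void Main()
    {
        // CreateScalar zeroes every element except element 0, so this prints
        // <5, 0, 0, 0> regardless of which lowering path the JIT picks.
        Vector128<int> v = Vector128.CreateScalar(5);
        Console.WriteLine(v);

        // CreateScalarUnsafe only defines element 0; the remaining elements
        // are unspecified and must not be relied upon.
        Vector128<int> u = Vector128.CreateScalarUnsafe(5);
        Console.WriteLine(u.GetElement(0)); // 5
    }
}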