diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index ca38c26ab7c845..f109f0b2ef1d89 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -2010,10 +2010,19 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Sve_ConvertVectorToMask: + { // PMOV would be ideal here, but it is in SVE2.1. - // Instead, use a compare: CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0 - GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt); + // + // Instead, to test if lowest bit is set, we LSL elementWidthInBits - 1 + // and then compare: CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0 + + int elementWidthInBits = 8 * genTypeSize(intrin.baseType); + regNumber ztemp = internalRegisters.GetSingle(node); + + GetEmitter()->emitIns_R_R_I(INS_sve_lsl, emitSize, ztemp, op2Reg, elementWidthInBits - 1, opt); + GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, ztemp, 0, opt); break; + } case NI_Sve_Count16BitElements: case NI_Sve_Count32BitElements: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 7cc1a231391c71..a0aff9a0900666 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1400,6 +1400,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou delayFreeOp = getDelayFreeOperand(embeddedOp, /* embedded */ true); } + if (intrin.id == NI_Sve_ConvertVectorToMask) + { + // Need an extra temp to test LSB of source vector + buildInternalFloatRegisterDefForNode(intrinsicTree); + } + // Build any immediates BuildHWIntrinsicImmediate(intrinsicTree, intrin); diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index b328e884637e79..131a52a8e34db9 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -20,8 +20,8 @@ struct MaskConversionsWeight static constexpr const weight_t 
costOfConvertMaskToVector = 1.0; #if defined(TARGET_ARM64) - // Conversion of vector to mask is two instructions. - static constexpr const weight_t costOfConvertVectorToMask = 2.0; + // Conversion of vector to mask is three instructions. + static constexpr const weight_t costOfConvertVectorToMask = 3.0; #else // Conversion of vector to mask is one instructions. static constexpr const weight_t costOfConvertVectorToMask = 1.0;