From 881dfbe3d8ac25519ddf4438c3c5ae3d01ae0b7f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 7 Aug 2024 17:42:57 +0100 Subject: [PATCH 1/5] ARM64-SVE: Ensure MOVPRFX is next to SVE instruction in immediate jump tables --- src/coreclr/jit/codegen.h | 3 +- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 172 +++++++++++++++----- 2 files changed, 130 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 7b0c517aac9a15..252d5bab813631 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1010,7 +1010,7 @@ class CodeGen final : public CodeGenInterface class HWIntrinsicImmOpHelper final { public: - HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin); + HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin, int numInstrs = 1); HWIntrinsicImmOpHelper( CodeGen* codeGen, regNumber immReg, int immLowerBound, int immUpperBound, GenTreeHWIntrinsic* intrin); @@ -1058,6 +1058,7 @@ class CodeGen final : public CodeGenInterface int immUpperBound; regNumber nonConstImmReg; regNumber branchTargetReg; + int numInstrs; }; #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index bc7953f34eea97..3ee01933d0f247 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -23,6 +23,7 @@ // codeGen -- an instance of CodeGen class. // immOp -- an immediate operand of the intrinsic. // intrin -- a hardware intrinsic tree node. +// numInstrs -- number of instructions that will be in each switch entry. Default 1. // // Note: This class is designed to be used in the following way // HWIntrinsicImmOpHelper helper(this, immOp, intrin); @@ -35,11 +36,15 @@ // This allows to combine logic for cases when immOp->isContainedIntOrIImmed() is either true or false in a form // of a for-loop. // -CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTree* immOp, GenTreeHWIntrinsic* intrin) +CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, + GenTree* immOp, + GenTreeHWIntrinsic* intrin, + int numInstrs) : codeGen(codeGen) , endLabel(nullptr) , nonZeroLabel(nullptr) , branchTargetReg(REG_NA) + , numInstrs(numInstrs) { assert(codeGen != nullptr); assert(varTypeIsIntegral(immOp)); @@ -132,6 +137,7 @@ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper( , immUpperBound(immUpperBound) , nonConstImmReg(immReg) , branchTargetReg(REG_NA) + , numInstrs(1) { assert(codeGen != nullptr); @@ -181,18 +187,32 @@ void CodeGen::HWIntrinsicImmOpHelper::EmitBegin() } else { - // Here we assume that each case consists of one arm64 instruction followed by "b endLabel". + assert(numInstrs == 1 || numInstrs == 2); + + // Here we assume that each case consists of numInstrs arm64 instructions followed by "b endLabel". // Since an arm64 instruction is 4 bytes, we branch to AddressOf(beginLabel) + (nonConstImmReg << 3). GetEmitter()->emitIns_R_L(INS_adr, EA_8BYTE, beginLabel, branchTargetReg); GetEmitter()->emitIns_R_R_R_I(INS_add, EA_8BYTE, branchTargetReg, branchTargetReg, nonConstImmReg, 3, INS_OPTS_LSL); + // For two instructions, add the extra one. + if (numInstrs == 2) + { + GetEmitter()->emitIns_R_R_R_I(INS_add, EA_8BYTE, branchTargetReg, branchTargetReg, nonConstImmReg, 2, + INS_OPTS_LSL); + } + // If the lower bound is non zero we need to adjust the branch target value by subtracting - // (immLowerBound << 3). 
+ // the lower bound if (immLowerBound != 0) { - GetEmitter()->emitIns_R_R_I(INS_sub, EA_8BYTE, branchTargetReg, branchTargetReg, - ((ssize_t)immLowerBound << 3)); + ssize_t lowerReduce = ((ssize_t)immLowerBound << 3); + if (numInstrs == 2) + { + lowerReduce += ((ssize_t)immLowerBound << 2); + } + + GetEmitter()->emitIns_R_R_I(INS_sub, EA_8BYTE, branchTargetReg, branchTargetReg, lowerReduce); } GetEmitter()->emitIns_R(INS_br, EA_8BYTE, branchTargetReg); @@ -516,6 +536,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } // Shared code for setting up embedded mask arg for intrinsics with 3+ operands + + auto emitEmbeddedMaskSetupInstrs = [&] { + if (intrin.op3->IsVectorZero() || (targetReg != falseReg) || (targetReg != embMaskOp1Reg)) + { + return 1; + } + return 0; + }; + auto emitEmbeddedMaskSetup = [&] { if (intrin.op3->IsVectorZero()) { @@ -736,12 +765,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) default: assert(targetReg != embMaskOp2Reg); - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, - embMaskOp1Reg, opt); // Finally, perform the actual "predicated" operation so that `targetReg` is the first // operand and `embMaskOp2Reg` is the second operand. - emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + if (hasShift) + { + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, + embMaskOp1Reg, opt); + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + helper.ImmValue(), embOpt, sopt); + } + } + else + { + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, + embMaskOp1Reg, opt); + GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embOpt, sopt); + } + break; } } @@ -769,26 +814,57 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg != embMaskOp2Reg); assert(HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinEmbMask.id)); - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg); - - emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + if (hasShift) + { + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, + embMaskOp1Reg); + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + helper.ImmValue(), embOpt, sopt); + } + } + else + { + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg); + GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embOpt, sopt); + } } GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, falseReg, opt); - break; } else if (targetReg != embMaskOp1Reg) { // embMaskOp1Reg is same as `falseReg`, but not same as `targetReg`. Move the // `embMaskOp1Reg` i.e. `falseReg` in `targetReg`, using "unpredicated movprfx", so the // subsequent `insEmbMask` operation can be merged on top of it. - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg); - } - // Finally, perform the actual "predicated" operation so that `targetReg` is the first operand - // and `embMaskOp2Reg` is the second operand. 
- emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + if (hasShift) + { + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg); + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + helper.ImmValue(), embOpt, sopt); + } + } + else + { + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg); + GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embOpt, sopt); + } + } + else + { + // Finally, perform the actual "predicated" operation so that `targetReg` is the first + // operand and `embMaskOp2Reg` is the second operand. + emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + } } else { @@ -904,14 +980,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } } - emitEmbeddedMaskSetup(); - // Finally, perform the desired operation. if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic(), + emitEmbeddedMaskSetupInstrs() + 1); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { + emitEmbeddedMaskSetup(); GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, helper.ImmValue(), opt); } @@ -919,6 +995,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) else { assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); + emitEmbeddedMaskSetup(); GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, embMaskOp3Reg, opt); } @@ -932,11 +1009,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(intrinEmbMask.op4->isContained() == (embMaskOp4Reg == REG_NA)); assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); - emitEmbeddedMaskSetup(); - - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic(), + emitEmbeddedMaskSetupInstrs() + 1); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { + emitEmbeddedMaskSetup(); GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, embMaskOp3Reg, helper.ImmValue(), opt); } @@ -2328,17 +2405,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { assert(isRMW); - if (targetReg != op1Reg) - { - assert(targetReg != op2Reg); - - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); - } - HWIntrinsicImmOpHelper helper(this, intrin.op3, node); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { + if (targetReg != op1Reg) + { + assert(targetReg != op2Reg); + + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); + } + const int elementIndex = helper.ImmValue(); const int byteIndex = genTypeSize(intrin.baseType) * elementIndex; @@ -2460,17 +2537,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { assert(isRMW); - if (targetReg != op1Reg) - { - assert(targetReg != op2Reg); - - GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); - } - HWIntrinsicImmOpHelper helper(this, intrin.op3, node); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { + if (targetReg != op1Reg) + { + assert(targetReg != op2Reg); + + GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); + } + 
GetEmitter()->emitInsSve_R_R_I(ins, emitSize, targetReg, op2Reg, helper.ImmValue(), opt); } break; @@ -2481,16 +2558,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(isRMW); assert(hasImmediateOperand); - if (targetReg != op1Reg) - { - assert(targetReg != op2Reg); - assert(targetReg != op3Reg); - GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); - } - // If both immediates are constant, we don't need a jump table if (intrin.op4->IsCnsIntOrI() && intrin.op5->IsCnsIntOrI()) { + if (targetReg != op1Reg) + { + assert(targetReg != op2Reg); + assert(targetReg != op3Reg); + GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); + } + assert(intrin.op4->isContainedIntOrIImmed() && intrin.op5->isContainedIntOrIImmed()); GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg, intrin.op4->AsIntCon()->gtIconVal, @@ -2514,6 +2591,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 7, node); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { + if (targetReg != op1Reg) + { + assert(targetReg != op2Reg); + assert(targetReg != op3Reg); + GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg); + } + // Extract index and rotation from the immediate const int value = helper.ImmValue(); const ssize_t index = value & 1; From 0178a14344c5383412b0aa7df9e604ea7fe6f9cd Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 9 Aug 2024 08:11:48 +0100 Subject: [PATCH 2/5] Add emitInsMovPrfxHelper --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 77 ++++++--------------- 1 file changed, 23 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 3ee01933d0f247..940a4a5dd42d8f 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -749,6 +749,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } }; + + auto emitInsMovPrfxHelper = [&](regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) { + if (hasShift) + { + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, + reg3); + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, + helper.ImmValue(), embOpt, sopt); + } + } + else + { + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg3); + GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, reg1, reg2, reg4, embOpt, sopt); + } + }; + if (intrin.op3->IsVectorZero()) { // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the @@ -768,25 +788,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // Finally, perform the actual "predicated" operation so that `targetReg` is the first // operand and `embMaskOp2Reg` is the second operand. 
- if (hasShift) - { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, - embMaskOp1Reg, opt); - GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - helper.ImmValue(), embOpt, sopt); - } - } - else - { - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, - embMaskOp1Reg, opt); - GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embOpt, sopt); - } - + emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg); break; } } @@ -810,27 +812,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // into targetReg. Next, do the predicated operation on the targetReg and last, // use "sel" to select the active lanes based on mask, and set inactive lanes // to falseReg. - assert(targetReg != embMaskOp2Reg); assert(HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinEmbMask.id)); - if (hasShift) - { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, - embMaskOp1Reg); - GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - helper.ImmValue(), embOpt, sopt); - } - } - else - { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg); - GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embOpt, sopt); - } + emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg); } GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, @@ -841,23 +826,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // embMaskOp1Reg is same as `falseReg`, but not same as `targetReg`. Move the // `embMaskOp1Reg` i.e. `falseReg` in `targetReg`, using "unpredicated movprfx", so the // subsequent `insEmbMask` operation can be merged on top of it. 
- - if (hasShift) - { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg); - GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - helper.ImmValue(), embOpt, sopt); - } - } - else - { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, falseReg); - GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embOpt, sopt); - } + emitInsMovPrfxHelper(targetReg, maskReg, falseReg, embMaskOp2Reg); } else { From 9de4da2f2862deb0be004f7930cfb4b02c4a666d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 9 Aug 2024 08:15:26 +0100 Subject: [PATCH 3/5] Fix formatting --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 940a4a5dd42d8f..eb6127184ba260 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -749,17 +749,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } }; - auto emitInsMovPrfxHelper = [&](regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) { if (hasShift) { HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, - reg3); - GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, - helper.ImmValue(), embOpt, sopt); + GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg3); + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(), + embOpt, sopt); } } else From 0edb2d54997e19a03f2b7e34e7bc925b0a4c6666 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 9 Aug 2024 17:22:45 +0100 Subject: [PATCH 4/5] Restore a predicated movprfx use --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index eb6127184ba260..30b2f521fd995c 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -786,7 +786,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // Finally, perform the actual "predicated" operation so that `targetReg` is the first // operand and `embMaskOp2Reg` is the second operand. 
- emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg); + if (hasShift) + { + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, + embMaskOp1Reg, opt); + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + helper.ImmValue(), embOpt, sopt); + } + } + else + { + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, + embMaskOp1Reg, opt); + GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embOpt, sopt); + } break; } } From 2372c13e4f5cfa062a36b2bd78d5aa4c20d8b9a9 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 12 Aug 2024 14:25:33 +0100 Subject: [PATCH 5/5] Fix use of predicated movprfx --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 24 ++++----------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5555b1a3c74f63..4e06ba49a30964 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -756,14 +756,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg3); + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg2, reg3, opt); GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(), embOpt, sopt); } } else { - GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg3); + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, EA_SCALABLE, reg1, reg2, reg3, opt); GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, reg1, reg2, reg4, embOpt, sopt); } }; @@ -789,24 +789,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // Finally, perform the actual "predicated" operation so that `targetReg` is the first // operand and `embMaskOp2Reg` is the second operand. - if (hasShift) - { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic(), 2); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, - embMaskOp1Reg, opt); - GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - helper.ImmValue(), embOpt, sopt); - } - } - else - { - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, - embMaskOp1Reg, opt); - GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embOpt, sopt); - } + + emitInsMovPrfxHelper(targetReg, maskReg, embMaskOp1Reg, embMaskOp2Reg); break; } }
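
For reference, the branch-target arithmetic that the updated EmitBegin emits reduces to the scaling below. This is an illustrative, standalone C++ sketch only — the function name is made up and nothing here is part of the change; it assumes 4-byte ARM64 instructions and one trailing "b endLabel" per jump-table case, as the comments in EmitBegin describe, and it relies on the JIT's own ssize_t/assert definitions.

// Illustrative only: byte offset of jump-table case `imm`, relative to beginLabel,
// when each case holds `numInstrs` ARM64 instructions plus a trailing "b endLabel".
// Mirrors the add/sub sequence emitted by HWIntrinsicImmOpHelper::EmitBegin.
static ssize_t JumpTableCaseOffset(ssize_t imm, ssize_t immLowerBound, int numInstrs)
{
    assert((numInstrs == 1) || (numInstrs == 2));

    // numInstrs == 1: 8 bytes per case (instruction + branch)      -> (imm << 3)
    // numInstrs == 2: 12 bytes per case (2 instructions + branch)  -> (imm << 3) + (imm << 2)
    ssize_t offset = imm << 3;
    if (numInstrs == 2)
    {
        offset += imm << 2;
    }

    // A non-zero lower bound is subtracted with the same per-case scaling,
    // so that imm == immLowerBound maps to the first case at beginLabel.
    ssize_t adjust = immLowerBound << 3;
    if (numInstrs == 2)
    {
        adjust += immLowerBound << 2;
    }

    return offset - adjust;
}

Emitting the movprfx inside each per-case body (rather than hoisting it ahead of the jump table) is what keeps it immediately adjacent to the predicated SVE instruction it prefixes, which is the point of widening the cases to two instructions.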