Skip to content

Commit

Permalink
JIT: Build and consume FMA and Permute node operands in standard order (
Browse files Browse the repository at this point in the history
#103100)

The building and consumption of these operands can happen in op1, op2,
op3 order regardless of whether the codegen uses the registers in a
different order.

Fix #102773
  • Loading branch information
jakobbotsch authored Jun 6, 2024
1 parent 3178a5f commit f6a7ebb
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 29 deletions.
22 changes: 2 additions & 20 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3071,6 +3071,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)

regNumber targetReg = node->GetRegNum();

genConsumeMultiOpOperands(node);

regNumber op1NodeReg = op1->GetRegNum();
regNumber op2NodeReg = op2->GetRegNum();
regNumber op3NodeReg = op3->GetRegNum();
Expand All @@ -3084,11 +3086,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
assert(!copiesUpperBits || !op1->isContained());

// We need to keep this in sync with lsraxarch.cpp
// Ideally we'd actually swap the operands in lsra and simplify codegen
// but its a bit more complicated to do so for many operands as well
// as being complicated to tell codegen how to pick the right instruction

instruction ins = INS_invalid;

if (op1->isContained() || op1->isUsedFromSpillTemp())
Expand Down Expand Up @@ -3159,21 +3156,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
}
}

#ifdef DEBUG
// Use nums are assigned in LIR order but this node is special and doesn't
// actually use operands. Fix up the use nums here to avoid asserts.
unsigned useNum1 = op1->gtUseNum;
unsigned useNum2 = op2->gtUseNum;
unsigned useNum3 = op3->gtUseNum;
emitOp1->gtUseNum = useNum1;
emitOp2->gtUseNum = useNum2;
emitOp3->gtUseNum = useNum3;
#endif

genConsumeRegs(emitOp1);
genConsumeRegs(emitOp2);
genConsumeRegs(emitOp3);

assert(ins != INS_invalid);
genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3, instOptions);
genProduceReg(node);
Expand Down
43 changes: 34 additions & 9 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2544,11 +2544,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
std::swap(emitOp1, emitOp3);
}
}
tgtPrefUse = BuildUse(emitOp1);

srcCount += 1;
srcCount += BuildDelayFreeUses(emitOp2, emitOp1);
srcCount += emitOp3->isContained() ? BuildOperandUses(emitOp3) : BuildDelayFreeUses(emitOp3, emitOp1);
GenTree* ops[] = {op1, op2, op3};
for (GenTree* op : ops)
{
if (op == emitOp1)
{
tgtPrefUse = BuildUse(op);
srcCount++;
}
else if (op == emitOp2)
{
srcCount += BuildDelayFreeUses(op, emitOp1);
}
else if (op == emitOp3)
{
srcCount += op->isContained() ? BuildOperandUses(op) : BuildDelayFreeUses(op, emitOp1);
}
}

if (intrinsicTree->OperIsEmbRoundingEnabled() && !intrinsicTree->Op(4)->IsCnsIntOrI())
{
Expand Down Expand Up @@ -2643,11 +2656,23 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
std::swap(emitOp1, emitOp2);
}

tgtPrefUse = BuildUse(emitOp1);

srcCount += 1;
srcCount += BuildDelayFreeUses(emitOp2, emitOp1);
srcCount += op3->isContained() ? BuildOperandUses(emitOp3) : BuildDelayFreeUses(emitOp3, emitOp1);
GenTree* ops[] = {op1, op2, op3};
for (GenTree* op : ops)
{
if (op == emitOp1)
{
tgtPrefUse = BuildUse(op);
srcCount++;
}
else if (op == emitOp2)
{
srcCount += BuildDelayFreeUses(op, emitOp1);
}
else if (op == emitOp3)
{
srcCount += op->isContained() ? BuildOperandUses(op) : BuildDelayFreeUses(op, emitOp1);
}
}

buildUses = false;
break;
Expand Down

0 comments on commit f6a7ebb

Please sign in to comment.