Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JIT] X64 - Using cmovns for faster division #81252

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genLeaInstruction(GenTreeAddrMode* lea);
void genSetRegToCond(regNumber dstReg, GenTree* tree);

#ifdef TARGET_AMD64
// Generates code for a GT_DIV node whose divisor is a contained integral constant
// with a power-of-two absolute value (see the definition for the emitted sequence).
void genCodeForDivCnsPow2(GenTreeOp* treeNode);
#endif // TARGET_AMD64

#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)
void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale);
#endif // TARGET_ARMARCH || TARGET_LOONGARCH64
Expand Down
57 changes: 57 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,15 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
assert(treeNode->OperIs(GT_DIV, GT_UDIV, GT_MOD, GT_UMOD));

#ifdef TARGET_AMD64
if (treeNode->OperIs(GT_DIV) && treeNode->gtGetOp2()->isContained() &&
treeNode->gtGetOp2()->IsIntegralConstAbsPow2())
{
genCodeForDivCnsPow2(treeNode);
return;
}
#endif // TARGET_AMD64

GenTree* dividend = treeNode->gtOp1;

#ifdef TARGET_X86
Expand Down Expand Up @@ -873,6 +882,54 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
genProduceReg(treeNode);
}

#ifdef TARGET_AMD64
//------------------------------------------------------------------------
// genCodeForDivCnsPow2: Generate code for a signed DIV whose divisor is a contained
//                       constant with a power-of-two absolute value.
//
// Arguments:
//    treeNode - the GT_DIV node to generate the code for
//
// Notes:
//    Emits a branch-free sequence:
//       target = dividend + (|divisor| - 1)
//       test   dividend, dividend
//       cmovns target, dividend        ; non-negative dividends must not be biased
//       sar    target, log2(|divisor|)
//       neg    target                  ; only when the divisor is negative
//    Adding (|divisor| - 1) to a negative dividend before the arithmetic shift makes
//    the result round toward zero instead of toward negative infinity.
//    The dividend register is read again after the target register has been written,
//    so the two registers must be different (asserted below).
//
void CodeGen::genCodeForDivCnsPow2(GenTreeOp* treeNode)
{
    assert(treeNode->OperIs(GT_DIV));

    GenTree*  dividend   = treeNode->gtOp1;
    GenTree*  divisor    = treeNode->gtOp2;
    emitAttr  size       = emitTypeSize(treeNode);
    regNumber targetReg  = treeNode->GetRegNum();
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = GetEmitter();

    assert(varTypeIsIntOrI(targetType));
    assert(divisor->IsIntegralConstAbsPow2());
    assert(divisor->isContained());

    genConsumeOperands(treeNode);

    regNumber dividendReg = dividend->GetRegNum();

    const ssize_t cnsDivisor = divisor->AsIntConCommon()->IntegralValue();

    // Compute the magnitude in unsigned arithmetic. This does not depend on which 'abs'
    // overload is selected for ssize_t and stays well-defined for the minimum value.
    const size_t absCnsDivisor =
        (cnsDivisor < 0) ? (static_cast<size_t>(0) - static_cast<size_t>(cnsDivisor)) : static_cast<size_t>(cnsDivisor);

    assert(absCnsDivisor != 2);
    assert(dividendReg != targetReg);

    // Bias that is added to negative dividends so that the shift truncates toward zero.
    const size_t bias = absCnsDivisor - 1;

    if (bias <= static_cast<size_t>(0x7FFFFFFF))
    {
        // The bias fits in the signed 32-bit displacement of an address mode, so a single
        // 'lea' computes "dividend + bias" without modifying the dividend register.
        emit->emitIns_R_AR(INS_lea, size, targetReg, dividendReg, static_cast<int>(bias));
    }
    else
    {
        // For 64-bit divisors with a magnitude of 2^32 or more the bias does not fit in a
        // 32-bit displacement; narrowing it to 'int' would silently produce a wrong offset.
        // Materialize the bias in the target register and add the dividend instead.
        // NOTE(review): relies on emitIns_R_I(INS_mov, ...) accepting a full 64-bit
        // immediate on AMD64 - confirm against the emitter.
        emit->emitIns_R_I(INS_mov, size, targetReg, static_cast<ssize_t>(bias));
        emit->emitIns_R_R(INS_add, size, targetReg, dividendReg);
    }

    // Select the unbiased dividend when it is non-negative (sign flag clear).
    emit->emitIns_R_R(INS_test, size, dividendReg, dividendReg);
    emit->emitIns_R_R(INS_cmovns, size, targetReg, dividendReg);

    emit->emitIns_R_I(INS_sar_N, size, targetReg, genLog2(absCnsDivisor));

    if (cnsDivisor < 0)
    {
        // Dividing by -2^k is dividing by 2^k followed by a negation.
        emit->emitIns_R(INS_neg, size, targetReg);
    }

    genProduceReg(treeNode);
}
#endif // TARGET_AMD64

//------------------------------------------------------------------------
// genCodeForBinary: Generate code for many binary arithmetic operators
//
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6123,6 +6123,14 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
LIR::Use opDividend(BlockRange(), &divMod->AsOp()->gtOp1, divMod);
dividend = ReplaceWithLclVar(opDividend);

#ifdef TARGET_AMD64
if (comp->opts.OptimizationEnabled() && isDiv && (absDivisorValue >= 4))
{
MakeSrcContained(divMod, divisor);
return divMod->gtNext;
}
#endif // TARGET_AMD64

GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63));

if (absDivisorValue == 2)
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1781,6 +1781,16 @@ int LinearScan::BuildModDiv(GenTree* tree)
return BuildSimple(tree);
}

#ifdef TARGET_AMD64
if (tree->OperIs(GT_DIV) && op2->isContained() && op2->IsIntegralConstAbsPow2())
{
srcCount = BuildDelayFreeUses(tree->gtGetOp1());
buildInternalRegisterUses();
BuildDef(tree);
return 1;
}
#endif // TARGET_AMD64

// Amd64 Div/Idiv instruction:
// Dividend in RAX:RDX and computes
// Quotient in RAX, Remainder in RDX
Expand Down