Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Enable CORINFO_INTRINSIC Round, Ceiling, and Floor to generate ROUNDSS and ROUNDSD #14736

Merged
merged 2 commits into from
Jan 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ class CodeGen : public CodeGenInterface

// Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
void genSSE2BitwiseOp(GenTreePtr treeNode);

// Generates SSE41 code for the given tree as a round operation
void genSSE41RoundOp(GenTreeOp* treeNode);
#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87

void genPrepForCompiler();
Expand Down
173 changes: 173 additions & 0 deletions src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7331,6 +7331,173 @@ void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
inst_RV_RV(ins, targetReg, operandReg, targetType);
}

//-----------------------------------------------------------------------------------------
// genSSE41RoundOp - generate SSE41 code for the given tree as a round operation
//
// Emits one roundss (when treeNode is TYP_FLOAT) or roundsd (otherwise) instruction.
// The destination is treeNode's register and the immediate operand is selected from
// the intrinsic id. The source operand is taken from a register or, when it is
// contained or used from a spill temp, read directly from memory (stack slot,
// class variable / constant handle, or a general address mode).
//
// Arguments:
// treeNode - the GT_INTRINSIC node (Round, Ceiling, or Floor) to generate code for
//
// Return value:
// None
//
// Assumptions:
// i) SSE4.1 is supported by the underlying hardware
// ii) treeNode oper is a GT_INTRINSIC
// iii) treeNode's operand is a floating point type and has the same type as treeNode
// iv) treeNode is not used from memory
// v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor
// vi) caller of this routine needs to call genProduceReg()
void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode)
{
// i) SSE4.1 is supported by the underlying hardware
assert(compiler->compSupports(InstructionSet_SSE41));

// NOTE(review): the prose lines below are not C++; they look like review-page text
// that was captured together with the code - confirm and remove.
Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! I like the way the asserts are documented here.


// ii) treeNode oper is a GT_INTRINSIC
assert(treeNode->OperGet() == GT_INTRINSIC);

// The single operand being rounded.
GenTree* srcNode = treeNode->gtGetOp1();

// iii) treeNode type is floating point type
// (checked on the operand, which must also match treeNode's own type)
assert(varTypeIsFloating(srcNode));
assert(srcNode->TypeGet() == treeNode->TypeGet());

// iv) treeNode is not used from memory
assert(!treeNode->isUsedFromMemory());

genConsumeOperands(treeNode);

// Scalar single-precision form for TYP_FLOAT, scalar double-precision form otherwise.
instruction ins = (treeNode->TypeGet() == TYP_FLOAT) ? INS_roundss : INS_roundsd;
emitAttr size = emitTypeSize(treeNode);

regNumber dstReg = treeNode->gtRegNum;

// Immediate operand passed to the round instruction; set per intrinsic below.
unsigned ival = 0;

// v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor
switch (treeNode->gtIntrinsic.gtIntrinsicId)
{
case CORINFO_INTRINSIC_Round:
// NOTE(review): 4 (0b0100) presumably selects "use the current MXCSR rounding
// mode" in the imm8 encoding - confirm against the instruction reference.
ival = 4;
break;

case CORINFO_INTRINSIC_Ceiling:
// NOTE(review): 10 (0b1010) presumably selects round toward +infinity with the
// precision exception suppressed - confirm against the instruction reference.
ival = 10;
break;

case CORINFO_INTRINSIC_Floor:
// NOTE(review): 9 (0b1001) presumably selects round toward -infinity with the
// precision exception suppressed - confirm against the instruction reference.
ival = 9;
break;

default:
// Any other intrinsic id violates assumption v).
ins = INS_invalid;
assert(!"genSSE41RoundOp: unsupported intrinsic");
unreached();
}

// Memory-operand forms: the source is either contained in this node or lives in a spill temp.
if (srcNode->isContained() || srcNode->isUsedFromSpillTemp())
{
emitter* emit = getEmitter();

// Sentinel values; the stack-based cases below must overwrite varNum/offset
// (or set tmpDsc) before the final emitIns_R_S_I call.
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
unsigned offset = (unsigned)-1;

if (srcNode->isUsedFromSpillTemp())
{
assert(srcNode->IsRegOptional());

// Read the operand straight from its spill temp at offset 0.
tmpDsc = getSpillTempDsc(srcNode);
varNum = tmpDsc->tdTempNum();
offset = 0;

// The temp descriptor is released here; its number is still used as the
// stack operand of the instruction emitted at the end of this branch.
compiler->tmpRlsTemp(tmpDsc);
}
else if (srcNode->isIndir())
{
GenTreeIndir* memIndir = srcNode->AsIndir();
GenTree* memBase = memIndir->gtOp1;

// Pick the emitter form based on what the indirection's address is.
switch (memBase->OperGet())
{
case GT_LCL_VAR_ADDR:
{
// Address of a local: use the stack-based form with the local's number.
varNum = memBase->AsLclVarCommon()->GetLclNum();
offset = 0;

// Ensure that all the GenTreeIndir values are set to their defaults.
assert(memBase->gtRegNum == REG_NA);
assert(!memIndir->HasIndex());
assert(memIndir->Scale() == 1);
assert(memIndir->Offset() == 0);

break;
}

case GT_CLS_VAR_ADDR:
{
// Address of a class variable: emit using its field handle and finish.
emit->emitIns_R_C_I(ins, size, dstReg, memBase->gtClsVar.gtClsVarHnd, 0, ival);
return;
}

default:
{
// Any other address: emit using the indirection's address mode and finish.
emit->emitIns_R_A_I(ins, size, dstReg, memIndir, ival);
return;
}
}
}
else
{
// Contained, non-indirection operand: a floating point constant or a local.
switch (srcNode->OperGet())
{
case GT_CNS_DBL:
{
// Obtain a field handle for the constant from the emitter and read the
// operand through that handle.
GenTreeDblCon* dblConst = srcNode->AsDblCon();
CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst->gtDconVal, emitTypeSize(dblConst));

emit->emitIns_R_C_I(ins, size, dstReg, hnd, 0, ival);
return;
}

case GT_LCL_FLD:
{
// Field of a local: stack-based form at the field's offset within the local.
GenTreeLclFld* lclField = srcNode->AsLclFld();

varNum = lclField->GetLclNum();
offset = lclField->gtLclFld.gtLclOffs;
break;
}

case GT_LCL_VAR:
{
// A local may be read from the stack only if it is reg-optional or is
// not a register candidate at all.
assert(srcNode->IsRegOptional() ||
!compiler->lvaTable[srcNode->gtLclVar.gtLclNum].lvIsRegCandidate());

varNum = srcNode->AsLclVar()->GetLclNum();
offset = 0;
break;
}

default:
unreached();
break;
}
}

// Ensure we got a good varNum and offset.
// We also need to check for `tmpDsc != nullptr` since spill temp numbers
// are negative and start with -1, which also happens to be BAD_VAR_NUM.
assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
assert(offset != (unsigned)-1);

// All cases that did not return early read the operand from a stack location.
emit->emitIns_R_S_I(ins, size, dstReg, varNum, offset, ival);
}
else
{
// Source is in a register: emit the register, register, immediate form.
inst_RV_RV_IV(ins, size, dstReg, srcNode->gtRegNum, ival);
}
}

//---------------------------------------------------------------------
// genIntrinsic - generate code for a given intrinsic
//
Expand Down Expand Up @@ -7361,6 +7528,12 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode)
genSSE2BitwiseOp(treeNode);
break;

case CORINFO_INTRINSIC_Round:
case CORINFO_INTRINSIC_Ceiling:
case CORINFO_INTRINSIC_Floor:
genSSE41RoundOp(treeNode->AsOp());
break;

default:
assert(!"genIntrinsic: Unsupported intrinsic");
unreached();
Expand Down
8 changes: 4 additions & 4 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3222,10 +3222,10 @@ class Compiler
unsigned* typeSize,
bool forReturn);

static bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId);
static bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId);
static bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId);
static bool IsMathIntrinsic(GenTreePtr tree);
bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId);
bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId);
bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId);
bool IsMathIntrinsic(GenTreePtr tree);

private:
//----------------- Importing the method ----------------------------------
Expand Down
3 changes: 3 additions & 0 deletions src/jit/emitfmtsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ IF_DEF(MRD_OFF, IS_GM_RD, DSP) // offset mem
IF_DEF(RRD_MRD, IS_GM_RD|IS_R1_RD, DSP) // read reg , read [mem]
IF_DEF(RWR_MRD, IS_GM_RD|IS_R1_WR, DSP) // write reg , read [mem]
IF_DEF(RRW_MRD, IS_GM_RD|IS_R1_RW, DSP) // r/w reg , read [mem]
IF_DEF(RRW_MRD_CNS, IS_GM_RD|IS_R1_RW, DSP_CNS) // r/w reg , read [mem], const
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The R_R_I instructions define and use RRW_RRW_CNS and I mirrored that here (but for mem and stk).

I actually expected the first register to be RWR (write-only, rather than read/write). Does anyone know why it was set as read/write? (Perhaps it is related to AVX support.)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear to me either why it is defined this way.


IF_DEF(RWR_RRD_MRD, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP) // write reg , read reg2 , read [mem]
IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR, DSP) // write reg , offset mem
Expand All @@ -147,6 +148,7 @@ IF_DEF(SRW, IS_SF_RW, NONE) // r/w [stk]
IF_DEF(RRD_SRD, IS_SF_RD|IS_R1_RD, NONE) // read reg , read [stk]
IF_DEF(RWR_SRD, IS_SF_RD|IS_R1_WR, NONE) // write reg , read [stk]
IF_DEF(RRW_SRD, IS_SF_RD|IS_R1_RW, NONE) // r/w reg , read [stk]
IF_DEF(RRW_SRD_CNS, IS_SF_RD|IS_R1_RW, CNS ) // r/w reg , read [stk], const

IF_DEF(RWR_RRD_SRD, IS_SF_RD|IS_R1_WR|IS_R2_RD, NONE) // write reg , read reg2, read [stk]

Expand All @@ -172,6 +174,7 @@ IF_DEF(ARW, IS_AM_RW, AMD ) // r/w [adr]
IF_DEF(RRD_ARD, IS_AM_RD|IS_R1_RD, AMD ) // read reg , read [adr]
IF_DEF(RWR_ARD, IS_AM_RD|IS_R1_WR, AMD ) // write reg , read [adr]
IF_DEF(RRW_ARD, IS_AM_RD|IS_R1_RW, AMD ) // r/w reg , read [adr]
IF_DEF(RRW_ARD_CNS, IS_AM_RD|IS_R1_RW, AMD_CNS) // r/w reg , read [adr], const

IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD ) // write reg , read reg2, read [adr]

Expand Down
Loading