Skip to content

Commit

Permalink
Floating point to integer conversions with saturating behavior on Intel IA
Browse files Browse the repository at this point in the history
  • Loading branch information
khushal1996 committed Mar 5, 2024
1 parent d150f78 commit 9294288
Show file tree
Hide file tree
Showing 17 changed files with 825 additions and 140 deletions.
11 changes: 7 additions & 4 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7667,21 +7667,24 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));

// We shouldn't be seeing uint64 here as it should have been converted
// into a helper call by either front-end or lowering phase.
assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
// into a helper call by either front-end or lowering phase, unless we have AVX512F
// accelerated conversions.
assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));

// If the dstType is TYP_UINT, all 32 bits are needed to encode the
// converted value, so the sign bit has to live in bit 33 or above.
// To achieve this we pretend that we are converting to a long.
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) &&
!compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
dstType = TYP_LONG;
}

// Note that we need to specify dstType here so that it will determine
// the size of destination integer register and also the rex.w prefix.
genConsumeOperands(treeNode->AsOp());
instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType));
instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
genProduceReg(treeNode);
}
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -3998,7 +3998,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
case INS_comiss:
case INS_cvtss2sd:
case INS_cvtss2si:
case INS_cvttss2si:
case INS_cvttss2si32:
case INS_cvttss2si64:
case INS_divss:
case INS_extractps:
case INS_insertps:
Expand Down Expand Up @@ -4041,7 +4042,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
case INS_comisd:
case INS_cvtsd2si:
case INS_cvtsd2ss:
case INS_cvttsd2si:
case INS_cvttsd2si32:
case INS_cvttsd2si64:
case INS_divsd:
case INS_maxsd:
case INS_minsd:
Expand Down
41 changes: 23 additions & 18 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1522,9 +1522,11 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
switch (ins)
{
case INS_cvtss2si:
case INS_cvttss2si:
case INS_cvttss2si32:
case INS_cvttss2si64:
case INS_cvtsd2si:
case INS_cvttsd2si:
case INS_cvttsd2si32:
case INS_cvttsd2si64:
case INS_movd:
case INS_movnti:
case INS_andn:
Expand All @@ -1544,7 +1546,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
#endif // TARGET_AMD64
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
case INS_vcvttsd2usi:
{
if (attr == EA_8BYTE)
{
Expand Down Expand Up @@ -2723,8 +2724,10 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_blsmsk:
case INS_blsr:
case INS_bzhi:
case INS_cvttsd2si:
case INS_cvttss2si:
case INS_cvttsd2si32:
case INS_cvttsd2si64:
case INS_cvttss2si32:
case INS_cvttss2si64:
case INS_cvtsd2si:
case INS_cvtss2si:
case INS_extractps:
Expand All @@ -2748,7 +2751,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
#endif
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
case INS_vcvttsd2usi:
case INS_vcvttsd2usi32:
case INS_vcvttsd2usi64:
case INS_vcvttss2usi32:
case INS_vcvttss2usi64:
{
Expand Down Expand Up @@ -11605,22 +11609,20 @@ void emitter::emitDispIns(
break;
}

case INS_cvttsd2si:
case INS_cvttsd2si32:
case INS_cvttsd2si64:
case INS_cvtss2si:
case INS_cvtsd2si:
case INS_cvttss2si:
case INS_cvttss2si32:
case INS_cvttss2si64:
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
case INS_vcvttsd2usi:
{
printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
break;
}

case INS_vcvttsd2usi32:
case INS_vcvttsd2usi64:
case INS_vcvttss2usi32:
case INS_vcvttss2usi64:
{
printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE));
printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
break;
}

Expand Down Expand Up @@ -19050,7 +19052,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
}

case INS_cvttsd2si:
case INS_cvttsd2si32:
case INS_cvttsd2si64:
case INS_cvtsd2si:
case INS_cvtsi2sd32:
case INS_cvtsi2ss32:
Expand All @@ -19059,7 +19062,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vcvtsd2usi:
case INS_vcvtusi2ss32:
case INS_vcvtusi2ss64:
case INS_vcvttsd2usi:
case INS_vcvttsd2usi32:
case INS_vcvttsd2usi64:
case INS_vcvttss2usi32:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_7C;
Expand All @@ -19071,7 +19075,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency += PERFSCORE_LATENCY_5C;
break;

case INS_cvttss2si:
case INS_cvttss2si32:
case INS_cvttss2si64:
case INS_cvtss2si:
case INS_vcvtss2usi:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ enum GenTreeFlags : unsigned int
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS

GTF_CONVERSION_SATURATED = 0x20000000, // GT_CAST -- conversion operation has saturation behavior
};

inline constexpr GenTreeFlags operator ~(GenTreeFlags a)
Expand Down Expand Up @@ -3886,6 +3888,16 @@ struct GenTreeCast : public GenTreeOp

return false;
}

bool IsSaturatedConversion()
{
return (gtFlags & GTF_CONVERSION_SATURATED) != 0;
}

// Marks this cast as a conversion with saturation behavior by setting
// GTF_CONVERSION_SATURATED in gtFlags; all other flag bits are left untouched.
void SetSaturatedConversion()
{
gtFlags |= GTF_CONVERSION_SATURATED;
}
};

// GT_BOX nodes are place markers for boxed values. The "real" tree
Expand Down
Loading

0 comments on commit 9294288

Please sign in to comment.