Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Implement simple Sse2 hardware intrinsics #15585

Merged
merged 1 commit into from
Jan 31, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7975,7 +7975,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#endif
}

// Whether SSE3, SSE3, SSE4.1 and SSE4.2 is available
// Whether SSE3, SSSE3, SSE4.1 and SSE4.2 is available
bool CanUseSSE4() const
{
#ifdef _TARGET_XARCH_
Expand Down
18 changes: 17 additions & 1 deletion src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,15 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_paddb:
case INS_paddd:
case INS_paddq:
case INS_paddsb:
case INS_paddsw:
case INS_paddusb:
case INS_paddusw:
case INS_paddw:
case INS_pand:
case INS_pandn:
case INS_pavgb:
case INS_pavgw:
case INS_pcmpeqb:
case INS_pcmpeqd:
case INS_pcmpeqq:
Expand All @@ -135,6 +141,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_pcmpgtw:
case INS_phaddd:
case INS_pinsrw:
case INS_pmaddwd:
case INS_pmaxsb:
case INS_pmaxsd:
case INS_pmaxsw:
Expand All @@ -148,13 +155,20 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_pminud:
case INS_pminuw:
case INS_pmuldq:
case INS_pmulhuw:
case INS_pmulhw:
case INS_pmulld:
case INS_pmullw:
case INS_pmuludq:
case INS_por:
case INS_psadbw:
case INS_psubb:
case INS_psubd:
case INS_psubq:
case INS_psubsb:
case INS_psubsw:
case INS_psubusb:
case INS_psubusw:
case INS_psubw:
case INS_punpckhbw:
case INS_punpckhdq:
Expand All @@ -173,6 +187,8 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_subss:
case INS_unpckhps:
case INS_unpcklps:
case INS_unpckhpd:
case INS_unpcklpd:
case INS_vinsertf128:
case INS_vinserti128:
case INS_vperm2i128:
Expand Down Expand Up @@ -5432,7 +5448,7 @@ void emitter::emitIns_SIMD_R_R_S_I(
emitIns_R_S_I(ins, attr, reg, varx, offs, ival);
}
}
#endif
#endif // FEATURE_HW_INTRINSICS

/*****************************************************************************
*
Expand Down
14 changes: 14 additions & 0 deletions src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,10 @@ void GenTree::InitNodeSize()
#ifdef FEATURE_SIMD
static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL);
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
static_assert_no_msg(sizeof(GenTreeHWIntrinsic) <= TREE_NODE_SZ_SMALL);
#endif // FEATURE_HW_INTRINSICS
// clang-format on
}

Expand Down Expand Up @@ -8838,6 +8842,16 @@ unsigned GenTree::NumChildren()
}
return childCount;
}
#ifdef FEATURE_HW_INTRINSICS
// GT_HWIntrinsic require special handling
if (OperGet() == GT_HWIntrinsic)
{
if (gtOp.gtOp1 == nullptr)
{
return 0;
}
}
#endif
// Special case for assignment of dynamic block.
// This is here to duplicate the former case where the size may be evaluated prior to the
// source and destination addresses. In order to do this, we treat the size as a child of the
Expand Down
53 changes: 49 additions & 4 deletions src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else
{

emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
}
break;
Expand All @@ -98,7 +97,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
emit->emitIns_AR_R(ins, emitTypeSize(TYP_SIMD16), op2->gtRegNum, op1->gtRegNum, 0);
}
else if (ival != -1)
else if ((ival != -1) && varTypeIsFloating(baseType))
{
genHWIntrinsic_R_R_RM_I(node, ins);
}
Expand Down Expand Up @@ -677,13 +676,59 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
regNumber targetReg = node->gtRegNum;
var_types targetType = node->TypeGet();
var_types baseType = node->gtSIMDBaseType;
instruction ins = INS_invalid;
instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
emitter* emit = getEmitter();
int ival = -1;

genConsumeOperands(node);
if ((op1 != nullptr) && !op1->OperIsList())
{
op1Reg = op1->gtRegNum;
genConsumeOperands(node);
}

switch (intrinsicID)
{
// All integer overloads are handled by table codegen
case NI_SSE2_CompareLessThan:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to combine this with the Sse logic, since they should be the same apart from the instruction.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There will be even more cases where code could be combined/abstracted across all SSE, SSE2, SSE3, SSE4.1, and SSE4.2 hardware intrinsics. From what I have worked on so far, this will be the case for all scalar operations on double and, later on, for all integer operations.

I have created an issue to track this (#16014) and plan to work on refactoring the code in upcoming PRs.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems I have posted a duplicate comment :)

{
assert(op1 != nullptr);
assert(op2 != nullptr);
assert(baseType == TYP_DOUBLE);

op2Reg = op2->gtRegNum;
ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);

break;
}

case NI_SSE2_MoveMask:
{
assert(op2 == nullptr);
assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);

emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
break;
}

case NI_SSE2_SetZeroVector128:
{
assert(baseType != TYP_FLOAT);
assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
assert(op1 == nullptr);
assert(op2 == nullptr);

emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
break;
}

default:
unreached();
break;
Expand Down
Loading