Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose remaining Avx512 integer intrinsics which don't require VectorMask #86130

Merged
merged 4 commits into from
May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genAESIntrinsic(GenTreeHWIntrinsic* node);
void genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node);
void genFMAIntrinsic(GenTreeHWIntrinsic* node);
void genPermuteVar2x(GenTreeHWIntrinsic* node);
void genLZCNTIntrinsic(GenTreeHWIntrinsic* node);
void genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node);
void genPOPCNTIntrinsic(GenTreeHWIntrinsic* node);
Expand Down
97 changes: 68 additions & 29 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,42 @@ bool emitter::IsBMIInstruction(instruction ins)
return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
}

//------------------------------------------------------------------------
// IsPermuteVar2xInstruction: Classifies an instruction as belonging (or not)
//    to the Avx512 permutex2var family, i.e. the vpermi2* and vpermt2* forms.
//
// Arguments:
//    ins - The instruction to classify.
//
// Return Value:
//    `true` when `ins` is one of the vpermi2*/vpermt2* instructions listed
//    in the switch below; `false` for any other instruction.
//
bool emitter::IsPermuteVar2xInstruction(instruction ins)
{
    bool isPermuteVar2x = false;

    switch (ins)
    {
        // vpermi2* forms, ordered by element width
        case INS_vpermi2b:
        case INS_vpermi2w:
        case INS_vpermi2d:
        case INS_vpermi2q:
        case INS_vpermi2ps:
        case INS_vpermi2pd:

        // vpermt2* forms, ordered by element width
        case INS_vpermt2b:
        case INS_vpermt2w:
        case INS_vpermt2d:
        case INS_vpermt2q:
        case INS_vpermt2ps:
        case INS_vpermt2pd:
            isPermuteVar2x = true;
            break;

        default:
            break;
    }

    return isPermuteVar2x;
}

regNumber emitter::getBmiRegNumber(instruction ins)
{
switch (ins)
Expand Down Expand Up @@ -8250,7 +8286,7 @@ void emitter::emitIns_SIMD_R_R_S_I(
void emitter::emitIns_SIMD_R_R_R_A(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
{
assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseSimdEncoding());

// Ensure we aren't overwriting op2
Expand All @@ -8260,31 +8296,6 @@ void emitter::emitIns_SIMD_R_R_R_A(
emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir);
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_AR: emits the code for a SIMD instruction that takes two register operands, a base memory
// register, and that returns a value in register
//
// Arguments:
// ins -- The instruction being emitted
// attr -- The emit attribute
// targetReg -- The target register
// op1Reg -- The register of the first operand
// op2Reg -- The register of the second operand
// base -- The base register used for the memory address
//
// Notes:
// The value in op1Reg is first moved into targetReg, and `ins` is then emitted with
// targetReg, op2Reg and the memory operand addressed through `base`.
//
void emitter::emitIns_SIMD_R_R_R_AR(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base)
{
// Only FMA instructions are accepted by this helper, and the emitter must be using the SIMD encoding.
assert(IsFMAInstruction(ins));
assert(UseSimdEncoding());

// Ensure we aren't overwriting op2
// The move below writes targetReg, so op2Reg may only alias targetReg when op1Reg aliases it as well.
assert((op2Reg != targetReg) || (op1Reg == targetReg));

// Place op1 in the target register; the move is marked canSkip so the emitter may omit it
// (presumably when targetReg and op1Reg are the same register -- confirm against emitIns_Mov).
emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
// Emit the instruction itself; the trailing 0 is presumably the displacement applied to `base`.
emitIns_R_R_AR(ins, attr, targetReg, op2Reg, base, 0);
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_C: emits the code for a SIMD instruction that takes two register operands, a field handle +
// offset, and that returns a value in register
Expand All @@ -8306,7 +8317,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
CORINFO_FIELD_HANDLE fldHnd,
int offs)
{
assert(IsFMAInstruction(ins));
assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseSimdEncoding());

// Ensure we aren't overwriting op2
Expand All @@ -8331,7 +8342,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
void emitter::emitIns_SIMD_R_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
{
if (IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins))
if (IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins))
{
assert(UseSimdEncoding());

Expand Down Expand Up @@ -8399,7 +8410,7 @@ void emitter::emitIns_SIMD_R_R_R_R(
void emitter::emitIns_SIMD_R_R_R_S(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
{
assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseSimdEncoding());

// Ensure we aren't overwriting op2
Expand Down Expand Up @@ -18207,6 +18218,22 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
}

case INS_vpermi2b:
case INS_vpermt2b:
{
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency += PERFSCORE_LATENCY_5C;
break;
}

case INS_vpermi2w:
case INS_vpermt2w:
{
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency += PERFSCORE_LATENCY_7C;
break;
}

case INS_vpmovdb:
case INS_vpmovdw:
case INS_vpmovqb:
Expand Down Expand Up @@ -18644,6 +18671,18 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vinserti32x8:
case INS_vinserti64x2:
case INS_vinserti64x4:
case INS_vpermi2d:
case INS_vpermi2pd:
case INS_vpermi2ps:
case INS_vpermi2q:
case INS_vpermt2d:
case INS_vpermt2pd:
case INS_vpermt2ps:
case INS_vpermt2q:
case INS_vshuff32x4:
case INS_vshuff64x2:
case INS_vshufi32x4:
case INS_vshufi64x2:
{
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_3C;
Expand Down
3 changes: 1 addition & 2 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ static bool IsAvx512OrPriorInstruction(instruction ins);
static bool IsAVXOnlyInstruction(instruction ins);
static bool IsAvx512OnlyInstruction(instruction ins);
static bool IsFMAInstruction(instruction ins);
static bool IsPermuteVar2xInstruction(instruction ins);
static bool IsAVXVNNIInstruction(instruction ins);
static bool IsBMIInstruction(instruction ins);
static bool IsKInstruction(instruction ins);
Expand Down Expand Up @@ -710,8 +711,6 @@ void emitIns_SIMD_R_R_S_I(
#ifdef FEATURE_HW_INTRINSICS
void emitIns_SIMD_R_R_R_A(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir);
void emitIns_SIMD_R_R_R_AR(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base);
void emitIns_SIMD_R_R_R_C(instruction ins,
emitAttr attr,
regNumber targetReg,
Expand Down
58 changes: 18 additions & 40 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19311,34 +19311,13 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
return HWIntrinsicInfo::HasRMWSemantics(intrinsicId);
}

switch (intrinsicId)
if (HWIntrinsicInfo::IsRmwIntrinsic(intrinsicId))
{
// TODO-XArch-Cleanup: Move this switch block to be table driven.

case NI_AVX512F_FusedMultiplyAdd:
case NI_AVX512F_FusedMultiplyAddNegated:
case NI_AVX512F_FusedMultiplyAddSubtract:
case NI_AVX512F_FusedMultiplySubtract:
case NI_AVX512F_FusedMultiplySubtractAdd:
case NI_AVX512F_FusedMultiplySubtractNegated:
case NI_SSE42_Crc32:
case NI_SSE42_X64_Crc32:
case NI_FMA_MultiplyAdd:
case NI_FMA_MultiplyAddNegated:
case NI_FMA_MultiplyAddNegatedScalar:
case NI_FMA_MultiplyAddScalar:
case NI_FMA_MultiplyAddSubtract:
case NI_FMA_MultiplySubtract:
case NI_FMA_MultiplySubtractAdd:
case NI_FMA_MultiplySubtractNegated:
case NI_FMA_MultiplySubtractNegatedScalar:
case NI_FMA_MultiplySubtractScalar:
case NI_X86Base_DivRem:
case NI_X86Base_X64_DivRem:
{
return true;
}
return true;
}

switch (intrinsicId)
{
case NI_AVX512F_Fixup:
case NI_AVX512F_FixupScalar:
case NI_AVX512F_VL_Fixup:
Expand Down Expand Up @@ -26115,40 +26094,33 @@ bool GenTreeLclVar::IsNeverNegative(Compiler* comp) const

#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
//------------------------------------------------------------------------
// GetResultOpNumForFMA: check if the result is written into one of the operands.
// GetResultOpNumForRmwIntrinsic: check if the result is written into one of the operands.
// If none of the operands is overwritten, check whether any of them is a last use.
//
// Return Value:
// The operand number overwritten or lastUse. 0 is the default value, where the result is written into
// a destination that is not one of the source operands and there is no last use op.
//
unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3)
unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3)
{
#if defined(DEBUG)
// only FMA intrinsic node should call into this function
if (HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) != InstructionSet_FMA)
{
assert((gtHWIntrinsicId >= NI_AVX512F_FusedMultiplyAdd) &&
(gtHWIntrinsicId <= NI_AVX512F_FusedMultiplySubtractNegated));
assert((NI_AVX512F_FusedMultiplySubtractNegated - NI_AVX512F_FusedMultiplyAdd) + 1 == 6);
}
#endif // DEBUG
assert(HWIntrinsicInfo::IsFmaIntrinsic(gtHWIntrinsicId) || HWIntrinsicInfo::IsPermuteVar2x(gtHWIntrinsicId));

if (use != nullptr && use->OperIs(GT_STORE_LCL_VAR))
{
// For store_lcl_var, check if any op is overwritten

GenTreeLclVarCommon* overwritten = use->AsLclVarCommon();
unsigned overwrittenLclNum = overwritten->GetLclNum();
if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum)

if (op1->IsLocal() && (op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum))
{
return 1;
}
else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum)
else if (op2->IsLocal() && (op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum))
{
return 2;
}
else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum)
else if (op3->IsLocal() && (op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum))
{
return 3;
}
Expand All @@ -26158,11 +26130,17 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge
// https://github.com/dotnet/runtime/issues/62215

if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0))
{
return 1;
}
else if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0))
{
return 2;
}
else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0))
{
return 3;
}

return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6217,7 +6217,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
bool OperRequiresAsgFlag() const;
bool OperRequiresCallFlag() const;

unsigned GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3);
unsigned GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3);

ClassLayout* GetLayout(Compiler* compiler) const;

Expand Down
31 changes: 31 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,17 @@ enum HWIntrinsicFlag : unsigned int
// MaybeNoJmpTable IMM
// the imm intrinsic may not need jumptable fallback when it gets non-const argument
HW_Flag_MaybeNoJmpTableIMM = 0x800000,

#if defined(TARGET_XARCH)
// The intrinsic is an RMW intrinsic
HW_Flag_RmwIntrinsic = 0x1000000,

// The intrinsic is a FusedMultiplyAdd intrinsic
HW_Flag_FmaIntrinsic = 0x2000000,

// The intrinsic is a PermuteVar2x intrinsic
HW_Flag_PermuteVar2x = 0x4000000,
#endif // TARGET_XARCH
};

#if defined(TARGET_XARCH)
Expand Down Expand Up @@ -983,6 +994,26 @@ struct HWIntrinsicInfo
HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_MaybeNoJmpTableIMM) != 0;
}

#if defined(TARGET_XARCH)
// Returns true when the flags looked up for `id` have HW_Flag_RmwIntrinsic set.
static bool IsRmwIntrinsic(NamedIntrinsic id)
{
    return (lookupFlags(id) & HW_Flag_RmwIntrinsic) != 0;
}

// Returns true when the flags looked up for `id` have HW_Flag_FmaIntrinsic set.
static bool IsFmaIntrinsic(NamedIntrinsic id)
{
    return (lookupFlags(id) & HW_Flag_FmaIntrinsic) != 0;
}

// Returns true when the flags looked up for `id` have HW_Flag_PermuteVar2x set.
static bool IsPermuteVar2x(NamedIntrinsic id)
{
    return (lookupFlags(id) & HW_Flag_PermuteVar2x) != 0;
}
#endif // TARGET_XARCH
};

#ifdef TARGET_ARM64
Expand Down
Loading