Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm64/Sve: Add FFR register liveness tracking #105348

Merged
merged 14 commits into from
Jul 26, 2024
14 changes: 12 additions & 2 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1508,8 +1508,18 @@ void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
var_types targetType = tree->TypeGet();
regNumber targetReg = tree->GetRegNum();

inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true);
genTransferRegGCState(targetReg, tree->gtSrcReg);
#ifdef TARGET_ARM64
if (varTypeIsMask(targetType))
{
assert(tree->gtSrcReg == REG_FFR);
GetEmitter()->emitIns_R(INS_sve_rdffr, EA_SCALABLE, REG_FFR);
}
else
#endif
{
inst_Mov(targetType, targetReg, tree->gtSrcReg, /* canSkip */ true);
genTransferRegGCState(targetReg, tree->gtSrcReg);
}

genProduceReg(tree);
}
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4344,6 +4344,10 @@ class Compiler
#endif // defined(FEATURE_SIMD)

unsigned lvaGSSecurityCookie; // LclVar number
#ifdef TARGET_ARM64
unsigned lvaFfrRegister; // LclVar number
unsigned getFFRegisterVarNum();
#endif
bool lvaTempsHaveLargerOffsetThanVars();

// Returns "true" iff local variable "lclNum" is in SSA form.
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7448,7 +7448,11 @@ GenTreeIntCon* Compiler::gtNewFalse()
// return a new node representing the value in a physical register
GenTree* Compiler::gtNewPhysRegNode(regNumber reg, var_types type)
{
#ifdef TARGET_ARM64
assert(genIsValidIntReg(reg) || (reg == REG_SPBASE) || (reg == REG_FFR));
#else
assert(genIsValidIntReg(reg) || (reg == REG_SPBASE));
#endif
GenTree* result = new (this, GT_PHYSREG) GenTreePhysReg(reg, type);
return result;
}
Expand Down Expand Up @@ -11533,6 +11537,14 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons
ilKind = "cse";
ilNum = lclNum - optCSEstart;
}
#ifdef TARGET_ARM64
else if (lclNum == lvaFfrRegister)
{
// We introduce this LclVar in lowering, hence special case the printing of
// it instead of handling it in "rationalizer" below.
ilName = "FFReg";
}
#endif
else if (lclNum >= optCSEstart)
{
// Currently any new LclVar's introduced after the CSE phase
Expand Down
29 changes: 10 additions & 19 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2297,10 +2297,15 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

switch (intrinsic)
{
case NI_Sve_CreateBreakAfterMask:
case NI_Sve_CreateBreakAfterPropagateMask:
case NI_Sve_CreateBreakBeforeMask:
case NI_Sve_CreateBreakBeforePropagateMask:
{
// HWInstrinsic requires a mask for op3
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3));
FALLTHROUGH;
}
case NI_Sve_CreateBreakAfterMask:
case NI_Sve_CreateBreakBeforeMask:
case NI_Sve_CreateMaskForFirstActiveElement:
case NI_Sve_CreateMaskForNextActiveElement:
case NI_Sve_GetActiveElementCount:
Expand All @@ -2310,30 +2315,16 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
{
// HWInstrinsic requires a mask for op2
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(2));
break;
FALLTHROUGH;
}

default:
break;
}

switch (intrinsic)
{
case NI_Sve_CreateBreakAfterPropagateMask:
case NI_Sve_CreateBreakBeforePropagateMask:
{
// HWInstrinsic requires a mask for op3
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3));
// HWInstrinsic requires a mask for op1
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1));
break;
}

default:
break;
}

// HWInstrinsic requires a mask for op1
convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1));

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
assert(HWIntrinsicInfo::IsExplicitMaskedOperation(retNode->AsHWIntrinsic()->GetHWIntrinsicId()));
Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2366,6 +2366,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_LoadVectorFirstFaulting:
{
if (intrin.numOperands == 3)
{
// We have extra argument which means there is a "use" of FFR here. Restore it back in FFR register.
assert(op3Reg != REG_NA);
GetEmitter()->emitIns_R(INS_sve_wrffr, emitSize, op3Reg, opt);
}

insScalableOpts sopt = (opt == INS_OPTS_SCALABLE_B) ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_LSL_N;
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, REG_ZR, opt, sopt);
break;
}

case NI_Sve_SetFfr:
{
assert(targetReg == REG_NA);
GetEmitter()->emitIns_R(ins, emitSize, op1Reg, opt);
break;
}
case NI_Sve_ConditionalExtractAfterLastActiveElementScalar:
case NI_Sve_ConditionalExtractLastActiveElementScalar:
{
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend,
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GatherVectorWithByteOffsets, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation)
HARDWARE_INTRINSIC(Sve, GetFfrByte, -1, -1, false, {INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrSByte, -1, -1, false, {INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, GetFfrUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_rdffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialSideEffectMask)
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
HARDWARE_INTRINSIC(Sve, InsertIntoShiftedVector, -1, 2, true, {INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr, INS_sve_insr}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand All @@ -142,6 +150,7 @@ HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64,
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorFirstFaulting, -1, -1, false, {INS_sve_ldff1b, INS_sve_ldff1b, INS_sve_ldff1h, INS_sve_ldff1h, INS_sve_ldff1w, INS_sve_ldff1w, INS_sve_ldff1d, INS_sve_ldff1d, INS_sve_ldff1w, INS_sve_ldff1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialSideEffectMask)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorInt16NonFaultingSignExtendToUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down Expand Up @@ -237,6 +246,7 @@ HARDWARE_INTRINSIC(Sve, Scatter32BitNarrowing,
HARDWARE_INTRINSIC(Sve, Scatter32BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Scatter8BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SetFfr, -1, 1, true, {INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_sve_wrffr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialSideEffectMask|HW_Flag_SpecialCodeGen)
jakobbotsch marked this conversation as resolved.
Show resolved Hide resolved
HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, false, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,7 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumbe
unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
assert(varNum < compiler->lvaCount);
#if CPU_LOAD_STORE_ARCH
assert(GetEmitter()->emitInsIsStore(ins));
assert(GetEmitter()->emitInsIsStore(ins) || (ins == INS_sve_str));
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
#endif
GetEmitter()->emitIns_S_R(ins, size, reg, varNum, 0);
}
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ void Compiler::lvaInit()
#endif // JIT32_GCENCODER
lvaNewObjArrayArgs = BAD_VAR_NUM;
lvaGSSecurityCookie = BAD_VAR_NUM;
#ifdef TARGET_ARM64
lvaFfrRegister = BAD_VAR_NUM;
#endif
#ifdef TARGET_X86
lvaVarargsBaseOfStkArgs = BAD_VAR_NUM;
#endif // TARGET_X86
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,9 @@ GenTree* Lowering::LowerNode(GenTree* node)
{
return newNode;
}
#ifdef TARGET_ARM64
m_ffrTrashed = true;
#endif
}
break;

Expand Down Expand Up @@ -7902,6 +7905,7 @@ void Lowering::LowerBlock(BasicBlock* block)
m_block = block;
#ifdef TARGET_ARM64
m_blockIndirs.Reset();
m_ffrTrashed = true;
#endif

// NOTE: some of the lowering methods insert calls before the node being
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ class Lowering final : public Phase
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
void LowerModPow2(GenTree* node);
bool TryLowerAddForPossibleContainment(GenTreeOp* node, GenTree** next);
void CreateFFRDefIfNeeded(GenTreeHWIntrinsic* node);
void StoreFFRValue(GenTreeHWIntrinsic* node);
#endif // !TARGET_XARCH && !TARGET_ARM64
GenTree* InsertNewSimdCreateScalarUnsafeNode(var_types type,
GenTree* op1,
Expand Down Expand Up @@ -629,6 +631,7 @@ class Lowering final : public Phase
}
};
ArrayStack<SavedIndir> m_blockIndirs;
bool m_ffrTrashed;
#endif
};

Expand Down
Loading
Loading