Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm64: Implement LoadVector64x* and LoadVector128x* APIs #92855

Merged
merged 19 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,19 @@ unsigned GenTree::GetMultiRegCount(Compiler* comp) const
return 1;
}

#ifdef TARGET_ARM64
//-----------------------------------------------------------------------------------
// NeedsConsecutiveRegisters: Reports whether this tree node requires its registers
//    to be consecutive.
//
// Return Value:
//    The result of HWIntrinsicInfo::NeedsConsecutiveRegisters() for the hardware
//    intrinsic id carried by this node.
//
// Notes:
//    The node is accessed through AsHWIntrinsic(); presumably callers only invoke
//    this on hardware intrinsic nodes -- TODO confirm.
//
bool GenTree::NeedsConsecutiveRegisters() const
{
    const auto intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();
    return HWIntrinsicInfo::NeedsConsecutiveRegisters(intrinsicId);
}
#endif

//---------------------------------------------------------------
// gtGetContainedRegMask: Get the reg mask of the node including
// contained nodes (recursive).
Expand Down Expand Up @@ -3438,7 +3451,7 @@ unsigned Compiler::gtHashValue(GenTree* tree)
hash += tree->AsHWIntrinsic()->GetSimdBaseType();
hash += tree->AsHWIntrinsic()->GetSimdSize();
hash += tree->AsHWIntrinsic()->GetAuxiliaryType();
hash += tree->AsHWIntrinsic()->GetOtherReg();
hash += tree->AsHWIntrinsic()->GetRegByIndex(1);
break;
#endif // FEATURE_HW_INTRINSICS

Expand Down Expand Up @@ -25538,11 +25551,24 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector64:
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_LoadVector64x2:
return compiler->typGetBlkLayout(16);

case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_Arm64_LoadVector128x2:
case NI_AdvSimd_LoadVector64x4:
return compiler->typGetBlkLayout(32);

case NI_AdvSimd_LoadVector64x3:
return compiler->typGetBlkLayout(24);

case NI_AdvSimd_Arm64_LoadVector128x3:
return compiler->typGetBlkLayout(48);

case NI_AdvSimd_Arm64_LoadVector128x4:
return compiler->typGetBlkLayout(64);

#endif // TARGET_ARM64

default:
Expand Down Expand Up @@ -25579,7 +25605,7 @@ void GenTreeHWIntrinsic::SetHWIntrinsicId(NamedIntrinsic intrinsicId)
{
return (op1->TypeGet() == op2->TypeGet()) && (op1->GetHWIntrinsicId() == op2->GetHWIntrinsicId()) &&
(op1->GetSimdBaseType() == op2->GetSimdBaseType()) && (op1->GetSimdSize() == op2->GetSimdSize()) &&
(op1->GetAuxiliaryType() == op2->GetAuxiliaryType()) && (op1->GetOtherReg() == op2->GetOtherReg()) &&
(op1->GetAuxiliaryType() == op2->GetAuxiliaryType()) && (op1->GetRegByIndex(1) == op2->GetRegByIndex(1)) &&
OperandsAreEqual(op1, op2);
}

Expand Down
67 changes: 60 additions & 7 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1836,6 +1836,10 @@ struct GenTree
// Sets the GTF flag equivalent for the regIndex'th register of a multi-reg node.
void SetRegSpillFlagByIdx(GenTreeFlags flags, int regIndex);

#ifdef TARGET_ARM64
bool NeedsConsecutiveRegisters() const;
#endif

// Last-use information for either GenTreeLclVar or GenTreeCopyOrReload nodes.
private:
GenTreeFlags GetLastUseBit(int regIndex) const;
Expand Down Expand Up @@ -3646,7 +3650,7 @@ struct GenTreeLclVar : public GenTreeLclVarCommon
}
else
{
gtOtherReg[regIndex - 1] = regNumberSmall(reg);
gtOtherReg[regIndex - 1] = (regNumberSmall)reg;
}
}

Expand Down Expand Up @@ -6085,15 +6089,66 @@ struct GenTreeJitIntrinsic : public GenTreeMultiOp
NamedIntrinsic gtHWIntrinsicId;

public:
regNumber GetOtherReg() const
//-----------------------------------------------------------
// GetRegNumByIdx: Get the regNumber at the idx'th position.
//
// Arguments:
// idx - register position.
//
// Return Value:
// Returns the regNumber assigned to the idx'th position.
//
regNumber GetRegNumByIdx(unsigned idx) const
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
#ifdef TARGET_ARM64
assert(idx < MAX_MULTIREG_COUNT);

if (idx == 0)
{
return GetRegNum();
}

if (NeedsConsecutiveRegisters())
{
assert(IsMultiRegNode());
return (regNumber)(GetRegNum() + idx);
}
#endif
// should only be used to get otherReg
assert(idx == 1);
return (regNumber)gtOtherReg;
}

void SetOtherReg(regNumber reg)
//-----------------------------------------------------------
// SetRegNumByIdx: Set the regNumber for the idx'th position.
//
// Arguments:
// reg - reg number
// idx - register position.
//
// Return Value:
// None.
//
void SetRegNumByIdx(regNumber reg, unsigned idx)
{
#ifdef TARGET_ARM64
assert(idx < MAX_MULTIREG_COUNT);

if (idx == 0)
{
SetRegNum(reg);
return;
}
if (NeedsConsecutiveRegisters())
{
assert(IsMultiRegNode());
assert(reg == (regNumber)(GetRegNum() + idx));
return;
}
#endif
// should only be used to set otherReg
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
assert(idx == 1);
gtOtherReg = (regNumberSmall)reg;
assert(gtOtherReg == reg);
}

GenTreeFlags GetRegSpillFlagByIdx(unsigned idx) const
Expand Down Expand Up @@ -9323,9 +9378,7 @@ inline regNumber GenTree::GetRegByIndex(int regIndex) const
#ifdef FEATURE_HW_INTRINSICS
if (OperIs(GT_HWINTRINSIC))
{
assert(regIndex == 1);
// TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers.
return AsHWIntrinsic()->GetOtherReg();
return AsHWIntrinsic()->GetRegNumByIdx(regIndex);
}
#endif // FEATURE_HW_INTRINSICS

Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,17 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_Arm64_LoadVector128x2:
return 2;

case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_Arm64_LoadVector128x3:
return 3;

case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_Arm64_LoadVector128x4:
return 4;
#endif

#ifdef TARGET_XARCH
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_Arm64_LoadVector128x2:
case NI_AdvSimd_Arm64_LoadVector128x3:
case NI_AdvSimd_Arm64_LoadVector128x4:
info.compNeedsConsecutiveRegisters = true;
FALLTHROUGH;
case NI_AdvSimd_Arm64_LoadPairScalarVector64:
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector128:
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,12 +732,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector64:
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, node->GetOtherReg(), op1Reg);
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, node->GetRegByIndex(1), op1Reg);
break;

case NI_AdvSimd_Arm64_LoadPairScalarVector64:
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:
GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), targetReg, node->GetOtherReg(), op1Reg);
GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), targetReg, node->GetRegByIndex(1),
op1Reg);
break;

case NI_AdvSimd_StoreSelectedScalar:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,9 @@ HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64,
HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector128, 16, 1, true, {INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_ld1r, INS_invalid, INS_invalid, INS_ld1r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64, 8, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AdvSimd, LoadVector128, 16, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x2, 8, 1, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x3, 8, 1, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, INS_ld3, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x4, 8, 1, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, Max, -1, 2, true, {INS_smax, INS_umax, INS_smax, INS_umax, INS_smax, INS_umax, INS_invalid, INS_invalid, INS_fmax, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AdvSimd, MaxNumber, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AdvSimd, MaxNumberScalar, 8, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm, INS_fmaxnm}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar)
Expand Down Expand Up @@ -573,6 +576,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector64,
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector64NonTemporal, 8, 1, true, {INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector128, 16, 1, true, {INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp, INS_ldp}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadPairVector128NonTemporal, 16, 1, true, {INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp, INS_ldnp}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiReg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x2, 16, 1, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x3, 16, 1, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x4, 16, 1, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Max, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmax}, HW_Category_SIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxAcross, -1, 1, true, {INS_smaxv, INS_umaxv, INS_smaxv, INS_umaxv, INS_smaxv, INS_umaxv, INS_invalid, INS_invalid, INS_fmaxv, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AdvSimd_Arm64, MaxNumber, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmaxnm}, HW_Category_SIMD, HW_Flag_Commutative)
Expand Down
4 changes: 1 addition & 3 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,7 @@ void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx)
#ifdef FEATURE_HW_INTRINSICS
else if (tree->OperIs(GT_HWINTRINSIC))
{
assert(regIdx == 1);
// TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers.
tree->AsHWIntrinsic()->SetOtherReg(reg);
tree->AsHWIntrinsic()->SetRegNumByIdx(reg, regIdx);
}
#endif // FEATURE_HW_INTRINSICS
else if (tree->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR))
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -2011,6 +2011,7 @@ class LinearScan : public LinearScanInterface
int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount);
#ifdef TARGET_ARM64
int BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr);
void BuildConsecutiveRegistersForDef(GenTree* treeNode, int fieldCount);
#endif // TARGET_ARM64
#endif // FEATURE_HW_INTRINSICS

Expand Down
Loading
Loading