Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

'cmeq' and 'fcmeq' Vector64<T>.Zero/Vector128<T>.Zero ARM64 containment optimizations #62933

Merged
merged 37 commits into from
Jan 20, 2022
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
7aca5dc
Initial work
TIHan Dec 16, 2021
2752913
Added a comma to display
TIHan Dec 16, 2021
526e6c8
Cleanup
TIHan Dec 16, 2021
d850426
Fixing build
TIHan Dec 17, 2021
0ee6450
More cleanup
TIHan Dec 17, 2021
cb5a82e
Update comment
TIHan Dec 17, 2021
6ae9a94
Update comment
TIHan Dec 17, 2021
5c1997f
Added CompareEqual Vector64/128 with Zero tests
TIHan Jan 4, 2022
b33c72c
Merge remote-tracking branch 'upstream/main' into vector-64-128-zero-…
TIHan Jan 4, 2022
fd19fdc
Do not contain op1 for now
TIHan Jan 4, 2022
178ff52
Wrong intrinsic id used
TIHan Jan 5, 2022
c0a23c0
Removing generated tests
TIHan Jan 6, 2022
ad780ea
Removing generated tests
TIHan Jan 6, 2022
83c26ab
Added CompareEqual tests
TIHan Jan 6, 2022
9f7e7da
Supporting containment for first operand
TIHan Jan 6, 2022
1e67415
Fix test build
TIHan Jan 6, 2022
25b9d92
Passing correct register
TIHan Jan 6, 2022
c0dc7e4
Check IsVectorZero before not allocing a register
TIHan Jan 7, 2022
a718944
Update comment
TIHan Jan 7, 2022
1f45fa2
Fixing test
TIHan Jan 7, 2022
cb872da
Minor format change
TIHan Jan 7, 2022
c0708d7
Fixed formatting
TIHan Jan 7, 2022
bf49d11
Renamed test
TIHan Jan 8, 2022
bc7a557
Adding AdvSimd_Arm64 tests:
TIHan Jan 10, 2022
5939f36
Adding support for rest of 'cmeq' and 'fcmeq' instructions
TIHan Jan 10, 2022
6cd0ea8
Removing github csproj
TIHan Jan 10, 2022
2b30421
Minor test fix
TIHan Jan 10, 2022
0828de6
Fixed tests
TIHan Jan 10, 2022
fa43d19
Fix print
TIHan Jan 10, 2022
911f929
Minor format change
TIHan Jan 10, 2022
b91be1e
Fixing test
TIHan Jan 11, 2022
c1a90b4
Added some emitter tests
TIHan Jan 18, 2022
32f86c1
Feedback
TIHan Jan 19, 2022
b08f552
Update emitarm64.cpp
TIHan Jan 19, 2022
c89e47b
Feedback
TIHan Jan 19, 2022
956e50a
Merge branch 'vector-64-128-zero-arm64-opts1' of github.com:TIHan/run…
TIHan Jan 19, 2022
2b526c9
Merge remote-tracking branch 'upstream' into vector-64-128-zero-arm64…
TIHan Jan 19, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1657,7 +1657,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
#ifdef FEATURE_SIMD
// (In)Equality operation that produces bool result, when compared
// against Vector zero, marks its Vector Zero operand as contained.
assert(tree->OperIsLeaf() || tree->IsSIMDZero());
assert(tree->OperIsLeaf() || tree->IsSIMDZero() || tree->IsVectorZero());
TIHan marked this conversation as resolved.
Show resolved Hide resolved
TIHan marked this conversation as resolved.
Show resolved Hide resolved
#else
assert(tree->OperIsLeaf());
#endif
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12990,6 +12990,11 @@ void emitter::emitDispIns(
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
}
if (ins == INS_cmeq)
{
printf(", ");
emitDispFloatZero();
}
break;

case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17807,6 +17807,20 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}

default:
{
return false;
}
}
#elif TARGET_ARM64
switch (AsHWIntrinsic()->GetHWIntrinsicId())
{
case NI_Vector64_get_Zero:
case NI_Vector128_get_Zero:
TIHan marked this conversation as resolved.
Show resolved Hide resolved
{
return true;
}

default:
{
return false;
Expand Down
77 changes: 75 additions & 2 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1730,6 +1730,8 @@ struct GenTree
inline bool IsIntegralConst(ssize_t constVal) const;
inline bool IsIntegralConstVector(ssize_t constVal) const;
inline bool IsSIMDZero() const;
inline bool IsFloatPositiveZero() const;
inline bool IsVectorZero() const;

inline bool IsBoxedValue();

Expand Down Expand Up @@ -2132,7 +2134,7 @@ struct GenTree

inline bool IsCnsFltOrDbl() const;

inline bool IsCnsNonZeroFltOrDbl();
inline bool IsCnsNonZeroFltOrDbl() const;

bool IsIconHandle() const
{
Expand Down Expand Up @@ -7689,6 +7691,77 @@ inline bool GenTree::IsSIMDZero() const
return false;
}

//-------------------------------------------------------------------
// IsFloatPositiveZero: returns true if this is exactly a const float value of postive zero (+0.0)
//
// Returns:
// True if this represents a const floating-point value of exactly positive zero (+0.0).
// Will return false if the value is negative zero (-0.0).
//
inline bool GenTree::IsFloatPositiveZero() const
{
return !(IsCnsNonZeroFltOrDbl());
}

//-------------------------------------------------------------------
// IsVectorZero: returns true if this is an integral or floating-point (SIMD or HW intrinsic) vector
// with all its elements equal to zero.
//
// Returns:
// True if this represents an integral or floating-point const (SIMD or HW intrinsic) vector with all its elements equal to zero.
//
// TODO: We already have IsSIMDZero() and IsIntegralConstVector(0),
// however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating
// point. In order to not risk adverse behaviour by modifying those, this function 'IsVectorZero' was introduced.
// At some point, it makes sense to normalize this logic to be a single function call rather than have several
// separate ones; preferably this one.
inline bool GenTree::IsVectorZero() const
{
#ifdef FEATURE_SIMD
if (gtOper == GT_SIMD)
{
const GenTreeSIMD* node = AsSIMD();

if (node->GetSIMDIntrinsicId() == SIMDIntrinsicInit)
{
return (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero());
}
}
#endif

#ifdef FEATURE_HW_INTRINSICS
if (gtOper == GT_HWINTRINSIC)
{
const GenTreeHWIntrinsic* node = AsHWIntrinsic();
const var_types simdBaseType = node->GetSimdBaseType();

if (varTypeIsIntegral(simdBaseType) || varTypeIsFloating(simdBaseType))
TIHan marked this conversation as resolved.
Show resolved Hide resolved
{
const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();

if (node->GetOperandCount() == 0)
{
#if defined(TARGET_XARCH)
return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
#elif defined(TARGET_ARM64)
return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
#endif // !TARGET_XARCH && !TARGET_ARM64
}
else if ((node->GetOperandCount() == 1) && (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero()))
{
#if defined(TARGET_XARCH)
return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create);
TIHan marked this conversation as resolved.
Show resolved Hide resolved
#elif defined(TARGET_ARM64)
return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create);
#endif // !TARGET_XARCH && !TARGET_ARM64
}
}
}
#endif // FEATURE_HW_INTRINSICS

return false;
}

inline bool GenTree::IsBoxedValue()
{
assert(gtOper != GT_BOX || AsBox()->BoxOp() != nullptr);
Expand Down Expand Up @@ -8353,7 +8426,7 @@ inline bool GenTree::IsCnsFltOrDbl() const
return OperGet() == GT_CNS_DBL;
}

inline bool GenTree::IsCnsNonZeroFltOrDbl()
inline bool GenTree::IsCnsNonZeroFltOrDbl() const
{
if (OperGet() == GT_CNS_DBL)
{
Expand Down
21 changes: 21 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,27 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

case NI_AdvSimd_CompareEqual:
if (intrin.op1->isContained())
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op1->IsVectorZero());

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
else if (intrin.op2->isContained())
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op2->IsVectorZero());

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
else
{
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
}
break;

case NI_AdvSimd_AbsoluteCompareLessThan:
case NI_AdvSimd_AbsoluteCompareLessThanOrEqual:
case NI_AdvSimd_CompareLessThan:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ HARDWARE_INTRINSIC(AdvSimd, BitwiseClear,
HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative)
TIHan marked this conversation as resolved.
Show resolved Hide resolved
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/instrsarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ INST4(neg, "neg", 0, IF_EN4G, 0x4B0003E0, 0x4B0003E0,
INST4(cmeq, "cmeq", 0, IF_EN4H, 0x7EE08C00, 0x2E208C00, 0x5E209800, 0x0E209800)
// cmeq Vd,Vn,Vm DV_3E 01111110111mmmmm 100011nnnnnddddd 7EE0 8C00 Vd,Vn,Vm (scalar)
// cmeq Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100011nnnnnddddd 2E20 8C00 Vd,Vn,Vm (vector)
// cmeq Vd,Vn DV_2L 01011110XX100000 100110nnnnnddddd 5E20 9800 Vd,Vn (scalar)
// cmeq Vd,Vn DV_2M 0Q001110XX100000 100110nnnnnddddd 0E20 9800 Vd,Vn (vector)
// cmeq Vd,Vn DV_2L 01011110XX100000 100110nnnnnddddd 5E20 9800 Vd,Vn,#0 (scalar - with zero)
// cmeq Vd,Vn DV_2M 0Q001110XX100000 100110nnnnnddddd 0E20 9800 Vd,Vn,#0 (vector - with zero)

INST4(cmge, "cmge", 0, IF_EN4H, 0x5EE03C00, 0x0E203C00, 0x7E208800, 0x2E208800)
// cmge Vd,Vn,Vm DV_3E 01011110111mmmmm 001111nnnnnddddd 5EE0 3C00 Vd,Vn,Vm (scalar)
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1853,6 +1853,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
MakeSrcContained(node, intrin.op4);
break;

case NI_AdvSimd_CompareEqual:
{
if (intrin.op1->IsVectorZero())
{
MakeSrcContained(node, intrin.op1);
}
else if (intrin.op2->IsVectorZero())
{
MakeSrcContained(node, intrin.op2);
}
break;
}

case NI_Vector64_CreateScalarUnsafe:
case NI_Vector128_CreateScalarUnsafe:
case NI_AdvSimd_DuplicateToVector64:
Expand Down
77 changes: 46 additions & 31 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1011,40 +1011,49 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)

if (intrin.op1 != nullptr)
{
bool simdRegToSimdRegMove = false;

if ((intrin.id == NI_Vector64_CreateScalarUnsafe) || (intrin.id == NI_Vector128_CreateScalarUnsafe))
{
simdRegToSimdRegMove = varTypeIsFloating(intrin.op1);
}
else if (intrin.id == NI_AdvSimd_Arm64_DuplicateToVector64)
// Do not give a register to the second operand if it is contained.
if (!hasImmediateOperand && intrin.op1->isContained())
TIHan marked this conversation as resolved.
Show resolved Hide resolved
{
simdRegToSimdRegMove = (intrin.op1->TypeGet() == TYP_DOUBLE);
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op1->IsVectorZero());
}
else if ((intrin.id == NI_Vector64_ToScalar) || (intrin.id == NI_Vector128_ToScalar))
else
{
simdRegToSimdRegMove = varTypeIsFloating(intrinsicTree);
}
bool simdRegToSimdRegMove = false;
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

// If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers,
// we want to preference op1Reg to the target if op1 is not contained.
if (isRMW || simdRegToSimdRegMove)
{
tgtPrefOp1 = !intrin.op1->isContained();
}
if ((intrin.id == NI_Vector64_CreateScalarUnsafe) || (intrin.id == NI_Vector128_CreateScalarUnsafe))
{
simdRegToSimdRegMove = varTypeIsFloating(intrin.op1);
}
else if (intrin.id == NI_AdvSimd_Arm64_DuplicateToVector64)
{
simdRegToSimdRegMove = (intrin.op1->TypeGet() == TYP_DOUBLE);
}
else if ((intrin.id == NI_Vector64_ToScalar) || (intrin.id == NI_Vector128_ToScalar))
{
simdRegToSimdRegMove = varTypeIsFloating(intrinsicTree);
}

if (intrinsicTree->OperIsMemoryLoadOrStore())
{
srcCount += BuildAddrUses(intrin.op1);
}
else if (tgtPrefOp1)
{
tgtPrefUse = BuildUse(intrin.op1);
srcCount++;
}
else
{
srcCount += BuildOperandUses(intrin.op1);
// If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers,
// we want to preference op1Reg to the target if op1 is not contained.
if (isRMW || simdRegToSimdRegMove)
{
tgtPrefOp1 = !intrin.op1->isContained();
}

if (intrinsicTree->OperIsMemoryLoadOrStore())
{
srcCount += BuildAddrUses(intrin.op1);
}
else if (tgtPrefOp1)
{
tgtPrefUse = BuildUse(intrin.op1);
srcCount++;
}
else
{
srcCount += BuildOperandUses(intrin.op1);
TIHan marked this conversation as resolved.
Show resolved Hide resolved
}
}
}

Expand Down Expand Up @@ -1091,9 +1100,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
}
}
}
else
else if (intrin.op2 != nullptr)
TIHan marked this conversation as resolved.
Show resolved Hide resolved
{
if (intrin.op2 != nullptr)
// Do not give a register to the second operand if it is contained.
if (!hasImmediateOperand && intrin.op2->isContained())
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op2->IsVectorZero());
}
else
{
// RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg
// (i.e. a register that corresponds to read-modify-write operand) and one of them is the last use.
Expand Down
Loading