
Commit af22395

Lower AVX10v1 hwintrinsic in lowering and gentree.cpp for simdSize 32/16

1 parent: ab2c78e

11 files changed: +936 -224 lines

src/coreclr/jit/compiler.h (+2 -1)

```diff
@@ -9502,7 +9502,8 @@ class Compiler
     //
     bool canUseEvexEncoding() const
     {
-        return compOpportunisticallyDependsOn(InstructionSet_AVX512F);
+        return (compOpportunisticallyDependsOn(InstructionSet_AVX512F) ||
+                compOpportunisticallyDependsOn(InstructionSet_AVX10v1));
    }
 
 private:
```
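
For context, a minimal standalone sketch (not JIT code) of the pattern this change extends: either ISA now satisfies the EVEX gate. `DependsOn` and `main` are illustrative stand-ins; in the real compiler, `compOpportunisticallyDependsOn` also records the ISA dependency for the method being compiled.

```cpp
// Minimal sketch, assuming a simplified ISA query: either AVX512F or
// AVX10v1 is sufficient to enable EVEX encodings.
#include <cstdio>

enum class Isa { AVX512F, AVX10v1 };

// Stand-in for compOpportunisticallyDependsOn; pretend only AVX10v1 is present.
static bool DependsOn(Isa isa)
{
    return isa == Isa::AVX10v1;
}

// Mirrors the updated predicate: AVX10v1 now also implies EVEX support.
static bool CanUseEvexEncoding()
{
    return DependsOn(Isa::AVX512F) || DependsOn(Isa::AVX10v1);
}

int main()
{
    std::printf("EVEX available: %s\n", CanUseEvexEncoding() ? "yes" : "no");
    return 0;
}
```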

src/coreclr/jit/gentree.cpp (+465 -103)

Large diffs are not rendered by default.

src/coreclr/jit/hwintrinsiccodegenxarch.cpp (+5 -0)

```diff
@@ -1447,6 +1447,8 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic*
 
         case NI_AVX512F_ConvertToInt32:
         case NI_AVX512F_ConvertToUInt32:
+        case NI_AVX10v1_ConvertToInt32:
+        case NI_AVX10v1_ConvertToUInt32:
 #if defined(TARGET_AMD64)
         case NI_AVX512F_X64_ConvertToInt64:
         case NI_AVX512F_X64_ConvertToUInt64:
@@ -2833,6 +2835,9 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption
         case NI_AVX512F_X64_ConvertToInt64:
         case NI_AVX512F_X64_ConvertToUInt64:
         case NI_AVX512F_X64_ConvertToUInt64WithTruncation:
+        case NI_AVX10v1_ConvertToInt32:
+        case NI_AVX10v1_ConvertToUInt32:
+        case NI_AVX10v1_ConvertToUInt32WithTruncation:
         {
             assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT);
             emitAttr attr = emitTypeSize(targetType);
```

src/coreclr/jit/hwintrinsiclistxarch.h (+85 -85)

Large diffs are not rendered by default.

src/coreclr/jit/hwintrinsicxarch.cpp (+43 -7)

```diff
@@ -1458,7 +1458,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 1);
             assert(varTypeIsLong(simdBaseType));
-            if (IsBaselineVector512IsaSupportedOpportunistically())
+            if ((simdSize != 64) && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+            {
+                if (simdSize == 32)
+                {
+                    intrinsic = NI_AVX10v1_ConvertToVector256Double;
+                }
+                else
+                {
+                    assert(simdSize == 16);
+                    intrinsic = NI_AVX10v1_ConvertToVector128Double;
+                }
+                op1     = impSIMDPopStack();
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
+            }
+            else if (IsBaselineVector512IsaSupportedOpportunistically())
             {
                 if (simdSize == 64)
                 {
@@ -1513,7 +1527,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_DOUBLE);
 
-            if (IsBaselineVector512IsaSupportedOpportunistically())
+            if (IsBaselineVector512IsaSupportedOpportunistically() ||
+                (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)))
             {
                 op1     = impSIMDPopStack();
                 retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
@@ -1528,7 +1543,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_DOUBLE);
 
-            if (IsBaselineVector512IsaSupportedOpportunistically())
+            if (IsBaselineVector512IsaSupportedOpportunistically() ||
+                (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)))
             {
                 op1     = impSIMDPopStack();
                 retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize);
@@ -1560,6 +1576,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
                         unreached();
                 }
             }
+            else if (simdBaseType == TYP_UINT && simdSize != 64 &&
+                     compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+            {
+                switch (simdSize)
+                {
+                    case 16:
+                        intrinsic = NI_AVX10v1_ConvertToVector128Single;
+                        break;
+                    case 32:
+                        intrinsic = NI_AVX10v1_ConvertToVector256Single;
+                        break;
+                    default:
+                        unreached();
+                }
+            }
             else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())
             {
                 switch (simdSize)
@@ -1592,7 +1623,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_FLOAT);
 
-            if (IsBaselineVector512IsaSupportedOpportunistically())
+            if (IsBaselineVector512IsaSupportedOpportunistically() ||
+                (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)))
             {
                 op1     = impSIMDPopStack();
                 retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
@@ -1607,7 +1639,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_FLOAT);
 
-            if (IsBaselineVector512IsaSupportedOpportunistically())
+            if (IsBaselineVector512IsaSupportedOpportunistically() ||
+                (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)))
             {
                 op1     = impSIMDPopStack();
                 retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize);
@@ -1621,7 +1654,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_DOUBLE);
-            if (IsBaselineVector512IsaSupportedOpportunistically())
+            if (IsBaselineVector512IsaSupportedOpportunistically() ||
+                (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)))
             {
                 op1     = impSIMDPopStack();
                 retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
@@ -1636,7 +1670,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             assert(simdBaseType == TYP_DOUBLE);
 
-            if (IsBaselineVector512IsaSupportedOpportunistically())
+            if (IsBaselineVector512IsaSupportedOpportunistically() ||
+                (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)))
             {
                 op1     = impSIMDPopStack();
                 retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize);
@@ -3630,6 +3665,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
         case NI_AVX512F_FixupScalar:
         case NI_AVX512F_VL_Fixup:
         case NI_AVX10v1_Fixup:
+        case NI_AVX10v1_FixupScalar:
         {
             assert(sig->numArgs == 4);
```
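The hunks above all repeat one selection pattern: for 128- and 256-bit vectors, prefer the AVX10v1 intrinsic; only 512-bit operations still require the AVX-512 baseline. A compilable sketch of that dispatch follows; the AVX10v1 names match the commit, while the AVX-512 fallback ids are hypothetical placeholders for whatever the elided else-branch selects.

```cpp
// Sketch of the simdSize-based ISA dispatch used throughout this file.
// AVX10v1 covers only the 16- and 32-byte cases; 64 bytes needs AVX-512.
#include <cassert>

enum class Intrinsic
{
    AVX10v1_ConvertToVector128Double,
    AVX10v1_ConvertToVector256Double,
    AVX512_Fallback128, // hypothetical fallback id
    AVX512_Fallback256, // hypothetical fallback id
    AVX512_Fallback512, // hypothetical fallback id
};

static Intrinsic SelectLongToDouble(unsigned simdSize, bool hasAvx10v1, bool hasAvx512Baseline)
{
    // Same shape as the first hunk: AVX10v1 wins for the narrow sizes.
    if ((simdSize != 64) && hasAvx10v1)
    {
        return (simdSize == 32) ? Intrinsic::AVX10v1_ConvertToVector256Double
                                : Intrinsic::AVX10v1_ConvertToVector128Double;
    }
    assert(hasAvx512Baseline);
    switch (simdSize)
    {
        case 16:
            return Intrinsic::AVX512_Fallback128;
        case 32:
            return Intrinsic::AVX512_Fallback256;
        default:
            assert(simdSize == 64);
            return Intrinsic::AVX512_Fallback512;
    }
}

int main()
{
    // A 256-bit conversion on an AVX10v1-only machine picks the AVX10v1 form.
    return (SelectLongToDouble(32, true, false) == Intrinsic::AVX10v1_ConvertToVector256Double) ? 0 : 1;
}
```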
src/coreclr/jit/importercalls.cpp

+175-2
Original file line numberDiff line numberDiff line change
@@ -5225,6 +5225,10 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
52255225
{
52265226
hwIntrinsicId = NI_SSE_ConvertToInt32WithTruncation;
52275227
}
5228+
else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
5229+
{
5230+
hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation;
5231+
}
52285232
else if (IsBaselineVector512IsaSupportedOpportunistically())
52295233
{
52305234
hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation;
@@ -5238,6 +5242,10 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
52385242
{
52395243
hwIntrinsicId = NI_SSE2_ConvertToInt32WithTruncation;
52405244
}
5245+
else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
5246+
{
5247+
hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation;
5248+
}
52415249
else if (IsBaselineVector512IsaSupportedOpportunistically())
52425250
{
52435251
hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation;
@@ -8784,7 +8792,12 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
87848792
case NI_System_Math_ReciprocalEstimate:
87858793
{
87868794
#if defined(TARGET_XARCH)
8787-
if (compExactlyDependsOn(InstructionSet_AVX512F))
8795+
if (compExactlyDependsOn(InstructionSet_AVX10v1))
8796+
{
8797+
simdType = TYP_SIMD16;
8798+
intrinsicId = NI_AVX10v1_Reciprocal14Scalar;
8799+
}
8800+
else if (compExactlyDependsOn(InstructionSet_AVX512F))
87888801
{
87898802
simdType = TYP_SIMD16;
87908803
intrinsicId = NI_AVX512F_Reciprocal14Scalar;
@@ -9234,7 +9247,167 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
92349247
}
92359248

92369249
#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
9237-
if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
9250+
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
9251+
{
9252+
// We are constructing a chain of intrinsics similar to:
9253+
// var op1 = Vector128.CreateScalarUnsafe(x);
9254+
// var op2 = Vector128.CreateScalarUnsafe(y);
9255+
//
9256+
// var tmp = Avx10v1.RangeScalar(op1, op2, imm8);
9257+
// var tbl = Vector128.CreateScalarUnsafe(0x00);
9258+
//
9259+
// tmp = Avx10v1.FixupScalar(tmp, op2, tbl, 0x00);
9260+
// tmp = Avx10v1.FixupScalar(tmp, op1, tbl, 0x00);
9261+
//
9262+
// return tmp.ToScalar();
9263+
9264+
// RangeScalar operates by default almost as MaxNumber or MinNumber
9265+
// but, it propagates sNaN and does not propagate qNaN. So we need
9266+
// an additional fixup to ensure we propagate qNaN as well.
9267+
9268+
uint8_t imm8;
9269+
9270+
if (isMax)
9271+
{
9272+
if (isMagnitude)
9273+
{
9274+
// 0b01_11: Sign(CompareResult), Max-Abs Value
9275+
imm8 = 0x07;
9276+
}
9277+
else
9278+
{
9279+
// 0b01_01: Sign(CompareResult), Max Value
9280+
imm8 = 0x05;
9281+
}
9282+
}
9283+
else if (isMagnitude)
9284+
{
9285+
// 0b01_10: Sign(CompareResult), Min-Abs Value
9286+
imm8 = 0x06;
9287+
}
9288+
else
9289+
{
9290+
// 0b01_00: Sign(CompareResult), Min Value
9291+
imm8 = 0x04;
9292+
}
9293+
9294+
GenTree* op3 = gtNewIconNode(imm8);
9295+
GenTree* op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16);
9296+
GenTree* op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16);
9297+
9298+
GenTree* op2Clone;
9299+
op2 = impCloneExpr(op2, &op2Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op2 for Math.Max/Min"));
9300+
9301+
GenTree* op1Clone;
9302+
op1 = impCloneExpr(op1, &op1Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op1 for Math.Max/Min"));
9303+
9304+
GenTree* tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_RangeScalar, callJitType, 16);
9305+
9306+
// FixupScalar(left, right, table, control) computes the input type of right
9307+
// adjusts it based on the table and then returns
9308+
//
9309+
// In our case, left is going to be the result of the RangeScalar operation,
9310+
// which is either sNaN or a normal value, and right is going to be op1 or op2.
9311+
9312+
GenTree* tbl1 = gtNewVconNode(TYP_SIMD16);
9313+
GenTree* tbl2;
9314+
9315+
// We currently have (commutative)
9316+
// * snan, snan = snan
9317+
// * snan, qnan = snan
9318+
// * snan, norm = snan
9319+
// * qnan, qnan = qnan
9320+
// * qnan, norm = norm
9321+
// * norm, norm = norm
9322+
9323+
if (isNumber)
9324+
{
9325+
// We need to fixup the case of:
9326+
// * snan, norm = snan
9327+
//
9328+
// Instead, it should be:
9329+
// * snan, norm = norm
9330+
9331+
// First look at op1 and op2 using op2 as the classification
9332+
//
9333+
// If op2 is norm, we take op2 (norm)
9334+
// If op2 is nan, we take op1 ( nan or norm)
9335+
//
9336+
// Thus, if one input was norm the fixup is now norm
9337+
9338+
// QNAN: 0b0000: Preserve left
9339+
// SNAN: 0b0000
9340+
// ZERO: 0b0001: Preserve right
9341+
// +ONE: 0b0001
9342+
// -INF: 0b0001
9343+
// +INF: 0b0001
9344+
// -VAL: 0b0001
9345+
// +VAL: 0b0001
9346+
tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x11111100;
9347+
9348+
// Next look at result and fixup using result as the classification
9349+
//
9350+
// If result is norm, we take the result (norm)
9351+
// If result is nan, we take the fixup ( nan or norm)
9352+
//
9353+
// Thus if either input was snan, we now have norm as expected
9354+
// Otherwise, the result was already correct
9355+
9356+
tbl1 = impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min"));
9357+
9358+
op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0),
9359+
NI_AVX10v1_FixupScalar, callJitType, 16);
9360+
9361+
tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, tmp, tbl2, gtNewIconNode(0), NI_AVX10v1_FixupScalar,
9362+
callJitType, 16);
9363+
}
9364+
else
9365+
{
9366+
// We need to fixup the case of:
9367+
// * qnan, norm = norm
9368+
//
9369+
// Instead, it should be:
9370+
// * qnan, norm = qnan
9371+
9372+
// First look at op1 and op2 using op2 as the classification
9373+
//
9374+
// If op2 is norm, we take op1 ( nan or norm)
9375+
// If op2 is snan, we take op1 ( nan or norm)
9376+
// If op2 is qnan, we take op2 (qnan)
9377+
//
9378+
// Thus, if either input was qnan the fixup is now qnan
9379+
9380+
// QNAN: 0b0001: Preserve right
9381+
// SNAN: 0b0000: Preserve left
9382+
// ZERO: 0b0000
9383+
// +ONE: 0b0000
9384+
// -INF: 0b0000
9385+
// +INF: 0b0000
9386+
// -VAL: 0b0000
9387+
// +VAL: 0b0000
9388+
tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x00000001;
9389+
9390+
// Next look at result and fixup using fixup as the classification
9391+
//
9392+
// If fixup is norm, we take the result (norm)
9393+
// If fixup is sNaN, we take the result (sNaN)
9394+
// If fixup is qNaN, we take the fixup (qNaN)
9395+
//
9396+
// Thus if the fixup was qnan, we now have qnan as expected
9397+
// Otherwise, the result was already correct
9398+
9399+
tbl1 = impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min"));
9400+
9401+
op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0),
9402+
NI_AVX10v1_FixupScalar, callJitType, 16);
9403+
9404+
tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp, op1Clone, tbl2, gtNewIconNode(0), NI_AVX10v1_FixupScalar,
9405+
callJitType, 16);
9406+
}
9407+
9408+
return gtNewSimdToScalarNode(callType, tmp, callJitType, 16);
9409+
}
9410+
else if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
92389411
{
92399412
// We are constructing a chain of intrinsics similar to:
92409413
// var op1 = Vector128.CreateScalarUnsafe(x);
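
The new AVX10v1 branch mirrors the existing AVX512DQ path: a RangeScalar whose imm8 selects the compare, then FixupScalar passes to repair NaN propagation. As a standalone check of the constants, here is an illustrative helper (values taken from the hunk above; the helper itself is a sketch, not JIT code):

```cpp
// RangeScalar imm8 values from the hunk above: bits [3:2] = 0b01 select
// Sign(CompareResult); bits [1:0] pick Min/Max and whether magnitude is used.
#include <cstdint>
#include <cstdio>

static uint8_t RangeScalarImm8(bool isMax, bool isMagnitude)
{
    if (isMax)
    {
        return isMagnitude ? 0x07  // 0b01_11: Sign(CompareResult), Max-Abs Value
                           : 0x05; // 0b01_01: Sign(CompareResult), Max Value
    }
    return isMagnitude ? 0x06  // 0b01_10: Sign(CompareResult), Min-Abs Value
                       : 0x04; // 0b01_00: Sign(CompareResult), Min Value
}

// FixupScalar control tables from the same hunk: one 4-bit selector per input
// class (QNAN in the lowest nibble), 0b0000 = keep left, 0b0001 = take right.
static const uint32_t kTableForNumber    = 0x11111100; // MaxNumber/MinNumber: NaNs keep left
static const uint32_t kTableForPropagate = 0x00000001; // Max/Min: only qNaN takes right

int main()
{
    std::printf("MaxMagnitude imm8 = 0x%02X\n", RangeScalarImm8(true, true));
    std::printf("fixup tables: 0x%08X 0x%08X\n", kTableForNumber, kTableForPropagate);
    return 0;
}
```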

src/coreclr/jit/importervectorization.cpp (+4 -2)

```diff
@@ -161,7 +161,8 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(
     GenTreeVecCon* toLowerVec2 = gtNewVconNode(simdType, (BYTE*)toLowerMask + byteLen - simdSize);
 
 #if defined(TARGET_XARCH)
-    if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+    if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL) ||
+        compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
     {
         GenTree* control;
 
@@ -185,7 +186,8 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(
     // ((v1 ^ cns1) | (v2 ^ cns2)) == zero
 
 #if defined(TARGET_XARCH)
-    if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+    if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL) ||
+        compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
     {
         GenTree* control;
```
