@@ -5225,6 +5225,10 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
{
hwIntrinsicId = NI_SSE_ConvertToInt32WithTruncation;
}
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+ {
+ hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation;
+ }
else if (IsBaselineVector512IsaSupportedOpportunistically())
{
hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation;
@@ -5238,6 +5242,10 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
{
hwIntrinsicId = NI_SSE2_ConvertToInt32WithTruncation;
}
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+ {
+ hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation;
+ }
else if (IsBaselineVector512IsaSupportedOpportunistically())
{
hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation;
@@ -8784,7 +8792,12 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
case NI_System_Math_ReciprocalEstimate:
{
#if defined(TARGET_XARCH)
- if (compExactlyDependsOn(InstructionSet_AVX512F))
+ if (compExactlyDependsOn(InstructionSet_AVX10v1))
+ {
+ simdType = TYP_SIMD16;
+ intrinsicId = NI_AVX10v1_Reciprocal14Scalar;
+ }
+ else if (compExactlyDependsOn(InstructionSet_AVX512F))
{
simdType = TYP_SIMD16;
intrinsicId = NI_AVX512F_Reciprocal14Scalar;
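
For context, NI_AVX10v1_Reciprocal14Scalar maps to VRCP14SS, which computes an approximate reciprocal with a relative error of at most 2^-14; AVX10.1 exposes it without requiring 512-bit vector support. A minimal C# sketch of the managed-level equivalent, assuming the .NET 9 Avx10v1 API surface (class and variable names here are illustrative):

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class ReciprocalEstimateSketch
{
    static void Main()
    {
        float x = 4.0f;

        if (Avx10v1.IsSupported)
        {
            // Approximate reciprocal via VRCP14SS: relative error <= 2^-14.
            Vector128<float> v = Vector128.CreateScalarUnsafe(x);
            Console.WriteLine(Avx10v1.Reciprocal14Scalar(v).ToScalar()); // ~0.25
        }
        else
        {
            Console.WriteLine(1.0f / x); // exact scalar fallback
        }
    }
}
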
@@ -9234,7 +9247,167 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
}

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
- if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+ if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+ {
+ // We are constructing a chain of intrinsics similar to:
+ // var op1 = Vector128.CreateScalarUnsafe(x);
+ // var op2 = Vector128.CreateScalarUnsafe(y);
+ //
+ // var tmp = Avx10v1.RangeScalar(op1, op2, imm8);
+ // var tbl = Vector128.CreateScalarUnsafe(0x00);
+ //
+ // tmp = Avx10v1.FixupScalar(tmp, op2, tbl, 0x00);
+ // tmp = Avx10v1.FixupScalar(tmp, op1, tbl, 0x00);
+ //
+ // return tmp.ToScalar();
+
+ // RangeScalar operates by default almost as MaxNumber or MinNumber,
+ // but it propagates sNaN and does not propagate qNaN. So we need
+ // an additional fixup to ensure we propagate qNaN as well.
+
+ uint8_t imm8;
+
+ if (isMax)
+ {
+ if (isMagnitude)
+ {
+ // 0b01_11: Sign(CompareResult), Max-Abs Value
+ imm8 = 0x07;
+ }
+ else
+ {
+ // 0b01_01: Sign(CompareResult), Max Value
+ imm8 = 0x05;
+ }
+ }
+ else if (isMagnitude)
+ {
+ // 0b01_10: Sign(CompareResult), Min-Abs Value
+ imm8 = 0x06;
+ }
+ else
+ {
+ // 0b01_00: Sign(CompareResult), Min Value
+ imm8 = 0x04;
+ }
+
+ GenTree* op3 = gtNewIconNode(imm8);
+ GenTree* op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16);
+ GenTree* op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, impPopStack().val, callJitType, 16);
+
+ GenTree* op2Clone;
+ op2 = impCloneExpr(op2, &op2Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op2 for Math.Max/Min"));
+
+ GenTree* op1Clone;
+ op1 = impCloneExpr(op1, &op1Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op1 for Math.Max/Min"));
+
+ GenTree* tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_RangeScalar, callJitType, 16);
+
+ // FixupScalar(left, right, table, control) computes the input type of right,
+ // adjusts it based on the table, and then returns the result
+ //
+ // In our case, left is going to be the result of the RangeScalar operation,
+ // which is either sNaN or a normal value, and right is going to be op1 or op2.
+
+ GenTree* tbl1 = gtNewVconNode(TYP_SIMD16);
+ GenTree* tbl2;
+
+ // We currently have (commutative)
+ // * snan, snan = snan
+ // * snan, qnan = snan
+ // * snan, norm = snan
+ // * qnan, qnan = qnan
+ // * qnan, norm = norm
+ // * norm, norm = norm
+
+ if (isNumber)
+ {
+ // We need to fixup the case of:
+ // * snan, norm = snan
+ //
+ // Instead, it should be:
+ // * snan, norm = norm
+
+ // First look at op1 and op2 using op2 as the classification
+ //
+ // If op2 is norm, we take op2 (norm)
+ // If op2 is nan, we take op1 ( nan or norm)
+ //
+ // Thus, if one input was norm the fixup is now norm
+
+ // QNAN: 0b0000: Preserve left
+ // SNAN: 0b0000
+ // ZERO: 0b0001: Preserve right
+ // +ONE: 0b0001
+ // -INF: 0b0001
+ // +INF: 0b0001
+ // -VAL: 0b0001
+ // +VAL: 0b0001
+ tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x11111100;
+
+ // Next look at result and fixup using result as the classification
+ //
+ // If result is norm, we take the result (norm)
+ // If result is nan, we take the fixup ( nan or norm)
+ //
+ // Thus if either input was snan, we now have norm as expected
+ // Otherwise, the result was already correct
+
+ tbl1 = impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min"));
+
+ op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0),
+ NI_AVX10v1_FixupScalar, callJitType, 16);
+
+ tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, tmp, tbl2, gtNewIconNode(0), NI_AVX10v1_FixupScalar,
+ callJitType, 16);
+ }
+ else
+ {
+ // We need to fixup the case of:
+ // * qnan, norm = norm
+ //
+ // Instead, it should be:
+ // * qnan, norm = qnan
+
+ // First look at op1 and op2 using op2 as the classification
+ //
+ // If op2 is norm, we take op1 ( nan or norm)
+ // If op2 is snan, we take op1 ( nan or norm)
+ // If op2 is qnan, we take op2 (qnan)
+ //
+ // Thus, if either input was qnan the fixup is now qnan
+
+ // QNAN: 0b0001: Preserve right
+ // SNAN: 0b0000: Preserve left
+ // ZERO: 0b0000
+ // +ONE: 0b0000
+ // -INF: 0b0000
+ // +INF: 0b0000
+ // -VAL: 0b0000
+ // +VAL: 0b0000
+ tbl1->AsVecCon()->gtSimdVal.i32[0] = 0x00000001;
+
+ // Next look at result and fixup using fixup as the classification
+ //
+ // If fixup is norm, we take the result (norm)
+ // If fixup is sNaN, we take the result (sNaN)
+ // If fixup is qNaN, we take the fixup (qNaN)
+ //
+ // Thus if the fixup was qnan, we now have qnan as expected
+ // Otherwise, the result was already correct
+
+ tbl1 = impCloneExpr(tbl1, &tbl2, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning tbl for Math.Max/Min"));
+
+ op1Clone = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Clone, op2Clone, tbl1, gtNewIconNode(0),
+ NI_AVX10v1_FixupScalar, callJitType, 16);
+
+ tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp, op1Clone, tbl2, gtNewIconNode(0), NI_AVX10v1_FixupScalar,
+ callJitType, 16);
+ }
+
+ return gtNewSimdToScalarNode(callType, tmp, callJitType, 16);
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
{
// We are constructing a chain of intrinsics similar to:
// var op1 = Vector128.CreateScalarUnsafe(x);
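
For reference, the AVX10v1 tree built above corresponds roughly to the following managed sequence, sketched for the isMax && isNumber case (Math.MaxNumber semantics). This assumes the .NET 9 Avx10v1 API surface; MaxNumberSketch and its helper are illustrative, not the actual library implementation:

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class MaxNumberSketch
{
    // Illustrative equivalent of the isMax && isNumber tree above.
    static float MaxNumber(float x, float y)
    {
        Vector128<float> op1 = Vector128.CreateScalarUnsafe(x);
        Vector128<float> op2 = Vector128.CreateScalarUnsafe(y);

        // imm8 = 0x05 (0b01_01): Sign(CompareResult), Max Value.
        Vector128<float> tmp = Avx10v1.RangeScalar(op1, op2, 0x05);

        // One nibble per class of the right operand (QNAN, SNAN, ZERO, +ONE,
        // -INF, +INF, -VAL, +VAL, from the least significant nibble up):
        // 0b0000 keeps left, 0b0001 keeps right.
        Vector128<int> tbl = Vector128.CreateScalarUnsafe(0x11111100);

        // If op2 is a normal value, take op2; if op2 is NaN, keep op1.
        Vector128<float> fixup = Avx10v1.FixupScalar(op1, op2, tbl, 0x00);

        // If the range result is NaN, replace it with the fixup, which is
        // a normal value whenever at least one input was normal.
        tmp = Avx10v1.FixupScalar(fixup, tmp, tbl, 0x00);

        return tmp.ToScalar();
    }

    static void Main()
    {
        Console.WriteLine(MaxNumber(2.0f, 3.0f));      // 3
        Console.WriteLine(MaxNumber(1.0f, float.NaN)); // 1: qNaN not propagated
    }
}

The second FixupScalar pass is what upgrades RangeScalar's almost-MaxNumber behavior into full MaxNumber semantics: whenever the range result is NaN but at least one input was a normal value, the fixup computed from op1 and op2 supplies that normal value instead.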