Skip to content

Commit d057ab9

Browse files
committed
Reduce the amount of copying required to evaluate vector constants
1 parent 1cca48e commit d057ab9

File tree

3 files changed

+83
-153
lines changed

3 files changed

+83
-153
lines changed

src/coreclr/jit/simd.h

+9-8
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
415415
}
416416

417417
template <typename TSimd, typename TBase>
418-
void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0)
418+
void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, const TSimd& arg0)
419419
{
420420
uint32_t count = sizeof(TSimd) / sizeof(TBase);
421421

@@ -445,7 +445,7 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0)
445445
}
446446

447447
template <typename TSimd>
448-
void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, TSimd arg0)
448+
void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, const TSimd& arg0)
449449
{
450450
switch (baseType)
451451
{
@@ -725,7 +725,7 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
725725
}
726726

727727
template <typename TSimd, typename TBase>
728-
void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0, TSimd arg1)
728+
void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, const TSimd& arg0, const TSimd& arg1)
729729
{
730730
uint32_t count = sizeof(TSimd) / sizeof(TBase);
731731

@@ -758,7 +758,8 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0,
758758
}
759759

760760
template <typename TSimd>
761-
void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, TSimd arg0, TSimd arg1)
761+
void EvaluateBinarySimd(
762+
genTreeOps oper, bool scalar, var_types baseType, TSimd* result, const TSimd& arg0, const TSimd& arg1)
762763
{
763764
switch (baseType)
764765
{
@@ -830,7 +831,7 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
830831
}
831832

832833
template <typename TSimd>
833-
double EvaluateGetElementFloating(var_types simdBaseType, TSimd arg0, int32_t arg1)
834+
double EvaluateGetElementFloating(var_types simdBaseType, const TSimd& arg0, int32_t arg1)
834835
{
835836
switch (simdBaseType)
836837
{
@@ -852,7 +853,7 @@ double EvaluateGetElementFloating(var_types simdBaseType, TSimd arg0, int32_t ar
852853
}
853854

854855
template <typename TSimd>
855-
int64_t EvaluateGetElementIntegral(var_types simdBaseType, TSimd arg0, int32_t arg1)
856+
int64_t EvaluateGetElementIntegral(var_types simdBaseType, const TSimd& arg0, int32_t arg1)
856857
{
857858
switch (simdBaseType)
858859
{
@@ -904,7 +905,7 @@ int64_t EvaluateGetElementIntegral(var_types simdBaseType, TSimd arg0, int32_t a
904905
}
905906

906907
template <typename TSimd>
907-
void EvaluateWithElementFloating(var_types simdBaseType, TSimd* result, TSimd arg0, int32_t arg1, double arg2)
908+
void EvaluateWithElementFloating(var_types simdBaseType, TSimd* result, const TSimd& arg0, int32_t arg1, double arg2)
908909
{
909910
*result = arg0;
910911

@@ -930,7 +931,7 @@ void EvaluateWithElementFloating(var_types simdBaseType, TSimd* result, TSimd ar
930931
}
931932

932933
template <typename TSimd>
933-
void EvaluateWithElementIntegral(var_types simdBaseType, TSimd* result, TSimd arg0, int32_t arg1, int64_t arg2)
934+
void EvaluateWithElementIntegral(var_types simdBaseType, TSimd* result, const TSimd& arg0, int32_t arg1, int64_t arg2)
934935
{
935936
*result = arg0;
936937

src/coreclr/jit/valuenum.cpp

+56-127
Original file line numberDiff line numberDiff line change
@@ -1856,33 +1856,33 @@ ValueNum ValueNumStore::VNForByrefCon(target_size_t cnsVal)
18561856
}
18571857

18581858
#if defined(FEATURE_SIMD)
1859-
ValueNum ValueNumStore::VNForSimd8Con(simd8_t cnsVal)
1859+
ValueNum ValueNumStore::VNForSimd8Con(const simd8_t& cnsVal)
18601860
{
18611861
return VnForConst(cnsVal, GetSimd8CnsMap(), TYP_SIMD8);
18621862
}
18631863

1864-
ValueNum ValueNumStore::VNForSimd12Con(simd12_t cnsVal)
1864+
ValueNum ValueNumStore::VNForSimd12Con(const simd12_t& cnsVal)
18651865
{
18661866
return VnForConst(cnsVal, GetSimd12CnsMap(), TYP_SIMD12);
18671867
}
18681868

1869-
ValueNum ValueNumStore::VNForSimd16Con(simd16_t cnsVal)
1869+
ValueNum ValueNumStore::VNForSimd16Con(const simd16_t& cnsVal)
18701870
{
18711871
return VnForConst(cnsVal, GetSimd16CnsMap(), TYP_SIMD16);
18721872
}
18731873

18741874
#if defined(TARGET_XARCH)
1875-
ValueNum ValueNumStore::VNForSimd32Con(simd32_t cnsVal)
1875+
ValueNum ValueNumStore::VNForSimd32Con(const simd32_t& cnsVal)
18761876
{
18771877
return VnForConst(cnsVal, GetSimd32CnsMap(), TYP_SIMD32);
18781878
}
18791879

1880-
ValueNum ValueNumStore::VNForSimd64Con(simd64_t cnsVal)
1880+
ValueNum ValueNumStore::VNForSimd64Con(const simd64_t& cnsVal)
18811881
{
18821882
return VnForConst(cnsVal, GetSimd64CnsMap(), TYP_SIMD64);
18831883
}
18841884

1885-
ValueNum ValueNumStore::VNForSimdMaskCon(simdmask_t cnsVal)
1885+
ValueNum ValueNumStore::VNForSimdMaskCon(const simdmask_t& cnsVal)
18861886
{
18871887
return VnForConst(cnsVal, GetSimdMaskCnsMap(), TYP_MASK);
18881888
}
@@ -2217,70 +2217,59 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ)
22172217
}
22182218

22192219
#ifdef FEATURE_SIMD
2220-
ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseType)
2220+
template <typename TSimd>
2221+
TSimd BroadcastConstantToSimd(ValueNumStore* vns, var_types baseType, ValueNum argVN)
22212222
{
2222-
assert(varTypeIsSIMD(simdType));
2223+
assert(vns->IsVNConstant(argVN));
2224+
assert(!varTypeIsSIMD(vns->TypeOfVN(argVN)));
22232225

2224-
simd_t simdVal = {};
2225-
int simdSize = genTypeSize(simdType);
2226+
TSimd result = {};
22262227

2227-
switch (simdBaseType)
2228+
switch (baseType)
22282229
{
2229-
case TYP_BYTE:
2230-
case TYP_UBYTE:
2230+
case TYP_FLOAT:
22312231
{
2232-
for (int i = 0; i < simdSize; i++)
2233-
{
2234-
simdVal.u8[i] = 1;
2235-
}
2232+
float arg = vns->GetConstantSingle(argVN);
2233+
BroadcastConstantToSimd<TSimd, float>(&result, arg);
22362234
break;
22372235
}
22382236

2239-
case TYP_SHORT:
2240-
case TYP_USHORT:
2237+
case TYP_DOUBLE:
22412238
{
2242-
for (int i = 0; i < (simdSize / 2); i++)
2243-
{
2244-
simdVal.u16[i] = 1;
2245-
}
2239+
double arg = vns->GetConstantDouble(argVN);
2240+
BroadcastConstantToSimd<TSimd, double>(&result, arg);
22462241
break;
22472242
}
22482243

2249-
case TYP_INT:
2250-
case TYP_UINT:
2244+
case TYP_BYTE:
2245+
case TYP_UBYTE:
22512246
{
2252-
for (int i = 0; i < (simdSize / 4); i++)
2253-
{
2254-
simdVal.u32[i] = 1;
2255-
}
2247+
uint8_t arg = static_cast<uint8_t>(vns->GetConstantInt32(argVN));
2248+
BroadcastConstantToSimd<TSimd, uint8_t>(&result, arg);
22562249
break;
22572250
}
22582251

2259-
case TYP_LONG:
2260-
case TYP_ULONG:
2252+
case TYP_SHORT:
2253+
case TYP_USHORT:
22612254
{
2262-
for (int i = 0; i < (simdSize / 8); i++)
2263-
{
2264-
simdVal.u64[i] = 1;
2265-
}
2255+
uint16_t arg = static_cast<uint16_t>(vns->GetConstantInt32(argVN));
2256+
BroadcastConstantToSimd<TSimd, uint16_t>(&result, arg);
22662257
break;
22672258
}
22682259

2269-
case TYP_FLOAT:
2260+
case TYP_INT:
2261+
case TYP_UINT:
22702262
{
2271-
for (int i = 0; i < (simdSize / 4); i++)
2272-
{
2273-
simdVal.f32[i] = 1.0f;
2274-
}
2263+
uint32_t arg = static_cast<uint32_t>(vns->GetConstantInt32(argVN));
2264+
BroadcastConstantToSimd<TSimd, uint32_t>(&result, arg);
22752265
break;
22762266
}
22772267

2278-
case TYP_DOUBLE:
2268+
case TYP_LONG:
2269+
case TYP_ULONG:
22792270
{
2280-
for (int i = 0; i < (simdSize / 8); i++)
2281-
{
2282-
simdVal.f64[i] = 1.0;
2283-
}
2271+
uint64_t arg = static_cast<uint64_t>(vns->GetConstantInt64(argVN));
2272+
BroadcastConstantToSimd<TSimd, uint64_t>(&result, arg);
22842273
break;
22852274
}
22862275

@@ -2290,42 +2279,46 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT
22902279
}
22912280
}
22922281

2282+
return result;
2283+
}
2284+
2285+
ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseType)
2286+
{
2287+
assert(varTypeIsSIMD(simdType));
2288+
2289+
ValueNum oneVN = VNOneForType(simdBaseType);
2290+
22932291
switch (simdType)
22942292
{
22952293
case TYP_SIMD8:
22962294
{
2297-
simd8_t simd8Val;
2298-
memcpy(&simd8Val, &simdVal, sizeof(simd8_t));
2299-
return VNForSimd8Con(simd8Val);
2295+
simd8_t result = BroadcastConstantToSimd<simd8_t>(this, simdBaseType, oneVN);
2296+
return VNForSimd8Con(result);
23002297
}
23012298

23022299
case TYP_SIMD12:
23032300
{
2304-
simd12_t simd12Val;
2305-
memcpy(&simd12Val, &simdVal, sizeof(simd12_t));
2306-
return VNForSimd12Con(simd12Val);
2301+
simd12_t result = BroadcastConstantToSimd<simd12_t>(this, simdBaseType, oneVN);
2302+
return VNForSimd12Con(result);
23072303
}
23082304

23092305
case TYP_SIMD16:
23102306
{
2311-
simd16_t simd16Val;
2312-
memcpy(&simd16Val, &simdVal, sizeof(simd16_t));
2313-
return VNForSimd16Con(simd16Val);
2307+
simd16_t result = BroadcastConstantToSimd<simd16_t>(this, simdBaseType, oneVN);
2308+
return VNForSimd16Con(result);
23142309
}
23152310

23162311
#if defined(TARGET_XARCH)
23172312
case TYP_SIMD32:
23182313
{
2319-
simd32_t simd32Val;
2320-
memcpy(&simd32Val, &simdVal, sizeof(simd32_t));
2321-
return VNForSimd32Con(simd32Val);
2314+
simd32_t result = BroadcastConstantToSimd<simd32_t>(this, simdBaseType, oneVN);
2315+
return VNForSimd32Con(result);
23222316
}
23232317

23242318
case TYP_SIMD64:
23252319
{
2326-
simd64_t simd64Val;
2327-
memcpy(&simd64Val, &simdVal, sizeof(simd64_t));
2328-
return VNForSimd64Con(simd64Val);
2320+
simd64_t result = BroadcastConstantToSimd<simd64_t>(this, simdBaseType, oneVN);
2321+
return VNForSimd64Con(result);
23292322
}
23302323

23312324
case TYP_MASK:
@@ -6870,71 +6863,6 @@ void ValueNumStore::SetVNIsCheckedBound(ValueNum vn)
68706863
}
68716864

68726865
#ifdef FEATURE_HW_INTRINSICS
6873-
template <typename TSimd>
6874-
TSimd BroadcastConstantToSimd(ValueNumStore* vns, var_types baseType, ValueNum argVN)
6875-
{
6876-
assert(vns->IsVNConstant(argVN));
6877-
assert(!varTypeIsSIMD(vns->TypeOfVN(argVN)));
6878-
6879-
TSimd result = {};
6880-
6881-
switch (baseType)
6882-
{
6883-
case TYP_FLOAT:
6884-
{
6885-
float arg = vns->GetConstantSingle(argVN);
6886-
BroadcastConstantToSimd<TSimd, float>(&result, arg);
6887-
break;
6888-
}
6889-
6890-
case TYP_DOUBLE:
6891-
{
6892-
double arg = vns->GetConstantDouble(argVN);
6893-
BroadcastConstantToSimd<TSimd, double>(&result, arg);
6894-
break;
6895-
}
6896-
6897-
case TYP_BYTE:
6898-
case TYP_UBYTE:
6899-
{
6900-
uint8_t arg = static_cast<uint8_t>(vns->GetConstantInt32(argVN));
6901-
BroadcastConstantToSimd<TSimd, uint8_t>(&result, arg);
6902-
break;
6903-
}
6904-
6905-
case TYP_SHORT:
6906-
case TYP_USHORT:
6907-
{
6908-
uint16_t arg = static_cast<uint16_t>(vns->GetConstantInt32(argVN));
6909-
BroadcastConstantToSimd<TSimd, uint16_t>(&result, arg);
6910-
break;
6911-
}
6912-
6913-
case TYP_INT:
6914-
case TYP_UINT:
6915-
{
6916-
uint32_t arg = static_cast<uint32_t>(vns->GetConstantInt32(argVN));
6917-
BroadcastConstantToSimd<TSimd, uint32_t>(&result, arg);
6918-
break;
6919-
}
6920-
6921-
case TYP_LONG:
6922-
case TYP_ULONG:
6923-
{
6924-
uint64_t arg = static_cast<uint64_t>(vns->GetConstantInt64(argVN));
6925-
BroadcastConstantToSimd<TSimd, uint64_t>(&result, arg);
6926-
break;
6927-
}
6928-
6929-
default:
6930-
{
6931-
unreached();
6932-
}
6933-
}
6934-
6935-
return result;
6936-
}
6937-
69386866
simd8_t GetConstantSimd8(ValueNumStore* vns, var_types baseType, ValueNum argVN)
69396867
{
69406868
assert(vns->IsVNConstant(argVN));
@@ -7126,7 +7054,7 @@ ValueNum EvaluateBinarySimd(ValueNumStore* vns,
71267054
}
71277055

71287056
template <typename TSimd>
7129-
ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd arg0, int32_t arg1)
7057+
ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, const TSimd& arg0, int32_t arg1)
71307058
{
71317059
switch (baseType)
71327060
{
@@ -7617,7 +7545,8 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree,
76177545
if (TypeOfVN(arg1VN) == TYP_SIMD16)
76187546
{
76197547
if ((ni != NI_AVX2_ShiftLeftLogicalVariable) && (ni != NI_AVX2_ShiftRightArithmeticVariable) &&
7620-
(ni != NI_AVX512F_VL_ShiftRightArithmeticVariable) && (ni != NI_AVX2_ShiftRightLogicalVariable))
7548+
(ni != NI_AVX512F_VL_ShiftRightArithmeticVariable) &&
7549+
(ni != NI_AVX10v1_ShiftRightArithmeticVariable) && (ni != NI_AVX2_ShiftRightLogicalVariable))
76217550
{
76227551
// The xarch shift instructions support taking the shift amount as
76237552
// a simd16, in which case they take the shift amount from the lower

0 commit comments

Comments
 (0)