Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit a7a6953

Browse files
committed
Optimize AVX Insert/Extract intrinsics
1 parent 2205498 commit a7a6953

File tree

2 files changed

+166
-153
lines changed

2 files changed

+166
-153
lines changed

src/jit/hwintrinsicxarch.cpp

+77
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,83 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
10711071

10721072
switch (intrinsic)
10731073
{
1074+
case NI_AVX_Extract:
1075+
{
1076+
// Avx.Extract executes its software implementation when the imm8 argument is not a compile-time constant
1077+
assert(!mustExpand);
1078+
1079+
GenTree* lastOp = impPopStack().val;
1080+
GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
1081+
assert(lastOp->IsCnsIntOrI());
1082+
int ival = (int)lastOp->AsIntCon()->IconValue();
1083+
baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
1084+
var_types retType = JITtype2varType(sig->retType);
1085+
assert(varTypeIsArithmetic(baseType));
1086+
1087+
ival = ival & (32 / genTypeSize(baseType) - 1); // clear the unused bits
1088+
int halfIndex = 16 / genTypeSize(baseType);
1089+
NamedIntrinsic extractIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Extract : NI_SSE41_Extract;
1090+
GenTree* half = nullptr;
1091+
1092+
if (ival >= halfIndex)
1093+
{
1094+
half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
1095+
baseType, 32);
1096+
ival -= halfIndex;
1097+
}
1098+
else
1099+
{
1100+
half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
1101+
}
1102+
1103+
retNode = gtNewSimdHWIntrinsicNode(retType, half, gtNewIconNode(ival), extractIntrinsic, baseType, 16);
1104+
break;
1105+
}
1106+
1107+
case NI_AVX_Insert:
1108+
{
1109+
// Avx.Insert executes its software implementation when the imm8 argument is not a compile-time constant
1110+
assert(!mustExpand);
1111+
1112+
GenTree* lastOp = impPopStack().val;
1113+
GenTree* dataOp = impPopStack().val;
1114+
GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
1115+
assert(lastOp->IsCnsIntOrI());
1116+
int ival = (int)lastOp->AsIntCon()->IconValue();
1117+
baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1118+
assert(varTypeIsArithmetic(baseType));
1119+
1120+
ival = ival & (32 / genTypeSize(baseType) - 1); // clear the unused bits
1121+
int halfIndex = 16 / genTypeSize(baseType);
1122+
NamedIntrinsic insertIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;
1123+
1124+
GenTree* clonedVectorOp;
1125+
vectorOp =
1126+
impCloneExpr(vectorOp, &clonedVectorOp, info.compCompHnd->getArgClass(sig, sig->args),
1127+
(unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("AVX Insert clones the vector operand"));
1128+
1129+
if (ival >= halfIndex)
1130+
{
1131+
GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1),
1132+
NI_AVX_ExtractVector128, baseType, 32);
1133+
GenTree* ModifiedHalfVector =
1134+
gtNewSimdHWIntrinsicNode(TYP_SIMD16, halfVector, dataOp, gtNewIconNode(ival - halfIndex),
1135+
insertIntrinsic, baseType, 16);
1136+
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(1),
1137+
NI_AVX_InsertVector128, baseType, 32);
1138+
}
1139+
else
1140+
{
1141+
GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
1142+
GenTree* ModifiedHalfVector =
1143+
gtNewSimdHWIntrinsicNode(TYP_SIMD32, halfVector, dataOp, gtNewIconNode(ival), insertIntrinsic,
1144+
baseType, 16);
1145+
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(15),
1146+
NI_AVX_Blend, TYP_FLOAT, 32);
1147+
}
1148+
break;
1149+
}
1150+
10741151
case NI_AVX_ExtractVector128:
10751152
case NI_AVX2_ExtractVector128:
10761153
{

0 commit comments

Comments
 (0)