@@ -1071,6 +1071,83 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
1071
1071
1072
1072
switch (intrinsic)
1073
1073
{
1074
+ case NI_AVX_Extract:
1075
+ {
1076
+ // Avx.Extract executes its software implementation when the imm8 argument is not a compile-time constant
1077
+ assert (!mustExpand);
1078
+
1079
+ GenTree* lastOp = impPopStack ().val ;
1080
+ GenTree* vectorOp = impSIMDPopStack (TYP_SIMD32);
1081
+ assert (lastOp->IsCnsIntOrI ());
1082
+ int ival = (int )lastOp->AsIntCon ()->IconValue ();
1083
+ baseType = getBaseTypeOfSIMDType (info.compCompHnd ->getArgClass (sig, sig->args ));
1084
+ var_types retType = JITtype2varType (sig->retType );
1085
+ assert (varTypeIsArithmetic (baseType));
1086
+
1087
+ ival = ival & (32 / genTypeSize (baseType) - 1 ); // clear the unused bits
1088
+ int halfIndex = 16 / genTypeSize (baseType);
1089
+ NamedIntrinsic extractIntrinsic = varTypeIsShort (baseType) ? NI_SSE2_Extract : NI_SSE41_Extract;
1090
+ GenTree* half = nullptr ;
1091
+
1092
+ if (ival >= halfIndex)
1093
+ {
1094
+ half = gtNewSimdHWIntrinsicNode (TYP_SIMD16, vectorOp, gtNewIconNode (1 ), NI_AVX_ExtractVector128,
1095
+ baseType, 32 );
1096
+ ival -= halfIndex;
1097
+ }
1098
+ else
1099
+ {
1100
+ half = gtNewSimdHWIntrinsicNode (TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32 );
1101
+ }
1102
+
1103
+ retNode = gtNewSimdHWIntrinsicNode (retType, half, gtNewIconNode (ival), extractIntrinsic, baseType, 16 );
1104
+ break ;
1105
+ }
1106
+
1107
+ case NI_AVX_Insert:
1108
+ {
1109
+ // Avx.Insert executes its software implementation when the imm8 argument is not a compile-time constant
1110
+ assert (!mustExpand);
1111
+
1112
+ GenTree* lastOp = impPopStack ().val ;
1113
+ GenTree* dataOp = impPopStack ().val ;
1114
+ GenTree* vectorOp = impSIMDPopStack (TYP_SIMD32);
1115
+ assert (lastOp->IsCnsIntOrI ());
1116
+ int ival = (int )lastOp->AsIntCon ()->IconValue ();
1117
+ baseType = getBaseTypeOfSIMDType (sig->retTypeSigClass );
1118
+ assert (varTypeIsArithmetic (baseType));
1119
+
1120
+ ival = ival & (32 / genTypeSize (baseType) - 1 ); // clear the unused bits
1121
+ int halfIndex = 16 / genTypeSize (baseType);
1122
+ NamedIntrinsic insertIntrinsic = varTypeIsShort (baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;
1123
+
1124
+ GenTree* clonedVectorOp;
1125
+ vectorOp =
1126
+ impCloneExpr (vectorOp, &clonedVectorOp, info.compCompHnd ->getArgClass (sig, sig->args ),
1127
+ (unsigned )CHECK_SPILL_ALL, nullptr DEBUGARG (" AVX Insert clones the vector operand" ));
1128
+
1129
+ if (ival >= halfIndex)
1130
+ {
1131
+ GenTree* halfVector = gtNewSimdHWIntrinsicNode (TYP_SIMD16, vectorOp, gtNewIconNode (1 ),
1132
+ NI_AVX_ExtractVector128, baseType, 32 );
1133
+ GenTree* ModifiedHalfVector =
1134
+ gtNewSimdHWIntrinsicNode (TYP_SIMD16, halfVector, dataOp, gtNewIconNode (ival - halfIndex),
1135
+ insertIntrinsic, baseType, 16 );
1136
+ retNode = gtNewSimdHWIntrinsicNode (TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode (1 ),
1137
+ NI_AVX_InsertVector128, baseType, 32 );
1138
+ }
1139
+ else
1140
+ {
1141
+ GenTree* halfVector = gtNewSimdHWIntrinsicNode (TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32 );
1142
+ GenTree* ModifiedHalfVector =
1143
+ gtNewSimdHWIntrinsicNode (TYP_SIMD32, halfVector, dataOp, gtNewIconNode (ival), insertIntrinsic,
1144
+ baseType, 16 );
1145
+ retNode = gtNewSimdHWIntrinsicNode (TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode (15 ),
1146
+ NI_AVX_Blend, TYP_FLOAT, 32 );
1147
+ }
1148
+ break ;
1149
+ }
1150
+
1074
1151
case NI_AVX_ExtractVector128:
1075
1152
case NI_AVX2_ExtractVector128:
1076
1153
{
0 commit comments