@@ -143,6 +143,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
143
143
case INS_pminub:
144
144
case INS_pminud:
145
145
case INS_pminuw:
146
+ case INS_pmuldq:
146
147
case INS_pmulld:
147
148
case INS_pmullw:
148
149
case INS_pmuludq:
@@ -4135,6 +4136,43 @@ void emitter::emitIns_R_R_R_I(
4135
4136
emitCurIGsize += sz;
4136
4137
}
4137
4138
4139
+ static bool isAvxBlendv(instruction ins)
4140
+ {
4141
+ return ins == INS_vblendvps || ins == INS_vblendvpd || ins == INS_vpblendvb;
4142
+ }
4143
+
4144
+ static bool isSse41Blendv(instruction ins)
4145
+ {
4146
+ return ins == INS_blendvps || ins == INS_blendvpd || ins == INS_pblendvb;
4147
+ }
4148
+
4149
+ void emitter::emitIns_R_R_R_R(
4150
+ instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
4151
+ {
4152
+ assert(isAvxBlendv(ins));
4153
+ assert(UseVEXEncoding());
4154
+ // Currently vex prefix only use three bytes mode.
4155
+ // size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
4156
+ // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4157
+ UNATIVE_OFFSET sz = 6;
4158
+
4159
+ // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
4160
+ // which encodes the fourth register into imm8[7:4]
4161
+ int ival = (reg3 - XMMBASE) << 4; // convert reg3 to ival
4162
+
4163
+ instrDesc* id = emitNewInstrCns(attr, ival);
4164
+ id->idIns(ins);
4165
+ id->idInsFmt(IF_RWR_RRD_RRD_RRD);
4166
+ id->idReg1(targetReg);
4167
+ id->idReg2(reg1);
4168
+ id->idReg3(reg2);
4169
+ id->idReg4(reg3);
4170
+
4171
+ id->idCodeSize(sz);
4172
+ dispIns(id);
4173
+ emitCurIGsize += sz;
4174
+ }
4175
+
4138
4176
/*****************************************************************************
4139
4177
*
4140
4178
* Add an instruction with a register + static member operands.
@@ -5074,69 +5112,107 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
5074
5112
}
5075
5113
5076
5114
#if FEATURE_HW_INTRINSICS
5077
- void emitter::emitIns_SIMD_R_R_A(
5078
- instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype)
5115
+ void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir)
5079
5116
{
5080
5117
if (UseVEXEncoding())
5081
5118
{
5082
- emitIns_R_R_A(ins, emitTypeSize(simdtype) , reg, reg1, indir, IF_RWR_RRD_ARD);
5119
+ emitIns_R_R_A(ins, attr , reg, reg1, indir, IF_RWR_RRD_ARD);
5083
5120
}
5084
5121
else
5085
5122
{
5086
5123
if (reg1 != reg)
5087
5124
{
5088
- emitIns_R_R(INS_movaps, emitTypeSize(simdtype) , reg, reg1);
5125
+ emitIns_R_R(INS_movaps, attr , reg, reg1);
5089
5126
}
5090
- emitIns_R_A(ins, emitTypeSize(simdtype) , reg, indir, IF_RRW_ARD);
5127
+ emitIns_R_A(ins, attr , reg, indir, IF_RRW_ARD);
5091
5128
}
5092
5129
}
5093
5130
5094
5131
void emitter::emitIns_SIMD_R_R_C(
5095
- instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype)
5132
+ instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs)
5133
+ {
5134
+ if (UseVEXEncoding())
5135
+ {
5136
+ emitIns_R_R_C(ins, attr, reg, reg1, fldHnd, offs);
5137
+ }
5138
+ else
5139
+ {
5140
+ if (reg1 != reg)
5141
+ {
5142
+ emitIns_R_R(INS_movaps, attr, reg, reg1);
5143
+ }
5144
+ emitIns_R_C(ins, attr, reg, fldHnd, offs);
5145
+ }
5146
+ }
5147
+
5148
+ void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2)
5096
5149
{
5097
5150
if (UseVEXEncoding())
5098
5151
{
5099
- emitIns_R_R_C (ins, emitTypeSize(simdtype) , reg, reg1, fldHnd, offs );
5152
+ emitIns_R_R_R (ins, attr , reg, reg1, reg2 );
5100
5153
}
5101
5154
else
5102
5155
{
5103
5156
if (reg1 != reg)
5104
5157
{
5105
- emitIns_R_R(INS_movaps, emitTypeSize(simdtype) , reg, reg1);
5158
+ emitIns_R_R(INS_movaps, attr , reg, reg1);
5106
5159
}
5107
- emitIns_R_C (ins, emitTypeSize(simdtype) , reg, fldHnd, offs );
5160
+ emitIns_R_R (ins, attr , reg, reg2 );
5108
5161
}
5109
5162
}
5110
5163
5111
- void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype)
5164
+ void emitter::emitIns_SIMD_R_R_R_R(
5165
+ instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber reg3)
5112
5166
{
5167
+ assert(isAvxBlendv(ins) || isSse41Blendv(ins));
5113
5168
if (UseVEXEncoding())
5114
5169
{
5115
- emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2);
5170
+ // convert SSE encoding of SSE4.1 instructions to VEX encoding
5171
+ switch (ins)
5172
+ {
5173
+ case INS_blendvps:
5174
+ ins = INS_vblendvps;
5175
+ break;
5176
+ case INS_blendvpd:
5177
+ ins = INS_vblendvpd;
5178
+ break;
5179
+ case INS_pblendvb:
5180
+ ins = INS_vpblendvb;
5181
+ break;
5182
+ default:
5183
+ break;
5184
+ }
5185
+ emitIns_R_R_R_R(ins, attr, reg, reg1, reg2, reg3);
5116
5186
}
5117
5187
else
5118
5188
{
5189
+ assert(isSse41Blendv(ins));
5190
+ // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
5191
+ if (reg3 != REG_XMM0)
5192
+ {
5193
+ emitIns_R_R(INS_movaps, attr, REG_XMM0, reg3);
5194
+ }
5119
5195
if (reg1 != reg)
5120
5196
{
5121
- emitIns_R_R(INS_movaps, emitTypeSize(simdtype) , reg, reg1);
5197
+ emitIns_R_R(INS_movaps, attr , reg, reg1);
5122
5198
}
5123
- emitIns_R_R(ins, emitTypeSize(simdtype) , reg, reg2);
5199
+ emitIns_R_R(ins, attr , reg, reg2);
5124
5200
}
5125
5201
}
5126
5202
5127
- void emitter::emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype )
5203
+ void emitter::emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs)
5128
5204
{
5129
5205
if (UseVEXEncoding())
5130
5206
{
5131
- emitIns_R_R_S(ins, emitTypeSize(simdtype) , reg, reg1, varx, offs);
5207
+ emitIns_R_R_S(ins, attr , reg, reg1, varx, offs);
5132
5208
}
5133
5209
else
5134
5210
{
5135
5211
if (reg1 != reg)
5136
5212
{
5137
- emitIns_R_R(INS_movaps, emitTypeSize(simdtype) , reg, reg1);
5213
+ emitIns_R_R(INS_movaps, attr , reg, reg1);
5138
5214
}
5139
- emitIns_R_S(ins, emitTypeSize(simdtype) , reg, varx, offs);
5215
+ emitIns_R_S(ins, attr , reg, varx, offs);
5140
5216
}
5141
5217
}
5142
5218
#endif
@@ -7423,6 +7499,14 @@ void emitter::emitDispIns(
7423
7499
val = emitGetInsSC(id);
7424
7500
goto PRINT_CONSTANT;
7425
7501
break;
7502
+ case IF_RWR_RRD_RRD_RRD:
7503
+ assert(IsAVXOnlyInstruction(ins));
7504
+ assert(UseVEXEncoding());
7505
+ printf("%s, ", emitRegName(id->idReg1(), attr));
7506
+ printf("%s, ", emitRegName(id->idReg2(), attr));
7507
+ printf("%s, ", emitRegName(id->idReg3(), attr));
7508
+ printf("%s", emitRegName(id->idReg4(), attr));
7509
+ break;
7426
7510
case IF_RRW_RRW_CNS:
7427
7511
printf("%s,", emitRegName(id->idReg1(), attr));
7428
7512
printf(" %s", emitRegName(id->idReg2(), attr));
@@ -10046,7 +10130,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
10046
10130
10047
10131
instruction ins = id->idIns();
10048
10132
assert(IsAVXInstruction(ins));
10049
- assert(IsThreeOperandAVXInstruction(ins));
10133
+ assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins) );
10050
10134
regNumber targetReg = id->idReg1();
10051
10135
regNumber src1 = id->idReg2();
10052
10136
regNumber src2 = id->idReg3();
@@ -11312,6 +11396,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
11312
11396
sz = emitSizeOfInsDsc(id);
11313
11397
break;
11314
11398
case IF_RWR_RRD_RRD_CNS:
11399
+ case IF_RWR_RRD_RRD_RRD:
11315
11400
dst = emitOutputRRR(dst, id);
11316
11401
sz = emitSizeOfInsDsc(id);
11317
11402
dst += emitOutputByte(dst, emitGetInsSC(id));
0 commit comments