Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose helper APIS for GetLower/Upper and WithLower/Upper #83982

Merged
merged 13 commits into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5670,6 +5670,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
case NI_SSE41_X64_Extract:
case NI_AVX_ExtractVector128:
case NI_AVX2_ExtractVector128:
case NI_AVX512F_ExtractVector256:
{
// These intrinsics are "ins reg/mem, xmm, imm8"
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
Expand Down
26 changes: 26 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2700,6 +2700,18 @@ class Compiler
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdGetLowerNode(var_types type,
GenTree* op1,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdGetUpperNode(var_types type,
GenTree* op1,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdLoadNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);

Expand Down Expand Up @@ -2773,6 +2785,20 @@ class Compiler
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWithLowerNode(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTree* gtNewSimdWithUpperNode(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isSimdAsHWIntrinsic);

GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID);
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID);
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type,
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -3350,6 +3350,7 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
// Note: vextractf128 has a 128-bit output (register or memory) but a 256-bit input (register).
// vinsertf128 is the inverse with a 256-bit output (register), a 256-bit input(register),
// and a 128-bit input (register or memory).
// Similarly, vextractf64x4 has a 256-bit output and 128-bit input and vinsertf64x4 the inverse
// This method is mainly used for such instructions to return the appropriate memory operand
// size, otherwise returns the regular operand size of the instruction.

Expand Down Expand Up @@ -3492,6 +3493,18 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
return EA_16BYTE;
}

case INS_vextractf32x8:
case INS_vextracti32x8:
case INS_vextractf64x4:
case INS_vextracti64x4:
case INS_vinsertf32x8:
case INS_vinserti32x8:
case INS_vinsertf64x4:
case INS_vinserti64x4:
{
return EA_32BYTE;
}

case INS_movddup:
{
if (defaultSize == 32)
Expand Down
84 changes: 73 additions & 11 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6357,7 +6357,11 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN
case INS_pextrw_sse41:
case INS_extractps:
case INS_vextractf128:
case INS_vextractf32x8:
case INS_vextractf64x4:
case INS_vextracti128:
case INS_vextracti64x4:
case INS_vextracti32x8:
case INS_shld:
case INS_shrd:
{
Expand Down Expand Up @@ -6841,7 +6845,11 @@ void emitter::emitIns_R_R_R_I(
case INS_pextrw_sse41:
case INS_extractps:
case INS_vextractf128:
case INS_vextractf32x8:
case INS_vextractf64x4:
case INS_vextracti128:
case INS_vextracti64x4:
case INS_vextracti32x8:
{
code = insCodeMR(ins);
break;
Expand Down Expand Up @@ -10721,10 +10729,30 @@ void emitter::emitDispIns(

case IF_AWR_RRD_CNS:
{
if ((ins == INS_vextracti128) || (ins == INS_vextractf128))
switch (ins)
{
// vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
sstr = codeGen->genSizeStr(EA_ATTR(16));
case INS_vextractf128:
case INS_vextracti128:
{
// vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
sstr = codeGen->genSizeStr(EA_ATTR(16));
break;
}

case INS_vextractf32x8:
case INS_vextractf64x4:
case INS_vextracti64x4:
case INS_vextracti32x8:
{
// vextracti/f*x* extracts 256-bit data, so we fix sstr as "ymm ptr"
sstr = codeGen->genSizeStr(EA_ATTR(32));
break;
}

default:
{
break;
}
}

printf(sstr);
Expand Down Expand Up @@ -11118,6 +11146,7 @@ void emitter::emitDispIns(
attr = EA_32BYTE;
break;
}

case INS_vinsertf128:
case INS_vinserti128:
{
Expand Down Expand Up @@ -11170,6 +11199,15 @@ void emitter::emitDispIns(
break;
}

case INS_vextractf32x8:
case INS_vextractf64x4:
case INS_vextracti64x4:
case INS_vextracti32x8:
{
tgtAttr = EA_32BYTE;
break;
}

case INS_extractps:
case INS_pextrb:
case INS_pextrw:
Expand Down Expand Up @@ -11286,10 +11324,30 @@ void emitter::emitDispIns(

case IF_MWR_RRD_CNS:
{
if ((ins == INS_vextracti128) || (ins == INS_vextractf128))
switch (ins)
{
// vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
sstr = codeGen->genSizeStr(EA_ATTR(16));
case INS_vextractf128:
case INS_vextracti128:
{
// vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
sstr = codeGen->genSizeStr(EA_ATTR(16));
break;
}

case INS_vextractf32x8:
case INS_vextractf64x4:
case INS_vextracti64x4:
case INS_vextracti32x8:
{
// vextracti/f*x* extracts 256-bit data, so we fix sstr as "ymm ptr"
sstr = codeGen->genSizeStr(EA_ATTR(32));
break;
}

default:
{
break;
}
}

printf(sstr);
Expand Down Expand Up @@ -16269,12 +16327,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
break;

case IF_MWR_RRD_CNS:
assert(ins == INS_vextracti128 || ins == INS_vextractf128);
assert((ins == INS_vextractf128) || (ins == INS_vextractf32x8) || (ins == INS_vextractf64x4) ||
(ins == INS_vextracti128) || (ins == INS_vextracti32x8) || (ins == INS_vextracti64x4));
assert(UseSimdEncoding());
emitGetInsDcmCns(id, &cnsVal);
code = insCodeMR(ins);
// only AVX2 vextracti128 and AVX vextractf128 can reach this path,
// they do not need VEX.vvvv to encode the register operand
// we do not need VEX.vvvv to encode the register operand
dst = emitOutputCV(dst, id, code, &cnsVal);
sz = emitSizeOfInsDsc(id);
break;
Expand Down Expand Up @@ -17846,12 +17904,16 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vperm2i128:
case INS_vperm2f128:
case INS_vextractf128:
case INS_vextractf32x8:
case INS_vextractf64x4:
case INS_vextracti128:
case INS_vextracti32x8:
case INS_vextracti64x4:
case INS_vinsertf128:
case INS_vinserti128:
case INS_vinsertf64x4:
case INS_vinserti64x4:
case INS_vinsertf32x8:
case INS_vinserti128:
case INS_vinserti64x4:
case INS_vinserti32x8:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_3C;
Expand Down
Loading