-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable EVEX feature: Embedded Rounding for Avx512F.Add() #94684
Changes from 19 commits
292b60a
2a54e18
e78c595
e81a7a2
4ccc51d
69c59e4
4726a6c
d2d98f0
78e13d7
00e3324
4bb78f7
ef92def
2e16fb2
3973035
07b7d21
9fccdf5
2039f33
1f4c359
02355cf
dacaadf
501e37d
957bd26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -774,13 +774,13 @@ class emitter | |||||
unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr | ||||||
unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables | ||||||
#if defined(TARGET_XARCH) | ||||||
unsigned _idEvexbContext : 1; // does EVEX.b need to be set. | ||||||
#endif // TARGET_XARCH | ||||||
unsigned _idEvexbContext : 2 // Does Evex.b need to be set for embedded broadcast/embedded rounding. | ||||||
#endif // TARGET_XARCH | ||||||
|
||||||
#ifdef TARGET_ARM64 | ||||||
|
||||||
unsigned _idLclVar : 1; // access a local on stack | ||||||
unsigned _idLclVarPair : 1 // carries information for 2 GC lcl vars. | ||||||
unsigned _idLclVar : 1; // access a local on stack | ||||||
unsigned _idLclVarPair : 1 // carries information for 2 GC lcl vars. | ||||||
#endif | ||||||
|
||||||
#ifdef TARGET_LOONGARCH64 | ||||||
|
@@ -808,8 +808,8 @@ class emitter | |||||
|
||||||
//////////////////////////////////////////////////////////////////////// | ||||||
// Space taken up to here: | ||||||
// x86: 47 bits | ||||||
// amd64: 47 bits | ||||||
// x86: 48 bits | ||||||
// amd64: 48 bits | ||||||
// arm: 48 bits | ||||||
// arm64: 53 bits | ||||||
// loongarch64: 46 bits | ||||||
|
@@ -828,7 +828,7 @@ class emitter | |||||
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) | ||||||
#define ID_EXTRA_BITFIELD_BITS (14) | ||||||
#elif defined(TARGET_XARCH) | ||||||
#define ID_EXTRA_BITFIELD_BITS (15) | ||||||
#define ID_EXTRA_BITFIELD_BITS (16) | ||||||
#else | ||||||
#error Unsupported or unset target architecture | ||||||
#endif | ||||||
|
@@ -863,8 +863,8 @@ class emitter | |||||
|
||||||
//////////////////////////////////////////////////////////////////////// | ||||||
// Space taken up to here (with/without prev offset, assuming host==target): | ||||||
// x86: 53/49 bits | ||||||
// amd64: 54/49 bits | ||||||
// x86: 54/50 bits | ||||||
// amd64: 55/51 bits | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The "51" should be "50" AFAICT
Suggested change
|
||||||
// arm: 54/50 bits | ||||||
// arm64: 60/55 bits | ||||||
// loongarch64: 53/48 bits | ||||||
|
@@ -880,8 +880,8 @@ class emitter | |||||
|
||||||
//////////////////////////////////////////////////////////////////////// | ||||||
// Small constant size (with/without prev offset, assuming host==target): | ||||||
// x86: 11/15 bits | ||||||
// amd64: 10/15 bits | ||||||
// x86: 10/14 bits | ||||||
// amd64: 9/14 bits | ||||||
// arm: 10/14 bits | ||||||
// arm64: 4/9 bits | ||||||
// loongarch64: 11/16 bits | ||||||
|
@@ -1542,11 +1542,31 @@ class emitter | |||||
{ | ||||||
return _idEvexbContext != 0; | ||||||
} | ||||||
void idSetEvexbContext() | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The function above, |
||||||
void idSetEvexbContext(insOpts instOptions) | ||||||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
{ | ||||||
assert(_idEvexbContext == 0); | ||||||
_idEvexbContext = 1; | ||||||
assert(_idEvexbContext == 1); | ||||||
if (instOptions == INS_OPTS_EVEX_eb_er_rd) | ||||||
{ | ||||||
_idEvexbContext = 1; | ||||||
} | ||||||
else if (instOptions == INS_OPTS_EVEX_er_ru) | ||||||
{ | ||||||
_idEvexbContext = 2; | ||||||
} | ||||||
else if (instOptions == INS_OPTS_EVEX_er_rz) | ||||||
{ | ||||||
_idEvexbContext = 3; | ||||||
} | ||||||
else | ||||||
{ | ||||||
unreached(); | ||||||
} | ||||||
} | ||||||
|
||||||
unsigned idGetEvexbContext() const | ||||||
{ | ||||||
return _idEvexbContext; | ||||||
} | ||||||
#endif | ||||||
|
||||||
|
@@ -2126,6 +2146,7 @@ class emitter | |||||
void emitDispInsOffs(unsigned offs, bool doffs); | ||||||
void emitDispInsHex(instrDesc* id, BYTE* code, size_t sz); | ||||||
void emitDispEmbBroadcastCount(instrDesc* id); | ||||||
void emitDispEmbRounding(instrDesc* id); | ||||||
void emitDispIns(instrDesc* id, | ||||||
bool isNew, | ||||||
bool doffs, | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1139,6 +1139,29 @@ static bool isLowSimdReg(regNumber reg) | |||||
#endif | ||||||
} | ||||||
|
||||||
//------------------------------------------------------------------------ | ||||||
// GetEmbRoudingMode: Get the rounding mode for embedded rounding | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// | ||||||
// Arguments: | ||||||
// mode -- the flag from the correspoding gentree node indicating the mode. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit (typo)
Suggested change
|
||||||
// | ||||||
// Return Value: | ||||||
// the instruction option carrying the rounding mode information. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: in general, we prefer to have a blank |
||||||
insOpts emitter::GetEmbRoundingMode(uint8_t mode) const | ||||||
{ | ||||||
switch (mode) | ||||||
{ | ||||||
case 1: | ||||||
return INS_OPTS_EVEX_eb_er_rd; | ||||||
case 2: | ||||||
return INS_OPTS_EVEX_er_ru; | ||||||
case 3: | ||||||
return INS_OPTS_EVEX_er_rz; | ||||||
default: | ||||||
unreached(); | ||||||
} | ||||||
} | ||||||
|
||||||
//------------------------------------------------------------------------ | ||||||
// encodeRegAsIval: Encodes a register as an ival for use by a SIMD instruction | ||||||
// | ||||||
|
@@ -1309,18 +1332,50 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt | |||||
|
||||||
if (attr == EA_32BYTE) | ||||||
{ | ||||||
// Set L bit to 1 in case of instructions that operate on 256-bits. | ||||||
// Set L bit to 01 in case of instructions that operate on 256-bits. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You write "bit" (singular) but give an example of 2 bits. Should it either be left alone (as it was), or:
Suggested change
|
||||||
code |= LBIT_IN_BYTE_EVEX_PREFIX; | ||||||
} | ||||||
else if (attr == EA_64BYTE) | ||||||
{ | ||||||
// Set L' bits to 11 in case of instructions that operate on 512-bits. | ||||||
// Set L' bits to 10 in case of instructions that operate on 512-bits. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to above: should it say L'L?
Suggested change
|
||||||
code |= LPRIMEBIT_IN_BYTE_EVEX_PREFIX; | ||||||
} | ||||||
|
||||||
if (id->idIsEvexbContext()) | ||||||
{ | ||||||
code |= EVEX_B_BIT; | ||||||
|
||||||
if (!id->idHasMem()) | ||||||
{ | ||||||
// embedded rounding case. | ||||||
unsigned roundingMode = id->idGetEvexbContext(); | ||||||
if (roundingMode == 1) | ||||||
{ | ||||||
// {rd-sae} | ||||||
code &= ~(LPRIMEBIT_IN_BYTE_EVEX_PREFIX); | ||||||
code |= LBIT_IN_BYTE_EVEX_PREFIX; | ||||||
} | ||||||
else if (roundingMode == 2) | ||||||
{ | ||||||
// {ru-sae} | ||||||
code |= LPRIMEBIT_IN_BYTE_EVEX_PREFIX; | ||||||
code &= ~(LBIT_IN_BYTE_EVEX_PREFIX); | ||||||
} | ||||||
else if (roundingMode == 3) | ||||||
{ | ||||||
// {rz-sae} | ||||||
code |= LPRIMEBIT_IN_BYTE_EVEX_PREFIX; | ||||||
code |= LBIT_IN_BYTE_EVEX_PREFIX; | ||||||
} | ||||||
else | ||||||
{ | ||||||
unreached(); | ||||||
} | ||||||
} | ||||||
else | ||||||
{ | ||||||
assert(id->idGetEvexbContext() == 1); | ||||||
} | ||||||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
|
||||||
regNumber maskReg = REG_NA; | ||||||
|
@@ -6734,11 +6789,7 @@ void emitter::emitIns_R_R_A( | |||||
id->idIns(ins); | ||||||
id->idReg1(reg1); | ||||||
id->idReg2(reg2); | ||||||
if (instOptions == INS_OPTS_EVEX_b) | ||||||
{ | ||||||
assert(UseEvexEncoding()); | ||||||
id->idSetEvexbContext(); | ||||||
} | ||||||
SetEvexBroadcastIfNeeded(id, instOptions); | ||||||
|
||||||
emitHandleMemOp(indir, id, (ins == INS_mulx) ? IF_RWR_RWR_ARD : emitInsModeFormat(ins, IF_RRD_RRD_ARD), ins); | ||||||
|
||||||
|
@@ -6863,11 +6914,7 @@ void emitter::emitIns_R_R_C(instruction ins, | |||||
id->idReg1(reg1); | ||||||
id->idReg2(reg2); | ||||||
id->idAddr()->iiaFieldHnd = fldHnd; | ||||||
if (instOptions == INS_OPTS_EVEX_b) | ||||||
{ | ||||||
assert(UseEvexEncoding()); | ||||||
id->idSetEvexbContext(); | ||||||
} | ||||||
SetEvexBroadcastIfNeeded(id, instOptions); | ||||||
|
||||||
UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)); | ||||||
id->idCodeSize(sz); | ||||||
|
@@ -6881,7 +6928,8 @@ void emitter::emitIns_R_R_C(instruction ins, | |||||
* Add an instruction with three register operands. | ||||||
*/ | ||||||
|
||||||
void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2) | ||||||
void emitter::emitIns_R_R_R( | ||||||
instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, insOpts instOptions) | ||||||
{ | ||||||
assert(IsAvx512OrPriorInstruction(ins)); | ||||||
assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins)); | ||||||
|
@@ -6893,6 +6941,13 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, | |||||
id->idReg2(reg1); | ||||||
id->idReg3(reg2); | ||||||
|
||||||
if ((instOptions & INS_OPTS_b_MASK) != INS_OPTS_NONE) | ||||||
{ | ||||||
// if EVEX.b needs to be set in this path, then it should be embedded rounding. | ||||||
assert(UseEvexEncoding()); | ||||||
id->idSetEvexbContext(instOptions); | ||||||
} | ||||||
|
||||||
UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins)); | ||||||
id->idCodeSize(sz); | ||||||
|
||||||
|
@@ -6913,12 +6968,8 @@ void emitter::emitIns_R_R_S( | |||||
id->idReg1(reg1); | ||||||
id->idReg2(reg2); | ||||||
id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); | ||||||
SetEvexBroadcastIfNeeded(id, instOptions); | ||||||
|
||||||
if (instOptions == INS_OPTS_EVEX_b) | ||||||
{ | ||||||
assert(UseEvexEncoding()); | ||||||
id->idSetEvexbContext(); | ||||||
} | ||||||
#ifdef DEBUG | ||||||
id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; | ||||||
#endif | ||||||
|
@@ -8216,11 +8267,11 @@ void emitter::emitIns_SIMD_R_R_C(instruction ins, | |||||
// op2Reg -- The register of the second operand | ||||||
// | ||||||
void emitter::emitIns_SIMD_R_R_R( | ||||||
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg) | ||||||
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, insOpts instOptions) | ||||||
{ | ||||||
if (UseSimdEncoding()) | ||||||
{ | ||||||
emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg); | ||||||
emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, instOptions); | ||||||
} | ||||||
else | ||||||
{ | ||||||
|
@@ -10657,6 +10708,37 @@ void emitter::emitDispEmbBroadcastCount(instrDesc* id) | |||||
printf(" {1to%d}", vectorSize / baseSize); | ||||||
} | ||||||
|
||||||
// emitDispEmbRounding: Display the tag where embedded rounding is activated | ||||||
// | ||||||
// Arguments: | ||||||
// id - The instruction descriptor | ||||||
// | ||||||
void emitter::emitDispEmbRounding(instrDesc* id) | ||||||
{ | ||||||
if (!id->idIsEvexbContext()) | ||||||
{ | ||||||
return; | ||||||
} | ||||||
assert(!id->idHasMem()); | ||||||
unsigned roundingMode = id->idGetEvexbContext(); | ||||||
if (roundingMode == 1) | ||||||
{ | ||||||
printf(" {rd-sae}"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: in the Intel manual I have, section "15.6.4 Static Rounding Mode and Suppress All Exceptions", there are these examples:
In each case there is a comma before the It's not clear from the instruction set reference whether the comma is expected or not. So what is the standard? Comma or no comma? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
} | ||||||
else if (roundingMode == 2) | ||||||
{ | ||||||
printf(" {ru-sae}"); | ||||||
} | ||||||
else if (roundingMode == 3) | ||||||
{ | ||||||
printf(" {rz-sae}"); | ||||||
} | ||||||
else | ||||||
{ | ||||||
unreached(); | ||||||
} | ||||||
} | ||||||
|
||||||
//-------------------------------------------------------------------- | ||||||
// emitDispIns: Dump the given instruction to jitstdout. | ||||||
// | ||||||
|
@@ -11525,6 +11607,7 @@ void emitter::emitDispIns( | |||||
printf("%s, ", emitRegName(id->idReg1(), attr)); | ||||||
printf("%s, ", emitRegName(reg2, attr)); | ||||||
printf("%s", emitRegName(reg3, attr)); | ||||||
emitDispEmbRounding(id); | ||||||
break; | ||||||
} | ||||||
|
||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing a semicolon (probably got lucky that there is a "stray" semicolon below on
CLANG_FORMAT_COMMENT_ANCHOR
Also, "Does Evex.b need to be set" makes it sound like this is a 0/1, false/true boolean. Then why does it need 2 bits? I think you should expand the comment to state what the two bits are used for. (You don't need to put the comment all on this one line; make it a multi-line comment above this line if necessary)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the comments!
_idEvexbContext
is used to differentiate several features: normal intrinsics, embedded broadcast, embedded rounding. In the normal or embedded broadcast cases, the semantic forEvex.L'L
remains the same, intrinsic vector length, while in embedded rounding case, the semantic changes to indicate the rounding mode._idEvexbContext
will be used to inform emitter to specially handle theEvex.L'L
in the embedded rounding scenario. I will properly document this part in the comments.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A comment like you just wrote would be great.