Skip to content

Commit a433485

Browse files
committed
CR Fixes chakra-core#2:
- Avoid pinning opnds to byte-addressable reg. Copy to new TyInt8 sym instead. - Removed unused IRBuilder/BytecodeDumper functions. - SccLiveness will process dst of SHUFPS/D as a src. - Minor nit fixes in LowererMDSharedSimd128.
1 parent 2b1ccd0 commit a433485

File tree

5 files changed

+61
-95
lines changed

5 files changed

+61
-95
lines changed

lib/Backend/IRBuilderAsmJs.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4710,6 +4710,7 @@ void IRBuilderAsmJs::BuildReg1Int8x16_1(Js::OpCodeAsmJs newOpcode, uint32 offset
47104710

47114711
/* Float64x2 */
47124712
// Disabled for now
4713+
#if 0
47134714
template <typename SizePolicy>
47144715
void
47154716
IRBuilderAsmJs::BuildFloat64x2_2(Js::OpCodeAsmJs newOpcode, uint32 offset)
@@ -4782,8 +4783,6 @@ IRBuilderAsmJs::BuildFloat64x2_3(Js::OpCodeAsmJs newOpcode, uint32 offset)
47824783
AddInstr(instr, offset);
47834784
}
47844785

4785-
4786-
#if 0
47874786
template <typename SizePolicy>
47884787
void
47894788
IRBuilderAsmJs::BuildFloat64x2_4(Js::OpCodeAsmJs newOpcode, uint32 offset)
@@ -4873,7 +4872,6 @@ void IRBuilderAsmJs::BuildFloat64x2_1Double1(Js::OpCodeAsmJs newOpcode, uint32 o
48734872
AddInstr(instr, offset);
48744873
}
48754874

4876-
#endif // 0
48774875

48784876
template <typename SizePolicy>
48794877
void
@@ -4902,7 +4900,7 @@ IRBuilderAsmJs::BuildFloat64x2_2Double1(Js::OpCodeAsmJs newOpcode, uint32 offset
49024900
IR::Instr * instr = IR::Instr::New(opcode, dstOpnd, src1Opnd, src2Opnd, m_func);
49034901
AddInstr(instr, offset);
49044902
}
4905-
#if 0
4903+
49064904
template <typename SizePolicy>
49074905
void
49084906
IRBuilderAsmJs::BuildFloat64x2_2Int2(Js::OpCodeAsmJs newOpcode, uint32 offset)
@@ -5087,6 +5085,7 @@ void IRBuilderAsmJs::BuildReg1Float64x2_1(Js::OpCodeAsmJs newOpcode, uint32 offs
50875085
}
50885086
}
50895087
#endif // 0
5088+
50905089
/* Int16x8 */
50915090
template <typename SizePolicy>
50925091
void IRBuilderAsmJs::BuildInt16x8_1Int8(Js::OpCodeAsmJs newOpcode, uint32 offset)

lib/Backend/LowerMDShared.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ class LowererMD
354354
IR::Instr* Simd128LowerAnyTrue(IR::Instr* instr);
355355
IR::Instr* Simd128LowerAllTrue(IR::Instr* instr);
356356
BYTE Simd128GetTypedArrBytesPerElem(ValueType arrType);
357-
IR::Instr* Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode& CMPopCode, IR::Opnd& dstOpnd);
357+
IR::Instr* Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode& cmpOpcode, IR::Opnd& dstOpnd);
358358
IR::Opnd* EnregisterIntConst(IR::Instr* instr, IR::Opnd *constOpnd, IRType type = TyInt32);
359359
SList<IR::Opnd*> * Simd128GetExtendedArgs(IR::Instr *instr);
360360
void GenerateCheckedSimdLoad(IR::Instr * instr);

lib/Backend/LowerMDSharedSimd128.cpp

Lines changed: 33 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -397,13 +397,13 @@ IR::Instr* LowererMD::Simd128LoadConst(IR::Instr* instr)
397397
return instr->m_prev;
398398
}
399399

400-
IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &CMPopCode, IR::Opnd& dstOpnd)
400+
IR::Instr* LowererMD::Simd128CanonicalizeToBools(IR::Instr* instr, const Js::OpCode &cmpOpcode, IR::Opnd& dstOpnd)
401401
{
402402
Assert(instr->m_opcode == Js::OpCode::Simd128_IntsToB4 || instr->m_opcode == Js::OpCode::Simd128_IntsToB8 || instr->m_opcode == Js::OpCode::Simd128_IntsToB16 ||
403403
instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B4 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B8 || instr->m_opcode == Js::OpCode::Simd128_ReplaceLane_B16);
404404
IR::Instr *pInstr;
405-
//dst = SIMDCMPOpCode dst, X86_ALL_ZEROS
406-
pInstr = IR::Instr::New(CMPopCode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New((void*)&X86_ALL_ZEROS, TySimd128I4, m_func), m_func);
405+
//dst = cmpOpcode dst, X86_ALL_ZEROS
406+
pInstr = IR::Instr::New(cmpOpcode, &dstOpnd, &dstOpnd, IR::MemRefOpnd::New((void*)&X86_ALL_ZEROS, TySimd128I4, m_func), m_func);
407407
instr->InsertBefore(pInstr);
408408
Legalize(pInstr);
409409
// dst = PANDN dst, X86_ALL_NEG_ONES
@@ -741,16 +741,10 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
741741
{
742742
if (laneType == TyInt8)
743743
{
744-
// move value to byte-addressable reg
745-
IR::RegOpnd * ebxOpnd = IR::RegOpnd::New(TyInt32, m_func);
746-
#ifdef _M_IX86
747-
ebxOpnd->SetReg(RegEBX);
748-
#else
749-
ebxOpnd->SetReg(RegRBX);
750-
#endif
751-
newInstr = IR::Instr::New(Js::OpCode::MOV, ebxOpnd, dst, m_func);
744+
IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
745+
newInstr = IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func);
752746
instr->InsertBefore(newInstr);
753-
newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, ebxOpnd->UseWithNewType(laneType, m_func), m_func);
747+
newInstr = IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func);
754748
}
755749
else
756750
{
@@ -770,7 +764,7 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
770764
instr->m_opcode == Js::OpCode::Simd128_ExtractLane_B16)
771765
{
772766
IR::Instr* pInstr = nullptr;
773-
IR::RegOpnd* ebxOpnd = IR::RegOpnd::New(TyInt32, m_func);
767+
IR::RegOpnd* tmp = IR::RegOpnd::New(TyInt8, m_func);
774768

775769
// cmp dst, -1
776770
pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
@@ -779,19 +773,16 @@ IR::Instr* LowererMD::Simd128LowerLdLane(IR::Instr *instr)
779773
instr->InsertBefore(pInstr);
780774
Legalize(pInstr);
781775

782-
// Need an EBX register for byte addressing.
783-
// movd ebx, dst
784-
// type cast ebx to Int8.
785-
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, ebxOpnd, dst, m_func));
786-
IR::Opnd *dst_i8 = ebxOpnd->UseWithNewType(TyInt8, this->m_func);
776+
// mov tmp(TyInt8), dst
777+
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func));
787778

788-
// sete dst_i8, al
789-
pInstr = IR::Instr::New(Js::OpCode::SETE, dst_i8, dst_i8, m_func);
779+
// sete tmp(TyInt8)
780+
pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
790781
instr->InsertBefore(pInstr);
791782
Legalize(pInstr);
792783

793-
// mov dst, dst_i8
794-
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, dst, dst_i8, m_func));
784+
// movsx dst, tmp(TyInt8)
785+
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
795786
}
796787

797788
IR::Instr* prevInstr = instr->m_prev;
@@ -1343,7 +1334,7 @@ IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
13431334
{
13441335
IR::RegOpnd * shamtReg = IR::RegOpnd::New(TyInt8, m_func);
13451336
shamtReg->SetReg(LowererMDArch::GetRegShiftCount());
1346-
IR::RegOpnd * ebxOpnd = IR::RegOpnd::New(TyInt32, m_func);
1337+
IR::RegOpnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
13471338

13481339
// MOVAPS dst, src1
13491340
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
@@ -1355,17 +1346,12 @@ IR::Instr* LowererMD::Simd128LowerShift(IR::Instr *instr)
13551346
instr->InsertBefore(IR::Instr::New(Js::OpCode::SHR, reg2, reg2, shamtReg, m_func));
13561347

13571348

1358-
// MOV ebx, reg2
1359-
// MOVSX reg2, bl (lower 8 bit)
1360-
// make sure reg2 can be in a byte reg
1361-
#ifdef _M_IX86
1362-
reg2->SetReg(RegEBX);
1363-
#else
1364-
reg2->SetReg(RegRBX);
1365-
#endif
1366-
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, ebxOpnd, reg2, m_func));
1349+
// MOV tmp(TyInt8), reg2
1350+
// MOVSX reg2, tmp(TyInt8)
13671351

1368-
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, ebxOpnd->UseWithNewType(TyInt8, m_func), m_func));
1352+
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tmp, reg2, m_func));
1353+
1354+
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, reg2, tmp, m_func));
13691355
IR::RegOpnd *mask = IR::RegOpnd::New(TySimd128I4, m_func);
13701356
// PSRLW dst, mask
13711357
instr->InsertBefore(IR::Instr::New(Js::OpCode::PSRLW, dst, dst, tmp0, m_func));
@@ -2463,50 +2449,36 @@ IR::Instr* LowererMD::Simd128LowerAllTrue(IR::Instr* instr)
24632449
IR::Instr *pInstr;
24642450
IR::Opnd* dst = instr->GetDst();
24652451
IR::Opnd* src1 = instr->GetSrc1();
2452+
24662453
Assert(dst->IsRegOpnd() && dst->IsInt32());
24672454
Assert(src1->IsRegOpnd() && src1->IsSimd128());
2468-
// mov dst, 0
2469-
// pmovmskb reg, src1
2470-
// cmp reg, 0FFFFh
2471-
// sete dst, (al)
2472-
2473-
IR::RegOpnd *reg = IR::RegOpnd::New(TyInt32, this->m_func);
2474-
IR::RegOpnd *ebxOpnd = IR::RegOpnd::New(TyInt32, m_func);
2475-
2476-
// mov dst, 0
2477-
pInstr = IR::Instr::New(Js::OpCode::MOV, dst, IR::IntConstOpnd::New(0, TyInt32, m_func), m_func);
2478-
instr->InsertBefore(pInstr);
2479-
Legalize(pInstr);
2455+
2456+
IR::Opnd * tmp = IR::RegOpnd::New(TyInt8, m_func);
24802457

2481-
// pmovmskb reg, src1
2482-
pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, reg, src1, m_func);
2458+
// pmovmskb dst, src1
2459+
pInstr = IR::Instr::New(Js::OpCode::PMOVMSKB, dst, src1, m_func);
24832460
instr->InsertBefore(pInstr);
24842461

2485-
// cmp reg, 0FFFFh
2462+
// cmp dst, 0FFFFh
24862463
pInstr = IR::Instr::New(Js::OpCode::CMP, m_func);
2487-
pInstr->SetSrc1(reg);
2488-
pInstr->SetSrc2(IR::IntConstOpnd::New(0xFFFF, TyInt32, m_func, true));
2464+
pInstr->SetSrc1(dst);
2465+
pInstr->SetSrc2(IR::IntConstOpnd::New(0x0FFFF, TyInt32, m_func, true));
24892466
instr->InsertBefore(pInstr);
24902467
Legalize(pInstr);
24912468

2492-
// mov ebx, dst
2493-
//Need an EBX register to be byte addressable.
2494-
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, ebxOpnd, dst, m_func));
2495-
IR::Opnd *dst_i8 = ebxOpnd->UseWithNewType(TyInt8, this->m_func);
2469+
// mov tmp(TyInt8), dst
2470+
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, tmp, dst, m_func));
24962471

2497-
// sete dst, al
2498-
pInstr = IR::Instr::New(Js::OpCode::SETE, dst_i8, dst_i8, m_func);
2472+
// sete tmp(TyInt8)
2473+
pInstr = IR::Instr::New(Js::OpCode::SETE, tmp, tmp, m_func);
24992474
instr->InsertBefore(pInstr);
25002475
Legalize(pInstr);
25012476

2502-
// mov dst_i8, dst
2503-
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOV, dst, dst_i8, m_func));
2504-
2477+
// movsx dst, dst(TyInt8)
2478+
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVSX, dst, tmp, m_func));
25052479

25062480
pInstr = instr->m_prev;
25072481
instr->Remove();
2508-
2509-
25102482
return pInstr;
25112483
}
25122484

@@ -3085,7 +3057,6 @@ LowererMD::Simd128LoadHeadSegment(IR::IndirOpnd *indirOpnd, ValueType arrType, I
30853057
}
30863058
else
30873059
{
3088-
// REVIEW: Is this needed ? Shouldn't globOpt make sure headSegSym is set and alive ?
30893060
// MOV headSegment, [base + offset(head)]
30903061
int32 headOffset = m_lowerer->GetArrayOffsetOfHeadSegment(arrType);
30913062
IR::IndirOpnd * indirOpnd = IR::IndirOpnd::New(arrayRegOpnd, headOffset, TyMachPtr, this->m_func);
@@ -3476,8 +3447,6 @@ void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::O
34763447
}
34773448
else if (dst->IsEqual(src2))
34783449
{
3479-
// REVIEW: Could this be a problem with RegAlloc ? If the regAlloc decides to assign two regs for dst in
3480-
// the two definitions, we will have incorrect code.
34813450

34823451
// MOVAPS tmp, dst
34833452
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, tmp, dst, m_func));
@@ -3488,9 +3457,6 @@ void LowererMD::InsertShufps(uint8 lanes[], IR::Opnd *dst, IR::Opnd *src1, IR::O
34883457
}
34893458
else
34903459
{
3491-
// REVIEW: Could this be a problem with RegAlloc ? If the regAlloc decides to assign two regs for dst in
3492-
// the two definitions, we will have incorrect code.
3493-
34943460
// MOVAPS dst, src1
34953461
instr->InsertBefore(IR::Instr::New(Js::OpCode::MOVAPS, dst, src1, m_func));
34963462
// SHUF dst, src2, imm8

lib/Backend/SccLiveness.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,13 @@ SCCLiveness::ProcessDst(IR::Opnd *dst, IR::Instr *instr)
403403
}
404404
}
405405
}
406+
#if defined(_M_X64) || defined(_M_IX86)
407+
else if (instr->m_opcode == Js::OpCode::SHUFPS || instr->m_opcode == Js::OpCode::SHUFPD)
408+
{
409+
// dst is the first src, make sure it gets the same live reg
410+
this->ProcessRegUse(dst->AsRegOpnd(), instr);
411+
}
412+
#endif
406413
else if (dst->IsRegOpnd())
407414
{
408415
this->ProcessRegDef(dst->AsRegOpnd(), instr);

lib/Runtime/ByteCode/AsmJsByteCodeDumper.cpp

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -963,13 +963,6 @@ namespace Js
963963
DumpFloatReg(data->F2);
964964
}
965965

966-
template <class T>
967-
void AsmJsByteCodeDumper::DumpFloat32x4_1Float64x2_1(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
968-
{
969-
DumpFloat32x4Reg(data->F4_0);
970-
DumpFloat64x2Reg(data->D2_1);
971-
}
972-
973966
template <class T>
974967
void AsmJsByteCodeDumper::DumpFloat32x4_1Int32x4_1(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
975968
{
@@ -1012,15 +1005,6 @@ namespace Js
10121005
DumpUint8x16Reg(data->U16_1);
10131006
}
10141007

1015-
template <class T>
1016-
void AsmJsByteCodeDumper::DumpFloat32x4_1Int32x4_1Float32x4_2(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
1017-
{
1018-
DumpFloat32x4Reg(data->F4_0);
1019-
DumpInt32x4Reg(data->I4_1);
1020-
DumpFloat32x4Reg(data->F4_2);
1021-
DumpFloat32x4Reg(data->F4_3);
1022-
}
1023-
10241008
template <class T>
10251009
void AsmJsByteCodeDumper::DumpReg1Float32x4_1(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
10261010
{
@@ -1192,12 +1176,8 @@ namespace Js
11921176
DumpFloat32x4Reg(data->U16_1);
11931177
}
11941178

1195-
template <class T>
1196-
void AsmJsByteCodeDumper::DumpInt32x4_1Float64x2_1(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
1197-
{
1198-
DumpInt32x4Reg(data->I4_0);
1199-
DumpFloat64x2Reg(data->D2_1);
1200-
}
1179+
1180+
12011181

12021182
// Bool32x4
12031183
template <class T>
@@ -1570,7 +1550,21 @@ namespace Js
15701550
}
15711551

15721552
// Disabled for now
1553+
#if 0
15731554
// Float64x2
1555+
template <class T>
1556+
void AsmJsByteCodeDumper::DumpInt32x4_1Float64x2_1(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
1557+
{
1558+
DumpInt32x4Reg(data->I4_0);
1559+
DumpFloat64x2Reg(data->D2_1);
1560+
}
1561+
template <class T>
1562+
void AsmJsByteCodeDumper::DumpFloat32x4_1Float64x2_1(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
1563+
{
1564+
DumpFloat32x4Reg(data->F4_0);
1565+
DumpFloat64x2Reg(data->D2_1);
1566+
}
1567+
15741568
template <class T>
15751569
void AsmJsByteCodeDumper::DumpFloat64x2_2(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)
15761570
{
@@ -1666,7 +1660,7 @@ namespace Js
16661660
DumpReg(data->R0);
16671661
DumpFloat64x2Reg(data->D2_1);
16681662
}
1669-
1663+
#endif
16701664
// Int16x8
16711665
template <class T>
16721666
void AsmJsByteCodeDumper::DumpInt16x8_1Int8(OpCodeAsmJs op, const unaligned T * data, FunctionBody * dumpFunction, ByteCodeReader& reader)

0 commit comments

Comments
 (0)