
Commit ca07e25 (1 parent: 4f17a54)

[FastISel][AArch64] Fix "Fold sign-/zero-extends into the load instruction."

This commit fixes an issue with sign- and zero-extending loads that was discovered by Richard Barton. We now use the correct load instructions for sign-extending loads to 64 bit. Also updated and added more unit tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219185 91177308-0d34-0410-b5e6-96231b3b80d8
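As a minimal illustration (not from the commit itself), consider a signed byte load whose only use is a sign-extend to 64 bit. After this fix, fast-isel selects the X-form load, which sign-extends directly into the full 64-bit register. The function name below is ours, for illustration only:

long load_sext(signed char *p) {
  return *p; // expected selection: ldrsb x0, [x0]  (LDRSBXui)
}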

File tree

2 files changed: +472 -135 lines changed

lib/Target/AArch64/AArch64FastISel.cpp (+90 -64)
@@ -178,8 +178,8 @@ class AArch64FastISel final : public FastISel {
   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
-  bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true,
-                MachineMemOperand *MMO = nullptr);
+  bool emitLoad(MVT VT, MVT ResultVT, unsigned &ResultReg, Address Addr,
+                bool WantZExt = true, MachineMemOperand *MMO = nullptr);
   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                  MachineMemOperand *MMO = nullptr);
   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
@@ -260,6 +260,8 @@ class AArch64FastISel final : public FastISel {
 static bool isIntExtFree(const Instruction *I) {
   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
          "Unexpected integer extend instruction.");
+  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
+         "Unexpected value type.");
   bool IsZExt = isa<ZExtInst>(I);
 
   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
@@ -1589,8 +1591,9 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
 }
 
-bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                               bool WantZExt, MachineMemOperand *MMO) {
+bool AArch64FastISel::emitLoad(MVT VT, MVT RetVT, unsigned &ResultReg,
+                               Address Addr, bool WantZExt,
+                               MachineMemOperand *MMO) {
   // Simplify this down to something we can handle.
   if (!simplifyAddress(Addr, VT))
     return false;
@@ -1607,24 +1610,40 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
     ScaleFactor = 1;
   }
 
-  static const unsigned GPOpcTable[2][4][4] = {
+  static const unsigned GPOpcTable[2][8][4] = {
     // Sign-extend.
-    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURSWi,
+    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
+        AArch64::LDURXi  },
+      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
         AArch64::LDURXi  },
-      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRSWui,
+      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
         AArch64::LDRXui  },
-      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX,
+      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
+        AArch64::LDRXui  },
+      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
+        AArch64::LDRXroX },
+      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
         AArch64::LDRXroX },
-      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW,
+      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
         AArch64::LDRXroW },
+      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
+        AArch64::LDRXroW }
     },
     // Zero-extend.
     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
         AArch64::LDURXi  },
+      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
+        AArch64::LDURXi  },
       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
         AArch64::LDRXui  },
+      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
+        AArch64::LDRXui  },
+      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
+        AArch64::LDRXroX },
       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
         AArch64::LDRXroX },
+      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
+        AArch64::LDRXroW },
       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
         AArch64::LDRXroW }
     }
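Note: the opcode table doubles from four rows to eight per extend kind. Rows still follow the order unscaled immediate, scaled immediate, X-register offset, W-register offset, but each addressing mode now contributes a pair of rows: one producing a 32-bit result and one producing a 64-bit result. A sketch of the row selection used by the switch in the next hunk (the function name is ours, for illustration only):

// Rows pair up per addressing mode; the low bit picks the 64-bit variant.
unsigned selectRow(unsigned Idx /* addressing mode, 0..3 */, bool IsRet64Bit) {
  return 2 * Idx + (IsRet64Bit ? 1 : 0); // row index 0..7
}
// Usage mirrors the patch: GPOpcTable[WantZExt][selectRow(Idx, RetVT == MVT::i64)][Col]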
@@ -1646,24 +1665,28 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
       Addr.getExtendType() == AArch64_AM::SXTW)
     Idx++;
 
+  bool IsRet64Bit = RetVT == MVT::i64;
   switch (VT.SimpleTy) {
   default:
     llvm_unreachable("Unexpected value type.");
   case MVT::i1: // Intentional fall-through.
   case MVT::i8:
-    Opc = GPOpcTable[WantZExt][Idx][0];
-    RC = &AArch64::GPR32RegClass;
+    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
+    RC = (IsRet64Bit && !WantZExt) ?
+             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
     break;
   case MVT::i16:
-    Opc = GPOpcTable[WantZExt][Idx][1];
-    RC = &AArch64::GPR32RegClass;
+    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
+    RC = (IsRet64Bit && !WantZExt) ?
+             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
   case MVT::i32:
-    Opc = GPOpcTable[WantZExt][Idx][2];
-    RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
+    RC = (IsRet64Bit && !WantZExt) ?
+             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
     break;
   case MVT::i64:
-    Opc = GPOpcTable[WantZExt][Idx][3];
+    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
     RC = &AArch64::GPR64RegClass;
     break;
   case MVT::f32:
@@ -1682,15 +1705,22 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                                       TII.get(Opc), ResultReg);
   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
 
-  // For 32bit loads we do sign-extending loads to 64bit and then extract the
-  // subreg. In the end this is just a NOOP.
-  if (VT == MVT::i32 && !WantZExt)
-    ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, /*IsKill=*/true,
-                                           AArch64::sub_32);
+  // For zero-extending loads to 64bit we emit a 32bit load and then convert
+  // the w-reg to an x-reg. In the end this is just a noop and will be removed.
+  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
+    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(AArch64::SUBREG_TO_REG), Reg64)
+        .addImm(0)
+        .addReg(ResultReg, getKillRegState(true))
+        .addImm(AArch64::sub_32);
+    ResultReg = Reg64;
+  }
 
   // Loading an i1 requires special handling.
   if (VT == MVT::i1) {
-    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
+    unsigned ANDReg = emitAnd_ri(IsRet64Bit ? MVT::i64 : MVT::i32, ResultReg,
+                                 /*IsKill=*/true, 1);
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     ResultReg = ANDReg;
   }
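Note: the SUBREG_TO_REG here costs nothing at run time. On AArch64, any write to a w-register implicitly zeroes bits 63:32 of the corresponding x-register, so a 32-bit (or narrower, zero-extending) load already yields a valid zero-extended 64-bit value; SUBREG_TO_REG merely retags the register for the machine verifier. Illustrative C++ only (names ours):

unsigned long load_zext(unsigned *p) {
  return *p; // expected selection: ldr w0, [x0] -- no extra extend instruction
}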
@@ -1767,11 +1797,21 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
     return false;
 
   bool WantZExt = true;
-  if (I->hasOneUse() && isa<SExtInst>(I->use_begin()->getUser()))
-    WantZExt = false;
+  MVT RetVT = VT;
+  if (I->hasOneUse()) {
+    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
+      if (!isTypeSupported(ZE->getType(), RetVT, /*IsVectorAllowed=*/false))
+        RetVT = VT;
+    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
+      if (!isTypeSupported(SE->getType(), RetVT, /*IsVectorAllowed=*/false))
+        RetVT = VT;
+      WantZExt = false;
+    }
+  }
 
   unsigned ResultReg;
-  if (!emitLoad(VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor(I)))
+  if (!emitLoad(VT, RetVT, ResultReg, Addr, WantZExt,
+                createMachineMemOperandFor(I)))
     return false;
 
   updateValueMap(I, ResultReg);
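Note: selectLoad now looks through a single zero- or sign-extending user and asks emitLoad for the extend's result type, so the extend never becomes a separate instruction. Illustrative C++ only (names ours):

unsigned long load_fold(unsigned short *p) {
  return *p; // load i16 + zext to i64: ldrh w0, [x0], then the noop w->x retag
}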
@@ -2897,7 +2937,7 @@ bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
 
     bool RV;
     unsigned ResultReg;
-    RV = emitLoad(VT, ResultReg, Src);
+    RV = emitLoad(VT, VT, ResultReg, Src);
     if (!RV)
       return false;
 
@@ -3917,51 +3957,37 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
     return false;
 
-  if (isIntExtFree(I)) {
-    unsigned SrcReg = getRegForValue(I->getOperand(0));
-    if (!SrcReg)
-      return false;
-    bool SrcIsKill = hasTrivialKill(I->getOperand(0));
-
-    const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
-        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
-    unsigned ResultReg = createResultReg(RC);
-    if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(AArch64::SUBREG_TO_REG), ResultReg)
-          .addImm(0)
-          .addReg(SrcReg, getKillRegState(SrcIsKill))
-          .addImm(AArch64::sub_32);
-    } else {
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), ResultReg)
-          .addReg(SrcReg, getKillRegState(SrcIsKill));
-    }
-    updateValueMap(I, ResultReg);
-    return true;
-  }
-
   unsigned SrcReg = getRegForValue(I->getOperand(0));
   if (!SrcReg)
     return false;
-  bool SrcRegIsKill = hasTrivialKill(I->getOperand(0));
+  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
 
-  unsigned ResultReg = 0;
-  if (isIntExtFree(I)) {
-    if (RetVT == MVT::i64) {
-      ResultReg = createResultReg(&AArch64::GPR64RegClass);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(AArch64::SUBREG_TO_REG), ResultReg)
-          .addImm(0)
-          .addReg(SrcReg, getKillRegState(SrcRegIsKill))
-          .addImm(AArch64::sub_32);
-    } else
-      ResultReg = SrcReg;
+  // The load instruction selection code handles the sign-/zero-extension.
+  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) {
+    if (LI->hasOneUse()) {
+      updateValueMap(I, SrcReg);
+      return true;
+    }
   }
 
-  if (!ResultReg)
-    ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, isa<ZExtInst>(I));
+  bool IsZExt = isa<ZExtInst>(I);
+  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
+    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
+      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+            .addImm(0)
+            .addReg(SrcReg, getKillRegState(SrcIsKill))
+            .addImm(AArch64::sub_32);
+        SrcReg = ResultReg;
+      }
+      updateValueMap(I, SrcReg);
+      return true;
+    }
+  }
 
+  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
   if (!ResultReg)
     return false;
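Note: selectIntExt no longer calls isIntExtFree up front. A single-use load has already had its extend folded by selectLoad, so the extend simply maps to the load's register; an argument whose ABI attribute matches the extend (zeroext/signext in the IR) arrives already extended and needs at most the cost-free SUBREG_TO_REG retag when widening to 64 bit. Illustrative C++, assuming the frontend marks the parameter signext in IR (names ours):

long arg_ext(signed char c) {
  return c; // c already arrives sign-extended; no sxtb is emitted
}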
