@@ -178,8 +178,8 @@ class AArch64FastISel final : public FastISel {
178
178
bool emitICmp (MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
179
179
bool emitICmp_ri (MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
180
180
bool emitFCmp (MVT RetVT, const Value *LHS, const Value *RHS);
181
- bool emitLoad (MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true ,
182
- MachineMemOperand *MMO = nullptr );
181
+ bool emitLoad (MVT VT, MVT ResultVT, unsigned &ResultReg, Address Addr,
182
+ bool WantZExt = true , MachineMemOperand *MMO = nullptr );
183
183
bool emitStore (MVT VT, unsigned SrcReg, Address Addr,
184
184
MachineMemOperand *MMO = nullptr );
185
185
unsigned emitIntExt (MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
@@ -260,6 +260,8 @@ class AArch64FastISel final : public FastISel {
260
260
static bool isIntExtFree (const Instruction *I) {
261
261
assert ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
262
262
" Unexpected integer extend instruction." );
263
+ assert (!I->getType ()->isVectorTy () && I->getType ()->isIntegerTy () &&
264
+ " Unexpected value type." );
263
265
bool IsZExt = isa<ZExtInst>(I);
264
266
265
267
if (const auto *LI = dyn_cast<LoadInst>(I->getOperand (0 )))
@@ -1589,8 +1591,9 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1589
1591
return emitLogicalOp_ri (ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1590
1592
}
1591
1593
1592
- bool AArch64FastISel::emitLoad (MVT VT, unsigned &ResultReg, Address Addr,
1593
- bool WantZExt, MachineMemOperand *MMO) {
1594
+ bool AArch64FastISel::emitLoad (MVT VT, MVT RetVT, unsigned &ResultReg,
1595
+ Address Addr, bool WantZExt,
1596
+ MachineMemOperand *MMO) {
1594
1597
// Simplify this down to something we can handle.
1595
1598
if (!simplifyAddress (Addr, VT))
1596
1599
return false ;
@@ -1607,24 +1610,40 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1607
1610
ScaleFactor = 1 ;
1608
1611
}
1609
1612
1610
- static const unsigned GPOpcTable[2 ][4 ][4 ] = {
1613
+ static const unsigned GPOpcTable[2 ][8 ][4 ] = {
1611
1614
// Sign-extend.
1612
- { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURSWi,
1615
+ { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1616
+ AArch64::LDURXi },
1617
+ { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1613
1618
AArch64::LDURXi },
1614
- { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRSWui ,
1619
+ { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui ,
1615
1620
AArch64::LDRXui },
1616
- { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX,
1621
+ { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1622
+ AArch64::LDRXui },
1623
+ { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1624
+ AArch64::LDRXroX },
1625
+ { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1617
1626
AArch64::LDRXroX },
1618
- { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW ,
1627
+ { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW ,
1619
1628
AArch64::LDRXroW },
1629
+ { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1630
+ AArch64::LDRXroW }
1620
1631
},
1621
1632
// Zero-extend.
1622
1633
{ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1623
1634
AArch64::LDURXi },
1635
+ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1636
+ AArch64::LDURXi },
1624
1637
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1625
1638
AArch64::LDRXui },
1639
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1640
+ AArch64::LDRXui },
1641
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1642
+ AArch64::LDRXroX },
1626
1643
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1627
1644
AArch64::LDRXroX },
1645
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1646
+ AArch64::LDRXroW },
1628
1647
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1629
1648
AArch64::LDRXroW }
1630
1649
}
@@ -1646,24 +1665,28 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1646
1665
Addr.getExtendType () == AArch64_AM::SXTW)
1647
1666
Idx++;
1648
1667
1668
+ bool IsRet64Bit = RetVT == MVT::i64;
1649
1669
switch (VT.SimpleTy ) {
1650
1670
default :
1651
1671
llvm_unreachable (" Unexpected value type." );
1652
1672
case MVT::i1: // Intentional fall-through.
1653
1673
case MVT::i8:
1654
- Opc = GPOpcTable[WantZExt][Idx][0 ];
1655
- RC = &AArch64::GPR32RegClass;
1674
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0 ];
1675
+ RC = (IsRet64Bit && !WantZExt) ?
1676
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1656
1677
break ;
1657
1678
case MVT::i16:
1658
- Opc = GPOpcTable[WantZExt][Idx][1 ];
1659
- RC = &AArch64::GPR32RegClass;
1679
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1 ];
1680
+ RC = (IsRet64Bit && !WantZExt) ?
1681
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1660
1682
break ;
1661
1683
case MVT::i32:
1662
- Opc = GPOpcTable[WantZExt][Idx][2 ];
1663
- RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1684
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2 ];
1685
+ RC = (IsRet64Bit && !WantZExt) ?
1686
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1664
1687
break ;
1665
1688
case MVT::i64:
1666
- Opc = GPOpcTable[WantZExt][Idx][3 ];
1689
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit ][3 ];
1667
1690
RC = &AArch64::GPR64RegClass;
1668
1691
break ;
1669
1692
case MVT::f32:
@@ -1682,15 +1705,22 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1682
1705
TII.get (Opc), ResultReg);
1683
1706
addLoadStoreOperands (Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1684
1707
1685
- // For 32bit loads we do sign-extending loads to 64bit and then extract the
1686
- // subreg. In the end this is just a NOOP.
1687
- if (VT == MVT::i32 && !WantZExt)
1688
- ResultReg = fastEmitInst_extractsubreg (MVT::i32, ResultReg, /* IsKill=*/ true ,
1689
- AArch64::sub_32);
1708
+ // For zero-extending loads to 64bit we emit a 32bit load and then convert
1709
+ // the w-reg to an x-reg. In the end this is just a noop and will be removed.
1710
+ if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1711
+ unsigned Reg64 = createResultReg (&AArch64::GPR64RegClass);
1712
+ BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
1713
+ TII.get (AArch64::SUBREG_TO_REG), Reg64)
1714
+ .addImm (0 )
1715
+ .addReg (ResultReg, getKillRegState (true ))
1716
+ .addImm (AArch64::sub_32);
1717
+ ResultReg = Reg64;
1718
+ }
1690
1719
1691
1720
// Loading an i1 requires special handling.
1692
1721
if (VT == MVT::i1) {
1693
- unsigned ANDReg = emitAnd_ri (MVT::i32, ResultReg, /* IsKill=*/ true , 1 );
1722
+ unsigned ANDReg = emitAnd_ri (IsRet64Bit ? MVT::i64 : MVT::i32, ResultReg,
1723
+ /* IsKill=*/ true , 1 );
1694
1724
assert (ANDReg && " Unexpected AND instruction emission failure." );
1695
1725
ResultReg = ANDReg;
1696
1726
}
@@ -1767,11 +1797,21 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
1767
1797
return false ;
1768
1798
1769
1799
bool WantZExt = true ;
1770
- if (I->hasOneUse () && isa<SExtInst>(I->use_begin ()->getUser ()))
1771
- WantZExt = false ;
1800
+ MVT RetVT = VT;
1801
+ if (I->hasOneUse ()) {
1802
+ if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin ()->getUser ())) {
1803
+ if (!isTypeSupported (ZE->getType (), RetVT, /* IsVectorAllowed=*/ false ))
1804
+ RetVT = VT;
1805
+ } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin ()->getUser ())) {
1806
+ if (!isTypeSupported (SE->getType (), RetVT, /* IsVectorAllowed=*/ false ))
1807
+ RetVT = VT;
1808
+ WantZExt = false ;
1809
+ }
1810
+ }
1772
1811
1773
1812
unsigned ResultReg;
1774
- if (!emitLoad (VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor (I)))
1813
+ if (!emitLoad (VT, RetVT, ResultReg, Addr, WantZExt,
1814
+ createMachineMemOperandFor (I)))
1775
1815
return false ;
1776
1816
1777
1817
updateValueMap (I, ResultReg);
@@ -2897,7 +2937,7 @@ bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
2897
2937
2898
2938
bool RV;
2899
2939
unsigned ResultReg;
2900
- RV = emitLoad (VT, ResultReg, Src);
2940
+ RV = emitLoad (VT, VT, ResultReg, Src);
2901
2941
if (!RV)
2902
2942
return false ;
2903
2943
@@ -3917,51 +3957,37 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
3917
3957
if (!isTypeSupported (I->getOperand (0 )->getType (), SrcVT))
3918
3958
return false ;
3919
3959
3920
- if (isIntExtFree (I)) {
3921
- unsigned SrcReg = getRegForValue (I->getOperand (0 ));
3922
- if (!SrcReg)
3923
- return false ;
3924
- bool SrcIsKill = hasTrivialKill (I->getOperand (0 ));
3925
-
3926
- const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
3927
- &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3928
- unsigned ResultReg = createResultReg (RC);
3929
- if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
3930
- BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3931
- TII.get (AArch64::SUBREG_TO_REG), ResultReg)
3932
- .addImm (0 )
3933
- .addReg (SrcReg, getKillRegState (SrcIsKill))
3934
- .addImm (AArch64::sub_32);
3935
- } else {
3936
- BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3937
- TII.get (TargetOpcode::COPY), ResultReg)
3938
- .addReg (SrcReg, getKillRegState (SrcIsKill));
3939
- }
3940
- updateValueMap (I, ResultReg);
3941
- return true ;
3942
- }
3943
-
3944
3960
unsigned SrcReg = getRegForValue (I->getOperand (0 ));
3945
3961
if (!SrcReg)
3946
3962
return false ;
3947
- bool SrcRegIsKill = hasTrivialKill (I->getOperand (0 ));
3963
+ bool SrcIsKill = hasTrivialKill (I->getOperand (0 ));
3948
3964
3949
- unsigned ResultReg = 0 ;
3950
- if (isIntExtFree (I)) {
3951
- if (RetVT == MVT::i64) {
3952
- ResultReg = createResultReg (&AArch64::GPR64RegClass);
3953
- BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3954
- TII.get (AArch64::SUBREG_TO_REG), ResultReg)
3955
- .addImm (0 )
3956
- .addReg (SrcReg, getKillRegState (SrcRegIsKill))
3957
- .addImm (AArch64::sub_32);
3958
- } else
3959
- ResultReg = SrcReg;
3965
+ // The load instruction selection code handles the sign-/zero-extension.
3966
+ if (const auto *LI = dyn_cast<LoadInst>(I->getOperand (0 ))) {
3967
+ if (LI->hasOneUse ()) {
3968
+ updateValueMap (I, SrcReg);
3969
+ return true ;
3970
+ }
3960
3971
}
3961
3972
3962
- if (!ResultReg)
3963
- ResultReg = emitIntExt (SrcVT, SrcReg, RetVT, isa<ZExtInst>(I));
3973
+ bool IsZExt = isa<ZExtInst>(I);
3974
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand (0 ))) {
3975
+ if ((IsZExt && Arg->hasZExtAttr ()) || (!IsZExt && Arg->hasSExtAttr ())) {
3976
+ if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
3977
+ unsigned ResultReg = createResultReg (&AArch64::GPR64RegClass);
3978
+ BuildMI (*FuncInfo.MBB , FuncInfo.InsertPt , DbgLoc,
3979
+ TII.get (AArch64::SUBREG_TO_REG), ResultReg)
3980
+ .addImm (0 )
3981
+ .addReg (SrcReg, getKillRegState (SrcIsKill))
3982
+ .addImm (AArch64::sub_32);
3983
+ SrcReg = ResultReg;
3984
+ }
3985
+ updateValueMap (I, SrcReg);
3986
+ return true ;
3987
+ }
3988
+ }
3964
3989
3990
+ unsigned ResultReg = emitIntExt (SrcVT, SrcReg, RetVT, IsZExt);
3965
3991
if (!ResultReg)
3966
3992
return false ;
3967
3993
0 commit comments