Skip to content

Commit c5d000b

Browse files
authored
[Thumb] Resolve FIXME: Use 'mov hi, $src; mov $dst, hi' (#81908)
Consider the following:

    ldr r0, [r4]
    ldr r7, [r0, #4]
    cmp r7, r3
    bhi .LBB0_6
    cmp r0, r2
    push {r0}
    pop {r4}
    bne .LBB0_3
    movs r0, r6
    pop {r4, r5, r6, r7}
    pop {r1}
    bx r1

This is a snippet of the Thumb1 code that clang currently generates for the K&R malloc function. The value pushed by push {r0} is immediately popped back by pop {r4}, because movs r4, r0 would destroy the flags set by the cmp right above it. The compiler has only one alternative in this case: transferring the value through a high register. However, LLVM does not currently consider this a valid approach, even though it is free to clobber a high register. This patch addresses the FIXME so that the compiler can perform the copy through r10, r11, or r12 whenever one of them is available.
1 parent 5ed60ff commit c5d000b

File tree

6 files changed

+66
-31
lines changed

6 files changed

+66
-31
lines changed

Diff for: llvm/lib/Target/ARM/Thumb1InstrInfo.cpp

+42-7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
#include "Thumb1InstrInfo.h"
1414
#include "ARMSubtarget.h"
15+
#include "llvm/ADT/BitVector.h"
16+
#include "llvm/CodeGen/LiveRegUnits.h"
1517
#include "llvm/CodeGen/MachineFrameInfo.h"
1618
#include "llvm/CodeGen/MachineInstrBuilder.h"
1719
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -47,24 +49,57 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4749
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
4850
"Thumb1 can only copy GPR registers");
4951

50-
if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
51-
|| !ARM::tGPRRegClass.contains(DestReg))
52+
if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) ||
53+
!ARM::tGPRRegClass.contains(DestReg))
5254
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
5355
.addReg(SrcReg, getKillRegState(KillSrc))
5456
.add(predOps(ARMCC::AL));
5557
else {
56-
// FIXME: Can also use 'mov hi, $src; mov $dst, hi',
57-
// with hi as either r10 or r11.
58-
5958
const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
60-
if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
61-
== MachineBasicBlock::LQR_Dead) {
59+
LiveRegUnits UsedRegs(*RegInfo);
60+
UsedRegs.addLiveOuts(MBB);
61+
62+
auto InstUpToI = MBB.end();
63+
while (InstUpToI != I)
64+
// The pre-decrement is on purpose here.
65+
// We want to have the liveness right before I.
66+
UsedRegs.stepBackward(*--InstUpToI);
67+
68+
if (UsedRegs.available(ARM::CPSR)) {
6269
BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
6370
.addReg(SrcReg, getKillRegState(KillSrc))
6471
->addRegisterDead(ARM::CPSR, RegInfo);
6572
return;
6673
}
6774

75+
// Use high register to move source to destination
76+
// if movs is not an option.
77+
BitVector Allocatable = RegInfo->getAllocatableSet(
78+
MF, RegInfo->getRegClass(ARM::hGPRRegClassID));
79+
80+
Register TmpReg = ARM::NoRegister;
81+
// Prefer R12 as it is known to not be preserved anyway
82+
if (UsedRegs.available(ARM::R12) && Allocatable.test(ARM::R12)) {
83+
TmpReg = ARM::R12;
84+
} else {
85+
for (Register Reg : Allocatable.set_bits()) {
86+
if (UsedRegs.available(Reg)) {
87+
TmpReg = Reg;
88+
break;
89+
}
90+
}
91+
}
92+
93+
if (TmpReg) {
94+
BuildMI(MBB, I, DL, get(ARM::tMOVr), TmpReg)
95+
.addReg(SrcReg, getKillRegState(KillSrc))
96+
.add(predOps(ARMCC::AL));
97+
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
98+
.addReg(TmpReg, getKillRegState(true))
99+
.add(predOps(ARMCC::AL));
100+
return;
101+
}
102+
68103
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
69104
BuildMI(MBB, I, DL, get(ARM::tPUSH))
70105
.add(predOps(ARMCC::AL))

Diff for: llvm/test/CodeGen/ARM/sadd_sat.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
130130
; CHECK-T15TE-NEXT: bics r4, r1
131131
; CHECK-T15TE-NEXT: asrs r1, r3, #31
132132
; CHECK-T15TE-NEXT: cmp r4, #0
133-
; CHECK-T15TE-NEXT: push {r1}
134-
; CHECK-T15TE-NEXT: pop {r0}
133+
; CHECK-T15TE-NEXT: mov r12, r1
134+
; CHECK-T15TE-NEXT: mov r0, r12
135135
; CHECK-T15TE-NEXT: bmi .LBB1_2
136136
; CHECK-T15TE-NEXT: @ %bb.1:
137137
; CHECK-T15TE-NEXT: movs r0, r2

Diff for: llvm/test/CodeGen/ARM/select_const.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -665,8 +665,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
665665
; THUMB-NEXT: movs r7, #1
666666
; THUMB-NEXT: ands r0, r7
667667
; THUMB-NEXT: subs r1, r0, #1
668-
; THUMB-NEXT: push {r0}
669-
; THUMB-NEXT: pop {r4}
668+
; THUMB-NEXT: mov r12, r0
669+
; THUMB-NEXT: mov r4, r12
670670
; THUMB-NEXT: sbcs r4, r1
671671
; THUMB-NEXT: cmp r0, #0
672672
; THUMB-NEXT: bne .LBB24_2
@@ -681,8 +681,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
681681
; THUMB-NEXT: ands r5, r0
682682
; THUMB-NEXT: movs r6, #0
683683
; THUMB-NEXT: subs r0, r5, #1
684-
; THUMB-NEXT: push {r4}
685-
; THUMB-NEXT: pop {r1}
684+
; THUMB-NEXT: mov r12, r4
685+
; THUMB-NEXT: mov r1, r12
686686
; THUMB-NEXT: sbcs r1, r6
687687
; THUMB-NEXT: eors r3, r7
688688
; THUMB-NEXT: ldr r6, .LCPI24_0
@@ -786,11 +786,11 @@ define i64 @func(i64 %arg) {
786786
; THUMB-NEXT: push {r4, lr}
787787
; THUMB-NEXT: movs r2, #0
788788
; THUMB-NEXT: adds r3, r0, #1
789-
; THUMB-NEXT: push {r1}
790-
; THUMB-NEXT: pop {r3}
789+
; THUMB-NEXT: mov r12, r1
790+
; THUMB-NEXT: mov r3, r12
791791
; THUMB-NEXT: adcs r3, r2
792-
; THUMB-NEXT: push {r2}
793-
; THUMB-NEXT: pop {r3}
792+
; THUMB-NEXT: mov r12, r2
793+
; THUMB-NEXT: mov r3, r12
794794
; THUMB-NEXT: adcs r3, r2
795795
; THUMB-NEXT: subs r4, r3, #1
796796
; THUMB-NEXT: adds r0, r0, #1

Diff for: llvm/test/CodeGen/ARM/wide-compares.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -257,12 +257,12 @@ define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) {
257257
; CHECK-THUMB1-NOMOV-NEXT: ldr r5, [sp, #16]
258258
; CHECK-THUMB1-NOMOV-NEXT: subs r2, r2, r5
259259
; CHECK-THUMB1-NOMOV-NEXT: sbcs r3, r0
260-
; CHECK-THUMB1-NOMOV-NEXT: push {r1}
261-
; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
260+
; CHECK-THUMB1-NOMOV-NEXT: mov r12, r1
261+
; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12
262262
; CHECK-THUMB1-NOMOV-NEXT: blt .LBB3_2
263263
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry
264-
; CHECK-THUMB1-NOMOV-NEXT: push {r4}
265-
; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
264+
; CHECK-THUMB1-NOMOV-NEXT: mov r12, r4
265+
; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12
266266
; CHECK-THUMB1-NOMOV-NEXT: .LBB3_2: @ %entry
267267
; CHECK-THUMB1-NOMOV-NEXT: bge .LBB3_4
268268
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.3: @ %entry

Diff for: llvm/test/CodeGen/Thumb/pr35836.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,18 @@ while.body:
3535
br label %while.body
3636
}
3737
; CHECK: adds r3, r0, r1
38-
; CHECK: push {r5}
39-
; CHECK: pop {r1}
38+
; CHECK: mov r12, r5
39+
; CHECK: mov r1, r12
4040
; CHECK: adcs r1, r5
4141
; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
4242
; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
4343
; CHECK: adds r2, r0, r2
44-
; CHECK: push {r5}
45-
; CHECK: pop {r4}
44+
; CHECK: mov r12, r5
45+
; CHECK: mov r4, r12
4646
; CHECK: adcs r4, r5
4747
; CHECK: adds r0, r2, r5
48-
; CHECK: push {r3}
49-
; CHECK: pop {r0}
48+
; CHECK: mov r12, r3
49+
; CHECK: mov r0, r12
5050
; CHECK: adcs r0, r4
5151
; CHECK: ldr r6, [sp, #4] @ 4-byte Reload
5252
; CHECK: str r0, [r6]

Diff for: llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
122122
; CHECK-NEXT: movs r3, #1
123123
; CHECK-NEXT: movs r4, #0
124124
; CHECK-NEXT: cmp r0, #170
125-
; CHECK-NEXT: push {r3}
126-
; CHECK-NEXT: pop {r0}
125+
; CHECK-NEXT: mov r12, r3
126+
; CHECK-NEXT: mov r0, r12
127127
; CHECK-NEXT: bhi .LBB4_2
128128
; CHECK-NEXT: @ %bb.1:
129129
; CHECK-NEXT: movs r0, r4
@@ -134,8 +134,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
134134
; CHECK-NEXT: movs r1, #73
135135
; CHECK-NEXT: lsls r1, r1, #23
136136
; CHECK-NEXT: cmp r5, r1
137-
; CHECK-NEXT: push {r3}
138-
; CHECK-NEXT: pop {r1}
137+
; CHECK-NEXT: mov r12, r3
138+
; CHECK-NEXT: mov r1, r12
139139
; CHECK-NEXT: bhi .LBB4_4
140140
; CHECK-NEXT: @ %bb.3:
141141
; CHECK-NEXT: movs r1, r4

0 commit comments

Comments
 (0)