Skip to content

Commit 79dfe48

Browse files
authored
[ARM] Set isCheapToSpeculateCtlz and isCheapToSpeculateCttz to true when hasV5TOps and not Thumb1-only (#154848)
This avoids expanding ctlz/cttz with unneeded checks for zero, since the ARM CLZ instruction is well-defined for a zero input. Also fix the logic error in ARMLegalizerInfo so that G_CTLZ is NOT legal on Thumb1 in GlobalISel. Finally, remove the README entry regarding this issue.
1 parent aa1dd4b commit 79dfe48

File tree

5 files changed

+117
-23
lines changed

5 files changed

+117
-23
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21380,11 +21380,11 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
2138021380
}
2138121381

2138221382
bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21383-
return Subtarget->hasV6T2Ops();
21383+
return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
2138421384
}
2138521385

2138621386
bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21387-
return Subtarget->hasV6T2Ops();
21387+
return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
2138821388
}
2138921389

2139021390
bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(

llvm/lib/Target/ARM/ARMLegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) {
206206

207207
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
208208

209-
if (ST.hasV5TOps()) {
209+
if (ST.hasV5TOps() && !ST.isThumb1Only()) {
210210
getActionDefinitionsBuilder(G_CTLZ)
211211
.legalFor({s32, s32})
212212
.clampScalar(1, s32, s32)

llvm/lib/Target/ARM/README.txt

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -697,22 +697,6 @@ target-neutral one.
697697

698698
//===---------------------------------------------------------------------===//
699699

700-
Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins
701-
are specified to be undefined at zero, so portable code must check for zero
702-
and handle it as a special case. That is unnecessary on ARM where those
703-
operations are implemented in a way that is well-defined for zero. For
704-
example:
705-
706-
int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }
707-
708-
should just be implemented with a CLZ instruction. Since there are other
709-
targets, e.g., PPC, that share this behavior, it would be best to implement
710-
this in a target-independent way: we should probably fold that (when using
711-
"undefined at zero" semantics) to set the "defined at zero" bit and have
712-
the code generator expand out the right code.
713-
714-
//===---------------------------------------------------------------------===//
715-
716700
Clean up the test/MC/ARM files to have more robust register choices.
717701

718702
R0 should not be used as a register operand in the assembler tests as it's then

llvm/test/CodeGen/ARM/clz.ll

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s -check-prefixes=CHECK,INLINE
23
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALL
34

45
declare i32 @llvm.ctlz.i32(i32, i1)
56

6-
define i32 @test(i32 %x) {
7-
; CHECK-LABEL: test
8-
; INLINE: clz r0, r0
9-
; LIBCALL: b __clzsi2
7+
define i32 @undef_zero(i32 %x) {
8+
; INLINE-LABEL: undef_zero:
9+
; INLINE: @ %bb.0:
10+
; INLINE-NEXT: clz r0, r0
11+
; INLINE-NEXT: bx lr
12+
;
13+
; LIBCALL-LABEL: undef_zero:
14+
; LIBCALL: @ %bb.0:
15+
; LIBCALL-NEXT: b __clzsi2
1016
%tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
1117
ret i32 %tmp.1
1218
}
19+
20+
define i32 @no_undef_zero(i32 %x) {
21+
; INLINE-LABEL: no_undef_zero:
22+
; INLINE: @ %bb.0:
23+
; INLINE-NEXT: clz r0, r0
24+
; INLINE-NEXT: bx lr
25+
;
26+
; LIBCALL-LABEL: no_undef_zero:
27+
; LIBCALL: @ %bb.0:
28+
; LIBCALL-NEXT: cmp r0, #0
29+
; LIBCALL-NEXT: moveq r0, #32
30+
; LIBCALL-NEXT: moveq pc, lr
31+
; LIBCALL-NEXT: .LBB1_1: @ %cond.false
32+
; LIBCALL-NEXT: .save {r11, lr}
33+
; LIBCALL-NEXT: push {r11, lr}
34+
; LIBCALL-NEXT: bl __clzsi2
35+
; LIBCALL-NEXT: pop {r11, lr}
36+
; LIBCALL-NEXT: mov pc, lr
37+
%tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 false )
38+
ret i32 %tmp.1
39+
}
40+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
41+
; CHECK: {{.*}}

llvm/test/CodeGen/ARM/cttz.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple arm-eabi -mattr=+v5t | FileCheck %s --check-prefix=CHECK-5
23
; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s
34
; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s
45
; RUN: llc < %s -mtriple thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-6M
@@ -14,6 +15,15 @@ declare i64 @llvm.cttz.i64(i64, i1)
1415
;------------------------------------------------------------------------------
1516

1617
define i8 @test_i8(i8 %a) {
18+
; CHECK-5-LABEL: test_i8:
19+
; CHECK-5: @ %bb.0:
20+
; CHECK-5-NEXT: orr r0, r0, #256
21+
; CHECK-5-NEXT: sub r1, r0, #1
22+
; CHECK-5-NEXT: bic r0, r1, r0
23+
; CHECK-5-NEXT: clz r0, r0
24+
; CHECK-5-NEXT: rsb r0, r0, #32
25+
; CHECK-5-NEXT: bx lr
26+
;
1727
; CHECK-LABEL: test_i8:
1828
; CHECK: @ %bb.0:
1929
; CHECK-NEXT: orr r0, r0, #256
@@ -81,6 +91,15 @@ define i8 @test_i8(i8 %a) {
8191
}
8292

8393
define i16 @test_i16(i16 %a) {
94+
; CHECK-5-LABEL: test_i16:
95+
; CHECK-5: @ %bb.0:
96+
; CHECK-5-NEXT: orr r0, r0, #65536
97+
; CHECK-5-NEXT: sub r1, r0, #1
98+
; CHECK-5-NEXT: bic r0, r1, r0
99+
; CHECK-5-NEXT: clz r0, r0
100+
; CHECK-5-NEXT: rsb r0, r0, #32
101+
; CHECK-5-NEXT: bx lr
102+
;
84103
; CHECK-LABEL: test_i16:
85104
; CHECK: @ %bb.0:
86105
; CHECK-NEXT: orr r0, r0, #65536
@@ -148,6 +167,14 @@ define i16 @test_i16(i16 %a) {
148167
}
149168

150169
define i32 @test_i32(i32 %a) {
170+
; CHECK-5-LABEL: test_i32:
171+
; CHECK-5: @ %bb.0:
172+
; CHECK-5-NEXT: sub r1, r0, #1
173+
; CHECK-5-NEXT: bic r0, r1, r0
174+
; CHECK-5-NEXT: clz r0, r0
175+
; CHECK-5-NEXT: rsb r0, r0, #32
176+
; CHECK-5-NEXT: bx lr
177+
;
151178
; CHECK-LABEL: test_i32:
152179
; CHECK: @ %bb.0:
153180
; CHECK-NEXT: rbit r0, r0
@@ -207,6 +234,21 @@ define i32 @test_i32(i32 %a) {
207234
}
208235

209236
define i64 @test_i64(i64 %a) {
237+
; CHECK-5-LABEL: test_i64:
238+
; CHECK-5: @ %bb.0:
239+
; CHECK-5-NEXT: sub r3, r1, #1
240+
; CHECK-5-NEXT: sub r2, r0, #1
241+
; CHECK-5-NEXT: bic r1, r3, r1
242+
; CHECK-5-NEXT: bic r2, r2, r0
243+
; CHECK-5-NEXT: clz r1, r1
244+
; CHECK-5-NEXT: clz r2, r2
245+
; CHECK-5-NEXT: rsb r1, r1, #64
246+
; CHECK-5-NEXT: cmp r0, #0
247+
; CHECK-5-NEXT: rsbne r1, r2, #32
248+
; CHECK-5-NEXT: mov r0, r1
249+
; CHECK-5-NEXT: mov r1, #0
250+
; CHECK-5-NEXT: bx lr
251+
;
210252
; CHECK-LABEL: test_i64:
211253
; CHECK: @ %bb.0:
212254
; CHECK-NEXT: rbit r1, r1
@@ -323,6 +365,14 @@ define i64 @test_i64(i64 %a) {
323365
;------------------------------------------------------------------------------
324366

325367
define i8 @test_i8_zero_undef(i8 %a) {
368+
; CHECK-5-LABEL: test_i8_zero_undef:
369+
; CHECK-5: @ %bb.0:
370+
; CHECK-5-NEXT: sub r1, r0, #1
371+
; CHECK-5-NEXT: bic r0, r1, r0
372+
; CHECK-5-NEXT: clz r0, r0
373+
; CHECK-5-NEXT: rsb r0, r0, #32
374+
; CHECK-5-NEXT: bx lr
375+
;
326376
; CHECK-LABEL: test_i8_zero_undef:
327377
; CHECK: @ %bb.0:
328378
; CHECK-NEXT: rbit r0, r0
@@ -377,6 +427,14 @@ define i8 @test_i8_zero_undef(i8 %a) {
377427
}
378428

379429
define i16 @test_i16_zero_undef(i16 %a) {
430+
; CHECK-5-LABEL: test_i16_zero_undef:
431+
; CHECK-5: @ %bb.0:
432+
; CHECK-5-NEXT: sub r1, r0, #1
433+
; CHECK-5-NEXT: bic r0, r1, r0
434+
; CHECK-5-NEXT: clz r0, r0
435+
; CHECK-5-NEXT: rsb r0, r0, #32
436+
; CHECK-5-NEXT: bx lr
437+
;
380438
; CHECK-LABEL: test_i16_zero_undef:
381439
; CHECK: @ %bb.0:
382440
; CHECK-NEXT: rbit r0, r0
@@ -432,6 +490,14 @@ define i16 @test_i16_zero_undef(i16 %a) {
432490

433491

434492
define i32 @test_i32_zero_undef(i32 %a) {
493+
; CHECK-5-LABEL: test_i32_zero_undef:
494+
; CHECK-5: @ %bb.0:
495+
; CHECK-5-NEXT: sub r1, r0, #1
496+
; CHECK-5-NEXT: bic r0, r1, r0
497+
; CHECK-5-NEXT: clz r0, r0
498+
; CHECK-5-NEXT: rsb r0, r0, #32
499+
; CHECK-5-NEXT: bx lr
500+
;
435501
; CHECK-LABEL: test_i32_zero_undef:
436502
; CHECK: @ %bb.0:
437503
; CHECK-NEXT: rbit r0, r0
@@ -486,6 +552,21 @@ define i32 @test_i32_zero_undef(i32 %a) {
486552
}
487553

488554
define i64 @test_i64_zero_undef(i64 %a) {
555+
; CHECK-5-LABEL: test_i64_zero_undef:
556+
; CHECK-5: @ %bb.0:
557+
; CHECK-5-NEXT: sub r3, r1, #1
558+
; CHECK-5-NEXT: sub r2, r0, #1
559+
; CHECK-5-NEXT: bic r1, r3, r1
560+
; CHECK-5-NEXT: bic r2, r2, r0
561+
; CHECK-5-NEXT: clz r1, r1
562+
; CHECK-5-NEXT: clz r2, r2
563+
; CHECK-5-NEXT: rsb r1, r1, #64
564+
; CHECK-5-NEXT: cmp r0, #0
565+
; CHECK-5-NEXT: rsbne r1, r2, #32
566+
; CHECK-5-NEXT: mov r0, r1
567+
; CHECK-5-NEXT: mov r1, #0
568+
; CHECK-5-NEXT: bx lr
569+
;
489570
; CHECK-LABEL: test_i64_zero_undef:
490571
; CHECK: @ %bb.0:
491572
; CHECK-NEXT: rbit r1, r1

0 commit comments

Comments
 (0)