Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21394,11 +21394,11 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
}

// Reports whether speculating a count-trailing-zeros operation is considered
// cheap for the current subtarget. The Ty parameter is not consulted in the
// visible body; the answer depends only on subtarget feature queries.
// NOTE(review): this text appears to come from a rendered diff in which the
// removed and the added line are shown back to back. Read literally, only the
// first return executes and the second is unreachable -- confirm which of the
// two lines is the current one before relying on this block.
bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
return Subtarget->hasV6T2Ops();
return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
}

// Reports whether speculating a count-leading-zeros operation is considered
// cheap for the current subtarget. The Ty parameter is not consulted in the
// visible body; the answer depends only on subtarget feature queries.
// NOTE(review): this text appears to come from a rendered diff in which the
// removed and the added line are shown back to back. Read literally, only the
// first return executes and the second is unreachable -- confirm which of the
// two lines is the current one before relying on this block.
bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return Subtarget->hasV6T2Ops();
return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
}

bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) {

getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});

if (ST.hasV5TOps()) {
if (ST.hasV5TOps() && !ST.isThumb1Only()) {
getActionDefinitionsBuilder(G_CTLZ)
.legalFor({s32, s32})
.clampScalar(1, s32, s32)
Expand Down
16 changes: 0 additions & 16 deletions llvm/lib/Target/ARM/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -697,22 +697,6 @@ target-neutral one.

//===---------------------------------------------------------------------===//

Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins
are specified to be undefined at zero, so portable code must check for zero
and handle it as a special case. That is unnecessary on ARM where those
operations are implemented in a way that is well-defined for zero. For
example:

int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }

should just be implemented with a CLZ instruction. Since there are other
targets, e.g., PPC, that share this behavior, it would be best to implement
this in a target-independent way: we should probably fold that (when using
"undefined at zero" semantics) to set the "defined at zero" bit and have
the code generator expand out the right code.

//===---------------------------------------------------------------------===//

Clean up the test/MC/ARM files to have more robust register choices.

R0 should not be used as a register operand in the assembler tests as it's then
Expand Down
37 changes: 33 additions & 4 deletions llvm/test/CodeGen/ARM/clz.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s -check-prefixes=CHECK,INLINE
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALL

declare i32 @llvm.ctlz.i32(i32, i1)

define i32 @test(i32 %x) {
; CHECK-LABEL: test
; INLINE: clz r0, r0
; LIBCALL: b __clzsi2
; Calls llvm.ctlz.i32 with its i1 flag set to true. The INLINE run expects a
; single clz instruction; the LIBCALL run expects a tail branch to __clzsi2.
define i32 @undef_zero(i32 %x) {
; INLINE-LABEL: undef_zero:
; INLINE: @ %bb.0:
; INLINE-NEXT: clz r0, r0
; INLINE-NEXT: bx lr
;
; LIBCALL-LABEL: undef_zero:
; LIBCALL: @ %bb.0:
; LIBCALL-NEXT: b __clzsi2
%tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
ret i32 %tmp.1
}

; Calls llvm.ctlz.i32 with its i1 flag set to false. The INLINE run still
; expects a single clz instruction. The LIBCALL run expects an explicit
; compare against zero that returns 32 early, and only otherwise a call to
; __clzsi2.
define i32 @no_undef_zero(i32 %x) {
; INLINE-LABEL: no_undef_zero:
; INLINE: @ %bb.0:
; INLINE-NEXT: clz r0, r0
; INLINE-NEXT: bx lr
;
; LIBCALL-LABEL: no_undef_zero:
; LIBCALL: @ %bb.0:
; LIBCALL-NEXT: cmp r0, #0
; LIBCALL-NEXT: moveq r0, #32
; LIBCALL-NEXT: moveq pc, lr
; LIBCALL-NEXT: .LBB1_1: @ %cond.false
; LIBCALL-NEXT: .save {r11, lr}
; LIBCALL-NEXT: push {r11, lr}
; LIBCALL-NEXT: bl __clzsi2
; LIBCALL-NEXT: pop {r11, lr}
; LIBCALL-NEXT: mov pc, lr
%tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 false )
ret i32 %tmp.1
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
81 changes: 81 additions & 0 deletions llvm/test/CodeGen/ARM/cttz.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple arm-eabi -mattr=+v5t | FileCheck %s --check-prefix=CHECK-5
; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s
; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s
; RUN: llc < %s -mtriple thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-6M
Expand All @@ -14,6 +15,15 @@ declare i64 @llvm.cttz.i64(i64, i1)
;------------------------------------------------------------------------------

define i8 @test_i8(i8 %a) {
; CHECK-5-LABEL: test_i8:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: orr r0, r0, #256
; CHECK-5-NEXT: sub r1, r0, #1
; CHECK-5-NEXT: bic r0, r1, r0
; CHECK-5-NEXT: clz r0, r0
; CHECK-5-NEXT: rsb r0, r0, #32
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, #256
Expand Down Expand Up @@ -81,6 +91,15 @@ define i8 @test_i8(i8 %a) {
}

define i16 @test_i16(i16 %a) {
; CHECK-5-LABEL: test_i16:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: orr r0, r0, #65536
; CHECK-5-NEXT: sub r1, r0, #1
; CHECK-5-NEXT: bic r0, r1, r0
; CHECK-5-NEXT: clz r0, r0
; CHECK-5-NEXT: rsb r0, r0, #32
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, #65536
Expand Down Expand Up @@ -148,6 +167,14 @@ define i16 @test_i16(i16 %a) {
}

define i32 @test_i32(i32 %a) {
; CHECK-5-LABEL: test_i32:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: sub r1, r0, #1
; CHECK-5-NEXT: bic r0, r1, r0
; CHECK-5-NEXT: clz r0, r0
; CHECK-5-NEXT: rsb r0, r0, #32
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
Expand Down Expand Up @@ -207,6 +234,21 @@ define i32 @test_i32(i32 %a) {
}

define i64 @test_i64(i64 %a) {
; CHECK-5-LABEL: test_i64:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: sub r3, r1, #1
; CHECK-5-NEXT: sub r2, r0, #1
; CHECK-5-NEXT: bic r1, r3, r1
; CHECK-5-NEXT: bic r2, r2, r0
; CHECK-5-NEXT: clz r1, r1
; CHECK-5-NEXT: clz r2, r2
; CHECK-5-NEXT: rsb r1, r1, #64
; CHECK-5-NEXT: cmp r0, #0
; CHECK-5-NEXT: rsbne r1, r2, #32
; CHECK-5-NEXT: mov r0, r1
; CHECK-5-NEXT: mov r1, #0
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r1, r1
Expand Down Expand Up @@ -323,6 +365,14 @@ define i64 @test_i64(i64 %a) {
;------------------------------------------------------------------------------

define i8 @test_i8_zero_undef(i8 %a) {
; CHECK-5-LABEL: test_i8_zero_undef:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: sub r1, r0, #1
; CHECK-5-NEXT: bic r0, r1, r0
; CHECK-5-NEXT: clz r0, r0
; CHECK-5-NEXT: rsb r0, r0, #32
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i8_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
Expand Down Expand Up @@ -377,6 +427,14 @@ define i8 @test_i8_zero_undef(i8 %a) {
}

define i16 @test_i16_zero_undef(i16 %a) {
; CHECK-5-LABEL: test_i16_zero_undef:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: sub r1, r0, #1
; CHECK-5-NEXT: bic r0, r1, r0
; CHECK-5-NEXT: clz r0, r0
; CHECK-5-NEXT: rsb r0, r0, #32
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i16_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
Expand Down Expand Up @@ -432,6 +490,14 @@ define i16 @test_i16_zero_undef(i16 %a) {


define i32 @test_i32_zero_undef(i32 %a) {
; CHECK-5-LABEL: test_i32_zero_undef:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: sub r1, r0, #1
; CHECK-5-NEXT: bic r0, r1, r0
; CHECK-5-NEXT: clz r0, r0
; CHECK-5-NEXT: rsb r0, r0, #32
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i32_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
Expand Down Expand Up @@ -486,6 +552,21 @@ define i32 @test_i32_zero_undef(i32 %a) {
}

define i64 @test_i64_zero_undef(i64 %a) {
; CHECK-5-LABEL: test_i64_zero_undef:
; CHECK-5: @ %bb.0:
; CHECK-5-NEXT: sub r3, r1, #1
; CHECK-5-NEXT: sub r2, r0, #1
; CHECK-5-NEXT: bic r1, r3, r1
; CHECK-5-NEXT: bic r2, r2, r0
; CHECK-5-NEXT: clz r1, r1
; CHECK-5-NEXT: clz r2, r2
; CHECK-5-NEXT: rsb r1, r1, #64
; CHECK-5-NEXT: cmp r0, #0
; CHECK-5-NEXT: rsbne r1, r2, #32
; CHECK-5-NEXT: mov r0, r1
; CHECK-5-NEXT: mov r1, #0
; CHECK-5-NEXT: bx lr
;
; CHECK-LABEL: test_i64_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r1, r1
Expand Down