diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 830156359e9e8..8ef22663c26a8 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21394,11 +21394,11 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, } bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { - return Subtarget->hasV6T2Ops(); + return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only(); } bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { - return Subtarget->hasV6T2Ops(); + return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only(); } bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial( diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index fc12f050fa5a5..cdff649ecfa57 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -206,7 +206,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) { getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64}); - if (ST.hasV5TOps()) { + if (ST.hasV5TOps() && !ST.isThumb1Only()) { getActionDefinitionsBuilder(G_CTLZ) .legalFor({s32, s32}) .clampScalar(1, s32, s32) diff --git a/llvm/lib/Target/ARM/README.txt b/llvm/lib/Target/ARM/README.txt index def67cfae7277..ff84e07fa084a 100644 --- a/llvm/lib/Target/ARM/README.txt +++ b/llvm/lib/Target/ARM/README.txt @@ -697,22 +697,6 @@ target-neutral one. //===---------------------------------------------------------------------===// -Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins -are specified to be undefined at zero, so portable code must check for zero -and handle it as a special case. That is unnecessary on ARM where those -operations are implemented in a way that is well-defined for zero. For -example: - -int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; } - -should just be implemented with a CLZ instruction. Since there are other -targets, e.g., PPC, that share this behavior, it would be best to implement -this in a target-independent way: we should probably fold that (when using -"undefined at zero" semantics) to set the "defined at zero" bit and have -the code generator expand out the right code. - -//===---------------------------------------------------------------------===// - Clean up the test/MC/ARM files to have more robust register choices. R0 should not be used as a register operand in the assembler tests as it's then diff --git a/llvm/test/CodeGen/ARM/clz.ll b/llvm/test/CodeGen/ARM/clz.ll index 0f49fbba11845..9e1e9f6ce6daa 100644 --- a/llvm/test/CodeGen/ARM/clz.ll +++ b/llvm/test/CodeGen/ARM/clz.ll @@ -1,12 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s -check-prefixes=CHECK,INLINE ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALL declare i32 @llvm.ctlz.i32(i32, i1) -define i32 @test(i32 %x) { -; CHECK-LABEL: test -; INLINE: clz r0, r0 -; LIBCALL: b __clzsi2 +define i32 @undef_zero(i32 %x) { +; INLINE-LABEL: undef_zero: +; INLINE: @ %bb.0: +; INLINE-NEXT: clz r0, r0 +; INLINE-NEXT: bx lr +; +; LIBCALL-LABEL: undef_zero: +; LIBCALL: @ %bb.0: +; LIBCALL-NEXT: b __clzsi2 %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true ) ret i32 %tmp.1 } + +define i32 @no_undef_zero(i32 %x) { +; INLINE-LABEL: no_undef_zero: +; INLINE: @ %bb.0: +; INLINE-NEXT: clz r0, r0 +; INLINE-NEXT: bx lr +; +; LIBCALL-LABEL: no_undef_zero: +; LIBCALL: @ %bb.0: +; LIBCALL-NEXT: cmp r0, #0 +; LIBCALL-NEXT: moveq r0, #32 +; LIBCALL-NEXT: moveq pc, lr +; LIBCALL-NEXT: .LBB1_1: @ %cond.false +; LIBCALL-NEXT: .save {r11, lr} +; LIBCALL-NEXT: push {r11, lr} +; LIBCALL-NEXT: bl __clzsi2 +; LIBCALL-NEXT: pop {r11, lr} +; LIBCALL-NEXT: mov pc, lr + %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 false ) + ret i32 %tmp.1 +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll index 1146ad64ee709..bf42e9f1104b4 100644 --- a/llvm/test/CodeGen/ARM/cttz.ll +++ b/llvm/test/CodeGen/ARM/cttz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple arm-eabi -mattr=+v5t | FileCheck %s --check-prefix=CHECK-5 ; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s ; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s ; RUN: llc < %s -mtriple thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-6M @@ -14,6 +15,15 @@ declare i64 @llvm.cttz.i64(i64, i1) ;------------------------------------------------------------------------------ define i8 @test_i8(i8 %a) { +; CHECK-5-LABEL: test_i8: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: orr r0, r0, #256 +; CHECK-5-NEXT: sub r1, r0, #1 +; CHECK-5-NEXT: bic r0, r1, r0 +; CHECK-5-NEXT: clz r0, r0 +; CHECK-5-NEXT: rsb r0, r0, #32 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i8: ; CHECK: @ %bb.0: ; CHECK-NEXT: orr r0, r0, #256 @@ -81,6 +91,15 @@ define i8 @test_i8(i8 %a) { } define i16 @test_i16(i16 %a) { +; CHECK-5-LABEL: test_i16: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: orr r0, r0, #65536 +; CHECK-5-NEXT: sub r1, r0, #1 +; CHECK-5-NEXT: bic r0, r1, r0 +; CHECK-5-NEXT: clz r0, r0 +; CHECK-5-NEXT: rsb r0, r0, #32 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i16: ; CHECK: @ %bb.0: ; CHECK-NEXT: orr r0, r0, #65536 @@ -148,6 +167,14 @@ define i16 @test_i16(i16 %a) { } define i32 @test_i32(i32 %a) { +; CHECK-5-LABEL: test_i32: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: sub r1, r0, #1 +; CHECK-5-NEXT: bic r0, r1, r0 +; CHECK-5-NEXT: clz r0, r0 +; CHECK-5-NEXT: rsb r0, r0, #32 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: rbit r0, r0 @@ -207,6 +234,21 @@ define i32 @test_i32(i32 %a) { } define i64 @test_i64(i64 %a) { +; CHECK-5-LABEL: test_i64: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: sub r3, r1, #1 +; CHECK-5-NEXT: sub r2, r0, #1 +; CHECK-5-NEXT: bic r1, r3, r1 +; CHECK-5-NEXT: bic r2, r2, r0 +; CHECK-5-NEXT: clz r1, r1 +; CHECK-5-NEXT: clz r2, r2 +; CHECK-5-NEXT: rsb r1, r1, #64 +; CHECK-5-NEXT: cmp r0, #0 +; CHECK-5-NEXT: rsbne r1, r2, #32 +; CHECK-5-NEXT: mov r0, r1 +; CHECK-5-NEXT: mov r1, #0 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i64: ; CHECK: @ %bb.0: ; CHECK-NEXT: rbit r1, r1 @@ -323,6 +365,14 @@ define i64 @test_i64(i64 %a) { ;------------------------------------------------------------------------------ define i8 @test_i8_zero_undef(i8 %a) { +; CHECK-5-LABEL: test_i8_zero_undef: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: sub r1, r0, #1 +; CHECK-5-NEXT: bic r0, r1, r0 +; CHECK-5-NEXT: clz r0, r0 +; CHECK-5-NEXT: rsb r0, r0, #32 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i8_zero_undef: ; CHECK: @ %bb.0: ; CHECK-NEXT: rbit r0, r0 @@ -377,6 +427,14 @@ define i8 @test_i8_zero_undef(i8 %a) { } define i16 @test_i16_zero_undef(i16 %a) { +; CHECK-5-LABEL: test_i16_zero_undef: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: sub r1, r0, #1 +; CHECK-5-NEXT: bic r0, r1, r0 +; CHECK-5-NEXT: clz r0, r0 +; CHECK-5-NEXT: rsb r0, r0, #32 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i16_zero_undef: ; CHECK: @ %bb.0: ; CHECK-NEXT: rbit r0, r0 @@ -432,6 +490,14 @@ define i16 @test_i16_zero_undef(i16 %a) { define i32 @test_i32_zero_undef(i32 %a) { +; CHECK-5-LABEL: test_i32_zero_undef: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: sub r1, r0, #1 +; CHECK-5-NEXT: bic r0, r1, r0 +; CHECK-5-NEXT: clz r0, r0 +; CHECK-5-NEXT: rsb r0, r0, #32 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i32_zero_undef: ; CHECK: @ %bb.0: ; CHECK-NEXT: rbit r0, r0 @@ -486,6 +552,21 @@ define i32 @test_i32_zero_undef(i32 %a) { } define i64 @test_i64_zero_undef(i64 %a) { +; CHECK-5-LABEL: test_i64_zero_undef: +; CHECK-5: @ %bb.0: +; CHECK-5-NEXT: sub r3, r1, #1 +; CHECK-5-NEXT: sub r2, r0, #1 +; CHECK-5-NEXT: bic r1, r3, r1 +; CHECK-5-NEXT: bic r2, r2, r0 +; CHECK-5-NEXT: clz r1, r1 +; CHECK-5-NEXT: clz r2, r2 +; CHECK-5-NEXT: rsb r1, r1, #64 +; CHECK-5-NEXT: cmp r0, #0 +; CHECK-5-NEXT: rsbne r1, r2, #32 +; CHECK-5-NEXT: mov r0, r1 +; CHECK-5-NEXT: mov r1, #0 +; CHECK-5-NEXT: bx lr +; ; CHECK-LABEL: test_i64_zero_undef: ; CHECK: @ %bb.0: ; CHECK-NEXT: rbit r1, r1