diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 6c3a1ae7e1775..919a1e8737e85 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -410,6 +410,22 @@ static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) { return isa(FP); } +/// isLegalCmpImmed - Return true if the specified immediate is a legal +/// icmp immediate, that is the target has icmp instructions which can compare +/// a register against the immediate without having to materialize the +/// immediate into a register. +static bool isLegalCmpImmed(int64_t Imm, const ARMSubtarget *Subtarget) { + // Thumb2 and ARM modes can use cmn for negative immediates. + if (!Subtarget->isThumb()) + return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 || + ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1; + if (Subtarget->isThumb2()) + return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 || + ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1; + // Thumb1 doesn't have cmn, and only supports 8-bit immediates. + return Imm >= 0 && Imm <= 255; +} + InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, @@ -428,6 +444,13 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, if (Opcode == Instruction::GetElementPtr && Idx != 0) return 0; + if ((Opcode == Instruction::Shl || Opcode == Instruction::LShr || + Opcode == Instruction::AShr) && + Idx == 1) { + // Shift-amount immediates are free (amounts above 64 are unexpected).
+ return 0; + } + if (Opcode == Instruction::And) { // UXTB/UXTH if (Imm == 255 || Imm == 65535) @@ -437,19 +460,41 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, getIntImmCost(~Imm, Ty, CostKind)); } - if (Opcode == Instruction::Add) + if (Opcode == Instruction::Add || Opcode == Instruction::Sub) { + if (Ty->getIntegerBitWidth() <= 32) { + int64_t ImmVal = Imm.getSExtValue(); + if (!ST->isThumb()) + if (ARM_AM::getSOImmVal((uint32_t)ImmVal) != -1 || + ARM_AM::getSOImmVal(-(uint32_t)ImmVal) != -1) + return 0; + if (ST->isThumb2()) + if (ARM_AM::getT2SOImmVal((uint32_t)ImmVal) != -1 || + ARM_AM::getT2SOImmVal(-(uint32_t)ImmVal) != -1) + return 0; + // Thumb1 only has 8-bit immediates; ADD<->SUB is free, so test |Imm|. + ImmVal = ImmVal < 0 ? -ImmVal : ImmVal; + if (ImmVal >= 0 && ImmVal <= 255) + return 0; + } + // Conversion to SUB is free, and means we can use -Imm instead. return std::min(getIntImmCost(Imm, Ty, CostKind), getIntImmCost(-Imm, Ty, CostKind)); + } - if (Opcode == Instruction::ICmp && Imm.isNegative() && - Ty->getIntegerBitWidth() == 32) { - int64_t NegImm = -Imm.getSExtValue(); - if (ST->isThumb2() && NegImm < 1<<12) - // icmp X, #-C -> cmn X, #C - return 0; - if (ST->isThumb() && NegImm < 1<<8) - // icmp X, #-C -> adds X, #C + if (Opcode == Instruction::ICmp && Ty->getIntegerBitWidth() < 64) { + int64_t ImmVal = Imm.getSExtValue(); + if (!ST->isThumb()) + if (ARM_AM::getSOImmVal((uint32_t)ImmVal) != -1 || + ARM_AM::getSOImmVal(-(uint32_t)ImmVal) != -1) + return 0; + if (ST->isThumb2()) + if (ARM_AM::getT2SOImmVal((uint32_t)ImmVal) != -1 || + ARM_AM::getT2SOImmVal(-(uint32_t)ImmVal) != -1) + return 0; + // Thumb1 has 8-bit immediates; icmp X, #-C -> adds X, #C, so test |Imm|. + ImmVal = ImmVal < 0 ? 
-ImmVal : ImmVal; + if (ImmVal >= 0 && ImmVal <= 255) return 0; } @@ -470,12 +515,31 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm)) return 0; - // We can convert <= -1 to < 0, which is generally quite cheap. - if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) { + // We can convert <= C to < C+1 (or < C to <= C-1), which is generally cheap. + if (Inst && Opcode == Instruction::ICmp && Idx == 1 && + ((Ty->getIntegerBitWidth() <= 32 && + (!isLegalCmpImmed(Imm.getSExtValue(), ST))) || + Imm.isAllOnes() || Imm.isOne())) { ICmpInst::Predicate Pred = cast(Inst)->getPredicate(); - if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) + if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) && + !Imm.isMaxSignedValue()) + return std::min(getIntImmCost(Imm, Ty, CostKind), + getIntImmCost(Imm + 1, Ty, CostKind)); + + if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) && + !Imm.isAllOnes()) return std::min(getIntImmCost(Imm, Ty, CostKind), getIntImmCost(Imm + 1, Ty, CostKind)); + + if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) && + !Imm.isMinSignedValue()) + return std::min(getIntImmCost(Imm, Ty, CostKind), + getIntImmCost(Imm - 1, Ty, CostKind)); + + if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) && + !Imm.isZero()) + return std::min(getIntImmCost(Imm, Ty, CostKind), + getIntImmCost(Imm - 1, Ty, CostKind)); } return getIntImmCost(Imm, Ty, CostKind); diff --git a/llvm/test/CodeGen/ARM/ssat.ll b/llvm/test/CodeGen/ARM/ssat.ll index ed777f2b1882b..175ed4db1c49e 100644 --- a/llvm/test/CodeGen/ARM/ssat.ll +++ b/llvm/test/CodeGen/ARM/ssat.ll @@ -387,15 +387,14 @@ entry: } ; Lower constant is different in the select and in the compare +; FIXME: 0xff800001 can be constructed with mov r2, 0x7f, ror 6; orr r2, r2, 0xe0, ror 14 define i32 @no_sat_incorrect_constant(i32 %x) #0 { ; V4T-LABEL: no_sat_incorrect_constant: ; V4T: @ 
%bb.0: @ %entry -; V4T-NEXT: mov r1, #1065353216 +; V4T-NEXT: ldr r2, .LCPI11_0 ; V4T-NEXT: cmn r0, #8388608 -; V4T-NEXT: orr r1, r1, #-1073741824 -; V4T-NEXT: mov r2, r0 -; V4T-NEXT: orrlt r2, r1, #1 -; V4T-NEXT: ldr r1, .LCPI11_0 +; V4T-NEXT: movge r2, r0 +; V4T-NEXT: ldr r1, .LCPI11_1 ; V4T-NEXT: cmp r0, #8388608 ; V4T-NEXT: movlt r1, r2 ; V4T-NEXT: mov r0, r1 @@ -403,15 +402,16 @@ define i32 @no_sat_incorrect_constant(i32 %x) #0 { ; V4T-NEXT: .p2align 2 ; V4T-NEXT: @ %bb.1: ; V4T-NEXT: .LCPI11_0: +; V4T-NEXT: .long 4286578689 @ 0xff800001 +; V4T-NEXT: .LCPI11_1: ; V4T-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: no_sat_incorrect_constant: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r2, #0 ; V6T2-NEXT: cmn r0, #8388608 ; V6T2-NEXT: mov r1, r0 -; V6T2-NEXT: movt r2, #65408 -; V6T2-NEXT: orrlt r1, r2, #1 +; V6T2-NEXT: movwlt r1, #1 +; V6T2-NEXT: movtlt r1, #65408 ; V6T2-NEXT: cmp r0, #8388608 ; V6T2-NEXT: movwge r1, #65535 ; V6T2-NEXT: movtge r1, #127