diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 757f68999691e..1b0710e09b0f9 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1766,6 +1766,9 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
 // This handles the case only when the Cmp instruction is guarding a recursive
 // call that will cause the Cmp to fail/succeed for the recursive call.
 bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
+  // FIXME: Regression on AVR: github.com/llvm/llvm-project/issues/153156
+  if (!DL.isLegalInteger(32) && DL.isLegalInteger(8))
+    return false;
   // Bail out if LHS is not a function argument or RHS is NOT const:
   if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
     return false;
diff --git a/llvm/lib/Target/AVR/AVRTargetTransformInfo.h b/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
index 0daeeb8f11cfe..e6862d8743bbe 100644
--- a/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
+++ b/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
@@ -44,6 +44,12 @@ class AVRTTIImpl final : public BasicTTIImplBase<AVRTTIImpl> {
 
   bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                      const TargetTransformInfo::LSRCost &C2) const override;
+
+  TypeSize
+  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
+    // The default is 32; change it to 16 for AVR.
+    return TypeSize::getFixed(16);
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 6477141ab095f..88801a22ffa51 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1632,12 +1632,17 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
     return replaceInstUsesWith(PN, &IdenticalPN);
   }
 
-  // If this is an integer PHI and we know that it has an illegal type, see if
+  // For 8/16-bit CPUs, prefer 8-bit registers.
+  bool PreferByteRegister = !DL.isLegalInteger(32);
+
+  // If this is an integer PHI and we know that it has an illegal type
+  // (or a 16-bit type on 8/16-bit CPUs), see if
   // it is only used by trunc or trunc(lshr) operations. If so, we split the
   // PHI into the various pieces being extracted. This sort of thing is
   // introduced when SROA promotes an aggregate to a single large integer type.
   if (PN.getType()->isIntegerTy() &&
-      !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+      (!DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()) ||
+       (PreferByteRegister && PN.getType()->getPrimitiveSizeInBits() == 16)))
     if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
       return Res;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5ee3bb1abe86e..bb4144ad109ca 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -311,6 +311,10 @@ bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
   bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
   bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
 
+  // For 8/16-bit CPUs, prefer 8-bit types.
+  if (!DL.isLegalInteger(32) && ToWidth == 16)
+    ToLegal = false;
+
   // Convert to desirable widths even if they are not legal types.
   // Only shrink types, to prevent infinite loops.
   if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
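Taken together, the source changes steer the mid-end toward 8-bit values on targets where i32 is not a legal integer type (AVR-class 8/16-bit CPUs): shouldChangeType stops treating i16 as a legal destination width, visitPHINode lets SliceUpIllegalIntegerPHI split 16-bit PHIs that are only consumed through trunc or trunc(lshr), getRegisterBitWidth tells the cost models that AVR registers are at most 16 bits wide, and the InlineCost bailout sidesteps the recursive-call compare simplification that regressed AVR (issue 153156). Below is a hand-written LLVM IR sketch of the PHI slicing the InstCombine changes enable; @pick and all value names are invented for illustration and are not taken from the patch.

; Before: an i16 PHI whose only use is a trunc to i8.
define i8 @pick(i1 %c, i16 %a, i16 %b) {
entry:
  br i1 %c, label %lhs, label %rhs
lhs:
  br label %join
rhs:
  br label %join
join:
  %p = phi i16 [ %a, %lhs ], [ %b, %rhs ]
  %lo = trunc i16 %p to i8
  ret i8 %lo
}

; After slicing (sketch): the truncs move into the predecessors and the
; PHI itself becomes i8, so no 16-bit value flows through the join.
define i8 @pick.sliced(i1 %c, i16 %a, i16 %b) {
entry:
  br i1 %c, label %lhs, label %rhs
lhs:
  %a.lo = trunc i16 %a to i8
  br label %join
rhs:
  %b.lo = trunc i16 %b to i8
  br label %join
join:
  %p.lo = phi i8 [ %a.lo, %lhs ], [ %b.lo, %rhs ]
  ret i8 %p.lo
}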
diff --git a/llvm/test/CodeGen/AVR/issue-151080-mod.ll b/llvm/test/CodeGen/AVR/issue-151080-mod.ll
new file mode 100644
index 0000000000000..e2981236482e6
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/issue-151080-mod.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O=3 -mtriple=avr-none -mcpu=attiny85 -verify-machineinstrs | FileCheck %s
+
+@c = dso_local local_unnamed_addr global i8 0, align 1
+define dso_local void @mod(i16 noundef %0) local_unnamed_addr addrspace(1) {
+; CHECK-LABEL: mod:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    push r14
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    push r17
+; CHECK-NEXT:    cpi r24, 10
+; CHECK-NEXT:    cpc r25, r1
+; CHECK-NEXT:    brlo .LBB0_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    ldi r18, 205
+; CHECK-NEXT:    ldi r19, 204
+; CHECK-NEXT:    ldi r20, 0
+; CHECK-NEXT:    ldi r21, 0
+; CHECK-NEXT:    movw r22, r24
+; CHECK-NEXT:    mov r14, r24
+; CHECK-NEXT:    movw r24, r20
+; CHECK-NEXT:    rcall __mulsi3
+; CHECK-NEXT:    movw r16, r24
+; CHECK-NEXT:    lsr r17
+; CHECK-NEXT:    ror r16
+; CHECK-NEXT:    lsr r17
+; CHECK-NEXT:    ror r16
+; CHECK-NEXT:    lsr r17
+; CHECK-NEXT:    ror r16
+; CHECK-NEXT:    movw r24, r16
+; CHECK-NEXT:    rcall mod
+; CHECK-NEXT:    mov r24, r16
+; CHECK-NEXT:    ldi r22, -10
+; CHECK-NEXT:    rcall __mulqi3
+; CHECK-NEXT:    add r24, r14
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    ori r24, 48
+; CHECK-NEXT:    sts c, r24
+; CHECK-NEXT:    pop r17
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    pop r14
+; CHECK-NEXT:    ret
+  %2 = icmp ugt i16 %0, 9
+  %3 = trunc i16 %0 to i8
+  br i1 %2, label %4, label %9
+4:                                                ; preds = %1
+  %5 = udiv i16 %0, 10
+  %6 = trunc i16 %5 to i8
+  %7 = mul i8 %6, -10
+  tail call addrspace(1) void @mod(i16 noundef %5)
+  %8 = add i8 %7, %3
+  br label %9
+9:                                                ; preds = %4, %1
+  %10 = phi i8 [ %3, %1 ], [ %8, %4 ]
+  %11 = or disjoint i8 %10, 48
+  store i8 %11, ptr @c, align 1
+  ret void
+}
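The next test covers issue 153156 directly: its RUN line feeds the unoptimized IR through opt -Os before llc, so inlining, tail-recursion elimination (note the %tailrecurse labels in the checks), and InstCombine all see the new behaviour. The InlineCost bailout matters here because @mod guards its recursive calls with a compare of its argument against a constant. A hand-written sketch of the guarded-recursion shape simplifyCmpInstForRecCall matches follows; @count_down and the value names are invented, not taken from the patch.

; The compare's LHS is a function argument and its RHS is a constant,
; and it guards a recursive call; the analyzer can then reason about the
; compare's outcome in the recursive invocation, which normally makes
; inlining the recursion look attractive. The new early return keeps
; that reasoning off for 8-bit-only targets until the AVR regression
; referenced in the FIXME is resolved.
define void @count_down(i16 %n) {
entry:
  %done = icmp ult i16 %n, 10
  br i1 %done, label %exit, label %again
again:
  %n.next = udiv i16 %n, 10
  call void @count_down(i16 %n.next)
  br label %exit
exit:
  ret void
}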
diff --git a/llvm/test/CodeGen/AVR/issue-153156.ll b/llvm/test/CodeGen/AVR/issue-153156.ll
new file mode 100644
index 0000000000000..f9d08fc095d3f
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/issue-153156.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: opt -Os -mtriple=avr-none < %s | llc -mtriple=avr-none -mcpu=attiny85 -verify-machineinstrs | FileCheck %s
+
+@c = dso_local global i8 0, align 1
+@ti = dso_local global i16 0, align 1
+
+define dso_local void @mod(i16 noundef %0) local_unnamed_addr addrspace(1) {
+; CHECK-LABEL: mod:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    push r14
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    push r17
+; CHECK-NEXT:    cpi r24, 10
+; CHECK-NEXT:    cpc r25, r1
+; CHECK-NEXT:    brlo .LBB0_2
+; CHECK-NEXT:  ; %bb.1: ; %tailrecurse.preheader
+; CHECK-NEXT:    ldi r18, 205
+; CHECK-NEXT:    ldi r19, 204
+; CHECK-NEXT:    ldi r20, 0
+; CHECK-NEXT:    ldi r21, 0
+; CHECK-NEXT:    movw r22, r24
+; CHECK-NEXT:    mov r14, r24
+; CHECK-NEXT:    movw r24, r20
+; CHECK-NEXT:    rcall __mulsi3
+; CHECK-NEXT:    movw r16, r24
+; CHECK-NEXT:    lsr r17
+; CHECK-NEXT:    ror r16
+; CHECK-NEXT:    lsr r17
+; CHECK-NEXT:    ror r16
+; CHECK-NEXT:    lsr r17
+; CHECK-NEXT:    ror r16
+; CHECK-NEXT:    movw r24, r16
+; CHECK-NEXT:    rcall mod
+; CHECK-NEXT:    mov r24, r16
+; CHECK-NEXT:    ldi r22, -10
+; CHECK-NEXT:    rcall __mulqi3
+; CHECK-NEXT:    add r24, r14
+; CHECK-NEXT:  .LBB0_2: ; %tailrecurse._crit_edge
+; CHECK-NEXT:    ori r24, 48
+; CHECK-NEXT:    sts c, r24
+; CHECK-NEXT:    pop r17
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    pop r14
+; CHECK-NEXT:    ret
+  %2 = alloca i16, align 1
+  store i16 %0, ptr %2, align 1
+  %3 = load i16, ptr %2, align 1
+  %4 = icmp ugt i16 %3, 9
+  br i1 %4, label %5, label %10
+
+5:                                                ; preds = %1
+  %6 = load i16, ptr %2, align 1
+  %7 = udiv i16 %6, 10
+  call addrspace(1) void @mod(i16 noundef %7)
+  %8 = load i16, ptr %2, align 1
+  %9 = urem i16 %8, 10
+  call addrspace(1) void @mod(i16 noundef %9)
+  br label %14
+
+10:                                               ; preds = %1
+  %11 = load i16, ptr %2, align 1
+  %12 = add i16 48, %11
+  %13 = trunc i16 %12 to i8
+  store volatile i8 %13, ptr @c, align 1
+  br label %14
+
+14:                                               ; preds = %10, %5
+  ret void
+}
+
+define dso_local void @t(i16 noundef %0) addrspace(1) {
+; CHECK-LABEL: t:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldi r22, 57
+; CHECK-NEXT:    rcall __mulqi3
+; CHECK-NEXT:    mov r25, r24
+; CHECK-NEXT:    lsl r25
+; CHECK-NEXT:    sbc r25, r25
+; CHECK-NEXT:    sts ti+1, r25
+; CHECK-NEXT:    sts ti, r24
+; CHECK-NEXT:    ret
+  %2 = alloca i16, align 1
+  store i16 %0, ptr %2, align 1
+  %3 = load i16, ptr %2, align 1
+  %4 = mul nsw i16 57, %3
+  %5 = trunc i16 %4 to i8
+  %6 = sext i8 %5 to i16
+  store i16 %6, ptr @ti, align 1
+  ret void
+}
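One note on the @t checks above: the 16-bit multiply by 57 is lowered through __mulqi3, the 8-bit multiply helper, followed by a sign extension (mov/lsl/sbc), rather than through a 16-bit multiply. That is the shouldChangeType change at work: since only the low 8 bits of the product reach the sext, InstCombine may now narrow the multiply. A hand-written sketch of the narrowed IR I would expect opt to produce (@t.narrowed is an invented name; this is my reading of the checks, not captured output):

@ti = dso_local global i16 0, align 1

; trunc(57 * x) equals (trunc 57) * (trunc x) in i8 arithmetic, so the
; multiply can be done in 8 bits and sign-extended afterwards, matching
; the __mulqi3 plus mov/lsl/sbc sequence in the checks.
define void @t.narrowed(i16 noundef %0) addrspace(1) {
  %lo = trunc i16 %0 to i8
  %m = mul i8 %lo, 57
  %s = sext i8 %m to i16
  store i16 %s, ptr @ti, align 1
  ret void
}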