Skip to content

Loop rerolling bug #53736

@kmod

Description

@kmod

I'm running into an optimization bug when I enable the loop-rerolling pass. I bisected it to 1badf7c33a but I don't understand how that commit could cause it. @LebedevRI

To reproduce:

# Unpack the reproducer (presumably provides orig.ll and main.c, which the commands below use).
tar xvf bug.tar.gz
# Step 1: run loop-unroll (plus simplifycfg/instcombine) on the original IR.
opt orig.ll -S --loop-unroll --simplifycfg --instcombine > unrolled.ll
# Step 2: run loop-reroll (plus simplifycfg/instcombine) on the unrolled IR; bad.ll is the miscompiled result.
opt unrolled.ll -S --loop-reroll --simplifycfg --instcombine > bad.ll
# Compile the rerolled IR and the driver, link them, and run the result.
clang bad.ll -c -o bad.o
clang main.c -c -o main.o
clang bad.o main.o -o a.out
./a.out
Result: ./a.out segfaults.

This transforms this original IR (orig.ll):

; Reference (pre-transformation) version of @foo: for each index i in [0, %1),
; loads the pointer stored at %0[i] and increments the i64 it points to.
define void @foo(i64** %0, i64 %1) local_unnamed_addr {
  ; Enter the loop only if %1 > 0 (signed); otherwise return immediately.
  %3 = icmp sgt i64 %1, 0
  br i1 %3, label %5, label %4

4:                                                ; preds = %5, %2
  ret void

5:                                                ; preds = %2, %5
  ; %6 is the single induction variable: starts at 0 and advances by 1 (%11).
  ; It serves as both the array index and the basis of the exit test.
  %6 = phi i64 [ %11, %5 ], [ 0, %2 ]
  ; %8 = %0[%6]; then increment the i64 stored at %8.
  %7 = getelementptr inbounds i64*, i64** %0, i64 %6
  %8 = load i64*, i64** %7, align 8
  %9 = load i64, i64* %8, align 8
  %10 = add nsw i64 %9, 1
  store i64 %10, i64* %8, align 8
  ; Advance the index and leave the loop once it equals %1.
  %11 = add nuw nsw i64 %6, 1
  %12 = icmp eq i64 %11, %1
  br i1 %12, label %4, label %5
}

into this (bad.ll):

; @foo after loop-unroll followed by loop-reroll (the miscompiled output).
; Structure: a main loop (block %14) plus an epilogue loop (.epil.preheader)
; that runs %xtraiter = %1 & 3 times.
define void @foo(i64** nocapture readonly %0, i64 %1) local_unnamed_addr #0 {
  ; Return immediately unless %1 > 0 (signed).
  %3 = icmp sgt i64 %1, 0
  br i1 %3, label %.preheader, label %.loopexit

.preheader:                                       ; preds = %2
  ; %xtraiter = %1 & 3 is the iteration count handed to the epilogue loop.
  ; If %1 - 1 <u 3, the main loop is bypassed and only the epilogue runs.
  %4 = add nsw i64 %1, -1
  %xtraiter = and i64 %1, 3
  %5 = icmp ult i64 %4, 3
  br i1 %5, label %.loopexit.unr-lcssa, label %.preheader.new

.preheader.new:                                   ; preds = %.preheader
  ; %7 = (%1 - 4) | 3 is the value %indvar is compared against in block %14.
  %6 = add i64 %1, -4
  %7 = or i64 %6, 3
  br label %14

.loopexit.unr-lcssa:                              ; preds = %14, %.preheader
  ; %.unr is the index at which the epilogue starts: 0 if the main loop was
  ; bypassed, otherwise %20 as computed by the main loop.
  %.unr = phi i64 [ 0, %.preheader ], [ %20, %14 ]
  ; No epilogue iterations are needed when %xtraiter == 0.
  %lcmp.mod.not = icmp eq i64 %xtraiter, 0
  br i1 %lcmp.mod.not, label %.loopexit, label %.epil.preheader

.epil.preheader:                                  ; preds = %.loopexit.unr-lcssa, %.epil.preheader
  ; Epilogue loop: processes one element per iteration, starting at index
  ; %.unr (%8) and counting %epil.iter down from %xtraiter to 0.
  %8 = phi i64 [ %13, %.epil.preheader ], [ %.unr, %.loopexit.unr-lcssa ]
  %epil.iter = phi i64 [ %epil.iter.sub, %.epil.preheader ], [ %xtraiter, %.loopexit.unr-lcssa ]
  %9 = getelementptr inbounds i64*, i64** %0, i64 %8
  %10 = load i64*, i64** %9, align 8
  %11 = load i64, i64* %10, align 8
  %12 = add nsw i64 %11, 1
  store i64 %12, i64* %10, align 8
  %13 = add nuw nsw i64 %8, 1
  %epil.iter.sub = add nsw i64 %epil.iter, -1
  %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0
  br i1 %epil.iter.cmp.not, label %.loopexit, label %.epil.preheader, !llvm.loop !0

.loopexit:                                        ; preds = %.epil.preheader, %.loopexit.unr-lcssa, %2
  ret void

14:                                               ; preds = %14, %.preheader.new
  ; Main loop. Two counters are live here:
  ;   %indvar - used for the memory access; advances by 1 per iteration.
  ;   %15     - not used for any access in this block; advances by 4 (%20).
  ; Each iteration handles exactly one element (%0[%indvar]), yet %20 is the
  ; value that leaves the loop as %.unr. After k iterations %20 == 4*k, so the
  ; epilogue starts at index 4*k rather than k.
  %indvar = phi i64 [ %indvar.next, %14 ], [ 0, %.preheader.new ]
  %15 = phi i64 [ %20, %14 ], [ 0, %.preheader.new ]
  %16 = getelementptr inbounds i64*, i64** %0, i64 %indvar
  %17 = load i64*, i64** %16, align 8
  %18 = load i64, i64* %17, align 8
  %19 = add nsw i64 %18, 1
  store i64 %19, i64* %17, align 8
  %20 = add nuw nsw i64 %15, 4
  %indvar.next = add i64 %indvar, 1
  ; The loop exits on the iteration where %indvar == %7.
  %exitcond = icmp eq i64 %indvar, %7
  br i1 %exitcond, label %.loopexit.unr-lcssa, label %14
}

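I believe the issue is that the array index (which is apparently tracked separately from the trip count) is passed from the unrolled body to the epilogue, but the value that is passed (%20) is wrongly computed: it is 4x too large (4 being the original unrolling factor). In the loop at label 14, the index actually used for the access (%indvar) advances by 1 per iteration, while %20 still advances by 4, so the epilogue starts reading past the elements it should process.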

The IR input file was taken from the clang output of the following C file:

/* Increments, in place, the long pointed to by each of the first `size`
 * entries of `arr`. Does nothing when `size` <= 0. */
void foo(long** arr, long size) {
    for (long i = 0; i < size; i++) {
        long* item = arr[i];
        (*item)++;  /* increments the pointed-to value, not the pointer */
    }
}

with some optimization attributes removed so that unrolling+rerolling would happen.

bug.tar.gz

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions