Skip to content

Misoptimization on s390x (and likely ppc64) when compiling LLVM with Clang #44617

@aaronpuchert

Description

@aaronpuchert
Bugzilla Link 45272
Resolution FIXED
Resolved on May 22, 2020 12:46
Version 10.0
OS Linux
Blocks #44654
CC @efriedma-quic,@fhahn,@zmodem,@LebedevRI,@nikic,@serge-sans-paille,@sylvestre,@tstellar
Fixed by commit(s) 4878aa3 bace7be

Extended Description

LLVM misoptimizes llvm::BitstreamCursor::advance(unsigned) for s390x-ibm-linux, and likely also on ppc64le (I'm seeing similar test failures there). When compiling llvm/lib/Bitcode/Reader/BitcodeReader.cpp with -O2 -m64 and LLVM_ENABLE_ABI_BREAKING_CHECKS=0, the relevant code looks as follows:

define linkonce_odr hidden void @​_ZN4llvm15BitstreamCursor7advanceEj(%"class.llvm::Expected.41"* noalias sret %0, %"class.llvm::BitstreamCursor"* %1, i32 zeroext %2) local_unnamed_addr #​3 comdat align 2 {
%4 = alloca %"class.llvm::Expected.410", align 8
%5 = alloca %"class.llvm::Expected.410", align 8
%6 = alloca %"class.llvm::Expected.410", align 8
%7 = alloca %"class.llvm::Error", align 8
%8 = bitcast %"class.llvm::BitstreamCursor"* %1 to %"class.llvm::SimpleBitstreamCursor"*
%9 = getelementptr inbounds %"class.llvm::BitstreamCursor", %"class.llvm::BitstreamCursor"* %1, i64 0, i32 0, i32 3
%10 = getelementptr inbounds %"class.llvm::BitstreamCursor", %"class.llvm::BitstreamCursor"* %1, i64 0, i32 0, i32 0, i32 1
%11 = getelementptr inbounds %"class.llvm::BitstreamCursor", %"class.llvm::BitstreamCursor"* %1, i64 0, i32 0, i32 1
%12 = getelementptr inbounds %"class.llvm::Expected.410", %"class.llvm::Expected.410"* %6, i64 0, i32 0, i32 0, i32 0, i64 0
%13 = getelementptr inbounds %"class.llvm::BitstreamCursor", %"class.llvm::BitstreamCursor"* %1, i64 0, i32 1
%14 = getelementptr inbounds %"class.llvm::Expected.410", %"class.llvm::Expected.410"* %6, i64 0, i32 1
%15 = bitcast %"class.llvm::Expected.410"* %6 to i64*
%16 = bitcast %"class.llvm::Expected.410"* %6 to %"class.llvm::ErrorInfoBase"**
%17 = and i32 %2, 2
%18 = icmp eq i32 %17, 0
%19 = bitcast %"class.llvm::Error"* %7 to i8*
%20 = getelementptr inbounds %"class.llvm::Error", %"class.llvm::Error"* %7, i64 0, i32 0
br label %21

21: ; preds = %315, %3
%22 = phi i8 [ undef, %3 ], [ %39, %315 ]
%23 = phi i32 [ undef, %3 ], [ %47, %315 ]
%24 = load i32, i32* %9, align 8, !tbaa !​79
%25 = icmp eq i32 %24, 0
br i1 %25, label %26, label %35

[...]

35: ; preds = %26, %21
call void @​llvm.lifetime.start.p0i8(i64 16, i8* nonnull %12) #​20, !noalias !​230
%36 = load i32, i32* %13, align 4, !tbaa !​191, !noalias !​230
call void @​_ZN4llvm21SimpleBitstreamCursor4ReadEj(%"class.llvm::Expected.410"* nonnull sret %6, %"class.llvm::SimpleBitstreamCursor"* nonnull %8, i32 zeroext %36) #​20, !noalias !​230
%37 = load i8, i8* %14, align 8, !noalias !​230
%38 = and i8 %37, -128
%39 = or i8 %38, %22
%40 = icmp slt i8 %37, 0
%41 = load i64, i64* %15, align 8, !tbaa !​43
br i1 %40, label %42, label %45

42: ; preds = %35
%43 = lshr i64 %41, 32
%44 = trunc i64 %41 to i32
store %"class.llvm::ErrorInfoBase"* null, %"class.llvm::ErrorInfoBase"** %16, align 8, !tbaa !​24, !noalias !​230
br label %45

45: ; preds = %35, %42
%46 = phi i64 [ %43, %42 ], [ %41, %35 ]
%47 = phi i32 [ %44, %42 ], [ %23, %35 ]
call void @​llvm.lifetime.end.p0i8(i64 16, i8* nonnull %12) #​20, !noalias !​230
%48 = icmp sgt i8 %39, -1
br i1 %48, label %57, label %49

49: ; preds = %45
%50 = zext i32 %47 to i64
%51 = shl i64 %46, 32
%52 = or i64 %51, %50
%53 = getelementptr inbounds %"class.llvm::Expected.41", %"class.llvm::Expected.41"* %0, i64 0, i32 1
%54 = load i8, i8* %53, align 8
%55 = or i8 %54, -128
store i8 %55, i8* %53, align 8
%56 = bitcast %"class.llvm::Expected.41"* %0 to i64*
store i64 %52, i64* %56, align 8, !tbaa !​24, !alias.scope !​233
br label %302

[...]

The call to llvm::SimpleBitstreamCursor::Read(unsigned) returns an llvm::Expected<size_t>, which has to be converted to a llvm::Expected, which happens after the call. Let's assume here that there was no error.

We read the byte, select the HasError bit via "& 1000'0000b" and bit-or this with %22. This is where the trouble starts, because that's undef in the first iteration, so the result is undef, too. This is %39.

For %40 we want to know if the byte is < 0, which is equivalent to the HasError being set, and since we have no error we skip the 42 block. Then we branch on %39, this time with > -1, which is equivalent to HasError not being set. We would expect that this branch is taken, but because we bit-or'd with something undef that might not be the case. And indeed in some cases we go to %49.

There we write %52 into our return value, which is pieced together from %46 and %47 (pretty weird construction, by the way, but seems Ok), which come from %41 and %23, the latter being another undef value. So we return with an llvm::Error in the Expected, despite there not being an error, and we return a garbage pointer. That's how I noticed the bug, because much later that error wants to be logged, and then we get a SEGV.

This can be observed in the assembly as well:

llgf	%r4, 36(%r12)
la	%r2, 176(%r15)
lgr	%r3, %r12
brasl	%r14, _ZN4llvm21SimpleBitstreamCursor4ReadEj@PLT
llc	%r0, 184(%r15)
lbr	%r1, %r0
lg	%r7, 176(%r15)
rosbg	%r8, %r0, 56, 56, 0
lbr	%r8, %r8
cijhe	%r1, 0, .LBB6_5

%bb.4: # ~ %42

lr	%r9, %r7
srlg	%r7, %r7, 32
mvghi	176(%r15), 0

.LBB6_5: # ~ %45
cijl %r8, 0, .LBB6_10
[...]
.LBB6_10: # ~ %49
sllg %r0, %r7, 32
lr %r0, %r9
oi 8(%r13), 128
stg %r0, 0(%r13)
lghi %r7, 0
lhi %r9, 0

The "rosbg %r8, %r0, 56, 56, 0" instruction corresponds to "%39 = or i8 %38, %22" with r8 = %22 = undef and r0 = %38, and "lr %r0, %r9" corresponds to "%52 = or i64 %51, (zext i32 %47 to i64)" with r0 = %51 and r9 = %47 = %23 = undef. There are indeed no previous writes to either r8 or r9, so they contain whatever they contain.

The original -O0 code generated by Clang seems to be Ok. The conversion of Expected values happens in llvm::Expected::move_construct:

define linkonce_odr hidden void @​_ZN4llvm8ExpectedIjE13moveConstructImEEvONS0_IT_EE(%"class.llvm::Expected.52"* %0, %"class.llvm::Expected.410"* dereferenceable(16) %1) #​1 comdat align 2 {
%3 = alloca %"class.llvm::Expected.52", align 8
%4 = alloca %"class.llvm::Expected.410"
, align 8
store %"class.llvm::Expected.52"* %0, %"class.llvm::Expected.52"** %3, align 8
store %"class.llvm::Expected.410"* %1, %"class.llvm::Expected.410"** %4, align 8
%5 = load %"class.llvm::Expected.52", %"class.llvm::Expected.52"** %3, align 8
%6 = load %"class.llvm::Expected.410"
, %"class.llvm::Expected.410"** %4, align 8
%7 = getelementptr inbounds %"class.llvm::Expected.410", %"class.llvm::Expected.410"* %6, i32 0, i32 1
%8 = load i8, i8* %7, align 8
%9 = lshr i8 %8, 7
%10 = trunc i8 %9 to i1
%11 = getelementptr inbounds %"class.llvm::Expected.52", %"class.llvm::Expected.52"* %5, i32 0, i32 1
%12 = zext i1 %10 to i8
%13 = load i8, i8* %11, align 8
%14 = shl i8 %12, 7
%15 = and i8 %13, 127
%16 = or i8 %15, %14
store i8 %16, i8* %11, align 8

Here "%15 = and i8 %13, 127" properly masks the bit of the bitfield.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugzillaIssues migrated from bugzilla

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions