Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OS X exception tables wrong with internal assembler (file/parallelism release mode crashes) #362

Closed
dnadlinger opened this issue May 19, 2013 · 5 comments

Comments

@dnadlinger
Copy link
Member

This one has been a giant trip down the rabbit hole.

Rather reduced test case:

; ModuleID = 'file.ll'
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin11.4.2"

%1 = type opaque
%14 = type { %1*, i8*, { i64, i8* }, { i64, i8* }, i64, %1*, %1* }
@aconstant = external constant %14, align 8

@0 = internal global [72 x i8] zeroinitializer
@1 = internal constant %14 zeroinitializer
@_ZTIi = external constant i8*

; Reduced reproducer entry point: performs two 56-byte copies out of
; @aconstant into buffers returned by @newclass, then throws a C++ exception
; carrying the i32 value 42. The function contains no invoke/landingpad, so
; nothing in this module catches the exception.
define i32 @main() noreturn uwtable {
  ; 2048-byte stack buffer that is never referenced again in this function.
  ; NOTE(review): presumably present only to enlarge the stack frame - confirm.
  %a = alloca [2048 x i8], align 1
  ; First copy: 56 bytes from field 2 of @aconstant to byte offset 16 of the
  ; pointer returned by @newclass.
  %c = call noalias i8* @newclass()
  %d = getelementptr i8* %c, i64 16
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* bitcast ({ i64, i8* }* getelementptr (%14* @aconstant, i64 0, i32 2) to i8*), i64 56, i32 1, i1 false)
  ; Second copy: same source and length, into a second @newclass result.
  %e = call noalias i8* @newclass()
  ; %f is not used anywhere below.
  %f = bitcast i8* %e to %14*
  %g = getelementptr i8* %e, i64 16
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %g, i8* bitcast ({ i64, i8* }* getelementptr (%14* @aconstant, i64 0, i32 2) to i8*), i64 56, i32 1, i1 false)
  ; Allocate a 4-byte exception object, store 42 into it, and throw it with
  ; @_ZTIi as the type info (Itanium C++ ABI mangled name of typeinfo for int)
  ; and a null destructor pointer.
  %h = call i8* @__cxa_allocate_exception(i64 4) nounwind
  %i = bitcast i8* %h to i32*
  store i32 42, i32* %i
  call void @__cxa_throw(i8* %h, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
  unreachable
}

; Returns a pointer to the first byte of the internal 72-byte global @0.
; Every call returns the same address; no allocation takes place.
define noalias i8* @newclass() {
  ret i8* getelementptr inbounds ([72 x i8]* @0, i32 0, i32 0)
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind

declare i8* @__cxa_allocate_exception(i64)

declare void @__cxa_throw(i8*, i8*, i8*)
; ModuleID = 'constant.ll'
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin11.4.2"

%0 = type { %1*, i8*, { i64, i8* }, { i64, i8* }, i64, %1*, %1* }
%1 = type opaque
@aconstant = constant %0 zeroinitializer, align 8

Compile constant.ll using llc -filetype=obj constant.ll. Then, try two versions of compiling file.ll:

llc -O1 file.ll && clang -c file.s && clang++ file.o constant.o -o file.good && ./file.good

vs.

llc -O1 -filetype=obj file.ll && clang++ file.o constant.o -o file.bad && ./file.bad

file.bad crashes in the same way as the std.file/std.parallelism tests do in release mode, while file.good works fine ("terminate called throwing an exception").

The only difference between the two files is in the unwinding info. A diff of the unwinddump output shows that the stack size in the "bad" version is horribly wrong.

- [  1] funcOffset=0x00000DB0, encoding[  1]=0x030DF800 (stack size=0x00000840, rbx,r12,r13,r14,r15,rbp) _main
+ [  1] funcOffset=0x00000DB0, encoding[  1]=0x0309F800 (stack size=0xEC81488B, rbx,r12,r13,r14,r15,rbp) _main

Will open an upstream bug report asap.

@dnadlinger
Copy link
Member Author

Assembly (same for both versions):

_main:
0000000100000db0    pushq   %rbp
0000000100000db1    pushq   %r15
0000000100000db3    pushq   %r14
0000000100000db5    pushq   %r13
0000000100000db7    pushq   %r12
0000000100000db9    pushq   %rbx
0000000100000dba    subq    $0x00000808,%rsp
0000000100000dc1    callq   _newclass
0000000100000dc6    leaq    0x0000011b(%rip),%rcx
0000000100000dcd    movq    0x30(%rcx),%rdx
0000000100000dd1    movq    %rdx,(%rsp)
0000000100000dd5    movq    0x38(%rcx),%r15
0000000100000dd9    movq    0x40(%rcx),%r12
0000000100000ddd    movq    %r12,0x40(%rax)
0000000100000de1    movq    %r15,0x38(%rax)
0000000100000de5    movq    %rdx,0x30(%rax)
0000000100000de9    movq    0x28(%rcx),%r13
0000000100000ded    movq    %r13,0x28(%rax)
0000000100000df1    movq    0x20(%rcx),%rbx
0000000100000df5    movq    %rbx,0x20(%rax)
0000000100000df9    movq    0x10(%rcx),%rbp
0000000100000dfd    movq    0x18(%rcx),%r14
0000000100000e01    movq    %r14,0x18(%rax)
0000000100000e05    movq    %rbp,0x10(%rax)
0000000100000e09    callq   _newclass
0000000100000e0e    movq    %r12,0x40(%rax)
0000000100000e12    movq    %r15,0x38(%rax)
0000000100000e16    movq    (%rsp),%rcx
0000000100000e1a    movq    %rcx,0x30(%rax)
0000000100000e1e    movq    %r13,0x28(%rax)
0000000100000e22    movq    %rbx,0x20(%rax)
0000000100000e26    movq    %r14,0x18(%rax)
0000000100000e2a    movq    %rbp,0x10(%rax)
0000000100000e2e    movl    $0x00000004,%edi
0000000100000e33    callq   0x100000e58 ; symbol stub for: ___cxa_allocate_exception
0000000100000e38    movq    0x000001f9(%rip),%rsi
0000000100000e3f    movl    $0x0000002a,(%rax)
0000000100000e45    movq    %rax,%rdi
0000000100000e48    xorl    %edx,%edx
0000000100000e4a    callq   0x100000e5e ; symbol stub for: ___cxa_throw
0000000100000e4f    nop

llc -filetype=obj -O0 does not crash, probably because the memcpy is not expanded.

@dnadlinger
Copy link
Member Author

And, for reference, the annotated ASM output:

    .section    __TEXT,__text,regular,pure_instructions
    .globl  _main
    .align  4, 0x90
## llc output for @main. Layout: prologue pushing six registers, a 2056-byte
## stack adjustment, two inlined 56-byte copies (seven quadword moves each),
## then the __cxa_allocate_exception / __cxa_throw sequence.
_main:                                  ## @main
    .cfi_startproc
## BB#0:
## Prologue: each 8-byte push is followed by a CFA offset that grows by 8
## (16 -> 56 over the six pushes).
    pushq   %rbp
Ltmp7:
    .cfi_def_cfa_offset 16
    pushq   %r15
Ltmp8:
    .cfi_def_cfa_offset 24
    pushq   %r14
Ltmp9:
    .cfi_def_cfa_offset 32
    pushq   %r13
Ltmp10:
    .cfi_def_cfa_offset 40
    pushq   %r12
Ltmp11:
    .cfi_def_cfa_offset 48
    pushq   %rbx
Ltmp12:
    .cfi_def_cfa_offset 56
## 56 + 2056 = 2112 = 0x840, the CFA offset recorded just below.
    subq    $2056, %rsp             ## imm = 0x808
Ltmp13:
    .cfi_def_cfa_offset 2112
## Save-slot locations of the six pushed registers, relative to the CFA.
Ltmp14:
    .cfi_offset %rbx, -56
Ltmp15:
    .cfi_offset %r12, -48
Ltmp16:
    .cfi_offset %r13, -40
Ltmp17:
    .cfi_offset %r14, -32
Ltmp18:
    .cfi_offset %r15, -24
Ltmp19:
    .cfi_offset %rbp, -16
## First copy: load the quadwords at offsets 16..64 of _aconstant (address
## fetched through the GOT) and store them at the same offsets from the
## pointer returned in %rax. Six values stay in the pushed registers and the
## one from offset 48 is spilled to (%rsp), so all seven survive the next call.
    callq   _newclass
    movq    _aconstant@GOTPCREL(%rip), %rcx
    movq    48(%rcx), %rdx
    movq    %rdx, (%rsp)            ## 8-byte Spill
    movq    56(%rcx), %r15
    movq    64(%rcx), %r12
    movq    %r12, 64(%rax)
    movq    %r15, 56(%rax)
    movq    %rdx, 48(%rax)
    movq    40(%rcx), %r13
    movq    %r13, 40(%rax)
    movq    32(%rcx), %rbx
    movq    %rbx, 32(%rax)
    movq    16(%rcx), %rbp
    movq    24(%rcx), %r14
    movq    %r14, 24(%rax)
    movq    %rbp, 16(%rax)
## Second copy: store the seven saved quadwords into the new %rax without
## re-reading _aconstant.
    callq   _newclass
    movq    %r12, 64(%rax)
    movq    %r15, 56(%rax)
    movq    (%rsp), %rcx            ## 8-byte Reload
    movq    %rcx, 48(%rax)
    movq    %r13, 40(%rax)
    movq    %rbx, 32(%rax)
    movq    %r14, 24(%rax)
    movq    %rbp, 16(%rax)
## Throw: request a 4-byte exception object, store 42 into it, then call
## ___cxa_throw with %rdi = object, %rsi = __ZTIi (via the GOT), %rdx = 0.
    movl    $4, %edi
    callq   ___cxa_allocate_exception
    movq    __ZTIi@GOTPCREL(%rip), %rsi
    movl    $42, (%rax)
    movq    %rax, %rdi
    xorl    %edx, %edx
    callq   ___cxa_throw
    .cfi_endproc

    .globl  _newclass
    .align  4, 0x90
## llc output for @newclass: returns the RIP-relative address of ___unnamed_1
## in %rax. No registers are pushed and %rsp is not adjusted.
_newclass:                              ## @newclass
    .cfi_startproc
## BB#0:
    leaq    ___unnamed_1(%rip), %rax
    ret
    .cfi_endproc

.zerofill __DATA,__bss,___unnamed_1,72,4 ## @0
    .section    __TEXT,__const
    .align  4                       ## @1
___unnamed_2:
    .space  72

@dnadlinger
Copy link
Member Author

Hm, this particular bug seems to be fixed in LLVM 3.3. Can't easily throw the whole test case at it, though, as we are currently blocked by http://llvm.org/bugs/show_bug.cgi?id=15972.

@dnadlinger
Copy link
Member Author

This issue is still present in LLVM 3.4 trunk, even though the above test case doesn't trigger it anymore: phobos_std_file_release_run and phobos_std_parallelism_release_run still crash, and running unwinddump on the files reveals that the stack size for some of the functions is off:

[137] funcOffset=0x0000AE20, encoding=0x5309F800 (stack size=0xEC81488B, rbx,r12,r13,r14,r15,rbp LSDA     ) __D3std11parallelism8TaskPool28__T6reduceVAyaa5_61202b2062Z14__T6reduceTAiZ6reduceMFAiZi
[138] funcOffset=0x0000B610, encoding=0x5309F800 (stack size=0xEC81488B, rbx,r12,r13,r14,r15,rbp LSDA     ) __D3std11parallelism8TaskPool28__T6reduceVAyaa5_61202b2062Z16__T6reduceTdTAiZ6reduceMFdAiZd
[139] funcOffset=0x0000BD90, encoding=0x5309F800 (stack size=0xEC81488B, rbx,r12,r13,r14,r15,rbp LSDA     ) __D3std11parallelism8TaskPool28__T6reduceVAyaa5_61202b2062Z18__T6reduceTdTAiTiZ6reduceMFdAiiZd

@dnadlinger
Copy link
Member Author

Closing for now, as #1031 might well be related.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant