Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

runtime: crashes on cgo callbacks #11907

Closed
dvyukov opened this issue Jul 28, 2015 · 3 comments
Closed

runtime: crashes on cgo callbacks #11907

dvyukov opened this issue Jul 28, 2015 · 3 comments
Milestone

Comments

@dvyukov
Copy link
Member

dvyukov commented Jul 28, 2015

This was reported privately as episodic crashes on 1.4:

runtime: garbage collector found invalid heap pointer *(0xc208237fe0+0x0)=0xc208104010 span=0xc208104000-0xc208109a00-0xc20810a000 state=2
fatal error: invalid heap pointer

runtime stack:
runtime.throw(0xb0e6c3)
    /usr/local/go/src/runtime/panic.go:491 +0xad fp=0x7fb617ffbbd8 sp=0x7fb617ffbba8
scanblock(0xc208237fe0, 0x18, 0x80bc68)
    /usr/local/go/src/runtime/mgc0.c:381 +0x551 fp=0x7fb617ffbd18 sp=0x7fb617ffbbd8
scanframe(0x7fb617ffbe20, 0x0, 0x7fb600000001)
    /usr/local/go/src/runtime/mgc0.c:743 +0x1c2 fp=0x7fb617ffbd88 sp=0x7fb617ffbd18
runtime.gentraceback(0x44a770, 0xc208237b50, 0x0, 0xc20841a5a0, 0x0, 0x0, 0x7fffffff, 0x7fb617ffbed0, 0x0, 0x0, ...)
    /usr/local/go/src/runtime/traceback.go:311 +0x7a8 fp=0x7fb617ffbe78 sp=0x7fb617ffbd88
scanstack(0xc20841a5a0)
    /usr/local/go/src/runtime/mgc0.c:780 +0x21c fp=0x7fb617ffbee8 sp=0x7fb617ffbe78
markroot(0xc208010000, 0x32)
    /usr/local/go/src/runtime/mgc0.c:556 +0xe7 fp=0x7fb617ffbf48 sp=0x7fb617ffbee8
runtime.parfordo(0xc208010000)
    /usr/local/go/src/runtime/parfor.c:91 +0x13b fp=0x7fb617ffbfc8 sp=0x7fb617ffbf48
gc(0x7fb617ffc100)
    /usr/local/go/src/runtime/mgc0.c:1442 +0x25e fp=0x7fb617ffc0e0 sp=0x7fb617ffbfc8
runtime.gc_m()
    /usr/local/go/src/runtime/mgc0.c:1371 +0xe0 fp=0x7fb617ffc118 sp=0x7fb617ffc0e0
runtime.onM(0x7fb617ffc120)
    /usr/local/go/src/runtime/asm_amd64.s:257 +0x68 fp=0x7fb617ffc120 sp=0x7fb617ffc118
runtime.mstart()
    /usr/local/go/src/runtime/proc.c:818 fp=0x7fb617ffc128 sp=0x7fb617ffc120

goroutine 389 [garbage collection, locked to thread]:
runtime.switchtoM()
    /usr/local/go/src/runtime/asm_amd64.s:198 fp=0xc208237b58 sp=0xc208237b50
runtime.gogc(0xc200000000)
    /usr/local/go/src/runtime/malloc.go:469 +0x1cf fp=0xc208237b90 sp=0xc208237b58
runtime.mallocgc(0xa00000, 0x0, 0xc200000003, 0xc20824a380)
    /usr/local/go/src/runtime/malloc.go:341 +0x391 fp=0xc208237c40 sp=0xc208237b90
runtime.rawstring(0xa00000, 0x0, 0x0, 0x0, 0x0, 0x0)
    /usr/local/go/src/runtime/string.go:195 +0x93 fp=0xc208237c70 sp=0xc208237c40
runtime.gostringn(0x7fb60c002330, 0xa00000, 0x0, 0x0)
    /usr/local/go/src/runtime/string.go:275 +0x57 fp=0xc208237cd0 sp=0xc208237c70
github.com/bioothod/elliptics-go/elliptics._Cfunc_GoStringN(0x7fb60c002330, 0x7fb600a00000, 0x0, 0x0)
    /root/go/src/github.com/bioothod/elliptics-go/elliptics/:83 +0x4b fp=0xc208237cf8 sp=0xc208237cd0
github.com/bioothod/elliptics-go/elliptics.NewDnetFile(0x7fb617ffc230, 0x0, 0x0, 0x0)
    /root/go/src/github.com/bioothod/elliptics-go/elliptics/ctogo.go:203 +0x7f fp=0xc208237d50 sp=0xc208237cf8
github.com/bioothod/elliptics-go/elliptics.go_read_callback(0x7fb617ffc230, 0x10f62)
    /root/go/src/github.com/bioothod/elliptics-go/elliptics/callback.go:124 +0x184 fp=0xc208237f28 sp=0xc208237d50
runtime.call16(0x7fb617ffc150, 0x7fb617ffc1d0, 0x10)
    /usr/local/go/src/runtime/asm_amd64.s:401 +0x45 fp=0xc208237f40 sp=0xc208237f28
runtime.cgocallbackg1()
    /usr/local/go/src/runtime/cgocall.go:239 +0x12a fp=0xc208237fa0 sp=0xc208237f40
runtime.cgocallbackg()
    /usr/local/go/src/runtime/cgocall.go:193 +0x6e fp=0xc208237fd0 sp=0xc208237fa0
runtime.cgocallback_gofunc(0xc208104010, 0xc208597000, 0x1000)
    /usr/local/go/src/runtime/asm_amd64.s:766 +0x57 fp=0xc208237fe0 sp=0xc208237fd0
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:2232 +0x1 fp=0xc208237fe8 sp=0xc208237fe0

I can reproduce it on tip using the following program:

// test.go
package main

import "runtime"

// void foo();
import "C"

// go_callback is the Go side of the cgo callback; thr in test.c calls it
// from a thread created with pthread_create. It forces a garbage collection,
// runs the deep recursion in grow, and then forces another collection, so a
// collection happens both before and after the recursion on the callback's
// stack.
//
//export go_callback
func go_callback() {
    runtime.GC()
    grow()
    runtime.GC()
}

// cnt is declared but not referenced anywhere else in this reproducer.
var cnt int

// grow starts a recursion through grow1 that runs until the counter x,
// initially 10000, has been decremented to 0. It hands grow1 pointers to two
// locals of this frame. In a correct run grow1 returns 9999+9998+...+0 plus 1,
// which is never 0, so the panic acts purely as a sanity check on the result.
func grow() {
    x := 10000
    sum := 0
    if grow1(&x, &sum) == 0 {
        panic("bad")
    }
}

// grow1 recurses until *x reaches 0. Each level decrements the shared counter
// *x, adds the new counter value to the total received through sum, stores
// that in its own local sum1, and passes &sum1 down to the next level. Once
// *x is 0 it returns the accumulated total plus 1.
//
// Every call therefore carries two pointer arguments (x and sum), which is
// how the reproducer fills stack frames with pointers.
func grow1(x, sum *int) int {
    if *x == 0 {
        return *sum+1
    }
    *x--
    sum1 := *sum + *x
    return grow1(x, &sum1)
}

// main runs the reproducer in two phases of P goroutines each, using the
// unbuffered channel done to wait for every goroutine of a phase before
// moving on. Phase one only calls grow; phase two calls C.foo, which (see
// test.c) starts a pthread that calls back into go_callback.
func main() {
    const P = 100
    done := make(chan bool)
    // allocate a bunch of stack frames and spray them with pointers
    for i := 0; i < P; i++ {
        go func() {
            grow()
            done <- true
        }()
    }
    for i := 0; i < P; i++ {
        <-done // wait for one phase-one goroutine to finish
    }
    // now give these stack frames to cgo callbacks
    for i := 0; i < P; i++ {
        go func() {
            // C.foo blocks until its pthread, and thus go_callback, returns.
            C.foo()
            done <- true
        }()
    }
    for i := 0; i < P; i++ {
        <-done // wait for one phase-two goroutine to finish
    }
}
// test.c
#include <pthread.h>

void go_callback();

// thr is the pthread start routine used by foo. It ignores arg, calls
// go_callback (declared above; the Go side exports it with //export), and
// returns 0 as the thread's result.
void *thr(void *arg) {
    go_callback();
    return 0;
}

// foo is called from Go as C.foo. It starts one thread running thr with
// default attributes (the attr argument is 0) and blocks in pthread_join
// until that thread has finished, discarding the thread's return value.
// The return codes of pthread_create and pthread_join are not checked.
void foo() {
    pthread_t th;
    pthread_create(&th, 0, thr, 0);
    pthread_join(th, 0);
}
$ GODEBUG=invalidptr=1,gccheckmark=1 GOGC=0 GOTRACEBACK=2 GOMAXPROCS=4 ./test
runtime:greyobject: checkmarks finds unexpected unmarked object obj=0xc8203acd80
runtime: found obj at *(0xc8203b7f80+0x8)
base=0xc8203b7f80 k=0x64101db s.start*_PageSize=0xc8203b0000 s.limit=0x0 s.sizeclass=0 s.elemsize=0
obj=0xc8203acd80 k=0x64101d6 s.start*_PageSize=0xc8203ac000 s.limit=0xc8203adf80 s.sizeclass=32 s.elemsize=1152
 *(obj+0) = 0xc8203ad200
 *(obj+8) = 0x0
 *(obj+16) = 0x0
 *(obj+24) = 0x0
 *(obj+32) = 0x0
 *(obj+40) = 0x0
 *(obj+48) = 0x0

And if I enable object validation during GC, then it crashes as:

runtime:objectstart Span weird: p=0xc820ed0fc0 k=0x6410768 s.start=0xc820e72000 s.limit=0xc820e73f80 s.state=0
fatal error: objectstart: bad pointer in unexpected span
runtime:objectstart Span weird: p=0xc820b3fee8 k=0x641059f s.start=0xc820c96000 s.limit=0xc820c97f80 s.state=0
fatal error: objectstart: bad pointer in unexpected span
runtime:objectstart Span weird: p=0xc820b0cfb0 k=0x6410586 s.start=0xc820c96000 s.limit=0xc820c97f80 s.state=0
fatal error: objectstart: bad pointer in unexpected span
runtime:objectstart Span weird: p=0xc820ed2fc8 k=0x6410769 s.start=0xc820e72000 s.limit=0xc820e73f80 s.state=0
fatal error: objectstart: bad pointer in unexpected span

runtime stack:
runtime.throw(0x48a1a0, 0x2b)
    /usr/local/google/home/dvyukov/src/go/src/runtime/panic.go:527 +0x96 fp=0x7fa4fd7f98a0 sp=0x7fa4fd7f9888
runtime.heapBitsForObject(0xc820ed0fc0, 0x0, 0x0, 0x0, 0x7fa65020b5a0)
    /usr/local/google/home/dvyukov/src/go/src/runtime/mbitmap.go:214 +0x27d fp=0x7fa4fd7f98d8 sp=0x7fa4fd7f98a0
runtime.scanblock(0xc820c28fb0, 0x30, 0x492ea8, 0xc820024720)
    /usr/local/google/home/dvyukov/src/go/src/runtime/mgcmark.go:797 +0xef fp=0x7fa4fd7f9960 sp=0x7fa4fd7f98d8
runtime.scanframeworker(0x7fa4fd7f9b18, 0x0, 0xc820024720)
    /usr/local/google/home/dvyukov/src/go/src/runtime/mgcmark.go:485 +0x1b9 fp=0x7fa4fd7f99f8 sp=0x7fa4fd7f9960
runtime.scanstack.func1(0x7fa4fd7f9b18, 0x0, 0x1)
    /usr/local/google/home/dvyukov/src/go/src/runtime/mgcmark.go:384 +0x65 fp=0x7fa4fd7f9a40 sp=0x7fa4fd7f99f8
runtime.gentraceback(0x426719, 0xc820c28b00, 0x0, 0xc82017c600, 0x0, 0x0, 0x7fffffff, 0x7fa4fd7f9c38, 0x0, 0x0, ...)
    /usr/local/google/home/dvyukov/src/go/src/runtime/traceback.go:336 +0xa7e fp=0x7fa4fd7f9b70 sp=0x7fa4fd7f9a40
runtime.scanstack(0xc82017c600)
    /usr/local/google/home/dvyukov/src/go/src/runtime/mgcmark.go:406 +0x391 fp=0x7fa4fd7f9c78 sp=0x7fa4fd7f9b70
runtime.scang(0xc82017c600)
    /usr/local/google/home/dvyukov/src/go/src/runtime/proc1.go:417 +0x96 fp=0x7fa4fd7f9c98 sp=0x7fa4fd7f9c78
runtime.markroot(0xc820020000, 0xd6)
    /usr/local/google/home/dvyukov/src/go/src/runtime/mgcmark.go:133 +0x1ba fp=0x7fa4fd7f9d38 sp=0x7fa4fd7f9c98
runtime.parfordo(0xc820020000)
    /usr/local/google/home/dvyukov/src/go/src/runtime/parfor.go:110 +0x1d4 fp=0x7fa4fd7f9dc0 sp=0x7fa4fd7f9d38
runtime.gchelper()
    /usr/local/google/home/dvyukov/src/go/src/runtime/mgc.go:1665 +0x59 fp=0x7fa4fd7f9e08 sp=0x7fa4fd7f9dc0
runtime.stopm()
    /usr/local/google/home/dvyukov/src/go/src/runtime/proc1.go:1125 +0x15e fp=0x7fa4fd7f9e30 sp=0x7fa4fd7f9e08
runtime.gcstopm()
    /usr/local/google/home/dvyukov/src/go/src/runtime/proc1.go:1315 +0xfe fp=0x7fa4fd7f9e60 sp=0x7fa4fd7f9e30
runtime.schedule()
    /usr/local/google/home/dvyukov/src/go/src/runtime/proc1.go:1574 +0xa5 fp=0x7fa4fd7f9e98 sp=0x7fa4fd7f9e60
runtime.mstart1()
    /usr/local/google/home/dvyukov/src/go/src/runtime/proc1.go:725 +0x129 fp=0x7fa4fd7f9eb8 sp=0x7fa4fd7f9e98
runtime.mstart()
    /usr/local/google/home/dvyukov/src/go/src/runtime/proc1.go:685 +0x72 fp=0x7fa4fd7f9ec8 sp=0x7fa4fd7f9eb8

The bug is that cgocallback_gofunc allocates a fake frame for itself on the m->curg stack:

    // Switch to m->curg stack and call runtime.cgocallbackg.
    // Because we are taking over the execution of m->curg
    // but *not* resuming what had been running, we need to
    // save that information (m->curg->sched) so we can restore it.
    // We can restore m->curg->sched.sp easily, because calling
    // runtime.cgocallbackg leaves SP unchanged upon return.
    // To save m->curg->sched.pc, we push it onto the stack.
    // This has the added benefit that it looks to the traceback
    // routine like cgocallbackg is going to return to that
    // PC (because the frame we allocate below has the same
    // size as cgocallback_gofunc's frame declared above)
    // so that the traceback will seamlessly trace back into
    // the earlier calls.
    //
    // In the new goroutine, 0(SP) holds the saved R8.
    MOVQ    m_curg(BX), SI
    MOVQ    SI, g(CX)
    MOVQ    (g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
    MOVQ    (g_sched+gobuf_pc)(SI), BX
    MOVQ    BX, -8(DI)
    // Compute the size of the frame, including return PC and, if
    // GOEXPERIMENT=framepointer, the saved base pointer
    LEAQ    fv+0(FP), AX
    SUBQ    SP, AX
    SUBQ    AX, DI
    MOVQ    DI, SP

But the fake frame's contents are garbage, while cgocallback_gofunc's arguments include 2 pointers, so these pointer slots hold garbage values during GC and stack copying.

I can confirm that by spraying newly allocated stacks with any magic value and then observing these values in panic traces:

runtime.cgocallback_gofunc(0x67, 0x67, 0x67)

This happens on 1.4 and tip and can lead to false memory retention and crashes.

@bradfitz
Copy link
Contributor

/cc @RLH @aclements @rsc

@ianlancetaylor ianlancetaylor added this to the Go1.5 milestone Jul 28, 2015
@gopherbot
Copy link
Contributor

CL https://golang.org/cl/12851 mentions this issue.

@gopherbot
Copy link
Contributor

CL https://golang.org/cl/12852 mentions this issue.

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Projects
None yet
Development

No branches or pull requests

4 participants