Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

runtime: TestStackBarrierProfiling missed stack barrier #13362

Closed
aclements opened this issue Nov 22, 2015 · 8 comments
Closed

runtime: TestStackBarrierProfiling missed stack barrier #13362

aclements opened this issue Nov 22, 2015 · 8 comments
Milestone

Comments

@aclements
Copy link
Member

Found while stress testing TestStackBarrierProfiling at 54bd5a7 on master linux/amd64:

--- FAIL: TestStackBarrierProfiling (4.94s)
    pprof_test.go:342: subprocess failed with exit status 2:
        found next stack barrier at 0xc82043d798; expected [*0xc820455798=0x48d610 *0xc8204557d0=0x48d610 *0xc820455808=0x48d610 *0xc820455840=0x48d610 *0xc820455878=0x48d610 *0xc8204558b0=0x48d610 *0xc8204558e8=0x48d610 *0xc820455920=0x48d610 *0xc820455958=0x48d610 *0xc820455990=0x48d610 *0xc8204559c8=0x48d610 *0xc820455a00=0x48d610 *0xc820455a38=0x48d610 *0xc820455a70=0x48d610 *0xc820455aa8=0x48d610 *0xc820455ae0=0x48d610 *0xc820455b18=0x48d610 *0xc820455b50=0x48d610 *0xc820455b88=0x48d610 *0xc820455bc0=0x48f5b4 *0xc820455bf0=0x48bb53 *0xc820455e48=0x48d564 *0xc820455f90=0x47c098 *0xc820455fc8=0x45bba1]
        fatal error: missed stack barrier

        goroutine 0 [idle]:
        runtime.throw(0x5f1140, 0x14)
            /home/austin/go.dev/src/runtime/panic.go:530 +0x90 fp=0xc8200b74b0 sp=0xc8200b7498
        runtime.gentraceback(0x419e66, 0xc8200c0e90, 0x0, 0xc820092180, 0x0, 0xc8200b7778, 0x40, 0x0, 0x0, 0x6, ...)
            /home/austin/go.dev/src/runtime/traceback.go:293 +0x13c9 fp=0xc8200b7708 sp=0xc8200b74b0
        runtime.sigprof(0x419e66, 0xc8200c0e90, 0x0, 0xc820092180, 0xc82008a400)
            /home/austin/go.dev/src/runtime/proc.go:2940 +0x533 fp=0xc8200b79d8 sp=0xc8200b7708
        runtime.sighandler(0xc80000001b, 0xc8200b7bf0, 0xc8200b7ac0, 0xc820092180)
            /home/austin/go.dev/src/runtime/signal_amd64x.go:48 +0xbe fp=0xc8200b7a70 sp=0xc8200b79d8
        runtime.sigtrampgo(0x1b, 0xc8200b7bf0, 0xc8200b7ac0)
            /home/austin/go.dev/src/runtime/signal_linux.go:21 +0x95 fp=0xc8200b7aa0 sp=0xc8200b7a70
        runtime.sigtramp(0x1, 0x0, 0xc8200b4000, 0x0, 0x4000, 0xe, 0x6b5a20, 0x0, 0x212, 0x1000, ...)
            /home/austin/go.dev/src/runtime/sys_linux_amd64.s:234 +0x1b fp=0xc8200b7ac0 sp=0xc8200b7aa0
        runtime.sigreturn(0x0, 0xc8200b4000, 0x0, 0x4000, 0xe, 0x6b5a20, 0x0, 0x212, 0x1000, 0x400, ...)
            /home/austin/go.dev/src/runtime/sys_linux_amd64.s:238 fp=0xc8200b7ac8 sp=0xc8200b7ac0

        goroutine 9 [garbage collection]:
        runtime.systemstack_switch()
            /home/austin/go.dev/src/runtime/asm_amd64.s:245 fp=0xc8204552b8 sp=0xc8204552b0
        runtime.gcMarkTermination()
            /home/austin/go.dev/src/runtime/mgc.go:1164 +0x136 fp=0xc820455508 sp=0xc8204552b8
        runtime.gcMarkDone()
            /home/austin/go.dev/src/runtime/mgc.go:1128 +0x262 fp=0xc820455528 sp=0xc820455508
        runtime.gcMarkDone()
            /home/austin/go.dev/src/runtime/mgc.go:1095 +0x1f0 fp=0xc820455548 sp=0xc820455528
        runtime.gcAssistAlloc(0xc820001980)
            /home/austin/go.dev/src/runtime/mgcmark.go:428 +0x18c fp=0xc8204555c8 sp=0xc820455548
        runtime.mallocgc(0x400, 0x553360, 0xc800000001, 0x0)
            /home/austin/go.dev/src/runtime/malloc.go:533 +0x184 fp=0xc8204556a0 sp=0xc8204555c8
        runtime.newarray(0x553360, 0x400, 0xc820102000)
            /home/austin/go.dev/src/runtime/malloc.go:795 +0xc9 fp=0xc8204556e0 sp=0xc8204556a0
        runtime.makeslice(0x54a6c0, 0x400, 0x400, 0x0, 0x0, 0x0)
            /home/austin/go.dev/src/runtime/slice.go:32 +0x165 fp=0xc820455730 sp=0xc8204556e0
        runtime/pprof_test.deepStack(0x3d4, 0xc820102000)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:372 +0x52 fp=0xc820455768 sp=0xc820455730
        runtime/pprof_test.deepStack(0x3d5, 0xc820100000)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc8204557a0 sp=0xc820455768
        runtime/pprof_test.deepStack(0x3d6, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc8204557d8 sp=0xc8204557a0
        runtime/pprof_test.deepStack(0x3d7, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455810 sp=0xc8204557d8
        runtime/pprof_test.deepStack(0x3d8, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455848 sp=0xc820455810
        runtime/pprof_test.deepStack(0x3d9, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455880 sp=0xc820455848
        runtime/pprof_test.deepStack(0x3da, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc8204558b8 sp=0xc820455880
        runtime/pprof_test.deepStack(0x3db, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc8204558f0 sp=0xc8204558b8
        runtime/pprof_test.deepStack(0x3dc, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455928 sp=0xc8204558f0
        runtime/pprof_test.deepStack(0x3dd, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455960 sp=0xc820455928
        runtime/pprof_test.deepStack(0x3de, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455998 sp=0xc820455960
        runtime/pprof_test.deepStack(0x3df, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc8204559d0 sp=0xc820455998
        runtime/pprof_test.deepStack(0x3e0, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455a08 sp=0xc8204559d0
        runtime/pprof_test.deepStack(0x3e1, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455a40 sp=0xc820455a08
        runtime/pprof_test.deepStack(0x3e2, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455a78 sp=0xc820455a40
        runtime/pprof_test.deepStack(0x3e3, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455ab0 sp=0xc820455a78
        runtime/pprof_test.deepStack(0x3e4, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455ae8 sp=0xc820455ab0
        runtime/pprof_test.deepStack(0x3e5, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455b20 sp=0xc820455ae8
        runtime/pprof_test.deepStack(0x3e6, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455b58 sp=0xc820455b20
        runtime/pprof_test.deepStack(0x3e7, 0x400)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455b90 sp=0xc820455b58
        runtime/pprof_test.deepStack(0x3e8, 0x0)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:373 +0x90 fp=0xc820455bc8 sp=0xc820455b90
        runtime/pprof_test.TestStackBarrierProfiling.func1()
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:356 +0x54 fp=0xc820455bf8 sp=0xc820455bc8
        runtime/pprof_test.testCPUProfile(0xc8200a6000, 0x0, 0x0, 0x0, 0xc820455e90)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:145 +0x1a3 fp=0xc820455e50 sp=0xc820455bf8
        runtime/pprof_test.TestStackBarrierProfiling(0xc8200a6000)
            /home/austin/go.dev/src/runtime/pprof/pprof_test.go:363 +0x564 fp=0xc820455f98 sp=0xc820455e50
        testing.tRunner(0xc8200a6000, 0x6ba4b0)
            /home/austin/go.dev/src/testing/testing.go:456 +0x98 fp=0xc820455fd0 sp=0xc820455f98
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc820455fd8 sp=0xc820455fd0
        created by testing.RunTests
            /home/austin/go.dev/src/testing/testing.go:565 +0x879

        goroutine 1 [chan receive]:
        runtime.gopark(0x618fe0, 0xc820066238, 0x5d1910, 0xc, 0x17, 0x3)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc8200b2ba0 sp=0xc8200b2b78
        runtime.goparkunlock(0xc820066238, 0x5d1910, 0xc, 0x7fd062ba5017, 0x3)
            /home/austin/go.dev/src/runtime/proc.go:212 +0x54 fp=0xc8200b2bd8 sp=0xc8200b2ba0
        runtime.chanrecv(0x54ab40, 0xc8200661e0, 0xc8200b2db8, 0xc820053c01, 0x470000)
            /home/austin/go.dev/src/runtime/chan.go:470 +0x49f fp=0xc8200b2c60 sp=0xc8200b2bd8
        runtime.chanrecv1(0x54ab40, 0xc8200661e0, 0xc8200b2db8)
            /home/austin/go.dev/src/runtime/chan.go:355 +0x2b fp=0xc8200b2c90 sp=0xc8200b2c60
        testing.RunTests(0x618c48, 0x6ba420, 0x8, 0x8, 0x7fd000000001)
            /home/austin/go.dev/src/testing/testing.go:566 +0x8b9 fp=0xc8200b2e10 sp=0xc8200b2c90
        testing.(*M).Run(0xc8200b2f38, 0x408e2d)
            /home/austin/go.dev/src/testing/testing.go:498 +0x81 fp=0xc8200b2e98 sp=0xc8200b2e10
        main.main()
            runtime/pprof/_test/_testmain.go:70 +0x117 fp=0xc8200b2f90 sp=0xc8200b2e98
        runtime.main()
            /home/austin/go.dev/src/runtime/proc.go:132 +0x2b0 fp=0xc8200b2fe0 sp=0xc8200b2f90
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc8200b2fe8 sp=0xc8200b2fe0

        goroutine 2 [force gc (idle)]:
        runtime.gopark(0x618fe0, 0x6bc8e0, 0x5d2330, 0xf, 0x14, 0x1)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc82002e378 sp=0xc82002e350
        runtime.goparkunlock(0x6bc8e0, 0x5d2330, 0xf, 0xc820000114, 0x1)
            /home/austin/go.dev/src/runtime/proc.go:212 +0x54 fp=0xc82002e3b0 sp=0xc82002e378
        runtime.forcegchelper()
            /home/austin/go.dev/src/runtime/proc.go:173 +0xb8 fp=0xc82002e3e0 sp=0xc82002e3b0
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc82002e3e8 sp=0xc82002e3e0
        created by runtime.init.4
            /home/austin/go.dev/src/runtime/proc.go:162 +0x2b

        goroutine 3 [GC sweep wait]:
        runtime.gopark(0x618fe0, 0x6bca60, 0x5cf670, 0xd, 0x44f614, 0x1)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc82004a768 sp=0xc82004a740
        runtime.goparkunlock(0x6bca60, 0x5cf670, 0xd, 0x14, 0x1)
            /home/austin/go.dev/src/runtime/proc.go:212 +0x54 fp=0xc82004a7a0 sp=0xc82004a768
        runtime.bgsweep(0xc8200180e0)
            /home/austin/go.dev/src/runtime/mgcsweep.go:79 +0x14d fp=0xc82004a7d8 sp=0xc82004a7a0
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc82004a7e0 sp=0xc82004a7d8
        created by runtime.gcenable
            /home/austin/go.dev/src/runtime/mgc.go:191 +0x53

        goroutine 7 [GC worker (idle)]:
        runtime.gopark(0x618e80, 0xc820034c00, 0x5ebbe0, 0x10, 0x14, 0x0)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc8200d5768 sp=0xc8200d5740
        runtime.gcBgMarkWorker(0xc820026000)
            /home/austin/go.dev/src/runtime/mgc.go:1369 +0x12c fp=0xc8200d57d8 sp=0xc8200d5768
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc8200d57e0 sp=0xc8200d57d8
        created by runtime.gcBgMarkStartWorkers
            /home/austin/go.dev/src/runtime/mgc.go:1316 +0x93

        goroutine 8 [finalizer wait]:
        runtime.gopark(0x618fe0, 0x6d79a8, 0x5d1f90, 0xe, 0x14, 0x1)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc820030b38 sp=0xc820030b10
        runtime.goparkunlock(0x6d79a8, 0x5d1f90, 0xe, 0x14, 0x1)
            /home/austin/go.dev/src/runtime/proc.go:212 +0x54 fp=0xc820030b70 sp=0xc820030b38
        runtime.runfinq()
            /home/austin/go.dev/src/runtime/mfinal.go:158 +0xaa fp=0xc820030be0 sp=0xc820030b70
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc820030be8 sp=0xc820030be0
        created by runtime.createfing
            /home/austin/go.dev/src/runtime/mfinal.go:139 +0x60

        goroutine 10 [syscall]:
        runtime.notetsleepg(0x7fd0627c4008, 0xffffffffffffffff, 0xc8200a6000)
            /home/austin/go.dev/src/runtime/lock_futex.go:205 +0x4e fp=0xc820046640 sp=0xc820046618
        runtime.(*cpuProfile).getprofile(0x7fd0627c4000, 0x0, 0x0, 0x0)
            /home/austin/go.dev/src/runtime/cpuprof.go:341 +0x5dd fp=0xc820046748 sp=0xc820046640
        runtime.CPUProfile(0x0, 0x0, 0x0)
            /home/austin/go.dev/src/runtime/cpuprof.go:421 +0x34 fp=0xc820046770 sp=0xc820046748
        runtime/pprof.profileWriter(0x7fd062b5b580, 0xc8200aa000)
            /home/austin/go.dev/src/runtime/pprof/pprof.go:599 +0x1c fp=0xc8200467d0 sp=0xc820046770
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc8200467d8 sp=0xc8200467d0
        created by runtime/pprof.StartCPUProfile
            /home/austin/go.dev/src/runtime/pprof/pprof.go:593 +0x145

        goroutine 11 [syscall]:
        runtime.notetsleepg(0x6bcad8, 0x129f39f59, 0x0)
            /home/austin/go.dev/src/runtime/lock_futex.go:205 +0x4e fp=0xc82002c358 sp=0xc82002c330
        runtime.timerproc()
            /home/austin/go.dev/src/runtime/time.go:209 +0xde fp=0xc82002c3e0 sp=0xc82002c358
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc82002c3e8 sp=0xc82002c3e0
        created by runtime.addtimerLocked
            /home/austin/go.dev/src/runtime/time.go:116 +0x11f

        goroutine 14 [GC worker (idle)]:
        runtime.gopark(0x618e80, 0xc82008a400, 0x5ebbe0, 0x10, 0x14, 0x0)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc8200d7768 sp=0xc8200d7740
        runtime.gcBgMarkWorker(0xc820023500)
            /home/austin/go.dev/src/runtime/mgc.go:1369 +0x12c fp=0xc8200d77d8 sp=0xc8200d7768
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc8200d77e0 sp=0xc8200d77d8
        created by runtime.gcBgMarkStartWorkers
            /home/austin/go.dev/src/runtime/mgc.go:1316 +0x93

        goroutine 17 [GC worker (idle)]:
        runtime.gopark(0x618e80, 0xc8200ce400, 0x5ebbe0, 0x10, 0x48d514, 0x0)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc82004b768 sp=0xc82004b740
        runtime.gcBgMarkWorker(0xc820022000)
            /home/austin/go.dev/src/runtime/mgc.go:1369 +0x12c fp=0xc82004b7d8 sp=0xc82004b768
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc82004b7e0 sp=0xc82004b7d8
        created by runtime.gcBgMarkStartWorkers
            /home/austin/go.dev/src/runtime/mgc.go:1316 +0x93

        goroutine 18 [GC worker (idle)]:
        runtime.gopark(0x618e80, 0xc820034800, 0x5ebbe0, 0x10, 0x48d514, 0x0)
            /home/austin/go.dev/src/runtime/proc.go:206 +0x163 fp=0xc82004c768 sp=0xc82004c740
        runtime.gcBgMarkWorker(0xc820024a00)
            /home/austin/go.dev/src/runtime/mgc.go:1369 +0x12c fp=0xc82004c7d8 sp=0xc82004c768
        runtime.goexit()
            /home/austin/go.dev/src/runtime/asm_amd64.s:1988 +0x1 fp=0xc82004c7e0 sp=0xc82004c7d8
        created by runtime.gcBgMarkStartWorkers
            /home/austin/go.dev/src/runtime/mgc.go:1316 +0x93
FAIL
@aclements aclements self-assigned this Nov 22, 2015
@aclements aclements added this to the Go1.6 milestone Nov 22, 2015
@mwhudson
Copy link
Contributor

I saw this on a builder too I think, which was it... ah, android/arm: http://build.golang.org/log/60c9e0384b64e90ad26b2235f044d172f84b2811

@aclements
Copy link
Member Author

I've reproduced this several times now and it's clear that it's always a SIGPROF that came in during the systemstack(gcMark) in gcMarkTermination called from a mutator assist. Given where it found a barrier versus where it expects them, I'd say the stack was shrunk. However, we don't allow sigprof to walk the user stack when it's in _Gcopystack (and the traceback doesn't say that the user goroutine is in _Gcopystack), so we must have finished shrinking the stack, but not yet returned to the new stack. But in this case, we should have adjusted all of the barriers as well as sched.sp (which gentraceback uses to find the user stack), so I'm not sure how this inconsistency is arising.

@aclements
Copy link
Member Author

For the record, 10 failures of this form out of 173932 runs on my linux/amd64 workstation.

@aclements
Copy link
Member Author

Reproduced with somewhat more detailed stack barrier debugging. The @@@ in the stack barrier list indicates stkbarPos and the ==> indicates the mismatched stack barrier. The fact that we failed this far in to the stack barrier list suggests to me that we did a stack copy while in the middle of gentraceback. This is further supported by the fact that the panic itself was able to traceback without hitting this same panic. Based on the sp's in the panic and the stack barrier positions of the stack barriers we've hit (before the @@@), we're tracing the pre-copy stack (at 0xc280199*) and the stack has been moved to 0xc8200ef*.

Stack barrier list:

*0xc820199070=0x41a62c
*0xc8201990f0=0x40eb64
*0xc8201991c8=0x40f709
*0xc820199208=0x43ec25
*0xc820199258=0x48d542
@@@
*0xc8200ef290=0x48d580
*0xc8200ef2c8=0x48d580
*0xc8200ef300=0x48d580
*0xc8200ef338=0x48d580
*0xc8200ef370=0x48d580
*0xc8200ef3a8=0x48d580
*0xc8200ef3e0=0x48d580
*0xc8200ef418=0x48d580
*0xc8200ef450=0x48d580
*0xc8200ef488=0x48d580
*0xc8200ef4c0=0x48d580
*0xc8200ef4f8=0x48d580
*0xc8200ef530=0x48d580
*0xc8200ef568=0x48d580
*0xc8200ef5a0=0x48d580
*0xc8200ef5d8=0x48d580
*0xc8200ef610=0x48d580
*0xc8200ef648=0x48d580
*0xc8200ef680=0x48d580
*0xc8200ef6b8=0x48d580
*0xc8200ef6f0=0x48d580
==>
*0xc8200ef728=0x48d580
*0xc8200ef760=0x48d580
*0xc8200ef798=0x48d580
*0xc8200ef7d0=0x48d580
*0xc8200ef808=0x48d580
*0xc8200ef840=0x48d580
*0xc8200ef878=0x48d580
*0xc8200ef8b0=0x48d580
*0xc8200ef8e8=0x48d580
*0xc8200ef920=0x48d580
*0xc8200ef958=0x48d580
*0xc8200ef990=0x48d580
*0xc8200ef9c8=0x48d580
*0xc8200efa00=0x48d580
*0xc8200efa38=0x48d580
*0xc8200efa70=0x48d580
*0xc8200efaa8=0x48d580
*0xc8200efae0=0x48d580
*0xc8200efb18=0x48d580
*0xc8200efb50=0x48d580
*0xc8200efb88=0x48d580
*0xc8200efbc0=0x48f524
*0xc8200efbf0=0x48bad3
*0xc8200efe48=0x48d4db
*0xc8200eff90=0x47c018
*0xc8200effc8=0x45bb31

Traceback:

--- FAIL: TestStackBarrierProfiling (2.30s)
        pprof_test.go:338: subprocess failed with exit status 2:
                found next stack barrier at 0xc820199728; expected [*0xc820199070=0x41a62c *0xc8201990f0=0x40eb64 *0xc8201991c8=0x40f709 *0xc820199208=0x43ec25 *0xc820199258=0x48d542 @@@ *0xc8200ef290=0x48d580 *0xc8200ef2c8=0x48d580 *0xc8200ef300=0x48d580 *0xc8200ef338=0x48d580 *0xc8200ef370=0x48d580 *0xc8200ef3a8=0x48d580 *0xc8200ef3e0=0x48d580 *0xc8200ef418=0x48d580 *0xc8200ef450=0x48d580 *0xc8200ef488=0x48d580 *0xc8200ef4c0=0x48d580 *0xc8200ef4f8=0x48d580 *0xc8200ef530=0x48d580 *0xc8200ef568=0x48d580 *0xc8200ef5a0=0x48d580 *0xc8200ef5d8=0x48d580 *0xc8200ef610=0x48d580 *0xc8200ef648=0x48d580 *0xc8200ef680=0x48d580 *0xc8200ef6b8=0x48d580 *0xc8200ef6f0=0x48d580 ==> *0xc8200ef728=0x48d580 *0xc8200ef760=0x48d580 *0xc8200ef798=0x48d580 *0xc8200ef7d0=0x48d580 *0xc8200ef808=0x48d580 *0xc8200ef840=0x48d580 *0xc8200ef878=0x48d580 *0xc8200ef8b0=0x48d580 *0xc8200ef8e8=0x48d580 *0xc8200ef920=0x48d580 *0xc8200ef958=0x48d580 *0xc8200ef990=0x48d580 *0xc8200ef9c8=0x48d580 *0xc8200efa00=0x48d580 *0xc8200efa38=0x48d580 *0xc8200efa70=0x48d580 *0xc8200efaa8=0x48d580 *0xc8200efae0=0x48d580 *0xc8200efb18=0x48d580 *0xc8200efb50=0x48d580 *0xc8200efb88=0x48d580 *0xc8200efbc0=0x48f524 *0xc8200efbf0=0x48bad3 *0xc8200efe48=0x48d4db *0xc8200eff90=0x47c018 *0xc8200effc8=0x45bb31]
                fatal error: missed stack barrier

                goroutine 0 [idle]:
                runtime.throw(0x5f0c40, 0x14)
                        /usr/local/google/home/austin/go.dev/src/runtime/panic.go:530 +0x90 fp=0xc8200fb4b0 sp=0xc8200fb498
                runtime.gentraceback(0x45c9c3, 0xc820084ea0, 0x0, 0xc82008a600, 0x0, 0xc8200fb778, 0x40, 0x0, 0x0, 0x6, ...)
                        /usr/local/google/home/austin/go.dev/src/runtime/traceback.go:295 +0x1587 fp=0xc8200fb708 sp=0xc8200fb4b0
                runtime.sigprof(0x45c9c3, 0xc820084ea0, 0x0, 0xc82008a600, 0xc820078c00)
                        /usr/local/google/home/austin/go.dev/src/runtime/proc.go:2937 +0x533 fp=0xc8200fb9d8 sp=0xc8200fb708
                runtime.sighandler(0xc80000001b, 0xc8200fbbf0, 0xc8200fbac0, 0xc82008a600)
                        /usr/local/google/home/austin/go.dev/src/runtime/signal_amd64x.go:48 +0xbe fp=0xc8200fba70 sp=0xc8200fb9d8
                runtime.sigtrampgo(0x1b, 0xc8200fbbf0, 0xc8200fbac0)
                        /usr/local/google/home/austin/go.dev/src/runtime/signal_linux.go:21 +0x95 fp=0xc8200fbaa0 sp=0xc8200fba70
                runtime.sigtramp(0x1, 0x0, 0xc8200f8000, 0x0, 0x4000, 0x0, 0x0, 0x0, 0x212, 0x1400, ...)
                        /usr/local/google/home/austin/go.dev/src/runtime/sys_linux_amd64.s:234 +0x1b fp=0xc8200fbac0 sp=0xc8200fbaa0
                runtime.sigreturn(0x0, 0xc8200f8000, 0x0, 0x4000, 0x0, 0x0, 0x0, 0x212, 0x1400, 0x400, ...)
                        /usr/local/google/home/austin/go.dev/src/runtime/sys_linux_amd64.s:238 fp=0xc8200fbac8 sp=0xc8200fbac0

                goroutine 13 [garbage collection]:
                runtime.systemstack_switch()
                        /usr/local/google/home/austin/go.dev/src/runtime/asm_amd64.s:245 fp=0xc8200eedd0 sp=0xc8200eedc8
                runtime.gcMarkTermination()
                        /usr/local/google/home/austin/go.dev/src/runtime/mgc.go:1173 +0x136 fp=0xc8200ef020 sp=0xc8200eedd0
                runtime.gcMarkDone()
                        /usr/local/google/home/austin/go.dev/src/runtime/mgc.go:1137 +0x286 fp=0xc8200ef040 sp=0xc8200ef020
                runtime.gcAssistAlloc(0xc8200d8000)
                        /usr/local/google/home/austin/go.dev/src/runtime/mgcmark.go:428 +0x18c fp=0xc8200ef0c0 sp=0xc8200ef040
                runtime.mallocgc(0x400, 0x553360, 0xc800000001, 0x0)
                        /usr/local/google/home/austin/go.dev/src/runtime/malloc.go:533 +0x184 fp=0xc8200ef198 sp=0xc8200ef0c0
                runtime.newarray(0x553360, 0x400, 0xc820116400)
                        /usr/local/google/home/austin/go.dev/src/runtime/malloc.go:795 +0xc9 fp=0xc8200ef1d8 sp=0xc8200ef198
                runtime.makeslice(0x54a6c0, 0x400, 0x400, 0x0, 0x0, 0x0)
                        /usr/local/google/home/austin/go.dev/src/runtime/slice.go:32 +0x165 fp=0xc8200ef228 sp=0xc8200ef1d8
                runtime/pprof_test.deepStack(0x3bd, 0xc820116400)
                        /usr/local/google/home/austin/go.dev/src/runtime/pprof/pprof_test.go:368 +0x52 fp=0xc8200ef260 sp=0xc8200ef228
                runtime/pprof_test.deepStack(0x3be, 0xc820116000)
                        /usr/local/google/home/austin/go.dev/src/runtime/pprof/pprof_test.go:369 +0x90 fp=0xc8200ef298 sp=0xc8200ef260
                runtime/pprof_test.deepStack(0x3bf, 0x400)
                        /usr/local/google/home/austin/go.dev/src/runtime/pprof/pprof_test.go:369 +0x90 fp=0xc8200ef2d0 sp=0xc8200ef298
                runtime/pprof_test.deepStack(0x3c0, 0x400)
                        /usr/local/google/home/austin/go.dev/src/runtime/pprof/pprof_test.go:369 +0x90 fp=0xc8200ef308 sp=0xc8200ef2d0
                runtime/pprof_test.deepStack(0x3c1, 0x400)
                        /usr/local/google/home/austin/go.dev/src/runtime/pprof/pprof_test.go:369 +0x90 fp=0xc8200ef340 sp=0xc8200ef308
...

@aclements
Copy link
Member Author

The fact that we failed this far in to the stack barrier list suggests to me that we did a stack copy while in the middle of gentraceback.

I believe this is exactly what's going on. We prevent sigprof from tracing the user stack if it's currently being copied, but we don't interlock the other way around: if sigprof starts tracing the user stack first, it's possible for another thread to come along and shrink it while sigprof is still tracing.

So, in fact bbd1a1c is only a partial fix to #12932. (/cc @DanielMorsing)

It's possible this could be causing #12528 as well, but I haven't worked out an exact sequence of events for that yet.

@gopherbot
Copy link
Contributor

CL https://golang.org/cl/17192 mentions this issue.

@gopherbot
Copy link
Contributor

CL https://golang.org/cl/17191 mentions this issue.

@gopherbot
Copy link
Contributor

CL https://golang.org/cl/17194 mentions this issue.

aclements added a commit that referenced this issue Nov 24, 2015
Commit bbd1a1c prevented SIGPROF from scanning stacks that were being
copied, but it didn't prevent a stack copy (specifically a stack
shrink) from happening while SIGPROF is scanning the stack. As a
result, a stack copy may adjust stack barriers while SIGPROF is in the
middle of scanning a stack, causing SIGPROF to panic when it detects
an inconsistent stack barrier.

Fix this by taking the stack barrier lock while adjusting the stack.
In addition to preventing SIGPROF from scanning this stack, this will
block until any in-progress SIGPROF is done scanning the stack.

For 1.5.2.

Fixes #13362.
Updates #12932.

Change-Id: I422219c363054410dfa56381f7b917e04690e5dd
Reviewed-on: https://go-review.googlesource.com/17191
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-on: https://go-review.googlesource.com/17194
aclements added a commit that referenced this issue Dec 15, 2015
The addition of stack barrier locking to copystack subsumes the
partial fix from commit bbd1a1c for SIGPROF during copystack. With the
stack barrier locking, this commit simplifies the rule in sigprof to:
the user stack can be traced only if sigprof can acquire the stack
barrier lock.

Updates #12932, #13362.

Change-Id: I1c1f80015053d0ac7761e9e0c7437c2aba26663f
Reviewed-on: https://go-review.googlesource.com/17192
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
@golang golang locked and limited conversation to collaborators Nov 27, 2016
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Projects
None yet
Development

No branches or pull requests

3 participants