From 332719f7cee2abafb3963009d44ad7cc93474707 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Wed, 13 Sep 2017 15:53:47 -0700 Subject: [PATCH] runtime: don't call lockOSThread for every cgo call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a trivial benchmark with a do-nothing cgo call: name old time/op new time/op delta Call-4 64.5ns ± 7% 63.0ns ± 6% -2.25% (p=0.027 n=20+16) Because Windows uses the cgocall mechanism to make system calls, and passes arguments in a struct held in the m, we need to do the lockOSThread/unlockOSThread in that code. Because deferreturn was getting a nosplit stack overflow error, change it to avoid calling typedmemmove. Updates #21827. Change-Id: I9b1d61434c44faeb29805b46b409c812c9acadc2 Reviewed-on: https://go-review.googlesource.com/64070 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements Reviewed-by: David Crawshaw --- src/runtime/cgocall.go | 80 ++++++++++++++++++++-------------- src/runtime/panic.go | 12 ++++- src/runtime/proc.go | 6 +++ src/runtime/runtime2.go | 3 +- src/runtime/syscall_windows.go | 16 +++++++ 5 files changed, 82 insertions(+), 35 deletions(-) diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index 672d190f12da92..02c4cb3622df34 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -8,9 +8,9 @@ // runtime.cgocall(_cgo_Cfunc_f, frame), where _cgo_Cfunc_f is a // gcc-compiled function written by cgo. // -// runtime.cgocall (below) locks g to m, calls entersyscall -// so as not to block other goroutines or the garbage collector, -// and then calls runtime.asmcgocall(_cgo_Cfunc_f, frame). +// runtime.cgocall (below) calls entersyscall so as not to block +// other goroutines or the garbage collector, and then calls +// runtime.asmcgocall(_cgo_Cfunc_f, frame). // // runtime.asmcgocall (in asm_$GOARCH.s) switches to the m->g0 stack // (assumed to be an operating system-allocated stack, so safe to run @@ -104,13 +104,9 @@ func cgocall(fn, arg unsafe.Pointer) int32 { racereleasemerge(unsafe.Pointer(&racecgosync)) } - // Lock g to m to ensure we stay on the same stack if we do a - // cgo callback. In case of panic, unwindm calls endcgo. - lockOSThread() mp := getg().m mp.ncgocall++ mp.ncgo++ - mp.incgo = true // Reset traceback. mp.cgoCallers[0] = 0 @@ -130,7 +126,14 @@ func cgocall(fn, arg unsafe.Pointer) int32 { // and then re-enter the "system call" reusing the PC and SP // saved by entersyscall here. entersyscall(0) + + mp.incgo = true errno := asmcgocall(fn, arg) + + // Call endcgo before exitsyscall because exitsyscall may + // reschedule us on to a different M. + endcgo(mp) + exitsyscall(0) // From the garbage collector's perspective, time can move @@ -145,8 +148,8 @@ func cgocall(fn, arg unsafe.Pointer) int32 { // GC by forcing them to stay live across this time warp. KeepAlive(fn) KeepAlive(arg) + KeepAlive(mp) - endcgo(mp) return errno } @@ -158,8 +161,6 @@ func endcgo(mp *m) { if raceenabled { raceacquire(unsafe.Pointer(&racecgosync)) } - - unlockOSThread() // invalidates mp } // Call from C back to Go. @@ -171,6 +172,12 @@ func cgocallbackg(ctxt uintptr) { exit(2) } + // The call from C is on gp.m's g0 stack, so we must ensure + // that we stay on that M. We have to do this before calling + // exitsyscall, since it would otherwise be free to move us to + // a different M. The call to unlockOSThread is in unwindm. + lockOSThread() + // Save current syscall parameters, so m.syscall can be // used again if callback decide to make syscall. syscall := gp.m.syscall @@ -186,6 +193,10 @@ func cgocallbackg(ctxt uintptr) { cgocallbackg1(ctxt) + // At this point unlockOSThread has been called. + // The following code must not change to a different m. + // This is enforced by checking incgo in the schedule function. + gp.m.incgo = true // going back to cgo call reentersyscall(savedpc, uintptr(savedsp)) @@ -321,32 +332,35 @@ func cgocallbackg1(ctxt uintptr) { } func unwindm(restore *bool) { - if !*restore { - return - } - // Restore sp saved by cgocallback during - // unwind of g's stack (see comment at top of file). - mp := acquirem() - sched := &mp.g0.sched - switch GOARCH { - default: - throw("unwindm not implemented") - case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle": - sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize)) - case "arm64": - sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16)) - } + if *restore { + // Restore sp saved by cgocallback during + // unwind of g's stack (see comment at top of file). + mp := acquirem() + sched := &mp.g0.sched + switch GOARCH { + default: + throw("unwindm not implemented") + case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle": + sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize)) + case "arm64": + sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16)) + } - // Call endcgo to do the accounting that cgocall will not have a - // chance to do during an unwind. - // - // In the case where a Go call originates from C, ncgo is 0 - // and there is no matching cgocall to end. - if mp.ncgo > 0 { - endcgo(mp) + // Call endcgo to do the accounting that cgocall will not have a + // chance to do during an unwind. + // + // In the case where a Go call originates from C, ncgo is 0 + // and there is no matching cgocall to end. + if mp.ncgo > 0 { + endcgo(mp) + } + + releasem(mp) } - releasem(mp) + // Undo the call to lockOSThread in cgocallbackg. + // We must still stay on the same m. + unlockOSThread() } // called from assembly diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 1f8e37e14f46cc..2cda10565b02db 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -273,7 +273,17 @@ func freedefer(d *_defer) { unlock(&sched.deferlock) }) } - *d = _defer{} + + // These lines used to be simply `*d = _defer{}` but that + // started causing a nosplit stack overflow via typedmemmove. + d.siz = 0 + d.started = false + d.sp = 0 + d.pc = 0 + d.fn = nil + d._panic = nil + d.link = nil + pp.deferpool[sc] = append(pp.deferpool[sc], d) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 0a85986f6c14bb..29e681e26b6c7a 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2221,6 +2221,12 @@ func schedule() { execute(_g_.m.lockedg.ptr(), false) // Never returns. } + // We should not schedule away from a g that is executing a cgo call, + // since the cgo call is using the m's g0 stack. + if _g_.m.incgo { + throw("schedule: in cgo") + } + top: if sched.gcwaiting != 0 { gcstopm() diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 174a73bdb3682f..27b1e378035e2c 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -675,7 +675,8 @@ func extendRandom(r []byte, n int) { } } -// deferred subroutine calls +// A _defer holds an entry on the list of deferred calls. +// If you add a field here, add code to clear it in freedefer. type _defer struct { siz int32 started bool diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index ca8ea8b04fd60a..f170bc3f8f9a8b 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -93,6 +93,8 @@ const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 //go:linkname syscall_loadsystemlibrary syscall.loadsystemlibrary //go:nosplit func syscall_loadsystemlibrary(filename *uint16) (handle, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall if useLoadLibraryEx { @@ -126,6 +128,8 @@ func syscall_loadsystemlibrary(filename *uint16) (handle, err uintptr) { //go:linkname syscall_loadlibrary syscall.loadlibrary //go:nosplit func syscall_loadlibrary(filename *uint16) (handle, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = getLoadLibrary() c.n = 1 @@ -141,6 +145,8 @@ func syscall_loadlibrary(filename *uint16) (handle, err uintptr) { //go:linkname syscall_getprocaddress syscall.getprocaddress //go:nosplit func syscall_getprocaddress(handle uintptr, procname *byte) (outhandle, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = getGetProcAddress() c.n = 2 @@ -156,6 +162,8 @@ func syscall_getprocaddress(handle uintptr, procname *byte) (outhandle, err uint //go:linkname syscall_Syscall syscall.Syscall //go:nosplit func syscall_Syscall(fn, nargs, a1, a2, a3 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -167,6 +175,8 @@ func syscall_Syscall(fn, nargs, a1, a2, a3 uintptr) (r1, r2, err uintptr) { //go:linkname syscall_Syscall6 syscall.Syscall6 //go:nosplit func syscall_Syscall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -178,6 +188,8 @@ func syscall_Syscall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err ui //go:linkname syscall_Syscall9 syscall.Syscall9 //go:nosplit func syscall_Syscall9(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -189,6 +201,8 @@ func syscall_Syscall9(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1 //go:linkname syscall_Syscall12 syscall.Syscall12 //go:nosplit func syscall_Syscall12(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs @@ -200,6 +214,8 @@ func syscall_Syscall12(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, //go:linkname syscall_Syscall15 syscall.Syscall15 //go:nosplit func syscall_Syscall15(fn, nargs, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15 uintptr) (r1, r2, err uintptr) { + lockOSThread() + defer unlockOSThread() c := &getg().m.syscall c.fn = fn c.n = nargs