From a15818fed3032d43e57c395c3fe8b46b37a5f1ea Mon Sep 17 00:00:00 2001 From: Rick Hudson Date: Thu, 5 Feb 2015 19:58:18 -0500 Subject: [PATCH] runtime: cache workbufs on Ms and add consistency checks Add local workbufs to the m struct in order to reduce contention. Add consistency checks for workbuf ownership. Chain workbufs through call change to avoid swapping them to and from the m struct. Adjust the size of the workbuf so that the mutators can more frequently pass modifications to the GC thus shifting some work from the STW mark termination phase to the concurrent mark phase. Change-Id: I557b53af34ad9972265e0ed9f5996e52d548563d Reviewed-on: https://go-review.googlesource.com/3972 Reviewed-by: Austin Clements --- src/runtime/malloc.go | 2 + src/runtime/mgc.go | 454 ++++++++++++++++++++++++++++++---------- src/runtime/runtime2.go | 1 + 3 files changed, 348 insertions(+), 109 deletions(-) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index b8b1f4ed36b52..06ba124473a80 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -312,6 +312,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { // For now this must be bracketed with a stoptheworld and a starttheworld to ensure // all go routines see the new barrier. +//go:nowritebarrier func gcinstallmarkwb() { gcphase = _GCmark } @@ -389,6 +390,7 @@ func gcwork(force int32) { gctimer.cycle.installmarkwb = nanotime() systemstack(stoptheworld) systemstack(gcinstallmarkwb) + systemstack(harvestwbufs) systemstack(starttheworld) gctimer.cycle.mark = nanotime() systemstack(gcmark_m) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 75b1e5291616a..0a237ff3b1ced 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -119,11 +119,11 @@ package runtime import "unsafe" const ( + _Debugwbufs = true // if true check wbufs consistency _DebugGC = 0 _DebugGCPtrs = false // if true, print trace of every pointer load during GC _ConcurrentSweep = true - - _WorkbufSize = 4 * 256 + _WorkbufSize = 1 * 256 // in bytes - if small wbufs are passed to GC in a timely fashion. _FinBlockSize = 4 * 1024 _RootData = 0 _RootBss = 1 @@ -154,10 +154,18 @@ var gcpercent int32 // var worldsema uint32 = 1 +type workbufhdr struct { + node lfnode // must be first + nobj uintptr + id uintptr + inuse bool // This workbuf is in use by some gorotuine and is not on the work.empty/partial/full queues. + log [4]uintptr // line numbers forming a history of ownership changes to workbuf +} + type workbuf struct { - node lfnode // must be first - nobj uintptr - obj [(_WorkbufSize - unsafe.Sizeof(lfnode{}) - ptrSize) / ptrSize]uintptr + workbufhdr + // account for the above fields + obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / ptrSize]uintptr } var data, edata, bss, ebss, gcdata, gcbss struct{} @@ -347,7 +355,8 @@ func greyobject(obj, base, off uintptr, hbits heapBits, wbuf *workbuf) *workbuf // If workbuf is full, obtain an empty one. if wbuf.nobj >= uintptr(len(wbuf.obj)) { - wbuf = getempty(wbuf) + putfull(wbuf, 358) + wbuf = getempty(359) } wbuf.obj[wbuf.nobj] = obj @@ -426,13 +435,20 @@ func scanobject(b, n uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf { return wbuf } +// checknocurrentwbuf checks that the m's currentwbuf field is empty +func checknocurrentwbuf() { + if getg().m.currentwbuf != 0 { + throw("unexpected currentwbuf") + } +} + // scanblock starts by scanning b as scanobject would. // If the gcphase is GCscan, that's all scanblock does. // Otherwise it traverses some fraction of the pointers it found in b, recursively. 
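The workbufhdr/workbuf split introduced above sizes the obj array from whatever bytes remain after the header, so the whole struct stays exactly _WorkbufSize bytes (and gcinit throws if it drifts). Below is a minimal standalone sketch of the same sizing pattern; hdr, buf, workbufSize and the exact field layout are illustrative stand-ins, not the runtime's own definitions.

package main

import (
	"fmt"
	"unsafe"
)

// Hypothetical stand-ins for the runtime's constants.
const (
	workbufSize = 256                       // total bytes per buffer, header included
	ptrSize     = unsafe.Sizeof(uintptr(0)) // pointer size on this platform
)

// hdr plays the role of workbufhdr: bookkeeping that precedes the object slots.
type hdr struct {
	next  uintptr    // stand-in for the lfnode link
	nobj  uintptr    // number of valid entries in obj
	id    uintptr
	inuse bool       // owned by a goroutine, not on a shared queue
	log   [4]uintptr // recent ownership-change entries
}

// buf embeds the header and sizes the payload so the whole struct
// consumes exactly the byte budget left after the header.
type buf struct {
	hdr
	obj [(workbufSize - unsafe.Sizeof(hdr{})) / ptrSize]uintptr
}

func main() {
	// Mirror of the gcinit check: fail loudly if padding or a header
	// change made the struct drift from the intended size.
	if unsafe.Sizeof(buf{}) != workbufSize {
		panic("size of buf is suboptimal")
	}
	fmt.Println("payload slots per buffer:", len(buf{}.obj))
}

Shrinking the budget (256 bytes here, matching the new _WorkbufSize) trades per-buffer capacity for how quickly a mutator's partially filled buffer becomes visible to the collector, which is the tuning the commit message describes.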
// As a special case, scanblock(nil, 0, nil) means to scan previously queued work, // stopping only when no work is left in the system. //go:nowritebarrier -func scanblock(b0, n0 uintptr, ptrmask *uint8) { +func scanblock(b0, n0 uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf { // Use local copies of original parameters, so that a stack trace // due to one of the throws below shows the original block // base and extent. @@ -443,7 +459,9 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) { // 1. nil - obtain pointer mask from GC bitmap. // 2. pointer to a compact mask (for stacks and data). - wbuf := getpartialorempty() + if wbuf == nil { + wbuf = getpartialorempty(460) // no wbuf passed in. + } if b != 0 { wbuf = scanobject(b, n, ptrmask, wbuf) if gcphase == _GCscan { @@ -451,40 +469,40 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) { // b is in heap, we are in GCscan so there should be a ptrmask. throw("scanblock: In GCscan phase and inheap is true.") } - // GCscan only goes one level deep since mark wb not turned on. - putpartial(wbuf) - return + return wbuf } } drainallwbufs := b == 0 drainworkbuf(wbuf, drainallwbufs) + checknocurrentwbuf() + return nil } -// Scan objects in wbuf until wbuf is empty. +// Scan objects in wbuf until wbuf is empty (and on empty queue) or +// lets scanobject put partially emptied wbuf on partial queue. +// In any case there is no workbuf to return. // If drainallwbufs is true find all other available workbufs and repeat the process. //go:nowritebarrier func drainworkbuf(wbuf *workbuf, drainallwbufs bool) { + checknocurrentwbuf() if gcphase != _GCmark && gcphase != _GCmarktermination { - println("gcphase", gcphase) - throw("scanblock phase") + throw("scanblock phase incorrect") } for { if wbuf.nobj == 0 { + putempty(wbuf, 496) if !drainallwbufs { - putempty(wbuf) + checknocurrentwbuf() return } // Refill workbuf from global queue. - wbuf = getfull(wbuf) + wbuf = getfull(504) if wbuf == nil { // nil means out of work barrier reached return } - - if wbuf.nobj <= 0 { - throw("runtime:scanblock getfull returns empty buffer") - } + wbuf.checknonempty() } // If another proc wants a pointer, give it some. @@ -493,52 +511,59 @@ func drainworkbuf(wbuf *workbuf, drainallwbufs bool) { } // This might be a good place to add prefetch code... - // if(wbuf->nobj > 4) { - // PREFETCH(wbuf->obj[wbuf->nobj - 3]; + // if(wbuf.nobj > 4) { + // PREFETCH(wbuf->obj[wbuf.nobj - 3]; // } wbuf.nobj-- b := wbuf.obj[wbuf.nobj] + // If the current wbuf is filled by the scan a new wbuf might be + // returned that could possibly hold only a single object. This + // could result in each iteration draining only a single object + // out of the wbuf passed in + a single object placed + // into an empty wbuf in scanobject so there could be + // a performance hit as we keep fetching fresh wbufs. wbuf = scanobject(b, 0, nil, wbuf) } } // Scan count objects starting with those in wbuf. //go:nowritebarrier -func drainobjects(wbuf *workbuf, count uintptr) { +func drainobjects(wbuf *workbuf, count uintptr) *workbuf { + checknocurrentwbuf() for i := uintptr(0); i < count; i++ { if wbuf.nobj == 0 { - putempty(wbuf) - wbuf = trygetfull() + putempty(wbuf, 544) + wbuf = trygetfull(545) if wbuf == nil { - return + return nil } } // This might be a good place to add prefetch code... 
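scanblock and drainworkbuf above now accept a workbuf and return one instead of acquiring and releasing a buffer on every call, so a single buffer can be chained through the whole call path. The following rough sketch shows that calling convention; pool, get, put and scanOne are invented stand-ins for the runtime's queues and scan routines.

package main

import "fmt"

// workbuf is a tiny stand-in for the runtime's work buffer.
type workbuf struct{ obj []uintptr }

// A toy pool standing in for the work.empty/partial/full lock-free stacks.
var pool []*workbuf

func get() *workbuf {
	if n := len(pool); n > 0 {
		b := pool[n-1]
		pool = pool[:n-1]
		return b
	}
	return &workbuf{}
}

func put(b *workbuf) { pool = append(pool, b) }

// scanOne models the new scanblock contract: the caller may pass in the
// buffer it already owns; the callee returns a buffer (possibly a fresh
// one) instead of pushing it back onto the shared queues itself.
func scanOne(p uintptr, b *workbuf) *workbuf {
	if b == nil {
		b = get() // no buffer passed in; fall back to the queues
	}
	b.obj = append(b.obj, p) // pretend p was greyed
	return b
}

func main() {
	// Chaining: the same buffer flows through consecutive calls, so the
	// shared queues (or the per-M cache) are touched once, not per call.
	var b *workbuf
	for _, p := range []uintptr{0x10, 0x20, 0x30} {
		b = scanOne(p, b)
	}
	fmt.Println("queued objects:", len(b.obj))
	put(b)
}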
- // if(wbuf->nobj > 4) { - // PREFETCH(wbuf->obj[wbuf->nobj - 3]; + // if(wbuf.nobj > 4) { + // PREFETCH(wbuf->obj[wbuf.nobj - 3]; // } wbuf.nobj-- b := wbuf.obj[wbuf.nobj] wbuf = scanobject(b, 0, nil, wbuf) } - putpartial(wbuf) - return + return wbuf } //go:nowritebarrier func markroot(desc *parfor, i uint32) { // Note: if you add a case here, please also update heapdump.c:dumproots. + wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0))) switch i { case _RootData: - scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata) + wbuf = scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata, wbuf) case _RootBss: - scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata) + wbuf = scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata, wbuf) case _RootFinalizers: for fb := allfin; fb != nil; fb = fb.alllink { - scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0]) + wbuf = scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], wbuf) } case _RootSpans: @@ -564,9 +589,9 @@ func markroot(desc *parfor, i uint32) { // A finalizer can be set for an inner byte of an object, find object beginning. p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize if gcphase != _GCscan { - scanblock(p, s.elemsize, nil) // scanned during mark phase + wbuf = scanblock(p, s.elemsize, nil, wbuf) // scanned during mark phase } - scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0]) + wbuf = scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], wbuf) } } @@ -626,81 +651,196 @@ func markroot(desc *parfor, i uint32) { restartg(gp) } } + if wbuf == nil { + return + } else { + putpartial(wbuf, 670) + } } -// Get an empty work buffer off the work.empty list, -// allocating new buffers as needed. -//go:nowritebarrier -func getempty(b *workbuf) *workbuf { - if b != nil { - putfull(b) - b = nil +// workbuf factory routines. These funcs are used to manage the +// workbufs. They cache workbuf in the m struct field currentwbuf. +// If the GC asks for some work these are the only routines that +// make partially full wbufs available to the GC. +// Each of the gets and puts also take an distinct integer that is used +// to record a brief history of changes to ownership of the workbuf. +// The convention is to use a unique line number but any encoding +// is permissible. For example if you want to pass in 2 bits of information +// you could simple add lineno1*100000+lineno2. + +// logget records the past few values of entry to aid in debugging. +// logget checks the buffer b is not currently in use. +func (b *workbuf) logget(entry uintptr) { + if !_Debugwbufs { + return + } + if b.inuse { + println("runtime: logget fails log entry=", entry, + "b.log[0]=", b.log[0], "b.log[1]=", b.log[1], + "b.log[2]=", b.log[2], "b.log[3]=", b.log[3]) + throw("logget: get not legal") } + b.inuse = true + copy(b.log[1:], b.log[:]) + b.log[0] = entry +} + +// logput records the past few values of entry to aid in debugging. +// logput checks the buffer b is currently in use. 
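markroot above claims the per-M cached buffer with xchguintptr(&getg().m.currentwbuf, 0) and hands it back with another exchange when it is done. The sketch below models that ownership hand-off with sync/atomic's typed Pointer (Go 1.19+); worker, take and stash are made-up names, and panic stands in for the runtime's throw.

package main

import (
	"fmt"
	"sync/atomic"
)

type workbuf struct{ nobj int }

// worker stands in for the runtime's m; currentwbuf caches one buffer so
// the owning thread can reuse it without touching the shared queues.
type worker struct {
	currentwbuf atomic.Pointer[workbuf]
}

// take claims the cached buffer, if any, by swapping the slot to nil.
// The swap is atomic because other code (for example a harvest pass)
// may also be trying to claim the same slot.
func (w *worker) take() *workbuf {
	return w.currentwbuf.Swap(nil)
}

// stash parks a buffer back in the cache. Finding a buffer already there
// indicates an ownership bug, mirroring the patch's "currentwbuf lost"
// style throws.
func (w *worker) stash(b *workbuf) {
	if old := w.currentwbuf.Swap(b); old != nil {
		panic("currentwbuf was not empty")
	}
}

func main() {
	var w worker
	w.stash(&workbuf{nobj: 3})
	if b := w.take(); b != nil {
		fmt.Println("reused cached buffer with", b.nobj, "objects")
	}
	fmt.Println("second take returns nil:", w.take() == nil)
}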
+func (b *workbuf) logput(entry uintptr) { + if !_Debugwbufs { + return + } + if !b.inuse { + println("runtime:logput fails log entry=", entry, + "b.log[0]=", b.log[0], "b.log[1]=", b.log[1], + "b.log[2]=", b.log[2], "b.log[3]=", b.log[3]) + throw("logput: put not legal") + } + b.inuse = false + copy(b.log[1:], b.log[:]) + b.log[0] = entry +} + +func (b *workbuf) checknonempty() { + if b.nobj == 0 { + println("runtime: nonempty check fails", + "b.log[0]=", b.log[0], "b.log[1]=", b.log[1], + "b.log[2]=", b.log[2], "b.log[3]=", b.log[3]) + throw("workbuf is empty") + } +} + +func (b *workbuf) checkempty() { + if b.nobj != 0 { + println("runtime: empty check fails", + "b.log[0]=", b.log[0], "b.log[1]=", b.log[1], + "b.log[2]=", b.log[2], "b.log[3]=", b.log[3]) + throw("workbuf is not empty") + } +} + +// getempty pops an empty work buffer off the work.empty list, +// allocating new buffers if none are available. +// entry is used to record a brief history of ownership. +//go:nowritebarrier +func getempty(entry uintptr) *workbuf { + var b *workbuf if work.empty != 0 { b = (*workbuf)(lfstackpop(&work.empty)) - } - if b != nil && b.nobj != 0 { - _g_ := getg() - print("m", _g_.m.id, ": getempty: popped b=", b, " with non-zero b.nobj=", b.nobj, "\n") - throw("getempty: workbuffer not empty, b->nobj not 0") + if b != nil { + b.checkempty() + } } if b == nil { b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys)) - b.nobj = 0 } + b.logget(entry) return b } +// putempty puts a workbuf onto the work.empty list. +// Upon entry this go routine owns b. The lfstackpush relinquishes ownership. //go:nowritebarrier -func putempty(b *workbuf) { - if b.nobj != 0 { - throw("putempty: b->nobj not 0") - } +func putempty(b *workbuf, entry uintptr) { + b.checkempty() + b.logput(entry) lfstackpush(&work.empty, &b.node) } +// putfull puts the workbuf on the work.full list for the GC. +// putfull accepts partially full buffers so the GC can avoid competing +// with the mutators for ownership of partially full buffers. //go:nowritebarrier -func putfull(b *workbuf) { - if b.nobj <= 0 { - throw("putfull: b->nobj <= 0") - } +func putfull(b *workbuf, entry uintptr) { + b.checknonempty() + b.logput(entry) lfstackpush(&work.full, &b.node) } -// Get an partially empty work buffer -// if none are available get an empty one. +// getpartialorempty tries to return a partially empty +// and if none are available returns an empty one. +// entry is used to provide a brief histoy of ownership +// using entry + xxx00000 to +// indicating that two line numbers in the call chain. //go:nowritebarrier -func getpartialorempty() *workbuf { - b := (*workbuf)(lfstackpop(&work.partial)) - if b == nil { - b = getempty(nil) +func getpartialorempty(entry uintptr) *workbuf { + var b *workbuf + // If this m has a buf in currentwbuf then as an optimization + // simply return that buffer. If it turns out currentwbuf + // is full, put it on the work.full queue and get another + // workbuf off the partial or empty queue. + if getg().m.currentwbuf != 0 { + b = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0))) + if b != nil { + if b.nobj <= uintptr(len(b.obj)) { + return b + } + putfull(b, entry+80100000) + } + } + b = (*workbuf)(lfstackpop(&work.partial)) + if b != nil { + b.logget(entry) + return b } + // Let getempty do the logget check but + // use the entry to encode that it passed + // through this routine. 
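logget and logput above gate every transfer of a buffer between a goroutine and the shared queues, recording a short history of entry codes so a double get or double put can be diagnosed. Here is a self-contained sketch of the same bookkeeping, with debugwbufs, logget and logput reimplemented outside the runtime and panic in place of throw.

package main

import "fmt"

const debugwbufs = true // compile-time switch, like _Debugwbufs

type workbuf struct {
	nobj  int
	inuse bool       // owned by some goroutine, not on a shared queue
	log   [4]uintptr // most recent ownership-change entries, newest first
}

// logget marks b as acquired and records entry. Acquiring a buffer that
// is already in use is an ownership bug.
func (b *workbuf) logget(entry uintptr) {
	if !debugwbufs {
		return
	}
	if b.inuse {
		panic(fmt.Sprint("logget: double acquire, log=", b.log))
	}
	b.inuse = true
	copy(b.log[1:], b.log[:]) // shift history down one slot
	b.log[0] = entry
}

// logput marks b as released and records entry.
func (b *workbuf) logput(entry uintptr) {
	if !debugwbufs {
		return
	}
	if !b.inuse {
		panic(fmt.Sprint("logput: double release, log=", b.log))
	}
	b.inuse = false
	copy(b.log[1:], b.log[:])
	b.log[0] = entry
}

func main() {
	b := &workbuf{}
	// Entry values are conventionally call-site line numbers; two call
	// sites can be packed into one entry, e.g. inner + outer*100000,
	// matching the xxx00000 convention in the patch.
	b.logget(118)
	b.logput(42 + 118*100000)
	fmt.Println("ownership history (newest first):", b.log)
}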
+ b = getempty(entry + 80700000) return b } +// putpartial puts empty buffers on the work.empty queue, +// full buffers on the work.full queue and +// others on the work.partial queue. +// entry is used to provide a brief histoy of ownership +// using entry + xxx00000 to +// indicating that two call chain line numbers. //go:nowritebarrier -func putpartial(b *workbuf) { +func putpartial(b *workbuf, entry uintptr) { if b.nobj == 0 { - lfstackpush(&work.empty, &b.node) + putempty(b, entry+81500000) } else if b.nobj < uintptr(len(b.obj)) { + b.logput(entry) lfstackpush(&work.partial, &b.node) } else if b.nobj == uintptr(len(b.obj)) { + b.logput(entry) lfstackpush(&work.full, &b.node) } else { - print("b=", b, " b.nobj=", b.nobj, " len(b.obj)=", len(b.obj), "\n") throw("putpartial: bad Workbuf b.nobj") } } // trygetfull tries to get a full or partially empty workbuffer. -// if one is not immediately available return nil +// If one is not immediately available return nil //go:nowritebarrier -func trygetfull() *workbuf { - wbuf := (*workbuf)(lfstackpop(&work.full)) - if wbuf == nil { - wbuf = (*workbuf)(lfstackpop(&work.partial)) +func trygetfull(entry uintptr) *workbuf { + b := (*workbuf)(lfstackpop(&work.full)) + if b == nil { + b = (*workbuf)(lfstackpop(&work.partial)) } - return wbuf + if b != nil { + b.logget(entry) + b.checknonempty() + return b + } + // full and partial are both empty so see if there + // is an work available on currentwbuf. + // This is an optimization to shift + // processing from the STW marktermination phase into + // the concurrent mark phase. + if getg().m.currentwbuf != 0 { + b = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0))) + if b != nil { + if b.nobj != 0 { + return b + } + putempty(b, 839) + b = nil + } + } + return b } // Get a full work buffer off the work.full or a partially @@ -716,18 +856,31 @@ func trygetfull() *workbuf { // This is in fact the termination condition for the STW mark // phase. //go:nowritebarrier -func getfull(b *workbuf) *workbuf { +func getfull(entry uintptr) *workbuf { + b := (*workbuf)(lfstackpop(&work.full)) if b != nil { - putempty(b) - } - - b = (*workbuf)(lfstackpop(&work.full)) - if b == nil { - b = (*workbuf)(lfstackpop(&work.partial)) + b.logget(entry) + b.checknonempty() + return b } + b = (*workbuf)(lfstackpop(&work.partial)) if b != nil { + b.logget(entry) return b } + // Make sure that currentwbuf is also not a source for pointers to be + // processed. This is an optimization that shifts processing + // from the mark termination STW phase to the concurrent mark phase. + if getg().m.currentwbuf != 0 { + b = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0))) + if b != nil { + if b.nobj != 0 { + return b + } + putempty(b, 877) + b = nil + } + } xadd(&work.nwait, +1) for i := 0; ; i++ { @@ -738,6 +891,8 @@ func getfull(b *workbuf) *workbuf { b = (*workbuf)(lfstackpop(&work.partial)) } if b != nil { + b.logget(entry) + b.checknonempty() return b } xadd(&work.nwait, +1) @@ -762,7 +917,7 @@ func getfull(b *workbuf) *workbuf { //go:nowritebarrier func handoff(b *workbuf) *workbuf { // Make new buffer with half of b's pointers. - b1 := getempty(nil) + b1 := getempty(915) n := b.nobj / 2 b.nobj -= n b1.nobj = n @@ -772,10 +927,42 @@ func handoff(b *workbuf) *workbuf { _g_.m.gcstats.nhandoffcnt += uint64(n) // Put b on full list - let first half of b get stolen. 
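putpartial and trygetfull above route buffers by occupancy (empty, partial, full) and prefer buffers that already hold work. The runtime keeps these lists as lock-free stacks; the sketch below uses a mutex-guarded version purely to show the routing policy, and queues, pop and maxObj are invented names.

package main

import (
	"fmt"
	"sync"
)

const maxObj = 4 // object slots per buffer, stand-in for len(wbuf.obj)

type workbuf struct{ obj []uintptr }

// queues stands in for work.empty, work.partial and work.full.
type queues struct {
	mu                   sync.Mutex
	empty, partial, full []*workbuf
}

// pop removes and returns the top of a slice-backed stack, or nil.
func pop(s *[]*workbuf) *workbuf {
	if n := len(*s); n > 0 {
		b := (*s)[n-1]
		*s = (*s)[:n-1]
		return b
	}
	return nil
}

// putpartial routes a buffer by occupancy: empty, partially full, or full.
func (q *queues) putpartial(b *workbuf) {
	q.mu.Lock()
	defer q.mu.Unlock()
	switch {
	case len(b.obj) == 0:
		q.empty = append(q.empty, b)
	case len(b.obj) < maxObj:
		q.partial = append(q.partial, b)
	default:
		q.full = append(q.full, b)
	}
}

// trygetfull prefers buffers that already contain work, full first, then
// partial; it returns nil rather than blocking, like the runtime's version.
func (q *queues) trygetfull() *workbuf {
	q.mu.Lock()
	defer q.mu.Unlock()
	if b := pop(&q.full); b != nil {
		return b
	}
	return pop(&q.partial)
}

func main() {
	var q queues
	q.putpartial(&workbuf{})                           // routed to empty
	q.putpartial(&workbuf{obj: []uintptr{1, 2}})       // routed to partial
	q.putpartial(&workbuf{obj: []uintptr{1, 2, 3, 4}}) // routed to full
	b := q.trygetfull()
	fmt.Println("got buffer with", len(b.obj), "objects (full list preferred)")
}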
- lfstackpush(&work.full, &b.node) + putfull(b, 942) return b1 } +// 1 when you are harvesting so that the write buffer code shade can +// detect calls during a presumable STW write barrier. +var harvestingwbufs uint32 + +// harvestwbufs moves non-empty workbufs to work.full from m.currentwuf +// Must be in a STW phase. +// xchguintptr is used since there are write barrier calls from the GC helper +// routines even during a STW phase. +// TODO: chase down write barrier calls in STW phase and understand and eliminate +// them. +//go:nowritebarrier +func harvestwbufs() { + // announce to write buffer that you are harvesting the currentwbufs + atomicstore(&harvestingwbufs, 1) + + for mp := allm; mp != nil; mp = mp.alllink { + wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&mp.currentwbuf, 0))) + // TODO: beat write barriers out of the mark termination and eliminate xchg + // tempwbuf := (*workbuf)(unsafe.Pointer(tempm.currentwbuf)) + // tempm.currentwbuf = 0 + if wbuf != nil { + if wbuf.nobj == 0 { + putempty(wbuf, 945) + } else { + putfull(wbuf, 947) //use full instead of partial so GC doesn't compete to get wbuf + } + } + } + + atomicstore(&harvestingwbufs, 0) +} + //go:nowritebarrier func stackmapdata(stkmap *stackmap, n int32) bitvector { if n < 0 || n >= stkmap.n { @@ -786,13 +973,13 @@ func stackmapdata(stkmap *stackmap, n int32) bitvector { // Scan a stack frame: local variables and function arguments/results. //go:nowritebarrier -func scanframe(frame *stkframe, unused unsafe.Pointer) bool { +func scanframeworker(frame *stkframe, unused unsafe.Pointer, wbuf *workbuf) *workbuf { f := frame.fn targetpc := frame.continpc if targetpc == 0 { // Frame is dead. - return true + return wbuf } if _DebugGC > 1 { print("scanframe ", funcname(f), "\n") @@ -831,7 +1018,7 @@ func scanframe(frame *stkframe, unused unsafe.Pointer) bool { } bv := stackmapdata(stkmap, pcdata) size = (uintptr(bv.n) / typeBitsWidth) * ptrSize - scanblock(frame.varp-size, size, bv.bytedata) + wbuf = scanblock(frame.varp-size, size, bv.bytedata, wbuf) } // Scan arguments. @@ -852,9 +1039,9 @@ func scanframe(frame *stkframe, unused unsafe.Pointer) bool { } bv = stackmapdata(stkmap, pcdata) } - scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata) + wbuf = scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, wbuf) } - return true + return wbuf } //go:nowritebarrier @@ -889,8 +1076,19 @@ func scanstack(gp *g) { throw("can't scan gchelper stack") } + wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, 0))) + scanframe := func(frame *stkframe, unused unsafe.Pointer) bool { + // Pick up wbuf as free variable so gentraceback and friends can + // keep the same signature. + wbuf = scanframeworker(frame, unused, wbuf) + return true + } gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0) tracebackdefers(gp, scanframe, nil) + wbuf = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, uintptr(unsafe.Pointer(wbuf))))) + if wbuf != nil { + throw("wbuf not nil after stack scans") + } gp.gcscanvalid = true } @@ -898,17 +1096,34 @@ func scanstack(gp *g) { // The object is not nil and known to be in the heap. 
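harvestwbufs above runs with the world stopped and moves every M's cached buffer onto the global queues, using full rather than partial so the collector does not compete with mutators for partially filled buffers. Here is a rough analogue; worker, allWorkers, harvest and the slice-backed queues are stand-ins, not runtime APIs.

package main

import (
	"fmt"
	"sync/atomic"
)

type workbuf struct{ obj []uintptr }

// worker mirrors the m struct's currentwbuf cache.
type worker struct {
	currentwbuf atomic.Pointer[workbuf]
}

var (
	allWorkers []*worker     // stand-in for walking the allm list
	full       []*workbuf    // stand-in for work.full
	empty      []*workbuf    // stand-in for work.empty
	harvesting atomic.Uint32 // analogue of the harvestingwbufs flag
)

// harvest moves every cached buffer onto the shared queues so a
// stop-the-world mark phase can see all pending work. Each slot is
// swapped atomically because, as the patch notes, barrier code may
// still run while harvesting.
func harvest() {
	harvesting.Store(1)
	for _, w := range allWorkers {
		b := w.currentwbuf.Swap(nil)
		if b == nil {
			continue
		}
		if len(b.obj) == 0 {
			empty = append(empty, b)
		} else {
			// Full rather than partial, so the collector does not
			// compete with mutators for partially filled buffers.
			full = append(full, b)
		}
	}
	harvesting.Store(0)
}

func main() {
	w1, w2 := &worker{}, &worker{}
	w1.currentwbuf.Store(&workbuf{obj: []uintptr{1}})
	w2.currentwbuf.Store(&workbuf{})
	allWorkers = []*worker{w1, w2}
	harvest()
	fmt.Println("full:", len(full), "empty:", len(empty))
}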
//go:nowritebarrier func shade(b uintptr) { + var wbuf *workbuf + if !inheap(b) { throw("shade: passed an address not in the heap") } - - wbuf := getpartialorempty() - if obj, hbits := heapBitsForObject(b); obj != 0 { - wbuf = greyobject(obj, 0, 0, hbits, wbuf) + // TODO: this would be a great place to put a check to see + // if we are harvesting and if we are then we should + // figure out why there is a call to shade when the + // harvester thinks we are in a STW. + // if atomicload(&harvestingwbufs) == uint32(1) { + // // Throw here to discover write barriers + // // being executed during a STW. + // } + + wbuf = getpartialorempty(1181) + wbuf := greyobject(obj, 0, 0, hbits, wbuf) + checknocurrentwbuf() + // This is part of the write barrier so put the wbuf back. + if gcphase == _GCmarktermination { + putpartial(wbuf, 1191) // Put on full??? + } else { + wbuf = (*workbuf)(unsafe.Pointer(xchguintptr(&getg().m.currentwbuf, uintptr(unsafe.Pointer(wbuf))))) + if wbuf != nil { + throw("m.currentwbuf lost in shade") + } + } } - - putpartial(wbuf) } // gchelpwork does a small bounded amount of gc work. The purpose is to @@ -931,10 +1146,24 @@ func gchelpwork() { // scanstack(gp) case _GCmark: // Get a full work buffer and empty it. - var wbuf *workbuf - wbuf = trygetfull() + m := getg().m + // drain your own currentwbuf first in the hopes that it will + // be more cache friendly. + wbuf := (*workbuf)(unsafe.Pointer(xchguintptr(&m.currentwbuf, 0))) + // wbuf := (*workbuf)(unsafe.Pointer(m.currentwbuf)) + // m.currentwbuf = 0 + if wbuf == nil { + wbuf = trygetfull(1228) + } if wbuf != nil { - drainobjects(wbuf, uintptr(len(wbuf.obj))) // drain upto one buffer's worth of objects + wbuf = drainobjects(wbuf, uintptr(len(wbuf.obj))) // drain upto one buffer's worth of objects + if wbuf != nil { + if wbuf.nobj != 0 { + putfull(wbuf, 1175) + } else { + putempty(wbuf, 1177) + } + } } case _GCmarktermination: // We should never be here since the world is stopped. @@ -1249,7 +1478,10 @@ func gchelper() { // parallel mark for over GC roots parfordo(work.markfor) if gcphase != _GCscan { - scanblock(0, 0, nil) // blocks in getfull + wbuf := scanblock(0, 0, nil, nil) // blocks in getfull + if wbuf != nil { + throw("gchelper did not extinguish wbuf") + } } if trace.enabled { @@ -1380,7 +1612,7 @@ var heapminimum = uint64(4 << 20) func gcinit() { if unsafe.Sizeof(workbuf{}) != _WorkbufSize { - throw("runtime: size of Workbuf is suboptimal") + throw("size of Workbuf is suboptimal") } work.markfor = parforalloc(_MaxGcproc) @@ -1508,16 +1740,16 @@ func gcscan_m() { } // Mark all objects that are known about. +// This is the concurrent mark phase. //go:nowritebarrier func gcmark_m() { - scanblock(0, 0, nil) -} - -// For now this must be bracketed with a stoptheworld and a starttheworld to ensure -// all go routines see the new barrier. -//go:nowritebarrier -func gcinstallmarkwb_m() { - gcphase = _GCmark + // If one is available grab this M's workbuffer. + wbuf := scanblock(0, 0, nil, nil) + if wbuf != nil { + throw("gcmark_m did not extinguish wbuf") + } + // TODO add another harvestwbuf and reset work.nwait=0, work.ndone=0, and work.nproc=1 + // and repeat the above scanblock. } // For now this must be bracketed with a stoptheworld and a starttheworld to ensure @@ -1527,12 +1759,14 @@ func gcinstalloffwb_m() { gcphase = _GCoff } +// STW is in effect at this point. 
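shade above is the write-barrier slow path: it prefers the M's cached buffer, greys the object into it, and then either queues the buffer for the collector (during mark termination) or swaps it back into the cache for the next barrier hit. The following sketch imitates that flow; shade here operates on an invented worker type and a plain slice instead of the runtime's phases and queues.

package main

import (
	"fmt"
	"sync/atomic"
)

type workbuf struct{ obj []uintptr }

type worker struct {
	currentwbuf atomic.Pointer[workbuf]
}

var partial []*workbuf // stand-in for work.partial

// shade models the write-barrier slow path: acquire a buffer (ideally the
// worker's cached one), record the pointer, then either hand the buffer to
// the collector (during mark termination, when the world is stopped) or
// park it back in the cache for the next barrier hit.
func shade(w *worker, p uintptr, termination bool) {
	b := w.currentwbuf.Swap(nil)
	if b == nil {
		b = &workbuf{} // stand-in for getpartialorempty
	}
	b.obj = append(b.obj, p) // stand-in for greyobject
	if termination {
		partial = append(partial, b) // make the work globally visible
		return
	}
	if old := w.currentwbuf.Swap(b); old != nil {
		panic("currentwbuf lost in shade")
	}
}

func main() {
	var w worker
	shade(&w, 0x100, false)
	shade(&w, 0x200, false) // reuses the cached buffer
	fmt.Println("cached objects:", len(w.currentwbuf.Load().obj))
	shade(&w, 0x300, true) // during mark termination the buffer is queued instead
	fmt.Println("queued buffers:", len(partial))
}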
//TODO go:nowritebarrier func gc(start_time int64, eagersweep bool) { if _DebugGCPtrs { print("GC start\n") } + gcphase = _GCmarktermination if debug.allocfreetrace > 0 { tracegc() } @@ -1571,12 +1805,10 @@ func gc(start_time int64, eagersweep bool) { mheap_.gcspans = mheap_.allspans work.spans = h_allspans unlock(&mheap_.lock) - oldphase := gcphase work.nwait = 0 work.ndone = 0 work.nproc = uint32(gcprocs()) - gcphase = _GCmarktermination // World is stopped so allglen will not change. for i := uintptr(0); i < allglen; i++ { @@ -1599,9 +1831,13 @@ func gc(start_time int64, eagersweep bool) { t2 = nanotime() } + harvestwbufs() // move local workbufs onto global queues where the GC can find them gchelperstart() parfordo(work.markfor) - scanblock(0, 0, nil) + wbuf := scanblock(0, 0, nil, nil) + if wbuf != nil { + throw("gc does not extinguish wbuf") + } if work.full != 0 { throw("work.full != 0") @@ -1610,7 +1846,7 @@ func gc(start_time int64, eagersweep bool) { throw("work.partial != 0") } - gcphase = oldphase + gcphase = _GCoff var t3 int64 if debug.gctrace > 0 { t3 = nanotime() @@ -1755,7 +1991,7 @@ func readGCStats_m(pauses *[]uint64) { p := *pauses // Calling code in runtime/debug should make the slice large enough. if cap(p) < len(memstats.pause_ns)+3 { - throw("runtime: short slice passed to readGCStats") + throw("short slice passed to readGCStats") } // Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns. diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index fab2ccbebf00a..ca3e7d564e7ef 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -274,6 +274,7 @@ type m struct { waitsemacount uint32 waitsemalock uint32 gcstats gcstats + currentwbuf uintptr // use locks or atomic operations such as xchguinptr to access. needextram bool traceback uint8 waitunlockf unsafe.Pointer // todo go func(*g, unsafe.pointer) bool
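With m.currentwbuf added to the m struct, mark termination above first harvests those caches, then drains the global queues to completion and checks that work.full and work.partial are empty. The closing sketch below mimics that drain-and-verify step in miniature; drainAll and the slice-backed queues are illustrative only, under the assumption that all local caches were already flushed.

package main

import "fmt"

type workbuf struct{ obj []uintptr }

var full, partial []*workbuf // stand-ins for work.full / work.partial

// pop removes and returns the top of a slice-backed stack, or nil.
func pop(s *[]*workbuf) *workbuf {
	if n := len(*s); n > 0 {
		b := (*s)[n-1]
		*s = (*s)[:n-1]
		return b
	}
	return nil
}

// drainAll plays the role of the scanblock(0, 0, nil, nil) call made with
// the world stopped: keep taking buffers that contain work and process
// them until none are left anywhere.
func drainAll() {
	for {
		b := pop(&full)
		if b == nil {
			b = pop(&partial)
		}
		if b == nil {
			return // termination condition: no queued work remains
		}
		b.obj = b.obj[:0] // pretend every object was scanned
	}
}

func main() {
	// Mark termination in miniature: local caches were already harvested
	// onto the global queues, so draining them must leave nothing behind.
	full = append(full, &workbuf{obj: []uintptr{1, 2}})
	partial = append(partial, &workbuf{obj: []uintptr{3}})
	drainAll()
	if len(full) != 0 || len(partial) != 0 {
		panic("work.full or work.partial not empty after mark termination")
	}
	fmt.Println("all work buffers drained")
}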