
Commit f8e0057

sync: scalable Pool
Introduce fixed-size P-local caches. When a local cache overflows or
underflows, a batch of items is transferred to or from the global
mutex-protected cache.

benchmark                       old ns/op    new ns/op    delta
BenchmarkPool                   50554        22423        -55.65%
BenchmarkPool-4                 400359       5904         -98.53%
BenchmarkPool-16                403311       1598         -99.60%
BenchmarkPool-32                367310       1526         -99.58%
BenchmarkPoolOverlflow          5214         3633         -30.32%
BenchmarkPoolOverlflow-4        42663        9539         -77.64%
BenchmarkPoolOverlflow-8        46919        11385        -75.73%
BenchmarkPoolOverlflow-16       39454        13048        -66.93%
BenchmarkSprintfEmpty           84           63           -25.68%
BenchmarkSprintfEmpty-2         371          32           -91.13%
BenchmarkSprintfEmpty-4         465          22           -95.25%
BenchmarkSprintfEmpty-8         565          12           -97.77%
BenchmarkSprintfEmpty-16        498          5            -98.87%
BenchmarkSprintfEmpty-32        492          4            -99.04%
BenchmarkSprintfString          259          229          -11.58%
BenchmarkSprintfString-2        574          144          -74.91%
BenchmarkSprintfString-4        651          77           -88.05%
BenchmarkSprintfString-8        868          47           -94.48%
BenchmarkSprintfString-16       825          33           -95.96%
BenchmarkSprintfString-32       825          30           -96.28%
BenchmarkSprintfInt             213          188          -11.74%
BenchmarkSprintfInt-2           448          138          -69.20%
BenchmarkSprintfInt-4           624          52           -91.63%
BenchmarkSprintfInt-8           691          31           -95.43%
BenchmarkSprintfInt-16          724          18           -97.46%
BenchmarkSprintfInt-32          718          16           -97.70%
BenchmarkSprintfIntInt          311          282          -9.32%
BenchmarkSprintfIntInt-2        333          145          -56.46%
BenchmarkSprintfIntInt-4        642          110          -82.87%
BenchmarkSprintfIntInt-8        832          42           -94.90%
BenchmarkSprintfIntInt-16       817          24           -97.00%
BenchmarkSprintfIntInt-32       805          22           -97.17%
BenchmarkSprintfPrefixedInt     309          269          -12.94%
BenchmarkSprintfPrefixedInt-2   245          168          -31.43%
BenchmarkSprintfPrefixedInt-4   598          99           -83.36%
BenchmarkSprintfPrefixedInt-8   770          67           -91.23%
BenchmarkSprintfPrefixedInt-16  829          54           -93.49%
BenchmarkSprintfPrefixedInt-32  824          50           -93.83%
BenchmarkSprintfFloat           418          398          -4.78%
BenchmarkSprintfFloat-2         295          203          -31.19%
BenchmarkSprintfFloat-4         585          128          -78.12%
BenchmarkSprintfFloat-8         873          60           -93.13%
BenchmarkSprintfFloat-16        884          33           -96.24%
BenchmarkSprintfFloat-32        881          29           -96.62%
BenchmarkManyArgs               1097         1069         -2.55%
BenchmarkManyArgs-2             705          567          -19.57%
BenchmarkManyArgs-4             792          319          -59.72%
BenchmarkManyArgs-8             963          172          -82.14%
BenchmarkManyArgs-16            1115         103          -90.76%
BenchmarkManyArgs-32            1133         90           -92.03%

LGTM=rsc
R=golang-codereviews, bradfitz, minux.ma, gobot, rsc
CC=golang-codereviews
https://golang.org/cl/46010043
1 parent 9fa9613 commit f8e0057
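
For orientation, the public surface is unchanged by this commit: Put returns an object to the pool and Get retrieves one, calling New on a miss; only the internals become per-P. A minimal usage sketch (the bufPool name and byte-slice payload are illustrative, not part of this change):

    package main

    import (
    	"fmt"
    	"sync"
    )

    // bufPool recycles scratch buffers; New runs only on a cache miss.
    var bufPool = sync.Pool{
    	New: func() interface{} { return make([]byte, 0, 64) },
    }

    func main() {
    	b := bufPool.Get().([]byte) // hits the P-local cache on the fast path
    	b = append(b[:0], "hello"...)
    	fmt.Println(string(b))
    	bufPool.Put(b[:0]) // return the buffer for reuse
    }

The fmt benchmarks above improve because fmt already drains its formatter state through such a pool on every Sprintf call.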

File tree

5 files changed: +207 -36 lines changed

src/pkg/go/build/deps_test.go

Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ var pkgDeps = map[string][]string{
 	"errors":      {},
 	"io":          {"errors", "sync"},
 	"runtime":     {"unsafe"},
-	"sync":        {"sync/atomic", "unsafe"},
+	"sync":        {"runtime", "sync/atomic", "unsafe"},
 	"sync/atomic": {"unsafe"},
 	"unsafe":      {},
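
The new "runtime" edge is what the slow path needs: pool.go below imports runtime so that pinSlow can size the per-P array with runtime.GOMAXPROCS(0).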

src/pkg/runtime/mgc0.c

Lines changed: 7 additions & 3 deletions

@@ -68,15 +68,19 @@ clearpools(void)
 {
 	void **pool, **next;
 	P *p, **pp;
+	uintptr off;
 	int32 i;

 	// clear sync.Pool's
 	for(pool = pools.head; pool != nil; pool = next) {
 		next = pool[0];
 		pool[0] = nil; // next
-		pool[1] = nil; // slice
-		pool[2] = nil;
-		pool[3] = nil;
+		pool[1] = nil; // local
+		pool[2] = nil; // localSize
+		off = (uintptr)pool[3] / sizeof(void*);
+		pool[off+0] = nil; // global slice
+		pool[off+1] = nil;
+		pool[off+2] = nil;
 	}
 	pools.head = nil;
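
The magic indices mirror the field order of the Pool struct in pool.go below: the garbage collector treats a registered Pool as an array of pointer-sized words, and pool[3] holds globalOffset, which pinSlow fills in via unsafe.Offsetof(p.global); the three words cleared at that offset are the global slice header (pointer, length, capacity). A sketch of the word-offset correspondence, using an illustrative mirror struct (poolMirror is not in the tree):

    package main

    import (
    	"fmt"
    	"unsafe"
    )

    // Illustrative mirror of the runtime-known prefix of sync.Pool;
    // all four fields are one machine word each.
    type poolMirror struct {
    	next         uintptr
    	local        uintptr
    	localSize    uintptr
    	globalOffset uintptr
    }

    func main() {
    	var p poolMirror
    	w := unsafe.Sizeof(uintptr(0)) // word size
    	fmt.Println(unsafe.Offsetof(p.next) / w)         // 0: pool[0] in mgc0.c
    	fmt.Println(unsafe.Offsetof(p.local) / w)        // 1: pool[1]
    	fmt.Println(unsafe.Offsetof(p.localSize) / w)    // 2: pool[2]
    	fmt.Println(unsafe.Offsetof(p.globalOffset) / w) // 3: pool[3]
    }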

src/pkg/runtime/proc.c

Lines changed: 20 additions & 0 deletions

@@ -3046,3 +3046,23 @@ haveexperiment(int8 *name)
 	}
 	return 0;
 }
+
+// func runtime_procPin() int
+void
+sync·runtime_procPin(intgo p)
+{
+	M *mp;
+
+	mp = m;
+	// Disable preemption.
+	mp->locks++;
+	p = mp->p->id;
+	FLUSH(&p);
+}
+
+// func runtime_procUnpin()
+void
+sync·runtime_procUnpin(void)
+{
+	m->locks--;
+}
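
procPin works by bumping m->locks, which the scheduler treats as "do not preempt"; until the matching procUnpin, the goroutine therefore owns its P's poolLocal outright, and the fast paths in pool.go need no locks or atomics. User code cannot pin a P, but the ownership idea can be imitated by statically assigning each goroutine its own shard; a rough user-level analogue (entirely illustrative, not how Pool itself is implemented):

    package main

    import (
    	"fmt"
    	"sync"
    )

    // Each goroutine is the sole writer of its shard, so the hot path is
    // uncontended. The runtime version gets the same exclusivity from
    // procPin (preemption disabled) rather than a fixed goroutine/shard
    // pairing.
    func main() {
    	const workers = 4
    	shards := make([][]int, workers) // one private stack per worker
    	var wg sync.WaitGroup
    	for id := 0; id < workers; id++ {
    		wg.Add(1)
    		go func(id int) {
    			defer wg.Done()
    			for i := 0; i < 1000; i++ {
    				shards[id] = append(shards[id], i) // no lock needed
    			}
    		}(id)
    	}
    	wg.Wait()
    	fmt.Println(len(shards[0]), len(shards[3])) // 1000 1000
    }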

src/pkg/sync/pool.go

Lines changed: 150 additions & 15 deletions

@@ -4,6 +4,18 @@

 package sync

+import (
+	"runtime"
+	"sync/atomic"
+	"unsafe"
+)
+
+const (
+	cacheLineSize = 128
+	poolLocalSize = 2 * cacheLineSize
+	poolLocalCap  = poolLocalSize/unsafe.Sizeof(*(*interface{})(nil)) - 1
+)
+
 // A Pool is a set of temporary objects that may be individually saved
 // and retrieved.
 //
@@ -26,29 +38,52 @@ package sync
 //
 // This is an experimental type and might not be released.
 type Pool struct {
-	next *Pool         // for use by runtime. must be first.
-	list []interface{} // offset known to runtime
-	mu   Mutex         // guards list
+	// The following fields are known to runtime.
+	next         *Pool      // for use by runtime
+	local        *poolLocal // local fixed-size per-P pool, actually an array
+	localSize    uintptr    // size of the local array
+	globalOffset uintptr    // offset of global
+	// The rest is not known to runtime.

 	// New optionally specifies a function to generate
 	// a value when Get would otherwise return nil.
 	// It may not be changed concurrently with calls to Get.
 	New func() interface{}
+
+	pad [cacheLineSize]byte
+	// Read-mostly data above this point, mutable data follows.
+	mu     Mutex
+	global []interface{} // global fallback pool
 }

-func runtime_registerPool(*Pool)
+// Local per-P Pool appendix.
+type poolLocal struct {
+	tail   int
+	unused int
+	buf    [poolLocalCap]interface{}
+}
+
+func init() {
+	var v poolLocal
+	if unsafe.Sizeof(v) != poolLocalSize {
+		panic("sync: incorrect pool size")
+	}
+}

 // Put adds x to the pool.
 func (p *Pool) Put(x interface{}) {
 	if x == nil {
 		return
 	}
-	p.mu.Lock()
-	if p.list == nil {
-		runtime_registerPool(p)
+	l := p.pin()
+	t := l.tail
+	if t < int(poolLocalCap) {
+		l.buf[t] = x
+		l.tail = t + 1
+		runtime_procUnpin()
+		return
 	}
-	p.list = append(p.list, x)
-	p.mu.Unlock()
+	p.putSlow(l, x)
 }

 // Get selects an arbitrary item from the Pool, removes it from the
@@ -60,16 +95,116 @@ func (p *Pool) Put(x interface{}) {
 // If Get would otherwise return nil and p.New is non-nil, Get returns
 // the result of calling p.New.
 func (p *Pool) Get() interface{} {
+	l := p.pin()
+	t := l.tail
+	if t > 0 {
+		t -= 1
+		x := l.buf[t]
+		l.tail = t
+		runtime_procUnpin()
+		return x
+	}
+	return p.getSlow()
+}
+
+func (p *Pool) putSlow(l *poolLocal, x interface{}) {
+	// Grab half of items from local pool and put to global pool.
+	// Can not lock the mutex while pinned.
+	const N = int(poolLocalCap/2 + 1)
+	var buf [N]interface{}
+	buf[0] = x
+	for i := 1; i < N; i++ {
+		l.tail--
+		buf[i] = l.buf[l.tail]
+	}
+	runtime_procUnpin()
+
 	p.mu.Lock()
-	var x interface{}
-	if n := len(p.list); n > 0 {
-		x = p.list[n-1]
-		p.list[n-1] = nil // Just to be safe
-		p.list = p.list[:n-1]
+	p.global = append(p.global, buf[:]...)
+	p.mu.Unlock()
+}
+
+func (p *Pool) getSlow() (x interface{}) {
+	// Grab a batch of items from global pool and put to local pool.
+	// Can not lock the mutex while pinned.
+	runtime_procUnpin()
+	p.mu.Lock()
+	pid := runtime_procPin()
+	s := p.localSize
+	l := p.local
+	if uintptr(pid) < s {
+		l = indexLocal(l, pid)
+		// Get the item to return.
+		last := len(p.global) - 1
+		if last >= 0 {
+			x = p.global[last]
+			p.global = p.global[:last]
+		}
+		// Try to refill local pool, we may have been rescheduled to another P.
+		if last > 0 && l.tail == 0 {
+			n := int(poolLocalCap / 2)
+			gl := len(p.global)
+			if n > gl {
+				n = gl
+			}
+			copy(l.buf[:], p.global[gl-n:])
+			p.global = p.global[:gl-n]
+			l.tail = n
+		}
 	}
+	runtime_procUnpin()
 	p.mu.Unlock()
+
 	if x == nil && p.New != nil {
 		x = p.New()
 	}
-	return x
+	return
 }
+
+// pin pins the current goroutine to P, disables preemption and returns the poolLocal for the P.
+// Caller must call runtime_procUnpin() when done with the pool.
+func (p *Pool) pin() *poolLocal {
+	pid := runtime_procPin()
+	// In pinSlow we store to localSize and then to local, here we load in opposite order.
+	// Since we've disabled preemption, GC can not happen in between.
+	// Thus here we must observe local at least as large as localSize.
+	// We can observe a newer/larger local, it is fine (we must observe its zero-initialized-ness).
+	s := atomic.LoadUintptr(&p.localSize) // load-acquire
+	l := p.local                          // load-consume
+	if uintptr(pid) < s {
+		return indexLocal(l, pid)
+	}
+	return p.pinSlow()
+}
+
+func (p *Pool) pinSlow() *poolLocal {
+	// Retry under the mutex.
+	runtime_procUnpin()
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	pid := runtime_procPin()
+	s := p.localSize
+	l := p.local
+	if uintptr(pid) < s {
+		return indexLocal(l, pid)
+	}
+	if p.local == nil {
+		p.globalOffset = unsafe.Offsetof(p.global)
+		runtime_registerPool(p)
+	}
+	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
+	size := runtime.GOMAXPROCS(0)
+	local := make([]poolLocal, size)
+	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&p.local)), unsafe.Pointer(&local[0])) // store-release
+	atomic.StoreUintptr(&p.localSize, uintptr(size))                                            // store-release
+	return &local[pid]
+}
+
+func indexLocal(l *poolLocal, i int) *poolLocal {
+	return (*poolLocal)(unsafe.Pointer(uintptr(unsafe.Pointer(l)) + unsafe.Sizeof(*l)*uintptr(i))) // uh...
+}
+
+// Implemented in runtime.
+func runtime_registerPool(*Pool)
+func runtime_procPin() int
+func runtime_procUnpin()
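
A worked example of the sizing constants at the top of the file may help: poolLocal must come out at exactly poolLocalSize bytes, since indexLocal strides over a raw array of them, and init() enforces this at startup. A sketch of the arithmetic, assuming a 64-bit platform (the slot count differs on 32-bit, but the total is still 256):

    package main

    import (
    	"fmt"
    	"unsafe"
    )

    func main() {
    	const cacheLineSize = 128
    	const poolLocalSize = 2 * cacheLineSize // 256 bytes per P
    	iface := unsafe.Sizeof(*(*interface{})(nil))
    	// On 64-bit, interface{} is 16 bytes, so 256/16 - 1 = 15 slots;
    	// the -1 reserves one slot's worth of space for tail and unused.
    	slots := poolLocalSize/iface - 1
    	fmt.Println(slots) // 15
    	// tail + unused + buf fill the struct exactly, as init() verifies:
    	fmt.Println(2*unsafe.Sizeof(int(0)) + slots*iface) // 256
    }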

src/pkg/sync/pool_test.go

Lines changed: 29 additions & 17 deletions

@@ -11,7 +11,6 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
-	"unsafe"
 )

 func TestPool(t *testing.T) {
@@ -125,28 +124,41 @@ func TestPoolStress(t *testing.T) {
 }

 func BenchmarkPool(b *testing.B) {
-	procs := runtime.GOMAXPROCS(-1)
-	var dec func() bool
-	if unsafe.Sizeof(b.N) == 8 {
-		n := int64(b.N)
-		dec = func() bool {
-			return atomic.AddInt64(&n, -1) >= 0
-		}
-	} else {
-		n := int32(b.N)
-		dec = func() bool {
-			return atomic.AddInt32(&n, -1) >= 0
-		}
+	var p Pool
+	var wg WaitGroup
+	n0 := uintptr(b.N)
+	n := n0
+	for i := 0; i < runtime.GOMAXPROCS(0); i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for atomic.AddUintptr(&n, ^uintptr(0)) < n0 {
+				for b := 0; b < 100; b++ {
+					p.Put(1)
+					p.Get()
+				}
+			}
+		}()
 	}
+	wg.Wait()
+}
+
+func BenchmarkPoolOverlflow(b *testing.B) {
 	var p Pool
 	var wg WaitGroup
-	for i := 0; i < procs; i++ {
+	n0 := uintptr(b.N)
+	n := n0
+	for i := 0; i < runtime.GOMAXPROCS(0); i++ {
 		wg.Add(1)
 		go func() {
 			defer wg.Done()
-			for dec() {
-				p.Put(1)
-				p.Get()
+			for atomic.AddUintptr(&n, ^uintptr(0)) < n0 {
+				for b := 0; b < 100; b++ {
+					p.Put(1)
+				}
+				for b := 0; b < 100; b++ {
+					p.Get()
+				}
 			}
 		}()
 	}
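
One non-obvious idiom in both benchmarks: atomic.AddUintptr(&n, ^uintptr(0)) is an unsigned decrement (adding the two's-complement encoding of -1), and once the shared counter wraps below zero the result jumps far above n0, so the goroutines collectively claim exactly b.N iterations without any per-word-size branching. A small standalone demonstration (n0 = 3 is an arbitrary stand-in for b.N):

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    func main() {
    	n0 := uintptr(3) // stand-in for b.N
    	n := n0
    	claimed := 0
    	// Each successful decrement claims one iteration; the wrap-around
    	// result (a huge unsigned value) terminates the loop.
    	for atomic.AddUintptr(&n, ^uintptr(0)) < n0 {
    		claimed++
    	}
    	fmt.Println(claimed) // 3
    }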
