Skip to content

Commit

Permalink
runtime: improve Linux mutex
Browse files Browse the repository at this point in the history
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.

On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark                     old ns/op    new ns/op    delta
BenchmarkSelectContended         521.00       503.00   -3.45%
BenchmarkSelectContended-2       661.00       320.00  -51.59%
BenchmarkSelectContended-4      1139.00       629.00  -44.78%
BenchmarkSelectContended-8      2870.00       878.00  -69.41%
BenchmarkSelectContended-16     5276.00       818.00  -84.50%
BenchmarkChanContended           112.00       103.00   -8.04%
BenchmarkChanContended-2         631.00       174.00  -72.42%
BenchmarkChanContended-4         682.00       272.00  -60.12%
BenchmarkChanContended-8        1601.00       520.00  -67.52%
BenchmarkChanContended-16       3100.00       372.00  -88.00%
BenchmarkChanSync                253.00       239.00   -5.53%
BenchmarkChanSync-2             5030.00      4648.00   -7.59%
BenchmarkChanSync-4             4826.00      4694.00   -2.74%
BenchmarkChanSync-8             4778.00      4713.00   -1.36%
BenchmarkChanSync-16            5289.00      4710.00  -10.95%
BenchmarkChanProdCons0           273.00       254.00   -6.96%
BenchmarkChanProdCons0-2         599.00       400.00  -33.22%
BenchmarkChanProdCons0-4        1168.00       659.00  -43.58%
BenchmarkChanProdCons0-8        2831.00      1057.00  -62.66%
BenchmarkChanProdCons0-16       4197.00      1037.00  -75.29%
BenchmarkChanProdCons10          150.00       140.00   -6.67%
BenchmarkChanProdCons10-2        607.00       268.00  -55.85%
BenchmarkChanProdCons10-4       1137.00       404.00  -64.47%
BenchmarkChanProdCons10-8       2115.00       828.00  -60.85%
BenchmarkChanProdCons10-16      4283.00       855.00  -80.04%
BenchmarkChanProdCons100         117.00       110.00   -5.98%
BenchmarkChanProdCons100-2       558.00       218.00  -60.93%
BenchmarkChanProdCons100-4       722.00       287.00  -60.25%
BenchmarkChanProdCons100-8      1840.00       431.00  -76.58%
BenchmarkChanProdCons100-16     3394.00       448.00  -86.80%
BenchmarkChanProdConsWork0      2014.00      1996.00   -0.89%
BenchmarkChanProdConsWork0-2    1207.00      1127.00   -6.63%
BenchmarkChanProdConsWork0-4    1913.00       611.00  -68.06%
BenchmarkChanProdConsWork0-8    3016.00       949.00  -68.53%
BenchmarkChanProdConsWork0-16   4320.00      1154.00  -73.29%
BenchmarkChanProdConsWork10     1906.00      1897.00   -0.47%
BenchmarkChanProdConsWork10-2   1123.00      1033.00   -8.01%
BenchmarkChanProdConsWork10-4   1076.00       571.00  -46.93%
BenchmarkChanProdConsWork10-8   2748.00      1096.00  -60.12%
BenchmarkChanProdConsWork10-16  4600.00      1105.00  -75.98%
BenchmarkChanProdConsWork100    1884.00      1852.00   -1.70%
BenchmarkChanProdConsWork100-2  1235.00      1146.00   -7.21%
BenchmarkChanProdConsWork100-4  1217.00       619.00  -49.14%
BenchmarkChanProdConsWork100-8  1534.00       509.00  -66.82%
BenchmarkChanProdConsWork100-16 4126.00       918.00  -77.75%
BenchmarkSyscall                  34.40        33.30   -3.20%
BenchmarkSyscall-2               160.00       121.00  -24.38%
BenchmarkSyscall-4               131.00       136.00   +3.82%
BenchmarkSyscall-8               139.00       131.00   -5.76%
BenchmarkSyscall-16              161.00       168.00   +4.35%
BenchmarkSyscallWork             950.00       950.00   +0.00%
BenchmarkSyscallWork-2           481.00       480.00   -0.21%
BenchmarkSyscallWork-4           268.00       270.00   +0.75%
BenchmarkSyscallWork-8           156.00       169.00   +8.33%
BenchmarkSyscallWork-16          188.00       184.00   -2.13%
BenchmarkSemaSyntNonblock         36.40        35.60   -2.20%
BenchmarkSemaSyntNonblock-2       81.40        45.10  -44.59%
BenchmarkSemaSyntNonblock-4      126.00       108.00  -14.29%
BenchmarkSemaSyntNonblock-8      112.00       112.00   +0.00%
BenchmarkSemaSyntNonblock-16     110.00       112.00   +1.82%
BenchmarkSemaSyntBlock            35.30        35.30   +0.00%
BenchmarkSemaSyntBlock-2         118.00       124.00   +5.08%
BenchmarkSemaSyntBlock-4         105.00       108.00   +2.86%
BenchmarkSemaSyntBlock-8         101.00       111.00   +9.90%
BenchmarkSemaSyntBlock-16        112.00       118.00   +5.36%
BenchmarkSemaWorkNonblock        810.00       811.00   +0.12%
BenchmarkSemaWorkNonblock-2      476.00       414.00  -13.03%
BenchmarkSemaWorkNonblock-4      238.00       228.00   -4.20%
BenchmarkSemaWorkNonblock-8      140.00       126.00  -10.00%
BenchmarkSemaWorkNonblock-16     117.00       116.00   -0.85%
BenchmarkSemaWorkBlock           810.00       811.00   +0.12%
BenchmarkSemaWorkBlock-2         454.00       466.00   +2.64%
BenchmarkSemaWorkBlock-4         243.00       241.00   -0.82%
BenchmarkSemaWorkBlock-8         145.00       137.00   -5.52%
BenchmarkSemaWorkBlock-16        132.00       123.00   -6.82%
BenchmarkContendedSemaphore      123.00       102.00  -17.07%
BenchmarkContendedSemaphore-2     34.80        34.90   +0.29%
BenchmarkContendedSemaphore-4     34.70        34.80   +0.29%
BenchmarkContendedSemaphore-8     34.70        34.70   +0.00%
BenchmarkContendedSemaphore-16    34.80        34.70   -0.29%
BenchmarkMutex                    26.80        26.00   -2.99%
BenchmarkMutex-2                 108.00        45.20  -58.15%
BenchmarkMutex-4                 103.00       127.00  +23.30%
BenchmarkMutex-8                 109.00       147.00  +34.86%
BenchmarkMutex-16                102.00       152.00  +49.02%
BenchmarkMutexSlack               27.00        26.90   -0.37%
BenchmarkMutexSlack-2            149.00       165.00  +10.74%
BenchmarkMutexSlack-4            121.00       209.00  +72.73%
BenchmarkMutexSlack-8            101.00       158.00  +56.44%
BenchmarkMutexSlack-16            97.00       129.00  +32.99%
BenchmarkMutexWork               792.00       794.00   +0.25%
BenchmarkMutexWork-2             407.00       409.00   +0.49%
BenchmarkMutexWork-4             220.00       209.00   -5.00%
BenchmarkMutexWork-8             267.00       160.00  -40.07%
BenchmarkMutexWork-16            315.00       300.00   -4.76%
BenchmarkMutexWorkSlack          792.00       793.00   +0.13%
BenchmarkMutexWorkSlack-2        406.00       404.00   -0.49%
BenchmarkMutexWorkSlack-4        225.00       212.00   -5.78%
BenchmarkMutexWorkSlack-8        268.00       136.00  -49.25%
BenchmarkMutexWorkSlack-16       300.00       300.00   +0.00%
BenchmarkRWMutexWrite100          27.10        27.00   -0.37%
BenchmarkRWMutexWrite100-2        33.10        40.80  +23.26%
BenchmarkRWMutexWrite100-4       113.00        88.10  -22.04%
BenchmarkRWMutexWrite100-8       119.00        95.30  -19.92%
BenchmarkRWMutexWrite100-16      148.00       109.00  -26.35%
BenchmarkRWMutexWrite10           29.60        29.40   -0.68%
BenchmarkRWMutexWrite10-2        111.00        61.40  -44.68%
BenchmarkRWMutexWrite10-4        270.00       208.00  -22.96%
BenchmarkRWMutexWrite10-8        204.00       185.00   -9.31%
BenchmarkRWMutexWrite10-16       261.00       190.00  -27.20%
BenchmarkRWMutexWorkWrite100    1040.00      1036.00   -0.38%
BenchmarkRWMutexWorkWrite100-2   593.00       580.00   -2.19%
BenchmarkRWMutexWorkWrite100-4   470.00       365.00  -22.34%
BenchmarkRWMutexWorkWrite100-8   468.00       289.00  -38.25%
BenchmarkRWMutexWorkWrite100-16  604.00       374.00  -38.08%
BenchmarkRWMutexWorkWrite10      951.00       951.00   +0.00%
BenchmarkRWMutexWorkWrite10-2   1001.00       928.00   -7.29%
BenchmarkRWMutexWorkWrite10-4   1555.00      1006.00  -35.31%
BenchmarkRWMutexWorkWrite10-8   2085.00      1171.00  -43.84%
BenchmarkRWMutexWorkWrite10-16  2082.00      1614.00  -22.48%

R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
  • Loading branch information
dvyukov authored and rsc committed Jul 29, 2011
1 parent bed7e3e commit 4e5086b
Show file tree
Hide file tree
Showing 18 changed files with 278 additions and 91 deletions.
1 change: 1 addition & 0 deletions src/cmd/6a/lex.c
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ struct
"OUTSB", LTYPE0, AOUTSB,
"OUTSL", LTYPE0, AOUTSL,
"OUTSW", LTYPE0, AOUTSW,
"PAUSE", LTYPEN, APAUSE,
"POPAL", LTYPE0, APOPAL,
"POPAW", LTYPE0, APOPAW,
"POPFL", LTYPE0, APOPFL,
Expand Down
1 change: 1 addition & 0 deletions src/cmd/6l/6.out.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ enum as
AOUTSB,
AOUTSL,
AOUTSW,
APAUSE,
APOPAL,
APOPAW,
APOPFL,
Expand Down
1 change: 1 addition & 0 deletions src/cmd/6l/optab.c
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,7 @@ Optab optab[] =
{ APADDW, ymm, Py, 0xfd,Pe,0xfd },
{ APAND, ymm, Py, 0xdb,Pe,0xdb },
{ APANDN, ymm, Py, 0xdf,Pe,0xdf },
{ APAUSE, ynone, Px, 0xf3,0x90 },
{ APAVGB, ymm, Py, 0xe0,Pe,0xe0 },
{ APAVGW, ymm, Py, 0xe3,Pe,0xe3 },
{ APCMPEQB, ymm, Py, 0x74,Pe,0x74 },
Expand Down
1 change: 1 addition & 0 deletions src/cmd/8a/lex.c
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ struct
"OUTSB", LTYPE0, AOUTSB,
"OUTSL", LTYPE0, AOUTSL,
"OUTSW", LTYPE0, AOUTSW,
"PAUSE", LTYPEN, APAUSE,
"POPAL", LTYPE0, APOPAL,
"POPAW", LTYPE0, APOPAW,
"POPFL", LTYPE0, APOPFL,
Expand Down
1 change: 1 addition & 0 deletions src/cmd/8l/8.out.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ enum as
AOUTSB,
AOUTSL,
AOUTSW,
APAUSE,
APOPAL,
APOPAW,
APOPFL,
Expand Down
1 change: 1 addition & 0 deletions src/cmd/8l/optab.c
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ Optab optab[] =
{ AOUTSB, ynone, Pb, 0x6e },
{ AOUTSL, ynone, Px, 0x6f },
{ AOUTSW, ynone, Pe, 0x6f },
{ APAUSE, ynone, Px, 0xf3,0x90 },
{ APOPAL, ynone, Px, 0x61 },
{ APOPAW, ynone, Pe, 0x61 },
{ APOPFL, ynone, Px, 0x9d },
Expand Down
14 changes: 14 additions & 0 deletions src/pkg/runtime/386/asm.s
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,20 @@ TEXT runtime·xadd(SB), 7, $0
ADDL CX, AX
RET

TEXT runtime·xchg(SB), 7, $0
MOVL 4(SP), BX
MOVL 8(SP), AX
XCHGL AX, 0(BX)
RET

TEXT runtime·procyield(SB),7,$0
MOVL 4(SP), AX
again:
PAUSE
SUBL $1, AX
JNZ again
RET

TEXT runtime·atomicstorep(SB), 7, $0
MOVL 4(SP), BX
MOVL 8(SP), AX
Expand Down
14 changes: 14 additions & 0 deletions src/pkg/runtime/amd64/asm.s
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,20 @@ TEXT runtime·xadd(SB), 7, $0
ADDL CX, AX
RET

TEXT runtime·xchg(SB), 7, $0
MOVQ 8(SP), BX
MOVL 16(SP), AX
XCHGL AX, 0(BX)
RET

TEXT runtime·procyield(SB),7,$0
MOVL 8(SP), AX
again:
PAUSE
SUBL $1, AX
JNZ again
RET

TEXT runtime·atomicstorep(SB), 7, $0
MOVQ 8(SP), BX
MOVQ 16(SP), AX
Expand Down
23 changes: 23 additions & 0 deletions src/pkg/runtime/arm/atomic.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,29 @@ runtime·xadd(uint32 volatile *val, int32 delta)
}
}

#pragma textflag 7
uint32
runtime·xchg(uint32 volatile* addr, uint32 v)
{
uint32 old;

for(;;) {
old = *addr;
if(runtime·cas(addr, old, v))
return old;
}
}

#pragma textflag 7
void
runtime·procyield(uint32 cnt)
{
uint32 volatile i;

for(i = 0; i < cnt; i++) {
}
}

#pragma textflag 7
uint32
runtime·atomicload(uint32 volatile* addr)
Expand Down
2 changes: 2 additions & 0 deletions src/pkg/runtime/linux/386/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ enum {
ITIMER_REAL = 0,
ITIMER_VIRTUAL = 0x1,
ITIMER_PROF = 0x2,
O_RDONLY = 0,
O_CLOEXEC = 02000000,
};

// Types
Expand Down
29 changes: 28 additions & 1 deletion src/pkg/runtime/linux/386/sys.s
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,31 @@ TEXT runtime·exit1(SB),7,$0
INT $3 // not reached
RET

TEXT runtime·open(SB),7,$0
MOVL $5, AX // syscall - open
MOVL 4(SP), BX
MOVL 8(SP), CX
MOVL 12(SP), DX
INT $0x80
RET

TEXT runtime·close(SB),7,$0
MOVL $6, AX // syscall - close
MOVL 4(SP), BX
INT $0x80
RET

TEXT runtime·write(SB),7,$0
MOVL $4, AX // syscall - write
MOVL 4(SP), BX
MOVL 4(SP), BX
MOVL 8(SP), CX
MOVL 12(SP), DX
INT $0x80
RET

TEXT runtime·read(SB),7,$0
MOVL $3, AX // syscall - read
MOVL 4(SP), BX
MOVL 8(SP), CX
MOVL 12(SP), DX
INT $0x80
Expand Down Expand Up @@ -315,3 +337,8 @@ TEXT runtime·setldt(SB),7,$32
MOVW AX, GS

RET

TEXT runtime·osyield(SB),7,$0
MOVL $158, AX
INT $0x80
RET
2 changes: 2 additions & 0 deletions src/pkg/runtime/linux/amd64/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ enum {
ITIMER_REAL = 0,
ITIMER_VIRTUAL = 0x1,
ITIMER_PROF = 0x2,
O_RDONLY = 0,
O_CLOEXEC = 02000000,
};

// Types
Expand Down
18 changes: 18 additions & 0 deletions src/pkg/runtime/linux/amd64/sys.s
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ TEXT runtime·open(SB),7,$0-16
SYSCALL
RET

TEXT runtime·close(SB),7,$0-16
MOVL 8(SP), DI
MOVL $3, AX // syscall entry
SYSCALL
RET

TEXT runtime·write(SB),7,$0-24
MOVL 8(SP), DI
MOVQ 16(SP), SI
Expand All @@ -36,6 +42,14 @@ TEXT runtime·write(SB),7,$0-24
SYSCALL
RET

TEXT runtime·read(SB),7,$0-24
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVL 24(SP), DX
MOVL $0, AX // syscall entry
SYSCALL
RET

TEXT runtime·raisesigpipe(SB),7,$12
MOVL $186, AX // syscall - gettid
SYSCALL
Expand Down Expand Up @@ -232,3 +246,7 @@ TEXT runtime·settls(SB),7,$32
CALL runtime·notok(SB)
RET

TEXT runtime·osyield(SB),7,$0
MOVL $24, AX
SYSCALL
RET
2 changes: 2 additions & 0 deletions src/pkg/runtime/linux/arm/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ enum {
ITIMER_REAL = 0,
ITIMER_PROF = 0x2,
ITIMER_VIRTUAL = 0x1,
O_RDONLY = 0,
O_CLOEXEC = 02000000,
};

// Types
Expand Down
30 changes: 30 additions & 0 deletions src/pkg/runtime/linux/arm/sys.s
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
#define SYS_BASE 0x0

#define SYS_exit (SYS_BASE + 1)
#define SYS_read (SYS_BASE + 3)
#define SYS_write (SYS_BASE + 4)
#define SYS_open (SYS_BASE + 5)
#define SYS_close (SYS_BASE + 6)
#define SYS_gettimeofday (SYS_BASE + 78)
#define SYS_clone (SYS_BASE + 120)
#define SYS_rt_sigreturn (SYS_BASE + 173)
Expand All @@ -29,10 +32,25 @@
#define SYS_mincore (SYS_BASE + 219)
#define SYS_gettid (SYS_BASE + 224)
#define SYS_tkill (SYS_BASE + 238)
#define SYS_sched_yield (SYS_BASE + 158)

#define ARM_BASE (SYS_BASE + 0x0f0000)
#define SYS_ARM_cacheflush (ARM_BASE + 2)

TEXT runtime·open(SB),7,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_open, R7
SWI $0
RET

TEXT runtime·close(SB),7,$0
MOVW 0(FP), R0
MOVW $SYS_close, R7
SWI $0
RET

TEXT runtime·write(SB),7,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
Expand All @@ -41,6 +59,14 @@ TEXT runtime·write(SB),7,$0
SWI $0
RET

TEXT runtime·read(SB),7,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_read, R7
SWI $0
RET

TEXT runtime·exit(SB),7,$-4
MOVW 0(FP), R0
MOVW $SYS_exit_group, R7
Expand Down Expand Up @@ -287,3 +313,7 @@ cascheck:
TEXT runtime·casp(SB),7,$0
B runtime·cas(SB)

TEXT runtime·osyield(SB),7,$0
MOVW $SYS_sched_yield, R7
SWI $0
RET
Loading

0 comments on commit 4e5086b

Please sign in to comment.