Skip to content
This repository has been archived by the owner on Apr 2, 2024. It is now read-only.

Commit

Permalink
Add BenchmarkCacheFalseSharing
Browse files Browse the repository at this point in the history
It seems as though the existing benchmarks cannot be used to measure the
effect of false sharing, so I added a new benchmark.
  • Loading branch information
JamesGuthrie committed Jul 22, 2022
1 parent e7745aa commit a92e15e
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pkg/clockcache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type element struct {
used uint32
size uint64

// pad Elements out to be cache aligned
// pad Elements out to be cache-aligned, see BenchmarkCacheFalseSharing
_ [16]byte
}

Expand Down
66 changes: 66 additions & 0 deletions pkg/clockcache/cache_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package clockcache
import (
"fmt"
"math/rand"
"sync"
"testing"
)

Expand Down Expand Up @@ -236,6 +237,71 @@ func BenchmarkInsertConcurrent(b *testing.B) {
}
}

// BenchmarkCacheFalseSharing measures the effect of false sharing of CPU
// cache lines between cache entries. The clockcache.element struct carries
// padding intended to ensure that only one element fits in a CPU cache line,
// which avoids false sharing.
//
// The principle is simple: construct a cache with two entries and start two
// goroutines which each clobber one of the cache values over and over again.
// If there is false sharing, it should be measurable by toggling the padding
// on and off and comparing the output of this benchmark.
//
// At the time of writing, this code was tested on an M1 MacBook Pro, where the
// advantage obtained by introducing padding is approximately 16%. The same
// command is run twice, once without and once with the padding compiled in:
//
//	go test -bench=BenchmarkCacheFalseSharing -cpu=2 -count=10 > no-padding.txt
//	go test -bench=BenchmarkCacheFalseSharing -cpu=2 -count=10 > padding.txt
//	benchstat no-padding.txt padding.txt
//	name                 old time/op  new time/op  delta
//	CacheFalseSharing-2  230ns ± 6%   193ns ±19%   -16.09%  (p=0.001 n=10+9)
//
// Note: This benchmark _must_ be run with the `-cpu=2` argument, to ensure
// that each goroutine ends up on a different CPU, possibly causing contention
// for the same cache line.
func BenchmarkCacheFalseSharing(b *testing.B) {
	// One stressor per cache entry; the keys are 0..stressors-1.
	const stressors = 2

	cache := WithMax(stressors)
	b.ReportAllocs()

	// ready lets the benchmark goroutine know that every stressor is parked
	// at the start gate; done lets it wait for all of them to finish.
	var ready, done sync.WaitGroup
	ready.Add(stressors)
	done.Add(stressors)

	// start is closed exactly once to release all stressors simultaneously.
	start := make(chan struct{})

	// Each stressor publishes its last read value into its own slot, so no
	// two goroutines ever write the same variable.
	var results [stressors]interface{}

	// stressor continually writes and reads to/from a specific key in the
	// cache, count times, once the start gate has been opened.
	stressor := func(key, count int) {
		defer done.Done()

		var val interface{}

		// Signal readiness, then block until the timer has been reset.
		ready.Done()
		<-start

		for i := 0; i < count; i++ {
			cache.Insert(key, i, 16)
			val, _ = cache.Get(key)
		}

		results[key] = val
	}

	// run the contending goroutines
	for key := 0; key < stressors; key++ {
		go stressor(key, b.N)
	}

	// Reset the timer from the benchmark goroutine only, immediately before
	// the real work starts: testing.B's timer state is not synchronized, so
	// it must not be reset concurrently from the stressors.
	ready.Wait()
	b.ResetTimer()
	close(start)

	// wait for tasks to complete
	done.Wait()

	// Sink a result into the package-level variable after all stressors are
	// done, so the reads in the loop cannot be optimized away and the sink is
	// written by a single goroutine.
	bval = results[stressors-1]
}
func BenchmarkMemoryEmptyCache(b *testing.B) {
b.ReportAllocs()
WithMax(uint64(b.N))
Expand Down

0 comments on commit a92e15e

Please sign in to comment.