From 6ba7c2670325f27107c4267c597bccf375980b2a Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Thu, 7 Mar 2024 19:56:31 +0800 Subject: [PATCH] common/lru: add generic LRU implementation (#26162) --- common/lru/basiclru.go | 223 +++++++++++++++++++++++++++++++ common/lru/basiclru_test.go | 255 ++++++++++++++++++++++++++++++++++++ common/lru/blob_lru.go | 84 ++++++++++++ common/lru/blob_lru_test.go | 155 ++++++++++++++++++++++ common/lru/lru.go | 95 ++++++++++++++ 5 files changed, 812 insertions(+) create mode 100644 common/lru/basiclru.go create mode 100644 common/lru/basiclru_test.go create mode 100644 common/lru/blob_lru.go create mode 100644 common/lru/blob_lru_test.go create mode 100644 common/lru/lru.go diff --git a/common/lru/basiclru.go b/common/lru/basiclru.go new file mode 100644 index 000000000000..a429157fe50a --- /dev/null +++ b/common/lru/basiclru.go @@ -0,0 +1,223 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Package lru implements generically-typed LRU caches. +package lru + +// BasicLRU is a simple LRU cache. +// +// This type is not safe for concurrent use. +// The zero value is not valid, instances must be created using NewCache. +type BasicLRU[K comparable, V any] struct { + list *list[K] + items map[K]cacheItem[K, V] + cap int +} + +type cacheItem[K any, V any] struct { + elem *listElem[K] + value V +} + +// NewBasicLRU creates a new LRU cache. +func NewBasicLRU[K comparable, V any](capacity int) BasicLRU[K, V] { + if capacity <= 0 { + capacity = 1 + } + c := BasicLRU[K, V]{ + items: make(map[K]cacheItem[K, V]), + list: newList[K](), + cap: capacity, + } + return c +} + +// Add adds a value to the cache. Returns true if an item was evicted to store the new item. +func (c *BasicLRU[K, V]) Add(key K, value V) (evicted bool) { + item, ok := c.items[key] + if ok { + // Already exists in cache. + item.value = value + c.items[key] = item + c.list.moveToFront(item.elem) + return false + } + + var elem *listElem[K] + if c.Len() >= c.cap { + elem = c.list.removeLast() + delete(c.items, elem.v) + evicted = true + } else { + elem = new(listElem[K]) + } + + // Store the new item. + // Note that, if another item was evicted, we re-use its list element here. + elem.v = key + c.items[key] = cacheItem[K, V]{elem, value} + c.list.pushElem(elem) + return evicted +} + +// Contains reports whether the given key exists in the cache. +func (c *BasicLRU[K, V]) Contains(key K) bool { + _, ok := c.items[key] + return ok +} + +// Get retrieves a value from the cache. This marks the key as recently used. +func (c *BasicLRU[K, V]) Get(key K) (value V, ok bool) { + item, ok := c.items[key] + if !ok { + return value, false + } + c.list.moveToFront(item.elem) + return item.value, true +} + +// GetOldest retrieves the least-recently-used item. +// Note that this does not update the item's recency. +func (c *BasicLRU[K, V]) GetOldest() (key K, value V, ok bool) { + lastElem := c.list.last() + if lastElem == nil { + return key, value, false + } + key = lastElem.v + item := c.items[key] + return key, item.value, true +} + +// Len returns the current number of items in the cache. +func (c *BasicLRU[K, V]) Len() int { + return len(c.items) +} + +// Peek retrieves a value from the cache, but does not mark the key as recently used. +func (c *BasicLRU[K, V]) Peek(key K) (value V, ok bool) { + item, ok := c.items[key] + return item.value, ok +} + +// Purge empties the cache. +func (c *BasicLRU[K, V]) Purge() { + c.list.init() + for k := range c.items { + delete(c.items, k) + } +} + +// Remove drops an item from the cache. Returns true if the key was present in cache. +func (c *BasicLRU[K, V]) Remove(key K) bool { + item, ok := c.items[key] + if ok { + delete(c.items, key) + c.list.remove(item.elem) + } + return ok +} + +// RemoveOldest drops the least recently used item. +func (c *BasicLRU[K, V]) RemoveOldest() (key K, value V, ok bool) { + lastElem := c.list.last() + if lastElem == nil { + return key, value, false + } + + key = lastElem.v + item := c.items[key] + delete(c.items, key) + c.list.remove(lastElem) + return key, item.value, true +} + +// Keys returns all keys in the cache. +func (c *BasicLRU[K, V]) Keys() []K { + keys := make([]K, 0, len(c.items)) + return c.list.appendTo(keys) +} + +// list is a doubly-linked list holding items of type he. +// The zero value is not valid, use newList to create lists. +type list[T any] struct { + root listElem[T] +} + +type listElem[T any] struct { + next *listElem[T] + prev *listElem[T] + v T +} + +func newList[T any]() *list[T] { + l := new(list[T]) + l.init() + return l +} + +// init reinitializes the list, making it empty. +func (l *list[T]) init() { + l.root.next = &l.root + l.root.prev = &l.root +} + +// push adds an element to the front of the list. +func (l *list[T]) pushElem(e *listElem[T]) { + e.prev = &l.root + e.next = l.root.next + l.root.next = e + e.next.prev = e +} + +// moveToFront makes 'node' the head of the list. +func (l *list[T]) moveToFront(e *listElem[T]) { + e.prev.next = e.next + e.next.prev = e.prev + l.pushElem(e) +} + +// remove removes an element from the list. +func (l *list[T]) remove(e *listElem[T]) { + e.prev.next = e.next + e.next.prev = e.prev + e.next, e.prev = nil, nil +} + +// removeLast removes the last element of the list. +func (l *list[T]) removeLast() *listElem[T] { + last := l.last() + if last != nil { + l.remove(last) + } + return last +} + +// last returns the last element of the list, or nil if the list is empty. +func (l *list[T]) last() *listElem[T] { + e := l.root.prev + if e == &l.root { + return nil + } + return e +} + +// appendTo appends all list elements to a slice. +func (l *list[T]) appendTo(slice []T) []T { + for e := l.root.prev; e != &l.root; e = e.prev { + slice = append(slice, e.v) + } + return slice +} diff --git a/common/lru/basiclru_test.go b/common/lru/basiclru_test.go new file mode 100644 index 000000000000..29812bda157a --- /dev/null +++ b/common/lru/basiclru_test.go @@ -0,0 +1,255 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package lru + +import ( + crand "crypto/rand" + "fmt" + "io" + "math/rand" + "testing" +) + +// Some of these test cases were adapted +// from https://github.com/hashicorp/golang-lru/blob/master/simplelru/lru_test.go + +func TestBasicLRU(t *testing.T) { + cache := NewBasicLRU[int, int](128) + + for i := 0; i < 256; i++ { + cache.Add(i, i) + } + if cache.Len() != 128 { + t.Fatalf("bad len: %v", cache.Len()) + } + + // Check that Keys returns least-recent key first. + keys := cache.Keys() + if len(keys) != 128 { + t.Fatal("wrong Keys() length", len(keys)) + } + for i, k := range keys { + v, ok := cache.Peek(k) + if !ok { + t.Fatalf("expected key %d be present", i) + } + if v != k { + t.Fatalf("expected %d == %d", k, v) + } + if v != i+128 { + t.Fatalf("wrong value at key %d: %d, want %d", i, v, i+128) + } + } + + for i := 0; i < 128; i++ { + _, ok := cache.Get(i) + if ok { + t.Fatalf("%d should be evicted", i) + } + } + for i := 128; i < 256; i++ { + _, ok := cache.Get(i) + if !ok { + t.Fatalf("%d should not be evicted", i) + } + } + + for i := 128; i < 192; i++ { + ok := cache.Remove(i) + if !ok { + t.Fatalf("%d should be in cache", i) + } + ok = cache.Remove(i) + if ok { + t.Fatalf("%d should not be in cache", i) + } + _, ok = cache.Get(i) + if ok { + t.Fatalf("%d should be deleted", i) + } + } + + // Request item 192. + cache.Get(192) + // It should be the last item returned by Keys(). + for i, k := range cache.Keys() { + if (i < 63 && k != i+193) || (i == 63 && k != 192) { + t.Fatalf("out of order key: %v", k) + } + } + + cache.Purge() + if cache.Len() != 0 { + t.Fatalf("bad len: %v", cache.Len()) + } + if _, ok := cache.Get(200); ok { + t.Fatalf("should contain nothing") + } +} + +func TestBasicLRUAddExistingKey(t *testing.T) { + cache := NewBasicLRU[int, int](1) + + cache.Add(1, 1) + cache.Add(1, 2) + + v, _ := cache.Get(1) + if v != 2 { + t.Fatal("wrong value:", v) + } +} + +// This test checks GetOldest and RemoveOldest. +func TestBasicLRUGetOldest(t *testing.T) { + cache := NewBasicLRU[int, int](128) + for i := 0; i < 256; i++ { + cache.Add(i, i) + } + + k, _, ok := cache.GetOldest() + if !ok { + t.Fatalf("missing") + } + if k != 128 { + t.Fatalf("bad: %v", k) + } + + k, _, ok = cache.RemoveOldest() + if !ok { + t.Fatalf("missing") + } + if k != 128 { + t.Fatalf("bad: %v", k) + } + + k, _, ok = cache.RemoveOldest() + if !ok { + t.Fatalf("missing oldest item") + } + if k != 129 { + t.Fatalf("wrong oldest item: %v", k) + } +} + +// Test that Add returns true/false if an eviction occurred +func TestBasicLRUAddReturnValue(t *testing.T) { + cache := NewBasicLRU[int, int](1) + if cache.Add(1, 1) { + t.Errorf("first add shouldn't have evicted") + } + if !cache.Add(2, 2) { + t.Errorf("second add should have evicted") + } +} + +// This test verifies that Contains doesn't change item recency. +func TestBasicLRUContains(t *testing.T) { + cache := NewBasicLRU[int, int](2) + cache.Add(1, 1) + cache.Add(2, 2) + if !cache.Contains(1) { + t.Errorf("1 should be in the cache") + } + cache.Add(3, 3) + if cache.Contains(1) { + t.Errorf("Contains should not have updated recency of 1") + } +} + +// Test that Peek doesn't update recent-ness +func TestBasicLRUPeek(t *testing.T) { + cache := NewBasicLRU[int, int](2) + cache.Add(1, 1) + cache.Add(2, 2) + if v, ok := cache.Peek(1); !ok || v != 1 { + t.Errorf("1 should be set to 1") + } + cache.Add(3, 3) + if cache.Contains(1) { + t.Errorf("should not have updated recent-ness of 1") + } +} + +func BenchmarkLRU(b *testing.B) { + var ( + capacity = 1000 + indexes = make([]int, capacity*20) + keys = make([]string, capacity) + values = make([][]byte, capacity) + ) + for i := range indexes { + indexes[i] = rand.Intn(capacity) + } + for i := range keys { + b := make([]byte, 32) + crand.Read(b) + keys[i] = string(b) + crand.Read(b) + values[i] = b + } + + var sink []byte + + b.Run("Add/BasicLRU", func(b *testing.B) { + cache := NewBasicLRU[int, int](capacity) + for i := 0; i < b.N; i++ { + cache.Add(i, i) + } + }) + b.Run("Get/BasicLRU", func(b *testing.B) { + cache := NewBasicLRU[string, []byte](capacity) + for i := 0; i < capacity; i++ { + index := indexes[i] + cache.Add(keys[index], values[index]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + k := keys[indexes[i%len(indexes)]] + v, ok := cache.Get(k) + if ok { + sink = v + } + } + }) + + // // vs. github.com/hashicorp/golang-lru/simplelru + // b.Run("Add/simplelru.LRU", func(b *testing.B) { + // cache, _ := simplelru.NewLRU(capacity, nil) + // for i := 0; i < b.N; i++ { + // cache.Add(i, i) + // } + // }) + // b.Run("Get/simplelru.LRU", func(b *testing.B) { + // cache, _ := simplelru.NewLRU(capacity, nil) + // for i := 0; i < capacity; i++ { + // index := indexes[i] + // cache.Add(keys[index], values[index]) + // } + // + // b.ResetTimer() + // for i := 0; i < b.N; i++ { + // k := keys[indexes[i%len(indexes)]] + // v, ok := cache.Get(k) + // if ok { + // sink = v.([]byte) + // } + // } + // }) + + fmt.Fprintln(io.Discard, sink) +} diff --git a/common/lru/blob_lru.go b/common/lru/blob_lru.go new file mode 100644 index 000000000000..c9b33985032c --- /dev/null +++ b/common/lru/blob_lru.go @@ -0,0 +1,84 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package lru + +import ( + "math" + "sync" +) + +// blobType is the type constraint for values stored in SizeConstrainedCache. +type blobType interface { + ~[]byte | ~string +} + +// SizeConstrainedCache is a cache where capacity is in bytes (instead of item count). When the cache +// is at capacity, and a new item is added, older items are evicted until the size +// constraint is met. +// +// OBS: This cache assumes that items are content-addressed: keys are unique per content. +// In other words: two Add(..) with the same key K, will always have the same value V. +type SizeConstrainedCache[K comparable, V blobType] struct { + size uint64 + maxSize uint64 + lru BasicLRU[K, V] + lock sync.Mutex +} + +// NewSizeConstrainedCache creates a new size-constrained LRU cache. +func NewSizeConstrainedCache[K comparable, V blobType](maxSize uint64) *SizeConstrainedCache[K, V] { + return &SizeConstrainedCache[K, V]{ + size: 0, + maxSize: maxSize, + lru: NewBasicLRU[K, V](math.MaxInt), + } +} + +// Add adds a value to the cache. Returns true if an eviction occurred. +// OBS: This cache assumes that items are content-addressed: keys are unique per content. +// In other words: two Add(..) with the same key K, will always have the same value V. +// OBS: The value is _not_ copied on Add, so the caller must not modify it afterwards. +func (c *SizeConstrainedCache[K, V]) Add(key K, value V) (evicted bool) { + c.lock.Lock() + defer c.lock.Unlock() + + // Unless it is already present, might need to evict something. + // OBS: If it is present, we still call Add internally to bump the recentness. + if !c.lru.Contains(key) { + targetSize := c.size + uint64(len(value)) + for targetSize > c.maxSize { + evicted = true + _, v, ok := c.lru.RemoveOldest() + if !ok { + // list is now empty. Break + break + } + targetSize -= uint64(len(v)) + } + c.size = targetSize + } + c.lru.Add(key, value) + return evicted +} + +// Get looks up a key's value from the cache. +func (c *SizeConstrainedCache[K, V]) Get(key K) (V, bool) { + c.lock.Lock() + defer c.lock.Unlock() + + return c.lru.Get(key) +} diff --git a/common/lru/blob_lru_test.go b/common/lru/blob_lru_test.go new file mode 100644 index 000000000000..ca1b0ddd742a --- /dev/null +++ b/common/lru/blob_lru_test.go @@ -0,0 +1,155 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package lru + +import ( + "encoding/binary" + "fmt" + "testing" +) + +type testKey [8]byte + +func mkKey(i int) (key testKey) { + binary.LittleEndian.PutUint64(key[:], uint64(i)) + return key +} + +func TestSizeConstrainedCache(t *testing.T) { + lru := NewSizeConstrainedCache[testKey, []byte](100) + var want uint64 + // Add 11 items of 10 byte each. First item should be swapped out + for i := 0; i < 11; i++ { + k := mkKey(i) + v := fmt.Sprintf("value-%04d", i) + lru.Add(k, []byte(v)) + want += uint64(len(v)) + if want > 100 { + want = 100 + } + if have := lru.size; have != want { + t.Fatalf("size wrong, have %d want %d", have, want) + } + } + // Zero:th should be evicted + { + k := mkKey(0) + if _, ok := lru.Get(k); ok { + t.Fatalf("should be evicted: %v", k) + } + } + // Elems 1-11 should be present + for i := 1; i < 11; i++ { + k := mkKey(i) + want := fmt.Sprintf("value-%04d", i) + have, ok := lru.Get(k) + if !ok { + t.Fatalf("missing key %v", k) + } + if string(have) != want { + t.Fatalf("wrong value, have %v want %v", have, want) + } + } +} + +// This test adds inserting an element exceeding the max size. +func TestSizeConstrainedCacheOverflow(t *testing.T) { + lru := NewSizeConstrainedCache[testKey, []byte](100) + + // Add 10 items of 10 byte each, filling the cache + for i := 0; i < 10; i++ { + k := mkKey(i) + v := fmt.Sprintf("value-%04d", i) + lru.Add(k, []byte(v)) + } + // Add one single large elem. We expect it to swap out all entries. + { + k := mkKey(1337) + v := make([]byte, 200) + lru.Add(k, v) + } + // Elems 0-9 should be missing + for i := 1; i < 10; i++ { + k := mkKey(i) + if _, ok := lru.Get(k); ok { + t.Fatalf("should be evicted: %v", k) + } + } + // The size should be accurate + if have, want := lru.size, uint64(200); have != want { + t.Fatalf("size wrong, have %d want %d", have, want) + } + // Adding one small item should swap out the large one + { + i := 0 + k := mkKey(i) + v := fmt.Sprintf("value-%04d", i) + lru.Add(k, []byte(v)) + if have, want := lru.size, uint64(10); have != want { + t.Fatalf("size wrong, have %d want %d", have, want) + } + } +} + +// This checks what happens when inserting the same k/v multiple times. +func TestSizeConstrainedCacheSameItem(t *testing.T) { + lru := NewSizeConstrainedCache[testKey, []byte](100) + + // Add one 10 byte-item 10 times. + k := mkKey(0) + v := fmt.Sprintf("value-%04d", 0) + for i := 0; i < 10; i++ { + lru.Add(k, []byte(v)) + } + + // The size should be accurate. + if have, want := lru.size, uint64(10); have != want { + t.Fatalf("size wrong, have %d want %d", have, want) + } +} + +// This tests that empty/nil values are handled correctly. +func TestSizeConstrainedCacheEmpties(t *testing.T) { + lru := NewSizeConstrainedCache[testKey, []byte](100) + + // This test abuses the lru a bit, using different keys for identical value(s). + for i := 0; i < 10; i++ { + lru.Add(testKey{byte(i)}, []byte{}) + lru.Add(testKey{byte(255 - i)}, nil) + } + + // The size should not count, only the values count. So this could be a DoS + // since it basically has no cap, and it is intentionally overloaded with + // different-keyed 0-length values. + if have, want := lru.size, uint64(0); have != want { + t.Fatalf("size wrong, have %d want %d", have, want) + } + + for i := 0; i < 10; i++ { + if v, ok := lru.Get(testKey{byte(i)}); !ok { + t.Fatalf("test %d: expected presence", i) + } else if v == nil { + t.Fatalf("test %d, v is nil", i) + } + + if v, ok := lru.Get(testKey{byte(255 - i)}); !ok { + t.Fatalf("test %d: expected presence", i) + } else if v != nil { + t.Fatalf("test %d, v is not nil", i) + } + } +} diff --git a/common/lru/lru.go b/common/lru/lru.go new file mode 100644 index 000000000000..45965adb0dfc --- /dev/null +++ b/common/lru/lru.go @@ -0,0 +1,95 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package lru + +import "sync" + +// Cache is a LRU cache. +// This type is safe for concurrent use. +type Cache[K comparable, V any] struct { + cache BasicLRU[K, V] + mu sync.Mutex +} + +// NewCache creates an LRU cache. +func NewCache[K comparable, V any](capacity int) *Cache[K, V] { + return &Cache[K, V]{cache: NewBasicLRU[K, V](capacity)} +} + +// Add adds a value to the cache. Returns true if an item was evicted to store the new item. +func (c *Cache[K, V]) Add(key K, value V) (evicted bool) { + c.mu.Lock() + defer c.mu.Unlock() + + return c.cache.Add(key, value) +} + +// Contains reports whether the given key exists in the cache. +func (c *Cache[K, V]) Contains(key K) bool { + c.mu.Lock() + defer c.mu.Unlock() + + return c.cache.Contains(key) +} + +// Get retrieves a value from the cache. This marks the key as recently used. +func (c *Cache[K, V]) Get(key K) (value V, ok bool) { + c.mu.Lock() + defer c.mu.Unlock() + + return c.cache.Get(key) +} + +// Len returns the current number of items in the cache. +func (c *Cache[K, V]) Len() int { + c.mu.Lock() + defer c.mu.Unlock() + + return c.cache.Len() +} + +// Peek retrieves a value from the cache, but does not mark the key as recently used. +func (c *Cache[K, V]) Peek(key K) (value V, ok bool) { + c.mu.Lock() + defer c.mu.Unlock() + + return c.cache.Peek(key) +} + +// Purge empties the cache. +func (c *Cache[K, V]) Purge() { + c.mu.Lock() + defer c.mu.Unlock() + + c.cache.Purge() +} + +// Remove drops an item from the cache. Returns true if the key was present in cache. +func (c *Cache[K, V]) Remove(key K) bool { + c.mu.Lock() + defer c.mu.Unlock() + + return c.cache.Remove(key) +} + +// Keys returns all keys of items currently in the LRU. +func (c *Cache[K, V]) Keys() []K { + c.mu.Lock() + defer c.mu.Unlock() + + return c.cache.Keys() +}