From 04557c3d0b5bc7134dd772f238f9ebc5e1fd2787 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Mon, 17 Feb 2020 14:47:08 +0100 Subject: [PATCH 1/2] Stack-based trie implementation --- trie/hashtrie.go | 175 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 1 deletion(-) diff --git a/trie/hashtrie.go b/trie/hashtrie.go index d0b1cd66b541..4add33306470 100644 --- a/trie/hashtrie.go +++ b/trie/hashtrie.go @@ -17,7 +17,9 @@ package trie import ( + "bytes" "fmt" + "github.com/ethereum/go-ethereum/common" ) @@ -27,10 +29,11 @@ type HashTrie struct { root node rootKey []byte build []node + hasher *hasher } func NewHashTrie() *HashTrie { - return &HashTrie{root: nil, rootKey: nil, build: nil} + return &HashTrie{root: nil, rootKey: nil, build: nil, hasher: newHasher(false)} } func (t *HashTrie) TryUpdate(key, value []byte) error { @@ -121,3 +124,173 @@ func (t *HashTrie) Hash() common.Hash { t.root = cached return common.BytesToHash(hashed.(hashNode)) } + +type StackTrieItem struct { // StackTrieItem is one level of the StackTrie stack. + ext shortNode // extension (or leaf) node of this level + branch fullNode // branch that ext.Val points to once this level has been split + depth int // 0 for the root item, parent depth + 1 otherwise + useBranch bool // not referenced by the code in this patch + keyUntilHere []byte // nibbles of the key consumed before ext.Key +} + +type StackTrie struct { // StackTrie builds a trie hash from a stack of partially built levels. + stack []StackTrieItem + top int // index of the last used stack item, -1 while the trie is empty + hasher *hasher +} + +func NewStackTrie() *StackTrie { // NewStackTrie returns an empty StackTrie with one preallocated stack item. + return &StackTrie{ + top: -1, + stack: []StackTrieItem{ + {}, + }, + hasher: newHasher(false), + } +} + +func (st *StackTrie) TryUpdate(key, value []byte) error { // TryUpdate inserts (key, value); it panics on an empty value and otherwise returns nil. + k := keybytesToHex(key) + if len(value) == 0 { + panic("deletion not supported") + } + st.insert(&st.stack[0].ext, nil, k, valueNode(value)) // the result is discarded; insert never reads its n and prefix parameters + return nil +} + +func (st *StackTrie) insert(n node, prefix, key []byte, value node) node { // insert adds the hex-encoded key to the stack and returns the root node; presumably keys must arrive in sorted order — TODO confirm + // Special case: the trie is empty, the whole key goes into stack[0] + if st.top == -1 { + st.top = 0 + st.stack[st.top].depth = 0 + st.stack[st.top].ext.Key = key + st.stack[st.top].ext.Val, _ = st.hasher.hash(value, false) + st.stack[st.top].keyUntilHere = []byte("") + + return &st.stack[st.top].ext + } + + // Use the key prefixes to find the stack level in which the key needs to 
+ // be inserted. + level := -1 + for index := st.top; index >= 0; index-- { + level = index + if bytes.Equal(st.stack[level].keyUntilHere, key[:len(st.stack[level].keyUntilHere)]) { + // Found the deepest level whose prefix matches the key, stop the search + break + } + } + + // Hash the value right away, which it will be anyway + hv, _ := st.hasher.hash(value, false) + + // The difference happens at this level, find out where + // exactly: the extension part or the fullnode part? + extStart := len(st.stack[level].keyUntilHere) + extEnd := extStart + len(st.stack[level].ext.Key) + if bytes.Equal(st.stack[level].ext.Key, key[extStart:extEnd]) { + // The extension and the key are identical on the length of + // the extension, so st.stack[level].ext.Val should be a fullNode and + // the difference should be found there. Panic if this is + // not the case. + fn := st.stack[level].ext.Val.(*fullNode) + + // That fullNode should have at most one non-hashNode child: look at its highest non-nil entry + for i := 15; i >= 0; i-- { + if fn.Children[i] != nil { + switch fn.Children[i].(type) { + // Only hash entries that are not already hashed, no more nodes will be inserted in them + case *fullNode, *shortNode: + fn.Children[i], _ = st.hasher.hash(fn.Children[i], false) + st.top = level + default: + } + break + } + } + + // Make sure a stack slot is available right above st.top, then store + // the new leaf in it and link it from the branch of this level. + if len(st.stack) == st.top+1 { + st.stack = append(st.stack, StackTrieItem{}) // NOTE(review): append may move the slice while nodes still hold pointers into it — confirm + } + + st.top++ + keyUntilHere := len(st.stack[level].keyUntilHere) + len(st.stack[level].ext.Key) + 1 + st.stack[level].branch.Children[key[keyUntilHere-1]] = &st.stack[st.top].ext // the branch index is the last nibble of the child's prefix, as in the split case below + st.stack[st.top].keyUntilHere = key[:keyUntilHere] + st.stack[st.top].ext.Key = key[keyUntilHere:] + st.stack[st.top].ext.Val = hv + st.stack[st.top].ext.flags = nodeFlag{dirty: true} + st.stack[st.top].depth = st.stack[level].depth + 1 + } else { + // extension keys differ, need to create a split and 
+ // hash the former node. + whereitdiffers := 0 + offset := len(st.stack[level].keyUntilHere) + for i := range st.stack[level].ext.Key { + if key[offset+i] != st.stack[level].ext.Key[i] { + whereitdiffers = i + break + } + } + + // Start by hashing the node right after the extension, + // to free some space. + var hn node + switch st.stack[level].ext.Val.(type) { + case *fullNode: + h, _ := st.hasher.hash(st.stack[level].ext.Val, false) + hn = h.(hashNode) + case hashNode, valueNode: + hn = st.stack[level].ext.Val + default: + panic("Encountered unexpected node type") + } + + // Make sure the next stack slot exists, it's going to be + // reused several times. + if len(st.stack) == st.top+1 { + st.stack = append(st.stack, StackTrieItem{}) // NOTE(review): append may move the slice while nodes still hold pointers into it — confirm + } + st.top++ + + // Store the partially-hashed old node in the newly allocated + // slot, in order to finish the hashing. + slot := st.stack[level].ext.Key[whereitdiffers] + st.stack[st.top].ext.Key = st.stack[level].ext.Key[whereitdiffers+1:] + st.stack[st.top].ext.Val = hn + st.stack[st.top].ext.flags = nodeFlag{dirty: true} + + // Hash the old node right away (done unconditionally, even if its ext key is not empty) + h, _ := st.hasher.hash(&st.stack[st.top].ext, false) + st.stack[level].branch.Children[slot] = h.(hashNode) + st.stack[level].ext.Val = &st.stack[level].branch + st.stack[level].ext.Key = st.stack[level].ext.Key[:whereitdiffers] + + // Now reuse the slot st.stack[st.top], whose old content has been hashed, to store + // the rest of the inserted (key, value) pair. + slot = key[whereitdiffers+len(st.stack[level].keyUntilHere)] + st.stack[level].branch.Children[slot] = &st.stack[st.top].ext + st.stack[st.top].ext.Key = key[whereitdiffers+len(st.stack[level].keyUntilHere)+1:] + st.stack[st.top].ext.Val = hv + st.stack[st.top].keyUntilHere = key[:whereitdiffers+len(st.stack[level].keyUntilHere)+1] + st.stack[st.top].depth = st.stack[level].depth + 1 + + } + + // If the root extension key is empty, directly return the root full node. 
+ if len(st.stack[0].ext.Key) == 0 { + return &st.stack[0].branch + } + return &st.stack[0].ext +} + +func (st *StackTrie) Hash() common.Hash { // Hash returns emptyRoot for an empty trie, the hash of the root item otherwise. + if st.top == -1 { + return emptyRoot + } + + h, _ := st.hasher.hash(&st.stack[0].ext, false) // NOTE(review): hashes the root ext even when its key is empty, whereas insert returns the branch in that case — confirm + return common.BytesToHash(h.(hashNode)) +} From 7abf43cf55c186ee499f97094678c257a6126fd3 Mon Sep 17 00:00:00 2001 From: Guillaume Ballet Date: Mon, 17 Feb 2020 18:07:16 +0100 Subject: [PATCH 2/2] Add the benchmarking code that was left out. --- core/state/snapshot/hextrie_generator.go | 10 +++++++++ core/state/snapshot/trie_generator_test.go | 26 ++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/core/state/snapshot/hextrie_generator.go b/core/state/snapshot/hextrie_generator.go index 51200668b660..e2f50033ef5a 100644 --- a/core/state/snapshot/hextrie_generator.go +++ b/core/state/snapshot/hextrie_generator.go @@ -29,6 +29,16 @@ type leaf struct { type trieGeneratorFn func(in chan (leaf), out chan (common.Hash)) +// StackGenerate is a hexary trie builder which is built from the bottom-up as +// keys are added. 
+func StackGenerate(in chan (leaf), out chan (common.Hash)) { + t := trie.NewStackTrie() + for leaf := range in { + t.TryUpdate(leaf.key[:], leaf.value) // error not checked: StackTrie.TryUpdate returns nil or panics on an empty value + } + out <- t.Hash() // sent once the in channel has been closed and drained +} + // PruneGenerate is a hexary trie builder which collapses old nodes, but is still // based on more or less the ordinary trie builder func PruneGenerate(in chan (leaf), out chan (common.Hash)) { diff --git a/core/state/snapshot/trie_generator_test.go b/core/state/snapshot/trie_generator_test.go index c43b63eeec69..28727f90f96e 100644 --- a/core/state/snapshot/trie_generator_test.go +++ b/core/state/snapshot/trie_generator_test.go @@ -101,10 +101,12 @@ func TestTrieGenerationAppendonly(t *testing.T) { } } -// BenchmarkTrieGeneration/4K/standard-6 98 14141790 ns/op 6164989 B/op 57929 allocs/op -// BenchmarkTrieGeneration/4K/pruning-6 72 14015967 ns/op 6604020 B/op 54962 allocs/op -// BenchmarkTrieGeneration/10K/standard-6 42 30085495 ns/op 17280084 B/op 151006 allocs/op -// BenchmarkTrieGeneration/10K/pruning-6 32 34536586 ns/op 16510731 B/op 137402 allocs/op +// BenchmarkTrieGeneration/4K/standard-8 127 9429425 ns/op 6188077 B/op 58026 allocs/op +// BenchmarkTrieGeneration/4K/pruning-8 72 16544534 ns/op 6617322 B/op 55016 allocs/op +// BenchmarkTrieGeneration/4K/stack-8 159 6452936 ns/op 6308393 B/op 12022 allocs/op +// BenchmarkTrieGeneration/10K/standard-8 50 25025175 ns/op 17283703 B/op 151023 allocs/op +// BenchmarkTrieGeneration/10K/pruning-8 28 38141602 ns/op 16540254 B/op 137520 allocs/op +// BenchmarkTrieGeneration/10K/stack-8 60 18888649 ns/op 17557314 B/op 30067 allocs/op func BenchmarkTrieGeneration(b *testing.B) { // Get a fairly large trie // Create a custom account factory to recreate the same addresses @@ -150,6 +152,14 @@ func BenchmarkTrieGeneration(b *testing.B) { generateTrie(it, PruneGenerate) } }) + b.Run("stack", func(b *testing.B) { // benchmark the stack-based generator + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + it := head.(*diffLayer).AccountIterator(common.HexToHash("0x00")) 
+ generateTrie(it, StackGenerate) + } + }) }) b.Run("10K", func(b *testing.B) { // 4K accounts @@ -173,5 +183,13 @@ func BenchmarkTrieGeneration(b *testing.B) { generateTrie(it, PruneGenerate) } }) + b.Run("stack", func(b *testing.B) { // benchmark the stack-based generator + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + it := head.(*diffLayer).AccountIterator(common.HexToHash("0x00")) + generateTrie(it, StackGenerate) + } + }) }) }