Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trie: simplify StackTrie implementation #23950

Merged
merged 4 commits into from
Nov 29, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 32 additions & 40 deletions trie/stacktrie.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,11 @@ func returnToPool(st *StackTrie) {
// in order. Once it determines that a subtree will no longer be inserted
// into, it will hash it and free up the memory it uses.
type StackTrie struct {
nodeType uint8 // node type (as in branch, ext, leaf)
val []byte // value contained by this node if it's a leaf
key []byte // key chunk covered by this (full|ext) node
keyOffset int // offset of the key chunk inside a full key
children [16]*StackTrie // list of children (for fullnodes and exts)
db ethdb.KeyValueWriter // Pointer to the commit db, can be nil
nodeType uint8 // node type (as in branch, ext, leaf)
val []byte // value contained by this node if it's a leaf
key []byte // key chunk covered by this (leaf|ext) node
children [16]*StackTrie // list of children (for branch and exts)
db ethdb.KeyValueWriter // Pointer to the commit db, can be nil
}

// NewStackTrie allocates and initializes an empty trie.
Expand Down Expand Up @@ -90,15 +89,13 @@ func (st *StackTrie) MarshalBinary() (data []byte, err error) {
w = bufio.NewWriter(&b)
)
if err := gob.NewEncoder(w).Encode(struct {
Nodetype uint8
Val []byte
Key []byte
KeyOffset uint8
Nodetype uint8
Val []byte
Key []byte
Comment on lines +92 to +94
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need to lookup why we implemented binary marshalling, but if we somewhere do encode this to disk, then this is a breaking change.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, we added it here: #22685

@gballet do you recall why we/I did that?

}{
st.nodeType,
st.val,
st.key,
uint8(st.keyOffset),
}); err != nil {
return nil, err
}
Expand Down Expand Up @@ -126,16 +123,14 @@ func (st *StackTrie) UnmarshalBinary(data []byte) error {

func (st *StackTrie) unmarshalBinary(r io.Reader) error {
var dec struct {
Nodetype uint8
Val []byte
Key []byte
KeyOffset uint8
Nodetype uint8
Val []byte
Key []byte
}
gob.NewDecoder(r).Decode(&dec)
st.nodeType = dec.Nodetype
st.val = dec.Val
st.key = dec.Key
st.keyOffset = int(dec.KeyOffset)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks.

This mistake might have hidden a binary incompatibility issue: we could properly read old binaries by ignoring KeyOffset byte.

This also looks like UnmarshalBinary() is currently incompatible with MarshalBinary(). Why is this not detected by any test?


var hasChild = make([]byte, 1)
for i := range st.children {
Expand All @@ -160,20 +155,18 @@ func (st *StackTrie) setDb(db ethdb.KeyValueWriter) {
}
}

func newLeaf(ko int, key, val []byte, db ethdb.KeyValueWriter) *StackTrie {
func newLeaf(key, val []byte, db ethdb.KeyValueWriter) *StackTrie {
st := stackTrieFromPool(db)
st.nodeType = leafNode
st.keyOffset = ko
st.key = append(st.key, key[ko:]...)
st.key = append(st.key, key...)
st.val = val
return st
}

func newExt(ko int, key []byte, child *StackTrie, db ethdb.KeyValueWriter) *StackTrie {
func newExt(key []byte, child *StackTrie, db ethdb.KeyValueWriter) *StackTrie {
st := stackTrieFromPool(db)
st.nodeType = extNode
st.keyOffset = ko
st.key = append(st.key, key[ko:]...)
st.key = append(st.key, key...)
st.children[0] = child
return st
}
Expand Down Expand Up @@ -211,25 +204,26 @@ func (st *StackTrie) Reset() {
st.children[i] = nil
}
st.nodeType = emptyNode
st.keyOffset = 0
}

// Helper function that, given a full key, determines the index
// at which the chunk pointed by st.keyOffset is different from
// the same chunk in the full key.
func (st *StackTrie) getDiffIndex(key []byte) int {
diffindex := 0
for ; diffindex < len(st.key) && st.key[diffindex] == key[st.keyOffset+diffindex]; diffindex++ {
for idx, nibble := range st.key {
if nibble != key[idx] {
return idx
}
}
return diffindex
return len(st.key)
}

// Helper function to that inserts a (key, value) pair into
// the trie.
func (st *StackTrie) insert(key, value []byte) {
switch st.nodeType {
case branchNode: /* Branch */
idx := int(key[st.keyOffset])
idx := int(key[0])
// Unresolve elder siblings
for i := idx - 1; i >= 0; i-- {
if st.children[i] != nil {
Expand All @@ -241,10 +235,10 @@ func (st *StackTrie) insert(key, value []byte) {
}
// Add new child
if st.children[idx] == nil {
st.children[idx] = stackTrieFromPool(st.db)
st.children[idx].keyOffset = st.keyOffset + 1
st.children[idx] = newLeaf(key[1:], value, st.db)
} else {
st.children[idx].insert(key[1:], value)
}
st.children[idx].insert(key, value)
case extNode: /* Ext */
// Compare both key chunks and see where they differ
diffidx := st.getDiffIndex(key)
Expand All @@ -257,7 +251,7 @@ func (st *StackTrie) insert(key, value []byte) {
if diffidx == len(st.key) {
// Ext key and key segment are identical, recurse into
// the child node.
st.children[0].insert(key, value)
st.children[0].insert(key[diffidx:], value)
return
}
// Save the original part. Depending if the break is
Expand All @@ -266,7 +260,7 @@ func (st *StackTrie) insert(key, value []byte) {
// node directly.
var n *StackTrie
if diffidx < len(st.key)-1 {
n = newExt(diffidx+1, st.key, st.children[0], st.db)
n = newExt(st.key[diffidx+1:], st.children[0], st.db)
} else {
// Break on the last byte, no need to insert
// an extension node: reuse the current node
Expand All @@ -288,15 +282,14 @@ func (st *StackTrie) insert(key, value []byte) {
// node.
st.children[0] = stackTrieFromPool(st.db)
st.children[0].nodeType = branchNode
st.children[0].keyOffset = st.keyOffset + diffidx
p = st.children[0]
}
// Create a leaf for the inserted part
o := newLeaf(st.keyOffset+diffidx+1, key, value, st.db)
o := newLeaf(key[diffidx+1:], value, st.db)

// Insert both child leaves where they belong:
origIdx := st.key[diffidx]
newIdx := key[diffidx+st.keyOffset]
newIdx := key[diffidx]
p.children[origIdx] = n
p.children[newIdx] = o
st.key = st.key[:diffidx]
Expand Down Expand Up @@ -330,7 +323,6 @@ func (st *StackTrie) insert(key, value []byte) {
st.nodeType = extNode
st.children[0] = NewStackTrie(st.db)
st.children[0].nodeType = branchNode
st.children[0].keyOffset = st.keyOffset + diffidx
p = st.children[0]
}

Expand All @@ -339,19 +331,19 @@ func (st *StackTrie) insert(key, value []byte) {
// The child leave will be hashed directly in order to
// free up some memory.
origIdx := st.key[diffidx]
p.children[origIdx] = newLeaf(diffidx+1, st.key, st.val, st.db)
p.children[origIdx] = newLeaf(st.key[diffidx+1:], st.val, st.db)
p.children[origIdx].hash()

newIdx := key[diffidx+st.keyOffset]
p.children[newIdx] = newLeaf(p.keyOffset+1, key, value, st.db)
newIdx := key[diffidx]
p.children[newIdx] = newLeaf(key[diffidx+1:], value, st.db)

// Finally, cut off the key part that has been passed
// over to the children.
st.key = st.key[:diffidx]
st.val = nil
case emptyNode: /* Empty */
st.nodeType = leafNode
st.key = key[st.keyOffset:]
st.key = key
st.val = value
case hashedNode:
panic("trying to insert into hash")
Expand Down