From 04e5c08b0e6fe35d31646addd5a8c30c0baeae20 Mon Sep 17 00:00:00 2001
From: Faye Amacker <33205765+fxamacker@users.noreply.github.com>
Date: Tue, 1 Feb 2022 14:28:37 -0600
Subject: [PATCH 01/37] Add NewUniqueNodeIterator() to skip shared nodes

NewUniqueNodeIterator() can be used to optimize node iteration for a
forest. It skips shared sub-tries that were already visited and only
iterates unique nodes.
---
 ledger/complete/mtrie/flattener/iterator.go   |  50 ++++-
 .../complete/mtrie/flattener/iterator_test.go | 191 ++++++++++++++++++
 2 files changed, 236 insertions(+), 5 deletions(-)

diff --git a/ledger/complete/mtrie/flattener/iterator.go b/ledger/complete/mtrie/flattener/iterator.go
index 7e49a426471..ec552cc8108 100644
--- a/ledger/complete/mtrie/flattener/iterator.go
+++ b/ledger/complete/mtrie/flattener/iterator.go
@@ -38,7 +38,7 @@ type NodeIterator struct {
 	// no children, it can be recalled without restriction.
 	// * When popping node `n` from the stack, its parent `p` (if it exists) is now the
 	//   head of the stack.
-	//   - If `p` has only one child, this child is must be `n`.
+	//   - If `p` has only one child, this child must be `n`.
 	//     Therefore, by recalling `n`, we have recalled all ancestors of `p`.
 	//   - If `n` is the right child, we haven already searched through all of `p`
 	//     descendents (as the `p.LeftChild` must have been searched before)
@@ -53,6 +53,15 @@ type NodeIterator struct {
 	// This has the advantage, that we gracefully handle tries whose root node is nil.
 	unprocessedRoot *node.Node
 	stack           []*node.Node
+	// visitedNodes are nodes that were visited and can be skipped during
+	// traversal through dig(). visitedNodes is used to optimize node traversal
+	// IN FOREST by skipping nodes in shared sub-tries after they are visited,
+	// because sub-tries are shared between tries (original MTrie before register updates
+	// and updated MTrie after register writes).
+	// NodeIterator only uses visitedNodes for read operations.
+	// No special handling is needed if visitedNodes is nil.
+	// WARNING: visitedNodes is not safe for concurrent use.
+	visitedNodes map[*node.Node]uint64
 }
 
 // NewNodeIterator returns a node NodeIterator, which iterates through all nodes
@@ -75,6 +84,30 @@ func NewNodeIterator(mTrie *trie.MTrie) *NodeIterator {
 	return i
 }
 
+// NewUniqueNodeIterator returns a new NodeIterator, which iterates through all unique nodes
+// that weren't visited. This should be used for forest node iteration to avoid repeatedly
+// traversing shared sub-tries.
+// The Iterator guarantees a DESCENDANTS-FIRST-RELATIONSHIP in the sequence of nodes it generates:
+// * Consider the sequence of nodes, in the order they are generated by NodeIterator.
+//   Let `node[k]` denote the node with index `k` in this sequence.
+// * Descendents-First-Relationship means that for any `node[k]`, all its descendents
+//   have indices strictly smaller than k in the iterator's sequence.
+// The Descendents-First-Relationship has the following important property:
+// When re-building the Trie from the sequence of nodes, one can build the trie on the fly,
+// as for each node, the children have been previously encountered.
+// WARNING: visitedNodes is not safe for concurrent use.
+func NewUniqueNodeIterator(mTrie *trie.MTrie, visitedNodes map[*node.Node]uint64) *NodeIterator {
+	// For a Trie with height H (measured by number of edges), the longest possible path
+	// contains H+1 vertices.
+ stackSize := ledger.NodeMaxHeight + 1 + i := &NodeIterator{ + stack: make([]*node.Node, 0, stackSize), + visitedNodes: visitedNodes, + } + i.unprocessedRoot = mTrie.RootNode() + return i +} + func (i *NodeIterator) Next() bool { if i.unprocessedRoot != nil { // initial call to Next() for a non-empty trie @@ -125,15 +158,22 @@ func (i *NodeIterator) dig(n *node.Node) { if n == nil { return } + if _, found := i.visitedNodes[n]; found { + return + } for { i.stack = append(i.stack, n) if lChild := n.LeftChild(); lChild != nil { - n = lChild - continue + if _, found := i.visitedNodes[lChild]; !found { + n = lChild + continue + } } if rChild := n.RightChild(); rChild != nil { - n = rChild - continue + if _, found := i.visitedNodes[rChild]; !found { + n = rChild + continue + } } return } diff --git a/ledger/complete/mtrie/flattener/iterator_test.go b/ledger/complete/mtrie/flattener/iterator_test.go index 8fda1ca4dc7..a80d40ca74b 100644 --- a/ledger/complete/mtrie/flattener/iterator_test.go +++ b/ledger/complete/mtrie/flattener/iterator_test.go @@ -9,6 +9,7 @@ import ( "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" + "github.com/onflow/flow-go/ledger/complete/mtrie/node" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" ) @@ -73,3 +74,193 @@ func TestPopulatedTrie(t *testing.T) { require.False(t, itr.Next()) require.True(t, nil == itr.Value()) } + +func TestUniqueNodeIterator(t *testing.T) { + t.Run("empty trie", func(t *testing.T) { + emptyTrie := trie.NewEmptyMTrie() + + // visitedNodes is nil + itr := flattener.NewUniqueNodeIterator(emptyTrie, nil) + require.False(t, itr.Next()) + require.True(t, nil == itr.Value()) // initial iterator should return nil + + // visitedNodes is empty map + visitedNodes := make(map[*node.Node]uint64) + itr = flattener.NewUniqueNodeIterator(emptyTrie, visitedNodes) + require.False(t, itr.Next()) + require.True(t, nil == itr.Value()) // initial iterator should return nil + }) + + t.Run("trie", func(t *testing.T) { + emptyTrie := trie.NewEmptyMTrie() + + // key: 0000... + p1 := utils.PathByUint8(1) + v1 := utils.LightPayload8('A', 'a') + + // key: 0100.... + p2 := utils.PathByUint8(64) + v2 := utils.LightPayload8('B', 'b') + + paths := []ledger.Path{p1, p2} + payloads := []ledger.Payload{*v1, *v2} + + updatedTrie, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) + require.NoError(t, err) + + // n4 + // / + // / + // n3 + // / \ + // / \ + // n1 (p1/v1) n2 (p2/v2) + // + + expectedNodes := []*node.Node{ + updatedTrie.RootNode().LeftChild().LeftChild(), // n1 + updatedTrie.RootNode().LeftChild().RightChild(), // n2 + updatedTrie.RootNode().LeftChild(), // n3 + updatedTrie.RootNode(), // n4 + } + + // visitedNodes is nil + i := 0 + for itr := flattener.NewUniqueNodeIterator(updatedTrie, nil); itr.Next(); { + n := itr.Value() + require.True(t, i < len(expectedNodes)) + require.Equal(t, expectedNodes[i], n) + i++ + } + require.Equal(t, i, len(expectedNodes)) + + // visitedNodes is not nil, but it's pointless for iterating a single trie because + // there isn't any shared sub-trie. 
+		visitedNodes := make(map[*node.Node]uint64)
+		i = 0
+		for itr := flattener.NewUniqueNodeIterator(updatedTrie, visitedNodes); itr.Next(); {
+			n := itr.Value()
+			visitedNodes[n] = uint64(i)
+
+			require.True(t, i < len(expectedNodes))
+			require.Equal(t, expectedNodes[i], n)
+			i++
+		}
+		require.Equal(t, i, len(expectedNodes))
+	})
+
+	t.Run("forest", func(t *testing.T) {
+
+		// Forest is a slice of mtries to guarantee order.
+		f := make([]*trie.MTrie, 0)
+
+		emptyTrie := trie.NewEmptyMTrie()
+
+		// key: 0000...
+		p1 := utils.PathByUint8(1)
+		v1 := utils.LightPayload8('A', 'a')
+
+		// key: 0100....
+		p2 := utils.PathByUint8(64)
+		v2 := utils.LightPayload8('B', 'b')
+
+		paths := []ledger.Path{p1, p2}
+		payloads := []ledger.Payload{*v1, *v2}
+
+		trie1, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
+		require.NoError(t, err)
+
+		f = append(f, trie1)
+
+		//            n4
+		//           /
+		//          /
+		//         n3
+		//        /  \
+		//       /    \
+		//  n1 (p1/v1) n2 (p2/v2)
+		//
+
+		// New trie reuses its parent's left sub-trie.
+
+		// key: 1000...
+		p3 := utils.PathByUint8(128)
+		v3 := utils.LightPayload8('C', 'c')
+
+		// key: 1100....
+		p4 := utils.PathByUint8(192)
+		v4 := utils.LightPayload8('D', 'd')
+
+		paths = []ledger.Path{p3, p4}
+		payloads = []ledger.Payload{*v3, *v4}
+
+		trie2, err := trie.NewTrieWithUpdatedRegisters(trie1, paths, payloads, true)
+		require.NoError(t, err)
+
+		f = append(f, trie2)
+
+		//            n8
+		//           /  \
+		//          /    \
+		//         n3     n7
+		//     (shared)  /  \
+		//              /    \
+		//             n5     n6
+		//         (p3/v3)  (p4/v4)
+
+		// New trie reuses its parent's right sub-trie, and left sub-trie's leaf node.
+
+		// key: 0000...
+		v5 := utils.LightPayload8('E', 'e')
+
+		paths = []ledger.Path{p1}
+		payloads = []ledger.Payload{*v5}
+
+		trie3, err := trie.NewTrieWithUpdatedRegisters(trie2, paths, payloads, true)
+		require.NoError(t, err)
+
+		f = append(f, trie3)
+
+		//            n11
+		//           /   \
+		//          /     \
+		//         n10     n7
+		//        /  \  (shared)
+		//       /    \
+		//      n9     n2
+		//  (p1/v5) (shared)
+
+		expectedNodes := []*node.Node{
+			// unique nodes from trie1
+			trie1.RootNode().LeftChild().LeftChild(),   // n1
+			trie1.RootNode().LeftChild().RightChild(),  // n2
+			trie1.RootNode().LeftChild(),               // n3
+			trie1.RootNode(),                           // n4
+			// unique nodes from trie2
+			trie2.RootNode().RightChild().LeftChild(),  // n5
+			trie2.RootNode().RightChild().RightChild(), // n6
+			trie2.RootNode().RightChild(),              // n7
+			trie2.RootNode(),                           // n8
+			// unique nodes from trie3
+			trie3.RootNode().LeftChild().LeftChild(),   // n9
+			trie3.RootNode().LeftChild(),               // n10
+			trie3.RootNode(),                           // n11
+		}
+
+		// Use visitedNodes to prevent revisiting shared sub-tries.
+		visitedNodes := make(map[*node.Node]uint64)
+		i := 0
+		for _, trie := range f {
+			for itr := flattener.NewUniqueNodeIterator(trie, visitedNodes); itr.Next(); {
+				n := itr.Value()
+				visitedNodes[n] = uint64(i)
+
+				require.True(t, i < len(expectedNodes))
+				require.Equal(t, expectedNodes[i], n)
+				i++
+			}
+		}
+		require.Equal(t, i, len(expectedNodes))
+	})
+}

From dfafbd0f507e662c53b66190e434fa3d613df6ef Mon Sep 17 00:00:00 2001
From: Faye Amacker <33205765+fxamacker@users.noreply.github.com>
Date: Tue, 1 Feb 2022 14:59:01 -0600
Subject: [PATCH 02/37] Optimize FlattenForest() with NewUniqueNodeIterator

Use NewUniqueNodeIterator() in FlattenForest() to skip traversing
already-visited shared sub-tries while flattening the forest.
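For reference, the resulting traversal pattern in FlattenForest() looks
roughly like the sketch below (simplified from the diff that follows;
the conversion of each node to a StorableNode is elided). One allNodes
map is shared across the per-trie iterators, so any node already reached
through an earlier trie is skipped when later tries are walked:

    // allNodes doubles as the visited set and the node -> index mapping.
    allNodes := make(map[*node.Node]uint64)
    allNodes[nil] = 0    // index 0 is reserved to mean nil
    counter := uint64(1) // real node indices start at 1
    for _, t := range tries {
    	for itr := NewUniqueNodeIterator(t, allNodes); itr.Next(); {
    		n := itr.Value()
    		allNodes[n] = counter
    		counter++
    		// ... convert n to a StorableNode and append it here ...
    	}
    }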
---
 ledger/complete/mtrie/flattener/forest.go | 17 +++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/ledger/complete/mtrie/flattener/forest.go b/ledger/complete/mtrie/flattener/forest.go
index c2a7ac72f98..f9628eb9671 100644
--- a/ledger/complete/mtrie/flattener/forest.go
+++ b/ledger/complete/mtrie/flattener/forest.go
@@ -49,18 +49,15 @@ func FlattenForest(f *mtrie.Forest) (*FlattenedForest, error) {
 
 	counter := uint64(1) // start from 1, as 0 marks nil
 	for _, t := range tries {
-		for itr := NewNodeIterator(t); itr.Next(); {
+		for itr := NewUniqueNodeIterator(t, allNodes); itr.Next(); {
 			n := itr.Value()
-			// if node not in map
-			if _, has := allNodes[n]; !has {
-				allNodes[n] = counter
-				counter++
-				storableNode, err := toStorableNode(n, allNodes)
-				if err != nil {
-					return nil, fmt.Errorf("failed to construct storable node: %w", err)
-				}
-				storableNodes = append(storableNodes, storableNode)
+			allNodes[n] = counter
+			counter++
+			storableNode, err := toStorableNode(n, allNodes)
+			if err != nil {
+				return nil, fmt.Errorf("failed to construct storable node: %w", err)
 			}
+			storableNodes = append(storableNodes, storableNode)
 		}
 		//fix root nodes indices
 		// since we indexed all nodes, root must be present

From f185fd93e24bf1323a2feb8661039cca03d8a629 Mon Sep 17 00:00:00 2001
From: Faye Amacker <33205765+fxamacker@users.noreply.github.com>
Date: Wed, 2 Feb 2022 08:24:47 -0600
Subject: [PATCH 03/37] Skip StorableNode/StorableTrie for new checkpoint

- Merge FlattenForest() with StoreCheckpoint() to iterate and serialize
  nodes without creating intermediate StorableNode/StorableTrie objects.
- Stream encode nodes to avoid creating a 400+ million element slice
  holding all nodes.
- Change checkpoint file format (v4) to store node count and trie count
  in the footer (instead of the header), as required for stream encoding.
- Support previous checkpoint formats (v1, v3).
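For illustration only (not part of the diff), the v4 layout implied by
this change is roughly:

    magic (2B) | version (2B) | nodes ... | tries ... | node count (8B) | trie count (2B) | CRC32 (4B)

Because counts now sit at the end, a writer can stream nodes without
knowing their number up front, and a reader recovers the counts by
seeking from the end of the file. A minimal sketch of that footer read,
assuming these sizes (the helper name readV4Counts is made up for this
example; readCheckpointV4 in the diff below is the authoritative version):

    // readV4Counts returns the node and trie counts stored in the v4 footer.
    func readV4Counts(f *os.File) (nodeCount uint64, trieCount uint16, err error) {
    	const footerSize = 8 + 2            // node count + trie count
    	const footerOffset = footerSize + 4 // footer precedes the 4-byte CRC32 trailer
    	if _, err = f.Seek(-footerOffset, io.SeekEnd); err != nil {
    		return 0, 0, fmt.Errorf("cannot seek to footer: %w", err)
    	}
    	footer := make([]byte, footerSize)
    	if _, err = io.ReadFull(f, footer); err != nil {
    		return 0, 0, fmt.Errorf("cannot read footer: %w", err)
    	}
    	return binary.BigEndian.Uint64(footer[:8]), binary.BigEndian.Uint16(footer[8:]), nil
    }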
--- ledger/complete/ledger.go | 8 +- ledger/complete/mtrie/flattener/encoding.go | 56 ++-- .../complete/mtrie/flattener/encoding_test.go | 33 ++- ledger/complete/wal/checkpointer.go | 262 +++++++++++++++--- ledger/complete/wal/checkpointer_test.go | 76 +++-- .../wal/checkpointer_versioning_test.go | 132 +++++++++ ledger/complete/wal/test_data/checkpoint.v3 | Bin 0 -> 1249 bytes 7 files changed, 469 insertions(+), 98 deletions(-) create mode 100644 ledger/complete/wal/test_data/checkpoint.v3 diff --git a/ledger/complete/ledger.go b/ledger/complete/ledger.go index 79a7b8d69da..dc39a29e660 100644 --- a/ledger/complete/ledger.go +++ b/ledger/complete/ledger.go @@ -12,7 +12,6 @@ import ( "github.com/onflow/flow-go/ledger/common/hash" "github.com/onflow/flow-go/ledger/common/pathfinder" "github.com/onflow/flow-go/ledger/complete/mtrie" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/ledger/complete/wal" "github.com/onflow/flow-go/module" @@ -359,14 +358,9 @@ func (l *Ledger) ExportCheckpointAt( return ledger.State(hash.DummyHash), fmt.Errorf("failed to create a checkpoint writer: %w", err) } - flatTrie, err := flattener.FlattenTrie(newTrie) - if err != nil { - return ledger.State(hash.DummyHash), fmt.Errorf("failed to flatten the trie: %w", err) - } - l.logger.Info().Msg("storing the checkpoint to the file") - err = wal.StoreCheckpoint(flatTrie.ToFlattenedForestWithASingleTrie(), writer) + err = wal.StoreCheckpoint(writer, newTrie) if err != nil { return ledger.State(hash.DummyHash), fmt.Errorf("failed to store the checkpoint: %w", err) } diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index efc34137190..1e7a888dae3 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -4,42 +4,57 @@ import ( "fmt" "io" + "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/common/encoding" "github.com/onflow/flow-go/ledger/common/utils" + "github.com/onflow/flow-go/ledger/complete/mtrie/node" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" ) const encodingDecodingVersion = uint16(0) -// EncodeStorableNode encodes StorableNode -func EncodeStorableNode(storableNode *StorableNode) []byte { +// EncodeNode encodes node. 
+// TODO: reuse buffer +func EncodeNode(n *node.Node, lchildIndex uint64, rchildIndex uint64) []byte { + + encPayload := encoding.EncodePayload(n.Payload()) + + length := 2 + 2 + 8 + 8 + 2 + 8 + 2 + len(n.Path()) + 4 + len(encPayload) + 2 + len(n.Hash()) - length := 2 + 2 + 8 + 8 + 2 + 8 + 2 + len(storableNode.Path) + 4 + len(storableNode.EncPayload) + 2 + len(storableNode.HashValue) buf := make([]byte, 0, length) + // 2-bytes encoding version buf = utils.AppendUint16(buf, encodingDecodingVersion) // 2-bytes Big Endian uint16 height - buf = utils.AppendUint16(buf, storableNode.Height) + buf = utils.AppendUint16(buf, uint16(n.Height())) // 8-bytes Big Endian uint64 LIndex - buf = utils.AppendUint64(buf, storableNode.LIndex) + buf = utils.AppendUint64(buf, lchildIndex) // 8-bytes Big Endian uint64 RIndex - buf = utils.AppendUint64(buf, storableNode.RIndex) + buf = utils.AppendUint64(buf, rchildIndex) // 2-bytes Big Endian maxDepth - buf = utils.AppendUint16(buf, storableNode.MaxDepth) + buf = utils.AppendUint16(buf, n.MaxDepth()) // 8-bytes Big Endian regCount - buf = utils.AppendUint64(buf, storableNode.RegCount) + buf = utils.AppendUint64(buf, n.RegCount()) // 2-bytes Big Endian uint16 encoded path length and n-bytes encoded path - buf = utils.AppendShortData(buf, storableNode.Path) + path := n.Path() + if path != nil { + buf = utils.AppendShortData(buf, path[:]) + } else { + buf = utils.AppendShortData(buf, nil) + } // 4-bytes Big Endian uint32 encoded payload length and n-bytes encoded payload - buf = utils.AppendLongData(buf, storableNode.EncPayload) + buf = utils.AppendLongData(buf, encPayload) // 2-bytes Big Endian uint16 hashValue length and n-bytes hashValue - buf = utils.AppendShortData(buf, storableNode.HashValue) + hash := n.Hash() + buf = utils.AppendShortData(buf, hash[:]) return buf } @@ -122,18 +137,27 @@ func ReadStorableNode(reader io.Reader) (*StorableNode, error) { return storableNode, nil } -// EncodeStorableTrie encodes StorableTrie -func EncodeStorableTrie(storableTrie *StorableTrie) []byte { - length := 2 + 8 + 2 + len(storableTrie.RootHash) +// EncodeTrie encodes trie root node +// TODO: reuse buffer +func EncodeTrie(rootNode *node.Node, rootIndex uint64) []byte { + // Get root hash + var rootHash ledger.RootHash + if rootNode == nil { + rootHash = trie.EmptyTrieRootHash() + } else { + rootHash = ledger.RootHash(rootNode.Hash()) + } + + length := 2 + 8 + 2 + len(rootHash) buf := make([]byte, 0, length) // 2-bytes encoding version buf = utils.AppendUint16(buf, encodingDecodingVersion) // 8-bytes Big Endian uint64 RootIndex - buf = utils.AppendUint64(buf, storableTrie.RootIndex) + buf = utils.AppendUint64(buf, rootIndex) // 2-bytes Big Endian uint16 RootHash length and n-bytes RootHash - buf = utils.AppendShortData(buf, storableTrie.RootHash) + buf = utils.AppendShortData(buf, rootHash[:]) return buf } diff --git a/ledger/complete/mtrie/flattener/encoding_test.go b/ledger/complete/mtrie/flattener/encoding_test.go index f1c2657371e..d4c790fb52e 100644 --- a/ledger/complete/mtrie/flattener/encoding_test.go +++ b/ledger/complete/mtrie/flattener/encoding_test.go @@ -7,21 +7,28 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/encoding" + "github.com/onflow/flow-go/ledger/common/hash" "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" + "github.com/onflow/flow-go/ledger/complete/mtrie/node" ) func 
TestStorableNode(t *testing.T) { path := utils.PathByUint8(3) + payload := utils.LightPayload8('A', 'a') + hashValue := hash.Hash([32]byte{4, 4, 4}) + + n := node.NewNode(2137, nil, nil, path, payload, hashValue, 7, 5000) storableNode := &flattener.StorableNode{ LIndex: 1, RIndex: 2, Height: 2137, Path: path[:], - EncPayload: encoding.EncodePayload(utils.LightPayload8('A', 'a')), - HashValue: []byte{4, 4, 4}, + EncPayload: encoding.EncodePayload(payload), + HashValue: hashValue[:], MaxDepth: 7, RegCount: 5000, } @@ -39,12 +46,15 @@ func TestStorableNode(t *testing.T) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // path data 0, 0, 0, 25, // payload data len 0, 0, 6, 0, 0, 0, 9, 0, 1, 0, 0, 0, 3, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 1, 97, // payload data - 0, 3, // hashValue length - 4, 4, 4, // hashValue + 0, 32, // hashValue length + 4, 4, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, // hashValue } t.Run("encode", func(t *testing.T) { - data := flattener.EncodeStorableNode(storableNode) + data := flattener.EncodeNode(n, 1, 2) assert.Equal(t, expected, data) }) @@ -57,21 +67,28 @@ func TestStorableNode(t *testing.T) { } func TestStorableTrie(t *testing.T) { + hashValue := hash.Hash([32]byte{2, 2, 2}) + + rootNode := node.NewNode(256, nil, nil, ledger.DummyPath, nil, hashValue, 7, 5000) storableTrie := &flattener.StorableTrie{ RootIndex: 21, - RootHash: []byte{2, 2, 2}, + RootHash: hashValue[:], } // Version 0 expected := []byte{ 0, 0, // encoding version 0, 0, 0, 0, 0, 0, 0, 21, // RootIndex - 0, 3, 2, 2, 2, // RootHash length + data + 0, 32, // RootHash length + 2, 2, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, // RootHash data } t.Run("encode", func(t *testing.T) { - data := flattener.EncodeStorableTrie(storableTrie) + data := flattener.EncodeTrie(rootNode, 21) assert.Equal(t, expected, data) }) diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index 0b753476a39..e115293fda9 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -3,6 +3,7 @@ package wal import ( "bufio" "encoding/binary" + "encoding/hex" "fmt" "io" "io/ioutil" @@ -15,6 +16,7 @@ import ( "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/complete/mtrie" "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" + "github.com/onflow/flow-go/ledger/complete/mtrie/node" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/model/bootstrap" "github.com/onflow/flow-go/module/metrics" @@ -30,6 +32,10 @@ const VersionV1 uint16 = 0x01 // Version 3 contains a file checksum for detecting corrupted checkpoint files. const VersionV3 uint16 = 0x03 +// Version 4 contains a footer with node count and trie count (previously in the header). +// Version 4 also reduces checkpoint data size. See EncodeNode() and EncodeTrie() for more details. 
+const VersionV4 uint16 = 0x04 + type Checkpointer struct { dir string wal *DiskWAL @@ -185,11 +191,9 @@ func (c *Checkpointer) Checkpoint(to int, targetWriter func() (io.WriteCloser, e return fmt.Errorf("cannot replay WAL: %w", err) } - c.wal.log.Info().Msgf("flattening forest for checkpoint %d", to) - - forestSequencing, err := flattener.FlattenForest(forest) + tries, err := forest.GetTries() if err != nil { - return fmt.Errorf("cannot get storables: %w", err) + return fmt.Errorf("cannot get forest tries: %w", err) } c.wal.log.Info().Msgf("serializing checkpoint %d", to) @@ -206,7 +210,7 @@ func (c *Checkpointer) Checkpoint(to int, targetWriter func() (io.WriteCloser, e } }() - err = StoreCheckpoint(forestSequencing, writer) + err = StoreCheckpoint(writer, tries...) return err } @@ -251,40 +255,102 @@ func CreateCheckpointWriterForFile(dir, filename string) (io.WriteCloser, error) } // StoreCheckpoint writes the given checkpoint to disk, and also append with a CRC32 file checksum for integrity check. -func StoreCheckpoint(forestSequencing *flattener.FlattenedForest, writer io.Writer) error { - storableNodes := forestSequencing.Nodes - storableTries := forestSequencing.Tries - header := make([]byte, 4+8+2) +func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { + + var err error crc32Writer := NewCRC32Writer(writer) + // Write header: magic (2 bytes) + version (2 bytes) + header := make([]byte, 4) pos := writeUint16(header, 0, MagicBytes) - pos = writeUint16(header, pos, VersionV3) - pos = writeUint64(header, pos, uint64(len(storableNodes)-1)) // -1 to account for 0 node meaning nil - writeUint16(header, pos, uint16(len(storableTries))) + _ = writeUint16(header, pos, VersionV4) - _, err := crc32Writer.Write(header) + _, err = crc32Writer.Write(header) if err != nil { return fmt.Errorf("cannot write checkpoint header: %w", err) } - // 0 element = nil, we don't need to store it - for i := 1; i < len(storableNodes); i++ { - bytes := flattener.EncodeStorableNode(storableNodes[i]) - _, err = crc32Writer.Write(bytes) - if err != nil { - return fmt.Errorf("error while writing node date: %w", err) + // assign unique value to every node + allNodes := make(map[*node.Node]uint64) + allNodes[nil] = 0 // 0th element is nil + + allRootNodes := make([]*node.Node, len(tries)) + + // Serialize all unique nodes + nodeCounter := uint64(1) // start from 1, as 0 marks nil + for i, t := range tries { + + // Traverse all unique nodes for trie t. + for itr := flattener.NewUniqueNodeIterator(t, allNodes); itr.Next(); { + n := itr.Value() + + allNodes[n] = nodeCounter + nodeCounter++ + + var lchildIndex, rchildIndex uint64 + + if lchild := n.LeftChild(); lchild != nil { + var found bool + lchildIndex, found = allNodes[lchild] + if !found { + hash := lchild.Hash() + return fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(hash[:])) + } + } + if rchild := n.RightChild(); rchild != nil { + var found bool + rchildIndex, found = allNodes[rchild] + if !found { + hash := rchild.Hash() + return fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(hash[:])) + } + } + + // TODO: reuse scratch buffer for encoding + bytes := flattener.EncodeNode(n, lchildIndex, rchildIndex) + _, err = crc32Writer.Write(bytes) + if err != nil { + return fmt.Errorf("error while writing node data: %w", err) + } } + + // Save trie root for serialization later. 
+ allRootNodes[i] = t.RootNode() } - for _, storableTrie := range storableTries { - bytes := flattener.EncodeStorableTrie(storableTrie) + // Serialize trie root nodes + for _, rootNode := range allRootNodes { + // Get root node index + rootIndex, found := allNodes[rootNode] + if !found { + var rootHash ledger.RootHash + if rootNode == nil { + rootHash = trie.EmptyTrieRootHash() + } else { + rootHash = ledger.RootHash(rootNode.Hash()) + } + return fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(rootHash[:])) + } + + // TODO: reuse scratch buffer for encoding + bytes := flattener.EncodeTrie(rootNode, rootIndex) _, err = crc32Writer.Write(bytes) if err != nil { - return fmt.Errorf("error while writing trie date: %w", err) + return fmt.Errorf("error while writing trie data: %w", err) } } + // Write footer with nodes count and tries count + footer := make([]byte, 10) + pos = writeUint64(footer, 0, uint64(len(allNodes)-1)) // -1 to account for 0 node meaning nil + writeUint16(footer, pos, uint16(len(allRootNodes))) + + _, err = crc32Writer.Write(footer) + if err != nil { + return fmt.Errorf("cannot write checkpoint footer: %w", err) + } + // add CRC32 sum crc32buf := make([]byte, 4) writeUint32(crc32buf, 0, crc32Writer.Crc32()) @@ -330,38 +396,71 @@ func LoadCheckpoint(filepath string) (*flattener.FlattenedForest, error) { _ = file.Close() }() - return ReadCheckpoint(file) + return readCheckpoint(file) } -func ReadCheckpoint(r io.Reader) (*flattener.FlattenedForest, error) { - - var bufReader io.Reader = bufio.NewReader(r) - crcReader := NewCRC32Reader(bufReader) - var reader io.Reader = crcReader - - header := make([]byte, 4+8+2) - - _, err := io.ReadFull(reader, header) +func readCheckpoint(f *os.File) (*flattener.FlattenedForest, error) { + // Read header: magic (2 bytes) + version (2 bytes) + header := make([]byte, 4) + _, err := io.ReadFull(f, header) if err != nil { return nil, fmt.Errorf("cannot read header bytes: %w", err) } magicBytes, pos := readUint16(header, 0) - version, pos := readUint16(header, pos) - nodesCount, pos := readUint64(header, pos) - triesCount, _ := readUint16(header, pos) + version, _ := readUint16(header, pos) if magicBytes != MagicBytes { return nil, fmt.Errorf("unknown file format. Magic constant %x does not match expected %x", magicBytes, MagicBytes) } - if version != VersionV1 && version != VersionV3 { + + // Reset offset + _, err = f.Seek(0, io.SeekStart) + if err != nil { + return nil, fmt.Errorf("cannot seek to start of file: %w", err) + } + + switch version { + case VersionV1, VersionV3: + return readCheckpointV3AndEarlier(f, version) + case VersionV4: + return readCheckpointV4(f) + default: return nil, fmt.Errorf("unsupported file version %x ", version) } +} + +// readCheckpointV3AndEarlier deserializes checkpoint file (version 3 and earlier) and returns flattened forest. +// Header (magic and version) are verified by the caller. +// TODO: return []*trie.MTrie directly without conversion to FlattenedForest. 
+func readCheckpointV3AndEarlier(f *os.File, version uint16) (*flattener.FlattenedForest, error) { + + var bufReader io.Reader = bufio.NewReader(f) + crcReader := NewCRC32Reader(bufReader) + + var reader io.Reader if version != VersionV3 { - reader = bufReader //switch back to plain reader + reader = bufReader + } else { + reader = crcReader + } + + // Header has: magic (2 bytes) + version (2 bytes) + node count (8 bytes) + trie count (2 bytes) + header := make([]byte, 2+2+8+2) + + _, err := io.ReadFull(reader, header) + if err != nil { + return nil, fmt.Errorf("cannot read header bytes: %w", err) } + // Magic and version are verified by the caller. + + // Get node count and trie count + const nodesCountOffset = 2 + 2 + nodesCount, pos := readUint64(header, nodesCountOffset) + triesCount, _ := readUint16(header, pos) + nodes := make([]*flattener.StorableNode, nodesCount+1) //+1 for 0 index meaning nil tries := make([]*flattener.StorableTrie, triesCount) @@ -404,6 +503,95 @@ func ReadCheckpoint(r io.Reader) (*flattener.FlattenedForest, error) { } +// readCheckpointV4 deserializes checkpoint file (version 4) and returns flattened forest. +// Checkpoint file header (magic and version) are verified by the caller. +func readCheckpointV4(f *os.File) (*flattener.FlattenedForest, error) { + + // Read footer to get node count and trie count + + // footer offset: nodes count (8 bytes) + tries count (2 bytes) + CRC32 sum (4 bytes) + const footerOffset = 8 + 2 + 4 + const footerSize = 8 + 2 // footer doesn't include crc32 sum + + _, err := f.Seek(-footerOffset, io.SeekEnd) + if err != nil { + return nil, fmt.Errorf("cannot seek to footer: %w", err) + } + + footer := make([]byte, footerSize) + + _, err = io.ReadFull(f, footer) + if err != nil { + return nil, fmt.Errorf("cannot read footer bytes: %w", err) + } + + nodesCount, pos := readUint64(footer, 0) + triesCount, _ := readUint16(footer, pos) + + // Seek to the start of file + _, err = f.Seek(0, io.SeekStart) + if err != nil { + return nil, fmt.Errorf("cannot seek to start of file: %w", err) + } + + var bufReader io.Reader = bufio.NewReader(f) + crcReader := NewCRC32Reader(bufReader) + var reader io.Reader = crcReader + + // Read header: magic (2 bytes) + version (2 bytes) + // No action is needed for header because it is verified by the caller. + header := make([]byte, 4) + + _, err = io.ReadFull(reader, header) + if err != nil { + return nil, fmt.Errorf("cannot read header bytes: %w", err) + } + + nodes := make([]*flattener.StorableNode, nodesCount+1) //+1 for 0 index meaning nil + tries := make([]*flattener.StorableTrie, triesCount) + + for i := uint64(1); i <= nodesCount; i++ { + storableNode, err := flattener.ReadStorableNode(reader) + if err != nil { + return nil, fmt.Errorf("cannot read storable node %d: %w", i, err) + } + nodes[i] = storableNode + } + + for i := uint16(0); i < triesCount; i++ { + storableTrie, err := flattener.ReadStorableTrie(reader) + if err != nil { + return nil, fmt.Errorf("cannot read storable trie %d: %w", i, err) + } + tries[i] = storableTrie + } + + // Read footer again for crc32 computation + // No action is needed. 
+ _, err = io.ReadFull(reader, footer) + if err != nil { + return nil, fmt.Errorf("cannot read footer bytes: %w", err) + } + + crc32buf := make([]byte, 4) + _, err = bufReader.Read(crc32buf) + if err != nil { + return nil, fmt.Errorf("error while reading CRC32 checksum: %w", err) + } + readCrc32, _ := readUint32(crc32buf, 0) + + calculatedCrc32 := crcReader.Crc32() + + if calculatedCrc32 != readCrc32 { + return nil, fmt.Errorf("checkpoint checksum failed! File contains %x but read data checksums to %x", readCrc32, calculatedCrc32) + } + + return &flattener.FlattenedForest{ + Nodes: nodes, + Tries: tries, + }, nil +} + func writeUint16(buffer []byte, location int, value uint16) int { binary.BigEndian.PutUint16(buffer[location:], value) return location + 2 diff --git a/ledger/complete/wal/checkpointer_test.go b/ledger/complete/wal/checkpointer_test.go index ef7cb6d0ef0..4576cca97f3 100644 --- a/ledger/complete/wal/checkpointer_test.go +++ b/ledger/complete/wal/checkpointer_test.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "io/ioutil" "math/rand" "os" "path" @@ -503,45 +504,60 @@ func randomlyModifyFile(t *testing.T, filename string) { func Test_StoringLoadingCheckpoints(t *testing.T) { - // some hash will be literally copied into the output file - // so we can find it and modify - to make sure we get a different checksum - // but not fail process by, for example, modifying saved data length causing EOF - someHash := []byte{22, 22, 22} - forest := &flattener.FlattenedForest{ - Nodes: []*flattener.StorableNode{ - {}, {}, - }, - Tries: []*flattener.StorableTrie{ - {}, { - RootHash: someHash, - }, - }, - } - buffer := &bytes.Buffer{} + unittest.RunWithTempDir(t, func(dir string) { + // some hash will be literally encoded in output file + // so we can find it and modify - to make sure we get a different checksum + // but not fail process by, for example, modifying saved data length causing EOF - err := realWAL.StoreCheckpoint(forest, buffer) - require.NoError(t, err) + emptyTrie := trie.NewEmptyMTrie() - // copy buffer data - bytes2 := buffer.Bytes()[:] + p1 := utils.PathByUint8(0) + v1 := utils.LightPayload8('A', 'a') - t.Run("works without data modification", func(t *testing.T) { + p2 := utils.PathByUint8(1) + v2 := utils.LightPayload8('B', 'b') - // first buffer reads ok - _, err = realWAL.ReadCheckpoint(buffer) + paths := []ledger.Path{p1, p2} + payloads := []ledger.Payload{*v1, *v2} + + updatedTrie, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) require.NoError(t, err) - }) - t.Run("detects modified data", func(t *testing.T) { + someHash := updatedTrie.RootNode().LeftChild().Hash() // Hash of left child - index := bytes.Index(bytes2, someHash) - bytes2[index] = 23 + file, err := ioutil.TempFile(dir, "temp-checkpoint") + filepath := file.Name() + require.NoError(t, err) - _, err = realWAL.ReadCheckpoint(bytes.NewBuffer(bytes2)) - require.Error(t, err) - require.Contains(t, err.Error(), "checksum") - }) + err = realWAL.StoreCheckpoint(file, updatedTrie) + require.NoError(t, err) + + file.Close() + + t.Run("works without data modification", func(t *testing.T) { + + // first buffer reads ok + _, err = realWAL.LoadCheckpoint(filepath) + require.NoError(t, err) + }) + + t.Run("detects modified data", func(t *testing.T) { + + b, err := ioutil.ReadFile(filepath) + require.NoError(t, err) + + index := bytes.Index(b, someHash[:]) + require.NotEqual(t, -1, index) + b[index] = 23 + err = os.WriteFile(filepath, b, 0644) + require.NoError(t, err) + + _, err = 
realWAL.LoadCheckpoint(filepath) + require.Error(t, err) + require.Contains(t, err.Error(), "checksum") + }) + }) } func loadIntoForest(forest *mtrie.Forest, forestSequencing *flattener.FlattenedForest) error { diff --git a/ledger/complete/wal/checkpointer_versioning_test.go b/ledger/complete/wal/checkpointer_versioning_test.go index 9f2ead3e3a4..e5f86cedeca 100644 --- a/ledger/complete/wal/checkpointer_versioning_test.go +++ b/ledger/complete/wal/checkpointer_versioning_test.go @@ -1,10 +1,12 @@ package wal import ( + "encoding/hex" "testing" "github.com/stretchr/testify/require" + "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" ) @@ -50,3 +52,133 @@ func Test_LoadingV1Checkpoint(t *testing.T) { require.Equal(t, v1Forest, forest) } + +func Test_LoadingV3Checkpoint(t *testing.T) { + + forest, err := LoadCheckpoint("test_data/checkpoint.v3") + require.NoError(t, err) + + expectedRootHash := [4]ledger.RootHash{ + mustToHash("568f4ec740fe3b5de88034cb7b1fbddb41548b068f31aebc8ae9189e429c5749"), // empty trie root hash + mustToHash("f53f9696b85b7428227f1b39f40b2ce07c175f58dea2b86cb6f84dc7c9fbeabd"), + mustToHash("7ac8daf34733cce3d5d03b5a1afde33a572249f81c45da91106412e94661e109"), + mustToHash("63df641430e5e0745c3d99ece6ac209467ccfdb77e362e7490a830db8e8803ae"), + } + + tries, err := flattener.RebuildTries(forest) + require.NoError(t, err) + require.Equal(t, len(expectedRootHash), len(tries)) + + for i, trie := range tries { + require.Equal(t, expectedRootHash[i], trie.RootHash()) + require.True(t, trie.RootNode().VerifyCachedHash()) + } +} + +func mustToHash(s string) ledger.RootHash { + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + h, err := ledger.ToRootHash(b) + if err != nil { + panic(err) + } + return h +} + +/* +// CreateCheckpointV3 is used to create checkpoint.v3 test file used by Test_LoadingV3Checkpoint. +func CreateCheckpointV3() { + + f, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + require.NoError(t, err) + + emptyTrie := trie.NewEmptyMTrie() + + // key: 0000... + p1 := utils.PathByUint8(1) + v1 := utils.LightPayload8('A', 'a') + + // key: 0100.... + p2 := utils.PathByUint8(64) + v2 := utils.LightPayload8('B', 'b') + + paths := []ledger.Path{p1, p2} + payloads := []ledger.Payload{*v1, *v2} + + trie1, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) + require.NoError(t, err) + + f.AddTrie(trie1) + + // n4 + // / + // / + // n3 + // / \ + // / \ + // n1 (p1/v1) n2 (p2/v2) + // + + // New trie reuses its parent's left sub-trie. + + // key: 1000... + p3 := utils.PathByUint8(128) + v3 := utils.LightPayload8('C', 'c') + + // key: 1100.... + p4 := utils.PathByUint8(192) + v4 := utils.LightPayload8('D', 'd') + + paths = []ledger.Path{p3, p4} + payloads = []ledger.Payload{*v3, *v4} + + trie2, err := trie.NewTrieWithUpdatedRegisters(trie1, paths, payloads, true) + require.NoError(t, err) + + f.AddTrie(trie2) + + // n8 + // / \ + // / \ + // n3 n7 + // (shared) / \ + // / \ + // n5 n6 + // (p3/v3) (p4/v4) + + // New trie reuses its parent's right sub-trie, and left sub-trie's leaf node. + + // key: 0000... 
+	v5 := utils.LightPayload8('E', 'e')
+
+	paths = []ledger.Path{p1}
+	payloads = []ledger.Payload{*v5}
+
+	trie3, err := trie.NewTrieWithUpdatedRegisters(trie2, paths, payloads, true)
+	require.NoError(t, err)
+
+	f.AddTrie(trie3)
+
+	//            n11
+	//           /   \
+	//          /     \
+	//         n10     n7
+	//        /  \  (shared)
+	//       /    \
+	//      n9     n2
+	//  (p1/v5) (shared)
+
+	flattenedForest, err := flattener.FlattenForest(f)
+	require.NoError(t, err)
+
+	file, err := os.OpenFile("checkpoint.v3", os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
+	require.NoError(t, err)
+
+	err = realWAL.StoreCheckpoint(flattenedForest, file)
+	require.NoError(t, err)
+
+	file.Close()
+}
+*/
diff --git a/ledger/complete/wal/test_data/checkpoint.v3 b/ledger/complete/wal/test_data/checkpoint.v3
new file mode 100644
index 0000000000000000000000000000000000000000..e7745c797eb87b16b20830a96d0f3bbb0689b8f9
GIT binary patch
literal 1249
zcmY#1XJBT40B!~rAmbmDhe?&Tns`
=H+*Lncr1rWIFF%vwP3!MIFs8cU<}xwLn3<{AK!?z_r
-4n~7{@o}=)cxd^3Fb#G@$g*}j1_t;#V&jjsPsQfIh1AsdjrH_f{G|GzkGMFf#+@3
(sg;;Zm(}zZ!q1({|NuK4?Hdb%!mab5
Mm#>^-l82j+kW^SKl%IBUZ8DwgQ5ZI96S_yP&h-?GA1)9*sK=~F@2{YT=3}hzPzg1
!l^U)LjTMWTi+#@wnv!Xy9}rluk{D;*b9|Hx84OR&X~fWFnj4?mcF$+W|ZDYk6HKT
%afI}{YnB3v~4JK@zo2~QBr>&TZJom{*ZCK
HBlf%=%riYLr$O?yg>oC18f{t0zD{Pp{f{D859(Hxk3(Jd&c(5F8|}1eY&A+HFhCu7!-&|>=5M~L`4TgEjNP#
LA@`Q@Pg(!~yyyq9

literal 0
HcmV?d00001

From 8b03a22c12f51cb1bcb41b2d6e97554b7025be5d Mon Sep 17 00:00:00 2001
From: Faye Amacker <33205765+fxamacker@users.noreply.github.com>
Date: Wed, 2 Feb 2022 16:55:42 -0600
Subject: [PATCH 04/37] Skip StorableNode/Trie when reading checkpoint

- Merge RebuildTries() with LoadCheckpoint() to deserialize data to
  nodes without creating intermediate StorableNode/StorableTrie objects.
- Avoid creating a 400+ million element slice holding all StorableNodes
  read from the checkpoint file.
- DiskWal.Replay*() APIs are changed. checkpointFn receives []*trie.MTrie
  instead of FlattenedForest.
- Remove files containing StorableNode/StorableTrie/FlattenedForest etc.
* mtrie/flattener/forest.go * mtrie/flattener/forest_test.go * mtrie/flattener/storables.go * mtrie/flattener/trie.go * mtrie/flattener/trie_test.go --- cmd/util/cmd/checkpoint-list-tries/cmd.go | 6 +- .../cmd/read-execution-state/list-wals/cmd.go | 4 +- ledger/complete/mtrie/flattener/encoding.go | 119 ++++++++--- .../complete/mtrie/flattener/encoding_test.go | 132 +++++++----- ledger/complete/mtrie/flattener/forest.go | 191 ------------------ .../complete/mtrie/flattener/forest_test.go | 72 ------- ledger/complete/mtrie/flattener/storables.go | 18 -- ledger/complete/mtrie/flattener/trie.go | 74 ------- ledger/complete/mtrie/flattener/trie_test.go | 56 ----- ledger/complete/wal/checkpointer.go | 107 +++++----- ledger/complete/wal/checkpointer_test.go | 25 +-- .../wal/checkpointer_versioning_test.go | 56 ++--- ledger/complete/wal/compactor_test.go | 19 +- ledger/complete/wal/fixtures/noopwal.go | 6 +- ledger/complete/wal/test_data/checkpoint.v1 | Bin 122 -> 1245 bytes ledger/complete/wal/wal.go | 20 +- 16 files changed, 272 insertions(+), 633 deletions(-) delete mode 100644 ledger/complete/mtrie/flattener/forest.go delete mode 100644 ledger/complete/mtrie/flattener/forest_test.go delete mode 100644 ledger/complete/mtrie/flattener/storables.go delete mode 100644 ledger/complete/mtrie/flattener/trie.go delete mode 100644 ledger/complete/mtrie/flattener/trie_test.go diff --git a/cmd/util/cmd/checkpoint-list-tries/cmd.go b/cmd/util/cmd/checkpoint-list-tries/cmd.go index 105f7408fb7..bfad7c18bef 100644 --- a/cmd/util/cmd/checkpoint-list-tries/cmd.go +++ b/cmd/util/cmd/checkpoint-list-tries/cmd.go @@ -28,12 +28,12 @@ func init() { func run(*cobra.Command, []string) { - flattenedForest, err := wal.LoadCheckpoint(flagCheckpoint) + tries, err := wal.LoadCheckpoint(flagCheckpoint) if err != nil { log.Fatal().Err(err).Msg("error while loading checkpoint") } - for _, trie := range flattenedForest.Tries { - fmt.Printf("%x\n", trie.RootHash) + for _, trie := range tries { + fmt.Printf("%x\n", trie.RootHash()) } } diff --git a/cmd/util/cmd/read-execution-state/list-wals/cmd.go b/cmd/util/cmd/read-execution-state/list-wals/cmd.go index 6d71edd0da4..f2a91553a55 100644 --- a/cmd/util/cmd/read-execution-state/list-wals/cmd.go +++ b/cmd/util/cmd/read-execution-state/list-wals/cmd.go @@ -11,7 +11,7 @@ import ( "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/pathfinder" "github.com/onflow/flow-go/ledger/complete" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/ledger/complete/wal" "github.com/onflow/flow-go/module/metrics" ) @@ -52,7 +52,7 @@ func run(*cobra.Command, []string) { }() err = w.ReplayLogsOnly( - func(forestSequencing *flattener.FlattenedForest) error { + func(tries []*trie.MTrie) error { fmt.Printf("forest sequencing \n") return nil }, diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 1e7a888dae3..c2e209c9632 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -1,11 +1,13 @@ package flattener import ( + "bytes" "fmt" "io" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/encoding" + "github.com/onflow/flow-go/ledger/common/hash" "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete/mtrie/node" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" @@ -59,26 +61,27 @@ func EncodeNode(n *node.Node, 
lchildIndex uint64, rchildIndex uint64) []byte {
 	return buf
 }
 
-// ReadStorableNode reads a storable node from io
-func ReadStorableNode(reader io.Reader) (*StorableNode, error) {
+// ReadNode reconstructs a node from data read from reader.
+// TODO: reuse read buffer
+func ReadNode(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*node.Node, error) {
 
 	// reading version
 	buf := make([]byte, 2)
 	read, err := io.ReadFull(reader, buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node, cannot read version part: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node, cannot read version part: %w", err)
 	}
 	if read != len(buf) {
-		return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf))
+		return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf))
 	}
 
 	version, _, err := utils.ReadUint16(buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node: %w", err)
 	}
 
 	if version > encodingDecodingVersion {
-		return nil, fmt.Errorf("error reading storable node: unsuported version %d > %d", version, encodingDecodingVersion)
+		return nil, fmt.Errorf("failed to read serialized node: unsupported version %d > %d", version, encodingDecodingVersion)
 	}
 
 	// reading fixed-length part
@@ -86,55 +89,99 @@
 	read, err = io.ReadFull(reader, buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node, cannot read fixed-length part: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node, cannot read fixed-length part: %w", err)
 	}
 	if read != len(buf) {
-		return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf))
+		return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf))
 	}
 
-	storableNode := &StorableNode{}
+	var height, maxDepth uint16
+	var lchildIndex, rchildIndex, regCount uint64
+	var path, hashValue, encPayload []byte
 
-	storableNode.Height, buf, err = utils.ReadUint16(buf)
+	height, buf, err = utils.ReadUint16(buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node: %w", err)
 	}
 
-	storableNode.LIndex, buf, err = utils.ReadUint64(buf)
+	lchildIndex, buf, err = utils.ReadUint64(buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node: %w", err)
 	}
 
-	storableNode.RIndex, buf, err = utils.ReadUint64(buf)
+	rchildIndex, buf, err = utils.ReadUint64(buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node: %w", err)
 	}
 
-	storableNode.MaxDepth, buf, err = utils.ReadUint16(buf)
+	maxDepth, buf, err = utils.ReadUint16(buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node: %w", err)
 	}
 
-	storableNode.RegCount, _, err = utils.ReadUint64(buf)
+	regCount, _, err = utils.ReadUint64(buf)
 	if err != nil {
-		return nil, fmt.Errorf("error reading storable node: %w", err)
+		return nil, fmt.Errorf("failed to read serialized node: %w", err)
 	}
 
-	storableNode.Path, err = utils.ReadShortDataFromReader(reader)
+	path, err = utils.ReadShortDataFromReader(reader)
 	if err != nil {
 		return nil, fmt.Errorf("cannot read key data: %w", err)
 	}
 
-
storableNode.EncPayload, err = utils.ReadLongDataFromReader(reader) + encPayload, err = utils.ReadLongDataFromReader(reader) if err != nil { return nil, fmt.Errorf("cannot read value data: %w", err) } - storableNode.HashValue, err = utils.ReadShortDataFromReader(reader) + hashValue, err = utils.ReadShortDataFromReader(reader) if err != nil { return nil, fmt.Errorf("cannot read hashValue data: %w", err) } - return storableNode, nil + // Create (and copy) hash from raw data. + nodeHash, err := hash.ToHash(hashValue) + if err != nil { + return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) + } + + if len(path) > 0 { + // Create (and copy) path from raw data. + path, err := ledger.ToPath(path) + if err != nil { + return nil, fmt.Errorf("failed to decode path from checkpoint: %w", err) + } + + // Decode payload (payload data isn't copied). + payload, err := encoding.DecodePayload(encPayload) + if err != nil { + return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) + } + + // make a copy of payload + var pl *ledger.Payload + if payload != nil { + pl = payload.DeepCopy() + } + + n := node.NewNode(int(height), nil, nil, path, pl, nodeHash, maxDepth, regCount) + return n, nil + } + + // Get left child node by node index + lchild, err := getNode(lchildIndex) + if err != nil { + return nil, fmt.Errorf("failed to find left child node: %w", err) + } + + // Get right child node by node index + rchild, err := getNode(rchildIndex) + if err != nil { + return nil, fmt.Errorf("failed to find right child node: %w", err) + } + + n := node.NewNode(int(height), lchild, rchild, ledger.DummyPath, nil, nodeHash, maxDepth, regCount) + return n, nil } // EncodeTrie encodes trie root node @@ -162,9 +209,8 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64) []byte { return buf } -// ReadStorableTrie reads a storable trie from io -func ReadStorableTrie(reader io.Reader) (*StorableTrie, error) { - storableTrie := &StorableTrie{} +// ReadTrie reconstructs a trie from data read from reader. 
+func ReadTrie(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*trie.MTrie, error) { // reading version buf := make([]byte, 2) @@ -199,13 +245,26 @@ func ReadStorableTrie(reader io.Reader) (*StorableTrie, error) { if err != nil { return nil, fmt.Errorf("cannot read root index data: %w", err) } - storableTrie.RootIndex = rootIndex - roothash, err := utils.ReadShortDataFromReader(reader) + readRootHash, err := utils.ReadShortDataFromReader(reader) if err != nil { return nil, fmt.Errorf("cannot read roothash data: %w", err) } - storableTrie.RootHash = roothash - return storableTrie, nil + rootNode, err := getNode(rootIndex) + if err != nil { + return nil, fmt.Errorf("cannot find root node: %w", err) + } + + mtrie, err := trie.NewMTrie(rootNode) + if err != nil { + return nil, fmt.Errorf("restoring trie failed: %w", err) + } + + rootHash := mtrie.RootHash() + if !bytes.Equal(readRootHash, rootHash[:]) { + return nil, fmt.Errorf("restoring trie failed: roothash doesn't match") + } + + return mtrie, nil } diff --git a/ledger/complete/mtrie/flattener/encoding_test.go b/ledger/complete/mtrie/flattener/encoding_test.go index d4c790fb52e..b64df5bde6f 100644 --- a/ledger/complete/mtrie/flattener/encoding_test.go +++ b/ledger/complete/mtrie/flattener/encoding_test.go @@ -2,79 +2,115 @@ package flattener_test import ( "bytes" + "fmt" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/onflow/flow-go/ledger" - "github.com/onflow/flow-go/ledger/common/encoding" "github.com/onflow/flow-go/ledger/common/hash" "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" "github.com/onflow/flow-go/ledger/complete/mtrie/node" ) -func TestStorableNode(t *testing.T) { - path := utils.PathByUint8(3) - payload := utils.LightPayload8('A', 'a') - hashValue := hash.Hash([32]byte{4, 4, 4}) - - n := node.NewNode(2137, nil, nil, path, payload, hashValue, 7, 5000) - - storableNode := &flattener.StorableNode{ - LIndex: 1, - RIndex: 2, - Height: 2137, - Path: path[:], - EncPayload: encoding.EncodePayload(payload), - HashValue: hashValue[:], - MaxDepth: 7, - RegCount: 5000, - } +func TestNodeSerialization(t *testing.T) { + + path1 := utils.PathByUint8(0) + payload1 := utils.LightPayload8('A', 'a') + hashValue1 := hash.Hash([32]byte{1, 1, 1}) + + path2 := utils.PathByUint8(1) + payload2 := utils.LightPayload8('B', 'b') + hashValue2 := hash.Hash([32]byte{2, 2, 2}) + + hashValue3 := hash.Hash([32]byte{3, 3, 3}) + + leafNode1 := node.NewNode(255, nil, nil, ledger.Path(path1), payload1, hashValue1, 0, 1) + leafNode2 := node.NewNode(255, nil, nil, ledger.Path(path2), payload2, hashValue2, 0, 1) + rootNode := node.NewNode(256, leafNode1, leafNode2, ledger.DummyPath, nil, hashValue3, 1, 2) // Version 0 - expected := []byte{ + expectedLeafNode1 := []byte{ 0, 0, // encoding version - 8, 89, // height - 0, 0, 0, 0, 0, 0, 0, 1, // LIndex - 0, 0, 0, 0, 0, 0, 0, 2, // RIndex - 0, 7, // max depth - 0, 0, 0, 0, 0, 0, 19, 136, // reg count + 0, 255, // height + 0, 0, 0, 0, 0, 0, 0, 0, // LIndex + 0, 0, 0, 0, 0, 0, 0, 0, // RIndex + 0, 0, // max depth + 0, 0, 0, 0, 0, 0, 0, 1, // reg count 0, 32, // path data len - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // path data 0, 0, 0, 25, // payload data len 0, 0, 6, 0, 0, 0, 9, 0, 1, 0, 0, 0, 3, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 1, 97, // payload data 0, 32, // hashValue length - 4, 
4, 4, 0, 0, 0, 0, 0, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // hashValue } - t.Run("encode", func(t *testing.T) { - data := flattener.EncodeNode(n, 1, 2) - assert.Equal(t, expected, data) + // Version 0 + expectedRootNode := []byte{ + 0, 0, // encoding version + 1, 0, // height + 0, 0, 0, 0, 0, 0, 0, 1, // LIndex + 0, 0, 0, 0, 0, 0, 0, 2, // RIndex + 0, 1, // max depth + 0, 0, 0, 0, 0, 0, 0, 2, // reg count + 0, 0, // path data len + 0, 0, 0, 0, // payload data len + 0, 32, // hashValue length + 3, 3, 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, // hashValue + } + + t.Run("encode leaf node", func(t *testing.T) { + data := flattener.EncodeNode(leafNode1, 0, 0) + assert.Equal(t, expectedLeafNode1, data) }) - t.Run("decode", func(t *testing.T) { - reader := bytes.NewReader(expected) - newStorableNode, err := flattener.ReadStorableNode(reader) + t.Run("encode interim node", func(t *testing.T) { + data := flattener.EncodeNode(rootNode, 1, 2) + assert.Equal(t, expectedRootNode, data) + }) + + t.Run("decode leaf node", func(t *testing.T) { + reader := bytes.NewReader(expectedLeafNode1) + newNode, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex != 0 { + return nil, fmt.Errorf("expect child node index 0, got %d", nodeIndex) + } + return nil, nil + }) + require.NoError(t, err) + assert.Equal(t, leafNode1, newNode) + }) + + t.Run("decode interim node", func(t *testing.T) { + reader := bytes.NewReader(expectedRootNode) + newNode, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { + switch nodeIndex { + case 1: + return leafNode1, nil + case 2: + return leafNode2, nil + default: + return nil, fmt.Errorf("unexpected child node index %d ", nodeIndex) + } + }) require.NoError(t, err) - assert.Equal(t, storableNode, newStorableNode) + assert.Equal(t, rootNode, newNode) }) } -func TestStorableTrie(t *testing.T) { +func TestTrieSerialization(t *testing.T) { hashValue := hash.Hash([32]byte{2, 2, 2}) - rootNode := node.NewNode(256, nil, nil, ledger.DummyPath, nil, hashValue, 7, 5000) - - storableTrie := &flattener.StorableTrie{ - RootIndex: 21, - RootHash: hashValue[:], - } + rootNodeIndex := uint64(21) // Version 0 expected := []byte{ @@ -88,19 +124,19 @@ func TestStorableTrie(t *testing.T) { } t.Run("encode", func(t *testing.T) { - data := flattener.EncodeTrie(rootNode, 21) - + data := flattener.EncodeTrie(rootNode, rootNodeIndex) assert.Equal(t, expected, data) }) t.Run("decode", func(t *testing.T) { - reader := bytes.NewReader(expected) - - newStorableNode, err := flattener.ReadStorableTrie(reader) + trie, err := flattener.ReadTrie(reader, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex != rootNodeIndex { + return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) + } + return rootNode, nil + }) require.NoError(t, err) - - assert.Equal(t, storableTrie, newStorableNode) + assert.Equal(t, rootNode, trie.RootNode()) }) - } diff --git a/ledger/complete/mtrie/flattener/forest.go b/ledger/complete/mtrie/flattener/forest.go deleted file mode 100644 index f9628eb9671..00000000000 --- a/ledger/complete/mtrie/flattener/forest.go +++ /dev/null @@ -1,191 +0,0 @@ -package flattener - -import ( - "bytes" - "encoding/hex" - "fmt" - - "github.com/onflow/flow-go/ledger" - "github.com/onflow/flow-go/ledger/common/encoding" - "github.com/onflow/flow-go/ledger/common/hash" - "github.com/onflow/flow-go/ledger/complete/mtrie" 
- "github.com/onflow/flow-go/ledger/complete/mtrie/node" - "github.com/onflow/flow-go/ledger/complete/mtrie/trie" -) - -// FlattenedForest represents an Forest as a flattened data structure. -// Specifically it consists of : -// * a list of storable nodes, where references to nodes are replaced by index in the slice -// * and a list of storable tries, each referencing their respective root node by index. -// 0 is a special index, meaning nil, but is included in this list for ease of use -// and removing would make it necessary to constantly add/subtract indexes -// -// As an important property, the nodes are listed in an order which satisfies -// Descendents-First-Relationship. The Descendents-First-Relationship has the -// following important property: -// When re-building the Trie from the sequence of nodes, one can build the trie on the fly, -// as for each node, the children have been previously encountered. -type FlattenedForest struct { - Nodes []*StorableNode - Tries []*StorableTrie -} - -// node2indexMap maps a node pointer to the node index in the serialization -type node2indexMap map[*node.Node]uint64 - -// FlattenForest returns forest FlattenedForest, which contains all nodes and tries of the Forest. -func FlattenForest(f *mtrie.Forest) (*FlattenedForest, error) { - tries, err := f.GetTries() - if err != nil { - return nil, fmt.Errorf("cannot get cached tries root hashes: %w", err) - } - - storableTries := make([]*StorableTrie, 0, len(tries)) - storableNodes := []*StorableNode{nil} // 0th element is nil - - // assign unique value to every node - allNodes := make(node2indexMap) - allNodes[nil] = 0 // 0th element is nil - - counter := uint64(1) // start from 1, as 0 marks nil - for _, t := range tries { - for itr := NewUniqueNodeIterator(t, allNodes); itr.Next(); { - n := itr.Value() - allNodes[n] = counter - counter++ - storableNode, err := toStorableNode(n, allNodes) - if err != nil { - return nil, fmt.Errorf("failed to construct storable node: %w", err) - } - storableNodes = append(storableNodes, storableNode) - } - //fix root nodes indices - // since we indexed all nodes, root must be present - storableTrie, err := toStorableTrie(t, allNodes) - if err != nil { - return nil, fmt.Errorf("failed to construct storable trie: %w", err) - } - storableTries = append(storableTries, storableTrie) - } - - return &FlattenedForest{ - Nodes: storableNodes, - Tries: storableTries, - }, nil -} - -func toStorableNode(node *node.Node, indexForNode node2indexMap) (*StorableNode, error) { - leftIndex, found := indexForNode[node.LeftChild()] - if !found { - hash := node.LeftChild().Hash() - return nil, fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(hash[:])) - } - rightIndex, found := indexForNode[node.RightChild()] - if !found { - hash := node.RightChild().Hash() - return nil, fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(hash[:])) - } - - hash := node.Hash() - // if node is a leaf, path is a slice of 32 bytes, otherwise path is nil - var path []byte - if node.IsLeaf() { - temp := *node.Path() - path = temp[:] - } - storableNode := &StorableNode{ - LIndex: leftIndex, - RIndex: rightIndex, - Height: uint16(node.Height()), - Path: path, - EncPayload: encoding.EncodePayload(node.Payload()), - HashValue: hash[:], - MaxDepth: node.MaxDepth(), - RegCount: node.RegCount(), - } - return storableNode, nil -} - -func toStorableTrie(mtrie *trie.MTrie, indexForNode node2indexMap) (*StorableTrie, error) { - rootIndex, found := 
indexForNode[mtrie.RootNode()] - if !found { - hash := mtrie.RootNode().Hash() - return nil, fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(hash[:])) - } - hash := mtrie.RootHash() - storableTrie := &StorableTrie{ - RootIndex: rootIndex, - RootHash: hash[:], - } - - return storableTrie, nil -} - -// RebuildTries construct a forest from a storable FlattenedForest -func RebuildTries(flatForest *FlattenedForest) ([]*trie.MTrie, error) { - tries := make([]*trie.MTrie, 0, len(flatForest.Tries)) - nodes, err := RebuildNodes(flatForest.Nodes) - if err != nil { - return nil, fmt.Errorf("reconstructing nodes from storables failed: %w", err) - } - - //restore tries - for _, storableTrie := range flatForest.Tries { - mtrie, err := trie.NewMTrie(nodes[storableTrie.RootIndex]) - if err != nil { - return nil, fmt.Errorf("restoring trie failed: %w", err) - } - rootHash := mtrie.RootHash() - if !bytes.Equal(storableTrie.RootHash, rootHash[:]) { - return nil, fmt.Errorf("restoring trie failed: roothash doesn't match") - } - tries = append(tries, mtrie) - } - return tries, nil -} - -// RebuildNodes generates a list of Nodes from a sequence of StorableNodes. -// The sequence must obey the DESCENDANTS-FIRST-RELATIONSHIP -func RebuildNodes(storableNodes []*StorableNode) ([]*node.Node, error) { - nodes := make([]*node.Node, 0, len(storableNodes)) - for i, snode := range storableNodes { - if snode == nil { - nodes = append(nodes, nil) - continue - } - if (snode.LIndex >= uint64(i)) || (snode.RIndex >= uint64(i)) { - return nil, fmt.Errorf("sequence of StorableNodes does not satisfy Descendents-First-Relationship") - } - - if len(snode.Path) > 0 { - path, err := ledger.ToPath(snode.Path) - if err != nil { - return nil, fmt.Errorf("failed to decode a path of a storableNode %w", err) - } - payload, err := encoding.DecodePayload(snode.EncPayload) - if err != nil { - return nil, fmt.Errorf("failed to decode a payload for an storableNode %w", err) - } - nodeHash, err := hash.ToHash(snode.HashValue) - if err != nil { - return nil, fmt.Errorf("failed to decode a hash of a storableNode %w", err) - } - // make a copy of payload - var pl *ledger.Payload - if payload != nil { - pl = payload.DeepCopy() - } - - node := node.NewNode(int(snode.Height), nodes[snode.LIndex], nodes[snode.RIndex], path, pl, nodeHash, snode.MaxDepth, snode.RegCount) - nodes = append(nodes, node) - continue - } - nodeHash, err := hash.ToHash(snode.HashValue) - if err != nil { - return nil, fmt.Errorf("failed to decode a hash of a storableNode %w", err) - } - node := node.NewNode(int(snode.Height), nodes[snode.LIndex], nodes[snode.RIndex], ledger.DummyPath, nil, nodeHash, snode.MaxDepth, snode.RegCount) - nodes = append(nodes, node) - } - return nodes, nil -} diff --git a/ledger/complete/mtrie/flattener/forest_test.go b/ledger/complete/mtrie/flattener/forest_test.go deleted file mode 100644 index 4762dbb7bf0..00000000000 --- a/ledger/complete/mtrie/flattener/forest_test.go +++ /dev/null @@ -1,72 +0,0 @@ -package flattener_test - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/onflow/flow-go/ledger" - "github.com/onflow/flow-go/ledger/common/utils" - "github.com/onflow/flow-go/ledger/complete/mtrie" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" - "github.com/onflow/flow-go/module/metrics" -) - -func TestForestStoreAndLoad(t *testing.T) { - - metricsCollector := &metrics.NoopCollector{} - mForest, err := mtrie.NewForest(5, metricsCollector, 
nil) - require.NoError(t, err) - rootHash := mForest.GetEmptyRootHash() - - p1 := utils.PathByUint8(1) - v1 := utils.LightPayload8('A', 'a') - p2 := utils.PathByUint8(2) - v2 := utils.LightPayload8('B', 'b') - p3 := utils.PathByUint8(130) - v3 := utils.LightPayload8('C', 'c') - p4 := utils.PathByUint8(131) - v4 := utils.LightPayload8('D', 'd') - p5 := utils.PathByUint8(132) - v5 := utils.LightPayload8('E', 'e') - - paths := []ledger.Path{p1, p2, p3, p4, p5} - payloads := []*ledger.Payload{v1, v2, v3, v4, v5} - - update := &ledger.TrieUpdate{RootHash: rootHash, Paths: paths, Payloads: payloads} - rootHash, err = mForest.Update(update) - require.NoError(t, err) - - p6 := utils.PathByUint8(133) - v6 := utils.LightPayload8('F', 'f') - update = &ledger.TrieUpdate{RootHash: rootHash, Paths: []ledger.Path{p6}, Payloads: []*ledger.Payload{v6}} - rootHash, err = mForest.Update(update) - require.NoError(t, err) - - forestSequencing, err := flattener.FlattenForest(mForest) - require.NoError(t, err) - - newForest, err := mtrie.NewForest(5, metricsCollector, nil) - require.NoError(t, err) - - //forests are different - assert.NotEqual(t, mForest, newForest) - - rebuiltTries, err := flattener.RebuildTries(forestSequencing) - require.NoError(t, err) - err = newForest.AddTries(rebuiltTries) - require.NoError(t, err) - - //forests are the same now - assert.Equal(t, mForest, newForest) - - read := &ledger.TrieRead{RootHash: rootHash, Paths: paths} - retPayloads, err := mForest.Read(read) - require.NoError(t, err) - newRetPayloads, err := newForest.Read(read) - require.NoError(t, err) - for i := range paths { - require.True(t, retPayloads[i].Equals(newRetPayloads[i])) - } -} diff --git a/ledger/complete/mtrie/flattener/storables.go b/ledger/complete/mtrie/flattener/storables.go deleted file mode 100644 index 9f35a812228..00000000000 --- a/ledger/complete/mtrie/flattener/storables.go +++ /dev/null @@ -1,18 +0,0 @@ -package flattener - -type StorableNode struct { - LIndex uint64 - RIndex uint64 - Height uint16 // Height where the node is at - Path []byte // path - EncPayload []byte // encoded data for payload - HashValue []byte - MaxDepth uint16 - RegCount uint64 -} - -// StorableTrie is a data structure for storing trie -type StorableTrie struct { - RootIndex uint64 - RootHash []byte -} diff --git a/ledger/complete/mtrie/flattener/trie.go b/ledger/complete/mtrie/flattener/trie.go deleted file mode 100644 index ec53bc564b5..00000000000 --- a/ledger/complete/mtrie/flattener/trie.go +++ /dev/null @@ -1,74 +0,0 @@ -package flattener - -import ( - "fmt" - - "github.com/onflow/flow-go/ledger/complete/mtrie/node" - "github.com/onflow/flow-go/ledger/complete/mtrie/trie" -) - -// FlattenedTrie is similar to FlattenedForest except only including a single trie -type FlattenedTrie struct { - Nodes []*StorableNode - Trie *StorableTrie -} - -// ToFlattenedForestWithASingleTrie converts the flattenedTrie into a FlattenedForest with only one trie included -func (ft *FlattenedTrie) ToFlattenedForestWithASingleTrie() *FlattenedForest { - storableTries := make([]*StorableTrie, 1) - storableTries[0] = ft.Trie - return &FlattenedForest{ - Nodes: ft.Nodes, - Tries: storableTries, - } -} - -// FlattenTrie returns the trie as a FlattenedTrie, which contains all nodes of that trie. 
-func FlattenTrie(trie *trie.MTrie) (*FlattenedTrie, error) { - storableNodes := []*StorableNode{nil} // 0th element is nil - - // assign unique value to every node - allNodes := make(map[*node.Node]uint64) - allNodes[nil] = 0 // 0th element is nil - - counter := uint64(1) // start from 1, as 0 marks nil - for itr := NewNodeIterator(trie); itr.Next(); { - n := itr.Value() - // if node not in map - if _, has := allNodes[n]; !has { - allNodes[n] = counter - counter++ - storableNode, err := toStorableNode(n, allNodes) - if err != nil { - return nil, fmt.Errorf("failed to construct storable node: %w", err) - } - storableNodes = append(storableNodes, storableNode) - } - } - // fix root nodes indices - // since we indexed all nodes, root must be present - storableTrie, err := toStorableTrie(trie, allNodes) - if err != nil { - return nil, fmt.Errorf("failed to construct storable trie: %w", err) - } - - return &FlattenedTrie{ - Nodes: storableNodes, - Trie: storableTrie, - }, nil -} - -// RebuildTrie construct a trie from a storable FlattenedForest -func RebuildTrie(flatTrie *FlattenedTrie) (*trie.MTrie, error) { - nodes, err := RebuildNodes(flatTrie.Nodes) - if err != nil { - return nil, fmt.Errorf("reconstructing nodes from storables failed: %w", err) - } - - //restore tries - mtrie, err := trie.NewMTrie(nodes[flatTrie.Trie.RootIndex]) - if err != nil { - return nil, fmt.Errorf("restoring trie failed: %w", err) - } - return mtrie, nil -} diff --git a/ledger/complete/mtrie/flattener/trie_test.go b/ledger/complete/mtrie/flattener/trie_test.go deleted file mode 100644 index f2d0f573ff2..00000000000 --- a/ledger/complete/mtrie/flattener/trie_test.go +++ /dev/null @@ -1,56 +0,0 @@ -package flattener_test - -import ( - "io/ioutil" - "os" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/onflow/flow-go/ledger" - "github.com/onflow/flow-go/ledger/common/utils" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" - "github.com/onflow/flow-go/ledger/complete/mtrie/trie" -) - -func TestTrieStoreAndLoad(t *testing.T) { - dir, err := ioutil.TempDir("", "test-mtrie-") - require.NoError(t, err) - defer os.RemoveAll(dir) - - emptyTrie := trie.NewEmptyMTrie() - require.NoError(t, err) - - p1 := utils.PathByUint8(1) - v1 := utils.LightPayload8('A', 'a') - p2 := utils.PathByUint8(2) - v2 := utils.LightPayload8('B', 'b') - p3 := utils.PathByUint8(130) - v3 := utils.LightPayload8('C', 'c') - p4 := utils.PathByUint8(131) - v4 := utils.LightPayload8('D', 'd') - p5 := utils.PathByUint8(132) - v5 := utils.LightPayload8('E', 'e') - - paths := []ledger.Path{p1, p2, p3, p4, p5} - payloads := []ledger.Payload{*v1, *v2, *v3, *v4, *v5} - - newTrie, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) - require.NoError(t, err) - - flattedTrie, err := flattener.FlattenTrie(newTrie) - require.NoError(t, err) - - rebuiltTrie, err := flattener.RebuildTrie(flattedTrie) - require.NoError(t, err) - - //tries are the same now - assert.Equal(t, newTrie, rebuiltTrie) - - retPayloads := newTrie.UnsafeRead(paths) - newRetPayloads := rebuiltTrie.UnsafeRead(paths) - for i := range paths { - require.True(t, retPayloads[i].Equals(newRetPayloads[i])) - } -} diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index e115293fda9..e8f48bb51a0 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -167,18 +167,8 @@ func (c *Checkpointer) Checkpoint(to int, targetWriter func() 
(io.WriteCloser, e } err = c.wal.replay(0, to, - func(forestSequencing *flattener.FlattenedForest) error { - tries, err := flattener.RebuildTries(forestSequencing) - if err != nil { - return err - } - for _, t := range tries { - err := forest.AddTrie(t) - if err != nil { - return err - } - } - return nil + func(tries []*trie.MTrie) error { + return forest.AddTries(tries) }, func(update *ledger.TrieUpdate) error { _, err := forest.Update(update) @@ -254,7 +244,20 @@ func CreateCheckpointWriterForFile(dir, filename string) (io.WriteCloser, error) }, nil } -// StoreCheckpoint writes the given checkpoint to disk, and also append with a CRC32 file checksum for integrity check. +// StoreCheckpoint writes the given tries to a checkpoint file, and also appends +// a CRC32 file checksum for integrity check. +// A checkpoint file stores a flattened forest. Specifically, it consists of: +// * a list of encoded nodes, where references to other nodes are by list index. +// * a list of encoded tries, each referencing their respective root node by index. +// A reference to index 0 is a special case, meaning nil. +// +// As an important property, the nodes are listed in an order which satisfies +// Descendents-First-Relationship. The Descendents-First-Relationship has the +// following important property: +// When rebuilding the trie from the sequence of nodes, one can build the trie on the fly, +// as for each node, the children have been previously encountered. +// TODO: evaluate alternatives to CRC32 since checkpoint file is many GB in size. +// TODO: add concurrency if the performance gains are enough to offset complexity. func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { var err error @@ -271,7 +274,7 @@ func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { return fmt.Errorf("cannot write checkpoint header: %w", err) } - // assign unique value to every node + // assign unique index to every node allNodes := make(map[*node.Node]uint64) allNodes[nil] = 0 // 0th element is nil @@ -363,12 +366,12 @@ func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { return nil } -func (c *Checkpointer) LoadCheckpoint(checkpoint int) (*flattener.FlattenedForest, error) { +func (c *Checkpointer) LoadCheckpoint(checkpoint int) ([]*trie.MTrie, error) { filepath := path.Join(c.dir, NumberToFilename(checkpoint)) return LoadCheckpoint(filepath) } -func (c *Checkpointer) LoadRootCheckpoint() (*flattener.FlattenedForest, error) { +func (c *Checkpointer) LoadRootCheckpoint() ([]*trie.MTrie, error) { filepath := path.Join(c.dir, bootstrap.FilenameWALRootCheckpoint) return LoadCheckpoint(filepath) } @@ -387,7 +390,7 @@ func (c *Checkpointer) RemoveCheckpoint(checkpoint int) error { return os.Remove(path.Join(c.dir, NumberToFilename(checkpoint))) } -func LoadCheckpoint(filepath string) (*flattener.FlattenedForest, error) { +func LoadCheckpoint(filepath string) ([]*trie.MTrie, error) { file, err := os.Open(filepath) if err != nil { return nil, fmt.Errorf("cannot open checkpoint file %s: %w", filepath, err) } @@ -399,7 +402,7 @@ func LoadCheckpoint(filepath string) (*flattener.FlattenedForest, error) { return readCheckpoint(file) } -func readCheckpoint(f *os.File) (*flattener.FlattenedForest, error) { +func readCheckpoint(f *os.File) ([]*trie.MTrie, error) { // Read header: magic (2 bytes) + version (2 bytes) header := make([]byte, 4) _, err := io.ReadFull(f, header) @@ -430,10 +433,10 @@ func readCheckpoint(f *os.File) (*flattener.FlattenedForest, error) { } } -// 
readCheckpointV3AndEarlier deserializes checkpoint file (version 3 and earlier) and returns flattened forest. +// readCheckpointV3AndEarlier deserializes checkpoint file (version 3 and earlier) and returns a list of tries. // Header (magic and version) are verified by the caller. // TODO: return []*trie.MTrie directly without conversion to FlattenedForest. -func readCheckpointV3AndEarlier(f *os.File, version uint16) (*flattener.FlattenedForest, error) { +func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, error) { var bufReader io.Reader = bufio.NewReader(f) crcReader := NewCRC32Reader(bufReader) @@ -461,24 +464,33 @@ func readCheckpointV3AndEarlier(f *os.File, version uint16) (*flattener.Flattene nodesCount, pos := readUint64(header, nodesCountOffset) triesCount, _ := readUint16(header, pos) - nodes := make([]*flattener.StorableNode, nodesCount+1) //+1 for 0 index meaning nil - tries := make([]*flattener.StorableTrie, triesCount) + nodes := make([]*node.Node, nodesCount+1) //+1 for 0 index meaning nil + tries := make([]*trie.MTrie, triesCount) for i := uint64(1); i <= nodesCount; i++ { - storableNode, err := flattener.ReadStorableNode(reader) + n, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex >= uint64(i) { + return nil, fmt.Errorf("sequence of stored nodes does not satisfy Descendents-First-Relationship") + } + return nodes[nodeIndex], nil + }) if err != nil { - return nil, fmt.Errorf("cannot read storable node %d: %w", i, err) + return nil, fmt.Errorf("cannot read node %d: %w", i, err) } - nodes[i] = storableNode + nodes[i] = n } - // TODO version ? for i := uint16(0); i < triesCount; i++ { - storableTrie, err := flattener.ReadStorableTrie(reader) + trie, err := flattener.ReadTrie(reader, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex >= uint64(len(nodes)) { + return nil, fmt.Errorf("sequence of stored nodes doesn't contain node") + } + return nodes[nodeIndex], nil + }) if err != nil { return nil, fmt.Errorf("cannot read storable trie %d: %w", i, err) } - tries[i] = storableTrie + tries[i] = trie } if version == VersionV3 { @@ -496,16 +508,13 @@ func readCheckpointV3AndEarlier(f *os.File, version uint16) (*flattener.Flattene } } - return &flattener.FlattenedForest{ - Nodes: nodes, - Tries: tries, - }, nil + return tries, nil } -// readCheckpointV4 deserializes checkpoint file (version 4) and returns flattened forest. +// readCheckpointV4 deserializes checkpoint file (version 4) and returns a list of tries. // Checkpoint file header (magic and version) are verified by the caller. -func readCheckpointV4(f *os.File) (*flattener.FlattenedForest, error) { +func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { // Read footer to get node count and trie count @@ -547,23 +556,34 @@ func readCheckpointV4(f *os.File) (*flattener.FlattenedForest, error) { return nil, fmt.Errorf("cannot read header bytes: %w", err) } - nodes := make([]*flattener.StorableNode, nodesCount+1) //+1 for 0 index meaning nil - tries := make([]*flattener.StorableTrie, triesCount) + // nodes' element at index 0 is special, meaning nil. 
+ nodes := make([]*node.Node, nodesCount+1) //+1 for 0 index meaning nil + tries := make([]*trie.MTrie, triesCount) for i := uint64(1); i <= nodesCount; i++ { - storableNode, err := flattener.ReadStorableNode(reader) + n, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex >= uint64(i) { + return nil, fmt.Errorf("sequence of stored nodes does not satisfy Descendents-First-Relationship") + } + return nodes[nodeIndex], nil + }) if err != nil { - return nil, fmt.Errorf("cannot read storable node %d: %w", i, err) + return nil, fmt.Errorf("cannot read node %d: %w", i, err) } - nodes[i] = storableNode + nodes[i] = n } for i := uint16(0); i < triesCount; i++ { - storableTrie, err := flattener.ReadStorableTrie(reader) + trie, err := flattener.ReadTrie(reader, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex >= uint64(len(nodes)) { + return nil, fmt.Errorf("sequence of stored nodes doesn't contain node") + } + return nodes[nodeIndex], nil + }) if err != nil { return nil, fmt.Errorf("cannot read storable trie %d: %w", i, err) } - tries[i] = storableTrie + tries[i] = trie } // Read footer again for crc32 computation @@ -586,10 +606,7 @@ func readCheckpointV4(f *os.File) (*flattener.FlattenedForest, error) { return nil, fmt.Errorf("checkpoint checksum failed! File contains %x but read data checksums to %x", readCrc32, calculatedCrc32) } - return &flattener.FlattenedForest{ - Nodes: nodes, - Tries: tries, - }, nil + return tries, nil } func writeUint16(buffer []byte, location int, value uint16) int { diff --git a/ledger/complete/wal/checkpointer_test.go b/ledger/complete/wal/checkpointer_test.go index 4576cca97f3..80c2374ea5f 100644 --- a/ledger/complete/wal/checkpointer_test.go +++ b/ledger/complete/wal/checkpointer_test.go @@ -22,7 +22,6 @@ import ( "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete" "github.com/onflow/flow-go/ledger/complete/mtrie" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" realWAL "github.com/onflow/flow-go/ledger/complete/wal" "github.com/onflow/flow-go/module/metrics" @@ -182,7 +181,7 @@ func Test_Checkpointing(t *testing.T) { require.NoError(t, err) err = wal2.Replay( - func(forestSequencing *flattener.FlattenedForest) error { + func(tries []*trie.MTrie) error { return fmt.Errorf("I should fail as there should be no checkpoints") }, func(update *ledger.TrieUpdate) error { @@ -216,8 +215,8 @@ func Test_Checkpointing(t *testing.T) { require.NoError(t, err) err = wal3.Replay( - func(forestSequencing *flattener.FlattenedForest) error { - return loadIntoForest(f3, forestSequencing) + func(tries []*trie.MTrie) error { + return f3.AddTries(tries) }, func(update *ledger.TrieUpdate) error { return fmt.Errorf("I should fail as there should be no updates") @@ -298,8 +297,8 @@ func Test_Checkpointing(t *testing.T) { updatesLeft := 1 // there should be only one update err = wal5.Replay( - func(forestSequencing *flattener.FlattenedForest) error { - return loadIntoForest(f5, forestSequencing) + func(tries []*trie.MTrie) error { + return f5.AddTries(tries) }, func(update *ledger.TrieUpdate) error { if updatesLeft == 0 { @@ -560,20 +559,6 @@ func Test_StoringLoadingCheckpoints(t *testing.T) { }) } -func loadIntoForest(forest *mtrie.Forest, forestSequencing *flattener.FlattenedForest) error { - tries, err := flattener.RebuildTries(forestSequencing) - if err != nil { - return err - } - for _, t := range tries { - err := 
forest.AddTrie(t) - if err != nil { - return err - } - } - return nil -} - type writeCloserWithErrors struct { writeError error closeError error diff --git a/ledger/complete/wal/checkpointer_versioning_test.go b/ledger/complete/wal/checkpointer_versioning_test.go index e5f86cedeca..81e412ba1c0 100644 --- a/ledger/complete/wal/checkpointer_versioning_test.go +++ b/ledger/complete/wal/checkpointer_versioning_test.go @@ -7,57 +7,29 @@ import ( "github.com/stretchr/testify/require" "github.com/onflow/flow-go/ledger" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" ) -var v1Forest = &flattener.FlattenedForest{ - Nodes: []*flattener.StorableNode{ - nil, // node 0 is special and skipped - { - LIndex: 0, - RIndex: 0, - Height: 0, - Path: []byte{1}, - EncPayload: []byte{2}, - HashValue: []byte{3}, - MaxDepth: 1, - RegCount: 1, - }, { - LIndex: 1, - RIndex: 2, - Height: 3, - Path: []byte{11}, - EncPayload: []byte{22}, - HashValue: []byte{33}, - MaxDepth: 11, - RegCount: 11, - }, - }, - Tries: []*flattener.StorableTrie{ - { - RootIndex: 0, - RootHash: []byte{4}, - }, - { - RootIndex: 1, - RootHash: []byte{44}, - }, - }, -} - func Test_LoadingV1Checkpoint(t *testing.T) { - forest, err := LoadCheckpoint("test_data/checkpoint.v1") + expectedRootHash := [4]ledger.RootHash{ + mustToHash("568f4ec740fe3b5de88034cb7b1fbddb41548b068f31aebc8ae9189e429c5749"), // empty trie root hash + mustToHash("f53f9696b85b7428227f1b39f40b2ce07c175f58dea2b86cb6f84dc7c9fbeabd"), + mustToHash("7ac8daf34733cce3d5d03b5a1afde33a572249f81c45da91106412e94661e109"), + mustToHash("63df641430e5e0745c3d99ece6ac209467ccfdb77e362e7490a830db8e8803ae"), + } + + tries, err := LoadCheckpoint("test_data/checkpoint.v1") require.NoError(t, err) + require.Equal(t, len(expectedRootHash), len(tries)) - require.Equal(t, v1Forest, forest) + for i, trie := range tries { + require.Equal(t, expectedRootHash[i], trie.RootHash()) + require.True(t, trie.RootNode().VerifyCachedHash()) + } } func Test_LoadingV3Checkpoint(t *testing.T) { - forest, err := LoadCheckpoint("test_data/checkpoint.v3") - require.NoError(t, err) - expectedRootHash := [4]ledger.RootHash{ mustToHash("568f4ec740fe3b5de88034cb7b1fbddb41548b068f31aebc8ae9189e429c5749"), // empty trie root hash mustToHash("f53f9696b85b7428227f1b39f40b2ce07c175f58dea2b86cb6f84dc7c9fbeabd"), @@ -65,7 +37,7 @@ func Test_LoadingV3Checkpoint(t *testing.T) { mustToHash("63df641430e5e0745c3d99ece6ac209467ccfdb77e362e7490a830db8e8803ae"), } - tries, err := flattener.RebuildTries(forest) + tries, err := LoadCheckpoint("test_data/checkpoint.v3") require.NoError(t, err) require.Equal(t, len(expectedRootHash), len(tries)) diff --git a/ledger/complete/wal/compactor_test.go b/ledger/complete/wal/compactor_test.go index 8abab425ead..efcbb0160c5 100644 --- a/ledger/complete/wal/compactor_test.go +++ b/ledger/complete/wal/compactor_test.go @@ -14,7 +14,6 @@ import ( "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete/mtrie" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/model/bootstrap" "github.com/onflow/flow-go/module/metrics" @@ -168,8 +167,8 @@ func Test_Compactor(t *testing.T) { require.NoError(t, err) err = wal2.Replay( - func(forestSequencing *flattener.FlattenedForest) error { - return loadIntoForest(f2, forestSequencing) + func(tries []*trie.MTrie) error { + return f2.AddTries(tries) }, func(update *ledger.TrieUpdate) error { _, err 
:= f2.Update(update) @@ -330,17 +329,3 @@ func Test_Compactor_checkpointInterval(t *testing.T) { }) }) } - -func loadIntoForest(forest *mtrie.Forest, forestSequencing *flattener.FlattenedForest) error { - tries, err := flattener.RebuildTries(forestSequencing) - if err != nil { - return err - } - for _, t := range tries { - err := forest.AddTrie(t) - if err != nil { - return err - } - } - return nil -} diff --git a/ledger/complete/wal/fixtures/noopwal.go b/ledger/complete/wal/fixtures/noopwal.go index 3f88fc6e557..8f705efdbf2 100644 --- a/ledger/complete/wal/fixtures/noopwal.go +++ b/ledger/complete/wal/fixtures/noopwal.go @@ -3,7 +3,7 @@ package fixtures import ( "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/complete/mtrie" - "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/ledger/complete/wal" ) @@ -37,10 +37,10 @@ func (w *NoopWAL) ReplayOnForest(forest *mtrie.Forest) error { return nil } func (w *NoopWAL) Segments() (first, last int, err error) { return 0, 0, nil } -func (w *NoopWAL) Replay(checkpointFn func(forestSequencing *flattener.FlattenedForest) error, updateFn func(update *ledger.TrieUpdate) error, deleteFn func(ledger.RootHash) error) error { +func (w *NoopWAL) Replay(checkpointFn func(tries []*trie.MTrie) error, updateFn func(update *ledger.TrieUpdate) error, deleteFn func(ledger.RootHash) error) error { return nil } -func (w *NoopWAL) ReplayLogsOnly(checkpointFn func(forestSequencing *flattener.FlattenedForest) error, updateFn func(update *ledger.TrieUpdate) error, deleteFn func(rootHash ledger.RootHash) error) error { +func (w *NoopWAL) ReplayLogsOnly(checkpointFn func(tries []*trie.MTrie) error, updateFn func(update *ledger.TrieUpdate) error, deleteFn func(rootHash ledger.RootHash) error) error { return nil } diff --git a/ledger/complete/wal/test_data/checkpoint.v1 b/ledger/complete/wal/test_data/checkpoint.v1 index c5567395b83728d4cf932c99e1545e813fe52f5b..86f6d15684aa9e58496508739a70134e072b057e 100644 GIT binary patch literal 1245 zcmY#1XJBN20B!~rAmbmDhe-SZJT3vu?icQDWq@m80D2#84ukn`Mt}4O7U8M0FS*;DRk>ypyG@v3<|TC9%kuVyJJS_jr5px zZ@xTPIoq!!;6Q6Y%+$k1})ptWa%i@L+|9;kQqBT)zaal{PwaYD&Y52&KzM zX7V?-#2@|i)Z^8o2`_;b!qY1(CfFHZxpI zaogliX-u@8^3#KDogz>vEGR%u0Rb$D@@7`vUl#V&7Aeu&Zq0O$z0NBi=qVQa;OR%H z#D$4niqc3y!38md0ZULM-%k-Sc>16u#&+hLXKNIuq@Ve_z0ORpWWoxA+kG9(>!3P8 zltNg)-*Jb3*0C=dOiowJ@4f99(#_UyxNc9^ONn_-bHY90YFQW*h)C=Z$ulxAf|!g<42;Y`8iYXrCe6ga4G{p+j36eX7z3jsR0u>d KvcNe&qjUf~ Date: Wed, 2 Feb 2022 17:18:39 -0600 Subject: [PATCH 05/37] Fix lint errors --- ledger/complete/ledger_test.go | 2 +- ledger/complete/mtrie/forest.go | 2 +- ledger/complete/mtrie/trie/trie_test.go | 2 +- ledger/complete/wal/checkpointer_test.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ledger/complete/ledger_test.go b/ledger/complete/ledger_test.go index c94edae414c..d136e6db7fe 100644 --- a/ledger/complete/ledger_test.go +++ b/ledger/complete/ledger_test.go @@ -440,7 +440,7 @@ func TestLedgerFunctionality(t *testing.T) { // capture new values for future query for j, k := range keys { encKey := encoding.EncodeKey(&k) - histStorage[string(newState[:])+string(encKey[:])] = values[j] + histStorage[string(newState[:])+string(encKey)] = values[j] latestValue[string(encKey)] = values[j] } diff --git a/ledger/complete/mtrie/forest.go b/ledger/complete/mtrie/forest.go index 69812496bfd..c783493cac6 100644 --- a/ledger/complete/mtrie/forest.go +++ b/ledger/complete/mtrie/forest.go 
@@ -50,7 +50,7 @@ func NewForest(forestCapacity int, metrics module.LedgerMetrics, onTreeEvicted f if !ok { panic(fmt.Sprintf("cache contains item of type %T", value)) } - //TODO Log error + // TODO Log error _ = onTreeEvicted(trie) }) } else { diff --git a/ledger/complete/mtrie/trie/trie_test.go b/ledger/complete/mtrie/trie/trie_test.go index ee3c1a282eb..aa72767d31b 100644 --- a/ledger/complete/mtrie/trie/trie_test.go +++ b/ledger/complete/mtrie/trie/trie_test.go @@ -257,7 +257,7 @@ func sampleRandomRegisterWritesWithPrefix(rng *LinearCongruentialGenerator, numb nextRandomByteIndex = 0 } p[b] = nextRandomBytes[nextRandomByteIndex] - nextRandomByteIndex += 1 + nextRandomByteIndex++ } paths = append(paths, p) diff --git a/ledger/complete/wal/checkpointer_test.go b/ledger/complete/wal/checkpointer_test.go index 80c2374ea5f..f8f43e930ce 100644 --- a/ledger/complete/wal/checkpointer_test.go +++ b/ledger/complete/wal/checkpointer_test.go @@ -495,7 +495,7 @@ func randomlyModifyFile(t *testing.T, filename string) { require.NoError(t, err) // byte addition will simply wrap around - buf[0] += 1 + buf[0]++ _, err = file.WriteAt(buf, offset) require.NoError(t, err) From 422b75b1c56fdcb968569fde82bfdfd5f726f2c3 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Thu, 3 Feb 2022 08:59:44 -0600 Subject: [PATCH 06/37] Reduce checkpoint file size by 5.8+GB Further reduction of 4.4+GB is planned for a total reduction of 10.2+GB. (see TODOs at bottom). Leaf node is encoded as: - node type (1 byte) (new in v4) - height (2 bytes) - max depth (2 bytes) - reg count (8 bytes) - hash (2 bytes + 32 bytes) - path (2 bytes + 32 bytes) - payload (4 bytes + n bytes) Encoded payload size also reduced by removing prefix (version 2 bytes + type 1 byte). Interim node is encoded as: - node type (1 byte) (new in v4) - height (2 bytes) - max depth (2 bytes) - reg count (8 bytes) - lchild index (8 bytes) - rchild index (8 bytes) - hash (2 bytes + 32 bytes) Trie is encoded as: - root node index (8 bytes) - hash (2 bytes + 32 bytes) Removed v3 leaf node fields: - version (2 bytes) - left child index (8 bytes) - right child index (8 bytes) - payload version and type (3 bytes) Removed v3 interim node fields: - version (2 bytes) - path (2 bytes) - payload (4 bytes) Removed v3 trie field: - version (2 bytes) Leaf node data is reduced by 20 bytes (2+8+8+3-1). Interim node data is reduced by 7 bytes (2+2+4-1). Trie is reduced by 2 bytes. TODO: remove max depth and reg count fields from both leaf node and interim node types. TODO: reduce hash length from 2 bytes to 1 byte for both leaf node and interim node types. --- ledger/common/encoding/encoding.go | 12 + ledger/complete/mtrie/flattener/encoding.go | 330 +++++++++++------- .../complete/mtrie/flattener/encoding_test.go | 36 +- .../complete/mtrie/flattener/encoding_v3.go | 201 +++++++++++ ledger/complete/wal/checkpointer.go | 4 +- 5 files changed, 424 insertions(+), 159 deletions(-) create mode 100644 ledger/complete/mtrie/flattener/encoding_v3.go diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index 2bf7a1a5a53..e7818e8bfcd 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -327,6 +327,12 @@ func EncodePayload(p *ledger.Payload) []byte { return buffer } +// EncodePayloadWithoutPrefix encodes a ledger payload +// without prefix (version and type). 
+func EncodePayloadWithoutPrefix(p *ledger.Payload) []byte { + return encodePayload(p) +} + func encodePayload(p *ledger.Payload) []byte { buffer := make([]byte, 0) @@ -370,6 +376,12 @@ func DecodePayload(encodedPayload []byte) (*ledger.Payload, error) { return decodePayload(rest) } +// DecodePayloadWithoutPrefix construct a payload from an encoded byte slice +// without prefix (version and type). +func DecodePayloadWithoutPrefix(encodedPayload []byte) (*ledger.Payload, error) { + return decodePayload(encodedPayload) +} + func decodePayload(inp []byte) (*ledger.Payload, error) { // read encoded key size diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index c2e209c9632..29092abee4c 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -2,6 +2,7 @@ package flattener import ( "bytes" + "encoding/binary" "fmt" "io" @@ -13,159 +14,241 @@ import ( "github.com/onflow/flow-go/ledger/complete/mtrie/trie" ) -const encodingDecodingVersion = uint16(0) +type nodeType byte -// EncodeNode encodes node. -// TODO: reuse buffer -func EncodeNode(n *node.Node, lchildIndex uint64, rchildIndex uint64) []byte { +const ( + leafNodeType nodeType = iota + interimNodeType +) + +// encodeLeafNode encodes leaf node in the following format: +// - node type (1 byte) +// - height (2 bytes) +// - max depth (2 bytes) +// - reg count (8 bytes) +// - hash (2 bytes + 32 bytes) +// - path (2 bytes + 32 bytes) +// - payload (4 bytes + n bytes) +// Encoded leaf node size is 85 bytes (assuming length of hash/path is 32 bytes) + +// length of encoded payload size. +// TODO: encode payload more efficiently. +// TODO: reuse buffer. +// TODO: reduce hash size from 2 bytes to 1 byte. +func encodeLeafNode(n *node.Node) []byte { + + hash := n.Hash() + path := n.Path() + encPayload := encoding.EncodePayloadWithoutPrefix(n.Payload()) - encPayload := encoding.EncodePayload(n.Payload()) + buf := make([]byte, 1+2+2+8+2+len(hash)+2+len(path)+4+len(encPayload)) + pos := 0 - length := 2 + 2 + 8 + 8 + 2 + 8 + 2 + len(n.Path()) + 4 + len(encPayload) + 2 + len(n.Hash()) + // Encode node type (1 byte) + buf[pos] = byte(leafNodeType) + pos++ - buf := make([]byte, 0, length) + // Encode height (2-bytes Big Endian) + binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) + pos += 2 - // 2-bytes encoding version - buf = utils.AppendUint16(buf, encodingDecodingVersion) + // Encode max depth (2-bytes Big Endian) + binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) + pos += 2 - // 2-bytes Big Endian uint16 height - buf = utils.AppendUint16(buf, uint16(n.Height())) + // Encode reg count (8-bytes Big Endian) + binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) + pos += 8 - // 8-bytes Big Endian uint64 LIndex - buf = utils.AppendUint64(buf, lchildIndex) + // Encode hash (2-bytes Big Endian for hashValue length and n-bytes hashValue) + binary.BigEndian.PutUint16(buf[pos:], uint16(len(hash))) + pos += 2 - // 8-bytes Big Endian uint64 RIndex - buf = utils.AppendUint64(buf, rchildIndex) + pos += copy(buf[pos:], hash[:]) - // 2-bytes Big Endian maxDepth - buf = utils.AppendUint16(buf, n.MaxDepth()) + // Encode path (2-bytes Big Endian for path length and n-bytes path) + binary.BigEndian.PutUint16(buf[pos:], uint16(len(path))) + pos += 2 - // 8-bytes Big Endian regCount - buf = utils.AppendUint64(buf, n.RegCount()) + pos += copy(buf[pos:], path[:]) - // 2-bytes Big Endian uint16 encoded path length and n-bytes encoded path - path := n.Path() - if 
path != nil { - buf = utils.AppendShortData(buf, path[:]) - } else { - buf = utils.AppendShortData(buf, nil) - } + // Encode payload (4-bytes Big Endian for encoded payload length and n-bytes encoded payload) + binary.BigEndian.PutUint32(buf[pos:], uint32(len(encPayload))) + pos += 4 + + copy(buf[pos:], encPayload) - // 4-bytes Big Endian uint32 encoded payload length and n-bytes encoded payload - buf = utils.AppendLongData(buf, encPayload) + return buf +} + +// encodeInterimNode encodes interim node in the following format: +// - node type (1 byte) +// - height (2 bytes) +// - max depth (2 bytes) +// - reg count (8 bytes) +// - lchild index (8 bytes) +// - rchild index (8 bytes) +// - hash (2 bytes + 32 bytes) +// Encoded interim node size is 63 bytes (assuming length of hash is 32 bytes). +// TODO: reuse buffer. +// TODO: reduce hash size from 2 bytes to 1 byte. +func encodeInterimNode(n *node.Node, lchildIndex uint64, rchildIndex uint64) []byte { - // 2-bytes Big Endian uint16 hashValue length and n-bytes hashValue hash := n.Hash() - buf = utils.AppendShortData(buf, hash[:]) + + buf := make([]byte, 1+2+2+8+8+8+2+len(hash)) + pos := 0 + + // Encode node type (1-byte) + buf[pos] = byte(interimNodeType) + pos++ + + // Encode height (2-bytes Big Endian) + binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) + pos += 2 + + // Encode max depth (2-bytes Big Endian) + binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) + pos += 2 + + // Encode reg count (8-bytes Big Endian) + binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) + pos += 8 + + // Encode left child index (8-bytes Big Endian) + binary.BigEndian.PutUint64(buf[pos:], lchildIndex) + pos += 8 + + // Encode right child index (8-bytes Big Endian) + binary.BigEndian.PutUint64(buf[pos:], rchildIndex) + pos += 8 + + // Encode hash (2-bytes Big Endian hashValue length and n-bytes hashValue) + binary.BigEndian.PutUint16(buf[pos:], uint16(len(hash))) + pos += 2 + + copy(buf[pos:], hash[:]) return buf } +// EncodeNode encodes node. +func EncodeNode(n *node.Node, lchildIndex uint64, rchildIndex uint64) []byte { + if n.IsLeaf() { + return encodeLeafNode(n) + } + return encodeInterimNode(n, lchildIndex, rchildIndex) +} + // ReadNode reconstructs a node from data read from reader. 
// TODO: reuse read buffer func ReadNode(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*node.Node, error) { - // reading version - buf := make([]byte, 2) - read, err := io.ReadFull(reader, buf) - if err != nil { - return nil, fmt.Errorf("failed to read serialized node, cannot read version part: %w", err) - } - if read != len(buf) { - return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf)) - } - - version, _, err := utils.ReadUint16(buf) - if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) - } + // bufSize is large enough to be used for: + // - fixed-length data: node type (1 byte) + height (2 bytes) + max depth (2 bytes) + reg count (8 bytes), or + // - child node indexes: 8 bytes * 2 + const bufSize = 16 + const fixLengthSize = 1 + 2 + 2 + 8 - if version > encodingDecodingVersion { - return nil, fmt.Errorf("failed to read serialized node: unsuported version %d > %d", version, encodingDecodingVersion) - } + // Read fixed-length part + buf := make([]byte, bufSize) + pos := 0 - // reading fixed-length part - buf = make([]byte, 2+8+8+2+8) - - read, err = io.ReadFull(reader, buf) + _, err := io.ReadFull(reader, buf[:fixLengthSize]) if err != nil { return nil, fmt.Errorf("failed to read serialized node, cannot read fixed-length part: %w", err) } - if read != len(buf) { - return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf)) - } - var height, maxDepth uint16 - var lchildIndex, rchildIndex, regCount uint64 - var path, hashValue, encPayload []byte + // Read node type (1 byte) + nType := buf[pos] + pos++ - height, buf, err = utils.ReadUint16(buf) - if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) - } + // Read height (2 bytes) + height := binary.BigEndian.Uint16(buf[pos:]) + pos += 2 - lchildIndex, buf, err = utils.ReadUint64(buf) - if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) - } + // Read max depth (2 bytes) + maxDepth := binary.BigEndian.Uint16(buf[pos:]) + pos += 2 - rchildIndex, buf, err = utils.ReadUint64(buf) - if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) - } + // Read reg count (8 bytes) + regCount := binary.BigEndian.Uint64(buf[pos:]) - maxDepth, buf, err = utils.ReadUint16(buf) - if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) - } + if nType == byte(leafNodeType) { - regCount, _, err = utils.ReadUint64(buf) - if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) - } - - path, err = utils.ReadShortDataFromReader(reader) - if err != nil { - return nil, fmt.Errorf("cannot read key data: %w", err) - } + // Read hash + encHash, err := utils.ReadShortDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read hash: %w", err) + } - encPayload, err = utils.ReadLongDataFromReader(reader) - if err != nil { - return nil, fmt.Errorf("cannot read value data: %w", err) - } + // Read path + encPath, err := utils.ReadShortDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read path: %w", err) + } - hashValue, err = utils.ReadShortDataFromReader(reader) - if err != nil { - return nil, fmt.Errorf("cannot read hashValue data: %w", err) - } + // Read payload + encPayload, err := utils.ReadLongDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read payload: %w", err) + } - // Create (and copy) hash from 
raw data. - nodeHash, err := hash.ToHash(hashValue) - if err != nil { - return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) - } + // ToHash copies encHash + nodeHash, err := hash.ToHash(encHash) + if err != nil { + return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) + } - if len(path) > 0 { - // Create (and copy) path from raw data. - path, err := ledger.ToPath(path) + // ToPath copies encPath + path, err := ledger.ToPath(encPath) if err != nil { return nil, fmt.Errorf("failed to decode path from checkpoint: %w", err) } - // Decode payload (payload data isn't copied). - payload, err := encoding.DecodePayload(encPayload) + // TODO: maybe optimize DecodePayload + payload, err := encoding.DecodePayloadWithoutPrefix(encPayload) if err != nil { return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) } // make a copy of payload + // TODO: copying may not be necessary var pl *ledger.Payload if payload != nil { pl = payload.DeepCopy() } - n := node.NewNode(int(height), nil, nil, path, pl, nodeHash, maxDepth, regCount) - return n, nil + node := node.NewNode(int(height), nil, nil, path, pl, nodeHash, maxDepth, regCount) + return node, nil + } + + // Read interim node + + pos = 0 + + // Read left and right child index (8 bytes each) + _, err = io.ReadFull(reader, buf[:16]) + if err != nil { + return nil, fmt.Errorf("cannot read children index: %w", err) + } + + // Read left child index (8 bytes) + lchildIndex := binary.BigEndian.Uint64(buf[pos:]) + pos += 8 + + // Read right child index (8 bytes) + rchildIndex := binary.BigEndian.Uint64(buf[pos:]) + + // Read hash + hashValue, err := utils.ReadShortDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read hash data: %w", err) + } + + // ToHash copies hashValue + nodeHash, err := hash.ToHash(hashValue) + if err != nil { + return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) } // Get left child node by node index @@ -195,16 +278,19 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64) []byte { rootHash = ledger.RootHash(rootNode.Hash()) } - length := 2 + 8 + 2 + len(rootHash) - buf := make([]byte, 0, length) - // 2-bytes encoding version - buf = utils.AppendUint16(buf, encodingDecodingVersion) + length := 8 + 2 + len(rootHash) + buf := make([]byte, length) + pos := 0 // 8-bytes Big Endian uint64 RootIndex - buf = utils.AppendUint64(buf, rootIndex) + binary.BigEndian.PutUint64(buf, rootIndex) + pos += 8 - // 2-bytes Big Endian uint16 RootHash length and n-bytes RootHash - buf = utils.AppendShortData(buf, rootHash[:]) + // Encode hash (2-bytes Big Endian for hashValue length and n-bytes hashValue) + binary.BigEndian.PutUint16(buf[pos:], uint16(len(rootHash))) + pos += 2 + + copy(buf[pos:], rootHash[:]) return buf } @@ -212,34 +298,12 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64) []byte { // ReadTrie reconstructs a trie from data read from reader. 
func ReadTrie(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*trie.MTrie, error) { - // reading version - buf := make([]byte, 2) - read, err := io.ReadFull(reader, buf) - if err != nil { - return nil, fmt.Errorf("error reading storable node, cannot read version part: %w", err) - } - if read != len(buf) { - return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf)) - } - - version, _, err := utils.ReadUint16(buf) - if err != nil { - return nil, fmt.Errorf("error reading storable node: %w", err) - } - - if version > encodingDecodingVersion { - return nil, fmt.Errorf("error reading storable node: unsuported version %d > %d", version, encodingDecodingVersion) - } - // read root uint64 RootIndex - buf = make([]byte, 8) - read, err = io.ReadFull(reader, buf) + buf := make([]byte, 8) + _, err := io.ReadFull(reader, buf) if err != nil { return nil, fmt.Errorf("cannot read fixed-legth part: %w", err) } - if read != len(buf) { - return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf)) - } rootIndex, _, err := utils.ReadUint64(buf) if err != nil { diff --git a/ledger/complete/mtrie/flattener/encoding_test.go b/ledger/complete/mtrie/flattener/encoding_test.go index b64df5bde6f..315aff6f071 100644 --- a/ledger/complete/mtrie/flattener/encoding_test.go +++ b/ledger/complete/mtrie/flattener/encoding_test.go @@ -31,41 +31,31 @@ func TestNodeSerialization(t *testing.T) { leafNode2 := node.NewNode(255, nil, nil, ledger.Path(path2), payload2, hashValue2, 0, 1) rootNode := node.NewNode(256, leafNode1, leafNode2, ledger.DummyPath, nil, hashValue3, 1, 2) - // Version 0 expectedLeafNode1 := []byte{ - 0, 0, // encoding version + 0, // node type 0, 255, // height - 0, 0, 0, 0, 0, 0, 0, 0, // LIndex - 0, 0, 0, 0, 0, 0, 0, 0, // RIndex 0, 0, // max depth 0, 0, 0, 0, 0, 0, 0, 1, // reg count + 0, 32, // hash data len + 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // hash data 0, 32, // path data len 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // path data - 0, 0, 0, 25, // payload data len - 0, 0, 6, 0, 0, 0, 9, 0, 1, 0, 0, 0, 3, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 1, 97, // payload data - 0, 32, // hashValue length - 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, // hashValue + 0, 0, 0, 22, // payload data len + 0, 0, 0, 9, 0, 1, 0, 0, 0, 3, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 1, 97, // payload data } - // Version 0 expectedRootNode := []byte{ - 0, 0, // encoding version + 1, // node type 1, 0, // height - 0, 0, 0, 0, 0, 0, 0, 1, // LIndex - 0, 0, 0, 0, 0, 0, 0, 2, // RIndex 0, 1, // max depth 0, 0, 0, 0, 0, 0, 0, 2, // reg count - 0, 0, // path data len - 0, 0, 0, 0, // payload data len - 0, 32, // hashValue length - 3, 3, 3, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, // hashValue + 0, 0, 0, 0, 0, 0, 0, 1, // LIndex + 0, 0, 0, 0, 0, 0, 0, 2, // RIndex + 0, 32, // hash data len + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // hash data } t.Run("encode leaf node", func(t *testing.T) { @@ -112,9 +102,7 @@ func TestTrieSerialization(t *testing.T) { rootNode := node.NewNode(256, nil, nil, ledger.DummyPath, nil, hashValue, 7, 5000) rootNodeIndex := uint64(21) - // Version 0 expected := []byte{ - 0, 0, // encoding version 0, 0, 0, 0, 0, 0, 0, 21, // RootIndex 0, 32, // RootHash length 2, 2, 2, 0, 0, 0, 0, 0, 
diff --git a/ledger/complete/mtrie/flattener/encoding_v3.go b/ledger/complete/mtrie/flattener/encoding_v3.go new file mode 100644 index 00000000000..896045d3088 --- /dev/null +++ b/ledger/complete/mtrie/flattener/encoding_v3.go @@ -0,0 +1,201 @@ +package flattener + +import ( + "bytes" + "fmt" + "io" + + "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/common/encoding" + "github.com/onflow/flow-go/ledger/common/hash" + "github.com/onflow/flow-go/ledger/common/utils" + "github.com/onflow/flow-go/ledger/complete/mtrie/node" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" +) + +// This file contains decoding functions for checkpoint v3 and earlier versions. +// These functions are for backwards compatibility. + +const encodingDecodingVersion = uint16(0) + +// ReadNodeFromCheckpointV3AndEarlier reconstructs a node from data in checkpoint v3 and earlier versions. +func ReadNodeFromCheckpointV3AndEarlier(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*node.Node, error) { + + // reading version + buf := make([]byte, 2) + read, err := io.ReadFull(reader, buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node, cannot read version part: %w", err) + } + if read != len(buf) { + return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf)) + } + + version, _, err := utils.ReadUint16(buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node: %w", err) + } + + if version > encodingDecodingVersion { + return nil, fmt.Errorf("failed to read serialized node: unsuported version %d > %d", version, encodingDecodingVersion) + } + + // reading fixed-length part + buf = make([]byte, 2+8+8+2+8) + + read, err = io.ReadFull(reader, buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node, cannot read fixed-length part: %w", err) + } + if read != len(buf) { + return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf)) + } + + var height, maxDepth uint16 + var lchildIndex, rchildIndex, regCount uint64 + var path, hashValue, encPayload []byte + + height, buf, err = utils.ReadUint16(buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node: %w", err) + } + + lchildIndex, buf, err = utils.ReadUint64(buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node: %w", err) + } + + rchildIndex, buf, err = utils.ReadUint64(buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node: %w", err) + } + + maxDepth, buf, err = utils.ReadUint16(buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node: %w", err) + } + + regCount, _, err = utils.ReadUint64(buf) + if err != nil { + return nil, fmt.Errorf("failed to read serialized node: %w", err) + } + + path, err = utils.ReadShortDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read key data: %w", err) + } + + encPayload, err = utils.ReadLongDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read value data: %w", err) + } + + hashValue, err = utils.ReadShortDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read hashValue data: %w", err) + } + + // Create (and copy) hash from raw data. + nodeHash, err := hash.ToHash(hashValue) + if err != nil { + return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) + } + + if len(path) > 0 { + // Create (and copy) path from raw data. 
+ path, err := ledger.ToPath(path) + if err != nil { + return nil, fmt.Errorf("failed to decode path from checkpoint: %w", err) + } + + // Decode payload (payload data isn't copied). + payload, err := encoding.DecodePayload(encPayload) + if err != nil { + return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) + } + + // make a copy of payload + var pl *ledger.Payload + if payload != nil { + pl = payload.DeepCopy() + } + + n := node.NewNode(int(height), nil, nil, path, pl, nodeHash, maxDepth, regCount) + return n, nil + } + + // Get left child node by node index + lchild, err := getNode(lchildIndex) + if err != nil { + return nil, fmt.Errorf("failed to find left child node: %w", err) + } + + // Get right child node by node index + rchild, err := getNode(rchildIndex) + if err != nil { + return nil, fmt.Errorf("failed to find right child node: %w", err) + } + + n := node.NewNode(int(height), lchild, rchild, ledger.DummyPath, nil, nodeHash, maxDepth, regCount) + return n, nil +} + +// ReadTrieFromCheckpointV3AndEarlier reconstructs a trie from data in checkpoint v3 and earlier versions. +func ReadTrieFromCheckpointV3AndEarlier(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*trie.MTrie, error) { + + // reading version + buf := make([]byte, 2) + read, err := io.ReadFull(reader, buf) + if err != nil { + return nil, fmt.Errorf("error reading storable node, cannot read version part: %w", err) + } + if read != len(buf) { + return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf)) + } + + version, _, err := utils.ReadUint16(buf) + if err != nil { + return nil, fmt.Errorf("error reading storable node: %w", err) + } + + if version > encodingDecodingVersion { + return nil, fmt.Errorf("error reading storable node: unsuported version %d > %d", version, encodingDecodingVersion) + } + + // read root uint64 RootIndex + buf = make([]byte, 8) + read, err = io.ReadFull(reader, buf) + if err != nil { + return nil, fmt.Errorf("cannot read fixed-legth part: %w", err) + } + if read != len(buf) { + return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf)) + } + + rootIndex, _, err := utils.ReadUint64(buf) + if err != nil { + return nil, fmt.Errorf("cannot read root index data: %w", err) + } + + readRootHash, err := utils.ReadShortDataFromReader(reader) + if err != nil { + return nil, fmt.Errorf("cannot read roothash data: %w", err) + } + + rootNode, err := getNode(rootIndex) + if err != nil { + return nil, fmt.Errorf("cannot find root node: %w", err) + } + + mtrie, err := trie.NewMTrie(rootNode) + if err != nil { + return nil, fmt.Errorf("restoring trie failed: %w", err) + } + + rootHash := mtrie.RootHash() + if !bytes.Equal(readRootHash, rootHash[:]) { + return nil, fmt.Errorf("restoring trie failed: roothash doesn't match") + } + + return mtrie, nil +} diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index e8f48bb51a0..e5f370e7aef 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -468,7 +468,7 @@ func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, erro tries := make([]*trie.MTrie, triesCount) for i := uint64(1); i <= nodesCount; i++ { - n, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { + n, err := flattener.ReadNodeFromCheckpointV3AndEarlier(reader, func(nodeIndex uint64) (*node.Node, error) { if nodeIndex >= uint64(i) { return nil, fmt.Errorf("sequence of stored nodes does not satisfy 
Descendents-First-Relationship") + } @@ -481,7 +481,7 @@ func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, erro } for i := uint16(0); i < triesCount; i++ { - trie, err := flattener.ReadTrie(reader, func(nodeIndex uint64) (*node.Node, error) { + trie, err := flattener.ReadTrieFromCheckpointV3AndEarlier(reader, func(nodeIndex uint64) (*node.Node, error) { if nodeIndex >= uint64(len(nodes)) { return nil, fmt.Errorf("sequence of stored nodes doesn't contain node") } From dd636e500ebc213d837296a9ea16660785733adf Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 4 Feb 2022 08:48:37 -0600 Subject: [PATCH 07/37] Encode and decode empty payload in checkpoint --- ledger/common/encoding/encoding.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index e7818e8bfcd..f0bf7851c9e 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -205,12 +205,15 @@ func DecodeKey(encodedKey []byte) (*ledger.Key, error) { } func decodeKey(inp []byte) (*ledger.Key, error) { - key := &ledger.Key{} numOfParts, rest, err := utils.ReadUint16(inp) if err != nil { return nil, fmt.Errorf("error decoding key (content): %w", err) } + key := &ledger.Key{ + KeyParts: make([]ledger.KeyPart, numOfParts), + } + for i := 0; i < int(numOfParts); i++ { var kpEncSize uint32 var kpEnc []byte @@ -231,7 +234,8 @@ func decodeKey(inp []byte) (*ledger.Key, error) { if err != nil { return nil, fmt.Errorf("error decoding key (content): %w", err) } - key.KeyParts = append(key.KeyParts, *kp) + + key.KeyParts[i] = *kp } return key, nil } @@ -330,6 +334,9 @@ func EncodePayload(p *ledger.Payload) []byte { // EncodePayloadWithoutPrefix encodes a ledger payload // without prefix (version and type). func EncodePayloadWithoutPrefix(p *ledger.Payload) []byte { + if p == nil { + return []byte{} + } return encodePayload(p) } @@ -379,6 +386,10 @@ func DecodePayload(encodedPayload []byte) (*ledger.Payload, error) { // DecodePayloadWithoutPrefix construct a payload from an encoded byte slice // without prefix (version and type). func DecodePayloadWithoutPrefix(encodedPayload []byte) (*ledger.Payload, error) { + // if empty don't decode + if len(encodedPayload) == 0 { + return nil, nil + } return decodePayload(encodedPayload) } From 6c5ad37be8b14e7d5aefa6265270a9d7dd44c781 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 4 Feb 2022 10:11:18 -0600 Subject: [PATCH 08/37] Optimize reading checkpoint file by reusing buffer Reuse a 4096 byte scratch buffer across reads to eliminate another 400+ million allocs during checkpoint reading.
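For illustration, the intended call pattern after this change looks roughly like the sketch below. It mirrors the checkpoint reader loop from an earlier patch in this series, with the new scratch argument added; the wrapping function, its name, and the io/fmt/flattener/node imports are assumed for the example, not verbatim code from this patch.

    // Sketch: rebuild nodes from a checkpoint stream, reusing a single
    // scratch buffer for every ReadNode call instead of allocating per node.
    func readNodes(reader io.Reader, nodesCount uint64) ([]*node.Node, error) {
        scratch := make([]byte, 4096) // allocated once, reused below

        nodes := make([]*node.Node, nodesCount+1) // index 0 means nil
        for i := uint64(1); i <= nodesCount; i++ {
            n, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) {
                if nodeIndex >= i {
                    return nil, fmt.Errorf("sequence of stored nodes does not satisfy Descendents-First-Relationship")
                }
                return nodes[nodeIndex], nil
            })
            if err != nil {
                return nil, fmt.Errorf("cannot read node %d: %w", i, err)
            }
            nodes[i] = n
        }
        return nodes, nil
    }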
--- ledger/complete/mtrie/flattener/encoding.go | 200 ++++++++++++------ .../complete/mtrie/flattener/encoding_test.go | 86 +++++--- ledger/complete/wal/checkpointer.go | 22 +- 3 files changed, 202 insertions(+), 106 deletions(-) diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 29092abee4c..eaf4c1eefdb 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -1,7 +1,6 @@ package flattener import ( - "bytes" "encoding/binary" "fmt" "io" @@ -9,7 +8,6 @@ import ( "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/encoding" "github.com/onflow/flow-go/ledger/common/hash" - "github.com/onflow/flow-go/ledger/common/utils" "github.com/onflow/flow-go/ledger/complete/mtrie/node" "github.com/onflow/flow-go/ledger/complete/mtrie/trie" ) @@ -140,85 +138,64 @@ func EncodeNode(n *node.Node, lchildIndex uint64, rchildIndex uint64) []byte { } // ReadNode reconstructs a node from data read from reader. -// TODO: reuse read buffer -func ReadNode(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*node.Node, error) { +func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) (*node.Node, error)) (*node.Node, error) { - // bufSize is large enough to be used for: - // - fixed-length data: node type (1 byte) + height (2 bytes) + max depth (2 bytes) + reg count (8 bytes), or - // - child node indexes: 8 bytes * 2 - const bufSize = 16 + // minBufSize should be large enough for interim node and leaf node with small payload. + // minBufSize is a failsafe and is only used when len(scratch) is much smaller + // than expected (4096 by default). + const minBufSize = 1024 + + if len(scratch) < minBufSize { + scratch = make([]byte, minBufSize) + } + + // fixed-length data: node type (1 byte) + height (2 bytes) + max depth (2 bytes) + reg count (8 bytes), or const fixLengthSize = 1 + 2 + 2 + 8 // Read fixed-length part - buf := make([]byte, bufSize) pos := 0 - _, err := io.ReadFull(reader, buf[:fixLengthSize]) + _, err := io.ReadFull(reader, scratch[:fixLengthSize]) if err != nil { return nil, fmt.Errorf("failed to read serialized node, cannot read fixed-length part: %w", err) } // Read node type (1 byte) - nType := buf[pos] + nType := scratch[pos] pos++ // Read height (2 bytes) - height := binary.BigEndian.Uint16(buf[pos:]) + height := binary.BigEndian.Uint16(scratch[pos:]) pos += 2 // Read max depth (2 bytes) - maxDepth := binary.BigEndian.Uint16(buf[pos:]) + maxDepth := binary.BigEndian.Uint16(scratch[pos:]) pos += 2 // Read reg count (8 bytes) - regCount := binary.BigEndian.Uint64(buf[pos:]) + regCount := binary.BigEndian.Uint64(scratch[pos:]) if nType == byte(leafNodeType) { - // Read hash - encHash, err := utils.ReadShortDataFromReader(reader) - if err != nil { - return nil, fmt.Errorf("cannot read hash: %w", err) - } - - // Read path - encPath, err := utils.ReadShortDataFromReader(reader) - if err != nil { - return nil, fmt.Errorf("cannot read path: %w", err) - } - - // Read payload - encPayload, err := utils.ReadLongDataFromReader(reader) - if err != nil { - return nil, fmt.Errorf("cannot read payload: %w", err) - } - - // ToHash copies encHash - nodeHash, err := hash.ToHash(encHash) + // Read encoded hash data from reader and create hash.Hash. 
+ nodeHash, err := readHashFromReader(reader, scratch) if err != nil { return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) } - // ToPath copies encPath - path, err := ledger.ToPath(encPath) + // Read encoded path data from reader and create ledger.Path. + path, err := readPathFromReader(reader, scratch) if err != nil { return nil, fmt.Errorf("failed to decode path from checkpoint: %w", err) } - // TODO: maybe optimize DecodePayload - payload, err := encoding.DecodePayloadWithoutPrefix(encPayload) + // Read encoded payload data from reader and create ledger.Payload. + payload, err := readPayloadFromReader(reader, scratch) if err != nil { - return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) - } - - // make a copy of payload - // TODO: copying may not be necessary - var pl *ledger.Payload - if payload != nil { - pl = payload.DeepCopy() + return nil, fmt.Errorf("cannot read payload: %w", err) } - node := node.NewNode(int(height), nil, nil, path, pl, nodeHash, maxDepth, regCount) + node := node.NewNode(int(height), nil, nil, path, payload, nodeHash, maxDepth, regCount) return node, nil } @@ -227,26 +204,20 @@ func ReadNode(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, erro pos = 0 // Read left and right child index (8 bytes each) - _, err = io.ReadFull(reader, buf[:16]) + _, err = io.ReadFull(reader, scratch[:16]) if err != nil { return nil, fmt.Errorf("cannot read children index: %w", err) } // Read left child index (8 bytes) - lchildIndex := binary.BigEndian.Uint64(buf[pos:]) + lchildIndex := binary.BigEndian.Uint64(scratch[pos:]) pos += 8 // Read right child index (8 bytes) - rchildIndex := binary.BigEndian.Uint64(buf[pos:]) - - // Read hash - hashValue, err := utils.ReadShortDataFromReader(reader) - if err != nil { - return nil, fmt.Errorf("cannot read hash data: %w", err) - } + rchildIndex := binary.BigEndian.Uint64(scratch[pos:]) - // ToHash copies hashValue - nodeHash, err := hash.ToHash(hashValue) + // Read encoded hash data from reader and create hash.Hash + nodeHash, err := readHashFromReader(reader, scratch) if err != nil { return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) } @@ -296,21 +267,26 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64) []byte { } // ReadTrie reconstructs a trie from data read from reader. -func ReadTrie(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*trie.MTrie, error) { +func ReadTrie(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) (*node.Node, error)) (*trie.MTrie, error) { - // read root uint64 RootIndex - buf := make([]byte, 8) - _, err := io.ReadFull(reader, buf) - if err != nil { - return nil, fmt.Errorf("cannot read fixed-legth part: %w", err) + // minBufSize should be large enough for encoded trie (42 bytes). + // minBufSize is a failsafe and is only used when len(scratch) is much smaller + // than expected (4096 by default). 
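+ // (For reference, 42 bytes = 8-byte root index + 2-byte hash length prefix
+ // + 32-byte root hash, matching the layout written by EncodeTrie.)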
+ const minBufSize = 42 + + if len(scratch) < minBufSize { + scratch = make([]byte, minBufSize) } - rootIndex, _, err := utils.ReadUint64(buf) + // read root index (8 bytes) + _, err := io.ReadFull(reader, scratch[:8]) if err != nil { return nil, fmt.Errorf("cannot read root index data: %w", err) } - readRootHash, err := utils.ReadShortDataFromReader(reader) + rootIndex := binary.BigEndian.Uint64(scratch) + + readRootHash, err := readHashFromReader(reader, scratch) if err != nil { return nil, fmt.Errorf("cannot read roothash data: %w", err) } @@ -326,9 +302,99 @@ func ReadTrie(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, erro } rootHash := mtrie.RootHash() - if !bytes.Equal(readRootHash, rootHash[:]) { + if !rootHash.Equals(ledger.RootHash(readRootHash)) { return nil, fmt.Errorf("restoring trie failed: roothash doesn't match") } return mtrie, nil } + +func readHashFromReader(reader io.Reader, scratch []byte) (hash.Hash, error) { + + const encHashBufSize = 2 + hash.HashLen + + if len(scratch) < encHashBufSize { + scratch = make([]byte, encHashBufSize) + } else { + scratch = scratch[:encHashBufSize] + } + + _, err := io.ReadFull(reader, scratch) + if err != nil { + return hash.DummyHash, fmt.Errorf("cannot read hash: %w", err) + } + + sizeBuf, encHashBuf := scratch[:2], scratch[2:] + + size := binary.BigEndian.Uint16(sizeBuf) + if size != hash.HashLen { + return hash.DummyHash, fmt.Errorf("encoded hash size is wrong: want %d bytes, got %d bytes", hash.HashLen, size) + } + + // hash.ToHash copies data + return hash.ToHash(encHashBuf) +} + +func readPathFromReader(reader io.Reader, scratch []byte) (ledger.Path, error) { + + const encPathBufSize = 2 + ledger.PathLen + + if len(scratch) < encPathBufSize { + scratch = make([]byte, encPathBufSize) + } else { + scratch = scratch[:encPathBufSize] + } + + _, err := io.ReadFull(reader, scratch) + if err != nil { + return ledger.DummyPath, fmt.Errorf("cannot read path: %w", err) + } + + sizeBuf, encPathBuf := scratch[:2], scratch[2:] + + size := binary.BigEndian.Uint16(sizeBuf) + if size != ledger.PathLen { + return ledger.DummyPath, fmt.Errorf("encoded path size is wrong: want %d bytes, got %d bytes", ledger.PathLen, size) + } + + // ToPath copies encPath + return ledger.ToPath(encPathBuf) +} + +func readPayloadFromReader(reader io.Reader, scratch []byte) (*ledger.Payload, error) { + + if len(scratch) < 4 { + scratch = make([]byte, 4) + } + + // Read payload size + _, err := io.ReadFull(reader, scratch[:4]) + if err != nil { + return nil, fmt.Errorf("cannot read long data length: %w", err) + } + + size := binary.BigEndian.Uint32(scratch) + + if len(scratch) < int(size) { + scratch = make([]byte, size) + } else { + scratch = scratch[:size] + } + + _, err = io.ReadFull(reader, scratch) + if err != nil { + return nil, fmt.Errorf("cannot read long data: %w", err) + } + + payload, err := encoding.DecodePayloadWithoutPrefix(scratch) + if err != nil { + return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) + } + + if payload == nil { + return nil, nil + } + + // make a copy of payload + return payload.DeepCopy(), nil +} diff --git a/ledger/complete/mtrie/flattener/encoding_test.go b/ledger/complete/mtrie/flattener/encoding_test.go index 315aff6f071..7a43df4475e 100644 --- a/ledger/complete/mtrie/flattener/encoding_test.go +++ b/ledger/complete/mtrie/flattener/encoding_test.go @@ -69,31 +69,47 @@ func TestNodeSerialization(t *testing.T) { }) t.Run("decode leaf node", func(t *testing.T) { - reader := 
bytes.NewReader(expectedLeafNode1) - newNode, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { - if nodeIndex != 0 { - return nil, fmt.Errorf("expect child node index 0, got %d", nodeIndex) - } - return nil, nil - }) - require.NoError(t, err) - assert.Equal(t, leafNode1, newNode) + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + reader := bytes.NewReader(expectedLeafNode1) + newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex != 0 { + return nil, fmt.Errorf("expect child node index 0, got %d", nodeIndex) + } + return nil, nil + }) + require.NoError(t, err) + assert.Equal(t, leafNode1, newNode) + } }) t.Run("decode interim node", func(t *testing.T) { - reader := bytes.NewReader(expectedRootNode) - newNode, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { - switch nodeIndex { - case 1: - return leafNode1, nil - case 2: - return leafNode2, nil - default: - return nil, fmt.Errorf("unexpected child node index %d ", nodeIndex) - } - }) - require.NoError(t, err) - assert.Equal(t, rootNode, newNode) + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + reader := bytes.NewReader(expectedRootNode) + newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + switch nodeIndex { + case 1: + return leafNode1, nil + case 2: + return leafNode2, nil + default: + return nil, fmt.Errorf("unexpected child node index %d ", nodeIndex) + } + }) + require.NoError(t, err) + assert.Equal(t, rootNode, newNode) + } }) } @@ -117,14 +133,22 @@ func TestTrieSerialization(t *testing.T) { }) t.Run("decode", func(t *testing.T) { - reader := bytes.NewReader(expected) - trie, err := flattener.ReadTrie(reader, func(nodeIndex uint64) (*node.Node, error) { - if nodeIndex != rootNodeIndex { - return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) - } - return rootNode, nil - }) - require.NoError(t, err) - assert.Equal(t, rootNode, trie.RootNode()) + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + reader := bytes.NewReader(expected) + trie, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex != rootNodeIndex { + return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) + } + return rootNode, nil + }) + require.NoError(t, err) + assert.Equal(t, rootNode, trie.RootNode()) + } }) } diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index e5f370e7aef..db12e6fff27 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -516,6 +516,13 @@ func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, erro // Checkpoint file header (magic and version) are verified by the caller. func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { + // Scratch buffer is used as temporary buffer that reader can read into. + // Raw data in scratch buffer should be copied or converted into desired + // objects before next Read operation. If the scratch buffer isn't large + // enough, a new buffer will be allocated. However, 4096 bytes will + // be large enough to handle almost all payloads and 100% of interim nodes. 
+ scratch := make([]byte, 1024*4) // must not be less than 1024 + // Read footer to get node count and trie count // footer offset: nodes count (8 bytes) + tries count (2 bytes) + CRC32 sum (4 bytes) @@ -527,15 +534,15 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { return nil, fmt.Errorf("cannot seek to footer: %w", err) } - footer := make([]byte, footerSize) + footer := scratch[:footerSize] _, err = io.ReadFull(f, footer) if err != nil { return nil, fmt.Errorf("cannot read footer bytes: %w", err) } - nodesCount, pos := readUint64(footer, 0) - triesCount, _ := readUint16(footer, pos) + nodesCount := binary.BigEndian.Uint64(footer) + triesCount := binary.BigEndian.Uint16(footer[8:]) // Seek to the start of file _, err = f.Seek(0, io.SeekStart) @@ -549,9 +556,8 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { // Read header: magic (2 bytes) + version (2 bytes) // No action is needed for header because it is verified by the caller. - header := make([]byte, 4) - _, err = io.ReadFull(reader, header) + _, err = io.ReadFull(reader, scratch[:4]) if err != nil { return nil, fmt.Errorf("cannot read header bytes: %w", err) } @@ -561,7 +567,7 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { tries := make([]*trie.MTrie, triesCount) for i := uint64(1); i <= nodesCount; i++ { - n, err := flattener.ReadNode(reader, func(nodeIndex uint64) (*node.Node, error) { + n, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { if nodeIndex >= uint64(i) { return nil, fmt.Errorf("sequence of stored nodes does not satisfy Descendents-First-Relationship") } @@ -574,7 +580,7 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { } for i := uint16(0); i < triesCount; i++ { - trie, err := flattener.ReadTrie(reader, func(nodeIndex uint64) (*node.Node, error) { + trie, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { if nodeIndex >= uint64(len(nodes)) { return nil, fmt.Errorf("sequence of stored nodes doesn't contain node") } @@ -593,7 +599,7 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { return nil, fmt.Errorf("cannot read footer bytes: %w", err) } - crc32buf := make([]byte, 4) + crc32buf := scratch[:4] _, err = bufReader.Read(crc32buf) if err != nil { return nil, fmt.Errorf("error while reading CRC32 checksum: %w", err) From 3aacebb89931d7ba294d7da5f6defb4b100c901e Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 4 Feb 2022 11:28:06 -0600 Subject: [PATCH 09/37] Increase bufio read size to 8192 for checkpoint --- ledger/complete/wal/checkpointer.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index db12e6fff27..03ba9e9c232 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -36,6 +36,11 @@ const VersionV3 uint16 = 0x03 // Version 4 also reduces checkpoint data size. See EncodeNode() and EncodeTrie() for more details. const VersionV4 uint16 = 0x04 +// defaultBufioReadSize replaces the default bufio buffer size of 4096 bytes. +// defaultBufioReadSize can be increased to 16KiB, 32KiB, etc. if it improves performance on +// typical EN hardware. +const defaultBufioReadSize = 1024 * 8 + type Checkpointer struct { dir string wal *DiskWAL @@ -438,7 +443,7 @@ func readCheckpoint(f *os.File) ([]*trie.MTrie, error) { // TODO: return []*trie.MTrie directly without conversion to FlattenedForest. 
func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, error) { - var bufReader io.Reader = bufio.NewReader(f) + var bufReader io.Reader = bufio.NewReaderSize(f, defaultBufioReadSize) crcReader := NewCRC32Reader(bufReader) var reader io.Reader @@ -550,7 +555,7 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { return nil, fmt.Errorf("cannot seek to start of file: %w", err) } - var bufReader io.Reader = bufio.NewReader(f) + var bufReader io.Reader = bufio.NewReaderSize(f, defaultBufioReadSize) crcReader := NewCRC32Reader(bufReader) var reader io.Reader = crcReader From c9a8f145835d7addbc4fbacb98ba091c6892aca6 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Sat, 5 Feb 2022 20:26:49 -0600 Subject: [PATCH 10/37] Optimize creating checkpoint by reusing buffer Reduce allocs by using a 4096 byte scratch buffer to reduce another 400+ million allocs during checkpoint writing. --- ledger/common/encoding/encoding.go | 89 ++++++++++---- ledger/complete/mtrie/flattener/encoding.go | 110 +++++++++++------- .../complete/mtrie/flattener/encoding_test.go | 36 +++++- ledger/complete/wal/checkpointer.go | 13 ++- 4 files changed, 173 insertions(+), 75 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index f0bf7851c9e..d73fb089636 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -108,16 +108,25 @@ func EncodeKeyPart(kp *ledger.KeyPart) []byte { } func encodeKeyPart(kp *ledger.KeyPart) []byte { - buffer := make([]byte, 0) + buffer := make([]byte, 0, encodedKeyPartLength(kp)) + return encodeAndAppendKeyPart(kp, buffer) +} +func encodeAndAppendKeyPart(kp *ledger.KeyPart, buffer []byte) []byte { // encode "Type" field of the key part buffer = utils.AppendUint16(buffer, kp.Type) // encode "Value" field of the key part buffer = append(buffer, kp.Value...) + return buffer } +func encodedKeyPartLength(kp *ledger.KeyPart) int { + // Key part is encoded as: type (2 bytes) + value + return 2 + len(kp.Value) +} + // DecodeKeyPart constructs a key part from an encoded key part func DecodeKeyPart(encodedKeyPart []byte) (*ledger.KeyPart, error) { // currently we ignore the version but in the future we @@ -168,21 +177,37 @@ func EncodeKey(k *ledger.Key) []byte { // encodeKey encodes a key into a byte slice func encodeKey(k *ledger.Key) []byte { - buffer := make([]byte, 0) + buffer := make([]byte, 0, encodedKeyLength(k)) + return encodeAndAppendKey(k, buffer) +} + +// encodeKey encodes a key into a byte slice +func encodeAndAppendKey(k *ledger.Key, buffer []byte) []byte { // encode number of key parts buffer = utils.AppendUint16(buffer, uint16(len(k.KeyParts))) + // iterate over key parts for _, kp := range k.KeyParts { - // encode the key part - encKP := encodeKeyPart(&kp) // encode the len of the encoded key part - buffer = utils.AppendUint32(buffer, uint32(len(encKP))) - // append the encoded key part - buffer = append(buffer, encKP...) + buffer = utils.AppendUint32(buffer, uint32(encodedKeyPartLength(&kp))) + + // encode the key part + buffer = encodeAndAppendKeyPart(&kp, buffer) } + return buffer } +func encodedKeyLength(k *ledger.Key) int { + // Key is encoded as: number of key parts (2 bytes) and for each key part, + // the key part size (4 bytes) + encoded key part (n bytes). 
+ size := 2 + for _, kp := range k.KeyParts { + size += 4 + encodedKeyPartLength(&kp) + } + return size +} + // DecodeKey constructs a key from an encoded key part func DecodeKey(encodedKey []byte) (*ledger.Key, error) { // check the enc dec version @@ -258,6 +283,14 @@ func encodeValue(v ledger.Value) []byte { return v } +func encodeAndAppendValue(v ledger.Value, buffer []byte) []byte { + return append(buffer, v...) +} + +func encodedValueLength(v ledger.Value) int { + return len(v) +} + // DecodeValue constructs a ledger value using an encoded byte slice func DecodeValue(encodedValue []byte) (ledger.Value, error) { // check enc dec version @@ -331,39 +364,49 @@ func EncodePayload(p *ledger.Payload) []byte { return buffer } -// EncodePayloadWithoutPrefix encodes a ledger payload -// without prefix (version and type). -func EncodePayloadWithoutPrefix(p *ledger.Payload) []byte { +// EncodeAndAppendPayloadWithoutPrefix encodes a ledger payload +// without prefix (version and type) and appends to buffer. +func EncodeAndAppendPayloadWithoutPrefix(p *ledger.Payload, buffer []byte) []byte { if p == nil { return []byte{} } - return encodePayload(p) + return encodeAndAppendPayload(p, buffer) +} + +func EncodedPayloadLengthWithoutPrefix(p *ledger.Payload) int { + return encodedPayloadLength(p) } func encodePayload(p *ledger.Payload) []byte { - buffer := make([]byte, 0) + buffer := make([]byte, 0, encodedPayloadLength(p)) + return encodeAndAppendPayload(p, buffer) +} - // encode key - encK := encodeKey(&p.Key) +func encodeAndAppendPayload(p *ledger.Payload, buffer []byte) []byte { // encode encoded key size - buffer = utils.AppendUint32(buffer, uint32(len(encK))) - - // append encoded key content - buffer = append(buffer, encK...) + buffer = utils.AppendUint32(buffer, uint32(encodedKeyLength(&p.Key))) - // encode value - encV := encodeValue(p.Value) + // encode key + buffer = encodeAndAppendKey(&p.Key, buffer) // encode encoded value size - buffer = utils.AppendUint64(buffer, uint64(len(encV))) + buffer = utils.AppendUint64(buffer, uint64(encodedValueLength(p.Value))) - // append encoded key content - buffer = append(buffer, encV...) + // encode value + buffer = encodeAndAppendValue(p.Value, buffer) return buffer } +func encodedPayloadLength(p *ledger.Payload) int { + if p == nil { + return 0 + } + // Payload is encoded as: encode key length (4 bytes) + encoded key + encoded value length (8 bytes) + encode value + return 4 + encodedKeyLength(&p.Key) + 8 + encodedValueLength(p.Value) +} + // DecodePayload construct a payload from an encoded byte slice func DecodePayload(encodedPayload []byte) (*ledger.Payload, error) { // if empty don't decode diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index eaf4c1eefdb..883ba4048f8 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -29,53 +29,60 @@ const ( // - payload (4 bytes + n bytes) // Encoded leaf node size is 85 bytes (assuming length of hash/path is 32 bytes) + // length of encoded payload size. -// TODO: encode payload more efficiently. -// TODO: reuse buffer. +// Scratch buffer is used to avoid allocs. +// WARNING: The returned buffer is likely to share the same underlying array as +// the scratch buffer. Caller is responsible for copying or using returned buffer +// before scratch buffer is used again. // TODO: reduce hash size from 2 bytes to 1 byte. 
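+// (Breakdown of the 85 bytes: 1 node type + 2 height + 2 max depth + 8 reg count
+// + (2+32) hash + (2+32) path + 4 encoded-payload length prefix.)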
-func encodeLeafNode(n *node.Node) []byte { +func encodeLeafNode(n *node.Node, scratch []byte) []byte { - hash := n.Hash() - path := n.Path() - encPayload := encoding.EncodePayloadWithoutPrefix(n.Payload()) + encPayloadSize := encoding.EncodedPayloadLengthWithoutPrefix(n.Payload()) + + encodedNodeSize := 1 + 2 + 2 + 8 + 2 + hash.HashLen + 2 + ledger.PathLen + 4 + encPayloadSize + + if len(scratch) < encodedNodeSize { + scratch = make([]byte, encodedNodeSize) + } - buf := make([]byte, 1+2+2+8+2+len(hash)+2+len(path)+4+len(encPayload)) pos := 0 // Encode node type (1 byte) - buf[pos] = byte(leafNodeType) + scratch[pos] = byte(leafNodeType) pos++ // Encode height (2-bytes Big Endian) - binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) + binary.BigEndian.PutUint16(scratch[pos:], uint16(n.Height())) pos += 2 // Encode max depth (2-bytes Big Endian) - binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) + binary.BigEndian.PutUint16(scratch[pos:], n.MaxDepth()) pos += 2 // Encode reg count (8-bytes Big Endian) - binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) + binary.BigEndian.PutUint64(scratch[pos:], n.RegCount()) pos += 8 // Encode hash (2-bytes Big Endian for hashValue length and n-bytes hashValue) - binary.BigEndian.PutUint16(buf[pos:], uint16(len(hash))) + hash := n.Hash() + binary.BigEndian.PutUint16(scratch[pos:], uint16(len(hash))) pos += 2 - pos += copy(buf[pos:], hash[:]) + pos += copy(scratch[pos:], hash[:]) // Encode path (2-bytes Big Endian for path length and n-bytes path) - binary.BigEndian.PutUint16(buf[pos:], uint16(len(path))) + path := n.Path() + binary.BigEndian.PutUint16(scratch[pos:], uint16(len(path))) pos += 2 - pos += copy(buf[pos:], path[:]) + pos += copy(scratch[pos:], path[:]) // Encode payload (4-bytes Big Endian for encoded payload length and n-bytes encoded payload) - binary.BigEndian.PutUint32(buf[pos:], uint32(len(encPayload))) + binary.BigEndian.PutUint32(scratch[pos:], uint32(encPayloadSize)) pos += 4 - copy(buf[pos:], encPayload) + scratch = encoding.EncodeAndAppendPayloadWithoutPrefix(n.Payload(), scratch[:pos]) - return buf + return scratch } // encodeInterimNode encodes interim node in the following format: @@ -87,54 +94,65 @@ func encodeLeafNode(n *node.Node) []byte { // - rchild index (8 bytes) // - hash (2 bytes + 32 bytes) // Encoded interim node size is 63 bytes (assuming length of hash is 32 bytes). -// TODO: reuse buffer. +// Scratch buffer is used to avoid allocs. +// WARNING: The returned buffer is likely to share the same underlying array as +// the scratch buffer. Caller is responsible for copying or using returned buffer +// before scratch buffer is used again. // TODO: reduce hash size from 2 bytes to 1 byte. 
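+// (Breakdown of the 63 bytes: 1 node type + 2 height + 2 max depth + 8 reg count
+// + 8 lchild index + 8 rchild index + (2+32) hash.)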
-func encodeInterimNode(n *node.Node, lchildIndex uint64, rchildIndex uint64) []byte { +func encodeInterimNode(n *node.Node, lchildIndex uint64, rchildIndex uint64, scratch []byte) []byte { - hash := n.Hash() + encodedNodeSize := 1 + 2 + 2 + 8 + 8 + 8 + 2 + hash.HashLen + + if len(scratch) < encodedNodeSize { + scratch = make([]byte, encodedNodeSize) + } - buf := make([]byte, 1+2+2+8+8+8+2+len(hash)) pos := 0 // Encode node type (1-byte) - buf[pos] = byte(interimNodeType) + scratch[pos] = byte(interimNodeType) pos++ // Encode height (2-bytes Big Endian) - binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) + binary.BigEndian.PutUint16(scratch[pos:], uint16(n.Height())) pos += 2 // Encode max depth (2-bytes Big Endian) - binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) + binary.BigEndian.PutUint16(scratch[pos:], n.MaxDepth()) pos += 2 // Encode reg count (8-bytes Big Endian) - binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) + binary.BigEndian.PutUint64(scratch[pos:], n.RegCount()) pos += 8 // Encode left child index (8-bytes Big Endian) - binary.BigEndian.PutUint64(buf[pos:], lchildIndex) + binary.BigEndian.PutUint64(scratch[pos:], lchildIndex) pos += 8 // Encode right child index (8-bytes Big Endian) - binary.BigEndian.PutUint64(buf[pos:], rchildIndex) + binary.BigEndian.PutUint64(scratch[pos:], rchildIndex) pos += 8 // Encode hash (2-bytes Big Endian hashValue length and n-bytes hashValue) - binary.BigEndian.PutUint16(buf[pos:], uint16(len(hash))) + binary.BigEndian.PutUint16(scratch[pos:], hash.HashLen) pos += 2 - copy(buf[pos:], hash[:]) + h := n.Hash() + pos += copy(scratch[pos:], h[:]) - return buf + return scratch[:pos] } // EncodeNode encodes node. -func EncodeNode(n *node.Node, lchildIndex uint64, rchildIndex uint64) []byte { +// Scratch buffer is used to avoid allocs. +// WARNING: The returned buffer is likely to share the same underlying array as +// the scratch buffer. Caller is responsible for copying or using returned buffer +// before scratch buffer is used again. +func EncodeNode(n *node.Node, lchildIndex uint64, rchildIndex uint64, scratch []byte) []byte { if n.IsLeaf() { - return encodeLeafNode(n) + return encodeLeafNode(n, scratch) } - return encodeInterimNode(n, lchildIndex, rchildIndex) + return encodeInterimNode(n, lchildIndex, rchildIndex, scratch) } // ReadNode reconstructs a node from data read from reader. @@ -239,8 +257,12 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( } // EncodeTrie encodes trie root node -// TODO: reuse buffer -func EncodeTrie(rootNode *node.Node, rootIndex uint64) []byte { +// Scratch buffer is used to avoid allocs. +// WARNING: The returned buffer is likely to share the same underlying array as +// the scratch buffer. Caller is responsible for copying or using returned buffer +// before scratch buffer is used again. 
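+// (The encoded trie is 42 bytes: 8-byte root index + 2-byte hash length prefix
+// + 32-byte root hash, which is also the minBufSize used by ReadTrie.)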
+func EncodeTrie(rootNode *node.Node, rootIndex uint64, scratch []byte) []byte { + // Get root hash var rootHash ledger.RootHash if rootNode == nil { @@ -249,21 +271,25 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64) []byte { rootHash = ledger.RootHash(rootNode.Hash()) } - length := 8 + 2 + len(rootHash) - buf := make([]byte, length) + const encodedTrieSize = 8 + 2 + len(rootHash) + + if len(scratch) < encodedTrieSize { + scratch = make([]byte, encodedTrieSize) + } + pos := 0 // 8-bytes Big Endian uint64 RootIndex - binary.BigEndian.PutUint64(buf, rootIndex) + binary.BigEndian.PutUint64(scratch, rootIndex) pos += 8 // Encode hash (2-bytes Big Endian for hashValue length and n-bytes hashValue) - binary.BigEndian.PutUint16(buf[pos:], uint16(len(rootHash))) + binary.BigEndian.PutUint16(scratch[pos:], uint16(len(rootHash))) pos += 2 - copy(buf[pos:], rootHash[:]) + pos += copy(scratch[pos:], rootHash[:]) - return buf + return scratch[:pos] } // ReadTrie reconstructs a trie from data read from reader. diff --git a/ledger/complete/mtrie/flattener/encoding_test.go b/ledger/complete/mtrie/flattener/encoding_test.go index 7a43df4475e..cf1b9d8910a 100644 --- a/ledger/complete/mtrie/flattener/encoding_test.go +++ b/ledger/complete/mtrie/flattener/encoding_test.go @@ -59,13 +59,29 @@ func TestNodeSerialization(t *testing.T) { } t.Run("encode leaf node", func(t *testing.T) { - data := flattener.EncodeNode(leafNode1, 0, 0) - assert.Equal(t, expectedLeafNode1, data) + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + data := flattener.EncodeNode(leafNode1, 0, 0, scratch) + assert.Equal(t, expectedLeafNode1, data) + } }) t.Run("encode interim node", func(t *testing.T) { - data := flattener.EncodeNode(rootNode, 1, 2) - assert.Equal(t, expectedRootNode, data) + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + data := flattener.EncodeNode(rootNode, 1, 2, scratch) + assert.Equal(t, expectedRootNode, data) + } }) t.Run("decode leaf node", func(t *testing.T) { @@ -128,8 +144,16 @@ func TestTrieSerialization(t *testing.T) { } t.Run("encode", func(t *testing.T) { - data := flattener.EncodeTrie(rootNode, rootNodeIndex) - assert.Equal(t, expected, data) + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + data := flattener.EncodeTrie(rootNode, rootNodeIndex, scratch) + assert.Equal(t, expected, data) + } }) t.Run("decode", func(t *testing.T) { diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index 03ba9e9c232..580826bec09 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -285,6 +285,13 @@ func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { allRootNodes := make([]*node.Node, len(tries)) + // Scratch buffer is used as temporary buffer that node can encode into. + // Data in scratch buffer should be copied or used before scratch buffer is used again. + // If the scratch buffer isn't large enough, a new buffer will be allocated. + // However, 4096 bytes will be large enough to handle almost all payloads + // and 100% of interim nodes. 
+ scratch := make([]byte, 1024*4) + // Serialize all unique nodes nodeCounter := uint64(1) // start from 1, as 0 marks nil for i, t := range tries { @@ -315,8 +322,7 @@ func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { } } - // TODO: reuse scratch buffer for encoding - bytes := flattener.EncodeNode(n, lchildIndex, rchildIndex) + bytes := flattener.EncodeNode(n, lchildIndex, rchildIndex, scratch) _, err = crc32Writer.Write(bytes) if err != nil { return fmt.Errorf("error while writing node data: %w", err) @@ -341,8 +347,7 @@ func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { return fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(rootHash[:])) } - // TODO: reuse scratch buffer for encoding - bytes := flattener.EncodeTrie(rootNode, rootIndex) + bytes := flattener.EncodeTrie(rootNode, rootIndex, scratch) _, err = crc32Writer.Write(bytes) if err != nil { return fmt.Errorf("error while writing trie data: %w", err) From 681d9fedc0db088e2d2773b924ad6cac2b0337eb Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Mon, 7 Feb 2022 12:53:22 -0600 Subject: [PATCH 11/37] Increase bufio write size to 8192 for checkpoint --- ledger/complete/wal/checkpointer.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index 580826bec09..b270ba2a043 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -41,6 +41,11 @@ const VersionV4 uint16 = 0x04 // typical EN hardware. const defaultBufioReadSize = 1024 * 8 +// defaultBufioWriteSize replaces the default bufio buffer size of 4096 bytes. +// defaultBufioWriteSize can be increased to 16KiB, 32KiB, etc. if it improves performance on +// typical EN hardware. 
+const defaultBufioWriteSize = 1024 * 8 + type Checkpointer struct { dir string wal *DiskWAL @@ -241,7 +246,7 @@ func CreateCheckpointWriterForFile(dir, filename string) (io.WriteCloser, error) return nil, fmt.Errorf("cannot create temporary file for checkpoint %v: %w", tmpFile, err) } - writer := bufio.NewWriter(tmpFile) + writer := bufio.NewWriterSize(tmpFile, defaultBufioWriteSize) return &SyncOnCloseRenameFile{ file: tmpFile, targetName: fullname, From c7c62cd234040a21a1b65324cc81a0230df89b12 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Mon, 7 Feb 2022 16:21:35 -0600 Subject: [PATCH 12/37] Preallocate slice when decoding trie update --- ledger/common/encoding/encoding.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index d73fb089636..adeda3a123c 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -541,9 +541,6 @@ func DecodeTrieUpdate(encodedTrieUpdate []byte) (*ledger.TrieUpdate, error) { func decodeTrieUpdate(inp []byte) (*ledger.TrieUpdate, error) { - paths := make([]ledger.Path, 0) - payloads := make([]*ledger.Payload, 0) - // decode root hash rhSize, rest, err := utils.ReadUint16(inp) if err != nil { @@ -571,6 +568,9 @@ func decodeTrieUpdate(inp []byte) (*ledger.TrieUpdate, error) { return nil, fmt.Errorf("error decoding trie update: %w", err) } + paths := make([]ledger.Path, numOfPaths) + payloads := make([]*ledger.Payload, numOfPaths) + var path ledger.Path var encPath []byte for i := 0; i < int(numOfPaths); i++ { @@ -582,7 +582,7 @@ func decodeTrieUpdate(inp []byte) (*ledger.TrieUpdate, error) { if err != nil { return nil, fmt.Errorf("error decoding trie update: %w", err) } - paths = append(paths, path) + paths[i] = path } var payloadSize uint32 @@ -602,7 +602,7 @@ func decodeTrieUpdate(inp []byte) (*ledger.TrieUpdate, error) { if err != nil { return nil, fmt.Errorf("error decoding trie update: %w", err) } - payloads = append(payloads, payload) + payloads[i] = payload } return &ledger.TrieUpdate{RootHash: rh, Paths: paths, Payloads: payloads}, nil } @@ -737,7 +737,8 @@ func decodeTrieProof(inp []byte) (*ledger.TrieProof, error) { if err != nil { return nil, fmt.Errorf("error decoding proof: %w", err) } - interims := make([]hash.Hash, 0) + + interims := make([]hash.Hash, interimsLen) var interimSize uint16 var interim hash.Hash @@ -758,7 +759,7 @@ func decodeTrieProof(inp []byte) (*ledger.TrieProof, error) { return nil, fmt.Errorf("error decoding proof: %w", err) } - interims = append(interims, interim) + interims[i] = interim } pInst.Interims = interims From 1b6878e7c2875a1c72f3c10769f6d714f558e198 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 8 Feb 2022 14:19:15 -0600 Subject: [PATCH 13/37] Avoid allocs when loading checkpoint Decode payload from input buffer and only copy shared data, to avoid extra allocs of payload's Key.KeyParts ([]KeyPart). Loading checkpoint v4 and replaying WALs used 3,076,382,277 fewer allocs/op compared to v3. Prior to this change: When decoding payload during checkpoint loading, payload object was created with shared data from read buffer and was deep copied. Since payload contains key parts as []KeyPart, new slice was created during deep copying. 
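The zero-copy trade-off can be sketched as follows (illustrative only; decodeValue and its length-prefixed format are hypothetical stand-ins for the payload decoding below):

package main

import (
	"encoding/binary"
	"fmt"
)

// decodeValue decodes a 4-byte big-endian length prefix followed by the value
// bytes. With zeroCopy=true the result aliases inp, so it is only valid while
// inp is unmodified; with zeroCopy=false the value is copied exactly once so
// it can outlive the input buffer.
func decodeValue(inp []byte, zeroCopy bool) ([]byte, error) {
	if len(inp) < 4 {
		return nil, fmt.Errorf("data too short")
	}
	size := binary.BigEndian.Uint32(inp)
	if uint32(len(inp)-4) < size {
		return nil, fmt.Errorf("truncated value")
	}
	v := inp[4 : 4+size]
	if zeroCopy {
		return v, nil // shares backing memory with inp
	}
	out := make([]byte, size) // single alloc, owned by the caller
	copy(out, v)
	return out, nil
}

func main() {
	buf := []byte{0, 0, 0, 3, 'a', 'b', 'c'}
	shared, _ := decodeValue(buf, true)
	owned, _ := decodeValue(buf, false)
	buf[4] = 'x' // simulate the read buffer being reused
	fmt.Printf("%s %s\n", shared, owned) // xbc abc
}

Copying at the decode boundary, rather than deep-copying a fully built payload, is what removes the extra []KeyPart allocations described above: the bytes that must outlive the read buffer are copied once, and everything else stays zero-copy.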
--- ledger/common/encoding/encoding.go | 43 +++++++++++++++++---- ledger/complete/mtrie/flattener/encoding.go | 9 +---- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index adeda3a123c..d97096c36b1 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -152,12 +152,21 @@ func DecodeKeyPart(encodedKeyPart []byte) (*ledger.KeyPart, error) { } func decodeKeyPart(inp []byte) (*ledger.KeyPart, error) { + return _decodeKeyPart(inp, true) +} + +func _decodeKeyPart(inp []byte, zeroCopy bool) (*ledger.KeyPart, error) { // read key part type and the rest is the key item part kpt, kpv, err := utils.ReadUint16(inp) if err != nil { return nil, fmt.Errorf("error decoding key part (content): %w", err) } - return &ledger.KeyPart{Type: kpt, Value: kpv}, nil + if zeroCopy { + return &ledger.KeyPart{Type: kpt, Value: kpv}, nil + } + v := make([]byte, len(kpv)) + copy(v, kpv) + return &ledger.KeyPart{Type: kpt, Value: v}, nil } // EncodeKey encodes a key into a byte slice @@ -230,6 +239,10 @@ func DecodeKey(encodedKey []byte) (*ledger.Key, error) { } func decodeKey(inp []byte) (*ledger.Key, error) { + return _decodeKey(inp, true) +} + +func _decodeKey(inp []byte, zeroCopy bool) (*ledger.Key, error) { numOfParts, rest, err := utils.ReadUint16(inp) if err != nil { return nil, fmt.Errorf("error decoding key (content): %w", err) @@ -255,7 +268,7 @@ func decodeKey(inp []byte) (*ledger.Key, error) { } // decode encoded key part - kp, err := decodeKeyPart(kpEnc) + kp, err := _decodeKeyPart(kpEnc, zeroCopy) if err != nil { return nil, fmt.Errorf("error decoding key (content): %w", err) } @@ -426,17 +439,25 @@ func DecodePayload(encodedPayload []byte) (*ledger.Payload, error) { return decodePayload(rest) } -// DecodePayloadWithoutPrefix construct a payload from an encoded byte slice -// without prefix (version and type). -func DecodePayloadWithoutPrefix(encodedPayload []byte) (*ledger.Payload, error) { +// DecodeAndCopyPayloadWithoutPrefix constructs a payload from +// an encoded byte slice without prefix (version and type). +// Returned payload doesn't share data with encodedPayload. +func DecodeAndCopyPayloadWithoutPrefix(encodedPayload []byte) (*ledger.Payload, error) { // if empty don't decode if len(encodedPayload) == 0 { return nil, nil } - return decodePayload(encodedPayload) + return _decodePayload(encodedPayload, false) } func decodePayload(inp []byte) (*ledger.Payload, error) { + return _decodePayload(inp, true) +} + +// _decodePayload constructs a payload from inp. If zeroCopy is true, +// payload's value and key parts' value are not copied +// (they share the same underlying data with inp). 
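+// Callers passing zeroCopy=true must not modify or reuse inp while the
+// returned payload is still in use, since both alias the same memory.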
+func _decodePayload(inp []byte, zeroCopy bool) (*ledger.Payload, error) { // read encoded key size encKeySize, rest, err := utils.ReadUint32(inp) @@ -451,7 +472,7 @@ func decodePayload(inp []byte) (*ledger.Payload, error) { } // decode the key - key, err := decodeKey(encKey) + key, err := _decodeKey(encKey, zeroCopy) if err != nil { return nil, fmt.Errorf("error decoding payload: %w", err) } @@ -468,7 +489,13 @@ func decodePayload(inp []byte) (*ledger.Payload, error) { return nil, fmt.Errorf("error decoding payload: %w", err) } - return &ledger.Payload{Key: *key, Value: encValue}, nil + if zeroCopy { + return &ledger.Payload{Key: *key, Value: encValue}, nil + } + + v := make([]byte, len(encValue)) + copy(v, encValue) + return &ledger.Payload{Key: *key, Value: v}, nil } // EncodeTrieUpdate encodes a trie update struct diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 883ba4048f8..59f50897fd0 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -412,15 +412,10 @@ func readPayloadFromReader(reader io.Reader, scratch []byte) (*ledger.Payload, e return nil, fmt.Errorf("cannot read long data: %w", err) } - payload, err := encoding.DecodePayloadWithoutPrefix(scratch) + payload, err := encoding.DecodeAndCopyPayloadWithoutPrefix(scratch) if err != nil { return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) } - if payload == nil { - return nil, nil - } - - // make a copy of payload - return payload.DeepCopy(), nil + return payload, nil } From 81ddd43db6007c394e9e0ec9eb2d3c1ed516a1aa Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Mon, 14 Feb 2022 09:17:50 -0600 Subject: [PATCH 14/37] Increase checkpoint bufio buffer to 32KiB --- ledger/complete/wal/checkpointer.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index b270ba2a043..681576ff27a 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -37,14 +37,14 @@ const VersionV3 uint16 = 0x03 const VersionV4 uint16 = 0x04 // defaultBufioReadSize replaces the default bufio buffer size of 4096 bytes. -// defaultBufioReadSize can be increased to 16KiB, 32KiB, etc. if it improves performance on -// typical EN hardware. -const defaultBufioReadSize = 1024 * 8 +// defaultBufioReadSize can be increased to 8KiB, 16KiB, 32KiB, etc. if it +// improves performance on typical EN hardware. +const defaultBufioReadSize = 1024 * 32 // defaultBufioWriteSize replaces the default bufio buffer size of 4096 bytes. -// defaultBufioWriteSize can be increased to 16KiB, 32KiB, etc. if it improves performance on -// typical EN hardware. -const defaultBufioWriteSize = 1024 * 8 +// defaultBufioWriteSize can be increased to 8KiB, 16KiB, 32KiB, etc. if it +// improves performance on typical EN hardware. 
+const defaultBufioWriteSize = 1024 * 32 type Checkpointer struct { dir string From 889c2f22df5ba2eb8103533726a630829ec11f6d Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 15 Feb 2022 14:02:10 -0600 Subject: [PATCH 15/37] Rename payload encoding functions --- ledger/common/encoding/encoding.go | 58 +++++++++------------ ledger/complete/mtrie/flattener/encoding.go | 3 +- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index d97096c36b1..d1bf623679b 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -142,8 +142,8 @@ func DecodeKeyPart(encodedKeyPart []byte) (*ledger.KeyPart, error) { return nil, fmt.Errorf("error decoding key part: %w", err) } - // decode the key part content - key, err := decodeKeyPart(rest) + // decode the key part content (zerocopy) + key, err := decodeKeyPart(rest, true) if err != nil { return nil, fmt.Errorf("error decoding key part: %w", err) } @@ -151,11 +151,9 @@ func DecodeKeyPart(encodedKeyPart []byte) (*ledger.KeyPart, error) { return key, nil } -func decodeKeyPart(inp []byte) (*ledger.KeyPart, error) { - return _decodeKeyPart(inp, true) -} - -func _decodeKeyPart(inp []byte, zeroCopy bool) (*ledger.KeyPart, error) { +// decodeKeyPart decodes inp into KeyPart. If zeroCopy is true, KeyPart +// references data in inp. Otherwise, it is copied. +func decodeKeyPart(inp []byte, zeroCopy bool) (*ledger.KeyPart, error) { // read key part type and the rest is the key item part kpt, kpv, err := utils.ReadUint16(inp) if err != nil { @@ -230,19 +228,17 @@ func DecodeKey(encodedKey []byte) (*ledger.Key, error) { return nil, fmt.Errorf("error decoding key: %w", err) } - // decode the key content - key, err := decodeKey(rest) + // decode the key content (zerocopy) + key, err := decodeKey(rest, true) if err != nil { return nil, fmt.Errorf("error decoding key: %w", err) } return key, nil } -func decodeKey(inp []byte) (*ledger.Key, error) { - return _decodeKey(inp, true) -} - -func _decodeKey(inp []byte, zeroCopy bool) (*ledger.Key, error) { +// decodeKey decodes inp into Key. If zeroCopy is true, returned key +// references data in inp. Otherwise, it is copied. +func decodeKey(inp []byte, zeroCopy bool) (*ledger.Key, error) { numOfParts, rest, err := utils.ReadUint16(inp) if err != nil { return nil, fmt.Errorf("error decoding key (content): %w", err) @@ -268,7 +264,7 @@ func _decodeKey(inp []byte, zeroCopy bool) (*ledger.Key, error) { } // decode encoded key part - kp, err := _decodeKeyPart(kpEnc, zeroCopy) + kp, err := decodeKeyPart(kpEnc, zeroCopy) if err != nil { return nil, fmt.Errorf("error decoding key (content): %w", err) } @@ -436,28 +432,24 @@ func DecodePayload(encodedPayload []byte) (*ledger.Payload, error) { if err != nil { return nil, fmt.Errorf("error decoding payload: %w", err) } - return decodePayload(rest) + // decode payload (zerocopy) + return decodePayload(rest, true) } -// DecodeAndCopyPayloadWithoutPrefix constructs a payload from -// an encoded byte slice without prefix (version and type). -// Returned payload doesn't share data with encodedPayload. -func DecodeAndCopyPayloadWithoutPrefix(encodedPayload []byte) (*ledger.Payload, error) { +// DecodePayloadWithoutPrefix constructs a payload from encoded byte slice +// without prefix (version and type). If zeroCopy is true, returned payload +// references data in encodedPayload. Otherwise, it is copied. 
+func DecodePayloadWithoutPrefix(encodedPayload []byte, zeroCopy bool) (*ledger.Payload, error) { // if empty don't decode if len(encodedPayload) == 0 { return nil, nil } - return _decodePayload(encodedPayload, false) -} - -func decodePayload(inp []byte) (*ledger.Payload, error) { - return _decodePayload(inp, true) + return decodePayload(encodedPayload, zeroCopy) } -// _decodePayload constructs a payload from inp. If zeroCopy is true, -// payload's value and key parts' value are not copied -// (they share the same underlying data with inp). -func _decodePayload(inp []byte, zeroCopy bool) (*ledger.Payload, error) { +// decodePayload decodes inp into payload. If zeroCopy is true, +// returned payload references data in inp. Otherwise, it is copied. +func decodePayload(inp []byte, zeroCopy bool) (*ledger.Payload, error) { // read encoded key size encKeySize, rest, err := utils.ReadUint32(inp) @@ -472,7 +464,7 @@ func _decodePayload(inp []byte, zeroCopy bool) (*ledger.Payload, error) { } // decode the key - key, err := _decodeKey(encKey, zeroCopy) + key, err := decodeKey(encKey, zeroCopy) if err != nil { return nil, fmt.Errorf("error decoding payload: %w", err) } @@ -625,7 +617,8 @@ func decodeTrieUpdate(inp []byte) (*ledger.TrieUpdate, error) { if err != nil { return nil, fmt.Errorf("error decoding trie update: %w", err) } - payload, err = decodePayload(encPayload) + // Decode payload (zerocopy) + payload, err = decodePayload(encPayload, true) if err != nil { return nil, fmt.Errorf("error decoding trie update: %w", err) } @@ -753,7 +746,8 @@ func decodeTrieProof(inp []byte) (*ledger.TrieProof, error) { if err != nil { return nil, fmt.Errorf("error decoding proof: %w", err) } - payload, err := decodePayload(encPayload) + // Decode payload (zerocopy) + payload, err := decodePayload(encPayload, true) if err != nil { return nil, fmt.Errorf("error decoding proof: %w", err) } diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 59f50897fd0..1432f772aab 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -412,7 +412,8 @@ func readPayloadFromReader(reader io.Reader, scratch []byte) (*ledger.Payload, e return nil, fmt.Errorf("cannot read long data: %w", err) } - payload, err := encoding.DecodeAndCopyPayloadWithoutPrefix(scratch) + // Decode and copy payload + payload, err := encoding.DecodePayloadWithoutPrefix(scratch, false) if err != nil { return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) } From 51c32493819a3b303f207ac88f451e23273983e7 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 15 Feb 2022 14:21:21 -0600 Subject: [PATCH 16/37] Refactor payload encoding function APIs --- ledger/common/encoding/encoding.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index d1bf623679b..cd94a2dfd40 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -109,10 +109,10 @@ func EncodeKeyPart(kp *ledger.KeyPart) []byte { func encodeKeyPart(kp *ledger.KeyPart) []byte { buffer := make([]byte, 0, encodedKeyPartLength(kp)) - return encodeAndAppendKeyPart(kp, buffer) + return encodeAndAppendKeyPart(buffer, kp) } -func encodeAndAppendKeyPart(kp *ledger.KeyPart, buffer []byte) []byte { +func encodeAndAppendKeyPart(buffer []byte, kp *ledger.KeyPart) []byte { // encode "Type" field 
of the key part buffer = utils.AppendUint16(buffer, kp.Type) @@ -185,11 +185,10 @@ func EncodeKey(k *ledger.Key) []byte { // encodeKey encodes a key into a byte slice func encodeKey(k *ledger.Key) []byte { buffer := make([]byte, 0, encodedKeyLength(k)) - return encodeAndAppendKey(k, buffer) + return encodeAndAppendKey(buffer, k) } -// encodeKey encodes a key into a byte slice -func encodeAndAppendKey(k *ledger.Key, buffer []byte) []byte { +func encodeAndAppendKey(buffer []byte, k *ledger.Key) []byte { // encode number of key parts buffer = utils.AppendUint16(buffer, uint16(len(k.KeyParts))) @@ -199,7 +198,7 @@ func encodeAndAppendKey(k *ledger.Key, buffer []byte) []byte { buffer = utils.AppendUint32(buffer, uint32(encodedKeyPartLength(&kp))) // encode the key part - buffer = encodeAndAppendKeyPart(&kp, buffer) + buffer = encodeAndAppendKeyPart(buffer, &kp) } return buffer @@ -292,7 +291,7 @@ func encodeValue(v ledger.Value) []byte { return v } -func encodeAndAppendValue(v ledger.Value, buffer []byte) []byte { +func encodeAndAppendValue(buffer []byte, v ledger.Value) []byte { return append(buffer, v...) } @@ -379,7 +378,7 @@ func EncodeAndAppendPayloadWithoutPrefix(p *ledger.Payload, buffer []byte) []byt if p == nil { return []byte{} } - return encodeAndAppendPayload(p, buffer) + return encodeAndAppendPayload(buffer, p) } func EncodedPayloadLengthWithoutPrefix(p *ledger.Payload) int { @@ -388,22 +387,22 @@ func EncodedPayloadLengthWithoutPrefix(p *ledger.Payload) int { func encodePayload(p *ledger.Payload) []byte { buffer := make([]byte, 0, encodedPayloadLength(p)) - return encodeAndAppendPayload(p, buffer) + return encodeAndAppendPayload(buffer, p) } -func encodeAndAppendPayload(p *ledger.Payload, buffer []byte) []byte { +func encodeAndAppendPayload(buffer []byte, p *ledger.Payload) []byte { // encode encoded key size buffer = utils.AppendUint32(buffer, uint32(encodedKeyLength(&p.Key))) // encode key - buffer = encodeAndAppendKey(&p.Key, buffer) + buffer = encodeAndAppendKey(buffer, &p.Key) // encode encoded value size buffer = utils.AppendUint64(buffer, uint64(encodedValueLength(p.Value))) // encode value - buffer = encodeAndAppendValue(p.Value, buffer) + buffer = encodeAndAppendValue(buffer, p.Value) return buffer } From 2d0936c7009a79e9757dfd0c9b859ea1829fea35 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 15 Feb 2022 14:48:26 -0600 Subject: [PATCH 17/37] Refactor payload encoding functions --- ledger/common/encoding/encoding.go | 7 +++++-- ledger/complete/mtrie/flattener/encoding.go | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index cd94a2dfd40..803b187fa05 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -374,7 +374,8 @@ func EncodePayload(p *ledger.Payload) []byte { // EncodeAndAppendPayloadWithoutPrefix encodes a ledger payload // without prefix (version and type) and appends to buffer. -func EncodeAndAppendPayloadWithoutPrefix(p *ledger.Payload, buffer []byte) []byte { +// If payload is nil, empty byte slice is returned. 
+func EncodeAndAppendPayloadWithoutPrefix(buffer []byte, p *ledger.Payload) []byte { if p == nil { return []byte{} } @@ -411,7 +412,9 @@ func encodedPayloadLength(p *ledger.Payload) int { if p == nil { return 0 } - // Payload is encoded as: encode key length (4 bytes) + encoded key + encoded value length (8 bytes) + encode value + // Payload is encoded as: + // encode key length (4 bytes) + encoded key + + // encoded value length (8 bytes) + encode value return 4 + encodedKeyLength(&p.Key) + 8 + encodedValueLength(p.Value) } diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 1432f772aab..96086e34e57 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -80,7 +80,7 @@ func encodeLeafNode(n *node.Node, scratch []byte) []byte { binary.BigEndian.PutUint32(scratch[pos:], uint32(encPayloadSize)) pos += 4 - scratch = encoding.EncodeAndAppendPayloadWithoutPrefix(n.Payload(), scratch[:pos]) + scratch = encoding.EncodeAndAppendPayloadWithoutPrefix(scratch[:pos], n.Payload()) return scratch } From 0cc1ab20943a3021a19c0f447a5c339e1cb63b29 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 16 Feb 2022 09:54:12 -0600 Subject: [PATCH 18/37] Fix EncodeAndAppendPayload[...] with nil payload EncodeAndAppendPayloadWithoutPrefix() appends encoded payload to input buffer. If payload is nil, unmodified buffer is returned. This edge case is uncommon and didn't get triggered when creating checkpoint.3485 from checkpoint.3443 with 41 WAL segments (both v3->v4 and v4->v4). Add tests. --- ledger/common/encoding/encoding.go | 4 +- ledger/common/encoding/encoding_test.go | 112 ++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go index 803b187fa05..e8e5bee5b91 100644 --- a/ledger/common/encoding/encoding.go +++ b/ledger/common/encoding/encoding.go @@ -374,10 +374,10 @@ func EncodePayload(p *ledger.Payload) []byte { // EncodeAndAppendPayloadWithoutPrefix encodes a ledger payload // without prefix (version and type) and appends to buffer. -// If payload is nil, empty byte slice is returned. +// If payload is nil, unmodified buffer is returned. 
func EncodeAndAppendPayloadWithoutPrefix(buffer []byte, p *ledger.Payload) []byte { if p == nil { - return []byte{} + return buffer } return encodeAndAppendPayload(buffer, p) } diff --git a/ledger/common/encoding/encoding_test.go b/ledger/common/encoding/encoding_test.go index ed3baa9f72b..d51afd6a907 100644 --- a/ledger/common/encoding/encoding_test.go +++ b/ledger/common/encoding/encoding_test.go @@ -71,6 +71,118 @@ func Test_PayloadEncodingDecoding(t *testing.T) { require.True(t, newp.Equals(p)) } +func Test_NilPayloadWithoutPrefixEncodingDecoding(t *testing.T) { + + buf := []byte{1, 2, 3} + bufLen := len(buf) + + // Test encoded payload data length + encodedPayloadLen := encoding.EncodedPayloadLengthWithoutPrefix(nil) + require.Equal(t, 0, encodedPayloadLen) + + // Encode payload and append to buffer + encoded := encoding.EncodeAndAppendPayloadWithoutPrefix(buf, nil) + // Test encoded data size + require.Equal(t, bufLen, len(encoded)) + // Test original input data isn't modified + require.Equal(t, buf, encoded) + // Test returned encoded data reuses input data + require.True(t, &buf[0] == &encoded[0]) + + // Decode and copy payload (excluding prefix) + newp, err := encoding.DecodePayloadWithoutPrefix(encoded[bufLen:], false) + require.NoError(t, err) + require.Nil(t, newp) + + // Zerocopy option has no effect for nil payload, but test it anyway. + // Decode payload (excluding prefix) with zero copy + newp, err = encoding.DecodePayloadWithoutPrefix(encoded[bufLen:], true) + require.NoError(t, err) + require.Nil(t, newp) +} + +func Test_PayloadWithoutPrefixEncodingDecoding(t *testing.T) { + + kp1t := uint16(1) + kp1v := []byte("key part 1") + kp1 := ledger.NewKeyPart(kp1t, kp1v) + + kp2t := uint16(22) + kp2v := []byte("key part 2") + kp2 := ledger.NewKeyPart(kp2t, kp2v) + + k := ledger.NewKey([]ledger.KeyPart{kp1, kp2}) + v := ledger.Value([]byte{'A'}) + p := ledger.NewPayload(k, v) + + const encodedPayloadSize = 47 // size of encoded payload p without prefix (version + type) + + testCases := []struct { + name string + payload *ledger.Payload + bufCap int + zeroCopy bool + }{ + // full cap means no capacity for appending payload (new alloc) + {"full cap zerocopy", p, 0, true}, + {"full cap", p, 0, false}, + // small cap means not enough capacity for appending payload (new alloc) + {"small cap zerocopy", p, encodedPayloadSize - 1, true}, + {"small cap", p, encodedPayloadSize - 1, false}, + // exact cap means exact capacity for appending payload (no alloc) + {"exact cap zerocopy", p, encodedPayloadSize, true}, + {"exact cap", p, encodedPayloadSize, false}, + // large cap means extra capacity than is needed for appending payload (no alloc) + {"large cap zerocopy", p, encodedPayloadSize + 1, true}, + {"large cap", p, encodedPayloadSize + 1, false}, + } + + bufPrefix := []byte{1, 2, 3} + bufPrefixLen := len(bufPrefix) + + for _, tc := range testCases { + + t.Run(tc.name, func(t *testing.T) { + + // Create a buffer of specified cap + prefix length + buffer := make([]byte, bufPrefixLen, bufPrefixLen+tc.bufCap) + copy(buffer, bufPrefix) + + // Encode payload and append to buffer + encoded := encoding.EncodeAndAppendPayloadWithoutPrefix(buffer, tc.payload) + encodedPayloadLen := encoding.EncodedPayloadLengthWithoutPrefix(tc.payload) + // Test encoded data size + require.Equal(t, len(encoded), bufPrefixLen+encodedPayloadLen) + // Test if original input data is modified + require.Equal(t, bufPrefix, encoded[:bufPrefixLen]) + // Test if input buffer is reused if it fits + if tc.bufCap >= 
encodedPayloadLen { + require.True(t, &buffer[0] == &encoded[0]) + } else { + // new alloc + require.True(t, &buffer[0] != &encoded[0]) + } + + // Decode payload (excluding prefix) + newp, err := encoding.DecodePayloadWithoutPrefix(encoded[bufPrefixLen:], tc.zeroCopy) + require.NoError(t, err) + require.True(t, newp.Equals(tc.payload)) + + // modify encoded value's last byte + encoded[len(encoded)-1] = ^encoded[len(encoded)-1] + + if tc.zeroCopy { + // Test if decoded payload is changed after source data is modified + // because data is shared. + require.False(t, newp.Equals(tc.payload)) + } else { + // Test if decoded payload is unchanged after source data is modified. + require.True(t, newp.Equals(tc.payload)) + } + }) + } +} + // Test_ProofEncodingDecoding tests encoding decoding functionality of a proof func Test_TrieProofEncodingDecoding(t *testing.T) { p, _ := utils.TrieProofFixture() From f09677dc3d59118c49c0f349ab0dbf65dd49da35 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 16 Feb 2022 16:33:08 -0600 Subject: [PATCH 19/37] Improve test --- ledger/common/encoding/encoding_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ledger/common/encoding/encoding_test.go b/ledger/common/encoding/encoding_test.go index d51afd6a907..df7a7cb126b 100644 --- a/ledger/common/encoding/encoding_test.go +++ b/ledger/common/encoding/encoding_test.go @@ -168,8 +168,10 @@ func Test_PayloadWithoutPrefixEncodingDecoding(t *testing.T) { require.NoError(t, err) require.True(t, newp.Equals(tc.payload)) - // modify encoded value's last byte - encoded[len(encoded)-1] = ^encoded[len(encoded)-1] + // Reset encoded payload + for i := 0; i < len(encoded); i++ { + encoded[i] = 0 + } if tc.zeroCopy { // Test if decoded payload is changed after source data is modified From 2ca95f4dda48434e00becd824a452928c71d946a Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 16 Feb 2022 18:25:17 -0600 Subject: [PATCH 20/37] Cleanup checkpoint encoding and decoding - Add comments - Rename variables - Improve error messages to be more consistent --- ledger/complete/mtrie/flattener/encoding.go | 140 ++++++++++++-------- 1 file changed, 85 insertions(+), 55 deletions(-) diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 96086e34e57..bd7371e7ed2 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -29,7 +29,9 @@ const ( // - payload (4 bytes + n bytes) // Encoded leaf node size is 85 bytes (assuming length of hash/path is 32 bytes) + // length of encoded payload size. -// Scratch buffer is used to avoid allocs. +// Scratch buffer is used to avoid allocs. It should be used directly instead +// of using append. This function uses len(scratch) and ignores cap(scratch), +// so any extra capacity will not be utilized. // WARNING: The returned buffer is likely to share the same underlying array as // the scratch buffer. Caller is responsible for copying or using returned buffer // before scratch buffer is used again. @@ -40,49 +42,56 @@ func encodeLeafNode(n *node.Node, scratch []byte) []byte { encodedNodeSize := 1 + 2 + 2 + 8 + 2 + hash.HashLen + 2 + ledger.PathLen + 4 + encPayloadSize + // buf uses received scratch buffer if it's large enough. + // Otherwise, a new buffer is allocated. + // buf is used directly so len(buf) must not be 0. 
+ // buf will be resliced to proper size before being returned from this function. + buf := scratch if len(scratch) < encodedNodeSize { - scratch = make([]byte, encodedNodeSize) + buf = make([]byte, encodedNodeSize) } pos := 0 // Encode node type (1 byte) - scratch[pos] = byte(leafNodeType) + buf[pos] = byte(leafNodeType) pos++ // Encode height (2-bytes Big Endian) - binary.BigEndian.PutUint16(scratch[pos:], uint16(n.Height())) + binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) pos += 2 // Encode max depth (2-bytes Big Endian) - binary.BigEndian.PutUint16(scratch[pos:], n.MaxDepth()) + binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) pos += 2 // Encode reg count (8-bytes Big Endian) - binary.BigEndian.PutUint64(scratch[pos:], n.RegCount()) + binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) pos += 8 // Encode hash (2-bytes Big Endian for hashValue length and n-bytes hashValue) hash := n.Hash() - binary.BigEndian.PutUint16(scratch[pos:], uint16(len(hash))) + binary.BigEndian.PutUint16(buf[pos:], uint16(len(hash))) pos += 2 - pos += copy(scratch[pos:], hash[:]) + pos += copy(buf[pos:], hash[:]) // Encode path (2-bytes Big Endian for path length and n-bytes path) path := n.Path() - binary.BigEndian.PutUint16(scratch[pos:], uint16(len(path))) + binary.BigEndian.PutUint16(buf[pos:], uint16(len(path))) pos += 2 - pos += copy(scratch[pos:], path[:]) + pos += copy(buf[pos:], path[:]) // Encode payload (4-bytes Big Endian for encoded payload length and n-bytes encoded payload) - binary.BigEndian.PutUint32(scratch[pos:], uint32(encPayloadSize)) + binary.BigEndian.PutUint32(buf[pos:], uint32(encPayloadSize)) pos += 4 - scratch = encoding.EncodeAndAppendPayloadWithoutPrefix(scratch[:pos], n.Payload()) + // EncodeAndAppendPayloadWithoutPrefix appends encoded payload to the resliced buf. + // Returned buf is resliced to include appended payload. + buf = encoding.EncodeAndAppendPayloadWithoutPrefix(buf[:pos], n.Payload()) - return scratch + return buf } // encodeInterimNode encodes interim node in the following format: @@ -94,7 +103,9 @@ func encodeLeafNode(n *node.Node, scratch []byte) []byte { // - rchild index (8 bytes) // - hash (2 bytes + 32 bytes) // Encoded interim node size is 63 bytes (assuming length of hash is 32 bytes). -// Scratch buffer is used to avoid allocs. +// Scratch buffer is used to avoid allocs. It should be used directly instead +// of using append. This function uses len(scratch) and ignores cap(scratch), +// so any extra capacity will not be utilized. // WARNING: The returned buffer is likely to share the same underlying array as // the scratch buffer. Caller is responsible for copying or using returned buffer // before scratch buffer is used again. @@ -103,44 +114,49 @@ func encodeInterimNode(n *node.Node, lchildIndex uint64, rchildIndex uint64, scr encodedNodeSize := 1 + 2 + 2 + 8 + 8 + 8 + 2 + hash.HashLen + // buf uses received scratch buffer if it's large enough. + // Otherwise, a new buffer is allocated. + // buf is used directly so len(buf) must not be 0. + // buf will be resliced to proper size before being returned from this function. 
+ buf := scratch if len(scratch) < encodedNodeSize { - scratch = make([]byte, encodedNodeSize) + buf = make([]byte, encodedNodeSize) } pos := 0 // Encode node type (1-byte) - scratch[pos] = byte(interimNodeType) + buf[pos] = byte(interimNodeType) pos++ // Encode height (2-bytes Big Endian) - binary.BigEndian.PutUint16(scratch[pos:], uint16(n.Height())) + binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) pos += 2 // Encode max depth (2-bytes Big Endian) - binary.BigEndian.PutUint16(scratch[pos:], n.MaxDepth()) + binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) pos += 2 // Encode reg count (8-bytes Big Endian) - binary.BigEndian.PutUint64(scratch[pos:], n.RegCount()) + binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) pos += 8 // Encode left child index (8-bytes Big Endian) - binary.BigEndian.PutUint64(scratch[pos:], lchildIndex) + binary.BigEndian.PutUint64(buf[pos:], lchildIndex) pos += 8 // Encode right child index (8-bytes Big Endian) - binary.BigEndian.PutUint64(scratch[pos:], rchildIndex) + binary.BigEndian.PutUint64(buf[pos:], rchildIndex) pos += 8 // Encode hash (2-bytes Big Endian hashValue length and n-bytes hashValue) - binary.BigEndian.PutUint16(scratch[pos:], hash.HashLen) + binary.BigEndian.PutUint16(buf[pos:], hash.HashLen) pos += 2 h := n.Hash() - pos += copy(scratch[pos:], h[:]) + pos += copy(buf[pos:], h[:]) - return scratch[:pos] + return buf[:pos] } // EncodeNode encodes node. @@ -156,18 +172,22 @@ func EncodeNode(n *node.Node, lchildIndex uint64, rchildIndex uint64, scratch [] } // ReadNode reconstructs a node from data read from reader. +// Scratch buffer is used to avoid allocs. It should be used directly instead +// of using append. This function uses len(scratch) and ignores cap(scratch), +// so any extra capacity will not be utilized. +// If len(scratch) < 1024, then a new buffer will be allocated and used. func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) (*node.Node, error)) (*node.Node, error) { // minBufSize should be large enough for interim node and leaf node with small payload. // minBufSize is a failsafe and is only used when len(scratch) is much smaller - // than expected (4096 by default). + // than expected. len(scratch) is 4096 by default, so minBufSize isn't likely to be used. 
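// Illustrative caller (assumed usage, not from this patch): checkpoint
// readers allocate one scratch buffer up front and reuse it for every node,
// so the failsafe below rarely triggers:
//
//	scratch := make([]byte, 4096) // the default size mentioned above
//	for i := uint64(1); i <= nodeCount; i++ {
//		n, err := ReadNode(r, scratch, getNode)
//		// handle err, store n ...
//	}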
const minBufSize = 1024 if len(scratch) < minBufSize { scratch = make([]byte, minBufSize) } - // fixed-length data: node type (1 byte) + height (2 bytes) + max depth (2 bytes) + reg count (8 bytes), or + // fixed-length data: node type (1 byte) + height (2 bytes) + max depth (2 bytes) + reg count (8 bytes) const fixLengthSize = 1 + 2 + 2 + 8 // Read fixed-length part @@ -175,42 +195,42 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( _, err := io.ReadFull(reader, scratch[:fixLengthSize]) if err != nil { - return nil, fmt.Errorf("failed to read serialized node, cannot read fixed-length part: %w", err) + return nil, fmt.Errorf("failed to read fixed-length part of serialized node: %w", err) } - // Read node type (1 byte) + // Decode node type (1 byte) nType := scratch[pos] pos++ - // Read height (2 bytes) + // Decode height (2 bytes) height := binary.BigEndian.Uint16(scratch[pos:]) pos += 2 - // Read max depth (2 bytes) + // Decode max depth (2 bytes) maxDepth := binary.BigEndian.Uint16(scratch[pos:]) pos += 2 - // Read reg count (8 bytes) + // Decode reg count (8 bytes) regCount := binary.BigEndian.Uint64(scratch[pos:]) if nType == byte(leafNodeType) { - // Read encoded hash data from reader and create hash.Hash. + // Read encoded hash data and create hash.Hash. nodeHash, err := readHashFromReader(reader, scratch) if err != nil { - return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) + return nil, fmt.Errorf("failed to read and decode hash of serialized node: %w", err) } - // Read encoded path data from reader and create ledger.Path. + // Read encoded path data and create ledger.Path. path, err := readPathFromReader(reader, scratch) if err != nil { - return nil, fmt.Errorf("failed to decode path from checkpoint: %w", err) + return nil, fmt.Errorf("failed to read and decode path of serialized node: %w", err) } - // Read encoded payload data from reader and create ledger.Payload. + // Read encoded payload data and create ledger.Payload. 
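// Editorial aside on the messages rewritten in this hunk: they now follow a
// single pattern, "failed to <verb> <field> of serialized node: %w", e.g.:
//
//	fmt.Errorf("failed to read and decode path of serialized node: %w", err)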
payload, err := readPayloadFromReader(reader, scratch) if err != nil { - return nil, fmt.Errorf("cannot read payload: %w", err) + return nil, fmt.Errorf("failed to read and decode payload of serialized node: %w", err) } node := node.NewNode(int(height), nil, nil, path, payload, nodeHash, maxDepth, regCount) @@ -224,39 +244,41 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( // Read left and right child index (8 bytes each) _, err = io.ReadFull(reader, scratch[:16]) if err != nil { - return nil, fmt.Errorf("cannot read children index: %w", err) + return nil, fmt.Errorf("failed to read child index of serialized node: %w", err) } - // Read left child index (8 bytes) + // Decode left child index (8 bytes) lchildIndex := binary.BigEndian.Uint64(scratch[pos:]) pos += 8 - // Read right child index (8 bytes) + // Decode right child index (8 bytes) rchildIndex := binary.BigEndian.Uint64(scratch[pos:]) // Read encoded hash data from reader and create hash.Hash nodeHash, err := readHashFromReader(reader, scratch) if err != nil { - return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) + return nil, fmt.Errorf("failed to read and decode hash of serialized node: %w", err) } // Get left child node by node index lchild, err := getNode(lchildIndex) if err != nil { - return nil, fmt.Errorf("failed to find left child node: %w", err) + return nil, fmt.Errorf("failed to find left child node of serialized node: %w", err) } // Get right child node by node index rchild, err := getNode(rchildIndex) if err != nil { - return nil, fmt.Errorf("failed to find right child node: %w", err) + return nil, fmt.Errorf("failed to find right child node of serialized node: %w", err) } n := node.NewNode(int(height), lchild, rchild, ledger.DummyPath, nil, nodeHash, maxDepth, regCount) return n, nil } -// EncodeTrie encodes trie root node +// EncodeTrie encodes trie in the following format: +// - root node index (8 byte) +// - root node hash (2 bytes + 32 bytes) // Scratch buffer is used to avoid allocs. // WARNING: The returned buffer is likely to share the same underlying array as // the scratch buffer. 
Caller is responsible for copying or using returned buffer @@ -279,7 +301,7 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64, scratch []byte) []byte { pos := 0 - // 8-bytes Big Endian uint64 RootIndex + // Encode root node index (8-bytes Big Endian) binary.BigEndian.PutUint64(scratch, rootIndex) pos += 8 @@ -304,37 +326,41 @@ func ReadTrie(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( scratch = make([]byte, minBufSize) } - // read root index (8 bytes) + // Read root node index (8 bytes) _, err := io.ReadFull(reader, scratch[:8]) if err != nil { - return nil, fmt.Errorf("cannot read root index data: %w", err) + return nil, fmt.Errorf("failed to read root node index of serialized trie: %w", err) } + // Decode root node index rootIndex := binary.BigEndian.Uint64(scratch) + // Read and decode root node hash readRootHash, err := readHashFromReader(reader, scratch) if err != nil { - return nil, fmt.Errorf("cannot read roothash data: %w", err) + return nil, fmt.Errorf("failed to read and decode hash of serialized trie: %w", err) } rootNode, err := getNode(rootIndex) if err != nil { - return nil, fmt.Errorf("cannot find root node: %w", err) + return nil, fmt.Errorf("failed to find root node of serialized trie: %w", err) } mtrie, err := trie.NewMTrie(rootNode) if err != nil { - return nil, fmt.Errorf("restoring trie failed: %w", err) + return nil, fmt.Errorf("failed to restore serialized trie: %w", err) } rootHash := mtrie.RootHash() if !rootHash.Equals(ledger.RootHash(readRootHash)) { - return nil, fmt.Errorf("restoring trie failed: roothash doesn't match") + return nil, fmt.Errorf("failed to restore serialized trie: roothash doesn't match") } return mtrie, nil } +// readHashFromReader reads and decodes hash from reader. +// Returned hash is a copy. func readHashFromReader(reader io.Reader, scratch []byte) (hash.Hash, error) { const encHashBufSize = 2 + hash.HashLen @@ -361,6 +387,8 @@ func readHashFromReader(reader io.Reader, scratch []byte) (hash.Hash, error) { return hash.ToHash(encHashBuf) } +// readPathFromReader reads and decodes path from reader. +// Returned path is a copy. func readPathFromReader(reader io.Reader, scratch []byte) (ledger.Path, error) { const encPathBufSize = 2 + ledger.PathLen @@ -383,10 +411,12 @@ func readPathFromReader(reader io.Reader, scratch []byte) (ledger.Path, error) { return ledger.DummyPath, fmt.Errorf("encoded path size is wrong: want %d bytes, got %d bytes", ledger.PathLen, size) } - // ToPath copies encPath + // ledger.ToPath copies data return ledger.ToPath(encPathBuf) } +// readPayloadFromReader reads and decodes payload from reader. +// Returned payload is a copy. 
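// Illustrative layout of the record this helper consumes (widths taken from
// the encoder above):
//
//	| payload length: 4 bytes, big endian | encoded payload: n bytes |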
func readPayloadFromReader(reader io.Reader, scratch []byte) (*ledger.Payload, error) {

 	if len(scratch) < 4 {
@@ -396,7 +426,7 @@ func readPayloadFromReader(reader io.Reader, scratch []byte) (*ledger.Payload, e
 	// Read payload size
 	_, err := io.ReadFull(reader, scratch[:4])
 	if err != nil {
-		return nil, fmt.Errorf("cannot read long data length: %w", err)
+		return nil, fmt.Errorf("cannot read payload length: %w", err)
 	}

 	size := binary.BigEndian.Uint32(scratch)
@@ -409,13 +439,13 @@ func readPayloadFromReader(reader io.Reader, scratch []byte) (*ledger.Payload, e

 	_, err = io.ReadFull(reader, scratch)
 	if err != nil {
-		return nil, fmt.Errorf("cannot read long data: %w", err)
+		return nil, fmt.Errorf("cannot read payload: %w", err)
 	}

 	// Decode and copy payload
 	payload, err := encoding.DecodePayloadWithoutPrefix(scratch, false)
 	if err != nil {
-		return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err)
+		return nil, fmt.Errorf("failed to decode payload: %w", err)
 	}

 	return payload, nil

From d12c3b3dd1572b167c6ee15cdf12edaa241350a6 Mon Sep 17 00:00:00 2001
From: Faye Amacker <33205765+fxamacker@users.noreply.github.com>
Date: Wed, 16 Feb 2022 19:50:07 -0600
Subject: [PATCH 21/37] Optimize encodedKeyLength()

Reduce 2 CPU instructions in the loop to 1 on linux_amd64
with Go 1.16 and 1.17.

Thanks for the suggestion @tarakby

Co-authored-by: Tarak Ben Youssef <50252200+tarakby@users.noreply.github.com>
---
 ledger/common/encoding/encoding.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go
index e8e5bee5b91..257aaf24267 100644
--- a/ledger/common/encoding/encoding.go
+++ b/ledger/common/encoding/encoding.go
@@ -207,9 +207,9 @@ func encodeAndAppendKey(buffer []byte, k *ledger.Key) []byte {
 func encodedKeyLength(k *ledger.Key) int {
 	// Key is encoded as: number of key parts (2 bytes) and for each key part,
 	// the key part size (4 bytes) + encoded key part (n bytes).
-	size := 2
+	size := 2 + 4*len(k.KeyParts)
 	for _, kp := range k.KeyParts {
-		size += 4 + encodedKeyPartLength(&kp)
+		size += encodedKeyPartLength(&kp)
 	}
 	return size
 }

From 802347fc627b345cf5b9150549dc91d5be04cc2e Mon Sep 17 00:00:00 2001
From: Faye Amacker <33205765+fxamacker@users.noreply.github.com>
Date: Fri, 18 Feb 2022 09:56:01 -0600
Subject: [PATCH 22/37] Add flattening encoding tests

Also revert change to create Payload.KeyParts as empty slice
when number of key parts is 0. Now Payload.KeyParts is nil
when number of key parts is 0.
---
 ledger/common/encoding/encoding.go            |   8 +-
 .../complete/mtrie/flattener/encoding_test.go | 459 ++++++++++++++----
 2 files changed, 364 insertions(+), 103 deletions(-)

diff --git a/ledger/common/encoding/encoding.go b/ledger/common/encoding/encoding.go
index 257aaf24267..0f1b3a18097 100644
--- a/ledger/common/encoding/encoding.go
+++ b/ledger/common/encoding/encoding.go
@@ -238,15 +238,19 @@ func DecodeKey(encodedKey []byte) (*ledger.Key, error) {
 // decodeKey decodes inp into Key. If zeroCopy is true, returned key
 // references data in inp. Otherwise, it is copied.
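// Illustrative call sites (hypothetical) showing the zeroCopy trade-off:
//
//	k1, _ := decodeKey(inp, true)  // key parts alias inp; cheap, but inp
//	                               // must stay untouched while k1 is in use
//	k2, _ := decodeKey(inp, false) // key parts are copies; inp may be reused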
func decodeKey(inp []byte, zeroCopy bool) (*ledger.Key, error) { + key := &ledger.Key{} + numOfParts, rest, err := utils.ReadUint16(inp) if err != nil { return nil, fmt.Errorf("error decoding key (content): %w", err) } - key := &ledger.Key{ - KeyParts: make([]ledger.KeyPart, numOfParts), + if numOfParts == 0 { + return key, nil } + key.KeyParts = make([]ledger.KeyPart, numOfParts) + for i := 0; i < int(numOfParts); i++ { var kpEncSize uint32 var kpEnc []byte diff --git a/ledger/complete/mtrie/flattener/encoding_test.go b/ledger/complete/mtrie/flattener/encoding_test.go index cf1b9d8910a..f634ab1975a 100644 --- a/ledger/complete/mtrie/flattener/encoding_test.go +++ b/ledger/complete/mtrie/flattener/encoding_test.go @@ -2,7 +2,9 @@ package flattener_test import ( "bytes" + "errors" "fmt" + "math/rand" "testing" "github.com/stretchr/testify/assert" @@ -15,164 +17,419 @@ import ( "github.com/onflow/flow-go/ledger/complete/mtrie/node" ) -func TestNodeSerialization(t *testing.T) { +func TestLeafNodeEncodingDecoding(t *testing.T) { + // Leaf node with nil payload path1 := utils.PathByUint8(0) - payload1 := utils.LightPayload8('A', 'a') + payload1 := (*ledger.Payload)(nil) hashValue1 := hash.Hash([32]byte{1, 1, 1}) + leafNodeNilPayload := node.NewNode(255, nil, nil, ledger.Path(path1), payload1, hashValue1, 0, 1) + // Leaf node with empty payload (not nil) + // EmptyPayload() not used because decoded playload's value is empty slice (not nil) path2 := utils.PathByUint8(1) - payload2 := utils.LightPayload8('B', 'b') + payload2 := &ledger.Payload{Value: []byte{}} hashValue2 := hash.Hash([32]byte{2, 2, 2}) + leafNodeEmptyPayload := node.NewNode(255, nil, nil, ledger.Path(path2), payload2, hashValue2, 0, 1) + // Leaf node with payload + path3 := utils.PathByUint8(2) + payload3 := utils.LightPayload8('A', 'a') hashValue3 := hash.Hash([32]byte{3, 3, 3}) + leafNodePayload := node.NewNode(255, nil, nil, ledger.Path(path3), payload3, hashValue3, 0, 1) - leafNode1 := node.NewNode(255, nil, nil, ledger.Path(path1), payload1, hashValue1, 0, 1) - leafNode2 := node.NewNode(255, nil, nil, ledger.Path(path2), payload2, hashValue2, 0, 1) - rootNode := node.NewNode(256, leafNode1, leafNode2, ledger.DummyPath, nil, hashValue3, 1, 2) - - expectedLeafNode1 := []byte{ - 0, // node type - 0, 255, // height - 0, 0, // max depth - 0, 0, 0, 0, 0, 0, 0, 1, // reg count - 0, 32, // hash data len - 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // hash data - 0, 32, // path data len - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // path data - 0, 0, 0, 22, // payload data len - 0, 0, 0, 9, 0, 1, 0, 0, 0, 3, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 1, 97, // payload data + encodedLeafNodeNilPayload := []byte{ + 0x00, // node type + 0x00, 0xff, // height + 0x00, 0x00, // max depth + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // reg count + 0x00, 0x20, // hash data len + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data + 0x00, 0x20, // path data len + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // path data + 0x00, 0x00, 0x00, 0x00, // payload data len } - expectedRootNode := []byte{ - 1, // node type - 1, 0, // height - 
0, 1, // max depth - 0, 0, 0, 0, 0, 0, 0, 2, // reg count - 0, 0, 0, 0, 0, 0, 0, 1, // LIndex - 0, 0, 0, 0, 0, 0, 0, 2, // RIndex - 0, 32, // hash data len - 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // hash data + encodedLeafNodeEmptyPayload := []byte{ + 0x00, // node type + 0x00, 0xff, // height + 0x00, 0x00, // max depth + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // reg count + 0x00, 0x20, // hash data len + 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data + 0x00, 0x20, // path data len + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // path data + 0x00, 0x00, 0x00, 0x0e, // payload data len + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // payload data } - t.Run("encode leaf node", func(t *testing.T) { - scratchBuffers := [][]byte{ - nil, - make([]byte, 0), - make([]byte, 1024), - } + encodedLeafNodePayload := []byte{ + 0x00, // node type + 0x00, 0xff, // height + 0x00, 0x00, // max depth + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // reg count + 0x00, 0x20, // hash data len + 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data + 0x00, 0x20, // path data len + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // path data + 0x00, 0x00, 0x00, 0x16, // payload data len + 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x61, // payload data + } - for _, scratch := range scratchBuffers { - data := flattener.EncodeNode(leafNode1, 0, 0, scratch) - assert.Equal(t, expectedLeafNode1, data) - } - }) + testCases := []struct { + name string + node *node.Node + encodedNode []byte + }{ + {"nil payload", leafNodeNilPayload, encodedLeafNodeNilPayload}, + {"empty payload", leafNodeEmptyPayload, encodedLeafNodeEmptyPayload}, + {"payload", leafNodePayload, encodedLeafNodePayload}, + } - t.Run("encode interim node", func(t *testing.T) { - scratchBuffers := [][]byte{ - nil, - make([]byte, 0), - make([]byte, 1024), - } + for _, tc := range testCases { + t.Run("encode "+tc.name, func(t *testing.T) { + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 16), + make([]byte, 1024), + } - for _, scratch := range scratchBuffers { - data := flattener.EncodeNode(rootNode, 1, 2, scratch) - assert.Equal(t, expectedRootNode, data) + for _, scratch := range scratchBuffers { + encodedNode := flattener.EncodeNode(tc.node, 0, 0, scratch) + assert.Equal(t, tc.encodedNode, encodedNode) + + if len(scratch) > 0 { + if len(scratch) >= len(encodedNode) { + // reuse scratch buffer + require.True(t, &scratch[0] == &encodedNode[0]) + } else { + // new alloc + require.True(t, &scratch[0] != &encodedNode[0]) + } + } + } + }) + + t.Run("decode "+tc.name, func(t *testing.T) { + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 16), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { 
+ reader := bytes.NewReader(tc.encodedNode) + newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + return nil, fmt.Errorf("no call expected") + }) + require.NoError(t, err) + assert.Equal(t, tc.node, newNode) + } + }) + + t.Run("decode "+tc.name+" incomplete input error", func(t *testing.T) { + scratch := make([]byte, 1024) + + for i := 0; i < len(tc.encodedNode)-1; i++ { + for j := i; j < len(tc.encodedNode)-1; j++ { + reader := bytes.NewReader(tc.encodedNode[i:j]) + newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + return nil, fmt.Errorf("no call expected") + }) + require.Nil(t, newNode) + require.Error(t, err) + } + } + }) + } +} + +func TestRandomLeafNodeEncodingDecoding(t *testing.T) { + const count = 1000 + const minPayloadSize = 40 + const maxPayloadSize = 1024 * 2 + // scratchBufferSize is intentionally small here to test + // when encoded node size is sometimes larger than scratch buffer. + const scratchBufferSize = 512 + + paths := utils.RandomPaths(count) + payloads := utils.RandomPayloads(count, minPayloadSize, maxPayloadSize) + + writeScratch := make([]byte, scratchBufferSize) + readScratch := make([]byte, scratchBufferSize) + + for i := 0; i < count; i++ { + height := rand.Intn(257) + + var hashValue hash.Hash + rand.Read(hashValue[:]) + + n := node.NewNode(height, nil, nil, paths[i], payloads[i], hashValue, 0, 1) + + encodedNode := flattener.EncodeNode(n, 0, 0, writeScratch) + + if len(writeScratch) >= len(encodedNode) { + // reuse scratch buffer + require.True(t, &writeScratch[0] == &encodedNode[0]) + } else { + // new alloc because scratch buffer isn't big enough + require.True(t, &writeScratch[0] != &encodedNode[0]) } - }) - t.Run("decode leaf node", func(t *testing.T) { + reader := bytes.NewReader(encodedNode) + newNode, err := flattener.ReadNode(reader, readScratch, func(nodeIndex uint64) (*node.Node, error) { + return nil, fmt.Errorf("no call expected") + }) + require.NoError(t, err) + assert.Equal(t, n, newNode) + } +} + +func TestInterimNodeEncodingDecoding(t *testing.T) { + + const lchildIndex = 1 + const rchildIndex = 2 + + // Child node + path1 := utils.PathByUint8(0) + payload1 := utils.LightPayload8('A', 'a') + hashValue1 := hash.Hash([32]byte{1, 1, 1}) + leafNode1 := node.NewNode(255, nil, nil, ledger.Path(path1), payload1, hashValue1, 0, 1) + + // Child node + path2 := utils.PathByUint8(1) + payload2 := utils.LightPayload8('B', 'b') + hashValue2 := hash.Hash([32]byte{2, 2, 2}) + leafNode2 := node.NewNode(255, nil, nil, ledger.Path(path2), payload2, hashValue2, 0, 1) + + // Interim node + hashValue3 := hash.Hash([32]byte{3, 3, 3}) + interimNode := node.NewNode(256, leafNode1, leafNode2, ledger.DummyPath, nil, hashValue3, 1, 2) + + encodedInterimNode := []byte{ + 0x01, // node type + 0x01, 0x00, // height + 0x00, 0x01, // max depth + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // reg count + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // LIndex + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // RIndex + 0x00, 0x20, // hash data len + 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data + } + + t.Run("encode", func(t *testing.T) { scratchBuffers := [][]byte{ nil, make([]byte, 0), + make([]byte, 16), make([]byte, 1024), } for _, scratch := range scratchBuffers { - reader := 
bytes.NewReader(expectedLeafNode1) - newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { - if nodeIndex != 0 { - return nil, fmt.Errorf("expect child node index 0, got %d", nodeIndex) - } - return nil, nil - }) - require.NoError(t, err) - assert.Equal(t, leafNode1, newNode) + data := flattener.EncodeNode(interimNode, lchildIndex, rchildIndex, scratch) + assert.Equal(t, encodedInterimNode, data) } }) - t.Run("decode interim node", func(t *testing.T) { + t.Run("decode", func(t *testing.T) { scratchBuffers := [][]byte{ nil, make([]byte, 0), + make([]byte, 16), make([]byte, 1024), } for _, scratch := range scratchBuffers { - reader := bytes.NewReader(expectedRootNode) + reader := bytes.NewReader(encodedInterimNode) newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { switch nodeIndex { - case 1: + case lchildIndex: return leafNode1, nil - case 2: + case rchildIndex: return leafNode2, nil default: return nil, fmt.Errorf("unexpected child node index %d ", nodeIndex) } }) require.NoError(t, err) - assert.Equal(t, rootNode, newNode) + assert.Equal(t, interimNode, newNode) + } + }) + + t.Run("decode child node not found error", func(t *testing.T) { + nodeNotFoundError := errors.New("failed to find node by index") + scratch := make([]byte, 1024) + + reader := bytes.NewReader(encodedInterimNode) + newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + return nil, nodeNotFoundError + }) + require.Nil(t, newNode) + require.ErrorIs(t, err, nodeNotFoundError) + }) + + t.Run("decode incomplete input error", func(t *testing.T) { + scratch := make([]byte, 1024) + + for i := 0; i < len(encodedInterimNode)-1; i++ { + for j := i; j < len(encodedInterimNode)-1; j++ { + reader := bytes.NewReader(encodedInterimNode[i:j]) + newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + switch nodeIndex { + case lchildIndex: + return leafNode1, nil + case rchildIndex: + return leafNode2, nil + default: + return nil, fmt.Errorf("unexpected child node index %d ", nodeIndex) + } + }) + require.Nil(t, newNode) + require.Error(t, err) + } } }) } -func TestTrieSerialization(t *testing.T) { +func TestTrieEncodingDecoding(t *testing.T) { + // Nil root node + rootNodeNil := (*node.Node)(nil) + rootNodeNilIndex := uint64(20) + + // Not nil root node hashValue := hash.Hash([32]byte{2, 2, 2}) rootNode := node.NewNode(256, nil, nil, ledger.DummyPath, nil, hashValue, 7, 5000) rootNodeIndex := uint64(21) - expected := []byte{ - 0, 0, 0, 0, 0, 0, 0, 21, // RootIndex - 0, 32, // RootHash length - 2, 2, 2, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, // RootHash data + encodedNilTrie := []byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, // RootIndex + 0x00, 0x20, // RootHash length + 0x56, 0x8f, 0x4e, 0xc7, 0x40, 0xfe, 0x3b, 0x5d, + 0xe8, 0x80, 0x34, 0xcb, 0x7b, 0x1f, 0xbd, 0xdb, + 0x41, 0x54, 0x8b, 0x06, 0x8f, 0x31, 0xae, 0xbc, + 0x8a, 0xe9, 0x18, 0x9e, 0x42, 0x9c, 0x57, 0x49, // RootHash data } - t.Run("encode", func(t *testing.T) { - scratchBuffers := [][]byte{ - nil, - make([]byte, 0), - make([]byte, 1024), - } + encodedTrie := []byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, // RootIndex + 0x00, 0x20, // RootHash length + 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, // hash data + } - for _, scratch := range scratchBuffers { - data := flattener.EncodeTrie(rootNode, rootNodeIndex, scratch) - assert.Equal(t, expected, data) - } - }) + testCases := []struct { + name string + rootNode *node.Node + rootNodeIndex uint64 + encodedTrie []byte + }{ + {"nil trie", rootNodeNil, rootNodeNilIndex, encodedNilTrie}, + {"trie", rootNode, rootNodeIndex, encodedTrie}, + } - t.Run("decode", func(t *testing.T) { - scratchBuffers := [][]byte{ - nil, - make([]byte, 0), - make([]byte, 1024), - } + for _, tc := range testCases { - for _, scratch := range scratchBuffers { - reader := bytes.NewReader(expected) - trie, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { - if nodeIndex != rootNodeIndex { - return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) + t.Run("encode "+tc.name, func(t *testing.T) { + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 16), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + encodedTrie := flattener.EncodeTrie(tc.rootNode, tc.rootNodeIndex, scratch) + assert.Equal(t, tc.encodedTrie, encodedTrie) + + if len(scratch) > 0 { + if len(scratch) >= len(encodedTrie) { + // reuse scratch buffer + require.True(t, &scratch[0] == &encodedTrie[0]) + } else { + // new alloc + require.True(t, &scratch[0] != &encodedTrie[0]) + } } - return rootNode, nil + } + }) + + t.Run("decode "+tc.name, func(t *testing.T) { + scratchBuffers := [][]byte{ + nil, + make([]byte, 0), + make([]byte, 16), + make([]byte, 1024), + } + + for _, scratch := range scratchBuffers { + reader := bytes.NewReader(tc.encodedTrie) + trie, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex != tc.rootNodeIndex { + return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) + } + return tc.rootNode, nil + }) + require.NoError(t, err) + assert.Equal(t, tc.rootNode, trie.RootNode()) + } + }) + + t.Run("decode "+tc.name+" node not found error", func(t *testing.T) { + nodeNotFoundError := errors.New("failed to find node by index") + scratch := make([]byte, 1024) + + reader := bytes.NewReader(tc.encodedTrie) + newNode, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + return nil, nodeNotFoundError }) - require.NoError(t, err) - assert.Equal(t, rootNode, trie.RootNode()) - } - }) + require.Nil(t, newNode) + require.ErrorIs(t, err, nodeNotFoundError) + }) + + t.Run("decode "+tc.name+" incomplete input error", func(t *testing.T) { + scratch := make([]byte, 1024) + + for i := 0; i < len(tc.encodedTrie)-1; i++ { + for j := i; j < len(tc.encodedTrie)-1; j++ { + reader := bytes.NewReader(tc.encodedTrie[i:j]) + newNode, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { + if nodeIndex != tc.rootNodeIndex { + return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) + } + return tc.rootNode, nil + }) + require.Nil(t, newNode) + require.Error(t, err) + } + } + }) + } } From aa1549eb65ca1e74d740ba17e55b0da42b9b2539 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 18 Feb 2022 10:02:06 -0600 Subject: [PATCH 23/37] Add node type check when decoding checkpoint --- ledger/complete/mtrie/flattener/encoding.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index bd7371e7ed2..991d107f9b2 100644 --- 
a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -202,6 +202,10 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( nType := scratch[pos] pos++ + if nType != byte(leafNodeType) && nType != byte(interimNodeType) { + return nil, fmt.Errorf("failed to decode node type %d", nType) + } + // Decode height (2 bytes) height := binary.BigEndian.Uint16(scratch[pos:]) pos += 2 From f91a1bef22d01fdb79f6091cf978e35602e2dd6f Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 18 Feb 2022 17:13:55 -0600 Subject: [PATCH 24/37] Cleanup checkpoint v3 decoding --- .../complete/mtrie/flattener/encoding_v3.go | 117 +++++++++++------- 1 file changed, 69 insertions(+), 48 deletions(-) diff --git a/ledger/complete/mtrie/flattener/encoding_v3.go b/ledger/complete/mtrie/flattener/encoding_v3.go index 896045d3088..98994557325 100644 --- a/ledger/complete/mtrie/flattener/encoding_v3.go +++ b/ledger/complete/mtrie/flattener/encoding_v3.go @@ -14,107 +14,126 @@ import ( ) // This file contains decoding functions for checkpoint v3 and earlier versions. -// These functions are for backwards compatibility. +// These functions are for backwards compatibility, not optimized. const encodingDecodingVersion = uint16(0) // ReadNodeFromCheckpointV3AndEarlier reconstructs a node from data in checkpoint v3 and earlier versions. +// Encoded node in checkpoint v3 and earlier is in the following format: +// - version (2 bytes) +// - height (2 bytes) +// - lindex (8 bytes) +// - rindex (8 bytes) +// - max depth (2 bytes) +// - reg count (8 bytes) +// - path (2 bytes + 32 bytes) +// - payload (4 bytes + n bytes) +// - hash (2 bytes + 32 bytes) func ReadNodeFromCheckpointV3AndEarlier(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*node.Node, error) { - // reading version + // Read version (2 bytes) buf := make([]byte, 2) - read, err := io.ReadFull(reader, buf) + _, err := io.ReadFull(reader, buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node, cannot read version part: %w", err) - } - if read != len(buf) { - return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf)) + return nil, fmt.Errorf("failed to read version of serialized node in v3: %w", err) } + // Decode version version, _, err := utils.ReadUint16(buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) + return nil, fmt.Errorf("failed to decode version of serialized node in v3: %w", err) } if version > encodingDecodingVersion { - return nil, fmt.Errorf("failed to read serialized node: unsuported version %d > %d", version, encodingDecodingVersion) + return nil, fmt.Errorf("found unsuported version %d (> %d) of serialized node in v3", version, encodingDecodingVersion) } - // reading fixed-length part + // fixed-length data: + // height (2 bytes) + + // left child node index (8 bytes) + + // right child node index (8 bytes) + + // max depth (2 bytes) + + // reg count (8 bytes) buf = make([]byte, 2+8+8+2+8) - read, err = io.ReadFull(reader, buf) + // Read fixed-length part + _, err = io.ReadFull(reader, buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node, cannot read fixed-length part: %w", err) - } - if read != len(buf) { - return nil, fmt.Errorf("failed to read serialized node: not enough bytes read %d expected %d", read, len(buf)) + return nil, fmt.Errorf("failed to read fixed-length part of 
serialized node in v3: %w", err) } var height, maxDepth uint16 var lchildIndex, rchildIndex, regCount uint64 var path, hashValue, encPayload []byte + // Decode height (2 bytes) height, buf, err = utils.ReadUint16(buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) + return nil, fmt.Errorf("failed to decode height of serialized node in v3: %w", err) } + // Decode left child index (8 bytes) lchildIndex, buf, err = utils.ReadUint64(buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) + return nil, fmt.Errorf("failed to decode left child index of serialized node in v3: %w", err) } + // Decode right child index (8 bytes) rchildIndex, buf, err = utils.ReadUint64(buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) + return nil, fmt.Errorf("failed to decode right child index of serialized node in v3: %w", err) } + // Decode max depth (2 bytes) maxDepth, buf, err = utils.ReadUint16(buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) + return nil, fmt.Errorf("failed to decode max depth of serialized node in v3: %w", err) } + // Decode reg count (8 bytes) regCount, _, err = utils.ReadUint64(buf) if err != nil { - return nil, fmt.Errorf("failed to read serialized node: %w", err) + return nil, fmt.Errorf("failed to decode reg count of serialized node in v3: %w", err) } + // Read path (2 bytes + 32 bytes) path, err = utils.ReadShortDataFromReader(reader) if err != nil { - return nil, fmt.Errorf("cannot read key data: %w", err) + return nil, fmt.Errorf("failed to read path of serialized node in v3: %w", err) } + // Read payload (4 bytes + n bytes) encPayload, err = utils.ReadLongDataFromReader(reader) if err != nil { - return nil, fmt.Errorf("cannot read value data: %w", err) + return nil, fmt.Errorf("failed to read payload of serialized node in v3: %w", err) } + // Read hash (2 bytes + 32 bytes) hashValue, err = utils.ReadShortDataFromReader(reader) if err != nil { - return nil, fmt.Errorf("cannot read hashValue data: %w", err) + return nil, fmt.Errorf("failed to read hash of serialized node in v3: %w", err) } // Create (and copy) hash from raw data. nodeHash, err := hash.ToHash(hashValue) if err != nil { - return nil, fmt.Errorf("failed to decode hash from checkpoint: %w", err) + return nil, fmt.Errorf("failed to decode hash of serialized node in v3: %w", err) } if len(path) > 0 { // Create (and copy) path from raw data. path, err := ledger.ToPath(path) if err != nil { - return nil, fmt.Errorf("failed to decode path from checkpoint: %w", err) + return nil, fmt.Errorf("failed to decode path of serialized node in v3: %w", err) } // Decode payload (payload data isn't copied). 
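// Illustrative summary of the decode-then-copy step that follows (names from
// this function):
//
//	payload, _ := encoding.DecodePayload(encPayload) // aliases encPayload
//	pl := payload.DeepCopy()                         // detach from read buffer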
payload, err := encoding.DecodePayload(encPayload) if err != nil { - return nil, fmt.Errorf("failed to decode payload from checkpoint: %w", err) + return nil, fmt.Errorf("failed to decode payload of serialized node in v3: %w", err) } - // make a copy of payload + // Make a copy of payload var pl *ledger.Payload if payload != nil { pl = payload.DeepCopy() @@ -127,13 +146,13 @@ func ReadNodeFromCheckpointV3AndEarlier(reader io.Reader, getNode func(nodeIndex // Get left child node by node index lchild, err := getNode(lchildIndex) if err != nil { - return nil, fmt.Errorf("failed to find left child node: %w", err) + return nil, fmt.Errorf("failed to find left child node of serialized node in v3: %w", err) } // Get right child node by node index rchild, err := getNode(rchildIndex) if err != nil { - return nil, fmt.Errorf("failed to find right child node: %w", err) + return nil, fmt.Errorf("failed to find right child node of serialized node in v3: %w", err) } n := node.NewNode(int(height), lchild, rchild, ledger.DummyPath, nil, nodeHash, maxDepth, regCount) @@ -141,60 +160,62 @@ func ReadNodeFromCheckpointV3AndEarlier(reader io.Reader, getNode func(nodeIndex } // ReadTrieFromCheckpointV3AndEarlier reconstructs a trie from data in checkpoint v3 and earlier versions. +// Encoded trie in checkpoint v3 and earlier is in the following format: +// - version (2 bytes) +// - root node index (8 bytes) +// - root node hash (2 bytes + 32 bytes) func ReadTrieFromCheckpointV3AndEarlier(reader io.Reader, getNode func(nodeIndex uint64) (*node.Node, error)) (*trie.MTrie, error) { - // reading version + // Read version (2 bytes) buf := make([]byte, 2) - read, err := io.ReadFull(reader, buf) + _, err := io.ReadFull(reader, buf) if err != nil { - return nil, fmt.Errorf("error reading storable node, cannot read version part: %w", err) - } - if read != len(buf) { - return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf)) + return nil, fmt.Errorf("failed to read version of serialized trie in v3: %w", err) } + // Decode version version, _, err := utils.ReadUint16(buf) if err != nil { - return nil, fmt.Errorf("error reading storable node: %w", err) + return nil, fmt.Errorf("failed to decode version of serialized trie in v3: %w", err) } if version > encodingDecodingVersion { - return nil, fmt.Errorf("error reading storable node: unsuported version %d > %d", version, encodingDecodingVersion) + return nil, fmt.Errorf("found unsuported version %d (> %d) of serialized trie in v3", version, encodingDecodingVersion) } - // read root uint64 RootIndex + // Read root index (8 bytes) buf = make([]byte, 8) - read, err = io.ReadFull(reader, buf) + _, err = io.ReadFull(reader, buf) if err != nil { - return nil, fmt.Errorf("cannot read fixed-legth part: %w", err) - } - if read != len(buf) { - return nil, fmt.Errorf("not enough bytes read %d expected %d", read, len(buf)) + return nil, fmt.Errorf("failed to read root index of serialized trie in v3: %w", err) } + // Decode root index rootIndex, _, err := utils.ReadUint64(buf) if err != nil { - return nil, fmt.Errorf("cannot read root index data: %w", err) + return nil, fmt.Errorf("failed to decode root index of serialized trie in v3: %w", err) } + // Read root hash (2 bytes + 32 bytes) readRootHash, err := utils.ReadShortDataFromReader(reader) if err != nil { - return nil, fmt.Errorf("cannot read roothash data: %w", err) + return nil, fmt.Errorf("failed to read root hash of serialized trie in v3: %w", err) } + // Get node by index rootNode, err := getNode(rootIndex) 
if err != nil { - return nil, fmt.Errorf("cannot find root node: %w", err) + return nil, fmt.Errorf("failed to find root node of serialized trie in v3: %w", err) } mtrie, err := trie.NewMTrie(rootNode) if err != nil { - return nil, fmt.Errorf("restoring trie failed: %w", err) + return nil, fmt.Errorf("failed to restore serialized trie in v3: %w", err) } rootHash := mtrie.RootHash() if !bytes.Equal(readRootHash, rootHash[:]) { - return nil, fmt.Errorf("restoring trie failed: roothash doesn't match") + return nil, fmt.Errorf("failed to restore serialized trie in v3: roothash doesn't match") } return mtrie, nil From 2b9d1aeca4a45cadfefaf32ee26a3b94f2b56ecf Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 18 Feb 2022 17:15:20 -0600 Subject: [PATCH 25/37] Add checkpoint v3 decoding tests --- .../mtrie/flattener/encoding_v3_test.go | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 ledger/complete/mtrie/flattener/encoding_v3_test.go diff --git a/ledger/complete/mtrie/flattener/encoding_v3_test.go b/ledger/complete/mtrie/flattener/encoding_v3_test.go new file mode 100644 index 00000000000..374de31cf9c --- /dev/null +++ b/ledger/complete/mtrie/flattener/encoding_v3_test.go @@ -0,0 +1,126 @@ +package flattener_test + +import ( + "bytes" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/common/hash" + "github.com/onflow/flow-go/ledger/common/utils" + "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" + "github.com/onflow/flow-go/ledger/complete/mtrie/node" +) + +// This file contains node/trie decoding tests for checkpoint v3 and earlier versions. +// These tests are based on TestStorableNode and TestStorableTrie. 
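// Editorial note: these compatibility tests can be run on their own with the
// standard Go tooling (package path assumed from the diff header):
//
//	go test ./ledger/complete/mtrie/flattener/ -run V3 -v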
+ +func TestNodeV3Decoding(t *testing.T) { + + const leafNode1Index = 1 + const leafNode2Index = 2 + + leafNode1 := node.NewNode(255, nil, nil, utils.PathByUint8(0), utils.LightPayload8('A', 'a'), hash.Hash([32]byte{1, 1, 1}), 0, 1) + leafNode2 := node.NewNode(255, nil, nil, utils.PathByUint8(1), utils.LightPayload8('B', 'b'), hash.Hash([32]byte{2, 2, 2}), 0, 1) + + interimNode := node.NewNode(256, leafNode1, leafNode2, ledger.DummyPath, nil, hash.Hash([32]byte{3, 3, 3}), 1, 2) + + encodedLeafNode1 := []byte{ + 0x00, 0x00, // encoding version + 0x00, 0xff, // height + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // LIndex + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // RIndex + 0x00, 0x00, // max depth + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // reg count + 0x00, 0x20, // path data len + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // path data + 0x00, 0x00, 0x00, 0x19, // payload data len + 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x41, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x61, // payload data + 0x00, 0x20, // hashValue length + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash value + } + + encodedInterimNode := []byte{ + 0x00, 0x00, // encoding version + 0x01, 0x00, // height + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // LIndex + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // RIndex + 0x00, 0x01, // max depth + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // reg count + 0x00, 0x00, // path data len + 0x00, 0x00, 0x00, 0x00, // payload data len + 0x00, 0x20, // hashValue length + 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash value + } + + t.Run("leaf node", func(t *testing.T) { + reader := bytes.NewReader(encodedLeafNode1) + newNode, err := flattener.ReadNodeFromCheckpointV3AndEarlier(reader, func(nodeIndex uint64) (*node.Node, error) { + return nil, fmt.Errorf("no call expected") + }) + require.NoError(t, err) + assert.Equal(t, leafNode1, newNode) + }) + + t.Run("interim node", func(t *testing.T) { + reader := bytes.NewReader(encodedInterimNode) + newNode, err := flattener.ReadNodeFromCheckpointV3AndEarlier(reader, func(nodeIndex uint64) (*node.Node, error) { + switch nodeIndex { + case leafNode1Index: + return leafNode1, nil + case leafNode2Index: + return leafNode2, nil + default: + return nil, fmt.Errorf("unexpected child node index %d ", nodeIndex) + } + }) + require.NoError(t, err) + assert.Equal(t, interimNode, newNode) + }) +} + +func TestTrieV3Decoding(t *testing.T) { + + const rootNodeIndex = 21 + + hashValue := hash.Hash([32]byte{2, 2, 2}) + rootNode := node.NewNode(256, nil, nil, ledger.DummyPath, nil, hashValue, 7, 5000) + + expected := []byte{ + 0x00, 0x00, // encoding version + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 21, // RootIndex + 0x00, 0x20, // hashValue length + 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash value + } + + reader := 
bytes.NewReader(expected) + + trie, err := flattener.ReadTrieFromCheckpointV3AndEarlier(reader, func(nodeIndex uint64) (*node.Node, error) { + switch nodeIndex { + case rootNodeIndex: + return rootNode, nil + default: + return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) + } + }) + require.NoError(t, err) + assert.Equal(t, rootNode, trie.RootNode()) +} From dcca74efe8e79b27f4e1ac3919e118fac481156d Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 22 Feb 2022 18:20:50 -0600 Subject: [PATCH 26/37] Refactor mtrie checkpoint encoding to reduce size Remove encoded hash size (400+ million per checkpoint file). Remove encoded path size (164+ million per checkpoint file). Remove functions no longer required after removing hash and path sizes. Replace magic numbers with constants for readability. --- ledger/complete/mtrie/flattener/encoding.go | 271 ++++++++---------- .../complete/mtrie/flattener/encoding_test.go | 72 +---- 2 files changed, 125 insertions(+), 218 deletions(-) diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 991d107f9b2..1b50fae4498 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ b/ledger/complete/mtrie/flattener/encoding.go @@ -19,15 +19,26 @@ const ( interimNodeType ) +const ( + encNodeTypeSize = 1 + encHeightSize = 2 + encMaxDepthSize = 2 + encRegCountSize = 8 + encHashSize = hash.HashLen + encPathSize = ledger.PathLen + encNodeIndexSize = 8 + encPayloadLengthSize = 4 +) + // encodeLeafNode encodes leaf node in the following format: // - node type (1 byte) // - height (2 bytes) // - max depth (2 bytes) // - reg count (8 bytes) -// - hash (2 bytes + 32 bytes) -// - path (2 bytes + 32 bytes) +// - hash (32 bytes) +// - path (32 bytes) // - payload (4 bytes + n bytes) -// Encoded leaf node size is 85 bytes (assuming length of hash/path is 32 bytes) + +// Encoded leaf node size is 81 bytes (assuming length of hash/path is 32 bytes) + // length of encoded payload size. // Scratch buffer is used to avoid allocs. It should be used directly instead // of using append. This function uses len(scratch) and ignores cap(scratch), @@ -35,12 +46,18 @@ const ( // WARNING: The returned buffer is likely to share the same underlying array as // the scratch buffer. Caller is responsible for copying or using returned buffer // before scratch buffer is used again. -// TODO: reduce hash size from 2 bytes to 1 byte. func encodeLeafNode(n *node.Node, scratch []byte) []byte { encPayloadSize := encoding.EncodedPayloadLengthWithoutPrefix(n.Payload()) - encodedNodeSize := 1 + 2 + 2 + 8 + 2 + hash.HashLen + 2 + ledger.PathLen + 4 + encPayloadSize + encodedNodeSize := encNodeTypeSize + + encHeightSize + + encMaxDepthSize + + encRegCountSize + + encHashSize + + encPathSize + + encPayloadLengthSize + + encPayloadSize // buf uses received scratch buffer if it's large enough. // Otherwise, a new buffer is allocated. 
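// Editorial back-of-envelope for the savings claimed in the commit message,
// using its own counts (400M+ hash-size fields and 164M+ path-size fields,
// 2 bytes each):
//
//	saved := 2*400_000_000 + 2*164_000_000 // = 1_128_000_000 bytes
//	// roughly 1.1 GB less per checkpoint file just from dropping the two
//	// 2-byte length prefixes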
@@ -55,37 +72,33 @@ func encodeLeafNode(n *node.Node, scratch []byte) []byte { // Encode node type (1 byte) buf[pos] = byte(leafNodeType) - pos++ + pos += encNodeTypeSize - // Encode height (2-bytes Big Endian) + // Encode height (2 bytes Big Endian) binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) - pos += 2 + pos += encHeightSize - // Encode max depth (2-bytes Big Endian) + // Encode max depth (2 bytes Big Endian) binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) - pos += 2 + pos += encMaxDepthSize - // Encode reg count (8-bytes Big Endian) + // Encode reg count (8 bytes Big Endian) binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) - pos += 8 + pos += encRegCountSize - // Encode hash (2-bytes Big Endian for hashValue length and n-bytes hashValue) + // Encode hash (32 bytes hashValue) hash := n.Hash() - binary.BigEndian.PutUint16(buf[pos:], uint16(len(hash))) - pos += 2 - - pos += copy(buf[pos:], hash[:]) + copy(buf[pos:], hash[:]) + pos += encHashSize - // Encode path (2-bytes Big Endian for path length and n-bytes path) + // Encode path (32 bytes path) path := n.Path() - binary.BigEndian.PutUint16(buf[pos:], uint16(len(path))) - pos += 2 + copy(buf[pos:], path[:]) + pos += encPathSize - pos += copy(buf[pos:], path[:]) - - // Encode payload (4-bytes Big Endian for encoded payload length and n-bytes encoded payload) + // Encode payload (4 bytes Big Endian for encoded payload length and n bytes encoded payload) binary.BigEndian.PutUint32(buf[pos:], uint32(encPayloadSize)) - pos += 4 + pos += encPayloadLengthSize // EncodeAndAppendPayloadWithoutPrefix appends encoded payload to the resliced buf. // Returned buf is resliced to include appended payload. @@ -99,20 +112,25 @@ func encodeLeafNode(n *node.Node, scratch []byte) []byte { // - height (2 bytes) // - max depth (2 bytes) // - reg count (8 bytes) +// - hash (32 bytes) // - lchild index (8 bytes) // - rchild index (8 bytes) -// - hash (2 bytes + 32 bytes) -// Encoded interim node size is 63 bytes (assuming length of hash is 32 bytes). +// Encoded interim node size is 61 bytes (assuming length of hash is 32 bytes). // Scratch buffer is used to avoid allocs. It should be used directly instead // of using append. This function uses len(scratch) and ignores cap(scratch), // so any extra capacity will not be utilized. // WARNING: The returned buffer is likely to share the same underlying array as // the scratch buffer. Caller is responsible for copying or using returned buffer // before scratch buffer is used again. -// TODO: reduce hash size from 2 bytes to 1 byte. func encodeInterimNode(n *node.Node, lchildIndex uint64, rchildIndex uint64, scratch []byte) []byte { - encodedNodeSize := 1 + 2 + 2 + 8 + 8 + 8 + 2 + hash.HashLen + const encodedNodeSize = encNodeTypeSize + + encHeightSize + + encMaxDepthSize + + encRegCountSize + + encHashSize + + encNodeIndexSize + + encNodeIndexSize // buf uses received scratch buffer if it's large enough. // Otherwise, a new buffer is allocated. 
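// Editorial summary of the new fixed-length leaf layout (widths from the
// constants above):
//
//	type(1) | height(2) | maxDepth(2) | regCount(8) | hash(32) | path(32) | payloadLen(4)
//	= 81 bytes before the payload, vs 85 previously (two 2-byte sizes dropped)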
@@ -125,36 +143,34 @@ func encodeInterimNode(n *node.Node, lchildIndex uint64, rchildIndex uint64, scr pos := 0 - // Encode node type (1-byte) + // Encode node type (1 byte) buf[pos] = byte(interimNodeType) - pos++ + pos += encNodeTypeSize - // Encode height (2-bytes Big Endian) + // Encode height (2 bytes Big Endian) binary.BigEndian.PutUint16(buf[pos:], uint16(n.Height())) - pos += 2 + pos += encHeightSize - // Encode max depth (2-bytes Big Endian) + // Encode max depth (2 bytes Big Endian) binary.BigEndian.PutUint16(buf[pos:], n.MaxDepth()) - pos += 2 + pos += encMaxDepthSize - // Encode reg count (8-bytes Big Endian) + // Encode reg count (8 bytes Big Endian) binary.BigEndian.PutUint64(buf[pos:], n.RegCount()) - pos += 8 + pos += encRegCountSize + + // Encode hash (32 bytes hashValue) + h := n.Hash() + copy(buf[pos:], h[:]) + pos += encHashSize - // Encode left child index (8-bytes Big Endian) + // Encode left child index (8 bytes Big Endian) binary.BigEndian.PutUint64(buf[pos:], lchildIndex) - pos += 8 + pos += encNodeIndexSize - // Encode right child index (8-bytes Big Endian) + // Encode right child index (8 bytes Big Endian) binary.BigEndian.PutUint64(buf[pos:], rchildIndex) - pos += 8 - - // Encode hash (2-bytes Big Endian hashValue length and n-bytes hashValue) - binary.BigEndian.PutUint16(buf[pos:], hash.HashLen) - pos += 2 - - h := n.Hash() - pos += copy(buf[pos:], h[:]) + pos += encNodeIndexSize return buf[:pos] } @@ -187,20 +203,23 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( scratch = make([]byte, minBufSize) } - // fixed-length data: node type (1 byte) + height (2 bytes) + max depth (2 bytes) + reg count (8 bytes) - const fixLengthSize = 1 + 2 + 2 + 8 - - // Read fixed-length part - pos := 0 + // fixLengthSize is the size of shared data of leaf node and interim node + const fixLengthSize = encNodeTypeSize + + encHeightSize + + encMaxDepthSize + + encRegCountSize + + encHashSize _, err := io.ReadFull(reader, scratch[:fixLengthSize]) if err != nil { return nil, fmt.Errorf("failed to read fixed-length part of serialized node: %w", err) } + pos := 0 + // Decode node type (1 byte) nType := scratch[pos] - pos++ + pos += encNodeTypeSize if nType != byte(leafNodeType) && nType != byte(interimNodeType) { return nil, fmt.Errorf("failed to decode node type %d", nType) @@ -208,27 +227,36 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( // Decode height (2 bytes) height := binary.BigEndian.Uint16(scratch[pos:]) - pos += 2 + pos += encHeightSize // Decode max depth (2 bytes) maxDepth := binary.BigEndian.Uint16(scratch[pos:]) - pos += 2 + pos += encMaxDepthSize // Decode reg count (8 bytes) regCount := binary.BigEndian.Uint64(scratch[pos:]) + pos += encRegCountSize + + // Decode and create hash.Hash (32 bytes) + nodeHash, err := hash.ToHash(scratch[pos : pos+encHashSize]) + pos += encHashSize + if err != nil { + return nil, fmt.Errorf("failed to decode hash of serialized node: %w", err) + } if nType == byte(leafNodeType) { - // Read encoded hash data and create hash.Hash. - nodeHash, err := readHashFromReader(reader, scratch) + // Read path (32 bytes) + encPath := scratch[:encPathSize] + _, err := io.ReadFull(reader, encPath) if err != nil { - return nil, fmt.Errorf("failed to read and decode hash of serialized node: %w", err) + return nil, fmt.Errorf("failed to read path of serialized node: %w", err) } - // Read encoded path data and create ledger.Path. 
- path, err := readPathFromReader(reader, scratch) + // Decode and create ledger.Path. + path, err := ledger.ToPath(encPath) if err != nil { - return nil, fmt.Errorf("failed to read and decode path of serialized node: %w", err) + return nil, fmt.Errorf("failed to decode path of serialized node: %w", err) } // Read encoded payload data and create ledger.Payload. @@ -243,27 +271,21 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( // Read interim node - pos = 0 - - // Read left and right child index (8 bytes each) - _, err = io.ReadFull(reader, scratch[:16]) + // Read left and right child index (16 bytes) + _, err = io.ReadFull(reader, scratch[:encNodeIndexSize*2]) if err != nil { return nil, fmt.Errorf("failed to read child index of serialized node: %w", err) } + pos = 0 + // Decode left child index (8 bytes) lchildIndex := binary.BigEndian.Uint64(scratch[pos:]) - pos += 8 + pos += encNodeIndexSize // Decode right child index (8 bytes) rchildIndex := binary.BigEndian.Uint64(scratch[pos:]) - // Read encoded hash data from reader and create hash.Hash - nodeHash, err := readHashFromReader(reader, scratch) - if err != nil { - return nil, fmt.Errorf("failed to read and decode hash of serialized node: %w", err) - } - // Get left child node by node index lchild, err := getNode(lchildIndex) if err != nil { @@ -282,13 +304,15 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( // EncodeTrie encodes trie in the following format: // - root node index (8 byte) -// - root node hash (2 bytes + 32 bytes) +// - root node hash (32 bytes) // Scratch buffer is used to avoid allocs. // WARNING: The returned buffer is likely to share the same underlying array as // the scratch buffer. Caller is responsible for copying or using returned buffer // before scratch buffer is used again. func EncodeTrie(rootNode *node.Node, rootIndex uint64, scratch []byte) []byte { + const encodedTrieSize = encNodeIndexSize + encHashSize + // Get root hash var rootHash ledger.RootHash if rootNode == nil { @@ -297,23 +321,19 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64, scratch []byte) []byte { rootHash = ledger.RootHash(rootNode.Hash()) } - const encodedTrieSize = 8 + 2 + len(rootHash) - if len(scratch) < encodedTrieSize { scratch = make([]byte, encodedTrieSize) } pos := 0 - // Encode root node index (8-bytes Big Endian) + // Encode root node index (8 bytes Big Endian) binary.BigEndian.PutUint64(scratch, rootIndex) - pos += 8 - - // Encode hash (2-bytes Big Endian for hashValue length and n-bytes hashValue) - binary.BigEndian.PutUint16(scratch[pos:], uint16(len(rootHash))) - pos += 2 + pos += encNodeIndexSize - pos += copy(scratch[pos:], rootHash[:]) + // Encode hash (32-bytes hashValue) + copy(scratch[pos:], rootHash[:]) + pos += encHashSize return scratch[:pos] } @@ -321,28 +341,30 @@ func EncodeTrie(rootNode *node.Node, rootIndex uint64, scratch []byte) []byte { // ReadTrie reconstructs a trie from data read from reader. func ReadTrie(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) (*node.Node, error)) (*trie.MTrie, error) { - // minBufSize should be large enough for encoded trie (42 bytes). - // minBufSize is a failsafe and is only used when len(scratch) is much smaller + // encodedTrieSize is a failsafe and is only used when len(scratch) is much smaller // than expected (4096 by default). 
- const minBufSize = 42 + const encodedTrieSize = encNodeIndexSize + encHashSize - if len(scratch) < minBufSize { - scratch = make([]byte, minBufSize) + if len(scratch) < encodedTrieSize { + scratch = make([]byte, encodedTrieSize) } - // Read root node index (8 bytes) - _, err := io.ReadFull(reader, scratch[:8]) + // Read encoded trie (8 + 32 bytes) + _, err := io.ReadFull(reader, scratch[:encodedTrieSize]) if err != nil { - return nil, fmt.Errorf("failed to read root node index of serialized trie: %w", err) + return nil, fmt.Errorf("failed to read serialized trie: %w", err) } + pos := 0 + // Decode root node index rootIndex := binary.BigEndian.Uint64(scratch) + pos += encNodeIndexSize - // Read and decode root node hash - readRootHash, err := readHashFromReader(reader, scratch) + // Decode root node hash + readRootHash, err := hash.ToHash(scratch[pos : pos+encHashSize]) if err != nil { - return nil, fmt.Errorf("failed to read and decode hash of serialized trie: %w", err) + return nil, fmt.Errorf("failed to decode hash of serialized trie: %w", err) } rootNode, err := getNode(rootIndex) @@ -363,76 +385,21 @@ func ReadTrie(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( return mtrie, nil } -// readHashFromReader reads and decodes hash from reader. -// Returned hash is a copy. -func readHashFromReader(reader io.Reader, scratch []byte) (hash.Hash, error) { - - const encHashBufSize = 2 + hash.HashLen - - if len(scratch) < encHashBufSize { - scratch = make([]byte, encHashBufSize) - } else { - scratch = scratch[:encHashBufSize] - } - - _, err := io.ReadFull(reader, scratch) - if err != nil { - return hash.DummyHash, fmt.Errorf("cannot read hash: %w", err) - } - - sizeBuf, encHashBuf := scratch[:2], scratch[2:] - - size := binary.BigEndian.Uint16(sizeBuf) - if size != hash.HashLen { - return hash.DummyHash, fmt.Errorf("encoded hash size is wrong: want %d bytes, got %d bytes", hash.HashLen, size) - } - - // hash.ToHash copies data - return hash.ToHash(encHashBuf) -} - -// readPathFromReader reads and decodes path from reader. -// Returned path is a copy. -func readPathFromReader(reader io.Reader, scratch []byte) (ledger.Path, error) { - - const encPathBufSize = 2 + ledger.PathLen - - if len(scratch) < encPathBufSize { - scratch = make([]byte, encPathBufSize) - } else { - scratch = scratch[:encPathBufSize] - } - - _, err := io.ReadFull(reader, scratch) - if err != nil { - return ledger.DummyPath, fmt.Errorf("cannot read path: %w", err) - } - - sizeBuf, encPathBuf := scratch[:2], scratch[2:] - - size := binary.BigEndian.Uint16(sizeBuf) - if size != ledger.PathLen { - return ledger.DummyPath, fmt.Errorf("encoded path size is wrong: want %d bytes, got %d bytes", ledger.PathLen, size) - } - - // ledger.ToPath copies data - return ledger.ToPath(encPathBuf) -} - // readPayloadFromReader reads and decodes payload from reader. // Returned payload is a copy. 
func readPayloadFromReader(reader io.Reader, scratch []byte) (*ledger.Payload, error) { - if len(scratch) < 4 { - scratch = make([]byte, 4) + if len(scratch) < encPayloadLengthSize { + scratch = make([]byte, encPayloadLengthSize) } // Read payload size - _, err := io.ReadFull(reader, scratch[:4]) + _, err := io.ReadFull(reader, scratch[:encPayloadLengthSize]) if err != nil { return nil, fmt.Errorf("cannot read payload length: %w", err) } + // Decode payload size size := binary.BigEndian.Uint32(scratch) if len(scratch) < int(size) { diff --git a/ledger/complete/mtrie/flattener/encoding_test.go b/ledger/complete/mtrie/flattener/encoding_test.go index f634ab1975a..2e78d4de22c 100644 --- a/ledger/complete/mtrie/flattener/encoding_test.go +++ b/ledger/complete/mtrie/flattener/encoding_test.go @@ -43,12 +43,10 @@ func TestLeafNodeEncodingDecoding(t *testing.T) { 0x00, 0xff, // height 0x00, 0x00, // max depth 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // reg count - 0x00, 0x20, // hash data len 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data - 0x00, 0x20, // path data len 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -61,12 +59,10 @@ func TestLeafNodeEncodingDecoding(t *testing.T) { 0x00, 0xff, // height 0x00, 0x00, // max depth 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // reg count - 0x00, 0x20, // hash data len 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data - 0x00, 0x20, // path data len 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -81,12 +77,10 @@ func TestLeafNodeEncodingDecoding(t *testing.T) { 0x00, 0xff, // height 0x00, 0x00, // max depth 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // reg count - 0x00, 0x20, // hash data len 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data - 0x00, 0x20, // path data len 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -147,21 +141,7 @@ func TestLeafNodeEncodingDecoding(t *testing.T) { }) require.NoError(t, err) assert.Equal(t, tc.node, newNode) - } - }) - - t.Run("decode "+tc.name+" incomplete input error", func(t *testing.T) { - scratch := make([]byte, 1024) - - for i := 0; i < len(tc.encodedNode)-1; i++ { - for j := i; j < len(tc.encodedNode)-1; j++ { - reader := bytes.NewReader(tc.encodedNode[i:j]) - newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { - return nil, fmt.Errorf("no call expected") - }) - require.Nil(t, newNode) - require.Error(t, err) - } + assert.Equal(t, 0, reader.Len()) } }) } @@ -205,6 +185,7 @@ func TestRandomLeafNodeEncodingDecoding(t *testing.T) { }) require.NoError(t, err) assert.Equal(t, n, newNode) + assert.Equal(t, 0, reader.Len()) } } @@ -234,13 +215,12 @@ func TestInterimNodeEncodingDecoding(t *testing.T) { 0x01, 0x00, // height 0x00, 0x01, // max depth 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // reg 
count - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // LIndex - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // RIndex - 0x00, 0x20, // hash data len 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // hash data + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // LIndex + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // RIndex } t.Run("encode", func(t *testing.T) { @@ -279,6 +259,7 @@ func TestInterimNodeEncodingDecoding(t *testing.T) { }) require.NoError(t, err) assert.Equal(t, interimNode, newNode) + assert.Equal(t, 0, reader.Len()) } }) @@ -293,28 +274,6 @@ func TestInterimNodeEncodingDecoding(t *testing.T) { require.Nil(t, newNode) require.ErrorIs(t, err, nodeNotFoundError) }) - - t.Run("decode incomplete input error", func(t *testing.T) { - scratch := make([]byte, 1024) - - for i := 0; i < len(encodedInterimNode)-1; i++ { - for j := i; j < len(encodedInterimNode)-1; j++ { - reader := bytes.NewReader(encodedInterimNode[i:j]) - newNode, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { - switch nodeIndex { - case lchildIndex: - return leafNode1, nil - case rchildIndex: - return leafNode2, nil - default: - return nil, fmt.Errorf("unexpected child node index %d ", nodeIndex) - } - }) - require.Nil(t, newNode) - require.Error(t, err) - } - } - }) } func TestTrieEncodingDecoding(t *testing.T) { @@ -329,7 +288,6 @@ func TestTrieEncodingDecoding(t *testing.T) { encodedNilTrie := []byte{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, // RootIndex - 0x00, 0x20, // RootHash length 0x56, 0x8f, 0x4e, 0xc7, 0x40, 0xfe, 0x3b, 0x5d, 0xe8, 0x80, 0x34, 0xcb, 0x7b, 0x1f, 0xbd, 0xdb, 0x41, 0x54, 0x8b, 0x06, 0x8f, 0x31, 0xae, 0xbc, @@ -338,7 +296,6 @@ func TestTrieEncodingDecoding(t *testing.T) { encodedTrie := []byte{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, // RootIndex - 0x00, 0x20, // RootHash length 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -399,6 +356,7 @@ func TestTrieEncodingDecoding(t *testing.T) { }) require.NoError(t, err) assert.Equal(t, tc.rootNode, trie.RootNode()) + assert.Equal(t, 0, reader.Len()) } }) @@ -413,23 +371,5 @@ func TestTrieEncodingDecoding(t *testing.T) { require.Nil(t, newNode) require.ErrorIs(t, err, nodeNotFoundError) }) - - t.Run("decode "+tc.name+" incomplete input error", func(t *testing.T) { - scratch := make([]byte, 1024) - - for i := 0; i < len(tc.encodedTrie)-1; i++ { - for j := i; j < len(tc.encodedTrie)-1; j++ { - reader := bytes.NewReader(tc.encodedTrie[i:j]) - newNode, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { - if nodeIndex != tc.rootNodeIndex { - return nil, fmt.Errorf("unexpected root node index %d ", nodeIndex) - } - return tc.rootNode, nil - }) - require.Nil(t, newNode) - require.Error(t, err) - } - } - }) } } From 9d9d2bb480f04a2d1a146ce2eae03ba9a7967fac Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 22 Feb 2022 18:51:45 -0600 Subject: [PATCH 27/37] Fix lint error --- ledger/complete/mtrie/flattener/encoding.go | 1 - 1 file changed, 1 deletion(-) diff --git a/ledger/complete/mtrie/flattener/encoding.go b/ledger/complete/mtrie/flattener/encoding.go index 1b50fae4498..ccd55782259 100644 --- a/ledger/complete/mtrie/flattener/encoding.go +++ 
b/ledger/complete/mtrie/flattener/encoding.go @@ -239,7 +239,6 @@ func ReadNode(reader io.Reader, scratch []byte, getNode func(nodeIndex uint64) ( // Decode and create hash.Hash (32 bytes) nodeHash, err := hash.ToHash(scratch[pos : pos+encHashSize]) - pos += encHashSize if err != nil { return nil, fmt.Errorf("failed to decode hash of serialized node: %w", err) } From bfa2c67b31698a31c4769cceca5459d38d6b225a Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 23 Feb 2022 17:21:00 -0600 Subject: [PATCH 28/37] Refactor tests --- .../complete/mtrie/flattener/iterator_test.go | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/ledger/complete/mtrie/flattener/iterator_test.go b/ledger/complete/mtrie/flattener/iterator_test.go index a80d40ca74b..64d3574034e 100644 --- a/ledger/complete/mtrie/flattener/iterator_test.go +++ b/ledger/complete/mtrie/flattener/iterator_test.go @@ -151,8 +151,8 @@ func TestUniqueNodeIterator(t *testing.T) { t.Run("forest", func(t *testing.T) { - // Forest is a slice of mtries to guarantee order. - f := make([]*trie.MTrie, 0) + // tries is a slice of mtries to guarantee order. + var tries []*trie.MTrie emptyTrie := trie.NewEmptyMTrie() @@ -170,8 +170,7 @@ func TestUniqueNodeIterator(t *testing.T) { trie1, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) require.NoError(t, err) - f = append(f, trie1) - + // trie1 // n4 // / // / @@ -181,6 +180,8 @@ func TestUniqueNodeIterator(t *testing.T) { // n1 (p1/v1) n2 (p2/v2) // + tries = append(tries, trie1) + // New trie reuses its parent's left sub-trie. // key: 1000... @@ -197,8 +198,7 @@ func TestUniqueNodeIterator(t *testing.T) { trie2, err := trie.NewTrieWithUpdatedRegisters(trie1, paths, payloads, true) require.NoError(t, err) - f = append(f, trie2) - + // trie2 // n8 // / \ // / \ @@ -208,6 +208,8 @@ func TestUniqueNodeIterator(t *testing.T) { // n5 n6 // (p3/v3) (p4/v4) + tries = append(tries, trie2) + // New trie reuses its parent's right sub-trie, and left sub-trie's leaf node. // key: 0000... @@ -219,8 +221,7 @@ func TestUniqueNodeIterator(t *testing.T) { trie3, err := trie.NewTrieWithUpdatedRegisters(trie2, paths, payloads, true) require.NoError(t, err) - f = append(f, trie3) - + // trie3 // n11 // / \ // / \ @@ -230,6 +231,8 @@ func TestUniqueNodeIterator(t *testing.T) { // n9 n2 // (p1/v5) (shared) + tries = append(tries, trie3) + expectedNodes := []*node.Node{ // unique nodes from trie1 trie1.RootNode().LeftChild().LeftChild(), // n1 @@ -245,13 +248,12 @@ func TestUniqueNodeIterator(t *testing.T) { trie3.RootNode().LeftChild().LeftChild(), // n9 trie3.RootNode().LeftChild(), // n10 trie3.RootNode(), // n11 - } // Use visitedNodes to prevent revisiting shared sub-tries. 
visitedNodes := make(map[*node.Node]uint64) i := 0 - for _, trie := range f { + for _, trie := range tries { for itr := flattener.NewUniqueNodeIterator(trie, visitedNodes); itr.Next(); { n := itr.Value() visitedNodes[n] = uint64(i) From 6618948bb1d290087d306cedd6e8d8fdbe0ab9ef Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 23 Feb 2022 17:30:07 -0600 Subject: [PATCH 29/37] Refactor checkpointer --- ledger/complete/wal/checkpointer.go | 183 +++++++++++++--------------- 1 file changed, 85 insertions(+), 98 deletions(-) diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index 681576ff27a..e0ef1282970 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -36,6 +36,15 @@ const VersionV3 uint16 = 0x03 // Version 4 also reduces checkpoint data size. See EncodeNode() and EncodeTrie() for more details. const VersionV4 uint16 = 0x04 +const ( + encMagicSize = 2 + encVersionSize = 2 + headerSize = encMagicSize + encVersionSize + encNodeCountSize = 8 + encTrieCountSize = 2 + crc32SumSize = 4 +) + // defaultBufioReadSize replaces the default bufio buffer size of 4096 bytes. // defaultBufioReadSize can be increased to 8KiB, 16KiB, 32KiB, etc. if it // improves performance on typical EN hardware. @@ -176,6 +185,8 @@ func (c *Checkpointer) Checkpoint(to int, targetWriter func() (io.WriteCloser, e return fmt.Errorf("cannot create Forest: %w", err) } + c.wal.log.Info().Msgf("creating checkpoint %d", to) + err = c.wal.replay(0, to, func(tries []*trie.MTrie) error { return forest.AddTries(tries) @@ -270,35 +281,35 @@ func CreateCheckpointWriterForFile(dir, filename string) (io.WriteCloser, error) // TODO: add concurrency if the performance gains are enough to offset complexity. func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { - var err error - crc32Writer := NewCRC32Writer(writer) + // Scratch buffer is used as temporary buffer that node can encode into. + // Data in scratch buffer should be copied or used before scratch buffer is used again. + // If the scratch buffer isn't large enough, a new buffer will be allocated. + // However, 4096 bytes will be large enough to handle almost all payloads + // and 100% of interim nodes. + scratch := make([]byte, 1024*4) + // Write header: magic (2 bytes) + version (2 bytes) - header := make([]byte, 4) - pos := writeUint16(header, 0, MagicBytes) - _ = writeUint16(header, pos, VersionV4) + header := scratch[:headerSize] + binary.BigEndian.PutUint16(header, MagicBytes) + binary.BigEndian.PutUint16(header[encMagicSize:], VersionV4) - _, err = crc32Writer.Write(header) + _, err := crc32Writer.Write(header) if err != nil { return fmt.Errorf("cannot write checkpoint header: %w", err) } - // assign unique index to every node + // allNodes contains all unique nodes of given tries and their index + // (ordered by node traversal sequence). + // Index 0 is a special case with nil node. allNodes := make(map[*node.Node]uint64) - allNodes[nil] = 0 // 0th element is nil + allNodes[nil] = 0 allRootNodes := make([]*node.Node, len(tries)) - // Scratch buffer is used as temporary buffer that node can encode into. - // Data in scratch buffer should be copied or used before scratch buffer is used again. - // If the scratch buffer isn't large enough, a new buffer will be allocated. - // However, 4096 bytes will be large enough to handle almost all payloads - // and 100% of interim nodes. 
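For orientation, this is the overall V4 file layout that the refactored StoreCheckpoint writes (a sketch derived from the hunks that follow, not normative):

// V4 checkpoint file layout (sketch):
//
//	magic (2 bytes) | version (2 bytes)           -- header
//	node 1 | node 2 | ... | node N                -- unique nodes, descendants first
//	trie 1 | ... | trie M                         -- root node index (8) + root hash (32) each
//	node count (8 bytes) | trie count (2 bytes)   -- footer
//	CRC32 sum (4 bytes)                           -- checksum

Everything except the trailing CRC32 sum is written through crc32Writer, which is why the checksum covers the header, nodes, tries, and footer.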
- scratch := make([]byte, 1024*4) - // Serialize all unique nodes - nodeCounter := uint64(1) // start from 1, as 0 marks nil + nodeCounter := uint64(1) // start from 1, as 0 marks nil node for i, t := range tries { // Traverse all unique nodes for trie t. @@ -327,10 +338,10 @@ func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { } } - bytes := flattener.EncodeNode(n, lchildIndex, rchildIndex, scratch) - _, err = crc32Writer.Write(bytes) + encNode := flattener.EncodeNode(n, lchildIndex, rchildIndex, scratch) + _, err = crc32Writer.Write(encNode) if err != nil { - return fmt.Errorf("error while writing node data: %w", err) + return fmt.Errorf("cannot serialize node: %w", err) } } @@ -352,30 +363,30 @@ func StoreCheckpoint(writer io.Writer, tries ...*trie.MTrie) error { return fmt.Errorf("internal error: missing node with hash %s", hex.EncodeToString(rootHash[:])) } - bytes := flattener.EncodeTrie(rootNode, rootIndex, scratch) - _, err = crc32Writer.Write(bytes) + encTrie := flattener.EncodeTrie(rootNode, rootIndex, scratch) + _, err = crc32Writer.Write(encTrie) if err != nil { - return fmt.Errorf("error while writing trie data: %w", err) + return fmt.Errorf("cannot serialize trie: %w", err) } } // Write footer with nodes count and tries count - footer := make([]byte, 10) - pos = writeUint64(footer, 0, uint64(len(allNodes)-1)) // -1 to account for 0 node meaning nil - writeUint16(footer, pos, uint16(len(allRootNodes))) + footer := scratch[:encNodeCountSize+encTrieCountSize] + binary.BigEndian.PutUint64(footer, uint64(len(allNodes)-1)) // -1 to account for 0 node meaning nil + binary.BigEndian.PutUint16(footer[encNodeCountSize:], uint16(len(allRootNodes))) _, err = crc32Writer.Write(footer) if err != nil { return fmt.Errorf("cannot write checkpoint footer: %w", err) } - // add CRC32 sum - crc32buf := make([]byte, 4) - writeUint32(crc32buf, 0, crc32Writer.Crc32()) + // Write CRC32 sum + crc32buf := scratch[:crc32SumSize] + binary.BigEndian.PutUint32(crc32buf, crc32Writer.Crc32()) _, err = writer.Write(crc32buf) if err != nil { - return fmt.Errorf("cannot write crc32: %w", err) + return fmt.Errorf("cannot write CRC32: %w", err) } return nil @@ -418,19 +429,17 @@ func LoadCheckpoint(filepath string) ([]*trie.MTrie, error) { } func readCheckpoint(f *os.File) ([]*trie.MTrie, error) { + // Read header: magic (2 bytes) + version (2 bytes) - header := make([]byte, 4) + header := make([]byte, headerSize) _, err := io.ReadFull(f, header) if err != nil { - return nil, fmt.Errorf("cannot read header bytes: %w", err) + return nil, fmt.Errorf("cannot read header: %w", err) } - magicBytes, pos := readUint16(header, 0) - version, _ := readUint16(header, pos) - - if magicBytes != MagicBytes { - return nil, fmt.Errorf("unknown file format. Magic constant %x does not match expected %x", magicBytes, MagicBytes) - } + // Decode header + magicBytes := binary.BigEndian.Uint16(header) + version := binary.BigEndian.Uint16(header[encMagicSize:]) // Reset offset _, err = f.Seek(0, io.SeekStart) @@ -438,19 +447,23 @@ func readCheckpoint(f *os.File) ([]*trie.MTrie, error) { return nil, fmt.Errorf("cannot seek to start of file: %w", err) } + if magicBytes != MagicBytes { + return nil, fmt.Errorf("unknown file format. 
Magic constant %x does not match expected %x", magicBytes, MagicBytes) + } + switch version { case VersionV1, VersionV3: return readCheckpointV3AndEarlier(f, version) case VersionV4: return readCheckpointV4(f) default: - return nil, fmt.Errorf("unsupported file version %x ", version) + return nil, fmt.Errorf("unsupported file version %x", version) } } // readCheckpointV3AndEarlier deserializes checkpoint file (version 3 and earlier) and returns a list of tries. -// Header (magic and version) are verified by the caller. -// TODO: return []*trie.MTrie directly without conversion to FlattenedForest. +// Header (magic and version) is verified by the caller. +// This function is for backwards compatibility, not optimized. func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, error) { var bufReader io.Reader = bufio.NewReaderSize(f, defaultBufioReadSize) @@ -464,20 +477,19 @@ func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, erro reader = crcReader } - // Header has: magic (2 bytes) + version (2 bytes) + node count (8 bytes) + trie count (2 bytes) - header := make([]byte, 2+2+8+2) + // Read header (magic + version), node count, and trie count. + header := make([]byte, headerSize+encNodeCountSize+encTrieCountSize) _, err := io.ReadFull(reader, header) if err != nil { - return nil, fmt.Errorf("cannot read header bytes: %w", err) + return nil, fmt.Errorf("cannot read header: %w", err) } // Magic and version are verified by the caller. - // Get node count and trie count - const nodesCountOffset = 2 + 2 - nodesCount, pos := readUint64(header, nodesCountOffset) - triesCount, _ := readUint16(header, pos) + // Decode node count and trie count + nodesCount := binary.BigEndian.Uint64(header[headerSize:]) + triesCount := binary.BigEndian.Uint16(header[headerSize+encNodeCountSize:]) nodes := make([]*node.Node, nodesCount+1) //+1 for 0 index meaning nil tries := make([]*trie.MTrie, triesCount) @@ -503,28 +515,29 @@ func readCheckpointV3AndEarlier(f *os.File, version uint16) ([]*trie.MTrie, erro return nodes[nodeIndex], nil }) if err != nil { - return nil, fmt.Errorf("cannot read storable trie %d: %w", i, err) + return nil, fmt.Errorf("cannot read trie %d: %w", i, err) } tries[i] = trie } if version == VersionV3 { - crc32buf := make([]byte, 4) - _, err := bufReader.Read(crc32buf) + crc32buf := make([]byte, crc32SumSize) + + _, err := io.ReadFull(bufReader, crc32buf) if err != nil { - return nil, fmt.Errorf("error while reading CRC32 checksum: %w", err) + return nil, fmt.Errorf("cannot read CRC32: %w", err) } - readCrc32, _ := readUint32(crc32buf, 0) + + readCrc32 := binary.BigEndian.Uint32(crc32buf) calculatedCrc32 := crcReader.Crc32() if calculatedCrc32 != readCrc32 { - return nil, fmt.Errorf("checkpoint checksum failed! File contains %x but read data checksums to %x", readCrc32, calculatedCrc32) + return nil, fmt.Errorf("checkpoint checksum failed! File contains %x but calculated crc32 is %x", readCrc32, calculatedCrc32) } } return tries, nil - } // readCheckpointV4 deserializes checkpoint file (version 4) and returns a list of tries. 
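One behavioral fix in the hunk above is easy to miss: reading the trailing checksum switched from bufReader.Read to io.ReadFull. A bare Read may return fewer than len(buf) bytes with a nil error, which could silently truncate the 4-byte checksum; io.ReadFull either fills the buffer or reports an error. A minimal sketch of the pattern (readChecksum is a hypothetical helper; assumes encoding/binary, fmt, and io are imported):

func readChecksum(r io.Reader) (uint32, error) {
	var buf [4]byte
	// io.ReadFull fails unless it fills buf completely, unlike bare
	// r.Read(buf[:]), which may legally return n < 4 with err == nil.
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return 0, fmt.Errorf("cannot read CRC32: %w", err)
	}
	return binary.BigEndian.Uint32(buf[:]), nil
}

The same pattern is used in the V4 reader below.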
@@ -541,9 +554,10 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { // Read footer to get node count and trie count // footer offset: nodes count (8 bytes) + tries count (2 bytes) + CRC32 sum (4 bytes) - const footerOffset = 8 + 2 + 4 - const footerSize = 8 + 2 // footer doesn't include crc32 sum + const footerOffset = encNodeCountSize + encTrieCountSize + crc32SumSize + const footerSize = encNodeCountSize + encTrieCountSize // footer doesn't include crc32 sum + // Seek to footer _, err := f.Seek(-footerOffset, io.SeekEnd) if err != nil { return nil, fmt.Errorf("cannot seek to footer: %w", err) @@ -553,11 +567,12 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { _, err = io.ReadFull(f, footer) if err != nil { - return nil, fmt.Errorf("cannot read footer bytes: %w", err) + return nil, fmt.Errorf("cannot read footer: %w", err) } + // Decode node count and trie count nodesCount := binary.BigEndian.Uint64(footer) - triesCount := binary.BigEndian.Uint16(footer[8:]) + triesCount := binary.BigEndian.Uint16(footer[encNodeCountSize:]) // Seek to the start of file _, err = f.Seek(0, io.SeekStart) @@ -572,9 +587,9 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { // Read header: magic (2 bytes) + version (2 bytes) // No action is needed for header because it is verified by the caller. - _, err = io.ReadFull(reader, scratch[:4]) + _, err = io.ReadFull(reader, scratch[:headerSize]) if err != nil { - return nil, fmt.Errorf("cannot read header bytes: %w", err) + return nil, fmt.Errorf("cannot read header: %w", err) } // nodes's element at index 0 is a special, meaning nil . @@ -584,7 +599,7 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { for i := uint64(1); i <= nodesCount; i++ { n, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { if nodeIndex >= uint64(i) { - return nil, fmt.Errorf("sequence of stored nodes does not satisfy Descendents-First-Relationship") + return nil, fmt.Errorf("sequence of serialized nodes does not satisfy Descendents-First-Relationship") } return nodes[nodeIndex], nil }) @@ -602,7 +617,7 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { return nodes[nodeIndex], nil }) if err != nil { - return nil, fmt.Errorf("cannot read storable trie %d: %w", i, err) + return nil, fmt.Errorf("cannot read trie %d: %w", i, err) } tries[i] = trie } @@ -611,51 +626,23 @@ func readCheckpointV4(f *os.File) ([]*trie.MTrie, error) { // No action is needed. _, err = io.ReadFull(reader, footer) if err != nil { - return nil, fmt.Errorf("cannot read footer bytes: %w", err) + return nil, fmt.Errorf("cannot read footer: %w", err) } - crc32buf := scratch[:4] - _, err = bufReader.Read(crc32buf) + // Read CRC32 + crc32buf := scratch[:crc32SumSize] + _, err = io.ReadFull(bufReader, crc32buf) if err != nil { - return nil, fmt.Errorf("error while reading CRC32 checksum: %w", err) + return nil, fmt.Errorf("cannot read CRC32: %w", err) } - readCrc32, _ := readUint32(crc32buf, 0) + + readCrc32 := binary.BigEndian.Uint32(crc32buf) calculatedCrc32 := crcReader.Crc32() if calculatedCrc32 != readCrc32 { - return nil, fmt.Errorf("checkpoint checksum failed! File contains %x but read data checksums to %x", readCrc32, calculatedCrc32) + return nil, fmt.Errorf("checkpoint checksum failed! 
File contains %x but calculated crc32 is %x", readCrc32, calculatedCrc32) } return tries, nil } - -func writeUint16(buffer []byte, location int, value uint16) int { - binary.BigEndian.PutUint16(buffer[location:], value) - return location + 2 -} - -func readUint16(buffer []byte, location int) (uint16, int) { - value := binary.BigEndian.Uint16(buffer[location:]) - return value, location + 2 -} - -func writeUint32(buffer []byte, location int, value uint32) int { - binary.BigEndian.PutUint32(buffer[location:], value) - return location + 4 -} - -func readUint32(buffer []byte, location int) (uint32, int) { - value := binary.BigEndian.Uint32(buffer[location:]) - return value, location + 4 -} - -func readUint64(buffer []byte, location int) (uint64, int) { - value := binary.BigEndian.Uint64(buffer[location:]) - return value, location + 8 -} - -func writeUint64(buffer []byte, location int, value uint64) int { - binary.BigEndian.PutUint64(buffer[location:], value) - return location + 8 -} From 0f679fdab8afdd1ddcd05bbaa109bd615f9c355b Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 23 Feb 2022 17:54:02 -0600 Subject: [PATCH 30/37] Refactor checkpointer test --- ledger/complete/wal/checkpointer_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ledger/complete/wal/checkpointer_test.go b/ledger/complete/wal/checkpointer_test.go index f8f43e930ce..e7da5aae6c7 100644 --- a/ledger/complete/wal/checkpointer_test.go +++ b/ledger/complete/wal/checkpointer_test.go @@ -534,14 +534,13 @@ func Test_StoringLoadingCheckpoints(t *testing.T) { file.Close() t.Run("works without data modification", func(t *testing.T) { - - // first buffer reads ok - _, err = realWAL.LoadCheckpoint(filepath) + tries, err := realWAL.LoadCheckpoint(filepath) require.NoError(t, err) + require.Equal(t, 1, len(tries)) + require.Equal(t, updatedTrie, tries[0]) }) t.Run("detects modified data", func(t *testing.T) { - b, err := ioutil.ReadFile(filepath) require.NoError(t, err) @@ -552,8 +551,9 @@ func Test_StoringLoadingCheckpoints(t *testing.T) { err = os.WriteFile(filepath, b, 0644) require.NoError(t, err) - _, err = realWAL.LoadCheckpoint(filepath) + tries, err := realWAL.LoadCheckpoint(filepath) require.Error(t, err) + require.Nil(t, tries) require.Contains(t, err.Error(), "checksum") }) }) From 65514fac97471dfb1a7f84dac139ed5b1237bd81 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 23 Feb 2022 18:14:22 -0600 Subject: [PATCH 31/37] Refactor checkpoint v3 and earlier tests --- .../complete/wal/checkpointer_versioning_test.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ledger/complete/wal/checkpointer_versioning_test.go b/ledger/complete/wal/checkpointer_versioning_test.go index 81e412ba1c0..1e093cc605f 100644 --- a/ledger/complete/wal/checkpointer_versioning_test.go +++ b/ledger/complete/wal/checkpointer_versioning_test.go @@ -82,8 +82,7 @@ func CreateCheckpointV3() { trie1, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) require.NoError(t, err) - f.AddTrie(trie1) - + // trie1 // n4 // / // / @@ -93,6 +92,8 @@ func CreateCheckpointV3() { // n1 (p1/v1) n2 (p2/v2) // + f.AddTrie(trie1) + // New trie reuses its parent's left sub-trie. // key: 1000... 
@@ -109,8 +110,7 @@ trie2, err := trie.NewTrieWithUpdatedRegisters(trie1, paths, payloads, true) require.NoError(t, err) - f.AddTrie(trie2) - + // trie2 // n8 // / \ // / \ @@ -120,6 +120,8 @@ // n5 n6 // (p3/v3) (p4/v4) + f.AddTrie(trie2) + // New trie reuses its parent's right sub-trie, and left sub-trie's leaf node. // key: 0000... @@ -131,8 +133,7 @@ trie3, err := trie.NewTrieWithUpdatedRegisters(trie2, paths, payloads, true) require.NoError(t, err) - f.AddTrie(trie3) - + // trie3 // n11 // / \ // / \ @@ -142,6 +143,8 @@ // n9 n2 // (p1/v5) (shared) + f.AddTrie(trie3) + flattenedForest, err := flattener.FlattenForest(f) require.NoError(t, err) From 0ee96753c82d52d49152860d19ff44484a98ddec Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 25 Feb 2022 10:56:56 -0600 Subject: [PATCH 32/37] Remove TODO comment to log mtrie eviction cb error NewForest() doesn't have a logger. Error is returned by onTreeEvicted() callback function passed to NewForest() by NewLedger(). NewLedger() has a Logger and defines onTreeEvicted(), so we can probably log the error from inside onTreeEvicted() instead. However, that change is outside the scope for PR #1944. --- ledger/complete/mtrie/forest.go | 1 - 1 file changed, 1 deletion(-) diff --git a/ledger/complete/mtrie/forest.go b/ledger/complete/mtrie/forest.go index c783493cac6..96b9d542aa4 100644 --- a/ledger/complete/mtrie/forest.go +++ b/ledger/complete/mtrie/forest.go @@ -50,7 +50,6 @@ func NewForest(forestCapacity int, metrics module.LedgerMetrics, onTreeEvicted f if !ok { panic(fmt.Sprintf("cache contains item of type %T", value)) } - // TODO Log error _ = onTreeEvicted(trie) }) } else { From 0f1ba3526fc7b33ad4da972e7f04b5ac3bcbd567 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 1 Mar 2022 21:01:58 -0600 Subject: [PATCH 33/37] Add checkpoint benchmarks Add benchmarks for checkpoint creation and checkpoint loading. Checkpoint creation time can vary. For example: 926 seconds (first run after OS boot) 863 seconds (2nd run of benchmark without OS reboot in between) Also fix off-by-one error in the checkpoint filename created by benchmarks: was checkpoint.00003485 (number should've been 3484) now checkpoint.00003484 (same file size and hash, only name changed) For BenchmarkNewCheckpoint and BenchmarkLoadCheckpointAndWALs: If the current folder doesn't contain the checkpoint and WAL files, then the -dir option should be used. These two benchmarks should be used to benchmark real data. For example, use checkpoint.00003443 and 41 WAL files (00003444 - 00003484) to create checkpoint.00003484. The BenchmarkNewCheckpointRandom* benchmarks generate random WAL segments and don't require the -dir option or any files. They can be used for regression tests.
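For example, assuming the checkpoint and WAL files live in /data/flow (a hypothetical path), the real-data benchmarks could be invoked along these lines:

go test ./ledger/complete -run NONE -bench 'BenchmarkNewCheckpoint$' -dir /data/flow
go test ./ledger/complete -run NONE -bench BenchmarkLoadCheckpointAndWALs -dir /data/flow

Here -dir is the custom flag registered with flag.String in the new file below, and -run NONE keeps the package's regular tests from running alongside the benchmark.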
--- ledger/complete/checkpoint_benchmark_test.go | 383 +++++++++++++++++++ 1 file changed, 383 insertions(+) create mode 100644 ledger/complete/checkpoint_benchmark_test.go diff --git a/ledger/complete/checkpoint_benchmark_test.go b/ledger/complete/checkpoint_benchmark_test.go new file mode 100644 index 00000000000..410e85b654b --- /dev/null +++ b/ledger/complete/checkpoint_benchmark_test.go @@ -0,0 +1,383 @@ +package complete_test + +import ( + "flag" + "fmt" + "io" + "math/rand" + "os" + "strconv" + "strings" + "testing" + "time" + + "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/common/hash" + "github.com/onflow/flow-go/ledger/common/pathfinder" + "github.com/onflow/flow-go/ledger/common/utils" + "github.com/onflow/flow-go/ledger/complete" + "github.com/onflow/flow-go/ledger/complete/mtrie" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" + "github.com/onflow/flow-go/ledger/complete/wal" + "github.com/onflow/flow-go/module/metrics" + "github.com/rs/zerolog" + "github.com/stretchr/testify/require" +) + +var dir = flag.String("dir", ".", "dir containing checkpoint and wal files") + +// BenchmarkNewCheckpoint benchmarks checkpoint file creation from existing checkpoint and wal segments. +// This requires a checkpoint file and one or more segments following the checkpoint file. +// This benchmark will create a checkpoint file. +func BenchmarkNewCheckpoint(b *testing.B) { + // Check if there is any segment in specified dir + foundSeg, err := hasSegmentInDir(*dir) + if err != nil { + b.Fatal(err) + } + if !foundSeg { + b.Fatalf("failed to find segment in %s. Use -dir to specify dir containing segments and checkpoint files.", *dir) + } + + // Check if there is any checkpoint file in specified dir + foundCheckpoint, err := hasCheckpointInDir(*dir) + if err != nil { + b.Fatal(err) + } + if !foundCheckpoint { + b.Fatalf("failed to find checkpoint in %s. Use -dir to specify dir containing segments and checkpoint files.", *dir) + } + + diskwal, err := wal.NewDiskWAL( + zerolog.Nop(), + nil, + metrics.NewNoopCollector(), + *dir, + 500, + pathfinder.PathByteSize, + wal.SegmentSize, + ) + if err != nil { + b.Fatal(err) + } + + _, to, err := diskwal.Segments() + if err != nil { + b.Fatal(err) + } + + checkpointer, err := diskwal.NewCheckpointer() + if err != nil { + b.Fatal(err) + } + + start := time.Now() + b.ResetTimer() + + err = checkpointer.Checkpoint(to-1, func() (io.WriteCloser, error) { + return checkpointer.CheckpointWriter(to - 1) + }) + + b.StopTimer() + elapsed := time.Since(start) + + if err != nil { + b.Fatal(err) + } + + b.ReportMetric(float64(elapsed/time.Millisecond), "newcheckpoint_time_(ms)") + b.ReportAllocs() +} + +// BenchmarkLoadCheckpointAndWALs benchmarks checkpoint file loading and wal segments replaying. +// This requires a checkpoint file and one or more segments following the checkpoint file. +// This mimics rebuilding mtrie at EN startup. +func BenchmarkLoadCheckpointAndWALs(b *testing.B) { + // Check if there is any segment in specified dir + foundSeg, err := hasSegmentInDir(*dir) + if err != nil { + b.Fatal(err) + } + if !foundSeg { + b.Fatalf("failed to find segment in %s. Use -dir to specify dir containing segments and checkpoint files.", *dir) + } + + // Check if there is any checkpoint file in specified dir + foundCheckpoint, err := hasCheckpointInDir(*dir) + if err != nil { + b.Fatal(err) + } + if !foundCheckpoint { + b.Fatalf("failed to find checkpoint in %s. 
Use -dir to specify dir containing segments and checkpoint files.", *dir) + } + + forest, err := mtrie.NewForest(500, metrics.NewNoopCollector(), func(evictedTrie *trie.MTrie) error { + return nil + }) + if err != nil { + b.Fatal(err) + } + + diskwal, err := wal.NewDiskWAL( + zerolog.Nop(), + nil, + metrics.NewNoopCollector(), + *dir, + 500, + pathfinder.PathByteSize, + wal.SegmentSize, + ) + if err != nil { + b.Fatal(err) + } + + // pause records to prevent double logging trie removals + diskwal.PauseRecord() + defer diskwal.UnpauseRecord() + + start := time.Now() + b.ResetTimer() + + err = diskwal.Replay( + func(tries []*trie.MTrie) error { + err := forest.AddTries(tries) + if err != nil { + return fmt.Errorf("adding rebuilt tries to forest failed: %w", err) + } + return nil + }, + func(update *ledger.TrieUpdate) error { + _, err := forest.Update(update) + return err + }, + func(rootHash ledger.RootHash) error { + forest.RemoveTrie(rootHash) + return nil + }, + ) + if err != nil { + b.Fatal(err) + } + + b.StopTimer() + elapsed := time.Since(start) + + b.ReportMetric(float64(elapsed/time.Millisecond), "loadcheckpointandwals_time_(ms)") + b.ReportAllocs() +} + +func hasSegmentInDir(dir string) (bool, error) { + files, err := os.ReadDir(dir) + if err != nil { + return false, err + } + + for _, fn := range files { + fname := fn.Name() + _, err := strconv.Atoi(fname) + if err != nil { + continue + } + return true, nil + } + return false, nil +} + +func hasCheckpointInDir(dir string) (bool, error) { + const checkpointFilenamePrefix = "checkpoint." + + files, err := os.ReadDir(dir) + if err != nil { + return false, err + } + + for _, fn := range files { + fname := fn.Name() + if !strings.HasPrefix(fname, checkpointFilenamePrefix) { + continue + } + justNumber := fname[len(checkpointFilenamePrefix):] + _, err := strconv.Atoi(justNumber) + if err != nil { + continue + } + return true, nil + } + + return false, nil +} + +func BenchmarkNewCheckpointRandom5Seg(b *testing.B) { benchmarkNewCheckpointRandomData(b, 5) } + +func BenchmarkNewCheckpointRandom10Seg(b *testing.B) { benchmarkNewCheckpointRandomData(b, 10) } + +func BenchmarkNewCheckpointRandom20Seg(b *testing.B) { benchmarkNewCheckpointRandomData(b, 20) } + +func BenchmarkNewCheckpointRandom30Seg(b *testing.B) { benchmarkNewCheckpointRandomData(b, 30) } + +func BenchmarkNewCheckpointRandom40Seg(b *testing.B) { benchmarkNewCheckpointRandomData(b, 40) } + +// benchmarkNewCheckpointRandomData benchmarks checkpoint file creation. +// This benchmark creates segmentCount+1 WAL segments. It also creates two checkpoint files: +// - checkpoint file A from segment 0, and +// - checkpoint file B from checkpoint file A and all segments after segment 0. +// This benchmark measures the creation of checkpoint file B: +// - loading checkpoint file A +// - replaying all segments after segment 0 +// - creating checkpoint file B +// Because payload data is random, the number of segments created can differ from segmentCount. +func benchmarkNewCheckpointRandomData(b *testing.B, segmentCount int) { + + const ( + updatePerSegment = 75 // approximately 75 updates fill 1 segment + kvBatchCount = 500 // Each update has 500 new payloads.
+ ) + + if segmentCount < 1 { + segmentCount = 1 + } + + kvOpts := randKeyValueOptions{ + keyNumberOfParts: 3, + keyPartMinByteSize: 1, + keyPartMaxByteSize: 50, + valueMinByteSize: 50, + valueMaxByteSize: 1024 * 1.5, + } + updateCount := (segmentCount + 1) * updatePerSegment + + seed := uint64(0x9E3779B97F4A7C15) // golden ratio + rand.Seed(int64(seed)) + + dir, err := os.MkdirTemp("", "test-mtrie-") + fmt.Printf("dir %s\n", dir) + defer os.RemoveAll(dir) + if err != nil { + b.Fatal(err) + } + + wal1, err := wal.NewDiskWAL( + zerolog.Nop(), + nil, + metrics.NewNoopCollector(), + dir, + 500, + pathfinder.PathByteSize, + wal.SegmentSize) + if err != nil { + b.Fatal(err) + } + + led, err := complete.NewLedger( + wal1, + 500, + &metrics.NoopCollector{}, + zerolog.Logger{}, + complete.DefaultPathFinderVersion, + ) + if err != nil { + b.Fatal(err) + } + + state := led.InitialState() + + _, err = updateLedgerWithRandomData(led, state, updateCount, kvBatchCount, kvOpts) + if err != nil { + b.Fatal(err) + } + + <-wal1.Done() + <-led.Done() + + wal2, err := wal.NewDiskWAL( + zerolog.Nop(), + nil, + metrics.NewNoopCollector(), + dir, + 500, + pathfinder.PathByteSize, + wal.SegmentSize, + ) + if err != nil { + b.Fatal(err) + } + + checkpointer, err := wal2.NewCheckpointer() + if err != nil { + b.Fatal(err) + } + + // Create checkpoint with only one segment as the base checkpoint for the next step. + err = checkpointer.Checkpoint(0, func() (io.WriteCloser, error) { + return checkpointer.CheckpointWriter(0) + }) + require.NoError(b, err) + + // Create checkpoint with remaining segments + _, to, err := wal2.Segments() + require.NoError(b, err) + + if to == 1 { + fmt.Printf("skip creating second checkpoint file because to segment is 1\n") + return + } + + start := time.Now() + b.ResetTimer() + + err = checkpointer.Checkpoint(to-1, func() (io.WriteCloser, error) { + return checkpointer.CheckpointWriter(to) + }) + + b.StopTimer() + elapsed := time.Since(start) + + if err != nil { + b.Fatal(err) + } + + b.ReportMetric(float64(elapsed/time.Millisecond), "newcheckpoint_rand_time_(ms)") + b.ReportAllocs() + + files, _ := os.ReadDir(dir) + for _, fn := range files { + info, _ := fn.Info() + fmt.Printf("%s, %d\n", fn.Name(), info.Size()) + } +} + +type randKeyValueOptions struct { + keyNumberOfParts int + keyPartMinByteSize int + keyPartMaxByteSize int + valueMinByteSize int + valueMaxByteSize int +} + +func updateLedgerWithRandomData( + led ledger.Ledger, + state ledger.State, + updateCount int, + kvBatchCount int, + kvOpts randKeyValueOptions, +) (ledger.State, error) { + + for i := 0; i < updateCount; i++ { + keys := utils.RandomUniqueKeys(kvBatchCount, kvOpts.keyNumberOfParts, kvOpts.keyPartMinByteSize, kvOpts.keyPartMaxByteSize) + values := utils.RandomValues(kvBatchCount, kvOpts.valueMinByteSize, kvOpts.valueMaxByteSize) + + update, err := ledger.NewUpdate(state, keys, values) + if err != nil { + return ledger.State(hash.DummyHash), err + } + + newState, _, err := led.Set(update) + if err != nil { + return ledger.State(hash.DummyHash), err + } + + state = newState + } + + return state, nil +} From c91910085502879155a47337a92d67df39bc5687 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 1 Mar 2022 22:10:14 -0600 Subject: [PATCH 34/37] Fix linter error --- ledger/complete/checkpoint_benchmark_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ledger/complete/checkpoint_benchmark_test.go 
b/ledger/complete/checkpoint_benchmark_test.go index 410e85b654b..8a40f70b6c6 100644 --- a/ledger/complete/checkpoint_benchmark_test.go +++ b/ledger/complete/checkpoint_benchmark_test.go @@ -11,6 +11,9 @@ import ( "testing" "time" + "github.com/rs/zerolog" + "github.com/stretchr/testify/require" + "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/hash" "github.com/onflow/flow-go/ledger/common/pathfinder" @@ -20,8 +23,6 @@ import ( "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/ledger/complete/wal" "github.com/onflow/flow-go/module/metrics" - "github.com/rs/zerolog" - "github.com/stretchr/testify/require" ) var dir = flag.String("dir", ".", "dir containing checkpoint and wal files") From 1ebde088e9e978de4fc3a522168f349d9eb6eb15 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 1 Mar 2022 22:55:54 -0600 Subject: [PATCH 35/37] Remove extra logs --- ledger/complete/checkpoint_benchmark_test.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ledger/complete/checkpoint_benchmark_test.go b/ledger/complete/checkpoint_benchmark_test.go index 8a40f70b6c6..5a991d3a20d 100644 --- a/ledger/complete/checkpoint_benchmark_test.go +++ b/ledger/complete/checkpoint_benchmark_test.go @@ -251,7 +251,6 @@ func benchmarkNewCheckpointRandomData(b *testing.B, segmentCount int) { rand.Seed(int64(seed)) dir, err := os.MkdirTemp("", "test-mtrie-") - fmt.Printf("dir %s\n", dir) defer os.RemoveAll(dir) if err != nil { b.Fatal(err) @@ -339,12 +338,6 @@ func benchmarkNewCheckpointRandomData(b *testing.B, segmentCount int) { b.ReportMetric(float64(elapsed/time.Millisecond), "newcheckpoint_rand_time_(ms)") b.ReportAllocs() - - files, _ := os.ReadDir(dir) - for _, fn := range files { - info, _ := fn.Info() - fmt.Printf("%s, %d\n", fn.Name(), info.Size()) - } } type randKeyValueOptions struct { From c99bc8c9128a56bc0569c38f91964432c7cbe4d3 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Mon, 7 Mar 2022 13:56:54 -0600 Subject: [PATCH 36/37] Remove returned error from onTreeEvicted callback - Remove return value (error) from onTreeEvicted() - Log error in onTreeEvicted() in NewLedger() - Replace all empty onTreeEvicted callbacks with nil --- ledger/complete/checkpoint_benchmark_test.go | 4 +--- ledger/complete/ledger.go | 11 +++++++---- ledger/complete/mtrie/forest.go | 6 +++--- ledger/complete/wal/checkpointer.go | 4 +--- ledger/complete/wal/checkpointer_test.go | 10 +++++----- ledger/complete/wal/compactor_test.go | 6 +++--- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/ledger/complete/checkpoint_benchmark_test.go b/ledger/complete/checkpoint_benchmark_test.go index 5a991d3a20d..799651035a8 100644 --- a/ledger/complete/checkpoint_benchmark_test.go +++ b/ledger/complete/checkpoint_benchmark_test.go @@ -112,9 +112,7 @@ func BenchmarkLoadCheckpointAndWALs(b *testing.B) { b.Fatalf("failed to find checkpoint in %s. 
Use -dir to specify dir containing segments and checkpoint files.", *dir) } - forest, err := mtrie.NewForest(500, metrics.NewNoopCollector(), func(evictedTrie *trie.MTrie) error { - return nil - }) + forest, err := mtrie.NewForest(500, metrics.NewNoopCollector(), nil) if err != nil { b.Fatal(err) } diff --git a/ledger/complete/ledger.go b/ledger/complete/ledger.go index dc39a29e660..6c05eb17dac 100644 --- a/ledger/complete/ledger.go +++ b/ledger/complete/ledger.go @@ -47,15 +47,18 @@ func NewLedger( log zerolog.Logger, pathFinderVer uint8) (*Ledger, error) { - forest, err := mtrie.NewForest(capacity, metrics, func(evictedTrie *trie.MTrie) error { - return wal.RecordDelete(evictedTrie.RootHash()) + logger := log.With().Str("ledger", "complete").Logger() + + forest, err := mtrie.NewForest(capacity, metrics, func(evictedTrie *trie.MTrie) { + err := wal.RecordDelete(evictedTrie.RootHash()) + if err != nil { + logger.Error().Err(err).Msg("failed to save delete record in wal") + } }) if err != nil { return nil, fmt.Errorf("cannot create forest: %w", err) } - logger := log.With().Str("ledger", "complete").Logger() - storage := &Ledger{ forest: forest, wal: wal, diff --git a/ledger/complete/mtrie/forest.go b/ledger/complete/mtrie/forest.go index 96b9d542aa4..46e4783aab4 100644 --- a/ledger/complete/mtrie/forest.go +++ b/ledger/complete/mtrie/forest.go @@ -29,7 +29,7 @@ type Forest struct { // needed trie in the forest might cause a fatal application logic error. tries *lru.Cache forestCapacity int - onTreeEvicted func(tree *trie.MTrie) error + onTreeEvicted func(tree *trie.MTrie) metrics module.LedgerMetrics } @@ -40,7 +40,7 @@ type Forest struct { // THIS IS A ROUGH HEURISTIC as it might evict tries that are still needed. // Make sure you chose a sufficiently large forestCapacity, such that, when reaching the capacity, the // Least Recently Used trie will never be needed again. 
-func NewForest(forestCapacity int, metrics module.LedgerMetrics, onTreeEvicted func(tree *trie.MTrie) error) (*Forest, error) { +func NewForest(forestCapacity int, metrics module.LedgerMetrics, onTreeEvicted func(tree *trie.MTrie)) (*Forest, error) { // init LRU cache as a SHORTCUT for a usage-related storage eviction policy var cache *lru.Cache var err error @@ -50,7 +50,7 @@ func NewForest(forestCapacity int, metrics module.LedgerMetrics, onTreeEvicted f if !ok { panic(fmt.Sprintf("cache contains item of type %T", value)) } - _ = onTreeEvicted(trie) + onTreeEvicted(trie) }) } else { cache, err = lru.New(forestCapacity) diff --git a/ledger/complete/wal/checkpointer.go b/ledger/complete/wal/checkpointer.go index e0ef1282970..9e5080e8a2a 100644 --- a/ledger/complete/wal/checkpointer.go +++ b/ledger/complete/wal/checkpointer.go @@ -178,9 +178,7 @@ func (c *Checkpointer) Checkpoint(to int, targetWriter func() (io.WriteCloser, e return fmt.Errorf("no segments to checkpoint to %d, latests not checkpointed segment: %d", to, notCheckpointedTo) } - forest, err := mtrie.NewForest(c.forestCapacity, &metrics.NoopCollector{}, func(evictedTrie *trie.MTrie) error { - return nil - }) + forest, err := mtrie.NewForest(c.forestCapacity, &metrics.NoopCollector{}, nil) if err != nil { return fmt.Errorf("cannot create Forest: %w", err) } diff --git a/ledger/complete/wal/checkpointer_test.go b/ledger/complete/wal/checkpointer_test.go index e7da5aae6c7..cd811ce45c4 100644 --- a/ledger/complete/wal/checkpointer_test.go +++ b/ledger/complete/wal/checkpointer_test.go @@ -120,7 +120,7 @@ func Test_Checkpointing(t *testing.T) { unittest.RunWithTempDir(t, func(dir string) { - f, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) var rootHash = f.GetEmptyRootHash() @@ -170,7 +170,7 @@ func Test_Checkpointing(t *testing.T) { }) // create a new forest and replay WAL - f2, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f2, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) t.Run("replay WAL and create checkpoint", func(t *testing.T) { @@ -207,7 +207,7 @@ func Test_Checkpointing(t *testing.T) { <-wal2.Done() }) - f3, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f3, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) t.Run("read checkpoint", func(t *testing.T) { @@ -287,7 +287,7 @@ func Test_Checkpointing(t *testing.T) { require.FileExists(t, path.Join(dir, "00000011")) //make sure we have extra segment }) - f5, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f5, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) t.Run("replay both checkpoint and updates after checkpoint", func(t *testing.T) { @@ -338,7 +338,7 @@ func Test_Checkpointing(t *testing.T) { t.Run("corrupted checkpoints are skipped", func(t *testing.T) { - f6, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f6, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) wal6, err := realWAL.NewDiskWAL(zerolog.Nop(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) diff --git a/ledger/complete/wal/compactor_test.go b/ledger/complete/wal/compactor_test.go index efcbb0160c5..ab957c412a4 100644 --- 
a/ledger/complete/wal/compactor_test.go +++ b/ledger/complete/wal/compactor_test.go @@ -53,7 +53,7 @@ func Test_Compactor(t *testing.T) { unittest.RunWithTempDir(t, func(dir string) { - f, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) var rootHash = f.GetEmptyRootHash() @@ -157,7 +157,7 @@ func Test_Compactor(t *testing.T) { } }) - f2, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f2, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) time.Sleep(2 * time.Second) @@ -233,7 +233,7 @@ func Test_Compactor_checkpointInterval(t *testing.T) { unittest.RunWithTempDir(t, func(dir string) { - f, err := mtrie.NewForest(size*10, metricsCollector, func(tree *trie.MTrie) error { return nil }) + f, err := mtrie.NewForest(size*10, metricsCollector, nil) require.NoError(t, err) var rootHash = f.GetEmptyRootHash() From d3a32c6eae1a1606f90a837876f7699765c3e550 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Mon, 7 Mar 2022 14:18:37 -0600 Subject: [PATCH 37/37] Add extra comment for NewNodeIterator NodeIterator created by NewNodeIterator is safe for concurrent use because visitedNodes is always nil in this case. --- ledger/complete/mtrie/flattener/iterator.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ledger/complete/mtrie/flattener/iterator.go b/ledger/complete/mtrie/flattener/iterator.go index ec552cc8108..dd01e5b0733 100644 --- a/ledger/complete/mtrie/flattener/iterator.go +++ b/ledger/complete/mtrie/flattener/iterator.go @@ -74,6 +74,8 @@ type NodeIterator struct { // The Descendents-First-Relationship has the following important property: // When re-building the Trie from the sequence of nodes, one can build the trie on the fly, // as for each node, the children have been previously encountered. +// NodeIterator created by NewNodeIterator is safe for concurrent use +// because visitedNodes is always nil in this case. func NewNodeIterator(mTrie *trie.MTrie) *NodeIterator { // for a Trie with height H (measured by number of edges), the longest possible path contains H+1 vertices stackSize := ledger.NodeMaxHeight + 1
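To make the new concurrency guarantee concrete, a sketch of what is and is not safe (illustration only; trieA, trieB, and the goroutines are hypothetical):

// Safe: each iterator from NewNodeIterator owns its own stack and has
// nil visitedNodes, so separate goroutines may each drive their own iterator.
go func() {
	for itr := flattener.NewNodeIterator(trieA); itr.Next(); {
		_ = itr.Value()
	}
}()
go func() {
	for itr := flattener.NewNodeIterator(trieB); itr.Next(); {
		_ = itr.Value()
	}
}()

// Not safe: iterators from NewUniqueNodeIterator share a caller-owned
// visitedNodes map (typically updated between Next calls), so forest
// iteration with a shared map must stay on a single goroutine.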