chore: improves documentation (#96)

A PR in line with EPIC celestiaorg/celestia-app#1296, aiming to improve the documentation of the NMT library. As part of this PR, some variables have been renamed to more accurately reflect their values.
celestiaorg · Feb 14, 2023 · b67b45e · b67b45e
1 parent b04eea5
commit b67b45e
Show file tree

Hide file tree

Showing 7 changed files with 367 additions and 188 deletions.
diff --git a/README.md b/README.md
@@ -37,7 +37,7 @@ import (
 )
 
 func main() {
-    // the tree will use this namespace size
+    // the tree will use this namespace size (number of bytes)
     nidSize := 1
     // the leaves that will be pushed
     data := [][]byte{

diff --git a/hasher.go b/hasher.go
@@ -15,35 +15,33 @@ const (
 
 var _ hash.Hash = (*Hasher)(nil)
 
-// defaultHasher uses sha256 as a base-hasher, 8 bytes
-// for the namespace IDs and ignores the maximum possible namespace.
+// defaultHasher uses sha256 as a base-hasher, 8 bytes for the namespace IDs and
+// ignores the maximum possible namespace.
 var defaultHasher = NewNmtHasher(sha256.New(), DefaultNamespaceIDLen, true)
 
-// Sha256Namespace8FlaggedLeaf uses sha256 as a base-hasher, 8 bytes
-// for the namespace IDs and ignores the maximum possible namespace.
+// Sha256Namespace8FlaggedLeaf uses sha256 as a base-hasher, 8 bytes for the
+// namespace IDs and ignores the maximum possible namespace.
 //
-// Sha256Namespace8FlaggedLeaf(namespacedData) results in:
-// ns(rawData) || ns(rawData) || sha256(LeafPrefix || rawData),
-// where rawData is the leaf's data minus the namespace.ID prefix
-// (namely namespacedData[NamespaceLen:]).
+// Sha256Namespace8FlaggedLeaf(namespacedData) results in: ns(rawData) ||
+// ns(rawData) || sha256(LeafPrefix || rawData), where rawData is the leaf's
+// data minus the namespace.ID prefix (namely namespacedData[NamespaceLen:]).
 //
-// Note that different from other cryptographic hash functions, this here
-// makes assumptions on the input:
-// len(namespacedData) >= DefaultNamespaceIDLen has to hold,
-// as the first DefaultNamespaceIDLen bytes are interpreted as the namespace ID).
-// If the input does not fulfil this, we will panic.
-// The output will be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes.
+// Note that, unlike other cryptographic hash functions, this one makes
+// assumptions on the input: len(namespacedData) >= DefaultNamespaceIDLen has to
+// hold, as the first DefaultNamespaceIDLen bytes are interpreted as the
+// namespace ID. If the input does not fulfil this, we will panic. The output
+// will be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes.
 func Sha256Namespace8FlaggedLeaf(namespacedData []byte) []byte {
 	return defaultHasher.HashLeaf(namespacedData)
 }
 
-// Sha256Namespace8FlaggedInner hashes inner nodes to:
-// minNID || maxNID || sha256(NodePrefix || leftRight), where leftRight consists of the full
-// left and right child node bytes, including their respective min and max namespace IDs.
-// Hence, the input has to be of size:
-// 48 = 32 + 8 + 8  = sha256.Size + 2*DefaultNamespaceIDLen bytes.
-// If the input does not fulfil this, we will panic.
-// The output will also be of length 2*DefaultNamespaceIDLen+sha256.Size = 48 bytes.
+// Sha256Namespace8FlaggedInner hashes inner nodes to: minNID || maxNID ||
+// sha256(NodePrefix || leftRight), where leftRight consists of the full left
+// and right child node bytes, including their respective min and max namespace
+// IDs. Hence, the input has to be of size: 48 = 32 + 8 + 8  = sha256.Size +
+// 2*DefaultNamespaceIDLen bytes. If the input does not fulfil this, we will
+// panic. The output will also be of length 2*DefaultNamespaceIDLen+sha256.Size
+// = 48 bytes.
 func Sha256Namespace8FlaggedInner(leftRight []byte) []byte {
 	const flagLen = DefaultNamespaceIDLen * 2
 	sha256Len := defaultHasher.baseHasher.Size()
@@ -57,6 +55,13 @@ type Hasher struct {
 	baseHasher   hash.Hash
 	NamespaceLen namespace.IDSize
 
+	// The "ignoreMaxNs" flag influences the calculation of the namespace ID
+	// range for intermediate nodes in the tree, i.e., the HashNode method. This
+	// flag signals that, when determining the upper limit of the namespace ID
+	// range for a tree node, the maximum possible namespace ID (equivalent to
+	// "NamespaceLen" bytes of 0xFF, or 2^(8*NamespaceLen)-1) should be omitted
+	// if feasible. For a more in-depth understanding of this field, refer to
+	// the documentation of "HashNode".
 	ignoreMaxNs      bool
 	precomputedMaxNs namespace.ID
 
@@ -88,8 +93,8 @@ func (n *Hasher) Size() int {
 
 // Write writes the namespaced data to be hashed.
 //
-// Requires data of fixed size to match leaf or inner NMT nodes.
-// Only a single write is allowed.
+// Requires data of fixed size to match leaf or inner NMT nodes. Only a single
+// write is allowed.
 func (n *Hasher) Write(data []byte) (int, error) {
 	if n.data != nil {
 		panic("only a single Write is allowed")
@@ -109,8 +114,8 @@ func (n *Hasher) Write(data []byte) (int, error) {
 	return ln, nil
 }
 
-// Sum computes the hash.
-// Does not append the given suffix, violating the interface.
+// Sum computes the hash. Does not append the given suffix, violating the
+// interface.
 func (n *Hasher) Sum([]byte) []byte {
 	switch n.tp {
 	case LeafPrefix:
@@ -165,19 +170,30 @@ func (n *Hasher) HashLeaf(leaf []byte) []byte {
 	return h.Sum(res)
 }
 
-// HashNode hashes inner nodes to:
-// minNID || maxNID || hash(NodePrefix || left || right), where left and right are the full
-// left and right child node bytes, including their respective min and max namespace IDs:
-// left = left.Min() || left.Max() || l.Hash().
-func (n *Hasher) HashNode(l, r []byte) []byte {
+// HashNode calculates a namespaced hash of a node using the supplied left and
+// right children. The input values, "left" and "right", are namespaced hash
+// values with the format "minNID || maxNID || hash". By default, the normal
+// namespace hash calculation is followed, which is "res = min(left.minNID,
+// right.minNID) || max(left.maxNID, right.maxNID) || H(NodePrefix, left,
+// right)". "res" refers to the return value of HashNode. However, if the
+// "ignoreMaxNs" property of the Hasher is set to true, the calculation of the
+// namespace ID range of the node slightly changes. In this case, when setting
+// the upper range, the maximum possible namespace ID (i.e.,
+// 2^(8*NamespaceLen)-1) should be ignored if possible. This is achieved by
+// taking the maximum value among the namespace IDs available in the range of
+// its left and right children (i.e., max(left.minNID, left.maxNID,
+// right.minNID, right.maxNID)) that is not equal to the maximum possible
+// namespace ID value. If such a namespace ID does not exist, the maximum NID is
+// calculated as normal, i.e., "res.maxNID = max(left.maxNID, right.maxNID)".
+func (n *Hasher) HashNode(left, right []byte) []byte {
 	h := n.baseHasher
 	h.Reset()
 
 	// the actual hash result of the children got extended (or flagged) by their
 	// children's minNs || maxNs; hence the flagLen = 2 * NamespaceLen:
 	flagLen := 2 * n.NamespaceLen
-	leftMinNs, leftMaxNs := l[:n.NamespaceLen], l[n.NamespaceLen:flagLen]
-	rightMinNs, rightMaxNs := r[:n.NamespaceLen], r[n.NamespaceLen:flagLen]
+	leftMinNs, leftMaxNs := left[:n.NamespaceLen], left[n.NamespaceLen:flagLen]
+	rightMinNs, rightMaxNs := right[:n.NamespaceLen], right[n.NamespaceLen:flagLen]
 
 	minNs := min(leftMinNs, rightMinNs)
 	var maxNs []byte
@@ -189,15 +205,17 @@ func (n *Hasher) HashNode(l, r []byte) []byte {
 		maxNs = max(leftMaxNs, rightMaxNs)
 	}
 
-	res := append(append(make([]byte, 0), minNs...), maxNs...)
+	res := make([]byte, 0)
+	res = append(res, minNs...)
+	res = append(res, maxNs...)
 
 	// Note this seems a little faster than calling several Write()s on the
-	// underlying Hash function (see: https://github.com/google/trillian/pull/1503):
-	data := append(append(append(
-		make([]byte, 0, 1+len(l)+len(r)),
-		NodePrefix),
-		l...),
-		r...)
+	// underlying Hash function (see:
+	// https://github.com/google/trillian/pull/1503):
+	data := make([]byte, 0, 1+len(left)+len(right))
+	data = append(data, NodePrefix)
+	data = append(data, left...)
+	data = append(data, right...)
 	//nolint:errcheck
 	h.Write(data)
 	return h.Sum(res)

diff --git a/namespace/data.go b/namespace/data.go
@@ -1,16 +1,15 @@
 package namespace
 
-// PrefixedData simply represents a slice of bytes which consists of
-// a namespace.ID and raw data.
-// The user has to guarantee that the bytes are valid namespace prefixed data.
-// Go's type system does not allow enforcing the structure we want:
-// [namespaceID, rawData ...], especially as this type does not expect any
-// particular size for the namespace.
+// PrefixedData simply represents a slice of bytes which consists of a
+// namespace.ID and raw data. The user has to guarantee that the bytes are valid
+// namespace prefixed data. Go's type system does not allow enforcing the
+// structure we want: [namespaceID, rawData ...], especially as this type does
+// not expect any particular size for the namespace.
 type PrefixedData []byte
 
-// PrefixedData8 like PrefixedData is just a slice of bytes.
-// It assumes that the slice it represents is at least 8 bytes.
-// This assumption is not enforced by the type system though.
+// PrefixedData8, like PrefixedData, is just a slice of bytes. It assumes that
+// the slice it represents is at least 8 bytes long. This assumption is not
+// enforced by the type system though.
 type PrefixedData8 []byte
 
 func (d PrefixedData8) NamespaceID() ID {

diff --git a/namespace/id.go b/namespace/id.go
@@ -4,22 +4,27 @@ import "bytes"
 
 type ID []byte
 
+// Less returns true if nid < other, otherwise, false.
 func (nid ID) Less(other ID) bool {
 	return bytes.Compare(nid, other) < 0
 }
 
+// Equal returns true if nid == other, otherwise, false.
 func (nid ID) Equal(other ID) bool {
 	return bytes.Equal(nid, other)
 }
 
+// LessOrEqual returns true if nid <= other, otherwise, false.
 func (nid ID) LessOrEqual(other ID) bool {
 	return bytes.Compare(nid, other) <= 0
 }
 
+// Size returns the byte size of the nid.
 func (nid ID) Size() IDSize {
 	return IDSize(len(nid))
 }
 
+// String returns the raw bytes of nid converted directly to a string.
 func (nid ID) String() string {
 	return string(nid)
 }