core, trie: rework trie database (#26813)

* core, trie: rework trie database
* trie: fix comment

core, trie: rework trie database (#26813)
* core, trie: rework trie database
* trie: fix comment
bbcb5ea3 · rjl493456442 · GitHub · 1e556d22 · bbcb5ea3 · bbcb5ea3
Unverified Commit bbcb5ea3 authored Apr 24, 2023 by rjl493456442 Committed by GitHub Apr 24, 2023
10 changed files
--- a/trie/committer.go
+++ b/trie/committer.go
@@ -142,12 +142,10 @@ func (c *committer) store(path []byte, n node) node {
 	// We have the hash already, estimate the RLP encoding-size of the node.
 	// The size is used for mem tracking, does not need to be exact
 	var (
-		size  = estimateSize(n)
 		nhash = common.BytesToHash(hash)
 		mnode = &memoryNode{
 			hash: nhash,
-			node: simplifyNode(n),
+			node: nodeToBytes(n),
-			size: uint16(size),
 		}
 	)
 	// Collect the dirty node to nodeset for return.
@@ -166,31 +164,29 @@ func (c *committer) store(path []byte, n node) node {
 	return hash
 }
-// estimateSize estimates the size of an rlp-encoded node, without actually
+// mptResolver is the children resolver in merkle-patricia-tree.
-// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
+type mptResolver struct{}
-// with 1000 leaves, the only errors above 1% are on small shortnodes, where this
-// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
+// forEach implements childResolver, decodes the provided node and
-func estimateSize(n node) int {
+// traverses the children inside.
+func (resolver mptResolver) forEach(node []byte, onChild func(common.Hash)) {
+	forGatherChildren(mustDecodeNodeUnsafe(nil, node), onChild)
+}
+// forGatherChildren traverses the node hierarchy and invokes the callback
+// for all the hashnode children.
+func forGatherChildren(n node, onChild func(hash common.Hash)) {
 	switch n := n.(type) {
 	case *shortNode:
-		// A short node contains a compacted key, and a value.
+		forGatherChildren(n.Val, onChild)
-		return 3 + len(n.Key) + estimateSize(n.Val)
 	case *fullNode:
-		// A full node contains up to 16 hashes (some nils), and a key
-		s := 3
 		for i := 0; i < 16; i++ {
-			if child := n.Children[i]; child != nil {
+			forGatherChildren(n.Children[i], onChild)
-				s += estimateSize(child)
-			} else {
-				s++
-			}
 		}
-		return s
-	case valueNode:
-		return 1 + len(n)
 	case hashNode:
-		return 1 + len(n)
+		onChild(common.BytesToHash(n))
+	case valueNode, nil:
 	default:
-		panic(fmt.Sprintf("node type %T", n))
+		panic(fmt.Sprintf("unknown node type: %T", n))
 	}
 }
--- a/trie/database.go
+++ b/trie/database.go
--- a/trie/iterator.go
+++ b/trie/iterator.go
@@ -387,7 +387,14 @@ func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) {
 	// loaded blob will be tracked, while it's not required here since
 	// all loaded nodes won't be linked to trie at all and track nodes
 	// may lead to out-of-memory issue.
-	return it.trie.reader.node(path, common.BytesToHash(hash))
+	blob, err := it.trie.reader.node(path, common.BytesToHash(hash))
+	if err != nil {
+		return nil, err
+	}
+	// The raw-blob format nodes are loaded either from the
+	// clean cache or the database, they are all in their own
+	// copy and safe to use unsafe decoder.
+	return mustDecodeNodeUnsafe(hash, blob), nil
 }
 func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) {
@@ -401,7 +408,7 @@ func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error)
 	// loaded blob will be tracked, while it's not required here since
 	// all loaded nodes won't be linked to trie at all and track nodes
 	// may lead to out-of-memory issue.
-	return it.trie.reader.nodeBlob(path, common.BytesToHash(hash))
+	return it.trie.reader.node(path, common.BytesToHash(hash))
 }
 func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error {

--- a/trie/node.go
+++ b/trie/node.go
@@ -99,6 +99,19 @@ func (n valueNode) fstring(ind string) string {
 	return fmt.Sprintf("%x ", []byte(n))
 }
+// rawNode is a simple binary blob used to differentiate between collapsed trie
+// nodes and already encoded RLP binary blobs (while at the same time store them
+// in the same cache fields).
+type rawNode []byte
+func (n rawNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
+func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }
+func (n rawNode) EncodeRLP(w io.Writer) error {
+	_, err := w.Write(n)
+	return err
+}
 // mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
 func mustDecodeNode(hash, buf []byte) node {
 	n, err := decodeNode(hash, buf)

--- a/trie/node_enc.go
+++ b/trie/node_enc.go
@@ -59,29 +59,6 @@ func (n valueNode) encode(w rlp.EncoderBuffer) {
 	w.WriteBytes(n)
 }
-func (n rawFullNode) encode(w rlp.EncoderBuffer) {
-	offset := w.List()
-	for _, c := range n {
-		if c != nil {
-			c.encode(w)
-		} else {
-			w.Write(rlp.EmptyString)
-		}
-	}
-	w.ListEnd(offset)
-}
-func (n *rawShortNode) encode(w rlp.EncoderBuffer) {
-	offset := w.List()
-	w.WriteBytes(n.Key)
-	if n.Val != nil {
-		n.Val.encode(w)
-	} else {
-		w.Write(rlp.EmptyString)
-	}
-	w.ListEnd(offset)
-}
 func (n rawNode) encode(w rlp.EncoderBuffer) {
 	w.Write(n)
 }
--- a/trie/nodeset.go
+++ b/trie/nodeset.go
@@ -18,7 +18,6 @@ package trie
 import (
 	"fmt"
-	"reflect"
 	"sort"
 	"strings"
@@ -28,41 +27,28 @@ import (
 // memoryNode is all the information we know about a single cached trie node
 // in the memory.
 type memoryNode struct {
-	hash common.Hash // Node hash, computed by hashing rlp value, empty for deleted nodes
+	hash common.Hash // Node hash by hashing node blob, empty for deleted nodes
-	size uint16      // Byte size of the useful cached data, 0 for deleted nodes
+	node []byte      // Encoded node blob, nil for deleted nodes
-	node node        // Cached collapsed trie node, or raw rlp data, nil for deleted nodes
 }
-// memoryNodeSize is the raw size of a memoryNode data structure without any
-// node data included. It's an approximate size, but should be a lot better
-// than not counting them.
-// nolint:unused
-var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size())
 // memorySize returns the total memory size used by this node.
 // nolint:unused
 func (n *memoryNode) memorySize(pathlen int) int {
-	return int(n.size) + memoryNodeSize + pathlen
+	return len(n.node) + common.HashLength + pathlen
 }
 // rlp returns the raw rlp encoded blob of the cached trie node, either directly
 // from the cache, or by regenerating it from the collapsed node.
 // nolint:unused
 func (n *memoryNode) rlp() []byte {
-	if node, ok := n.node.(rawNode); ok {
+	return n.node
-		return node
-	}
-	return nodeToBytes(n.node)
 }
 // obj returns the decoded and expanded trie node, either directly from the cache,
 // or by regenerating it from the rlp encoded blob.
 // nolint:unused
 func (n *memoryNode) obj() node {
-	if node, ok := n.node.(rawNode); ok {
+	return mustDecodeNode(n.hash[:], n.node)
-		return mustDecodeNode(n.hash[:], node)
-	}
-	return expandNode(n.hash[:], n.node)
 }
 // isDeleted returns the indicator if the node is marked as deleted.

--- a/trie/proof.go
+++ b/trie/proof.go
@@ -64,12 +64,15 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e
 			// loaded blob will be tracked, while it's not required here since
 			// all loaded nodes won't be linked to trie at all and track nodes
 			// may lead to out-of-memory issue.
-			var err error
+			blob, err := t.reader.node(prefix, common.BytesToHash(n))
-			tn, err = t.reader.node(prefix, common.BytesToHash(n))
 			if err != nil {
 				log.Error("Unhandled trie error in Trie.Prove", "err", err)
 				return err
 			}
+			// The raw-blob format nodes are loaded either from the
+			// clean cache or the database, they are all in their own
+			// copy and safe to use unsafe decoder.
+			tn = mustDecodeNodeUnsafe(n, blob)
 		default:
 			panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
 		}

--- a/trie/stacktrie.go
+++ b/trie/stacktrie.go
@@ -420,17 +420,17 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) {
 		return
 	case branchNode:
-		var nodes rawFullNode
+		var nodes fullNode
 		for i, child := range st.children {
 			if child == nil {
-				nodes[i] = nilValueNode
+				nodes.Children[i] = nilValueNode
 				continue
 			}
 			child.hashRec(hasher, append(path, byte(i)))
 			if len(child.val) < 32 {
-				nodes[i] = rawNode(child.val)
+				nodes.Children[i] = rawNode(child.val)
 			} else {
-				nodes[i] = hashNode(child.val)
+				nodes.Children[i] = hashNode(child.val)
 			}
 			// Release child back to pool.
@@ -444,7 +444,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) {
 	case extNode:
 		st.children[0].hashRec(hasher, append(path, st.key...))
-		n := rawShortNode{Key: hexToCompact(st.key)}
+		n := shortNode{Key: hexToCompact(st.key)}
 		if len(st.children[0].val) < 32 {
 			n.Val = rawNode(st.children[0].val)
 		} else {
@@ -460,7 +460,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) {
 	case leafNode:
 		st.key = append(st.key, byte(16))
-		n := rawShortNode{Key: hexToCompact(st.key), Val: valueNode(st.val)}
+		n := shortNode{Key: hexToCompact(st.key), Val: valueNode(st.val)}
 		n.encode(hasher.encbuf)
 		encodedNode = hasher.encodedBytes()

--- a/trie/trie.go
+++ b/trie/trie.go
@@ -212,7 +212,7 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod
 		if hash == nil {
 			return nil, origNode, 0, errors.New("non-consensus node")
 		}
-		blob, err := t.reader.nodeBlob(path, common.BytesToHash(hash))
+		blob, err := t.reader.node(path, common.BytesToHash(hash))
 		return blob, origNode, 1, err
 	}
 	// Path still needs to be traversed, descend into children
@@ -549,7 +549,7 @@ func (t *Trie) resolve(n node, prefix []byte) (node, error) {
 // node's original value. The rlp-encoded blob is preferred to be loaded from
 // database because it's easy to decode node while complex to encode node to blob.
 func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
-	blob, err := t.reader.nodeBlob(prefix, common.BytesToHash(n))
+	blob, err := t.reader.node(prefix, common.BytesToHash(n))
 	if err != nil {
 		return nil, err
 	}

--- a/trie/trie_reader.go
+++ b/trie/trie_reader.go
@@ -22,17 +22,12 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 )
-// Reader wraps the Node and NodeBlob method of a backing trie store.
+// Reader wraps the Node method of a backing trie store.
 type Reader interface {
-	// Node retrieves the trie node with the provided trie identifier, hexary
+	// Node retrieves the RLP-encoded trie node blob with the provided trie
-	// node path and the corresponding node hash.
+	// identifier, node path and the corresponding node hash. No error will
-	// No error will be returned if the node is not found.
+	// be returned if the node is not found.
-	Node(owner common.Hash, path []byte, hash common.Hash) (node, error)
+	Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error)
-	// NodeBlob retrieves the RLP-encoded trie node blob with the provided trie
-	// identifier, hexary node path and the corresponding node hash.
-	// No error will be returned if the node is not found.
-	NodeBlob(owner common.Hash, path []byte, hash common.Hash) ([]byte, error)
 }
 // NodeReader wraps all the necessary functions for accessing trie node.
@@ -65,30 +60,10 @@ func newEmptyReader() *trieReader {
 	return &trieReader{}
 }
-// node retrieves the trie node with the provided trie node information.
-// An MissingNodeError will be returned in case the node is not found or
-// any error is encountered.
-func (r *trieReader) node(path []byte, hash common.Hash) (node, error) {
-	// Perform the logics in tests for preventing trie node access.
-	if r.banned != nil {
-		if _, ok := r.banned[string(path)]; ok {
-			return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path}
-		}
-	}
-	if r.reader == nil {
-		return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path}
-	}
-	node, err := r.reader.Node(r.owner, path, hash)
-	if err != nil || node == nil {
-		return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err}
-	}
-	return node, nil
-}
 // node retrieves the rlp-encoded trie node with the provided trie node
 // information. An MissingNodeError will be returned in case the node is
 // not found or any error is encountered.
-func (r *trieReader) nodeBlob(path []byte, hash common.Hash) ([]byte, error) {
+func (r *trieReader) node(path []byte, hash common.Hash) ([]byte, error) {
 	// Perform the logics in tests for preventing trie node access.
 	if r.banned != nil {
 		if _, ok := r.banned[string(path)]; ok {
@@ -98,7 +73,7 @@ func (r *trieReader) nodeBlob(path []byte, hash common.Hash) ([]byte, error) {
 	if r.reader == nil {
 		return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path}
 	}
-	blob, err := r.reader.NodeBlob(r.owner, path, hash)
+	blob, err := r.reader.Node(r.owner, path, hash)
 	if err != nil || len(blob) == 0 {
 		return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err}
 	}