Unverified commit bbcb5ea3, authored by rjl493456442 and committed by GitHub

core, trie: rework trie database (#26813)

* core, trie: rework trie database

* trie: fix comment
parent 1e556d22
...@@ -142,12 +142,10 @@ func (c *committer) store(path []byte, n node) node { ...@@ -142,12 +142,10 @@ func (c *committer) store(path []byte, n node) node {
// We have the hash already, estimate the RLP encoding-size of the node. // We have the hash already, estimate the RLP encoding-size of the node.
// The size is used for mem tracking, does not need to be exact // The size is used for mem tracking, does not need to be exact
var ( var (
size = estimateSize(n)
nhash = common.BytesToHash(hash) nhash = common.BytesToHash(hash)
mnode = &memoryNode{ mnode = &memoryNode{
hash: nhash, hash: nhash,
node: simplifyNode(n), node: nodeToBytes(n),
size: uint16(size),
} }
) )
// Collect the dirty node to nodeset for return. // Collect the dirty node to nodeset for return.
...@@ -166,31 +164,29 @@ func (c *committer) store(path []byte, n node) node { ...@@ -166,31 +164,29 @@ func (c *committer) store(path []byte, n node) node {
return hash return hash
} }
// estimateSize estimates the size of an rlp-encoded node, without actually // mptResolver is the children resolver for the merkle-patricia-tree.
// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie type mptResolver struct{}
// with 1000 leaves, the only errors above 1% are on small shortnodes, where this
// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35) // forEach implements childResolver; it decodes the provided node and
func estimateSize(n node) int { // traverses the children inside.
func (resolver mptResolver) forEach(node []byte, onChild func(common.Hash)) {
forGatherChildren(mustDecodeNodeUnsafe(nil, node), onChild)
}
// forGatherChildren traverses the node hierarchy and invokes the callback
// for all the hashnode children.
func forGatherChildren(n node, onChild func(hash common.Hash)) {
switch n := n.(type) { switch n := n.(type) {
case *shortNode: case *shortNode:
// A short node contains a compacted key, and a value. forGatherChildren(n.Val, onChild)
return 3 + len(n.Key) + estimateSize(n.Val)
case *fullNode: case *fullNode:
// A full node contains up to 16 hashes (some nils), and a key
s := 3
for i := 0; i < 16; i++ { for i := 0; i < 16; i++ {
if child := n.Children[i]; child != nil { forGatherChildren(n.Children[i], onChild)
s += estimateSize(child)
} else {
s++
}
} }
return s
case valueNode:
return 1 + len(n)
case hashNode: case hashNode:
return 1 + len(n) onChild(common.BytesToHash(n))
case valueNode, nil:
default: default:
panic(fmt.Sprintf("node type %T", n)) panic(fmt.Sprintf("unknown node type: %T", n))
} }
} }
This diff is collapsed.
...@@ -387,7 +387,14 @@ func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { ...@@ -387,7 +387,14 @@ func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) {
// loaded blob will be tracked, while it's not required here since // loaded blob will be tracked, while it's not required here since
// all loaded nodes won't be linked to trie at all and track nodes // all loaded nodes won't be linked to trie at all and track nodes
// may lead to out-of-memory issue. // may lead to out-of-memory issue.
return it.trie.reader.node(path, common.BytesToHash(hash)) blob, err := it.trie.reader.node(path, common.BytesToHash(hash))
if err != nil {
return nil, err
}
// Nodes in raw-blob format are loaded either from the clean
// cache or from the database; each blob is its own copy, so
// it is safe to decode it with the unsafe decoder.
return mustDecodeNodeUnsafe(hash, blob), nil
} }
func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) { func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) {
...@@ -401,7 +408,7 @@ func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error) ...@@ -401,7 +408,7 @@ func (it *nodeIterator) resolveBlob(hash hashNode, path []byte) ([]byte, error)
// loaded blob will be tracked, while it's not required here since // loaded blob will be tracked, while it's not required here since
// all loaded nodes won't be linked to trie at all and track nodes // all loaded nodes won't be linked to trie at all and track nodes
// may lead to out-of-memory issue. // may lead to out-of-memory issue.
return it.trie.reader.nodeBlob(path, common.BytesToHash(hash)) return it.trie.reader.node(path, common.BytesToHash(hash))
} }
func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error {
......
...@@ -99,6 +99,19 @@ func (n valueNode) fstring(ind string) string { ...@@ -99,6 +99,19 @@ func (n valueNode) fstring(ind string) string {
return fmt.Sprintf("%x ", []byte(n)) return fmt.Sprintf("%x ", []byte(n))
} }
// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time store them
// in the same cache fields).
type rawNode []byte
func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }
func (n rawNode) EncodeRLP(w io.Writer) error {
_, err := w.Write(n)
return err
}
// mustDecodeNode is a wrapper of decodeNode and panics if any error is encountered. // mustDecodeNode is a wrapper of decodeNode and panics if any error is encountered.
func mustDecodeNode(hash, buf []byte) node { func mustDecodeNode(hash, buf []byte) node {
n, err := decodeNode(hash, buf) n, err := decodeNode(hash, buf)
......
...@@ -59,29 +59,6 @@ func (n valueNode) encode(w rlp.EncoderBuffer) { ...@@ -59,29 +59,6 @@ func (n valueNode) encode(w rlp.EncoderBuffer) {
w.WriteBytes(n) w.WriteBytes(n)
} }
func (n rawFullNode) encode(w rlp.EncoderBuffer) {
offset := w.List()
for _, c := range n {
if c != nil {
c.encode(w)
} else {
w.Write(rlp.EmptyString)
}
}
w.ListEnd(offset)
}
func (n *rawShortNode) encode(w rlp.EncoderBuffer) {
offset := w.List()
w.WriteBytes(n.Key)
if n.Val != nil {
n.Val.encode(w)
} else {
w.Write(rlp.EmptyString)
}
w.ListEnd(offset)
}
func (n rawNode) encode(w rlp.EncoderBuffer) { func (n rawNode) encode(w rlp.EncoderBuffer) {
w.Write(n) w.Write(n)
} }
...@@ -18,7 +18,6 @@ package trie ...@@ -18,7 +18,6 @@ package trie
import ( import (
"fmt" "fmt"
"reflect"
"sort" "sort"
"strings" "strings"
...@@ -28,41 +27,28 @@ import ( ...@@ -28,41 +27,28 @@ import (
// memoryNode is all the information we know about a single cached trie node // memoryNode is all the information we know about a single cached trie node
// in the memory. // in the memory.
type memoryNode struct { type memoryNode struct {
hash common.Hash // Node hash, computed by hashing rlp value, empty for deleted nodes hash common.Hash // Node hash by hashing node blob, empty for deleted nodes
size uint16 // Byte size of the useful cached data, 0 for deleted nodes node []byte // Encoded node blob, nil for deleted nodes
node node // Cached collapsed trie node, or raw rlp data, nil for deleted nodes
} }
// memoryNodeSize is the raw size of a memoryNode data structure without any
// node data included. It's an approximate size, but should be a lot better
// than not counting them.
// nolint:unused
var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size())
// memorySize returns the total memory size used by this node. // memorySize returns the total memory size used by this node.
// nolint:unused // nolint:unused
func (n *memoryNode) memorySize(pathlen int) int { func (n *memoryNode) memorySize(pathlen int) int {
return int(n.size) + memoryNodeSize + pathlen return len(n.node) + common.HashLength + pathlen
} }
// rlp returns the raw rlp encoded blob of the cached trie node, either directly // rlp returns the raw rlp encoded blob of the cached trie node, either directly
// from the cache, or by regenerating it from the collapsed node. // from the cache, or by regenerating it from the collapsed node.
// nolint:unused // nolint:unused
func (n *memoryNode) rlp() []byte { func (n *memoryNode) rlp() []byte {
if node, ok := n.node.(rawNode); ok { return n.node
return node
}
return nodeToBytes(n.node)
} }
// obj returns the decoded and expanded trie node, either directly from the cache, // obj returns the decoded and expanded trie node, either directly from the cache,
// or by regenerating it from the rlp encoded blob. // or by regenerating it from the rlp encoded blob.
// nolint:unused // nolint:unused
func (n *memoryNode) obj() node { func (n *memoryNode) obj() node {
if node, ok := n.node.(rawNode); ok { return mustDecodeNode(n.hash[:], n.node)
return mustDecodeNode(n.hash[:], node)
}
return expandNode(n.hash[:], n.node)
} }
// isDeleted returns the indicator if the node is marked as deleted. // isDeleted returns the indicator if the node is marked as deleted.
......
...@@ -64,12 +64,15 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e ...@@ -64,12 +64,15 @@ func (t *Trie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWriter) e
// loaded blob will be tracked, while it's not required here since // loaded blob will be tracked, while it's not required here since
// all loaded nodes won't be linked to trie at all and track nodes // all loaded nodes won't be linked to trie at all and track nodes
// may lead to out-of-memory issue. // may lead to out-of-memory issue.
var err error blob, err := t.reader.node(prefix, common.BytesToHash(n))
tn, err = t.reader.node(prefix, common.BytesToHash(n))
if err != nil { if err != nil {
log.Error("Unhandled trie error in Trie.Prove", "err", err) log.Error("Unhandled trie error in Trie.Prove", "err", err)
return err return err
} }
// Nodes in raw-blob format are loaded either from the clean
// cache or from the database; each blob is its own copy, so
// it is safe to decode it with the unsafe decoder.
tn = mustDecodeNodeUnsafe(n, blob)
default: default:
panic(fmt.Sprintf("%T: invalid node: %v", tn, tn)) panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
} }
......
...@@ -420,17 +420,17 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) { ...@@ -420,17 +420,17 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) {
return return
case branchNode: case branchNode:
var nodes rawFullNode var nodes fullNode
for i, child := range st.children { for i, child := range st.children {
if child == nil { if child == nil {
nodes[i] = nilValueNode nodes.Children[i] = nilValueNode
continue continue
} }
child.hashRec(hasher, append(path, byte(i))) child.hashRec(hasher, append(path, byte(i)))
if len(child.val) < 32 { if len(child.val) < 32 {
nodes[i] = rawNode(child.val) nodes.Children[i] = rawNode(child.val)
} else { } else {
nodes[i] = hashNode(child.val) nodes.Children[i] = hashNode(child.val)
} }
// Release child back to pool. // Release child back to pool.
...@@ -444,7 +444,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) { ...@@ -444,7 +444,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) {
case extNode: case extNode:
st.children[0].hashRec(hasher, append(path, st.key...)) st.children[0].hashRec(hasher, append(path, st.key...))
n := rawShortNode{Key: hexToCompact(st.key)} n := shortNode{Key: hexToCompact(st.key)}
if len(st.children[0].val) < 32 { if len(st.children[0].val) < 32 {
n.Val = rawNode(st.children[0].val) n.Val = rawNode(st.children[0].val)
} else { } else {
...@@ -460,7 +460,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) { ...@@ -460,7 +460,7 @@ func (st *StackTrie) hashRec(hasher *hasher, path []byte) {
case leafNode: case leafNode:
st.key = append(st.key, byte(16)) st.key = append(st.key, byte(16))
n := rawShortNode{Key: hexToCompact(st.key), Val: valueNode(st.val)} n := shortNode{Key: hexToCompact(st.key), Val: valueNode(st.val)}
n.encode(hasher.encbuf) n.encode(hasher.encbuf)
encodedNode = hasher.encodedBytes() encodedNode = hasher.encodedBytes()
......
...@@ -212,7 +212,7 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod ...@@ -212,7 +212,7 @@ func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnod
if hash == nil { if hash == nil {
return nil, origNode, 0, errors.New("non-consensus node") return nil, origNode, 0, errors.New("non-consensus node")
} }
blob, err := t.reader.nodeBlob(path, common.BytesToHash(hash)) blob, err := t.reader.node(path, common.BytesToHash(hash))
return blob, origNode, 1, err return blob, origNode, 1, err
} }
// Path still needs to be traversed, descend into children // Path still needs to be traversed, descend into children
...@@ -549,7 +549,7 @@ func (t *Trie) resolve(n node, prefix []byte) (node, error) { ...@@ -549,7 +549,7 @@ func (t *Trie) resolve(n node, prefix []byte) (node, error) {
// node's original value. The rlp-encoded blob is preferred to be loaded from // node's original value. The rlp-encoded blob is preferred to be loaded from
// database because it's easy to decode node while complex to encode node to blob. // database because it's easy to decode node while complex to encode node to blob.
func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) { func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
blob, err := t.reader.nodeBlob(prefix, common.BytesToHash(n)) blob, err := t.reader.node(prefix, common.BytesToHash(n))
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
...@@ -22,17 +22,12 @@ import ( ...@@ -22,17 +22,12 @@ import (
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
) )
// Reader wraps the Node and NodeBlob method of a backing trie store. // Reader wraps the Node method of a backing trie store.
type Reader interface { type Reader interface {
// Node retrieves the trie node with the provided trie identifier, hexary // Node retrieves the RLP-encoded trie node blob with the provided trie
// node path and the corresponding node hash. // identifier, node path and the corresponding node hash. No error will
// No error will be returned if the node is not found. // be returned if the node is not found.
Node(owner common.Hash, path []byte, hash common.Hash) (node, error) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error)
// NodeBlob retrieves the RLP-encoded trie node blob with the provided trie
// identifier, hexary node path and the corresponding node hash.
// No error will be returned if the node is not found.
NodeBlob(owner common.Hash, path []byte, hash common.Hash) ([]byte, error)
} }
// NodeReader wraps all the necessary functions for accessing trie node. // NodeReader wraps all the necessary functions for accessing trie node.
...@@ -65,30 +60,10 @@ func newEmptyReader() *trieReader { ...@@ -65,30 +60,10 @@ func newEmptyReader() *trieReader {
return &trieReader{} return &trieReader{}
} }
// node retrieves the trie node with the provided trie node information.
// A MissingNodeError will be returned in case the node is not found or
// any error is encountered.
func (r *trieReader) node(path []byte, hash common.Hash) (node, error) {
// Perform the logics in tests for preventing trie node access.
if r.banned != nil {
if _, ok := r.banned[string(path)]; ok {
return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path}
}
}
if r.reader == nil {
return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path}
}
node, err := r.reader.Node(r.owner, path, hash)
if err != nil || node == nil {
return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err}
}
return node, nil
}
// node retrieves the rlp-encoded trie node with the provided trie node // node retrieves the rlp-encoded trie node with the provided trie node
// information. A MissingNodeError will be returned in case the node is // information. A MissingNodeError will be returned in case the node is
// not found or any error is encountered. // not found or any error is encountered.
func (r *trieReader) nodeBlob(path []byte, hash common.Hash) ([]byte, error) { func (r *trieReader) node(path []byte, hash common.Hash) ([]byte, error) {
// Perform the logics in tests for preventing trie node access. // Perform the logics in tests for preventing trie node access.
if r.banned != nil { if r.banned != nil {
if _, ok := r.banned[string(path)]; ok { if _, ok := r.banned[string(path)]; ok {
...@@ -98,7 +73,7 @@ func (r *trieReader) nodeBlob(path []byte, hash common.Hash) ([]byte, error) { ...@@ -98,7 +73,7 @@ func (r *trieReader) nodeBlob(path []byte, hash common.Hash) ([]byte, error) {
if r.reader == nil { if r.reader == nil {
return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path} return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path}
} }
blob, err := r.reader.NodeBlob(r.owner, path, hash) blob, err := r.reader.Node(r.owner, path, hash)
if err != nil || len(blob) == 0 { if err != nil || len(blob) == 0 {
return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err} return nil, &MissingNodeError{Owner: r.owner, NodeHash: hash, Path: path, err: err}
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment