core, eth, trie: prepare trie sync for path based operation

parent 5883afb3
......@@ -26,6 +26,7 @@ import (
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/ethdb/memorydb"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
)
......@@ -44,7 +45,7 @@ func makeTestState() (Database, common.Hash, []*testAccount) {
state, _ := New(common.Hash{}, db, nil)
// Fill it with some arbitrary data
accounts := []*testAccount{}
var accounts []*testAccount
for i := byte(0); i < 96; i++ {
obj := state.GetOrNewStateObject(common.BytesToAddress([]byte{i}))
acc := &testAccount{address: common.BytesToAddress([]byte{i})}
......@@ -59,6 +60,11 @@ func makeTestState() (Database, common.Hash, []*testAccount) {
obj.SetCode(crypto.Keccak256Hash([]byte{i, i, i, i, i}), []byte{i, i, i, i, i})
acc.code = []byte{i, i, i, i, i}
}
if i%5 == 0 {
for j := byte(0); j < 5; j++ {
obj.SetState(db, crypto.Keccak256Hash([]byte{i, i, i, i, i, j, j}), crypto.Keccak256Hash([]byte{i, i, i, i, i, j, j}))
}
}
state.updateStateObject(obj)
accounts = append(accounts, acc)
}
......@@ -126,44 +132,94 @@ func checkStateConsistency(db ethdb.Database, root common.Hash) error {
// Tests that an empty state is not scheduled for syncing.
func TestEmptyStateSync(t *testing.T) {
empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
if req := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New())).Missing(1); len(req) != 0 {
t.Errorf("content requested for empty state: %v", req)
sync := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New()))
if nodes, paths, codes := sync.Missing(1); len(nodes) != 0 || len(paths) != 0 || len(codes) != 0 {
t.Errorf(" content requested for empty state: %v, %v, %v", nodes, paths, codes)
}
}
// Tests that given a root hash, a state can sync iteratively on a single thread,
// requesting retrieval tasks and returning all of them in one go.
func TestIterativeStateSyncIndividual(t *testing.T) { testIterativeStateSync(t, 1, false) }
func TestIterativeStateSyncBatched(t *testing.T) { testIterativeStateSync(t, 100, false) }
func TestIterativeStateSyncIndividualFromDisk(t *testing.T) { testIterativeStateSync(t, 1, true) }
func TestIterativeStateSyncBatchedFromDisk(t *testing.T) { testIterativeStateSync(t, 100, true) }
func TestIterativeStateSyncIndividual(t *testing.T) {
testIterativeStateSync(t, 1, false, false)
}
func TestIterativeStateSyncBatched(t *testing.T) {
testIterativeStateSync(t, 100, false, false)
}
func TestIterativeStateSyncIndividualFromDisk(t *testing.T) {
testIterativeStateSync(t, 1, true, false)
}
func TestIterativeStateSyncBatchedFromDisk(t *testing.T) {
testIterativeStateSync(t, 100, true, false)
}
func TestIterativeStateSyncIndividualByPath(t *testing.T) {
testIterativeStateSync(t, 1, false, true)
}
func TestIterativeStateSyncBatchedByPath(t *testing.T) {
testIterativeStateSync(t, 100, false, true)
}
func testIterativeStateSync(t *testing.T, count int, commit bool) {
func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool) {
// Create a random state to copy
srcDb, srcRoot, srcAccounts := makeTestState()
if commit {
srcDb.TrieDB().Commit(srcRoot, false, nil)
}
srcTrie, _ := trie.New(srcRoot, srcDb.TrieDB())
// Create a destination state and sync with the scheduler
dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := append([]common.Hash{}, sched.Missing(count)...)
for len(queue) > 0 {
results := make([]trie.SyncResult, len(queue))
for i, hash := range queue {
nodes, paths, codes := sched.Missing(count)
var (
hashQueue []common.Hash
pathQueue []trie.SyncPath
)
if !bypath {
hashQueue = append(append(hashQueue[:0], nodes...), codes...)
} else {
hashQueue = append(hashQueue[:0], codes...)
pathQueue = append(pathQueue[:0], paths...)
}
for len(hashQueue)+len(pathQueue) > 0 {
results := make([]trie.SyncResult, len(hashQueue)+len(pathQueue))
for i, hash := range hashQueue {
data, err := srcDb.TrieDB().Node(hash)
if err != nil {
data, err = srcDb.ContractCode(common.Hash{}, hash)
}
if err != nil {
t.Fatalf("failed to retrieve node data for %x", hash)
t.Fatalf("failed to retrieve node data for hash %x", hash)
}
results[i] = trie.SyncResult{Hash: hash, Data: data}
}
for i, path := range pathQueue {
if len(path) == 1 {
data, _, err := srcTrie.TryGetNode(path[0])
if err != nil {
t.Fatalf("failed to retrieve node data for path %x: %v", path, err)
}
results[len(hashQueue)+i] = trie.SyncResult{Hash: crypto.Keccak256Hash(data), Data: data}
} else {
var acc Account
if err := rlp.DecodeBytes(srcTrie.Get(path[0]), &acc); err != nil {
t.Fatalf("failed to decode account on path %x: %v", path, err)
}
stTrie, err := trie.New(acc.Root, srcDb.TrieDB())
if err != nil {
t.Fatalf("failed to retriev storage trie for path %x: %v", path, err)
}
data, _, err := stTrie.TryGetNode(path[1])
if err != nil {
t.Fatalf("failed to retrieve node data for path %x: %v", path, err)
}
results[len(hashQueue)+i] = trie.SyncResult{Hash: crypto.Keccak256Hash(data), Data: data}
}
}
for _, result := range results {
if err := sched.Process(result); err != nil {
t.Fatalf("failed to process result %v", err)
t.Errorf("failed to process result %v", err)
}
}
batch := dstDb.NewBatch()
......@@ -171,7 +227,14 @@ func testIterativeStateSync(t *testing.T, count int, commit bool) {
t.Fatalf("failed to commit data: %v", err)
}
batch.Write()
queue = append(queue[:0], sched.Missing(count)...)
nodes, paths, codes = sched.Missing(count)
if !bypath {
hashQueue = append(append(hashQueue[:0], nodes...), codes...)
} else {
hashQueue = append(hashQueue[:0], codes...)
pathQueue = append(pathQueue[:0], paths...)
}
}
// Cross check that the two states are in sync
checkStateAccounts(t, dstDb, srcRoot, srcAccounts)
......@@ -187,7 +250,9 @@ func TestIterativeDelayedStateSync(t *testing.T) {
dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := append([]common.Hash{}, sched.Missing(0)...)
nodes, _, codes := sched.Missing(0)
queue := append(append([]common.Hash{}, nodes...), codes...)
for len(queue) > 0 {
// Sync only half of the scheduled nodes
results := make([]trie.SyncResult, len(queue)/2+1)
......@@ -211,7 +276,9 @@ func TestIterativeDelayedStateSync(t *testing.T) {
t.Fatalf("failed to commit data: %v", err)
}
batch.Write()
queue = append(queue[len(results):], sched.Missing(0)...)
nodes, _, codes = sched.Missing(0)
queue = append(append(queue[len(results):], nodes...), codes...)
}
// Cross check that the two states are in sync
checkStateAccounts(t, dstDb, srcRoot, srcAccounts)
......@@ -232,7 +299,8 @@ func testIterativeRandomStateSync(t *testing.T, count int) {
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := make(map[common.Hash]struct{})
for _, hash := range sched.Missing(count) {
nodes, _, codes := sched.Missing(count)
for _, hash := range append(nodes, codes...) {
queue[hash] = struct{}{}
}
for len(queue) > 0 {
......@@ -259,8 +327,10 @@ func testIterativeRandomStateSync(t *testing.T, count int) {
t.Fatalf("failed to commit data: %v", err)
}
batch.Write()
queue = make(map[common.Hash]struct{})
for _, hash := range sched.Missing(count) {
nodes, _, codes = sched.Missing(count)
for _, hash := range append(nodes, codes...) {
queue[hash] = struct{}{}
}
}
......@@ -279,7 +349,8 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := make(map[common.Hash]struct{})
for _, hash := range sched.Missing(0) {
nodes, _, codes := sched.Missing(0)
for _, hash := range append(nodes, codes...) {
queue[hash] = struct{}{}
}
for len(queue) > 0 {
......@@ -312,7 +383,11 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
t.Fatalf("failed to commit data: %v", err)
}
batch.Write()
for _, hash := range sched.Missing(0) {
for _, result := range results {
delete(queue, result.Hash)
}
nodes, _, codes = sched.Missing(0)
for _, hash := range append(nodes, codes...) {
queue[hash] = struct{}{}
}
}
......@@ -341,8 +416,11 @@ func TestIncompleteStateSync(t *testing.T) {
dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
added := []common.Hash{}
queue := append([]common.Hash{}, sched.Missing(1)...)
var added []common.Hash
nodes, _, codes := sched.Missing(1)
queue := append(append([]common.Hash{}, nodes...), codes...)
for len(queue) > 0 {
// Fetch a batch of state nodes
results := make([]trie.SyncResult, len(queue))
......@@ -382,7 +460,8 @@ func TestIncompleteStateSync(t *testing.T) {
}
}
// Fetch the next batch to retrieve
queue = append(queue[:0], sched.Missing(1)...)
nodes, _, codes = sched.Missing(1)
queue = append(append(queue[:0], nodes...), codes...)
}
// Sanity check that removing any node from the database is detected
for _, node := range added[1:] {
......
This diff is collapsed.
......@@ -79,6 +79,12 @@ func (t *SecureTrie) TryGet(key []byte) ([]byte, error) {
return t.trie.TryGet(t.hashKey(key))
}
// TryGetNode looks up a trie node by its compact-encoded path. Keybyte encoding
// cannot be used for the path, since it might contain an odd number of nibbles.
//
// The path is handed to the wrapped trie as is (it is not run through hashKey),
// and the wrapped trie's results are passed back to the caller unchanged.
func (t *SecureTrie) TryGetNode(path []byte) ([]byte, int, error) {
	blob, resolved, err := t.trie.TryGetNode(path)
	return blob, resolved, err
}
// Update associates key with value in the trie. Subsequent calls to
// Get will return value. If value has length zero, any existing value
// is deleted from the trie and calls to Get will return nil.
......
......@@ -52,6 +52,39 @@ type request struct {
callback LeafCallback // Callback to invoke if a leaf node is reached on this branch
}
// SyncPath is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage).
//
// Content wise the tuple either has 1 element if it addresses a node in a single
// trie or 2 elements if it addresses a node in a stacked trie.
//
// To support addressing arbitrary trie nodes, the path needs to support odd
// nibble lengths. To avoid transferring the expanded hex form over the network,
// the last part of the tuple (which needs to index into the middle of a trie) is
// compact encoded. In case of a 2-tuple, the first item is always 32 bytes, so
// that one is simply binary (keybytes) encoded.
//
// Examples:
// - Path 0x9 -> {0x19}
// - Path 0x99 -> {0x0099}
// - Path 0x01234567890123456789012345678901012345678901234567890123456789019 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19}
// - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099}
type SyncPath [][]byte
// newSyncPath turns an expanded, nibble-form trie path into the compact tuple
// representation that can be sent over the network.
func newSyncPath(path []byte) SyncPath {
	// A full account key spans 64 nibbles. Shorter paths address a node in the
	// account trie and yield a single item; anything else is split into the
	// account key and the remainder inside the storage trie, yielding a tuple.
	//
	// Note, a length of exactly 64 is ambiguous between an account leaf and a
	// storage root. That is fine though: an account trie node at depth 64 would
	// mean a hash collision was found, and we're long dead by then.
	const accountNibbles = 64

	if len(path) >= accountNibbles {
		account, storage := path[:accountNibbles], path[accountNibbles:]
		return SyncPath{hexToKeybytes(account), hexToCompact(storage)}
	}
	return SyncPath{hexToCompact(path)}
}
// SyncResult is a response with requested data along with its hash.
type SyncResult struct {
Hash common.Hash // Hash of the originally unknown trie node
......@@ -193,10 +226,16 @@ func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash) {
s.schedule(req)
}
// Missing retrieves the known missing nodes from the trie for retrieval.
func (s *Sync) Missing(max int) []common.Hash {
var requests []common.Hash
for !s.queue.Empty() && (max == 0 || len(requests) < max) {
// Missing retrieves the known missing nodes from the trie for retrieval. To aid
// both eth/6x style fast sync and snap/1x style state sync, the paths of trie
// nodes are returned too, as well as a separate hash list for codes.
func (s *Sync) Missing(max int) (nodes []common.Hash, paths []SyncPath, codes []common.Hash) {
var (
nodeHashes []common.Hash
nodePaths []SyncPath
codeHashes []common.Hash
)
for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) {
// Retrieve the next item in line
item, prio := s.queue.Peek()
......@@ -208,9 +247,16 @@ func (s *Sync) Missing(max int) []common.Hash {
// Item is allowed to be scheduled, add it to the task list
s.queue.Pop()
s.fetches[depth]++
requests = append(requests, item.(common.Hash))
hash := item.(common.Hash)
if req, ok := s.nodeReqs[hash]; ok {
nodeHashes = append(nodeHashes, hash)
nodePaths = append(nodePaths, newSyncPath(req.path))
} else {
codeHashes = append(codeHashes, hash)
}
}
return requests
return nodeHashes, nodePaths, codeHashes
}
// Process injects the received data for requested item. Note it can
......@@ -322,9 +368,13 @@ func (s *Sync) children(req *request, object node) ([]*request, error) {
switch node := (object).(type) {
case *shortNode:
key := node.Key
if hasTerm(key) {
key = key[:len(key)-1]
}
children = []child{{
node: node.Val,
path: append(append([]byte(nil), req.path...), node.Key...),
path: append(append([]byte(nil), req.path...), key...),
}}
case *fullNode:
for i := 0; i < 17; i++ {
......@@ -344,7 +394,7 @@ func (s *Sync) children(req *request, object node) ([]*request, error) {
// Notify any external watcher of a new key/value node
if req.callback != nil {
if node, ok := (child.node).(valueNode); ok {
if err := req.callback(req.path, node, req.hash); err != nil {
if err := req.callback(child.path, node, req.hash); err != nil {
return nil, err
}
}
......
This diff is collapsed.
......@@ -25,6 +25,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
)
var (
......@@ -102,8 +103,7 @@ func (t *Trie) Get(key []byte) []byte {
// The value bytes must not be modified by the caller.
// If a node was not found in the database, a MissingNodeError is returned.
func (t *Trie) TryGet(key []byte) ([]byte, error) {
key = keybytesToHex(key)
value, newroot, didResolve, err := t.tryGet(t.root, key, 0)
value, newroot, didResolve, err := t.tryGet(t.root, keybytesToHex(key), 0)
if err == nil && didResolve {
t.root = newroot
}
......@@ -146,6 +146,86 @@ func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode
}
}
// TryGetNode looks up a trie node by its compact-encoded path. Keybyte encoding
// cannot be used for the path, since it might contain an odd number of nibbles.
//
// It returns the RLP encoding of the located node (nil when the traversal found
// none), the resolved counter reported by the traversal, and any error raised
// while traversing the trie or encoding the node.
func (t *Trie) TryGetNode(path []byte) ([]byte, int, error) {
	found, root, resolved, err := t.tryGetNode(t.root, compactToHex(path), 0)
	if err != nil {
		return nil, resolved, err
	}
	// Only swap in the returned root if the traversal actually resolved nodes.
	if resolved > 0 {
		t.root = root
	}
	if found == nil {
		return nil, resolved, nil
	}
	blob, encErr := rlp.EncodeToBytes(found)
	if encErr != nil {
		log.Error("Encoding existing trie node failed", "err", encErr)
		return nil, resolved, encErr
	}
	return blob, resolved, nil
}
// tryGetNode is the recursive worker behind TryGetNode. Starting from origNode,
// which sits pos nibbles deep along the hex-encoded path, it walks down towards
// the node addressed by the full path.
//
// It returns:
//   - item: the node found at path, or nil if the walk ended without one
//   - newnode: the node to use in place of origNode; callers only link it in
//     when resolved > 0 and err is nil
//   - resolved: the number of hash nodes passed through resolveHash during the
//     walk (a failed resolution is counted as well)
//   - err: the error returned by resolveHash, if any
func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item node, newnode node, resolved int, err error) {
// If we reached the requested path, return the current node
if pos >= len(path) {
// Don't return collapsed hash nodes though
if _, ok := origNode.(hashNode); !ok {
// Short nodes have expanded keys, compact them before returning. A fresh
// shortNode is built for that, so origNode itself is left untouched.
item := origNode
if sn, ok := item.(*shortNode); ok {
item = &shortNode{
Key: hexToCompact(sn.Key),
Val: sn.Val,
}
}
return item, origNode, 0, nil
}
}
// Path still needs to be traversed, descend into children. A hash node sitting
// exactly at the requested path also ends up here, so that it gets resolved by
// the hashNode case below and retried at the same position.
switch n := (origNode).(type) {
case nil:
// Non-existent path requested, abort
return nil, nil, 0, nil
case valueNode:
// Path prematurely ended, abort
return nil, nil, 0, nil
case *shortNode:
if len(path)-pos < len(n.Key) || !bytes.Equal(n.Key, path[pos:pos+len(n.Key)]) {
// Path branches off from short node
return nil, n, 0, nil
}
// The whole key matched, continue below the short node
item, newnode, resolved, err = t.tryGetNode(n.Val, path, pos+len(n.Key))
if err == nil && resolved > 0 {
// Something below was resolved, link the updated child into a copy of n
n = n.copy()
n.Val = newnode
}
return item, n, resolved, err
case *fullNode:
// Follow the child selected by the next nibble of the path
item, newnode, resolved, err = t.tryGetNode(n.Children[path[pos]], path, pos+1)
if err == nil && resolved > 0 {
// Something below was resolved, link the updated child into a copy of n
n = n.copy()
n.Children[path[pos]] = newnode
}
return item, n, resolved, err
case hashNode:
// Resolve the hash into an actual node; the attempt counts as one
// resolution even if it fails
child, err := t.resolveHash(n, path[:pos])
if err != nil {
return nil, n, 1, err
}
// Retry at the same position with the resolved node, adding this
// resolution to whatever the recursion reports
item, newnode, resolved, err := t.tryGetNode(child, path, pos)
return item, newnode, resolved + 1, err
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
}
// Update associates key with value in the trie. Subsequent calls to
// Get will return value. If value has length zero, any existing value
// is deleted from the trie and calls to Get will return nil.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment