core/state/snapshot: full featured account iteration

parent e5708353
......@@ -229,6 +229,11 @@ func (dl *diffLayer) Root() common.Hash {
return dl.root
}
// Parent returns the layer this diff layer was built on top of, i.e. the value
// of dl.parent. The field is read directly, without acquiring dl.lock.
func (dl *diffLayer) Parent() snapshot {
return dl.parent
}
// Stale returns whether this layer has become stale (was flattened across) or if
// it's still live.
func (dl *diffLayer) Stale() bool {
......@@ -405,7 +410,7 @@ func (dl *diffLayer) flatten() snapshot {
for hash, data := range dl.accountData {
parent.accountData[hash] = data
}
// Overwrite all the updates storage slots (individually)
// Overwrite all the updated storage slots (individually)
for accountHash, storage := range dl.storageData {
// If storage didn't exist (or was deleted) in the parent; or if the storage
// was freshly deleted in the child, overwrite blindly
......@@ -425,53 +430,62 @@ func (dl *diffLayer) flatten() snapshot {
parent: parent.parent,
origin: parent.origin,
root: dl.root,
storageList: parent.storageList,
storageData: parent.storageData,
accountList: parent.accountList,
accountData: parent.accountData,
storageData: parent.storageData,
storageList: make(map[common.Hash][]common.Hash),
diffed: dl.diffed,
memory: parent.memory + dl.memory,
}
}
// AccountList returns a sorted list of all accounts in this difflayer.
// AccountList returns a sorted list of all accounts in this difflayer, including
// the deleted ones.
//
// Note, the returned slice is not a copy, so do not modify it.
func (dl *diffLayer) AccountList() []common.Hash {
// If an old list already exists, return it
dl.lock.RLock()
list := dl.accountList
dl.lock.RUnlock()
if list != nil {
return list
}
// No old sorted account list exists, generate a new one
dl.lock.Lock()
defer dl.lock.Unlock()
if dl.accountList != nil {
return dl.accountList
}
accountList := make([]common.Hash, len(dl.accountData))
i := 0
for k, _ := range dl.accountData {
accountList[i] = k
i++
// This would be a pretty good opportunity to also
// calculate the size, if we want to
dl.accountList = make([]common.Hash, 0, len(dl.accountData))
for hash := range dl.accountData {
dl.accountList = append(dl.accountList, hash)
}
sort.Sort(hashes(accountList))
dl.accountList = accountList
sort.Sort(hashes(dl.accountList))
return dl.accountList
}
// StorageList returns a sorted list of all storage slot hashes
// in this difflayer for the given account.
// StorageList returns a sorted list of all storage slot hashes in this difflayer
// for the given account.
//
// Note, the returned slice is not a copy, so do not modify it.
func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
// If an old list already exists, return it
dl.lock.RLock()
list := dl.storageList[accountHash]
dl.lock.RUnlock()
if list != nil {
return list
}
// No old sorted storage list exists, generate a new one
dl.lock.Lock()
defer dl.lock.Unlock()
if dl.storageList[accountHash] != nil {
return dl.storageList[accountHash]
}
accountStorageMap := dl.storageData[accountHash]
accountStorageList := make([]common.Hash, len(accountStorageMap))
i := 0
for k, _ := range accountStorageMap {
accountStorageList[i] = k
i++
// This would be a pretty good opportunity to also
// calculate the size, if we want to
storageMap := dl.storageData[accountHash]
storageList := make([]common.Hash, 0, len(storageMap))
for k, _ := range storageMap {
storageList = append(storageList, k)
}
sort.Sort(hashes(accountStorageList))
dl.storageList[accountHash] = accountStorageList
return accountStorageList
sort.Sort(hashes(storageList))
dl.storageList[accountHash] = storageList
return storageList
}
......@@ -18,7 +18,6 @@ package snapshot
import (
"bytes"
"math/big"
"math/rand"
"testing"
......@@ -26,21 +25,8 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb/memorydb"
"github.com/ethereum/go-ethereum/rlp"
)
// randomAccount generates a random account and returns it RLP encoded. The
// balance, nonce and root are random; the code hash is always emptyCode.
func randomAccount() []byte {
root := randomHash()
a := Account{
Balance: big.NewInt(rand.Int63()),
Nonce: rand.Uint64(),
Root: root[:],
CodeHash: emptyCode[:],
}
// The encoding error is deliberately discarded here.
data, _ := rlp.EncodeToBytes(a)
return data
}
// TestMergeBasics tests some simple merges
func TestMergeBasics(t *testing.T) {
var (
......
......@@ -48,6 +48,11 @@ func (dl *diskLayer) Root() common.Hash {
return dl.root
}
// Parent always returns nil as there's no layer below the disk layer. Code that
// walks a layer stack through Parent can use the nil result as its stop
// condition.
func (dl *diskLayer) Parent() snapshot {
return nil
}
// Stale returns whether this layer has become stale (was flattened across) or if
// it's still live.
func (dl *diskLayer) Stale() bool {
......
......@@ -18,18 +18,17 @@ package snapshot
import (
"bytes"
"fmt"
"sort"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
)
// AccountIterator is an iterator to step over all the accounts in a snapshot,
// which may or may not be composed of multiple layers.
type AccountIterator interface {
// Seek steps the iterator forward as many elements as needed, so that after
// calling Next(), the iterator will be at a key higher than the given hash.
Seek(hash common.Hash)
// Next steps the iterator forward one element, returning false if exhausted,
// or an error if iteration failed for some reason (e.g. root being iterated
// becomes stale and garbage collected).
......@@ -39,78 +38,159 @@ type AccountIterator interface {
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
Error() error
// Key returns the hash of the account the iterator is currently at.
Key() common.Hash
// Hash returns the hash of the account the iterator is currently at.
Hash() common.Hash
// Value returns the RLP encoded slim account the iterator is currently at.
// Account returns the RLP encoded slim account the iterator is currently at.
// An error will be returned if the iterator becomes invalid (e.g. the snapshot
// stack became stale).
Value() []byte
Account() []byte
// Release releases associated resources. Release should always succeed and
// can be called multiple times without causing error.
Release()
}
// diffAccountIterator is an account iterator that steps over the accounts (both
// live and deleted) contained within a single
// live and deleted) contained within a single diff layer. Higher order iterators
// will use the deleted accounts to skip deeper iterators.
type diffAccountIterator struct {
layer *diffLayer
index int
// curHash is the current hash the iterator is positioned on. The field is
// explicitly tracked since the referenced diff layer might go stale after
// the iterator was positioned and we don't want to fail accessing the old
// hash as long as the iterator is not touched any more.
curHash common.Hash
// curAccount is the current value the iterator is positioned on. The field
// is explicitly tracked since the referenced diff layer might go stale after
// the iterator was positioned and we don't want to fail accessing the old
// value as long as the iterator is not touched any more.
curAccount []byte
layer *diffLayer // Live layer to retrieve values from
keys []common.Hash // Keys left in the layer to iterate
fail error // Any failures encountered (stale)
}
// newAccountIterator creates an account iterator over this single diff layer.
func (dl *diffLayer) newAccountIterator() *diffAccountIterator {
// Called only for its side effect: make sure the sorted dl.accountList has
// been generated before the iterator starts indexing into it.
dl.AccountList()
// Start one position before the first element; the iterator's Next advances
// the index before reading.
return &diffAccountIterator{layer: dl, index: -1}
}
// Seek steps the iterator forward as many elements as needed, so that after
// calling Next(), the iterator will be at a key higher than the given hash.
func (it *diffAccountIterator) Seek(key common.Hash) {
// Search uses binary search to find and return the smallest index i
// in [0, n) at which f(i) is true
index := sort.Search(len(it.layer.accountList), func(i int) bool {
return bytes.Compare(key[:], it.layer.accountList[i][:]) < 0
// AccountIterator creates an account iterator over a single diff layer.
func (dl *diffLayer) AccountIterator(seek common.Hash) AccountIterator {
// Seek out the requested starting account
hashes := dl.AccountList()
index := sort.Search(len(hashes), func(i int) bool {
return bytes.Compare(seek[:], hashes[i][:]) < 0
})
it.index = index - 1
// Assemble and return the already seeked iterator
return &diffAccountIterator{
layer: dl,
keys: hashes[index:],
}
}
// Next steps the iterator forward one element, returning false if exhausted.
func (it *diffAccountIterator) Next() bool {
if it.index < len(it.layer.accountList) {
it.index++
// If the iterator was already stale, consider it a programmer error. Although
// we could just return false here, triggering this path would probably mean
// somebody forgot to check for Error, so lets blow up instead of undefined
// behavior that's hard to debug.
if it.fail != nil {
panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
}
// Stop iterating if all keys were exhausted
if len(it.keys) == 0 {
return false
}
// Iterator seems to be still alive, retrieve and cache the live hash and
// account value, or fail now if layer became stale
it.layer.lock.RLock()
defer it.layer.lock.RUnlock()
if it.layer.stale {
it.fail, it.keys = ErrSnapshotStale, nil
return false
}
return it.index < len(it.layer.accountList)
it.curHash = it.keys[0]
if blob, ok := it.layer.accountData[it.curHash]; !ok {
panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash))
} else {
it.curAccount = blob
}
// Values cached, shift the iterator and notify the user of success
it.keys = it.keys[1:]
return true
}
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
//
// A diff layer is immutable after creation content wise and can always be fully
// iterated without error, so this method always returns nil.
func (it *diffAccountIterator) Error() error {
return nil
return it.fail
}
// Key returns the hash of the account the iterator is currently at.
func (it *diffAccountIterator) Key() common.Hash {
if it.index < len(it.layer.accountList) {
return it.layer.accountList[it.index]
}
return common.Hash{}
// Hash returns the hash of the account the iterator is currently at.
//
// The value comes from the curHash field (assigned in Next), so the diff layer
// itself is not accessed and no lock is taken here.
func (it *diffAccountIterator) Hash() common.Hash {
return it.curHash
}
// Value returns the RLP encoded slim account the iterator is currently at.
func (it *diffAccountIterator) Value() []byte {
it.layer.lock.RLock()
defer it.layer.lock.RUnlock()
// Account returns the RLP encoded slim account the iterator is currently at.
//
// The blob comes from the curAccount field (assigned in Next), so the diff
// layer itself is not accessed and no lock is taken here. The returned slice is
// not copied.
func (it *diffAccountIterator) Account() []byte {
return it.curAccount
}
// Release is a noop for diff account iterators as there are no held resources.
// It may be called any number of times.
func (it *diffAccountIterator) Release() {}
hash := it.layer.accountList[it.index]
if data, ok := it.layer.accountData[hash]; ok {
return data
// diskAccountIterator is an account iterator that steps over the live accounts
// contained within a disk layer.
type diskAccountIterator struct {
layer *diskLayer // Disk layer the iterator was created from
it ethdb.Iterator // Underlying database iterator; set to nil once exhausted or released
}
// AccountIterator creates an account iterator over a disk layer.
func (dl *diskLayer) AccountIterator(seek common.Hash) AccountIterator {
return &diskAccountIterator{
layer: dl,
it: dl.diskdb.NewIteratorWithPrefix(append(rawdb.SnapshotAccountPrefix, seek[:]...)),
}
panic("iterator references non-existent layer account")
}
func (dl *diffLayer) iterators() []AccountIterator {
if parent, ok := dl.parent.(*diffLayer); ok {
iterators := parent.iterators()
return append(iterators, dl.newAccountIterator())
// Next advances the iterator by one account and reports whether a new element
// is available. Once it has returned false, every later call also returns
// false.
func (it *diskAccountIterator) Next() bool {
	// A nil database iterator means we were already exhausted (or released)
	iter := it.it
	if iter == nil {
		return false
	}
	// Happy path: the database iterator moved on and is still positioned on a
	// key carrying the snapshot account prefix
	if iter.Next() && bytes.HasPrefix(iter.Key(), rawdb.SnapshotAccountPrefix) {
		return true
	}
	// We reached the end of the account range; release the database iterator
	// and drop the reference so later calls short circuit
	iter.Release()
	it.it = nil
	return false
}
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. the underlying database iterator
// failing).
//
// The database iterator is dropped (it.it set to nil) as soon as Next reports
// exhaustion or Release is called. In that state there is nothing left to query,
// so nil is returned instead of dereferencing the nil iterator. This keeps the
// usual `for it.Next() { ... }; err := it.Error()` pattern from panicking.
func (it *diskAccountIterator) Error() error {
	if it.it == nil {
		return nil // Iterator is exhausted and released
	}
	return it.it.Error()
}
// Hash returns the hash of the account the iterator is currently at.
//
// The hash is derived from the current key of the underlying database iterator.
// NOTE(review): those keys carry rawdb.SnapshotAccountPrefix in front of the
// account hash, so this presumably relies on common.BytesToHash keeping only the
// trailing hash-sized bytes; confirm.
//
// Must not be called once Next has returned false or Release was called: it.it
// is nil at that point and the call would dereference it.
func (it *diskAccountIterator) Hash() common.Hash {
return common.BytesToHash(it.it.Key())
}
// Account returns the RLP encoded slim account the iterator is currently at,
// which is the current value of the underlying database iterator.
//
// Must not be called once Next has returned false or Release was called: it.it
// is nil at that point and the call would dereference it.
func (it *diskAccountIterator) Account() []byte {
return it.it.Value()
}
// Release releases the database snapshot held during iteration.
func (it *diskAccountIterator) Release() {
// The iterator is auto-released on exhaustion, so make sure it's still alive
if it.it != nil {
it.it.Release()
it.it = nil
}
return []AccountIterator{dl.newAccountIterator()}
}
......@@ -40,10 +40,10 @@ func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
parent, ok := dl.parent.(*diffLayer)
if !ok {
// parent is the disk layer
return dl.newAccountIterator()
return dl.AccountIterator(common.Hash{})
}
l := &binaryAccountIterator{
a: dl.newAccountIterator(),
a: dl.AccountIterator(common.Hash{}).(*diffAccountIterator),
b: parent.newBinaryAccountIterator(),
}
l.aDone = !l.a.Next()
......@@ -51,12 +51,6 @@ func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
return l
}
// Seek is meant to step the iterator forward as many elements as needed, so that
// after calling Next(), the iterator will be at a key higher than the given hash.
//
// It is not implemented for the binary iterator: calling it always panics.
func (it *binaryAccountIterator) Seek(key common.Hash) {
panic("todo: implement")
}
// Next steps the iterator forward one element, returning false if exhausted,
// or an error if iteration failed for some reason (e.g. root being iterated
// becomes stale and garbage collected).
......@@ -64,9 +58,9 @@ func (it *binaryAccountIterator) Next() bool {
if it.aDone && it.bDone {
return false
}
nextB := it.b.Key()
nextB := it.b.Hash()
first:
nextA := it.a.Key()
nextA := it.a.Hash()
if it.aDone {
it.bDone = !it.b.Next()
it.k = nextB
......@@ -97,15 +91,15 @@ func (it *binaryAccountIterator) Error() error {
return it.fail
}
// Key returns the hash of the account the iterator is currently at.
func (it *binaryAccountIterator) Key() common.Hash {
// Hash returns the hash of the account the iterator is currently at.
func (it *binaryAccountIterator) Hash() common.Hash {
return it.k
}
// Value returns the RLP encoded slim account the iterator is currently at, or
// Account returns the RLP encoded slim account the iterator is currently at, or
// nil if the iterated snapshot stack became stale (you can check Error after
// to see if it failed or not).
func (it *binaryAccountIterator) Value() []byte {
func (it *binaryAccountIterator) Account() []byte {
blob, err := it.a.layer.AccountRLP(it.k)
if err != nil {
it.fail = err
......@@ -113,3 +107,9 @@ func (it *binaryAccountIterator) Value() []byte {
}
return blob
}
// Release recursively releases all the iterators in the stack: first the
// iterator of this layer (a), then the iterator covering the parent layers (b),
// which forwards the call further down if it is itself a binary iterator.
func (it *binaryAccountIterator) Release() {
it.a.Release()
it.b.Release()
}
This diff is collapsed.
This diff is collapsed.
......@@ -113,9 +113,17 @@ type Snapshot interface {
type snapshot interface {
Snapshot
// Parent returns the subsequent layer of a snapshot, or nil if the base was
// reached.
//
// Note, the method is an internal helper to avoid type switching between the
// disk and diff layers. There is no locking involved.
Parent() snapshot
// Update creates a new layer on top of the existing snapshot diff tree with
// the specified data items. Note, the maps are retained by the method to avoid
// copying everything.
// the specified data items.
//
// Note, the maps are retained by the method to avoid copying everything.
Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer
// Journal commits an entire diff hierarchy to disk into a single journal entry.
......@@ -126,6 +134,9 @@ type snapshot interface {
// Stale returns whether this layer has become stale (was flattened across) or
// if it's still live.
Stale() bool
// AccountIterator creates an account iterator over an arbitrary layer.
AccountIterator(seek common.Hash) AccountIterator
}
// SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent
......@@ -170,15 +181,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
// Existing snapshot loaded, seed all the layers
for head != nil {
snap.layers[head.Root()] = head
switch self := head.(type) {
case *diffLayer:
head = self.parent
case *diskLayer:
head = nil
default:
panic(fmt.Sprintf("unknown data layer: %T", self))
}
head = head.Parent()
}
return snap
}
......@@ -563,3 +566,9 @@ func (t *Tree) Rebuild(root common.Hash) {
root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper),
}
}
// AccountIterator creates a new account iterator for the specified root hash and
// seeks to a starting account hash.
//
// The call is forwarded unchanged to newFastAccountIterator, and whatever
// iterator and error it produces are returned as is.
func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
return newFastAccountIterator(t, root, seek)
}
......@@ -18,13 +18,48 @@ package snapshot
import (
"fmt"
"math/big"
"math/rand"
"testing"
"github.com/VictoriaMetrics/fastcache"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/rlp"
)
// randomHash fills a hash with random bytes and returns it. It panics if the
// random source fails or yields fewer than common.HashLength bytes.
func randomHash() common.Hash {
	var hash common.Hash

	n, err := rand.Read(hash[:])
	if err != nil || n != common.HashLength {
		panic(err)
	}
	return hash
}
// randomAccount builds an account with a random balance, nonce and root (the
// code hash is always emptyCode) and returns its RLP encoding.
func randomAccount() []byte {
	storageRoot := randomHash()

	// The encoding error is deliberately discarded, exactly as before.
	blob, _ := rlp.EncodeToBytes(Account{
		Balance:  big.NewInt(rand.Int63()),
		Nonce:    rand.Uint64(),
		Root:     storageRoot[:],
		CodeHash: emptyCode[:],
	})
	return blob
}
// randomAccountSet creates one random account per supplied string, keyed by the
// string interpreted as a hex encoded account address hash. Repeated strings
// collapse into a single entry holding the account generated last.
func randomAccountSet(hashes ...string) map[common.Hash][]byte {
	set := make(map[common.Hash][]byte)
	for i := 0; i < len(hashes); i++ {
		key := common.HexToHash(hashes[i])
		set[key] = randomAccount()
	}
	return set
}
// Tests that if a disk layer becomes stale, no active external references will
// be returned with junk data. This version of the test flattens every diff layer
// to check internal corner case around the bottom-most memory accumulator.
......@@ -46,8 +81,7 @@ func TestDiskLayerExternalInvalidationFullFlatten(t *testing.T) {
accounts := map[common.Hash][]byte{
common.HexToHash("0xa1"): randomAccount(),
}
storage := make(map[common.Hash]map[common.Hash][]byte)
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if n := len(snaps.layers); n != 2 {
......@@ -91,11 +125,10 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) {
accounts := map[common.Hash][]byte{
common.HexToHash("0xa1"): randomAccount(),
}
storage := make(map[common.Hash]map[common.Hash][]byte)
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if n := len(snaps.layers); n != 3 {
......@@ -140,11 +173,10 @@ func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) {
accounts := map[common.Hash][]byte{
common.HexToHash("0xa1"): randomAccount(),
}
storage := make(map[common.Hash]map[common.Hash][]byte)
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if n := len(snaps.layers); n != 3 {
......@@ -188,14 +220,13 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) {
accounts := map[common.Hash][]byte{
common.HexToHash("0xa1"): randomAccount(),
}
storage := make(map[common.Hash]map[common.Hash][]byte)
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), accounts, storage); err != nil {
if err := snaps.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), accounts, nil); err != nil {
t.Fatalf("failed to create a diff layer: %v", err)
}
if n := len(snaps.layers); n != 4 {
......
......@@ -25,15 +25,6 @@ import (
"github.com/ethereum/go-ethereum/ethdb/memorydb"
)
// randomHash generates a random blob of data and returns it as a hash. It
// panics if the random source errors or returns fewer than common.HashLength
// bytes.
func randomHash() common.Hash {
var hash common.Hash
if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil {
panic(err)
}
return hash
}
// Tests that given a database with random data content, all parts of a snapshot
// can be correctly wiped without touching anything else.
func TestWipe(t *testing.T) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment