Unverified Commit 26d271df authored by gary rong's avatar gary rong Committed by GitHub

core/state/snapshot: implement storage iterator (#20971)

* core/state/snapshot: implement storage iterator

* core/state/snapshot, tests: implement helper function

* core/state/snapshot: fix storage issue

If an account is deleted in tx_1 but recreated in tx_2, then it can
happen that, in this diff layer, both destructSet and storageData
record this account. In this case, the storage iterator should be able
to iterate the slots belonging to the new account, but must disable
further iteration in deeper layers (which belong to the old account).

* core/state/snapshot: address peter and martin's comment

* core/state: address comments

* core/state/snapshot: fix test
parent 1264c19f
...@@ -24,8 +24,10 @@ import ( ...@@ -24,8 +24,10 @@ import (
"github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/rlp"
) )
// Account is a slim version of a state.Account, where the root and code hash // Account is a modified version of a state.Account, where the root is replaced
// are replaced with a nil byte slice for empty accounts. // with a byte slice. This format can be used to represent full-consensus format
// or slim-snapshot format which replaces the empty root and code hash as nil
// byte slice.
type Account struct { type Account struct {
Nonce uint64 Nonce uint64
Balance *big.Int Balance *big.Int
...@@ -33,9 +35,8 @@ type Account struct { ...@@ -33,9 +35,8 @@ type Account struct {
CodeHash []byte CodeHash []byte
} }
// AccountRLP converts a state.Account content into a slim snapshot version RLP // SlimAccount converts a state.Account content into a slim snapshot account
// encoded. func SlimAccount(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) Account {
func AccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte {
slim := Account{ slim := Account{
Nonce: nonce, Nonce: nonce,
Balance: balance, Balance: balance,
...@@ -46,9 +47,40 @@ func AccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byt ...@@ -46,9 +47,40 @@ func AccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byt
if !bytes.Equal(codehash, emptyCode[:]) { if !bytes.Equal(codehash, emptyCode[:]) {
slim.CodeHash = codehash slim.CodeHash = codehash
} }
data, err := rlp.EncodeToBytes(slim) return slim
}
// SlimAccountRLP converts a state.Account content into a slim snapshot
// version RLP encoded.
func SlimAccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte {
data, err := rlp.EncodeToBytes(SlimAccount(nonce, balance, root, codehash))
if err != nil { if err != nil {
panic(err) panic(err)
} }
return data return data
} }
// FullAccount decodes data given in the 'slim RLP' format and returns the
// account in consensus format.
//
// A storage root or code hash that is empty after decoding is substituted
// with emptyRoot and emptyCode respectively. If decoding fails, a zero
// Account is returned together with the decoder's error.
func FullAccount(data []byte) (Account, error) {
	decoded := Account{}
	err := rlp.DecodeBytes(data, &decoded)
	if err != nil {
		return Account{}, err
	}
	// Restore the fields that were left empty in the encoded form.
	if len(decoded.CodeHash) == 0 {
		decoded.CodeHash = emptyCode[:]
	}
	if len(decoded.Root) == 0 {
		decoded.Root = emptyRoot[:]
	}
	return decoded, nil
}
// FullAccountRLP converts data given in the 'slim RLP' format into the full
// RLP format: the data is expanded through FullAccount and the resulting
// account is RLP-encoded again. A decoding failure is returned unchanged
// with a nil byte slice.
func FullAccountRLP(data []byte) ([]byte, error) {
	full, err := FullAccount(data)
	if err == nil {
		return rlp.EncodeToBytes(full)
	}
	return nil, err
}
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
package snapshot package snapshot
import ( import (
"bytes"
"fmt"
"sync" "sync"
"time" "time"
...@@ -27,80 +29,239 @@ import ( ...@@ -27,80 +29,239 @@ import (
"github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie"
) )
// conversionAccount is used for converting between full and slim format. When // trieKV represents a trie key-value pair
// doing this, we can consider 'balance' as a byte array, as it has already type trieKV struct {
// been converted from big.Int into an rlp-byteslice. key common.Hash
type conversionAccount struct { value []byte
Nonce uint64 }
Balance []byte
Root []byte type (
CodeHash []byte // trieGeneratorFn is the interface of trie generation which can
// be implemented by different trie algorithm.
trieGeneratorFn func(in chan (trieKV), out chan (common.Hash))
// leafCallbackFn is the callback invoked at the leaves of the trie,
// returns the subtrie root with the specified subtrie identifier.
leafCallbackFn func(hash common.Hash, stat *generateStats) common.Hash
)
// GenerateAccountTrieRoot takes an account iterator and reproduces the root hash.
func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) {
return generateTrieRoot(it, common.Hash{}, stdGenerate, nil, &generateStats{start: time.Now()}, true)
}
// GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash.
func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) {
return generateTrieRoot(it, account, stdGenerate, nil, &generateStats{start: time.Now()}, true)
} }
// SlimToFull converts data on the 'slim RLP' format into the full RLP-format // VerifyState takes the whole snapshot tree as the input, traverses all the accounts
func SlimToFull(data []byte) ([]byte, error) { // as well as the corresponding storages and compares the re-computed hash with the
acc := &conversionAccount{} // original one(state root and the storage root).
if err := rlp.DecodeBytes(data, acc); err != nil { func VerifyState(snaptree *Tree, root common.Hash) error {
return nil, err acctIt, err := snaptree.AccountIterator(root, common.Hash{})
if err != nil {
return err
} }
if len(acc.Root) == 0 { got, err := generateTrieRoot(acctIt, common.Hash{}, stdGenerate, func(account common.Hash, stat *generateStats) common.Hash {
acc.Root = emptyRoot[:] storageIt, err := snaptree.StorageIterator(root, account, common.Hash{})
if err != nil {
return common.Hash{}
}
hash, err := generateTrieRoot(storageIt, account, stdGenerate, nil, stat, false)
if err != nil {
return common.Hash{}
}
return hash
}, &generateStats{start: time.Now()}, true)
if err != nil {
return err
} }
if len(acc.CodeHash) == 0 { if got != root {
acc.CodeHash = emptyCode[:] return fmt.Errorf("State root hash mismatch, got %x, want %x", got, root)
} }
fullData, err := rlp.EncodeToBytes(acc) return nil
if err != nil { }
return nil, err
// generateStats is a collection of statistics gathered by the trie generator
// for logging purposes.
type generateStats struct {
	accounts   uint64       // Running total of the account counts passed to progress
	slots      uint64       // Running total of the slot counts passed to progress
	curAccount common.Hash  // Last non-zero account hash passed to progress
	curSlot    common.Hash  // Last non-zero slot hash passed to progress
	start      time.Time    // Reference time for the logged "elapsed" duration
	lock       sync.RWMutex // Taken for writing by progress, for reading by report and reportDone
}
// progress records the progress trie generator made recently.
func (stat *generateStats) progress(accounts, slots uint64, curAccount common.Hash, curSlot common.Hash) {
stat.lock.Lock()
defer stat.lock.Unlock()
stat.accounts += accounts
stat.slots += slots
if curAccount != (common.Hash{}) {
stat.curAccount = curAccount
}
if curSlot != (common.Hash{}) {
stat.curSlot = curSlot
} }
return fullData, nil
} }
// trieKV represents a trie key-value pair // report prints the cumulative progress statistic smartly.
type trieKV struct { func (stat *generateStats) report() {
key common.Hash stat.lock.RLock()
value []byte defer stat.lock.RUnlock()
var ctx []interface{}
if stat.curSlot != (common.Hash{}) {
ctx = append(ctx, []interface{}{
"in", stat.curAccount,
"at", stat.curSlot,
}...)
} else {
ctx = append(ctx, []interface{}{"at", stat.curAccount}...)
}
// Add the usual measurements
ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
if stat.slots != 0 {
ctx = append(ctx, []interface{}{"slots", stat.slots}...)
}
ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
log.Info("Generating trie hash from snapshot", ctx...)
} }
type trieGeneratorFn func(in chan (trieKV), out chan (common.Hash)) // reportDone prints the last log when the whole generation is finished.
func (stat *generateStats) reportDone() {
stat.lock.RLock()
defer stat.lock.RUnlock()
// GenerateTrieRoot takes an account iterator and reproduces the root hash. var ctx []interface{}
func GenerateTrieRoot(it AccountIterator) common.Hash { ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
return generateTrieRoot(it, stdGenerate) if stat.slots != 0 {
ctx = append(ctx, []interface{}{"slots", stat.slots}...)
}
ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
log.Info("Generated trie hash from snapshot", ctx...)
} }
func generateTrieRoot(it AccountIterator, generatorFn trieGeneratorFn) common.Hash { // generateTrieRoot generates the trie hash based on the snapshot iterator.
// It can be used for generating account trie, storage trie or even the
// whole state which connects the accounts and the corresponding storages.
func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) {
var ( var (
in = make(chan trieKV) // chan to pass leaves in = make(chan trieKV) // chan to pass leaves
out = make(chan common.Hash) // chan to collect result out = make(chan common.Hash, 1) // chan to collect result
wg sync.WaitGroup stoplog = make(chan bool, 1) // 1-size buffer, works when logging is not enabled
wg sync.WaitGroup
) )
// Spin up a go-routine for trie hash re-generation
wg.Add(1) wg.Add(1)
go func() { go func() {
defer wg.Done()
generatorFn(in, out) generatorFn(in, out)
wg.Done()
}() }()
// Feed leaves
start := time.Now() // Spin up a go-routine for progress logging
logged := time.Now() if report && stats != nil {
accounts := 0 wg.Add(1)
go func() {
defer wg.Done()
timer := time.NewTimer(0)
defer timer.Stop()
for {
select {
case <-timer.C:
stats.report()
timer.Reset(time.Second * 8)
case success := <-stoplog:
if success {
stats.reportDone()
}
return
}
}
}()
}
// stop is a helper function to shutdown the background threads
// and return the re-generated trie hash.
stop := func(success bool) common.Hash {
close(in)
result := <-out
stoplog <- success
wg.Wait()
return result
}
var (
logged = time.Now()
processed = uint64(0)
leaf trieKV
last common.Hash
)
// Start to feed leaves
for it.Next() { for it.Next() {
slimData := it.Account() if account == (common.Hash{}) {
fullData, _ := SlimToFull(slimData) var (
l := trieKV{it.Hash(), fullData} err error
in <- l fullData []byte
if time.Since(logged) > 8*time.Second { )
log.Info("Generating trie hash from snapshot", if leafCallback == nil {
"at", l.key, "accounts", accounts, "elapsed", time.Since(start)) fullData, err = FullAccountRLP(it.(AccountIterator).Account())
logged = time.Now() if err != nil {
stop(false)
return common.Hash{}, err
}
} else {
account, err := FullAccount(it.(AccountIterator).Account())
if err != nil {
stop(false)
return common.Hash{}, err
}
// Apply the leaf callback. Normally the callback is used to traverse
// the storage trie and re-generate the subtrie root.
subroot := leafCallback(it.Hash(), stats)
if !bytes.Equal(account.Root, subroot.Bytes()) {
stop(false)
return common.Hash{}, fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot)
}
fullData, err = rlp.EncodeToBytes(account)
if err != nil {
stop(false)
return common.Hash{}, err
}
}
leaf = trieKV{it.Hash(), fullData}
} else {
leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())}
}
in <- leaf
// Accumulate the generation statistic if it's required.
processed++
if time.Since(logged) > 3*time.Second && stats != nil {
if account == (common.Hash{}) {
stats.progress(processed, 0, it.Hash(), common.Hash{})
} else {
stats.progress(0, processed, account, it.Hash())
}
logged, processed = time.Now(), 0
}
last = it.Hash()
}
// Commit the last part statistic.
if processed > 0 && stats != nil {
if account == (common.Hash{}) {
stats.progress(processed, 0, last, common.Hash{})
} else {
stats.progress(0, processed, account, last)
} }
accounts++
} }
close(in) result := stop(true)
result := <-out return result, nil
log.Info("Generated trie hash from snapshot", "accounts", accounts, "elapsed", time.Since(start))
wg.Wait()
return result
} }
// stdGenerate is a very basic hexary trie builder which uses the same Trie // stdGenerate is a very basic hexary trie builder which uses the same Trie
......
...@@ -105,6 +105,13 @@ type diffLayer struct { ...@@ -105,6 +105,13 @@ type diffLayer struct {
root common.Hash // Root hash to which this snapshot diff belongs to root common.Hash // Root hash to which this snapshot diff belongs to
stale uint32 // Signals that the layer became stale (state progressed) stale uint32 // Signals that the layer became stale (state progressed)
// destructSet is a very special helper marker. If an account is marked as
// deleted, then it's recorded in this set. However it's allowed that an account
// is included here but still available in other sets(e.g. storageData). The
// reason is the diff layer includes all the changes in a *block*. It can
// happen that in the tx_1, account A is self-destructed while in the tx_2
// it's recreated. But we still need this marker to indicate the "old" A is
// deleted, all data in other set belongs to the "new" A.
destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts
accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil
accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted)
...@@ -169,6 +176,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s ...@@ -169,6 +176,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
destructSet: destructs, destructSet: destructs,
accountData: accounts, accountData: accounts,
storageData: storage, storageData: storage,
storageList: make(map[common.Hash][]common.Hash),
} }
switch parent := parent.(type) { switch parent := parent.(type) {
case *diskLayer: case *diskLayer:
...@@ -194,11 +202,6 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s ...@@ -194,11 +202,6 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
dl.memory += uint64(common.HashLength + len(data)) dl.memory += uint64(common.HashLength + len(data))
snapshotDirtyAccountWriteMeter.Mark(int64(len(data))) snapshotDirtyAccountWriteMeter.Mark(int64(len(data)))
} }
// Fill the storage hashes and sort them for the iterator
dl.storageList = make(map[common.Hash][]common.Hash)
for accountHash := range destructs {
dl.storageList[accountHash] = nil
}
// Determine memory size and track the dirty writes // Determine memory size and track the dirty writes
for _, slots := range storage { for _, slots := range storage {
for _, data := range slots { for _, data := range slots {
...@@ -206,7 +209,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s ...@@ -206,7 +209,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
snapshotDirtyStorageWriteMeter.Mark(int64(len(data))) snapshotDirtyStorageWriteMeter.Mark(int64(len(data)))
} }
} }
dl.memory += uint64(len(dl.storageList) * common.HashLength) dl.memory += uint64(len(destructs) * common.HashLength)
return dl return dl
} }
...@@ -287,6 +290,8 @@ func (dl *diffLayer) Account(hash common.Hash) (*Account, error) { ...@@ -287,6 +290,8 @@ func (dl *diffLayer) Account(hash common.Hash) (*Account, error) {
// AccountRLP directly retrieves the account RLP associated with a particular // AccountRLP directly retrieves the account RLP associated with a particular
// hash in the snapshot slim data format. // hash in the snapshot slim data format.
//
// Note the returned account is not a copy, please don't modify it.
func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) {
// Check the bloom filter first whether there's even a point in reaching into // Check the bloom filter first whether there's even a point in reaching into
// all the maps in all the layers below // all the maps in all the layers below
...@@ -347,6 +352,8 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) { ...@@ -347,6 +352,8 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) {
// Storage directly retrieves the storage data associated with a particular hash, // Storage directly retrieves the storage data associated with a particular hash,
// within a particular account. If the slot is unknown to this diff, it's parent // within a particular account. If the slot is unknown to this diff, it's parent
// is consulted. // is consulted.
//
// Note the returned slot is not a copy, please don't modify it.
func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) { func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) {
// Check the bloom filter first whether there's even a point in reaching into // Check the bloom filter first whether there's even a point in reaching into
// all the maps in all the layers below // all the maps in all the layers below
...@@ -502,22 +509,29 @@ func (dl *diffLayer) AccountList() []common.Hash { ...@@ -502,22 +509,29 @@ func (dl *diffLayer) AccountList() []common.Hash {
} }
} }
sort.Sort(hashes(dl.accountList)) sort.Sort(hashes(dl.accountList))
dl.memory += uint64(len(dl.accountList) * common.HashLength)
return dl.accountList return dl.accountList
} }
// StorageList returns a sorted list of all storage slot hashes in this difflayer // StorageList returns a sorted list of all storage slot hashes in this difflayer
// for the given account. // for the given account. If the whole storage is destructed in this layer, then
// an additional flag *destructed = true* will be returned, otherwise the flag is
// false. Besides, the returned list will include the hash of deleted storage slot.
// Note a special case is an account is deleted in a prior tx but is recreated in
// the following tx with some storage slots set. In this case the returned list is
// not empty but the flag is true.
// //
// Note, the returned slice is not a copy, so do not modify it. // Note, the returned slice is not a copy, so do not modify it.
func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { func (dl *diffLayer) StorageList(accountHash common.Hash) ([]common.Hash, bool) {
// If an old list already exists, return it // If an old list already exists, return it
dl.lock.RLock() dl.lock.RLock()
list := dl.storageList[accountHash] _, destructed := dl.destructSet[accountHash]
if list, exist := dl.storageList[accountHash]; exist {
dl.lock.RUnlock()
return list, destructed // The list might be nil
}
dl.lock.RUnlock() dl.lock.RUnlock()
if list != nil {
return list
}
// No old sorted account list exists, generate a new one // No old sorted account list exists, generate a new one
dl.lock.Lock() dl.lock.Lock()
defer dl.lock.Unlock() defer dl.lock.Unlock()
...@@ -529,5 +543,6 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash { ...@@ -529,5 +543,6 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
} }
sort.Sort(hashes(storageList)) sort.Sort(hashes(storageList))
dl.storageList[accountHash] = storageList dl.storageList[accountHash] = storageList
return storageList dl.memory += uint64(len(dl.storageList)*common.HashLength + common.HashLength)
return storageList, destructed
} }
...@@ -109,7 +109,8 @@ func TestMergeBasics(t *testing.T) { ...@@ -109,7 +109,8 @@ func TestMergeBasics(t *testing.T) {
if have, want := len(merged.storageList), i; have != want { if have, want := len(merged.storageList), i; have != want {
t.Errorf("[1] storageList wrong: have %v, want %v", have, want) t.Errorf("[1] storageList wrong: have %v, want %v", have, want)
} }
if have, want := len(merged.StorageList(aHash)), len(sMap); have != want { list, _ := merged.StorageList(aHash)
if have, want := len(list), len(sMap); have != want {
t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want) t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want)
} }
if have, want := len(merged.storageList[aHash]), len(sMap); have != want { if have, want := len(merged.storageList[aHash]), len(sMap); have != want {
......
...@@ -42,7 +42,7 @@ var ( ...@@ -42,7 +42,7 @@ var (
) )
// generatorStats is a collection of statistics gathered by the snapshot generator // generatorStats is a collection of statistics gathered by the snapshot generator
// for logging purposes. // for logging purposes.
type generatorStats struct { type generatorStats struct {
wiping chan struct{} // Notification channel if wiping is in progress wiping chan struct{} // Notification channel if wiping is in progress
origin uint64 // Origin prefix where generation started origin uint64 // Origin prefix where generation started
...@@ -167,7 +167,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { ...@@ -167,7 +167,7 @@ func (dl *diskLayer) generate(stats *generatorStats) {
if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil { if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil {
log.Crit("Invalid account encountered during snapshot creation", "err", err) log.Crit("Invalid account encountered during snapshot creation", "err", err)
} }
data := AccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash)
// If the account is not yet in-progress, write it out // If the account is not yet in-progress, write it out
if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) {
......
...@@ -26,9 +26,9 @@ import ( ...@@ -26,9 +26,9 @@ import (
"github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb"
) )
// AccountIterator is an iterator to step over all the accounts in a snapshot, // Iterator is a iterator to step over all the accounts or the specific
// which may or may npt be composed of multiple layers. // storage in a snapshot which may or may not be composed of multiple layers.
type AccountIterator interface { type Iterator interface {
// Next steps the iterator forward one element, returning false if exhausted, // Next steps the iterator forward one element, returning false if exhausted,
// or an error if iteration failed for some reason (e.g. root being iterated // or an error if iteration failed for some reason (e.g. root being iterated
// becomes stale and garbage collected). // becomes stale and garbage collected).
...@@ -38,18 +38,35 @@ type AccountIterator interface { ...@@ -38,18 +38,35 @@ type AccountIterator interface {
// caused a premature iteration exit (e.g. snapshot stack becoming stale). // caused a premature iteration exit (e.g. snapshot stack becoming stale).
Error() error Error() error
// Hash returns the hash of the account the iterator is currently at. // Hash returns the hash of the account or storage slot the iterator is
// currently at.
Hash() common.Hash Hash() common.Hash
// Account returns the RLP encoded slim account the iterator is currently at.
// An error will be returned if the iterator becomes invalid (e.g. snaph
Account() []byte
// Release releases associated resources. Release should always succeed and // Release releases associated resources. Release should always succeed and
// can be called multiple times without causing error. // can be called multiple times without causing error.
Release() Release()
} }
// AccountIterator is an iterator to step over all the accounts in a snapshot,
// which may or may not be composed of multiple layers.
type AccountIterator interface {
	Iterator

	// Account returns the RLP encoded slim account the iterator is currently at.
	// The method itself has no error result; if the iterator becomes invalid,
	// the failure is exposed through the embedded Iterator's Error method.
	Account() []byte
}
// StorageIterator is an iterator to step over the specific storage in a
// snapshot, which may or may not be composed of multiple layers.
type StorageIterator interface {
	Iterator

	// Slot returns the storage slot the iterator is currently at. The method
	// itself has no error result; if the iterator becomes invalid, the failure
	// is exposed through the embedded Iterator's Error method.
	Slot() []byte
}
// diffAccountIterator is an account iterator that steps over the accounts (both // diffAccountIterator is an account iterator that steps over the accounts (both
// live and deleted) contained within a single diff layer. Higher order iterators // live and deleted) contained within a single diff layer. Higher order iterators
// will use the deleted accounts to skip deeper iterators. // will use the deleted accounts to skip deeper iterators.
...@@ -120,6 +137,8 @@ func (it *diffAccountIterator) Hash() common.Hash { ...@@ -120,6 +137,8 @@ func (it *diffAccountIterator) Hash() common.Hash {
// This method assumes that flattening does not delete elements from // This method assumes that flattening does not delete elements from
// the accountdata mapping (writing nil into it is fine though), and will panic // the accountdata mapping (writing nil into it is fine though), and will panic
// if elements have been deleted. // if elements have been deleted.
//
// Note the returned account is not a copy, please don't modify it.
func (it *diffAccountIterator) Account() []byte { func (it *diffAccountIterator) Account() []byte {
it.layer.lock.RLock() it.layer.lock.RLock()
blob, ok := it.layer.accountData[it.curHash] blob, ok := it.layer.accountData[it.curHash]
...@@ -164,7 +183,7 @@ func (it *diskAccountIterator) Next() bool { ...@@ -164,7 +183,7 @@ func (it *diskAccountIterator) Next() bool {
} }
// Try to advance the iterator and release it if we reached the end // Try to advance the iterator and release it if we reached the end
for { for {
if !it.it.Next() || !bytes.HasPrefix(it.it.Key(), rawdb.SnapshotAccountPrefix) { if !it.it.Next() {
it.it.Release() it.it.Release()
it.it = nil it.it = nil
return false return false
...@@ -182,12 +201,15 @@ func (it *diskAccountIterator) Next() bool { ...@@ -182,12 +201,15 @@ func (it *diskAccountIterator) Next() bool {
// A diff layer is immutable after creation content wise and can always be fully // A diff layer is immutable after creation content wise and can always be fully
// iterated without error, so this method always returns nil. // iterated without error, so this method always returns nil.
func (it *diskAccountIterator) Error() error { func (it *diskAccountIterator) Error() error {
if it.it == nil {
return nil // Iterator is exhausted and released
}
return it.it.Error() return it.it.Error()
} }
// Hash returns the hash of the account the iterator is currently at. // Hash returns the hash of the account the iterator is currently at.
func (it *diskAccountIterator) Hash() common.Hash { func (it *diskAccountIterator) Hash() common.Hash {
return common.BytesToHash(it.it.Key()) return common.BytesToHash(it.it.Key()) // The prefix will be truncated
} }
// Account returns the RLP encoded slim account the iterator is currently at. // Account returns the RLP encoded slim account the iterator is currently at.
...@@ -203,3 +225,176 @@ func (it *diskAccountIterator) Release() { ...@@ -203,3 +225,176 @@ func (it *diskAccountIterator) Release() {
it.it = nil it.it = nil
} }
} }
// diffStorageIterator is a storage iterator that steps over the specific storage
// (both live and deleted) contained within a single diff layer. Higher order
// iterators will use the deleted slot to skip deeper iterators.
type diffStorageIterator struct {
	// curHash is the current hash the iterator is positioned on. The field is
	// explicitly tracked since the referenced diff layer might go stale after
	// the iterator was positioned and we don't want to fail accessing the old
	// hash as long as the iterator is not touched any more.
	curHash common.Hash
	account common.Hash // Hash of the account whose storage slots are iterated

	layer *diffLayer    // Live layer to retrieve values from
	keys  []common.Hash // Keys left in the layer to iterate
	fail  error         // Any failures encountered (stale)
}
// StorageIterator creates a storage iterator over a single diff layer.
// Besides the iterator itself, an additional "destructed" flag is returned.
// If it is true, the whole storage of the account was destructed in this
// layer (and possibly recreated afterwards), so deeper layers must not be
// consulted for storage retrieval.
func (dl *diffLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
	// The slot list is requested even when the account is marked as
	// destructed: the iterator then serves the recreated account, which
	// merely shares its address with the deleted one.
	slots, destructed := dl.StorageList(account)

	// Position on the first slot hash that is not smaller than seek.
	first := sort.Search(len(slots), func(n int) bool {
		return bytes.Compare(slots[n][:], seek[:]) >= 0
	})
	iter := &diffStorageIterator{
		layer:   dl,
		account: account,
		keys:    slots[first:],
	}
	return iter, destructed
}
// Next steps the iterator forward one element, returning false if exhausted
// or if the underlying layer has gone stale (in which case the failure is
// recorded and reported by Error).
func (it *diffStorageIterator) Next() bool {
	switch {
	case it.fail != nil:
		// Advancing an already failed iterator is treated as a programmer
		// error: it most likely means somebody forgot to check Error, so blow
		// up loudly instead of continuing with hard-to-debug behavior.
		panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))

	case len(it.keys) == 0:
		// Nothing left to iterate.
		return false

	case it.layer.Stale():
		// The layer went stale under us, record the failure and drop the keys.
		it.fail, it.keys = ErrSnapshotStale, nil
		return false
	}
	// Still alive: cache the hash at the head of the list and shift past it.
	it.curHash, it.keys = it.keys[0], it.keys[1:]
	return true
}
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
// It simply exposes the iterator's recorded failure, which Next and Slot set
// to ErrSnapshotStale when they find the layer stale.
func (it *diffStorageIterator) Error() error {
	return it.fail
}
// Hash returns the hash of the storage slot the iterator is currently at.
// The value is the one cached by the last successful call to Next, so it does
// not require access to the underlying layer.
func (it *diffStorageIterator) Hash() common.Hash {
	return it.curHash
}
// Slot returns the raw storage slot value the iterator is currently at.
//
// The underlying layer may have gone stale between the call to Next and the
// call to Slot. In that case the value read from the layer is still returned,
// but the iterator is marked as failed with ErrSnapshotStale, which is then
// reported by Error.
//
// This method assumes that flattening does not delete elements from the
// storage mapping (writing nil into it is fine though), and will panic if
// either the account's storage or the current slot is missing.
//
// Note the returned slot is not a copy, please don't modify it.
func (it *diffStorageIterator) Slot() []byte {
	// Do both lookups under the read lock, but release it before validating
	// the results. Panicking while the lock is held would leave the layer
	// read-locked forever if the panic is recovered further up the stack.
	it.layer.lock.RLock()
	storage, hasAccount := it.layer.storageData[it.account]
	blob, hasSlot := storage[it.curHash] // indexing a nil map is safe and yields ok == false
	it.layer.lock.RUnlock()

	if !hasAccount {
		panic(fmt.Sprintf("iterator referenced non-existent account storage: %x", it.account))
	}
	// Storage slot might be nil(deleted), but it must exist
	if !hasSlot {
		panic(fmt.Sprintf("iterator referenced non-existent storage slot: %x", it.curHash))
	}
	if it.layer.Stale() {
		it.fail, it.keys = ErrSnapshotStale, nil
	}
	return blob
}
// Release is a noop for diff storage iterators as there are no held resources.
func (it *diffStorageIterator) Release() {}
// diskStorageIterator is a storage iterator that steps over the live storage
// contained within a disk layer.
type diskStorageIterator struct {
	layer   *diskLayer     // Disk layer the iterator was created from
	account common.Hash    // Hash of the account whose storage slots are iterated
	it      ethdb.Iterator // Underlying database iterator; set to nil once exhausted or released
}
// StorageIterator creates a storage iterator over a disk layer.
// If the whole storage is destructed, then all entries in the disk
// layer are deleted already, so the "destructed" flag returned here
// is always false.
func (dl *diskLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
	var (
		// Starting position: the seek hash with its trailing zero bytes cut off.
		start = common.TrimRightZeroes(seek[:])
		// Database key prefix: the snapshot storage prefix followed by the account hash.
		prefix = append(rawdb.SnapshotStoragePrefix, account.Bytes()...)
	)
	iter := &diskStorageIterator{
		layer:   dl,
		account: account,
		it:      dl.diskdb.NewIterator(prefix, start),
	}
	return iter, false
}
// Next steps the iterator forward one element, returning false if exhausted.
// Entries whose key does not have the exact length of a storage snapshot key
// (prefix + account hash + slot hash) are skipped.
func (it *diskStorageIterator) Next() bool {
	// An already exhausted (or released) iterator has nothing left to offer
	if it.it == nil {
		return false
	}
	keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength
	for it.it.Next() {
		if len(it.it.Key()) == keyLen {
			return true
		}
	}
	// Reached the end, release the database iterator right away
	it.it.Release()
	it.it = nil
	return false
}
// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit.
//
// While the underlying database iterator is alive its error is reported; once
// the iterator has been exhausted or released, nil is returned.
func (it *diskStorageIterator) Error() error {
	if it.it != nil {
		return it.it.Error()
	}
	return nil // Iterator is exhausted and released
}
// Hash returns the hash of the storage slot the iterator is currently at,
// derived from the current database key.
func (it *diskStorageIterator) Hash() common.Hash {
	key := it.it.Key()
	return common.BytesToHash(key) // The prefix will be truncated
}
// Slot returns the raw storage slot content the iterator is currently at, as
// stored in the underlying database entry.
func (it *diskStorageIterator) Slot() []byte {
	value := it.it.Value()
	return value
}
// Release releases the database snapshot held during iteration. Calling it on
// an iterator that is already exhausted or released is a noop.
func (it *diskStorageIterator) Release() {
	// The iterator is auto-released on exhaustion, nothing left to do then
	if it.it == nil {
		return
	}
	it.it.Release()
	it.it = nil
}
...@@ -22,35 +22,91 @@ import ( ...@@ -22,35 +22,91 @@ import (
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
) )
// binaryAccountIterator is a simplistic iterator to step over the accounts in // binaryIterator is a simplistic iterator to step over the accounts or storage
// a snapshot, which may or may not be composed of multiple layers. Performance // in a snapshot, which may or may not be composed of multiple layers. Performance
// wise this iterator is slow, it's meant for cross validating the fast one, // wise this iterator is slow, it's meant for cross validating the fast one,
type binaryAccountIterator struct { type binaryIterator struct {
a AccountIterator a Iterator
b AccountIterator b Iterator
aDone bool aDone bool
bDone bool bDone bool
k common.Hash accountIterator bool
fail error k common.Hash
account common.Hash
fail error
} }
// newBinaryAccountIterator creates a simplistic account iterator to step over // initBinaryAccountIterator creates a simplistic iterator to step over all the
// all the accounts in a slow, but easily verifiable way. // accounts in a slow, but easily verifiable way. Note this function is used for
func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { // initialization, use `newBinaryAccountIterator` as the API.
func (dl *diffLayer) initBinaryAccountIterator() Iterator {
	// This layer always feeds the first branch of the binary iterator.
	iter := &binaryIterator{
		a:               dl.AccountIterator(common.Hash{}),
		accountIterator: true,
	}
	// The second branch covers everything below: recurse while the parent is
	// another diff layer, otherwise iterate the parent layer directly.
	if parent, isDiff := dl.parent.(*diffLayer); isDiff {
		iter.b = parent.initBinaryAccountIterator()
	} else {
		iter.b = dl.Parent().AccountIterator(common.Hash{})
	}
	// Advance both branches once so each is positioned on its first element.
	iter.aDone = !iter.a.Next()
	iter.bDone = !iter.b.Next()
	return iter
}
// initBinaryStorageIterator creates a simplistic iterator to step over all the
// storage slots in a slow, but easily verifiable way. Note this function is used
// for initialization, use `newBinaryStorageIterator` as the API.
func (dl *diffLayer) initBinaryStorageIterator(account common.Hash) Iterator {
parent, ok := dl.parent.(*diffLayer) parent, ok := dl.parent.(*diffLayer)
if !ok { if !ok {
// parent is the disk layer // If the storage in this layer is already destructed, discard all
l := &binaryAccountIterator{ // deeper layers but still return an valid single-branch iterator.
a: dl.AccountIterator(common.Hash{}), a, destructed := dl.StorageIterator(account, common.Hash{})
b: dl.Parent().AccountIterator(common.Hash{}), if destructed {
l := &binaryIterator{
a: a,
account: account,
}
l.aDone = !l.a.Next()
l.bDone = true
return l
}
// The parent is disk layer, don't need to take care "destructed"
// anymore.
b, _ := dl.Parent().StorageIterator(account, common.Hash{})
l := &binaryIterator{
a: a,
b: b,
account: account,
} }
l.aDone = !l.a.Next() l.aDone = !l.a.Next()
l.bDone = !l.b.Next() l.bDone = !l.b.Next()
return l return l
} }
l := &binaryAccountIterator{ // If the storage in this layer is already destructed, discard all
a: dl.AccountIterator(common.Hash{}), // deeper layers but still return an valid single-branch iterator.
b: parent.newBinaryAccountIterator(), a, destructed := dl.StorageIterator(account, common.Hash{})
if destructed {
l := &binaryIterator{
a: a,
account: account,
}
l.aDone = !l.a.Next()
l.bDone = true
return l
}
l := &binaryIterator{
a: a,
b: parent.initBinaryStorageIterator(account),
account: account,
} }
l.aDone = !l.a.Next() l.aDone = !l.a.Next()
l.bDone = !l.b.Next() l.bDone = !l.b.Next()
...@@ -60,7 +116,7 @@ func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { ...@@ -60,7 +116,7 @@ func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
// Next steps the iterator forward one element, returning false if exhausted, // Next steps the iterator forward one element, returning false if exhausted,
// or an error if iteration failed for some reason (e.g. root being iterated // or an error if iteration failed for some reason (e.g. root being iterated
// becomes stale and garbage collected). // becomes stale and garbage collected).
func (it *binaryAccountIterator) Next() bool { func (it *binaryIterator) Next() bool {
if it.aDone && it.bDone { if it.aDone && it.bDone {
return false return false
} }
...@@ -92,19 +148,24 @@ first: ...@@ -92,19 +148,24 @@ first:
// Error returns any failure that occurred during iteration, which might have // Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale). // caused a premature iteration exit (e.g. snapshot stack becoming stale).
func (it *binaryAccountIterator) Error() error { func (it *binaryIterator) Error() error {
return it.fail return it.fail
} }
// Hash returns the hash of the account the iterator is currently at. // Hash returns the hash of the account the iterator is currently at.
func (it *binaryAccountIterator) Hash() common.Hash { func (it *binaryIterator) Hash() common.Hash {
return it.k return it.k
} }
// Account returns the RLP encoded slim account the iterator is currently at, or // Account returns the RLP encoded slim account the iterator is currently at, or
// nil if the iterated snapshot stack became stale (you can check Error after // nil if the iterated snapshot stack became stale (you can check Error after
// to see if it failed or not). // to see if it failed or not).
func (it *binaryAccountIterator) Account() []byte { //
// Note the returned account is not a copy, please don't modify it.
func (it *binaryIterator) Account() []byte {
if !it.accountIterator {
return nil
}
// The topmost iterator must be `diffAccountIterator` // The topmost iterator must be `diffAccountIterator`
blob, err := it.a.(*diffAccountIterator).layer.AccountRLP(it.k) blob, err := it.a.(*diffAccountIterator).layer.AccountRLP(it.k)
if err != nil { if err != nil {
...@@ -114,8 +175,39 @@ func (it *binaryAccountIterator) Account() []byte { ...@@ -114,8 +175,39 @@ func (it *binaryAccountIterator) Account() []byte {
return blob return blob
} }
// Slot returns the raw storage slot data the iterator is currently at. It
// returns nil when the iterator was set up for accounts, or when the lookup
// in the topmost layer fails (the failure is then recorded and can be
// retrieved through Error).
//
// Note the returned slot is not a copy, please don't modify it.
func (it *binaryIterator) Slot() []byte {
	if it.accountIterator {
		return nil
	}
	// The lookup goes through the layer of the first branch, which is
	// asserted to be a `diffStorageIterator`
	top := it.a.(*diffStorageIterator)
	blob, err := top.layer.Storage(it.account, it.k)
	if err == nil {
		return blob
	}
	it.fail = err
	return nil
}
// Release recursively releases all the iterators in the stack. // Release recursively releases all the iterators in the stack.
func (it *binaryAccountIterator) Release() { func (it *binaryIterator) Release() {
it.a.Release() it.a.Release()
it.b.Release() it.b.Release()
} }
// newBinaryAccountIterator creates a simplistic account iterator to step over
// all the accounts in a slow, but easily verifiable way. It wraps
// initBinaryAccountIterator and exposes the result as an AccountIterator.
func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
	return dl.initBinaryAccountIterator().(AccountIterator)
}
// newBinaryStorageIterator creates a simplistic storage iterator to step over
// all the storage slots of the given account in a slow, but easily verifiable
// way. It wraps initBinaryStorageIterator and exposes the result as a
// StorageIterator.
func (dl *diffLayer) newBinaryStorageIterator(account common.Hash) StorageIterator {
	return dl.initBinaryStorageIterator(account).(StorageIterator)
}
This diff is collapsed.
This diff is collapsed.
...@@ -138,6 +138,9 @@ type snapshot interface { ...@@ -138,6 +138,9 @@ type snapshot interface {
// AccountIterator creates an account iterator over an arbitrary layer. // AccountIterator creates an account iterator over an arbitrary layer.
AccountIterator(seek common.Hash) AccountIterator AccountIterator(seek common.Hash) AccountIterator
// StorageIterator creates a storage iterator over an arbitrary layer.
StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
} }
// SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent // SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent
...@@ -601,3 +604,9 @@ func (t *Tree) Rebuild(root common.Hash) { ...@@ -601,3 +604,9 @@ func (t *Tree) Rebuild(root common.Hash) {
func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) { func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
return newFastAccountIterator(t, root, seek) return newFastAccountIterator(t, root, seek)
} }
// StorageIterator creates a new storage iterator for the specified root hash and
// account. The iterator will be moved to the specified start position (seek).
// It delegates to newFastStorageIterator and returns its result unchanged.
func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
return newFastStorageIterator(t, root, account, seek)
}
...@@ -60,6 +60,29 @@ func randomAccountSet(hashes ...string) map[common.Hash][]byte { ...@@ -60,6 +60,29 @@ func randomAccountSet(hashes ...string) map[common.Hash][]byte {
return accounts return accounts
} }
// randomStorageSet generates a set of random slots with the given strings as
// the slot addresses.
//
// The i-th account is paired with hashes[i] and nilStorage[i] when those
// exist: every slot listed in hashes[i] gets a random value, every slot listed
// in nilStorage[i] is stored as nil (overriding a random value for the same
// slot). Accounts without matching lists end up with an empty slot map.
func randomStorageSet(accounts []string, hashes [][]string, nilStorage [][]string) map[common.Hash]map[common.Hash][]byte {
	storages := make(map[common.Hash]map[common.Hash][]byte)
	for i := range accounts {
		slots := make(map[common.Hash][]byte)
		storages[common.HexToHash(accounts[i])] = slots

		if i < len(hashes) {
			for _, slot := range hashes[i] {
				slots[common.HexToHash(slot)] = randomHash().Bytes()
			}
		}
		if i < len(nilStorage) {
			for _, slot := range nilStorage[i] {
				slots[common.HexToHash(slot)] = nil
			}
		}
	}
	return storages
}
// Tests that if a disk layer becomes stale, no active external references will // Tests that if a disk layer becomes stale, no active external references will
// be returned with junk data. This version of the test flattens every diff layer // be returned with junk data. This version of the test flattens every diff layer
// to check internal corner case around the bottom-most memory accumulator. // to check internal corner case around the bottom-most memory accumulator.
......
...@@ -472,7 +472,7 @@ func (s *StateDB) updateStateObject(obj *stateObject) { ...@@ -472,7 +472,7 @@ func (s *StateDB) updateStateObject(obj *stateObject) {
// enough to track account updates at commit time, deletions need tracking // enough to track account updates at commit time, deletions need tracking
// at transaction boundary level to ensure we capture state clearing. // at transaction boundary level to ensure we capture state clearing.
if s.snap != nil { if s.snap != nil {
s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) s.snapAccounts[obj.addrHash] = snapshot.SlimAccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash)
} }
} }
......
...@@ -147,15 +147,8 @@ func (t *BlockTest) Run(snapshotter bool) error { ...@@ -147,15 +147,8 @@ func (t *BlockTest) Run(snapshotter bool) error {
} }
// Cross-check the snapshot-to-hash against the trie hash // Cross-check the snapshot-to-hash against the trie hash
if snapshotter { if snapshotter {
snapTree := chain.Snapshot() if err := snapshot.VerifyState(chain.Snapshot(), chain.CurrentBlock().Root()); err != nil {
root := chain.CurrentBlock().Root() return err
it, err := snapTree.AccountIterator(root, common.Hash{})
if err != nil {
return fmt.Errorf("Could not create iterator for root %x: %v", root, err)
}
generatedRoot := snapshot.GenerateTrieRoot(it)
if generatedRoot != root {
return fmt.Errorf("Snapshot corruption, got %d exp %d", generatedRoot, root)
} }
} }
return t.validateImportedHeaders(chain, validBlocks) return t.validateImportedHeaders(chain, validBlocks)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment