core/state/snapshot: implement storage iterator (#20971)

* core/state/snapshot: implement storage iterator * core/state/snapshot, tests: implement helper function * core/state/snapshot: fix storage issue If an account is deleted in tx_1 but recreated in tx_2, then it can happen that in this diff layer both destructSet and storageData record this account. In this case, the storage iterator should be able to iterate the slots belonging to the new account but disable further iteration in deeper layers (which belong to the old account) * core/state/snapshot: address peter and martin's comment * core/state: address comments * core/state/snapshot: fix test

core/state/snapshot: implement storage iterator (#20971)
* core/state/snapshot: implement storage iterator * core/state/snapshot, tests: implement helper function * core/state/snapshot: fix storage issue If an account is deleted in tx_1 but recreated in tx_2, then it can happen that in this diff layer both destructSet and storageData record this account. In this case, the storage iterator should be able to iterate the slots belonging to the new account but disable further iteration in deeper layers (which belong to the old account) * core/state/snapshot: address peter and martin's comment * core/state: address comments * core/state/snapshot: fix test
26d271df · gary rong · GitHub · 1264c19f · 26d271df · 26d271df
Unverified Commit 26d271df authored Apr 29, 2020 by gary rong Committed by GitHub Apr 29, 2020
13 changed files
--- a/core/state/snapshot/account.go
+++ b/core/state/snapshot/account.go
@@ -24,8 +24,10 @@ import (
 	"github.com/ethereum/go-ethereum/rlp"
 )
-// Account is a slim version of a state.Account, where the root and code hash
+// Account is a modified version of a state.Account, where the root is replaced
-// are replaced with a nil byte slice for empty accounts.
+// with a byte slice. This format can be used to represent full-consensus format
+// or slim-snapshot format, which replaces the empty root and code hash with a
+// nil byte slice.
 type Account struct {
 	Nonce    uint64
 	Balance  *big.Int
@@ -33,9 +35,8 @@ type Account struct {
 	CodeHash []byte
 }
-// AccountRLP converts a state.Account content into a slim snapshot version RLP
+// SlimAccount converts a state.Account content into a slim snapshot account
-// encoded.
+func SlimAccount(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) Account {
-func AccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte {
 	slim := Account{
 		Nonce:   nonce,
 		Balance: balance,
@@ -46,9 +47,40 @@ func AccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byt
 	if !bytes.Equal(codehash, emptyCode[:]) {
 		slim.CodeHash = codehash
 	}
-	data, err := rlp.EncodeToBytes(slim)
+	return slim
+}
+// SlimAccountRLP converts a state.Account content into a slim snapshot
+// version RLP encoded.
+func SlimAccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte {
+	data, err := rlp.EncodeToBytes(SlimAccount(nonce, balance, root, codehash))
 	if err != nil {
 		panic(err)
 	}
 	return data
 }
+// FullAccount decodes data in the 'slim RLP' format and returns the account
+// in the consensus format.
+func FullAccount(data []byte) (Account, error) {
+	var account Account
+	if err := rlp.DecodeBytes(data, &account); err != nil {
+		return Account{}, err
+	}
+	if len(account.Root) == 0 {
+		account.Root = emptyRoot[:]
+	}
+	if len(account.CodeHash) == 0 {
+		account.CodeHash = emptyCode[:]
+	}
+	return account, nil
+}
+// FullAccountRLP converts data in the 'slim RLP' format into the full RLP format.
+func FullAccountRLP(data []byte) ([]byte, error) {
+	account, err := FullAccount(data)
+	if err != nil {
+		return nil, err
+	}
+	return rlp.EncodeToBytes(account)
+}
--- a/core/state/snapshot/conversion.go
+++ b/core/state/snapshot/conversion.go
@@ -17,6 +17,8 @@
 package snapshot
 import (
+	"bytes"
+	"fmt"
 	"sync"
 	"time"
@@ -27,80 +29,239 @@ import (
 	"github.com/ethereum/go-ethereum/trie"
 )
-// conversionAccount is used for converting between full and slim format. When
+// trieKV represents a trie key-value pair
-// doing this, we can consider 'balance' as a byte array, as it has already
+type trieKV struct {
-// been converted from big.Int into an rlp-byteslice.
+	key   common.Hash
-type conversionAccount struct {
+	value []byte
-	Nonce    uint64
+}
-	Balance  []byte
-	Root     []byte
+type (
-	CodeHash []byte
+	// trieGeneratorFn is the interface of trie generation which can
+	// be implemented by different trie algorithms.
+	trieGeneratorFn func(in chan (trieKV), out chan (common.Hash))
+	// leafCallbackFn is the callback invoked at the leaves of the trie,
+	// returns the subtrie root with the specified subtrie identifier.
+	leafCallbackFn func(hash common.Hash, stat *generateStats) common.Hash
+)
+// GenerateAccountTrieRoot takes an account iterator and reproduces the root hash.
+func GenerateAccountTrieRoot(it AccountIterator) (common.Hash, error) {
+	return generateTrieRoot(it, common.Hash{}, stdGenerate, nil, &generateStats{start: time.Now()}, true)
+}
+// GenerateStorageTrieRoot takes a storage iterator and reproduces the root hash.
+func GenerateStorageTrieRoot(account common.Hash, it StorageIterator) (common.Hash, error) {
+	return generateTrieRoot(it, account, stdGenerate, nil, &generateStats{start: time.Now()}, true)
 }
-// SlimToFull converts data on the 'slim RLP' format into the full RLP-format
+// VerifyState takes the whole snapshot tree as the input, traverses all the accounts
-func SlimToFull(data []byte) ([]byte, error) {
+// as well as the corresponding storages and compares the re-computed hash with the
-	acc := &conversionAccount{}
+// original one (state root and the storage root).
-	if err := rlp.DecodeBytes(data, acc); err != nil {
+func VerifyState(snaptree *Tree, root common.Hash) error {
-		return nil, err
+	acctIt, err := snaptree.AccountIterator(root, common.Hash{})
+	if err != nil {
+		return err
 	}
-	if len(acc.Root) == 0 {
+	got, err := generateTrieRoot(acctIt, common.Hash{}, stdGenerate, func(account common.Hash, stat *generateStats) common.Hash {
-		acc.Root = emptyRoot[:]
+		storageIt, err := snaptree.StorageIterator(root, account, common.Hash{})
+		if err != nil {
+			return common.Hash{}
+		}
+		hash, err := generateTrieRoot(storageIt, account, stdGenerate, nil, stat, false)
+		if err != nil {
+			return common.Hash{}
+		}
+		return hash
+	}, &generateStats{start: time.Now()}, true)
+	if err != nil {
+		return err
 	}
-	if len(acc.CodeHash) == 0 {
+	if got != root {
-		acc.CodeHash = emptyCode[:]
+		return fmt.Errorf("State root hash mismatch, got %x, want %x", got, root)
 	}
-	fullData, err := rlp.EncodeToBytes(acc)
+	return nil
-	if err != nil {
+}
-		return nil, err
+// generateStats is a collection of statistics gathered by the trie generator
+// for logging purposes.
+type generateStats struct {
+	accounts   uint64
+	slots      uint64
+	curAccount common.Hash
+	curSlot    common.Hash
+	start      time.Time
+	lock       sync.RWMutex
+}
+// progress records the progress the trie generator has made recently.
+func (stat *generateStats) progress(accounts, slots uint64, curAccount common.Hash, curSlot common.Hash) {
+	stat.lock.Lock()
+	defer stat.lock.Unlock()
+	stat.accounts += accounts
+	stat.slots += slots
+	if curAccount != (common.Hash{}) {
+		stat.curAccount = curAccount
+	}
+	if curSlot != (common.Hash{}) {
+		stat.curSlot = curSlot
 	}
-	return fullData, nil
 }
-// trieKV represents a trie key-value pair
+// report prints the cumulative progress statistic smartly.
-type trieKV struct {
+func (stat *generateStats) report() {
-	key   common.Hash
+	stat.lock.RLock()
-	value []byte
+	defer stat.lock.RUnlock()
+	var ctx []interface{}
+	if stat.curSlot != (common.Hash{}) {
+		ctx = append(ctx, []interface{}{
+			"in", stat.curAccount,
+			"at", stat.curSlot,
+		}...)
+	} else {
+		ctx = append(ctx, []interface{}{"at", stat.curAccount}...)
+	}
+	// Add the usual measurements
+	ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
+	if stat.slots != 0 {
+		ctx = append(ctx, []interface{}{"slots", stat.slots}...)
+	}
+	ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
+	log.Info("Generating trie hash from snapshot", ctx...)
 }
-type trieGeneratorFn func(in chan (trieKV), out chan (common.Hash))
+// reportDone prints the last log when the whole generation is finished.
+func (stat *generateStats) reportDone() {
+	stat.lock.RLock()
+	defer stat.lock.RUnlock()
-// GenerateTrieRoot takes an account iterator and reproduces the root hash.
+	var ctx []interface{}
-func GenerateTrieRoot(it AccountIterator) common.Hash {
+	ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
-	return generateTrieRoot(it, stdGenerate)
+	if stat.slots != 0 {
+		ctx = append(ctx, []interface{}{"slots", stat.slots}...)
+	}
+	ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
+	log.Info("Generated trie hash from snapshot", ctx...)
 }
-func generateTrieRoot(it AccountIterator, generatorFn trieGeneratorFn) common.Hash {
+// generateTrieRoot generates the trie hash based on the snapshot iterator.
+// It can be used for generating account trie, storage trie or even the
+// whole state which connects the accounts and the corresponding storages.
+func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) {
 	var (
-		in  = make(chan trieKV)      // chan to pass leaves
+		in      = make(chan trieKV)         // chan to pass leaves
-		out = make(chan common.Hash) // chan to collect result
+		out     = make(chan common.Hash, 1) // chan to collect result
-		wg  sync.WaitGroup
+		stoplog = make(chan bool, 1)        // 1-size buffer, works when logging is not enabled
+		wg      sync.WaitGroup
 	)
+	// Spin up a go-routine for trie hash re-generation
 	wg.Add(1)
 	go func() {
+		defer wg.Done()
 		generatorFn(in, out)
-		wg.Done()
 	}()
-	// Feed leaves
-	start := time.Now()
+	// Spin up a go-routine for progress logging
-	logged := time.Now()
+	if report && stats != nil {
-	accounts := 0
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			timer := time.NewTimer(0)
+			defer timer.Stop()
+			for {
+				select {
+				case <-timer.C:
+					stats.report()
+					timer.Reset(time.Second * 8)
+				case success := <-stoplog:
+					if success {
+						stats.reportDone()
+					}
+					return
+				}
+			}
+		}()
+	}
+	// stop is a helper function to shutdown the background threads
+	// and return the re-generated trie hash.
+	stop := func(success bool) common.Hash {
+		close(in)
+		result := <-out
+		stoplog <- success
+		wg.Wait()
+		return result
+	}
+	var (
+		logged    = time.Now()
+		processed = uint64(0)
+		leaf      trieKV
+		last      common.Hash
+	)
+	// Start to feed leaves
 	for it.Next() {
-		slimData := it.Account()
+		if account == (common.Hash{}) {
-		fullData, _ := SlimToFull(slimData)
+			var (
-		l := trieKV{it.Hash(), fullData}
+				err      error
-		in <- l
+				fullData []byte
-		if time.Since(logged) > 8*time.Second {
+			)
-			log.Info("Generating trie hash from snapshot",
+			if leafCallback == nil {
-				"at", l.key, "accounts", accounts, "elapsed", time.Since(start))
+				fullData, err = FullAccountRLP(it.(AccountIterator).Account())
-			logged = time.Now()
+				if err != nil {
+					stop(false)
+					return common.Hash{}, err
+				}
+			} else {
+				account, err := FullAccount(it.(AccountIterator).Account())
+				if err != nil {
+					stop(false)
+					return common.Hash{}, err
+				}
+				// Apply the leaf callback. Normally the callback is used to traverse
+				// the storage trie and re-generate the subtrie root.
+				subroot := leafCallback(it.Hash(), stats)
+				if !bytes.Equal(account.Root, subroot.Bytes()) {
+					stop(false)
+					return common.Hash{}, fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot)
+				}
+				fullData, err = rlp.EncodeToBytes(account)
+				if err != nil {
+					stop(false)
+					return common.Hash{}, err
+				}
+			}
+			leaf = trieKV{it.Hash(), fullData}
+		} else {
+			leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())}
+		}
+		in <- leaf
+		// Accumulate the generation statistics if required.
+		processed++
+		if time.Since(logged) > 3*time.Second && stats != nil {
+			if account == (common.Hash{}) {
+				stats.progress(processed, 0, it.Hash(), common.Hash{})
+			} else {
+				stats.progress(0, processed, account, it.Hash())
+			}
+			logged, processed = time.Now(), 0
+		}
+		last = it.Hash()
+	}
+	// Commit the last part of the statistics.
+	if processed > 0 && stats != nil {
+		if account == (common.Hash{}) {
+			stats.progress(processed, 0, last, common.Hash{})
+		} else {
+			stats.progress(0, processed, account, last)
 		}
-		accounts++
 	}
-	close(in)
+	result := stop(true)
-	result := <-out
+	return result, nil
-	log.Info("Generated trie hash from snapshot", "accounts", accounts, "elapsed", time.Since(start))
-	wg.Wait()
-	return result
 }
 // stdGenerate is a very basic hexary trie builder which uses the same Trie

--- a/core/state/snapshot/difflayer.go
+++ b/core/state/snapshot/difflayer.go
@@ -105,6 +105,13 @@ type diffLayer struct {
 	root  common.Hash // Root hash to which this snapshot diff belongs to
 	stale uint32      // Signals that the layer became stale (state progressed)
+	// destructSet is a very special helper marker. If an account is marked as
+	// deleted, then it's recorded in this set. However, an account is allowed
+	// to be included here but still be available in other sets (e.g. storageData).
+	// The reason is that the diff layer includes all the changes in a *block*. It
+	// can happen that in tx_1 account A is self-destructed while in tx_2 it's
+	// recreated. But we still need this marker to indicate that the "old" A is
+	// deleted; all data in the other sets belongs to the "new" A.
 	destructSet map[common.Hash]struct{}               // Keyed markers for deleted (and potentially) recreated accounts
 	accountList []common.Hash                          // List of account for iteration. If it exists, it's sorted, otherwise it's nil
 	accountData map[common.Hash][]byte                 // Keyed accounts for direct retrival (nil means deleted)
@@ -169,6 +176,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
 		destructSet: destructs,
 		accountData: accounts,
 		storageData: storage,
+		storageList: make(map[common.Hash][]common.Hash),
 	}
 	switch parent := parent.(type) {
 	case *diskLayer:
@@ -194,11 +202,6 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
 		dl.memory += uint64(common.HashLength + len(data))
 		snapshotDirtyAccountWriteMeter.Mark(int64(len(data)))
 	}
-	// Fill the storage hashes and sort them for the iterator
-	dl.storageList = make(map[common.Hash][]common.Hash)
-	for accountHash := range destructs {
-		dl.storageList[accountHash] = nil
-	}
 	// Determine memory size and track the dirty writes
 	for _, slots := range storage {
 		for _, data := range slots {
@@ -206,7 +209,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
 			snapshotDirtyStorageWriteMeter.Mark(int64(len(data)))
 		}
 	}
-	dl.memory += uint64(len(dl.storageList) * common.HashLength)
+	dl.memory += uint64(len(destructs) * common.HashLength)
 	return dl
 }
@@ -287,6 +290,8 @@ func (dl *diffLayer) Account(hash common.Hash) (*Account, error) {
 // AccountRLP directly retrieves the account RLP associated with a particular
 // hash in the snapshot slim data format.
+//
+// Note the returned account is not a copy, please don't modify it.
 func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) {
 	// Check the bloom filter first whether there's even a point in reaching into
 	// all the maps in all the layers below
@@ -347,6 +352,8 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) {
 // Storage directly retrieves the storage data associated with a particular hash,
 // within a particular account. If the slot is unknown to this diff, it's parent
 // is consulted.
+//
+// Note the returned slot is not a copy, please don't modify it.
 func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) {
 	// Check the bloom filter first whether there's even a point in reaching into
 	// all the maps in all the layers below
@@ -502,22 +509,29 @@ func (dl *diffLayer) AccountList() []common.Hash {
 		}
 	}
 	sort.Sort(hashes(dl.accountList))
+	dl.memory += uint64(len(dl.accountList) * common.HashLength)
 	return dl.accountList
 }
 // StorageList returns a sorted list of all storage slot hashes in this difflayer
-// for the given account.
+// for the given account. If the whole storage is destructed in this layer, then
+// an additional flag *destructed = true* will be returned, otherwise the flag is
+// false. Besides, the returned list will include the hashes of deleted storage
+// slots. Note a special case: an account is deleted in a prior tx but recreated
+// in a following tx with some storage slots set. In this case the returned list
+// is not empty but the flag is true.
 //
 // Note, the returned slice is not a copy, so do not modify it.
-func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
+func (dl *diffLayer) StorageList(accountHash common.Hash) ([]common.Hash, bool) {
 	// If an old list already exists, return it
 	dl.lock.RLock()
-	list := dl.storageList[accountHash]
+	_, destructed := dl.destructSet[accountHash]
+	if list, exist := dl.storageList[accountHash]; exist {
+		dl.lock.RUnlock()
+		return list, destructed // The list might be nil
+	}
 	dl.lock.RUnlock()
-	if list != nil {
-		return list
-	}
 	// No old sorted account list exists, generate a new one
 	dl.lock.Lock()
 	defer dl.lock.Unlock()
@@ -529,5 +543,6 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
 	}
 	sort.Sort(hashes(storageList))
 	dl.storageList[accountHash] = storageList
-	return storageList
+	dl.memory += uint64(len(dl.storageList)*common.HashLength + common.HashLength)
+	return storageList, destructed
 }
--- a/core/state/snapshot/difflayer_test.go
+++ b/core/state/snapshot/difflayer_test.go
@@ -109,7 +109,8 @@ func TestMergeBasics(t *testing.T) {
 			if have, want := len(merged.storageList), i; have != want {
 				t.Errorf("[1] storageList wrong: have %v, want %v", have, want)
 			}
-			if have, want := len(merged.StorageList(aHash)), len(sMap); have != want {
+			list, _ := merged.StorageList(aHash)
+			if have, want := len(list), len(sMap); have != want {
 				t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want)
 			}
 			if have, want := len(merged.storageList[aHash]), len(sMap); have != want {

--- a/core/state/snapshot/generate.go
+++ b/core/state/snapshot/generate.go
@@ -42,7 +42,7 @@ var (
 )
 // generatorStats is a collection of statistics gathered by the snapshot generator
-// for  logging purposes.
+// for logging purposes.
 type generatorStats struct {
 	wiping   chan struct{}      // Notification channel if wiping is in progress
 	origin   uint64             // Origin prefix where generation started
@@ -167,7 +167,7 @@ func (dl *diskLayer) generate(stats *generatorStats) {
 		if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil {
 			log.Crit("Invalid account encountered during snapshot creation", "err", err)
 		}
-		data := AccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash)
+		data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash)
 		// If the account is not yet in-progress, write it out
 		if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) {

--- a/core/state/snapshot/iterator.go
+++ b/core/state/snapshot/iterator.go
@@ -26,9 +26,9 @@ import (
 	"github.com/ethereum/go-ethereum/ethdb"
 )
-// AccountIterator is an iterator to step over all the accounts in a snapshot,
+// Iterator is an iterator to step over all the accounts or the specific
-// which may or may npt be composed of multiple layers.
+// storage in a snapshot which may or may not be composed of multiple layers.
-type AccountIterator interface {
+type Iterator interface {
 	// Next steps the iterator forward one element, returning false if exhausted,
 	// or an error if iteration failed for some reason (e.g. root being iterated
 	// becomes stale and garbage collected).
@@ -38,18 +38,35 @@ type AccountIterator interface {
 	// caused a premature iteration exit (e.g. snapshot stack becoming stale).
 	Error() error
-	// Hash returns the hash of the account the iterator is currently at.
+	// Hash returns the hash of the account or storage slot the iterator is
+	// currently at.
 	Hash() common.Hash
-	// Account returns the RLP encoded slim account the iterator is currently at.
-	// An error will be returned if the iterator becomes invalid (e.g. snaph
-	Account() []byte
 	// Release releases associated resources. Release should always succeed and
 	// can be called multiple times without causing error.
 	Release()
 }
+// AccountIterator is an iterator to step over all the accounts in a snapshot,
+// which may or may not be composed of multiple layers.
+type AccountIterator interface {
+	Iterator
+	// Account returns the RLP encoded slim account the iterator is currently at.
+	// An error will be returned if the iterator becomes invalid
+	Account() []byte
+}
+// StorageIterator is an iterator to step over the specific storage in a snapshot,
+// which may or may not be composed of multiple layers.
+type StorageIterator interface {
+	Iterator
+	// Slot returns the storage slot the iterator is currently at. An error will
+	// be returned if the iterator becomes invalid
+	Slot() []byte
+}
 // diffAccountIterator is an account iterator that steps over the accounts (both
 // live and deleted) contained within a single diff layer. Higher order iterators
 // will use the deleted accounts to skip deeper iterators.
@@ -120,6 +137,8 @@ func (it *diffAccountIterator) Hash() common.Hash {
 // This method assumes that flattening does not delete elements from
 // the accountdata mapping (writing nil into it is fine though), and will panic
 // if elements have been deleted.
+//
+// Note the returned account is not a copy, please don't modify it.
 func (it *diffAccountIterator) Account() []byte {
 	it.layer.lock.RLock()
 	blob, ok := it.layer.accountData[it.curHash]
@@ -164,7 +183,7 @@ func (it *diskAccountIterator) Next() bool {
 	}
 	// Try to advance the iterator and release it if we reached the end
 	for {
-		if !it.it.Next() || !bytes.HasPrefix(it.it.Key(), rawdb.SnapshotAccountPrefix) {
+		if !it.it.Next() {
 			it.it.Release()
 			it.it = nil
 			return false
@@ -182,12 +201,15 @@ func (it *diskAccountIterator) Next() bool {
 // A diff layer is immutable after creation content wise and can always be fully
 // iterated without error, so this method always returns nil.
 func (it *diskAccountIterator) Error() error {
+	if it.it == nil {
+		return nil // Iterator is exhausted and released
+	}
 	return it.it.Error()
 }
 // Hash returns the hash of the account the iterator is currently at.
 func (it *diskAccountIterator) Hash() common.Hash {
-	return common.BytesToHash(it.it.Key())
+	return common.BytesToHash(it.it.Key()) // The prefix will be truncated
 }
 // Account returns the RLP encoded slim account the iterator is currently at.
@@ -203,3 +225,176 @@ func (it *diskAccountIterator) Release() {
 		it.it = nil
 	}
 }
+// diffStorageIterator is a storage iterator that steps over the specific storage
+// (both live and deleted) contained within a single diff layer. Higher order
+// iterators will use the deleted slot to skip deeper iterators.
+type diffStorageIterator struct {
+	// curHash is the current hash the iterator is positioned on. The field is
+	// explicitly tracked since the referenced diff layer might go stale after
+	// the iterator was positioned and we don't want to fail accessing the old
+	// hash as long as the iterator is not touched any more.
+	curHash common.Hash
+	account common.Hash
+	layer *diffLayer    // Live layer to retrieve values from
+	keys  []common.Hash // Keys left in the layer to iterate
+	fail  error         // Any failures encountered (stale)
+}
+// StorageIterator creates a storage iterator over a single diff layer.
+// Besides the storage iterator itself, an additional flag "destructed" is
+// returned. If it's true, the whole storage is destructed in this layer
+// (and maybe recreated too), so don't bother deeper layers for storage
+// retrieval.
+func (dl *diffLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
+	// Create the storage iterator for this account even if it's marked
+	// as destructed. The iterator is for the new account, which just
+	// has the same address as the deleted one.
+	hashes, destructed := dl.StorageList(account)
+	index := sort.Search(len(hashes), func(i int) bool {
+		return bytes.Compare(seek[:], hashes[i][:]) <= 0
+	})
+	// Assemble and return the already seeked iterator
+	return &diffStorageIterator{
+		layer:   dl,
+		account: account,
+		keys:    hashes[index:],
+	}, destructed
+}
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diffStorageIterator) Next() bool {
+	// If the iterator was already stale, consider it a programmer error. Although
+	// we could just return false here, triggering this path would probably mean
+	// somebody forgot to check for Error, so lets blow up instead of undefined
+	// behavior that's hard to debug.
+	if it.fail != nil {
+		panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
+	}
+	// Stop iterating if all keys were exhausted
+	if len(it.keys) == 0 {
+		return false
+	}
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+		return false
+	}
+	// Iterator seems to be still alive, retrieve and cache the live hash
+	it.curHash = it.keys[0]
+	// key cached, shift the iterator and notify the user of success
+	it.keys = it.keys[1:]
+	return true
+}
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (it *diffStorageIterator) Error() error {
+	return it.fail
+}
+// Hash returns the hash of the storage slot the iterator is currently at.
+func (it *diffStorageIterator) Hash() common.Hash {
+	return it.curHash
+}
+// Slot returns the raw storage slot value the iterator is currently at.
+// This method may _fail_, if the underlying layer has been flattened between
+// the call to Next and Slot. That type of error will set it.fail.
+// This method assumes that flattening does not delete elements from
+// the storage mapping (writing nil into it is fine though), and will panic
+// if elements have been deleted.
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *diffStorageIterator) Slot() []byte {
+	it.layer.lock.RLock()
+	storage, ok := it.layer.storageData[it.account]
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent account storage: %x", it.account))
+	}
+	// Storage slot might be nil(deleted), but it must exist
+	blob, ok := storage[it.curHash]
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent storage slot: %x", it.curHash))
+	}
+	it.layer.lock.RUnlock()
+	if it.layer.Stale() {
+		it.fail, it.keys = ErrSnapshotStale, nil
+	}
+	return blob
+}
+// Release is a noop for diff storage iterators as there are no held resources.
+func (it *diffStorageIterator) Release() {}
+// diskStorageIterator is a storage iterator that steps over the live storage
+// contained within a disk layer.
+type diskStorageIterator struct {
+	layer   *diskLayer
+	account common.Hash
+	it      ethdb.Iterator
+}
+// StorageIterator creates a storage iterator over a disk layer.
+// If the whole storage is destructed, then all entries in the disk
+// layer are deleted already. So the "destructed" flag returned here
+// is always false.
+func (dl *diskLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
+	pos := common.TrimRightZeroes(seek[:])
+	return &diskStorageIterator{
+		layer:   dl,
+		account: account,
+		it:      dl.diskdb.NewIterator(append(rawdb.SnapshotStoragePrefix, account.Bytes()...), pos),
+	}, false
+}
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diskStorageIterator) Next() bool {
+	// If the iterator was already exhausted, don't bother
+	if it.it == nil {
+		return false
+	}
+	// Try to advance the iterator and release it if we reached the end
+	for {
+		if !it.it.Next() {
+			it.it.Release()
+			it.it = nil
+			return false
+		}
+		if len(it.it.Key()) == len(rawdb.SnapshotStoragePrefix)+common.HashLength+common.HashLength {
+			break
+		}
+	}
+	return true
+}
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+//
+// Once the iterator is exhausted and released this method returns nil;
+// otherwise it reports the error of the underlying database iterator.
+func (it *diskStorageIterator) Error() error {
+	if it.it == nil {
+		return nil // Iterator is exhausted and released
+	}
+	return it.it.Error()
+}
+// Hash returns the hash of the storage slot the iterator is currently at.
+func (it *diskStorageIterator) Hash() common.Hash {
+	return common.BytesToHash(it.it.Key()) // The prefix will be truncated
+}
+// Slot returns the raw storage slot content the iterator is currently at.
+func (it *diskStorageIterator) Slot() []byte {
+	return it.it.Value()
+}
+// Release releases the database snapshot held during iteration.
+func (it *diskStorageIterator) Release() {
+	// The iterator is auto-released on exhaustion, so make sure it's still alive
+	if it.it != nil {
+		it.it.Release()
+		it.it = nil
+	}
+}
--- a/core/state/snapshot/iterator_binary.go
+++ b/core/state/snapshot/iterator_binary.go
@@ -22,35 +22,91 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 )
-// binaryAccountIterator is a simplistic iterator to step over the accounts in
+// binaryIterator is a simplistic iterator to step over the accounts or storage
-// a snapshot, which may or may npt be composed of multiple layers. Performance
+// in a snapshot, which may or may not be composed of multiple layers. Performance
 // wise this iterator is slow, it's meant for cross validating the fast one,
-type binaryAccountIterator struct {
+type binaryIterator struct {
-	a     AccountIterator
+	a               Iterator
-	b     AccountIterator
+	b               Iterator
-	aDone bool
+	aDone           bool
-	bDone bool
+	bDone           bool
-	k     common.Hash
+	accountIterator bool
-	fail  error
+	k               common.Hash
+	account         common.Hash
+	fail            error
 }
-// newBinaryAccountIterator creates a simplistic account iterator to step over
+// initBinaryAccountIterator creates a simplistic iterator to step over all the
-// all the accounts in a slow, but eaily verifiable way.
+// accounts in a slow, but easily verifiable way. Note this function is used for
-func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
+// initialization, use `newBinaryAccountIterator` as the API.
+func (dl *diffLayer) initBinaryAccountIterator() Iterator {
+	parent, ok := dl.parent.(*diffLayer)
+	if !ok {
+		l := &binaryIterator{
+			a:               dl.AccountIterator(common.Hash{}),
+			b:               dl.Parent().AccountIterator(common.Hash{}),
+			accountIterator: true,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = !l.b.Next()
+		return l
+	}
+	l := &binaryIterator{
+		a:               dl.AccountIterator(common.Hash{}),
+		b:               parent.initBinaryAccountIterator(),
+		accountIterator: true,
+	}
+	l.aDone = !l.a.Next()
+	l.bDone = !l.b.Next()
+	return l
+}
+// initBinaryStorageIterator creates a simplistic iterator to step over all the
+// storage slots in a slow, but easily verifiable way. Note this function is used
+// for initialization, use `newBinaryStorageIterator` as the API.
+func (dl *diffLayer) initBinaryStorageIterator(account common.Hash) Iterator {
 	parent, ok := dl.parent.(*diffLayer)
 	if !ok {
-		// parent is the disk layer
+		// If the storage in this layer is already destructed, discard all
-		l := &binaryAccountIterator{
+		// deeper layers but still return a valid single-branch iterator.
-			a: dl.AccountIterator(common.Hash{}),
+		a, destructed := dl.StorageIterator(account, common.Hash{})
-			b: dl.Parent().AccountIterator(common.Hash{}),
+		if destructed {
+			l := &binaryIterator{
+				a:       a,
+				account: account,
+			}
+			l.aDone = !l.a.Next()
+			l.bDone = true
+			return l
+		}
+		// The parent is the disk layer, so there is no need to handle "destructed"
+		// anymore.
+		b, _ := dl.Parent().StorageIterator(account, common.Hash{})
+		l := &binaryIterator{
+			a:       a,
+			b:       b,
+			account: account,
 		}
 		l.aDone = !l.a.Next()
 		l.bDone = !l.b.Next()
 		return l
 	}
-	l := &binaryAccountIterator{
+	// If the storage in this layer is already destructed, discard all
-		a: dl.AccountIterator(common.Hash{}),
+	// deeper layers but still return a valid single-branch iterator.
-		b: parent.newBinaryAccountIterator(),
+	a, destructed := dl.StorageIterator(account, common.Hash{})
+	if destructed {
+		l := &binaryIterator{
+			a:       a,
+			account: account,
+		}
+		l.aDone = !l.a.Next()
+		l.bDone = true
+		return l
+	}
+	l := &binaryIterator{
+		a:       a,
+		b:       parent.initBinaryStorageIterator(account),
+		account: account,
 	}
 	l.aDone = !l.a.Next()
 	l.bDone = !l.b.Next()
@@ -60,7 +116,7 @@ func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
 // Next steps the iterator forward one element, returning false if exhausted,
 // or an error if iteration failed for some reason (e.g. root being iterated
 // becomes stale and garbage collected).
-func (it *binaryAccountIterator) Next() bool {
+func (it *binaryIterator) Next() bool {
 	if it.aDone && it.bDone {
 		return false
 	}
@@ -92,19 +148,24 @@ first:
 // Error returns any failure that occurred during iteration, which might have
 // caused a premature iteration exit (e.g. snapshot stack becoming stale).
-func (it *binaryAccountIterator) Error() error {
+func (it *binaryIterator) Error() error {
 	return it.fail
 }
 // Hash returns the hash of the account the iterator is currently at.
-func (it *binaryAccountIterator) Hash() common.Hash {
+func (it *binaryIterator) Hash() common.Hash {
 	return it.k
 }
 // Account returns the RLP encoded slim account the iterator is currently at, or
 // nil if the iterated snapshot stack became stale (you can check Error after
 // to see if it failed or not).
-func (it *binaryAccountIterator) Account() []byte {
+//
+// Note the returned account is not a copy, please don't modify it.
+func (it *binaryIterator) Account() []byte {
+	if !it.accountIterator {
+		return nil
+	}
 	// The topmost iterator must be `diffAccountIterator`
 	blob, err := it.a.(*diffAccountIterator).layer.AccountRLP(it.k)
 	if err != nil {
@@ -114,8 +175,39 @@ func (it *binaryAccountIterator) Account() []byte {
 	return blob
 }
+// Slot returns the raw storage slot data the iterator is currently at, or
+// nil if the iterated snapshot stack became stale (you can check Error after
+// to see if it failed or not).
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *binaryIterator) Slot() []byte {
+	if it.accountIterator {
+		return nil
+	}
+	blob, err := it.a.(*diffStorageIterator).layer.Storage(it.account, it.k)
+	if err != nil {
+		it.fail = err
+		return nil
+	}
+	return blob
+}
 // Release recursively releases all the iterators in the stack.
-func (it *binaryAccountIterator) Release() {
+func (it *binaryIterator) Release() {
 	it.a.Release()
 	it.b.Release()
 }
+// newBinaryAccountIterator creates a simplistic account iterator to step over
+// all the accounts in a slow, but easily verifiable way.
+func (dl *diffLayer) newBinaryAccountIterator() AccountIterator {
+	iter := dl.initBinaryAccountIterator()
+	return iter.(AccountIterator)
+}
+// newBinaryStorageIterator creates a simplistic storage iterator to step over
+// all the storage slots in a slow, but easily verifiable way.
+func (dl *diffLayer) newBinaryStorageIterator(account common.Hash) StorageIterator {
+	iter := dl.initBinaryStorageIterator(account)
+	return iter.(StorageIterator)
+}
--- a/core/state/snapshot/iterator_fast.go
+++ b/core/state/snapshot/iterator_fast.go
--- a/core/state/snapshot/iterator_test.go
+++ b/core/state/snapshot/iterator_test.go
--- a/core/state/snapshot/snapshot.go
+++ b/core/state/snapshot/snapshot.go
@@ -138,6 +138,9 @@ type snapshot interface {
 	// AccountIterator creates an account iterator over an arbitrary layer.
 	AccountIterator(seek common.Hash) AccountIterator
+	// StorageIterator creates a storage iterator over an arbitrary layer.
+	StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
 }
 // SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent
@@ -601,3 +604,9 @@ func (t *Tree) Rebuild(root common.Hash) {
 func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
 	return newFastAccountIterator(t, root, seek)
 }
+// StorageIterator creates a new storage iterator for the specified root hash and
+// account. The iterator will be moved to the specified start position.
+func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
+	return newFastStorageIterator(t, root, account, seek)
+}
--- a/core/state/snapshot/snapshot_test.go
+++ b/core/state/snapshot/snapshot_test.go
@@ -60,6 +60,29 @@ func randomAccountSet(hashes ...string) map[common.Hash][]byte {
 	return accounts
 }
+// randomStorageSet generates a set of random slots with the given strings as
+// the slot addresses.
+func randomStorageSet(accounts []string, hashes [][]string, nilStorage [][]string) map[common.Hash]map[common.Hash][]byte {
+	storages := make(map[common.Hash]map[common.Hash][]byte)
+	for index, account := range accounts {
+		storages[common.HexToHash(account)] = make(map[common.Hash][]byte)
+		if index < len(hashes) {
+			hashes := hashes[index]
+			for _, hash := range hashes {
+				storages[common.HexToHash(account)][common.HexToHash(hash)] = randomHash().Bytes()
+			}
+		}
+		if index < len(nilStorage) {
+			nils := nilStorage[index]
+			for _, hash := range nils {
+				storages[common.HexToHash(account)][common.HexToHash(hash)] = nil
+			}
+		}
+	}
+	return storages
+}
 // Tests that if a disk layer becomes stale, no active external references will
 // be returned with junk data. This version of the test flattens every diff layer
 // to check internal corner case around the bottom-most memory accumulator.

--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@@ -472,7 +472,7 @@ func (s *StateDB) updateStateObject(obj *stateObject) {
 	// enough to track account updates at commit time, deletions need tracking
 	// at transaction boundary level to ensure we capture state clearing.
 	if s.snap != nil {
-		s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash)
+		s.snapAccounts[obj.addrHash] = snapshot.SlimAccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash)
 	}
 }

--- a/tests/block_test_util.go
+++ b/tests/block_test_util.go
@@ -147,15 +147,8 @@ func (t *BlockTest) Run(snapshotter bool) error {
 	}
 	// Cross-check the snapshot-to-hash against the trie hash
 	if snapshotter {
-		snapTree := chain.Snapshot()
+		if err := snapshot.VerifyState(chain.Snapshot(), chain.CurrentBlock().Root()); err != nil {
-		root := chain.CurrentBlock().Root()
+			return err
-		it, err := snapTree.AccountIterator(root, common.Hash{})
-		if err != nil {
-			return fmt.Errorf("Could not create iterator for root %x: %v", root, err)
-		}
-		generatedRoot := snapshot.GenerateTrieRoot(it)
-		if generatedRoot != root {
-			return fmt.Errorf("Snapshot corruption, got %d exp %d", generatedRoot, root)
 		}
 	}
 	return t.validateImportedHeaders(chain, validBlocks)