Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/state/snapshot: detect and clean up dangling storage snapshot in generation #24811

Merged
merged 30 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8604adb
core/state/snapshot: check dangling storages when generating snapshot
rjl493456442 Apr 8, 2022
51f8a0e
core/state/snapshot: polish
rjl493456442 Apr 8, 2022
0ee1ebe
core/state/snapshot: wipe the last part of the dangling storages
rjl493456442 Apr 8, 2022
9b9dba1
core/state/snapshot: fix and add tests
rjl493456442 Apr 8, 2022
617eda9
core/state/snapshot: fix comment
rjl493456442 Apr 8, 2022
bbb6b94
README: remove mentions of fast sync (#24656)
nuoomnoy02 Apr 7, 2022
816f6cb
core, cmd: expose dangling storage detector for wider usage
rjl493456442 Apr 26, 2022
af23c13
core/state/snapshot: rename variable
rjl493456442 Apr 26, 2022
87d8bc3
core, ethdb: use global iterators for snapshot generation
rjl493456442 Apr 28, 2022
f4a489d
core/state/snapshot: polish
rjl493456442 May 4, 2022
5f37c25
cmd, core/state/snapshot: polish
rjl493456442 May 4, 2022
0584fc6
core/state/snapshot: polish
rjl493456442 May 4, 2022
546ce97
Update core/state/snapshot/generate.go
rjl493456442 May 4, 2022
b88d7ac
ethdb: extend db test suite and fix memorydb iterator
rjl493456442 May 5, 2022
e00ff21
ethdb/dbtest: rollback changes
rjl493456442 May 5, 2022
54caa24
ethdb/memorydb: simplify iteration
rjl493456442 May 5, 2022
7fca158
core/state/snapshot: update dangling counter
rjl493456442 May 5, 2022
e178af1
core/state/snapshot: release iterators
rjl493456442 May 7, 2022
3acad8d
core/state/snapshot: update metrics
rjl493456442 May 7, 2022
9a1ccd9
core/state/snapshot: update time metrics
rjl493456442 May 7, 2022
5df1225
metrics/influxdb: temp solution to present counter meaningfully, remo…
rjl493456442 May 7, 2022
d3fb321
add debug log, revert later
rjl493456442 May 7, 2022
83f60af
core/state/snapshot: fix iterator panic
rjl493456442 May 7, 2022
78ed542
all: customized snapshot iterator for backward iteration
rjl493456442 May 7, 2022
254666a
core, ethdb: polish
rjl493456442 May 9, 2022
1f5442d
core/state/snapshot: remove debug log
rjl493456442 May 9, 2022
55577d0
core/state/snapshot: address comments from peter
rjl493456442 May 10, 2022
61dcb92
core/state/snapshot: reopen the iterator at the next position
rjl493456442 May 10, 2022
c80a059
ethdb, core/state/snapshot: address comment from peter
rjl493456442 May 10, 2022
25b0392
core/state/snapshot: reopen exhausted iterators
rjl493456442 May 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 2 additions & 61 deletions cmd/geth/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"time"

Expand All @@ -32,7 +31,6 @@ import (
"github.com/ethereum/go-ethereum/core/state/snapshot"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
Expand Down Expand Up @@ -223,15 +221,7 @@ func verifyState(ctx *cli.Context) error {
return err
}
log.Info("Verified the state", "root", root)
if err := checkDanglingDiskStorage(chaindb); err != nil {
log.Error("Dangling snap disk-storage check failed", "root", root, "err", err)
return err
}
if err := checkDanglingMemStorage(chaindb); err != nil {
log.Error("Dangling snap mem-storage check failed", "root", root, "err", err)
return err
}
return nil
return snapshot.CheckDanglingStorage(chaindb)
}

// checkDanglingStorage iterates the snap storage data, and verifies that all
Expand All @@ -240,56 +230,7 @@ func checkDanglingStorage(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()

chaindb := utils.MakeChainDatabase(ctx, stack, true)
if err := checkDanglingDiskStorage(chaindb); err != nil {
return err
}
return checkDanglingMemStorage(chaindb)

}

// checkDanglingDiskStorage checks if there is any 'dangling' storage data in the
// disk-backed snapshot layer.
func checkDanglingDiskStorage(chaindb ethdb.Database) error {
log.Info("Checking dangling snapshot disk storage")
var (
lastReport = time.Now()
start = time.Now()
lastKey []byte
it = rawdb.NewKeyLengthIterator(chaindb.NewIterator(rawdb.SnapshotStoragePrefix, nil), 1+2*common.HashLength)
)
defer it.Release()
for it.Next() {
k := it.Key()
accKey := k[1:33]
if bytes.Equal(accKey, lastKey) {
// No need to look up for every slot
continue
}
lastKey = common.CopyBytes(accKey)
if time.Since(lastReport) > time.Second*8 {
log.Info("Iterating snap storage", "at", fmt.Sprintf("%#x", accKey), "elapsed", common.PrettyDuration(time.Since(start)))
lastReport = time.Now()
}
if data := rawdb.ReadAccountSnapshot(chaindb, common.BytesToHash(accKey)); len(data) == 0 {
log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", accKey), "storagekey", fmt.Sprintf("%#x", k))
return fmt.Errorf("dangling snapshot storage account %#x", accKey)
}
}
log.Info("Verified the snapshot disk storage", "time", common.PrettyDuration(time.Since(start)), "err", it.Error())
return nil
}

// checkDanglingMemStorage checks if there is any 'dangling' storage in the journalled
// snapshot difflayers.
func checkDanglingMemStorage(chaindb ethdb.Database) error {
start := time.Now()
log.Info("Checking dangling snapshot difflayer journalled storage")
if err := snapshot.CheckJournalStorage(chaindb); err != nil {
return err
}
log.Info("Verified the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start)))
return nil
return snapshot.CheckDanglingStorage(utils.MakeChainDatabase(ctx, stack, true))
}

// traverseState is a helper function used for pruning verification.
Expand Down
233 changes: 233 additions & 0 deletions core/state/snapshot/context.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snapshot

import (
"bytes"
"encoding/binary"
"errors"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/math"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
)

const (
snapAccount = "account" // Identifier of account snapshot generation
snapStorage = "storage" // Identifier of storage snapshot generation
)

// generatorStats is a collection of statistics gathered by the snapshot generator
// for logging purposes.
type generatorStats struct {
origin uint64 // Origin prefix where generation started
start time.Time // Timestamp when generation started
accounts uint64 // Number of accounts indexed(generated or recovered)
slots uint64 // Number of storage slots indexed(generated or recovered)
dangling uint64 // Number of dangling storage slots
storage common.StorageSize // Total account and storage slot size(generation or recovery)
}

// Log creates an contextual log with the given message and the context pulled
// from the internally maintained statistics.
func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) {
var ctx []interface{}
if root != (common.Hash{}) {
ctx = append(ctx, []interface{}{"root", root}...)
}
// Figure out whether we're after or within an account
switch len(marker) {
case common.HashLength:
ctx = append(ctx, []interface{}{"at", common.BytesToHash(marker)}...)
case 2 * common.HashLength:
ctx = append(ctx, []interface{}{
"in", common.BytesToHash(marker[:common.HashLength]),
"at", common.BytesToHash(marker[common.HashLength:]),
}...)
}
// Add the usual measurements
ctx = append(ctx, []interface{}{
"accounts", gs.accounts,
"slots", gs.slots,
"storage", gs.storage,
"dangling", gs.dangling,
"elapsed", common.PrettyDuration(time.Since(gs.start)),
}...)
// Calculate the estimated indexing time based on current stats
if len(marker) > 0 {
if done := binary.BigEndian.Uint64(marker[:8]) - gs.origin; done > 0 {
left := math.MaxUint64 - binary.BigEndian.Uint64(marker[:8])

speed := done/uint64(time.Since(gs.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero
ctx = append(ctx, []interface{}{
"eta", common.PrettyDuration(time.Duration(left/speed) * time.Millisecond),
}...)
}
}
log.Info(msg, ctx...)
}

// generatorContext carries a few global values to be shared by all generation functions.
type generatorContext struct {
stats *generatorStats // Generation statistic collection
db ethdb.KeyValueStore // Key-value store containing the snapshot data
account *holdableIterator // Iterator of account snapshot data
storage *holdableIterator // Iterator of storage snapshot data
batch ethdb.Batch // Database batch for writing batch data atomically
logged time.Time // The timestamp when last generation progress was displayed
}

// newGeneratorContext initializes the context for generation.
func newGeneratorContext(stats *generatorStats, db ethdb.KeyValueStore, accMarker []byte, storageMarker []byte) *generatorContext {
ctx := &generatorContext{
stats: stats,
db: db,
batch: db.NewBatch(),
logged: time.Now(),
}
ctx.openIterator(snapAccount, accMarker)
Copy link
Member Author

@rjl493456442 rjl493456442 May 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@karalabe @holiman I think it's an important change so that I want to highlight here

Whenever we re-start snapshot generation, the snapshot iterators(account, storage) will be opened at the interruption position.

Theoretically the interruption marker represents that all snapshot data has been correctly generated before this. Even in the DiffToDisk function, the snapshot data before the interruption marker (including the interruption marker) will be changed, but it must ensure that all changes are correct and aligned to the new root.

But let's still list all the possible scenarios to prove this operation is correct.


  1. The interruption marker is empty and snapshot iterations are opened at the beginning, it's obviously correct

  2. The interruption marker is non-empty which points to an account xyz and the slot at abc. So account iterator is opened at xyz and storage iterator is opened at xyz+abc.

    2.1 If the account is non-changed: correct


    2.2 If the account is destructed in diffToDisk operation:
    diffToDisk is responsible for cleaning up all slots of account xyz. Then in resumed generation, the position of storage iterator will be seeked to the first storage slot of next account. In this case nothing to do with the storage of account xyz anymore. It's correct.


    2.3 If the storage of account xyz is all cleaned up in diffToDisk operation
    diffToDisk is responsible for cleaning up slots before abc. Then in resumed generation storage iterator will be opened at xyz+abc and clean up the remainning slots since abc, correct.


    2.4 If the storage slots before abc are partially updated in diffToDisk operation
    diffToDisk is responsible for updating slots before abc correctly. Then in resumed generation storage iterator will be opened at xyz+abc and resume storage generation since abc, correct.

ctx.openIterator(snapStorage, storageMarker)
return ctx
}

// openIterator constructs global account and storage snapshot iterators
// at the interrupted position. These iterators should be reopened from time
// to time to avoid blocking leveldb compaction for a long time.
func (ctx *generatorContext) openIterator(kind string, start []byte) {
if kind == snapAccount {
iter := ctx.db.NewIterator(rawdb.SnapshotAccountPrefix, start)
ctx.account = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+common.HashLength))
return
}
iter := ctx.db.NewIterator(rawdb.SnapshotStoragePrefix, start)
ctx.storage = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+2*common.HashLength))
}

// reopenIterator releases the specified snapshot iterator and re-open it
// in the next position. It's aimed for not blocking leveldb compaction.
func (ctx *generatorContext) reopenIterator(kind string) {
// Shift iterator one more step, so that we can reopen
// the iterator at the right position.
var iter = ctx.account
if kind == snapStorage {
iter = ctx.storage
}
hasNext := iter.Next()
if !hasNext {
return // iterator is exhausted now
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps like this? (didn't run the code)

// Iterator exhausted, release forever and create an already exhausted virtual iterator
iter.Release()

if kind == snapAccount {
    ctx.account = newHoldableIterator(memorydb.New().NewIterator(nil, nil))
    return
}
ctx.storage = newHoldableIterator(memorydb.New().NewIterator(nil, nil))
return 

}
next := iter.Key()
iter.Release()
ctx.openIterator(kind, next[1:])
}

// close releases all the held resources.
func (ctx *generatorContext) close() {
ctx.account.Release()
ctx.storage.Release()
}

// iterator returns the corresponding iterator specified by the kind.
func (ctx *generatorContext) iterator(kind string) *holdableIterator {
if kind == snapAccount {
return ctx.account
}
return ctx.storage
}

// removeStorageBefore deletes all storage entries which are located before
// the specified account. When the iterator touches the storage entry which
// is located in or outside the given account, it stops and holds the current
// iterated element locally.
func (ctx *generatorContext) removeStorageBefore(account common.Hash) {
var (
count uint64
start = time.Now()
iter = ctx.storage
)
for iter.Next() {
key := iter.Key()
if bytes.Compare(key[1:1+common.HashLength], account.Bytes()) >= 0 {
iter.Hold()
break
}
count++
ctx.batch.Delete(key)
if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
ctx.batch.Write()
ctx.batch.Reset()
}
}
ctx.stats.dangling += count
snapStorageCleanCounter.Inc(time.Since(start).Nanoseconds())
}

// removeStorageAt deletes all storage entries which are located in the specified
// account. When the iterator touches the storage entry which is outside the given
// account, it stops and holds the current iterated element locally. An error will
// be returned if the initial position of iterator is not in the given account.
func (ctx *generatorContext) removeStorageAt(account common.Hash) error {
var (
count int64
start = time.Now()
iter = ctx.storage
)
for iter.Next() {
key := iter.Key()
cmp := bytes.Compare(key[1:1+common.HashLength], account.Bytes())
if cmp < 0 {
return errors.New("invalid iterator position")
}
if cmp > 0 {
iter.Hold()
break
}
count++
ctx.batch.Delete(key)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can grow quite large if a big contract was deleted, imho we should check the batch size and flush if it gets large mid iteration. Also recreate the iterator?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes. we can flush the batch here. And also in order to simplify the code, we can only flush the batch but without persisting the generation progress marker. We usually do this in checkAndFlush function, but it's fine to not do it here. If panic happens, then we need to redo the generation since the last marker.

if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
ctx.batch.Write()
ctx.batch.Reset()
}
}
snapWipedStorageMeter.Mark(count)
snapStorageCleanCounter.Inc(time.Since(start).Nanoseconds())
return nil
}

// removeStorageLeft deletes all storage entries which are located after
// the current iterator position.
func (ctx *generatorContext) removeStorageLeft() {
var (
count uint64
start = time.Now()
iter = ctx.storage
)
for iter.Next() {
count++
ctx.batch.Delete(iter.Key())
rjl493456442 marked this conversation as resolved.
Show resolved Hide resolved
if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
ctx.batch.Write()
ctx.batch.Reset()
}
}
ctx.stats.dangling += count
snapDanglingStorageMeter.Mark(int64(count))
snapStorageCleanCounter.Inc(time.Since(start).Nanoseconds())
}
Loading