Merge pull request #22777 from karalabe/snapshots-abort-resume-on-sync
core, eth: abort snapshot generation on snap sync and resume later
This commit is contained in:
commit
8681a2536c
@ -640,7 +640,8 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error {
|
|||||||
headBlockGauge.Update(int64(block.NumberU64()))
|
headBlockGauge.Update(int64(block.NumberU64()))
|
||||||
bc.chainmu.Unlock()
|
bc.chainmu.Unlock()
|
||||||
|
|
||||||
// Destroy any existing state snapshot and regenerate it in the background
|
// Destroy any existing state snapshot and regenerate it in the background,
|
||||||
|
// also resuming the normal maintenance of any previously paused snapshot.
|
||||||
if bc.snaps != nil {
|
if bc.snaps != nil {
|
||||||
bc.snaps.Rebuild(block.Root())
|
bc.snaps.Rebuild(block.Root())
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,26 @@ import (
|
|||||||
"github.com/ethereum/go-ethereum/log"
|
"github.com/ethereum/go-ethereum/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ReadSnapshotDisabled retrieves if the snapshot maintenance is disabled.
|
||||||
|
func ReadSnapshotDisabled(db ethdb.KeyValueReader) bool {
|
||||||
|
disabled, _ := db.Has(snapshotDisabledKey)
|
||||||
|
return disabled
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteSnapshotDisabled stores the snapshot pause flag.
|
||||||
|
func WriteSnapshotDisabled(db ethdb.KeyValueWriter) {
|
||||||
|
if err := db.Put(snapshotDisabledKey, []byte("42")); err != nil {
|
||||||
|
log.Crit("Failed to store snapshot disabled flag", "err", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteSnapshotDisabled deletes the flag keeping the snapshot maintenance disabled.
|
||||||
|
func DeleteSnapshotDisabled(db ethdb.KeyValueWriter) {
|
||||||
|
if err := db.Delete(snapshotDisabledKey); err != nil {
|
||||||
|
log.Crit("Failed to remove snapshot disabled flag", "err", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ReadSnapshotRoot retrieves the root of the block whose state is contained in
|
// ReadSnapshotRoot retrieves the root of the block whose state is contained in
|
||||||
// the persisted snapshot.
|
// the persisted snapshot.
|
||||||
func ReadSnapshotRoot(db ethdb.KeyValueReader) common.Hash {
|
func ReadSnapshotRoot(db ethdb.KeyValueReader) common.Hash {
|
||||||
|
@ -371,9 +371,9 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error {
|
|||||||
var accounted bool
|
var accounted bool
|
||||||
for _, meta := range [][]byte{
|
for _, meta := range [][]byte{
|
||||||
databaseVersionKey, headHeaderKey, headBlockKey, headFastBlockKey, lastPivotKey,
|
databaseVersionKey, headHeaderKey, headBlockKey, headFastBlockKey, lastPivotKey,
|
||||||
fastTrieProgressKey, snapshotRootKey, snapshotJournalKey, snapshotGeneratorKey,
|
fastTrieProgressKey, snapshotDisabledKey, snapshotRootKey, snapshotJournalKey,
|
||||||
snapshotRecoveryKey, txIndexTailKey, fastTxLookupLimitKey, uncleanShutdownKey,
|
snapshotGeneratorKey, snapshotRecoveryKey, txIndexTailKey, fastTxLookupLimitKey,
|
||||||
badBlockKey,
|
uncleanShutdownKey, badBlockKey,
|
||||||
} {
|
} {
|
||||||
if bytes.Equal(key, meta) {
|
if bytes.Equal(key, meta) {
|
||||||
metadata.Add(size)
|
metadata.Add(size)
|
||||||
|
@ -45,6 +45,9 @@ var (
|
|||||||
// fastTrieProgressKey tracks the number of trie entries imported during fast sync.
|
// fastTrieProgressKey tracks the number of trie entries imported during fast sync.
|
||||||
fastTrieProgressKey = []byte("TrieSync")
|
fastTrieProgressKey = []byte("TrieSync")
|
||||||
|
|
||||||
|
// snapshotDisabledKey flags that the snapshot should not be maintained due to initial sync.
|
||||||
|
snapshotDisabledKey = []byte("SnapshotDisabled")
|
||||||
|
|
||||||
// snapshotRootKey tracks the hash of the last snapshot.
|
// snapshotRootKey tracks the hash of the last snapshot.
|
||||||
snapshotRootKey = []byte("SnapshotRoot")
|
snapshotRootKey = []byte("SnapshotRoot")
|
||||||
|
|
||||||
|
@ -141,16 +141,6 @@ func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) {
|
|||||||
log.Info(msg, ctx...)
|
log.Info(msg, ctx...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ClearSnapshotMarker sets the snapshot marker to zero, meaning that snapshots
|
|
||||||
// are not usable.
|
|
||||||
func ClearSnapshotMarker(diskdb ethdb.KeyValueStore) {
|
|
||||||
batch := diskdb.NewBatch()
|
|
||||||
journalProgress(batch, []byte{}, nil)
|
|
||||||
if err := batch.Write(); err != nil {
|
|
||||||
log.Crit("Failed to write initialized state marker", "err", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// generateSnapshot regenerates a brand new snapshot based on an existing state
|
// generateSnapshot regenerates a brand new snapshot based on an existing state
|
||||||
// database and head block asynchronously. The snapshot is returned immediately
|
// database and head block asynchronously. The snapshot is returned immediately
|
||||||
// and generation is continued in the background until done.
|
// and generation is continued in the background until done.
|
||||||
|
@ -126,12 +126,17 @@ func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, jou
|
|||||||
}
|
}
|
||||||
|
|
||||||
// loadSnapshot loads a pre-existing state snapshot backed by a key-value store.
|
// loadSnapshot loads a pre-existing state snapshot backed by a key-value store.
|
||||||
func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, recovery bool) (snapshot, error) {
|
func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, recovery bool) (snapshot, bool, error) {
|
||||||
|
// If snapshotting is disabled (initial sync in progress), don't do anything,
|
||||||
|
// wait for the chain to permit us to do something meaningful
|
||||||
|
if rawdb.ReadSnapshotDisabled(diskdb) {
|
||||||
|
return nil, true, nil
|
||||||
|
}
|
||||||
// Retrieve the block number and hash of the snapshot, failing if no snapshot
|
// Retrieve the block number and hash of the snapshot, failing if no snapshot
|
||||||
// is present in the database (or crashed mid-update).
|
// is present in the database (or crashed mid-update).
|
||||||
baseRoot := rawdb.ReadSnapshotRoot(diskdb)
|
baseRoot := rawdb.ReadSnapshotRoot(diskdb)
|
||||||
if baseRoot == (common.Hash{}) {
|
if baseRoot == (common.Hash{}) {
|
||||||
return nil, errors.New("missing or corrupted snapshot")
|
return nil, false, errors.New("missing or corrupted snapshot")
|
||||||
}
|
}
|
||||||
base := &diskLayer{
|
base := &diskLayer{
|
||||||
diskdb: diskdb,
|
diskdb: diskdb,
|
||||||
@ -142,7 +147,7 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
|
|||||||
snapshot, generator, err := loadAndParseJournal(diskdb, base)
|
snapshot, generator, err := loadAndParseJournal(diskdb, base)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Failed to load new-format journal", "error", err)
|
log.Warn("Failed to load new-format journal", "error", err)
|
||||||
return nil, err
|
return nil, false, err
|
||||||
}
|
}
|
||||||
// Entire snapshot journal loaded, sanity check the head. If the loaded
|
// Entire snapshot journal loaded, sanity check the head. If the loaded
|
||||||
// snapshot is not matched with current state root, print a warning log
|
// snapshot is not matched with current state root, print a warning log
|
||||||
@ -157,7 +162,7 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
|
|||||||
// it's not in recovery mode, returns the error here for
|
// it's not in recovery mode, returns the error here for
|
||||||
// rebuilding the entire snapshot forcibly.
|
// rebuilding the entire snapshot forcibly.
|
||||||
if !recovery {
|
if !recovery {
|
||||||
return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root)
|
return nil, false, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root)
|
||||||
}
|
}
|
||||||
// It's in snapshot recovery, the assumption is held that
|
// It's in snapshot recovery, the assumption is held that
|
||||||
// the disk layer is always higher than chain head. It can
|
// the disk layer is always higher than chain head. It can
|
||||||
@ -187,7 +192,7 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
|
|||||||
storage: common.StorageSize(generator.Storage),
|
storage: common.StorageSize(generator.Storage),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return snapshot, nil
|
return snapshot, false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new
|
// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new
|
||||||
|
@ -148,11 +148,11 @@ type snapshot interface {
|
|||||||
StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
|
StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent
|
// Tree is an Ethereum state snapshot tree. It consists of one persistent base
|
||||||
// base layer backed by a key-value store, on top of which arbitrarily many in-
|
// layer backed by a key-value store, on top of which arbitrarily many in-memory
|
||||||
// memory diff layers are topped. The memory diffs can form a tree with branching,
|
// diff layers are topped. The memory diffs can form a tree with branching, but
|
||||||
// but the disk layer is singleton and common to all. If a reorg goes deeper than
|
// the disk layer is singleton and common to all. If a reorg goes deeper than the
|
||||||
// the disk layer, everything needs to be deleted.
|
// disk layer, everything needs to be deleted.
|
||||||
//
|
//
|
||||||
// The goal of a state snapshot is twofold: to allow direct access to account and
|
// The goal of a state snapshot is twofold: to allow direct access to account and
|
||||||
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
|
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
|
||||||
@ -186,7 +186,11 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
|
|||||||
defer snap.waitBuild()
|
defer snap.waitBuild()
|
||||||
}
|
}
|
||||||
// Attempt to load a previously persisted snapshot and rebuild one if failed
|
// Attempt to load a previously persisted snapshot and rebuild one if failed
|
||||||
head, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
|
head, disabled, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
|
||||||
|
if disabled {
|
||||||
|
log.Warn("Snapshot maintenance disabled (syncing)")
|
||||||
|
return snap, nil
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if rebuild {
|
if rebuild {
|
||||||
log.Warn("Failed to load snapshot, regenerating", "err", err)
|
log.Warn("Failed to load snapshot, regenerating", "err", err)
|
||||||
@ -224,6 +228,55 @@ func (t *Tree) waitBuild() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Disable interrupts any pending snapshot generator, deletes all the snapshot
|
||||||
|
// layers in memory and marks snapshots disabled globally. In order to resume
|
||||||
|
// the snapshot functionality, the caller must invoke Rebuild.
|
||||||
|
func (t *Tree) Disable() {
|
||||||
|
// Interrupt any live snapshot layers
|
||||||
|
t.lock.Lock()
|
||||||
|
defer t.lock.Unlock()
|
||||||
|
|
||||||
|
for _, layer := range t.layers {
|
||||||
|
switch layer := layer.(type) {
|
||||||
|
case *diskLayer:
|
||||||
|
// If the base layer is generating, abort it
|
||||||
|
if layer.genAbort != nil {
|
||||||
|
abort := make(chan *generatorStats)
|
||||||
|
layer.genAbort <- abort
|
||||||
|
<-abort
|
||||||
|
}
|
||||||
|
// Layer should be inactive now, mark it as stale
|
||||||
|
layer.lock.Lock()
|
||||||
|
layer.stale = true
|
||||||
|
layer.lock.Unlock()
|
||||||
|
|
||||||
|
case *diffLayer:
|
||||||
|
// If the layer is a simple diff, simply mark as stale
|
||||||
|
layer.lock.Lock()
|
||||||
|
atomic.StoreUint32(&layer.stale, 1)
|
||||||
|
layer.lock.Unlock()
|
||||||
|
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("unknown layer type: %T", layer))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.layers = map[common.Hash]snapshot{}
|
||||||
|
|
||||||
|
// Delete all snapshot liveness information from the database
|
||||||
|
batch := t.diskdb.NewBatch()
|
||||||
|
|
||||||
|
rawdb.WriteSnapshotDisabled(batch)
|
||||||
|
rawdb.DeleteSnapshotRoot(batch)
|
||||||
|
rawdb.DeleteSnapshotJournal(batch)
|
||||||
|
rawdb.DeleteSnapshotGenerator(batch)
|
||||||
|
rawdb.DeleteSnapshotRecoveryNumber(batch)
|
||||||
|
// Note, we don't delete the sync progress
|
||||||
|
|
||||||
|
if err := batch.Write(); err != nil {
|
||||||
|
log.Crit("Failed to disable snapshots", "err", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Snapshot retrieves a snapshot belonging to the given block root, or nil if no
|
// Snapshot retrieves a snapshot belonging to the given block root, or nil if no
|
||||||
// snapshot is maintained for that block.
|
// snapshot is maintained for that block.
|
||||||
func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
|
func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
|
||||||
@ -626,8 +679,9 @@ func (t *Tree) Rebuild(root common.Hash) {
|
|||||||
defer t.lock.Unlock()
|
defer t.lock.Unlock()
|
||||||
|
|
||||||
// Firstly delete any recovery flag in the database. Because now we are
|
// Firstly delete any recovery flag in the database. Because now we are
|
||||||
// building a brand new snapshot.
|
// building a brand new snapshot. Also reenable the snapshot feature.
|
||||||
rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)
|
rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)
|
||||||
|
rawdb.DeleteSnapshotDisabled(t.diskdb)
|
||||||
|
|
||||||
// Iterate over and mark all layers stale
|
// Iterate over and mark all layers stale
|
||||||
for _, layer := range t.layers {
|
for _, layer := range t.layers {
|
||||||
|
@ -28,6 +28,7 @@ import (
|
|||||||
"github.com/ethereum/go-ethereum"
|
"github.com/ethereum/go-ethereum"
|
||||||
"github.com/ethereum/go-ethereum/common"
|
"github.com/ethereum/go-ethereum/common"
|
||||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||||
|
"github.com/ethereum/go-ethereum/core/state/snapshot"
|
||||||
"github.com/ethereum/go-ethereum/core/types"
|
"github.com/ethereum/go-ethereum/core/types"
|
||||||
"github.com/ethereum/go-ethereum/eth/protocols/eth"
|
"github.com/ethereum/go-ethereum/eth/protocols/eth"
|
||||||
"github.com/ethereum/go-ethereum/eth/protocols/snap"
|
"github.com/ethereum/go-ethereum/eth/protocols/snap"
|
||||||
@ -214,6 +215,9 @@ type BlockChain interface {
|
|||||||
|
|
||||||
// InsertReceiptChain inserts a batch of receipts into the local chain.
|
// InsertReceiptChain inserts a batch of receipts into the local chain.
|
||||||
InsertReceiptChain(types.Blocks, []types.Receipts, uint64) (int, error)
|
InsertReceiptChain(types.Blocks, []types.Receipts, uint64) (int, error)
|
||||||
|
|
||||||
|
// Snapshots returns the blockchain snapshot tree to paused it during sync.
|
||||||
|
Snapshots() *snapshot.Tree
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new downloader to fetch hashes and blocks from remote peers.
|
// New creates a new downloader to fetch hashes and blocks from remote peers.
|
||||||
@ -393,6 +397,12 @@ func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int, mode
|
|||||||
// but until snap becomes prevalent, we should support both. TODO(karalabe).
|
// but until snap becomes prevalent, we should support both. TODO(karalabe).
|
||||||
if mode == SnapSync {
|
if mode == SnapSync {
|
||||||
if !d.snapSync {
|
if !d.snapSync {
|
||||||
|
// Snap sync uses the snapshot namespace to store potentially flakey data until
|
||||||
|
// sync completely heals and finishes. Pause snapshot maintenance in the mean
|
||||||
|
// time to prevent access.
|
||||||
|
if snapshots := d.blockchain.Snapshots(); snapshots != nil { // Only nil in tests
|
||||||
|
snapshots.Disable()
|
||||||
|
}
|
||||||
log.Warn("Enabling snapshot sync prototype")
|
log.Warn("Enabling snapshot sync prototype")
|
||||||
d.snapSync = true
|
d.snapSync = true
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,7 @@ import (
|
|||||||
"github.com/ethereum/go-ethereum"
|
"github.com/ethereum/go-ethereum"
|
||||||
"github.com/ethereum/go-ethereum/common"
|
"github.com/ethereum/go-ethereum/common"
|
||||||
"github.com/ethereum/go-ethereum/core/rawdb"
|
"github.com/ethereum/go-ethereum/core/rawdb"
|
||||||
|
"github.com/ethereum/go-ethereum/core/state/snapshot"
|
||||||
"github.com/ethereum/go-ethereum/core/types"
|
"github.com/ethereum/go-ethereum/core/types"
|
||||||
"github.com/ethereum/go-ethereum/eth/protocols/eth"
|
"github.com/ethereum/go-ethereum/eth/protocols/eth"
|
||||||
"github.com/ethereum/go-ethereum/ethdb"
|
"github.com/ethereum/go-ethereum/ethdb"
|
||||||
@ -409,6 +410,11 @@ func (dl *downloadTester) dropPeer(id string) {
|
|||||||
dl.downloader.UnregisterPeer(id)
|
dl.downloader.UnregisterPeer(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Snapshots implements the BlockChain interface for the downloader, but is a noop.
|
||||||
|
func (dl *downloadTester) Snapshots() *snapshot.Tree {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
type downloadTesterPeer struct {
|
type downloadTesterPeer struct {
|
||||||
dl *downloadTester
|
dl *downloadTester
|
||||||
id string
|
id string
|
||||||
|
@ -551,11 +551,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
|
|||||||
log.Debug("Snapshot sync already completed")
|
log.Debug("Snapshot sync already completed")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// If sync is still not finished, we need to ensure that any marker is wiped.
|
|
||||||
// Otherwise, it may happen that requests for e.g. genesis-data is delivered
|
|
||||||
// from the snapshot data, instead of from the trie
|
|
||||||
snapshot.ClearSnapshotMarker(s.db)
|
|
||||||
|
|
||||||
defer func() { // Persist any progress, independent of failure
|
defer func() { // Persist any progress, independent of failure
|
||||||
for _, task := range s.tasks {
|
for _, task := range s.tasks {
|
||||||
s.forwardAccountTask(task)
|
s.forwardAccountTask(task)
|
||||||
|
Loading…
Reference in New Issue
Block a user