From c170fa277cbf2a9faf9f35665f1ba8f34f94062a Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 13 Mar 2024 19:39:30 +0800 Subject: [PATCH] core: improve chain rewinding mechanism (#29196) * core: improve chain rewinding mechanism * core: address comment * core: periodically print progress log * core: address comments * core: fix comment * core: fix rewinding in path * core: fix beyondRoot condition * core: polish code * core: polish code * core: extend code comment * core: stop rewinding if chain is gapped or genesis is reached * core: fix broken tests --- core/blockchain.go | 238 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 183 insertions(+), 55 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index 9bd7fdcd95..ba346b010d 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -616,6 +616,172 @@ func (bc *BlockChain) SetSafe(header *types.Header) { } } +// rewindPathHead implements the logic of rewindHead in the context of hash scheme. +func (bc *BlockChain) rewindHashHead(head *types.Header, root common.Hash) (*types.Header, uint64) { + var ( + limit uint64 // The oldest block that will be searched for this rewinding + beyondRoot = root == common.Hash{} // Flag whether we're beyond the requested root (no root, always true) + pivot = rawdb.ReadLastPivotNumber(bc.db) // Associated block number of pivot point state + rootNumber uint64 // Associated block number of requested root + + start = time.Now() // Timestamp the rewinding is restarted + logged = time.Now() // Timestamp last progress log was printed + ) + // The oldest block to be searched is determined by the pivot block or a constant + // searching threshold. The rationale behind this is as follows: + // + // - Snap sync is selected if the pivot block is available. The earliest available + // state is the pivot block itself, so there is no sense in going further back. + // + // - Full sync is selected if the pivot block does not exist. The hash database + // periodically flushes the state to disk, and the used searching threshold is + // considered sufficient to find a persistent state, even for the testnet. It + // might be not enough for a chain that is nearly empty. In the worst case, + // the entire chain is reset to genesis, and snap sync is re-enabled on top, + // which is still acceptable. + if pivot != nil { + limit = *pivot + } else if head.Number.Uint64() > params.FullImmutabilityThreshold { + limit = head.Number.Uint64() - params.FullImmutabilityThreshold + } + for { + logger := log.Trace + if time.Since(logged) > time.Second*8 { + logged = time.Now() + logger = log.Info + } + logger("Block state missing, rewinding further", "number", head.Number, "hash", head.Hash(), "elapsed", common.PrettyDuration(time.Since(start))) + + // If a root threshold was requested but not yet crossed, check + if !beyondRoot && head.Root == root { + beyondRoot, rootNumber = true, head.Number.Uint64() + } + // If search limit is reached, return the genesis block as the + // new chain head. + if head.Number.Uint64() < limit { + log.Info("Rewinding limit reached, resetting to genesis", "number", head.Number, "hash", head.Hash(), "limit", limit) + return bc.genesisBlock.Header(), rootNumber + } + // If the associated state is not reachable, continue searching + // backwards until an available state is found. + if !bc.HasState(head.Root) { + // If the chain is gapped in the middle, return the genesis + // block as the new chain head. + parent := bc.GetHeader(head.ParentHash, head.Number.Uint64()-1) + if parent == nil { + log.Error("Missing block in the middle, resetting to genesis", "number", head.Number.Uint64()-1, "hash", head.ParentHash) + return bc.genesisBlock.Header(), rootNumber + } + head = parent + + // If the genesis block is reached, stop searching. + if head.Number.Uint64() == 0 { + log.Info("Genesis block reached", "number", head.Number, "hash", head.Hash()) + return head, rootNumber + } + continue // keep rewinding + } + // Once the available state is found, ensure that the requested root + // has already been crossed. If not, continue rewinding. + if beyondRoot || head.Number.Uint64() == 0 { + log.Info("Rewound to block with state", "number", head.Number, "hash", head.Hash()) + return head, rootNumber + } + log.Debug("Skipping block with threshold state", "number", head.Number, "hash", head.Hash(), "root", head.Root) + head = bc.GetHeader(head.ParentHash, head.Number.Uint64()-1) // Keep rewinding + } +} + +// rewindPathHead implements the logic of rewindHead in the context of path scheme. +func (bc *BlockChain) rewindPathHead(head *types.Header, root common.Hash) (*types.Header, uint64) { + var ( + pivot = rawdb.ReadLastPivotNumber(bc.db) // Associated block number of pivot block + rootNumber uint64 // Associated block number of requested root + + // BeyondRoot represents whether the requested root is already + // crossed. The flag value is set to true if the root is empty. + beyondRoot = root == common.Hash{} + + // noState represents if the target state requested for search + // is unavailable and impossible to be recovered. + noState = !bc.HasState(root) && !bc.stateRecoverable(root) + + start = time.Now() // Timestamp the rewinding is restarted + logged = time.Now() // Timestamp last progress log was printed + ) + // Rewind the head block tag until an available state is found. + for { + logger := log.Trace + if time.Since(logged) > time.Second*8 { + logged = time.Now() + logger = log.Info + } + logger("Block state missing, rewinding further", "number", head.Number, "hash", head.Hash(), "elapsed", common.PrettyDuration(time.Since(start))) + + // If a root threshold was requested but not yet crossed, check + if !beyondRoot && head.Root == root { + beyondRoot, rootNumber = true, head.Number.Uint64() + } + // If the root threshold hasn't been crossed but the available + // state is reached, quickly determine if the target state is + // possible to be reached or not. + if !beyondRoot && noState && bc.HasState(head.Root) { + beyondRoot = true + log.Info("Disable the search for unattainable state", "root", root) + } + // Check if the associated state is available or recoverable if + // the requested root has already been crossed. + if beyondRoot && (bc.HasState(head.Root) || bc.stateRecoverable(head.Root)) { + break + } + // If pivot block is reached, return the genesis block as the + // new chain head. Theoretically there must be a persistent + // state before or at the pivot block, prevent endless rewinding + // towards the genesis just in case. + if pivot != nil && *pivot >= head.Number.Uint64() { + log.Info("Pivot block reached, resetting to genesis", "number", head.Number, "hash", head.Hash()) + return bc.genesisBlock.Header(), rootNumber + } + // If the chain is gapped in the middle, return the genesis + // block as the new chain head + parent := bc.GetHeader(head.ParentHash, head.Number.Uint64()-1) // Keep rewinding + if parent == nil { + log.Error("Missing block in the middle, resetting to genesis", "number", head.Number.Uint64()-1, "hash", head.ParentHash) + return bc.genesisBlock.Header(), rootNumber + } + head = parent + + // If the genesis block is reached, stop searching. + if head.Number.Uint64() == 0 { + log.Info("Genesis block reached", "number", head.Number, "hash", head.Hash()) + return head, rootNumber + } + } + // Recover if the target state if it's not available yet. + if !bc.HasState(head.Root) { + if err := bc.triedb.Recover(head.Root); err != nil { + log.Crit("Failed to rollback state", "err", err) + } + } + log.Info("Rewound to block with state", "number", head.Number, "hash", head.Hash()) + return head, rootNumber +} + +// rewindHead searches the available states in the database and returns the associated +// block as the new head block. +// +// If the given root is not empty, then the rewind should attempt to pass the specified +// state root and return the associated block number as well. If the root, typically +// representing the state corresponding to snapshot disk layer, is deemed impassable, +// then block number zero is returned, indicating that snapshot recovery is disabled +// and the whole snapshot should be auto-generated in case of head mismatch. +func (bc *BlockChain) rewindHead(head *types.Header, root common.Hash) (*types.Header, uint64) { + if bc.triedb.Scheme() == rawdb.PathScheme { + return bc.rewindPathHead(head, root) + } + return bc.rewindHashHead(head, root) +} + // setHeadBeyondRoot rewinds the local chain to a new head with the extra condition // that the rewind must pass the specified state root. This method is meant to be // used when rewinding with snapshots enabled to ensure that we go back further than @@ -634,79 +800,40 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, time uint64, root common.Ha } defer bc.chainmu.Unlock() - // Track the block number of the requested root hash - var rootNumber uint64 // (no root == always 0) - - // Retrieve the last pivot block to short circuit rollbacks beyond it and the - // current freezer limit to start nuking id underflown - pivot := rawdb.ReadLastPivotNumber(bc.db) - frozen, _ := bc.db.Ancients() + var ( + // Track the block number of the requested root hash + rootNumber uint64 // (no root == always 0) + // Retrieve the last pivot block to short circuit rollbacks beyond it + // and the current freezer limit to start nuking it's underflown. + pivot = rawdb.ReadLastPivotNumber(bc.db) + ) updateFn := func(db ethdb.KeyValueWriter, header *types.Header) (*types.Header, bool) { // Rewind the blockchain, ensuring we don't end up with a stateless head // block. Note, depth equality is permitted to allow using SetHead as a // chain reparation mechanism without deleting any data! if currentBlock := bc.CurrentBlock(); currentBlock != nil && header.Number.Uint64() <= currentBlock.Number.Uint64() { - newHeadBlock := bc.GetBlock(header.Hash(), header.Number.Uint64()) - if newHeadBlock == nil { - log.Error("Gap in the chain, rewinding to genesis", "number", header.Number, "hash", header.Hash()) - newHeadBlock = bc.genesisBlock - } else { - // Block exists. Keep rewinding until either we find one with state - // or until we exceed the optional threshold root hash - beyondRoot := (root == common.Hash{}) // Flag whether we're beyond the requested root (no root, always true) - - for { - // If a root threshold was requested but not yet crossed, check - if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root { - beyondRoot, rootNumber = true, newHeadBlock.NumberU64() - } - if !bc.HasState(newHeadBlock.Root()) && !bc.stateRecoverable(newHeadBlock.Root()) { - log.Trace("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) - if pivot == nil || newHeadBlock.NumberU64() > *pivot { - parent := bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) - if parent != nil { - newHeadBlock = parent - continue - } - log.Error("Missing block in the middle, aiming genesis", "number", newHeadBlock.NumberU64()-1, "hash", newHeadBlock.ParentHash()) - newHeadBlock = bc.genesisBlock - } else { - log.Trace("Rewind passed pivot, aiming genesis", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "pivot", *pivot) - newHeadBlock = bc.genesisBlock - } - } - if beyondRoot || newHeadBlock.NumberU64() == 0 { - if !bc.HasState(newHeadBlock.Root()) && bc.stateRecoverable(newHeadBlock.Root()) { - // Rewind to a block with recoverable state. If the state is - // missing, run the state recovery here. - if err := bc.triedb.Recover(newHeadBlock.Root()); err != nil { - log.Crit("Failed to rollback state", "err", err) // Shouldn't happen - } - log.Debug("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) - } - break - } - log.Debug("Skipping block with threshold state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "root", newHeadBlock.Root()) - newHeadBlock = bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) // Keep rewinding - } - } + var newHeadBlock *types.Header + newHeadBlock, rootNumber = bc.rewindHead(header, root) rawdb.WriteHeadBlockHash(db, newHeadBlock.Hash()) // Degrade the chain markers if they are explicitly reverted. // In theory we should update all in-memory markers in the // last step, however the direction of SetHead is from high // to low, so it's safe to update in-memory markers directly. - bc.currentBlock.Store(newHeadBlock.Header()) - headBlockGauge.Update(int64(newHeadBlock.NumberU64())) + bc.currentBlock.Store(newHeadBlock) + headBlockGauge.Update(int64(newHeadBlock.Number.Uint64())) // The head state is missing, which is only possible in the path-based // scheme. This situation occurs when the chain head is rewound below // the pivot point. In this scenario, there is no possible recovery // approach except for rerunning a snap sync. Do nothing here until the // state syncer picks it up. - if !bc.HasState(newHeadBlock.Root()) { - log.Info("Chain is stateless, wait state sync", "number", newHeadBlock.Number(), "hash", newHeadBlock.Hash()) + if !bc.HasState(newHeadBlock.Root) { + if newHeadBlock.Number.Uint64() != 0 { + log.Crit("Chain is stateless at a non-genesis block") + } + log.Info("Chain is stateless, wait state sync", "number", newHeadBlock.Number, "hash", newHeadBlock.Hash()) } } // Rewind the snap block in a simpleton way to the target head @@ -733,6 +860,7 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, time uint64, root common.Ha // intent afterwards is full block importing, delete the chain segment // between the stateful-block and the sethead target. var wipe bool + frozen, _ := bc.db.Ancients() if headNumber+1 < frozen { wipe = pivot == nil || headNumber >= *pivot }