go-ethereum/cmd/geth/snapshot.go
rjl493456442 59f7b289c3
cmd, core, eth, graphql, trie: no persisted clean trie cache file (#27525)
The clean trie cache is persisted periodically, therefore Geth can
quickly warmup the cache in next restart.

However it will reduce the robustness of system. The assumption is
held in Geth that if the parent trie node is present, then the entire
sub-trie associated with the parent are all prensent.

Imagine the scenario that Geth rewinds itself to a past block and
restart, but Geth finds the root node of "future state" in clean
cache then regard this state is present in disk, while is not in fact.

Another example is offline pruning tool. Whenever an offline pruning
is performed, the clean cache file has to be removed to aviod hitting
the root node of "deleted states" in clean cache.

All in all, compare with the minor performance gain, system robustness
is something we care more.
2023-07-04 10:21:06 +03:00

608 lines
19 KiB
Go

// Copyright 2021 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"bytes"
"encoding/json"
"errors"
"os"
"time"
"github.com/ethereum/go-ethereum/cmd/utils"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/core/state/pruner"
"github.com/ethereum/go-ethereum/core/state/snapshot"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/internal/flags"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
cli "github.com/urfave/cli/v2"
)
var (
snapshotCommand = &cli.Command{
Name: "snapshot",
Usage: "A set of commands based on the snapshot",
Description: "",
Subcommands: []*cli.Command{
{
Name: "prune-state",
Usage: "Prune stale ethereum state data based on the snapshot",
ArgsUsage: "<root>",
Action: pruneState,
Flags: flags.Merge([]cli.Flag{
utils.BloomFilterSizeFlag,
}, utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
geth snapshot prune-state <state-root>
will prune historical state data with the help of the state snapshot.
All trie nodes and contract codes that do not belong to the specified
version state will be deleted from the database. After pruning, only
two version states are available: genesis and the specific one.
The default pruning target is the HEAD-127 state.
WARNING: It's necessary to delete the trie clean cache after the pruning.
If you specify another directory for the trie clean cache via "--cache.trie.journal"
during the use of Geth, please also specify it here for correct deletion. Otherwise
the trie clean cache with default directory will be deleted.
`,
},
{
Name: "verify-state",
Usage: "Recalculate state hash based on the snapshot for verification",
ArgsUsage: "<root>",
Action: verifyState,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
geth snapshot verify-state <state-root>
will traverse the whole accounts and storages set based on the specified
snapshot and recalculate the root hash of state for verification.
In other words, this command does the snapshot to trie conversion.
`,
},
{
Name: "check-dangling-storage",
Usage: "Check that there is no 'dangling' snap storage",
ArgsUsage: "<root>",
Action: checkDanglingStorage,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
geth snapshot check-dangling-storage <state-root> traverses the snap storage
data, and verifies that all snapshot storage data has a corresponding account.
`,
},
{
Name: "inspect-account",
Usage: "Check all snapshot layers for the a specific account",
ArgsUsage: "<address | hash>",
Action: checkAccount,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
geth snapshot inspect-account <address | hash> checks all snapshot layers and prints out
information about the specified address.
`,
},
{
Name: "traverse-state",
Usage: "Traverse the state with given root hash and perform quick verification",
ArgsUsage: "<root>",
Action: traverseState,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
geth snapshot traverse-state <state-root>
will traverse the whole state from the given state root and will abort if any
referenced trie node or contract code is missing. This command can be used for
state integrity verification. The default checking target is the HEAD state.
It's also usable without snapshot enabled.
`,
},
{
Name: "traverse-rawstate",
Usage: "Traverse the state with given root hash and perform detailed verification",
ArgsUsage: "<root>",
Action: traverseRawState,
Flags: flags.Merge(utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
geth snapshot traverse-rawstate <state-root>
will traverse the whole state from the given root and will abort if any referenced
trie node or contract code is missing. This command can be used for state integrity
verification. The default checking target is the HEAD state. It's basically identical
to traverse-state, but the check granularity is smaller.
It's also usable without snapshot enabled.
`,
},
{
Name: "dump",
Usage: "Dump a specific block from storage (same as 'geth dump' but using snapshots)",
ArgsUsage: "[? <blockHash> | <blockNum>]",
Action: dumpState,
Flags: flags.Merge([]cli.Flag{
utils.ExcludeCodeFlag,
utils.ExcludeStorageFlag,
utils.StartKeyFlag,
utils.DumpLimitFlag,
}, utils.NetworkFlags, utils.DatabasePathFlags),
Description: `
This command is semantically equivalent to 'geth dump', but uses the snapshots
as the backend data source, making this command a lot faster.
The argument is interpreted as block number or hash. If none is provided, the latest
block is used.
`,
},
},
}
)
// Deprecation: this command should be deprecated once the hash-based
// scheme is deprecated.
func pruneState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, false)
defer chaindb.Close()
prunerconfig := pruner.Config{
Datadir: stack.ResolvePath(""),
BloomSize: ctx.Uint64(utils.BloomFilterSizeFlag.Name),
}
pruner, err := pruner.NewPruner(chaindb, prunerconfig)
if err != nil {
log.Error("Failed to open snapshot tree", "err", err)
return err
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var targetRoot common.Hash
if ctx.NArg() == 1 {
targetRoot, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
}
if err = pruner.Prune(targetRoot); err != nil {
log.Error("Failed to prune state", "err", err)
return err
}
return nil
}
func verifyState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
snapconfig := snapshot.Config{
CacheSize: 256,
Recovery: false,
NoBuild: true,
AsyncBuild: false,
}
snaptree, err := snapshot.New(snapconfig, chaindb, trie.NewDatabase(chaindb), headBlock.Root())
if err != nil {
log.Error("Failed to open snapshot tree", "err", err)
return err
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var root = headBlock.Root()
if ctx.NArg() == 1 {
root, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
}
if err := snaptree.Verify(root); err != nil {
log.Error("Failed to verify state", "root", root, "err", err)
return err
}
log.Info("Verified the state", "root", root)
return snapshot.CheckDanglingStorage(chaindb)
}
// checkDanglingStorage iterates the snap storage data, and verifies that all
// storage also has corresponding account data.
func checkDanglingStorage(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
return snapshot.CheckDanglingStorage(utils.MakeChainDatabase(ctx, stack, true))
}
// traverseState is a helper function used for pruning verification.
// Basically it just iterates the trie, ensure all nodes and associated
// contract codes are present.
func traverseState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var (
root common.Hash
err error
)
if ctx.NArg() == 1 {
root, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
log.Info("Start traversing the state", "root", root)
} else {
root = headBlock.Root()
log.Info("Start traversing the state", "root", root, "number", headBlock.NumberU64())
}
triedb := trie.NewDatabase(chaindb)
t, err := trie.NewStateTrie(trie.StateTrieID(root), triedb)
if err != nil {
log.Error("Failed to open trie", "root", root, "err", err)
return err
}
var (
accounts int
slots int
codes int
lastReport time.Time
start = time.Now()
)
acctIt, err := t.NodeIterator(nil)
if err != nil {
log.Error("Failed to open iterator", "root", root, "err", err)
return err
}
accIter := trie.NewIterator(acctIt)
for accIter.Next() {
accounts += 1
var acc types.StateAccount
if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil {
log.Error("Invalid account encountered during traversal", "err", err)
return err
}
if acc.Root != types.EmptyRootHash {
id := trie.StorageTrieID(root, common.BytesToHash(accIter.Key), acc.Root)
storageTrie, err := trie.NewStateTrie(id, triedb)
if err != nil {
log.Error("Failed to open storage trie", "root", acc.Root, "err", err)
return err
}
storageIt, err := storageTrie.NodeIterator(nil)
if err != nil {
log.Error("Failed to open storage iterator", "root", acc.Root, "err", err)
return err
}
storageIter := trie.NewIterator(storageIt)
for storageIter.Next() {
slots += 1
}
if storageIter.Err != nil {
log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Err)
return storageIter.Err
}
}
if !bytes.Equal(acc.CodeHash, types.EmptyCodeHash.Bytes()) {
if !rawdb.HasCode(chaindb, common.BytesToHash(acc.CodeHash)) {
log.Error("Code is missing", "hash", common.BytesToHash(acc.CodeHash))
return errors.New("missing code")
}
codes += 1
}
if time.Since(lastReport) > time.Second*8 {
log.Info("Traversing state", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
lastReport = time.Now()
}
}
if accIter.Err != nil {
log.Error("Failed to traverse state trie", "root", root, "err", accIter.Err)
return accIter.Err
}
log.Info("State is complete", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
return nil
}
// traverseRawState is a helper function used for pruning verification.
// Basically it just iterates the trie, ensure all nodes and associated
// contract codes are present. It's basically identical to traverseState
// but it will check each trie node.
func traverseRawState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
headBlock := rawdb.ReadHeadBlock(chaindb)
if headBlock == nil {
log.Error("Failed to load head block")
return errors.New("no head block")
}
if ctx.NArg() > 1 {
log.Error("Too many arguments given")
return errors.New("too many arguments")
}
var (
root common.Hash
err error
)
if ctx.NArg() == 1 {
root, err = parseRoot(ctx.Args().First())
if err != nil {
log.Error("Failed to resolve state root", "err", err)
return err
}
log.Info("Start traversing the state", "root", root)
} else {
root = headBlock.Root()
log.Info("Start traversing the state", "root", root, "number", headBlock.NumberU64())
}
triedb := trie.NewDatabase(chaindb)
t, err := trie.NewStateTrie(trie.StateTrieID(root), triedb)
if err != nil {
log.Error("Failed to open trie", "root", root, "err", err)
return err
}
var (
nodes int
accounts int
slots int
codes int
lastReport time.Time
start = time.Now()
hasher = crypto.NewKeccakState()
got = make([]byte, 32)
)
accIter, err := t.NodeIterator(nil)
if err != nil {
log.Error("Failed to open iterator", "root", root, "err", err)
return err
}
for accIter.Next(true) {
nodes += 1
node := accIter.Hash()
// Check the present for non-empty hash node(embedded node doesn't
// have their own hash).
if node != (common.Hash{}) {
blob := rawdb.ReadLegacyTrieNode(chaindb, node)
if len(blob) == 0 {
log.Error("Missing trie node(account)", "hash", node)
return errors.New("missing account")
}
hasher.Reset()
hasher.Write(blob)
hasher.Read(got)
if !bytes.Equal(got, node.Bytes()) {
log.Error("Invalid trie node(account)", "hash", node.Hex(), "value", blob)
return errors.New("invalid account node")
}
}
// If it's a leaf node, yes we are touching an account,
// dig into the storage trie further.
if accIter.Leaf() {
accounts += 1
var acc types.StateAccount
if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
log.Error("Invalid account encountered during traversal", "err", err)
return errors.New("invalid account")
}
if acc.Root != types.EmptyRootHash {
id := trie.StorageTrieID(root, common.BytesToHash(accIter.LeafKey()), acc.Root)
storageTrie, err := trie.NewStateTrie(id, triedb)
if err != nil {
log.Error("Failed to open storage trie", "root", acc.Root, "err", err)
return errors.New("missing storage trie")
}
storageIter, err := storageTrie.NodeIterator(nil)
if err != nil {
log.Error("Failed to open storage iterator", "root", acc.Root, "err", err)
return err
}
for storageIter.Next(true) {
nodes += 1
node := storageIter.Hash()
// Check the presence for non-empty hash node(embedded node doesn't
// have their own hash).
if node != (common.Hash{}) {
blob := rawdb.ReadLegacyTrieNode(chaindb, node)
if len(blob) == 0 {
log.Error("Missing trie node(storage)", "hash", node)
return errors.New("missing storage")
}
hasher.Reset()
hasher.Write(blob)
hasher.Read(got)
if !bytes.Equal(got, node.Bytes()) {
log.Error("Invalid trie node(storage)", "hash", node.Hex(), "value", blob)
return errors.New("invalid storage node")
}
}
// Bump the counter if it's leaf node.
if storageIter.Leaf() {
slots += 1
}
}
if storageIter.Error() != nil {
log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Error())
return storageIter.Error()
}
}
if !bytes.Equal(acc.CodeHash, types.EmptyCodeHash.Bytes()) {
if !rawdb.HasCode(chaindb, common.BytesToHash(acc.CodeHash)) {
log.Error("Code is missing", "account", common.BytesToHash(accIter.LeafKey()))
return errors.New("missing code")
}
codes += 1
}
if time.Since(lastReport) > time.Second*8 {
log.Info("Traversing state", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
lastReport = time.Now()
}
}
}
if accIter.Error() != nil {
log.Error("Failed to traverse state trie", "root", root, "err", accIter.Error())
return accIter.Error()
}
log.Info("State is complete", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
return nil
}
func parseRoot(input string) (common.Hash, error) {
var h common.Hash
if err := h.UnmarshalText([]byte(input)); err != nil {
return h, err
}
return h, nil
}
func dumpState(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()
conf, db, root, err := parseDumpConfig(ctx, stack)
if err != nil {
return err
}
snapConfig := snapshot.Config{
CacheSize: 256,
Recovery: false,
NoBuild: true,
AsyncBuild: false,
}
snaptree, err := snapshot.New(snapConfig, db, trie.NewDatabase(db), root)
if err != nil {
return err
}
accIt, err := snaptree.AccountIterator(root, common.BytesToHash(conf.Start))
if err != nil {
return err
}
defer accIt.Release()
log.Info("Snapshot dumping started", "root", root)
var (
start = time.Now()
logged = time.Now()
accounts uint64
)
enc := json.NewEncoder(os.Stdout)
enc.Encode(struct {
Root common.Hash `json:"root"`
}{root})
for accIt.Next() {
account, err := types.FullAccount(accIt.Account())
if err != nil {
return err
}
da := &state.DumpAccount{
Balance: account.Balance.String(),
Nonce: account.Nonce,
Root: account.Root.Bytes(),
CodeHash: account.CodeHash,
SecureKey: accIt.Hash().Bytes(),
}
if !conf.SkipCode && !bytes.Equal(account.CodeHash, types.EmptyCodeHash.Bytes()) {
da.Code = rawdb.ReadCode(db, common.BytesToHash(account.CodeHash))
}
if !conf.SkipStorage {
da.Storage = make(map[common.Hash]string)
stIt, err := snaptree.StorageIterator(root, accIt.Hash(), common.Hash{})
if err != nil {
return err
}
for stIt.Next() {
da.Storage[stIt.Hash()] = common.Bytes2Hex(stIt.Slot())
}
}
enc.Encode(da)
accounts++
if time.Since(logged) > 8*time.Second {
log.Info("Snapshot dumping in progress", "at", accIt.Hash(), "accounts", accounts,
"elapsed", common.PrettyDuration(time.Since(start)))
logged = time.Now()
}
if conf.Max > 0 && accounts >= conf.Max {
break
}
}
log.Info("Snapshot dumping complete", "accounts", accounts,
"elapsed", common.PrettyDuration(time.Since(start)))
return nil
}
// checkAccount iterates the snap data layers, and looks up the given account
// across all layers.
func checkAccount(ctx *cli.Context) error {
if ctx.NArg() != 1 {
return errors.New("need <address|hash> arg")
}
var (
hash common.Hash
addr common.Address
)
switch arg := ctx.Args().First(); len(arg) {
case 40, 42:
addr = common.HexToAddress(arg)
hash = crypto.Keccak256Hash(addr.Bytes())
case 64, 66:
hash = common.HexToHash(arg)
default:
return errors.New("malformed address or hash")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, true)
defer chaindb.Close()
start := time.Now()
log.Info("Checking difflayer journal", "address", addr, "hash", hash)
if err := snapshot.CheckJournalAccount(chaindb, hash); err != nil {
return err
}
log.Info("Checked the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start)))
return nil
}