eth: make traceChain avoid OOM on long-running tracing (#23736)

This PR changes long-running chain tracing so that, at certain points, it releases the in-memory trie database and switches over to a fresh disk-backed trie.
This commit is contained in:
Martin Holst Swende 2021-11-04 18:54:00 +01:00 committed by GitHub
parent 53b94f135a
commit 3bbeb94c1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 52 additions and 23 deletions

@ -352,8 +352,8 @@ func (b *EthAPIBackend) StartMining(threads int) error {
return b.eth.StartMining(threads) return b.eth.StartMining(threads)
} }
func (b *EthAPIBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) { func (b *EthAPIBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive, preferDisk bool) (*state.StateDB, error) {
return b.eth.stateAtBlock(block, reexec, base, checkLive) return b.eth.stateAtBlock(block, reexec, base, checkLive, preferDisk)
} }
func (b *EthAPIBackend) StateAtTransaction(ctx context.Context, block *types.Block, txIndex int, reexec uint64) (core.Message, vm.BlockContext, *state.StateDB, error) { func (b *EthAPIBackend) StateAtTransaction(ctx context.Context, block *types.Block, txIndex int, reexec uint64) (core.Message, vm.BlockContext, *state.StateDB, error) {

@ -35,7 +35,17 @@ import (
// are attempted to be reexecuted to generate the desired state. The optional // are attempted to be reexecuted to generate the desired state. The optional
// base layer statedb can be passed then it's regarded as the statedb of the // base layer statedb can be passed then it's regarded as the statedb of the
// parent block. // parent block.
func (eth *Ethereum) stateAtBlock(block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (statedb *state.StateDB, err error) { // Parameters:
// - block: The block for which we want the state (== state at the stateRoot of the parent)
// - reexec: The maximum number of blocks to reprocess trying to obtain the desired state
// - base: If the caller is tracing multiple blocks, the caller can provide the parent state
// continuously from the callsite.
// - checkLive: if true, then the live 'blockchain' state database is used. If the caller wants to
// perform Commit or other 'save-to-disk' changes, this should be set to false to avoid
// storing trash persistently.
// - preferDisk: this arg can be used by the caller to signal that even though the 'base' is provided,
// it would be preferable to start from a fresh state, if we have it on disk.
func (eth *Ethereum) stateAtBlock(block *types.Block, reexec uint64, base *state.StateDB, checkLive bool, preferDisk bool) (statedb *state.StateDB, err error) {
var ( var (
current *types.Block current *types.Block
database state.Database database state.Database
@ -50,6 +60,15 @@ func (eth *Ethereum) stateAtBlock(block *types.Block, reexec uint64, base *state
} }
} }
if base != nil { if base != nil {
if preferDisk {
// Create an ephemeral trie.Database to isolate the tracing from the live one. Otherwise,
// the internal junk created by tracing would be persisted to disk.
database = state.NewDatabaseWithConfig(eth.chainDb, &trie.Config{Cache: 16})
if statedb, err = state.New(block.Root(), database, nil); err == nil {
log.Info("Found disk backend for state trie", "root", block.Root(), "number", block.Number())
return statedb, nil
}
}
// The optional base statedb is given, mark the start point as parent block // The optional base statedb is given, mark the start point as parent block
statedb, database, report = base, base.Database(), false statedb, database, report = base, base.Database(), false
current = eth.blockchain.GetBlock(block.ParentHash(), block.NumberU64()-1) current = eth.blockchain.GetBlock(block.ParentHash(), block.NumberU64()-1)
@ -152,7 +171,7 @@ func (eth *Ethereum) stateAtTransaction(block *types.Block, txIndex int, reexec
} }
// Lookup the statedb of parent block from the live database, // Lookup the statedb of parent block from the live database,
// otherwise regenerate it on the flight. // otherwise regenerate it on the flight.
statedb, err := eth.stateAtBlock(parent, reexec, nil, true) statedb, err := eth.stateAtBlock(parent, reexec, nil, true, false)
if err != nil { if err != nil {
return nil, vm.BlockContext{}, nil, err return nil, vm.BlockContext{}, nil, err
} }

@ -53,6 +53,13 @@ const (
// and reexecute to produce missing historical state necessary to run a specific // and reexecute to produce missing historical state necessary to run a specific
// trace. // trace.
defaultTraceReexec = uint64(128) defaultTraceReexec = uint64(128)
// defaultTracechainMemLimit is the size of the triedb, at which traceChain
// switches over and tries to use a disk-backed database instead of building
// on top of memory.
// For non-archive nodes, this limit _will_ be exceeded, as disk-backed tries
// will only be found every ~15K blocks or so.
defaultTracechainMemLimit = common.StorageSize(500 * 1024 * 1024)
) )
// Backend interface provides the common API services (that are provided by // Backend interface provides the common API services (that are provided by
@ -67,7 +74,10 @@ type Backend interface {
ChainConfig() *params.ChainConfig ChainConfig() *params.ChainConfig
Engine() consensus.Engine Engine() consensus.Engine
ChainDb() ethdb.Database ChainDb() ethdb.Database
StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) // StateAtBlock returns the state corresponding to the stateroot of the block.
// N.B: For executing transactions on block N, the required stateRoot is block N-1,
// so this method should be called with the parent.
StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive, preferDisk bool) (*state.StateDB, error)
StateAtTransaction(ctx context.Context, block *types.Block, txIndex int, reexec uint64) (core.Message, vm.BlockContext, *state.StateDB, error) StateAtTransaction(ctx context.Context, block *types.Block, txIndex int, reexec uint64) (core.Message, vm.BlockContext, *state.StateDB, error)
} }
@ -320,6 +330,7 @@ func (api *API) traceChain(ctx context.Context, start, end *types.Block, config
} }
close(results) close(results)
}() }()
var preferDisk bool
// Feed all the blocks both into the tracer, as well as fast process concurrently // Feed all the blocks both into the tracer, as well as fast process concurrently
for number = start.NumberU64(); number < end.NumberU64(); number++ { for number = start.NumberU64(); number < end.NumberU64(); number++ {
// Stop tracing if interruption was requested // Stop tracing if interruption was requested
@ -349,18 +360,24 @@ func (api *API) traceChain(ctx context.Context, start, end *types.Block, config
} }
// Prepare the statedb for tracing. Don't use the live database for // Prepare the statedb for tracing. Don't use the live database for
// tracing to avoid persisting state junks into the database. // tracing to avoid persisting state junks into the database.
statedb, err = api.backend.StateAtBlock(localctx, block, reexec, statedb, false) statedb, err = api.backend.StateAtBlock(localctx, block, reexec, statedb, false, preferDisk)
if err != nil { if err != nil {
failed = err failed = err
break break
} }
if statedb.Database().TrieDB() != nil { if trieDb := statedb.Database().TrieDB(); trieDb != nil {
// Hold the reference for tracer, will be released at the final stage // Hold the reference for tracer, will be released at the final stage
statedb.Database().TrieDB().Reference(block.Root(), common.Hash{}) trieDb.Reference(block.Root(), common.Hash{})
// Release the parent state because it's already held by the tracer // Release the parent state because it's already held by the tracer
if parent != (common.Hash{}) { if parent != (common.Hash{}) {
statedb.Database().TrieDB().Dereference(parent) trieDb.Dereference(parent)
}
// Prefer disk if the trie db memory grows too much
s1, s2 := trieDb.Size()
if !preferDisk && (s1+s2) > defaultTracechainMemLimit {
log.Info("Switching to prefer-disk mode for tracing", "size", s1+s2)
preferDisk = true
} }
} }
parent = block.Root() parent = block.Root()
@ -496,7 +513,7 @@ func (api *API) IntermediateRoots(ctx context.Context, hash common.Hash, config
if config != nil && config.Reexec != nil { if config != nil && config.Reexec != nil {
reexec = *config.Reexec reexec = *config.Reexec
} }
statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true) statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -557,7 +574,7 @@ func (api *API) traceBlock(ctx context.Context, block *types.Block, config *Trac
if config != nil && config.Reexec != nil { if config != nil && config.Reexec != nil {
reexec = *config.Reexec reexec = *config.Reexec
} }
statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true) statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -646,7 +663,7 @@ func (api *API) standardTraceBlockToFile(ctx context.Context, block *types.Block
if config != nil && config.Reexec != nil { if config != nil && config.Reexec != nil {
reexec = *config.Reexec reexec = *config.Reexec
} }
statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true) statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -810,7 +827,7 @@ func (api *API) TraceCall(ctx context.Context, args ethapi.TransactionArgs, bloc
if config != nil && config.Reexec != nil { if config != nil && config.Reexec != nil {
reexec = *config.Reexec reexec = *config.Reexec
} }
statedb, err := api.backend.StateAtBlock(ctx, block, reexec, nil, true) statedb, err := api.backend.StateAtBlock(ctx, block, reexec, nil, true, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }

@ -138,7 +138,7 @@ func (b *testBackend) ChainDb() ethdb.Database {
return b.chaindb return b.chaindb
} }
func (b *testBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) { func (b *testBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool, preferDisk bool) (*state.StateDB, error) {
statedb, err := b.chain.StateAt(block.Root()) statedb, err := b.chain.StateAt(block.Root())
if err != nil { if err != nil {
return nil, errStateNotFound return nil, errStateNotFound

@ -553,17 +553,10 @@ func New(code string, ctx *Context) (*Tracer, error) {
tracer.vm.Pop() tracer.vm.Pop()
hasExit := tracer.vm.GetPropString(tracer.tracerObject, "exit") hasExit := tracer.vm.GetPropString(tracer.tracerObject, "exit")
tracer.vm.Pop() tracer.vm.Pop()
if hasEnter != hasExit { if hasEnter != hasExit {
return nil, fmt.Errorf("trace object must expose either both or none of enter() and exit()") return nil, fmt.Errorf("trace object must expose either both or none of enter() and exit()")
} }
if !hasStep { tracer.traceCallFrames = hasEnter && hasExit
// If there's no step function, the enter and exit must be present
if !hasEnter {
return nil, fmt.Errorf("trace object must expose either step() or both enter() and exit()")
}
}
tracer.traceCallFrames = hasEnter
tracer.traceSteps = hasStep tracer.traceSteps = hasStep
// Tracer is valid, inject the big int library to access large numbers // Tracer is valid, inject the big int library to access large numbers

@ -324,7 +324,7 @@ func (b *LesApiBackend) CurrentHeader() *types.Header {
return b.eth.blockchain.CurrentHeader() return b.eth.blockchain.CurrentHeader()
} }
func (b *LesApiBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) { func (b *LesApiBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool, preferDisk bool) (*state.StateDB, error) {
return b.eth.stateAtBlock(ctx, block, reexec) return b.eth.stateAtBlock(ctx, block, reexec)
} }