From 3bbeb94c1c0765db754db76c7536c1bf7c83b791 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Thu, 4 Nov 2021 18:54:00 +0100 Subject: [PATCH] eth: make traceChain avoid OOM on long-running tracing (#23736) This PR changes long-running chain tracing, so that it at some points releases the memory trie db, and switches over to a fresh disk-backed trie. --- eth/api_backend.go | 4 ++-- eth/state_accessor.go | 23 +++++++++++++++++++++-- eth/tracers/api.go | 35 ++++++++++++++++++++++++++--------- eth/tracers/api_test.go | 2 +- eth/tracers/tracer.go | 9 +-------- les/api_backend.go | 2 +- 6 files changed, 52 insertions(+), 23 deletions(-) diff --git a/eth/api_backend.go b/eth/api_backend.go index 01e68f678..a0704876a 100644 --- a/eth/api_backend.go +++ b/eth/api_backend.go @@ -352,8 +352,8 @@ func (b *EthAPIBackend) StartMining(threads int) error { return b.eth.StartMining(threads) } -func (b *EthAPIBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) { - return b.eth.stateAtBlock(block, reexec, base, checkLive) +func (b *EthAPIBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive, preferDisk bool) (*state.StateDB, error) { + return b.eth.stateAtBlock(block, reexec, base, checkLive, preferDisk) } func (b *EthAPIBackend) StateAtTransaction(ctx context.Context, block *types.Block, txIndex int, reexec uint64) (core.Message, vm.BlockContext, *state.StateDB, error) { diff --git a/eth/state_accessor.go b/eth/state_accessor.go index ca2002b60..c855f0100 100644 --- a/eth/state_accessor.go +++ b/eth/state_accessor.go @@ -35,7 +35,17 @@ import ( // are attempted to be reexecuted to generate the desired state. The optional // base layer statedb can be passed then it's regarded as the statedb of the // parent block. 
-func (eth *Ethereum) stateAtBlock(block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (statedb *state.StateDB, err error) { +// Parameters: +// - block: The block for which we want the state (== state at the stateRoot of the parent) +// - reexec: The maximum number of blocks to reprocess trying to obtain the desired state +// - base: If the caller is tracing multiple blocks, the caller can provide the parent state +// continuously from the callsite. +// - checkLive: if true, then the live 'blockchain' state database is used. If the caller wants to +// perform Commit or other 'save-to-disk' changes, this should be set to false to avoid +// storing trash persistently +// - preferDisk: this arg can be used by the caller to signal that even though the 'base' is provided, +// it would be preferable to start from a fresh state, if we have it on disk. +func (eth *Ethereum) stateAtBlock(block *types.Block, reexec uint64, base *state.StateDB, checkLive bool, preferDisk bool) (statedb *state.StateDB, err error) { var ( current *types.Block database state.Database @@ -50,6 +60,15 @@ func (eth *Ethereum) stateAtBlock(block *types.Block, reexec uint64, base *state } } if base != nil { + if preferDisk { + // Create an ephemeral trie.Database for isolating the live one. Otherwise + // the internal junk created by tracing will be persisted to disk. 
+ database = state.NewDatabaseWithConfig(eth.chainDb, &trie.Config{Cache: 16}) + if statedb, err = state.New(block.Root(), database, nil); err == nil { + log.Info("Found disk backend for state trie", "root", block.Root(), "number", block.Number()) + return statedb, nil + } + } // The optional base statedb is given, mark the start point as parent block statedb, database, report = base, base.Database(), false current = eth.blockchain.GetBlock(block.ParentHash(), block.NumberU64()-1) @@ -152,7 +171,7 @@ func (eth *Ethereum) stateAtTransaction(block *types.Block, txIndex int, reexec } // Lookup the statedb of parent block from the live database, // otherwise regenerate it on the flight. - statedb, err := eth.stateAtBlock(parent, reexec, nil, true) + statedb, err := eth.stateAtBlock(parent, reexec, nil, true, false) if err != nil { return nil, vm.BlockContext{}, nil, err } diff --git a/eth/tracers/api.go b/eth/tracers/api.go index 5019fb6f7..9bd7d9e6d 100644 --- a/eth/tracers/api.go +++ b/eth/tracers/api.go @@ -53,6 +53,13 @@ const ( // and reexecute to produce missing historical state necessary to run a specific // trace. defaultTraceReexec = uint64(128) + + // defaultTracechainMemLimit is the size of the triedb, at which traceChain + // switches over and tries to use a disk-backed database instead of building + // on top of memory. + // For non-archive nodes, this limit _will_ be overblown, as disk-backed tries + // will only be found every ~15K blocks or so. + defaultTracechainMemLimit = common.StorageSize(500 * 1024 * 1024) ) // Backend interface provides the common API services (that are provided by @@ -67,7 +74,10 @@ type Backend interface { ChainConfig() *params.ChainConfig Engine() consensus.Engine ChainDb() ethdb.Database - StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) + // StateAtBlock returns the state corresponding to the stateroot of the block. 
+ // N.B: For executing transactions on block N, the required stateRoot is block N-1, + // so this method should be called with the parent. + StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive, preferDisk bool) (*state.StateDB, error) StateAtTransaction(ctx context.Context, block *types.Block, txIndex int, reexec uint64) (core.Message, vm.BlockContext, *state.StateDB, error) } @@ -320,6 +330,7 @@ func (api *API) traceChain(ctx context.Context, start, end *types.Block, config } close(results) }() + var preferDisk bool // Feed all the blocks both into the tracer, as well as fast process concurrently for number = start.NumberU64(); number < end.NumberU64(); number++ { // Stop tracing if interruption was requested @@ -349,18 +360,24 @@ func (api *API) traceChain(ctx context.Context, start, end *types.Block, config } // Prepare the statedb for tracing. Don't use the live database for // tracing to avoid persisting state junks into the database. 
- statedb, err = api.backend.StateAtBlock(localctx, block, reexec, statedb, false) + statedb, err = api.backend.StateAtBlock(localctx, block, reexec, statedb, false, preferDisk) if err != nil { failed = err break } - if statedb.Database().TrieDB() != nil { + if trieDb := statedb.Database().TrieDB(); trieDb != nil { // Hold the reference for tracer, will be released at the final stage - statedb.Database().TrieDB().Reference(block.Root(), common.Hash{}) + trieDb.Reference(block.Root(), common.Hash{}) // Release the parent state because it's already held by the tracer if parent != (common.Hash{}) { - statedb.Database().TrieDB().Dereference(parent) + trieDb.Dereference(parent) + } + // Prefer disk if the trie db memory grows too much + s1, s2 := trieDb.Size() + if !preferDisk && (s1+s2) > defaultTracechainMemLimit { + log.Info("Switching to prefer-disk mode for tracing", "size", s1+s2) + preferDisk = true } } parent = block.Root() @@ -496,7 +513,7 @@ func (api *API) IntermediateRoots(ctx context.Context, hash common.Hash, config if config != nil && config.Reexec != nil { reexec = *config.Reexec } - statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true) + statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true, false) if err != nil { return nil, err } @@ -557,7 +574,7 @@ func (api *API) traceBlock(ctx context.Context, block *types.Block, config *Trac if config != nil && config.Reexec != nil { reexec = *config.Reexec } - statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true) + statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true, false) if err != nil { return nil, err } @@ -646,7 +663,7 @@ func (api *API) standardTraceBlockToFile(ctx context.Context, block *types.Block if config != nil && config.Reexec != nil { reexec = *config.Reexec } - statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true) + statedb, err := api.backend.StateAtBlock(ctx, parent, reexec, nil, true, false) if err != 
nil { return nil, err } @@ -810,7 +827,7 @@ func (api *API) TraceCall(ctx context.Context, args ethapi.TransactionArgs, bloc if config != nil && config.Reexec != nil { reexec = *config.Reexec } - statedb, err := api.backend.StateAtBlock(ctx, block, reexec, nil, true) + statedb, err := api.backend.StateAtBlock(ctx, block, reexec, nil, true, false) if err != nil { return nil, err } diff --git a/eth/tracers/api_test.go b/eth/tracers/api_test.go index 9afd59d59..e69704bd5 100644 --- a/eth/tracers/api_test.go +++ b/eth/tracers/api_test.go @@ -138,7 +138,7 @@ func (b *testBackend) ChainDb() ethdb.Database { return b.chaindb } -func (b *testBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) { +func (b *testBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool, preferDisk bool) (*state.StateDB, error) { statedb, err := b.chain.StateAt(block.Root()) if err != nil { return nil, errStateNotFound diff --git a/eth/tracers/tracer.go b/eth/tracers/tracer.go index ed5600453..2d681d964 100644 --- a/eth/tracers/tracer.go +++ b/eth/tracers/tracer.go @@ -553,17 +553,10 @@ func New(code string, ctx *Context) (*Tracer, error) { tracer.vm.Pop() hasExit := tracer.vm.GetPropString(tracer.tracerObject, "exit") tracer.vm.Pop() - if hasEnter != hasExit { return nil, fmt.Errorf("trace object must expose either both or none of enter() and exit()") } - if !hasStep { - // If there's no step function, the enter and exit must be present - if !hasEnter { - return nil, fmt.Errorf("trace object must expose either step() or both enter() and exit()") - } - } - tracer.traceCallFrames = hasEnter + tracer.traceCallFrames = hasEnter && hasExit tracer.traceSteps = hasStep // Tracer is valid, inject the big int library to access large numbers diff --git a/les/api_backend.go b/les/api_backend.go index d5144dfbf..11a9ca128 100644 --- a/les/api_backend.go +++ 
b/les/api_backend.go @@ -324,7 +324,7 @@ func (b *LesApiBackend) CurrentHeader() *types.Header { return b.eth.blockchain.CurrentHeader() } -func (b *LesApiBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool) (*state.StateDB, error) { +func (b *LesApiBackend) StateAtBlock(ctx context.Context, block *types.Block, reexec uint64, base *state.StateDB, checkLive bool, preferDisk bool) (*state.StateDB, error) { return b.eth.stateAtBlock(ctx, block, reexec) }