2018-02-14 13:49:11 +01:00
// Copyright 2018 The go-ethereum Authors
2018-02-05 18:40:32 +02:00
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
2023-05-09 15:11:04 +08:00
package hashdb
2018-02-05 18:40:32 +02:00
import (
2019-04-05 13:09:28 +03:00
"errors"
cmd, core/state, eth, tests, trie: improve state reader (#27428)
The state availability is checked during the creation of a state reader.
- In hash-based database, if the specified root node does not exist on disk, then
the state reader won't be created and an error will be returned.
- In path-based database, if the specified state layer is not available, then the
state reader won't be created and an error will be returned.
This change also contains a stricter semantics regarding the `Commit` operation: once it has been performed, the trie is no longer usable, and certain operations will return an error.
2023-06-21 03:31:45 +08:00
"fmt"
2018-11-22 14:09:04 +02:00
"reflect"
2018-02-05 18:40:32 +02:00
"sync"
"time"
2019-11-25 16:58:15 +08:00
"github.com/VictoriaMetrics/fastcache"
2018-02-05 18:40:32 +02:00
"github.com/ethereum/go-ethereum/common"
2020-08-21 20:10:40 +08:00
"github.com/ethereum/go-ethereum/core/rawdb"
2022-08-04 16:03:20 +08:00
"github.com/ethereum/go-ethereum/core/types"
2018-02-05 18:40:32 +02:00
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
2018-06-04 10:47:43 +03:00
"github.com/ethereum/go-ethereum/metrics"
2018-06-21 12:28:05 +03:00
"github.com/ethereum/go-ethereum/rlp"
2023-04-26 14:01:54 +08:00
"github.com/ethereum/go-ethereum/trie/trienode"
2023-08-01 20:17:32 +08:00
"github.com/ethereum/go-ethereum/trie/triestate"
2018-06-04 10:47:43 +03:00
)
var (
2023-08-01 20:17:32 +08:00
memcacheCleanHitMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/clean/hit" , nil )
memcacheCleanMissMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/clean/miss" , nil )
memcacheCleanReadMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/clean/read" , nil )
memcacheCleanWriteMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/clean/write" , nil )
memcacheDirtyHitMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/dirty/hit" , nil )
memcacheDirtyMissMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/dirty/miss" , nil )
memcacheDirtyReadMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/dirty/read" , nil )
memcacheDirtyWriteMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/dirty/write" , nil )
memcacheFlushTimeTimer = metrics . NewRegisteredResettingTimer ( "hashdb/memcache/flush/time" , nil )
memcacheFlushNodesMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/flush/nodes" , nil )
memcacheFlushBytesMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/flush/bytes" , nil )
memcacheGCTimeTimer = metrics . NewRegisteredResettingTimer ( "hashdb/memcache/gc/time" , nil )
memcacheGCNodesMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/gc/nodes" , nil )
memcacheGCBytesMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/gc/bytes" , nil )
memcacheCommitTimeTimer = metrics . NewRegisteredResettingTimer ( "hashdb/memcache/commit/time" , nil )
memcacheCommitNodesMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/commit/nodes" , nil )
memcacheCommitBytesMeter = metrics . NewRegisteredMeter ( "hashdb/memcache/commit/bytes" , nil )
2018-02-05 18:40:32 +02:00
)
2023-05-09 15:11:04 +08:00
// ChildResolver defines the required method to decode the provided
2023-04-24 15:38:52 +08:00
// trie node and iterate the children on top.
2023-05-09 15:11:04 +08:00
type ChildResolver interface {
ForEach ( node [ ] byte , onChild func ( common . Hash ) )
2023-04-24 15:38:52 +08:00
}
all: activate pbss as experimental feature (#26274)
* all: activate pbss
* core/rawdb: fix compilation error
* cma, core, eth, les, trie: address comments
* cmd, core, eth, trie: polish code
* core, cmd, eth: address comments
* cmd, core, eth, les, light, tests: address comment
* cmd/utils: shorten log message
* trie/triedb/pathdb: limit node buffer size to 1gb
* cmd/utils: fix opening non-existing db
* cmd/utils: rename flag name
* cmd, core: group chain history flags and fix tests
* core, eth, trie: fix memory leak in snapshot generation
* cmd, eth, internal: deprecate flags
* all: enable state tests for pathdb, fixes
* cmd, core: polish code
* trie/triedb/pathdb: limit the node buffer size to 256mb
---------
Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2023-08-11 03:21:36 +08:00
// Config holds the tunable settings of the hash-based database.
type Config struct {
	// CleanCacheSize is the maximum memory allowance (in bytes) for caching
	// clean nodes.
	CleanCacheSize int
}
// Defaults is the configuration used by the database when none is specified.
// Notably, the clean cache is explicitly disabled.
var Defaults = & Config {
// The clean cache size is deliberately set to 0 so that no fastcache is
// created; otherwise the database would have to be closed once it is no
// longer needed in order to prevent a memory leak.
CleanCacheSize : 0 ,
}
2018-02-05 18:40:32 +02:00
// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
type Database struct {
2023-04-24 15:38:52 +08:00
diskdb ethdb . Database // Persistent storage for matured trie nodes
2023-05-09 15:11:04 +08:00
resolver ChildResolver // The handler to resolve children of nodes
2018-02-05 18:40:32 +02:00
2019-11-25 16:58:15 +08:00
cleans * fastcache . Cache // GC friendly memory cache of clean node RLPs
2020-08-21 20:10:40 +08:00
dirties map [ common . Hash ] * cachedNode // Data and references relationships of dirty trie nodes
2018-11-12 18:47:34 +02:00
oldest common . Hash // Oldest tracked node, flush-list head
newest common . Hash // Newest tracked node, flush-list tail
2018-06-04 10:47:43 +03:00
2018-02-05 18:40:32 +02:00
gctime time . Duration // Time spent on garbage collection since last commit
gcnodes uint64 // Nodes garbage collected since last commit
gcsize common . StorageSize // Data storage garbage collected since last commit
2018-06-04 10:47:43 +03:00
flushtime time . Duration // Time spent on data flushing since last commit
flushnodes uint64 // Nodes flushed since last commit
flushsize common . StorageSize // Data storage flushed since last commit
2022-07-28 02:37:04 +08:00
dirtiesSize common . StorageSize // Storage size of the dirty node cache (exc. metadata)
childrenSize common . StorageSize // Storage size of the external children tracking
2018-02-05 18:40:32 +02:00
lock sync . RWMutex
}
2020-08-21 20:10:40 +08:00
// cachedNode is all the information we know about a single cached trie node
// in the memory database write layer.
2018-02-05 18:40:32 +02:00
type cachedNode struct {
2023-11-30 09:50:48 +00:00
node [ ] byte // Encoded node blob, immutable
2023-04-24 15:38:52 +08:00
parents uint32 // Number of live nodes referencing this one
external map [ common . Hash ] struct { } // The set of external children
flushPrev common . Hash // Previous node in the flush-list
flushNext common . Hash // Next node in the flush-list
2018-02-05 18:40:32 +02:00
}
2018-11-22 14:09:04 +02:00
// cachedNodeSize is the raw size of a cachedNode data structure without any
// node data included. It is only an approximation, but accounting for it is
// a lot better than ignoring the per-node overhead altogether.
var cachedNodeSize = int ( reflect . TypeOf ( cachedNode { } ) . Size ( ) )
2023-04-24 15:38:52 +08:00
// forChildren invokes the callback for all the tracked children of this node,
2020-08-21 20:10:40 +08:00
// both the implicit ones from inside the node as well as the explicit ones
// from outside the node.
2023-05-09 15:11:04 +08:00
func ( n * cachedNode ) forChildren ( resolver ChildResolver , onChild func ( hash common . Hash ) ) {
2023-04-24 15:38:52 +08:00
for child := range n . external {
2020-01-17 12:59:45 +01:00
onChild ( child )
2018-06-21 12:28:05 +03:00
}
2023-05-09 15:11:04 +08:00
resolver . ForEach ( n . node , onChild )
2018-06-21 12:28:05 +03:00
}
2023-05-09 15:11:04 +08:00
// New initializes the hash-based node database.
all: activate pbss as experimental feature (#26274)
* all: activate pbss
* core/rawdb: fix compilation error
* cma, core, eth, les, trie: address comments
* cmd, core, eth, trie: polish code
* core, cmd, eth: address comments
* cmd, core, eth, les, light, tests: address comment
* cmd/utils: shorten log message
* trie/triedb/pathdb: limit node buffer size to 1gb
* cmd/utils: fix opening non-existing db
* cmd/utils: rename flag name
* cmd, core: group chain history flags and fix tests
* core, eth, trie: fix memory leak in snapshot generation
* cmd, eth, internal: deprecate flags
* all: enable state tests for pathdb, fixes
* cmd, core: polish code
* trie/triedb/pathdb: limit the node buffer size to 256mb
---------
Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2023-08-11 03:21:36 +08:00
func New ( diskdb ethdb . Database , config * Config , resolver ChildResolver ) * Database {
if config == nil {
config = Defaults
}
2023-08-01 20:17:32 +08:00
var cleans * fastcache . Cache
all: activate pbss as experimental feature (#26274)
* all: activate pbss
* core/rawdb: fix compilation error
* cma, core, eth, les, trie: address comments
* cmd, core, eth, trie: polish code
* core, cmd, eth: address comments
* cmd, core, eth, les, light, tests: address comment
* cmd/utils: shorten log message
* trie/triedb/pathdb: limit node buffer size to 1gb
* cmd/utils: fix opening non-existing db
* cmd/utils: rename flag name
* cmd, core: group chain history flags and fix tests
* core, eth, trie: fix memory leak in snapshot generation
* cmd, eth, internal: deprecate flags
* all: enable state tests for pathdb, fixes
* cmd, core: polish code
* trie/triedb/pathdb: limit the node buffer size to 256mb
---------
Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2023-08-11 03:21:36 +08:00
if config . CleanCacheSize > 0 {
cleans = fastcache . New ( config . CleanCacheSize )
2023-08-01 20:17:32 +08:00
}
2023-04-24 15:38:52 +08:00
return & Database {
2023-05-09 15:11:04 +08:00
diskdb : diskdb ,
resolver : resolver ,
cleans : cleans ,
dirties : make ( map [ common . Hash ] * cachedNode ) ,
2020-11-18 17:51:33 +08:00
}
2018-02-05 18:40:32 +02:00
}
2023-11-30 09:50:48 +00:00
// insert inserts a trie node into the memory database. All nodes inserted by
// this function will be reference tracked. This function assumes the lock is
// already held.
2023-04-24 15:38:52 +08:00
func ( db * Database ) insert ( hash common . Hash , node [ ] byte ) {
2018-06-04 10:47:43 +03:00
// If the node's already cached, skip
2018-11-12 18:47:34 +02:00
if _ , ok := db . dirties [ hash ] ; ok {
2018-02-05 18:40:32 +02:00
return
}
2023-04-24 15:38:52 +08:00
memcacheDirtyWriteMeter . Mark ( int64 ( len ( node ) ) )
2019-12-02 12:14:44 +02:00
2018-06-21 12:28:05 +03:00
// Create the cached entry for this node
entry := & cachedNode {
2022-08-04 16:03:20 +08:00
node : node ,
2018-06-04 10:47:43 +03:00
flushPrev : db . newest ,
}
2023-04-24 15:38:52 +08:00
entry . forChildren ( db . resolver , func ( child common . Hash ) {
2018-11-12 18:47:34 +02:00
if c := db . dirties [ child ] ; c != nil {
2018-06-21 12:28:05 +03:00
c . parents ++
}
2020-01-17 12:59:45 +01:00
} )
2018-11-12 18:47:34 +02:00
db . dirties [ hash ] = entry
2018-06-21 12:28:05 +03:00
2018-06-04 10:47:43 +03:00
// Update the flush-list endpoints
if db . oldest == ( common . Hash { } ) {
db . oldest , db . newest = hash , hash
} else {
2018-11-12 18:47:34 +02:00
db . dirties [ db . newest ] . flushNext , db . newest = hash , hash
2018-02-05 18:40:32 +02:00
}
2023-04-24 15:38:52 +08:00
db . dirtiesSize += common . StorageSize ( common . HashLength + len ( node ) )
2018-06-21 12:28:05 +03:00
}
2023-11-30 09:50:48 +00:00
// node retrieves an encoded cached trie node from memory. If it cannot be found
2018-06-21 12:28:05 +03:00
// cached, the method queries the persistent database for the content.
2023-11-30 09:50:48 +00:00
func ( db * Database ) node ( hash common . Hash ) ( [ ] byte , error ) {
2020-05-25 16:21:28 +08:00
// It doesn't make sense to retrieve the metaroot
2019-04-05 13:09:28 +03:00
if hash == ( common . Hash { } ) {
return nil , errors . New ( "not found" )
}
2018-11-12 18:47:34 +02:00
// Retrieve the node from the clean cache if available
if db . cleans != nil {
2019-11-25 16:58:15 +08:00
if enc := db . cleans . Get ( nil , hash [ : ] ) ; enc != nil {
2018-11-12 18:47:34 +02:00
memcacheCleanHitMeter . Mark ( 1 )
memcacheCleanReadMeter . Mark ( int64 ( len ( enc ) ) )
return enc , nil
}
}
2023-11-30 09:50:48 +00:00
// Retrieve the node from the dirty cache if available.
2018-02-05 18:40:32 +02:00
db . lock . RLock ( )
2018-11-12 18:47:34 +02:00
dirty := db . dirties [ hash ]
2018-02-05 18:40:32 +02:00
db . lock . RUnlock ( )
2023-11-30 09:50:48 +00:00
// Return the cached node if it's found in the dirty set.
// The dirty.node field is immutable and safe to read it
// even without lock guard.
2018-11-12 18:47:34 +02:00
if dirty != nil {
2019-12-02 12:14:44 +02:00
memcacheDirtyHitMeter . Mark ( 1 )
2023-04-24 15:38:52 +08:00
memcacheDirtyReadMeter . Mark ( int64 ( len ( dirty . node ) ) )
return dirty . node , nil
2018-02-05 18:40:32 +02:00
}
2019-12-02 12:14:44 +02:00
memcacheDirtyMissMeter . Mark ( 1 )
2018-02-05 18:40:32 +02:00
// Content unavailable in memory, attempt to retrieve from disk
2023-02-06 10:28:40 -05:00
enc := rawdb . ReadLegacyTrieNode ( db . diskdb , hash )
2020-08-21 20:10:40 +08:00
if len ( enc ) != 0 {
2018-11-12 18:47:34 +02:00
if db . cleans != nil {
2019-11-25 16:58:15 +08:00
db . cleans . Set ( hash [ : ] , enc )
2018-11-12 18:47:34 +02:00
memcacheCleanMissMeter . Mark ( 1 )
memcacheCleanWriteMeter . Mark ( int64 ( len ( enc ) ) )
}
2020-08-21 20:10:40 +08:00
return enc , nil
2018-11-12 18:47:34 +02:00
}
2020-08-21 20:10:40 +08:00
return nil , errors . New ( "not found" )
2018-02-05 18:40:32 +02:00
}
// Reference adds a new reference from a parent node to a child node.
2020-08-21 20:10:40 +08:00
// This function is used to add reference between internal trie node
// and external node(e.g. storage trie root), all internal trie nodes
// are referenced together by database itself.
2018-02-05 18:40:32 +02:00
func ( db * Database ) Reference ( child common . Hash , parent common . Hash ) {
2019-03-26 15:48:31 +01:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2018-02-05 18:40:32 +02:00
db . reference ( child , parent )
}
// reference is the private locked version of Reference.
func ( db * Database ) reference ( child common . Hash , parent common . Hash ) {
// If the node does not exist, it's a node pulled from disk, skip
2018-11-12 18:47:34 +02:00
node , ok := db . dirties [ child ]
2018-02-05 18:40:32 +02:00
if ! ok {
return
}
2023-04-24 15:38:52 +08:00
// The reference is for state root, increase the reference counter.
if parent == ( common . Hash { } ) {
node . parents += 1
2018-02-05 18:40:32 +02:00
return
}
2023-04-24 15:38:52 +08:00
// The reference is for external storage trie, don't duplicate if
// the reference is already existent.
if db . dirties [ parent ] . external == nil {
db . dirties [ parent ] . external = make ( map [ common . Hash ] struct { } )
}
if _ , ok := db . dirties [ parent ] . external [ child ] ; ok {
return
2018-11-22 14:09:04 +02:00
}
2023-04-24 15:38:52 +08:00
node . parents ++
db . dirties [ parent ] . external [ child ] = struct { } { }
db . childrenSize += common . HashLength
2018-02-05 18:40:32 +02:00
}
2018-06-21 12:28:05 +03:00
// Dereference removes an existing reference from a root node.
func ( db * Database ) Dereference ( root common . Hash ) {
2018-08-08 17:16:38 +03:00
// Sanity check to ensure that the meta-root is not removed
if root == ( common . Hash { } ) {
log . Error ( "Attempted to dereference the trie cache meta root" )
return
}
2018-02-05 18:40:32 +02:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2018-11-12 18:47:34 +02:00
nodes , storage , start := len ( db . dirties ) , db . dirtiesSize , time . Now ( )
2023-04-24 15:38:52 +08:00
db . dereference ( root )
2018-02-05 18:40:32 +02:00
2018-11-12 18:47:34 +02:00
db . gcnodes += uint64 ( nodes - len ( db . dirties ) )
db . gcsize += storage - db . dirtiesSize
2018-02-05 18:40:32 +02:00
db . gctime += time . Since ( start )
2018-06-04 10:47:43 +03:00
memcacheGCTimeTimer . Update ( time . Since ( start ) )
2023-08-01 20:17:32 +08:00
memcacheGCBytesMeter . Mark ( int64 ( storage - db . dirtiesSize ) )
2018-11-12 18:47:34 +02:00
memcacheGCNodesMeter . Mark ( int64 ( nodes - len ( db . dirties ) ) )
2018-06-04 10:47:43 +03:00
2018-11-12 18:47:34 +02:00
log . Debug ( "Dereferenced trie from memory database" , "nodes" , nodes - len ( db . dirties ) , "size" , storage - db . dirtiesSize , "time" , time . Since ( start ) ,
"gcnodes" , db . gcnodes , "gcsize" , db . gcsize , "gctime" , db . gctime , "livenodes" , len ( db . dirties ) , "livesize" , db . dirtiesSize )
2018-02-05 18:40:32 +02:00
}
// dereference is the private locked version of Dereference.
2023-04-24 15:38:52 +08:00
func ( db * Database ) dereference ( hash common . Hash ) {
// If the node does not exist, it's a previously committed node.
node , ok := db . dirties [ hash ]
2018-02-05 18:40:32 +02:00
if ! ok {
return
}
2023-04-24 15:38:52 +08:00
// If there are no more references to the node, delete it and cascade
2018-07-02 12:19:41 +03:00
if node . parents > 0 {
// This is a special cornercase where a node loaded from disk (i.e. not in the
// memcache any more) gets reinjected as a new node (short node split into full,
// then reverted into short), causing a cached node to have no parents. That is
// no problem in itself, but don't make maxint parents out of it.
node . parents --
}
2018-02-05 18:40:32 +02:00
if node . parents == 0 {
2018-06-04 10:47:43 +03:00
// Remove the node from the flush-list
2023-04-24 15:38:52 +08:00
switch hash {
2018-07-30 16:31:17 +03:00
case db . oldest :
2018-06-04 10:47:43 +03:00
db . oldest = node . flushNext
2023-04-24 15:38:52 +08:00
if node . flushNext != ( common . Hash { } ) {
db . dirties [ node . flushNext ] . flushPrev = common . Hash { }
}
2018-07-30 16:31:17 +03:00
case db . newest :
db . newest = node . flushPrev
2023-04-24 15:38:52 +08:00
if node . flushPrev != ( common . Hash { } ) {
db . dirties [ node . flushPrev ] . flushNext = common . Hash { }
}
2018-07-30 16:31:17 +03:00
default :
2018-11-12 18:47:34 +02:00
db . dirties [ node . flushPrev ] . flushNext = node . flushNext
db . dirties [ node . flushNext ] . flushPrev = node . flushPrev
2018-06-04 10:47:43 +03:00
}
// Dereference all children and delete the node
2023-04-24 15:38:52 +08:00
node . forChildren ( db . resolver , func ( child common . Hash ) {
db . dereference ( child )
2020-01-17 12:59:45 +01:00
} )
2023-04-24 15:38:52 +08:00
delete ( db . dirties , hash )
db . dirtiesSize -= common . StorageSize ( common . HashLength + len ( node . node ) )
if node . external != nil {
db . childrenSize -= common . StorageSize ( len ( node . external ) * common . HashLength )
2018-11-22 14:09:04 +02:00
}
2018-02-05 18:40:32 +02:00
}
}
2018-06-04 10:47:43 +03:00
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
func ( db * Database ) Cap ( limit common . StorageSize ) error {
2023-11-30 09:50:48 +00:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2018-06-04 10:47:43 +03:00
// Create a database batch to flush persistent data out. It is important that
// outside code doesn't see an inconsistent state (referenced data removed from
// memory cache during commit but not yet in persistent storage). This is ensured
// by only uncaching existing data when the database write finalizes.
batch := db . diskdb . NewBatch ( )
2023-11-30 09:50:48 +00:00
nodes , storage , start := len ( db . dirties ) , db . dirtiesSize , time . Now ( )
2018-06-04 10:47:43 +03:00
2018-11-12 18:47:34 +02:00
// db.dirtiesSize only contains the useful data in the cache, but when reporting
2018-06-04 10:47:43 +03:00
// the total memory consumption, the maintenance metadata is also needed to be
2018-11-22 14:09:04 +02:00
// counted.
2023-04-24 15:38:52 +08:00
size := db . dirtiesSize + common . StorageSize ( len ( db . dirties ) * cachedNodeSize )
size += db . childrenSize
2018-06-04 10:47:43 +03:00
// Keep committing nodes from the flush-list until we're below allowance
oldest := db . oldest
for size > limit && oldest != ( common . Hash { } ) {
// Fetch the oldest referenced node and push into the batch
2018-11-12 18:47:34 +02:00
node := db . dirties [ oldest ]
2023-04-24 15:38:52 +08:00
rawdb . WriteLegacyTrieNode ( batch , oldest , node . node )
2020-08-21 20:10:40 +08:00
2018-06-04 10:47:43 +03:00
// If we exceeded the ideal batch size, commit and reset
if batch . ValueSize ( ) >= ethdb . IdealBatchSize {
if err := batch . Write ( ) ; err != nil {
log . Error ( "Failed to write flush list to disk" , "err" , err )
return err
}
batch . Reset ( )
}
// Iterate to the next flush item, or abort if the size cap was achieved. Size
2018-11-22 14:09:04 +02:00
// is the total size, including the useful cached data (hash -> blob), the
// cache item metadata, as well as external children mappings.
2023-04-24 15:38:52 +08:00
size -= common . StorageSize ( common . HashLength + len ( node . node ) + cachedNodeSize )
if node . external != nil {
size -= common . StorageSize ( len ( node . external ) * common . HashLength )
2018-11-22 14:09:04 +02:00
}
2018-06-04 10:47:43 +03:00
oldest = node . flushNext
}
// Flush out any remainder data from the last batch
if err := batch . Write ( ) ; err != nil {
log . Error ( "Failed to write flush list to disk" , "err" , err )
return err
}
// Write successful, clear out the flushed data
for db . oldest != oldest {
2018-11-12 18:47:34 +02:00
node := db . dirties [ db . oldest ]
delete ( db . dirties , db . oldest )
2018-06-04 10:47:43 +03:00
db . oldest = node . flushNext
2023-04-24 15:38:52 +08:00
db . dirtiesSize -= common . StorageSize ( common . HashLength + len ( node . node ) )
if node . external != nil {
db . childrenSize -= common . StorageSize ( len ( node . external ) * common . HashLength )
2018-11-22 14:09:04 +02:00
}
2018-06-04 10:47:43 +03:00
}
if db . oldest != ( common . Hash { } ) {
2018-11-12 18:47:34 +02:00
db . dirties [ db . oldest ] . flushPrev = common . Hash { }
2018-06-04 10:47:43 +03:00
}
2018-11-12 18:47:34 +02:00
db . flushnodes += uint64 ( nodes - len ( db . dirties ) )
db . flushsize += storage - db . dirtiesSize
2018-06-04 10:47:43 +03:00
db . flushtime += time . Since ( start )
memcacheFlushTimeTimer . Update ( time . Since ( start ) )
2023-08-01 20:17:32 +08:00
memcacheFlushBytesMeter . Mark ( int64 ( storage - db . dirtiesSize ) )
2018-11-12 18:47:34 +02:00
memcacheFlushNodesMeter . Mark ( int64 ( nodes - len ( db . dirties ) ) )
2018-06-04 10:47:43 +03:00
2018-11-12 18:47:34 +02:00
log . Debug ( "Persisted nodes from memory database" , "nodes" , nodes - len ( db . dirties ) , "size" , storage - db . dirtiesSize , "time" , time . Since ( start ) ,
"flushnodes" , db . flushnodes , "flushsize" , db . flushsize , "flushtime" , db . flushtime , "livenodes" , len ( db . dirties ) , "livesize" , db . dirtiesSize )
2018-06-04 10:47:43 +03:00
return nil
}
2018-02-05 18:40:32 +02:00
// Commit iterates over all the children of a particular node, writes them out
2019-03-26 15:48:31 +01:00
// to disk, forcefully tearing down all references in both directions. As a side
// effect, all pre-images accumulated up to this point are also written.
2023-02-08 19:14:34 +08:00
func ( db * Database ) Commit ( node common . Hash , report bool ) error {
2023-11-30 09:50:48 +00:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2018-02-05 18:40:32 +02:00
// Create a database batch to flush persistent data out. It is important that
// outside code doesn't see an inconsistent state (referenced data removed from
// memory cache during commit but not yet in persistent storage). This is ensured
// by only uncaching existing data when the database write finalizes.
start := time . Now ( )
batch := db . diskdb . NewBatch ( )
// Move the trie itself into the batch, flushing if enough data is accumulated
2018-11-12 18:47:34 +02:00
nodes , storage := len ( db . dirties ) , db . dirtiesSize
2019-03-26 15:48:31 +01:00
uncacher := & cleaner { db }
2023-02-08 19:14:34 +08:00
if err := db . commit ( node , batch , uncacher ) ; err != nil {
2018-02-05 18:40:32 +02:00
log . Error ( "Failed to commit trie from trie database" , "err" , err )
return err
}
2019-03-26 15:48:31 +01:00
// Trie mostly committed to disk, flush any batch leftovers
2018-02-05 18:40:32 +02:00
if err := batch . Write ( ) ; err != nil {
log . Error ( "Failed to write trie to disk" , "err" , err )
return err
}
2019-03-26 15:48:31 +01:00
// Uncache any leftovers in the last batch
2022-09-16 02:05:53 +04:30
if err := batch . Replay ( uncacher ) ; err != nil {
return err
}
2019-03-26 15:48:31 +01:00
batch . Reset ( )
2021-01-07 15:36:21 +09:00
// Reset the storage counters and bumped metrics
2018-06-04 10:47:43 +03:00
memcacheCommitTimeTimer . Update ( time . Since ( start ) )
2023-08-01 20:17:32 +08:00
memcacheCommitBytesMeter . Mark ( int64 ( storage - db . dirtiesSize ) )
2018-11-12 18:47:34 +02:00
memcacheCommitNodesMeter . Mark ( int64 ( nodes - len ( db . dirties ) ) )
2018-06-04 10:47:43 +03:00
2018-02-05 18:40:32 +02:00
logger := log . Info
if ! report {
logger = log . Debug
}
2018-11-12 18:47:34 +02:00
logger ( "Persisted trie from memory database" , "nodes" , nodes - len ( db . dirties ) + int ( db . flushnodes ) , "size" , storage - db . dirtiesSize + db . flushsize , "time" , time . Since ( start ) + db . flushtime ,
"gcnodes" , db . gcnodes , "gcsize" , db . gcsize , "gctime" , db . gctime , "livenodes" , len ( db . dirties ) , "livesize" , db . dirtiesSize )
2018-02-05 18:40:32 +02:00
// Reset the garbage collection statistics
db . gcnodes , db . gcsize , db . gctime = 0 , 0 , 0
2018-06-04 10:47:43 +03:00
db . flushnodes , db . flushsize , db . flushtime = 0 , 0 , 0
2018-02-05 18:40:32 +02:00
return nil
}
// commit is the private locked version of Commit.
2023-02-08 19:14:34 +08:00
func ( db * Database ) commit ( hash common . Hash , batch ethdb . Batch , uncacher * cleaner ) error {
2018-02-05 18:40:32 +02:00
// If the node does not exist, it's a previously committed node
2018-11-12 18:47:34 +02:00
node , ok := db . dirties [ hash ]
2018-02-05 18:40:32 +02:00
if ! ok {
return nil
}
2020-01-17 12:59:45 +01:00
var err error
2023-04-24 15:38:52 +08:00
// Dereference all children and delete the node
node . forChildren ( db . resolver , func ( child common . Hash ) {
2020-01-17 12:59:45 +01:00
if err == nil {
2023-02-08 19:14:34 +08:00
err = db . commit ( child , batch , uncacher )
2018-02-05 18:40:32 +02:00
}
2020-01-17 12:59:45 +01:00
} )
if err != nil {
return err
2018-02-05 18:40:32 +02:00
}
2020-08-21 20:10:40 +08:00
// If we've reached an optimal batch size, commit and start over
2023-04-24 15:38:52 +08:00
rawdb . WriteLegacyTrieNode ( batch , hash , node . node )
2018-02-05 18:40:32 +02:00
if batch . ValueSize ( ) >= ethdb . IdealBatchSize {
if err := batch . Write ( ) ; err != nil {
return err
}
2022-09-16 02:05:53 +04:30
err := batch . Replay ( uncacher )
if err != nil {
return err
}
2023-11-30 09:50:48 +00:00
batch . Reset ( )
2018-02-05 18:40:32 +02:00
}
return nil
}
2019-03-26 15:48:31 +01:00
// cleaner is a database batch replayer that takes a batch of write operations
// and cleans up the trie database from anything written to disk.
type cleaner struct {
// db is the database whose dirty cache is cleaned up on replay.
db * Database
}
// Put reacts to database writes and implements dirty data uncaching. This is the
// post-processing step of a commit operation where the already persisted trie is
// removed from the dirty cache and moved into the clean cache. The reason behind
2021-10-19 01:53:56 +07:00
// the two-phase commit is to ensure data availability while moving from memory
// to disk.
2019-03-26 15:48:31 +01:00
func ( c * cleaner ) Put ( key [ ] byte , rlp [ ] byte ) error {
hash := common . BytesToHash ( key )
2018-02-05 18:40:32 +02:00
// If the node does not exist, we're done on this path
2019-03-26 15:48:31 +01:00
node , ok := c . db . dirties [ hash ]
2018-02-05 18:40:32 +02:00
if ! ok {
2019-03-26 15:48:31 +01:00
return nil
2018-02-05 18:40:32 +02:00
}
2018-06-04 10:47:43 +03:00
// Node still exists, remove it from the flush-list
2018-07-30 16:31:17 +03:00
switch hash {
2019-03-26 15:48:31 +01:00
case c . db . oldest :
c . db . oldest = node . flushNext
2023-04-24 15:38:52 +08:00
if node . flushNext != ( common . Hash { } ) {
c . db . dirties [ node . flushNext ] . flushPrev = common . Hash { }
}
2019-03-26 15:48:31 +01:00
case c . db . newest :
c . db . newest = node . flushPrev
2023-04-24 15:38:52 +08:00
if node . flushPrev != ( common . Hash { } ) {
c . db . dirties [ node . flushPrev ] . flushNext = common . Hash { }
}
2018-07-30 16:31:17 +03:00
default :
2019-03-26 15:48:31 +01:00
c . db . dirties [ node . flushPrev ] . flushNext = node . flushNext
c . db . dirties [ node . flushNext ] . flushPrev = node . flushPrev
2018-06-04 10:47:43 +03:00
}
2019-03-26 15:48:31 +01:00
// Remove the node from the dirty cache
delete ( c . db . dirties , hash )
2023-04-24 15:38:52 +08:00
c . db . dirtiesSize -= common . StorageSize ( common . HashLength + len ( node . node ) )
if node . external != nil {
c . db . childrenSize -= common . StorageSize ( len ( node . external ) * common . HashLength )
2018-11-22 14:09:04 +02:00
}
2019-03-26 15:48:31 +01:00
// Move the flushed node into the clean cache to prevent insta-reloads
if c . db . cleans != nil {
2019-11-25 16:58:15 +08:00
c . db . cleans . Set ( hash [ : ] , rlp )
2019-12-02 12:14:44 +02:00
memcacheCleanWriteMeter . Mark ( int64 ( len ( rlp ) ) )
2018-02-05 18:40:32 +02:00
}
2019-03-26 15:48:31 +01:00
return nil
}
func ( c * cleaner ) Delete ( key [ ] byte ) error {
2019-11-22 16:24:48 +01:00
panic ( "not implemented" )
2018-02-05 18:40:32 +02:00
}
2023-05-09 15:11:04 +08:00
// Initialized reports whether the state data of the hash-based scheme has
// been initialized already, which is determined by the presence of the
// genesis state root node in the disk database.
func (db *Database) Initialized(genesisRoot common.Hash) bool {
	return rawdb.HasLegacyTrieNode(db.diskdb, genesisRoot)
}
// Update inserts the dirty nodes in provided nodeset into database and link the
// account trie with multiple storage tries if necessary.
2023-08-01 20:17:32 +08:00
func ( db * Database ) Update ( root common . Hash , parent common . Hash , block uint64 , nodes * trienode . MergedNodeSet , states * triestate . Set ) error {
2023-05-09 15:11:04 +08:00
// Ensure the parent state is present and signal a warning if not.
if parent != types . EmptyRootHash {
2023-11-30 09:50:48 +00:00
if blob , _ := db . node ( parent ) ; len ( blob ) == 0 {
2023-05-09 15:11:04 +08:00
log . Error ( "parent state is not present" )
}
}
2022-08-04 16:03:20 +08:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
// Insert dirty nodes into the database. In the same tree, it must be
// ensured that children are inserted first, then parent so that children
2022-08-23 21:17:12 +03:00
// can be linked with their parent correctly.
//
// Note, the storage tries must be flushed before the account trie to
// retain the invariant that children go into the dirty cache first.
var order [ ] common . Hash
2023-05-09 15:11:04 +08:00
for owner := range nodes . Sets {
2022-08-23 21:17:12 +03:00
if owner == ( common . Hash { } ) {
continue
}
order = append ( order , owner )
}
2023-05-09 15:11:04 +08:00
if _ , ok := nodes . Sets [ common . Hash { } ] ; ok {
2022-08-23 21:17:12 +03:00
order = append ( order , common . Hash { } )
}
for _ , owner := range order {
2023-05-09 15:11:04 +08:00
subset := nodes . Sets [ owner ]
subset . ForEachWithOrder ( func ( path string , n * trienode . Node ) {
2023-04-26 14:01:54 +08:00
if n . IsDeleted ( ) {
2023-03-14 16:50:53 +08:00
return // ignore deletion
2022-08-04 16:03:20 +08:00
}
2023-04-26 14:01:54 +08:00
db . insert ( n . Hash , n . Blob )
2023-03-14 16:50:53 +08:00
} )
2022-08-04 16:03:20 +08:00
}
// Link up the account trie and storage trie if the node points
// to an account trie leaf.
2023-05-09 15:11:04 +08:00
if set , present := nodes . Sets [ common . Hash { } ] ; present {
for _ , n := range set . Leaves {
2022-08-04 16:03:20 +08:00
var account types . StateAccount
2023-05-09 15:11:04 +08:00
if err := rlp . DecodeBytes ( n . Blob , & account ) ; err != nil {
2022-08-04 16:03:20 +08:00
return err
}
2023-02-21 19:12:27 +08:00
if account . Root != types . EmptyRootHash {
2023-05-09 15:11:04 +08:00
db . reference ( account . Root , n . Parent )
2022-08-04 16:03:20 +08:00
}
}
}
return nil
}
2018-02-05 18:40:32 +02:00
// Size returns the current storage size of the memory cache in front of the
// persistent database layer.
2023-08-23 14:08:39 +03:00
//
// The first return will always be 0, representing the memory stored in unbounded
// diff layers above the dirty cache. This is only available in pathdb.
func ( db * Database ) Size ( ) ( common . StorageSize , common . StorageSize ) {
2018-02-05 18:40:32 +02:00
db . lock . RLock ( )
defer db . lock . RUnlock ( )
2018-11-12 18:47:34 +02:00
// db.dirtiesSize only contains the useful data in the cache, but when reporting
2018-06-04 10:47:43 +03:00
// the total memory consumption, the maintenance metadata is also needed to be
2018-11-22 14:09:04 +02:00
// counted.
2023-04-24 15:38:52 +08:00
var metadataSize = common . StorageSize ( len ( db . dirties ) * cachedNodeSize )
2023-08-23 14:08:39 +03:00
return 0 , db . dirtiesSize + db . childrenSize + metadataSize
2018-02-05 18:40:32 +02:00
}
2020-07-28 21:30:31 +08:00
2023-05-09 15:11:04 +08:00
// Close closes the trie database and releases all held resources.
all: activate pbss as experimental feature (#26274)
* all: activate pbss
* core/rawdb: fix compilation error
* cma, core, eth, les, trie: address comments
* cmd, core, eth, trie: polish code
* core, cmd, eth: address comments
* cmd, core, eth, les, light, tests: address comment
* cmd/utils: shorten log message
* trie/triedb/pathdb: limit node buffer size to 1gb
* cmd/utils: fix opening non-existing db
* cmd/utils: rename flag name
* cmd, core: group chain history flags and fix tests
* core, eth, trie: fix memory leak in snapshot generation
* cmd, eth, internal: deprecate flags
* all: enable state tests for pathdb, fixes
* cmd, core: polish code
* trie/triedb/pathdb: limit the node buffer size to 256mb
---------
Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2023-08-11 03:21:36 +08:00
func ( db * Database ) Close ( ) error {
if db . cleans != nil {
db . cleans . Reset ( )
db . cleans = nil
}
return nil
}
2023-05-09 15:11:04 +08:00
// Scheme returns the node scheme used in the database.
func ( db * Database ) Scheme ( ) string {
return rawdb . HashScheme
cmd, core, eth, les, light: track deleted nodes (#25757)
* cmd, core, eth, les, light: track deleted nodes
* trie: add docs
* trie: address comments
* cmd, core, eth, les, light, trie: trie id
* trie: add tests
* trie, core: updates
* trie: fix imports
* trie: add utility print-method for nodeset
* trie: import err
* trie: fix go vet warnings
Co-authored-by: Martin Holst Swende <martin@swende.se>
2022-09-27 16:01:02 +08:00
}
2023-05-09 15:11:04 +08:00
// Reader retrieves a node reader belonging to the given state root.
cmd, core/state, eth, tests, trie: improve state reader (#27428)
The state availability is checked during the creation of a state reader.
- In hash-based database, if the specified root node does not exist on disk disk, then
the state reader won't be created and an error will be returned.
- In path-based database, if the specified state layer is not available, then the
state reader won't be created and an error will be returned.
This change also contains a stricter semantics regarding the `Commit` operation: once it has been performed, the trie is no longer usable, and certain operations will return an error.
2023-06-21 03:31:45 +08:00
// An error will be returned if the requested state is not available.
func ( db * Database ) Reader ( root common . Hash ) ( * reader , error ) {
2023-11-30 09:50:48 +00:00
if _ , err := db . node ( root ) ; err != nil {
cmd, core/state, eth, tests, trie: improve state reader (#27428)
The state availability is checked during the creation of a state reader.
- In hash-based database, if the specified root node does not exist on disk disk, then
the state reader won't be created and an error will be returned.
- In path-based database, if the specified state layer is not available, then the
state reader won't be created and an error will be returned.
This change also contains a stricter semantics regarding the `Commit` operation: once it has been performed, the trie is no longer usable, and certain operations will return an error.
2023-06-21 03:31:45 +08:00
return nil , fmt . Errorf ( "state %#x is not available, %v" , root , err )
}
return & reader { db : db } , nil
cmd, core, eth, les, light: track deleted nodes (#25757)
* cmd, core, eth, les, light: track deleted nodes
* trie: add docs
* trie: address comments
* cmd, core, eth, les, light, trie: trie id
* trie: add tests
* trie, core: updates
* trie: fix imports
* trie: add utility print-method for nodeset
* trie: import err
* trie: fix go vet warnings
Co-authored-by: Martin Holst Swende <martin@swende.se>
2022-09-27 16:01:02 +08:00
}
2023-05-09 15:11:04 +08:00
// reader is a state reader of Database which implements the Reader interface.
type reader struct {
db * Database
cmd, core, eth, les, light: track deleted nodes (#25757)
* cmd, core, eth, les, light: track deleted nodes
* trie: add docs
* trie: address comments
* cmd, core, eth, les, light, trie: trie id
* trie: add tests
* trie, core: updates
* trie: fix imports
* trie: add utility print-method for nodeset
* trie: import err
* trie: fix go vet warnings
Co-authored-by: Martin Holst Swende <martin@swende.se>
2022-09-27 16:01:02 +08:00
}
2023-11-30 09:50:48 +00:00
// Node retrieves the trie node with the given node hash. No error will be
// returned if the node is not found.
2023-05-09 15:11:04 +08:00
func ( reader * reader ) Node ( owner common . Hash , path [ ] byte , hash common . Hash ) ( [ ] byte , error ) {
2023-11-30 09:50:48 +00:00
blob , _ := reader . db . node ( hash )
cmd, core, eth, les, light: track deleted nodes (#25757)
* cmd, core, eth, les, light: track deleted nodes
* trie: add docs
* trie: address comments
* cmd, core, eth, les, light, trie: trie id
* trie: add tests
* trie, core: updates
* trie: fix imports
* trie: add utility print-method for nodeset
* trie: import err
* trie: fix go vet warnings
Co-authored-by: Martin Holst Swende <martin@swende.se>
2022-09-27 16:01:02 +08:00
return blob , nil
}