2022-05-24 21:39:40 +03:00
|
|
|
// Copyright 2020 The go-ethereum Authors
|
2020-02-03 18:28:30 +03:00
|
|
|
// This file is part of the go-ethereum library.
|
|
|
|
//
|
|
|
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
package trie
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/ethereum/go-ethereum/common"
|
|
|
|
)
|
|
|
|
|
|
|
|
// leafChanSize is the buffer capacity intended for a committer's leafCh.
// The figure is fairly arbitrary: big enough to permit some parallelism,
// small enough to keep the memory overhead modest.
const leafChanSize = 200
|
|
|
|
|
|
|
|
// leaf represents a trie leaf value
|
|
|
|
type leaf struct {
|
2020-09-30 14:45:56 +03:00
|
|
|
size int // size of the rlp data (estimate)
|
|
|
|
hash common.Hash // hash of rlp data
|
|
|
|
node node // the node to commit
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// committer is a type used for the trie Commit operation. A committer has some
|
|
|
|
// internal preallocated temp space, and also a callback that is invoked when
|
|
|
|
// leaves are committed. The leafs are passed through the `leafCh`, to allow
|
2020-05-25 11:21:28 +03:00
|
|
|
// some level of parallelism.
|
2020-02-03 18:28:30 +03:00
|
|
|
// By 'some level' of parallelism, it's still the case that all leaves will be
|
|
|
|
// processed sequentially - onleaf will never be called in parallel or out of order.
|
|
|
|
type committer struct {
|
|
|
|
onleaf LeafCallback
|
|
|
|
leafCh chan *leaf
|
|
|
|
}
|
|
|
|
|
|
|
|
// committers live in a global sync.Pool
|
|
|
|
var committerPool = sync.Pool{
|
|
|
|
New: func() interface{} {
|
2022-04-20 17:12:06 +03:00
|
|
|
return &committer{}
|
2020-02-03 18:28:30 +03:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
// newCommitter creates a new committer or picks one from the pool.
|
|
|
|
func newCommitter() *committer {
|
|
|
|
return committerPool.Get().(*committer)
|
|
|
|
}
|
|
|
|
|
|
|
|
func returnCommitterToPool(h *committer) {
|
|
|
|
h.onleaf = nil
|
|
|
|
h.leafCh = nil
|
|
|
|
committerPool.Put(h)
|
|
|
|
}
|
|
|
|
|
2021-08-24 22:00:42 +03:00
|
|
|
// Commit collapses a node down into a hash node and inserts it into the database
|
|
|
|
func (c *committer) Commit(n node, db *Database) (hashNode, int, error) {
|
2020-02-03 18:28:30 +03:00
|
|
|
if db == nil {
|
2021-08-24 22:00:42 +03:00
|
|
|
return nil, 0, errors.New("no db provided")
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2021-08-24 22:00:42 +03:00
|
|
|
h, committed, err := c.commit(n, db)
|
2020-02-03 18:28:30 +03:00
|
|
|
if err != nil {
|
2021-08-24 22:00:42 +03:00
|
|
|
return nil, 0, err
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2021-08-24 22:00:42 +03:00
|
|
|
return h.(hashNode), committed, nil
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// commit collapses a node down into a hash node and inserts it into the database
|
2021-08-24 22:00:42 +03:00
|
|
|
func (c *committer) commit(n node, db *Database) (node, int, error) {
|
2020-02-03 18:28:30 +03:00
|
|
|
// if this path is clean, use available cached data
|
|
|
|
hash, dirty := n.cache()
|
|
|
|
if hash != nil && !dirty {
|
2021-08-24 22:00:42 +03:00
|
|
|
return hash, 0, nil
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2022-03-31 10:28:32 +03:00
|
|
|
// Commit children, then parent, and remove the dirty flag.
|
2020-02-03 18:28:30 +03:00
|
|
|
switch cn := n.(type) {
|
|
|
|
case *shortNode:
|
|
|
|
// Commit child
|
|
|
|
collapsed := cn.copy()
|
2020-09-30 14:45:56 +03:00
|
|
|
|
2021-08-24 22:00:42 +03:00
|
|
|
// If the child is fullNode, recursively commit,
|
|
|
|
// otherwise it can only be hashNode or valueNode.
|
|
|
|
var childCommitted int
|
2020-09-30 14:45:56 +03:00
|
|
|
if _, ok := cn.Val.(*fullNode); ok {
|
2021-08-24 22:00:42 +03:00
|
|
|
childV, committed, err := c.commit(cn.Val, db)
|
2020-07-15 11:00:04 +03:00
|
|
|
if err != nil {
|
2021-08-24 22:00:42 +03:00
|
|
|
return nil, 0, err
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2021-08-24 22:00:42 +03:00
|
|
|
collapsed.Val, childCommitted = childV, committed
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
// The key needs to be copied, since we're delivering it to database
|
|
|
|
collapsed.Key = hexToCompact(cn.Key)
|
2020-09-30 14:45:56 +03:00
|
|
|
hashedNode := c.store(collapsed, db)
|
2020-02-03 18:28:30 +03:00
|
|
|
if hn, ok := hashedNode.(hashNode); ok {
|
2021-08-24 22:00:42 +03:00
|
|
|
return hn, childCommitted + 1, nil
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2021-08-24 22:00:42 +03:00
|
|
|
return collapsed, childCommitted, nil
|
2020-02-03 18:28:30 +03:00
|
|
|
case *fullNode:
|
2021-08-24 22:00:42 +03:00
|
|
|
hashedKids, childCommitted, err := c.commitChildren(cn, db)
|
2020-02-03 18:28:30 +03:00
|
|
|
if err != nil {
|
2021-08-24 22:00:42 +03:00
|
|
|
return nil, 0, err
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
collapsed := cn.copy()
|
|
|
|
collapsed.Children = hashedKids
|
|
|
|
|
2020-09-30 14:45:56 +03:00
|
|
|
hashedNode := c.store(collapsed, db)
|
2020-02-03 18:28:30 +03:00
|
|
|
if hn, ok := hashedNode.(hashNode); ok {
|
2021-08-24 22:00:42 +03:00
|
|
|
return hn, childCommitted + 1, nil
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2021-08-24 22:00:42 +03:00
|
|
|
return collapsed, childCommitted, nil
|
2020-02-03 18:28:30 +03:00
|
|
|
case hashNode:
|
2021-08-24 22:00:42 +03:00
|
|
|
return cn, 0, nil
|
2020-09-30 14:45:56 +03:00
|
|
|
default:
|
|
|
|
// nil, valuenode shouldn't be committed
|
|
|
|
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// commitChildren commits the children of the given fullnode
|
2021-08-24 22:00:42 +03:00
|
|
|
func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, int, error) {
|
|
|
|
var (
|
|
|
|
committed int
|
|
|
|
children [17]node
|
|
|
|
)
|
2020-09-30 14:45:56 +03:00
|
|
|
for i := 0; i < 16; i++ {
|
|
|
|
child := n.Children[i]
|
2020-02-03 18:28:30 +03:00
|
|
|
if child == nil {
|
|
|
|
continue
|
|
|
|
}
|
2020-09-30 14:45:56 +03:00
|
|
|
// If it's the hashed child, save the hash value directly.
|
|
|
|
// Note: it's impossible that the child in range [0, 15]
|
2021-08-24 22:00:42 +03:00
|
|
|
// is a valueNode.
|
2020-09-30 14:45:56 +03:00
|
|
|
if hn, ok := child.(hashNode); ok {
|
|
|
|
children[i] = hn
|
|
|
|
continue
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2020-09-30 14:45:56 +03:00
|
|
|
// Commit the child recursively and store the "hashed" value.
|
|
|
|
// Note the returned node can be some embedded nodes, so it's
|
2021-08-24 22:00:42 +03:00
|
|
|
// possible the type is not hashNode.
|
|
|
|
hashed, childCommitted, err := c.commit(child, db)
|
2020-09-30 14:45:56 +03:00
|
|
|
if err != nil {
|
2021-08-24 22:00:42 +03:00
|
|
|
return children, 0, err
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2020-09-30 14:45:56 +03:00
|
|
|
children[i] = hashed
|
2021-08-24 22:00:42 +03:00
|
|
|
committed += childCommitted
|
2020-09-30 14:45:56 +03:00
|
|
|
}
|
|
|
|
// For the 17th child, it's possible the type is valuenode.
|
|
|
|
if n.Children[16] != nil {
|
|
|
|
children[16] = n.Children[16]
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
2021-08-24 22:00:42 +03:00
|
|
|
return children, committed, nil
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// store hashes the node n and if we have a storage layer specified, it writes
|
|
|
|
// the key/value pair to it and tracks any node->child references as well as any
|
|
|
|
// node->external trie references.
|
2020-09-30 14:45:56 +03:00
|
|
|
func (c *committer) store(n node, db *Database) node {
|
2020-02-03 18:28:30 +03:00
|
|
|
// Larger nodes are replaced by their hash and stored in the database.
|
|
|
|
var (
|
|
|
|
hash, _ = n.cache()
|
|
|
|
size int
|
|
|
|
)
|
|
|
|
if hash == nil {
|
2020-09-30 14:45:56 +03:00
|
|
|
// This was not generated - must be a small node stored in the parent.
|
2021-08-24 22:00:42 +03:00
|
|
|
// In theory, we should apply the leafCall here if it's not nil(embedded
|
2020-09-30 14:45:56 +03:00
|
|
|
// node usually contains value). But small value(less than 32bytes) is
|
|
|
|
// not our target.
|
|
|
|
return n
|
2020-02-03 18:28:30 +03:00
|
|
|
} else {
|
|
|
|
// We have the hash already, estimate the RLP encoding-size of the node.
|
|
|
|
// The size is used for mem tracking, does not need to be exact
|
|
|
|
size = estimateSize(n)
|
|
|
|
}
|
|
|
|
// If we're using channel-based leaf-reporting, send to channel.
|
|
|
|
// The leaf channel will be active only when there an active leaf-callback
|
|
|
|
if c.leafCh != nil {
|
|
|
|
c.leafCh <- &leaf{
|
2020-09-30 14:45:56 +03:00
|
|
|
size: size,
|
|
|
|
hash: common.BytesToHash(hash),
|
|
|
|
node: n,
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
} else if db != nil {
|
|
|
|
// No leaf-callback used, but there's still a database. Do serial
|
|
|
|
// insertion
|
|
|
|
db.insert(common.BytesToHash(hash), size, n)
|
|
|
|
}
|
|
|
|
return hash
|
|
|
|
}
|
|
|
|
|
2020-09-30 14:45:56 +03:00
|
|
|
// commitLoop does the actual insert + leaf callback for nodes.
|
2020-02-03 18:28:30 +03:00
|
|
|
func (c *committer) commitLoop(db *Database) {
|
|
|
|
for item := range c.leafCh {
|
|
|
|
var (
|
2020-09-30 14:45:56 +03:00
|
|
|
hash = item.hash
|
|
|
|
size = item.size
|
|
|
|
n = item.node
|
2020-02-03 18:28:30 +03:00
|
|
|
)
|
|
|
|
// We are pooling the trie nodes into an intermediate memory cache
|
|
|
|
db.insert(hash, size, n)
|
2020-09-30 14:45:56 +03:00
|
|
|
|
|
|
|
if c.onleaf != nil {
|
2020-02-03 18:28:30 +03:00
|
|
|
switch n := n.(type) {
|
|
|
|
case *shortNode:
|
|
|
|
if child, ok := n.Val.(valueNode); ok {
|
core, eth: faster snapshot generation (#22504)
* eth/protocols: persist received state segments
* core: initial implementation
* core/state/snapshot: add tests
* core, eth: updates
* eth/protocols/snapshot: count flat state size
* core/state: add metrics
* core/state/snapshot: skip unnecessary deletion
* core/state/snapshot: rename
* core/state/snapshot: use the global batch
* core/state/snapshot: add logs and fix wiping
* core/state/snapshot: fix
* core/state/snapshot: save generation progress even if the batch is empty
* core/state/snapshot: fixes
* core/state/snapshot: fix initial account range length
* core/state/snapshot: fix initial account range
* eth/protocols/snap: store flat states during the healing
* eth/protocols/snap: print logs
* core/state/snapshot: refactor (#4)
* core/state/snapshot: refactor
* core/state/snapshot: tiny fix and polish
Co-authored-by: rjl493456442 <garyrong0905@gmail.com>
* core, eth: fixes
* core, eth: fix healing writer
* core, trie, eth: fix paths
* eth/protocols/snap: fix encoding
* eth, core: add debug log
* core/state/generate: release iterator asap (#5)
core/state/snapshot: less copy
core/state/snapshot: revert split loop
core/state/snapshot: handle storage becoming empty, improve test robustness
core/state: test modified codehash
core/state/snapshot: polish
* core/state/snapshot: optimize stats counter
* core, eth: add metric
* core/state/snapshot: update comments
* core/state/snapshot: improve tests
* core/state/snapshot: replace secure trie with standard trie
* core/state/snapshot: wrap return as the struct
* core/state/snapshot: skip wiping correct states
* core/state/snapshot: updates
* core/state/snapshot: fixes
* core/state/snapshot: fix panic due to reference flaw in closure
* core/state/snapshot: fix errors in state generation logic + fix log output
* core/state/snapshot: remove an error case
* core/state/snapshot: fix condition-check for exhausted snap state
* core/state/snapshot: use stackTrie for small tries
* core/state/snapshot: don't resolve small storage tries in vain
* core/state/snapshot: properly clean up storage of deleted accounts
* core/state/snapshot: avoid RLP-encoding in some cases + minor nitpicks
* core/state/snapshot: fix error (+testcase)
* core/state/snapshot: clean up tests a bit
* core/state/snapshot: work in progress on better tests
* core/state/snapshot: polish code
* core/state/snapshot: fix trie iteration abortion trigger
* core/state/snapshot: fixes flaws
* core/state/snapshot: remove panic
* core/state/snapshot: fix abort
* core/state/snapshot: more tests (plus failing testcase)
* core/state/snapshot: more testcases + fix for failing test
* core/state/snapshot: testcase for malformed data
* core/state/snapshot: some test nitpicks
* core/state/snapshot: improvements to logging
* core/state/snapshot: testcase to demo error in abortion
* core/state/snapshot: fix abortion
* cmd/geth: make verify-state report the root
* trie: fix failing test
* core/state/snapshot: add timer metrics
* core/state/snapshot: fix metrics
* core/state/snapshot: udpate tests
* eth/protocols/snap: write snapshot account even if code or state is needed
* core/state/snapshot: fix diskmore check
* core/state/snapshot: review fixes
* core/state/snapshot: improve error message
* cmd/geth: rename 'error' to 'err' in logs
* core/state/snapshot: fix some review concerns
* core/state/snapshot, eth/protocols/snap: clear snapshot marker when starting/resuming snap sync
* core: add error log
* core/state/snapshot: use proper timers for metrics collection
* core/state/snapshot: address some review concerns
* eth/protocols/snap: improved log message
* eth/protocols/snap: fix heal logs to condense infos
* core/state/snapshot: wait for generator termination before restarting
* core/state/snapshot: revert timers to counters to track total time
Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2021-04-14 23:23:11 +03:00
|
|
|
c.onleaf(nil, nil, child, hash)
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
case *fullNode:
|
2020-09-30 14:45:56 +03:00
|
|
|
// For children in range [0, 15], it's impossible
|
2021-08-24 22:00:42 +03:00
|
|
|
// to contain valueNode. Only check the 17th child.
|
2020-09-30 14:45:56 +03:00
|
|
|
if n.Children[16] != nil {
|
core, eth: faster snapshot generation (#22504)
* eth/protocols: persist received state segments
* core: initial implementation
* core/state/snapshot: add tests
* core, eth: updates
* eth/protocols/snapshot: count flat state size
* core/state: add metrics
* core/state/snapshot: skip unnecessary deletion
* core/state/snapshot: rename
* core/state/snapshot: use the global batch
* core/state/snapshot: add logs and fix wiping
* core/state/snapshot: fix
* core/state/snapshot: save generation progress even if the batch is empty
* core/state/snapshot: fixes
* core/state/snapshot: fix initial account range length
* core/state/snapshot: fix initial account range
* eth/protocols/snap: store flat states during the healing
* eth/protocols/snap: print logs
* core/state/snapshot: refactor (#4)
* core/state/snapshot: refactor
* core/state/snapshot: tiny fix and polish
Co-authored-by: rjl493456442 <garyrong0905@gmail.com>
* core, eth: fixes
* core, eth: fix healing writer
* core, trie, eth: fix paths
* eth/protocols/snap: fix encoding
* eth, core: add debug log
* core/state/generate: release iterator asap (#5)
core/state/snapshot: less copy
core/state/snapshot: revert split loop
core/state/snapshot: handle storage becoming empty, improve test robustness
core/state: test modified codehash
core/state/snapshot: polish
* core/state/snapshot: optimize stats counter
* core, eth: add metric
* core/state/snapshot: update comments
* core/state/snapshot: improve tests
* core/state/snapshot: replace secure trie with standard trie
* core/state/snapshot: wrap return as the struct
* core/state/snapshot: skip wiping correct states
* core/state/snapshot: updates
* core/state/snapshot: fixes
* core/state/snapshot: fix panic due to reference flaw in closure
* core/state/snapshot: fix errors in state generation logic + fix log output
* core/state/snapshot: remove an error case
* core/state/snapshot: fix condition-check for exhausted snap state
* core/state/snapshot: use stackTrie for small tries
* core/state/snapshot: don't resolve small storage tries in vain
* core/state/snapshot: properly clean up storage of deleted accounts
* core/state/snapshot: avoid RLP-encoding in some cases + minor nitpicks
* core/state/snapshot: fix error (+testcase)
* core/state/snapshot: clean up tests a bit
* core/state/snapshot: work in progress on better tests
* core/state/snapshot: polish code
* core/state/snapshot: fix trie iteration abortion trigger
* core/state/snapshot: fixes flaws
* core/state/snapshot: remove panic
* core/state/snapshot: fix abort
* core/state/snapshot: more tests (plus failing testcase)
* core/state/snapshot: more testcases + fix for failing test
* core/state/snapshot: testcase for malformed data
* core/state/snapshot: some test nitpicks
* core/state/snapshot: improvements to logging
* core/state/snapshot: testcase to demo error in abortion
* core/state/snapshot: fix abortion
* cmd/geth: make verify-state report the root
* trie: fix failing test
* core/state/snapshot: add timer metrics
* core/state/snapshot: fix metrics
* core/state/snapshot: udpate tests
* eth/protocols/snap: write snapshot account even if code or state is needed
* core/state/snapshot: fix diskmore check
* core/state/snapshot: review fixes
* core/state/snapshot: improve error message
* cmd/geth: rename 'error' to 'err' in logs
* core/state/snapshot: fix some review concerns
* core/state/snapshot, eth/protocols/snap: clear snapshot marker when starting/resuming snap sync
* core: add error log
* core/state/snapshot: use proper timers for metrics collection
* core/state/snapshot: address some review concerns
* eth/protocols/snap: improved log message
* eth/protocols/snap: fix heal logs to condense infos
* core/state/snapshot: wait for generator termination before restarting
* core/state/snapshot: revert timers to counters to track total time
Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2021-04-14 23:23:11 +03:00
|
|
|
c.onleaf(nil, nil, n.Children[16].(valueNode), hash)
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// estimateSize estimates the size of an rlp-encoded node, without actually
|
|
|
|
// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
|
|
|
|
// with 1000 leafs, the only errors above 1% are on small shortnodes, where this
|
|
|
|
// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
|
|
|
|
func estimateSize(n node) int {
|
|
|
|
switch n := n.(type) {
|
|
|
|
case *shortNode:
|
|
|
|
// A short node contains a compacted key, and a value.
|
|
|
|
return 3 + len(n.Key) + estimateSize(n.Val)
|
|
|
|
case *fullNode:
|
|
|
|
// A full node contains up to 16 hashes (some nils), and a key
|
|
|
|
s := 3
|
|
|
|
for i := 0; i < 16; i++ {
|
|
|
|
if child := n.Children[i]; child != nil {
|
|
|
|
s += estimateSize(child)
|
|
|
|
} else {
|
2020-07-15 11:00:04 +03:00
|
|
|
s++
|
2020-02-03 18:28:30 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
case valueNode:
|
|
|
|
return 1 + len(n)
|
|
|
|
case hashNode:
|
|
|
|
return 1 + len(n)
|
|
|
|
default:
|
|
|
|
panic(fmt.Sprintf("node type %T", n))
|
|
|
|
}
|
|
|
|
}
|