diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go
index 681be7ebc0..8a0fd1989a 100644
--- a/core/state/snapshot/conversion.go
+++ b/core/state/snapshot/conversion.go
@@ -362,15 +362,15 @@ func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, accou
}
func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) {
- options := trie.NewStackTrieOptions()
+ var onTrieNode trie.OnTrieNode
if db != nil {
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ onTrieNode = func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme)
- })
+ }
}
- t := trie.NewStackTrie(options)
+ t := trie.NewStackTrie(onTrieNode)
for leaf := range in {
t.Update(leaf.key[:], leaf.value)
}
- out <- t.Commit()
+ out <- t.Hash()
}
diff --git a/core/state/statedb.go b/core/state/statedb.go
index f2c2e7a798..d3d383389c 100644
--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@@ -981,12 +981,10 @@ func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (com
nodes = trienode.NewNodeSet(addrHash)
slots = make(map[common.Hash][]byte)
)
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
nodes.AddNode(path, trienode.NewDeleted())
size += common.StorageSize(len(path))
})
- stack := trie.NewStackTrie(options)
for iter.Next() {
slot := common.CopyBytes(iter.Slot())
if err := iter.Error(); err != nil { // error might occur after Slot function
diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go
new file mode 100644
index 0000000000..8ef1a00753
--- /dev/null
+++ b/eth/protocols/snap/gentrie.go
@@ -0,0 +1,287 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snap
+
+import (
+ "bytes"
+
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/trie"
+)
+
+// genTrie interface is used by the snap syncer to generate merkle tree nodes
+// based on a received batch of states.
+type genTrie interface {
+ // update inserts the state item into generator trie.
+ update(key, value []byte) error
+
+ // commit flushes the right boundary nodes if complete flag is true. This
+ // function must be called before flushing the associated database batch.
+ commit(complete bool) common.Hash
+}
+
+// pathTrie is a wrapper over the stackTrie, incorporating numerous additional
+// logics to handle the semi-completed trie and potential leftover dangling
+// nodes in the database. It is utilized for constructing the merkle tree nodes
+// in path mode during the snap sync process.
+type pathTrie struct {
+ owner common.Hash // identifier of trie owner, empty for account trie
+ tr *trie.StackTrie // underlying raw stack trie
+ first []byte // the path of first committed node by stackTrie
+ last []byte // the path of last committed node by stackTrie
+
+ // This flag indicates whether nodes on the left boundary are skipped for
+ // committing. If set, the left boundary nodes are considered incomplete
+ // due to potentially missing left children.
+ skipLeftBoundary bool
+ db ethdb.KeyValueReader
+ batch ethdb.Batch
+}
+
+// newPathTrie initializes the path trie.
+func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie {
+ tr := &pathTrie{
+ owner: owner,
+ skipLeftBoundary: skipLeftBoundary,
+ db: db,
+ batch: batch,
+ }
+ tr.tr = trie.NewStackTrie(tr.onTrieNode)
+ return tr
+}
+
+// onTrieNode is invoked whenever a new node is committed by the stackTrie.
+//
+// As the committed nodes might be incomplete if they are on the boundaries
+// (left or right), this function has the ability to detect the incomplete
+// ones and filter them out for committing.
+//
+// Additionally, the assumption is made that there may exist leftover dangling
+// nodes in the database. This function has the ability to detect the dangling
+// nodes that fall within the path space of committed nodes (specifically on
+// the path covered by internal extension nodes) and remove them from the
+// database. This property ensures that the entire path space is uniquely
+// occupied by committed nodes.
+//
+// Furthermore, all leftover dangling nodes along the path from committed nodes
+// to the trie root (left and right boundaries) should be removed as well;
+// otherwise, they might potentially disrupt the state healing process.
+func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
+ // Filter out the nodes on the left boundary if skipLeftBoundary is
+ // configured. Nodes are considered to be on the left boundary if
+ // it's the first one to be committed, or the parent/ancestor of the
+ // first committed node.
+ if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) {
+ if t.first == nil {
+ // Memorize the path of first committed node, which is regarded
+ // as left boundary. Deep-copy is necessary as the path given
+ // is volatile.
+ t.first = append([]byte{}, path...)
+
+ // The left boundary can be uniquely determined by the first committed node
+ // from stackTrie (e.g., N_1), as the shared path prefix between the first
+ // two inserted state items is deterministic (the path of N_3). The path
+ // from trie root towards the first committed node is considered the left
+ // boundary. The potential leftover dangling nodes on left boundary should
+ // be cleaned out.
+ //
+ // +-----+
+ // | N_3 | shared path prefix of state_1 and state_2
+ // +-----+
+ // /- -\
+ // +-----+ +-----+
+ // First committed node | N_1 | | N_2 | latest inserted node (contain state_2)
+ // +-----+ +-----+
+ //
+ // The node with the path of the first committed one (e.g, N_1) is not
+ // removed because it's a sibling of the nodes we want to commit, not
+ // the parent or ancestor.
+ for i := 0; i < len(path); i++ {
+ t.delete(path[:i], false)
+ }
+ }
+ return
+ }
+ // If boundary filtering is not configured, or the node is not on the left
+ // boundary, commit it to database.
+ //
+ // Note: If the current committed node is an extension node, then the nodes
+ // falling within the path between itself and its standalone (not embedded
+ // in parent) child should be cleaned out for exclusively occupy the inner
+ // path.
+ //
+ // This is essential in snap sync to avoid leaving dangling nodes within
+ // this range covered by extension node which could potentially break the
+ // state healing.
+ //
+ // The extension node is detected if its path is the prefix of last committed
+ // one and path gap is larger than one. If the path gap is only one byte,
+ // the current node could either be a full node, or a extension with single
+ // byte key. In either case, no gaps will be left in the path.
+ if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 {
+ for i := len(path) + 1; i < len(t.last); i++ {
+ t.delete(t.last[:i], true)
+ }
+ }
+ t.write(path, blob)
+
+ // Update the last flag. Deep-copy is necessary as the provided path is volatile.
+ if t.last == nil {
+ t.last = append([]byte{}, path...)
+ } else {
+ t.last = append(t.last[:0], path...)
+ }
+}
+
+// write commits the node write to provided database batch in path mode.
+func (t *pathTrie) write(path []byte, blob []byte) {
+ if t.owner == (common.Hash{}) {
+ rawdb.WriteAccountTrieNode(t.batch, path, blob)
+ } else {
+ rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob)
+ }
+}
+
+func (t *pathTrie) deleteAccountNode(path []byte, inner bool) {
+ if inner {
+ accountInnerLookupGauge.Inc(1)
+ } else {
+ accountOuterLookupGauge.Inc(1)
+ }
+ if !rawdb.ExistsAccountTrieNode(t.db, path) {
+ return
+ }
+ if inner {
+ accountInnerDeleteGauge.Inc(1)
+ } else {
+ accountOuterDeleteGauge.Inc(1)
+ }
+ rawdb.DeleteAccountTrieNode(t.batch, path)
+}
+
+func (t *pathTrie) deleteStorageNode(path []byte, inner bool) {
+ if inner {
+ storageInnerLookupGauge.Inc(1)
+ } else {
+ storageOuterLookupGauge.Inc(1)
+ }
+ if !rawdb.ExistsStorageTrieNode(t.db, t.owner, path) {
+ return
+ }
+ if inner {
+ storageInnerDeleteGauge.Inc(1)
+ } else {
+ storageOuterDeleteGauge.Inc(1)
+ }
+ rawdb.DeleteStorageTrieNode(t.batch, t.owner, path)
+}
+
+// delete commits the node deletion to provided database batch in path mode.
+func (t *pathTrie) delete(path []byte, inner bool) {
+ if t.owner == (common.Hash{}) {
+ t.deleteAccountNode(path, inner)
+ } else {
+ t.deleteStorageNode(path, inner)
+ }
+}
+
+// update implements genTrie interface, inserting a (key, value) pair into the
+// stack trie.
+func (t *pathTrie) update(key, value []byte) error {
+ return t.tr.Update(key, value)
+}
+
+// commit implements genTrie interface, flushing the right boundary if it's
+// considered as complete. Otherwise, the nodes on the right boundary are
+// discarded and cleaned up.
+//
+// Note, this function must be called before flushing database batch, otherwise,
+// dangling nodes might be left in database.
+func (t *pathTrie) commit(complete bool) common.Hash {
+ // If the right boundary is claimed as complete, flush them out.
+ // The nodes on both left and right boundary will still be filtered
+ // out if left boundary filtering is configured.
+ if complete {
+ // Commit all inserted but not yet committed nodes(on the right
+ // boundary) in the stackTrie.
+ hash := t.tr.Hash()
+ if t.skipLeftBoundary {
+ return common.Hash{} // hash is meaningless if left side is incomplete
+ }
+ return hash
+ }
+ // Discard nodes on the right boundary as it's claimed as incomplete. These
+ // nodes might be incomplete due to missing children on the right side.
+ // Furthermore, the potential leftover nodes on right boundary should also
+ // be cleaned out.
+ //
+ // The right boundary can be uniquely determined by the last committed node
+ // from stackTrie (e.g., N_1), as the shared path prefix between the last
+ // two inserted state items is deterministic (the path of N_3). The path
+ // from trie root towards the last committed node is considered the right
+ // boundary (root to N_3).
+ //
+ // +-----+
+ // | N_3 | shared path prefix of last two states
+ // +-----+
+ // /- -\
+ // +-----+ +-----+
+ // Last committed node | N_1 | | N_2 | latest inserted node (contain last state)
+ // +-----+ +-----+
+ //
+ // Another interesting scenario occurs when the trie is committed due to
+ // too many items being accumulated in the batch. To flush them out to
+ // the database, the path of the last inserted node (N_2) is temporarily
+ // treated as an incomplete right boundary, and nodes on this path are
+ // removed (e.g. from root to N_3).
+ // However, this path will be reclaimed as an internal path by inserting
+ // more items after the batch flush. New nodes on this path can be committed
+ // with no issues as they are actually complete. Also, from a database
+ // perspective, first deleting and then rewriting is a valid data update.
+ for i := 0; i < len(t.last); i++ {
+ t.delete(t.last[:i], false)
+ }
+ return common.Hash{} // the hash is meaningless for incomplete commit
+}
+
+// hashTrie is a wrapper over the stackTrie for implementing genTrie interface.
+type hashTrie struct {
+ tr *trie.StackTrie
+}
+
+// newHashTrie initializes the hash trie.
+func newHashTrie(batch ethdb.Batch) *hashTrie {
+ return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ rawdb.WriteLegacyTrieNode(batch, hash, blob)
+ })}
+}
+
+// update implements genTrie interface, inserting a (key, value) pair into
+// the stack trie.
+func (t *hashTrie) update(key, value []byte) error {
+ return t.tr.Update(key, value)
+}
+
+// commit implements genTrie interface, committing the nodes on right boundary.
+func (t *hashTrie) commit(complete bool) common.Hash {
+ if !complete {
+ return common.Hash{} // the hash is meaningless for incomplete commit
+ }
+ return t.tr.Hash() // return hash only if it's claimed as complete
+}
diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go
new file mode 100644
index 0000000000..1fb2dbce75
--- /dev/null
+++ b/eth/protocols/snap/gentrie_test.go
@@ -0,0 +1,553 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package snap
+
+import (
+ "bytes"
+ "math/rand"
+ "slices"
+ "testing"
+
+ "github.com/ethereum/go-ethereum/common"
+ "github.com/ethereum/go-ethereum/core/rawdb"
+ "github.com/ethereum/go-ethereum/crypto"
+ "github.com/ethereum/go-ethereum/ethdb"
+ "github.com/ethereum/go-ethereum/internal/testrand"
+ "github.com/ethereum/go-ethereum/trie"
+)
+
+type replayer struct {
+ paths []string // sort in fifo order
+ hashes []common.Hash // empty for deletion
+ unknowns int // counter for unknown write
+}
+
+func newBatchReplay() *replayer {
+ return &replayer{}
+}
+
+func (r *replayer) decode(key []byte, value []byte) {
+ account := rawdb.IsAccountTrieNode(key)
+ storage := rawdb.IsStorageTrieNode(key)
+ if !account && !storage {
+ r.unknowns += 1
+ return
+ }
+ var path []byte
+ if account {
+ _, path = rawdb.ResolveAccountTrieNodeKey(key)
+ } else {
+ _, owner, inner := rawdb.ResolveStorageTrieNode(key)
+ path = append(owner.Bytes(), inner...)
+ }
+ r.paths = append(r.paths, string(path))
+
+ if len(value) == 0 {
+ r.hashes = append(r.hashes, common.Hash{})
+ } else {
+ r.hashes = append(r.hashes, crypto.Keccak256Hash(value))
+ }
+}
+
+// updates returns a set of effective mutations. Multiple mutations targeting
+// the same node path will be merged in FIFO order.
+func (r *replayer) modifies() map[string]common.Hash {
+ set := make(map[string]common.Hash)
+ for i, path := range r.paths {
+ set[path] = r.hashes[i]
+ }
+ return set
+}
+
+// updates returns the number of updates.
+func (r *replayer) updates() int {
+ var count int
+ for _, hash := range r.modifies() {
+ if hash == (common.Hash{}) {
+ continue
+ }
+ count++
+ }
+ return count
+}
+
+// Put inserts the given value into the key-value data store.
+func (r *replayer) Put(key []byte, value []byte) error {
+ r.decode(key, value)
+ return nil
+}
+
+// Delete removes the key from the key-value data store.
+func (r *replayer) Delete(key []byte) error {
+ r.decode(key, nil)
+ return nil
+}
+
+func byteToHex(str []byte) []byte {
+ l := len(str) * 2
+ var nibbles = make([]byte, l)
+ for i, b := range str {
+ nibbles[i*2] = b / 16
+ nibbles[i*2+1] = b % 16
+ }
+ return nibbles
+}
+
+// innerNodes returns the internal nodes narrowed by two boundaries along with
+// the leftmost and rightmost sub-trie roots.
+func innerNodes(first, last []byte, includeLeft, includeRight bool, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) {
+ var (
+ leftRoot []byte
+ rightRoot []byte
+ firstHex = byteToHex(first)
+ lastHex = byteToHex(last)
+ inner = make(map[string]common.Hash)
+ )
+ for path, hash := range nodes {
+ if hash == (common.Hash{}) {
+ t.Fatalf("Unexpected deletion, %v", []byte(path))
+ }
+ // Filter out the siblings on the left side or the left boundary nodes.
+ if !includeLeft && (bytes.Compare(firstHex, []byte(path)) > 0 || bytes.HasPrefix(firstHex, []byte(path))) {
+ continue
+ }
+ // Filter out the siblings on the right side or the right boundary nodes.
+ if !includeRight && (bytes.Compare(lastHex, []byte(path)) < 0 || bytes.HasPrefix(lastHex, []byte(path))) {
+ continue
+ }
+ inner[path] = hash
+
+ // Track the path of the leftmost sub trie root
+ if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 {
+ leftRoot = []byte(path)
+ }
+ // Track the path of the rightmost sub trie root
+ if rightRoot == nil ||
+ (bytes.Compare(rightRoot, []byte(path)) < 0) ||
+ (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) {
+ rightRoot = []byte(path)
+ }
+ }
+ return inner, leftRoot, rightRoot
+}
+
+func buildPartial(owner common.Hash, db ethdb.KeyValueReader, batch ethdb.Batch, entries []*kv, first, last int) *replayer {
+ tr := newPathTrie(owner, first != 0, db, batch)
+ for i := first; i <= last; i++ {
+ tr.update(entries[i].k, entries[i].v)
+ }
+ tr.commit(last == len(entries)-1)
+
+ replay := newBatchReplay()
+ batch.Replay(replay)
+
+ return replay
+}
+
+// TestPartialGentree verifies if the trie constructed with partial states can
+// generate consistent trie nodes that match those of the full trie.
+func TestPartialGentree(t *testing.T) {
+ for round := 0; round < 100; round++ {
+ var (
+ n = rand.Intn(1024) + 10
+ entries []*kv
+ )
+ for i := 0; i < n; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ nodes := make(map[string]common.Hash)
+ tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ nodes[string(path)] = hash
+ })
+ for i := 0; i < len(entries); i++ {
+ tr.Update(entries[i].k, entries[i].v)
+ }
+ tr.Hash()
+
+ check := func(first, last int) {
+ var (
+ db = rawdb.NewMemoryDatabase()
+ batch = db.NewBatch()
+ )
+ // Build the partial tree with specific boundaries
+ r := buildPartial(common.Hash{}, db, batch, entries, first, last)
+ if r.unknowns > 0 {
+ t.Fatalf("Unknown database write: %d", r.unknowns)
+ }
+
+ // Ensure all the internal nodes are produced
+ var (
+ set = r.modifies()
+ inner, _, _ = innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t)
+ )
+ for path, hash := range inner {
+ if _, ok := set[path]; !ok {
+ t.Fatalf("Missing nodes %v", []byte(path))
+ }
+ if hash != set[path] {
+ t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path])
+ }
+ }
+ if r.updates() != len(inner) {
+ t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates())
+ }
+ }
+ for j := 0; j < 100; j++ {
+ var (
+ first int
+ last int
+ )
+ for {
+ first = rand.Intn(len(entries))
+ last = rand.Intn(len(entries))
+ if first <= last {
+ break
+ }
+ }
+ check(first, last)
+ }
+ var cases = []struct {
+ first int
+ last int
+ }{
+ {0, len(entries) - 1}, // full
+ {1, len(entries) - 1}, // no left
+ {2, len(entries) - 1}, // no left
+ {2, len(entries) - 2}, // no left and right
+ {2, len(entries) - 2}, // no left and right
+ {len(entries) / 2, len(entries) / 2}, // single
+ {0, 0}, // single first
+ {len(entries) - 1, len(entries) - 1}, // single last
+ }
+ for _, c := range cases {
+ check(c.first, c.last)
+ }
+ }
+}
+
+// TestGentreeDanglingClearing tests if the dangling nodes falling within the
+// path space of constructed tree can be correctly removed.
+func TestGentreeDanglingClearing(t *testing.T) {
+ for round := 0; round < 100; round++ {
+ var (
+ n = rand.Intn(1024) + 10
+ entries []*kv
+ )
+ for i := 0; i < n; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ nodes := make(map[string]common.Hash)
+ tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ nodes[string(path)] = hash
+ })
+ for i := 0; i < len(entries); i++ {
+ tr.Update(entries[i].k, entries[i].v)
+ }
+ tr.Hash()
+
+ check := func(first, last int) {
+ var (
+ db = rawdb.NewMemoryDatabase()
+ batch = db.NewBatch()
+ )
+ // Write the junk nodes as the dangling
+ var injects []string
+ for path := range nodes {
+ for i := 0; i < len(path); i++ {
+ _, ok := nodes[path[:i]]
+ if ok {
+ continue
+ }
+ injects = append(injects, path[:i])
+ }
+ }
+ if len(injects) == 0 {
+ return
+ }
+ for _, path := range injects {
+ rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32))
+ }
+
+ // Build the partial tree with specific range
+ replay := buildPartial(common.Hash{}, db, batch, entries, first, last)
+ if replay.unknowns > 0 {
+ t.Fatalf("Unknown database write: %d", replay.unknowns)
+ }
+ set := replay.modifies()
+
+ // Make sure the injected junks falling within the path space of
+ // committed trie nodes are correctly deleted.
+ _, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t)
+ for _, path := range injects {
+ if bytes.Compare([]byte(path), leftRoot) < 0 && !bytes.HasPrefix(leftRoot, []byte(path)) {
+ continue
+ }
+ if bytes.Compare([]byte(path), rightRoot) > 0 {
+ continue
+ }
+ if hash, ok := set[path]; !ok || hash != (common.Hash{}) {
+ t.Fatalf("Missing delete, %v", []byte(path))
+ }
+ }
+ }
+ for j := 0; j < 100; j++ {
+ var (
+ first int
+ last int
+ )
+ for {
+ first = rand.Intn(len(entries))
+ last = rand.Intn(len(entries))
+ if first <= last {
+ break
+ }
+ }
+ check(first, last)
+ }
+ var cases = []struct {
+ first int
+ last int
+ }{
+ {0, len(entries) - 1}, // full
+ {1, len(entries) - 1}, // no left
+ {2, len(entries) - 1}, // no left
+ {2, len(entries) - 2}, // no left and right
+ {2, len(entries) - 2}, // no left and right
+ {len(entries) / 2, len(entries) / 2}, // single
+ {0, 0}, // single first
+ {len(entries) - 1, len(entries) - 1}, // single last
+ }
+ for _, c := range cases {
+ check(c.first, c.last)
+ }
+ }
+}
+
+// TestFlushPartialTree tests the gentrie can produce complete inner trie nodes
+// even with lots of batch flushes.
+func TestFlushPartialTree(t *testing.T) {
+ var entries []*kv
+ for i := 0; i < 1024; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ nodes := make(map[string]common.Hash)
+ tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
+ nodes[string(path)] = hash
+ })
+ for i := 0; i < len(entries); i++ {
+ tr.Update(entries[i].k, entries[i].v)
+ }
+ tr.Hash()
+
+ var cases = []struct {
+ first int
+ last int
+ }{
+ {0, len(entries) - 1}, // full
+ {1, len(entries) - 1}, // no left
+ {10, len(entries) - 1}, // no left
+ {10, len(entries) - 2}, // no left and right
+ {10, len(entries) - 10}, // no left and right
+ {11, 11}, // single
+ {0, 0}, // single first
+ {len(entries) - 1, len(entries) - 1}, // single last
+ }
+ for _, c := range cases {
+ var (
+ db = rawdb.NewMemoryDatabase()
+ batch = db.NewBatch()
+ combined = db.NewBatch()
+ )
+ inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, c.first == 0, c.last == len(entries)-1, nodes, t)
+
+ tr := newPathTrie(common.Hash{}, c.first != 0, db, batch)
+ for i := c.first; i <= c.last; i++ {
+ tr.update(entries[i].k, entries[i].v)
+ if rand.Intn(2) == 0 {
+ tr.commit(false)
+
+ batch.Replay(combined)
+ batch.Write()
+ batch.Reset()
+ }
+ }
+ tr.commit(c.last == len(entries)-1)
+
+ batch.Replay(combined)
+ batch.Write()
+ batch.Reset()
+
+ r := newBatchReplay()
+ combined.Replay(r)
+
+ // Ensure all the internal nodes are produced
+ set := r.modifies()
+ for path, hash := range inner {
+ if _, ok := set[path]; !ok {
+ t.Fatalf("Missing nodes %v", []byte(path))
+ }
+ if hash != set[path] {
+ t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path])
+ }
+ }
+ if r.updates() != len(inner) {
+ t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates())
+ }
+ }
+}
+
+// TestBoundSplit ensures two consecutive trie chunks are not overlapped with
+// each other.
+func TestBoundSplit(t *testing.T) {
+ var entries []*kv
+ for i := 0; i < 1024; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ for j := 0; j < 100; j++ {
+ var (
+ next int
+ last int
+ db = rawdb.NewMemoryDatabase()
+
+ lastRightRoot []byte
+ )
+ for {
+ if next == len(entries) {
+ break
+ }
+ last = rand.Intn(len(entries)-next) + next
+
+ r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last)
+ set := r.modifies()
+
+ // Skip if the chunk is zero-size
+ if r.updates() == 0 {
+ next = last + 1
+ continue
+ }
+
+ // Ensure the updates in two consecutive chunks are not overlapped.
+ // The only overlapping part should be deletion.
+ if lastRightRoot != nil && len(set) > 0 {
+ // Derive the path of left-most node in this chunk
+ var leftRoot []byte
+ for path, hash := range r.modifies() {
+ if hash == (common.Hash{}) {
+ t.Fatalf("Unexpected deletion %v", []byte(path))
+ }
+ if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 {
+ leftRoot = []byte(path)
+ }
+ }
+ if bytes.HasPrefix(lastRightRoot, leftRoot) || bytes.HasPrefix(leftRoot, lastRightRoot) {
+ t.Fatalf("Two chunks are not correctly separated, lastRight: %v, left: %v", lastRightRoot, leftRoot)
+ }
+ }
+
+ // Track the updates as the last chunk
+ var rightRoot []byte
+ for path := range set {
+ if rightRoot == nil ||
+ (bytes.Compare(rightRoot, []byte(path)) < 0) ||
+ (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) {
+ rightRoot = []byte(path)
+ }
+ }
+ lastRightRoot = rightRoot
+ next = last + 1
+ }
+ }
+}
+
+// TestTinyPartialTree tests if the partial tree is too tiny(has less than two
+// states), then nothing should be committed.
+func TestTinyPartialTree(t *testing.T) {
+ var entries []*kv
+ for i := 0; i < 1024; i++ {
+ var val []byte
+ if rand.Intn(3) == 0 {
+ val = testrand.Bytes(3)
+ } else {
+ val = testrand.Bytes(32)
+ }
+ entries = append(entries, &kv{
+ k: testrand.Bytes(32),
+ v: val,
+ })
+ }
+ slices.SortFunc(entries, (*kv).cmp)
+
+ for i := 0; i < len(entries); i++ {
+ next := i
+ last := i + 1
+ if last >= len(entries) {
+ last = len(entries) - 1
+ }
+ db := rawdb.NewMemoryDatabase()
+ r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last)
+
+ if next != 0 && last != len(entries)-1 {
+ if r.updates() != 0 {
+ t.Fatalf("Unexpected data writes, got: %d", r.updates())
+ }
+ }
+ }
+}
diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go
index 19e9151824..6878e5b280 100644
--- a/eth/protocols/snap/metrics.go
+++ b/eth/protocols/snap/metrics.go
@@ -27,21 +27,28 @@ var (
IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil)
EgressRegistrationErrorMeter = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil)
- // deletionGauge is the metric to track how many trie node deletions
- // are performed in total during the sync process.
- deletionGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete", nil)
+ // accountInnerDeleteGauge is the metric to track how many dangling trie nodes
+ // covered by extension node in account trie are deleted during the sync.
+ accountInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/inner", nil)
+
+ // storageInnerDeleteGauge is the metric to track how many dangling trie nodes
+ // covered by extension node in storage trie are deleted during the sync.
+ storageInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/inner", nil)
+
+ // accountOuterDeleteGauge is the metric to track how many dangling trie nodes
+ // above the committed nodes in account trie are deleted during the sync.
+ accountOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/outer", nil)
+
+ // storageOuterDeleteGauge is the metric to track how many dangling trie nodes
+ // above the committed nodes in storage trie are deleted during the sync.
+ storageOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/outer", nil)
// lookupGauge is the metric to track how many trie node lookups are
// performed to determine if node needs to be deleted.
- lookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/lookup", nil)
-
- // boundaryAccountNodesGauge is the metric to track how many boundary trie
- // nodes in account trie are met.
- boundaryAccountNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/account", nil)
-
- // boundaryAccountNodesGauge is the metric to track how many boundary trie
- // nodes in storage tries are met.
- boundaryStorageNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/storage", nil)
+ accountInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/inner", nil)
+ accountOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/outer", nil)
+ storageInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/inner", nil)
+ storageOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/outer", nil)
// smallStorageGauge is the metric to track how many storages are small enough
// to retrieved in one or two request.
diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go
index d5d6fd6d69..b0ddb8e403 100644
--- a/eth/protocols/snap/sync.go
+++ b/eth/protocols/snap/sync.go
@@ -94,6 +94,9 @@ const (
// trienodeHealThrottleDecrease is the divisor for the throttle when the
// rate of arriving data is lower than the rate of processing it.
trienodeHealThrottleDecrease = 1.25
+
+ // batchSizeThreshold is the maximum size allowed for gentrie batch.
+ batchSizeThreshold = 8 * 1024 * 1024
)
var (
@@ -321,8 +324,8 @@ type accountTask struct {
stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
stateCompleted map[common.Hash]struct{} // Account hashes whose storage have been completed
- genBatch ethdb.Batch // Batch used by the node generator
- genTrie *trie.StackTrie // Node generator from storage slots
+ genBatch ethdb.Batch // Batch used by the node generator
+ genTrie genTrie // Node generator from storage slots
done bool // Flag whether the task can be removed
}
@@ -360,8 +363,8 @@ type storageTask struct {
root common.Hash // Storage root hash for this instance
req *storageRequest // Pending request to fill this task
- genBatch ethdb.Batch // Batch used by the node generator
- genTrie *trie.StackTrie // Node generator from storage slots
+ genBatch ethdb.Batch // Batch used by the node generator
+ genTrie genTrie // Node generator from storage slots
done bool // Flag whether the task can be removed
}
@@ -749,19 +752,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
}
}
-// cleanPath is used to remove the dangling nodes in the stackTrie.
-func (s *Syncer) cleanPath(batch ethdb.Batch, owner common.Hash, path []byte) {
- if owner == (common.Hash{}) && rawdb.ExistsAccountTrieNode(s.db, path) {
- rawdb.DeleteAccountTrieNode(batch, path)
- deletionGauge.Inc(1)
- }
- if owner != (common.Hash{}) && rawdb.ExistsStorageTrieNode(s.db, owner, path) {
- rawdb.DeleteStorageTrieNode(batch, owner, path)
- deletionGauge.Inc(1)
- }
- lookupGauge.Inc(1)
-}
-
// loadSyncStatus retrieves a previously aborted sync status from the database,
// or generates a fresh one if none is available.
func (s *Syncer) loadSyncStatus() {
@@ -792,23 +782,12 @@ func (s *Syncer) loadSyncStatus() {
s.accountBytes += common.StorageSize(len(key) + len(value))
},
}
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme)
- })
- if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(task.genBatch, common.Hash{}, path)
- })
- // Skip the left boundary if it's not the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge)
+ if s.scheme == rawdb.HashScheme {
+ task.genTrie = newHashTrie(task.genBatch)
+ }
+ if s.scheme == rawdb.PathScheme {
+ task.genTrie = newPathTrie(common.Hash{}, task.Next != common.Hash{}, s.db, task.genBatch)
}
- task.genTrie = trie.NewStackTrie(options)
-
// Restore leftover storage tasks
for accountHash, subtasks := range task.SubTasks {
for _, subtask := range subtasks {
@@ -820,23 +799,12 @@ func (s *Syncer) loadSyncStatus() {
s.storageBytes += common.StorageSize(len(key) + len(value))
},
}
- owner := accountHash // local assignment for stacktrie writer closure
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme)
- })
- if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(subtask.genBatch, owner, path)
- })
- // Skip the left boundary if it's not the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(subtask.Next != common.Hash{}, subtask.Last != common.MaxHash, boundaryStorageNodesGauge)
+ if s.scheme == rawdb.HashScheme {
+ subtask.genTrie = newHashTrie(subtask.genBatch)
+ }
+ if s.scheme == rawdb.PathScheme {
+ subtask.genTrie = newPathTrie(accountHash, subtask.Next != common.Hash{}, s.db, subtask.genBatch)
}
- subtask.genTrie = trie.NewStackTrie(options)
}
}
}
@@ -888,20 +856,12 @@ func (s *Syncer) loadSyncStatus() {
s.accountBytes += common.StorageSize(len(key) + len(value))
},
}
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, common.Hash{}, path)
- })
- // Skip the left boundary if it's not the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(next != common.Hash{}, last != common.MaxHash, boundaryAccountNodesGauge)
+ tr = newPathTrie(common.Hash{}, next != common.Hash{}, s.db, batch)
}
s.tasks = append(s.tasks, &accountTask{
Next: next,
@@ -909,7 +869,7 @@ func (s *Syncer) loadSyncStatus() {
SubTasks: make(map[common.Hash][]*storageTask),
genBatch: batch,
stateCompleted: make(map[common.Hash]struct{}),
- genTrie: trie.NewStackTrie(options),
+ genTrie: tr,
})
log.Debug("Created account sync task", "from", next, "last", last)
next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
@@ -920,11 +880,18 @@ func (s *Syncer) loadSyncStatus() {
func (s *Syncer) saveSyncStatus() {
// Serialize any partial progress to disk before spinning down
for _, task := range s.tasks {
+ // Claim the right boundary as incomplete before flushing the
+ // accumulated nodes in batch, the nodes on right boundary
+ // will be discarded and cleaned up by this call.
+ task.genTrie.commit(false)
if err := task.genBatch.Write(); err != nil {
log.Error("Failed to persist account slots", "err", err)
}
for _, subtasks := range task.SubTasks {
for _, subtask := range subtasks {
+ // Same for account trie, discard and cleanup the
+ // incomplete right boundary.
+ subtask.genTrie.commit(false)
if err := subtask.genBatch.Write(); err != nil {
log.Error("Failed to persist storage slots", "err", err)
}
@@ -2155,25 +2122,20 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
s.storageBytes += common.StorageSize(len(key) + len(value))
},
}
- owner := account // local assignment for stacktrie writer closure
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, owner, path)
- })
// Keep the left boundary as it's the first range.
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(false, r.End() != common.MaxHash, boundaryStorageNodesGauge)
+ tr = newPathTrie(account, false, s.db, batch)
}
tasks = append(tasks, &storageTask{
Next: common.Hash{},
Last: r.End(),
root: acc.Root,
genBatch: batch,
- genTrie: trie.NewStackTrie(options),
+ genTrie: tr,
})
for r.Next() {
batch := ethdb.HookedBatch{
@@ -2182,27 +2144,19 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
s.storageBytes += common.StorageSize(len(key) + len(value))
},
}
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner and also filter out boundary nodes
- // only in the context of the path scheme. Deletion is forbidden in the
- // hash scheme, as it can disrupt state completeness.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, owner, path)
- })
- // Skip the left boundary as it's not the first range
- // Skip the right boundary if it's not the last range.
- options = options.WithSkipBoundary(true, r.End() != common.MaxHash, boundaryStorageNodesGauge)
+ tr = newPathTrie(account, true, s.db, batch)
}
tasks = append(tasks, &storageTask{
Next: r.Start(),
Last: r.End(),
root: acc.Root,
genBatch: batch,
- genTrie: trie.NewStackTrie(options),
+ genTrie: tr,
})
}
for _, task := range tasks {
@@ -2248,26 +2202,18 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
if i < len(res.hashes)-1 || res.subTask == nil {
// no need to make local reassignment of account: this closure does not outlive the loop
- options := trie.NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme)
- })
+ var tr genTrie
+ if s.scheme == rawdb.HashScheme {
+ tr = newHashTrie(batch)
+ }
if s.scheme == rawdb.PathScheme {
- // Configure the dangling node cleaner only in the context of the
- // path scheme. Deletion is forbidden in the hash scheme, as it can
- // disrupt state completeness.
- //
- // Notably, boundary nodes can be also kept because the whole storage
- // trie is complete.
- options = options.WithCleaner(func(path []byte) {
- s.cleanPath(batch, account, path)
- })
+ // Keep the left boundary as it's complete
+ tr = newPathTrie(account, false, s.db, batch)
}
- tr := trie.NewStackTrie(options)
for j := 0; j < len(res.hashes[i]); j++ {
- tr.Update(res.hashes[i][j][:], res.slots[i][j])
+ tr.update(res.hashes[i][j][:], res.slots[i][j])
}
- tr.Commit()
+ tr.commit(true)
}
// Persist the received storage segments. These flat state maybe
// outdated during the sync, but it can be fixed later during the
@@ -2278,14 +2224,14 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
// If we're storing large contracts, generate the trie nodes
// on the fly to not trash the gluing points
if i == len(res.hashes)-1 && res.subTask != nil {
- res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
+ res.subTask.genTrie.update(res.hashes[i][j][:], res.slots[i][j])
}
}
}
// Large contracts could have generated new trie nodes, flush them to disk
if res.subTask != nil {
if res.subTask.done {
- root := res.subTask.genTrie.Commit()
+ root := res.subTask.genTrie.commit(res.subTask.Last == common.MaxHash)
if err := res.subTask.genBatch.Write(); err != nil {
log.Error("Failed to persist stack slots", "err", err)
}
@@ -2302,8 +2248,8 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
}
}
}
- }
- if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize {
+ } else if res.subTask.genBatch.ValueSize() > batchSizeThreshold {
+ res.subTask.genTrie.commit(false)
if err := res.subTask.genBatch.Write(); err != nil {
log.Error("Failed to persist stack slots", "err", err)
}
@@ -2486,7 +2432,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
if err != nil {
panic(err) // Really shouldn't ever happen
}
- task.genTrie.Update(hash[:], full)
+ task.genTrie.update(hash[:], full)
}
}
// Flush anything written just now and update the stats
@@ -2519,9 +2465,13 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
// flush after finalizing task.done. It's fine even if we crash and lose this
// write as it will only cause more data to be downloaded during heal.
if task.done {
- task.genTrie.Commit()
- }
- if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done {
+ task.genTrie.commit(task.Last == common.MaxHash)
+ if err := task.genBatch.Write(); err != nil {
+ log.Error("Failed to persist stack account", "err", err)
+ }
+ task.genBatch.Reset()
+ } else if task.genBatch.ValueSize() > batchSizeThreshold {
+ task.genTrie.commit(false)
if err := task.genBatch.Write(); err != nil {
log.Error("Failed to persist stack account", "err", err)
}
diff --git a/trie/stacktrie.go b/trie/stacktrie.go
index f2f5355c49..9c574db0bf 100644
--- a/trie/stacktrie.go
+++ b/trie/stacktrie.go
@@ -23,8 +23,6 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
- "github.com/ethereum/go-ethereum/log"
- "github.com/ethereum/go-ethereum/metrics"
)
var (
@@ -32,62 +30,32 @@ var (
_ = types.TrieHasher((*StackTrie)(nil))
)
-// StackTrieOptions contains the configured options for manipulating the stackTrie.
-type StackTrieOptions struct {
- Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes
- Cleaner func(path []byte) // The function to clean up dangling nodes
-
- SkipLeftBoundary bool // Flag whether the nodes on the left boundary are skipped for committing
- SkipRightBoundary bool // Flag whether the nodes on the right boundary are skipped for committing
- boundaryGauge metrics.Gauge // Gauge to track how many boundary nodes are met
-}
-
-// NewStackTrieOptions initializes an empty options for stackTrie.
-func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} }
-
-// WithWriter configures trie node writer within the options.
-func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions {
- o.Writer = writer
- return o
-}
-
-// WithCleaner configures the cleaner in the option for removing dangling nodes.
-func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions {
- o.Cleaner = cleaner
- return o
-}
-
-// WithSkipBoundary configures whether the left and right boundary nodes are
-// filtered for committing, along with a gauge metrics to track how many
-// boundary nodes are met.
-func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions {
- o.SkipLeftBoundary = skipLeft
- o.SkipRightBoundary = skipRight
- o.boundaryGauge = gauge
- return o
-}
+// OnTrieNode is a callback method invoked when a trie node is committed
+// by the stack trie. The node is only committed if it's considered complete.
+//
+// The caller should not modify the contents of the returned path and blob
+// slice, and their contents may be changed after the call. It is up to the
+// `onTrieNode` receiver function to deep-copy the data if it wants to retain
+// it after the call ends.
+type OnTrieNode func(path []byte, hash common.Hash, blob []byte)
// StackTrie is a trie implementation that expects keys to be inserted
// in order. Once it determines that a subtree will no longer be inserted
// into, it will hash it and free up the memory it uses.
type StackTrie struct {
- options *StackTrieOptions
- root *stNode
- h *hasher
-
- first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary.
- last []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary.
+ root *stNode
+ h *hasher
+ last []byte
+ onTrieNode OnTrieNode
}
-// NewStackTrie allocates and initializes an empty trie.
-func NewStackTrie(options *StackTrieOptions) *StackTrie {
- if options == nil {
- options = NewStackTrieOptions()
- }
+// NewStackTrie allocates and initializes an empty trie. The committed nodes
+// will be discarded immediately if no callback is configured.
+func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
return &StackTrie{
- options: options,
- root: stPool.Get().(*stNode),
- h: newHasher(false),
+ root: stPool.Get().(*stNode),
+ h: newHasher(false),
+ onTrieNode: onTrieNode,
}
}
@@ -101,10 +69,6 @@ func (t *StackTrie) Update(key, value []byte) error {
if bytes.Compare(t.last, k) >= 0 {
return errors.New("non-ascending key order")
}
- // track the first and last inserted entries.
- if t.first == nil {
- t.first = append([]byte{}, k...)
- }
if t.last == nil {
t.last = append([]byte{}, k...) // allocate key slice
} else {
@@ -114,19 +78,9 @@ func (t *StackTrie) Update(key, value []byte) error {
return nil
}
-// MustUpdate is a wrapper of Update and will omit any encountered error but
-// just print out an error message.
-func (t *StackTrie) MustUpdate(key, value []byte) {
- if err := t.Update(key, value); err != nil {
- log.Error("Unhandled trie error in StackTrie.Update", "err", err)
- }
-}
-
// Reset resets the stack trie object to empty state.
func (t *StackTrie) Reset() {
- t.options = NewStackTrieOptions()
t.root = stPool.Get().(*stNode)
- t.first = nil
t.last = nil
}
@@ -346,10 +300,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
//
// This method also sets 'st.type' to hashedNode, and clears 'st.key'.
func (t *StackTrie) hash(st *stNode, path []byte) {
- var (
- blob []byte // RLP-encoded node blob
- internal [][]byte // List of node paths covered by the extension node
- )
+ var blob []byte // RLP-encoded node blob
switch st.typ {
case hashedNode:
return
@@ -384,15 +335,6 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
// recursively hash and commit child as the first step
t.hash(st.children[0], append(path, st.key...))
- // Collect the path of internal nodes between shortNode and its **in disk**
- // child. This is essential in the case of path mode scheme to avoid leaving
- // danging nodes within the range of this internal path on disk, which would
- // break the guarantee for state healing.
- if len(st.children[0].val) >= 32 && t.options.Cleaner != nil {
- for i := 1; i < len(st.key); i++ {
- internal = append(internal, append(path, st.key[:i]...))
- }
- }
// encode the extension node
n := shortNode{Key: hexToCompactInPlace(st.key)}
if len(st.children[0].val) < 32 {
@@ -416,11 +358,12 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
default:
panic("invalid node type")
}
-
+ // Convert the node type to hashNode and reset the key slice.
st.typ = hashedNode
st.key = st.key[:0]
- // Skip committing the non-root node if the size is smaller than 32 bytes.
+ // Skip committing the non-root node if the size is smaller than 32 bytes
+ // as tiny nodes are always embedded in their parent except root node.
if len(blob) < 32 && len(path) > 0 {
st.val = common.CopyBytes(blob)
return
@@ -429,51 +372,20 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
// input values.
st.val = t.h.hashData(blob)
- // Short circuit if the stack trie is not configured for writing.
- if t.options.Writer == nil {
- return
+ // Invoke the callback it's provided. Notably, the path and blob slices are
+ // volatile, please deep-copy the slices in callback if the contents need
+ // to be retained.
+ if t.onTrieNode != nil {
+ t.onTrieNode(path, common.BytesToHash(st.val), blob)
}
- // Skip committing if the node is on the left boundary and stackTrie is
- // configured to filter the boundary.
- if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) {
- if t.options.boundaryGauge != nil {
- t.options.boundaryGauge.Inc(1)
- }
- return
- }
- // Skip committing if the node is on the right boundary and stackTrie is
- // configured to filter the boundary.
- if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) {
- if t.options.boundaryGauge != nil {
- t.options.boundaryGauge.Inc(1)
- }
- return
- }
- // Clean up the internal dangling nodes covered by the extension node.
- // This should be done before writing the node to adhere to the committing
- // order from bottom to top.
- for _, path := range internal {
- t.options.Cleaner(path)
- }
- t.options.Writer(path, common.BytesToHash(st.val), blob)
}
// Hash will firstly hash the entire trie if it's still not hashed and then commit
-// all nodes to the associated database. Actually most of the trie nodes have been
-// committed already. The main purpose here is to commit the nodes on right boundary.
-//
-// For stack trie, Hash and Commit are functionally identical.
+// all leftover nodes to the associated database. Actually most of the trie nodes
+// have been committed already. The main purpose here is to commit the nodes on
+// right boundary.
func (t *StackTrie) Hash() common.Hash {
n := t.root
t.hash(n, nil)
return common.BytesToHash(n.val)
}
-
-// Commit will firstly hash the entire trie if it's still not hashed and then commit
-// all nodes to the associated database. Actually most of the trie nodes have been
-// committed already. The main purpose here is to commit the nodes on right boundary.
-//
-// For stack trie, Hash and Commit are functionally identical.
-func (t *StackTrie) Commit() common.Hash {
- return t.Hash()
-}
diff --git a/trie/stacktrie_fuzzer_test.go b/trie/stacktrie_fuzzer_test.go
index 57a31d115f..5126e0bd07 100644
--- a/trie/stacktrie_fuzzer_test.go
+++ b/trie/stacktrie_fuzzer_test.go
@@ -46,11 +46,9 @@ func fuzz(data []byte, debugging bool) {
trieA = NewEmpty(dbA)
spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
dbB = newTestDatabase(rawdb.NewDatabase(spongeB), rawdb.HashScheme)
-
- options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ trieB = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme())
})
- trieB = NewStackTrie(options)
vals []*kv
maxElements = 10000
// operate on unique keys only
@@ -99,10 +97,9 @@ func fuzz(data []byte, debugging bool) {
if debugging {
fmt.Printf("{\"%#x\" , \"%#x\"} // stacktrie.Update\n", kv.k, kv.v)
}
- trieB.MustUpdate(kv.k, kv.v)
+ trieB.Update(kv.k, kv.v)
}
rootB := trieB.Hash()
- trieB.Commit()
if rootA != rootB {
panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
}
@@ -114,20 +111,19 @@ func fuzz(data []byte, debugging bool) {
// Ensure all the nodes are persisted correctly
var (
- nodeset = make(map[string][]byte) // path -> blob
- optionsC = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ nodeset = make(map[string][]byte) // path -> blob
+ trieC = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
if crypto.Keccak256Hash(blob) != hash {
panic("invalid node blob")
}
nodeset[string(path)] = common.CopyBytes(blob)
})
- trieC = NewStackTrie(optionsC)
checked int
)
for _, kv := range vals {
- trieC.MustUpdate(kv.k, kv.v)
+ trieC.Update(kv.k, kv.v)
}
- rootC := trieC.Commit()
+ rootC := trieC.Hash()
if rootA != rootC {
panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC))
}
diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go
index 58115bc33a..f053b5112d 100644
--- a/trie/stacktrie_test.go
+++ b/trie/stacktrie_test.go
@@ -19,14 +19,11 @@ package trie
import (
"bytes"
"math/big"
- "math/rand"
- "slices"
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/crypto"
- "github.com/ethereum/go-ethereum/internal/testrand"
"github.com/stretchr/testify/assert"
)
@@ -381,90 +378,6 @@ func TestStacktrieNotModifyValues(t *testing.T) {
}
}
-func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash {
- var (
- options = NewStackTrieOptions()
- nodes = make(map[string]common.Hash)
- )
- var (
- first int
- last = len(entries) - 1
-
- noLeft bool
- noRight bool
- )
- // Enter split mode if there are at least two elements
- if rand.Intn(5) != 0 {
- for {
- first = rand.Intn(len(entries))
- last = rand.Intn(len(entries))
- if first <= last {
- break
- }
- }
- if first != 0 {
- noLeft = true
- }
- if last != len(entries)-1 {
- noRight = true
- }
- }
- options = options.WithSkipBoundary(noLeft, noRight, nil)
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- nodes[string(path)] = hash
- })
- tr := NewStackTrie(options)
-
- for i := first; i <= last; i++ {
- tr.MustUpdate(entries[i].k, entries[i].v)
- }
- tr.Commit()
- return nodes
-}
-
-func TestPartialStackTrie(t *testing.T) {
- for round := 0; round < 100; round++ {
- var (
- n = rand.Intn(100) + 1
- entries []*kv
- )
- for i := 0; i < n; i++ {
- var val []byte
- if rand.Intn(3) == 0 {
- val = testrand.Bytes(3)
- } else {
- val = testrand.Bytes(32)
- }
- entries = append(entries, &kv{
- k: testrand.Bytes(32),
- v: val,
- })
- }
- slices.SortFunc(entries, (*kv).cmp)
-
- var (
- nodes = make(map[string]common.Hash)
- options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) {
- nodes[string(path)] = hash
- })
- )
- tr := NewStackTrie(options)
-
- for i := 0; i < len(entries); i++ {
- tr.MustUpdate(entries[i].k, entries[i].v)
- }
- tr.Commit()
-
- for j := 0; j < 100; j++ {
- for path, hash := range buildPartialTree(entries, t) {
- if nodes[path] != hash {
- t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash)
- }
- }
- }
- }
-}
-
func TestStackTrieErrors(t *testing.T) {
s := NewStackTrie(nil)
// Deletion
diff --git a/trie/trie_test.go b/trie/trie_test.go
index 87a0785cfb..6ecd20c218 100644
--- a/trie/trie_test.go
+++ b/trie/trie_test.go
@@ -963,11 +963,9 @@ func TestCommitSequenceStackTrie(t *testing.T) {
id: "b",
values: make(map[string]string),
}
- options := NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme())
})
- stTrie := NewStackTrie(options)
// Fill the trie with elements
for i := 0; i < count; i++ {
@@ -993,7 +991,7 @@ func TestCommitSequenceStackTrie(t *testing.T) {
s.Flush()
// And flush stacktrie -> disk
- stRoot := stTrie.Commit()
+ stRoot := stTrie.Hash()
if stRoot != root {
t.Fatalf("root wrong, got %x exp %x", stRoot, root)
}
@@ -1034,12 +1032,9 @@ func TestCommitSequenceSmallRoot(t *testing.T) {
id: "b",
values: make(map[string]string),
}
- options := NewStackTrieOptions()
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) {
+ stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme())
})
- stTrie := NewStackTrie(options)
-
// Add a single small-element to the trie(s)
key := make([]byte, 5)
key[0] = 1
@@ -1053,7 +1048,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) {
db.Commit(root)
// And flush stacktrie -> disk
- stRoot := stTrie.Commit()
+ stRoot := stTrie.Hash()
if stRoot != root {
t.Fatalf("root wrong, got %x exp %x", stRoot, root)
}