// Copyright 2024 The go-ethereum Authors // This file is part of the go-ethereum library. // // The go-ethereum library is free software: you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // The go-ethereum library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . package snap import ( "bytes" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/trie" ) // genTrie interface is used by the snap syncer to generate merkle tree nodes // based on a received batch of states. type genTrie interface { // update inserts the state item into generator trie. update(key, value []byte) error // delete removes the state item from the generator trie. delete(key []byte) error // commit flushes the right boundary nodes if complete flag is true. This // function must be called before flushing the associated database batch. commit(complete bool) common.Hash } // pathTrie is a wrapper over the stackTrie, incorporating numerous additional // logics to handle the semi-completed trie and potential leftover dangling // nodes in the database. It is utilized for constructing the merkle tree nodes // in path mode during the snap sync process. type pathTrie struct { owner common.Hash // identifier of trie owner, empty for account trie tr *trie.StackTrie // underlying raw stack trie first []byte // the path of first committed node by stackTrie last []byte // the path of last committed node by stackTrie // This flag indicates whether nodes on the left boundary are skipped for // committing. If set, the left boundary nodes are considered incomplete // due to potentially missing left children. skipLeftBoundary bool db ethdb.KeyValueReader batch ethdb.Batch } // newPathTrie initializes the path trie. func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { tr := &pathTrie{ owner: owner, skipLeftBoundary: skipLeftBoundary, db: db, batch: batch, } tr.tr = trie.NewStackTrie(tr.onTrieNode) return tr } // onTrieNode is invoked whenever a new node is committed by the stackTrie. // // As the committed nodes might be incomplete if they are on the boundaries // (left or right), this function has the ability to detect the incomplete // ones and filter them out for committing. // // Additionally, the assumption is made that there may exist leftover dangling // nodes in the database. This function has the ability to detect the dangling // nodes that fall within the path space of committed nodes (specifically on // the path covered by internal extension nodes) and remove them from the // database. This property ensures that the entire path space is uniquely // occupied by committed nodes. // // Furthermore, all leftover dangling nodes along the path from committed nodes // to the trie root (left and right boundaries) should be removed as well; // otherwise, they might potentially disrupt the state healing process. func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { // Filter out the nodes on the left boundary if skipLeftBoundary is // configured. Nodes are considered to be on the left boundary if // it's the first one to be committed, or the parent/ancestor of the // first committed node. if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { if t.first == nil { // Memorize the path of first committed node, which is regarded // as left boundary. Deep-copy is necessary as the path given // is volatile. t.first = append([]byte{}, path...) // The left boundary can be uniquely determined by the first committed node // from stackTrie (e.g., N_1), as the shared path prefix between the first // two inserted state items is deterministic (the path of N_3). The path // from trie root towards the first committed node is considered the left // boundary. The potential leftover dangling nodes on left boundary should // be cleaned out. // // +-----+ // | N_3 | shared path prefix of state_1 and state_2 // +-----+ // /- -\ // +-----+ +-----+ // First committed node | N_1 | | N_2 | latest inserted node (contain state_2) // +-----+ +-----+ // // The node with the path of the first committed one (e.g, N_1) is not // removed because it's a sibling of the nodes we want to commit, not // the parent or ancestor. for i := 0; i < len(path); i++ { t.deleteNode(path[:i], false) } } return } // If boundary filtering is not configured, or the node is not on the left // boundary, commit it to database. // // Note: If the current committed node is an extension node, then the nodes // falling within the path between itself and its standalone (not embedded // in parent) child should be cleaned out for exclusively occupy the inner // path. // // This is essential in snap sync to avoid leaving dangling nodes within // this range covered by extension node which could potentially break the // state healing. // // The extension node is detected if its path is the prefix of last committed // one and path gap is larger than one. If the path gap is only one byte, // the current node could either be a full node, or an extension with single // byte key. In either case, no gaps will be left in the path. if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { for i := len(path) + 1; i < len(t.last); i++ { t.deleteNode(t.last[:i], true) } } t.write(path, blob) // Update the last flag. Deep-copy is necessary as the provided path is volatile. if t.last == nil { t.last = append([]byte{}, path...) } else { t.last = append(t.last[:0], path...) } } // write commits the node write to provided database batch in path mode. func (t *pathTrie) write(path []byte, blob []byte) { if t.owner == (common.Hash{}) { rawdb.WriteAccountTrieNode(t.batch, path, blob) } else { rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob) } } func (t *pathTrie) deleteAccountNode(path []byte, inner bool) { if inner { accountInnerLookupGauge.Inc(1) } else { accountOuterLookupGauge.Inc(1) } if !rawdb.HasAccountTrieNode(t.db, path) { return } if inner { accountInnerDeleteGauge.Inc(1) } else { accountOuterDeleteGauge.Inc(1) } rawdb.DeleteAccountTrieNode(t.batch, path) } func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { if inner { storageInnerLookupGauge.Inc(1) } else { storageOuterLookupGauge.Inc(1) } if !rawdb.HasStorageTrieNode(t.db, t.owner, path) { return } if inner { storageInnerDeleteGauge.Inc(1) } else { storageOuterDeleteGauge.Inc(1) } rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) } // deleteNode commits the node deletion to provided database batch in path mode. func (t *pathTrie) deleteNode(path []byte, inner bool) { if t.owner == (common.Hash{}) { t.deleteAccountNode(path, inner) } else { t.deleteStorageNode(path, inner) } } // update implements genTrie interface, inserting a (key, value) pair into the // stack trie. func (t *pathTrie) update(key, value []byte) error { return t.tr.Update(key, value) } // delete implements genTrie interface, deleting the item from the stack trie. func (t *pathTrie) delete(key []byte) error { // Commit the trie since the right boundary is incomplete because // of the deleted item. This will implicitly discard the last inserted // item and clean some ancestor trie nodes of the last committed // item in the database. t.commit(false) // Reset the trie and all the internal trackers t.first = nil t.last = nil t.tr.Reset() // Explicitly mark the left boundary as incomplete, as the left-side // item of the next one has been deleted. Be aware that the next item // to be inserted will be ignored from committing as well as it's on // the left boundary. t.skipLeftBoundary = true // Explicitly delete the potential leftover nodes on the specific // path from the database. tkey := t.tr.TrieKey(key) for i := 0; i <= len(tkey); i++ { t.deleteNode(tkey[:i], false) } return nil } // commit implements genTrie interface, flushing the right boundary if it's // considered as complete. Otherwise, the nodes on the right boundary are // discarded and cleaned up. // // Note, this function must be called before flushing database batch, otherwise, // dangling nodes might be left in database. func (t *pathTrie) commit(complete bool) common.Hash { // If the right boundary is claimed as complete, flush them out. // The nodes on both left and right boundary will still be filtered // out if left boundary filtering is configured. if complete { // Commit all inserted but not yet committed nodes(on the right // boundary) in the stackTrie. hash := t.tr.Hash() if t.skipLeftBoundary { return common.Hash{} // hash is meaningless if left side is incomplete } return hash } // Discard nodes on the right boundary as it's claimed as incomplete. These // nodes might be incomplete due to missing children on the right side. // Furthermore, the potential leftover nodes on right boundary should also // be cleaned out. // // The right boundary can be uniquely determined by the last committed node // from stackTrie (e.g., N_1), as the shared path prefix between the last // two inserted state items is deterministic (the path of N_3). The path // from trie root towards the last committed node is considered the right // boundary (root to N_3). // // +-----+ // | N_3 | shared path prefix of last two states // +-----+ // /- -\ // +-----+ +-----+ // Last committed node | N_1 | | N_2 | latest inserted node (contain last state) // +-----+ +-----+ // // Another interesting scenario occurs when the trie is committed due to // too many items being accumulated in the batch. To flush them out to // the database, the path of the last inserted node (N_2) is temporarily // treated as an incomplete right boundary, and nodes on this path are // removed (e.g. from root to N_3). // However, this path will be reclaimed as an internal path by inserting // more items after the batch flush. New nodes on this path can be committed // with no issues as they are actually complete. Also, from a database // perspective, first deleting and then rewriting is a valid data update. for i := 0; i < len(t.last); i++ { t.deleteNode(t.last[:i], false) } return common.Hash{} // the hash is meaningless for incomplete commit } // hashTrie is a wrapper over the stackTrie for implementing genTrie interface. type hashTrie struct { tr *trie.StackTrie } // newHashTrie initializes the hash trie. func newHashTrie(batch ethdb.Batch) *hashTrie { return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteLegacyTrieNode(batch, hash, blob) })} } // update implements genTrie interface, inserting a (key, value) pair into // the stack trie. func (t *hashTrie) update(key, value []byte) error { return t.tr.Update(key, value) } // delete implements genTrie interface, ignoring the state item for deleting. func (t *hashTrie) delete(key []byte) error { return nil } // commit implements genTrie interface, committing the nodes on right boundary. func (t *hashTrie) commit(complete bool) common.Hash { if !complete { return common.Hash{} // the hash is meaningless for incomplete commit } return t.tr.Hash() // return hash only if it's claimed as complete }