eth/protocols/snap: cleanup dangling account trie nodes due to incomplete storage (#30258)

This pull request fixes #30229.
 
During snap sync, large storage will be split into several pieces and
synchronized concurrently. Unfortunately, the tradeoff is that the respective
merkle trie of each storage chunk will be incomplete due to the incomplete
boundaries. The trie nodes on these boundaries will be discarded, and any
dangling nodes on disk will also be removed if they fall on these paths,
ensuring the state healer won't be blocked.

However, the dangling account trie nodes on the path from the root to the
associated account are left untouched. This means the dangling account trie
nodes could potentially stop the state healing and break the assumption that the
entire subtrie should exist if the subtrie root exists. We should consider the
account trie node as the ancestor of the corresponding storage trie node.

In the scenarios described in the above ticket, the state corruption could occur
if there is a dangling account trie node while some storage trie nodes are
removed due to synchronization redo.

The fixing idea is pretty straightforward, the trie nodes on the path from root
to account should all be explicitly removed if an incomplete storage trie
occurs. Therefore, a `delete` operation has been added into `gentrie` to
explicitly clear the account along with all nodes on this path. The special
thing is that it's a cross-trie clearing. In theory, there may be a dangling
node at any position on this account key and we have to clear all of them.
This commit is contained in:
rjl493456442 2024-08-12 16:43:54 +08:00 committed by GitHub
parent 33a13b6f21
commit 5adf4adc8e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 198 additions and 9 deletions

@ -31,6 +31,9 @@ type genTrie interface {
// update inserts the state item into generator trie. // update inserts the state item into generator trie.
update(key, value []byte) error update(key, value []byte) error
// delete removes the state item from the generator trie.
delete(key []byte) error
// commit flushes the right boundary nodes if complete flag is true. This // commit flushes the right boundary nodes if complete flag is true. This
// function must be called before flushing the associated database batch. // function must be called before flushing the associated database batch.
commit(complete bool) common.Hash commit(complete bool) common.Hash
@ -113,7 +116,7 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
// removed because it's a sibling of the nodes we want to commit, not // removed because it's a sibling of the nodes we want to commit, not
// the parent or ancestor. // the parent or ancestor.
for i := 0; i < len(path); i++ { for i := 0; i < len(path); i++ {
t.delete(path[:i], false) t.deleteNode(path[:i], false)
} }
} }
return return
@ -136,7 +139,7 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
// byte key. In either case, no gaps will be left in the path. // byte key. In either case, no gaps will be left in the path.
if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 {
for i := len(path) + 1; i < len(t.last); i++ { for i := len(path) + 1; i < len(t.last); i++ {
t.delete(t.last[:i], true) t.deleteNode(t.last[:i], true)
} }
} }
t.write(path, blob) t.write(path, blob)
@ -192,8 +195,8 @@ func (t *pathTrie) deleteStorageNode(path []byte, inner bool) {
rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) rawdb.DeleteStorageTrieNode(t.batch, t.owner, path)
} }
// delete commits the node deletion to provided database batch in path mode. // deleteNode commits the node deletion to provided database batch in path mode.
func (t *pathTrie) delete(path []byte, inner bool) { func (t *pathTrie) deleteNode(path []byte, inner bool) {
if t.owner == (common.Hash{}) { if t.owner == (common.Hash{}) {
t.deleteAccountNode(path, inner) t.deleteAccountNode(path, inner)
} else { } else {
@ -207,6 +210,34 @@ func (t *pathTrie) update(key, value []byte) error {
return t.tr.Update(key, value) return t.tr.Update(key, value)
} }
// delete implements genTrie interface, deleting the item from the stack trie.
func (t *pathTrie) delete(key []byte) error {
// Commit the trie since the right boundary is incomplete because
// of the deleted item. This will implicitly discard the last inserted
// item and clean some ancestor trie nodes of the last committed
// item in the database.
t.commit(false)
// Reset the trie and all the internal trackers
t.first = nil
t.last = nil
t.tr.Reset()
// Explicitly mark the left boundary as incomplete, as the left-side
// item of the next one has been deleted. Be aware that the next item
// to be inserted will be ignored from committing as well as it's on
// the left boundary.
t.skipLeftBoundary = true
// Explicitly delete the potential leftover nodes on the specific
// path from the database.
tkey := t.tr.TrieKey(key)
for i := 0; i <= len(tkey); i++ {
t.deleteNode(tkey[:i], false)
}
return nil
}
// commit implements genTrie interface, flushing the right boundary if it's // commit implements genTrie interface, flushing the right boundary if it's
// considered as complete. Otherwise, the nodes on the right boundary are // considered as complete. Otherwise, the nodes on the right boundary are
// discarded and cleaned up. // discarded and cleaned up.
@ -255,7 +286,7 @@ func (t *pathTrie) commit(complete bool) common.Hash {
// with no issues as they are actually complete. Also, from a database // with no issues as they are actually complete. Also, from a database
// perspective, first deleting and then rewriting is a valid data update. // perspective, first deleting and then rewriting is a valid data update.
for i := 0; i < len(t.last); i++ { for i := 0; i < len(t.last); i++ {
t.delete(t.last[:i], false) t.deleteNode(t.last[:i], false)
} }
return common.Hash{} // the hash is meaningless for incomplete commit return common.Hash{} // the hash is meaningless for incomplete commit
} }
@ -278,6 +309,9 @@ func (t *hashTrie) update(key, value []byte) error {
return t.tr.Update(key, value) return t.tr.Update(key, value)
} }
// delete implements genTrie interface, ignoring the state item for deleting.
func (t *hashTrie) delete(key []byte) error { return nil }
// commit implements genTrie interface, committing the nodes on right boundary. // commit implements genTrie interface, committing the nodes on right boundary.
func (t *hashTrie) commit(complete bool) common.Hash { func (t *hashTrie) commit(complete bool) common.Hash {
if !complete { if !complete {

@ -551,3 +551,145 @@ func TestTinyPartialTree(t *testing.T) {
} }
} }
} }
func TestTrieDelete(t *testing.T) {
var entries []*kv
for i := 0; i < 1024; i++ {
entries = append(entries, &kv{
k: testrand.Bytes(32),
v: testrand.Bytes(32),
})
}
slices.SortFunc(entries, (*kv).cmp)
nodes := make(map[string]common.Hash)
tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
nodes[string(path)] = hash
})
for i := 0; i < len(entries); i++ {
tr.Update(entries[i].k, entries[i].v)
}
tr.Hash()
check := func(index []int) {
var (
db = rawdb.NewMemoryDatabase()
batch = db.NewBatch()
marks = map[int]struct{}{}
neighbors = map[int]struct{}{}
)
for _, n := range index {
marks[n] = struct{}{}
}
for _, n := range index {
if n != 0 {
if _, ok := marks[n-1]; !ok {
neighbors[n-1] = struct{}{}
}
}
if n != len(entries)-1 {
if _, ok := neighbors[n+1]; !ok {
neighbors[n+1] = struct{}{}
}
}
}
// Write the junk nodes as the dangling
var injects []string
for _, n := range index {
nibbles := byteToHex(entries[n].k)
for i := 0; i <= len(nibbles); i++ {
injects = append(injects, string(nibbles[:i]))
}
}
for _, path := range injects {
rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32))
}
tr := newPathTrie(common.Hash{}, false, db, batch)
for i := 0; i < len(entries); i++ {
if _, ok := marks[i]; ok {
tr.delete(entries[i].k)
} else {
tr.update(entries[i].k, entries[i].v)
}
}
tr.commit(true)
r := newBatchReplay()
batch.Replay(r)
batch.Write()
for _, path := range injects {
if rawdb.HasAccountTrieNode(db, []byte(path)) {
t.Fatalf("Unexpected leftover node %v", []byte(path))
}
}
// ensure all the written nodes match with the complete tree
set := make(map[string]common.Hash)
for path, hash := range r.modifies() {
if hash == (common.Hash{}) {
continue
}
n, ok := nodes[path]
if !ok {
t.Fatalf("Unexpected trie node: %v", []byte(path))
}
if n != hash {
t.Fatalf("Unexpected trie node content: %v, want: %x, got: %x", []byte(path), n, hash)
}
set[path] = hash
}
// ensure all the missing nodes either on the deleted path, or
// on the neighbor paths.
isMissing := func(path []byte) bool {
for n := range marks {
key := byteToHex(entries[n].k)
if bytes.HasPrefix(key, path) {
return true
}
}
for n := range neighbors {
key := byteToHex(entries[n].k)
if bytes.HasPrefix(key, path) {
return true
}
}
return false
}
for path := range nodes {
if _, ok := set[path]; ok {
continue
}
if !isMissing([]byte(path)) {
t.Fatalf("Missing node %v", []byte(path))
}
}
}
var cases = []struct {
index []int
}{
// delete the first
{[]int{0}},
// delete the last
{[]int{len(entries) - 1}},
// delete the first two
{[]int{0, 1}},
// delete the last two
{[]int{len(entries) - 2, len(entries) - 1}},
{[]int{
0, 2, 4, 6,
len(entries) - 1,
len(entries) - 3,
len(entries) - 5,
len(entries) - 7,
}},
}
for _, c := range cases {
check(c.index)
}
}

@ -2424,14 +2424,21 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
slim := types.SlimAccountRLP(*res.accounts[i]) slim := types.SlimAccountRLP(*res.accounts[i])
rawdb.WriteAccountSnapshot(batch, hash, slim) rawdb.WriteAccountSnapshot(batch, hash, slim)
// If the task is complete, drop it into the stack trie to generate
// account trie nodes for it
if !task.needHeal[i] { if !task.needHeal[i] {
// If the storage task is complete, drop it into the stack trie
// to generate account trie nodes for it
full, err := types.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted full, err := types.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted
if err != nil { if err != nil {
panic(err) // Really shouldn't ever happen panic(err) // Really shouldn't ever happen
} }
task.genTrie.update(hash[:], full) task.genTrie.update(hash[:], full)
} else {
// If the storage task is incomplete, explicitly delete the corresponding
// account item from the account trie to ensure that all nodes along the
// path to the incomplete storage trie are cleaned up.
if err := task.genTrie.delete(hash[:]); err != nil {
panic(err) // Really shouldn't ever happen
}
} }
} }
// Flush anything written just now and update the stats // Flush anything written just now and update the stats

@ -64,8 +64,7 @@ func (t *StackTrie) Update(key, value []byte) error {
if len(value) == 0 { if len(value) == 0 {
return errors.New("trying to insert empty (deletion)") return errors.New("trying to insert empty (deletion)")
} }
k := keybytesToHex(key) k := t.TrieKey(key)
k = k[:len(k)-1] // chop the termination flag
if bytes.Compare(t.last, k) >= 0 { if bytes.Compare(t.last, k) >= 0 {
return errors.New("non-ascending key order") return errors.New("non-ascending key order")
} }
@ -84,6 +83,13 @@ func (t *StackTrie) Reset() {
t.last = nil t.last = nil
} }
// TrieKey returns the internal key representation for the given user key.
func (t *StackTrie) TrieKey(key []byte) []byte {
k := keybytesToHex(key)
k = k[:len(k)-1] // chop the termination flag
return k
}
// stNode represents a node within a StackTrie // stNode represents a node within a StackTrie
type stNode struct { type stNode struct {
typ uint8 // node type (as in branch, ext, leaf) typ uint8 // node type (as in branch, ext, leaf)