trie, tests/fuzzers: implement a stacktrie fuzzer + stacktrie fixes (#21799)

* trie: fix error in stacktrie not committing small roots * fuzzers: make trie-fuzzer use correct returnvalues * trie: improved tests * tests/fuzzers: fuzzer for stacktrie vs regular trie * test/fuzzers: make stacktrie fuzzer use 32-byte keys * trie: fix error in stacktrie with small nodes * trie: add (skipped) testcase for stacktrie * tests/fuzzers: address review comments for stacktrie fuzzer * trie: fix docs in stacktrie
2020-11-09 15:08:12 +01:00 · 2020-11-09 15:08:12 +01:00 · 81678971db
commit 81678971db
parent 97fc1c3b1d
6 changed files with 341 additions and 14 deletions
--- a/tests/fuzzers/stacktrie/debug/main.go
+++ b/tests/fuzzers/stacktrie/debug/main.go
@ -0,0 +1,23 @@
 package main
 import (
 	"fmt"
 	"io/ioutil"
 	"os"
 	"github.com/ethereum/go-ethereum/tests/fuzzers/stacktrie"
 )
 func main() {
 	if len(os.Args) != 2 {
 		fmt.Fprintf(os.Stderr, "Usage: debug <file>")
 		os.Exit(1)
 	}
 	crasher := os.Args[1]
 	data, err := ioutil.ReadFile(crasher)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "error loading crasher %v: %v", crasher, err)
 		os.Exit(1)
 	}
 	stacktrie.Debug(data)
 }
--- a/tests/fuzzers/stacktrie/trie_fuzzer.go
+++ b/tests/fuzzers/stacktrie/trie_fuzzer.go
@ -0,0 +1,197 @@
 // Copyright 2020 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as published by
 // the Free Software Foundation, either version 3 of the License, or
 // (at your option) any later version.
 //
 // The go-ethereum library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 // GNU Lesser General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public License
 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 package stacktrie
 import (
 	"bytes"
 	"encoding/binary"
 	"errors"
 	"fmt"
 	"hash"
 	"io"
 	"sort"
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/trie"
 	"golang.org/x/crypto/sha3"
 )
 type fuzzer struct {
 	input     io.Reader
 	exhausted bool
 	debugging bool
 }
 func (f *fuzzer) read(size int) []byte {
 	out := make([]byte, size)
 	if _, err := f.input.Read(out); err != nil {
 		f.exhausted = true
 	}
 	return out
 }
 func (f *fuzzer) readSlice(min, max int) []byte {
 	var a uint16
 	binary.Read(f.input, binary.LittleEndian, &a)
 	size := min + int(a)%(max-min)
 	out := make([]byte, size)
 	if _, err := f.input.Read(out); err != nil {
 		f.exhausted = true
 	}
 	return out
 }
 // spongeDb is a dummy db backend which accumulates writes in a sponge
 type spongeDb struct {
 	sponge hash.Hash
 	debug  bool
 }
 func (s *spongeDb) Has(key []byte) (bool, error)             { panic("implement me") }
 func (s *spongeDb) Get(key []byte) ([]byte, error)           { return nil, errors.New("no such elem") }
 func (s *spongeDb) Delete(key []byte) error                  { panic("implement me") }
 func (s *spongeDb) NewBatch() ethdb.Batch                    { return &spongeBatch{s} }
 func (s *spongeDb) Stat(property string) (string, error)     { panic("implement me") }
 func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
 func (s *spongeDb) Close() error                             { return nil }
 func (s *spongeDb) Put(key []byte, value []byte) error {
 	if s.debug {
 		fmt.Printf("db.Put %x : %x\n", key, value)
 	}
 	s.sponge.Write(key)
 	s.sponge.Write(value)
 	return nil
 }
 func (s *spongeDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { panic("implement me") }
 // spongeBatch is a dummy batch which immediately writes to the underlying spongedb
 type spongeBatch struct {
 	db *spongeDb
 }
 func (b *spongeBatch) Put(key, value []byte) error {
 	b.db.Put(key, value)
 	return nil
 }
 func (b *spongeBatch) Delete(key []byte) error             { panic("implement me") }
 func (b *spongeBatch) ValueSize() int                      { return 100 }
 func (b *spongeBatch) Write() error                        { return nil }
 func (b *spongeBatch) Reset()                              {}
 func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil }
 type kv struct {
 	k, v []byte
 }
 type kvs []kv
 func (k kvs) Len() int {
 	return len(k)
 }
 func (k kvs) Less(i, j int) bool {
 	return bytes.Compare(k[i].k, k[j].k) < 0
 }
 func (k kvs) Swap(i, j int) {
 	k[j], k[i] = k[i], k[j]
 }
 // The function must return
 // 1 if the fuzzer should increase priority of the
 //    given input during subsequent fuzzing (for example, the input is lexically
 //    correct and was parsed successfully);
 // -1 if the input must not be added to corpus even if gives new coverage; and
 // 0  otherwise
 // other values are reserved for future use.
 func Fuzz(data []byte) int {
 	f := fuzzer{
 		input:     bytes.NewReader(data),
 		exhausted: false,
 	}
 	return f.fuzz()
 }
 func Debug(data []byte) int {
 	f := fuzzer{
 		input:     bytes.NewReader(data),
 		exhausted: false,
 		debugging: true,
 	}
 	return f.fuzz()
 }
 func (f *fuzzer) fuzz() int {
 	// This spongeDb is used to check the sequence of disk-db-writes
 	var (
 		spongeA     = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
 		dbA         = trie.NewDatabase(spongeA)
 		trieA, _    = trie.New(common.Hash{}, dbA)
 		spongeB     = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
 		trieB       = trie.NewStackTrie(spongeB)
 		vals        kvs
 		useful      bool
 		maxElements = 10000
 	)
 	// Fill the trie with elements
 	for i := 0; !f.exhausted && i < maxElements; i++ {
 		k := f.read(32)
 		v := f.readSlice(1, 500)
 		if f.exhausted {
 			// If it was exhausted while reading, the value may be all zeroes,
 			// thus 'deletion' which is not supported on stacktrie
 			break
 		}
 		vals = append(vals, kv{k: k, v: v})
 		trieA.Update(k, v)
 		useful = true
 	}
 	if !useful {
 		return 0
 	}
 	// Flush trie -> database
 	rootA, err := trieA.Commit(nil)
 	if err != nil {
 		panic(err)
 	}
 	// Flush memdb -> disk (sponge)
 	dbA.Commit(rootA, false, nil)
 	// Stacktrie requires sorted insertion
 	sort.Sort(vals)
 	for _, kv := range vals {
 		if f.debugging {
 			fmt.Printf("{\"0x%x\" , \"0x%x\"} // stacktrie.Update\n", kv.k, kv.v)
 		}
 		trieB.Update(kv.k, kv.v)
 	}
 	rootB := trieB.Hash()
 	if _, err := trieB.Commit(); err != nil {
 		panic(err)
 	}
 	if rootA != rootB {
 		panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
 	}
 	sumA := spongeA.sponge.Sum(nil)
 	sumB := spongeB.sponge.Sum(nil)
 	if !bytes.Equal(sumA, sumB) {
 		panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB))
 	}
 	return 1
 }
--- a/tests/fuzzers/trie/trie-fuzzer.go
+++ b/tests/fuzzers/trie/trie-fuzzer.go
@ -122,15 +122,22 @@ func Generate(input []byte) randTest {
 	return steps
 }
 // The function must return
 // 1 if the fuzzer should increase priority of the
 //    given input during subsequent fuzzing (for example, the input is lexically
 //    correct and was parsed successfully);
 // -1 if the input must not be added to corpus even if gives new coverage; and
 // 0  otherwise
 // other values are reserved for future use.
 func Fuzz(input []byte) int {
 	program := Generate(input)
 	if len(program) == 0 {
-		return -1
+		return 0
 	}
 	if err := runRandTest(program); err != nil {
 		panic(err)
 	}
-	return 0
+	return 1
 }
 func runRandTest(rt randTest) error {
--- a/trie/stacktrie.go
+++ b/trie/stacktrie.go
@ -314,19 +314,22 @@ func (st *StackTrie) hash() {
 			panic(err)
 		}
 	case extNode:
 		st.children[0].hash()
 		h = newHasher(false)
 		defer returnHasherToPool(h)
 		h.tmp.Reset()
-		st.children[0].hash()
+		var valuenode node
-		// This is also possible:
+		if len(st.children[0].val) < 32 {
-		//sz := hexToCompactInPlace(st.key)
+			valuenode = rawNode(st.children[0].val)
-		//n := [][]byte{
+		} else {
-		//	st.key[:sz],
+			valuenode = hashNode(st.children[0].val)
-		//	st.children[0].val,
+		}
-		//}
+		n := struct {
-		n := [][]byte{
+			Key []byte
-			hexToCompact(st.key),
+			Val node
-			st.children[0].val,
+		}{
 			Key: hexToCompact(st.key),
 			Val: valuenode,
 		}
 		if err := rlp.Encode(&h.tmp, n); err != nil {
 			panic(err)
@ -406,6 +409,18 @@ func (st *StackTrie) Commit() (common.Hash, error) {
 		return common.Hash{}, ErrCommitDisabled
 	}
 	st.hash()
-	h := common.BytesToHash(st.val)
+	if len(st.val) != 32 {
-	return h, nil
+		// If the node's RLP isn't 32 bytes long, the node will not
 		// be hashed (and committed), and instead contain the  rlp-encoding of the
 		// node. For the top level node, we need to force the hashing+commit.
 		ret := make([]byte, 32)
 		h := newHasher(false)
 		defer returnHasherToPool(h)
 		h.sha.Reset()
 		h.sha.Write(st.val)
 		h.sha.Read(ret)
 		st.db.Put(ret, st.val)
 		return common.BytesToHash(ret), nil
 	}
 	return common.BytesToHash(st.val), nil
 }
--- a/trie/stacktrie_test.go
+++ b/trie/stacktrie_test.go
@ -240,3 +240,52 @@ func TestDerivableList(t *testing.T) {
 		}
 	}
 }
 // TestUpdateSmallNodes tests a case where the leaves are small (both key and value),
 // which causes a lot of node-within-node. This case was found via fuzzing.
 func TestUpdateSmallNodes(t *testing.T) {
 	st := NewStackTrie(nil)
 	nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
 	kvs := []struct {
 		K string
 		V string
 	}{
 		{"63303030", "3041"}, // stacktrie.Update
 		{"65", "3000"},       // stacktrie.Update
 	}
 	for _, kv := range kvs {
 		nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
 		st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
 	}
 	if nt.Hash() != st.Hash() {
 		t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
 	}
 }
 // TestUpdateVariableKeys contains a case which stacktrie fails: when keys of different
 // sizes are used, and the second one has the same prefix as the first, then the
 // stacktrie fails, since it's unable to 'expand' on an already added leaf.
 // For all practical purposes, this is fine, since keys are fixed-size length
 // in account and storage tries.
 //
 // The test is marked as 'skipped', and exists just to have the behaviour documented.
 // This case was found via fuzzing.
 func TestUpdateVariableKeys(t *testing.T) {
 	t.SkipNow()
 	st := NewStackTrie(nil)
 	nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
 	kvs := []struct {
 		K string
 		V string
 	}{
 		{"0x33303534636532393561313031676174", "303030"},
 		{"0x3330353463653239356131303167617430", "313131"},
 	}
 	for _, kv := range kvs {
 		nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
 		st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
 	}
 	if nt.Hash() != st.Hash() {
 		t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
 	}
 }
--- a/trie/trie_test.go
+++ b/trie/trie_test.go
@ -853,6 +853,42 @@ func TestCommitSequenceStackTrie(t *testing.T) {
 	}
 }
 // TestCommitSequenceSmallRoot tests that a trie which is essentially only a
 // small (<32 byte) shortnode with an included value is properly committed to a
 // database.
 // This case might not matter, since in practice, all keys are 32 bytes, which means
 // that even a small trie which contains a leaf will have an extension making it
 // not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do.
 func TestCommitSequenceSmallRoot(t *testing.T) {
 	s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"}
 	db := NewDatabase(s)
 	trie, _ := New(common.Hash{}, db)
 	// Another sponge is used for the stacktrie commits
 	stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"}
 	stTrie := NewStackTrie(stackTrieSponge)
 	// Add a single small-element to the trie(s)
 	key := make([]byte, 5)
 	key[0] = 1
 	trie.TryUpdate(key, []byte{0x1})
 	stTrie.TryUpdate(key, []byte{0x1})
 	// Flush trie -> database
 	root, _ := trie.Commit(nil)
 	// Flush memdb -> disk (sponge)
 	db.Commit(root, false, nil)
 	// And flush stacktrie -> disk
 	stRoot, err := stTrie.Commit()
 	if err != nil {
 		t.Fatalf("Failed to commit stack trie %v", err)
 	}
 	if stRoot != root {
 		t.Fatalf("root wrong, got %x exp %x", stRoot, root)
 	}
 	fmt.Printf("root: %x\n", stRoot)
 	if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) {
 		t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp)
 	}
 }
 // BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie.
 // This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically,
 // storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple