trie, tests/fuzzers: implement a stacktrie fuzzer + stacktrie fixes (#21799)

* trie: fix error in stacktrie not committing small roots

* fuzzers: make trie-fuzzer use correct returnvalues

* trie: improved tests

* tests/fuzzers: fuzzer for stacktrie vs regular trie

* test/fuzzers: make stacktrie fuzzer use 32-byte keys

* trie: fix error in stacktrie with small nodes

* trie: add (skipped) testcase for stacktrie

* tests/fuzzers: address review comments for stacktrie fuzzer

* trie: fix docs in stacktrie
This commit is contained in:
Martin Holst Swende 2020-11-09 15:08:12 +01:00 committed by GitHub
parent 97fc1c3b1d
commit 81678971db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 341 additions and 14 deletions

@ -0,0 +1,23 @@
package main
import (
"fmt"
"io/ioutil"
"os"
"github.com/ethereum/go-ethereum/tests/fuzzers/stacktrie"
)
func main() {
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: debug <file>")
os.Exit(1)
}
crasher := os.Args[1]
data, err := ioutil.ReadFile(crasher)
if err != nil {
fmt.Fprintf(os.Stderr, "error loading crasher %v: %v", crasher, err)
os.Exit(1)
}
stacktrie.Debug(data)
}

@ -0,0 +1,197 @@
// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package stacktrie
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"hash"
"io"
"sort"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie"
"golang.org/x/crypto/sha3"
)
type fuzzer struct {
input io.Reader
exhausted bool
debugging bool
}
func (f *fuzzer) read(size int) []byte {
out := make([]byte, size)
if _, err := f.input.Read(out); err != nil {
f.exhausted = true
}
return out
}
func (f *fuzzer) readSlice(min, max int) []byte {
var a uint16
binary.Read(f.input, binary.LittleEndian, &a)
size := min + int(a)%(max-min)
out := make([]byte, size)
if _, err := f.input.Read(out); err != nil {
f.exhausted = true
}
return out
}
// spongeDb is a dummy db backend which accumulates writes in a sponge
type spongeDb struct {
sponge hash.Hash
debug bool
}
func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement me") }
func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") }
func (s *spongeDb) Delete(key []byte) error { panic("implement me") }
func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} }
func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") }
func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
func (s *spongeDb) Close() error { return nil }
func (s *spongeDb) Put(key []byte, value []byte) error {
if s.debug {
fmt.Printf("db.Put %x : %x\n", key, value)
}
s.sponge.Write(key)
s.sponge.Write(value)
return nil
}
func (s *spongeDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { panic("implement me") }
// spongeBatch is a dummy batch which immediately writes to the underlying spongedb
type spongeBatch struct {
db *spongeDb
}
func (b *spongeBatch) Put(key, value []byte) error {
b.db.Put(key, value)
return nil
}
func (b *spongeBatch) Delete(key []byte) error { panic("implement me") }
func (b *spongeBatch) ValueSize() int { return 100 }
func (b *spongeBatch) Write() error { return nil }
func (b *spongeBatch) Reset() {}
func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil }
type kv struct {
k, v []byte
}
type kvs []kv
func (k kvs) Len() int {
return len(k)
}
func (k kvs) Less(i, j int) bool {
return bytes.Compare(k[i].k, k[j].k) < 0
}
func (k kvs) Swap(i, j int) {
k[j], k[i] = k[i], k[j]
}
// The function must return
// 1 if the fuzzer should increase priority of the
// given input during subsequent fuzzing (for example, the input is lexically
// correct and was parsed successfully);
// -1 if the input must not be added to corpus even if gives new coverage; and
// 0 otherwise
// other values are reserved for future use.
func Fuzz(data []byte) int {
f := fuzzer{
input: bytes.NewReader(data),
exhausted: false,
}
return f.fuzz()
}
func Debug(data []byte) int {
f := fuzzer{
input: bytes.NewReader(data),
exhausted: false,
debugging: true,
}
return f.fuzz()
}
func (f *fuzzer) fuzz() int {
// This spongeDb is used to check the sequence of disk-db-writes
var (
spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
dbA = trie.NewDatabase(spongeA)
trieA, _ = trie.New(common.Hash{}, dbA)
spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
trieB = trie.NewStackTrie(spongeB)
vals kvs
useful bool
maxElements = 10000
)
// Fill the trie with elements
for i := 0; !f.exhausted && i < maxElements; i++ {
k := f.read(32)
v := f.readSlice(1, 500)
if f.exhausted {
// If it was exhausted while reading, the value may be all zeroes,
// thus 'deletion' which is not supported on stacktrie
break
}
vals = append(vals, kv{k: k, v: v})
trieA.Update(k, v)
useful = true
}
if !useful {
return 0
}
// Flush trie -> database
rootA, err := trieA.Commit(nil)
if err != nil {
panic(err)
}
// Flush memdb -> disk (sponge)
dbA.Commit(rootA, false, nil)
// Stacktrie requires sorted insertion
sort.Sort(vals)
for _, kv := range vals {
if f.debugging {
fmt.Printf("{\"0x%x\" , \"0x%x\"} // stacktrie.Update\n", kv.k, kv.v)
}
trieB.Update(kv.k, kv.v)
}
rootB := trieB.Hash()
if _, err := trieB.Commit(); err != nil {
panic(err)
}
if rootA != rootB {
panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
}
sumA := spongeA.sponge.Sum(nil)
sumB := spongeB.sponge.Sum(nil)
if !bytes.Equal(sumA, sumB) {
panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB))
}
return 1
}

@ -122,15 +122,22 @@ func Generate(input []byte) randTest {
return steps
}
// The function must return
// 1 if the fuzzer should increase priority of the
// given input during subsequent fuzzing (for example, the input is lexically
// correct and was parsed successfully);
// -1 if the input must not be added to corpus even if gives new coverage; and
// 0 otherwise
// other values are reserved for future use.
func Fuzz(input []byte) int {
program := Generate(input)
if len(program) == 0 {
return -1
return 0
}
if err := runRandTest(program); err != nil {
panic(err)
}
return 0
return 1
}
func runRandTest(rt randTest) error {

@ -314,19 +314,22 @@ func (st *StackTrie) hash() {
panic(err)
}
case extNode:
st.children[0].hash()
h = newHasher(false)
defer returnHasherToPool(h)
h.tmp.Reset()
st.children[0].hash()
// This is also possible:
//sz := hexToCompactInPlace(st.key)
//n := [][]byte{
// st.key[:sz],
// st.children[0].val,
//}
n := [][]byte{
hexToCompact(st.key),
st.children[0].val,
var valuenode node
if len(st.children[0].val) < 32 {
valuenode = rawNode(st.children[0].val)
} else {
valuenode = hashNode(st.children[0].val)
}
n := struct {
Key []byte
Val node
}{
Key: hexToCompact(st.key),
Val: valuenode,
}
if err := rlp.Encode(&h.tmp, n); err != nil {
panic(err)
@ -406,6 +409,18 @@ func (st *StackTrie) Commit() (common.Hash, error) {
return common.Hash{}, ErrCommitDisabled
}
st.hash()
h := common.BytesToHash(st.val)
return h, nil
if len(st.val) != 32 {
// If the node's RLP isn't 32 bytes long, the node will not
// be hashed (and committed), and instead contain the rlp-encoding of the
// node. For the top level node, we need to force the hashing+commit.
ret := make([]byte, 32)
h := newHasher(false)
defer returnHasherToPool(h)
h.sha.Reset()
h.sha.Write(st.val)
h.sha.Read(ret)
st.db.Put(ret, st.val)
return common.BytesToHash(ret), nil
}
return common.BytesToHash(st.val), nil
}

@ -240,3 +240,52 @@ func TestDerivableList(t *testing.T) {
}
}
}
// TestUpdateSmallNodes tests a case where the leaves are small (both key and value),
// which causes a lot of node-within-node. This case was found via fuzzing.
func TestUpdateSmallNodes(t *testing.T) {
st := NewStackTrie(nil)
nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
kvs := []struct {
K string
V string
}{
{"63303030", "3041"}, // stacktrie.Update
{"65", "3000"}, // stacktrie.Update
}
for _, kv := range kvs {
nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
}
if nt.Hash() != st.Hash() {
t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
}
}
// TestUpdateVariableKeys contains a case which stacktrie fails: when keys of different
// sizes are used, and the second one has the same prefix as the first, then the
// stacktrie fails, since it's unable to 'expand' on an already added leaf.
// For all practical purposes, this is fine, since keys are fixed-size length
// in account and storage tries.
//
// The test is marked as 'skipped', and exists just to have the behaviour documented.
// This case was found via fuzzing.
func TestUpdateVariableKeys(t *testing.T) {
t.SkipNow()
st := NewStackTrie(nil)
nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
kvs := []struct {
K string
V string
}{
{"0x33303534636532393561313031676174", "303030"},
{"0x3330353463653239356131303167617430", "313131"},
}
for _, kv := range kvs {
nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
}
if nt.Hash() != st.Hash() {
t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
}
}

@ -853,6 +853,42 @@ func TestCommitSequenceStackTrie(t *testing.T) {
}
}
// TestCommitSequenceSmallRoot tests that a trie which is essentially only a
// small (<32 byte) shortnode with an included value is properly committed to a
// database.
// This case might not matter, since in practice, all keys are 32 bytes, which means
// that even a small trie which contains a leaf will have an extension making it
// not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do.
func TestCommitSequenceSmallRoot(t *testing.T) {
s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"}
db := NewDatabase(s)
trie, _ := New(common.Hash{}, db)
// Another sponge is used for the stacktrie commits
stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"}
stTrie := NewStackTrie(stackTrieSponge)
// Add a single small-element to the trie(s)
key := make([]byte, 5)
key[0] = 1
trie.TryUpdate(key, []byte{0x1})
stTrie.TryUpdate(key, []byte{0x1})
// Flush trie -> database
root, _ := trie.Commit(nil)
// Flush memdb -> disk (sponge)
db.Commit(root, false, nil)
// And flush stacktrie -> disk
stRoot, err := stTrie.Commit()
if err != nil {
t.Fatalf("Failed to commit stack trie %v", err)
}
if stRoot != root {
t.Fatalf("root wrong, got %x exp %x", stRoot, root)
}
fmt.Printf("root: %x\n", stRoot)
if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) {
t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp)
}
}
// BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie.
// This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically,
// storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple