trie: initial implementation for range proof (#20908)

* trie: initial implementation for range proof

* trie: add benchmark

* trie: fix lint

* trie: fix minor issue

* trie: unset the edge valuenode as well

* trie: unset the edge valuenode as nilValuenode
This commit is contained in:
gary rong 2020-04-24 19:37:56 +08:00 committed by GitHub
parent 38aab0aa83
commit 44ff3f3dc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 400 additions and 15 deletions

@ -224,7 +224,7 @@ func (r *TrieRequest) Validate(db ethdb.Database, msg *Msg) error {
// Verify the proof and store if checks out
nodeSet := proofs.NodeSet()
reads := &readTraceDB{db: nodeSet}
if _, _, err := trie.VerifyProof(r.Id.Root, r.Key, reads); err != nil {
if _, err := trie.VerifyProof(r.Id.Root, r.Key, reads); err != nil {
return fmt.Errorf("merkle proof verification failed: %v", err)
}
// check if all nodes have been read by VerifyProof
@ -378,7 +378,7 @@ func (r *ChtRequest) Validate(db ethdb.Database, msg *Msg) error {
binary.BigEndian.PutUint64(encNumber[:], r.BlockNum)
reads := &readTraceDB{db: nodeSet}
value, _, err := trie.VerifyProof(r.ChtRoot, encNumber[:], reads)
value, err := trie.VerifyProof(r.ChtRoot, encNumber[:], reads)
if err != nil {
return fmt.Errorf("merkle proof verification failed: %v", err)
}
@ -470,7 +470,7 @@ func (r *BloomRequest) Validate(db ethdb.Database, msg *Msg) error {
for i, idx := range r.SectionIndexList {
binary.BigEndian.PutUint64(encNumber[2:], idx)
value, _, err := trie.VerifyProof(r.BloomTrieRoot, encNumber[:], reads)
value, err := trie.VerifyProof(r.BloomTrieRoot, encNumber[:], reads)
if err != nil {
return err
}

@ -18,10 +18,12 @@ package trie
import (
"bytes"
"errors"
"fmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/ethdb/memorydb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
)
@ -101,33 +103,232 @@ func (t *SecureTrie) Prove(key []byte, fromLevel uint, proofDb ethdb.KeyValueWri
// VerifyProof checks merkle proofs. The given proof must contain the value for
// key in a trie with the given root hash. VerifyProof returns an error if the
// proof contains invalid trie nodes or the wrong value.
func VerifyProof(rootHash common.Hash, key []byte, proofDb ethdb.KeyValueReader) (value []byte, nodes int, err error) {
func VerifyProof(rootHash common.Hash, key []byte, proofDb ethdb.KeyValueReader) (value []byte, err error) {
key = keybytesToHex(key)
wantHash := rootHash
for i := 0; ; i++ {
buf, _ := proofDb.Get(wantHash[:])
if buf == nil {
return nil, i, fmt.Errorf("proof node %d (hash %064x) missing", i, wantHash)
return nil, fmt.Errorf("proof node %d (hash %064x) missing", i, wantHash)
}
n, err := decodeNode(wantHash[:], buf)
if err != nil {
return nil, i, fmt.Errorf("bad proof node %d: %v", i, err)
return nil, fmt.Errorf("bad proof node %d: %v", i, err)
}
keyrest, cld := get(n, key)
keyrest, cld := get(n, key, true)
switch cld := cld.(type) {
case nil:
// The trie doesn't contain the key.
return nil, i, nil
return nil, nil
case hashNode:
key = keyrest
copy(wantHash[:], cld)
case valueNode:
return cld, i + 1, nil
return cld, nil
}
}
}
func get(tn node, key []byte) ([]byte, node) {
// proofToPath converts a merkle proof to trie node path.
// The main purpose of this function is recovering a node
// path from the merkle proof stream. All necessary nodes
// will be resolved and leave the remaining as hashnode.
func proofToPath(rootHash common.Hash, root node, key []byte, proofDb ethdb.KeyValueReader) (node, error) {
// resolveNode retrieves and resolves trie node from merkle proof stream
resolveNode := func(hash common.Hash) (node, error) {
buf, _ := proofDb.Get(hash[:])
if buf == nil {
return nil, fmt.Errorf("proof node (hash %064x) missing", hash)
}
n, err := decodeNode(hash[:], buf)
if err != nil {
return nil, fmt.Errorf("bad proof node %v", err)
}
return n, err
}
// If the root node is empty, resolve it first
if root == nil {
n, err := resolveNode(rootHash)
if err != nil {
return nil, err
}
root = n
}
var (
err error
child, parent node
keyrest []byte
terminate bool
)
key, parent = keybytesToHex(key), root
for {
keyrest, child = get(parent, key, false)
switch cld := child.(type) {
case nil:
// The trie doesn't contain the key.
return nil, errors.New("the node is not contained in trie")
case *shortNode:
key, parent = keyrest, child // Already resolved
continue
case *fullNode:
key, parent = keyrest, child // Already resolved
continue
case hashNode:
child, err = resolveNode(common.BytesToHash(cld))
if err != nil {
return nil, err
}
case valueNode:
terminate = true
}
// Link the parent and child.
switch pnode := parent.(type) {
case *shortNode:
pnode.Val = child
case *fullNode:
pnode.Children[key[0]] = child
default:
panic(fmt.Sprintf("%T: invalid node: %v", pnode, pnode))
}
if terminate {
return root, nil // The whole path is resolved
}
key, parent = keyrest, child
}
}
// unsetInternal removes all internal node references(hashnode, embedded node).
// It should be called after a trie is constructed with two edge proofs. Also
// the given boundary keys must be the one used to construct the edge proofs.
//
// It's the key step for range proof. All visited nodes should be marked dirty
// since the node content might be modified. Besides it can happen that some
// fullnodes only have one child which is disallowed. But if the proof is valid,
// the missing children will be filled, otherwise it will be thrown anyway.
func unsetInternal(node node, left []byte, right []byte) error {
left, right = keybytesToHex(left), keybytesToHex(right)
// todo(rjl493456442) different length edge keys should be supported
if len(left) != len(right) {
return errors.New("inconsistent edge path")
}
// Step down to the fork point
prefix, pos := prefixLen(left, right), 0
for {
if pos >= prefix {
break
}
switch n := (node).(type) {
case *shortNode:
if len(left)-pos < len(n.Key) || !bytes.Equal(n.Key, left[pos:pos+len(n.Key)]) {
return errors.New("invalid edge path")
}
n.flags = nodeFlag{dirty: true}
node, pos = n.Val, pos+len(n.Key)
case *fullNode:
n.flags = nodeFlag{dirty: true}
node, pos = n.Children[left[pos]], pos+1
default:
panic(fmt.Sprintf("%T: invalid node: %v", node, node))
}
}
fn, ok := node.(*fullNode)
if !ok {
return errors.New("the fork point must be a fullnode")
}
// Find the fork point! Unset all intermediate references
for i := left[prefix] + 1; i < right[prefix]; i++ {
fn.Children[i] = nil
}
fn.flags = nodeFlag{dirty: true}
unset(fn.Children[left[prefix]], left[prefix+1:], false)
unset(fn.Children[right[prefix]], right[prefix+1:], true)
return nil
}
// unset removes all internal node references either the left most or right most.
func unset(root node, rest []byte, removeLeft bool) {
switch rn := root.(type) {
case *fullNode:
if removeLeft {
for i := 0; i < int(rest[0]); i++ {
rn.Children[i] = nil
}
rn.flags = nodeFlag{dirty: true}
} else {
for i := rest[0] + 1; i < 16; i++ {
rn.Children[i] = nil
}
rn.flags = nodeFlag{dirty: true}
}
unset(rn.Children[rest[0]], rest[1:], removeLeft)
case *shortNode:
rn.flags = nodeFlag{dirty: true}
if _, ok := rn.Val.(valueNode); ok {
rn.Val = nilValueNode
return
}
unset(rn.Val, rest[len(rn.Key):], removeLeft)
case hashNode, nil, valueNode:
panic("it shouldn't happen")
}
}
// VerifyRangeProof checks whether the given leave nodes and edge proofs
// can prove the given trie leaves range is matched with given root hash
// and the range is consecutive(no gap inside).
func VerifyRangeProof(rootHash common.Hash, keys [][]byte, values [][]byte, firstProof ethdb.KeyValueReader, lastProof ethdb.KeyValueReader) error {
if len(keys) != len(values) {
return fmt.Errorf("inconsistent proof data, keys: %d, values: %d", len(keys), len(values))
}
if len(keys) == 0 {
return fmt.Errorf("nothing to verify")
}
if len(keys) == 1 {
value, err := VerifyProof(rootHash, keys[0], firstProof)
if err != nil {
return err
}
if !bytes.Equal(value, values[0]) {
return fmt.Errorf("correct proof but invalid data")
}
return nil
}
// Convert the edge proofs to edge trie paths. Then we can
// have the same tree architecture with the original one.
root, err := proofToPath(rootHash, nil, keys[0], firstProof)
if err != nil {
return err
}
// Pass the root node here, the second path will be merged
// with the first one.
root, err = proofToPath(rootHash, root, keys[len(keys)-1], lastProof)
if err != nil {
return err
}
// Remove all internal references. All the removed parts should
// be re-filled(or re-constructed) by the given leaves range.
if err := unsetInternal(root, keys[0], keys[len(keys)-1]); err != nil {
return err
}
// Rebuild the trie with the leave stream, the shape of trie
// should be same with the original one.
newtrie := &Trie{root: root, db: NewDatabase(memorydb.New())}
for index, key := range keys {
newtrie.TryUpdate(key, values[index])
}
if newtrie.Hash() != rootHash {
return fmt.Errorf("invalid proof, wanthash %x, got %x", rootHash, newtrie.Hash())
}
return nil
}
// get returns the child of the given node. Return nil if the
// node with specified key doesn't exist at all.
//
// There is an additional flag `skipResolved`. If it's set then
// all resolved nodes won't be returned.
func get(tn node, key []byte, skipResolved bool) ([]byte, node) {
for {
switch n := tn.(type) {
case *shortNode:
@ -136,9 +337,15 @@ func get(tn node, key []byte) ([]byte, node) {
}
tn = n.Val
key = key[len(n.Key):]
if !skipResolved {
return key, tn
}
case *fullNode:
tn = n.Children[key[0]]
key = key[1:]
if !skipResolved {
return key, tn
}
case hashNode:
return key, n
case nil:

@ -20,6 +20,7 @@ import (
"bytes"
crand "crypto/rand"
mrand "math/rand"
"sort"
"testing"
"time"
@ -65,7 +66,7 @@ func TestProof(t *testing.T) {
if proof == nil {
t.Fatalf("prover %d: missing key %x while constructing proof", i, kv.k)
}
val, _, err := VerifyProof(root, kv.k, proof)
val, err := VerifyProof(root, kv.k, proof)
if err != nil {
t.Fatalf("prover %d: failed to verify proof for key %x: %v\nraw proof: %x", i, kv.k, err, proof)
}
@ -87,7 +88,7 @@ func TestOneElementProof(t *testing.T) {
if proof.Len() != 1 {
t.Errorf("prover %d: proof should have one element", i)
}
val, _, err := VerifyProof(trie.Hash(), []byte("k"), proof)
val, err := VerifyProof(trie.Hash(), []byte("k"), proof)
if err != nil {
t.Fatalf("prover %d: failed to verify proof: %v\nraw proof: %x", i, err, proof)
}
@ -97,6 +98,145 @@ func TestOneElementProof(t *testing.T) {
}
}
type entrySlice []*kv
func (p entrySlice) Len() int { return len(p) }
func (p entrySlice) Less(i, j int) bool { return bytes.Compare(p[i].k, p[j].k) < 0 }
func (p entrySlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func TestRangeProof(t *testing.T) {
trie, vals := randomTrie(4096)
var entries entrySlice
for _, kv := range vals {
entries = append(entries, kv)
}
sort.Sort(entries)
for i := 0; i < 500; i++ {
start := mrand.Intn(len(entries))
end := mrand.Intn(len(entries)-start) + start
if start == end {
continue
}
firstProof, lastProof := memorydb.New(), memorydb.New()
if err := trie.Prove(entries[start].k, 0, firstProof); err != nil {
t.Fatalf("Failed to prove the first node %v", err)
}
if err := trie.Prove(entries[end-1].k, 0, lastProof); err != nil {
t.Fatalf("Failed to prove the last node %v", err)
}
var keys [][]byte
var vals [][]byte
for i := start; i < end; i++ {
keys = append(keys, entries[i].k)
vals = append(vals, entries[i].v)
}
err := VerifyRangeProof(trie.Hash(), keys, vals, firstProof, lastProof)
if err != nil {
t.Fatalf("Case %d(%d->%d) expect no error, got %v", i, start, end-1, err)
}
}
}
func TestBadRangeProof(t *testing.T) {
trie, vals := randomTrie(4096)
var entries entrySlice
for _, kv := range vals {
entries = append(entries, kv)
}
sort.Sort(entries)
for i := 0; i < 500; i++ {
start := mrand.Intn(len(entries))
end := mrand.Intn(len(entries)-start) + start
if start == end {
continue
}
firstProof, lastProof := memorydb.New(), memorydb.New()
if err := trie.Prove(entries[start].k, 0, firstProof); err != nil {
t.Fatalf("Failed to prove the first node %v", err)
}
if err := trie.Prove(entries[end-1].k, 0, lastProof); err != nil {
t.Fatalf("Failed to prove the last node %v", err)
}
var keys [][]byte
var vals [][]byte
for i := start; i < end; i++ {
keys = append(keys, entries[i].k)
vals = append(vals, entries[i].v)
}
testcase := mrand.Intn(6)
var index int
switch testcase {
case 0:
// Modified key
index = mrand.Intn(end - start)
keys[index] = randBytes(32) // In theory it can't be same
case 1:
// Modified val
index = mrand.Intn(end - start)
vals[index] = randBytes(20) // In theory it can't be same
case 2:
// Gapped entry slice
index = mrand.Intn(end - start)
keys = append(keys[:index], keys[index+1:]...)
vals = append(vals[:index], vals[index+1:]...)
if len(keys) <= 1 {
continue
}
case 3:
// Switched entry slice, same effect with gapped
index = mrand.Intn(end - start)
keys[index] = entries[len(entries)-1].k
vals[index] = entries[len(entries)-1].v
case 4:
// Set random key to nil
index = mrand.Intn(end - start)
keys[index] = nil
case 5:
// Set random value to nil
index = mrand.Intn(end - start)
vals[index] = nil
}
err := VerifyRangeProof(trie.Hash(), keys, vals, firstProof, lastProof)
if err == nil {
t.Fatalf("%d Case %d index %d range: (%d->%d) expect error, got nil", i, testcase, index, start, end-1)
}
}
}
// TestGappedRangeProof focuses on the small trie with embedded nodes.
// If the gapped node is embedded in the trie, it should be detected too.
func TestGappedRangeProof(t *testing.T) {
trie := new(Trie)
var entries []*kv // Sorted entries
for i := byte(0); i < 10; i++ {
value := &kv{common.LeftPadBytes([]byte{i}, 32), []byte{i}, false}
trie.Update(value.k, value.v)
entries = append(entries, value)
}
first, last := 2, 8
firstProof, lastProof := memorydb.New(), memorydb.New()
if err := trie.Prove(entries[first].k, 0, firstProof); err != nil {
t.Fatalf("Failed to prove the first node %v", err)
}
if err := trie.Prove(entries[last-1].k, 0, lastProof); err != nil {
t.Fatalf("Failed to prove the last node %v", err)
}
var keys [][]byte
var vals [][]byte
for i := first; i < last; i++ {
if i == (first+last)/2 {
continue
}
keys = append(keys, entries[i].k)
vals = append(vals, entries[i].v)
}
err := VerifyRangeProof(trie.Hash(), keys, vals, firstProof, lastProof)
if err == nil {
t.Fatal("expect error, got nil")
}
}
func TestBadProof(t *testing.T) {
trie, vals := randomTrie(800)
root := trie.Hash()
@ -118,7 +258,7 @@ func TestBadProof(t *testing.T) {
mutateByte(val)
proof.Put(crypto.Keccak256(val), val)
if _, _, err := VerifyProof(root, kv.k, proof); err == nil {
if _, err := VerifyProof(root, kv.k, proof); err == nil {
t.Fatalf("prover %d: expected proof to fail for key %x", i, kv.k)
}
}
@ -138,7 +278,7 @@ func TestMissingKeyProof(t *testing.T) {
if proof.Len() != 1 {
t.Errorf("test %d: proof should have one element", i)
}
val, _, err := VerifyProof(trie.Hash(), []byte(key), proof)
val, err := VerifyProof(trie.Hash(), []byte(key), proof)
if err != nil {
t.Fatalf("test %d: failed to verify proof: %v\nraw proof: %x", i, err, proof)
}
@ -191,12 +331,50 @@ func BenchmarkVerifyProof(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
im := i % len(keys)
if _, _, err := VerifyProof(root, []byte(keys[im]), proofs[im]); err != nil {
if _, err := VerifyProof(root, []byte(keys[im]), proofs[im]); err != nil {
b.Fatalf("key %x: %v", keys[im], err)
}
}
}
func BenchmarkVerifyRangeProof10(b *testing.B) { benchmarkVerifyRangeProof(b, 10) }
func BenchmarkVerifyRangeProof100(b *testing.B) { benchmarkVerifyRangeProof(b, 100) }
func BenchmarkVerifyRangeProof1000(b *testing.B) { benchmarkVerifyRangeProof(b, 1000) }
func BenchmarkVerifyRangeProof5000(b *testing.B) { benchmarkVerifyRangeProof(b, 5000) }
func benchmarkVerifyRangeProof(b *testing.B, size int) {
trie, vals := randomTrie(8192)
var entries entrySlice
for _, kv := range vals {
entries = append(entries, kv)
}
sort.Sort(entries)
start := 2
end := start + size
firstProof, lastProof := memorydb.New(), memorydb.New()
if err := trie.Prove(entries[start].k, 0, firstProof); err != nil {
b.Fatalf("Failed to prove the first node %v", err)
}
if err := trie.Prove(entries[end-1].k, 0, lastProof); err != nil {
b.Fatalf("Failed to prove the last node %v", err)
}
var keys [][]byte
var values [][]byte
for i := start; i < end; i++ {
keys = append(keys, entries[i].k)
values = append(values, entries[i].v)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := VerifyRangeProof(trie.Hash(), keys, values, firstProof, lastProof)
if err != nil {
b.Fatalf("Case %d(%d->%d) expect no error, got %v", i, start, end-1, err)
}
}
}
func randomTrie(n int) (*Trie, map[string]*kv) {
trie := new(Trie)
vals := make(map[string]*kv)