* trie: utilize callbacks instead of amassing lists in ref/unref (#20529)

* trie/tests: add benchmarks and update trie tests

* trie: update benchmark tests

* trie: utilize callbacks instead of amassing lists of hashes in database ref/unref

* trie: replace remaining non-callback based accesses
This commit is contained in:
Martin Holst Swende 2020-01-17 12:59:45 +01:00 committed by Péter Szilágyi
parent 770316dc20
commit 9b09c0fc83
2 changed files with 338 additions and 32 deletions

@ -180,35 +180,31 @@ func (n *cachedNode) obj(hash common.Hash) node {
return expandNode(hash[:], n.node)
}
// childs returns all the tracked children of this node, both the implicit ones
// from inside the node as well as the explicit ones from outside the node.
func (n *cachedNode) childs() []common.Hash {
children := make([]common.Hash, 0, 16)
// forChilds invokes the callback for all the tracked children of this node,
// both the implicit ones from inside the node as well as the explicit ones
// from outside the node.
func (n *cachedNode) forChilds(onChild func(hash common.Hash)) {
for child := range n.children {
children = append(children, child)
onChild(child)
}
if _, ok := n.node.(rawNode); !ok {
gatherChildren(n.node, &children)
forGatherChildren(n.node, onChild)
}
return children
}
// gatherChildren traverses the node hierarchy of a collapsed storage node and
// retrieves all the hashnode children.
func gatherChildren(n node, children *[]common.Hash) {
// forGatherChildren traverses the node hierarchy of a collapsed storage node and
// invokes the callback for all the hashnode children.
func forGatherChildren(n node, onChild func(hash common.Hash)) {
switch n := n.(type) {
case *rawShortNode:
gatherChildren(n.Val, children)
forGatherChildren(n.Val, onChild)
case rawFullNode:
for i := 0; i < 16; i++ {
gatherChildren(n[i], children)
forGatherChildren(n[i], onChild)
}
case hashNode:
*children = append(*children, common.BytesToHash(n))
onChild(common.BytesToHash(n))
case valueNode, nil:
default:
panic(fmt.Sprintf("unknown node type: %T", n))
}
@ -334,11 +330,11 @@ func (db *Database) insert(hash common.Hash, blob []byte, node node) {
size: uint16(len(blob)),
flushPrev: db.newest,
}
for _, child := range entry.childs() {
entry.forChilds(func(child common.Hash) {
if c := db.dirties[child]; c != nil {
c.parents++
}
}
})
db.dirties[hash] = entry
// Update the flush-list endpoints
@ -570,9 +566,9 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) {
db.dirties[node.flushNext].flushPrev = node.flushPrev
}
// Dereference all children and delete the node
for _, hash := range node.childs() {
node.forChilds(func(hash common.Hash) {
db.dereference(hash, child)
}
})
delete(db.dirties, child)
db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
if node.children != nil {
@ -766,10 +762,14 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane
if !ok {
return nil
}
for _, child := range node.childs() {
if err := db.commit(child, batch, uncacher); err != nil {
return err
var err error
node.forChilds(func(child common.Hash) {
if err == nil {
err = db.commit(child, batch, uncacher)
}
})
if err != nil {
return err
}
if err := batch.Put(hash[:], node.rlp()); err != nil {
return err

@ -161,7 +161,7 @@ func TestInsert(t *testing.T) {
exp := common.HexToHash("8aad789dff2f538bca5d8ea56e8abe10f4c7ba3a5dea95fea4cd6e7c3a1168d3")
root := trie.Hash()
if root != exp {
t.Errorf("exp %x got %x", exp, root)
t.Errorf("case 1: exp %x got %x", exp, root)
}
trie = newEmpty()
@ -173,7 +173,7 @@ func TestInsert(t *testing.T) {
t.Fatalf("commit error: %v", err)
}
if root != exp {
t.Errorf("exp %x got %x", exp, root)
t.Errorf("case 2: exp %x got %x", exp, root)
}
}
@ -316,6 +316,40 @@ func TestLargeValue(t *testing.T) {
trie.Hash()
}
// TestRandomCases replays operation sequences that were found via random
// fuzzing and fails if the replay reports a problem.
func TestRandomCases(t *testing.T) {
	rt := []randTestStep{
		{op: 6, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 0
		{op: 6, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 1
		{op: 0, key: common.Hex2Bytes("d51b182b95d677e5f1c82508c0228de96b73092d78ce78b2230cd948674f66fd1483bd"), value: common.Hex2Bytes("0000000000000002")}, // step 2
		{op: 2, key: common.Hex2Bytes("c2a38512b83107d665c65235b0250002882ac2022eb00711552354832c5f1d030d0e408e"), value: common.Hex2Bytes("")}, // step 3
		{op: 3, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 4
		{op: 3, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 5
		{op: 6, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 6
		{op: 3, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 7
		{op: 0, key: common.Hex2Bytes("c2a38512b83107d665c65235b0250002882ac2022eb00711552354832c5f1d030d0e408e"), value: common.Hex2Bytes("0000000000000008")}, // step 8
		{op: 0, key: common.Hex2Bytes("d51b182b95d677e5f1c82508c0228de96b73092d78ce78b2230cd948674f66fd1483bd"), value: common.Hex2Bytes("0000000000000009")}, // step 9
		{op: 2, key: common.Hex2Bytes("fd"), value: common.Hex2Bytes("")}, // step 10
		{op: 6, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 11
		{op: 6, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 12
		{op: 0, key: common.Hex2Bytes("fd"), value: common.Hex2Bytes("000000000000000d")}, // step 13
		{op: 6, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 14
		{op: 1, key: common.Hex2Bytes("c2a38512b83107d665c65235b0250002882ac2022eb00711552354832c5f1d030d0e408e"), value: common.Hex2Bytes("")}, // step 15
		{op: 3, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 16
		{op: 0, key: common.Hex2Bytes("c2a38512b83107d665c65235b0250002882ac2022eb00711552354832c5f1d030d0e408e"), value: common.Hex2Bytes("0000000000000011")}, // step 17
		{op: 5, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 18
		{op: 3, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 19
		{op: 0, key: common.Hex2Bytes("d51b182b95d677e5f1c82508c0228de96b73092d78ce78b2230cd948674f66fd1483bd"), value: common.Hex2Bytes("0000000000000014")}, // step 20
		{op: 0, key: common.Hex2Bytes("d51b182b95d677e5f1c82508c0228de96b73092d78ce78b2230cd948674f66fd1483bd"), value: common.Hex2Bytes("0000000000000015")}, // step 21
		{op: 0, key: common.Hex2Bytes("c2a38512b83107d665c65235b0250002882ac2022eb00711552354832c5f1d030d0e408e"), value: common.Hex2Bytes("0000000000000016")}, // step 22
		{op: 5, key: common.Hex2Bytes(""), value: common.Hex2Bytes("")}, // step 23
		{op: 1, key: common.Hex2Bytes("980c393656413a15c8da01978ed9f89feb80b502f58f2d640e3a2f5f7a99a7018f1b573befd92053ac6f78fca4a87268"), value: common.Hex2Bytes("")}, // step 24
		{op: 1, key: common.Hex2Bytes("fd"), value: common.Hex2Bytes("")}, // step 25
	}
	// runRandTest signals the outcome through its boolean result. Discarding
	// it would let this regression test pass no matter what the replay found.
	if !runRandTest(rt) {
		t.Fatal("replay of fuzz-derived random test steps failed")
	}
}
// randTest performs random trie operations.
// Instances of this test are created by Generate.
type randTest []randTestStep
@ -375,6 +409,8 @@ func runRandTest(rt randTest) bool {
values := make(map[string]string) // tracks content of the trie
for i, step := range rt {
fmt.Printf("{op: %d, key: common.Hex2Bytes(\"%x\"), value: common.Hex2Bytes(\"%x\")}, // step %d\n",
step.op, step.key, step.value, i)
switch step.op {
case opUpdate:
tr.Update(step.key, step.value)
@ -470,6 +506,7 @@ func benchGet(b *testing.B, commit bool) {
func benchUpdate(b *testing.B, e binary.ByteOrder) *Trie {
trie := newEmpty()
k := make([]byte, 32)
b.ReportAllocs()
for i := 0; i < b.N; i++ {
e.PutUint64(k, uint64(i))
trie.Update(k, k)
@ -481,18 +518,135 @@ func benchUpdate(b *testing.B, e binary.ByteOrder) *Trie {
// we cannot use b.N as the number of hashing rounds, since all rounds apart from
// the first one will be NOOP. As such, we'll use b.N as the number of accounts to
// insert into the trie before measuring the hashing.
// BenchmarkHash-6 288680 4561 ns/op 682 B/op 9 allocs/op
// BenchmarkHash-6 275095 4800 ns/op 685 B/op 9 allocs/op
// pure hasher:
// BenchmarkHash-6 319362 4230 ns/op 675 B/op 9 allocs/op
// BenchmarkHash-6 257460 4674 ns/op 689 B/op 9 allocs/op
// With hashing in-between and pure hasher:
// BenchmarkHash-6 225417 7150 ns/op 982 B/op 12 allocs/op
// BenchmarkHash-6 220378 6197 ns/op 983 B/op 12 allocs/op
// same with old hasher
// BenchmarkHash-6 229758 6437 ns/op 981 B/op 12 allocs/op
// BenchmarkHash-6 212610 7137 ns/op 986 B/op 12 allocs/op
func BenchmarkHash(b *testing.B) {
	// Build 2*b.N accounts: the first half is inserted and hashed up front,
	// the second half is inserted afterwards and left unhashed.
	addrs, accs := makeAccounts(2 * b.N)
	half := len(addrs) / 2

	tr := newEmpty()
	for i := 0; i < half; i++ {
		tr.Update(crypto.Keccak256(addrs[i][:]), accs[i])
	}
	tr.Hash()
	for i := half; i < len(addrs); i++ {
		tr.Update(crypto.Keccak256(addrs[i][:]), accs[i])
	}

	// Only the final Hash call below is part of the measurement.
	b.ResetTimer()
	b.ReportAllocs()
	tr.Hash()
}
// account is the value type used by the tests and benchmarks in this file:
// makeAccounts RLP-encodes one instance per generated entry, and the
// "with-onleaf" variant of BenchmarkCommitAfterHash decodes leaves back into it.
//
// makeAccounts builds it with a positional literal (nonce, balance, root,
// code), so the field order below must not be changed.
// NOTE(review): the field order presumably also fixes the RLP layout — confirm.
type account struct {
Nonce uint64
Balance *big.Int
Root common.Hash
Code []byte
}
// Benchmarks the trie Commit following a Hash. Since the trie caches the result of any operation,
// we cannot use b.N as the number of hashing rouns, since all rounds apart from
// the first one will be NOOP. As such, we'll use b.N as the number of account to
// insert into the trie before measuring the hashing.
func BenchmarkCommitAfterHash(b *testing.B) {
b.Run("no-onleaf", func(b *testing.B) {
benchmarkCommitAfterHash(b, nil)
})
var a account
onleaf := func(leaf []byte, parent common.Hash) error {
rlp.DecodeBytes(leaf, &a)
return nil
}
b.Run("with-onleaf", func(b *testing.B) {
benchmarkCommitAfterHash(b, onleaf)
})
}
// benchmarkCommitAfterHash fills a fresh trie with b.N generated accounts,
// hashes it, and then measures a single Commit with the given leaf callback
// (which may be nil).
func benchmarkCommitAfterHash(b *testing.B, onleaf LeafCallback) {
	addrs, accs := makeAccounts(b.N)

	// Insert the accounts into the trie and hash it.
	tr := newEmpty()
	for i := range addrs {
		tr.Update(crypto.Keccak256(addrs[i][:]), accs[i])
	}
	tr.Hash()

	// Everything above is setup; only the commit is timed.
	b.ResetTimer()
	b.ReportAllocs()
	tr.Commit(onleaf)
}
// TestTinyTrie inserts three entries one at a time, checking the root hash
// against a fixed expectation after every insert, and finally verifies that a
// second trie rebuilt from an iteration over the first has the same root.
func TestTinyTrie(t *testing.T) {
	// Only the generated account blobs are used; the addresses are dropped.
	_, accounts := makeAccounts(10000)
	tr := newEmpty()

	tr.Update(common.Hex2Bytes("0000000000000000000000000000000000000000000000000000000000001337"), accounts[3])
	exp := common.HexToHash("4fa6efd292cffa2db0083b8bedd23add2798ae73802442f52486e95c3df7111c")
	if root := tr.Hash(); exp != root {
		t.Fatalf("1: got %x, exp %x", root, exp)
	}

	tr.Update(common.Hex2Bytes("0000000000000000000000000000000000000000000000000000000000001338"), accounts[4])
	exp = common.HexToHash("cb5fb1213826dad9e604f095f8ceb5258fe6b5c01805ce6ef019a50699d2d479")
	if root := tr.Hash(); exp != root {
		t.Fatalf("2: got %x, exp %x", root, exp)
	}

	tr.Update(common.Hex2Bytes("0000000000000000000000000000000000000000000000000000000000001339"), accounts[4])
	exp = common.HexToHash("ed7e06b4010057d8703e7b9a160a6d42cf4021f9020da3c8891030349a646987")
	if root := tr.Hash(); exp != root {
		t.Fatalf("3: got %x, exp %x", root, exp)
	}

	// Rebuild the content into a second trie by walking the first one.
	checktr, _ := New(common.Hash{}, tr.db)
	for it := NewIterator(tr.NodeIterator(nil)); it.Next(); {
		checktr.Update(it.Key, it.Value)
	}
	troot, itroot := tr.Hash(), checktr.Hash()
	if troot != itroot {
		t.Fatalf("hash mismatch in opItercheckhash, trie: %x, check: %x", troot, itroot)
	}
}
// TestCommitAfterHash checks that committing a trie that was already hashed
// leaves the root unchanged: the root reported by Hash after a Commit, and the
// root returned by a further Commit, must both equal the expected value.
func TestCommitAfterHash(t *testing.T) {
	// Create a realistic account trie to hash.
	addresses, accounts := makeAccounts(1000)
	trie := newEmpty()
	for i := 0; i < len(addresses); i++ {
		trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
	}
	// Hash first, then commit. A commit failure must fail the test rather
	// than being silently dropped.
	trie.Hash()
	if _, err := trie.Commit(nil); err != nil {
		t.Fatalf("commit error: %v", err)
	}
	root := trie.Hash()
	exp := common.HexToHash("e5e9c29bb50446a4081e6d1d748d2892c6101c1e883a1f77cf21d4094b697822")
	if exp != root {
		t.Errorf("got %x, exp %x", root, exp)
	}
	// A second commit must report the same root.
	root, err := trie.Commit(nil)
	if err != nil {
		t.Fatalf("commit error: %v", err)
	}
	if exp != root {
		t.Errorf("got %x, exp %x", root, exp)
	}
}
func makeAccounts(size int) (addresses [][20]byte, accounts [][]byte) {
// Make the random benchmark deterministic
random := rand.New(rand.NewSource(0))
// Create a realistic account trie to hash
addresses := make([][20]byte, b.N)
addresses = make([][20]byte, size)
for i := 0; i < len(addresses); i++ {
for j := 0; j < len(addresses[i]); j++ {
addresses[i][j] = byte(random.Intn(256))
}
}
accounts := make([][]byte, len(addresses))
accounts = make([][]byte, len(addresses))
for i := 0; i < len(accounts); i++ {
var (
nonce = uint64(random.Int63())
@ -500,16 +654,168 @@ func BenchmarkHash(b *testing.B) {
root = emptyRoot
code = crypto.Keccak256(nil)
)
accounts[i], _ = rlp.EncodeToBytes([]interface{}{nonce, balance, root, code})
accounts[i], _ = rlp.EncodeToBytes(&account{nonce, balance, root, code})
}
// Insert the accounts into the trie and hash it
return addresses, accounts
}
// BenchmarkHashFixedSize benchmarks the Hash of a fixed number of updates to a
// trie. It is meant to capture the difference in efficiency between small and
// large changes: typically, storage tries are small (a couple of entries),
// whereas the full post-block account trie update is large (a couple of
// thousand entries).
func BenchmarkHashFixedSize(b *testing.B) {
	cases := []struct {
		name string
		size int
	}{
		// NOTE(review): the "10" case generates 20 accounts — confirm which
		// of the label or the size is intended.
		{"10", 20},
		{"100", 100},
		{"1K", 1000},
		{"10K", 10000},
		{"100K", 100000},
	}
	for _, tc := range cases {
		tc := tc
		b.Run(tc.name, func(b *testing.B) {
			// The helper starts and stops the timer around the measured call.
			b.StopTimer()
			addresses, accounts := makeAccounts(tc.size)
			for i := 0; i < b.N; i++ {
				benchmarkHashFixedSize(b, addresses, accounts)
			}
		})
	}
}
// benchmarkHashFixedSize performs one benchmark iteration: it builds a fresh
// trie from the given addresses/accounts with the timer stopped, then times a
// single Hash call.
//
// The caller (BenchmarkHashFixedSize) stops the timer once and invokes this
// helper b.N times. The helper therefore must not call b.ResetTimer: doing so
// on every iteration would zero the elapsed time and allocation counters
// gathered by the previous iterations, leaving only the last Hash call in the
// reported result.
func benchmarkHashFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) {
	b.ReportAllocs()
	trie := newEmpty()
	for i := 0; i < len(addresses); i++ {
		trie.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
	}
	// Only the hashing itself is measured; the timer is stopped again right
	// after so the next iteration's setup is excluded.
	b.StartTimer()
	trie.Hash()
	b.StopTimer()
}
// BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a
// fixed number of updates to a trie, over a range of trie sizes.
func BenchmarkCommitAfterHashFixedSize(b *testing.B) {
	cases := []struct {
		name string
		size int
	}{
		// NOTE(review): the "10" case generates 20 accounts — confirm which
		// of the label or the size is intended.
		{"10", 20},
		{"100", 100},
		{"1K", 1000},
		{"10K", 10000},
		{"100K", 100000},
	}
	for _, tc := range cases {
		tc := tc
		b.Run(tc.name, func(b *testing.B) {
			// The helper starts and stops the timer around the measured call.
			b.StopTimer()
			addresses, accounts := makeAccounts(tc.size)
			for i := 0; i < b.N; i++ {
				benchmarkCommitAfterHashFixedSize(b, addresses, accounts)
			}
		})
	}
}
// benchmarkCommitAfterHashFixedSize performs one benchmark iteration: it
// builds and hashes a fresh trie from the given addresses/accounts, then
// starts the timer around a single Commit call and stops it again afterwards.
func benchmarkCommitAfterHashFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) {
	b.ReportAllocs()

	// Insert the accounts into the trie and hash it.
	tr := newEmpty()
	for i := range addresses {
		tr.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
	}
	tr.Hash()

	// Only the commit is measured.
	b.StartTimer()
	tr.Commit(nil)
	b.StopTimer()
}
// BenchmarkDerefRootFixedSize benchmarks dereferencing the root of a committed
// trie built from a fixed number of updates, over a range of trie sizes.
func BenchmarkDerefRootFixedSize(b *testing.B) {
	cases := []struct {
		name string
		size int
	}{
		// NOTE(review): the "10" case generates 20 accounts — confirm which
		// of the label or the size is intended.
		{"10", 20},
		{"100", 100},
		{"1K", 1000},
		{"10K", 10000},
		{"100K", 100000},
	}
	for _, tc := range cases {
		tc := tc
		b.Run(tc.name, func(b *testing.B) {
			// The helper starts and stops the timer around the measured call.
			b.StopTimer()
			addresses, accounts := makeAccounts(tc.size)
			for i := 0; i < b.N; i++ {
				benchmarkDerefRootFixedSize(b, addresses, accounts)
			}
		})
	}
}
// benchmarkDerefRootFixedSize performs one benchmark iteration: it builds,
// hashes and commits a fresh trie from the given addresses/accounts, then
// starts the timer around a single Dereference of the root on the trie's
// database and stops it again afterwards.
func benchmarkDerefRootFixedSize(b *testing.B, addresses [][20]byte, accounts [][]byte) {
	b.ReportAllocs()

	tr := newEmpty()
	for i := range addresses {
		tr.Update(crypto.Keccak256(addresses[i][:]), accounts[i])
	}
	root := tr.Hash()
	tr.Commit(nil)

	// Only the dereference is measured.
	b.StartTimer()
	tr.db.Dereference(root)
	b.StopTimer()
}
func tempDB() (string, *Database) {