core, trie: replace state caches with trie journal

This commit is contained in:
Felix Lange 2016-09-25 20:49:02 +02:00 committed by Péter Szilágyi
parent 863d166c7b
commit cd791bd855
15 changed files with 424 additions and 659 deletions

@ -49,7 +49,6 @@ var (
// don't relicense vendored sources
"crypto/sha3/", "crypto/ecies/", "logger/glog/",
"crypto/secp256k1/curve.go",
"trie/arc.go",
// don't license generated files
"contracts/chequebook/contract/",
"contracts/ens/contract/",

@ -76,7 +76,7 @@ func (it *NodeIterator) step() error {
}
// Initialize the iterator if we've just started
if it.stateIt == nil {
it.stateIt = trie.NewNodeIterator(it.state.trie.Trie)
it.stateIt = it.state.trie.NodeIterator()
}
// If we had data nodes previously, we surely have at least state nodes
if it.dataIt != nil {

@ -95,8 +95,6 @@ type Account struct {
Balance *big.Int
Root common.Hash // merkle root of the storage trie
CodeHash []byte
codeSize *int
}
// NewObject creates a state object.
@ -275,20 +273,9 @@ func (self *StateObject) Code(db trie.Database) []byte {
return code
}
// CodeSize returns the size of the contract code associated with this object.
func (self *StateObject) CodeSize(db trie.Database) int {
if self.data.codeSize == nil {
self.data.codeSize = new(int)
*self.data.codeSize = len(self.Code(db))
}
return *self.data.codeSize
}
func (self *StateObject) SetCode(code []byte) {
self.code = code
self.data.CodeHash = crypto.Keccak256(code)
self.data.codeSize = new(int)
*self.data.codeSize = len(code)
self.dirtyCode = true
if self.onDirty != nil {
self.onDirty(self.Address())

@ -28,23 +28,32 @@ import (
"github.com/ethereum/go-ethereum/logger/glog"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
lru "github.com/hashicorp/golang-lru"
)
// The starting nonce determines the default nonce when new accounts are being
// created.
var StartingNonce uint64
const (
// Number of past tries to keep. The arbitrarily chosen value here
// is max uncle depth + 1.
maxJournalLength = 8
// Number of codehash->size associations to keep.
codeSizeCacheSize = 100000
)
// StateDBs within the ethereum protocol are used to store anything
// within the merkle trie. StateDBs take care of caching and storing
// nested states. It's the general query interface to retrieve:
// * Contracts
// * Accounts
type StateDB struct {
db ethdb.Database
trie *trie.SecureTrie
// This map caches canon state accounts.
all map[common.Address]Account
db ethdb.Database
trie *trie.SecureTrie
pastTries []*trie.SecureTrie
codeSizeCache *lru.Cache
// This map holds 'live' objects, which will get modified while processing a state transition.
stateObjects map[common.Address]*StateObject
@ -65,10 +74,11 @@ func New(root common.Hash, db ethdb.Database) (*StateDB, error) {
if err != nil {
return nil, err
}
csc, _ := lru.New(codeSizeCacheSize)
return &StateDB{
db: db,
trie: tr,
all: make(map[common.Address]Account),
codeSizeCache: csc,
stateObjects: make(map[common.Address]*StateObject),
stateObjectsDirty: make(map[common.Address]struct{}),
refund: new(big.Int),
@ -79,19 +89,15 @@ func New(root common.Hash, db ethdb.Database) (*StateDB, error) {
// Reset clears out all emphemeral state objects from the state db, but keeps
// the underlying state trie to avoid reloading data for the next operations.
func (self *StateDB) Reset(root common.Hash) error {
tr, err := trie.NewSecure(root, self.db)
tr, err := self.openTrie(root)
if err != nil {
return err
}
all := self.all
if self.trie.Hash() != root {
// The root has changed, invalidate canon state.
all = make(map[common.Address]Account)
}
*self = StateDB{
db: self.db,
trie: tr,
all: all,
pastTries: self.pastTries,
codeSizeCache: self.codeSizeCache,
stateObjects: make(map[common.Address]*StateObject),
stateObjectsDirty: make(map[common.Address]struct{}),
refund: new(big.Int),
@ -100,6 +106,30 @@ func (self *StateDB) Reset(root common.Hash) error {
return nil
}
// openTrie creates a trie. It uses an existing trie if one is available
// from the journal if available.
func (self *StateDB) openTrie(root common.Hash) (*trie.SecureTrie, error) {
if self.trie != nil && self.trie.Hash() == root {
return self.trie, nil
}
for i := len(self.pastTries) - 1; i >= 0; i-- {
if self.pastTries[i].Hash() == root {
tr := *self.pastTries[i]
return &tr, nil
}
}
return trie.NewSecure(root, self.db)
}
func (self *StateDB) pushTrie(t *trie.SecureTrie) {
if len(self.pastTries) >= maxJournalLength {
copy(self.pastTries, self.pastTries[1:])
self.pastTries[len(self.pastTries)-1] = t
} else {
self.pastTries = append(self.pastTries, t)
}
}
func (self *StateDB) StartRecord(thash, bhash common.Hash, ti int) {
self.thash = thash
self.bhash = bhash
@ -165,17 +195,28 @@ func (self *StateDB) GetNonce(addr common.Address) uint64 {
func (self *StateDB) GetCode(addr common.Address) []byte {
stateObject := self.GetStateObject(addr)
if stateObject != nil {
return stateObject.Code(self.db)
code := stateObject.Code(self.db)
key := common.BytesToHash(stateObject.CodeHash())
self.codeSizeCache.Add(key, len(code))
return code
}
return nil
}
func (self *StateDB) GetCodeSize(addr common.Address) int {
stateObject := self.GetStateObject(addr)
if stateObject != nil {
return stateObject.CodeSize(self.db)
if stateObject == nil {
return 0
}
return 0
key := common.BytesToHash(stateObject.CodeHash())
if cached, ok := self.codeSizeCache.Get(key); ok {
return cached.(int)
}
size := len(stateObject.Code(self.db))
if stateObject.dbErr == nil {
self.codeSizeCache.Add(key, size)
}
return size
}
func (self *StateDB) GetState(a common.Address, b common.Hash) common.Hash {
@ -269,13 +310,6 @@ func (self *StateDB) GetStateObject(addr common.Address) (stateObject *StateObje
return obj
}
// Use cached account data from the canon state if possible.
if data, ok := self.all[addr]; ok {
obj := NewObject(addr, data, self.MarkStateObjectDirty)
self.SetStateObject(obj)
return obj
}
// Load the object from the database.
enc := self.trie.Get(addr[:])
if len(enc) == 0 {
@ -286,10 +320,6 @@ func (self *StateDB) GetStateObject(addr common.Address) (stateObject *StateObje
glog.Errorf("can't decode object at %x: %v", addr[:], err)
return nil
}
// Update the all cache. Content in DB always corresponds
// to the current head state so this is ok to do here.
// The object we just loaded has no storage trie and code yet.
self.all[addr] = data
// Insert into the live set.
obj := NewObject(addr, data, self.MarkStateObjectDirty)
self.SetStateObject(obj)
@ -355,7 +385,8 @@ func (self *StateDB) Copy() *StateDB {
state := &StateDB{
db: self.db,
trie: self.trie,
all: self.all,
pastTries: self.pastTries,
codeSizeCache: self.codeSizeCache,
stateObjects: make(map[common.Address]*StateObject, len(self.stateObjectsDirty)),
stateObjectsDirty: make(map[common.Address]struct{}, len(self.stateObjectsDirty)),
refund: new(big.Int).Set(self.refund),
@ -375,11 +406,12 @@ func (self *StateDB) Copy() *StateDB {
}
func (self *StateDB) Set(state *StateDB) {
self.db = state.db
self.trie = state.trie
self.pastTries = state.pastTries
self.stateObjects = state.stateObjects
self.stateObjectsDirty = state.stateObjectsDirty
self.all = state.all
self.codeSizeCache = state.codeSizeCache
self.refund = state.refund
self.logs = state.logs
self.logSize = state.logSize
@ -444,12 +476,6 @@ func (s *StateDB) CommitBatch() (root common.Hash, batch ethdb.Batch) {
func (s *StateDB) commit(dbw trie.DatabaseWriter) (root common.Hash, err error) {
s.refund = new(big.Int)
defer func() {
if err != nil {
// Committing failed, any updates to the canon state are invalid.
s.all = make(map[common.Address]Account)
}
}()
// Commit objects to the trie.
for addr, stateObject := range s.stateObjects {
@ -457,7 +483,6 @@ func (s *StateDB) commit(dbw trie.DatabaseWriter) (root common.Hash, err error)
// If the object has been removed, don't bother syncing it
// and just mark it for deletion in the trie.
s.DeleteStateObject(stateObject)
delete(s.all, addr)
} else if _, ok := s.stateObjectsDirty[addr]; ok {
// Write any contract code associated with the state object
if stateObject.code != nil && stateObject.dirtyCode {
@ -472,12 +497,15 @@ func (s *StateDB) commit(dbw trie.DatabaseWriter) (root common.Hash, err error)
}
// Update the object in the main account trie.
s.UpdateStateObject(stateObject)
s.all[addr] = stateObject.data
}
delete(s.stateObjectsDirty, addr)
}
// Write trie changes.
return s.trie.CommitTo(dbw)
root, err = s.trie.CommitTo(dbw)
if err == nil {
s.pushTrie(s.trie)
}
return root, err
}
func (self *StateDB) Refunds() *big.Int {

@ -62,9 +62,6 @@ func makeTestState() (ethdb.Database, common.Hash, []*testAccount) {
}
root, _ := state.Commit()
// Remove any potentially cached data from the test state creation
trie.ClearGlobalCache()
// Return the generated state
return db, root, accounts
}
@ -72,9 +69,6 @@ func makeTestState() (ethdb.Database, common.Hash, []*testAccount) {
// checkStateAccounts cross references a reconstructed state with an expected
// account array.
func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accounts []*testAccount) {
// Remove any potentially cached data from the state synchronisation
trie.ClearGlobalCache()
// Check root availability and state contents
state, err := New(root, db)
if err != nil {
@ -98,9 +92,6 @@ func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accou
// checkStateConsistency checks that all nodes in a state trie are indeed present.
func checkStateConsistency(db ethdb.Database, root common.Hash) error {
// Remove any potentially cached data from the test state creation or previous checks
trie.ClearGlobalCache()
// Create and iterate a state trie rooted in a sub-node
if _, err := db.Get(root.Bytes()); err != nil {
return nil // Consider a non existent state consistent

@ -42,7 +42,6 @@ func (odr *testOdr) Retrieve(ctx context.Context, req OdrRequest) error {
case *TrieRequest:
t, _ := trie.New(req.root, odr.sdb)
req.proof = t.Prove(req.key)
trie.ClearGlobalCache()
case *NodeDataRequest:
req.data, _ = odr.sdb.Get(req.hash[:])
}
@ -75,7 +74,6 @@ func TestLightStateOdr(t *testing.T) {
odr := &testOdr{sdb: sdb, ldb: ldb}
ls := NewLightState(root, odr)
ctx := context.Background()
trie.ClearGlobalCache()
for i := byte(0); i < 100; i++ {
addr := common.Address{i}
@ -160,7 +158,6 @@ func TestLightStateSetCopy(t *testing.T) {
odr := &testOdr{sdb: sdb, ldb: ldb}
ls := NewLightState(root, odr)
ctx := context.Background()
trie.ClearGlobalCache()
for i := byte(0); i < 100; i++ {
addr := common.Address{i}
@ -237,7 +234,6 @@ func TestLightStateDelete(t *testing.T) {
odr := &testOdr{sdb: sdb, ldb: ldb}
ls := NewLightState(root, odr)
ctx := context.Background()
trie.ClearGlobalCache()
addr := common.Address{42}

@ -1,206 +0,0 @@
// Copyright (c) 2015 Hans Alexander Gugel <alexander.gugel@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// This file contains a modified version of package arc from
// https://github.com/alexanderGugel/arc
//
// It implements the ARC (Adaptive Replacement Cache) algorithm as detailed in
// https://www.usenix.org/legacy/event/fast03/tech/full_papers/megiddo/megiddo.pdf
package trie
import (
"container/list"
"sync"
)
type arc struct {
p int
c int
t1 *list.List
b1 *list.List
t2 *list.List
b2 *list.List
cache map[string]*entry
mutex sync.Mutex
}
type entry struct {
key hashNode
value node
ll *list.List
el *list.Element
}
// newARC returns a new Adaptive Replacement Cache with the
// given capacity.
func newARC(c int) *arc {
return &arc{
c: c,
t1: list.New(),
b1: list.New(),
t2: list.New(),
b2: list.New(),
cache: make(map[string]*entry, c),
}
}
// Clear clears the cache
func (a *arc) Clear() {
a.mutex.Lock()
defer a.mutex.Unlock()
a.p = 0
a.t1 = list.New()
a.b1 = list.New()
a.t2 = list.New()
a.b2 = list.New()
a.cache = make(map[string]*entry, a.c)
}
// Put inserts a new key-value pair into the cache.
// This optimizes future access to this entry (side effect).
func (a *arc) Put(key hashNode, value node) bool {
a.mutex.Lock()
defer a.mutex.Unlock()
ent, ok := a.cache[string(key)]
if ok != true {
ent = &entry{key: key, value: value}
a.req(ent)
a.cache[string(key)] = ent
} else {
ent.value = value
a.req(ent)
}
return ok
}
// Get retrieves a previously via Set inserted entry.
// This optimizes future access to this entry (side effect).
func (a *arc) Get(key hashNode) (value node, ok bool) {
a.mutex.Lock()
defer a.mutex.Unlock()
ent, ok := a.cache[string(key)]
if ok {
a.req(ent)
return ent.value, ent.value != nil
}
return nil, false
}
func (a *arc) req(ent *entry) {
if ent.ll == a.t1 || ent.ll == a.t2 {
// Case I
ent.setMRU(a.t2)
} else if ent.ll == a.b1 {
// Case II
// Cache Miss in t1 and t2
// Adaptation
var d int
if a.b1.Len() >= a.b2.Len() {
d = 1
} else {
d = a.b2.Len() / a.b1.Len()
}
a.p = a.p + d
if a.p > a.c {
a.p = a.c
}
a.replace(ent)
ent.setMRU(a.t2)
} else if ent.ll == a.b2 {
// Case III
// Cache Miss in t1 and t2
// Adaptation
var d int
if a.b2.Len() >= a.b1.Len() {
d = 1
} else {
d = a.b1.Len() / a.b2.Len()
}
a.p = a.p - d
if a.p < 0 {
a.p = 0
}
a.replace(ent)
ent.setMRU(a.t2)
} else if ent.ll == nil {
// Case IV
if a.t1.Len()+a.b1.Len() == a.c {
// Case A
if a.t1.Len() < a.c {
a.delLRU(a.b1)
a.replace(ent)
} else {
a.delLRU(a.t1)
}
} else if a.t1.Len()+a.b1.Len() < a.c {
// Case B
if a.t1.Len()+a.t2.Len()+a.b1.Len()+a.b2.Len() >= a.c {
if a.t1.Len()+a.t2.Len()+a.b1.Len()+a.b2.Len() == 2*a.c {
a.delLRU(a.b2)
}
a.replace(ent)
}
}
ent.setMRU(a.t1)
}
}
func (a *arc) delLRU(list *list.List) {
lru := list.Back()
list.Remove(lru)
delete(a.cache, string(lru.Value.(*entry).key))
}
func (a *arc) replace(ent *entry) {
if a.t1.Len() > 0 && ((a.t1.Len() > a.p) || (ent.ll == a.b2 && a.t1.Len() == a.p)) {
lru := a.t1.Back().Value.(*entry)
lru.value = nil
lru.setMRU(a.b1)
} else {
lru := a.t2.Back().Value.(*entry)
lru.value = nil
lru.setMRU(a.b2)
}
}
func (e *entry) setLRU(list *list.List) {
e.detach()
e.ll = list
e.el = e.ll.PushBack(e)
}
func (e *entry) setMRU(list *list.List) {
e.detach()
e.ll = list
e.el = e.ll.PushFront(e)
}
func (e *entry) detach() {
if e.ll != nil {
e.ll.Remove(e.el)
}
}

157
trie/hasher.go Normal file

@ -0,0 +1,157 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"bytes"
"hash"
"sync"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto/sha3"
"github.com/ethereum/go-ethereum/rlp"
)
type hasher struct {
tmp *bytes.Buffer
sha hash.Hash
}
// hashers live in a global pool.
var hasherPool = sync.Pool{
New: func() interface{} {
return &hasher{tmp: new(bytes.Buffer), sha: sha3.NewKeccak256()}
},
}
func newHasher() *hasher {
return hasherPool.Get().(*hasher)
}
func returnHasherToPool(h *hasher) {
hasherPool.Put(h)
}
// hash collapses a node down into a hash node, also returning a copy of the
// original node initialzied with the computed hash to replace the original one.
func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error) {
// If we're not storing the node, just hashing, use avaialble cached data
if hash, dirty := n.cache(); hash != nil && (db == nil || !dirty) {
return hash, n, nil
}
// Trie not processed yet or needs storage, walk the children
collapsed, cached, err := h.hashChildren(n, db)
if err != nil {
return hashNode{}, n, err
}
hashed, err := h.store(collapsed, db, force)
if err != nil {
return hashNode{}, n, err
}
// Cache the hash and RLP blob of the ndoe for later reuse
if hash, ok := hashed.(hashNode); ok && !force {
switch cached := cached.(type) {
case shortNode:
cached.hash = hash
if db != nil {
cached.dirty = false
}
return hashed, cached, nil
case fullNode:
cached.hash = hash
if db != nil {
cached.dirty = false
}
return hashed, cached, nil
}
}
return hashed, cached, nil
}
// hashChildren replaces the children of a node with their hashes if the encoded
// size of the child is larger than a hash, returning the collapsed node as well
// as a replacement for the original node with the child hashes cached in.
func (h *hasher) hashChildren(original node, db DatabaseWriter) (node, node, error) {
var err error
switch n := original.(type) {
case shortNode:
// Hash the short node's child, caching the newly hashed subtree
cached := n
cached.Key = common.CopyBytes(cached.Key)
n.Key = compactEncode(n.Key)
if _, ok := n.Val.(valueNode); !ok {
if n.Val, cached.Val, err = h.hash(n.Val, db, false); err != nil {
return n, original, err
}
}
if n.Val == nil {
n.Val = valueNode(nil) // Ensure that nil children are encoded as empty strings.
}
return n, cached, nil
case fullNode:
// Hash the full node's children, caching the newly hashed subtrees
cached := fullNode{dirty: n.dirty}
for i := 0; i < 16; i++ {
if n.Children[i] != nil {
if n.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false); err != nil {
return n, original, err
}
} else {
n.Children[i] = valueNode(nil) // Ensure that nil children are encoded as empty strings.
}
}
cached.Children[16] = n.Children[16]
if n.Children[16] == nil {
n.Children[16] = valueNode(nil)
}
return n, cached, nil
default:
// Value and hash nodes don't have children so they're left as were
return n, original, nil
}
}
func (h *hasher) store(n node, db DatabaseWriter, force bool) (node, error) {
// Don't store hashes or empty nodes.
if _, isHash := n.(hashNode); n == nil || isHash {
return n, nil
}
// Generate the RLP encoding of the node
h.tmp.Reset()
if err := rlp.Encode(h.tmp, n); err != nil {
panic("encode error: " + err.Error())
}
if h.tmp.Len() < 32 && !force {
return n, nil // Nodes smaller than 32 bytes are stored inside their parent
}
// Larger nodes are replaced by their hash and stored in the database.
hash, _ := n.cache()
if hash == nil {
h.sha.Reset()
h.sha.Write(h.tmp.Bytes())
hash = hashNode(h.sha.Sum(nil))
}
if db != nil {
return hash, db.Put(hash, h.tmp.Bytes())
}
return hash, nil
}

@ -16,18 +16,13 @@
package trie
import (
"bytes"
"fmt"
import "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
)
// Iterator is a key-value trie iterator to traverse the data contents.
// Iterator is a key-value trie iterator that traverses a Trie.
type Iterator struct {
trie *Trie
trie *Trie
nodeIt *NodeIterator
keyBuf []byte
Key []byte // Current data key on which the iterator is positioned on
Value []byte // Current data value on which the iterator is positioned on
@ -35,119 +30,45 @@ type Iterator struct {
// NewIterator creates a new key-value iterator.
func NewIterator(trie *Trie) *Iterator {
return &Iterator{trie: trie, Key: nil}
return &Iterator{
trie: trie,
nodeIt: NewNodeIterator(trie),
keyBuf: make([]byte, 0, 64),
Key: nil,
}
}
// Next moves the iterator forward with one key-value entry.
func (self *Iterator) Next() bool {
isIterStart := false
if self.Key == nil {
isIterStart = true
self.Key = make([]byte, 32)
// Next moves the iterator forward one key-value entry.
func (it *Iterator) Next() bool {
for it.nodeIt.Next() {
if it.nodeIt.Leaf {
it.Key = it.makeKey()
it.Value = it.nodeIt.LeafBlob
return true
}
}
key := remTerm(compactHexDecode(self.Key))
k := self.next(self.trie.root, key, isIterStart)
self.Key = []byte(decodeCompact(k))
return len(k) > 0
it.Key = nil
it.Value = nil
return false
}
func (self *Iterator) next(node interface{}, key []byte, isIterStart bool) []byte {
if node == nil {
return nil
func (it *Iterator) makeKey() []byte {
key := it.keyBuf[:0]
for _, se := range it.nodeIt.stack {
switch node := se.node.(type) {
case fullNode:
if se.child <= 16 {
key = append(key, byte(se.child))
}
case shortNode:
if hasTerm(node.Key) {
key = append(key, node.Key[:len(node.Key)-1]...)
} else {
key = append(key, node.Key...)
}
}
}
switch node := node.(type) {
case fullNode:
if len(key) > 0 {
k := self.next(node.Children[key[0]], key[1:], isIterStart)
if k != nil {
return append([]byte{key[0]}, k...)
}
}
var r byte
if len(key) > 0 {
r = key[0] + 1
}
for i := r; i < 16; i++ {
k := self.key(node.Children[i])
if k != nil {
return append([]byte{i}, k...)
}
}
case shortNode:
k := remTerm(node.Key)
if vnode, ok := node.Val.(valueNode); ok {
switch bytes.Compare([]byte(k), key) {
case 0:
if isIterStart {
self.Value = vnode
return k
}
case 1:
self.Value = vnode
return k
}
} else {
cnode := node.Val
var ret []byte
skey := key[len(k):]
if bytes.HasPrefix(key, k) {
ret = self.next(cnode, skey, isIterStart)
} else if bytes.Compare(k, key[:len(k)]) > 0 {
return self.key(node)
}
if ret != nil {
return append(k, ret...)
}
}
case hashNode:
rn, err := self.trie.resolveHash(node, nil, nil)
if err != nil && glog.V(logger.Error) {
glog.Errorf("Unhandled trie error: %v", err)
}
return self.next(rn, key, isIterStart)
}
return nil
}
func (self *Iterator) key(node interface{}) []byte {
switch node := node.(type) {
case shortNode:
// Leaf node
k := remTerm(node.Key)
if vnode, ok := node.Val.(valueNode); ok {
self.Value = vnode
return k
}
return append(k, self.key(node.Val)...)
case fullNode:
if node.Children[16] != nil {
self.Value = node.Children[16].(valueNode)
return []byte{16}
}
for i := 0; i < 16; i++ {
k := self.key(node.Children[i])
if k != nil {
return append([]byte{byte(i)}, k...)
}
}
case hashNode:
rn, err := self.trie.resolveHash(node, nil, nil)
if err != nil && glog.V(logger.Error) {
glog.Errorf("Unhandled trie error: %v", err)
}
return self.key(rn)
}
return nil
return decodeCompact(key)
}
// nodeIteratorState represents the iteration state at one particular node of the
@ -199,25 +120,27 @@ func (it *NodeIterator) Next() bool {
// step moves the iterator to the next node of the trie.
func (it *NodeIterator) step() error {
// Abort if we reached the end of the iteration
if it.trie == nil {
// Abort if we reached the end of the iteration
return nil
}
// Initialize the iterator if we've just started, or pop off the old node otherwise
if len(it.stack) == 0 {
// Always start with a collapsed root
// Initialize the iterator if we've just started.
root := it.trie.Hash()
it.stack = append(it.stack, &nodeIteratorState{node: hashNode(root[:]), child: -1})
if it.stack[0].node == nil {
return fmt.Errorf("root node missing: %x", it.trie.Hash())
state := &nodeIteratorState{node: it.trie.root, child: -1}
if root != emptyRoot {
state.hash = root
}
it.stack = append(it.stack, state)
} else {
// Continue iterating at the previous node otherwise.
it.stack = it.stack[:len(it.stack)-1]
if len(it.stack) == 0 {
it.trie = nil
return nil
}
}
// Continue iteration to the next child
for {
parent := it.stack[len(it.stack)-1]
@ -232,7 +155,12 @@ func (it *NodeIterator) step() error {
}
for parent.child++; parent.child < len(node.Children); parent.child++ {
if current := node.Children[parent.child]; current != nil {
it.stack = append(it.stack, &nodeIteratorState{node: current, parent: ancestor, child: -1})
it.stack = append(it.stack, &nodeIteratorState{
hash: common.BytesToHash(node.hash),
node: current,
parent: ancestor,
child: -1,
})
break
}
}
@ -242,7 +170,12 @@ func (it *NodeIterator) step() error {
break
}
parent.child++
it.stack = append(it.stack, &nodeIteratorState{node: node.Val, parent: ancestor, child: -1})
it.stack = append(it.stack, &nodeIteratorState{
hash: common.BytesToHash(node.hash),
node: node.Val,
parent: ancestor,
child: -1,
})
} else if hash, ok := parent.node.(hashNode); ok {
// Hash node, resolve the hash child from the database, then the node itself
if parent.child >= 0 {
@ -254,7 +187,12 @@ func (it *NodeIterator) step() error {
if err != nil {
return err
}
it.stack = append(it.stack, &nodeIteratorState{hash: common.BytesToHash(hash), node: node, parent: ancestor, child: -1})
it.stack = append(it.stack, &nodeIteratorState{
hash: common.BytesToHash(hash),
node: node,
parent: ancestor,
child: -1,
})
} else {
break
}

@ -34,21 +34,60 @@ func TestIterator(t *testing.T) {
{"dog", "puppy"},
{"somethingveryoddindeedthis is", "myothernodedata"},
}
v := make(map[string]bool)
all := make(map[string]string)
for _, val := range vals {
v[val.k] = false
all[val.k] = val.v
trie.Update([]byte(val.k), []byte(val.v))
}
trie.Commit()
found := make(map[string]string)
it := NewIterator(trie)
for it.Next() {
v[string(it.Key)] = true
found[string(it.Key)] = string(it.Value)
}
for k, found := range v {
if !found {
t.Error("iterator didn't find", k)
for k, v := range all {
if found[k] != v {
t.Errorf("iterator value mismatch for %s: got %q want %q", k, found[k], v)
}
}
}
type kv struct {
k, v []byte
t bool
}
func TestIteratorLargeData(t *testing.T) {
trie := newEmpty()
vals := make(map[string]*kv)
for i := byte(0); i < 255; i++ {
value := &kv{common.LeftPadBytes([]byte{i}, 32), []byte{i}, false}
value2 := &kv{common.LeftPadBytes([]byte{10, i}, 32), []byte{i}, false}
trie.Update(value.k, value.v)
trie.Update(value2.k, value2.v)
vals[string(value.k)] = value
vals[string(value2.k)] = value2
}
it := NewIterator(trie)
for it.Next() {
vals[string(it.Key)].t = true
}
var untouched []*kv
for _, value := range vals {
if !value.t {
untouched = append(untouched, value)
}
}
if len(untouched) > 0 {
t.Errorf("Missed %d nodes", len(untouched))
for _, value := range untouched {
t.Error(value)
}
}
}

@ -70,15 +70,13 @@ func (t *Trie) Prove(key []byte) []rlp.RawValue {
panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
}
}
if t.hasher == nil {
t.hasher = newHasher()
}
hasher := newHasher()
proof := make([]rlp.RawValue, 0, len(nodes))
for i, n := range nodes {
// Don't bother checking for errors here since hasher panics
// if encoding doesn't work and we're not writing to any database.
n, _, _ = t.hasher.hashChildren(n, nil)
hn, _ := t.hasher.store(n, nil, false)
n, _, _ = hasher.hashChildren(n, nil)
hn, _ := hasher.store(n, nil, false)
if _, ok := hn.(hashNode); ok || i == 0 {
// If the node's database encoding is a hash (or is the
// root node), it becomes a proof element.

@ -17,10 +17,7 @@
package trie
import (
"hash"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto/sha3"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
)
@ -38,11 +35,9 @@ var secureKeyPrefix = []byte("secure-key-")
//
// SecureTrie is not safe for concurrent use.
type SecureTrie struct {
*Trie
hash hash.Hash
trie Trie
hashKeyBuf []byte
secKeyBuf []byte
secKeyBuf [200]byte
secKeyCache map[string][]byte
}
@ -61,7 +56,7 @@ func NewSecure(root common.Hash, db Database) (*SecureTrie, error) {
return nil, err
}
return &SecureTrie{
Trie: trie,
trie: *trie,
secKeyCache: make(map[string][]byte),
}, nil
}
@ -80,7 +75,7 @@ func (t *SecureTrie) Get(key []byte) []byte {
// The value bytes must not be modified by the caller.
// If a node was not found in the database, a MissingNodeError is returned.
func (t *SecureTrie) TryGet(key []byte) ([]byte, error) {
return t.Trie.TryGet(t.hashKey(key))
return t.trie.TryGet(t.hashKey(key))
}
// Update associates key with value in the trie. Subsequent calls to
@ -105,7 +100,7 @@ func (t *SecureTrie) Update(key, value []byte) {
// If a node was not found in the database, a MissingNodeError is returned.
func (t *SecureTrie) TryUpdate(key, value []byte) error {
hk := t.hashKey(key)
err := t.Trie.TryUpdate(hk, value)
err := t.trie.TryUpdate(hk, value)
if err != nil {
return err
}
@ -125,7 +120,7 @@ func (t *SecureTrie) Delete(key []byte) {
func (t *SecureTrie) TryDelete(key []byte) error {
hk := t.hashKey(key)
delete(t.secKeyCache, string(hk))
return t.Trie.TryDelete(hk)
return t.trie.TryDelete(hk)
}
// GetKey returns the sha3 preimage of a hashed key that was
@ -134,7 +129,7 @@ func (t *SecureTrie) GetKey(shaKey []byte) []byte {
if key, ok := t.secKeyCache[string(shaKey)]; ok {
return key
}
key, _ := t.Trie.db.Get(t.secKey(shaKey))
key, _ := t.trie.db.Get(t.secKey(shaKey))
return key
}
@ -144,7 +139,23 @@ func (t *SecureTrie) GetKey(shaKey []byte) []byte {
// Committing flushes nodes from memory. Subsequent Get calls will load nodes
// from the database.
func (t *SecureTrie) Commit() (root common.Hash, err error) {
return t.CommitTo(t.db)
return t.CommitTo(t.trie.db)
}
func (t *SecureTrie) Hash() common.Hash {
return t.trie.Hash()
}
func (t *SecureTrie) Root() []byte {
return t.trie.Root()
}
func (t *SecureTrie) Iterator() *Iterator {
return t.trie.Iterator()
}
func (t *SecureTrie) NodeIterator() *NodeIterator {
return NewNodeIterator(&t.trie)
}
// CommitTo writes all nodes and the secure hash pre-images to the given database.
@ -162,27 +173,26 @@ func (t *SecureTrie) CommitTo(db DatabaseWriter) (root common.Hash, err error) {
}
t.secKeyCache = make(map[string][]byte)
}
n, clean, err := t.hashRoot(db)
if err != nil {
return (common.Hash{}), err
}
t.root = clean
return common.BytesToHash(n.(hashNode)), nil
return t.trie.CommitTo(db)
}
// secKey returns the database key for the preimage of key, as an ephemeral buffer.
// The caller must not hold onto the return value because it will become
// invalid on the next call to hashKey or secKey.
func (t *SecureTrie) secKey(key []byte) []byte {
t.secKeyBuf = append(t.secKeyBuf[:0], secureKeyPrefix...)
t.secKeyBuf = append(t.secKeyBuf, key...)
return t.secKeyBuf
buf := append(t.secKeyBuf[:0], secureKeyPrefix...)
buf = append(buf, key...)
return buf
}
// hashKey returns the hash of key as an ephemeral buffer.
// The caller must not hold onto the return value because it will become
// invalid on the next call to hashKey or secKey.
func (t *SecureTrie) hashKey(key []byte) []byte {
if t.hash == nil {
t.hash = sha3.NewKeccak256()
t.hashKeyBuf = make([]byte, 32)
}
t.hash.Reset()
t.hash.Write(key)
t.hashKeyBuf = t.hash.Sum(t.hashKeyBuf[:0])
return t.hashKeyBuf
h := newHasher()
h.sha.Reset()
h.sha.Write(key)
buf := h.sha.Sum(t.hashKeyBuf[:0])
returnHasherToPool(h)
return buf
}

@ -51,9 +51,6 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) {
}
trie.Commit()
// Remove any potentially cached data from the test trie creation
globalCache.Clear()
// Return the generated trie
return db, trie, content
}
@ -61,9 +58,6 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) {
// checkTrieContents cross references a reconstructed trie with an expected data
// content map.
func checkTrieContents(t *testing.T, db Database, root []byte, content map[string][]byte) {
// Remove any potentially cached data from the trie synchronisation
globalCache.Clear()
// Check root availability and trie contents
trie, err := New(common.BytesToHash(root), db)
if err != nil {
@ -81,9 +75,6 @@ func checkTrieContents(t *testing.T, db Database, root []byte, content map[strin
// checkTrieConsistency checks that all nodes in a trie are indeed present.
func checkTrieConsistency(db Database, root common.Hash) error {
// Remove any potentially cached data from the test trie creation or previous checks
globalCache.Clear()
// Create and iterate a trie rooted in a subnode
trie, err := New(root, db)
if err != nil {

@ -20,22 +20,14 @@ package trie
import (
"bytes"
"fmt"
"hash"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/crypto/sha3"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
"github.com/ethereum/go-ethereum/rlp"
)
const defaultCacheCapacity = 800
var (
// The global cache stores decoded trie nodes by hash as they get loaded.
globalCache = newARC(defaultCacheCapacity)
// This is the known root hash of an empty trie.
emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
@ -43,11 +35,6 @@ var (
emptyState = crypto.Keccak256Hash(nil)
)
// ClearGlobalCache clears the global trie cache
func ClearGlobalCache() {
globalCache.Clear()
}
// Database must be implemented by backing stores for the trie.
type Database interface {
DatabaseWriter
@ -72,7 +59,6 @@ type Trie struct {
root node
db Database
originalRoot common.Hash
*hasher
}
// New creates a trie with an existing root node from db.
@ -118,32 +104,50 @@ func (t *Trie) Get(key []byte) []byte {
// If a node was not found in the database, a MissingNodeError is returned.
func (t *Trie) TryGet(key []byte) ([]byte, error) {
key = compactHexDecode(key)
pos := 0
tn := t.root
for pos < len(key) {
switch n := tn.(type) {
case shortNode:
if len(key)-pos < len(n.Key) || !bytes.Equal(n.Key, key[pos:pos+len(n.Key)]) {
return nil, nil
}
tn = n.Val
pos += len(n.Key)
case fullNode:
tn = n.Children[key[pos]]
pos++
case nil:
return nil, nil
case hashNode:
var err error
tn, err = t.resolveHash(n, key[:pos], key[pos:])
if err != nil {
return nil, err
}
default:
panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
}
value, newroot, didResolve, err := t.tryGet(t.root, key, 0)
if err == nil && didResolve {
t.root = newroot
}
return value, err
}
func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode node, didResolve bool, err error) {
switch n := (origNode).(type) {
case nil:
return nil, nil, false, nil
case valueNode:
return n, n, false, nil
case shortNode:
if len(key)-pos < len(n.Key) || !bytes.Equal(n.Key, key[pos:pos+len(n.Key)]) {
// key not found in trie
return nil, n, false, nil
}
value, newnode, didResolve, err = t.tryGet(n.Val, key, pos+len(n.Key))
if err == nil && didResolve {
n.Val = newnode
return value, n, didResolve, err
} else {
return value, origNode, didResolve, err
}
case fullNode:
child := n.Children[key[pos]]
value, newnode, didResolve, err = t.tryGet(child, key, pos+1)
if err == nil && didResolve {
n.Children[key[pos]] = newnode
return value, n, didResolve, err
} else {
return value, origNode, didResolve, err
}
case hashNode:
child, err := t.resolveHash(n, key[:pos], key[pos:])
if err != nil {
return nil, n, true, err
}
value, newnode, _, err := t.tryGet(child, key, pos)
return value, newnode, true, err
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
return tn.(valueNode), nil
}
// Update associates key with value in the trie. Subsequent calls to
@ -410,9 +414,6 @@ func (t *Trie) resolve(n node, prefix, suffix []byte) (node, error) {
}
func (t *Trie) resolveHash(n hashNode, prefix, suffix []byte) (node, error) {
if v, ok := globalCache.Get(n); ok {
return v, nil
}
enc, err := t.db.Get(n)
if err != nil || enc == nil {
return nil, &MissingNodeError{
@ -424,9 +425,6 @@ func (t *Trie) resolveHash(n hashNode, prefix, suffix []byte) (node, error) {
}
}
dec := mustDecodeNode(n, enc)
if dec != nil {
globalCache.Put(n, dec)
}
return dec, nil
}
@ -474,127 +472,7 @@ func (t *Trie) hashRoot(db DatabaseWriter) (node, node, error) {
if t.root == nil {
return hashNode(emptyRoot.Bytes()), nil, nil
}
if t.hasher == nil {
t.hasher = newHasher()
}
return t.hasher.hash(t.root, db, true)
}
type hasher struct {
tmp *bytes.Buffer
sha hash.Hash
}
func newHasher() *hasher {
return &hasher{tmp: new(bytes.Buffer), sha: sha3.NewKeccak256()}
}
// hash collapses a node down into a hash node, also returning a copy of the
// original node initialzied with the computed hash to replace the original one.
func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error) {
// If we're not storing the node, just hashing, use avaialble cached data
if hash, dirty := n.cache(); hash != nil && (db == nil || !dirty) {
return hash, n, nil
}
// Trie not processed yet or needs storage, walk the children
collapsed, cached, err := h.hashChildren(n, db)
if err != nil {
return hashNode{}, n, err
}
hashed, err := h.store(collapsed, db, force)
if err != nil {
return hashNode{}, n, err
}
// Cache the hash and RLP blob of the ndoe for later reuse
if hash, ok := hashed.(hashNode); ok && !force {
switch cached := cached.(type) {
case shortNode:
cached.hash = hash
if db != nil {
cached.dirty = false
}
return hashed, cached, nil
case fullNode:
cached.hash = hash
if db != nil {
cached.dirty = false
}
return hashed, cached, nil
}
}
return hashed, cached, nil
}
// hashChildren replaces the children of a node with their hashes if the encoded
// size of the child is larger than a hash, returning the collapsed node as well
// as a replacement for the original node with the child hashes cached in.
func (h *hasher) hashChildren(original node, db DatabaseWriter) (node, node, error) {
var err error
switch n := original.(type) {
case shortNode:
// Hash the short node's child, caching the newly hashed subtree
cached := n
cached.Key = common.CopyBytes(cached.Key)
n.Key = compactEncode(n.Key)
if _, ok := n.Val.(valueNode); !ok {
if n.Val, cached.Val, err = h.hash(n.Val, db, false); err != nil {
return n, original, err
}
}
if n.Val == nil {
n.Val = valueNode(nil) // Ensure that nil children are encoded as empty strings.
}
return n, cached, nil
case fullNode:
// Hash the full node's children, caching the newly hashed subtrees
cached := fullNode{dirty: n.dirty}
for i := 0; i < 16; i++ {
if n.Children[i] != nil {
if n.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false); err != nil {
return n, original, err
}
} else {
n.Children[i] = valueNode(nil) // Ensure that nil children are encoded as empty strings.
}
}
cached.Children[16] = n.Children[16]
if n.Children[16] == nil {
n.Children[16] = valueNode(nil)
}
return n, cached, nil
default:
// Value and hash nodes don't have children so they're left as were
return n, original, nil
}
}
func (h *hasher) store(n node, db DatabaseWriter, force bool) (node, error) {
// Don't store hashes or empty nodes.
if _, isHash := n.(hashNode); n == nil || isHash {
return n, nil
}
// Generate the RLP encoding of the node
h.tmp.Reset()
if err := rlp.Encode(h.tmp, n); err != nil {
panic("encode error: " + err.Error())
}
if h.tmp.Len() < 32 && !force {
return n, nil // Nodes smaller than 32 bytes are stored inside their parent
}
// Larger nodes are replaced by their hash and stored in the database.
hash, _ := n.cache()
if hash == nil {
h.sha.Reset()
h.sha.Write(h.tmp.Bytes())
hash = hashNode(h.sha.Sum(nil))
}
if db != nil {
return hash, db.Put(hash, h.tmp.Bytes())
}
return hash, nil
h := newHasher()
defer returnHasherToPool(h)
return h.hash(t.root, db, true)
}

@ -76,8 +76,6 @@ func TestMissingNode(t *testing.T) {
updateString(trie, "123456", "asdfasdfasdfasdfasdfasdfasdfasdf")
root, _ := trie.Commit()
ClearGlobalCache()
trie, _ = New(root, db)
_, err := trie.TryGet([]byte("120000"))
if err != nil {
@ -109,7 +107,6 @@ func TestMissingNode(t *testing.T) {
}
db.Delete(common.FromHex("e1d943cc8f061a0c0b98162830b970395ac9315654824bf21b73b891365262f9"))
ClearGlobalCache()
trie, _ = New(root, db)
_, err = trie.TryGet([]byte("120000"))
@ -362,44 +359,6 @@ func TestLargeValue(t *testing.T) {
}
type kv struct {
k, v []byte
t bool
}
func TestLargeData(t *testing.T) {
trie := newEmpty()
vals := make(map[string]*kv)
for i := byte(0); i < 255; i++ {
value := &kv{common.LeftPadBytes([]byte{i}, 32), []byte{i}, false}
value2 := &kv{common.LeftPadBytes([]byte{10, i}, 32), []byte{i}, false}
trie.Update(value.k, value.v)
trie.Update(value2.k, value2.v)
vals[string(value.k)] = value
vals[string(value2.k)] = value2
}
it := NewIterator(trie)
for it.Next() {
vals[string(it.Key)].t = true
}
var untouched []*kv
for _, value := range vals {
if !value.t {
untouched = append(untouched, value)
}
}
if len(untouched) > 0 {
t.Errorf("Missed %d nodes", len(untouched))
for _, value := range untouched {
t.Error(value)
}
}
}
func BenchmarkGet(b *testing.B) { benchGet(b, false) }
func BenchmarkGetDB(b *testing.B) { benchGet(b, true) }
func BenchmarkUpdateBE(b *testing.B) { benchUpdate(b, binary.BigEndian) }