trie: make fullnode children hash calculation concurrently (#15131)

* trie: make fullnode children hash calculation concurrently

* trie: thread out only on topmost fullnode

* trie: clean up full node children hash calculation

* trie: minor code fixups
This commit is contained in:
gary rong 2017-11-27 19:34:17 +08:00 committed by Péter Szilágyi
parent e4c9fd29a3
commit 0f7fbb85d6
3 changed files with 88 additions and 32 deletions

@ -26,27 +26,46 @@ import (
"github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/rlp"
) )
type hasher struct { // calculator is a utility used by the hasher to calculate the hash value of the tree node.
tmp *bytes.Buffer type calculator struct {
sha hash.Hash sha hash.Hash
cachegen, cachelimit uint16 buffer *bytes.Buffer
} }
// hashers live in a global pool. // calculatorPool is a set of temporary calculators that may be individually saved and retrieved.
var hasherPool = sync.Pool{ var calculatorPool = sync.Pool{
New: func() interface{} { New: func() interface{} {
return &hasher{tmp: new(bytes.Buffer), sha: sha3.NewKeccak256()} return &calculator{buffer: new(bytes.Buffer), sha: sha3.NewKeccak256()}
}, },
} }
// hasher hasher is used to calculate the hash value of the whole tree.
type hasher struct {
cachegen uint16
cachelimit uint16
threaded bool
mu sync.Mutex
}
func newHasher(cachegen, cachelimit uint16) *hasher { func newHasher(cachegen, cachelimit uint16) *hasher {
h := hasherPool.Get().(*hasher) h := &hasher{
h.cachegen, h.cachelimit = cachegen, cachelimit cachegen: cachegen,
cachelimit: cachelimit,
}
return h return h
} }
func returnHasherToPool(h *hasher) { // newCalculator retrieves a cleaned calculator from calculator pool.
hasherPool.Put(h) func (h *hasher) newCalculator() *calculator {
calculator := calculatorPool.Get().(*calculator)
calculator.buffer.Reset()
calculator.sha.Reset()
return calculator
}
// returnCalculator returns a no longer used calculator to the pool.
func (h *hasher) returnCalculator(calculator *calculator) {
calculatorPool.Put(calculator)
} }
// hash collapses a node down into a hash node, also returning a copy of the // hash collapses a node down into a hash node, also returning a copy of the
@ -123,16 +142,49 @@ func (h *hasher) hashChildren(original node, db DatabaseWriter) (node, node, err
// Hash the full node's children, caching the newly hashed subtrees // Hash the full node's children, caching the newly hashed subtrees
collapsed, cached := n.copy(), n.copy() collapsed, cached := n.copy(), n.copy()
// hashChild is a helper to hash a single child, which is called either on the
// same thread as the caller or in a goroutine for the toplevel branching.
hashChild := func(index int, wg *sync.WaitGroup) {
if wg != nil {
defer wg.Done()
}
// Ensure that nil children are encoded as empty strings.
if collapsed.Children[index] == nil {
collapsed.Children[index] = valueNode(nil)
return
}
// Hash all other children properly
var herr error
collapsed.Children[index], cached.Children[index], herr = h.hash(n.Children[index], db, false)
if herr != nil {
h.mu.Lock() // rarely if ever locked, no congenstion
err = herr
h.mu.Unlock()
}
}
// If we're not running in threaded mode yet, span a goroutine for each child
if !h.threaded {
// Disable further threading
h.threaded = true
// Hash all the children concurrently
var wg sync.WaitGroup
for i := 0; i < 16; i++ { for i := 0; i < 16; i++ {
if n.Children[i] != nil { wg.Add(1)
collapsed.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false) go hashChild(i, &wg)
}
wg.Wait()
// Reenable threading for subsequent hash calls
h.threaded = false
} else {
for i := 0; i < 16; i++ {
hashChild(i, nil)
}
}
if err != nil { if err != nil {
return original, original, err return original, original, err
} }
} else {
collapsed.Children[i] = valueNode(nil) // Ensure that nil children are encoded as empty strings.
}
}
cached.Children[16] = n.Children[16] cached.Children[16] = n.Children[16]
if collapsed.Children[16] == nil { if collapsed.Children[16] == nil {
collapsed.Children[16] = valueNode(nil) collapsed.Children[16] = valueNode(nil)
@ -150,24 +202,29 @@ func (h *hasher) store(n node, db DatabaseWriter, force bool) (node, error) {
if _, isHash := n.(hashNode); n == nil || isHash { if _, isHash := n.(hashNode); n == nil || isHash {
return n, nil return n, nil
} }
calculator := h.newCalculator()
defer h.returnCalculator(calculator)
// Generate the RLP encoding of the node // Generate the RLP encoding of the node
h.tmp.Reset() if err := rlp.Encode(calculator.buffer, n); err != nil {
if err := rlp.Encode(h.tmp, n); err != nil {
panic("encode error: " + err.Error()) panic("encode error: " + err.Error())
} }
if calculator.buffer.Len() < 32 && !force {
if h.tmp.Len() < 32 && !force {
return n, nil // Nodes smaller than 32 bytes are stored inside their parent return n, nil // Nodes smaller than 32 bytes are stored inside their parent
} }
// Larger nodes are replaced by their hash and stored in the database. // Larger nodes are replaced by their hash and stored in the database.
hash, _ := n.cache() hash, _ := n.cache()
if hash == nil { if hash == nil {
h.sha.Reset() calculator.sha.Write(calculator.buffer.Bytes())
h.sha.Write(h.tmp.Bytes()) hash = hashNode(calculator.sha.Sum(nil))
hash = hashNode(h.sha.Sum(nil))
} }
if db != nil { if db != nil {
return hash, db.Put(hash, h.tmp.Bytes()) // db might be a leveldb batch, which is not safe for concurrent writes
h.mu.Lock()
err := db.Put(hash, calculator.buffer.Bytes())
h.mu.Unlock()
return hash, err
} }
return hash, nil return hash, nil
} }

@ -199,10 +199,10 @@ func (t *SecureTrie) secKey(key []byte) []byte {
// invalid on the next call to hashKey or secKey. // invalid on the next call to hashKey or secKey.
func (t *SecureTrie) hashKey(key []byte) []byte { func (t *SecureTrie) hashKey(key []byte) []byte {
h := newHasher(0, 0) h := newHasher(0, 0)
h.sha.Reset() calculator := h.newCalculator()
h.sha.Write(key) calculator.sha.Write(key)
buf := h.sha.Sum(t.hashKeyBuf[:0]) buf := calculator.sha.Sum(t.hashKeyBuf[:0])
returnHasherToPool(h) h.returnCalculator(calculator)
return buf return buf
} }

@ -501,6 +501,5 @@ func (t *Trie) hashRoot(db DatabaseWriter) (node, node, error) {
return hashNode(emptyRoot.Bytes()), nil, nil return hashNode(emptyRoot.Bytes()), nil, nil
} }
h := newHasher(t.cachegen, t.cachelimit) h := newHasher(t.cachegen, t.cachelimit)
defer returnHasherToPool(h)
return h.hash(t.root, db, true) return h.hash(t.root, db, true)
} }