core: implement ChainIndexer

This commit is contained in:
Zsolt Felfoldi 2017-03-05 16:52:03 +01:00 committed by Péter Szilágyi
parent 67439c1dba
commit bd74882d83
No known key found for this signature in database
GPG Key ID: E9AE538CEDF8293D
2 changed files with 529 additions and 0 deletions

294
core/chain_indexer.go Normal file

@ -0,0 +1,294 @@
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package core implements the Ethereum consensus protocol.
package core
import (
"encoding/binary"
"sync"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/event"
)
// ChainIndexer does a post-processing job for equally sized sections of the
// canonical chain (like BloomBits and CHT structures). A ChainIndexer is
// connected to the blockchain through the event system by starting a
// ChainEventLoop in a goroutine.
//
// Further child ChainIndexers can be added which use the output of the parent
// section indexer. These child indexers receive new head notifications only
// after an entire section has been finished or in case of rollbacks that might
// affect already finished sections.
type ChainIndexer struct {
	// chainDb is read for canonical hashes and headers and is handed to the
	// backend on Commit.
	chainDb ethdb.Database
	// indexDb stores the number of valid sections and the head hash of each
	// processed section.
	indexDb ethdb.Database
	// backend does the actual post-processing of the headers.
	backend ChainIndexerBackend

	// sectionSize is the number of blocks in a single section.
	sectionSize uint64
	// confirmReq is the number of confirmations required before a section is
	// scheduled for processing.
	confirmReq uint64

	// stop terminates the update loop and the chain event loop.
	stop chan struct{}
	// lock is held by updateLoop, newHead and CanonicalSections while they
	// access the progress fields below.
	lock sync.Mutex
	// procWait is the delay before the next section is processed when more
	// sections are pending.
	procWait time.Duration
	// tryUpdate wakes up the update loop.
	tryUpdate chan struct{}

	// stored is the number of sections that have been successfully processed.
	stored uint64
	// targetCount is the number of sections that should be processed
	// according to the latest head notification.
	targetCount uint64
	// calcIdx is the index of the section most recently handed to
	// processSection.
	calcIdx uint64
	// lastForwarded is the block number most recently passed on to the
	// child indexers.
	lastForwarded uint64

	// updating is true while an update has been requested and the update loop
	// has not yet run out of pending sections.
	updating bool
	// children are notified about finished sections and rollbacks.
	children []*ChainIndexer
}
// ChainIndexerBackend is the interface of the backend that does the actual
// post-processing job on behalf of a ChainIndexer. For every section the
// indexer calls Reset once, then Process for each header of the section in
// ascending order, and finally Commit if all headers were available and
// formed a continuous chain.
type ChainIndexerBackend interface {
	// Reset starts processing a new section.
	Reset(section uint64)
	// Process processes a single block (called for each block in the section).
	Process(header *types.Header)
	// Commit does some more processing if necessary and stores the results in
	// the database. A non-nil error makes the indexer treat the section as
	// failed.
	Commit(db ethdb.Database) error
	// UpdateMsg prints a progress update message if necessary (only called
	// when multiple sections need to be processed).
	UpdateMsg(done, all uint64)
}
// NewChainIndexer creates a new ChainIndexer and starts its update loop in a
// separate goroutine.
//
//   - chainDb: database the canonical hashes and headers are read from; it is
//     also the database handed to the backend's Commit
//   - indexDb: database where the index of available processed sections is
//     stored (the index is stored by the indexer, the actual processed chain
//     data is stored by the backend)
//   - backend: an implementation of ChainIndexerBackend
//   - sectionSize: the size of processable sections
//   - confirmReq: required number of confirmation blocks before a new section
//     is being processed
//   - procWait: waiting time between processing sections (simple way of
//     limiting the resource usage of a db upgrade)
//   - stop: quit channel
//
// The number of already processed sections is restored from indexDb.
func NewChainIndexer(chainDb, indexDb ethdb.Database, backend ChainIndexerBackend, sectionSize, confirmReq uint64, procWait time.Duration, stop chan struct{}) *ChainIndexer {
	indexer := &ChainIndexer{
		backend:     backend,
		chainDb:     chainDb,
		indexDb:     indexDb,
		confirmReq:  confirmReq,
		sectionSize: sectionSize,
		procWait:    procWait,
		stop:        stop,
		// One slot is enough: a pending wake-up already covers later requests.
		tryUpdate: make(chan struct{}, 1),
	}
	indexer.stored = indexer.getValidSections()

	go indexer.updateLoop()
	return indexer
}
// updateLoop is the main event loop of the indexer. Each wake-up on tryUpdate
// processes at most one section; if more sections are pending afterwards, a
// new wake-up is scheduled after procWait. The loop exits when stop is closed.
//
// The lock is released while a section is being processed, so a rollback may
// arrive in the meantime. The result is therefore only accepted if the head of
// the previous section is still the one the processing started from.
func (c *ChainIndexer) updateLoop() {
	// updateMsg is true while progress messages are being reported for a run
	// that spans more than one section.
	updateMsg := false

	for {
		select {
		case <-c.stop:
			return

		case <-c.tryUpdate:
			c.lock.Lock()
			if c.targetCount > c.stored {
				if !updateMsg && c.targetCount > c.stored+1 {
					updateMsg = true
					c.backend.UpdateMsg(c.stored, c.targetCount)
				}
				c.calcIdx = c.stored
				var lastSectionHead common.Hash
				if c.calcIdx > 0 {
					lastSectionHead = c.getSectionHead(c.calcIdx - 1)
				}

				// Processing can take long, do not block head notifications.
				c.lock.Unlock()
				sectionHead, ok := c.processSection(c.calcIdx, lastSectionHead)
				c.lock.Lock()

				// The first section has no predecessor whose head could have
				// been rolled back; skip the lookup instead of letting
				// calcIdx-1 wrap around to the maximum uint64.
				if ok && (c.calcIdx == 0 || lastSectionHead == c.getSectionHead(c.calcIdx-1)) {
					c.stored = c.calcIdx + 1
					c.setSectionHead(c.calcIdx, sectionHead)
					c.setValidSections(c.stored)
					if updateMsg {
						c.backend.UpdateMsg(c.stored, c.targetCount)
						if c.stored >= c.targetCount {
							updateMsg = false
						}
					}
					// Forward the last block of the finished section.
					c.lastForwarded = c.stored*c.sectionSize - 1
					for _, cp := range c.children {
						cp.newHead(c.lastForwarded, false)
					}
				} else {
					// if processing has failed, do not retry until further notification
					c.targetCount = c.stored
				}
			}

			if c.targetCount > c.stored {
				// More sections are pending: schedule the next round after
				// procWait, but give up right away if the indexer is stopped
				// so the helper goroutine does not outlive it.
				go func() {
					select {
					case <-time.After(c.procWait):
						c.tryUpdate <- struct{}{}
					case <-c.stop:
					}
				}()
			} else {
				c.updating = false
			}
			c.lock.Unlock()
		}
	}
}
// ChainEventLoop runs in a goroutine and feeds blockchain events to the
// indexer by calling newHead (not needed for child indexers where the parent
// calls newHead).
//
// currentHeader is reported as the initial head. Every following ChainEvent is
// reported as a new head; it is flagged as a rollback if its parent is not the
// previously seen head. The loop returns when the stop channel is closed or
// when the event subscription is closed, and releases the subscription on
// return.
func (c *ChainIndexer) ChainEventLoop(currentHeader *types.Header, eventMux *event.TypeMux) {
	sub := eventMux.Subscribe(ChainEvent{})
	defer sub.Unsubscribe()

	c.newHead(currentHeader.Number.Uint64(), false)
	lastHead := currentHeader.Hash()

	for {
		select {
		case <-c.stop:
			return

		case ev, ok := <-sub.Chan():
			if !ok {
				// The subscription channel was closed; there will be no
				// further events and ev carries no data.
				return
			}
			header := ev.Data.(ChainEvent).Block.Header()
			c.newHead(header.Number.Uint64(), header.ParentHash != lastHead)
			lastHead = header.Hash()
		}
	}
}
// AddChildIndexer adds a child ChainIndexer that can use the output of this one.
//
// The list of children is read under the indexer lock by the update loop and
// by newHead, so it is modified under the same lock here.
func (c *ChainIndexer) AddChildIndexer(ci *ChainIndexer) {
	c.lock.Lock()
	defer c.lock.Unlock()

	c.children = append(c.children, ci)
}
// newHead notifies the indexer about new chain heads or rollbacks.
//
// For a regular head the number of sections having at least confirmReq
// confirmations is calculated and, if it grew, the update loop is woken up.
// For a rollback every section starting with the one containing headNum is
// invalidated and the rollback is passed on to the children if it reaches
// below the last forwarded block.
func (c *ChainIndexer) newHead(headNum uint64, rollback bool) {
	c.lock.Lock()
	defer c.lock.Unlock()

	if !rollback {
		if headNum < c.confirmReq {
			return
		}
		confirmed := (headNum + 1 - c.confirmReq) / c.sectionSize
		if confirmed <= c.targetCount {
			return
		}
		c.targetCount = confirmed
		// Only wake up the update loop if it is not already busy.
		if !c.updating {
			c.updating = true
			c.tryUpdate <- struct{}{}
		}
		return
	}

	// Rollback: drop everything from the first affected section onwards.
	firstChanged := headNum / c.sectionSize
	if c.targetCount > firstChanged {
		c.targetCount = firstChanged
	}
	if c.stored > firstChanged {
		c.stored = firstChanged
		c.setValidSections(firstChanged)
	}

	rollbackTo := firstChanged * c.sectionSize
	if rollbackTo >= c.lastForwarded {
		return
	}
	c.lastForwarded = rollbackTo
	for _, child := range c.children {
		child.newHead(rollbackTo, true)
	}
}
// processSection processes an entire section by calling backend functions
// while ensuring the continuity of the passed headers. Since the chain mutex
// is not held while processing, the continuity can be broken by a long reorg,
// in which case the function returns with ok == false.
//
// lastSectionHead is the hash the first header of the section has to refer to
// as its parent. On success the hash of the last header of the section is
// returned. A failing backend Commit is reported as ok == false too.
func (c *ChainIndexer) processSection(section uint64, lastSectionHead common.Hash) (sectionHead common.Hash, ok bool) {
	c.backend.Reset(section)

	var (
		none  common.Hash
		prev  = lastSectionHead
		first = section * c.sectionSize
		limit = first + c.sectionSize
	)
	for number := first; number < limit; number++ {
		hash := GetCanonicalHash(c.chainDb, number)
		if hash == none {
			// No canonical block at this height (any more).
			return common.Hash{}, false
		}
		header := GetHeader(c.chainDb, hash, number)
		if header == nil {
			return common.Hash{}, false
		}
		if header.ParentHash != prev {
			// The chain has changed under us.
			return common.Hash{}, false
		}
		c.backend.Process(header)
		prev = header.Hash()
	}

	if c.backend.Commit(c.chainDb) != nil {
		return common.Hash{}, false
	}
	return prev, true
}
// CanonicalSections returns the number of processed sections that are
// consistent with the current canonical chain.
//
// Starting from the last section recorded in the index database, every section
// whose stored head differs from the canonical hash of its last block is
// dropped from the index database until a matching one is found.
func (c *ChainIndexer) CanonicalSections() uint64 {
	c.lock.Lock()
	defer c.lock.Unlock()

	sections := c.getValidSections()
	for ; sections > 0; sections-- {
		lastBlock := sections*c.sectionSize - 1
		if GetCanonicalHash(c.chainDb, lastBlock) == c.getSectionHead(sections-1) {
			return sections
		}
		// The last section is not canonical any more, forget it.
		c.setValidSections(sections - 1)
	}
	return sections
}
// getValidSections reads the number of valid sections from the index
// database. A missing or malformed entry is read as zero.
func (c *ChainIndexer) getValidSections() uint64 {
	// A failed lookup yields no data and therefore falls through to zero.
	raw, _ := c.indexDb.Get([]byte("count"))
	if len(raw) != 8 {
		return 0
	}
	return binary.BigEndian.Uint64(raw)
}
// setValidSections writes the number of valid sections to the index database.
// If the number has decreased, the head references of the dropped sections are
// removed as well.
func (c *ChainIndexer) setValidSections(cnt uint64) {
	// Does nothing unless cnt is below the previously stored count.
	for section, previous := cnt, c.getValidSections(); section < previous; section++ {
		c.removeSectionHead(section)
	}

	encoded := make([]byte, 8)
	binary.BigEndian.PutUint64(encoded, cnt)
	c.indexDb.Put([]byte("count"), encoded)
}
// getSectionHead reads the last block hash of a processed section from the
// index database. The empty hash is returned if there is no properly sized
// entry for the section.
func (c *ChainIndexer) getSectionHead(idx uint64) common.Hash {
	// The key is the "shead" prefix followed by the big endian section index.
	key := make([]byte, len("shead")+8)
	copy(key, "shead")
	binary.BigEndian.PutUint64(key[len("shead"):], idx)

	stored, _ := c.indexDb.Get(key)
	if len(stored) != len(common.Hash{}) {
		return common.Hash{}
	}
	return common.BytesToHash(stored)
}
// setSectionHead writes the last block hash of a processed section to the
// index database.
func (c *ChainIndexer) setSectionHead(idx uint64, shead common.Hash) {
	// The key is the "shead" prefix followed by the big endian section index.
	key := make([]byte, len("shead")+8)
	copy(key, "shead")
	binary.BigEndian.PutUint64(key[len("shead"):], idx)

	c.indexDb.Put(key, shead.Bytes())
}
// removeSectionHead removes the reference to a processed section from the
// index database.
func (c *ChainIndexer) removeSectionHead(idx uint64) {
	// The key is the "shead" prefix followed by the big endian section index.
	key := make([]byte, len("shead")+8)
	copy(key, "shead")
	binary.BigEndian.PutUint64(key[len("shead"):], idx)

	c.indexDb.Delete(key)
}

235
core/chain_indexer_test.go Normal file

@ -0,0 +1,235 @@
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package core implements the Ethereum consensus protocol.
package core
import (
"encoding/binary"
"math/big"
"math/rand"
"testing"
"time"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
)
// TestChainIndexerSingle runs the indexer scenario with a single indexer.
// Each round uses freshly randomized parameters.
func TestChainIndexerSingle(t *testing.T) {
	const rounds = 10

	for round := 0; round < rounds; round++ {
		testChainIndexer(t, 1)
	}
}
// TestChainIndexerWithChildren runs the indexer scenario with chains of two to
// seven indexers, each run using randomized parameters.
func TestChainIndexerWithChildren(t *testing.T) {
	const (
		shortestChain = 2
		longestChain  = 7
	)
	for indexers := shortestChain; indexers <= longestChain; indexers++ {
		testChainIndexer(t, indexers)
	}
}
// testChainIndexer runs a test with either a single ChainIndexer or a chain of
// multiple indexers where each indexer is the child of the previous one.
// The sectionSize and confirmReq parameters are randomized for every indexer.
//
// The scenario builds a chain, extends it block by block, rolls it back twice
// (once after a deliberately failed processing round) and checks after every
// step that each indexer reports the expected number of canonical sections.
func testChainIndexer(t *testing.T, tciCount int) {
	db, _ := ethdb.NewMemDatabase()
	defer db.Close()

	// Stop the indexers even if the test bails out early with Fatalf, so that
	// their goroutines do not leak into later tests. Deferred calls run in
	// reverse order: the indexers are stopped before the database is closed.
	stop := make(chan struct{})
	defer close(stop)

	tciList := make([]*testChainIndex, tciCount)
	var lastIndexer *ChainIndexer
	for i := range tciList {
		tci := &testChainIndex{t: t, sectionSize: uint64(rand.Intn(100) + 1), confirmReq: uint64(rand.Intn(10)), processCh: make(chan uint64)}
		tciList[i] = tci
		// Every indexer gets its own table inside the shared database.
		tci.indexer = NewChainIndexer(db, ethdb.NewTable(db, string([]byte{byte(i)})), tci, tci.sectionSize, tci.confirmReq, 0, stop)
		if cs := tci.indexer.CanonicalSections(); cs != 0 {
			t.Errorf("Expected 0 canonical sections, got %d", cs)
		}
		if lastIndexer != nil {
			lastIndexer.AddChildIndexer(tci.indexer)
		}
		lastIndexer = tci.indexer
	}

	// expectCs expects a certain number of available canonical sections
	expectCs := func(indexer *ChainIndexer, expCs uint64) {
		cnt := 0
		for {
			cs := indexer.CanonicalSections()
			if cs == expCs {
				return
			}
			// keep trying for 10 seconds if it does not match
			cnt++
			if cnt == 10000 {
				t.Fatalf("Expected %d canonical sections, got %d", expCs, cs)
			}
			time.Sleep(time.Millisecond)
		}
	}

	// notify the indexer about a new head or rollback, then expect processed blocks if a section is processable
	notify := func(headNum, expFailAfter uint64, rollback bool) {
		tciList[0].indexer.newHead(headNum, rollback)
		if rollback {
			for _, tci := range tciList {
				headNum = tci.rollback(headNum)
				expectCs(tci.indexer, tci.stored)
			}
		} else {
			for _, tci := range tciList {
				var more bool
				headNum, more = tci.newBlocks(headNum, expFailAfter)
				if !more {
					// Nothing has been forwarded to the children.
					break
				}
				expectCs(tci.indexer, tci.stored)
			}
		}
	}

	for i := uint64(0); i <= 100; i++ {
		testCanonicalHeader(db, i)
	}
	// start indexer with an already existing chain
	notify(100, 100, false)
	// add new blocks one by one
	for i := uint64(101); i <= 1000; i++ {
		testCanonicalHeader(db, i)
		notify(i, i, false)
	}
	// do a rollback
	notify(500, 500, true)
	// create new fork
	for i := uint64(501); i <= 1000; i++ {
		testCanonicalHeader(db, i)
		notify(i, i, false)
	}

	for i := uint64(1001); i <= 1500; i++ {
		testCanonicalHeader(db, i)
	}
	// create a failed processing scenario where less blocks are available at processing time than notified
	notify(2000, 1500, false)
	// notify about a rollback (which could have caused the missing blocks if happened during processing)
	notify(1500, 1500, true)

	// create new fork
	for i := uint64(1501); i <= 2000; i++ {
		testCanonicalHeader(db, i)
		notify(i, i, false)
	}
}
// testCanonicalHeader writes a new canonical header with the number idx to the
// database. The header refers to the current canonical block idx-1 as its
// parent and carries random extra data, so every call yields a different hash.
func testCanonicalHeader(db ethdb.Database, idx uint64) {
	extra := make([]byte, 8)
	binary.BigEndian.PutUint64(extra, uint64(rand.Int63()))

	header := new(types.Header)
	header.Number = big.NewInt(int64(idx))
	header.Extra = extra
	if idx != 0 {
		header.ParentHash = GetCanonicalHash(db, idx-1)
	}

	WriteHeader(db, header)
	WriteCanonicalHash(db, header.Hash(), idx)
}
// testChainIndex implements ChainIndexerBackend and keeps track of what the
// test expects from the indexer it is attached to.
type testChainIndex struct {
	t *testing.T

	// Parameters the attached indexer has been created with.
	sectionSize uint64
	confirmReq  uint64

	// section and headerCnt describe the section currently being processed:
	// its index as passed to Reset and the number of Process calls since.
	section   uint64
	headerCnt uint64
	// stored is the number of sections the test expects to be processed.
	stored uint64

	indexer *ChainIndexer
	// processCh carries the number of every processed block to the test.
	processCh chan uint64
}
// newBlocks expects process calls after new blocks have arrived. If
// expFailAfter < headNum then we are simulating a scenario where a rollback
// has happened after the processing has started and the processing of a
// section fails.
//
// It returns the number of the last block of the last expected section along
// with true, or zero and false if no section is expected to be stored at all.
func (t *testChainIndex) newBlocks(headNum, expFailAfter uint64) (uint64, bool) {
	if headNum >= t.confirmReq {
		sections := (headNum + 1 - t.confirmReq) / t.sectionSize
		if sections > t.stored {
			next, limit := t.stored*t.sectionSize, sections*t.sectionSize
			for ; next < limit; next++ {
				if next > expFailAfter {
					// rolled back after processing started, no more process calls expected
					// wait until updating is done to make sure that processing actually fails
					for idle := false; !idle; {
						t.indexer.lock.Lock()
						idle = !t.indexer.updating
						t.indexer.lock.Unlock()
						if !idle {
							time.Sleep(time.Millisecond)
						}
					}
					// Only the sections before the failing one are complete.
					sections = next / t.sectionSize
					break
				}

				timeout := time.After(10 * time.Second)
				select {
				case got := <-t.processCh:
					if got != next {
						t.t.Errorf("Expected processed block #%d, got #%d", next, got)
					}
				case <-timeout:
					t.t.Fatalf("Expected processed block #%d, got nothing", next)
				}
			}
			t.stored = sections
		}
	}

	if t.stored > 0 {
		return t.stored*t.sectionSize - 1, true
	}
	return 0, false
}
// rollback updates the expected number of stored sections after a rollback to
// headNum and returns the first block number following the remaining
// sections, which the test passes on to the next indexer in the chain.
func (t *testChainIndex) rollback(headNum uint64) uint64 {
	if changed := headNum / t.sectionSize; t.stored > changed {
		t.stored = changed
	}
	return t.stored * t.sectionSize
}
// Reset implements ChainIndexerBackend. It remembers the section being
// processed and restarts the header counter.
func (t *testChainIndex) Reset(section uint64) {
	t.section, t.headerCnt = section, 0
}
// Process implements ChainIndexerBackend. It counts the headers of the current
// section and hands the number of the processed block over to the test, which
// is expected to be waiting for it in newBlocks.
//
// Process is called by the indexer on its own goroutine. Failures are therefore
// reported with Error rather than Fatal, since FailNow must only be called
// from the goroutine running the test function.
func (t *testChainIndex) Process(header *types.Header) {
	t.headerCnt++
	if t.headerCnt > t.sectionSize {
		t.t.Error("Processing too many headers")
	}

	select {
	case <-time.After(10 * time.Second):
		// Nobody was waiting for this block.
		t.t.Error("Unexpected call to Process")
	case t.processCh <- header.Number.Uint64():
	}
}
// Commit implements ChainIndexerBackend. It checks that an entire section has
// been processed since the last Reset and never fails.
func (t *testChainIndex) Commit(db ethdb.Database) error {
	if t.headerCnt == t.sectionSize {
		return nil
	}
	t.t.Error("Not enough headers processed")
	return nil
}
// UpdateMsg implements ChainIndexerBackend. Progress reports are ignored by
// the test backend.
func (t *testChainIndex) UpdateMsg(done, all uint64) {
}