This PR adds an extra mechanism to sync.HeadSync that tries to retrieve the latest finality update from every server each time it sends an optimistic update in a new epoch (unless a validated finality update attested in the same epoch is already available). Note that this request is unnecessary, and does not happen, if the new finality update is delivered before the optimistic update. The spec only mandates light_client_finality_update events when a new epoch is finalized; if the chain does not finalize for a while, we may need an explicit request that returns a finality proof proving the same finalized epoch from the latest attested epoch.
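
A minimal sketch of the gating rule described above; the function and parameter names are illustrative and not taken from the actual HeadSync code, and slotsPerEpoch mirrors the mainnet params.EpochLength of 32:

package main

import "fmt"

const slotsPerEpoch = 32 // params.EpochLength on mainnet

// needFinalityRequest sketches the rule: request the latest finality update
// when an optimistic update enters a new epoch, unless a validated finality
// update attested in that same epoch is already known.
func needFinalityRequest(optimisticSlot, lastRequestSlot, finalityAttestedSlot uint64) bool {
	epoch := optimisticSlot / slotsPerEpoch
	if epoch == lastRequestSlot/slotsPerEpoch {
		return false // already handled in this epoch
	}
	return finalityAttestedSlot/slotsPerEpoch < epoch // no validated finality update attested this epoch
}

func main() {
	fmt.Println(needFinalityRequest(64, 40, 33)) // true: new epoch, known finality update is stale
	fmt.Println(needFinalityRequest(65, 64, 33)) // false: this epoch was already handled
	fmt.Println(needFinalityRequest(96, 70, 96)) // false: finality update already attested in epoch 3
}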
// Copyright 2023 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package sync

import (
	"sort"

	"github.com/ethereum/go-ethereum/beacon/light"
	"github.com/ethereum/go-ethereum/beacon/light/request"
	"github.com/ethereum/go-ethereum/beacon/params"
	"github.com/ethereum/go-ethereum/beacon/types"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
)

const maxUpdateRequest = 8 // maximum number of updates requested in a single request
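
// committeeChain abstracts light.CommitteeChain, presumably so that a mock
// chain can be substituted in tests.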
type committeeChain interface {
	CheckpointInit(bootstrap types.BootstrapData) error
	InsertUpdate(update *types.LightClientUpdate, nextCommittee *types.SerializedSyncCommittee) error
	NextSyncPeriod() (uint64, bool)
}

// CheckpointInit implements request.Module; it fetches the light client bootstrap
// data belonging to the given checkpoint hash and initializes the committee chain
// if successful.
type CheckpointInit struct {
	chain          committeeChain
	checkpointHash common.Hash
	locked         request.ServerAndID
	initialized    bool

	// per-server state is used to track the state of requesting checkpoint header
	// info. Part of this info (canonical and finalized state) is not validated
	// and therefore it is requested from each server separately after it has
	// reported a missing checkpoint (which is also not validated info).
	serverState map[request.Server]serverState

	// the following fields are used to determine whether the checkpoint is on an
	// epoch boundary. This information is validated and therefore stored globally.
	parentHash                  common.Hash
	hasEpochInfo, epochBoundary bool
	cpSlot, parentSlot          uint64
}

const (
	ssDefault         = iota // no action yet or checkpoint requested
	ssNeedHeader             // checkpoint req failed, need cp header
	ssHeaderRequested        // cp header requested
	ssNeedParent             // cp header slot % 32 != 0, need parent to check epoch boundary
	ssParentRequested        // cp parent header requested
	ssPrintStatus            // has all necessary info, print log message if init still not successful
	ssDone                   // log message printed, no more action required
)
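
// The per-server state machine progresses as
//
//	ssDefault -> ssNeedHeader -> ssHeaderRequested -> ssNeedParent ->
//	ssParentRequested -> ssPrintStatus -> ssDone
//
// where ssNeedParent and ssParentRequested are skipped if the checkpoint
// header slot alone proves an epoch boundary (or another server has already
// provided the epoch info), and ssHeaderRequested jumps straight to
// ssPrintStatus if the header request fails.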
type serverState struct {
	state                           int
	hasHeader, canonical, finalized bool // stored per server because not validated
}

// NewCheckpointInit creates a new CheckpointInit.
func NewCheckpointInit(chain committeeChain, checkpointHash common.Hash) *CheckpointInit {
	return &CheckpointInit{
		chain:          chain,
		checkpointHash: checkpointHash,
		serverState:    make(map[request.Server]serverState),
	}
}

// Process implements request.Module.
func (s *CheckpointInit) Process(requester request.Requester, events []request.Event) {
	if s.initialized {
		return
	}

	for _, event := range events {
		switch event.Type {
		case request.EvResponse, request.EvFail, request.EvTimeout:
			sid, req, resp := event.RequestInfo()
			if s.locked == sid {
				s.locked = request.ServerAndID{}
			}
			if event.Type == request.EvTimeout {
				continue
			}
			switch s.serverState[sid.Server].state {
			case ssDefault:
				if resp != nil {
					if checkpoint := resp.(*types.BootstrapData); checkpoint.Header.Hash() == common.Hash(req.(ReqCheckpointData)) {
						s.chain.CheckpointInit(*checkpoint)
						s.initialized = true
						return
					}
					requester.Fail(event.Server, "invalid checkpoint data")
				}
				s.serverState[sid.Server] = serverState{state: ssNeedHeader}
			case ssHeaderRequested:
				if resp == nil {
					s.serverState[sid.Server] = serverState{state: ssPrintStatus}
					continue
				}
				newState := serverState{
					hasHeader: true,
					canonical: resp.(RespHeader).Canonical,
					finalized: resp.(RespHeader).Finalized,
				}
				s.cpSlot, s.parentHash = resp.(RespHeader).Header.Slot, resp.(RespHeader).Header.ParentRoot
				if s.cpSlot%params.EpochLength == 0 {
					s.hasEpochInfo, s.epochBoundary = true, true
				}
				if s.hasEpochInfo {
					newState.state = ssPrintStatus
				} else {
					newState.state = ssNeedParent
				}
				s.serverState[sid.Server] = newState
			case ssParentRequested:
				s.parentSlot = resp.(RespHeader).Header.Slot
				s.hasEpochInfo, s.epochBoundary = true, s.cpSlot/params.EpochLength > s.parentSlot/params.EpochLength
				newState := s.serverState[sid.Server]
				newState.state = ssPrintStatus
				s.serverState[sid.Server] = newState
			}

		case request.EvUnregistered:
			delete(s.serverState, event.Server)
		}
	}

	// start a request if possible
	for _, server := range requester.CanSendTo() {
		switch s.serverState[server].state {
		case ssDefault:
			if s.locked == (request.ServerAndID{}) {
				id := requester.Send(server, ReqCheckpointData(s.checkpointHash))
				s.locked = request.ServerAndID{Server: server, ID: id}
			}
		case ssNeedHeader:
			requester.Send(server, ReqHeader(s.checkpointHash))
			newState := s.serverState[server]
			newState.state = ssHeaderRequested
			s.serverState[server] = newState
		case ssNeedParent:
			requester.Send(server, ReqHeader(s.parentHash))
			newState := s.serverState[server]
			newState.state = ssParentRequested
			s.serverState[server] = newState
		}
	}

	// print log message if necessary
	for server, state := range s.serverState {
		if state.state != ssPrintStatus {
			continue
		}
		switch {
		case !state.hasHeader:
			log.Error("blsync: checkpoint block is not available, reported as unknown", "server", server.Name())
		case !state.canonical:
			log.Error("blsync: checkpoint block is not available, reported as non-canonical", "server", server.Name())
		case !s.hasEpochInfo:
			// should be available if hasHeader is true and state is ssPrintStatus
			panic("checkpoint epoch info not available when printing retrieval status")
		case !s.epochBoundary:
			log.Error("blsync: checkpoint block is not first of epoch", "slot", s.cpSlot, "parent", s.parentSlot, "server", server.Name())
		case !state.finalized:
			log.Error("blsync: checkpoint block is reported as non-finalized", "server", server.Name())
		default:
			log.Error("blsync: checkpoint not available, but reported as finalized; specified checkpoint hash might be too old", "server", server.Name())
		}
		s.serverState[server] = serverState{state: ssDone}
	}
}

// ForwardUpdateSync implements request.Module; it fetches updates between the
// committee chain head and each server's announced head. Updates are fetched
// in batches and multiple batches can also be requested in parallel.
// Out of order responses are also handled; if a batch of updates cannot be added
// to the chain immediately because of a gap then the future updates are
// remembered until they can be processed.
type ForwardUpdateSync struct {
	chain          committeeChain
	rangeLock      rangeLock
	lockedIDs      map[request.ServerAndID]struct{}
	processQueue   []updateResponse
	nextSyncPeriod map[request.Server]uint64
}

// NewForwardUpdateSync creates a new ForwardUpdateSync.
func NewForwardUpdateSync(chain committeeChain) *ForwardUpdateSync {
	return &ForwardUpdateSync{
		chain:          chain,
		rangeLock:      make(rangeLock),
		lockedIDs:      make(map[request.ServerAndID]struct{}),
		nextSyncPeriod: make(map[request.Server]uint64),
	}
}

// rangeLock allows locking sections of an integer space, preventing the syncing
// mechanism from making requests again for sections where a request that has not
// timed out is already pending or where already fetched and unprocessed data is
// available.
type rangeLock map[uint64]int

// lock locks or unlocks the given section, depending on the sign of the add parameter.
func (r rangeLock) lock(first, count uint64, add int) {
	for i := first; i < first+count; i++ {
		if v := r[i] + add; v > 0 {
			r[i] = v
		} else {
			delete(r, i)
		}
	}
}
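
// For example (hypothetical values): after lock(8, 4, 1) and lock(10, 4, 1),
// periods 8..13 are locked, with 10 and 11 reference-counted at 2; a single
// lock(10, 4, -1) then leaves only periods 8..11 locked.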

// firstUnlocked returns the first unlocked section starting at or after start
// and not longer than maxCount.
func (r rangeLock) firstUnlocked(start, maxCount uint64) (first, count uint64) {
	first = start
	for {
		if _, ok := r[first]; !ok {
			break
		}
		first++
	}
	for {
		count++
		if count == maxCount {
			break
		}
		if _, ok := r[first+count]; ok {
			break
		}
	}
	return
}
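
// Continuing the example above (periods 8..11 locked), firstUnlocked(8, 8)
// skips the locked periods and returns (12, 8), since nothing is locked at or
// after period 12.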

// lockRange locks the range belonging to the given update request, unless the
// same request has already been locked.
func (s *ForwardUpdateSync) lockRange(sid request.ServerAndID, req ReqUpdates) {
	if _, ok := s.lockedIDs[sid]; ok {
		return
	}
	s.lockedIDs[sid] = struct{}{}
	s.rangeLock.lock(req.FirstPeriod, req.Count, 1)
}

// unlockRange unlocks the range belonging to the given update request, unless
// the same request has already been unlocked.
func (s *ForwardUpdateSync) unlockRange(sid request.ServerAndID, req ReqUpdates) {
	if _, ok := s.lockedIDs[sid]; !ok {
		return
	}
	delete(s.lockedIDs, sid)
	s.rangeLock.lock(req.FirstPeriod, req.Count, -1)
}

// verifyRange returns true if the number of updates and the individual update
// periods in the response match the requested section.
func (s *ForwardUpdateSync) verifyRange(request ReqUpdates, response RespUpdates) bool {
	if uint64(len(response.Updates)) != request.Count || uint64(len(response.Committees)) != request.Count {
		return false
	}
	for i, update := range response.Updates {
		if update.AttestedHeader.Header.SyncPeriod() != request.FirstPeriod+uint64(i) {
			return false
		}
	}
	return true
}

// updateResponse is a response that has passed initial verification and has been
// queued for processing. Note that an update response cannot be processed until
// the previous updates have also been added to the chain.
type updateResponse struct {
	sid      request.ServerAndID
	request  ReqUpdates
	response RespUpdates
}

// updateResponseList implements sort.Interface and sorts update request/response events by FirstPeriod.
type updateResponseList []updateResponse

func (u updateResponseList) Len() int      { return len(u) }
func (u updateResponseList) Swap(i, j int) { u[i], u[j] = u[j], u[i] }
func (u updateResponseList) Less(i, j int) bool {
	return u[i].request.FirstPeriod < u[j].request.FirstPeriod
}

// Process implements request.Module.
func (s *ForwardUpdateSync) Process(requester request.Requester, events []request.Event) {
	for _, event := range events {
		switch event.Type {
		case request.EvResponse, request.EvFail, request.EvTimeout:
			sid, rq, rs := event.RequestInfo()
			req := rq.(ReqUpdates)
			var queued bool
			if event.Type == request.EvResponse {
				resp := rs.(RespUpdates)
				if s.verifyRange(req, resp) {
					// there is a response with a valid format; put it in the process queue
					s.processQueue = append(s.processQueue, updateResponse{sid: sid, request: req, response: resp})
					s.lockRange(sid, req)
					queued = true
				} else {
					requester.Fail(event.Server, "invalid update range")
				}
			}
			if !queued {
				s.unlockRange(sid, req)
			}
		case EvNewOptimisticUpdate:
			update := event.Data.(types.OptimisticUpdate)
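			// types.SyncPeriod(SignatureSlot+256) is the sync period of the
			// signature slot, except in the last 256 slots (8 epochs) of a
			// period, where it already points to the next period; a server
			// signing heads that close to the boundary is assumed to also be
			// able to serve the update of the period being finished.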
			s.nextSyncPeriod[event.Server] = types.SyncPeriod(update.SignatureSlot + 256)
		case request.EvUnregistered:
			delete(s.nextSyncPeriod, event.Server)
		}
	}

	// try processing ordered list of available responses
	sort.Sort(updateResponseList(s.processQueue))
	for s.processQueue != nil {
		u := s.processQueue[0]
		if !s.processResponse(requester, u) {
			break
		}
		s.unlockRange(u.sid, u.request)
		s.processQueue = s.processQueue[1:]
		if len(s.processQueue) == 0 {
			s.processQueue = nil
		}
	}

	// start new requests if possible
	startPeriod, chainInit := s.chain.NextSyncPeriod()
	if !chainInit {
		return
	}
	for {
		firstPeriod, maxCount := s.rangeLock.firstUnlocked(startPeriod, maxUpdateRequest)
		var (
			sendTo    request.Server
			bestCount uint64
		)
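		// pick the server that can serve the longest prefix of the first
		// unlocked range; bestCount is the number of periods it can serve
		// starting at firstPeriod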
		for _, server := range requester.CanSendTo() {
			nextPeriod := s.nextSyncPeriod[server]
			if nextPeriod <= firstPeriod {
				continue
			}
			count := maxCount
			if nextPeriod < firstPeriod+maxCount {
				count = nextPeriod - firstPeriod
			}
			if count > bestCount {
				sendTo, bestCount = server, count
			}
		}
		if sendTo == nil {
			return
		}
		req := ReqUpdates{FirstPeriod: firstPeriod, Count: bestCount}
		id := requester.Send(sendTo, req)
		s.lockRange(request.ServerAndID{Server: sendTo, ID: id}, req)
	}
}

// processResponse adds the fetched updates and committees to the committee chain.
// Returns true in case of full or partial success.
func (s *ForwardUpdateSync) processResponse(requester request.Requester, u updateResponse) (success bool) {
	for i, update := range u.response.Updates {
		if err := s.chain.InsertUpdate(update, u.response.Committees[i]); err != nil {
			if err == light.ErrInvalidPeriod {
				// there is a gap in the update periods; stop processing without
				// failing and try again next time
				return
			}
			if err == light.ErrInvalidUpdate || err == light.ErrWrongCommitteeRoot || err == light.ErrCannotReorg {
				requester.Fail(u.sid.Server, "invalid update received")
			} else {
				log.Error("Unexpected InsertUpdate error", "error", err)
			}
			return
		}
		success = true
	}
	return
}