moar consensus metrics

This commit is contained in:
Felipe Andrade 2023-05-09 17:21:25 -07:00
parent 71d858cdd3
commit cde638b11d
2 changed files with 88 additions and 7 deletions

@ -203,7 +203,10 @@ func NewConsensusPoller(bg *BackendGroup, opts ...ConsensusOpt) *ConsensusPoller
// UpdateBackend refreshes the consensus state of a single backend // UpdateBackend refreshes the consensus state of a single backend
func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) { func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if cp.IsBanned(be) { banned := cp.IsBanned(be)
RecordConsensusBackendBanned(be, banned)
if banned {
log.Debug("skipping backend banned", "backend", be.Name) log.Debug("skipping backend banned", "backend", be.Name)
return return
} }
@ -212,6 +215,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if !be.Online() || !be.IsHealthy() { if !be.Online() || !be.IsHealthy() {
log.Warn("backend banned - not online or not healthy", "backend", be.Name) log.Warn("backend banned - not online or not healthy", "backend", be.Name)
cp.Ban(be) cp.Ban(be)
return
} }
// if backend it not in sync we'll check again after ban // if backend it not in sync we'll check again after ban
@ -219,7 +223,9 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if err != nil || !inSync { if err != nil || !inSync {
log.Warn("backend banned - not in sync", "backend", be.Name) log.Warn("backend banned - not in sync", "backend", be.Name)
cp.Ban(be) cp.Ban(be)
return
} }
RecordConsensusBackendInSync(be, inSync)
// if backend exhausted rate limit we'll skip it for now // if backend exhausted rate limit we'll skip it for now
if be.IsRateLimited() { if be.IsRateLimited() {
@ -234,6 +240,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
return return
} }
} }
RecordConsensusBackendPeerCount(be, peerCount)
latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest") latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
if err != nil { if err != nil {
@ -241,15 +248,17 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
return return
} }
changed := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash) changed, updateDelay := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
if changed { if changed {
RecordBackendLatestBlock(be, latestBlockNumber) RecordBackendLatestBlock(be, latestBlockNumber)
RecordConsensusBackendUpdateDelay(be, updateDelay)
log.Debug("backend state updated", log.Debug("backend state updated",
"name", be.Name, "name", be.Name,
"peerCount", peerCount, "peerCount", peerCount,
"latestBlockNumber", latestBlockNumber, "latestBlockNumber", latestBlockNumber,
"latestBlockHash", latestBlockHash) "latestBlockHash", latestBlockHash,
"updateDelay", updateDelay)
} }
} }
@ -354,11 +363,13 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {
} }
cp.tracker.SetConsensusBlockNumber(proposedBlock) cp.tracker.SetConsensusBlockNumber(proposedBlock)
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
cp.consensusGroupMux.Lock() cp.consensusGroupMux.Lock()
cp.consensusGroup = consensusBackends cp.consensusGroup = consensusBackends
cp.consensusGroupMux.Unlock() cp.consensusGroupMux.Unlock()
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
RecordGroupConsensusCount(cp.backendGroup, len(consensusBackends))
log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", ")) log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", "))
} }
@ -463,13 +474,14 @@ func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, block
return return
} }
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool) { func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
bs := cp.backendState[be] bs := cp.backendState[be]
bs.backendStateMux.Lock() bs.backendStateMux.Lock()
changed = bs.latestBlockHash != blockHash changed = bs.latestBlockHash != blockHash
bs.peerCount = peerCount bs.peerCount = peerCount
bs.latestBlockNumber = blockNumber bs.latestBlockNumber = blockNumber
bs.latestBlockHash = blockHash bs.latestBlockHash = blockHash
updateDelay = time.Now().Sub(bs.lastUpdate)
bs.lastUpdate = time.Now() bs.lastUpdate = time.Now()
bs.backendStateMux.Unlock() bs.backendStateMux.Unlock()
return return

@ -4,6 +4,7 @@ import (
"context" "context"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/common/hexutil"
@ -260,6 +261,46 @@ var (
}, []string{ }, []string{
"backend_name", "backend_name",
}) })
consensusGroupCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_count",
Help: "Consensus group count",
}, []string{
"backend_group_name",
})
consensusBannedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_banned",
Help: "Bool gauge for banned backends",
}, []string{
"backend_name",
})
consensusPeerCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_peer_count",
Help: "Peer count",
}, []string{
"backend_name",
})
consensusInSyncBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_in_sync",
Help: "Bool gauge for backends in sync",
}, []string{
"backend_name",
})
consensusUpdateDelayBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_update_delay",
Help: "Delay (ms) for backend update",
}, []string{
"backend_name",
})
) )
func RecordRedisError(source string) { func RecordRedisError(source string) {
@ -321,10 +362,38 @@ func RecordBatchSize(size int) {
batchSizeHistogram.Observe(float64(size)) batchSizeHistogram.Observe(float64(size))
} }
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber))
}
func RecordGroupConsensusCount(group *BackendGroup, count int) {
consensusGroupCount.WithLabelValues(group.Name).Set(float64(count))
}
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) { func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber)) backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
} }
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) { func RecordConsensusBackendBanned(be *Backend, banned bool) {
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber)) v := float64(0)
if banned {
v = float64(1)
}
consensusBannedBackends.WithLabelValues(be.Name).Set(v)
}
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
consensusPeerCountBackend.WithLabelValues(be.Name).Set(float64(peerCount))
}
func RecordConsensusBackendInSync(be *Backend, inSync bool) {
v := float64(0)
if inSync {
v = float64(1)
}
consensusInSyncBackend.WithLabelValues(be.Name).Set(v)
}
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Round(time.Millisecond)))
} }