Merge pull request #5674 from ethereum-optimism/felipe/moar-consensus-metrics

feat(proxyd): moar consensus metrics
This commit is contained in:
OptimismBot 2023-05-10 15:39:48 -04:00 committed by GitHub
commit 21284bd651
2 changed files with 114 additions and 7 deletions

@ -211,7 +211,10 @@ func NewConsensusPoller(bg *BackendGroup, opts ...ConsensusOpt) *ConsensusPoller
// UpdateBackend refreshes the consensus state of a single backend
func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if cp.IsBanned(be) {
banned := cp.IsBanned(be)
RecordConsensusBackendBanned(be, banned)
if banned {
log.Debug("skipping backend banned", "backend", be.Name)
return
}
@ -220,6 +223,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if !be.Online() || !be.IsHealthy() {
log.Warn("backend banned - not online or not healthy", "backend", be.Name)
cp.Ban(be)
return
}
// if backend it not in sync we'll check again after ban
@ -227,7 +231,9 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if err != nil || !inSync {
log.Warn("backend banned - not in sync", "backend", be.Name)
cp.Ban(be)
return
}
RecordConsensusBackendInSync(be, inSync)
// if backend exhausted rate limit we'll skip it for now
if be.IsRateLimited() {
@ -241,6 +247,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
log.Warn("error updating backend", "name", be.Name, "err", err)
return
}
RecordConsensusBackendPeerCount(be, peerCount)
}
latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
@ -249,15 +256,17 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
return
}
changed := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
changed, updateDelay := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
if changed {
RecordBackendLatestBlock(be, latestBlockNumber)
RecordConsensusBackendUpdateDelay(be, updateDelay)
log.Debug("backend state updated",
"name", be.Name,
"peerCount", peerCount,
"latestBlockNumber", latestBlockNumber,
"latestBlockHash", latestBlockHash)
"latestBlockHash", latestBlockHash,
"updateDelay", updateDelay)
}
}
@ -388,11 +397,15 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {
}
cp.tracker.SetConsensusBlockNumber(proposedBlock)
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
cp.consensusGroupMux.Lock()
cp.consensusGroup = consensusBackends
cp.consensusGroupMux.Unlock()
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
RecordGroupConsensusCount(cp.backendGroup, len(consensusBackends))
RecordGroupConsensusFilteredCount(cp.backendGroup, len(filteredBackendsNames))
RecordGroupTotalCount(cp.backendGroup, len(cp.backendGroup.Backends))
log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", "))
}
@ -497,13 +510,14 @@ func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, block
return
}
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool) {
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
bs := cp.backendState[be]
bs.backendStateMux.Lock()
changed = bs.latestBlockHash != blockHash
bs.peerCount = peerCount
bs.latestBlockNumber = blockNumber
bs.latestBlockHash = blockHash
updateDelay = time.Since(bs.lastUpdate)
bs.lastUpdate = time.Now()
bs.backendStateMux.Unlock()
return

@ -4,6 +4,7 @@ import (
"context"
"strconv"
"strings"
"time"
"github.com/ethereum/go-ethereum/common/hexutil"
@ -260,6 +261,62 @@ var (
}, []string{
"backend_name",
})
consensusGroupCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_count",
Help: "Consensus group serving traffic count",
}, []string{
"backend_group_name",
})
consensusGroupFilteredCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_filtered_count",
Help: "Consensus group filtered out from serving traffic count",
}, []string{
"backend_group_name",
})
consensusGroupTotalCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_total_count",
Help: "Total count of candidates to be part of consensus group",
}, []string{
"backend_group_name",
})
consensusBannedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_banned",
Help: "Bool gauge for banned backends",
}, []string{
"backend_name",
})
consensusPeerCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_peer_count",
Help: "Peer count",
}, []string{
"backend_name",
})
consensusInSyncBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_in_sync",
Help: "Bool gauge for backends in sync",
}, []string{
"backend_name",
})
consensusUpdateDelayBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_update_delay",
Help: "Delay (ms) for backend update",
}, []string{
"backend_name",
})
)
func RecordRedisError(source string) {
@ -321,10 +378,46 @@ func RecordBatchSize(size int) {
batchSizeHistogram.Observe(float64(size))
}
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber))
}
func RecordGroupConsensusCount(group *BackendGroup, count int) {
consensusGroupCount.WithLabelValues(group.Name).Set(float64(count))
}
func RecordGroupConsensusFilteredCount(group *BackendGroup, count int) {
consensusGroupFilteredCount.WithLabelValues(group.Name).Set(float64(count))
}
func RecordGroupTotalCount(group *BackendGroup, count int) {
consensusGroupTotalCount.WithLabelValues(group.Name).Set(float64(count))
}
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
}
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber))
func RecordConsensusBackendBanned(be *Backend, banned bool) {
v := float64(0)
if banned {
v = float64(1)
}
consensusBannedBackends.WithLabelValues(be.Name).Set(v)
}
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
consensusPeerCountBackend.WithLabelValues(be.Name).Set(float64(peerCount))
}
func RecordConsensusBackendInSync(be *Backend, inSync bool) {
v := float64(0)
if inSync {
v = float64(1)
}
consensusInSyncBackend.WithLabelValues(be.Name).Set(v)
}
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Milliseconds()))
}