Merge pull request #5674 from ethereum-optimism/felipe/moar-consensus-metrics

feat(proxyd): moar consensus metrics
This commit is contained in:
OptimismBot 2023-05-10 15:39:48 -04:00 committed by GitHub
commit 21284bd651
2 changed files with 114 additions and 7 deletions

@ -211,7 +211,10 @@ func NewConsensusPoller(bg *BackendGroup, opts ...ConsensusOpt) *ConsensusPoller
// UpdateBackend refreshes the consensus state of a single backend // UpdateBackend refreshes the consensus state of a single backend
func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) { func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if cp.IsBanned(be) { banned := cp.IsBanned(be)
RecordConsensusBackendBanned(be, banned)
if banned {
log.Debug("skipping backend banned", "backend", be.Name) log.Debug("skipping backend banned", "backend", be.Name)
return return
} }
@ -220,6 +223,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if !be.Online() || !be.IsHealthy() { if !be.Online() || !be.IsHealthy() {
log.Warn("backend banned - not online or not healthy", "backend", be.Name) log.Warn("backend banned - not online or not healthy", "backend", be.Name)
cp.Ban(be) cp.Ban(be)
return
} }
// if backend is not in sync we'll check again after ban // if backend is not in sync we'll check again after ban
@ -227,7 +231,9 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if err != nil || !inSync { if err != nil || !inSync {
log.Warn("backend banned - not in sync", "backend", be.Name) log.Warn("backend banned - not in sync", "backend", be.Name)
cp.Ban(be) cp.Ban(be)
return
} }
RecordConsensusBackendInSync(be, inSync)
// if backend exhausted rate limit we'll skip it for now // if backend exhausted rate limit we'll skip it for now
if be.IsRateLimited() { if be.IsRateLimited() {
@ -241,6 +247,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
log.Warn("error updating backend", "name", be.Name, "err", err) log.Warn("error updating backend", "name", be.Name, "err", err)
return return
} }
RecordConsensusBackendPeerCount(be, peerCount)
} }
latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest") latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
@ -249,15 +256,17 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
return return
} }
changed := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash) changed, updateDelay := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
if changed { if changed {
RecordBackendLatestBlock(be, latestBlockNumber) RecordBackendLatestBlock(be, latestBlockNumber)
RecordConsensusBackendUpdateDelay(be, updateDelay)
log.Debug("backend state updated", log.Debug("backend state updated",
"name", be.Name, "name", be.Name,
"peerCount", peerCount, "peerCount", peerCount,
"latestBlockNumber", latestBlockNumber, "latestBlockNumber", latestBlockNumber,
"latestBlockHash", latestBlockHash) "latestBlockHash", latestBlockHash,
"updateDelay", updateDelay)
} }
} }
@ -388,11 +397,15 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {
} }
cp.tracker.SetConsensusBlockNumber(proposedBlock) cp.tracker.SetConsensusBlockNumber(proposedBlock)
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
cp.consensusGroupMux.Lock() cp.consensusGroupMux.Lock()
cp.consensusGroup = consensusBackends cp.consensusGroup = consensusBackends
cp.consensusGroupMux.Unlock() cp.consensusGroupMux.Unlock()
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
RecordGroupConsensusCount(cp.backendGroup, len(consensusBackends))
RecordGroupConsensusFilteredCount(cp.backendGroup, len(filteredBackendsNames))
RecordGroupTotalCount(cp.backendGroup, len(cp.backendGroup.Backends))
log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", ")) log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", "))
} }
@ -497,13 +510,14 @@ func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, block
return return
} }
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool) { func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
bs := cp.backendState[be] bs := cp.backendState[be]
bs.backendStateMux.Lock() bs.backendStateMux.Lock()
changed = bs.latestBlockHash != blockHash changed = bs.latestBlockHash != blockHash
bs.peerCount = peerCount bs.peerCount = peerCount
bs.latestBlockNumber = blockNumber bs.latestBlockNumber = blockNumber
bs.latestBlockHash = blockHash bs.latestBlockHash = blockHash
updateDelay = time.Since(bs.lastUpdate)
bs.lastUpdate = time.Now() bs.lastUpdate = time.Now()
bs.backendStateMux.Unlock() bs.backendStateMux.Unlock()
return return

@ -4,6 +4,7 @@ import (
"context" "context"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/common/hexutil"
@ -260,6 +261,62 @@ var (
}, []string{ }, []string{
"backend_name", "backend_name",
}) })
// consensusGroupCount is a gauge vector labelled by backend_group_name.
// RecordGroupConsensusCount sets it to the count it is handed.
consensusGroupCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_count",
Help: "Consensus group serving traffic count",
}, []string{
"backend_group_name",
})
// consensusGroupFilteredCount is a gauge vector labelled by backend_group_name.
// RecordGroupConsensusFilteredCount sets it to the count it is handed.
consensusGroupFilteredCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_filtered_count",
Help: "Consensus group filtered out from serving traffic count",
}, []string{
"backend_group_name",
})
// consensusGroupTotalCount is a gauge vector labelled by backend_group_name.
// RecordGroupTotalCount sets it to the count it is handed.
consensusGroupTotalCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_total_count",
Help: "Total count of candidates to be part of consensus group",
}, []string{
"backend_group_name",
})
// consensusBannedBackends is a gauge vector labelled by backend_name.
// RecordConsensusBackendBanned writes 1 for a banned backend and 0 otherwise.
consensusBannedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_banned",
Help: "Bool gauge for banned backends",
}, []string{
"backend_name",
})
// consensusPeerCountBackend is a gauge vector labelled by backend_name.
// RecordConsensusBackendPeerCount sets it to the peer count it is handed.
consensusPeerCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_peer_count",
Help: "Peer count",
}, []string{
"backend_name",
})
// consensusInSyncBackend is a gauge vector labelled by backend_name.
// RecordConsensusBackendInSync writes 1 when the backend is in sync and 0
// otherwise.
consensusInSyncBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_in_sync",
Help: "Bool gauge for backends in sync",
}, []string{
"backend_name",
})
// consensusUpdateDelayBackend is a gauge vector labelled by backend_name.
// RecordConsensusBackendUpdateDelay sets it to a duration expressed in whole
// milliseconds.
consensusUpdateDelayBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_update_delay",
Help: "Delay (ms) for backend update",
}, []string{
"backend_name",
})
) )
func RecordRedisError(source string) { func RecordRedisError(source string) {
@ -321,10 +378,46 @@ func RecordBatchSize(size int) {
batchSizeHistogram.Observe(float64(size)) batchSizeHistogram.Observe(float64(size))
} }
// RecordGroupConsensusLatestBlock publishes blockNumber on the
// consensusLatestBlock gauge, using the group's name as the label value.
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
	gauge := consensusLatestBlock.WithLabelValues(group.Name)
	gauge.Set(float64(blockNumber))
}
// RecordGroupConsensusCount publishes count on the consensusGroupCount gauge,
// using the group's name as the label value.
func RecordGroupConsensusCount(group *BackendGroup, count int) {
	gauge := consensusGroupCount.WithLabelValues(group.Name)
	gauge.Set(float64(count))
}
// RecordGroupConsensusFilteredCount publishes count on the
// consensusGroupFilteredCount gauge, using the group's name as the label value.
func RecordGroupConsensusFilteredCount(group *BackendGroup, count int) {
	gauge := consensusGroupFilteredCount.WithLabelValues(group.Name)
	gauge.Set(float64(count))
}
// RecordGroupTotalCount publishes count on the consensusGroupTotalCount gauge,
// using the group's name as the label value.
func RecordGroupTotalCount(group *BackendGroup, count int) {
	gauge := consensusGroupTotalCount.WithLabelValues(group.Name)
	gauge.Set(float64(count))
}
// RecordBackendLatestBlock sets the backendLatestBlockBackend gauge for
// be.Name to blockNumber.
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) { func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber)) backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
} }
// RecordConsensusBackendBanned sets the consensusBannedBackends gauge for
// be.Name to 1 when banned is true and to 0 otherwise.
// NOTE(review): the leading RecordGroupConsensusLatestBlock text on the next
// two lines looks like the removed side of a side-by-side diff — confirm.
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) { func RecordConsensusBackendBanned(be *Backend, banned bool) {
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber)) v := float64(0)
if banned {
v = float64(1)
}
consensusBannedBackends.WithLabelValues(be.Name).Set(v)
}
// RecordConsensusBackendPeerCount publishes peerCount on the
// consensusPeerCountBackend gauge, using the backend's name as the label value.
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
	gauge := consensusPeerCountBackend.WithLabelValues(be.Name)
	gauge.Set(float64(peerCount))
}
// RecordConsensusBackendInSync publishes the in-sync flag for a backend on the
// consensusInSyncBackend gauge: 1 when inSync is true, 0 otherwise.
//
// NOTE(review): the UpdateBackend call site shown alongside this change
// returns before reaching this call when the backend is not in sync, so that
// caller appears to only ever pass true — confirm whether 0 should be
// recorded there as well.
func RecordConsensusBackendInSync(be *Backend, inSync bool) {
	gauge := consensusInSyncBackend.WithLabelValues(be.Name)
	if inSync {
		gauge.Set(1)
		return
	}
	gauge.Set(0)
}
// RecordConsensusBackendUpdateDelay sets the consensusUpdateDelayBackend gauge
// for be.Name to delay expressed in whole milliseconds (Duration.Milliseconds
// drops any sub-millisecond remainder).
// NOTE(review): setBackendState derives the delay from time.Since(bs.lastUpdate);
// if lastUpdate is still the zero time on a backend's first update the value
// would be enormous — verify how lastUpdate is initialised.
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Milliseconds()))
} }