Merge pull request #5674 from ethereum-optimism/felipe/moar-consensus-metrics
feat(proxyd): moar consensus metrics
This commit is contained in:
commit
21284bd651
@ -211,7 +211,10 @@ func NewConsensusPoller(bg *BackendGroup, opts ...ConsensusOpt) *ConsensusPoller
|
|||||||
|
|
||||||
// UpdateBackend refreshes the consensus state of a single backend
|
// UpdateBackend refreshes the consensus state of a single backend
|
||||||
func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
|
func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
|
||||||
if cp.IsBanned(be) {
|
banned := cp.IsBanned(be)
|
||||||
|
RecordConsensusBackendBanned(be, banned)
|
||||||
|
|
||||||
|
if banned {
|
||||||
log.Debug("skipping backend banned", "backend", be.Name)
|
log.Debug("skipping backend banned", "backend", be.Name)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -220,6 +223,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
|
|||||||
if !be.Online() || !be.IsHealthy() {
|
if !be.Online() || !be.IsHealthy() {
|
||||||
log.Warn("backend banned - not online or not healthy", "backend", be.Name)
|
log.Warn("backend banned - not online or not healthy", "backend", be.Name)
|
||||||
cp.Ban(be)
|
cp.Ban(be)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// if backend is not in sync we'll check again after ban
|
// if backend is not in sync we'll check again after ban
|
||||||
@ -227,7 +231,9 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
|
|||||||
if err != nil || !inSync {
|
if err != nil || !inSync {
|
||||||
log.Warn("backend banned - not in sync", "backend", be.Name)
|
log.Warn("backend banned - not in sync", "backend", be.Name)
|
||||||
cp.Ban(be)
|
cp.Ban(be)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
RecordConsensusBackendInSync(be, inSync)
|
||||||
|
|
||||||
// if backend exhausted rate limit we'll skip it for now
|
// if backend exhausted rate limit we'll skip it for now
|
||||||
if be.IsRateLimited() {
|
if be.IsRateLimited() {
|
||||||
@ -241,6 +247,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
|
|||||||
log.Warn("error updating backend", "name", be.Name, "err", err)
|
log.Warn("error updating backend", "name", be.Name, "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
RecordConsensusBackendPeerCount(be, peerCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
|
latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
|
||||||
@ -249,15 +256,17 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
changed := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
|
changed, updateDelay := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
|
||||||
|
|
||||||
if changed {
|
if changed {
|
||||||
RecordBackendLatestBlock(be, latestBlockNumber)
|
RecordBackendLatestBlock(be, latestBlockNumber)
|
||||||
|
RecordConsensusBackendUpdateDelay(be, updateDelay)
|
||||||
log.Debug("backend state updated",
|
log.Debug("backend state updated",
|
||||||
"name", be.Name,
|
"name", be.Name,
|
||||||
"peerCount", peerCount,
|
"peerCount", peerCount,
|
||||||
"latestBlockNumber", latestBlockNumber,
|
"latestBlockNumber", latestBlockNumber,
|
||||||
"latestBlockHash", latestBlockHash)
|
"latestBlockHash", latestBlockHash,
|
||||||
|
"updateDelay", updateDelay)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -388,11 +397,15 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
cp.tracker.SetConsensusBlockNumber(proposedBlock)
|
cp.tracker.SetConsensusBlockNumber(proposedBlock)
|
||||||
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
|
|
||||||
cp.consensusGroupMux.Lock()
|
cp.consensusGroupMux.Lock()
|
||||||
cp.consensusGroup = consensusBackends
|
cp.consensusGroup = consensusBackends
|
||||||
cp.consensusGroupMux.Unlock()
|
cp.consensusGroupMux.Unlock()
|
||||||
|
|
||||||
|
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
|
||||||
|
RecordGroupConsensusCount(cp.backendGroup, len(consensusBackends))
|
||||||
|
RecordGroupConsensusFilteredCount(cp.backendGroup, len(filteredBackendsNames))
|
||||||
|
RecordGroupTotalCount(cp.backendGroup, len(cp.backendGroup.Backends))
|
||||||
|
|
||||||
log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", "))
|
log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", "))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -497,13 +510,14 @@ func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, block
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool) {
|
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
|
||||||
bs := cp.backendState[be]
|
bs := cp.backendState[be]
|
||||||
bs.backendStateMux.Lock()
|
bs.backendStateMux.Lock()
|
||||||
changed = bs.latestBlockHash != blockHash
|
changed = bs.latestBlockHash != blockHash
|
||||||
bs.peerCount = peerCount
|
bs.peerCount = peerCount
|
||||||
bs.latestBlockNumber = blockNumber
|
bs.latestBlockNumber = blockNumber
|
||||||
bs.latestBlockHash = blockHash
|
bs.latestBlockHash = blockHash
|
||||||
|
updateDelay = time.Since(bs.lastUpdate)
|
||||||
bs.lastUpdate = time.Now()
|
bs.lastUpdate = time.Now()
|
||||||
bs.backendStateMux.Unlock()
|
bs.backendStateMux.Unlock()
|
||||||
return
|
return
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/ethereum/go-ethereum/common/hexutil"
|
"github.com/ethereum/go-ethereum/common/hexutil"
|
||||||
|
|
||||||
@ -260,6 +261,62 @@ var (
|
|||||||
}, []string{
|
}, []string{
|
||||||
"backend_name",
|
"backend_name",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
consensusGroupCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: MetricsNamespace,
|
||||||
|
Name: "group_consensus_count",
|
||||||
|
Help: "Consensus group serving traffic count",
|
||||||
|
}, []string{
|
||||||
|
"backend_group_name",
|
||||||
|
})
|
||||||
|
|
||||||
|
consensusGroupFilteredCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: MetricsNamespace,
|
||||||
|
Name: "group_consensus_filtered_count",
|
||||||
|
Help: "Consensus group filtered out from serving traffic count",
|
||||||
|
}, []string{
|
||||||
|
"backend_group_name",
|
||||||
|
})
|
||||||
|
|
||||||
|
consensusGroupTotalCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: MetricsNamespace,
|
||||||
|
Name: "group_consensus_total_count",
|
||||||
|
Help: "Total count of candidates to be part of consensus group",
|
||||||
|
}, []string{
|
||||||
|
"backend_group_name",
|
||||||
|
})
|
||||||
|
|
||||||
|
consensusBannedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: MetricsNamespace,
|
||||||
|
Name: "consensus_backend_banned",
|
||||||
|
Help: "Bool gauge for banned backends",
|
||||||
|
}, []string{
|
||||||
|
"backend_name",
|
||||||
|
})
|
||||||
|
|
||||||
|
consensusPeerCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: MetricsNamespace,
|
||||||
|
Name: "consensus_backend_peer_count",
|
||||||
|
Help: "Peer count",
|
||||||
|
}, []string{
|
||||||
|
"backend_name",
|
||||||
|
})
|
||||||
|
|
||||||
|
consensusInSyncBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: MetricsNamespace,
|
||||||
|
Name: "consensus_backend_in_sync",
|
||||||
|
Help: "Bool gauge for backends in sync",
|
||||||
|
}, []string{
|
||||||
|
"backend_name",
|
||||||
|
})
|
||||||
|
|
||||||
|
consensusUpdateDelayBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: MetricsNamespace,
|
||||||
|
Name: "consensus_backend_update_delay",
|
||||||
|
Help: "Delay (ms) for backend update",
|
||||||
|
}, []string{
|
||||||
|
"backend_name",
|
||||||
|
})
|
||||||
)
|
)
|
||||||
|
|
||||||
func RecordRedisError(source string) {
|
func RecordRedisError(source string) {
|
||||||
@ -321,10 +378,46 @@ func RecordBatchSize(size int) {
|
|||||||
batchSizeHistogram.Observe(float64(size))
|
batchSizeHistogram.Observe(float64(size))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
|
||||||
|
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber))
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordGroupConsensusCount(group *BackendGroup, count int) {
|
||||||
|
consensusGroupCount.WithLabelValues(group.Name).Set(float64(count))
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordGroupConsensusFilteredCount(group *BackendGroup, count int) {
|
||||||
|
consensusGroupFilteredCount.WithLabelValues(group.Name).Set(float64(count))
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordGroupTotalCount(group *BackendGroup, count int) {
|
||||||
|
consensusGroupTotalCount.WithLabelValues(group.Name).Set(float64(count))
|
||||||
|
}
|
||||||
|
|
||||||
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
|
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
|
||||||
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
|
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
|
||||||
}
|
}
|
||||||
|
|
||||||
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
|
func RecordConsensusBackendBanned(be *Backend, banned bool) {
|
||||||
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber))
|
v := float64(0)
|
||||||
|
if banned {
|
||||||
|
v = float64(1)
|
||||||
|
}
|
||||||
|
consensusBannedBackends.WithLabelValues(be.Name).Set(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
|
||||||
|
consensusPeerCountBackend.WithLabelValues(be.Name).Set(float64(peerCount))
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordConsensusBackendInSync(be *Backend, inSync bool) {
|
||||||
|
v := float64(0)
|
||||||
|
if inSync {
|
||||||
|
v = float64(1)
|
||||||
|
}
|
||||||
|
consensusInSyncBackend.WithLabelValues(be.Name).Set(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
|
||||||
|
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Milliseconds()))
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user