diff --git a/proxyd/proxyd/consensus_poller.go b/proxyd/proxyd/consensus_poller.go
index 34fe708..a7b1fc2 100644
--- a/proxyd/proxyd/consensus_poller.go
+++ b/proxyd/proxyd/consensus_poller.go
@@ -203,7 +203,10 @@ func NewConsensusPoller(bg *BackendGroup, opts ...ConsensusOpt) *ConsensusPoller
 
 // UpdateBackend refreshes the consensus state of a single backend
 func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
-	if cp.IsBanned(be) {
+	banned := cp.IsBanned(be)
+	RecordConsensusBackendBanned(be, banned)
+
+	if banned {
 		log.Debug("skipping backend banned", "backend", be.Name)
 		return
 	}
@@ -212,6 +215,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
 	if !be.Online() || !be.IsHealthy() {
 		log.Warn("backend banned - not online or not healthy", "backend", be.Name)
 		cp.Ban(be)
+		return
 	}
 
 	// if backend it not in sync we'll check again after ban
@@ -219,7 +223,10 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
+	// record the sync state before the guard below; past it inSync is always true
+	RecordConsensusBackendInSync(be, err == nil && inSync)
 	if err != nil || !inSync {
 		log.Warn("backend banned - not in sync", "backend", be.Name)
 		cp.Ban(be)
+		return
 	}
 
 	// if backend exhausted rate limit we'll skip it for now
 	if be.IsRateLimited() {
@@ -234,6 +241,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
 			return
 		}
 	}
+	RecordConsensusBackendPeerCount(be, peerCount)
 
 	latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
 	if err != nil {
@@ -241,15 +249,17 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
 		return
 	}
 
-	changed := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
+	changed, updateDelay := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
 
 	if changed {
 		RecordBackendLatestBlock(be, latestBlockNumber)
+		RecordConsensusBackendUpdateDelay(be, updateDelay)
 		log.Debug("backend state updated",
 			"name", be.Name,
 			"peerCount", peerCount,
 			"latestBlockNumber", latestBlockNumber,
-			"latestBlockHash", latestBlockHash)
+			"latestBlockHash", latestBlockHash,
+			"updateDelay", updateDelay)
 	}
 }
 
@@ -354,11 +364,13 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {
 	}
 
 	cp.tracker.SetConsensusBlockNumber(proposedBlock)
-	RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
 	cp.consensusGroupMux.Lock()
 	cp.consensusGroup = consensusBackends
 	cp.consensusGroupMux.Unlock()
 
+	RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
+	RecordGroupConsensusCount(cp.backendGroup, len(consensusBackends))
+
 	log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", "))
 }
 
@@ -463,13 +475,16 @@ func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, block
 	return
 }
 
-func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool) {
+func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
 	bs := cp.backendState[be]
 	bs.backendStateMux.Lock()
 	changed = bs.latestBlockHash != blockHash
 	bs.peerCount = peerCount
 	bs.latestBlockNumber = blockNumber
 	bs.latestBlockHash = blockHash
-	bs.lastUpdate = time.Now()
+	// take a single timestamp so the returned delay and the stored lastUpdate agree
+	now := time.Now()
+	updateDelay = now.Sub(bs.lastUpdate)
+	bs.lastUpdate = now
 	bs.backendStateMux.Unlock()
 	return
diff --git a/proxyd/proxyd/metrics.go b/proxyd/proxyd/metrics.go
index 2aef49d..ab5d284 100644
--- a/proxyd/proxyd/metrics.go
+++ b/proxyd/proxyd/metrics.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"strconv"
 	"strings"
+	"time"
 
 	"github.com/ethereum/go-ethereum/common/hexutil"
 
@@ -260,6 +261,46 @@ var (
 	}, []string{
 		"backend_name",
 	})
+
+	consensusGroupCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: MetricsNamespace,
+		Name:      "group_consensus_count",
+		Help:      "Consensus group count",
+	}, []string{
+		"backend_group_name",
+	})
+
+	consensusBannedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: MetricsNamespace,
+		Name:      "consensus_backend_banned",
+		Help:      "Bool gauge for banned backends",
+	}, []string{
+		"backend_name",
+	})
+
+	consensusPeerCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: MetricsNamespace,
+		Name:      "consensus_backend_peer_count",
+		Help:      "Peer count",
+	}, []string{
+		"backend_name",
+	})
+
+	consensusInSyncBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: MetricsNamespace,
+		Name:      "consensus_backend_in_sync",
+		Help:      "Bool gauge for backends in sync",
+	}, []string{
+		"backend_name",
+	})
+
+	consensusUpdateDelayBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: MetricsNamespace,
+		Name:      "consensus_backend_update_delay",
+		Help:      "Delay (ms) for backend update",
+	}, []string{
+		"backend_name",
+	})
 )
 
 func RecordRedisError(source string) {
@@ -321,10 +362,40 @@ func RecordBatchSize(size int) {
 	batchSizeHistogram.Observe(float64(size))
 }
 
+func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
+	consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber))
+}
+
+func RecordGroupConsensusCount(group *BackendGroup, count int) {
+	consensusGroupCount.WithLabelValues(group.Name).Set(float64(count))
+}
+
 func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
 	backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
 }
 
-func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
-	consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber))
+func RecordConsensusBackendBanned(be *Backend, banned bool) {
+	v := float64(0)
+	if banned {
+		v = float64(1)
+	}
+	consensusBannedBackends.WithLabelValues(be.Name).Set(v)
+}
+
+func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
+	consensusPeerCountBackend.WithLabelValues(be.Name).Set(float64(peerCount))
+}
+
+func RecordConsensusBackendInSync(be *Backend, inSync bool) {
+	v := float64(0)
+	if inSync {
+		v = float64(1)
+	}
+	consensusInSyncBackend.WithLabelValues(be.Name).Set(v)
+}
+
+// RecordConsensusBackendUpdateDelay publishes the delay rounded to whole
+// milliseconds, the unit stated in the gauge's help text.
+func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
+	consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Round(time.Millisecond) / time.Millisecond))
 }