record sliding window metrics (#5782)

This commit is contained in:
felipe andrade 2023-05-25 09:16:16 -07:00 committed by GitHub
parent c5c98bf614
commit 1d9406daa6
2 changed files with 55 additions and 10 deletions

@ -374,6 +374,7 @@ func (b *Backend) ForwardRPC(ctx context.Context, res *RPCRes, id string, method
func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool) ([]*RPCRes, error) {
// we are concerned about network error rates, so we record 1 request independently of how many are in the batch
b.networkRequestsSlidingWindow.Incr()
RecordBackendNetworkRequestCountSlidingWindow(b, b.networkRequestsSlidingWindow.Count())
isSingleElementBatch := len(rpcReqs) == 1
@ -390,6 +391,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
httpReq, err := http.NewRequestWithContext(ctx, "POST", b.rpcURL, bytes.NewReader(body))
if err != nil {
b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, wrapErr(err, "error creating backend request")
}
@ -411,6 +413,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
httpRes, err := b.client.DoLimited(httpReq)
if err != nil {
b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, wrapErr(err, "error in backend request")
}
@ -429,6 +432,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
// Alchemy returns a 400 on bad JSONs, so handle that case
if httpRes.StatusCode != 200 && httpRes.StatusCode != 400 {
b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, fmt.Errorf("response code %d", httpRes.StatusCode)
}
@ -436,6 +440,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
resB, err := io.ReadAll(io.LimitReader(httpRes.Body, b.maxResponseSize))
if err != nil {
b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, wrapErr(err, "error reading response body")
}
@ -453,15 +458,18 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
// Infura may return a single JSON-RPC response if, for example, the batch contains a request for an unsupported method
if responseIsNotBatched(resB) {
b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, ErrBackendUnexpectedJSONRPC
}
b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, ErrBackendBadResponse
}
}
if len(rpcReqs) != len(res) {
b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, ErrBackendUnexpectedJSONRPC
}
@ -474,6 +482,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
}
duration := time.Since(start)
b.latencySlidingWindow.Add(float64(duration))
RecordBackendNetworkLatencyAverageSlidingWindow(b, b.latencySlidingWindow.Avg())
sortBatchRPCResponse(rpcReqs, res)
return res, nil

@ -309,6 +309,30 @@ var (
}, []string{
"backend_name",
})
avgLatencyBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "backend_avg_latency",
Help: "Average latency per backend",
}, []string{
"backend_name",
})
networkErrorCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "backend_net_error_count",
Help: "Network error count per backend",
}, []string{
"backend_name",
})
requestCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "backend_request_count",
Help: "Request count per backend",
}, []string{
"backend_name",
})
)
func RecordRedisError(source string) {
@ -390,30 +414,42 @@ func RecordGroupTotalCount(group *BackendGroup, count int) {
consensusGroupTotalCount.WithLabelValues(group.Name).Set(float64(count))
}
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
func RecordBackendLatestBlock(b *Backend, blockNumber hexutil.Uint64) {
backendLatestBlockBackend.WithLabelValues(b.Name).Set(float64(blockNumber))
}
func RecordConsensusBackendBanned(be *Backend, banned bool) {
func RecordConsensusBackendBanned(b *Backend, banned bool) {
v := float64(0)
if banned {
v = float64(1)
}
consensusBannedBackends.WithLabelValues(be.Name).Set(v)
consensusBannedBackends.WithLabelValues(b.Name).Set(v)
}
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
consensusPeerCountBackend.WithLabelValues(be.Name).Set(float64(peerCount))
func RecordConsensusBackendPeerCount(b *Backend, peerCount uint64) {
consensusPeerCountBackend.WithLabelValues(b.Name).Set(float64(peerCount))
}
func RecordConsensusBackendInSync(be *Backend, inSync bool) {
func RecordConsensusBackendInSync(b *Backend, inSync bool) {
v := float64(0)
if inSync {
v = float64(1)
}
consensusInSyncBackend.WithLabelValues(be.Name).Set(v)
consensusInSyncBackend.WithLabelValues(b.Name).Set(v)
}
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Milliseconds()))
func RecordConsensusBackendUpdateDelay(b *Backend, delay time.Duration) {
consensusUpdateDelayBackend.WithLabelValues(b.Name).Set(float64(delay.Milliseconds()))
}
func RecordBackendNetworkLatencyAverageSlidingWindow(b *Backend, avgLatency float64) {
avgLatencyBackend.WithLabelValues(b.Name).Set(avgLatency)
}
func RecordBackendNetworkRequestCountSlidingWindow(b *Backend, count uint) {
requestCountBackend.WithLabelValues(b.Name).Set(float64(count))
}
func RecordBackendNetworkErrorCountSlidingWindow(b *Backend, count uint) {
networkErrorCountBackend.WithLabelValues(b.Name).Set(float64(count))
}