record sliding window metrics (#5782)

This commit is contained in:
felipe andrade 2023-05-25 09:16:16 -07:00 committed by GitHub
parent c5c98bf614
commit 1d9406daa6
2 changed files with 55 additions and 10 deletions

@ -374,6 +374,7 @@ func (b *Backend) ForwardRPC(ctx context.Context, res *RPCRes, id string, method
func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool) ([]*RPCRes, error) { func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool) ([]*RPCRes, error) {
// we are concerned about network error rates, so we record 1 request independently of how many are in the batch // we are concerned about network error rates, so we record 1 request independently of how many are in the batch
b.networkRequestsSlidingWindow.Incr() b.networkRequestsSlidingWindow.Incr()
RecordBackendNetworkRequestCountSlidingWindow(b, b.networkRequestsSlidingWindow.Count())
isSingleElementBatch := len(rpcReqs) == 1 isSingleElementBatch := len(rpcReqs) == 1
@ -390,6 +391,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
httpReq, err := http.NewRequestWithContext(ctx, "POST", b.rpcURL, bytes.NewReader(body)) httpReq, err := http.NewRequestWithContext(ctx, "POST", b.rpcURL, bytes.NewReader(body))
if err != nil { if err != nil {
b.networkErrorsSlidingWindow.Incr() b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, wrapErr(err, "error creating backend request") return nil, wrapErr(err, "error creating backend request")
} }
@ -411,6 +413,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
httpRes, err := b.client.DoLimited(httpReq) httpRes, err := b.client.DoLimited(httpReq)
if err != nil { if err != nil {
b.networkErrorsSlidingWindow.Incr() b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, wrapErr(err, "error in backend request") return nil, wrapErr(err, "error in backend request")
} }
@ -429,6 +432,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
// Alchemy returns a 400 on bad JSONs, so handle that case // Alchemy returns a 400 on bad JSONs, so handle that case
if httpRes.StatusCode != 200 && httpRes.StatusCode != 400 { if httpRes.StatusCode != 200 && httpRes.StatusCode != 400 {
b.networkErrorsSlidingWindow.Incr() b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, fmt.Errorf("response code %d", httpRes.StatusCode) return nil, fmt.Errorf("response code %d", httpRes.StatusCode)
} }
@ -436,6 +440,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
resB, err := io.ReadAll(io.LimitReader(httpRes.Body, b.maxResponseSize)) resB, err := io.ReadAll(io.LimitReader(httpRes.Body, b.maxResponseSize))
if err != nil { if err != nil {
b.networkErrorsSlidingWindow.Incr() b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, wrapErr(err, "error reading response body") return nil, wrapErr(err, "error reading response body")
} }
@ -453,15 +458,18 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
// Infura may return a single JSON-RPC response if, for example, the batch contains a request for an unsupported method // Infura may return a single JSON-RPC response if, for example, the batch contains a request for an unsupported method
if responseIsNotBatched(resB) { if responseIsNotBatched(resB) {
b.networkErrorsSlidingWindow.Incr() b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, ErrBackendUnexpectedJSONRPC return nil, ErrBackendUnexpectedJSONRPC
} }
b.networkErrorsSlidingWindow.Incr() b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, ErrBackendBadResponse return nil, ErrBackendBadResponse
} }
} }
if len(rpcReqs) != len(res) { if len(rpcReqs) != len(res) {
b.networkErrorsSlidingWindow.Incr() b.networkErrorsSlidingWindow.Incr()
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
return nil, ErrBackendUnexpectedJSONRPC return nil, ErrBackendUnexpectedJSONRPC
} }
@ -474,6 +482,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
} }
duration := time.Since(start) duration := time.Since(start)
b.latencySlidingWindow.Add(float64(duration)) b.latencySlidingWindow.Add(float64(duration))
RecordBackendNetworkLatencyAverageSlidingWindow(b, b.latencySlidingWindow.Avg())
sortBatchRPCResponse(rpcReqs, res) sortBatchRPCResponse(rpcReqs, res)
return res, nil return res, nil

@ -309,6 +309,30 @@ var (
}, []string{ }, []string{
"backend_name", "backend_name",
}) })
avgLatencyBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "backend_avg_latency",
Help: "Average latency per backend",
}, []string{
"backend_name",
})
networkErrorCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "backend_net_error_count",
Help: "Network error count per backend",
}, []string{
"backend_name",
})
requestCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "backend_request_count",
Help: "Request count per backend",
}, []string{
"backend_name",
})
) )
func RecordRedisError(source string) { func RecordRedisError(source string) {
@ -390,30 +414,42 @@ func RecordGroupTotalCount(group *BackendGroup, count int) {
consensusGroupTotalCount.WithLabelValues(group.Name).Set(float64(count)) consensusGroupTotalCount.WithLabelValues(group.Name).Set(float64(count))
} }
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) { func RecordBackendLatestBlock(b *Backend, blockNumber hexutil.Uint64) {
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber)) backendLatestBlockBackend.WithLabelValues(b.Name).Set(float64(blockNumber))
} }
func RecordConsensusBackendBanned(be *Backend, banned bool) { func RecordConsensusBackendBanned(b *Backend, banned bool) {
v := float64(0) v := float64(0)
if banned { if banned {
v = float64(1) v = float64(1)
} }
consensusBannedBackends.WithLabelValues(be.Name).Set(v) consensusBannedBackends.WithLabelValues(b.Name).Set(v)
} }
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) { func RecordConsensusBackendPeerCount(b *Backend, peerCount uint64) {
consensusPeerCountBackend.WithLabelValues(be.Name).Set(float64(peerCount)) consensusPeerCountBackend.WithLabelValues(b.Name).Set(float64(peerCount))
} }
func RecordConsensusBackendInSync(be *Backend, inSync bool) { func RecordConsensusBackendInSync(b *Backend, inSync bool) {
v := float64(0) v := float64(0)
if inSync { if inSync {
v = float64(1) v = float64(1)
} }
consensusInSyncBackend.WithLabelValues(be.Name).Set(v) consensusInSyncBackend.WithLabelValues(b.Name).Set(v)
} }
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) { func RecordConsensusBackendUpdateDelay(b *Backend, delay time.Duration) {
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Milliseconds())) consensusUpdateDelayBackend.WithLabelValues(b.Name).Set(float64(delay.Milliseconds()))
}
func RecordBackendNetworkLatencyAverageSlidingWindow(b *Backend, avgLatency float64) {
avgLatencyBackend.WithLabelValues(b.Name).Set(avgLatency)
}
func RecordBackendNetworkRequestCountSlidingWindow(b *Backend, count uint) {
requestCountBackend.WithLabelValues(b.Name).Set(float64(count))
}
func RecordBackendNetworkErrorCountSlidingWindow(b *Backend, count uint) {
networkErrorCountBackend.WithLabelValues(b.Name).Set(float64(count))
} }