record sliding window metrics (#5782)
This commit is contained in:
parent
c5c98bf614
commit
1d9406daa6
@ -374,6 +374,7 @@ func (b *Backend) ForwardRPC(ctx context.Context, res *RPCRes, id string, method
|
||||
func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool) ([]*RPCRes, error) {
|
||||
// we are concerned about network error rates, so we record 1 request independently of how many are in the batch
|
||||
b.networkRequestsSlidingWindow.Incr()
|
||||
RecordBackendNetworkRequestCountSlidingWindow(b, b.networkRequestsSlidingWindow.Count())
|
||||
|
||||
isSingleElementBatch := len(rpcReqs) == 1
|
||||
|
||||
@ -390,6 +391,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
|
||||
httpReq, err := http.NewRequestWithContext(ctx, "POST", b.rpcURL, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
b.networkErrorsSlidingWindow.Incr()
|
||||
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
|
||||
return nil, wrapErr(err, "error creating backend request")
|
||||
}
|
||||
|
||||
@ -411,6 +413,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
|
||||
httpRes, err := b.client.DoLimited(httpReq)
|
||||
if err != nil {
|
||||
b.networkErrorsSlidingWindow.Incr()
|
||||
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
|
||||
return nil, wrapErr(err, "error in backend request")
|
||||
}
|
||||
|
||||
@ -429,6 +432,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
|
||||
// Alchemy returns a 400 on bad JSONs, so handle that case
|
||||
if httpRes.StatusCode != 200 && httpRes.StatusCode != 400 {
|
||||
b.networkErrorsSlidingWindow.Incr()
|
||||
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
|
||||
return nil, fmt.Errorf("response code %d", httpRes.StatusCode)
|
||||
}
|
||||
|
||||
@ -436,6 +440,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
|
||||
resB, err := io.ReadAll(io.LimitReader(httpRes.Body, b.maxResponseSize))
|
||||
if err != nil {
|
||||
b.networkErrorsSlidingWindow.Incr()
|
||||
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
|
||||
return nil, wrapErr(err, "error reading response body")
|
||||
}
|
||||
|
||||
@ -453,15 +458,18 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
|
||||
// Infura may return a single JSON-RPC response if, for example, the batch contains a request for an unsupported method
|
||||
if responseIsNotBatched(resB) {
|
||||
b.networkErrorsSlidingWindow.Incr()
|
||||
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
|
||||
return nil, ErrBackendUnexpectedJSONRPC
|
||||
}
|
||||
b.networkErrorsSlidingWindow.Incr()
|
||||
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
|
||||
return nil, ErrBackendBadResponse
|
||||
}
|
||||
}
|
||||
|
||||
if len(rpcReqs) != len(res) {
|
||||
b.networkErrorsSlidingWindow.Incr()
|
||||
RecordBackendNetworkErrorCountSlidingWindow(b, b.networkErrorsSlidingWindow.Count())
|
||||
return nil, ErrBackendUnexpectedJSONRPC
|
||||
}
|
||||
|
||||
@ -474,6 +482,7 @@ func (b *Backend) doForward(ctx context.Context, rpcReqs []*RPCReq, isBatch bool
|
||||
}
|
||||
duration := time.Since(start)
|
||||
b.latencySlidingWindow.Add(float64(duration))
|
||||
RecordBackendNetworkLatencyAverageSlidingWindow(b, b.latencySlidingWindow.Avg())
|
||||
|
||||
sortBatchRPCResponse(rpcReqs, res)
|
||||
return res, nil
|
||||
|
@ -309,6 +309,30 @@ var (
|
||||
}, []string{
|
||||
"backend_name",
|
||||
})
|
||||
|
||||
avgLatencyBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: MetricsNamespace,
|
||||
Name: "backend_avg_latency",
|
||||
Help: "Average latency per backend",
|
||||
}, []string{
|
||||
"backend_name",
|
||||
})
|
||||
|
||||
networkErrorCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: MetricsNamespace,
|
||||
Name: "backend_net_error_count",
|
||||
Help: "Network error count per backend",
|
||||
}, []string{
|
||||
"backend_name",
|
||||
})
|
||||
|
||||
requestCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: MetricsNamespace,
|
||||
Name: "backend_request_count",
|
||||
Help: "Request count per backend",
|
||||
}, []string{
|
||||
"backend_name",
|
||||
})
|
||||
)
|
||||
|
||||
func RecordRedisError(source string) {
|
||||
@ -390,30 +414,42 @@ func RecordGroupTotalCount(group *BackendGroup, count int) {
|
||||
consensusGroupTotalCount.WithLabelValues(group.Name).Set(float64(count))
|
||||
}
|
||||
|
||||
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
|
||||
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
|
||||
func RecordBackendLatestBlock(b *Backend, blockNumber hexutil.Uint64) {
|
||||
backendLatestBlockBackend.WithLabelValues(b.Name).Set(float64(blockNumber))
|
||||
}
|
||||
|
||||
func RecordConsensusBackendBanned(be *Backend, banned bool) {
|
||||
func RecordConsensusBackendBanned(b *Backend, banned bool) {
|
||||
v := float64(0)
|
||||
if banned {
|
||||
v = float64(1)
|
||||
}
|
||||
consensusBannedBackends.WithLabelValues(be.Name).Set(v)
|
||||
consensusBannedBackends.WithLabelValues(b.Name).Set(v)
|
||||
}
|
||||
|
||||
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
|
||||
consensusPeerCountBackend.WithLabelValues(be.Name).Set(float64(peerCount))
|
||||
func RecordConsensusBackendPeerCount(b *Backend, peerCount uint64) {
|
||||
consensusPeerCountBackend.WithLabelValues(b.Name).Set(float64(peerCount))
|
||||
}
|
||||
|
||||
func RecordConsensusBackendInSync(be *Backend, inSync bool) {
|
||||
func RecordConsensusBackendInSync(b *Backend, inSync bool) {
|
||||
v := float64(0)
|
||||
if inSync {
|
||||
v = float64(1)
|
||||
}
|
||||
consensusInSyncBackend.WithLabelValues(be.Name).Set(v)
|
||||
consensusInSyncBackend.WithLabelValues(b.Name).Set(v)
|
||||
}
|
||||
|
||||
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
|
||||
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Milliseconds()))
|
||||
func RecordConsensusBackendUpdateDelay(b *Backend, delay time.Duration) {
|
||||
consensusUpdateDelayBackend.WithLabelValues(b.Name).Set(float64(delay.Milliseconds()))
|
||||
}
|
||||
|
||||
func RecordBackendNetworkLatencyAverageSlidingWindow(b *Backend, avgLatency float64) {
|
||||
avgLatencyBackend.WithLabelValues(b.Name).Set(avgLatency)
|
||||
}
|
||||
|
||||
func RecordBackendNetworkRequestCountSlidingWindow(b *Backend, count uint) {
|
||||
requestCountBackend.WithLabelValues(b.Name).Set(float64(count))
|
||||
}
|
||||
|
||||
func RecordBackendNetworkErrorCountSlidingWindow(b *Backend, count uint) {
|
||||
networkErrorCountBackend.WithLabelValues(b.Name).Set(float64(count))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user