Merge pull request #5845 from ethereum-optimism/felipe/metric-degraded

feat(proxyd): add metric for degraded backend
OptimismBot 2023-05-31 17:59:43 -04:00 committed by GitHub
commit 0ad110cbd4
3 changed files with 60 additions and 0 deletions

@@ -8,6 +8,7 @@ import (
    "os"
    "path"
    "testing"
    "time"
    "github.com/ethereum/go-ethereum/common/hexutil"
@@ -630,6 +631,55 @@ func TestConsensus(t *testing.T) {
        require.Equal(t, len(nodes["node2"].mockBackend.Requests()), 0, msg)
    })

    t.Run("load balancing should not hit if node is degraded", func(t *testing.T) {
        reset()
        useOnlyNode1()

        // replace node1 handler with one that adds a 500ms delay
        oldHandler := nodes["node1"].mockBackend.handler
        defer func() { nodes["node1"].mockBackend.handler = oldHandler }()

        nodes["node1"].mockBackend.SetHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            time.Sleep(500 * time.Millisecond)
            oldHandler.ServeHTTP(w, r)
        }))
        update()

        // send 10 requests to make node1 degraded
        numberReqs := 10
        for numberReqs > 0 {
            _, statusCode, err := client.SendRPC("eth_getBlockByNumber", []interface{}{"0x101", false})
            require.NoError(t, err)
            require.Equal(t, 200, statusCode)
            numberReqs--
        }

        // bring back node2
        nodes["node2"].handler.ResetOverrides()
        update()

        // reset request counts
        nodes["node1"].mockBackend.Reset()
        nodes["node2"].mockBackend.Reset()
        require.Equal(t, 0, len(nodes["node1"].mockBackend.Requests()))
        require.Equal(t, 0, len(nodes["node2"].mockBackend.Requests()))

        numberReqs = 10
        for numberReqs > 0 {
            _, statusCode, err := client.SendRPC("eth_getBlockByNumber", []interface{}{"0x101", false})
            require.NoError(t, err)
            require.Equal(t, 200, statusCode)
            numberReqs--
        }

        msg := fmt.Sprintf("n1 %d, n2 %d",
            len(nodes["node1"].mockBackend.Requests()), len(nodes["node2"].mockBackend.Requests()))
        require.Equal(t, 0, len(nodes["node1"].mockBackend.Requests()), msg)
        require.Equal(t, 10, len(nodes["node2"].mockBackend.Requests()), msg)
    })

    t.Run("rewrite response of eth_blockNumber", func(t *testing.T) {
        reset()
        update()

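The test above only asserts request routing (node1 should receive 0 requests and node2 all 10 once node1 is degraded); it does not read the new gauge directly. If one wanted to assert the metric value itself, prometheus' testutil helpers could do it. This is a sketch only: it assumes the helper sits in the proxyd package, where the unexported degradedBackends gauge is visible, and the helper name is hypothetical.

package proxyd // hypothetical placement for the sketch

import (
    "testing"

    "github.com/prometheus/client_golang/prometheus/testutil"
    "github.com/stretchr/testify/require"
)

// requireNotDegraded asserts that backend_degraded currently reports 0
// (healthy) for the named backend.
func requireNotDegraded(t *testing.T, backendName string) {
    got := testutil.ToFloat64(degradedBackends.WithLabelValues(backendName))
    require.Equal(t, float64(0), got, "expected backend_degraded == 0 for %s", backendName)
}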
@@ -3,6 +3,7 @@ rpc_port = 8545
[backend]
response_timeout_seconds = 1
max_degraded_latency_threshold = "30ms"

[backends]
[backends.node1]
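The 30ms value above is the threshold the recording path in the metrics change below compares against via b.IsDegraded(). A minimal sketch of the shape of that check, with illustrative field names rather than proxyd's actual Backend struct:

package sketch // sketch only; not the real proxyd types

import "time"

type Backend struct {
    Name                        string
    maxDegradedLatencyThreshold time.Duration // e.g. 30ms, from max_degraded_latency_threshold
    avgLatency                  time.Duration // rolling average, updated per request elsewhere
}

// IsDegraded reports whether the backend's recent average latency has
// crossed the configured degraded threshold.
func (b *Backend) IsDegraded() bool {
    return b.avgLatency >= b.maxDegradedLatencyThreshold
}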

@@ -358,6 +358,14 @@ var (
        "backend_name",
    })

    degradedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{
        Namespace: MetricsNamespace,
        Name: "backend_degraded",
        Help: "Bool gauge for degraded backends",
    }, []string{
        "backend_name",
    })

    networkErrorRateBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
        Namespace: MetricsNamespace,
        Name: "backend_error_rate",

@@ -493,6 +501,7 @@ func RecordConsensusBackendUpdateDelay(b *Backend, lastUpdate time.Time) {

func RecordBackendNetworkLatencyAverageSlidingWindow(b *Backend, avgLatency time.Duration) {
    avgLatencyBackend.WithLabelValues(b.Name).Set(float64(avgLatency.Milliseconds()))
    degradedBackends.WithLabelValues(b.Name).Set(boolToFloat64(b.IsDegraded()))
}

func RecordBackendNetworkErrorRateSlidingWindow(b *Backend, rate float64) {
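The new recording line converts the boolean into the gauge value. boolToFloat64 is not part of this diff; the conventional shape of such a helper (a sketch, not necessarily proxyd's exact definition) maps true to 1 and false to 0, so backend_degraded reads 1 while a backend is degraded and 0 otherwise.

// Sketch of a bool-to-gauge helper matching the 0/1 convention above.
func boolToFloat64(b bool) float64 {
    if b {
        return 1
    }
    return 0
}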