diff --git a/proxyd/proxyd/integration_tests/consensus_test.go b/proxyd/proxyd/integration_tests/consensus_test.go index 8075637..27cdfb6 100644 --- a/proxyd/proxyd/integration_tests/consensus_test.go +++ b/proxyd/proxyd/integration_tests/consensus_test.go @@ -8,6 +8,7 @@ import ( "os" "path" "testing" + "time" "github.com/ethereum/go-ethereum/common/hexutil" @@ -630,6 +631,55 @@ func TestConsensus(t *testing.T) { require.Equal(t, len(nodes["node2"].mockBackend.Requests()), 0, msg) }) + t.Run("load balancing should not hit if node is degraded", func(t *testing.T) { + reset() + useOnlyNode1() + + // replace node1 handler with one that adds a 100ms delay + oldHandler := nodes["node1"].mockBackend.handler + defer func() { nodes["node1"].mockBackend.handler = oldHandler }() + + nodes["node1"].mockBackend.SetHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(100 * time.Millisecond) + oldHandler.ServeHTTP(w, r) + })) + + update() + + // send 10 requests - so the latency window should be at ~100ms + numberReqs := 10 + for numberReqs > 0 { + _, statusCode, err := client.SendRPC("eth_getBlockByNumber", []interface{}{"0x101", false}) + require.NoError(t, err) + require.Equal(t, 200, statusCode) + numberReqs-- + } + + // bring back node2 + nodes["node2"].handler.ResetOverrides() + update() + + // reset request counts + nodes["node1"].mockBackend.Reset() + nodes["node2"].mockBackend.Reset() + + require.Equal(t, 0, len(nodes["node1"].mockBackend.Requests())) + require.Equal(t, 0, len(nodes["node2"].mockBackend.Requests())) + + numberReqs = 10 + for numberReqs > 0 { + _, statusCode, err := client.SendRPC("eth_getBlockByNumber", []interface{}{"0x101", false}) + require.NoError(t, err) + require.Equal(t, 200, statusCode) + numberReqs-- + } + + msg := fmt.Sprintf("n1 %d, n2 %d", + len(nodes["node1"].mockBackend.Requests()), len(nodes["node2"].mockBackend.Requests())) + require.Equal(t, 0, len(nodes["node1"].mockBackend.Requests()), msg) + 
require.Equal(t, 10, len(nodes["node2"].mockBackend.Requests()), msg) + }) + t.Run("rewrite response of eth_blockNumber", func(t *testing.T) { reset() update() diff --git a/proxyd/proxyd/integration_tests/testdata/consensus.toml b/proxyd/proxyd/integration_tests/testdata/consensus.toml index 03b11d0..6d8fdf5 100644 --- a/proxyd/proxyd/integration_tests/testdata/consensus.toml +++ b/proxyd/proxyd/integration_tests/testdata/consensus.toml @@ -3,6 +3,7 @@ rpc_port = 8545 [backend] response_timeout_seconds = 1 +max_degraded_latency_threshold = "30ms" [backends] [backends.node1] diff --git a/proxyd/proxyd/metrics.go b/proxyd/proxyd/metrics.go index 1d9602c..0edd820 100644 --- a/proxyd/proxyd/metrics.go +++ b/proxyd/proxyd/metrics.go @@ -358,6 +358,14 @@ var ( "backend_name", }) + degradedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: MetricsNamespace, + Name: "backend_degraded", + Help: "Bool gauge for degraded backends", + }, []string{ + "backend_name", + }) + networkErrorRateBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{ Namespace: MetricsNamespace, Name: "backend_error_rate", @@ -493,6 +501,7 @@ func RecordConsensusBackendUpdateDelay(b *Backend, lastUpdate time.Time) { func RecordBackendNetworkLatencyAverageSlidingWindow(b *Backend, avgLatency time.Duration) { avgLatencyBackend.WithLabelValues(b.Name).Set(float64(avgLatency.Milliseconds())) + degradedBackends.WithLabelValues(b.Name).Set(boolToFloat64(b.IsDegraded())) } func RecordBackendNetworkErrorRateSlidingWindow(b *Backend, rate float64) {