Peak ewma (#63)
* use peak-ewma instead of head for latency calculation
* Implement some suggested changes from PR
* move latency to new package in workspace root
* fix unit tests which now require peak_latency on Web3Rpc
* Switch to atomics for peak-ewma, to avoid locking from tokio::sync::watch
* add decay calculation to latency reads in peak-ewma
* Add some tests for peak-ewma
* Sensible latency defaults and not blocking on full
* Cleanup and a couple additional comments
parent 0531e2f8dd
commit ec11e210ee
Cargo.lock (generated, 12 changes)
@@ -3020,6 +3020,16 @@ dependencies = [
  "regex",
 ]
 
+[[package]]
+name = "latency"
+version = "0.1.0"
+dependencies = [
+ "ewma",
+ "log",
+ "serde",
+ "tokio",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
@@ -6657,7 +6667,6 @@ dependencies = [
  "entities",
  "env_logger",
  "ethers",
- "ewma",
  "fdlimit",
  "flume",
  "fstrings",
@@ -6673,6 +6682,7 @@ dependencies = [
  "influxdb2-structmap",
  "ipnet",
  "itertools",
+ "latency",
  "log",
  "migration",
  "moka",
Cargo.toml
@@ -2,6 +2,7 @@
 members = [
     "deferred-rate-limiter",
     "entities",
+    "latency",
     "migration",
     "rate-counter",
     "redis-rate-limiter",
latency/Cargo.toml (new file, 15 lines)
@@ -0,0 +1,15 @@
+[package]
+name = "latency"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+ewma = "0.1.1"
+log = "0.4.17"
+serde = { version = "1.0.159", features = [] }
+tokio = { version = "1.27.0", features = ["full"] }
+
+[dev-dependencies]
+tokio = { version = "1.27.0", features = ["full", "test-util"] }
latency/src/ewma.rs (new file, 67 lines)
@@ -0,0 +1,67 @@
+use serde::ser::Serializer;
+use serde::Serialize;
+use tokio::time::Duration;
+
+pub struct EwmaLatency {
+    /// exponentially weighted moving average of how many milliseconds behind the fastest node we are
+    ewma: ewma::EWMA,
+}
+
+impl Serialize for EwmaLatency {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_f64(self.ewma.value())
+    }
+}
+
+impl EwmaLatency {
+    #[inline(always)]
+    pub fn record(&mut self, duration: Duration) {
+        self.record_ms(duration.as_secs_f64() * 1000.0);
+    }
+
+    #[inline(always)]
+    pub fn record_ms(&mut self, milliseconds: f64) {
+        self.ewma.add(milliseconds);
+    }
+
+    /// Current EWMA value in milliseconds
+    #[inline(always)]
+    pub fn value(&self) -> f64 {
+        self.ewma.value()
+    }
+}
+
+impl Default for EwmaLatency {
+    fn default() -> Self {
+        // TODO: what should the default span be? 25 requests?
+        let span = 25.0;
+
+        let start = 1000.0;
+
+        Self::new(span, start)
+    }
+}
+
+impl EwmaLatency {
+    // depending on the span, start might not be perfect
+    pub fn new(span: f64, start_ms: f64) -> Self {
+        let alpha = Self::span_to_alpha(span);
+
+        let mut ewma = ewma::EWMA::new(alpha);
+
+        if start_ms > 0.0 {
+            for _ in 0..(span as u64) {
+                ewma.add(start_ms);
+            }
+        }
+
+        Self { ewma }
+    }
+
+    fn span_to_alpha(span: f64) -> f64 {
+        2.0 / (span + 1.0)
+    }
+}
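A quick usage sketch (illustration only, not part of the diff): span controls the smoothing via alpha = 2 / (span + 1), and the constructor pre-seeds the EWMA with span copies of start_ms so early samples start from a sensible baseline instead of zero.

use latency::EwmaLatency;
use tokio::time::Duration;

fn main() {
    // span = 10 gives alpha = 2 / 11 ≈ 0.18; start the average at 100ms
    let mut lat = EwmaLatency::new(10.0, 100.0);

    lat.record(Duration::from_millis(50));
    lat.record_ms(75.0);

    // The estimate has moved from the 100ms seed toward the 50-75ms samples.
    println!("ewma latency: {:.1}ms", lat.value());
}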
latency/src/lib.rs (new file, 5 lines)
@@ -0,0 +1,5 @@
+mod ewma;
+mod peak_ewma;
+mod util;
+
+pub use self::{ewma::EwmaLatency, peak_ewma::PeakEwmaLatency};
latency/src/peak_ewma/mod.rs (new file, 149 lines)
@@ -0,0 +1,149 @@
+mod rtt_estimate;
+
+use std::sync::Arc;
+
+use log::error;
+use tokio::sync::mpsc;
+use tokio::sync::mpsc::error::TrySendError;
+use tokio::task::JoinHandle;
+use tokio::time::{Duration, Instant};
+
+use self::rtt_estimate::AtomicRttEstimate;
+use crate::util::nanos::nanos;
+
+/// Latency calculation using Peak EWMA algorithm
+///
+/// Updates are done in a separate task to avoid locking or race
+/// conditions. Reads may happen on any thread.
+#[derive(Debug)]
+pub struct PeakEwmaLatency {
+    /// Join handle for the latency calculation task
+    pub join_handle: JoinHandle<()>,
+    /// Send to update with each request duration
+    request_tx: mpsc::Sender<Duration>,
+    /// Latency average and last update time
+    rtt_estimate: Arc<AtomicRttEstimate>,
+    /// Decay time
+    decay_ns: f64,
+}
+
+impl PeakEwmaLatency {
+    /// Spawn the task for calculating peak request latency
+    ///
+    /// Returns a handle that can also be used to read the current
+    /// average latency.
+    pub fn spawn(decay_ns: f64, buf_size: usize, start_latency: Duration) -> Self {
+        debug_assert!(decay_ns > 0.0, "decay_ns must be positive");
+        let (request_tx, request_rx) = mpsc::channel(buf_size);
+        let rtt_estimate = Arc::new(AtomicRttEstimate::new(start_latency));
+        let task = PeakEwmaLatencyTask {
+            request_rx,
+            rtt_estimate: rtt_estimate.clone(),
+            update_at: Instant::now(),
+            decay_ns,
+        };
+        let join_handle = tokio::spawn(task.run());
+        Self {
+            join_handle,
+            request_tx,
+            rtt_estimate,
+            decay_ns,
+        }
+    }
+
+    /// Get the current peak-ewma latency estimate
+    pub fn latency(&self) -> Duration {
+        let mut estimate = self.rtt_estimate.load();
+
+        let now = Instant::now();
+        debug_assert!(
+            estimate.update_at <= now,
+            "update_at={:?} in the future",
+            estimate.update_at,
+        );
+
+        // Update the RTT estimate to account for decay since the last update.
+        estimate.update(0.0, self.decay_ns, now)
+    }
+
+    /// Report latency from a single request
+    ///
+    /// Should only be called from the Web3Rpc that owns it.
+    pub fn report(&self, duration: Duration) {
+        match self.request_tx.try_send(duration) {
+            Ok(()) => {}
+            Err(TrySendError::Full(_)) => {
+                // We don't want to block if the channel is full, just
+                // report the error
+                error!("Latency report channel full");
+                // TODO: could we spawn a new tokio task to report this later?
+            }
+            Err(TrySendError::Closed(_)) => {
+                unreachable!("Owner should keep channel open");
+            }
+        };
+        //.expect("Owner should keep channel open");
+    }
+}
+
+/// Task to be spawned per-Web3Rpc for calculating the peak request latency
+#[derive(Debug)]
+struct PeakEwmaLatencyTask {
+    /// Receive new request timings for update
+    request_rx: mpsc::Receiver<Duration>,
+    /// Current estimate and update time
+    rtt_estimate: Arc<AtomicRttEstimate>,
+    /// Last update time, used for decay calculation
+    update_at: Instant,
+    /// Decay time
+    decay_ns: f64,
+}
+
+impl PeakEwmaLatencyTask {
+    /// Run the loop for updating latency
+    async fn run(mut self) {
+        while let Some(rtt) = self.request_rx.recv().await {
+            self.update(rtt);
+        }
+    }
+
+    /// Update the estimate object atomically.
+    fn update(&mut self, rtt: Duration) {
+        let rtt = nanos(rtt);
+
+        let now = Instant::now();
+        debug_assert!(
+            self.update_at <= now,
+            "update_at={:?} in the future",
+            self.update_at,
+        );
+
+        self.rtt_estimate
+            .fetch_update(|mut rtt_estimate| rtt_estimate.update(rtt, self.decay_ns, now));
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tokio::time::{self, Duration};
+
+    use crate::util::nanos::NANOS_PER_MILLI;
+
+    /// The default RTT estimate decays, so that new nodes are considered if the
+    /// default RTT is too high.
+    #[tokio::test(start_paused = true)]
+    async fn default_decay() {
+        let estimate =
+            super::PeakEwmaLatency::spawn(NANOS_PER_MILLI * 1_000.0, 8, Duration::from_millis(10));
+        let load = estimate.latency();
+        assert_eq!(load, Duration::from_millis(10));
+
+        time::advance(Duration::from_millis(100)).await;
+        let load = estimate.latency();
+        assert!(Duration::from_millis(9) < load && load < Duration::from_millis(10));
+
+        time::advance(Duration::from_millis(100)).await;
+        let load = estimate.latency();
+        assert!(Duration::from_millis(8) < load && load < Duration::from_millis(9));
+    }
+}
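A minimal end-to-end sketch of the API above (not part of the diff; assumes the latency crate as a dependency and a tokio runtime):

use latency::PeakEwmaLatency;
use tokio::time::Duration;

#[tokio::main]
async fn main() {
    const NANOS_PER_MILLI: f64 = 1_000_000.0;

    // Decay over ~1s, buffer up to 8 pending reports, start the estimate at 10ms.
    let peak = PeakEwmaLatency::spawn(1_000.0 * NANOS_PER_MILLI, 8, Duration::from_millis(10));

    // Each finished request reports its duration; try_send never blocks the caller.
    peak.report(Duration::from_millis(25));

    // Give the background task a moment to drain the channel.
    tokio::time::sleep(Duration::from_millis(5)).await;

    // Reads fold in decay since the last update, so idle nodes drift downward.
    println!("peak-ewma latency: {:?}", peak.latency());
}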
latency/src/peak_ewma/rtt_estimate.rs (new file, 168 lines)
@@ -0,0 +1,168 @@
+use std::sync::atomic::Ordering;
+
+use log::trace;
+use tokio::time::{Duration, Instant};
+
+use crate::util::atomic_f32_pair::AtomicF32Pair;
+use crate::util::nanos::{nanos, NANOS_PER_MILLI};
+
+/// Holds the current RTT estimate and the last time this value was updated.
+#[derive(Debug)]
+pub struct RttEstimate {
+    pub update_at: Instant,
+    pub rtt: Duration,
+}
+
+impl RttEstimate {
+    /// Update the estimate with a new rtt value. Use rtt=0.0 for simply
+    /// decaying the current value.
+    pub fn update(&mut self, rtt: f64, decay_ns: f64, now: Instant) -> Duration {
+        let ewma = nanos(self.rtt);
+        self.rtt = if ewma < rtt {
+            // For Peak-EWMA, always use the worst-case (peak) value as the estimate for
+            // subsequent requests.
+            trace!(
+                "update peak rtt={}ms prior={}ms",
+                rtt / NANOS_PER_MILLI,
+                ewma / NANOS_PER_MILLI,
+            );
+            Duration::from_nanos(rtt as u64)
+        } else {
+            // When a latency is observed that is less than the estimated latency, we decay the
+            // prior estimate according to how much time has elapsed since the last
+            // update. The inverse of the decay is used to scale the estimate towards the
+            // observed latency value.
+            let elapsed = nanos(now.saturating_duration_since(self.update_at));
+            let decay = (-elapsed / decay_ns).exp();
+            let recency = 1.0 - decay;
+            let next_estimate = (ewma * decay) + (rtt * recency);
+            trace!(
+                "update duration={:03.0}ms decay={:06.0}ns; next={:03.0}ms",
+                rtt / NANOS_PER_MILLI,
+                ewma - next_estimate,
+                next_estimate / NANOS_PER_MILLI,
+            );
+            Duration::from_nanos(next_estimate as u64)
+        };
+        self.rtt
+    }
+
+    /// Build a new estimate object using current time.
+    fn new(start_duration: Duration) -> Self {
+        Self {
+            update_at: Instant::now(),
+            rtt: start_duration,
+        }
+    }
+
+    /// Convert to pair of f32
+    fn as_pair(&self, start_time: Instant) -> [f32; 2] {
+        let update_at = self
+            .update_at
+            .saturating_duration_since(start_time)
+            .as_secs_f32();
+        let rtt = self.rtt.as_secs_f32();
+        [update_at, rtt]
+    }
+
+    /// Build from pair of f32
+    fn from_pair(pair: [f32; 2], start_time: Instant) -> Self {
+        let update_at = start_time + Duration::from_secs_f32(pair[0]);
+        let rtt = Duration::from_secs_f32(pair[1]);
+        Self { update_at, rtt }
+    }
+}
+
+/// Atomic storage of RttEstimate using AtomicF32Pair
+///
+/// Start time is needed to (de-)serialize the update_at instance.
+#[derive(Debug)]
+pub struct AtomicRttEstimate {
+    pair: AtomicF32Pair,
+    start_time: Instant,
+}
+
+impl AtomicRttEstimate {
+    /// Creates a new atomic rtt estimate.
+    pub fn new(start_duration: Duration) -> Self {
+        let estimate = RttEstimate::new(start_duration);
+        Self {
+            pair: AtomicF32Pair::new(estimate.as_pair(estimate.update_at)),
+            start_time: estimate.update_at,
+        }
+    }
+
+    /// Loads a value from the atomic rtt estimate.
+    ///
+    /// This method omits the ordering argument since loads may use
+    /// slightly stale data to avoid adding additional latency.
+    pub fn load(&self) -> RttEstimate {
+        RttEstimate::from_pair(self.pair.load(Ordering::Relaxed), self.start_time)
+    }
+
+    /// Fetches the value, and applies a function to it that returns a
+    /// new rtt. Returns the new RttEstimate with new update_at.
+    ///
+    /// Automatically updates the update_at with Instant::now(). This
+    /// method omits ordering arguments, defaulting to Relaxed since
+    /// all writes are serial and any reads may rely on slightly stale
+    /// data.
+    pub fn fetch_update<F>(&self, mut f: F) -> RttEstimate
+    where
+        F: FnMut(RttEstimate) -> Duration,
+    {
+        let mut update_at = Instant::now();
+        let mut rtt = Duration::ZERO;
+        self.pair
+            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |pair| {
+                rtt = f(RttEstimate::from_pair(pair, self.start_time));
+                // Save the new update_at inside the function in case it
+                // is run multiple times
+                update_at = Instant::now();
+                Some(RttEstimate { rtt, update_at }.as_pair(self.start_time))
+            })
+            .expect("Should never Err");
+        RttEstimate { update_at, rtt }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tokio::time::{self, Duration, Instant};
+
+    use super::{AtomicRttEstimate, RttEstimate};
+
+    #[test]
+    fn test_rtt_estimate_f32_conversions() {
+        let rtt = Duration::from_secs(1);
+        let expected = RttEstimate::new(rtt);
+        let actual =
+            RttEstimate::from_pair(expected.as_pair(expected.update_at), expected.update_at);
+        assert_eq!(expected.update_at, actual.update_at);
+        assert_eq!(expected.rtt, actual.rtt);
+    }
+
+    #[tokio::test(start_paused = true)]
+    async fn test_atomic_rtt_estimate_load() {
+        let rtt = Duration::from_secs(1);
+        let estimate = AtomicRttEstimate::new(rtt);
+        let actual = estimate.load();
+        assert_eq!(Instant::now(), actual.update_at);
+        assert_eq!(rtt, actual.rtt);
+    }
+
+    #[tokio::test(start_paused = true)]
+    async fn test_atomic_rtt_estimate_fetch_update() {
+        let start_time = Instant::now();
+        let rtt = Duration::from_secs(1);
+        let estimate = AtomicRttEstimate::new(rtt);
+        time::advance(Duration::from_secs(1)).await;
+        let rv = estimate.fetch_update(|value| {
+            assert_eq!(start_time, value.update_at);
+            assert_eq!(rtt, value.rtt);
+            Duration::from_secs(2)
+        });
+        assert_eq!(start_time + Duration::from_secs(1), rv.update_at);
+        assert_eq!(Duration::from_secs(2), rv.rtt);
+    }
+}
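To make the decay branch concrete, a worked example (values picked for illustration, not taken from the diff): with decay_ns = 1s, a 100ms prior estimate, and a 50ms observation arriving 500ms after the last update, decay = exp(-0.5) ≈ 0.607, so the next estimate is 100 · 0.607 + 50 · 0.393 ≈ 80.3ms.

// Standalone sketch of the decay formula from RttEstimate::update above.
fn decayed_estimate(prior_ns: f64, observed_ns: f64, elapsed_ns: f64, decay_ns: f64) -> f64 {
    let decay = (-elapsed_ns / decay_ns).exp();
    let recency = 1.0 - decay;
    (prior_ns * decay) + (observed_ns * recency)
}

fn main() {
    const MS: f64 = 1_000_000.0; // nanoseconds per millisecond

    // 100ms prior, 50ms observed, 500ms elapsed, 1s decay constant
    let next = decayed_estimate(100.0 * MS, 50.0 * MS, 500.0 * MS, 1_000.0 * MS);

    assert!((next / MS - 80.3).abs() < 0.1);
    println!("next estimate: {:.1}ms", next / MS);
}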
latency/src/util/atomic_f32_pair.rs (new file, 88 lines)
@@ -0,0 +1,88 @@
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// Implements an atomic pair of f32s
+///
+/// This uses an AtomicU64 internally.
+#[derive(Debug)]
+pub struct AtomicF32Pair(AtomicU64);
+
+impl AtomicF32Pair {
+    /// Creates a new atomic pair.
+    pub fn new(pair: [f32; 2]) -> Self {
+        Self(AtomicU64::new(to_bits(pair)))
+    }
+
+    /// Loads a value from the atomic pair.
+    pub fn load(&self, ordering: Ordering) -> [f32; 2] {
+        from_bits(self.0.load(ordering))
+    }
+
+    /// Fetches the value, and applies a function to it that returns an
+    /// optional new value. Returns a Result of Ok(previous_value) if
+    /// the function returned Some(_), else Err(previous_value).
+    pub fn fetch_update<F>(
+        &self,
+        set_order: Ordering,
+        fetch_order: Ordering,
+        mut f: F,
+    ) -> Result<[f32; 2], [f32; 2]>
+    where
+        F: FnMut([f32; 2]) -> Option<[f32; 2]>,
+    {
+        self.0
+            .fetch_update(set_order, fetch_order, |bits| {
+                f(from_bits(bits)).map(to_bits)
+            })
+            .map(from_bits)
+            .map_err(from_bits)
+    }
+}
+
+/// Convert a f32 pair to its bit-representation as u64
+fn to_bits(pair: [f32; 2]) -> u64 {
+    let f1 = pair[0].to_bits() as u64;
+    let f2 = pair[1].to_bits() as u64;
+    (f1 << 32) | f2
+}
+
+/// Build a f32 pair from its bit-representation as u64
+fn from_bits(bits: u64) -> [f32; 2] {
+    let f1 = f32::from_bits((bits >> 32) as u32);
+    let f2 = f32::from_bits(bits as u32);
+    [f1, f2]
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::atomic::Ordering;
+
+    use super::{from_bits, to_bits, AtomicF32Pair};
+
+    #[test]
+    fn test_f32_pair_bit_conversions() {
+        let pair = [3.14159, 2.71828];
+        assert_eq!(pair, from_bits(to_bits(pair)));
+    }
+
+    #[test]
+    fn test_atomic_f32_pair_load() {
+        let pair = [3.14159, 2.71828];
+        let atomic = AtomicF32Pair::new(pair);
+        assert_eq!(pair, atomic.load(Ordering::Relaxed));
+    }
+
+    #[test]
+    fn test_atomic_f32_pair_fetch_update() {
+        let pair = [3.14159, 2.71828];
+        let atomic = AtomicF32Pair::new(pair);
+        atomic
+            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |[f1, f2]| {
+                Some([f1 + 1.0, f2 + 1.0])
+            })
+            .unwrap();
+        assert_eq!(
+            [pair[0] + 1.0, pair[1] + 1.0],
+            atomic.load(Ordering::Relaxed)
+        );
+    }
+}
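A note on the design (commentary, not from the diff): packing both f32s into one AtomicU64 means a single store or compare-and-swap publishes update_at and rtt together, so a reader can never observe a half-updated pair; the tradeoff is f32 precision. A standalone sketch of the packing:

use std::sync::atomic::{AtomicU64, Ordering};

// Pack two f32s into one u64, the same way to_bits above does.
fn pack(pair: [f32; 2]) -> u64 {
    ((pair[0].to_bits() as u64) << 32) | pair[1].to_bits() as u64
}

fn unpack(bits: u64) -> [f32; 2] {
    [f32::from_bits((bits >> 32) as u32), f32::from_bits(bits as u32)]
}

fn main() {
    let cell = AtomicU64::new(pack([0.010, 0.100])); // (update_at_s, rtt_s)

    // One atomic store replaces both halves at once; no torn reads possible.
    cell.store(pack([0.020, 0.080]), Ordering::Relaxed);

    assert_eq!(unpack(cell.load(Ordering::Relaxed)), [0.020, 0.080]);
}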
latency/src/util/mod.rs (new file, 2 lines)
@@ -0,0 +1,2 @@
+pub(crate) mod atomic_f32_pair;
+pub(crate) mod nanos;
latency/src/util/nanos.rs (new file, 30 lines)
@@ -0,0 +1,30 @@
+use tokio::time::Duration;
+
+pub const NANOS_PER_MILLI: f64 = 1_000_000.0;
+
+/// Utility that converts durations to nanos in f64.
+///
+/// Due to a lossy transformation, the maximum value that can be represented is ~585 years,
+/// which, I hope, is more than enough to represent request latencies.
+pub fn nanos(d: Duration) -> f64 {
+    const NANOS_PER_SEC: u64 = 1_000_000_000;
+    let n = f64::from(d.subsec_nanos());
+    let s = d.as_secs().saturating_mul(NANOS_PER_SEC) as f64;
+    n + s
+}
+
+#[cfg(test)]
+mod tests {
+    use tokio::time::Duration;
+
+    #[test]
+    fn nanos() {
+        assert_eq!(super::nanos(Duration::new(0, 0)), 0.0);
+        assert_eq!(super::nanos(Duration::new(0, 123)), 123.0);
+        assert_eq!(super::nanos(Duration::new(1, 23)), 1_000_000_023.0);
+        assert_eq!(
+            super::nanos(Duration::new(::std::u64::MAX, 999_999_999)),
+            18446744074709553000.0
+        );
+    }
+}
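As a sanity check on the "~585 years" remark (illustration, not from the diff): the lossy step is the saturating multiplication of whole seconds into nanoseconds, and u64::MAX nanoseconds is roughly 584.5 years.

fn main() {
    let max_ns = u64::MAX as f64;
    let ns_per_year = 1e9 * 86_400.0 * 365.25;

    // ~584.5 years before the seconds-to-nanoseconds multiply saturates
    println!("{:.1} years", max_ns / ns_per_year);
}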
web3_proxy/Cargo.toml
@@ -15,6 +15,7 @@ rdkafka-src = ["rdkafka/cmake-build", "rdkafka/libz", "rdkafka/ssl", "rdkafka/zs
 [dependencies]
 deferred-rate-limiter = { path = "../deferred-rate-limiter" }
 entities = { path = "../entities" }
+latency = { path = "../latency" }
 migration = { path = "../migration" }
 redis-rate-limiter = { path = "../redis-rate-limiter" }
 thread-fast-rng = { path = "../thread-fast-rng" }
@@ -37,7 +38,6 @@ derive_more = "0.99.17"
 dotenv = "0.15.0"
 env_logger = "0.10.0"
 ethers = { version = "2.0.3", default-features = false, features = ["rustls", "ws"] }
-ewma = "0.1.1"
 fdlimit = "0.2.1"
 flume = "0.10.14"
 fstrings = "0.2"
@@ -1282,13 +1282,16 @@ fn rpc_sync_status_sort_key(x: &Arc<Web3Rpc>) -> (Reverse<U64>, u64, bool, Order
     // TODO: use request latency instead of head latency
     // TODO: have the latency decay automatically
-    let head_ewma = x.head_latency.read().value();
+    let peak_latency = x
+        .peak_latency
+        .as_ref()
+        .expect("peak_latency uninitialized")
+        .latency();
 
     let active_requests = x.active_requests.load(atomic::Ordering::Relaxed) as f64;
 
     // TODO: i'm not sure head * active is exactly right. but we'll see
-    let peak_ewma = OrderedFloat(head_ewma * active_requests);
+    // TODO: i don't think this actually counts as peak. investigate with atomics.rs and peak_ewma.rs
+    let peak_ewma = OrderedFloat(peak_latency.as_millis() as f64 * (active_requests + 1.0));
 
     let backup = x.backup;
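The load adjustment can invert the raw latency ordering; a small sketch of the score as computed above (lower ranks first; illustrative values, not from the diff):

// Sketch of the load-adjusted term in rpc_sync_status_sort_key.
fn score(peak_latency_ms: f64, active_requests: f64) -> f64 {
    peak_latency_ms * (active_requests + 1.0)
}

fn main() {
    // A fast-but-busy node can rank behind a slower idle one:
    assert_eq!(score(80.0, 3.0), 320.0); // 80ms with 3 requests in flight
    assert_eq!(score(120.0, 0.0), 120.0); // 120ms but idle
}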
@@ -1296,18 +1299,26 @@ fn rpc_sync_status_sort_key(x: &Arc<Web3Rpc>) -> (Reverse<U64>, u64, bool, Order
 }
 
 mod tests {
-    // TODO: why is this allow needed? does tokio::test get in the way somehow?
     #![allow(unused_imports)]
 
+    use std::time::{SystemTime, UNIX_EPOCH};
+
     use super::*;
     use crate::rpcs::consensus::ConsensusFinder;
     use crate::rpcs::{blockchain::Web3ProxyBlock, provider::Web3Provider};
+
     use ethers::types::{Block, U256};
+    use latency::PeakEwmaLatency;
     use log::{trace, LevelFilter};
     use parking_lot::RwLock;
-    use std::time::{SystemTime, UNIX_EPOCH};
     use tokio::sync::RwLock as AsyncRwLock;
 
+    #[cfg(test)]
+    fn new_peak_latency() -> PeakEwmaLatency {
+        const NANOS_PER_MILLI: f64 = 1_000_000.0;
+        PeakEwmaLatency::spawn(1_000.0 * NANOS_PER_MILLI, 4, Duration::from_secs(1))
+    }
+
     #[tokio::test]
     async fn test_sort_connections_by_sync_status() {
         let block_0 = Block {
@@ -1338,36 +1349,42 @@ mod tests {
             Web3Rpc {
                 name: "a".to_string(),
                 tier: 0,
                 head_block: RwLock::new(None),
+                peak_latency: Some(new_peak_latency()),
                 ..Default::default()
             },
             Web3Rpc {
                 name: "b".to_string(),
                 tier: 0,
                 head_block: RwLock::new(blocks.get(1).cloned()),
+                peak_latency: Some(new_peak_latency()),
                 ..Default::default()
             },
             Web3Rpc {
                 name: "c".to_string(),
                 tier: 0,
                 head_block: RwLock::new(blocks.get(2).cloned()),
+                peak_latency: Some(new_peak_latency()),
                 ..Default::default()
             },
             Web3Rpc {
                 name: "d".to_string(),
                 tier: 1,
                 head_block: RwLock::new(None),
+                peak_latency: Some(new_peak_latency()),
                 ..Default::default()
             },
             Web3Rpc {
                 name: "e".to_string(),
                 tier: 1,
                 head_block: RwLock::new(blocks.get(1).cloned()),
+                peak_latency: Some(new_peak_latency()),
                 ..Default::default()
            },
            Web3Rpc {
                 name: "f".to_string(),
                 tier: 1,
                 head_block: RwLock::new(blocks.get(2).cloned()),
+                peak_latency: Some(new_peak_latency()),
                 ..Default::default()
             },
         ]
@@ -1425,6 +1442,7 @@ mod tests {
             tier: 0,
             head_block: RwLock::new(Some(head_block.clone())),
             provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))),
+            peak_latency: Some(new_peak_latency()),
             ..Default::default()
         };
 
@@ -1437,6 +1455,7 @@ mod tests {
             tier: 0,
             head_block: RwLock::new(Some(lagged_block.clone())),
             provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))),
+            peak_latency: Some(new_peak_latency()),
             ..Default::default()
         };
 
@@ -1781,6 +1800,7 @@ mod tests {
             tier: 0,
             head_block: RwLock::new(Some(block_1.clone())),
             provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))),
+            peak_latency: Some(new_peak_latency()),
             ..Default::default()
         };
 
@@ -1793,6 +1813,7 @@ mod tests {
             tier: 1,
             head_block: RwLock::new(Some(block_2.clone())),
             provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))),
+            peak_latency: Some(new_peak_latency()),
             ..Default::default()
         };
@@ -12,6 +12,7 @@ use ethers::prelude::{Bytes, Middleware, ProviderError, TxHash, H256, U64};
 use ethers::types::{Address, Transaction, U256};
 use futures::future::try_join_all;
 use futures::StreamExt;
+use latency::{EwmaLatency, PeakEwmaLatency};
 use log::{debug, error, info, trace, warn, Level};
 use migration::sea_orm::DatabaseConnection;
 use ordered_float::OrderedFloat;
@@ -30,69 +31,6 @@ use thread_fast_rng::thread_fast_rng;
 use tokio::sync::{broadcast, oneshot, watch, RwLock as AsyncRwLock};
 use tokio::time::{sleep, sleep_until, timeout, Duration, Instant};
 
-pub struct Latency {
-    /// exponentially weighted moving average of how many milliseconds behind the fastest node we are
-    ewma: ewma::EWMA,
-}
-
-impl Serialize for Latency {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        serializer.serialize_f64(self.ewma.value())
-    }
-}
-
-impl Latency {
-    #[inline(always)]
-    pub fn record(&mut self, duration: Duration) {
-        self.record_ms(duration.as_secs_f64() * 1000.0);
-    }
-
-    #[inline(always)]
-    pub fn record_ms(&mut self, milliseconds: f64) {
-        self.ewma.add(milliseconds);
-    }
-
-    #[inline(always)]
-    pub fn value(&self) -> f64 {
-        self.ewma.value()
-    }
-}
-
-impl Default for Latency {
-    fn default() -> Self {
-        // TODO: what should the default span be? 25 requests? have a "new"
-        let span = 25.0;
-
-        let start = 1000.0;
-
-        Self::new(span, start)
-    }
-}
-
-impl Latency {
-    // depending on the span, start might not be perfect
-    pub fn new(span: f64, start: f64) -> Self {
-        let alpha = Self::span_to_alpha(span);
-
-        let mut ewma = ewma::EWMA::new(alpha);
-
-        if start > 0.0 {
-            for _ in 0..(span as u64) {
-                ewma.add(start);
-            }
-        }
-
-        Self { ewma }
-    }
-
-    fn span_to_alpha(span: f64) -> f64 {
-        2.0 / (span + 1.0)
-    }
-}
-
 /// An active connection to a Web3 RPC server like geth or erigon.
 #[derive(Default)]
 pub struct Web3Rpc {
@@ -127,10 +65,11 @@ pub struct Web3Rpc {
     /// TODO: change this to a watch channel so that http providers can subscribe and take action on change.
     pub(super) head_block: RwLock<Option<Web3ProxyBlock>>,
     /// Track head block latency
-    pub(super) head_latency: RwLock<Latency>,
-    // /// Track request latency
-    // /// TODO: refactor this. this lock kills perf. for now just use head_latency
-    // pub(super) request_latency: RwLock<Latency>,
+    pub(super) head_latency: RwLock<EwmaLatency>,
+    /// Track peak request latency
+    ///
+    /// This is only inside an Option so that the "Default" derive works. It will always be set.
+    pub(super) peak_latency: Option<PeakEwmaLatency>,
     /// Track total requests served
     /// TODO: maybe move this to graphana
     pub(super) total_requests: AtomicUsize,
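The Option wrapper is a derive-friendly workaround (sketch below, with an illustrative Handle type standing in for PeakEwmaLatency): spawning a task makes a real Default impossible, but Option<T> is Default (None) even when T is not, so #[derive(Default)] on Web3Rpc keeps working and constructors promise to fill the field with Some.

// Illustrative pattern only; `Handle` stands in for PeakEwmaLatency.
#[derive(Debug)]
struct Handle; // no sensible Default: the real type spawns a task on construction

#[derive(Debug, Default)]
struct Rpc {
    name: String,
    // Option<T>: Default (None) works even though Handle: Default does not.
    peak_latency: Option<Handle>,
}

fn main() {
    let rpc = Rpc {
        peak_latency: Some(Handle),
        ..Default::default()
    };
    println!("{:?}", rpc);
}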
@@ -227,6 +166,18 @@ impl Web3Rpc {
         let (disconnect_sender, disconnect_receiver) = watch::channel(false);
         let reconnect = reconnect.into();
 
+        // Spawn the task for calculating average peak latency
+        // TODO: Should these defaults be in config?
+        let peak_latency = PeakEwmaLatency::spawn(
+            // Decay over 15s
+            Duration::from_secs(15).as_nanos() as f64,
+            // Peak requests so far around 5k, we will use an order of magnitude
+            // more to be safe. Should only use about 50mb RAM
+            50_000,
+            // Start latency at 1 second
+            Duration::from_secs(1),
+        );
+
         let new_connection = Self {
             name,
             db_conn: db_conn.clone(),
@@ -245,6 +196,7 @@ impl Web3Rpc {
             disconnect_watch: Some(disconnect_sender),
             created_at: Some(created_at),
             head_block: RwLock::new(Default::default()),
+            peak_latency: Some(peak_latency),
             ..Default::default()
         };
 
@@ -194,7 +194,7 @@ impl OpenRequestHandle {
             .active_requests
             .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
 
-        // let latency = Instant::now();
+        let start = Instant::now();
 
         // TODO: replace ethers-rs providers with our own that supports streaming the responses
         let response = match provider.as_ref() {
@@ -366,14 +366,10 @@ impl OpenRequestHandle {
                     tokio::spawn(f);
                 }
             }
+        } else if let Some(peak_latency) = &self.rpc.peak_latency {
+            peak_latency.report(start.elapsed());
         } else {
-            // TODO: record request latency
-            // let latency_ms = start.elapsed().as_secs_f64() * 1000.0;
-
-            // TODO: is this lock here a problem? should this be done through a channel? i started to code it, but it didn't seem to matter
-            // let mut latency_recording = self.rpc.request_latency.write();
-
-            // latency_recording.record(latency_ms);
+            unreachable!("peak_latency not initialized");
         }
 
         response