remove old file

2022-11-04 19:52:15 +00:00 · 2022-11-04 19:52:15 +00:00 · bc317ef1e9
commit bc317ef1e9
parent 7127779182
1 changed files with 0 additions and 461 deletions
--- a/web3_proxy/src/app_stats_old.rs
+++ b/web3_proxy/src/app_stats_old.rs
@ -1,461 +0,0 @@
-use crate::frontend::authorization::{AuthorizedKey, RequestMetadata};
-use crate::jsonrpc::JsonRpcForwardedResponse;
-use anyhow::Context;
-use chrono::{TimeZone, Utc};
-use dashmap::mapref::entry::Entry;
-use dashmap::DashMap;
-use derive_more::From;
-use entities::rpc_accounting;
-use hdrhistogram::Histogram;
-use moka::future::{Cache, CacheBuilder, ConcurrentCacheExt};
-use sea_orm::{ActiveModelTrait, DatabaseConnection};
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::Arc;
-use std::time::Duration;
-use tokio::sync::{broadcast, Mutex as AsyncMutex};
-use tokio::task::JoinHandle;
-use tracing::{error, info, trace};
-
-/// TODO: where should this be defined?
-/// TODO: can we use something inside sea_orm instead?
-#[derive(Debug)]
-pub struct ProxyResponseStat {
-    rpc_key_id: u64,
-    method: String,
-    archive_request: bool,
-    period_seconds: u64,
-    period_timestamp: u64,
-    request_bytes: u64,
-    /// if backend_requests is 0, there was a cache_hit
-    backend_requests: u64,
-    error_response: bool,
-    response_bytes: u64,
-    response_millis: u64,
-}
-
-pub type TimeBucketTimestamp = u64;
-
-pub struct ProxyResponseHistograms {
-    request_bytes: Histogram<u64>,
-    response_bytes: Histogram<u64>,
-    response_millis: Histogram<u64>,
-}
-
-impl Default for ProxyResponseHistograms {
-    fn default() -> Self {
-        // TODO: how many significant figures?
-        let request_bytes = Histogram::new(5).expect("creating request_bytes histogram");
-        let response_bytes = Histogram::new(5).expect("creating response_bytes histogram");
-        let response_millis = Histogram::new(5).expect("creating response_millis histogram");
-
-        Self {
-            request_bytes,
-            response_bytes,
-            response_millis,
-        }
-    }
-}
-
-// TODO: impl From for our database model
-pub struct ProxyResponseAggregate {
-    // these are the key
-    // rpc_key_id: u64,
-    // method: String,
-    // error_response: bool,
-    // TODO: this is the grandparent key. get it from there somehow
-    period_timestamp: u64,
-    archive_request: bool,
-    frontend_requests: AtomicU64,
-    backend_requests: AtomicU64,
-    backend_retries: AtomicU64,
-    no_servers: AtomicU64,
-    cache_misses: AtomicU64,
-    cache_hits: AtomicU64,
-    sum_request_bytes: AtomicU64,
-    sum_response_bytes: AtomicU64,
-    sum_response_millis: AtomicU64,
-    histograms: AsyncMutex<ProxyResponseHistograms>,
-}
-
-#[derive(Clone, Debug, From, Hash, PartialEq, Eq, PartialOrd, Ord)]
-pub struct UserProxyResponseKey {
-    rpc_key_id: u64,
-    method: String,
-    error_response: bool,
-}
-
-// TODO: think about nested maps more. does this need an arc?
-pub type UserProxyResponseCache = Arc<DashMap<UserProxyResponseKey, Arc<ProxyResponseAggregate>>>;
-/// key is the "time bucket's timestamp" (timestamp / period * period)
-pub type TimeProxyResponseCache =
-    Cache<TimeBucketTimestamp, UserProxyResponseCache, hashbrown::hash_map::DefaultHashBuilder>;
-
-pub struct StatEmitter {
-    chain_id: u64,
-    db_conn: DatabaseConnection,
-    period_seconds: u64,
-    /// the outer cache has a TTL and a handler for expiration
-    aggregated_proxy_responses: TimeProxyResponseCache,
-    save_rx: flume::Receiver<UserProxyResponseCache>,
-}
-
-/// A stat that we aggregate and then store in a database.
-#[derive(Debug, From)]
-pub enum Web3ProxyStat {
-    Response(ProxyResponseStat),
-}
-
-impl ProxyResponseStat {
-    // TODO: should RequestMetadata be in an arc? or can we handle refs here?
-    pub fn new(
-        method: String,
-        authorized_key: AuthorizedKey,
-        metadata: Arc<RequestMetadata>,
-        response: &JsonRpcForwardedResponse,
-    ) -> Self {
-        // TODO: do this without serializing to a string. this is going to slow us down!
-        let response_bytes = serde_json::to_string(response)
-            .expect("serializing here should always work")
-            .len() as u64;
-
-        let archive_request = metadata.archive_request.load(Ordering::Acquire);
-        let backend_requests = metadata.backend_requests.load(Ordering::Acquire);
-        let period_seconds = metadata.period_seconds;
-        let period_timestamp =
-            (metadata.start_datetime.timestamp() as u64) / period_seconds * period_seconds;
-        let request_bytes = metadata.request_bytes;
-        let error_response = metadata.error_response.load(Ordering::Acquire);
-
-        // TODO: timestamps could get confused by leap seconds. need tokio time instead
-        let response_millis = metadata.start_instant.elapsed().as_millis() as u64;
-
-        Self {
-            rpc_key_id: authorized_key.rpc_key_id,
-            archive_request,
-            method,
-            backend_requests,
-            period_seconds,
-            period_timestamp,
-            request_bytes,
-            error_response,
-            response_bytes,
-            response_millis,
-        }
-    }
-}
-
-impl StatEmitter {
-    pub fn new(chain_id: u64, db_conn: DatabaseConnection, period_seconds: u64) -> Arc<Self> {
-        let (save_tx, save_rx) = flume::unbounded();
-
-        // this needs to be long enough that there are definitely no outstanding queries
-        // TODO: what should the "safe" multiplier be? what if something is late?
-        // TODO: in most cases this delays more than necessary. think of how to do this without dashmap which might let us proceed
-        let ttl_seconds = period_seconds * 3;
-
-        let aggregated_proxy_responses = CacheBuilder::default()
-            .time_to_live(Duration::from_secs(ttl_seconds))
-            .eviction_listener_with_queued_delivery_mode(move |_, v, _| {
-                // this function must not panic!
-                if let Err(err) = save_tx.send(v) {
-                    error!(?err, "unable to save. sender closed!");
-                }
-            })
-            .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::new());
-
-        let s = Self {
-            chain_id,
-            db_conn,
-            period_seconds,
-            aggregated_proxy_responses,
-            save_rx,
-        };
-
-        Arc::new(s)
-    }
-
-    pub async fn spawn(
-        self: Arc<Self>,
-        shutdown_receiver: broadcast::Receiver<()>,
-    ) -> anyhow::Result<(
-        flume::Sender<Web3ProxyStat>,
-        JoinHandle<anyhow::Result<()>>,
-        JoinHandle<anyhow::Result<()>>,
-    )> {
-        let (aggregate_tx, aggregate_rx) = flume::unbounded::<Web3ProxyStat>();
-
-        let (finished_tx, finished_rx) = flume::bounded(1);
-
-        let aggregate_handle = tokio::spawn(self.clone().aggregate_stats_loop(
-            aggregate_rx,
-            shutdown_receiver,
-            finished_rx,
-        ));
-        let save_handle = tokio::spawn(self.save_stats_loop(finished_tx));
-
-        Ok((aggregate_tx, aggregate_handle, save_handle))
-    }
-
-    /// simple future that reads the channel and aggregates stats in a local cache.
-    async fn aggregate_stats_loop(
-        self: Arc<Self>,
-        aggregate_rx: flume::Receiver<Web3ProxyStat>,
-        mut shutdown_receiver: broadcast::Receiver<()>,
-        finished_rx: flume::Receiver<()>,
-    ) -> anyhow::Result<()> {
-        loop {
-            tokio::select! {
-                x = aggregate_rx.recv_async() => {
-                    match x {
-                        Ok(x) => {
-                            trace!(?x, "aggregating stat");
-
-                            // TODO: increment global stats (in redis? in local cache for prometheus?)
-
-                            // TODO: batch stats?
-                            // TODO: where can we wait on this handle?
-                            let clone = self.clone();
-                            tokio::spawn(async move { clone.aggregate_stat(x).await });
-                        },
-                        Err(err) => {
-                            error!(?err, "aggregate_rx");
-                        }
-                    }
-                }
-                x = shutdown_receiver.recv() => {
-                    match x {
-                        Ok(_) => {
-                            info!("aggregate stats loop shutting down");
-                            // TODO: call aggregate_stat for all the
-                        },
-                        Err(err) => error!(?err, "shutdown receiver"),
-                    }
-                    break;
-                }
-            }
-        }
-
-        // shutting down. force a save of any pending stats
-        // we do not use invalidate_all because that is done on a background thread
-        // TODO: i don't think this works
-        for (key, _) in self.aggregated_proxy_responses.into_iter() {
-            // TODO: call drain or remove or something instead?
-            self.aggregated_proxy_responses.invalidate(&key).await;
-        }
-
-        self.aggregated_proxy_responses.sync();
-
-        todo!("drop self.aggregated_proxy_responses");
-
-        // TODO: timeout on this?
-        finished_rx.recv_async().await?;
-
-        info!("aggregate stats loop finished");
-
-        Ok(())
-    }
-
-    async fn save_stats_loop(
-        self: Arc<Self>,
-        finished_tx: flume::Sender<()>,
-    ) -> anyhow::Result<()> {
-        while let Ok(x) = self.save_rx.recv_async().await {
-            // TODO: batch these
-            // TODO: i'm not seeing these on shutdown
-            for x in x.iter() {
-                let k = x.key();
-                let v = x.value();
-
-                // TODO: this is a lot of variables
-                let period_datetime = Utc.timestamp(v.period_timestamp as i64, 0);
-                let frontend_requests = v.frontend_requests.load(Ordering::Acquire);
-                let backend_requests = v.backend_requests.load(Ordering::Acquire);
-                let backend_retries = v.backend_retries.load(Ordering::Acquire);
-                let no_servers = v.no_servers.load(Ordering::Acquire);
-                let cache_misses = v.cache_misses.load(Ordering::Acquire);
-                let cache_hits = v.cache_hits.load(Ordering::Acquire);
-                let sum_request_bytes = v.sum_request_bytes.load(Ordering::Acquire);
-                let sum_response_millis = v.sum_response_millis.load(Ordering::Acquire);
-                let sum_response_bytes = v.sum_response_bytes.load(Ordering::Acquire);
-
-                let histograms = v.histograms.lock().await;
-
-                let request_bytes = &histograms.request_bytes;
-
-                let min_request_bytes = request_bytes.min();
-                let mean_request_bytes = request_bytes.mean();
-                let p50_request_bytes = request_bytes.value_at_quantile(0.50);
-                let p90_request_bytes = request_bytes.value_at_quantile(0.90);
-                let p99_request_bytes = request_bytes.value_at_quantile(0.99);
-                let max_request_bytes = request_bytes.max();
-
-                let response_millis = &histograms.response_millis;
-
-                let min_response_millis = response_millis.min();
-                let mean_response_millis = response_millis.mean();
-                let p50_response_millis = response_millis.value_at_quantile(0.50);
-                let p90_response_millis = response_millis.value_at_quantile(0.90);
-                let p99_response_millis = response_millis.value_at_quantile(0.99);
-                let max_response_millis = response_millis.max();
-
-                let response_bytes = &histograms.response_bytes;
-
-                let min_response_bytes = response_bytes.min();
-                let mean_response_bytes = response_bytes.mean();
-                let p50_response_bytes = response_bytes.value_at_quantile(0.50);
-                let p90_response_bytes = response_bytes.value_at_quantile(0.90);
-                let p99_response_bytes = response_bytes.value_at_quantile(0.99);
-                let max_response_bytes = response_bytes.max();
-
-                drop(histograms);
-
-                let stat = rpc_accounting::ActiveModel {
-                    id: sea_orm::NotSet,
-
-                    rpc_key_id: sea_orm::Set(k.rpc_key_id),
-                    chain_id: sea_orm::Set(self.chain_id),
-                    method: sea_orm::Set(k.method.clone()),
-                    archive_request: sea_orm::Set(v.archive_request),
-                    error_response: sea_orm::Set(k.error_response),
-                    period_datetime: sea_orm::Set(period_datetime),
-                    frontend_requests: sea_orm::Set(frontend_requests),
-                    backend_requests: sea_orm::Set(backend_requests),
-                    backend_retries: sea_orm::Set(backend_retries),
-                    no_servers: sea_orm::Set(no_servers),
-                    cache_misses: sea_orm::Set(cache_misses),
-                    cache_hits: sea_orm::Set(cache_hits),
-
-                    sum_request_bytes: sea_orm::Set(sum_request_bytes),
-                    min_request_bytes: sea_orm::Set(min_request_bytes),
-                    mean_request_bytes: sea_orm::Set(mean_request_bytes),
-                    p50_request_bytes: sea_orm::Set(p50_request_bytes),
-                    p90_request_bytes: sea_orm::Set(p90_request_bytes),
-                    p99_request_bytes: sea_orm::Set(p99_request_bytes),
-                    max_request_bytes: sea_orm::Set(max_request_bytes),
-
-                    sum_response_millis: sea_orm::Set(sum_response_millis),
-                    min_response_millis: sea_orm::Set(min_response_millis),
-                    mean_response_millis: sea_orm::Set(mean_response_millis),
-                    p50_response_millis: sea_orm::Set(p50_response_millis),
-                    p90_response_millis: sea_orm::Set(p90_response_millis),
-                    p99_response_millis: sea_orm::Set(p99_response_millis),
-                    max_response_millis: sea_orm::Set(max_response_millis),
-
-                    sum_response_bytes: sea_orm::Set(sum_response_bytes),
-                    min_response_bytes: sea_orm::Set(min_response_bytes),
-                    mean_response_bytes: sea_orm::Set(mean_response_bytes),
-                    p50_response_bytes: sea_orm::Set(p50_response_bytes),
-                    p90_response_bytes: sea_orm::Set(p90_response_bytes),
-                    p99_response_bytes: sea_orm::Set(p99_response_bytes),
-                    max_response_bytes: sea_orm::Set(max_response_bytes),
-                };
-
-                // TODO: if this fails, what should we do?
-                if let Err(err) = stat
-                    .save(&self.db_conn)
-                    .await
-                    .context("Saving rpc_accounting stat")
-                {
-                    error!(?err, "unable to save aggregated stats");
-                } else {
-                    trace!("stat saved");
-                }
-            }
-        }
-
-        info!("stat saver exited");
-
-        finished_tx.send_async(()).await?;
-
-        Ok(())
-    }
-
-    pub async fn aggregate_stat(&self, stat: Web3ProxyStat) -> anyhow::Result<()> {
-        match stat {
-            Web3ProxyStat::Response(stat) => {
-                // TODO: move this whole closure to another function?
-
-                debug_assert_eq!(stat.period_seconds, self.period_seconds);
-
-                // get the user cache for the current period
-                // TODO: i don't think this works right. maybe do DashMap entry api as the outer variable
-                let user_cache = self
-                    .aggregated_proxy_responses
-                    .get_with(stat.period_timestamp, async move { Default::default() })
-                    .await;
-
-                let key = (stat.rpc_key_id, stat.method, stat.error_response).into();
-
-                let user_aggregate = match user_cache.entry(key) {
-                    Entry::Occupied(x) => x.get().clone(),
-                    Entry::Vacant(y) => {
-                        let histograms = ProxyResponseHistograms::default();
-
-                        // TODO: create a counter here that we use to tell when it is safe to flush these? faster than waiting 3 periods
-
-                        let aggregate = ProxyResponseAggregate {
-                            period_timestamp: stat.period_timestamp,
-                            archive_request: stat.archive_request,
-                            // start most things at 0 because we add outside this getter
-                            frontend_requests: 0.into(),
-                            backend_requests: 0.into(),
-                            backend_retries: 0.into(),
-                            no_servers: 0.into(),
-                            cache_misses: 0.into(),
-                            cache_hits: 0.into(),
-                            sum_request_bytes: 0.into(),
-                            sum_response_bytes: 0.into(),
-                            sum_response_millis: 0.into(),
-                            histograms: AsyncMutex::new(histograms),
-                        };
-
-                        let aggregate = Arc::new(aggregate);
-
-                        y.insert(aggregate).clone()
-                    }
-                };
-
-                // a stat always come from just 1 frontend request
-                user_aggregate
-                    .frontend_requests
-                    .fetch_add(1, Ordering::Acquire);
-
-                if stat.backend_requests == 0 {
-                    // no backend request. cache hit!
-                    user_aggregate.cache_hits.fetch_add(1, Ordering::Acquire);
-                } else {
-                    // backend requests! cache miss!
-                    user_aggregate.cache_misses.fetch_add(1, Ordering::Acquire);
-
-                    // a stat might have multiple backend requests
-                    user_aggregate
-                        .backend_requests
-                        .fetch_add(stat.backend_requests, Ordering::Acquire);
-                }
-
-                user_aggregate
-                    .sum_request_bytes
-                    .fetch_add(stat.request_bytes, Ordering::Release);
-
-                user_aggregate
-                    .sum_response_bytes
-                    .fetch_add(stat.response_bytes, Ordering::Release);
-
-                user_aggregate
-                    .sum_response_millis
-                    .fetch_add(stat.response_millis, Ordering::Release);
-
-                {
-                    let mut histograms = user_aggregate.histograms.lock().await;
-
-                    // TODO: use `record_correct`?
-                    histograms.request_bytes.record(stat.request_bytes)?;
-                    histograms.response_millis.record(stat.response_millis)?;
-                    histograms.response_bytes.record(stat.response_bytes)?;
-                }
-            }
-        }
-
-        Ok(())
-    }
-}