stats v2

rebased all my commits and squashed them down to one

parent 5695c1b06e
commit eb4d05a520
@ -1,6 +1,7 @@
[build]
rustflags = [
    # potentially faster. https://nnethercote.github.io/perf-book/build-configuration.html
    # TODO: we might want to disable this so it's easier to run the proxy across different AWS instance types
    "-C", "target-cpu=native",
    # tokio unstable is needed for tokio-console
    "--cfg", "tokio_unstable"
4 .vscode/settings.json (vendored)
@ -1,3 +1 @@
{
    "rust-analyzer.cargo.features": "all"
}
{}

556 Cargo.lock (generated)
File diff suppressed because it is too large
@ -33,11 +33,12 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry \
RUN --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/app/target \
    cargo install \
    --features tokio-uring \
    --locked \
    --no-default-features \
    --path ./web3_proxy \
    --profile faster_release \
    --root /opt/bin \
    --path ./web3_proxy
    --root /opt/bin

#
# We do not need the Rust toolchain to run the binary!
81 TODO.md
@ -369,6 +369,36 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||
- have a blocking future watching the config file and calling app.apply_config() on first load and on change
|
||||
- work started on this in the "config_reloads" branch. because of how we pass channels around during spawn, this requires a larger refactor.
|
||||
- [-] if we subscribe to a server that is syncing, it gives us null block_data_limit. when it catches up, we don't ever send queries to it. we need to recheck block_data_limit
|
||||
- [ ] don't use new_head_provider anywhere except new head subscription
|
||||
- [x] remove the "metered" crate now that we save aggregate queries?
|
||||
- [x] don't use systemtime. use chrono
|
||||
- [x] graceful shutdown (the two-stage shutdown signal is sketched after this list)
|
||||
- [x] frontend needs to shut down first. this will stop serving requests on /health and so new requests should quickly stop being routed to us
|
||||
- [x] when frontend has finished, tell all the other tasks to stop
|
||||
- [x] stats buffer needs to flush to both the database and influxdb
|
||||
- [x] `rpc_accounting` script
|
||||
- [x] period_datetime should always round to the start of the minute. this will ensure aggregations use as few rows as possible
|
||||
- [x] weighted random choice should still prioritize non-archive servers
|
||||
- maybe shuffle randomly and then sort by (block_limit, random_index)?
|
||||
- maybe sum available_requests grouped by archive/non-archive. only limit to non-archive if they have enough?
|
||||
- [x] if we subscribe to a server that is syncing, it gives us null block_data_limit. when it catches up, we don't ever send queries to it. we need to recheck block_data_limit
|
||||
- [x] add a "backup" tier that is only used if balanced_rpcs has "no servers synced"
|
||||
- use this tier to check timestamp on latest block. if we are behind that by more than a few seconds, something is wrong
|
||||
- [x] `change_user_tier_by_address` script
|
||||
- [x] emit stats for user's successes, retries, failures, with the types of requests, chain, rpc
|
||||
- [x] add caching to speed up stat queries
|
||||
- [x] config parsing is strict right now. this makes it hard to deploy on git push since configs need to change along with it
|
||||
- changed to only emit a warning if there is an unknown configuration key
|
||||
- [x] make the "not synced" error more verbose
|
||||
- [x] short lived cache on /health
|
||||
- [x] cache /status for longer
|
||||
- [x] sort connections during eth_sendRawTransaction
|
||||
- [x] block all admin_ rpc commands
|
||||
- [x] remove the "metered" crate now that we save aggregate queries?
|
||||
- [x] add archive depth to app config
|
||||
- [x] improve "archive_needed" boolean. change to "block_depth"
|
||||
- [x] keep score of new_head timings for all rpcs
|
||||
- [x] having the whole block in /status is very verbose. trim it down
|
||||
- [-] proxy mode for benchmarking all backends
|
||||
- [-] proxy mode for sending to multiple backends
|
||||
- [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly
|
||||
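
The graceful-shutdown items above describe a specific ordering: the frontend stops first (so /health disappears and traffic drains), then the remaining tasks are told to stop, and the stat buffer flushes before exiting. Below is a minimal sketch of that two-stage wiring with `tokio::sync::broadcast`, in the spirit of the `shutdown_sender`/`shutdown_receiver` pattern `Web3ProxyApp::spawn` uses; the task bodies and names here are illustrative, not the real ones.

```rust
use tokio::sync::broadcast;

#[tokio::main]
async fn main() {
    // stage 1: tells the frontend to stop serving /health and new requests
    let (frontend_shutdown, _) = broadcast::channel::<()>(1);
    // stage 2: tells the background tasks (stat buffer, etc.) to flush and exit
    let (background_shutdown, _) = broadcast::channel::<()>(1);

    let mut frontend_rx = frontend_shutdown.subscribe();
    let frontend = tokio::spawn(async move {
        // ... serve requests here ...
        let _ = frontend_rx.recv().await;
        // returning stops the server: /health goes away and new requests stop being routed to us
    });

    let mut stats_rx = background_shutdown.subscribe();
    let stat_buffer = tokio::spawn(async move {
        // ... aggregate stats here ...
        let _ = stats_rx.recv().await;
        // flush any remaining aggregates to the database/influxdb before returning
    });

    tokio::signal::ctrl_c().await.expect("failed to listen for ctrl_c");

    // the frontend shuts down first...
    let _ = frontend_shutdown.send(());
    let _ = frontend.await;

    // ...then everything else, with the stat buffer flushing last
    let _ = background_shutdown.send(());
    let _ = stat_buffer.await;
}
```
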
@ -391,7 +421,15 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||
- [ ] maybe we shouldn't route eth_getLogs to syncing nodes. serving queries slows down sync significantly
|
||||
- change the send_best function to only include servers that are at least close to fully synced
|
||||
- [ ] have private transactions be enabled by a url setting rather than a setting on the key
|
||||
- [ ] enable mev protected transactions with either a /protect/ url (instead of /private/) or the database (when on /rpc/)
|
||||
- [ ] cli for adding rpc keys to an existing user
|
||||
- [ ] rename "private" to "mev protected" to avoid confusion about private transactions being public once they are mined
|
||||
- [ ] allow restricting an rpc key to specific chains
|
||||
- [ ] writes to request_latency should be handled by a background task so they don't slow down the request (a channel-based sketch follows this list)
- maybe we can use https://docs.rs/hdrhistogram/latest/hdrhistogram/sync/struct.SyncHistogram.html
|
||||
- [ ] keep re-broadcasting transactions until they are confirmed
|
||||
- [ ] if mev protection is disabled, we should send to *both* balanced_rpcs *and* private_rpcs
- [ ] if mev protection is enabled, we should send to *only* private_rpcs
|
||||
- [ ] rate limiting/throttling on query_user_stats
|
||||
- [ ] web3rpc configs should have a max_concurrent_requests
|
||||
- will probably want a tool for calculating a safe value for this. too low and we could kill our performance
|
||||
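
One way to satisfy the request_latency item above without touching the hot path, using crates this repo already depends on (`flume`, `hdrhistogram`, `log`, `tokio`): the request handler only does a cheap channel send and a background task owns the histogram. The linked `SyncHistogram` is an alternative that shards recording instead of funneling it through a channel. The function and variable names below are made up for illustration.

```rust
use hdrhistogram::Histogram;

/// spawn once at startup; request handlers clone the returned sender
fn spawn_latency_recorder() -> flume::Sender<u64> {
    let (latency_tx, latency_rx) = flume::unbounded::<u64>();

    tokio::spawn(async move {
        // 3 significant figures is plenty for millisecond latencies
        let mut request_latency = Histogram::<u64>::new(3).expect("creating histogram");

        while let Ok(millis) = latency_rx.recv_async().await {
            // saturating_record clamps out-of-range values instead of erroring
            request_latency.saturating_record(millis);
        }

        // all senders dropped: report the final percentiles before exiting
        log::info!(
            "request_latency p50={}ms p99={}ms",
            request_latency.value_at_quantile(0.50),
            request_latency.value_at_quantile(0.99),
        );
    });

    latency_tx
}

// in a request handler, recording becomes a non-blocking send:
// latency_tx.send(elapsed_millis).ok();
```
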
@ -400,44 +438,45 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||
- [ ] setting request limits to None is broken. it does max u64 and then the internal deferred rate limiter overflows when it computes `x*99/100`
|
||||
- [ ] if kafka fails to connect at the start, automatically reconnect
|
||||
- [ ] during shutdown, mark the proxy unhealthy and send unsubscribe responses for any open websocket subscriptions
|
||||
- [ ] setting request limits to None is broken. it does max u64 and then the internal deferred rate limiter count overflows when it does `x*99/100`
|
||||
- [ ] during shutdown, send unsubscribe responses for any open websocket subscriptions
|
||||
- [ ] some chains still use total_difficulty. have total_difficulty be used only if the chain needs it
|
||||
- if total difficulty is not on the block and we aren't on ETH, fetch the full block instead of just the header
|
||||
- if total difficulty is set and non-zero, use it for consensus instead of just the number
|
||||
- [ ] query_user_stats cache hit rate
|
||||
- [ ] need debounce on reconnect. websockets are closing on us and then we reconnect twice. locks on ProviderState need more thought
|
||||
- [ ] having the whole block in status is very verbose. trim it down
|
||||
- [ ] `cost estimate` script
|
||||
- sum bytes and number of requests. prompt for hosting costs. divide
|
||||
- [ ] `stat delay` script
|
||||
- query database for newest stat
|
||||
- [ ] period_datetime should always be :00. right now it depends on start time (see the truncation sketch after this list)
|
||||
- [ ] having the whole block in /status is very verbose. trim it down
|
||||
- [ ] we have our hard rate limiter set up with a period of 60, but most providers have a period of 1
- [ ] two servers running will confuse rpc_accounting!
- it won't happen with users often because they should be sticky to one proxy, but unauthenticated users will definitely hit this
- one option: we need the insert to be an upsert, but how do we merge histograms?
|
||||
- [ ] don't use systemtime. use chrono
|
||||
- [ ] soft limit needs more thought
|
||||
- it should be the min of total_sum_soft_limit (from only non-lagged servers) and min_sum_soft_limit
|
||||
- otherwise it won't track anything and will just give errors.
|
||||
- but if web3 proxy has just started, we should give some time otherwise we will thundering herd the first server that responds
|
||||
- [ ] connection pool for websockets. use tokio-tungstenite directly. no need for ethers providers since serde_json is enough for us
|
||||
- this should also get us closer to being able to do our own streaming json parser where we can
|
||||
- [ ] get `oldest_allowed` out of config. or calculate automatically based on block time.
|
||||
- [ ] `change_user_tier_by_address` script
|
||||
- [ ] figure out if "could not get block from params" is a problem worth logging
|
||||
- maybe it was an ots request?
|
||||
- [ ] eth_subscribe rpc_accounting has everything as cache_hits. should we instead count it as one background request?
|
||||
- [ ] change redirect_rpc_key_url to match the newest url scheme
|
||||
- [ ] implement filters
|
||||
- [ ] implement remaining subscriptions
|
||||
- would be nice if our subscriptions had better guarantees than geth/erigon do, but maybe it is simpler to just set up a broadcast channel and proxy all the responses to a backend instead
|
||||
- [ ] tests should use `test-env-log = "0.2.8"`
|
||||
- [ ] weighted random choice should still prioritize non-archive servers
|
||||
- maybe shuffle randomly and then sort by (block_limit, random_index)?
|
||||
- maybe sum available_requests grouped by archive/non-archive. only limit to non-archive if they have enough?
|
||||
- [ ] some places we call it "accounting" others a "stat". be consistent
|
||||
- [ ] cli commands to search users by key
|
||||
- [ ] flamegraphs show 25% of the time to be in moka-housekeeper. tune that
|
||||
- [ ] config parsing is strict right now. this makes it hard to deploy on git push since configs need to change along with it
|
||||
- [ ] when displaying the user's data, they just see an opaque id for their tier. We should join that data
|
||||
- [ ] refactor so configs can change while running
|
||||
- this will probably be a rather large change, but is necessary when we have autoscaling
|
||||
- create the app without applying any config to it
|
||||
- have a blocking future watching the config file and calling app.apply_config() on first load and on change
|
||||
- work started on this in the "config_reloads" branch. because of how we pass channels around during spawn, this requires a larger refactor.
|
||||
- [ ] when displaying the user's data, they just see an opaque id for their tier. We should join that data so they see the tier name and limits
|
||||
- [ ] add indexes to speed up stat queries
|
||||
- [ ] the public rpc is rate limited by ip and the authenticated rpc is rate limit by key
|
||||
- this means if a dapp uses the authenticated RPC on their website, they could get rate limited more easily
|
||||
- [ ] add caching to speed up stat queries
|
||||
- [ ] take an option to set a non-default role when creating a user
|
||||
- [ ] different prune levels for free tiers
|
||||
- [ ] have a test that runs ethspam and versus
|
||||
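
For the `period_datetime` item above, the rounding is just integer truncation of the unix timestamp before converting back to a datetime, matching the `Utc.timestamp_opt(...)` call the current stat emitter already uses. A minimal sketch, assuming the 60 second period the stats code uses:

```rust
use chrono::{TimeZone, Utc};

/// round a unix timestamp down to the start of its period (period 60 => start of the minute)
fn period_start(timestamp: i64, period_seconds: i64) -> chrono::DateTime<Utc> {
    let truncated = timestamp - (timestamp % period_seconds);

    Utc.timestamp_opt(truncated, 0).unwrap()
}

fn main() {
    let now = Utc::now().timestamp();

    // always lands on :00, no matter when the proxy was started
    println!("{}", period_start(now, 60));
}
```

Rounding this way also means every proxy instance buckets into the same period_datetime, which lines up with the "two servers running will confuse rpc_accounting" concern noted earlier.
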
@ -451,14 +490,10 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||
- [ ] after running for a while, https://eth-ski.llamanodes.com/status is only at 157 blocks and hashes. i thought they would be near 10k after running for a while
|
||||
- adding uptime to the status should help
|
||||
- i think this is already in our todo list
|
||||
- [ ] improve private transactions. keep re-broadcasting until they are confirmed
|
||||
- [ ] write a test that uses the cli to create a user and modifies their key
|
||||
- [ ] Uuid/Ulid instead of big_unsigned for database ids
|
||||
- might have to use Uuid in sea-orm and then convert to Ulid on display
|
||||
- https://www.kostolansky.sk/posts/how-to-migrate-to-uuid/
|
||||
- [ ] make the "not synced" error more verbose
|
||||
- I think there is a bug in our synced_rpcs filtering. likely in has_block_data
|
||||
- seeing "not synced" when I load https://vfat.tools/esd/
|
||||
- [ ] emit standard deviation?
|
||||
- [ ] emit global stat on retry
|
||||
- [ ] emit global stat on no servers synced
|
||||
@ -510,12 +545,11 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||
- [ ] nice output when cargo doc is run
|
||||
- [ ] cache more things locally or in redis
|
||||
- [ ] stats when forks are resolved (and what chain they were on?)
|
||||
- [ ] emit stats for user's successes, retries, failures, with the types of requests, chain, rpc
|
||||
- [ ] Only subscribe to transactions when someone is listening and if the server has opted in to it
|
||||
- [ ] When sending eth_sendRawTransaction, retry errors
|
||||
- [ ] If we need an archive server and no servers in sync, exit immediately with an error instead of waiting 60 seconds
|
||||
- [ ] 120 second timeout is too short. Maybe do that for free tier and larger timeout for paid. Problem is that some queries can take over 1000 seconds
|
||||
- [ ] when handling errors from axum parsing the Json...Enum, the errors don't get wrapped in json. i think we need a axum::Layer
|
||||
- [ ] when handling errors from axum parsing the Json...Enum in the function signature, the errors don't get wrapped in json. i think we need a axum::Layer
|
||||
- [ ] don't "unwrap" anywhere. give proper errors
|
||||
- [ ] handle log subscriptions
|
||||
- probably as a paid feature
|
||||
@ -546,6 +580,11 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||
The above methods return Entry type, which provides is_fresh method to check if the value was freshly computed or already existed in the cache.
|
||||
- [ ] lag message always shows on first response
|
||||
- http interval on blastapi lagging by 1!
|
||||
- [ ] change scoring for rpcs again. "p2c ewma"
|
||||
- [ ] weighted random sort: (soft_limit - ewma active requests * num web3_proxy servers)
|
||||
- 2. soft_limit
|
||||
- [ ] pick 2 servers from the random sort.
|
||||
- [ ] exponential weighted moving average for block subscriptions of time behind the first server (works well for ws but not http)
|
||||
|
||||
## V2
|
||||
|
||||
@ -690,9 +729,13 @@ in another repo: event subscriber
|
||||
- [ ] have an upgrade tier that queries multiple backends at once. returns on first Ok result, collects errors. if no Ok, find the most common error and then respond with that
|
||||
- [ ] give public_recent_ips_salt a better, more general, name
|
||||
- [ ] include tier in the head block logs?
|
||||
<<<<<<< HEAD
|
||||
- [ ] i think i use FuturesUnordered when a try_join_all might be better
|
||||
- [ ] since we are read-heavy on our configs, maybe we should use a cache
|
||||
- "using a thread local storage and explicit types" https://docs.rs/arc-swap/latest/arc_swap/cache/struct.Cache.html
|
||||
- [ ] tests for config reloading
|
||||
- [ ] use pin instead of arc for a bunch of things?
|
||||
- https://fasterthanli.me/articles/pin-and-suffering
|
||||
=======
|
||||
- [ ] calculate archive depth automatically based on block_data_limits
|
||||
>>>>>>> 77df3fa (stats v2)
|
||||
|
@ -13,6 +13,11 @@ db_replica_url = "mysql://root:dev_web3_proxy@127.0.0.1:13306/dev_web3_proxy"
|
||||
|
||||
kafka_urls = "127.0.0.1:19092"
|
||||
|
||||
# a timeseries database is optional. it is used for making pretty graphs
|
||||
influxdb_host = "http://127.0.0.1:18086"
|
||||
influxdb_org = "dev_org"
|
||||
influxdb_token = "dev_web3_proxy_auth_token"
|
||||
|
||||
# thundering herd protection
|
||||
# only mark a block as the head block if the sum of their soft limits is greater than or equal to min_sum_soft_limit
|
||||
min_sum_soft_limit = 2_000
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.6
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use crate::serialization;
|
||||
use sea_orm::entity::prelude::*;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.6
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
pub mod prelude;
|
||||
|
||||
@ -8,6 +8,7 @@ pub mod login;
|
||||
pub mod pending_login;
|
||||
pub mod revert_log;
|
||||
pub mod rpc_accounting;
|
||||
pub mod rpc_accounting_v2;
|
||||
pub mod rpc_key;
|
||||
pub mod sea_orm_active_enums;
|
||||
pub mod secondary_user;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use crate::serialization;
|
||||
use sea_orm::entity::prelude::*;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
pub use super::admin::Entity as Admin;
|
||||
pub use super::admin_trail::Entity as AdminTrail;
|
||||
@ -6,6 +6,7 @@ pub use super::login::Entity as Login;
|
||||
pub use super::pending_login::Entity as PendingLogin;
|
||||
pub use super::revert_log::Entity as RevertLog;
|
||||
pub use super::rpc_accounting::Entity as RpcAccounting;
|
||||
pub use super::rpc_accounting_v2::Entity as RpcAccountingV2;
|
||||
pub use super::rpc_key::Entity as RpcKey;
|
||||
pub use super::secondary_user::Entity as SecondaryUser;
|
||||
pub use super::user::Entity as User;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use super::sea_orm_active_enums::Method;
|
||||
use crate::serialization;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use sea_orm::entity::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
47 entities/src/rpc_accounting_v2.rs Normal file
@ -0,0 +1,47 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use sea_orm::entity::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)]
|
||||
#[sea_orm(table_name = "rpc_accounting_v2")]
|
||||
pub struct Model {
|
||||
#[sea_orm(primary_key)]
|
||||
pub id: u64,
|
||||
pub rpc_key_id: Option<u64>,
|
||||
pub chain_id: u64,
|
||||
pub period_datetime: DateTimeUtc,
|
||||
pub method: Option<String>,
|
||||
pub origin: Option<String>,
|
||||
pub archive_needed: bool,
|
||||
pub error_response: bool,
|
||||
pub frontend_requests: u64,
|
||||
pub backend_requests: u64,
|
||||
pub backend_retries: u64,
|
||||
pub no_servers: u64,
|
||||
pub cache_misses: u64,
|
||||
pub cache_hits: u64,
|
||||
pub sum_request_bytes: u64,
|
||||
pub sum_response_millis: u64,
|
||||
pub sum_response_bytes: u64,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
|
||||
pub enum Relation {
|
||||
#[sea_orm(
|
||||
belongs_to = "super::rpc_key::Entity",
|
||||
from = "Column::RpcKeyId",
|
||||
to = "super::rpc_key::Column::Id",
|
||||
on_update = "NoAction",
|
||||
on_delete = "NoAction"
|
||||
)]
|
||||
RpcKey,
|
||||
}
|
||||
|
||||
impl Related<super::rpc_key::Entity> for Entity {
|
||||
fn to() -> RelationDef {
|
||||
Relation::RpcKey.def()
|
||||
}
|
||||
}
|
||||
|
||||
impl ActiveModelBehavior for ActiveModel {}
|
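
Since `rpc_accounting_v2` is a new entity, here is a sketch (not code from the repo) of what saving one aggregated row could look like, using the same `sea_orm::Set`/`NotSet`/`ActiveModelTrait` pattern the existing `rpc_accounting` stat code uses; every value below is a placeholder.

```rust
use entities::rpc_accounting_v2;
use migration::sea_orm::{self, ActiveModelTrait, DatabaseConnection, DbErr};

async fn save_example_row(db_conn: &DatabaseConnection) -> Result<(), DbErr> {
    let row = rpc_accounting_v2::ActiveModel {
        id: sea_orm::NotSet,
        rpc_key_id: sea_orm::Set(None),
        chain_id: sea_orm::Set(1),
        period_datetime: sea_orm::Set(chrono::Utc::now()),
        method: sea_orm::Set(Some("eth_call".to_string())),
        origin: sea_orm::Set(None),
        archive_needed: sea_orm::Set(false),
        error_response: sea_orm::Set(false),
        frontend_requests: sea_orm::Set(1),
        backend_requests: sea_orm::Set(1),
        backend_retries: sea_orm::Set(0),
        no_servers: sea_orm::Set(0),
        cache_misses: sea_orm::Set(0),
        cache_hits: sea_orm::Set(1),
        sum_request_bytes: sea_orm::Set(100),
        sum_response_millis: sea_orm::Set(10),
        sum_response_bytes: sea_orm::Set(200),
    };

    row.insert(db_conn).await?;

    Ok(())
}
```

Note that the migration below defines a unique index over (rpc_key_id, chain_id, origin, period_datetime, method, archive_needed, error_response), so a real writer needs the upsert/merge behavior the TODO list calls out rather than a plain insert.
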
@ -1,6 +1,6 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use super::sea_orm_active_enums::LogLevel;
|
||||
use super::sea_orm_active_enums::TrackingLevel;
|
||||
use crate::serialization;
|
||||
use sea_orm::entity::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -26,7 +26,8 @@ pub struct Model {
|
||||
#[sea_orm(column_type = "Text", nullable)]
|
||||
pub allowed_user_agents: Option<String>,
|
||||
pub log_revert_chance: f64,
|
||||
pub log_level: LogLevel,
|
||||
// TODO: rename this with a migration
|
||||
pub log_level: TrackingLevel,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
|
||||
@ -35,6 +36,8 @@ pub enum Relation {
|
||||
RevertLog,
|
||||
#[sea_orm(has_many = "super::rpc_accounting::Entity")]
|
||||
RpcAccounting,
|
||||
#[sea_orm(has_many = "super::rpc_accounting_v2::Entity")]
|
||||
RpcAccountingV2,
|
||||
#[sea_orm(
|
||||
belongs_to = "super::user::Entity",
|
||||
from = "Column::UserId",
|
||||
@ -57,6 +60,12 @@ impl Related<super::rpc_accounting::Entity> for Entity {
|
||||
}
|
||||
}
|
||||
|
||||
impl Related<super::rpc_accounting_v2::Entity> for Entity {
|
||||
fn to() -> RelationDef {
|
||||
Relation::RpcAccountingV2.def()
|
||||
}
|
||||
}
|
||||
|
||||
impl Related<super::user::Entity> for Entity {
|
||||
fn to() -> RelationDef {
|
||||
Relation::User.def()
|
||||
|
@ -1,11 +1,12 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use sea_orm::entity::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// TODO: rename to StatLevel? AccountingLevel? What?
|
||||
#[derive(Debug, Clone, PartialEq, Eq, EnumIter, DeriveActiveEnum, Serialize, Deserialize)]
|
||||
#[sea_orm(rs_type = "String", db_type = "Enum", enum_name = "log_level")]
|
||||
pub enum LogLevel {
|
||||
pub enum TrackingLevel {
|
||||
#[sea_orm(string_value = "none")]
|
||||
None,
|
||||
#[sea_orm(string_value = "aggregated")]
|
||||
@ -14,7 +15,7 @@ pub enum LogLevel {
|
||||
Detailed,
|
||||
}
|
||||
|
||||
impl Default for LogLevel {
|
||||
impl Default for TrackingLevel {
|
||||
fn default() -> Self {
|
||||
Self::None
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use super::sea_orm_active_enums::Role;
|
||||
use sea_orm::entity::prelude::*;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use crate::serialization;
|
||||
use sea_orm::entity::prelude::*;
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5
|
||||
//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7
|
||||
|
||||
use sea_orm::entity::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -17,6 +17,7 @@ mod m20230119_204135_better_free_tier;
|
||||
mod m20230130_124740_read_only_login_logic;
|
||||
mod m20230130_165144_prepare_admin_imitation_pre_login;
|
||||
mod m20230215_152254_admin_trail;
|
||||
mod m20230125_204810_stats_v2;
|
||||
|
||||
pub struct Migrator;
|
||||
|
||||
@ -41,6 +42,7 @@ impl MigratorTrait for Migrator {
|
||||
Box::new(m20230130_124740_read_only_login_logic::Migration),
|
||||
Box::new(m20230130_165144_prepare_admin_imitation_pre_login::Migration),
|
||||
Box::new(m20230215_152254_admin_trail::Migration),
|
||||
Box::new(m20230125_204810_stats_v2::Migration),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
157 migration/src/m20230125_204810_stats_v2.rs Normal file
@ -0,0 +1,157 @@
|
||||
use sea_orm_migration::prelude::*;
|
||||
|
||||
#[derive(DeriveMigrationName)]
|
||||
pub struct Migration;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MigrationTrait for Migration {
|
||||
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
manager
|
||||
.create_table(
|
||||
Table::create()
|
||||
.table(RpcAccountingV2::Table)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::Id)
|
||||
.big_unsigned()
|
||||
.not_null()
|
||||
.auto_increment()
|
||||
.primary_key(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::RpcKeyId)
|
||||
.big_unsigned()
|
||||
.null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::ChainId)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(ColumnDef::new(RpcAccountingV2::Origin).string().null())
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::PeriodDatetime)
|
||||
.timestamp()
|
||||
.not_null(),
|
||||
)
|
||||
.col(ColumnDef::new(RpcAccountingV2::Method).string().null())
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::ArchiveNeeded)
|
||||
.boolean()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::ErrorResponse)
|
||||
.boolean()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::FrontendRequests)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::BackendRequests)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::BackendRetries)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::NoServers)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::CacheMisses)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::CacheHits)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::SumRequestBytes)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::SumResponseMillis)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(RpcAccountingV2::SumResponseBytes)
|
||||
.big_unsigned()
|
||||
.not_null(),
|
||||
)
|
||||
.foreign_key(
|
||||
sea_query::ForeignKey::create()
|
||||
.from(RpcAccountingV2::Table, RpcAccountingV2::RpcKeyId)
|
||||
.to(RpcKey::Table, RpcKey::Id),
|
||||
)
|
||||
.index(sea_query::Index::create().col(RpcAccountingV2::ChainId))
|
||||
.index(sea_query::Index::create().col(RpcAccountingV2::Origin))
|
||||
.index(sea_query::Index::create().col(RpcAccountingV2::PeriodDatetime))
|
||||
.index(sea_query::Index::create().col(RpcAccountingV2::Method))
|
||||
.index(sea_query::Index::create().col(RpcAccountingV2::ArchiveNeeded))
|
||||
.index(sea_query::Index::create().col(RpcAccountingV2::ErrorResponse))
|
||||
.index(
|
||||
sea_query::Index::create()
|
||||
.col(RpcAccountingV2::RpcKeyId)
|
||||
.col(RpcAccountingV2::ChainId)
|
||||
.col(RpcAccountingV2::Origin)
|
||||
.col(RpcAccountingV2::PeriodDatetime)
|
||||
.col(RpcAccountingV2::Method)
|
||||
.col(RpcAccountingV2::ArchiveNeeded)
|
||||
.col(RpcAccountingV2::ErrorResponse)
|
||||
.unique(),
|
||||
)
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
manager
|
||||
.drop_table(Table::drop().table(RpcAccountingV2::Table).to_owned())
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Partial table definition
|
||||
#[derive(Iden)]
|
||||
pub enum RpcKey {
|
||||
Table,
|
||||
Id,
|
||||
}
|
||||
|
||||
#[derive(Iden)]
|
||||
enum RpcAccountingV2 {
|
||||
Table,
|
||||
Id,
|
||||
RpcKeyId,
|
||||
ChainId,
|
||||
Origin,
|
||||
PeriodDatetime,
|
||||
Method,
|
||||
ArchiveNeeded,
|
||||
ErrorResponse,
|
||||
FrontendRequests,
|
||||
BackendRequests,
|
||||
BackendRetries,
|
||||
NoServers,
|
||||
CacheMisses,
|
||||
CacheHits,
|
||||
SumRequestBytes,
|
||||
SumResponseMillis,
|
||||
SumResponseBytes,
|
||||
}
|
@ -6,5 +6,6 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.69"
|
||||
chrono = "0.4.23"
|
||||
deadpool-redis = { version = "0.11.1", features = ["rt_tokio_1", "serde"] }
|
||||
tokio = "1.25.0"
|
||||
|
@ -1,7 +1,6 @@
|
||||
//#![warn(missing_docs)]
|
||||
use anyhow::Context;
|
||||
use std::ops::Add;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tokio::time::{Duration, Instant};
|
||||
|
||||
pub use deadpool_redis::redis;
|
||||
@ -48,10 +47,7 @@ impl RedisRateLimiter {
|
||||
|
||||
pub fn now_as_secs(&self) -> f32 {
|
||||
// TODO: if system time doesn't match redis, this won't work great
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("cannot tell the time")
|
||||
.as_secs_f32()
|
||||
(chrono::Utc::now().timestamp_millis() as f32) / 1_000.0
|
||||
}
|
||||
|
||||
pub fn period_id(&self, now_as_secs: f32) -> f32 {
|
||||
|
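
The `now_as_secs` change above replaces `SystemTime`/`UNIX_EPOCH` with chrono, per the "don't use systemtime. use chrono" TODO item. A small standalone check (not from the repo) that the old and new expressions agree, up to the instant between the two clock reads and f32 rounding:

```rust
use std::time::{SystemTime, UNIX_EPOCH};

fn main() {
    // the old expression
    let system_time_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("cannot tell the time")
        .as_secs_f32();

    // the new expression
    let chrono_secs = (chrono::Utc::now().timestamp_millis() as f32) / 1_000.0;

    // both are seconds since the unix epoch
    println!("system_time: {system_time_secs}");
    println!("chrono:      {chrono_secs}");
}
```
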
@ -36,6 +36,7 @@ derive_more = "0.99.17"
|
||||
dotenv = "0.15.0"
|
||||
env_logger = "0.10.0"
|
||||
ethers = { version = "1.0.2", default-features = false, features = ["rustls", "ws"] }
|
||||
ewma = "0.1.1"
|
||||
fdlimit = "0.2.1"
|
||||
flume = "0.10.14"
|
||||
futures = { version = "0.3.26", features = ["thread-pool"] }
|
||||
@ -45,6 +46,7 @@ handlebars = "4.3.6"
|
||||
hashbrown = { version = "0.13.2", features = ["serde"] }
|
||||
hdrhistogram = "7.5.2"
|
||||
http = "0.2.9"
|
||||
influxdb2 = { version = "0.3", features = ["rustls"], default-features = false }
|
||||
ipnet = "2.7.1"
|
||||
itertools = "0.10.5"
|
||||
log = "0.4.17"
|
||||
@ -52,6 +54,7 @@ moka = { version = "0.10.0", default-features = false, features = ["future"] }
|
||||
num = "0.4.0"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = { version = "1.17.1" }
|
||||
ordered-float = "3.4.0"
|
||||
pagerduty-rs = { version = "0.1.6", default-features = false, features = ["async", "rustls", "sync"] }
|
||||
parking_lot = { version = "0.12.1", features = ["arc_lock"] }
|
||||
prettytable = "*"
|
||||
@ -69,11 +72,10 @@ siwe = "0.5.0"
|
||||
time = "0.3.20"
|
||||
tokio = { version = "1.25.0", features = ["full"] }
|
||||
tokio-stream = { version = "0.1.12", features = ["sync"] }
|
||||
tokio-uring = { version = "0.4.0", optional = true }
|
||||
toml = "0.7.2"
|
||||
tower = "0.4.13"
|
||||
tower-http = { version = "0.4.0", features = ["cors", "sensitive-headers"] }
|
||||
ulid = { version = "1.0.0", features = ["serde"] }
|
||||
url = "2.3.1"
|
||||
uuid = "1.3.0"
|
||||
ewma = "0.1.1"
|
||||
ordered-float = "3.4.0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::app::Web3ProxyApp;
|
||||
use crate::frontend::errors::FrontendErrorResponse;
|
||||
use crate::user_queries::get_user_id_from_params;
|
||||
use crate::http_params::get_user_id_from_params;
|
||||
use anyhow::Context;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::{
|
||||
|
@ -1,7 +1,6 @@
|
||||
// TODO: this file is way too big now. move things into other modules
|
||||
mod ws;
|
||||
|
||||
use crate::app_stats::{ProxyResponseStat, StatEmitter, Web3ProxyStat};
|
||||
use crate::block_number::{block_needed, BlockNeeded};
|
||||
use crate::config::{AppConfig, TopConfig};
|
||||
use crate::frontend::authorization::{Authorization, RequestMetadata, RpcSecretKey};
|
||||
@ -10,17 +9,19 @@ use crate::frontend::rpc_proxy_ws::ProxyMode;
|
||||
use crate::jsonrpc::{
|
||||
JsonRpcForwardedResponse, JsonRpcForwardedResponseEnum, JsonRpcRequest, JsonRpcRequestEnum,
|
||||
};
|
||||
use crate::rpcs::blockchain::Web3ProxyBlock;
|
||||
use crate::rpcs::blockchain::{BlocksByHashCache, Web3ProxyBlock};
|
||||
use crate::rpcs::consensus::ConsensusWeb3Rpcs;
|
||||
use crate::rpcs::many::Web3Rpcs;
|
||||
use crate::rpcs::one::Web3Rpc;
|
||||
use crate::rpcs::transactions::TxStatus;
|
||||
use crate::stats::{AppStat, RpcQueryStats, StatBuffer};
|
||||
use crate::user_token::UserBearerToken;
|
||||
use anyhow::Context;
|
||||
use axum::headers::{Origin, Referer, UserAgent};
|
||||
use chrono::Utc;
|
||||
use deferred_rate_limiter::DeferredRateLimiter;
|
||||
use derive_more::From;
|
||||
use entities::sea_orm_active_enums::LogLevel;
|
||||
use entities::sea_orm_active_enums::TrackingLevel;
|
||||
use entities::user;
|
||||
use ethers::core::utils::keccak256;
|
||||
use ethers::prelude::{Address, Bytes, Transaction, TxHash, H256, U64};
|
||||
@ -65,8 +66,8 @@ pub static APP_USER_AGENT: &str = concat!(
|
||||
env!("CARGO_PKG_VERSION")
|
||||
);
|
||||
|
||||
/// TODO: allow customizing the request period?
|
||||
pub static REQUEST_PERIOD: u64 = 60;
|
||||
// aggregate across 1 week
|
||||
const BILLING_PERIOD_SECONDS: i64 = 60 * 60 * 24 * 7;
|
||||
|
||||
#[derive(Debug, From)]
|
||||
struct ResponseCacheKey {
|
||||
@ -153,10 +154,12 @@ type ResponseCache =
|
||||
|
||||
pub type AnyhowJoinHandle<T> = JoinHandle<anyhow::Result<T>>;
|
||||
|
||||
/// TODO: move this
|
||||
#[derive(Clone, Debug, Default, From)]
|
||||
pub struct AuthorizationChecks {
|
||||
/// database id of the primary user. 0 if anon
|
||||
/// TODO: do we need this? its on the authorization so probably not
|
||||
/// TODO: Option<NonZeroU64>?
|
||||
pub user_id: u64,
|
||||
/// the key used (if any)
|
||||
pub rpc_secret_key: Option<RpcSecretKey>,
|
||||
@ -175,17 +178,21 @@ pub struct AuthorizationChecks {
|
||||
pub allowed_user_agents: Option<Vec<UserAgent>>,
|
||||
/// if None, allow any IP Address
|
||||
pub allowed_ips: Option<Vec<IpNet>>,
|
||||
pub log_level: LogLevel,
|
||||
/// how detailed any rpc account entries should be
|
||||
pub tracking_level: TrackingLevel,
|
||||
/// Chance to save reverting eth_call, eth_estimateGas, and eth_sendRawTransaction to the database.
|
||||
/// depending on the caller, errors might be expected. this keeps us from bloating our database
|
||||
/// TODO: f32 would be fine
|
||||
pub log_revert_chance: f64,
|
||||
/// if true, transactions are broadcast to private mempools. They will still be public on the blockchain!
|
||||
/// if true, transactions are broadcast only to private mempools.
|
||||
/// IMPORTANT! Once confirmed by a miner, they will be public on the blockchain!
|
||||
pub private_txs: bool,
|
||||
pub proxy_mode: ProxyMode,
|
||||
}
|
||||
|
||||
/// Simple wrapper so that we can keep track of read only connections.
|
||||
/// This does no blocking of writing in the compiler!
|
||||
/// TODO: move this
|
||||
#[derive(Clone)]
|
||||
pub struct DatabaseReplica(pub DatabaseConnection);
|
||||
|
||||
@ -197,38 +204,60 @@ impl DatabaseReplica {
|
||||
}
|
||||
|
||||
/// The application
|
||||
// TODO: this debug impl is way too verbose. make something smaller
|
||||
// TODO: i'm sure this is more arcs than necessary, but spawning futures makes references hard
|
||||
pub struct Web3ProxyApp {
|
||||
/// Send requests to the best server available
|
||||
pub balanced_rpcs: Arc<Web3Rpcs>,
|
||||
pub http_client: Option<reqwest::Client>,
|
||||
/// Send private requests (like eth_sendRawTransaction) to all these servers
|
||||
pub private_rpcs: Option<Arc<Web3Rpcs>>,
|
||||
response_cache: ResponseCache,
|
||||
// don't drop this or the sender will stop working
|
||||
// TODO: broadcast channel instead?
|
||||
watch_consensus_head_receiver: watch::Receiver<Option<Web3ProxyBlock>>,
|
||||
pending_tx_sender: broadcast::Sender<TxStatus>,
|
||||
/// application config
|
||||
/// TODO: this will need a large refactor to handle reloads while running. maybe use a watch::Receiver?
|
||||
pub config: AppConfig,
|
||||
/// Send private requests (like eth_sendRawTransaction) to all these servers
|
||||
/// TODO: include another type so that we can use private miner relays that do not use JSONRPC requests
|
||||
pub private_rpcs: Option<Arc<Web3Rpcs>>,
|
||||
/// track JSONRPC responses
|
||||
response_cache: ResponseCache,
|
||||
/// rpc clients that subscribe to newHeads use this channel
|
||||
/// don't drop this or the sender will stop working
|
||||
/// TODO: broadcast channel instead?
|
||||
pub watch_consensus_head_receiver: watch::Receiver<Option<Web3ProxyBlock>>,
|
||||
/// rpc clients that subscribe to pendingTransactions use this channel
|
||||
/// This is the Sender so that new channels can subscribe to it
|
||||
pending_tx_sender: broadcast::Sender<TxStatus>,
|
||||
/// Optional database for users and accounting
|
||||
pub db_conn: Option<sea_orm::DatabaseConnection>,
|
||||
/// Optional read-only database for users and accounting
|
||||
pub db_replica: Option<DatabaseReplica>,
|
||||
/// store pending transactions that we've seen so that we don't send duplicates to subscribers
|
||||
/// TODO: think about this more. might be worth storing if we sent the transaction or not and using this for automatic retries
|
||||
pub pending_transactions: Cache<TxHash, TxStatus, hashbrown::hash_map::DefaultHashBuilder>,
|
||||
/// rate limit anonymous users
|
||||
pub frontend_ip_rate_limiter: Option<DeferredRateLimiter<IpAddr>>,
|
||||
/// rate limit authenticated users
|
||||
pub frontend_registered_user_rate_limiter: Option<DeferredRateLimiter<u64>>,
|
||||
/// Optional time series database for making pretty graphs that load quickly
|
||||
pub influxdb_client: Option<influxdb2::Client>,
|
||||
/// rate limit the login endpoint
|
||||
/// we do this because each pending login is a row in the database
|
||||
pub login_rate_limiter: Option<RedisRateLimiter>,
|
||||
/// volatile cache used for rate limits
|
||||
/// TODO: i think i might just delete this entirely. instead use local-only concurrency limits.
|
||||
pub vredis_pool: Option<RedisPool>,
|
||||
// TODO: this key should be our RpcSecretKey class, not Ulid
|
||||
/// cache authenticated users so that we don't have to query the database on the hot path
|
||||
// TODO: should the key be our RpcSecretKey class instead of Ulid?
|
||||
pub rpc_secret_key_cache:
|
||||
Cache<Ulid, AuthorizationChecks, hashbrown::hash_map::DefaultHashBuilder>,
|
||||
/// concurrent/parallel RPC request limits for authenticated users
|
||||
pub registered_user_semaphores:
|
||||
Cache<NonZeroU64, Arc<Semaphore>, hashbrown::hash_map::DefaultHashBuilder>,
|
||||
/// concurrent/parallel request limits for anonymous users
|
||||
pub ip_semaphores: Cache<IpAddr, Arc<Semaphore>, hashbrown::hash_map::DefaultHashBuilder>,
|
||||
/// concurrent/parallel application request limits for authenticated users
|
||||
pub bearer_token_semaphores:
|
||||
Cache<UserBearerToken, Arc<Semaphore>, hashbrown::hash_map::DefaultHashBuilder>,
|
||||
pub stat_sender: Option<flume::Sender<Web3ProxyStat>>,
|
||||
pub kafka_producer: Option<rdkafka::producer::FutureProducer>,
|
||||
/// channel for sending stats in a background task
|
||||
pub stat_sender: Option<flume::Sender<AppStat>>,
|
||||
}
|
||||
|
||||
/// flatten a JoinError into an anyhow error
|
||||
@ -355,6 +384,7 @@ pub async fn get_migrated_db(
|
||||
Ok(db_conn)
|
||||
}
|
||||
|
||||
/// starting an app creates many tasks
|
||||
#[derive(From)]
|
||||
pub struct Web3ProxyAppSpawn {
|
||||
/// the app. probably clone this to use in other groups of handles
|
||||
@ -365,6 +395,8 @@ pub struct Web3ProxyAppSpawn {
|
||||
pub background_handles: FuturesUnordered<AnyhowJoinHandle<()>>,
|
||||
/// config changes are sent here
|
||||
pub new_top_config_sender: watch::Sender<TopConfig>,
|
||||
/// watch this to know when to start the app
|
||||
pub consensus_connections_watcher: watch::Receiver<Option<Arc<ConsensusWeb3Rpcs>>>,
|
||||
}
|
||||
|
||||
impl Web3ProxyApp {
|
||||
@ -372,8 +404,11 @@ impl Web3ProxyApp {
|
||||
pub async fn spawn(
|
||||
top_config: TopConfig,
|
||||
num_workers: usize,
|
||||
shutdown_receiver: broadcast::Receiver<()>,
|
||||
shutdown_sender: broadcast::Sender<()>,
|
||||
) -> anyhow::Result<Web3ProxyAppSpawn> {
|
||||
let rpc_account_shutdown_recevier = shutdown_sender.subscribe();
|
||||
let mut background_shutdown_receiver = shutdown_sender.subscribe();
|
||||
|
||||
// safety checks on the config
|
||||
// while i would prefer this to be in a "apply_top_config" function, that is a larger refactor
|
||||
// TODO: maybe don't spawn with a config at all. have all config updates come through an apply_top_config call
|
||||
@ -512,20 +547,46 @@ impl Web3ProxyApp {
|
||||
}
|
||||
};
|
||||
|
||||
// setup a channel for receiving stats (generally with a high cardinality, such as per-user)
|
||||
// we do this in a channel so we don't slow down our response to the users
|
||||
let stat_sender = if let Some(db_conn) = db_conn.clone() {
|
||||
let emitter_spawn =
|
||||
StatEmitter::spawn(top_config.app.chain_id, db_conn, 60, shutdown_receiver)?;
|
||||
let influxdb_client = match top_config.app.influxdb_host.as_ref() {
|
||||
Some(influxdb_host) => {
|
||||
let influxdb_org = top_config
|
||||
.app
|
||||
.influxdb_org
|
||||
.clone()
|
||||
.expect("influxdb_org needed when influxdb_host is set");
|
||||
let influxdb_token = top_config
|
||||
.app
|
||||
.influxdb_token
|
||||
.clone()
|
||||
.expect("influxdb_token needed when influxdb_host is set");
|
||||
|
||||
let influxdb_client =
|
||||
influxdb2::Client::new(influxdb_host, influxdb_org, influxdb_token);
|
||||
|
||||
// TODO: test the client now. having a stat for "started" can be useful on graphs to mark deploys
|
||||
|
||||
Some(influxdb_client)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
// create a channel for receiving stats
|
||||
// we do this in a channel so we don't slow down our response to the users
|
||||
// stats can be saved in mysql, influxdb, both, or none
|
||||
let stat_sender = if let Some(emitter_spawn) = StatBuffer::try_spawn(
|
||||
top_config.app.chain_id,
|
||||
db_conn.clone(),
|
||||
influxdb_client.clone(),
|
||||
60,
|
||||
1,
|
||||
BILLING_PERIOD_SECONDS,
|
||||
rpc_account_shutdown_recevier,
|
||||
)? {
|
||||
// since the database entries are used for accounting, we want to be sure everything is saved before exiting
|
||||
important_background_handles.push(emitter_spawn.background_handle);
|
||||
|
||||
Some(emitter_spawn.stat_sender)
|
||||
} else {
|
||||
warn!("cannot store stats without a database connection");
|
||||
|
||||
// TODO: subscribe to the shutdown_receiver here since the stat emitter isn't running?
|
||||
|
||||
None
|
||||
};
|
||||
|
||||
@ -644,7 +705,9 @@ impl Web3ProxyApp {
|
||||
.build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
|
||||
|
||||
// prepare a Web3Rpcs to hold all our balanced connections
|
||||
let (balanced_rpcs, balanced_rpcs_handle) = Web3Rpcs::spawn(
|
||||
// let (balanced_rpcs, balanced_rpcs_handle) = Web3Rpcs::spawn(
|
||||
// connect to the load balanced rpcs
|
||||
let (balanced_rpcs, balanced_handle, consensus_connections_watcher) = Web3Rpcs::spawn(
|
||||
top_config.app.chain_id,
|
||||
db_conn.clone(),
|
||||
http_client.clone(),
|
||||
@ -659,7 +722,7 @@ impl Web3ProxyApp {
|
||||
.await
|
||||
.context("spawning balanced rpcs")?;
|
||||
|
||||
app_handles.push(balanced_rpcs_handle);
|
||||
app_handles.push(balanced_handle);
|
||||
|
||||
// prepare a Web3Rpcs to hold all our private connections
|
||||
// only some chains have this, so this is optional
|
||||
@ -668,7 +731,9 @@ impl Web3ProxyApp {
|
||||
None
|
||||
} else {
|
||||
// TODO: do something with the spawn handle
|
||||
let (private_rpcs, private_rpcs_handle) = Web3Rpcs::spawn(
|
||||
// TODO: Merge
|
||||
// let (private_rpcs, private_rpcs_handle) = Web3Rpcs::spawn(
|
||||
let (private_rpcs, private_handle, _) = Web3Rpcs::spawn(
|
||||
top_config.app.chain_id,
|
||||
db_conn.clone(),
|
||||
http_client.clone(),
|
||||
@ -689,7 +754,7 @@ impl Web3ProxyApp {
|
||||
.await
|
||||
.context("spawning private_rpcs")?;
|
||||
|
||||
app_handles.push(private_rpcs_handle);
|
||||
app_handles.push(private_handle);
|
||||
|
||||
Some(private_rpcs)
|
||||
};
|
||||
@ -709,6 +774,7 @@ impl Web3ProxyApp {
|
||||
login_rate_limiter,
|
||||
db_conn,
|
||||
db_replica,
|
||||
influxdb_client,
|
||||
vredis_pool,
|
||||
rpc_secret_key_cache,
|
||||
bearer_token_semaphores,
|
||||
@ -745,14 +811,26 @@ impl Web3ProxyApp {
|
||||
|
||||
app_handles.push(config_handle);
|
||||
}
|
||||
// =======
|
||||
// if important_background_handles.is_empty() {
|
||||
// info!("no important background handles");
|
||||
//
|
||||
// let f = tokio::spawn(async move {
|
||||
// let _ = background_shutdown_receiver.recv().await;
|
||||
//
|
||||
// Ok(())
|
||||
// });
|
||||
//
|
||||
// important_background_handles.push(f);
|
||||
// >>>>>>> 77df3fa (stats v2)
|
||||
|
||||
Ok((
|
||||
app,
|
||||
app_handles,
|
||||
important_background_handles,
|
||||
new_top_config_sender,
|
||||
)
|
||||
.into())
|
||||
consensus_connections_watcher
|
||||
).into())
|
||||
}
|
||||
|
||||
pub async fn apply_top_config(&self, new_top_config: TopConfig) -> anyhow::Result<()> {
|
||||
@ -786,6 +864,7 @@ impl Web3ProxyApp {
|
||||
// TODO: what globals? should this be the hostname or what?
|
||||
// globals.insert("service", "web3_proxy");
|
||||
|
||||
// TODO: this needs a refactor to get HELP and TYPE into the serialized text
|
||||
#[derive(Default, Serialize)]
|
||||
struct UserCount(i64);
|
||||
|
||||
@ -1069,7 +1148,6 @@ impl Web3ProxyApp {
|
||||
}
|
||||
}
|
||||
|
||||
// #[measure([ErrorCount, HitCount, ResponseTime, Throughput])]
|
||||
async fn proxy_cached_request(
|
||||
self: &Arc<Self>,
|
||||
authorization: &Arc<Authorization>,
|
||||
@ -1078,7 +1156,7 @@ impl Web3ProxyApp {
|
||||
) -> Result<(JsonRpcForwardedResponse, Vec<Arc<Web3Rpc>>), FrontendErrorResponse> {
|
||||
// trace!("Received request: {:?}", request);
|
||||
|
||||
let request_metadata = Arc::new(RequestMetadata::new(REQUEST_PERIOD, request.num_bytes())?);
|
||||
let request_metadata = Arc::new(RequestMetadata::new(request.num_bytes())?);
|
||||
|
||||
let mut kafka_stuff = None;
|
||||
|
||||
@ -1216,7 +1294,7 @@ impl Web3ProxyApp {
|
||||
| "shh_post"
|
||||
| "shh_uninstallFilter"
|
||||
| "shh_version") => {
|
||||
// TODO: client error stat
|
||||
// i don't think we will ever support these methods
|
||||
// TODO: what error code?
|
||||
return Ok((
|
||||
JsonRpcForwardedResponse::from_string(
|
||||
@ -1235,9 +1313,10 @@ impl Web3ProxyApp {
|
||||
| "eth_newPendingTransactionFilter"
|
||||
| "eth_pollSubscriptions"
|
||||
| "eth_uninstallFilter") => {
|
||||
// TODO: unsupported command stat
|
||||
// TODO: unsupported command stat. use the count to prioritize new features
|
||||
// TODO: what error code?
|
||||
return Ok((
|
||||
// TODO: what code?
|
||||
JsonRpcForwardedResponse::from_string(
|
||||
format!("not yet implemented: {}", method),
|
||||
None,
|
||||
@ -1712,7 +1791,7 @@ impl Web3ProxyApp {
|
||||
let rpcs = request_metadata.backend_requests.lock().clone();
|
||||
|
||||
if let Some(stat_sender) = self.stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
method.to_string(),
|
||||
authorization.clone(),
|
||||
request_metadata,
|
||||
@ -1735,7 +1814,7 @@ impl Web3ProxyApp {
|
||||
let rpcs = request_metadata.backend_requests.lock().clone();
|
||||
|
||||
if let Some(stat_sender) = self.stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
request_method,
|
||||
authorization.clone(),
|
||||
request_metadata,
|
||||
|
@ -1,11 +1,11 @@
|
||||
//! Websocket-specific functions for the Web3ProxyApp
|
||||
|
||||
use super::{Web3ProxyApp, REQUEST_PERIOD};
|
||||
use crate::app_stats::ProxyResponseStat;
|
||||
use super::Web3ProxyApp;
|
||||
use crate::frontend::authorization::{Authorization, RequestMetadata};
|
||||
use crate::jsonrpc::JsonRpcForwardedResponse;
|
||||
use crate::jsonrpc::JsonRpcRequest;
|
||||
use crate::rpcs::transactions::TxStatus;
|
||||
use crate::stats::RpcQueryStats;
|
||||
use anyhow::Context;
|
||||
use axum::extract::ws::Message;
|
||||
use ethers::prelude::U64;
|
||||
@ -33,8 +33,7 @@ impl Web3ProxyApp {
|
||||
.context("finding request size")?
|
||||
.len();
|
||||
|
||||
let request_metadata =
|
||||
Arc::new(RequestMetadata::new(REQUEST_PERIOD, request_bytes).unwrap());
|
||||
let request_metadata = Arc::new(RequestMetadata::new(request_bytes).unwrap());
|
||||
|
||||
let (subscription_abort_handle, subscription_registration) = AbortHandle::new_pair();
|
||||
|
||||
@ -68,8 +67,7 @@ impl Web3ProxyApp {
|
||||
};
|
||||
|
||||
// TODO: what should the payload for RequestMetadata be?
|
||||
let request_metadata =
|
||||
Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap());
|
||||
let request_metadata = Arc::new(RequestMetadata::new(0).unwrap());
|
||||
|
||||
// TODO: make a struct for this? using our JsonRpcForwardedResponse won't work because it needs an id
|
||||
let response_json = json!({
|
||||
@ -97,7 +95,7 @@ impl Web3ProxyApp {
|
||||
};
|
||||
|
||||
if let Some(stat_sender) = stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
"eth_subscription(newHeads)".to_string(),
|
||||
authorization.clone(),
|
||||
request_metadata.clone(),
|
||||
@ -135,8 +133,7 @@ impl Web3ProxyApp {
|
||||
// TODO: do something with this handle?
|
||||
tokio::spawn(async move {
|
||||
while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await {
|
||||
let request_metadata =
|
||||
Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap());
|
||||
let request_metadata = Arc::new(RequestMetadata::new(0).unwrap());
|
||||
|
||||
let new_tx = match new_tx_state {
|
||||
TxStatus::Pending(tx) => tx,
|
||||
@ -169,7 +166,7 @@ impl Web3ProxyApp {
|
||||
};
|
||||
|
||||
if let Some(stat_sender) = stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
"eth_subscription(newPendingTransactions)".to_string(),
|
||||
authorization.clone(),
|
||||
request_metadata.clone(),
|
||||
@ -211,8 +208,7 @@ impl Web3ProxyApp {
|
||||
// TODO: do something with this handle?
|
||||
tokio::spawn(async move {
|
||||
while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await {
|
||||
let request_metadata =
|
||||
Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap());
|
||||
let request_metadata = Arc::new(RequestMetadata::new(0).unwrap());
|
||||
|
||||
let new_tx = match new_tx_state {
|
||||
TxStatus::Pending(tx) => tx,
|
||||
@ -246,7 +242,7 @@ impl Web3ProxyApp {
|
||||
};
|
||||
|
||||
if let Some(stat_sender) = stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
"eth_subscription(newPendingFullTransactions)".to_string(),
|
||||
authorization.clone(),
|
||||
request_metadata.clone(),
|
||||
@ -288,8 +284,7 @@ impl Web3ProxyApp {
|
||||
// TODO: do something with this handle?
|
||||
tokio::spawn(async move {
|
||||
while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await {
|
||||
let request_metadata =
|
||||
Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap());
|
||||
let request_metadata = Arc::new(RequestMetadata::new(0).unwrap());
|
||||
|
||||
let new_tx = match new_tx_state {
|
||||
TxStatus::Pending(tx) => tx,
|
||||
@ -323,7 +318,7 @@ impl Web3ProxyApp {
|
||||
};
|
||||
|
||||
if let Some(stat_sender) = stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
"eth_subscription(newPendingRawTransactions)".to_string(),
|
||||
authorization.clone(),
|
||||
request_metadata.clone(),
|
||||
@ -354,7 +349,7 @@ impl Web3ProxyApp {
|
||||
let response = JsonRpcForwardedResponse::from_value(json!(subscription_id), id);
|
||||
|
||||
if let Some(stat_sender) = self.stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
request_json.method.clone(),
|
||||
authorization.clone(),
|
||||
request_metadata,
|
||||
|
@ -1,416 +0,0 @@
|
||||
use crate::frontend::authorization::{Authorization, RequestMetadata};
|
||||
use axum::headers::Origin;
|
||||
use chrono::{TimeZone, Utc};
|
||||
use derive_more::From;
|
||||
use entities::rpc_accounting;
|
||||
use entities::sea_orm_active_enums::LogLevel;
|
||||
use hashbrown::HashMap;
|
||||
use hdrhistogram::{Histogram, RecordError};
|
||||
use log::{error, info};
|
||||
use migration::sea_orm::{self, ActiveModelTrait, DatabaseConnection, DbErr};
|
||||
use std::num::NonZeroU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime};
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{interval_at, Instant};
|
||||
|
||||
/// TODO: where should this be defined?
|
||||
/// TODO: can we use something inside sea_orm instead?
|
||||
#[derive(Debug)]
|
||||
pub struct ProxyResponseStat {
|
||||
authorization: Arc<Authorization>,
|
||||
method: String,
|
||||
archive_request: bool,
|
||||
error_response: bool,
|
||||
request_bytes: u64,
|
||||
/// if backend_requests is 0, there was a cache_hit
|
||||
backend_requests: u64,
|
||||
response_bytes: u64,
|
||||
response_millis: u64,
|
||||
}
|
||||
|
||||
impl ProxyResponseStat {
|
||||
/// TODO: think more about this. probably rename it
|
||||
fn key(&self) -> ProxyResponseAggregateKey {
|
||||
// include either the rpc_key_id or the origin
|
||||
let (mut rpc_key_id, origin) = match (
|
||||
self.authorization.checks.rpc_secret_key_id,
|
||||
&self.authorization.origin,
|
||||
) {
|
||||
(Some(rpc_key_id), _) => {
|
||||
// TODO: allow the user to opt into saving the origin
|
||||
(Some(rpc_key_id), None)
|
||||
}
|
||||
(None, Some(origin)) => {
|
||||
// we save the origin for anonymous access
|
||||
(None, Some(origin.clone()))
|
||||
}
|
||||
(None, None) => {
|
||||
// TODO: what should we do here? log ip? i really don't want to save any ips
|
||||
(None, None)
|
||||
}
|
||||
};
|
||||
|
||||
let method = match self.authorization.checks.log_level {
|
||||
LogLevel::None => {
|
||||
// No rpc_key logging. Only save fully anonymized metric
|
||||
rpc_key_id = None;
|
||||
// keep the method since the rpc key is not attached
|
||||
Some(self.method.clone())
|
||||
}
|
||||
LogLevel::Aggregated => {
|
||||
// Lose the method
|
||||
None
|
||||
}
|
||||
LogLevel::Detailed => {
|
||||
// include the method
|
||||
Some(self.method.clone())
|
||||
}
|
||||
};
|
||||
|
||||
ProxyResponseAggregateKey {
|
||||
archive_request: self.archive_request,
|
||||
error_response: self.error_response,
|
||||
method,
|
||||
origin,
|
||||
rpc_key_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ProxyResponseHistograms {
|
||||
request_bytes: Histogram<u64>,
|
||||
response_bytes: Histogram<u64>,
|
||||
response_millis: Histogram<u64>,
|
||||
}
|
||||
|
||||
impl Default for ProxyResponseHistograms {
|
||||
fn default() -> Self {
|
||||
// TODO: how many significant figures?
|
||||
let request_bytes = Histogram::new(5).expect("creating request_bytes histogram");
|
||||
let response_bytes = Histogram::new(5).expect("creating response_bytes histogram");
|
||||
let response_millis = Histogram::new(5).expect("creating response_millis histogram");
|
||||
|
||||
Self {
|
||||
request_bytes,
|
||||
response_bytes,
|
||||
response_millis,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: think more about if we should include IP address in this
|
||||
#[derive(Clone, From, Hash, PartialEq, Eq)]
|
||||
struct ProxyResponseAggregateKey {
|
||||
archive_request: bool,
|
||||
error_response: bool,
|
||||
rpc_key_id: Option<NonZeroU64>,
|
||||
method: Option<String>,
|
||||
/// TODO: should this be Origin or String?
|
||||
origin: Option<Origin>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ProxyResponseAggregate {
|
||||
frontend_requests: u64,
|
||||
backend_requests: u64,
|
||||
// TODO: related to backend_requests
|
||||
// backend_retries: u64,
|
||||
// TODO: related to backend_requests
|
||||
// no_servers: u64,
|
||||
cache_misses: u64,
|
||||
cache_hits: u64,
|
||||
sum_request_bytes: u64,
|
||||
sum_response_bytes: u64,
|
||||
sum_response_millis: u64,
|
||||
histograms: ProxyResponseHistograms,
|
||||
}
|
||||
|
||||
/// A stat that we aggregate and then store in a database.
|
||||
/// For now there is just one, but I think there might be others later
|
||||
#[derive(Debug, From)]
|
||||
pub enum Web3ProxyStat {
|
||||
Response(ProxyResponseStat),
|
||||
}
|
||||
|
||||
#[derive(From)]
|
||||
pub struct StatEmitterSpawn {
|
||||
pub stat_sender: flume::Sender<Web3ProxyStat>,
|
||||
/// these handles are important and must be allowed to finish
|
||||
pub background_handle: JoinHandle<anyhow::Result<()>>,
|
||||
}
|
||||
|
||||
pub struct StatEmitter {
|
||||
chain_id: u64,
|
||||
db_conn: DatabaseConnection,
|
||||
period_seconds: u64,
|
||||
}
|
||||
|
||||
// TODO: impl `+=<ProxyResponseStat>` for ProxyResponseAggregate?
|
||||
impl ProxyResponseAggregate {
|
||||
fn add(&mut self, stat: ProxyResponseStat) -> Result<(), RecordError> {
|
||||
// a stat always come from just 1 frontend request
|
||||
self.frontend_requests += 1;
|
||||
|
||||
if stat.backend_requests == 0 {
|
||||
// no backend request. cache hit!
|
||||
self.cache_hits += 1;
|
||||
} else {
|
||||
// backend requests! cache miss!
|
||||
self.cache_misses += 1;
|
||||
|
||||
// a stat might have multiple backend requests
|
||||
self.backend_requests += stat.backend_requests;
|
||||
}
|
||||
|
||||
self.sum_request_bytes += stat.request_bytes;
|
||||
self.sum_response_bytes += stat.response_bytes;
|
||||
self.sum_response_millis += stat.response_millis;
|
||||
|
||||
// TODO: use `record_correct`?
|
||||
self.histograms.request_bytes.record(stat.request_bytes)?;
|
||||
self.histograms
|
||||
.response_millis
|
||||
.record(stat.response_millis)?;
|
||||
self.histograms.response_bytes.record(stat.response_bytes)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO? help to turn this plus the key into a database model?
|
||||
// TODO: take a db transaction instead so that we can batch
|
||||
async fn save(
|
||||
self,
|
||||
chain_id: u64,
|
||||
db_conn: &DatabaseConnection,
|
||||
key: ProxyResponseAggregateKey,
|
||||
period_timestamp: u64,
|
||||
) -> Result<(), DbErr> {
|
||||
// this is a lot of variables
|
||||
let period_datetime = Utc.timestamp_opt(period_timestamp as i64, 0).unwrap();
|
||||
|
||||
let request_bytes = &self.histograms.request_bytes;
|
||||
|
||||
let min_request_bytes = request_bytes.min();
|
||||
let mean_request_bytes = request_bytes.mean();
|
||||
let p50_request_bytes = request_bytes.value_at_quantile(0.50);
|
||||
let p90_request_bytes = request_bytes.value_at_quantile(0.90);
|
||||
let p99_request_bytes = request_bytes.value_at_quantile(0.99);
|
||||
let max_request_bytes = request_bytes.max();
|
||||
|
||||
let response_millis = &self.histograms.response_millis;
|
||||
|
||||
let min_response_millis = response_millis.min();
|
||||
let mean_response_millis = response_millis.mean();
|
||||
let p50_response_millis = response_millis.value_at_quantile(0.50);
|
||||
let p90_response_millis = response_millis.value_at_quantile(0.90);
|
||||
let p99_response_millis = response_millis.value_at_quantile(0.99);
|
||||
let max_response_millis = response_millis.max();
|
||||
|
||||
let response_bytes = &self.histograms.response_bytes;
|
||||
|
||||
let min_response_bytes = response_bytes.min();
|
||||
let mean_response_bytes = response_bytes.mean();
|
||||
let p50_response_bytes = response_bytes.value_at_quantile(0.50);
|
||||
let p90_response_bytes = response_bytes.value_at_quantile(0.90);
|
||||
let p99_response_bytes = response_bytes.value_at_quantile(0.99);
|
||||
let max_response_bytes = response_bytes.max();
|
||||
|
||||
// TODO: Set origin and maybe other things on this model. probably not the ip though
|
||||
let aggregated_stat_model = rpc_accounting::ActiveModel {
|
||||
id: sea_orm::NotSet,
|
||||
// origin: sea_orm::Set(key.authorization.origin.to_string()),
|
||||
rpc_key_id: sea_orm::Set(key.rpc_key_id.map(Into::into)),
|
||||
origin: sea_orm::Set(key.origin.map(|x| x.to_string())),
|
||||
chain_id: sea_orm::Set(chain_id),
|
||||
method: sea_orm::Set(key.method),
|
||||
archive_request: sea_orm::Set(key.archive_request),
|
||||
error_response: sea_orm::Set(key.error_response),
|
||||
period_datetime: sea_orm::Set(period_datetime),
|
||||
frontend_requests: sea_orm::Set(self.frontend_requests),
|
||||
backend_requests: sea_orm::Set(self.backend_requests),
|
||||
// backend_retries: sea_orm::Set(self.backend_retries),
|
||||
// no_servers: sea_orm::Set(self.no_servers),
|
||||
cache_misses: sea_orm::Set(self.cache_misses),
|
||||
cache_hits: sea_orm::Set(self.cache_hits),
|
||||
|
||||
sum_request_bytes: sea_orm::Set(self.sum_request_bytes),
|
||||
min_request_bytes: sea_orm::Set(min_request_bytes),
|
||||
mean_request_bytes: sea_orm::Set(mean_request_bytes),
|
||||
p50_request_bytes: sea_orm::Set(p50_request_bytes),
|
||||
p90_request_bytes: sea_orm::Set(p90_request_bytes),
|
||||
p99_request_bytes: sea_orm::Set(p99_request_bytes),
|
||||
max_request_bytes: sea_orm::Set(max_request_bytes),
|
||||
|
||||
sum_response_millis: sea_orm::Set(self.sum_response_millis),
|
||||
min_response_millis: sea_orm::Set(min_response_millis),
|
||||
mean_response_millis: sea_orm::Set(mean_response_millis),
|
||||
p50_response_millis: sea_orm::Set(p50_response_millis),
|
||||
p90_response_millis: sea_orm::Set(p90_response_millis),
|
||||
p99_response_millis: sea_orm::Set(p99_response_millis),
|
||||
max_response_millis: sea_orm::Set(max_response_millis),
|
||||
|
||||
sum_response_bytes: sea_orm::Set(self.sum_response_bytes),
|
||||
min_response_bytes: sea_orm::Set(min_response_bytes),
|
||||
mean_response_bytes: sea_orm::Set(mean_response_bytes),
|
||||
p50_response_bytes: sea_orm::Set(p50_response_bytes),
|
||||
p90_response_bytes: sea_orm::Set(p90_response_bytes),
|
||||
p99_response_bytes: sea_orm::Set(p99_response_bytes),
|
||||
max_response_bytes: sea_orm::Set(max_response_bytes),
|
||||
};
|
||||
|
||||
aggregated_stat_model.save(db_conn).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
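
The add() method above is plain counter folding: every stat is one frontend request, and it is either a cache hit (zero backend requests) or a miss that may have touched several backends. A std-only sketch of that bookkeeping (illustrative struct, not the real one):

#[derive(Default, Debug, PartialEq)]
struct Aggregate {
    frontend_requests: u64,
    backend_requests: u64,
    cache_hits: u64,
    cache_misses: u64,
}

impl Aggregate {
    fn add(&mut self, backend_requests: u64) {
        // a stat always comes from exactly one frontend request
        self.frontend_requests += 1;
        if backend_requests == 0 {
            self.cache_hits += 1;
        } else {
            self.cache_misses += 1;
            // a single miss can fan out to several backends (retries, forks)
            self.backend_requests += backend_requests;
        }
    }
}

fn main() {
    let mut agg = Aggregate::default();
    agg.add(0); // served from cache
    agg.add(2); // cache miss that retried on a second backend

    assert_eq!(
        agg,
        Aggregate { frontend_requests: 2, backend_requests: 2, cache_hits: 1, cache_misses: 1 }
    );
}
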
|
||||
|
||||
impl ProxyResponseStat {
|
||||
pub fn new(
|
||||
method: String,
|
||||
authorization: Arc<Authorization>,
|
||||
metadata: Arc<RequestMetadata>,
|
||||
response_bytes: usize,
|
||||
) -> Self {
|
||||
let archive_request = metadata.archive_request.load(Ordering::Acquire);
|
||||
let backend_requests = metadata.backend_requests.lock().len() as u64;
|
||||
// let period_seconds = metadata.period_seconds;
|
||||
// let period_timestamp =
|
||||
// (metadata.start_datetime.timestamp() as u64) / period_seconds * period_seconds;
|
||||
let request_bytes = metadata.request_bytes;
|
||||
let error_response = metadata.error_response.load(Ordering::Acquire);
|
||||
|
||||
// TODO: timestamps could get confused by leap seconds. need tokio time instead
|
||||
let response_millis = metadata.start_instant.elapsed().as_millis() as u64;
|
||||
|
||||
let response_bytes = response_bytes as u64;
|
||||
|
||||
Self {
|
||||
authorization,
|
||||
archive_request,
|
||||
method,
|
||||
backend_requests,
|
||||
request_bytes,
|
||||
error_response,
|
||||
response_bytes,
|
||||
response_millis,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StatEmitter {
|
||||
pub fn spawn(
|
||||
chain_id: u64,
|
||||
db_conn: DatabaseConnection,
|
||||
period_seconds: u64,
|
||||
shutdown_receiver: broadcast::Receiver<()>,
|
||||
) -> anyhow::Result<StatEmitterSpawn> {
|
||||
let (stat_sender, stat_receiver) = flume::unbounded();
|
||||
|
||||
let mut new = Self {
|
||||
chain_id,
|
||||
db_conn,
|
||||
period_seconds,
|
||||
};
|
||||
|
||||
// TODO: send any errors somewhere
|
||||
let handle =
|
||||
tokio::spawn(async move { new.stat_loop(stat_receiver, shutdown_receiver).await });
|
||||
|
||||
Ok((stat_sender, handle).into())
|
||||
}
|
||||
|
||||
async fn stat_loop(
|
||||
&mut self,
|
||||
stat_receiver: flume::Receiver<Web3ProxyStat>,
|
||||
mut shutdown_receiver: broadcast::Receiver<()>,
|
||||
) -> anyhow::Result<()> {
|
||||
let system_now = SystemTime::now();
|
||||
|
||||
let duration_since_epoch = system_now
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.expect("time machines don't exist");
|
||||
|
||||
// TODO: change period_seconds from u64 to u32
|
||||
let current_period = duration_since_epoch
|
||||
.checked_div(self.period_seconds as u32)
|
||||
.unwrap()
|
||||
* self.period_seconds as u32;
|
||||
|
||||
let duration_to_next_period =
|
||||
Duration::from_secs(self.period_seconds) - (duration_since_epoch - current_period);
|
||||
|
||||
// start the interval when the next period starts
|
||||
let start_instant = Instant::now() + duration_to_next_period;
|
||||
let mut interval = interval_at(start_instant, Duration::from_secs(self.period_seconds));
|
||||
|
||||
// loop between different futures to update these mutables
|
||||
let mut period_timestamp = current_period.as_secs();
|
||||
let mut response_aggregate_map =
|
||||
HashMap::<ProxyResponseAggregateKey, ProxyResponseAggregate>::new();
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
stat = stat_receiver.recv_async() => {
|
||||
match stat? {
|
||||
Web3ProxyStat::Response(stat) => {
|
||||
let key = stat.key();
|
||||
|
||||
// TODO: does hashmap have get_or_insert?
|
||||
if ! response_aggregate_map.contains_key(&key) {
|
||||
response_aggregate_map.insert(key.clone(), Default::default());
|
||||
};
|
||||
|
||||
if let Some(value) = response_aggregate_map.get_mut(&key) {
|
||||
if let Err(err) = value.add(stat) {
|
||||
error!( "unable to aggregate stats! err={:?}", err);
|
||||
};
|
||||
} else {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = interval.tick() => {
|
||||
// save all the aggregated stats
|
||||
// TODO: batch these saves
|
||||
for (key, aggregate) in response_aggregate_map.drain() {
|
||||
if let Err(err) = aggregate.save(self.chain_id, &self.db_conn, key, period_timestamp).await {
|
||||
error!("Unable to save stat while shutting down! {:?}", err);
|
||||
};
|
||||
}
|
||||
// advance to the next period
|
||||
// TODO: is this safe? what if there is drift?
|
||||
period_timestamp += self.period_seconds;
|
||||
}
|
||||
x = shutdown_receiver.recv() => {
|
||||
match x {
|
||||
Ok(_) => {
|
||||
info!("aggregate stat_loop shutting down");
|
||||
// TODO: call aggregate_stat for all the
|
||||
},
|
||||
Err(err) => error!("shutdown receiver. err={:?}", err),
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("saving {} pending stats", response_aggregate_map.len());
|
||||
|
||||
for (key, aggregate) in response_aggregate_map.drain() {
|
||||
if let Err(err) = aggregate
|
||||
.save(self.chain_id, &self.db_conn, key, period_timestamp)
|
||||
.await
|
||||
{
|
||||
error!("Unable to save stat while shutting down! err={:?}", err);
|
||||
};
|
||||
}
|
||||
|
||||
info!("aggregated stat_loop shut down");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
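
stat_loop's period handling is plain integer rounding: timestamps are bucketed to the start of their period so every stat in a window shares one period_timestamp, and the first interval tick is aligned to the next period boundary. A dependency-free sketch of that arithmetic:

/// Round a unix timestamp down to the start of its period.
fn period_start(unix_seconds: u64, period_seconds: u64) -> u64 {
    (unix_seconds / period_seconds) * period_seconds
}

/// How long until the next period begins (used to align the first tick).
fn seconds_until_next_period(unix_seconds: u64, period_seconds: u64) -> u64 {
    period_start(unix_seconds, period_seconds) + period_seconds - unix_seconds
}

fn main() {
    // 60 second periods, i.e. period_datetime rounded to the start of the minute
    let now = 1_675_000_123;
    assert_eq!(period_start(now, 60), 1_675_000_080);
    assert_eq!(seconds_until_next_period(now, 60), 17);
}
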
|
@ -250,6 +250,9 @@ fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
// set up tokio's async runtime
|
||||
#[cfg(tokio_uring)]
|
||||
let mut rt_builder = tokio_uring::Builder::new_multi_thread();
|
||||
#[cfg(not(tokio_uring))]
|
||||
let mut rt_builder = runtime::Builder::new_multi_thread();
|
||||
|
||||
rt_builder.enable_all();
|
||||
|
@ -1,7 +1,7 @@
|
||||
#![forbid(unsafe_code)]
|
||||
use argh::FromArgs;
|
||||
use futures::StreamExt;
|
||||
use log::{error, info, warn};
|
||||
use log::{error, info, trace, warn};
|
||||
use num::Zero;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
@ -9,7 +9,7 @@ use std::{fs, thread};
|
||||
use tokio::sync::broadcast;
|
||||
use web3_proxy::app::{flatten_handle, flatten_handles, Web3ProxyApp};
|
||||
use web3_proxy::config::TopConfig;
|
||||
use web3_proxy::{frontend, metrics_frontend};
|
||||
use web3_proxy::{frontend, prometheus};
|
||||
|
||||
/// start the main proxy daemon
|
||||
#[derive(FromArgs, PartialEq, Debug, Eq)]
|
||||
@ -33,7 +33,6 @@ impl ProxydSubCommand {
|
||||
num_workers: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
let (shutdown_sender, _) = broadcast::channel(1);
|
||||
|
||||
// TODO: i think there is a small race. if config_path changes
|
||||
|
||||
run(
|
||||
@ -54,7 +53,7 @@ async fn run(
|
||||
frontend_port: u16,
|
||||
prometheus_port: u16,
|
||||
num_workers: usize,
|
||||
shutdown_sender: broadcast::Sender<()>,
|
||||
frontend_shutdown_sender: broadcast::Sender<()>,
|
||||
) -> anyhow::Result<()> {
|
||||
// tokio has code for catching ctrl+c so we use that
|
||||
// this shutdown sender is currently only used in tests, but we might make a /shutdown endpoint or something
|
||||
@ -62,115 +61,106 @@ async fn run(
|
||||
|
||||
let app_frontend_port = frontend_port;
|
||||
let app_prometheus_port = prometheus_port;
|
||||
let mut shutdown_receiver = shutdown_sender.subscribe();
|
||||
|
||||
// TODO: should we use a watch or broadcast for these?
|
||||
let (app_shutdown_sender, _app_shutdown_receiver) = broadcast::channel(1);
|
||||
|
||||
let frontend_shutdown_receiver = frontend_shutdown_sender.subscribe();
|
||||
let prometheus_shutdown_receiver = app_shutdown_sender.subscribe();
|
||||
|
||||
// TODO: should we use a watch or broadcast for these?
|
||||
let (frontend_shutdown_complete_sender, mut frontend_shutdown_complete_receiver) =
|
||||
broadcast::channel(1);
|
||||
|
||||
// start the main app
|
||||
let mut spawned_app =
|
||||
Web3ProxyApp::spawn(top_config.clone(), num_workers, shutdown_sender.subscribe()).await?;
|
||||
let mut spawned_app = Web3ProxyApp::spawn(top_config, num_workers, app_shutdown_sender.clone()).await?;
|
||||
|
||||
// start thread for watching config
|
||||
if let Some(top_config_path) = top_config_path {
|
||||
let config_sender = spawned_app.new_top_config_sender;
|
||||
/*
|
||||
#[cfg(feature = "inotify")]
|
||||
{
|
||||
let mut inotify = Inotify::init().expect("Failed to initialize inotify");
|
||||
|
||||
inotify
|
||||
.add_watch(top_config_path.clone(), WatchMask::MODIFY)
|
||||
.expect("Failed to add inotify watch on config");
|
||||
|
||||
let mut buffer = [0u8; 4096];
|
||||
|
||||
// TODO: exit the app if this handle exits
|
||||
thread::spawn(move || loop {
|
||||
// TODO: debounce
|
||||
|
||||
let events = inotify
|
||||
.read_events_blocking(&mut buffer)
|
||||
.expect("Failed to read inotify events");
|
||||
|
||||
for event in events {
|
||||
if event.mask.contains(EventMask::MODIFY) {
|
||||
info!("config changed");
|
||||
match fs::read_to_string(&top_config_path) {
|
||||
Ok(top_config) => match toml::from_str(&top_config) {
|
||||
Ok(top_config) => {
|
||||
config_sender.send(top_config).unwrap();
|
||||
}
|
||||
Err(err) => {
|
||||
// TODO: panic?
|
||||
error!("Unable to parse config! {:#?}", err);
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
// TODO: panic?
|
||||
error!("Unable to read config! {:#?}", err);
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// TODO: is "MODIFY" enough, or do we want CLOSE_WRITE?
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
*/
|
||||
// #[cfg(not(feature = "inotify"))]
|
||||
{
|
||||
thread::spawn(move || loop {
|
||||
match fs::read_to_string(&top_config_path) {
|
||||
Ok(new_top_config) => match toml::from_str(&new_top_config) {
|
||||
Ok(new_top_config) => {
|
||||
if new_top_config != top_config {
|
||||
top_config = new_top_config;
|
||||
config_sender.send(top_config.clone()).unwrap();
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
// TODO: panic?
|
||||
error!("Unable to parse config! {:#?}", err);
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
// TODO: panic?
|
||||
error!("Unable to read config! {:#?}", err);
|
||||
}
|
||||
}
|
||||
|
||||
thread::sleep(Duration::from_secs(10));
|
||||
});
|
||||
}
|
||||
}
|
||||
// if let Some(top_config_path) = top_config_path {
|
||||
// let config_sender = spawned_app.new_top_config_sender;
|
||||
// {
|
||||
// thread::spawn(move || loop {
|
||||
// match fs::read_to_string(&top_config_path) {
|
||||
// Ok(new_top_config) => match toml::from_str(&new_top_config) {
|
||||
// Ok(new_top_config) => {
|
||||
// if new_top_config != top_config {
|
||||
// top_config = new_top_config;
|
||||
// config_sender.send(top_config.clone()).unwrap();
|
||||
// }
|
||||
// }
|
||||
// Err(err) => {
|
||||
// // TODO: panic?
|
||||
// error!("Unable to parse config! {:#?}", err);
|
||||
// }
|
||||
// },
|
||||
// Err(err) => {
|
||||
// // TODO: panic?
|
||||
// error!("Unable to read config! {:#?}", err);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// thread::sleep(Duration::from_secs(10));
|
||||
// });
|
||||
// }
|
||||
// }
|
||||
|
||||
// start the prometheus metrics port
|
||||
let prometheus_handle = tokio::spawn(metrics_frontend::serve(
|
||||
let prometheus_handle = tokio::spawn(prometheus::serve(
|
||||
spawned_app.app.clone(),
|
||||
app_prometheus_port,
|
||||
prometheus_shutdown_receiver,
|
||||
));
|
||||
|
||||
// wait until the app has seen its first consensus head block
|
||||
// TODO: if backups were included, wait a little longer?
|
||||
let _ = spawned_app.app.head_block_receiver().changed().await;
|
||||
// if backups were included, wait a little longer
|
||||
for _ in 0..3 {
|
||||
let _ = spawned_app.consensus_connections_watcher.changed().await;
|
||||
|
||||
let consensus = spawned_app
|
||||
.consensus_connections_watcher
|
||||
.borrow_and_update();
|
||||
|
||||
if *consensus.context("Channel closed!")?.backups_needed {
|
||||
info!(
|
||||
"waiting longer. found consensus with backups: {}",
|
||||
*consensus.context("Channel closed!")?.head_block.as_ref().unwrap(),
|
||||
);
|
||||
} else {
|
||||
// TODO: also check that we have at least one archive node connected?
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// start the frontend port
|
||||
let frontend_handle = tokio::spawn(frontend::serve(app_frontend_port, spawned_app.app.clone()));
|
||||
let frontend_handle = tokio::spawn(frontend::serve(
|
||||
app_frontend_port,
|
||||
spawned_app.app.clone(),
|
||||
frontend_shutdown_receiver,
|
||||
frontend_shutdown_complete_sender,
|
||||
));
|
||||
|
||||
let frontend_handle = flatten_handle(frontend_handle);
|
||||
|
||||
// if everything is working, these should all run forever
|
||||
let mut exited_with_err = false;
|
||||
let mut frontend_exited = false;
|
||||
tokio::select! {
|
||||
x = flatten_handles(spawned_app.app_handles) => {
|
||||
match x {
|
||||
Ok(_) => info!("app_handle exited"),
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
error!("app_handle exited: {:#?}", e);
|
||||
exited_with_err = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
x = flatten_handle(frontend_handle) => {
|
||||
x = frontend_handle => {
|
||||
frontend_exited = true;
|
||||
match x {
|
||||
Ok(_) => info!("frontend exited"),
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
error!("frontend exited: {:#?}", e);
|
||||
exited_with_err = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -178,35 +168,62 @@ async fn run(
|
||||
match x {
|
||||
Ok(_) => info!("prometheus exited"),
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
error!("prometheus exited: {:#?}", e);
|
||||
exited_with_err = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
x = tokio::signal::ctrl_c() => {
|
||||
// TODO: unix terminate signal, too
|
||||
match x {
|
||||
Ok(_) => info!("quiting from ctrl-c"),
|
||||
Err(e) => {
|
||||
return Err(e.into());
|
||||
// TODO: i don't think this is possible
|
||||
error!("error quiting from ctrl-c: {:#?}", e);
|
||||
exited_with_err = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
x = shutdown_receiver.recv() => {
|
||||
// TODO: how can we properly watch background handles here? this returns None immediately and the app exits. i think the bug is somewhere else though
|
||||
x = spawned_app.background_handles.next() => {
|
||||
match x {
|
||||
Ok(_) => info!("quiting from shutdown receiver"),
|
||||
Err(e) => {
|
||||
return Err(e.into());
|
||||
Some(Ok(_)) => info!("quiting from background handles"),
|
||||
Some(Err(e)) => {
|
||||
error!("quiting from background handle error: {:#?}", e);
|
||||
exited_with_err = true;
|
||||
}
|
||||
None => {
|
||||
// TODO: is this an error?
|
||||
warn!("background handles exited");
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// one of the handles stopped. send a value so the others know to shut down
|
||||
if let Err(err) = shutdown_sender.send(()) {
|
||||
warn!("shutdown sender err={:?}", err);
|
||||
// if a future above completed, make sure the frontend knows to start turning off
|
||||
if !frontend_exited {
|
||||
if let Err(err) = frontend_shutdown_sender.send(()) {
|
||||
// TODO: this is actually expected if the frontend is already shut down
|
||||
warn!("shutdown sender err={:?}", err);
|
||||
};
|
||||
}
|
||||
|
||||
// TODO: wait until the frontend completes
|
||||
if let Err(err) = frontend_shutdown_complete_receiver.recv().await {
|
||||
warn!("shutdown completition err={:?}", err);
|
||||
} else {
|
||||
info!("frontend exited gracefully");
|
||||
}
|
||||
|
||||
// now that the frontend is complete, tell all the other futures to finish
|
||||
if let Err(err) = app_shutdown_sender.send(()) {
|
||||
warn!("backend sender err={:?}", err);
|
||||
};
|
||||
|
||||
// wait for things like saving stats to the database to complete
|
||||
info!("waiting on important background tasks");
|
||||
info!(
|
||||
"waiting on {} important background tasks",
|
||||
spawned_app.background_handles.len()
|
||||
);
|
||||
let mut background_errors = 0;
|
||||
while let Some(x) = spawned_app.background_handles.next().await {
|
||||
match x {
|
||||
@ -218,15 +235,19 @@ async fn run(
|
||||
error!("{:?}", e);
|
||||
background_errors += 1;
|
||||
}
|
||||
Ok(Ok(_)) => continue,
|
||||
Ok(Ok(_)) => {
|
||||
// TODO: how can we know which handle exited?
|
||||
trace!("a background handle exited");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if background_errors.is_zero() {
|
||||
if background_errors.is_zero() && !exited_with_err {
|
||||
info!("finished");
|
||||
Ok(())
|
||||
} else {
|
||||
// TODO: collect instead?
|
||||
// TODO: collect all the errors here instead?
|
||||
Err(anyhow::anyhow!("finished with errors!"))
|
||||
}
|
||||
}
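
The shutdown ordering in run() matters: the frontend must stop accepting traffic and report completion before the stat emitter and other background tasks are told to flush and exit. A reduced sketch of that two-stage handoff, assuming only the tokio crate (rt, macros, and sync features); the task bodies are stand-ins, not the real frontend or stat emitter:

use tokio::sync::broadcast;

#[tokio::main]
async fn main() {
    let (frontend_shutdown, _) = broadcast::channel::<()>(1);
    let (frontend_complete, mut frontend_complete_rx) = broadcast::channel::<()>(1);
    let (app_shutdown, _) = broadcast::channel::<()>(1);

    // stand-in for frontend::serve with graceful shutdown
    let mut frontend_rx = frontend_shutdown.subscribe();
    let frontend_complete_tx = frontend_complete.clone();
    tokio::spawn(async move {
        let _ = frontend_rx.recv().await;
        // ... drain in-flight requests here ...
        let _ = frontend_complete_tx.send(());
    });

    // stand-in for a background task such as the stat emitter
    let mut app_rx = app_shutdown.subscribe();
    let background = tokio::spawn(async move {
        let _ = app_rx.recv().await;
        // ... flush buffered stats to the database here ...
    });

    // shutdown sequence: frontend first, then everything else
    frontend_shutdown.send(()).expect("frontend is subscribed");
    frontend_complete_rx.recv().await.expect("frontend reports completion");
    app_shutdown.send(()).expect("background task is subscribed");
    background.await.expect("background task finishes cleanly");
}
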
|
||||
@ -319,15 +340,14 @@ mod tests {
|
||||
extra: Default::default(),
|
||||
};
|
||||
|
||||
let (shutdown_sender, _) = broadcast::channel(1);
|
||||
let (shutdown_sender, _shutdown_receiver) = broadcast::channel(1);
|
||||
|
||||
// spawn another thread for running the app
|
||||
// TODO: allow launching into the local tokio runtime instead of creating a new one?
|
||||
let handle = {
|
||||
let shutdown_sender = shutdown_sender.clone();
|
||||
|
||||
let frontend_port = 0;
|
||||
let prometheus_port = 0;
|
||||
let shutdown_sender = shutdown_sender.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
run(
|
||||
|
@ -4,7 +4,6 @@ use log::info;
|
||||
use migration::sea_orm::{DatabaseConnection, EntityTrait, PaginatorTrait};
|
||||
use std::fs::{self, create_dir_all};
|
||||
use std::path::Path;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
#[derive(FromArgs, PartialEq, Eq, Debug)]
|
||||
/// Export users from the database.
|
||||
@ -21,7 +20,7 @@ impl UserExportSubCommand {
|
||||
// create the output dir if it does not exist
|
||||
create_dir_all(&self.output_dir)?;
|
||||
|
||||
let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs();
|
||||
let now = chrono::Utc::now().timestamp();
|
||||
|
||||
let export_dir = Path::new(&self.output_dir);
|
||||
|
||||
|
@ -145,7 +145,7 @@ pub struct AppConfig {
|
||||
/// None = allow all requests
|
||||
pub public_requests_per_period: Option<u64>,
|
||||
|
||||
/// Salt for hashing recent ips
|
||||
/// Salt for hashing recent ips. Not a perfect way to introduce privacy, but better than nothing
|
||||
pub public_recent_ips_salt: Option<String>,
|
||||
|
||||
/// RPC responses are cached locally
|
||||
@ -169,6 +169,15 @@ pub struct AppConfig {
|
||||
/// If none, the minimum * 2 is used
|
||||
pub volatile_redis_max_connections: Option<usize>,
|
||||
|
||||
/// influxdb host for stats
|
||||
pub influxdb_host: Option<String>,
|
||||
|
||||
/// influxdb org for stats
|
||||
pub influxdb_org: Option<String>,
|
||||
|
||||
/// influxdb token for stats
|
||||
pub influxdb_token: Option<String>,
|
||||
|
||||
/// unknown config options get put here
|
||||
#[serde(flatten, default = "HashMap::default")]
|
||||
pub extra: HashMap<String, serde_json::Value>,
|
||||
|
@ -10,6 +10,7 @@ use axum::headers::authorization::Bearer;
|
||||
use axum::headers::{Header, Origin, Referer, UserAgent};
|
||||
use chrono::Utc;
|
||||
use deferred_rate_limiter::DeferredRateLimitResult;
|
||||
use entities::sea_orm_active_enums::TrackingLevel;
|
||||
use entities::{login, rpc_key, user, user_tier};
|
||||
use ethers::types::Bytes;
|
||||
use ethers::utils::keccak256;
|
||||
@ -72,10 +73,7 @@ pub struct Authorization {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RequestMetadata {
|
||||
pub start_datetime: chrono::DateTime<Utc>,
|
||||
pub start_instant: tokio::time::Instant,
|
||||
// TODO: better name for this
|
||||
pub period_seconds: u64,
|
||||
pub request_bytes: u64,
|
||||
// TODO: do we need atomics? seems like we should be able to pass a &mut around
|
||||
// TODO: "archive" isn't really a boolean.
|
||||
@ -90,14 +88,12 @@ pub struct RequestMetadata {
|
||||
}
|
||||
|
||||
impl RequestMetadata {
|
||||
pub fn new(period_seconds: u64, request_bytes: usize) -> anyhow::Result<Self> {
|
||||
pub fn new(request_bytes: usize) -> anyhow::Result<Self> {
|
||||
// TODO: how can we do this without turning it into a string first. this is going to slow us down!
|
||||
let request_bytes = request_bytes as u64;
|
||||
|
||||
let new = Self {
|
||||
start_instant: Instant::now(),
|
||||
start_datetime: Utc::now(),
|
||||
period_seconds,
|
||||
request_bytes,
|
||||
archive_request: false.into(),
|
||||
backend_requests: Default::default(),
|
||||
@ -183,6 +179,7 @@ impl Authorization {
|
||||
let authorization_checks = AuthorizationChecks {
|
||||
// any error logs on a local (internal) query are likely problems. log them all
|
||||
log_revert_chance: 1.0,
|
||||
tracking_level: TrackingLevel::Detailed,
|
||||
// default for everything else should be fine. we don't have a user_id or ip to give
|
||||
..Default::default()
|
||||
};
|
||||
@ -220,10 +217,10 @@ impl Authorization {
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// TODO: default or None?
|
||||
let authorization_checks = AuthorizationChecks {
|
||||
max_requests_per_period,
|
||||
proxy_mode,
|
||||
tracking_level: TrackingLevel::Detailed,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@ -616,7 +613,7 @@ impl Web3ProxyApp {
|
||||
proxy_mode: ProxyMode,
|
||||
) -> anyhow::Result<RateLimitResult> {
|
||||
// ip rate limits don't check referer or user agent
|
||||
// the do check
|
||||
// they do check origin because we can override rate limits for some origins
|
||||
let authorization = Authorization::external(
|
||||
allowed_origin_requests_per_period,
|
||||
self.db_conn.clone(),
|
||||
@ -766,7 +763,7 @@ impl Web3ProxyApp {
|
||||
allowed_origins,
|
||||
allowed_referers,
|
||||
allowed_user_agents,
|
||||
log_level: rpc_key_model.log_level,
|
||||
tracking_level: rpc_key_model.log_level,
|
||||
log_revert_chance: rpc_key_model.log_revert_chance,
|
||||
max_concurrent_requests: user_tier_model.max_concurrent_requests,
|
||||
max_requests_per_period: user_tier_model.max_requests_per_period,
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! `frontend` contains HTTP and websocket endpoints for use by users and admins.
|
||||
//! `frontend` contains HTTP and websocket endpoints for use by a website or web3 wallet.
|
||||
//!
|
||||
//! Important reading about axum extractors: https://docs.rs/axum/latest/axum/extract/index.html#the-order-of-extractors
|
||||
|
||||
@ -22,28 +22,34 @@ use moka::future::Cache;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::{iter::once, time::Duration};
|
||||
use tokio::sync::broadcast;
|
||||
use tower_http::cors::CorsLayer;
|
||||
use tower_http::sensitive_headers::SetSensitiveRequestHeadersLayer;
|
||||
|
||||
/// simple keys for caching responses
|
||||
#[derive(Clone, Hash, PartialEq, Eq)]
|
||||
pub enum FrontendResponseCaches {
|
||||
Status,
|
||||
}
|
||||
|
||||
// TODO: what should this cache's value be?
|
||||
pub type FrontendResponseCache =
|
||||
pub type FrontendJsonResponseCache =
|
||||
Cache<FrontendResponseCaches, Arc<serde_json::Value>, hashbrown::hash_map::DefaultHashBuilder>;
|
||||
pub type FrontendHealthCache = Cache<(), bool, hashbrown::hash_map::DefaultHashBuilder>;
|
||||
|
||||
/// Start the frontend server.
|
||||
pub async fn serve(port: u16, proxy_app: Arc<Web3ProxyApp>) -> anyhow::Result<()> {
|
||||
pub async fn serve(
|
||||
port: u16,
|
||||
proxy_app: Arc<Web3ProxyApp>,
|
||||
mut shutdown_receiver: broadcast::Receiver<()>,
|
||||
shutdown_complete_sender: broadcast::Sender<()>,
|
||||
) -> anyhow::Result<()> {
|
||||
// setup caches for whatever the frontend needs
|
||||
// TODO: a moka cache is probably way overkill for this.
|
||||
// no need for max items. only expire because of time to live
|
||||
let response_cache: FrontendResponseCache = Cache::builder()
|
||||
// no need for max items since it is limited by the enum key
|
||||
let json_response_cache: FrontendJsonResponseCache = Cache::builder()
|
||||
.time_to_live(Duration::from_secs(2))
|
||||
.build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
|
||||
|
||||
// /health gets a cache with a shorter lifetime
|
||||
let health_cache: FrontendHealthCache = Cache::builder()
|
||||
.time_to_live(Duration::from_millis(100))
|
||||
.build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
|
||||
@ -208,7 +214,7 @@ pub async fn serve(port: u16, proxy_app: Arc<Web3ProxyApp>) -> anyhow::Result<()
|
||||
// application state
|
||||
.layer(Extension(proxy_app.clone()))
|
||||
// frontend caches
|
||||
.layer(Extension(response_cache))
|
||||
.layer(Extension(json_response_cache))
|
||||
.layer(Extension(health_cache))
|
||||
// 404 for any unknown routes
|
||||
.fallback(errors::handler_404);
|
||||
@ -229,9 +235,16 @@ pub async fn serve(port: u16, proxy_app: Arc<Web3ProxyApp>) -> anyhow::Result<()
|
||||
let service = app.into_make_service_with_connect_info::<SocketAddr>();
|
||||
|
||||
// `axum::Server` is a re-export of `hyper::Server`
|
||||
axum::Server::bind(&addr)
|
||||
let server = axum::Server::bind(&addr)
|
||||
// TODO: option to use with_connect_info. we want it in dev, but not when running behind a proxy
|
||||
.serve(service)
|
||||
.with_graceful_shutdown(async move {
|
||||
let _ = shutdown_receiver.recv().await;
|
||||
})
|
||||
.await
|
||||
.map_err(Into::into)
|
||||
.map_err(Into::into);
|
||||
|
||||
let _ = shutdown_complete_sender.send(());
|
||||
|
||||
server
|
||||
}
|
||||
|
@ -4,8 +4,7 @@
|
||||
|
||||
use super::authorization::{ip_is_authorized, key_is_authorized, Authorization, RequestMetadata};
|
||||
use super::errors::{FrontendErrorResponse, FrontendResult};
|
||||
use crate::app::REQUEST_PERIOD;
|
||||
use crate::app_stats::ProxyResponseStat;
|
||||
use crate::stats::RpcQueryStats;
|
||||
use crate::{
|
||||
app::Web3ProxyApp,
|
||||
jsonrpc::{JsonRpcForwardedResponse, JsonRpcForwardedResponseEnum, JsonRpcRequest},
|
||||
@ -379,8 +378,7 @@ async fn handle_socket_payload(
|
||||
// TODO: move this logic into the app?
|
||||
let request_bytes = json_request.num_bytes();
|
||||
|
||||
let request_metadata =
|
||||
Arc::new(RequestMetadata::new(REQUEST_PERIOD, request_bytes).unwrap());
|
||||
let request_metadata = Arc::new(RequestMetadata::new(request_bytes).unwrap());
|
||||
|
||||
let subscription_id = json_request.params.unwrap().to_string();
|
||||
|
||||
@ -401,7 +399,7 @@ async fn handle_socket_payload(
|
||||
JsonRpcForwardedResponse::from_value(json!(partial_response), id.clone());
|
||||
|
||||
if let Some(stat_sender) = app.stat_sender.as_ref() {
|
||||
let response_stat = ProxyResponseStat::new(
|
||||
let response_stat = RpcQueryStats::new(
|
||||
json_request.method.clone(),
|
||||
authorization.clone(),
|
||||
request_metadata,
|
||||
|
@ -3,7 +3,7 @@
|
||||
//! For ease of development, users can currently access these endpoints.
|
||||
//! They will eventually move to another port.
|
||||
|
||||
use super::{FrontendHealthCache, FrontendResponseCache, FrontendResponseCaches};
|
||||
use super::{FrontendHealthCache, FrontendJsonResponseCache, FrontendResponseCaches};
|
||||
use crate::app::{Web3ProxyApp, APP_USER_AGENT};
|
||||
use axum::{http::StatusCode, response::IntoResponse, Extension, Json};
|
||||
use axum_macros::debug_handler;
|
||||
@ -33,7 +33,7 @@ pub async fn health(
|
||||
#[debug_handler]
|
||||
pub async fn status(
|
||||
Extension(app): Extension<Arc<Web3ProxyApp>>,
|
||||
Extension(response_cache): Extension<FrontendResponseCache>,
|
||||
Extension(response_cache): Extension<FrontendJsonResponseCache>,
|
||||
) -> impl IntoResponse {
|
||||
let body = response_cache
|
||||
.get_with(FrontendResponseCaches::Status, async {
|
||||
|
@ -2,10 +2,11 @@
|
||||
use super::authorization::{login_is_authorized, RpcSecretKey};
|
||||
use super::errors::FrontendResult;
|
||||
use crate::app::Web3ProxyApp;
|
||||
use crate::user_queries::get_page_from_params;
|
||||
use crate::user_queries::{
|
||||
get_chain_id_from_params, get_query_start_from_params, query_user_stats, StatResponse,
|
||||
use crate::http_params::{
|
||||
get_chain_id_from_params, get_page_from_params, get_query_start_from_params,
|
||||
};
|
||||
use crate::stats::db_queries::query_user_stats;
|
||||
use crate::stats::StatType;
|
||||
use crate::user_token::UserBearerToken;
|
||||
use crate::{PostLogin, PostLoginQuery};
|
||||
use anyhow::Context;
|
||||
@ -19,7 +20,7 @@ use axum::{
|
||||
use axum_client_ip::InsecureClientIp;
|
||||
use axum_macros::debug_handler;
|
||||
use chrono::{TimeZone, Utc};
|
||||
use entities::sea_orm_active_enums::LogLevel;
|
||||
use entities::sea_orm_active_enums::TrackingLevel;
|
||||
use entities::{login, pending_login, revert_log, rpc_key, user};
|
||||
use ethers::{prelude::Address, types::Bytes};
|
||||
use hashbrown::HashMap;
|
||||
@ -489,9 +490,7 @@ pub async fn user_balance_get(
|
||||
///
|
||||
/// We will subscribe to events to watch for any user deposits, but sometimes events can be missed.
|
||||
///
|
||||
/// TODO: rate limit by user
|
||||
/// TODO: one key per request? maybe /user/balance/:rpc_key?
|
||||
/// TODO: this will change as we add better support for secondary users.
|
||||
/// TODO: change this. just have a /tx/:txhash that is open to anyone. rate limit like we rate limit /login
|
||||
#[debug_handler]
|
||||
pub async fn user_balance_post(
|
||||
Extension(app): Extension<Arc<Web3ProxyApp>>,
|
||||
@ -503,8 +502,6 @@ pub async fn user_balance_post(
|
||||
}
|
||||
|
||||
/// `GET /user/keys` -- Use a bearer token to get the user's api keys and their settings.
|
||||
///
|
||||
/// TODO: one key per request? maybe /user/keys/:rpc_key?
|
||||
#[debug_handler]
|
||||
pub async fn rpc_keys_get(
|
||||
Extension(app): Extension<Arc<Web3ProxyApp>>,
|
||||
@ -514,7 +511,7 @@ pub async fn rpc_keys_get(
|
||||
|
||||
let db_replica = app
|
||||
.db_replica()
|
||||
.context("getting db to fetch user's keys")?;
|
||||
.context("db_replica is required to fetch a user's keys")?;
|
||||
|
||||
let uks = rpc_key::Entity::find()
|
||||
.filter(rpc_key::Column::UserId.eq(user.id))
|
||||
@ -522,7 +519,6 @@ pub async fn rpc_keys_get(
|
||||
.await
|
||||
.context("failed loading user's key")?;
|
||||
|
||||
// TODO: stricter type on this?
|
||||
let response_json = json!({
|
||||
"user_id": user.id,
|
||||
"user_rpc_keys": uks
|
||||
@ -560,7 +556,7 @@ pub struct UserKeyManagement {
|
||||
allowed_referers: Option<String>,
|
||||
allowed_user_agents: Option<String>,
|
||||
description: Option<String>,
|
||||
log_level: Option<LogLevel>,
|
||||
log_level: Option<TrackingLevel>,
|
||||
// TODO: enable log_revert_trace: Option<f64>,
|
||||
private_txs: Option<bool>,
|
||||
}
|
||||
@ -813,7 +809,7 @@ pub async fn user_stats_aggregated_get(
|
||||
bearer: Option<TypedHeader<Authorization<Bearer>>>,
|
||||
Query(params): Query<HashMap<String, String>>,
|
||||
) -> FrontendResult {
|
||||
let response = query_user_stats(&app, bearer, ¶ms, StatResponse::Aggregated).await?;
|
||||
let response = query_user_stats(&app, bearer, ¶ms, StatType::Aggregated).await?;
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
@ -833,7 +829,7 @@ pub async fn user_stats_detailed_get(
|
||||
bearer: Option<TypedHeader<Authorization<Bearer>>>,
|
||||
Query(params): Query<HashMap<String, String>>,
|
||||
) -> FrontendResult {
|
||||
let response = query_user_stats(&app, bearer, ¶ms, StatResponse::Detailed).await?;
|
||||
let response = query_user_stats(&app, bearer, ¶ms, StatType::Detailed).await?;
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
206
web3_proxy/src/http_params.rs
Normal file
@ -0,0 +1,206 @@
|
||||
use crate::app::DatabaseReplica;
|
||||
use crate::frontend::errors::FrontendErrorResponse;
|
||||
use crate::{app::Web3ProxyApp, user_token::UserBearerToken};
|
||||
use anyhow::Context;
|
||||
use axum::{
|
||||
headers::{authorization::Bearer, Authorization},
|
||||
TypedHeader,
|
||||
};
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use entities::login;
|
||||
use hashbrown::HashMap;
|
||||
use log::{debug, trace, warn};
|
||||
use migration::sea_orm::{ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter};
|
||||
use redis_rate_limiter::{redis::AsyncCommands, RedisConnection};
|
||||
|
||||
/// get the attached address for the given bearer token.
|
||||
/// First checks redis. Then checks the database.
|
||||
/// 0 means all users.
|
||||
/// This authenticates that the bearer is allowed to view this user_id's stats
|
||||
pub async fn get_user_id_from_params(
|
||||
redis_conn: &mut RedisConnection,
|
||||
db_conn: &DatabaseConnection,
|
||||
db_replica: &DatabaseReplica,
|
||||
// this is a long type. should we strip it down?
|
||||
bearer: Option<TypedHeader<Authorization<Bearer>>>,
|
||||
params: &HashMap<String, String>,
|
||||
) -> Result<u64, FrontendErrorResponse> {
|
||||
match (bearer, params.get("user_id")) {
|
||||
(Some(TypedHeader(Authorization(bearer))), Some(user_id)) => {
|
||||
// check for the bearer cache key
|
||||
let user_bearer_token = UserBearerToken::try_from(bearer)?;
|
||||
|
||||
let user_redis_key = user_bearer_token.redis_key();
|
||||
|
||||
let mut save_to_redis = false;
|
||||
|
||||
// get the user id that is attached to this bearer token
|
||||
let bearer_user_id = match redis_conn.get::<_, u64>(&user_redis_key).await {
|
||||
Err(_) => {
|
||||
// TODO: inspect the redis error? if redis is down we should warn
|
||||
// this also means redis being down will not kill our app. Everything will need a db read query though.
|
||||
|
||||
let user_login = login::Entity::find()
|
||||
.filter(login::Column::BearerToken.eq(user_bearer_token.uuid()))
|
||||
.one(db_replica.conn())
|
||||
.await
|
||||
.context("database error while querying for user")?
|
||||
.ok_or(FrontendErrorResponse::AccessDenied)?;
|
||||
|
||||
// if expired, delete ALL expired logins
|
||||
let now = Utc::now();
|
||||
if now > user_login.expires_at {
|
||||
// this row is expired! do not allow auth!
|
||||
// delete ALL expired logins.
|
||||
let delete_result = login::Entity::delete_many()
|
||||
.filter(login::Column::ExpiresAt.lte(now))
|
||||
.exec(db_conn)
|
||||
.await?;
|
||||
|
||||
// TODO: emit a stat? if this is high something weird might be happening
|
||||
debug!("cleared expired logins: {:?}", delete_result);
|
||||
|
||||
return Err(FrontendErrorResponse::AccessDenied);
|
||||
}
|
||||
|
||||
save_to_redis = true;
|
||||
|
||||
user_login.user_id
|
||||
}
|
||||
Ok(x) => {
|
||||
// TODO: push cache ttl further in the future?
|
||||
x
|
||||
}
|
||||
};
|
||||
|
||||
let user_id: u64 = user_id.parse().context("Parsing user_id param")?;
|
||||
|
||||
if bearer_user_id != user_id {
|
||||
return Err(FrontendErrorResponse::AccessDenied);
|
||||
}
|
||||
|
||||
if save_to_redis {
|
||||
// TODO: how long? we store in database for 4 weeks
|
||||
const ONE_DAY: usize = 60 * 60 * 24;
|
||||
|
||||
if let Err(err) = redis_conn
|
||||
.set_ex::<_, _, ()>(user_redis_key, user_id, ONE_DAY)
|
||||
.await
|
||||
{
|
||||
warn!("Unable to save user bearer token to redis: {}", err)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(bearer_user_id)
|
||||
}
|
||||
(_, None) => {
|
||||
// they have a bearer token. we don't care about it on public pages
|
||||
// 0 means all
|
||||
Ok(0)
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
// they do not have a bearer token, but requested a specific id. block
|
||||
// TODO: proper error code from a useful error code
|
||||
// TODO: maybe instead of this sharp edged warn, we have a config value?
|
||||
// TODO: check config for if we should deny or allow this
|
||||
Err(FrontendErrorResponse::AccessDenied)
|
||||
// // TODO: make this a flag
|
||||
// warn!("allowing without auth during development!");
|
||||
// Ok(x.parse()?)
|
||||
}
|
||||
}
|
||||
}
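
get_user_id_from_params is a classic read-through cache: check redis by bearer token, fall back to the login table (rejecting expired rows), then write the answer back with a TTL. A std-only sketch of the shape of that lookup, with HashMaps standing in for redis and the database:

use std::collections::HashMap;

fn user_id_for_bearer(
    cache: &mut HashMap<String, u64>,
    database: &HashMap<String, u64>,
    bearer: &str,
) -> Option<u64> {
    if let Some(&cached) = cache.get(bearer) {
        return Some(cached);
    }

    // cache miss: the real code queries the login table and checks expires_at
    let &user_id = database.get(bearer)?;

    // save for next time (redis uses SETEX with a one day TTL)
    cache.insert(bearer.to_string(), user_id);

    Some(user_id)
}

fn main() {
    let database = HashMap::from([("bearer-abc".to_string(), 42_u64)]);
    let mut cache = HashMap::new();

    assert_eq!(user_id_for_bearer(&mut cache, &database, "bearer-abc"), Some(42));
    // second call is served from the cache
    assert_eq!(user_id_for_bearer(&mut cache, &database, "bearer-abc"), Some(42));
    assert_eq!(user_id_for_bearer(&mut cache, &database, "unknown"), None);
}
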
|
||||
|
||||
/// only allow rpc_key to be set if user_id is also set.
|
||||
/// this will keep people from reading someone else's keys.
|
||||
/// 0 means none.
|
||||
|
||||
pub fn get_rpc_key_id_from_params(
|
||||
user_id: u64,
|
||||
params: &HashMap<String, String>,
|
||||
) -> anyhow::Result<u64> {
|
||||
if user_id > 0 {
|
||||
params.get("rpc_key_id").map_or_else(
|
||||
|| Ok(0),
|
||||
|c| {
|
||||
let c = c.parse()?;
|
||||
|
||||
Ok(c)
|
||||
},
|
||||
)
|
||||
} else {
|
||||
Ok(0)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_chain_id_from_params(
|
||||
app: &Web3ProxyApp,
|
||||
params: &HashMap<String, String>,
|
||||
) -> anyhow::Result<u64> {
|
||||
params.get("chain_id").map_or_else(
|
||||
|| Ok(app.config.chain_id),
|
||||
|c| {
|
||||
let c = c.parse()?;
|
||||
|
||||
Ok(c)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_page_from_params(params: &HashMap<String, String>) -> anyhow::Result<u64> {
|
||||
params.get("page").map_or_else::<anyhow::Result<u64>, _, _>(
|
||||
|| {
|
||||
// no page in params. set default
|
||||
Ok(0)
|
||||
},
|
||||
|x: &String| {
|
||||
// parse the given page number
|
||||
// TODO: error code 401
|
||||
let x = x.parse().context("parsing page query from params")?;
|
||||
|
||||
Ok(x)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// TODO: return chrono::Utc instead?
|
||||
pub fn get_query_start_from_params(
|
||||
params: &HashMap<String, String>,
|
||||
) -> anyhow::Result<chrono::NaiveDateTime> {
|
||||
params.get("query_start").map_or_else(
|
||||
|| {
|
||||
// no timestamp in params. set default
|
||||
let x = chrono::Utc::now() - chrono::Duration::days(30);
|
||||
|
||||
Ok(x.naive_utc())
|
||||
},
|
||||
|x: &String| {
|
||||
// parse the given timestamp
|
||||
let x = x.parse::<i64>().context("parsing timestamp query param")?;
|
||||
|
||||
// TODO: error code 401
|
||||
let x =
|
||||
NaiveDateTime::from_timestamp_opt(x, 0).context("parsing timestamp query param")?;
|
||||
|
||||
Ok(x)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_query_window_seconds_from_params(
|
||||
params: &HashMap<String, String>,
|
||||
) -> Result<u64, FrontendErrorResponse> {
|
||||
params.get("query_window_seconds").map_or_else(
|
||||
|| {
|
||||
// no query_window_seconds in params. set default
|
||||
Ok(0)
|
||||
},
|
||||
|query_window_seconds: &String| {
|
||||
// parse the given window size
|
||||
query_window_seconds.parse::<u64>().map_err(|err| {
|
||||
trace!("Unable to parse rpc_key_id: {:#?}", err);
|
||||
FrontendErrorResponse::BadRequest("Unable to parse rpc_key_id".to_string())
|
||||
})
|
||||
},
|
||||
)
|
||||
}
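
All of these helpers follow the same map_or_else pattern: a missing query param falls back to a default, a present one must parse or the request fails. A std-only sketch of that pattern (simplified error type, not the crate's FrontendErrorResponse):

use std::collections::HashMap;

fn get_page(params: &HashMap<String, String>) -> Result<u64, String> {
    params.get("page").map_or_else(
        || Ok(0), // no page in params: default to the first page
        |x| x.parse::<u64>().map_err(|e| format!("bad page param: {}", e)),
    )
}

fn main() {
    let mut params = HashMap::new();
    assert_eq!(get_page(&params), Ok(0));

    params.insert("page".to_string(), "3".to_string());
    assert_eq!(get_page(&params), Ok(3));

    params.insert("page".to_string(), "not-a-number".to_string());
    assert!(get_page(&params).is_err());
}
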
|
@ -30,7 +30,8 @@ impl fmt::Debug for JsonRpcRequest {
|
||||
f.debug_struct("JsonRpcRequest")
|
||||
.field("id", &self.id)
|
||||
.field("method", &self.method)
|
||||
.finish_non_exhaustive()
|
||||
.field("params", &self.params)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,15 +1,15 @@
|
||||
pub mod app;
|
||||
pub mod app_stats;
|
||||
pub mod admin_queries;
|
||||
pub mod atomics;
|
||||
pub mod block_number;
|
||||
pub mod config;
|
||||
pub mod frontend;
|
||||
pub mod http_params;
|
||||
pub mod jsonrpc;
|
||||
pub mod metrics_frontend;
|
||||
pub mod pagerduty;
|
||||
pub mod prometheus;
|
||||
pub mod rpcs;
|
||||
pub mod user_queries;
|
||||
pub mod stats;
|
||||
pub mod user_token;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
@ -1,54 +0,0 @@
|
||||
//! A module providing the `JsonRpcErrorCount` metric.
|
||||
|
||||
use ethers::providers::ProviderError;
|
||||
use serde::Serialize;
|
||||
use std::ops::Deref;
|
||||
|
||||
/// A metric counting how many times an expression typed std `Result` as
|
||||
/// returned an `Err` variant.
|
||||
///
|
||||
/// This is a light-weight metric.
|
||||
///
|
||||
/// By default, `ErrorCount` uses a lock-free `u64` `Counter`, which makes sense
|
||||
/// in multithread scenarios. Non-threaded applications can gain performance by
|
||||
/// using a `std::cell:Cell<u64>` instead.
|
||||
#[derive(Clone, Default, Debug, Serialize)]
|
||||
pub struct JsonRpcErrorCount<C: Counter = AtomicInt<u64>>(pub C);
|
||||
|
||||
impl<C: Counter, T> Metric<Result<T, ProviderError>> for JsonRpcErrorCount<C> {}
|
||||
|
||||
impl<C: Counter> Enter for JsonRpcErrorCount<C> {
|
||||
type E = ();
|
||||
fn enter(&self) {}
|
||||
}
|
||||
|
||||
impl<C: Counter, T> OnResult<Result<T, ProviderError>> for JsonRpcErrorCount<C> {
|
||||
/// Unlike the default ErrorCount, this one does not increment for internal jsonrpc errors
|
||||
/// TODO: count errors like this on another helper
|
||||
fn on_result(&self, _: (), r: &Result<T, ProviderError>) -> Advice {
|
||||
match r {
|
||||
Ok(_) => {}
|
||||
Err(ProviderError::JsonRpcClientError(_)) => {
|
||||
self.0.incr();
|
||||
}
|
||||
Err(_) => {
|
||||
// TODO: count jsonrpc errors
|
||||
}
|
||||
}
|
||||
Advice::Return
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Counter> Clear for JsonRpcErrorCount<C> {
|
||||
fn clear(&self) {
|
||||
self.0.clear()
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Counter> Deref for JsonRpcErrorCount<C> {
|
||||
type Target = C;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
@ -1,5 +0,0 @@
|
||||
mod jsonrpc_error_count;
|
||||
mod provider_error_count;
|
||||
|
||||
pub use self::jsonrpc_error_count::JsonRpcErrorCount;
|
||||
pub use self::provider_error_count::ProviderErrorCount;
|
@ -1,51 +0,0 @@
|
||||
//! A module providing the `JsonRpcErrorCount` metric.
|
||||
|
||||
use ethers::providers::ProviderError;
|
||||
use serde::Serialize;
|
||||
use std::ops::Deref;
|
||||
|
||||
/// A metric counting how many times an expression typed std `Result` as
|
||||
/// returned an `Err` variant.
|
||||
///
|
||||
/// This is a light-weight metric.
|
||||
///
|
||||
/// By default, `ErrorCount` uses a lock-free `u64` `Counter`, which makes sense
|
||||
/// in multithread scenarios. Non-threaded applications can gain performance by
|
||||
/// using a `std::cell:Cell<u64>` instead.
|
||||
#[derive(Clone, Default, Debug, Serialize)]
|
||||
pub struct ProviderErrorCount<C: Counter = AtomicInt<u64>>(pub C);
|
||||
|
||||
impl<C: Counter, T> Metric<Result<T, ProviderError>> for ProviderErrorCount<C> {}
|
||||
|
||||
impl<C: Counter> Enter for ProviderErrorCount<C> {
|
||||
type E = ();
|
||||
fn enter(&self) {}
|
||||
}
|
||||
|
||||
impl<C: Counter, T> OnResult<Result<T, ProviderError>> for ProviderErrorCount<C> {
|
||||
/// Unlike the default ErrorCount, this one does not increment for internal jsonrpc errors
|
||||
fn on_result(&self, _: (), r: &Result<T, ProviderError>) -> Advice {
|
||||
match r {
|
||||
Ok(_) => {}
|
||||
Err(ProviderError::JsonRpcClientError(_)) => {}
|
||||
Err(_) => {
|
||||
self.0.incr();
|
||||
}
|
||||
}
|
||||
Advice::Return
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Counter> Clear for ProviderErrorCount<C> {
|
||||
fn clear(&self) {
|
||||
self.0.clear()
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Counter> Deref for ProviderErrorCount<C> {
|
||||
type Target = C;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
use crate::config::TopConfig;
|
||||
use gethostname::gethostname;
|
||||
use log::{debug, error};
|
||||
use log::{debug, error, warn};
|
||||
use pagerduty_rs::eventsv2sync::EventsV2 as PagerdutySyncEventsV2;
|
||||
use pagerduty_rs::types::{AlertTrigger, AlertTriggerPayload, Event};
|
||||
use serde::Serialize;
|
||||
@ -157,8 +157,12 @@ pub fn pagerduty_alert<T: Serialize>(
|
||||
|
||||
let group = chain_id.map(|x| format!("chain #{}", x));
|
||||
|
||||
let source =
|
||||
source.unwrap_or_else(|| gethostname().into_string().unwrap_or("unknown".to_string()));
|
||||
let source = source.unwrap_or_else(|| {
|
||||
gethostname().into_string().unwrap_or_else(|err| {
|
||||
warn!("unable to handle hostname: {:#?}", err);
|
||||
"unknown".to_string()
|
||||
})
|
||||
});
|
||||
|
||||
let mut s = DefaultHasher::new();
|
||||
// TODO: include severity here?
|
||||
|
@ -5,40 +5,31 @@ use axum::{routing::get, Extension, Router};
|
||||
use log::info;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use crate::app::Web3ProxyApp;
|
||||
|
||||
/// Run a prometheus metrics server on the given port.
|
||||
|
||||
pub async fn serve(app: Arc<Web3ProxyApp>, port: u16) -> anyhow::Result<()> {
|
||||
// build our application with a route
|
||||
// order most to least common
|
||||
// TODO: 404 any unhandled routes?
|
||||
pub async fn serve(
|
||||
app: Arc<Web3ProxyApp>,
|
||||
port: u16,
|
||||
mut shutdown_receiver: broadcast::Receiver<()>,
|
||||
) -> anyhow::Result<()> {
|
||||
// routes should be ordered most to least common
|
||||
let app = Router::new().route("/", get(root)).layer(Extension(app));
|
||||
|
||||
// run our app with hyper
|
||||
// TODO: allow only listening on localhost?
|
||||
// TODO: config for the host?
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], port));
|
||||
info!("prometheus listening on port {}", port);
|
||||
// TODO: into_make_service is enough if we always run behind a proxy. make into_make_service_with_connect_info optional?
|
||||
|
||||
/*
|
||||
InsecureClientIp sequentially looks for an IP in:
|
||||
- x-forwarded-for header (de-facto standard)
|
||||
- x-real-ip header
|
||||
- forwarded header (new standard)
|
||||
- axum::extract::ConnectInfo (if not behind proxy)
|
||||
|
||||
Since we run behind haproxy, x-forwarded-for will be set.
|
||||
We probably won't need into_make_service_with_connect_info, but it shouldn't hurt.
|
||||
*/
|
||||
let service = app.into_make_service_with_connect_info::<SocketAddr>();
|
||||
// let service = app.into_make_service();
|
||||
let service = app.into_make_service();
|
||||
|
||||
// `axum::Server` is a re-export of `hyper::Server`
|
||||
axum::Server::bind(&addr)
|
||||
// TODO: option to use with_connect_info. we want it in dev, but not when running behind a proxy
|
||||
.serve(service)
|
||||
.with_graceful_shutdown(async move {
|
||||
let _ = shutdown_receiver.recv().await;
|
||||
})
|
||||
.await
|
||||
.map_err(Into::into)
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
///! Keep track of the blockchain as seen by a Web3Rpcs.
|
||||
use super::consensus::ConsensusFinder;
|
||||
use super::many::Web3Rpcs;
|
||||
///! Keep track of the blockchain as seen by a Web3Rpcs.
|
||||
use super::one::Web3Rpc;
|
||||
use super::transactions::TxStatus;
|
||||
use crate::frontend::authorization::Authorization;
|
||||
@ -10,9 +10,9 @@ use derive_more::From;
|
||||
use ethers::prelude::{Block, TxHash, H256, U64};
|
||||
use log::{debug, trace, warn, Level};
|
||||
use moka::future::Cache;
|
||||
use serde::ser::SerializeStruct;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use std::{cmp::Ordering, fmt::Display, sync::Arc};
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::time::Duration;
|
||||
@ -23,7 +23,7 @@ pub type ArcBlock = Arc<Block<TxHash>>;
|
||||
pub type BlocksByHashCache = Cache<H256, Web3ProxyBlock, hashbrown::hash_map::DefaultHashBuilder>;
|
||||
|
||||
/// A block and its age.
|
||||
#[derive(Clone, Debug, Default, From, Serialize)]
|
||||
#[derive(Clone, Debug, Default, From)]
|
||||
pub struct Web3ProxyBlock {
|
||||
pub block: ArcBlock,
|
||||
/// number of seconds this block was behind the current time when received
|
||||
@ -31,6 +31,29 @@ pub struct Web3ProxyBlock {
|
||||
pub received_age: Option<u64>,
|
||||
}
|
||||
|
||||
impl Serialize for Web3ProxyBlock {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
// TODO: i'm not sure about this name
|
||||
let mut state = serializer.serialize_struct("saved_block", 2)?;
|
||||
|
||||
state.serialize_field("age", &self.age())?;
|
||||
|
||||
let block = json!({
|
||||
"block_hash": self.block.hash,
|
||||
"parent_hash": self.block.parent_hash,
|
||||
"number": self.block.number,
|
||||
"timestamp": self.block.timestamp,
|
||||
});
|
||||
|
||||
state.serialize_field("block", &block)?;
|
||||
|
||||
state.end()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Web3ProxyBlock {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
match (self.block.hash, other.block.hash) {
|
||||
@ -63,16 +86,16 @@ impl Web3ProxyBlock {
|
||||
}
|
||||
|
||||
pub fn age(&self) -> u64 {
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("there should always be time");
|
||||
let now = chrono::Utc::now().timestamp();
|
||||
|
||||
let block_timestamp = Duration::from_secs(self.block.timestamp.as_u64());
|
||||
let block_timestamp = self.block.timestamp.as_u32() as i64;
|
||||
|
||||
if block_timestamp < now {
|
||||
// this server is still syncing from too far away to serve requests
|
||||
// u64 is safe because ew checked equality above
|
||||
(now - block_timestamp).as_secs()
|
||||
// (now - block_timestamp).as_secs()
|
||||
// u64 is safe because we checked equality above
|
||||
(now - block_timestamp) as u64
|
||||
} else {
|
||||
0
|
||||
}
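
The reworked age() drops SystemTime in favor of chrono and clamps future-dated blocks to zero. A small sketch of the same math (assumes the chrono crate only for the current unix time):

fn block_age(now_unix: i64, block_timestamp: i64) -> u64 {
    if block_timestamp < now_unix {
        // safe cast: the difference is positive here
        (now_unix - block_timestamp) as u64
    } else {
        // block timestamp is at or ahead of our clock: treat it as brand new
        0
    }
}

fn main() {
    let now = chrono::Utc::now().timestamp();
    assert_eq!(block_age(now, now - 12), 12);
    assert_eq!(block_age(now, now + 5), 0);
}
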
|
||||
@ -387,7 +410,7 @@ impl Web3Rpcs {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let new_synced_connections = consensus_finder
|
||||
let new_consensus = consensus_finder
|
||||
.best_consensus_connections(authorization, self)
|
||||
.await
|
||||
.context("no consensus head block!")
|
||||
@ -397,14 +420,14 @@ impl Web3Rpcs {
|
||||
err
|
||||
})?;
|
||||
|
||||
// TODO: what should we do if the block number of new_synced_connections is < old_synced_connections? wait?
|
||||
// TODO: what should we do if the block number of new_consensus is < old_synced_connections? wait?
|
||||
|
||||
let watch_consensus_head_sender = self.watch_consensus_head_sender.as_ref().unwrap();
|
||||
let consensus_tier = new_synced_connections.tier;
|
||||
let consensus_tier = new_consensus.tier;
|
||||
let total_tiers = consensus_finder.len();
|
||||
let backups_needed = new_synced_connections.backups_needed;
|
||||
let consensus_head_block = new_synced_connections.head_block.clone();
|
||||
let num_consensus_rpcs = new_synced_connections.num_conns();
|
||||
let backups_needed = new_consensus.backups_needed;
|
||||
let consensus_head_block = new_consensus.head_block.clone();
|
||||
let num_consensus_rpcs = new_consensus.num_conns();
|
||||
let mut num_synced_rpcs = 0;
|
||||
let num_active_rpcs = consensus_finder
|
||||
.all_rpcs_group()
|
||||
@ -421,7 +444,7 @@ impl Web3Rpcs {
|
||||
|
||||
let old_consensus_head_connections = self
|
||||
.watch_consensus_rpcs_sender
|
||||
.send_replace(Some(Arc::new(new_synced_connections)));
|
||||
.send_replace(Some(Arc::new(new_consensus)));
|
||||
|
||||
let backups_voted_str = if backups_needed { "B " } else { "" };
|
||||
|
||||
|
@ -1,8 +1,7 @@
|
||||
use crate::frontend::authorization::Authorization;
|
||||
|
||||
use super::blockchain::Web3ProxyBlock;
|
||||
use super::many::Web3Rpcs;
|
||||
use super::one::Web3Rpc;
|
||||
use crate::frontend::authorization::Authorization;
|
||||
use anyhow::Context;
|
||||
use ethers::prelude::{H256, U64};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
@ -21,18 +20,22 @@ pub struct ConsensusWeb3Rpcs {
|
||||
// TODO: tier should be an option, or we should have consensus be stored as an Option<ConsensusWeb3Rpcs>
|
||||
pub(super) tier: u64,
|
||||
pub(super) head_block: Web3ProxyBlock,
|
||||
// pub tier: u64,
|
||||
// pub head_block: Option<Web3ProxyBlock>,
|
||||
// TODO: this should be able to serialize, but it isn't
|
||||
#[serde(skip_serializing)]
|
||||
pub(super) rpcs: Vec<Arc<Web3Rpc>>,
|
||||
pub(super) backups_voted: Option<Web3ProxyBlock>,
|
||||
pub(super) backups_needed: bool,
|
||||
pub rpcs: Vec<Arc<Web3Rpc>>,
|
||||
pub backups_voted: Option<Web3ProxyBlock>,
|
||||
pub backups_needed: bool,
|
||||
}
|
||||
|
||||
impl ConsensusWeb3Rpcs {
|
||||
#[inline(always)]
|
||||
pub fn num_conns(&self) -> usize {
|
||||
self.rpcs.len()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn sum_soft_limit(&self) -> u32 {
|
||||
self.rpcs.iter().fold(0, |sum, rpc| sum + rpc.soft_limit)
|
||||
}
|
||||
@ -44,9 +47,9 @@ impl fmt::Debug for ConsensusWeb3Rpcs {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
// TODO: the default formatter takes forever to write. this is too quiet though
|
||||
// TODO: print the actual conns?
|
||||
f.debug_struct("ConsensusConnections")
|
||||
f.debug_struct("ConsensusWeb3Rpcs")
|
||||
.field("head_block", &self.head_block)
|
||||
.field("num_conns", &self.rpcs.len())
|
||||
.field("num_rpcs", &self.rpcs.len())
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
@ -203,7 +206,7 @@ impl ConnectionsGroup {
|
||||
let mut primary_rpcs_voted: Option<Web3ProxyBlock> = None;
|
||||
let mut backup_rpcs_voted: Option<Web3ProxyBlock> = None;
|
||||
|
||||
// track rpcs on this heaviest chain so we can build a new ConsensusConnections
|
||||
// track rpcs on this heaviest chain so we can build a new ConsensusWeb3Rpcs
|
||||
let mut primary_consensus_rpcs = HashSet::<&str>::new();
|
||||
let mut backup_consensus_rpcs = HashSet::<&str>::new();
|
||||
|
||||
@ -356,7 +359,7 @@ impl ConnectionsGroup {
|
||||
}
|
||||
}
|
||||
|
||||
/// A ConsensusConnections builder that tracks all connection heads across multiple groups of servers
|
||||
/// A ConsensusWeb3Rpcs builder that tracks all connection heads across multiple groups of servers
|
||||
pub struct ConsensusFinder {
|
||||
/// backups for all tiers are only used if necessary
|
||||
/// tiers[0] = only tier 0.
|
||||
|
@ -2,8 +2,9 @@
|
||||
use super::blockchain::{BlocksByHashCache, Web3ProxyBlock};
|
||||
use super::consensus::ConsensusWeb3Rpcs;
|
||||
use super::one::Web3Rpc;
|
||||
use super::request::{OpenRequestHandle, OpenRequestResult, RequestRevertHandler};
|
||||
use super::request::{OpenRequestHandle, OpenRequestResult, RequestErrorHandler};
|
||||
use crate::app::{flatten_handle, AnyhowJoinHandle, Web3ProxyApp};
|
||||
///! Load balanced communication with a group of web3 providers
|
||||
use crate::config::{BlockAndRpc, TxHashAndRpc, Web3RpcConfig};
|
||||
use crate::frontend::authorization::{Authorization, RequestMetadata};
|
||||
use crate::frontend::rpc_proxy_ws::ProxyMode;
|
||||
@ -87,7 +88,12 @@ impl Web3Rpcs {
|
||||
pending_transaction_cache: Cache<TxHash, TxStatus, hashbrown::hash_map::DefaultHashBuilder>,
|
||||
pending_tx_sender: Option<broadcast::Sender<TxStatus>>,
|
||||
watch_consensus_head_sender: Option<watch::Sender<Option<Web3ProxyBlock>>>,
|
||||
) -> anyhow::Result<(Arc<Self>, AnyhowJoinHandle<()>)> {
|
||||
) -> anyhow::Result<(
|
||||
Arc<Self>,
|
||||
AnyhowJoinHandle<()>,
|
||||
watch::Receiver<Option<Arc<ConsensusWeb3Rpcs>>>,
|
||||
// watch::Receiver<Arc<ConsensusWeb3Rpcs>>,
|
||||
)> {
|
||||
let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded();
|
||||
let (block_sender, block_receiver) = flume::unbounded::<BlockAndRpc>();
|
||||
|
||||
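spawn() now also returns a watch::Receiver carrying the latest consensus set, so callers can react whenever a new ConsensusWeb3Rpcs is published. A minimal sketch of a consumer loop, assuming only the tokio watch API and the Option<Arc<_>> payload type declared above (the loop body is illustrative):

```rust
use std::sync::Arc;
use tokio::sync::watch;

async fn watch_consensus<T>(mut rx: watch::Receiver<Option<Arc<T>>>) {
    // changed() resolves every time a new value is sent on the channel
    while rx.changed().await.is_ok() {
        // borrow_and_update() marks the value as seen so the loop waits for the next send
        let current = rx.borrow_and_update().clone();
        if let Some(consensus) = current {
            // react to the new consensus set here
            let _ = consensus;
        }
    }
}
```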
@ -161,7 +167,7 @@ impl Web3Rpcs {
|
||||
.max_capacity(10_000)
|
||||
.build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
|
||||
|
||||
let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default());
|
||||
let (watch_consensus_rpcs_sender, consensus_connections_watcher) = watch::channel(Default::default());
|
||||
|
||||
// by_name starts empty. self.apply_server_configs will add to it
|
||||
let by_name = Default::default();
|
||||
@ -195,7 +201,7 @@ impl Web3Rpcs {
|
||||
})
|
||||
};
|
||||
|
||||
Ok((connections, handle))
|
||||
Ok((connections, handle, consensus_connections_watcher))
|
||||
}
|
||||
|
||||
/// update the rpcs in this group
|
||||
@ -274,6 +280,10 @@ impl Web3Rpcs {
|
||||
})
|
||||
.collect();
|
||||
|
||||
// map of connection names to their connection
|
||||
// let mut connections = HashMap::new();
|
||||
// let mut handles = vec![];
|
||||
|
||||
while let Some(x) = spawn_handles.next().await {
|
||||
match x {
|
||||
Ok(Ok((rpc, _handle))) => {
|
||||
@ -308,8 +318,43 @@ impl Web3Rpcs {
|
||||
}
|
||||
}
|
||||
|
||||
// <<<<<<< HEAD
|
||||
Ok(())
|
||||
}
|
||||
// =======
|
||||
// // TODO: max_capacity and time_to_idle from config
|
||||
// // all block hashes are the same size, so no need for weigher
|
||||
// let block_hashes = Cache::builder()
|
||||
// .time_to_idle(Duration::from_secs(600))
|
||||
// .max_capacity(10_000)
|
||||
// .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
|
||||
// // all block numbers are the same size, so no need for weigher
|
||||
// let block_numbers = Cache::builder()
|
||||
// .time_to_idle(Duration::from_secs(600))
|
||||
// .max_capacity(10_000)
|
||||
// .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
|
||||
//
|
||||
// let (watch_consensus_connections_sender, consensus_connections_watcher) =
|
||||
// watch::channel(Default::default());
|
||||
//
|
||||
// let watch_consensus_head_receiver =
|
||||
// watch_consensus_head_sender.as_ref().map(|x| x.subscribe());
|
||||
//
|
||||
// let connections = Arc::new(Self {
|
||||
// by_name: connections,
|
||||
// watch_consensus_rpcs_sender: watch_consensus_connections_sender,
|
||||
// watch_consensus_head_receiver,
|
||||
// pending_transactions,
|
||||
// block_hashes,
|
||||
// block_numbers,
|
||||
// min_sum_soft_limit,
|
||||
// min_head_rpcs,
|
||||
// max_block_age,
|
||||
// max_block_lag,
|
||||
// });
|
||||
//
|
||||
// let authorization = Arc::new(Authorization::internal(db_conn.clone())?);
|
||||
// >>>>>>> 77df3fa (stats v2)
|
||||
|
||||
pub fn get(&self, conn_name: &str) -> Option<Arc<Web3Rpc>> {
|
||||
self.by_name.read().get(conn_name).cloned()
|
||||
@ -319,8 +364,12 @@ impl Web3Rpcs {
|
||||
self.by_name.read().len()
|
||||
}
|
||||
|
||||
// <<<<<<< HEAD
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.by_name.read().is_empty()
|
||||
// =======
|
||||
// Ok((connections, handle, consensus_connections_watcher))
|
||||
// >>>>>>> 77df3fa (stats v2)
|
||||
}
|
||||
|
||||
pub fn min_head_rpcs(&self) -> usize {
|
||||
@ -655,9 +704,7 @@ impl Web3Rpcs {
|
||||
trace!("{} vs {}", rpc_a, rpc_b);
|
||||
// TODO: cached key to save a read lock
|
||||
// TODO: ties to the server with the smallest block_data_limit
|
||||
let best_rpc = min_by_key(rpc_a, rpc_b, |x| {
|
||||
OrderedFloat(x.head_latency.read().value())
|
||||
});
|
||||
let best_rpc = min_by_key(rpc_a, rpc_b, |x| x.peak_ewma());
|
||||
trace!("winner: {}", best_rpc);
|
||||
|
||||
// just because it has lower latency doesn't mean we are sure to get a connection
|
||||
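The randomly shuffled pair is now broken with a single peak_ewma() score per server instead of reading the raw head-latency EWMA inline. A standalone sketch of that comparison with hypothetical types, not the proxy's Web3Rpc:

```rust
#[derive(Debug)]
struct Candidate {
    name: &'static str,
    peak_ewma_ms: f64, // assumed to be a smoothed latency score, lower is better
}

fn best<'a>(a: &'a Candidate, b: &'a Candidate) -> &'a Candidate {
    // ties (and NaN) keep the first candidate, matching min_by_key semantics
    if b.peak_ewma_ms < a.peak_ewma_ms {
        b
    } else {
        a
    }
}

fn main() {
    let a = Candidate { name: "rpc_a", peak_ewma_ms: 38.0 };
    let b = Candidate { name: "rpc_b", peak_ewma_ms: 21.5 };
    assert_eq!(best(&a, &b).name, "rpc_b");
}
```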
@ -671,7 +718,7 @@ impl Web3Rpcs {
|
||||
}
|
||||
Ok(OpenRequestResult::NotReady) => {
|
||||
// TODO: log a warning? emit a stat?
|
||||
trace!("best_rpc not ready");
|
||||
trace!("best_rpc not ready: {}", best_rpc);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("No request handle for {}. err={:?}", best_rpc, err)
|
||||
@ -837,7 +884,11 @@ impl Web3Rpcs {
|
||||
|
||||
// TODO: maximum retries? right now its the total number of servers
|
||||
loop {
|
||||
// <<<<<<< HEAD
|
||||
if skip_rpcs.len() >= self.by_name.read().len() {
|
||||
// =======
|
||||
// if skip_rpcs.len() == self.by_name.len() {
|
||||
// >>>>>>> 77df3fa (stats v2)
|
||||
break;
|
||||
}
|
||||
|
||||
@ -854,11 +905,10 @@ impl Web3Rpcs {
|
||||
OpenRequestResult::Handle(active_request_handle) => {
|
||||
// save the rpc in case we get an error and want to retry on another server
|
||||
// TODO: look at backend_requests instead
|
||||
skip_rpcs.push(active_request_handle.clone_connection());
|
||||
let rpc = active_request_handle.clone_connection();
|
||||
skip_rpcs.push(rpc.clone());
|
||||
|
||||
if let Some(request_metadata) = request_metadata {
|
||||
let rpc = active_request_handle.clone_connection();
|
||||
|
||||
request_metadata
|
||||
.response_from_backup_rpc
|
||||
.store(rpc.backup, Ordering::Release);
|
||||
@ -871,7 +921,7 @@ impl Web3Rpcs {
|
||||
.request(
|
||||
&request.method,
|
||||
&json!(request.params),
|
||||
RequestRevertHandler::Save,
|
||||
RequestErrorHandler::SaveRevert,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
@ -1109,9 +1159,18 @@ impl Web3Rpcs {
|
||||
request_metadata.no_servers.fetch_add(1, Ordering::Release);
|
||||
}
|
||||
|
||||
// <<<<<<< HEAD
|
||||
watch_consensus_rpcs.changed().await?;
|
||||
|
||||
watch_consensus_rpcs.borrow_and_update();
|
||||
// =======
|
||||
// TODO: i don't think this will ever happen
|
||||
// TODO: return a 502? if it does?
|
||||
// return Err(anyhow::anyhow!("no available rpcs!"));
|
||||
// TODO: sleep how long?
|
||||
// TODO: subscribe to something in ConsensusWeb3Rpcs instead
|
||||
sleep(Duration::from_millis(200)).await;
|
||||
// >>>>>>> 77df3fa (stats v2)
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -1239,13 +1298,14 @@ fn rpc_sync_status_sort_key(x: &Arc<Web3Rpc>) -> (U64, u64, bool, OrderedFloat<f
|
||||
mod tests {
|
||||
// TODO: why is this allow needed? does tokio::test get in the way somehow?
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use super::*;
|
||||
use crate::rpcs::consensus::ConsensusFinder;
|
||||
use crate::rpcs::{blockchain::Web3ProxyBlock, provider::Web3Provider};
|
||||
use ethers::types::{Block, U256};
|
||||
use log::{trace, LevelFilter};
|
||||
use parking_lot::RwLock;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tokio::sync::RwLock as AsyncRwLock;
|
||||
|
||||
#[tokio::test]
|
||||
@ -1331,11 +1391,7 @@ mod tests {
|
||||
.is_test(true)
|
||||
.try_init();
|
||||
|
||||
let now: U256 = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
.into();
|
||||
let now = chrono::Utc::now().timestamp().into();
|
||||
|
||||
let lagged_block = Block {
|
||||
hash: Some(H256::random()),
|
||||
@ -1547,11 +1603,7 @@ mod tests {
|
||||
.is_test(true)
|
||||
.try_init();
|
||||
|
||||
let now: U256 = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
.into();
|
||||
let now = chrono::Utc::now().timestamp().into();
|
||||
|
||||
let head_block = Block {
|
||||
hash: Some(H256::random()),
|
||||
|
@ -5,7 +5,7 @@ use super::request::{OpenRequestHandle, OpenRequestResult};
|
||||
use crate::app::{flatten_handle, AnyhowJoinHandle};
|
||||
use crate::config::{BlockAndRpc, Web3RpcConfig};
|
||||
use crate::frontend::authorization::Authorization;
|
||||
use crate::rpcs::request::RequestRevertHandler;
|
||||
use crate::rpcs::request::RequestErrorHandler;
|
||||
use anyhow::{anyhow, Context};
|
||||
use ethers::prelude::{Bytes, Middleware, ProviderError, TxHash, H256, U64};
|
||||
use ethers::types::{Address, Transaction, U256};
|
||||
@ -106,8 +106,9 @@ pub struct Web3Rpc {
|
||||
/// it is an async lock because we hold it open across awaits
|
||||
/// this provider is only used for new heads subscriptions
|
||||
/// TODO: watch channel instead of a lock
|
||||
/// TODO: is this only used for new heads subscriptions? if so, rename
|
||||
pub(super) provider: AsyncRwLock<Option<Arc<Web3Provider>>>,
|
||||
/// keep track of hard limits
|
||||
/// keep track of hard limits. Optional because we skip this code for our own servers.
|
||||
pub(super) hard_limit_until: Option<watch::Sender<Instant>>,
|
||||
/// rate limits are stored in a central redis so that multiple proxies can share their rate limits
|
||||
/// We do not use the deferred rate limiter because going over limits would cause errors
|
||||
@ -241,8 +242,12 @@ impl Web3Rpc {
|
||||
block_data_limit,
|
||||
reconnect,
|
||||
tier: config.tier,
|
||||
// <<<<<<< HEAD
|
||||
disconnect_watch: Some(disconnect_sender),
|
||||
created_at: Some(created_at),
|
||||
// =======
|
||||
head_block: RwLock::new(Default::default()),
|
||||
// >>>>>>> 77df3fa (stats v2)
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@ -272,7 +277,7 @@ impl Web3Rpc {
|
||||
Ok((new_connection, handle))
|
||||
}
|
||||
|
||||
pub async fn peak_ewma(&self) -> OrderedFloat<f64> {
|
||||
pub fn peak_ewma(&self) -> OrderedFloat<f64> {
|
||||
// TODO: use request instead of head latency? that was killing perf though
|
||||
let head_ewma = self.head_latency.read().value();
|
||||
|
||||
@ -392,6 +397,12 @@ impl Web3Rpc {
|
||||
|
||||
// this rpc doesn't have that block yet. still syncing
|
||||
if needed_block_num > &head_block_num {
|
||||
trace!(
|
||||
"{} has head {} but needs {}",
|
||||
self,
|
||||
head_block_num,
|
||||
needed_block_num,
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -400,7 +411,17 @@ impl Web3Rpc {
|
||||
|
||||
let oldest_block_num = head_block_num.saturating_sub(block_data_limit);
|
||||
|
||||
*needed_block_num >= oldest_block_num
|
||||
if needed_block_num < &oldest_block_num {
|
||||
trace!(
|
||||
"{} needs {} but the oldest available is {}",
|
||||
self,
|
||||
needed_block_num,
|
||||
oldest_block_num
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// reconnect to the provider. errors are retried forever with exponential backoff with jitter.
|
||||
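The two early returns above make the availability window explicit: a server can answer for a block only if it is no newer than the server's head and no older than head minus block_data_limit. A self-contained sketch of that window, with a hypothetical helper name and made-up numbers:

```rust
/// hypothetical helper mirroring the check above:
/// oldest_available <= needed <= head
fn has_block_data(head: u64, block_data_limit: u64, needed: u64) -> bool {
    if needed > head {
        // this rpc doesn't have that block yet. still syncing
        return false;
    }
    let oldest_available = head.saturating_sub(block_data_limit);
    needed >= oldest_available
}

#[test]
fn pruned_node_window() {
    // a pruned node keeping the last 64 blocks, currently at head 1_000
    assert!(has_block_data(1_000, 64, 1_000));
    assert!(has_block_data(1_000, 64, 936));
    assert!(!has_block_data(1_000, 64, 935));
    assert!(!has_block_data(1_000, 64, 1_001));
}
```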
@ -439,7 +460,8 @@ impl Web3Rpc {
|
||||
|
||||
// retry until we succeed
|
||||
while let Err(err) = self.connect(block_sender, chain_id, db_conn).await {
|
||||
// thread_rng is cryptographically secure. we don't need that here
// thread_rng is cryptographically secure. we don't need that here. use thread_fast_rng instead
|
||||
// TODO: min of 1 second? sleep longer if rate limited?
|
||||
sleep_ms = min(
|
||||
cap_ms,
|
||||
thread_fast_rng().gen_range(base_ms..(sleep_ms * range_multiplier)),
|
||||
@ -455,7 +477,7 @@ impl Web3Rpc {
|
||||
|
||||
log::log!(
|
||||
error_level,
|
||||
"Failed reconnect to {}! Retry in {}ms. err={:?}",
|
||||
"Failed (re)connect to {}! Retry in {}ms. err={:?}",
|
||||
self,
|
||||
retry_in.as_millis(),
|
||||
err,
|
||||
@ -695,10 +717,10 @@ impl Web3Rpc {
|
||||
http_interval_sender: Option<Arc<broadcast::Sender<()>>>,
|
||||
tx_id_sender: Option<flume::Sender<(TxHash, Arc<Self>)>>,
|
||||
) -> anyhow::Result<()> {
|
||||
let revert_handler = if self.backup {
|
||||
RequestRevertHandler::DebugLevel
|
||||
let error_handler = if self.backup {
|
||||
RequestErrorHandler::DebugLevel
|
||||
} else {
|
||||
RequestRevertHandler::ErrorLevel
|
||||
RequestErrorHandler::ErrorLevel
|
||||
};
|
||||
|
||||
loop {
|
||||
@ -768,7 +790,7 @@ impl Web3Rpc {
|
||||
.wait_for_query::<_, Option<Transaction>>(
|
||||
"eth_getTransactionByHash",
|
||||
&(txid,),
|
||||
revert_handler,
|
||||
error_handler,
|
||||
authorization.clone(),
|
||||
Some(client.clone()),
|
||||
)
|
||||
@ -805,7 +827,7 @@ impl Web3Rpc {
|
||||
rpc.wait_for_query::<_, Option<Bytes>>(
|
||||
"eth_getCode",
|
||||
&(to, block_number),
|
||||
revert_handler,
|
||||
error_handler,
|
||||
authorization.clone(),
|
||||
Some(client),
|
||||
)
|
||||
@ -1200,7 +1222,11 @@ impl Web3Rpc {
|
||||
}
|
||||
|
||||
if let Some(hard_limit_until) = self.hard_limit_until.as_ref() {
|
||||
// <<<<<<< HEAD
|
||||
let hard_limit_ready = *hard_limit_until.borrow();
|
||||
// =======
|
||||
// let hard_limit_ready = hard_limit_until.borrow().to_owned();
|
||||
// >>>>>>> 77df3fa (stats v2)
|
||||
|
||||
let now = Instant::now();
|
||||
|
||||
@ -1285,7 +1311,7 @@ impl Web3Rpc {
|
||||
self: &Arc<Self>,
|
||||
method: &str,
|
||||
params: &P,
|
||||
revert_handler: RequestRevertHandler,
|
||||
revert_handler: RequestErrorHandler,
|
||||
authorization: Arc<Authorization>,
|
||||
unlocked_provider: Option<Arc<Web3Provider>>,
|
||||
) -> anyhow::Result<R>
|
||||
@ -1350,7 +1376,7 @@ impl Serialize for Web3Rpc {
|
||||
S: Serializer,
|
||||
{
|
||||
// 9 is the number of fields in the struct.
|
||||
let mut state = serializer.serialize_struct("Web3Rpc", 10)?;
|
||||
let mut state = serializer.serialize_struct("Web3Rpc", 9)?;
|
||||
|
||||
// the url is excluded because it likely includes private information. just show the name that we use in keys
|
||||
state.serialize_field("name", &self.name)?;
|
||||
@ -1414,15 +1440,10 @@ mod tests {
|
||||
#![allow(unused_imports)]
|
||||
use super::*;
|
||||
use ethers::types::{Block, U256};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
#[test]
|
||||
fn test_archive_node_has_block_data() {
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("cannot tell the time")
|
||||
.as_secs()
|
||||
.into();
|
||||
let now = chrono::Utc::now().timestamp().into();
|
||||
|
||||
let random_block = Block {
|
||||
hash: Some(H256::random()),
|
||||
@ -1457,11 +1478,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_pruned_node_has_block_data() {
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("cannot tell the time")
|
||||
.as_secs()
|
||||
.into();
|
||||
let now = chrono::Utc::now().timestamp().into();
|
||||
|
||||
let head_block: Web3ProxyBlock = Arc::new(Block {
|
||||
hash: Some(H256::random()),
|
||||
@ -1498,11 +1515,7 @@ mod tests {
|
||||
// TODO: think about how to bring the concept of a "lagged" node back
|
||||
#[test]
|
||||
fn test_lagged_node_not_has_block_data() {
|
||||
let now: U256 = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("cannot tell the time")
|
||||
.as_secs()
|
||||
.into();
|
||||
let now = chrono::Utc::now().timestamp().into();
|
||||
|
||||
// head block is an hour old
|
||||
let head_block = Block {
|
||||
@ -1514,7 +1527,7 @@ mod tests {
|
||||
|
||||
let head_block = Arc::new(head_block);
|
||||
|
||||
let head_block = SavedBlock::new(head_block);
|
||||
let head_block = Web3ProxyBlock::new(head_block);
|
||||
let block_data_limit = u64::MAX;
|
||||
|
||||
let metrics = OpenRequestHandleMetrics::default();
|
||||
|
@ -11,6 +11,7 @@ use log::{debug, error, trace, warn, Level};
|
||||
use migration::sea_orm::{self, ActiveEnum, ActiveModelTrait};
|
||||
use serde_json::json;
|
||||
use std::fmt;
|
||||
use std::sync::atomic;
|
||||
use std::sync::Arc;
|
||||
use thread_fast_rng::rand::Rng;
|
||||
use tokio::time::{sleep, Duration, Instant};
|
||||
@ -34,7 +35,7 @@ pub struct OpenRequestHandle {
|
||||
|
||||
/// Depending on the context, RPC errors can require different handling.
|
||||
#[derive(Copy, Clone)]
|
||||
pub enum RequestRevertHandler {
|
||||
pub enum RequestErrorHandler {
|
||||
/// Log at the trace level. Use when errors are expected.
|
||||
TraceLevel,
|
||||
/// Log at the debug level. Use when errors are expected.
|
||||
@ -44,7 +45,7 @@ pub enum RequestRevertHandler {
|
||||
/// Log at the warn level. Use when errors do not cause problems.
|
||||
WarnLevel,
|
||||
/// Potentially save the revert. Users can tune how often this happens
|
||||
Save,
|
||||
SaveRevert,
|
||||
}
|
||||
|
||||
// TODO: second param could be skipped since we don't need it here
|
||||
@ -57,13 +58,13 @@ struct EthCallFirstParams {
|
||||
data: Option<Bytes>,
|
||||
}
|
||||
|
||||
impl From<Level> for RequestRevertHandler {
|
||||
impl From<Level> for RequestErrorHandler {
|
||||
fn from(level: Level) -> Self {
|
||||
match level {
|
||||
Level::Trace => RequestRevertHandler::TraceLevel,
|
||||
Level::Debug => RequestRevertHandler::DebugLevel,
|
||||
Level::Error => RequestRevertHandler::ErrorLevel,
|
||||
Level::Warn => RequestRevertHandler::WarnLevel,
|
||||
Level::Trace => RequestErrorHandler::TraceLevel,
|
||||
Level::Debug => RequestErrorHandler::DebugLevel,
|
||||
Level::Error => RequestErrorHandler::ErrorLevel,
|
||||
Level::Warn => RequestErrorHandler::WarnLevel,
|
||||
_ => unimplemented!("unexpected tracing Level"),
|
||||
}
|
||||
}
|
||||
@ -121,11 +122,15 @@ impl Authorization {
|
||||
}
|
||||
|
||||
impl OpenRequestHandle {
|
||||
pub async fn new(authorization: Arc<Authorization>, conn: Arc<Web3Rpc>) -> Self {
|
||||
Self {
|
||||
authorization,
|
||||
rpc: conn,
|
||||
}
|
||||
pub async fn new(authorization: Arc<Authorization>, rpc: Arc<Web3Rpc>) -> Self {
|
||||
// TODO: take request_id as an argument?
|
||||
// TODO: attach a unique id to this? customer requests have one, but not internal queries
|
||||
// TODO: what ordering?!
|
||||
// TODO: should we be using metered, or not? i think not because we want stats for each handle
|
||||
// TODO: these should maybe be sent to an influxdb instance?
|
||||
rpc.active_requests.fetch_add(1, atomic::Ordering::Relaxed);
|
||||
|
||||
Self { authorization, rpc }
|
||||
}
|
||||
|
||||
pub fn connection_name(&self) -> String {
|
||||
@ -140,11 +145,12 @@ impl OpenRequestHandle {
|
||||
/// Send a web3 request
|
||||
/// By having the request method here, we ensure that the rate limiter was called and connection counts were properly incremented
|
||||
/// depending on how things are locked, you might need to pass the provider in
|
||||
/// we take self to ensure this function only runs once
|
||||
pub async fn request<P, R>(
|
||||
self,
|
||||
method: &str,
|
||||
params: &P,
|
||||
revert_handler: RequestRevertHandler,
|
||||
revert_handler: RequestErrorHandler,
|
||||
unlocked_provider: Option<Arc<Web3Provider>>,
|
||||
) -> Result<R, ProviderError>
|
||||
where
|
||||
@ -154,7 +160,7 @@ impl OpenRequestHandle {
|
||||
{
|
||||
// TODO: use tracing spans
|
||||
// TODO: including params in this log is way too verbose
|
||||
// trace!(rpc=%self.conn, %method, "request");
|
||||
// trace!(rpc=%self.rpc, %method, "request");
|
||||
trace!("requesting from {}", self.rpc);
|
||||
|
||||
let mut provider = if unlocked_provider.is_some() {
|
||||
@ -209,7 +215,7 @@ impl OpenRequestHandle {
|
||||
// // TODO: i think ethers already has trace logging (and does it much more fancy)
|
||||
// trace!(
|
||||
// "response from {} for {} {:?}: {:?}",
|
||||
// self.conn,
|
||||
// self.rpc,
|
||||
// method,
|
||||
// params,
|
||||
// response,
|
||||
@ -218,17 +224,17 @@ impl OpenRequestHandle {
|
||||
if let Err(err) = &response {
|
||||
// only save reverts for some types of calls
|
||||
// TODO: do something special for eth_sendRawTransaction too
|
||||
let revert_handler = if let RequestRevertHandler::Save = revert_handler {
|
||||
let error_handler = if let RequestErrorHandler::SaveRevert = revert_handler {
|
||||
// TODO: should all these be Trace or Debug or a mix?
|
||||
if !["eth_call", "eth_estimateGas"].contains(&method) {
|
||||
// trace!(%method, "skipping save on revert");
|
||||
RequestRevertHandler::TraceLevel
|
||||
RequestErrorHandler::TraceLevel
|
||||
} else if self.authorization.db_conn.is_some() {
|
||||
let log_revert_chance = self.authorization.checks.log_revert_chance;
|
||||
|
||||
if log_revert_chance == 0.0 {
|
||||
// trace!(%method, "no chance. skipping save on revert");
|
||||
RequestRevertHandler::TraceLevel
|
||||
RequestErrorHandler::TraceLevel
|
||||
} else if log_revert_chance == 1.0 {
|
||||
// trace!(%method, "gaurenteed chance. SAVING on revert");
|
||||
revert_handler
|
||||
@ -236,7 +242,7 @@ impl OpenRequestHandle {
|
||||
< log_revert_chance
|
||||
{
|
||||
// trace!(%method, "missed chance. skipping save on revert");
|
||||
RequestRevertHandler::TraceLevel
|
||||
RequestErrorHandler::TraceLevel
|
||||
} else {
|
||||
// trace!("Saving on revert");
|
||||
// TODO: is always logging at debug level fine?
|
||||
@ -244,19 +250,22 @@ impl OpenRequestHandle {
|
||||
}
|
||||
} else {
|
||||
// trace!(%method, "no database. skipping save on revert");
|
||||
RequestRevertHandler::TraceLevel
|
||||
RequestErrorHandler::TraceLevel
|
||||
}
|
||||
} else {
|
||||
revert_handler
|
||||
};
|
||||
|
||||
enum ResponseTypes {
|
||||
// TODO: simple enum -> string derive?
|
||||
#[derive(Debug)]
|
||||
enum ResponseErrorType {
|
||||
Revert,
|
||||
RateLimit,
|
||||
Ok,
|
||||
Error,
|
||||
}
|
||||
|
||||
// check for "execution reverted" here
|
||||
// TODO: move this into a function on ResponseErrorType
|
||||
let response_type = if let ProviderError::JsonRpcClientError(err) = err {
|
||||
// Http and Ws errors are very similar, but different types
|
||||
let msg = match &*provider {
|
||||
@ -298,87 +307,127 @@ impl OpenRequestHandle {
|
||||
if let Some(msg) = msg {
|
||||
if msg.starts_with("execution reverted") {
|
||||
trace!("revert from {}", self.rpc);
|
||||
ResponseTypes::Revert
|
||||
ResponseErrorType::Revert
|
||||
} else if msg.contains("limit") || msg.contains("request") {
|
||||
trace!("rate limit from {}", self.rpc);
|
||||
ResponseTypes::RateLimit
|
||||
ResponseErrorType::RateLimit
|
||||
} else {
|
||||
ResponseTypes::Ok
|
||||
ResponseErrorType::Error
|
||||
}
|
||||
} else {
|
||||
ResponseTypes::Ok
|
||||
ResponseErrorType::Error
|
||||
}
|
||||
} else {
|
||||
ResponseTypes::Ok
|
||||
ResponseErrorType::Error
|
||||
};
|
||||
|
||||
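The classification is purely string-based: messages starting with "execution reverted" map to Revert, anything mentioning limits or requests maps to RateLimit, and everything else now falls through to Error instead of the old Ok variant. A standalone sketch of the same rules (the sample messages are illustrative):

```rust
#[derive(Debug, PartialEq)]
enum ResponseErrorType {
    Revert,
    RateLimit,
    Error,
}

fn classify(msg: &str) -> ResponseErrorType {
    if msg.starts_with("execution reverted") {
        ResponseErrorType::Revert
    } else if msg.contains("limit") || msg.contains("request") {
        ResponseErrorType::RateLimit
    } else {
        ResponseErrorType::Error
    }
}

#[test]
fn classifies_common_provider_messages() {
    assert_eq!(classify("execution reverted: SafeMath: subtraction overflow"), ResponseErrorType::Revert);
    assert_eq!(classify("daily request count exceeded, request rate limited"), ResponseErrorType::RateLimit);
    assert_eq!(classify("connection closed before message completed"), ResponseErrorType::Error);
}
```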
if matches!(response_type, ResponseTypes::RateLimit) {
|
||||
if let Some(hard_limit_until) = self.rpc.hard_limit_until.as_ref() {
|
||||
let retry_at = Instant::now() + Duration::from_secs(1);
|
||||
match response_type {
|
||||
ResponseErrorType::RateLimit => {
|
||||
if let Some(hard_limit_until) = self.rpc.hard_limit_until.as_ref() {
|
||||
// TODO: how long? different providers have different rate limiting periods, though most seem to be 1 second
|
||||
// TODO: until the next second, or wait 1 whole second?
|
||||
let retry_at = Instant::now() + Duration::from_secs(1);
|
||||
|
||||
trace!("retry {} at: {:?}", self.rpc, retry_at);
|
||||
trace!("retry {} at: {:?}", self.rpc, retry_at);
|
||||
|
||||
hard_limit_until.send_replace(retry_at);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: think more about the method and param logs. those can be sensitive information
|
||||
match revert_handler {
|
||||
RequestRevertHandler::DebugLevel => {
|
||||
// TODO: think about this revert check more. sometimes we might want reverts logged so this needs a flag
|
||||
if matches!(response_type, ResponseTypes::Revert) {
|
||||
debug!(
|
||||
"bad response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc, method, params, err
|
||||
);
|
||||
hard_limit_until.send_replace(retry_at);
|
||||
}
|
||||
}
|
||||
RequestRevertHandler::TraceLevel => {
|
||||
trace!(
|
||||
"bad response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc,
|
||||
method,
|
||||
params,
|
||||
err
|
||||
);
|
||||
ResponseErrorType::Error => {
|
||||
// TODO: should we just have Error or RateLimit? do we need Error and Revert separate?
|
||||
|
||||
match error_handler {
|
||||
RequestErrorHandler::DebugLevel => {
|
||||
// TODO: include params only if not running in release mode
|
||||
debug!(
|
||||
"error response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc, method, params, err
|
||||
);
|
||||
}
|
||||
RequestErrorHandler::TraceLevel => {
|
||||
trace!(
|
||||
"error response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc,
|
||||
method,
|
||||
params,
|
||||
err
|
||||
);
|
||||
}
|
||||
RequestErrorHandler::ErrorLevel => {
|
||||
// TODO: include params only if not running in release mode
|
||||
error!(
|
||||
"error response from {}! method={} err={:?}",
|
||||
self.rpc, method, err
|
||||
);
|
||||
}
|
||||
RequestErrorHandler::SaveRevert | RequestErrorHandler::WarnLevel => {
|
||||
// TODO: include params only if not running in release mode
|
||||
warn!(
|
||||
"error response from {}! method={} err={:?}",
|
||||
self.rpc, method, err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
RequestRevertHandler::ErrorLevel => {
|
||||
// TODO: include params if not running in release mode
|
||||
error!(
|
||||
"bad response from {}! method={} err={:?}",
|
||||
self.rpc, method, err
|
||||
);
|
||||
}
|
||||
RequestRevertHandler::WarnLevel => {
|
||||
// TODO: include params if not running in release mode
|
||||
warn!(
|
||||
"bad response from {}! method={} err={:?}",
|
||||
self.rpc, method, err
|
||||
);
|
||||
}
|
||||
RequestRevertHandler::Save => {
|
||||
trace!(
|
||||
"bad response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc,
|
||||
method,
|
||||
params,
|
||||
err
|
||||
);
|
||||
ResponseErrorType::Revert => {
|
||||
match error_handler {
|
||||
RequestErrorHandler::DebugLevel => {
|
||||
// TODO: include params only if not running in release mode
|
||||
debug!(
|
||||
"revert response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc, method, params, err
|
||||
);
|
||||
}
|
||||
RequestErrorHandler::TraceLevel => {
|
||||
trace!(
|
||||
"revert response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc,
|
||||
method,
|
||||
params,
|
||||
err
|
||||
);
|
||||
}
|
||||
RequestErrorHandler::ErrorLevel => {
|
||||
// TODO: include params only if not running in release mode
|
||||
error!(
|
||||
"revert response from {}! method={} err={:?}",
|
||||
self.rpc, method, err
|
||||
);
|
||||
}
|
||||
RequestErrorHandler::WarnLevel => {
|
||||
// TODO: include params only if not running in release mode
|
||||
warn!(
|
||||
"revert response from {}! method={} err={:?}",
|
||||
self.rpc, method, err
|
||||
);
|
||||
}
|
||||
RequestErrorHandler::SaveRevert => {
|
||||
trace!(
|
||||
"revert response from {}! method={} params={:?} err={:?}",
|
||||
self.rpc,
|
||||
method,
|
||||
params,
|
||||
err
|
||||
);
|
||||
|
||||
// TODO: do not unwrap! (doesn't matter much since we check method as a string above)
|
||||
let method: Method = Method::try_from_value(&method.to_string()).unwrap();
|
||||
// TODO: do not unwrap! (doesn't matter much since we check method as a string above)
|
||||
let method: Method =
|
||||
Method::try_from_value(&method.to_string()).unwrap();
|
||||
|
||||
// TODO: DO NOT UNWRAP! But also figure out the best way to keep returning ProviderErrors here
|
||||
let params: EthCallParams = serde_json::from_value(json!(params))
|
||||
.context("parsing params to EthCallParams")
|
||||
.unwrap();
|
||||
// TODO: DO NOT UNWRAP! But also figure out the best way to keep returning ProviderErrors here
|
||||
let params: EthCallParams = serde_json::from_value(json!(params))
|
||||
.context("parsing params to EthCallParams")
|
||||
.unwrap();
|
||||
|
||||
// spawn saving to the database so we don't slow down the request
|
||||
let f = self.authorization.clone().save_revert(method, params.0 .0);
|
||||
// spawn saving to the database so we don't slow down the request
|
||||
let f = self.authorization.clone().save_revert(method, params.0 .0);
|
||||
|
||||
tokio::spawn(f);
|
||||
tokio::spawn(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: track error latency?
|
||||
} else {
|
||||
// TODO: record request latency
|
||||
// let latency_ms = start.elapsed().as_secs_f64() * 1000.0;
|
||||
|
@ -1,6 +1,9 @@
|
||||
use crate::app::DatabaseReplica;
|
||||
use crate::app::Web3ProxyApp;
|
||||
use crate::frontend::errors::FrontendErrorResponse;
|
||||
use crate::{app::Web3ProxyApp, user_token::UserBearerToken};
|
||||
use crate::http_params::{
|
||||
get_chain_id_from_params, get_page_from_params, get_query_start_from_params,
|
||||
get_query_window_seconds_from_params, get_user_id_from_params,
|
||||
};
|
||||
use anyhow::Context;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::Json;
|
||||
@ -8,215 +11,217 @@ use axum::{
|
||||
headers::{authorization::Bearer, Authorization},
|
||||
TypedHeader,
|
||||
};
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use entities::{login, rpc_accounting, rpc_key};
|
||||
use entities::{rpc_accounting, rpc_key};
|
||||
use hashbrown::HashMap;
|
||||
use http::StatusCode;
|
||||
use log::{debug, warn};
|
||||
use log::warn;
|
||||
use migration::sea_orm::{
|
||||
ColumnTrait, DatabaseConnection, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder,
|
||||
QuerySelect, Select,
|
||||
ColumnTrait, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder, QuerySelect, Select,
|
||||
};
|
||||
use migration::{Condition, Expr, SimpleExpr};
|
||||
use redis_rate_limiter::redis;
|
||||
use redis_rate_limiter::{redis::AsyncCommands, RedisConnection};
|
||||
use redis_rate_limiter::redis::AsyncCommands;
|
||||
use serde_json::json;
|
||||
|
||||
/// get the attached address for the given bearer token.
|
||||
/// First checks redis. Then checks the database.
|
||||
/// 0 means all users.
|
||||
/// This authenticates that the bearer is allowed to view this user_id's stats
|
||||
pub async fn get_user_id_from_params(
|
||||
redis_conn: &mut RedisConnection,
|
||||
db_conn: &DatabaseConnection,
|
||||
db_replica: &DatabaseReplica,
|
||||
// this is a long type. should we strip it down?
|
||||
bearer: Option<TypedHeader<Authorization<Bearer>>>,
|
||||
params: &HashMap<String, String>,
|
||||
) -> Result<u64, FrontendErrorResponse> {
|
||||
debug!("bearer and params are: {:?} {:?}", bearer, params);
|
||||
match (bearer, params.get("user_id")) {
|
||||
(Some(TypedHeader(Authorization(bearer))), Some(user_id)) => {
|
||||
// check for the bearer cache key
|
||||
let user_bearer_token = UserBearerToken::try_from(bearer)?;
|
||||
|
||||
let user_redis_key = user_bearer_token.redis_key();
|
||||
|
||||
let mut save_to_redis = false;
|
||||
|
||||
// get the user id that is attached to this bearer token
|
||||
let bearer_user_id = match redis_conn.get::<_, u64>(&user_redis_key).await {
|
||||
Err(_) => {
|
||||
// TODO: inspect the redis error? if redis is down we should warn
|
||||
// this also means redis being down will not kill our app. Everything will need a db read query though.
|
||||
|
||||
let user_login = login::Entity::find()
|
||||
.filter(login::Column::BearerToken.eq(user_bearer_token.uuid()))
|
||||
.one(db_replica.conn())
|
||||
.await
|
||||
.context("database error while querying for user")?
|
||||
.ok_or(FrontendErrorResponse::AccessDenied)?;
|
||||
|
||||
// if expired, delete ALL expired logins
|
||||
let now = Utc::now();
|
||||
if now > user_login.expires_at {
|
||||
// this row is expired! do not allow auth!
|
||||
// delete ALL expired logins.
|
||||
let delete_result = login::Entity::delete_many()
|
||||
.filter(login::Column::ExpiresAt.lte(now))
|
||||
.exec(db_conn)
|
||||
.await?;
|
||||
|
||||
// TODO: emit a stat? if this is high something weird might be happening
|
||||
debug!("cleared expired logins: {:?}", delete_result);
|
||||
|
||||
return Err(FrontendErrorResponse::AccessDenied);
|
||||
}
|
||||
|
||||
save_to_redis = true;
|
||||
|
||||
user_login.user_id
|
||||
}
|
||||
Ok(x) => {
|
||||
// TODO: push cache ttl further in the future?
|
||||
x
|
||||
}
|
||||
};
|
||||
|
||||
let user_id: u64 = user_id.parse().context("Parsing user_id param")?;
|
||||
|
||||
if bearer_user_id != user_id {
|
||||
return Err(FrontendErrorResponse::AccessDenied);
|
||||
}
|
||||
|
||||
if save_to_redis {
|
||||
// TODO: how long? we store in database for 4 weeks
|
||||
const ONE_DAY: usize = 60 * 60 * 24;
|
||||
|
||||
if let Err(err) = redis_conn
|
||||
.set_ex::<_, _, ()>(user_redis_key, user_id, ONE_DAY)
|
||||
.await
|
||||
{
|
||||
warn!("Unable to save user bearer token to redis: {}", err)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(bearer_user_id)
|
||||
}
|
||||
(_, None) => {
|
||||
// they have a bearer token. we don't care about it on public pages
|
||||
// 0 means all
|
||||
Ok(0)
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
// they do not have a bearer token, but requested a specific id. block
|
||||
// TODO: return a proper, useful error code
|
||||
// TODO: maybe instead of this sharp edged warn, we have a config value?
|
||||
// TODO: check config for if we should deny or allow this
|
||||
Err(FrontendErrorResponse::AccessDenied)
|
||||
// // TODO: make this a flag
|
||||
// warn!("allowing without auth during development!");
|
||||
// Ok(x.parse()?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
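get_user_id_from_params is a cache-aside lookup: try redis first, fall back to the login table on a miss, reject expired or mismatched tokens, and only write the id back to redis (with a one-day TTL) when the database had to be consulted. A simplified, self-contained sketch of that flow, with in-memory maps standing in for redis and the database:

```rust
use std::collections::HashMap;

/// hypothetical stand-in for the redis + database lookup above
fn lookup_user_id(
    cache: &mut HashMap<String, u64>,
    db: &HashMap<String, u64>,
    bearer_key: &str,
) -> Option<u64> {
    if let Some(&user_id) = cache.get(bearer_key) {
        // cache hit: no database read needed
        return Some(user_id);
    }

    // cache miss: the database is authoritative
    let user_id = *db.get(bearer_key)?;

    // write back so the next request skips the database
    cache.insert(bearer_key.to_string(), user_id);

    Some(user_id)
}
```

The real function additionally checks that the user_id query parameter matches the id attached to the bearer token before returning, and denies access otherwise.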
/// only allow rpc_key to be set if user_id is also set.
|
||||
/// this will keep people from reading someone else's keys.
|
||||
/// 0 means none.
|
||||
|
||||
pub fn get_rpc_key_id_from_params(
|
||||
user_id: u64,
|
||||
params: &HashMap<String, String>,
|
||||
) -> anyhow::Result<u64> {
|
||||
if user_id > 0 {
|
||||
params.get("rpc_key_id").map_or_else(
|
||||
|| Ok(0),
|
||||
|c| {
|
||||
let c = c.parse()?;
|
||||
|
||||
Ok(c)
|
||||
},
|
||||
)
|
||||
} else {
|
||||
Ok(0)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_chain_id_from_params(
|
||||
app: &Web3ProxyApp,
|
||||
params: &HashMap<String, String>,
|
||||
) -> anyhow::Result<u64> {
|
||||
params.get("chain_id").map_or_else(
|
||||
|| Ok(app.config.chain_id),
|
||||
|c| {
|
||||
let c = c.parse()?;
|
||||
|
||||
Ok(c)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_query_start_from_params(
|
||||
params: &HashMap<String, String>,
|
||||
) -> anyhow::Result<chrono::NaiveDateTime> {
|
||||
params.get("query_start").map_or_else(
|
||||
|| {
|
||||
// no timestamp in params. set default
|
||||
let x = chrono::Utc::now() - chrono::Duration::days(30);
|
||||
|
||||
Ok(x.naive_utc())
|
||||
},
|
||||
|x: &String| {
|
||||
// parse the given timestamp
|
||||
let x = x.parse::<i64>().context("parsing timestamp query param")?;
|
||||
|
||||
// TODO: error code 401
|
||||
let x =
|
||||
NaiveDateTime::from_timestamp_opt(x, 0).context("parsing timestamp query param")?;
|
||||
|
||||
Ok(x)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
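get_query_start_from_params accepts an integer unix timestamp in seconds and defaults to thirty days ago when the parameter is absent. A compact sketch of the same parsing, assuming only chrono; the Option return stands in for the error types used above:

```rust
use chrono::{Duration, NaiveDateTime, Utc};

fn parse_query_start(param: Option<&str>) -> Option<NaiveDateTime> {
    match param {
        // no timestamp in params: default to 30 days ago
        None => Some((Utc::now() - Duration::days(30)).naive_utc()),
        // otherwise parse seconds-since-epoch
        Some(raw) => {
            let secs: i64 = raw.parse().ok()?;
            NaiveDateTime::from_timestamp_opt(secs, 0)
        }
    }
}
```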
pub fn get_page_from_params(params: &HashMap<String, String>) -> anyhow::Result<u64> {
|
||||
params.get("page").map_or_else::<anyhow::Result<u64>, _, _>(
|
||||
|| {
|
||||
// no page in params. set default
|
||||
Ok(0)
|
||||
},
|
||||
|x: &String| {
|
||||
// parse the given timestamp
|
||||
// TODO: error code 401
|
||||
let x = x.parse().context("parsing page query from params")?;
|
||||
|
||||
Ok(x)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_query_window_seconds_from_params(
|
||||
params: &HashMap<String, String>,
|
||||
) -> Result<u64, FrontendErrorResponse> {
|
||||
params.get("query_window_seconds").map_or_else(
|
||||
|| {
|
||||
// no page in params. set default
|
||||
Ok(0)
|
||||
},
|
||||
|query_window_seconds: &String| {
|
||||
// parse the given timestamp
|
||||
// TODO: error code 401
|
||||
query_window_seconds.parse::<u64>().map_err(|e| {
|
||||
FrontendErrorResponse::StatusCode(
|
||||
StatusCode::BAD_REQUEST,
|
||||
"Unable to parse rpc_key_id".to_string(),
|
||||
Some(e.into()),
|
||||
)
|
||||
})
|
||||
},
|
||||
)
|
||||
}
|
||||
// <<<<<<< HEAD:web3_proxy/src/user_queries.rs
|
||||
// /// get the attached address for the given bearer token.
|
||||
// /// First checks redis. Then checks the database.
|
||||
// /// 0 means all users.
|
||||
// /// This authenticates that the bearer is allowed to view this user_id's stats
|
||||
// pub async fn get_user_id_from_params(
|
||||
// redis_conn: &mut RedisConnection,
|
||||
// db_conn: &DatabaseConnection,
|
||||
// db_replica: &DatabaseReplica,
|
||||
// // this is a long type. should we strip it down?
|
||||
// bearer: Option<TypedHeader<Authorization<Bearer>>>,
|
||||
// params: &HashMap<String, String>,
|
||||
// ) -> Result<u64, FrontendErrorResponse> {
|
||||
// debug!("bearer and params are: {:?} {:?}", bearer, params);
|
||||
// match (bearer, params.get("user_id")) {
|
||||
// (Some(TypedHeader(Authorization(bearer))), Some(user_id)) => {
|
||||
// // check for the bearer cache key
|
||||
// let user_bearer_token = UserBearerToken::try_from(bearer)?;
|
||||
//
|
||||
// let user_redis_key = user_bearer_token.redis_key();
|
||||
//
|
||||
// let mut save_to_redis = false;
|
||||
//
|
||||
// // get the user id that is attached to this bearer token
|
||||
// let bearer_user_id = match redis_conn.get::<_, u64>(&user_redis_key).await {
|
||||
// Err(_) => {
|
||||
// // TODO: inspect the redis error? if redis is down we should warn
|
||||
// // this also means redis being down will not kill our app. Everything will need a db read query though.
|
||||
//
|
||||
// let user_login = login::Entity::find()
|
||||
// .filter(login::Column::BearerToken.eq(user_bearer_token.uuid()))
|
||||
// .one(db_replica.conn())
|
||||
// .await
|
||||
// .context("database error while querying for user")?
|
||||
// .ok_or(FrontendErrorResponse::AccessDenied)?;
|
||||
//
|
||||
// // if expired, delete ALL expired logins
|
||||
// let now = Utc::now();
|
||||
// if now > user_login.expires_at {
|
||||
// // this row is expired! do not allow auth!
|
||||
// // delete ALL expired logins.
|
||||
// let delete_result = login::Entity::delete_many()
|
||||
// .filter(login::Column::ExpiresAt.lte(now))
|
||||
// .exec(db_conn)
|
||||
// .await?;
|
||||
//
|
||||
// // TODO: emit a stat? if this is high something weird might be happening
|
||||
// debug!("cleared expired logins: {:?}", delete_result);
|
||||
//
|
||||
// return Err(FrontendErrorResponse::AccessDenied);
|
||||
// }
|
||||
//
|
||||
// save_to_redis = true;
|
||||
//
|
||||
// user_login.user_id
|
||||
// }
|
||||
// Ok(x) => {
|
||||
// // TODO: push cache ttl further in the future?
|
||||
// x
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// let user_id: u64 = user_id.parse().context("Parsing user_id param")?;
|
||||
//
|
||||
// if bearer_user_id != user_id {
|
||||
// return Err(FrontendErrorResponse::AccessDenied);
|
||||
// }
|
||||
//
|
||||
// if save_to_redis {
|
||||
// // TODO: how long? we store in database for 4 weeks
|
||||
// const ONE_DAY: usize = 60 * 60 * 24;
|
||||
//
|
||||
// if let Err(err) = redis_conn
|
||||
// .set_ex::<_, _, ()>(user_redis_key, user_id, ONE_DAY)
|
||||
// .await
|
||||
// {
|
||||
// warn!("Unable to save user bearer token to redis: {}", err)
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Ok(bearer_user_id)
|
||||
// }
|
||||
// (_, None) => {
|
||||
// // they have a bearer token. we don't care about it on public pages
|
||||
// // 0 means all
|
||||
// Ok(0)
|
||||
// }
|
||||
// (None, Some(_)) => {
|
||||
// // they do not have a bearer token, but requested a specific id. block
|
||||
// // TODO: proper error code from a useful error code
|
||||
// // TODO: maybe instead of this sharp edged warn, we have a config value?
|
||||
// // TODO: check config for if we should deny or allow this
|
||||
// Err(FrontendErrorResponse::AccessDenied)
|
||||
// // // TODO: make this a flag
|
||||
// // warn!("allowing without auth during development!");
|
||||
// // Ok(x.parse()?)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /// only allow rpc_key to be set if user_id is also set.
|
||||
// /// this will keep people from reading someone else's keys.
|
||||
// /// 0 means none.
|
||||
//
|
||||
// pub fn get_rpc_key_id_from_params(
|
||||
// user_id: u64,
|
||||
// params: &HashMap<String, String>,
|
||||
// ) -> anyhow::Result<u64> {
|
||||
// if user_id > 0 {
|
||||
// params.get("rpc_key_id").map_or_else(
|
||||
// || Ok(0),
|
||||
// |c| {
|
||||
// let c = c.parse()?;
|
||||
//
|
||||
// Ok(c)
|
||||
// },
|
||||
// )
|
||||
// } else {
|
||||
// Ok(0)
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// pub fn get_chain_id_from_params(
|
||||
// app: &Web3ProxyApp,
|
||||
// params: &HashMap<String, String>,
|
||||
// ) -> anyhow::Result<u64> {
|
||||
// params.get("chain_id").map_or_else(
|
||||
// || Ok(app.config.chain_id),
|
||||
// |c| {
|
||||
// let c = c.parse()?;
|
||||
//
|
||||
// Ok(c)
|
||||
// },
|
||||
// )
|
||||
// }
|
||||
//
|
||||
// pub fn get_query_start_from_params(
|
||||
// params: &HashMap<String, String>,
|
||||
// ) -> anyhow::Result<chrono::NaiveDateTime> {
|
||||
// params.get("query_start").map_or_else(
|
||||
// || {
|
||||
// // no timestamp in params. set default
|
||||
// let x = chrono::Utc::now() - chrono::Duration::days(30);
|
||||
//
|
||||
// Ok(x.naive_utc())
|
||||
// },
|
||||
// |x: &String| {
|
||||
// // parse the given timestamp
|
||||
// let x = x.parse::<i64>().context("parsing timestamp query param")?;
|
||||
//
|
||||
// // TODO: error code 401
|
||||
// let x =
|
||||
// NaiveDateTime::from_timestamp_opt(x, 0).context("parsing timestamp query param")?;
|
||||
//
|
||||
// Ok(x)
|
||||
// },
|
||||
// )
|
||||
// }
|
||||
//
|
||||
// pub fn get_page_from_params(params: &HashMap<String, String>) -> anyhow::Result<u64> {
|
||||
// params.get("page").map_or_else::<anyhow::Result<u64>, _, _>(
|
||||
// || {
|
||||
// // no page in params. set default
|
||||
// Ok(0)
|
||||
// },
|
||||
// |x: &String| {
|
||||
// // parse the given timestamp
|
||||
// // TODO: error code 401
|
||||
// let x = x.parse().context("parsing page query from params")?;
|
||||
//
|
||||
// Ok(x)
|
||||
// },
|
||||
// )
|
||||
// }
|
||||
//
|
||||
// pub fn get_query_window_seconds_from_params(
|
||||
// params: &HashMap<String, String>,
|
||||
// ) -> Result<u64, FrontendErrorResponse> {
|
||||
// params.get("query_window_seconds").map_or_else(
|
||||
// || {
|
||||
// // no page in params. set default
|
||||
// Ok(0)
|
||||
// },
|
||||
// |query_window_seconds: &String| {
|
||||
// // parse the given timestamp
|
||||
// // TODO: error code 401
|
||||
// query_window_seconds.parse::<u64>().map_err(|e| {
|
||||
// FrontendErrorResponse::StatusCode(
|
||||
// StatusCode::BAD_REQUEST,
|
||||
// "Unable to parse rpc_key_id".to_string(),
|
||||
// Some(e.into()),
|
||||
// )
|
||||
// })
|
||||
// },
|
||||
// )
|
||||
// }
|
||||
// =======
|
||||
use super::StatType;
|
||||
// >>>>>>> 77df3fa (stats v2):web3_proxy/src/stats/db_queries.rs
|
||||
|
||||
pub fn filter_query_window_seconds(
|
||||
query_window_seconds: u64,
|
||||
@ -251,16 +256,11 @@ pub fn filter_query_window_seconds(
|
||||
Ok(q)
|
||||
}
|
||||
|
||||
pub enum StatResponse {
|
||||
Aggregated,
|
||||
Detailed,
|
||||
}
|
||||
|
||||
pub async fn query_user_stats<'a>(
|
||||
app: &'a Web3ProxyApp,
|
||||
bearer: Option<TypedHeader<Authorization<Bearer>>>,
|
||||
params: &'a HashMap<String, String>,
|
||||
stat_response_type: StatResponse,
|
||||
stat_response_type: StatType,
|
||||
) -> Result<Response, FrontendErrorResponse> {
|
||||
let db_conn = app.db_conn().context("query_user_stats needs a db")?;
|
||||
let db_replica = app
|
||||
@ -361,7 +361,7 @@ pub async fn query_user_stats<'a>(
|
||||
// TODO: make this and q mutable and clean up the code below. no need for more `let q`
|
||||
let mut condition = Condition::all();
|
||||
|
||||
if let StatResponse::Detailed = stat_response_type {
|
||||
if let StatType::Detailed = stat_response_type {
|
||||
// group by the columns that we use as keys in other places of the code
|
||||
q = q
|
||||
.column(rpc_accounting::Column::ErrorResponse)
|
41
web3_proxy/src/stats/influxdb_queries.rs
Normal file
@ -0,0 +1,41 @@
use super::StatType;
use crate::{
    app::Web3ProxyApp, frontend::errors::FrontendErrorResponse,
    http_params::get_user_id_from_params,
};
use anyhow::Context;
use axum::{
    headers::{authorization::Bearer, Authorization},
    response::Response,
    TypedHeader,
};
use hashbrown::HashMap;

pub async fn query_user_stats<'a>(
    app: &'a Web3ProxyApp,
    bearer: Option<TypedHeader<Authorization<Bearer>>>,
    params: &'a HashMap<String, String>,
    stat_response_type: StatType,
) -> Result<Response, FrontendErrorResponse> {
    let db_conn = app.db_conn().context("query_user_stats needs a db")?;
    let db_replica = app
        .db_replica()
        .context("query_user_stats needs a db replica")?;
    let mut redis_conn = app
        .redis_conn()
        .await
        .context("query_user_stats had a redis connection error")?
        .context("query_user_stats needs a redis")?;

    // TODO: have a getter for this. do we need a connection pool on it?
    let influxdb_client = app
        .influxdb_client
        .as_ref()
        .context("query_user_stats needs an influxdb client")?;

    // get the user id first. if it is 0, we should use a cache on the app
    let user_id =
        get_user_id_from_params(&mut redis_conn, &db_conn, &db_replica, bearer, params).await?;

    todo!();
}
584
web3_proxy/src/stats/mod.rs
Normal file
@ -0,0 +1,584 @@
|
||||
//! Store "stats" in a database for billing and a different database for graphing
|
||||
//!
|
||||
//! TODO: move some of these structs/functions into their own file?
|
||||
pub mod db_queries;
|
||||
pub mod influxdb_queries;
|
||||
|
||||
use crate::frontend::authorization::{Authorization, RequestMetadata};
|
||||
use axum::headers::Origin;
|
||||
use chrono::{TimeZone, Utc};
|
||||
use derive_more::From;
|
||||
use entities::rpc_accounting_v2;
|
||||
use entities::sea_orm_active_enums::TrackingLevel;
|
||||
use futures::stream;
|
||||
use hashbrown::HashMap;
|
||||
use influxdb2::api::write::TimestampPrecision;
|
||||
use influxdb2::models::DataPoint;
|
||||
use log::{error, info};
|
||||
use migration::sea_orm::{self, DatabaseConnection, EntityTrait};
|
||||
use migration::{Expr, OnConflict};
|
||||
use std::num::NonZeroU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::interval;
|
||||
|
||||
pub enum StatType {
|
||||
Aggregated,
|
||||
Detailed,
|
||||
}
|
||||
|
||||
/// TODO: better name?
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RpcQueryStats {
|
||||
authorization: Arc<Authorization>,
|
||||
method: String,
|
||||
archive_request: bool,
|
||||
error_response: bool,
|
||||
request_bytes: u64,
|
||||
/// if backend_requests is 0, there was a cache_hit
|
||||
backend_requests: u64,
|
||||
response_bytes: u64,
|
||||
response_millis: u64,
|
||||
response_timestamp: i64,
|
||||
}
|
||||
|
||||
#[derive(Clone, From, Hash, PartialEq, Eq)]
|
||||
struct RpcQueryKey {
|
||||
/// unix epoch time
|
||||
/// for the time series db, this is (close to) the time that the response was sent
|
||||
/// for the account database, this is rounded to the week
|
||||
response_timestamp: i64,
|
||||
/// true if an archive server was needed to serve the request
|
||||
archive_needed: bool,
|
||||
/// true if the response was some sort of JSONRPC error
|
||||
error_response: bool,
|
||||
/// method tracking is opt-in
|
||||
method: Option<String>,
|
||||
/// origin tracking is opt-in
|
||||
origin: Option<Origin>,
|
||||
/// None if the public url was used
|
||||
rpc_secret_key_id: Option<NonZeroU64>,
|
||||
}
|
||||
|
||||
/// round the unix epoch time to the start of a period
|
||||
fn round_timestamp(timestamp: i64, period_seconds: i64) -> i64 {
|
||||
timestamp / period_seconds * period_seconds
|
||||
}
|
||||
|
||||
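round_timestamp relies on integer division truncating toward zero, so for non-negative unix timestamps it snaps to the start of the containing period; the accounting key below rounds its response_timestamp with it, while the timeseries keys keep the raw second. A quick illustrative check (the constants are just examples):

```rust
fn round_timestamp(timestamp: i64, period_seconds: i64) -> i64 {
    timestamp / period_seconds * period_seconds
}

#[test]
fn rounds_down_to_period_start() {
    // 2023-01-02T03:04:05Z
    let t = 1_672_628_645;
    assert_eq!(round_timestamp(t, 60), 1_672_628_640); // 03:04:00
    assert_eq!(round_timestamp(t, 3_600), 1_672_628_400); // 03:00:00
}
```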
impl RpcQueryStats {
|
||||
/// rpc keys can opt into multiple levels of tracking.
|
||||
/// we always need enough to handle billing, so even the "none" level still has some minimal tracking.
|
||||
/// This "accounting_key" is used in the relational database.
|
||||
/// anonymous users are also saved in the relational database so that the host can do their own cost accounting.
|
||||
fn accounting_key(&self, period_seconds: i64) -> RpcQueryKey {
|
||||
let response_timestamp = round_timestamp(self.response_timestamp, period_seconds);
|
||||
|
||||
let rpc_secret_key_id = self.authorization.checks.rpc_secret_key_id;
|
||||
|
||||
let (method, origin) = match self.authorization.checks.tracking_level {
|
||||
TrackingLevel::None => {
|
||||
// this RPC key requested no tracking. this is the default
|
||||
// do not store the method or the origin
|
||||
(None, None)
|
||||
}
|
||||
TrackingLevel::Aggregated => {
|
||||
// this RPC key requested tracking aggregated across all methods and origins
|
||||
// TODO: think about this more. do we want the origin or not? grouping free cost per site might be useful. i'd rather not collect things if we don't have a planned purpose though
|
||||
let method = None;
|
||||
let origin = None;
|
||||
|
||||
(method, origin)
|
||||
}
|
||||
TrackingLevel::Detailed => {
|
||||
// detailed tracking keeps track of the method and origin
|
||||
// depending on the request, the origin might still be None
|
||||
let method = Some(self.method.clone());
|
||||
let origin = self.authorization.origin.clone();
|
||||
|
||||
(method, origin)
|
||||
}
|
||||
};
|
||||
|
||||
RpcQueryKey {
|
||||
response_timestamp,
|
||||
archive_needed: self.archive_request,
|
||||
error_response: self.error_response,
|
||||
method,
|
||||
rpc_secret_key_id,
|
||||
origin,
|
||||
}
|
||||
}
|
||||
|
||||
/// all queries are aggregated
|
||||
/// TODO: should we store "anon" or "registered" as a key just to be able to split graphs?
|
||||
fn global_timeseries_key(&self) -> RpcQueryKey {
|
||||
let method = Some(self.method.clone());
|
||||
// we don't store origin in the timeseries db. it's only used for optional accounting
|
||||
let origin = None;
|
||||
// everyone gets grouped together
|
||||
let rpc_secret_key_id = None;
|
||||
|
||||
RpcQueryKey {
|
||||
response_timestamp: self.response_timestamp,
|
||||
archive_needed: self.archive_request,
|
||||
error_response: self.error_response,
|
||||
method,
|
||||
rpc_secret_key_id,
|
||||
origin,
|
||||
}
|
||||
}
|
||||
|
||||
fn opt_in_timeseries_key(&self) -> RpcQueryKey {
|
||||
// we don't store origin in the timeseries db. it's only optionally used for accounting
|
||||
let origin = None;
|
||||
|
||||
let (method, rpc_secret_key_id) = match self.authorization.checks.tracking_level {
|
||||
TrackingLevel::None => {
|
||||
// this RPC key requested no tracking. this is the default.
|
||||
// we still want graphs though, so we just use None as the rpc_secret_key_id
|
||||
(Some(self.method.clone()), None)
|
||||
}
|
||||
TrackingLevel::Aggregated => {
|
||||
// this RPC key requested tracking aggregated across all methods
|
||||
(None, self.authorization.checks.rpc_secret_key_id)
|
||||
}
|
||||
TrackingLevel::Detailed => {
|
||||
// detailed tracking keeps track of the method
|
||||
(
|
||||
Some(self.method.clone()),
|
||||
self.authorization.checks.rpc_secret_key_id,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
RpcQueryKey {
|
||||
response_timestamp: self.response_timestamp,
|
||||
archive_needed: self.archive_request,
|
||||
error_response: self.error_response,
|
||||
method,
|
||||
rpc_secret_key_id,
|
||||
origin,
|
||||
}
|
||||
}
|
||||
}
|
||||
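Taken together, the three key builders differ only in which optional dimensions survive at each TrackingLevel: the accounting key drops method and origin unless the key opted into Detailed, and the opt-in timeseries key drops the rpc_secret_key_id for None and the method for Aggregated. A standalone sketch of the accounting-side mapping (plain tuples instead of RpcQueryKey; the names are illustrative):

```rust
#[derive(Clone, Copy)]
enum TrackingLevel {
    None,
    Aggregated,
    Detailed,
}

/// which (method, origin) pair ends up in the relational accounting key
fn accounting_dimensions(
    level: TrackingLevel,
    method: &str,
    origin: Option<&str>,
) -> (Option<String>, Option<String>) {
    match level {
        // no opt-in tracking: only the billing counters are kept
        TrackingLevel::None | TrackingLevel::Aggregated => (None, None),
        // detailed: keep the method, and the origin when one was sent
        TrackingLevel::Detailed => (Some(method.to_string()), origin.map(str::to_string)),
    }
}
```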
|
||||
#[derive(Default)]
|
||||
pub struct BufferedRpcQueryStats {
|
||||
frontend_requests: u64,
|
||||
backend_requests: u64,
|
||||
backend_retries: u64,
|
||||
no_servers: u64,
|
||||
cache_misses: u64,
|
||||
cache_hits: u64,
|
||||
sum_request_bytes: u64,
|
||||
sum_response_bytes: u64,
|
||||
sum_response_millis: u64,
|
||||
}
|
||||
|
||||
/// A stat that we aggregate and then store in a database.
|
||||
/// For now there is just one, but I think there might be others later
|
||||
#[derive(Debug, From)]
|
||||
pub enum AppStat {
|
||||
RpcQuery(RpcQueryStats),
|
||||
}
|
||||
|
||||
#[derive(From)]
|
||||
pub struct SpawnedStatBuffer {
|
||||
pub stat_sender: flume::Sender<AppStat>,
|
||||
/// these handles are important and must be allowed to finish
|
||||
pub background_handle: JoinHandle<anyhow::Result<()>>,
|
||||
}
|
||||
|
||||
pub struct StatBuffer {
|
||||
chain_id: u64,
|
||||
db_conn: Option<DatabaseConnection>,
|
||||
influxdb_client: Option<influxdb2::Client>,
|
||||
tsdb_save_interval_seconds: u32,
|
||||
db_save_interval_seconds: u32,
|
||||
billing_period_seconds: i64,
|
||||
}
|
||||
|
||||
impl BufferedRpcQueryStats {
    fn add(&mut self, stat: RpcQueryStats) {
        // a stat always comes from just one frontend request
        self.frontend_requests += 1;

        if stat.backend_requests == 0 {
            // no backend request. cache hit!
            self.cache_hits += 1;
        } else {
            // backend requests! cache miss!
            self.cache_misses += 1;

            // a single frontend request might have multiple backend requests
            self.backend_requests += stat.backend_requests;
        }

        self.sum_request_bytes += stat.request_bytes;
        self.sum_response_bytes += stat.response_bytes;
        self.sum_response_millis += stat.response_millis;
    }
|
||||
|
||||
    // TODO: take a db transaction instead so that we can batch?
    async fn save_db(
        self,
        chain_id: u64,
        db_conn: &DatabaseConnection,
        key: RpcQueryKey,
    ) -> anyhow::Result<()> {
        let period_datetime = Utc.timestamp_opt(key.response_timestamp as i64, 0).unwrap();

        // this is a lot of variables
        let accounting_entry = rpc_accounting_v2::ActiveModel {
            id: sea_orm::NotSet,
            rpc_key_id: sea_orm::Set(key.rpc_secret_key_id.map(Into::into)),
            origin: sea_orm::Set(key.origin.map(|x| x.to_string())),
            chain_id: sea_orm::Set(chain_id),
            period_datetime: sea_orm::Set(period_datetime),
            method: sea_orm::Set(key.method),
            archive_needed: sea_orm::Set(key.archive_needed),
            error_response: sea_orm::Set(key.error_response),
            frontend_requests: sea_orm::Set(self.frontend_requests),
            backend_requests: sea_orm::Set(self.backend_requests),
            backend_retries: sea_orm::Set(self.backend_retries),
            no_servers: sea_orm::Set(self.no_servers),
            cache_misses: sea_orm::Set(self.cache_misses),
            cache_hits: sea_orm::Set(self.cache_hits),
            sum_request_bytes: sea_orm::Set(self.sum_request_bytes),
            sum_response_millis: sea_orm::Set(self.sum_response_millis),
            sum_response_bytes: sea_orm::Set(self.sum_response_bytes),
        };

        rpc_accounting_v2::Entity::insert(accounting_entry)
            .on_conflict(
                OnConflict::new()
                    .values([
                        (
                            rpc_accounting_v2::Column::FrontendRequests,
                            Expr::col(rpc_accounting_v2::Column::FrontendRequests)
                                .add(self.frontend_requests),
                        ),
                        (
                            rpc_accounting_v2::Column::BackendRequests,
                            Expr::col(rpc_accounting_v2::Column::BackendRequests)
                                .add(self.backend_requests),
                        ),
                        (
                            rpc_accounting_v2::Column::BackendRetries,
                            Expr::col(rpc_accounting_v2::Column::BackendRetries)
                                .add(self.backend_retries),
                        ),
                        (
                            rpc_accounting_v2::Column::NoServers,
                            Expr::col(rpc_accounting_v2::Column::NoServers).add(self.no_servers),
                        ),
                        (
                            rpc_accounting_v2::Column::CacheMisses,
                            Expr::col(rpc_accounting_v2::Column::CacheMisses)
                                .add(self.cache_misses),
                        ),
                        (
                            rpc_accounting_v2::Column::CacheHits,
                            Expr::col(rpc_accounting_v2::Column::CacheHits).add(self.cache_hits),
                        ),
                        (
                            rpc_accounting_v2::Column::SumRequestBytes,
                            Expr::col(rpc_accounting_v2::Column::SumRequestBytes)
                                .add(self.sum_request_bytes),
                        ),
                        (
                            rpc_accounting_v2::Column::SumResponseMillis,
                            Expr::col(rpc_accounting_v2::Column::SumResponseMillis)
                                .add(self.sum_response_millis),
                        ),
                        (
                            rpc_accounting_v2::Column::SumResponseBytes,
                            Expr::col(rpc_accounting_v2::Column::SumResponseBytes)
                                .add(self.sum_response_bytes),
                        ),
                    ])
                    .to_owned(),
            )
            .exec(db_conn)
            .await?;

        Ok(())
    }

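    // the built point carries chain_id / rpc_secret_key_id / method / archive_needed /
    // error_response as tags and the counters as integer fields, with a per-second
    // timestamp. roughly (illustrative only), a flushed point looks like:
    //   global_proxy,chain_id=1,archive_needed=false,error_response=false frontend_requests=12i,cache_hits=10i,... 1678900000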
    // TODO: change this to return a DataPoint?
    async fn save_timeseries(
        self,
        bucket: &str,
        measurement: &str,
        chain_id: u64,
        influxdb2_client: &influxdb2::Client,
        key: RpcQueryKey,
    ) -> anyhow::Result<()> {
        // TODO: error if key.origin is set?

        // TODO: what name?
        let mut builder = DataPoint::builder(measurement);

        builder = builder.tag("chain_id", chain_id.to_string());

        if let Some(rpc_secret_key_id) = key.rpc_secret_key_id {
            builder = builder.tag("rpc_secret_key_id", rpc_secret_key_id.to_string());
        }

        if let Some(method) = key.method {
            builder = builder.tag("method", method);
        }

        builder = builder
            .tag("archive_needed", key.archive_needed.to_string())
            .tag("error_response", key.error_response.to_string())
            .field("frontend_requests", self.frontend_requests as i64)
            .field("backend_requests", self.backend_requests as i64)
            .field("no_servers", self.no_servers as i64)
            .field("cache_misses", self.cache_misses as i64)
            .field("cache_hits", self.cache_hits as i64)
            .field("sum_request_bytes", self.sum_request_bytes as i64)
            .field("sum_response_millis", self.sum_response_millis as i64)
            .field("sum_response_bytes", self.sum_response_bytes as i64);

        builder = builder.timestamp(key.response_timestamp);
        let timestamp_precision = TimestampPrecision::Seconds;

        let points = [builder.build()?];

        // TODO: bucket should be an enum so that we don't risk typos
        influxdb2_client
            .write_with_precision(bucket, stream::iter(points), timestamp_precision)
            .await?;

        Ok(())
    }
}

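// `RpcQueryStats::new` snapshots the per-request counters from `RequestMetadata` once the
// response is ready, so one stat is emitted per frontend request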
impl RpcQueryStats {
    pub fn new(
        method: String,
        authorization: Arc<Authorization>,
        metadata: Arc<RequestMetadata>,
        response_bytes: usize,
    ) -> Self {
        // TODO: try_unwrap the metadata to be sure that all the stats for this request have been collected
        // TODO: otherwise, i think the whole thing should be in a single lock that we can "reset" when a stat is created

        let archive_request = metadata.archive_request.load(Ordering::Acquire);
        let backend_requests = metadata.backend_requests.lock().len() as u64;
        let request_bytes = metadata.request_bytes;
        let error_response = metadata.error_response.load(Ordering::Acquire);
        let response_millis = metadata.start_instant.elapsed().as_millis() as u64;
        let response_bytes = response_bytes as u64;

        let response_timestamp = Utc::now().timestamp();

        Self {
            authorization,
            archive_request,
            method,
            backend_requests,
            request_bytes,
            error_response,
            response_bytes,
            response_millis,
            response_timestamp,
        }
    }
}

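// A minimal usage sketch (assumptions, not taken from this commit): the app would call
// `try_spawn` once at startup, clone `stat_sender` into request handlers, and send the
// broadcast shutdown signal before awaiting `background_handle` so buffered stats flush.
//
//     let (shutdown_tx, shutdown_rx) = tokio::sync::broadcast::channel(1);
//     let spawned = StatBuffer::try_spawn(
//         chain_id,          // e.g. 1 for mainnet
//         db_conn,           // Option<DatabaseConnection>
//         influxdb_client,   // Option<influxdb2::Client>
//         60,                // db_save_interval_seconds (example value)
//         60,                // tsdb_save_interval_seconds (example value)
//         60 * 60 * 24 * 7,  // billing_period_seconds (example value)
//         shutdown_rx,
//     )?;
//     if let Some(SpawnedStatBuffer { stat_sender, background_handle }) = spawned {
//         // handlers send AppStat::RpcQuery(stat) over stat_sender
//         // on shutdown: shutdown_tx.send(())?; then background_handle.await??;
//     }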
impl StatBuffer {
    pub fn try_spawn(
        chain_id: u64,
        db_conn: Option<DatabaseConnection>,
        influxdb_client: Option<influxdb2::Client>,
        db_save_interval_seconds: u32,
        tsdb_save_interval_seconds: u32,
        billing_period_seconds: i64,
        shutdown_receiver: broadcast::Receiver<()>,
    ) -> anyhow::Result<Option<SpawnedStatBuffer>> {
        if db_conn.is_none() && influxdb_client.is_none() {
            return Ok(None);
        }

        let (stat_sender, stat_receiver) = flume::unbounded();

        let mut new = Self {
            chain_id,
            db_conn,
            influxdb_client,
            db_save_interval_seconds,
            tsdb_save_interval_seconds,
            billing_period_seconds,
        };

        // any errors inside this task will cause the application to exit
        let handle = tokio::spawn(async move {
            new.aggregate_and_save_loop(stat_receiver, shutdown_receiver)
                .await
        });

        Ok(Some((stat_sender, handle).into()))
    }

    async fn aggregate_and_save_loop(
        &mut self,
        stat_receiver: flume::Receiver<AppStat>,
        mut shutdown_receiver: broadcast::Receiver<()>,
    ) -> anyhow::Result<()> {
        let mut tsdb_save_interval =
            interval(Duration::from_secs(self.tsdb_save_interval_seconds as u64));
        let mut db_save_interval =
            interval(Duration::from_secs(self.db_save_interval_seconds as u64));

        // TODO: this is used for rpc_accounting_v2 and influxdb. give it a name to match that? "stat" of some kind?
        let mut global_timeseries_buffer = HashMap::<RpcQueryKey, BufferedRpcQueryStats>::new();
        let mut opt_in_timeseries_buffer = HashMap::<RpcQueryKey, BufferedRpcQueryStats>::new();
        let mut accounting_db_buffer = HashMap::<RpcQueryKey, BufferedRpcQueryStats>::new();

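        // each pass of the loop either buffers one incoming stat, flushes one of the
        // buffers when its interval ticks, or breaks on shutdown; anything still
        // buffered is flushed after the loop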
        loop {
            tokio::select! {
                stat = stat_receiver.recv_async() => {
                    // save the stat to a buffer
                    match stat {
                        Ok(AppStat::RpcQuery(stat)) => {
                            if self.influxdb_client.is_some() {
                                // TODO: round the timestamp at all?

                                let global_timeseries_key = stat.global_timeseries_key();

                                global_timeseries_buffer.entry(global_timeseries_key).or_default().add(stat.clone());

                                let opt_in_timeseries_key = stat.opt_in_timeseries_key();

                                opt_in_timeseries_buffer.entry(opt_in_timeseries_key).or_default().add(stat.clone());
                            }

                            if self.db_conn.is_some() {
                                accounting_db_buffer.entry(stat.accounting_key(self.billing_period_seconds)).or_default().add(stat);
                            }
                        }
                        Err(err) => {
                            error!("error receiving stat: {:?}", err);
                            break;
                        }
                    }
                }
                _ = db_save_interval.tick() => {
                    let db_conn = self.db_conn.as_ref().expect("db connection should always exist if there are buffered stats");

                    // TODO: batch saves
                    for (key, stat) in accounting_db_buffer.drain() {
                        // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now
                        if let Err(err) = stat.save_db(self.chain_id, db_conn, key).await {
                            error!("unable to save accounting entry! err={:?}", err);
                        };
                    }
                }
                _ = tsdb_save_interval.tick() => {
                    // TODO: batch saves
                    // TODO: better bucket names
                    let influxdb_client = self.influxdb_client.as_ref().expect("influxdb client should always exist if there are buffered stats");

                    for (key, stat) in global_timeseries_buffer.drain() {
                        // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now
                        if let Err(err) = stat.save_timeseries("dev_web3_proxy", "global_proxy", self.chain_id, influxdb_client, key).await {
                            error!("unable to save global stat! err={:?}", err);
                        };
                    }

                    for (key, stat) in opt_in_timeseries_buffer.drain() {
                        // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now
                        if let Err(err) = stat.save_timeseries("dev_web3_proxy", "opt_in_proxy", self.chain_id, influxdb_client, key).await {
                            error!("unable to save opt-in stat! err={:?}", err);
                        };
                    }
                }
                x = shutdown_receiver.recv() => {
                    match x {
                        Ok(_) => {
                            info!("stat_loop shutting down");
                            // TODO: call aggregate_stat for all the
                        },
                        Err(err) => error!("stat_loop shutdown receiver err={:?}", err),
                    }
                    break;
                }
            }
        }

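        // the loop has exited (shutdown signal or a closed stat channel), so flush
        // whatever is still buffered before returning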
        // TODO: dry
        if let Some(db_conn) = self.db_conn.as_ref() {
            info!(
                "saving {} buffered accounting entries",
                accounting_db_buffer.len(),
            );

            for (key, stat) in accounting_db_buffer.drain() {
                if let Err(err) = stat.save_db(self.chain_id, db_conn, key).await {
                    error!(
                        "unable to save accounting entry while shutting down! err={:?}",
                        err
                    );
                };
            }
        }

        // TODO: dry
        if let Some(influxdb_client) = self.influxdb_client.as_ref() {
            info!(
                "saving {} buffered global stats",
                global_timeseries_buffer.len(),
            );

            for (key, stat) in global_timeseries_buffer.drain() {
                if let Err(err) = stat
                    .save_timeseries(
                        "dev_web3_proxy",
                        "global_proxy",
                        self.chain_id,
                        influxdb_client,
                        key,
                    )
                    .await
                {
                    error!(
                        "unable to save global stat while shutting down! err={:?}",
                        err
                    );
                };
            }

            info!(
                "saving {} buffered opt-in stats",
                opt_in_timeseries_buffer.len(),
            );

            for (key, stat) in opt_in_timeseries_buffer.drain() {
                if let Err(err) = stat
                    .save_timeseries(
                        "dev_web3_proxy",
                        "opt_in_proxy",
                        self.chain_id,
                        influxdb_client,
                        key,
                    )
                    .await
                {
                    error!(
                        "unable to save opt-in stat while shutting down! err={:?}",
                        err
                    );
                };
            }
        }

        info!("accounting and stat save loop complete");

        Ok(())
    }
}