diff --git a/config/example.toml b/config/example.toml
index 7de66214..0d915dc7 100644
--- a/config/example.toml
+++ b/config/example.toml
@@ -7,7 +7,7 @@ min_sum_soft_limit = 2000
 min_synced_rpcs = 2
 redis_url = "redis://dev-redis:6379/"
 # TODO: how do we find the optimal redis_max_connections? too high actually ends up being slower
-redis_max_connections = 99
+redis_max_connections = 300
 redirect_public_url = "https://llamanodes.com/free-rpc-stats"
 redirect_user_url = "https://llamanodes.com/user-rpc-stats/{{user_id}}"
 public_rate_limit_per_minute = 0
diff --git a/redis-rate-limit/src/errors.rs b/redis-rate-limit/src/errors.rs
index 0f4be3e8..10da546e 100644
--- a/redis-rate-limit/src/errors.rs
+++ b/redis-rate-limit/src/errors.rs
@@ -1,14 +1,14 @@
 pub use bb8_redis::bb8::ErrorSink as Bb8ErrorSync;
 pub use bb8_redis::redis::RedisError;
 
-use tracing::warn;
+use tracing::error;
 
 #[derive(Debug, Clone)]
 pub struct RedisErrorSink;
 
 impl Bb8ErrorSync for RedisErrorSink {
     fn sink(&self, err: RedisError) {
-        warn!(?err, "redis error");
+        error!(?err, "redis error");
     }
 
     fn boxed_clone(&self) -> Box> {
diff --git a/redis-rate-limit/src/lib.rs b/redis-rate-limit/src/lib.rs
index 132d7717..574c170b 100644
--- a/redis-rate-limit/src/lib.rs
+++ b/redis-rate-limit/src/lib.rs
@@ -5,7 +5,7 @@ use anyhow::Context;
 use bb8_redis::redis::pipe;
 use std::ops::Add;
 use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
-use tracing::trace;
+use tracing::{debug, trace};
 
 pub use crate::errors::{RedisError, RedisErrorSink};
 pub use bb8_redis::{bb8, redis, RedisConnectionManager};
@@ -72,12 +72,15 @@ impl RedisRateLimit {
         let mut conn = self.pool.get().await?;
 
+        // TODO: at high concurrency, i think this is giving errors
+        // TODO: i'm starting to think that bb8 has a bug
         let x: Vec = pipe()
             // we could get the key first, but that means an extra redis call for every check. this seems better
             .incr(&throttle_key, count)
-            // set expiration the first time we set the key. ignore the result
+            // set expiration each time we set the key. ignore the result
             .expire(&throttle_key, self.period as usize)
-            // .arg("NX") // TODO: this works in redis, but not elasticache
+            // TODO: NX will make it only set the expiration the first time. works in redis, but not elasticache
+            // .arg("NX")
             .ignore()
             // do the query
             .query_async(&mut *conn)
             .await
@@ -91,12 +94,13 @@ impl RedisRateLimit {
             let retry_at = Instant::now().add(Duration::from_secs_f32(seconds_left_in_period));
 
-            trace!(%label, ?retry_at, "rate limited");
+            debug!(%label, ?retry_at, "rate limited: {}/{}", new_count, max_per_period);
 
-            return Ok(ThrottleResult::RetryAt(retry_at));
+            Ok(ThrottleResult::RetryAt(retry_at))
+        } else {
+            trace!(%label, "NOT rate limited: {}/{}", new_count, max_per_period);
+            Ok(ThrottleResult::Allowed)
         }
-
-        Ok(ThrottleResult::Allowed)
     }
 
     #[inline]
diff --git a/web3_proxy/src/app.rs b/web3_proxy/src/app.rs
index 169b589e..7007a0b2 100644
--- a/web3_proxy/src/app.rs
+++ b/web3_proxy/src/app.rs
@@ -39,7 +39,7 @@ use tokio::sync::{broadcast, watch};
 use tokio::task::JoinHandle;
 use tokio::time::{timeout, Instant};
 use tokio_stream::wrappers::{BroadcastStream, WatchStream};
-use tracing::{debug, info, info_span, instrument, trace, warn, Instrument};
+use tracing::{info, info_span, instrument, trace, warn, Instrument};
 use uuid::Uuid;
 
 // TODO: make this customizable?
@@ -78,8 +78,6 @@ pub struct Web3ProxyApp {
     // TODO: broadcast channel instead?
     head_block_receiver: watch::Receiver,
     pending_tx_sender: broadcast::Sender,
-    /// TODO: this doesn't ever get incremented!
-    pub active_requests: AtomicUsize,
     pub config: AppConfig,
     pub db_conn: Option,
     /// store pending transactions that we've seen so that we don't send duplicates to subscribers
@@ -265,6 +263,7 @@ impl Web3ProxyApp {
         handles.push(balanced_handle);
 
         let private_rpcs = if private_rpcs.is_empty() {
+            // TODO: do None instead of clone?
             warn!("No private relays configured. Any transactions will be broadcast to the public mempool!");
             balanced_rpcs.clone()
         } else {
@@ -310,7 +309,6 @@ impl Web3ProxyApp {
             config: top_config.app,
             balanced_rpcs,
             private_rpcs,
-            active_requests: Default::default(),
             response_cache,
             head_block_receiver,
             pending_tx_sender,
@@ -527,8 +525,7 @@ impl Web3ProxyApp {
         request: JsonRpcRequestEnum,
     ) -> anyhow::Result {
         // TODO: this should probably be trace level
-        // trace!(?request, "proxy_web3_rpc");
-        debug!(?request, "proxying request");
+        trace!(?request, "proxy_web3_rpc");
 
         // even though we have timeouts on the requests to our backend providers,
         // we need a timeout for the incoming request so that retries don't run forever
@@ -545,8 +542,7 @@ impl Web3ProxyApp {
         };
 
         // TODO: this should probably be trace level
-        // trace!(?response, "Forwarding");
-        debug!(?response.ids(), "forwarding response");
+        trace!(?response, "Forwarding");
 
         Ok(response)
     }
diff --git a/web3_proxy/src/bin/web3_proxy.rs b/web3_proxy/src/bin/web3_proxy.rs
index 8ec7ec6c..0057fb8a 100644
--- a/web3_proxy/src/bin/web3_proxy.rs
+++ b/web3_proxy/src/bin/web3_proxy.rs
@@ -127,7 +127,7 @@ fn main() -> anyhow::Result<()> {
     // if RUST_LOG isn't set, configure a default
     // TODO: is there a better way to do this?
     if std::env::var("RUST_LOG").is_err() {
-        std::env::set_var("RUST_LOG", "info,web3_proxy=debug");
+        std::env::set_var("RUST_LOG", "info,redis_rate_limit=debug,web3_proxy=debug");
     }
 
     // install global collector configured based on RUST_LOG env var.
diff --git a/web3_proxy/src/frontend/http.rs b/web3_proxy/src/frontend/http.rs
index 20cfba0e..5ffc807e 100644
--- a/web3_proxy/src/frontend/http.rs
+++ b/web3_proxy/src/frontend/http.rs
@@ -1,7 +1,7 @@
 use crate::app::Web3ProxyApp;
 use axum::{http::StatusCode, response::IntoResponse, Extension, Json};
 use serde_json::json;
-use std::sync::{atomic::Ordering, Arc};
+use std::sync::Arc;
 
 /// Health check page for load balancers to use
 pub async fn health(Extension(app): Extension>) -> impl IntoResponse {
@@ -15,15 +15,13 @@ pub async fn health(Extension(app): Extension>) -> impl IntoRe
 /// Very basic status page
 /// TODO: replace this with proper stats and monitoring
 pub async fn status(Extension(app): Extension>) -> impl IntoResponse {
-    // TODO: what else should we include? uptime? prometheus?
+    // TODO: what else should we include? uptime?
     let body = json!({
         "balanced_rpcs": app.balanced_rpcs,
         "private_rpcs": app.private_rpcs,
-        "num_active_requests": app.active_requests.load(Ordering::Acquire),
-        // TODO: include number of items?
"pending_transactions_count": app.pending_transactions.entry_count(), "pending_transactions_size": app.pending_transactions.weighted_size(), }); - (StatusCode::OK, Json(body)) + Json(body) } diff --git a/web3_proxy/src/frontend/rate_limit.rs b/web3_proxy/src/frontend/rate_limit.rs index c3d32492..50d09c7e 100644 --- a/web3_proxy/src/frontend/rate_limit.rs +++ b/web3_proxy/src/frontend/rate_limit.rs @@ -11,7 +11,7 @@ use sea_orm::{ }; use std::{net::IpAddr, time::Duration}; use tokio::time::Instant; -use tracing::debug; +use tracing::{debug, error}; use uuid::Uuid; pub enum RateLimitResult { @@ -119,6 +119,7 @@ impl TryFrom for RequestFrom { impl Web3ProxyApp { pub async fn rate_limit_by_ip(&self, ip: IpAddr) -> anyhow::Result { // TODO: dry this up with rate_limit_by_key + // TODO: have a local cache because if we hit redis too hard we get errors if let Some(rate_limiter) = &self.rate_limiter { let rate_limiter_label = format!("ip-{}", ip); @@ -136,12 +137,13 @@ impl Web3ProxyApp { } Ok(ThrottleResult::RetryNever) => { // TODO: prettier error for the user - return Err(anyhow::anyhow!("blocked by rate limiter")); + return Err(anyhow::anyhow!("ip blocked by rate limiter")); } Err(err) => { // internal error, not rate limit being hit // TODO: i really want axum to do this for us in a single place. - return Err(err); + error!(?err, "redis is unhappy. allowing ip"); + return Ok(RateLimitResult::AllowedIp(ip)); } } } else { @@ -194,7 +196,7 @@ impl Web3ProxyApp { } }; - // save for the next run + // save for the next run self.user_cache.insert(user_key, user_data).await; Ok(user_data) @@ -234,20 +236,39 @@ impl Web3ProxyApp { // user key is valid. now check rate limits if let Some(rate_limiter) = &self.rate_limiter { // TODO: query redis in the background so that users don't have to wait on this network request - if rate_limiter + // TODO: better key? have a prefix so its easy to delete all of these + let rate_limiter_label = user_key.to_string(); + + match rate_limiter .throttle_label( - &user_key.to_string(), + &rate_limiter_label, Some(user_data.user_count_per_period), 1, ) .await - .is_err() { - // TODO: set headers so they know when they can retry - // warn!(?ip, "public rate limit exceeded"); // this is too verbose, but a stat might be good - // TODO: use their id if possible - // TODO: StatusCode::TOO_MANY_REQUESTS - return Err(anyhow::anyhow!("too many requests from this key")); + Ok(ThrottleResult::Allowed) => {} + Ok(ThrottleResult::RetryAt(_retry_at)) => { + // TODO: set headers so they know when they can retry + debug!(?rate_limiter_label, "user rate limit exceeded"); // this is too verbose, but a stat might be good + // TODO: use their id if possible + return Ok(RateLimitResult::UserRateLimitExceeded(user_data.user_id)); + } + Ok(ThrottleResult::RetryNever) => { + // TODO: prettier error for the user + return Err(anyhow::anyhow!("user blocked by rate limiter")); + } + Err(err) => { + // internal error, not rate limit being hit + // rather than have downtime, i think its better to just use in-process rate limiting + // TODO: in-process rate limits that pipe into redis + error!(?err, "redis is unhappy. 
allowing ip"); + return Ok(RateLimitResult::AllowedUser(user_data.user_id)); + } // // TODO: set headers so they know when they can retry + // // warn!(?ip, "public rate limit exceeded"); // this is too verbose, but a stat might be good + // // TODO: use their id if possible + // // TODO: StatusCode::TOO_MANY_REQUESTS + // return Err(anyhow::anyhow!("too many requests from this key")); } } else { // TODO: if no redis, rate limit with a local cache? diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index 2c0ad614..a7233b8a 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -458,7 +458,7 @@ impl Web3Connections { debug!(con_head=%heavy_block_id, rpc_head=%rpc_head_str, %rpc, "con {}/{}/{}", num_consensus_rpcs, num_connection_heads, total_conns) } else { // hash changed - info!(con_head=%heavy_block_id, rpc_head=%rpc_head_str, old=%old_block_id, %rpc, "unc {}/{}/{}", num_consensus_rpcs, num_connection_heads, total_conns); + info!(con_head=%heavy_block_id, old=%old_block_id, rpc_head=%rpc_head_str, %rpc, "unc {}/{}/{}", num_consensus_rpcs, num_connection_heads, total_conns); // todo!("handle equal by updating the cannonical chain"); self.save_block(&heavy_block, true).await?; diff --git a/web3_proxy/src/rpcs/connection.rs b/web3_proxy/src/rpcs/connection.rs index 2db78290..ae809d44 100644 --- a/web3_proxy/src/rpcs/connection.rs +++ b/web3_proxy/src/rpcs/connection.rs @@ -28,6 +28,9 @@ pub struct Web3Connection { url: String, /// keep track of currently open requests. We sort on this pub(super) active_requests: AtomicU32, + /// keep track of total requests + /// TODO: is this type okay? + pub(super) total_requests: AtomicU64, /// provider is in a RwLock so that we can replace it if re-connecting /// it is an async lock because we hold it open across awaits pub(super) provider: AsyncRwLock>>, @@ -35,7 +38,7 @@ pub struct Web3Connection { hard_limit: Option, /// used for load balancing to the least loaded server pub(super) soft_limit: u32, - /// TODO: have an enum for this so that "no limit" prints pretty + /// TODO: have an enum for this so that "no limit" prints pretty? block_data_limit: AtomicU64, /// Lower weight are higher priority when sending requests pub(super) weight: u32, @@ -82,6 +85,7 @@ impl Web3Connection { name, url: url_str.clone(), active_requests: 0.into(), + total_requests: 0.into(), provider: AsyncRwLock::new(Some(Arc::new(provider))), hard_limit, soft_limit, @@ -777,13 +781,18 @@ impl Serialize for Web3Connection { S: Serializer, { // 3 is the number of fields in the struct. - let mut state = serializer.serialize_struct("Web3Connection", 5)?; + let mut state = serializer.serialize_struct("Web3Connection", 6)?; // the url is excluded because it likely includes private information. 
just show the name state.serialize_field("name", &self.name)?; let block_data_limit = self.block_data_limit.load(atomic::Ordering::Relaxed); - state.serialize_field("block_data_limit", &block_data_limit)?; + + if block_data_limit == u64::MAX { + state.serialize_field("block_data_limit", "None")?; + } else { + state.serialize_field("block_data_limit", &block_data_limit)?; + } state.serialize_field("soft_limit", &self.soft_limit)?; @@ -792,6 +801,11 @@ impl Serialize for Web3Connection { &self.active_requests.load(atomic::Ordering::Relaxed), )?; + state.serialize_field( + "total_requests", + &self.total_requests.load(atomic::Ordering::Relaxed), + )?; + let head_block_id = &*self.head_block_id.read(); state.serialize_field("head_block_id", head_block_id)?; diff --git a/web3_proxy/src/rpcs/connections.rs b/web3_proxy/src/rpcs/connections.rs index 2dbba595..23496ee0 100644 --- a/web3_proxy/src/rpcs/connections.rs +++ b/web3_proxy/src/rpcs/connections.rs @@ -56,7 +56,7 @@ impl Web3Connections { chain_id: u64, server_configs: HashMap, http_client: Option, - redis_client_pool: Option, + redis_pool: Option, block_map: BlockHashesMap, head_block_sender: Option>, min_sum_soft_limit: u32, @@ -83,7 +83,7 @@ impl Web3Connections { async move { loop { - // TODO: every time a head_block arrives (maybe with a small delay), or on the interval. + // TODO: every time a head_block arrives (with a small delay for known slow servers), or on the interval. interval.tick().await; trace!("http interval ready"); @@ -108,7 +108,7 @@ impl Web3Connections { .into_iter() .map(|(server_name, server_config)| { let http_client = http_client.clone(); - let redis_client_pool = redis_client_pool.clone(); + let redis_pool = redis_pool.clone(); let http_interval_sender = http_interval_sender.clone(); let block_sender = if head_block_sender.is_some() { @@ -124,7 +124,7 @@ impl Web3Connections { server_config .spawn( server_name, - redis_client_pool, + redis_pool, chain_id, http_client, http_interval_sender, @@ -159,11 +159,16 @@ impl Web3Connections { } } - // TODO: less than 3? what should we do here? - if connections.len() < 2 { - warn!("Only {} connection(s)!", connections.len()); + if connections.len() < min_synced_rpcs { + return Err(anyhow::anyhow!( + "Only {}/{} connections!", + connections.len(), + min_synced_rpcs + )); } + // TODO: safety check on sum soft limit + let synced_connections = SyncedConnections::default(); // TODO: sizing and expiration on these caches! diff --git a/web3_proxy/src/rpcs/provider.rs b/web3_proxy/src/rpcs/provider.rs index 654d23ee..baf73ef2 100644 --- a/web3_proxy/src/rpcs/provider.rs +++ b/web3_proxy/src/rpcs/provider.rs @@ -1,8 +1,7 @@ use std::time::Duration; use derive_more::From; -use ethers::providers::Middleware; -use tracing::{error_span, info_span, instrument, Instrument}; +use tracing::{info_span, instrument, Instrument}; /// Use HTTP and WS providers. // TODO: instead of an enum, I tried to use Box, but hit diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index 32667d19..0a037090 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -22,12 +22,18 @@ pub struct OpenRequestHandle(Arc); impl OpenRequestHandle { pub fn new(connection: Arc) -> Self { - // TODO: attach a unique id to this? + // TODO: attach a unique id to this? customer requests have one, but not internal queries // TODO: what ordering?! connection .active_requests .fetch_add(1, atomic::Ordering::AcqRel); + // TODO: handle overflows? + // TODO: what ordering? 
+        connection
+            .total_requests
+            .fetch_add(1, atomic::Ordering::Relaxed);
+
         Self(connection)
     }