diff --git a/TODO.md b/TODO.md
index 29040319..9f8a47ea 100644
--- a/TODO.md
+++ b/TODO.md
@@ -161,16 +161,19 @@ These are roughly in order of completition
 - [x] Api keys need option to lock to IP, cors header, referer, user agent, etc
 - [x] /user/logout to clear bearer token and jwt
 - [x] bearer tokens should expire
-- [-] user login should return the bearer token, the user keys, and a jwt (jsonwebtoken rust crate should make it easy)
+- [x] login endpoint needs its own rate limiter
+  - we don't want an rpc request limit of 0 to block logins
+  - for security, we want these limits low.
+- [x] user login should return the bearer token and the user keys
+- [x] use siwe messages and signatures for sign up and login
+- [ ] active requests per second per api key
+- [ ] distribution of methods per api key (eth_call, eth_getLogs, etc.)
 - [-] let users choose a % to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly
   - this must be opt-in or spawned since it will slow things down and will make their calls less private
   - [ ] we currently default to 0.0 and don't expose a way to edit it. we have a database row, but we don't use it
 - [-] add configurable size limits to all the Caches
-- [ ] active requests per second per api key
-- [ ] distribution of methods per api key (eth_call, eth_getLogs, etc.)
 - [ ] endpoint for creating/modifying api keys and their advanced security features
 - [ ] BUG: i think if all backend servers stop, the server doesn't properly reconnect. It appears to stop listening on 8854, but not shut down.
-- [ ] revert logs should have a maximum age and a maximum count to keep the database from being huge
 - [ ] Ulid instead of Uuid for user keys
   - since users are actively using our service, we will need to support both
@@ -183,7 +186,9 @@ These are roughly in order of completition
 
 These are not yet ordered.
 
-- [-] use siwe messages and signatures for sign up and login
+- [ ] instead of Option<...> in our frontend function signatures, use result and then the try operator so that we get our errors wrapped in json
+- [ ] revert logs should have a maximum age and a maximum count to keep the database from being huge
+- [ ] user login should also return a jwt (jsonwebtoken rust crate should make it easy)
 - [-] if we request an old block, more servers can handle it than we currently use.
 - [ ] instead of the one list of just heads, store our intermediate mappings (rpcs_by_hash, rpcs_by_num, blocks_by_hash) in SyncedConnections. this shouldn't be too much slower than what we have now
 - [ ] remove the if/else where we optionally route to archive and refactor to require a BlockNumber enum
diff --git a/deferred-rate-limiter/src/lib.rs b/deferred-rate-limiter/src/lib.rs
index 7d001446..343adec4 100644
--- a/deferred-rate-limiter/src/lib.rs
+++ b/deferred-rate-limiter/src/lib.rs
@@ -19,6 +19,8 @@ where
     local_cache: Cache, hashbrown::hash_map::DefaultHashBuilder>,
     prefix: String,
     rrl: RedisRateLimiter,
+    /// if None, defers to the max on rrl
+    default_max_requests_per_period: Option<u64>,
 }
 
 pub enum DeferredRateLimitResult {
@@ -31,7 +33,12 @@ impl<K> DeferredRateLimiter<K>
 where
     K: Copy + Debug + Display + Hash + Eq + Send + Sync + 'static,
 {
-    pub fn new(cache_size: u64, prefix: &str, rrl: RedisRateLimiter) -> Self {
+    pub fn new(
+        cache_size: u64,
+        prefix: &str,
+        rrl: RedisRateLimiter,
+        default_max_requests_per_second: Option<u64>,
+    ) -> Self {
         let ttl = rrl.period as u64;
 
         // TODO: time to live is not exactly right. we want this ttl counter to start only after redis is down. this works for now
@@ -45,6 +52,7 @@ where
             local_cache,
             prefix: prefix.to_string(),
             rrl,
+            default_max_requests_per_period: default_max_requests_per_second,
         }
     }
 
@@ -53,12 +61,15 @@ where
     pub async fn throttle(
         &self,
         key: K,
-        max_per_period: Option<u64>,
+        max_requests_per_period: Option<u64>,
         count: u64,
     ) -> anyhow::Result<DeferredRateLimitResult> {
-        let max_per_period = max_per_period.unwrap_or(self.rrl.max_requests_per_period);
+        let max_requests_per_period = max_requests_per_period.unwrap_or_else(|| {
+            self.default_max_requests_per_period
+                .unwrap_or(self.rrl.max_requests_per_period)
+        });
 
-        if max_per_period == 0 {
+        if max_requests_per_period == 0 {
             return Ok(DeferredRateLimitResult::RetryNever);
         }
 
@@ -79,7 +90,7 @@ where
             .get_with(key, async move {
                 // we do not use the try operator here because we want to be okay with redis errors
                 let redis_count = match rrl
-                    .throttle_label(&redis_key, Some(max_per_period), count, true)
+                    .throttle_label(&redis_key, Some(max_requests_per_period), count)
                     .await
                 {
                     Ok(RedisRateLimitResult::Allowed(count)) => {
@@ -131,7 +142,7 @@ where
             // assuming no other parallel futures incremented this key, this is the count that redis has
             let expected_key_count = cached_key_count + count;
 
-            if expected_key_count > max_per_period {
+            if expected_key_count > max_requests_per_period {
                 // rate limit overshot!
                 let now = self.rrl.now_as_secs();
 
@@ -149,7 +160,7 @@ where
                 let rrl = self.rrl.clone();
                 async move {
                     match rrl
-                        .throttle_label(&redis_key, Some(max_per_period), count, false)
+                        .throttle_label(&redis_key, Some(max_requests_per_period), count)
                         .await
                     {
                         Ok(RedisRateLimitResult::Allowed(count)) => {
@@ -180,7 +191,7 @@ where
 
             // if close to max_per_period, wait for redis
             // TODO: how close should we allow? depends on max expected concurent requests from one user
-            if expected_key_count > max_per_period * 99 / 100 {
+            if expected_key_count > max_requests_per_period * 99 / 100 {
                 // close to period. don't risk it. wait on redis
                 Ok(rate_limit_f.await)
             } else {
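The new `default_max_requests_per_period` field slots between a caller-supplied limit and the underlying Redis limiter's max. A minimal sketch of that precedence, outside the diff — `effective_limit` is a hypothetical helper; the real `throttle` inlines this with `unwrap_or_else`/`unwrap_or`:

```rust
/// Hypothetical helper mirroring the lookup order in `DeferredRateLimiter::throttle`.
fn effective_limit(
    per_call: Option<u64>,        // the `max_requests_per_period` argument to `throttle`
    limiter_default: Option<u64>, // the new `default_max_requests_per_period` field
    redis_max: u64,               // `rrl.max_requests_per_period`
) -> u64 {
    per_call.unwrap_or_else(|| limiter_default.unwrap_or(redis_max))
}

fn main() {
    // an explicit per-key limit wins over everything
    assert_eq!(effective_limit(Some(100), Some(10), 1_000), 100);
    // with no per-key limit, the limiter-level default applies
    assert_eq!(effective_limit(None, Some(10), 1_000), 10);
    // with neither, fall back to the shared RedisRateLimiter max
    assert_eq!(effective_limit(None, None, 1_000), 1_000);
    // a resolved limit of 0 still means RetryNever, i.e. the key is blocked outright
    assert_eq!(effective_limit(Some(0), Some(10), 1_000), 0);
}
```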
diff --git a/redis-rate-limiter/src/lib.rs b/redis-rate-limiter/src/lib.rs
index 161f6ed3..4974084f 100644
--- a/redis-rate-limiter/src/lib.rs
+++ b/redis-rate-limiter/src/lib.rs
@@ -1,6 +1,5 @@
 //#![warn(missing_docs)]
 use anyhow::Context;
-use deadpool_redis::redis::AsyncCommands;
 use std::ops::Add;
 use std::time::{SystemTime, UNIX_EPOCH};
 use tokio::time::{Duration, Instant};
@@ -72,7 +71,6 @@ impl RedisRateLimiter {
         label: &str,
         max_per_period: Option<u64>,
         count: u64,
-        expire: bool,
     ) -> anyhow::Result<RedisRateLimitResult> {
         let max_per_period = max_per_period.unwrap_or(self.max_requests_per_period);
 
@@ -96,30 +94,24 @@ impl RedisRateLimiter {
 
         // TODO: at high concurency, this gives "connection reset by peer" errors. at least they are off the hot path
         // TODO: only set expire if this is a new key
-        let new_count: u64 = if expire {
-            trace!("redis incr+expire");
-            // TODO: automatic retry
-            let x: Vec<_> = redis::pipe()
-                .atomic()
-                // we could get the key first, but that means an extra redis call for every check. this seems better
-                .incr(&throttle_key, count)
-                // set expiration each time we set the key. ignore the result
-                .expire(&throttle_key, 1 + self.period as usize)
-                // TODO: NX will make it only set the expiration the first time. works in redis, but not elasticache
-                // .arg("NX")
-                .ignore()
-                // do the query
-                .query_async(&mut *conn)
-                .await
-                .context("increment rate limit and set expiration")?;
-            *x.first().expect("check redis")
-        } else {
-            trace!("redis incr only");
-            conn.incr(&throttle_key, count)
-                .await
-                .context("increment rate limit")?
-        };
+        trace!("redis incr+expire");
+        // TODO: automatic retry
+        let x: Vec<_> = redis::pipe()
+            .atomic()
+            // we could get the key first, but that means an extra redis call for every check. this seems better
+            .incr(&throttle_key, count)
+            // set expiration each time we set the key. ignore the result
+            .expire(&throttle_key, 1 + self.period as usize)
+            // TODO: NX will make it only set the expiration the first time. works in redis, but not elasticache
+            // .arg("NX")
+            .ignore()
+            // do the query
+            .query_async(&mut *conn)
+            .await
+            .context("increment rate limit and set expiration")?;
+
+        let new_count: u64 = *x.first().expect("check redis");
 
         if new_count > max_per_period {
             // TODO: this might actually be early if we are way over the count
@@ -135,7 +127,7 @@ impl RedisRateLimiter {
     }
 
     #[inline]
-    pub async fn throttle(&self, expire: bool) -> anyhow::Result<RedisRateLimitResult> {
-        self.throttle_label("", None, 1, expire).await
+    pub async fn throttle(&self) -> anyhow::Result<RedisRateLimitResult> {
+        self.throttle_label("", None, 1).await
     }
 }
diff --git a/web3_proxy/src/app.rs b/web3_proxy/src/app.rs
index 51b6866b..27b85039 100644
--- a/web3_proxy/src/app.rs
+++ b/web3_proxy/src/app.rs
@@ -104,6 +104,7 @@ pub struct Web3ProxyApp {
     pub pending_transactions: Cache,
     pub frontend_ip_rate_limiter: Option<DeferredRateLimiter<IpAddr>>,
    pub frontend_key_rate_limiter: Option<DeferredRateLimiter<Uuid>>,
+    pub login_rate_limiter: Option<RedisRateLimiter>,
     pub redis_pool: Option,
     pub user_cache: Cache,
 }
@@ -336,22 +337,37 @@ impl Web3ProxyApp {
 
         let mut frontend_ip_rate_limiter = None;
         let mut frontend_key_rate_limiter = None;
+        let mut login_rate_limiter = None;
+
         if let Some(redis_pool) = redis_pool.as_ref() {
-            let rrl = RedisRateLimiter::new(
+            let rpc_rrl = RedisRateLimiter::new(
                 "web3_proxy",
                 "frontend",
-                top_config.app.public_rate_limit_per_minute,
+                top_config.app.frontend_rate_limit_per_minute,
                 60.0,
                 redis_pool.clone(),
             );
 
+            // these two rate limiters can share the base limiter
             // TODO: take cache_size from config
             frontend_ip_rate_limiter = Some(DeferredRateLimiter::<IpAddr>::new(
                 10_000,
                 "ip",
-                rrl.clone(),
+                rpc_rrl.clone(),
+                None,
+            ));
+            frontend_key_rate_limiter = Some(DeferredRateLimiter::<Uuid>::new(
+                10_000, "key", rpc_rrl, None,
+            ));
+
+            // don't defer this one because it will have a low requests-per-period limit
+            login_rate_limiter = Some(RedisRateLimiter::new(
+                "web3_proxy",
+                "login",
+                top_config.app.login_rate_limit_per_minute,
+                60.0,
+                redis_pool.clone(),
             ));
-            frontend_key_rate_limiter = Some(DeferredRateLimiter::<Uuid>::new(10_000, "key", rrl));
         }
 
         // keep 1GB of blocks in the cache
@@ -381,6 +397,7 @@ impl Web3ProxyApp {
             pending_transactions,
             frontend_ip_rate_limiter,
             frontend_key_rate_limiter,
+            login_rate_limiter,
             db_conn,
             redis_pool,
             app_metrics,
diff --git a/web3_proxy/src/bin/web3_proxy.rs b/web3_proxy/src/bin/web3_proxy.rs
index 21d00af8..64dcad14 100644
--- a/web3_proxy/src/bin/web3_proxy.rs
+++ b/web3_proxy/src/bin/web3_proxy.rs
@@ -214,7 +214,7 @@ mod tests {
                 default_requests_per_minute: Some(6_000_000),
                 min_sum_soft_limit: 1,
                 min_synced_rpcs: 1,
-                public_rate_limit_per_minute: 1_000_000,
+                frontend_rate_limit_per_minute: 1_000_000,
                 response_cache_max_bytes: 10_usize.pow(7),
                 redirect_public_url: "example.com/".to_string(),
                 redirect_user_url: "example.com/{{user_id}}".to_string(),
diff --git a/web3_proxy/src/config.rs b/web3_proxy/src/config.rs
index a63f29d9..7b261373 100644
--- a/web3_proxy/src/config.rs
+++ b/web3_proxy/src/config.rs
@@ -67,8 +67,10 @@ pub struct AppConfig {
     #[serde(default = "default_min_synced_rpcs")]
     pub min_synced_rpcs: usize,
     /// Set to 0 to block all anonymous requests
-    #[serde(default = "default_public_rate_limit_per_minute")]
-    pub public_rate_limit_per_minute: u64,
+    #[serde(default = "default_frontend_rate_limit_per_minute")]
+    pub frontend_rate_limit_per_minute: u64,
+    #[serde(default = "default_login_rate_limit_per_minute")]
+    pub login_rate_limit_per_minute: u64,
     pub redis_url: Option,
     /// maximum size of the connection pool for the cache
     /// If none, the minimum * 2 is used
@@ -89,11 +91,16 @@ fn default_min_synced_rpcs() -> usize {
     1
 }
 
-/// 0 blocks public requests by default.
-fn default_public_rate_limit_per_minute() -> u64 {
+/// 0 blocks anonymous requests by default.
+fn default_frontend_rate_limit_per_minute() -> u64 {
     0
 }
 
+/// Having a low number of requests per minute for login is safest.
+fn default_login_rate_limit_per_minute() -> u64 {
+    10
+}
+
 fn default_response_cache_max_bytes() -> usize {
     // TODO: default to some percentage of the system?
     // 100 megabytes
diff --git a/web3_proxy/src/frontend/authorization.rs b/web3_proxy/src/frontend/authorization.rs
index 2d498358..4d446ac6 100644
--- a/web3_proxy/src/frontend/authorization.rs
+++ b/web3_proxy/src/frontend/authorization.rs
@@ -5,6 +5,7 @@ use axum::headers::{Origin, Referer, UserAgent};
 use deferred_rate_limiter::DeferredRateLimitResult;
 use entities::user_keys;
 use ipnet::IpNet;
+use redis_rate_limiter::RedisRateLimitResult;
 use sea_orm::{prelude::Decimal, ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter};
 use serde::Serialize;
 use std::{net::IpAddr, sync::Arc};
@@ -122,6 +123,26 @@ impl AuthorizedRequest {
     }
 }
 
+pub async fn login_is_authorized(
+    app: &Web3ProxyApp,
+    ip: IpAddr,
+) -> Result<AuthorizedRequest, FrontendErrorResponse> {
+    // TODO: i think we could write an `impl From` for this
+    // TODO: move this to an AuthorizedUser extractor
+    let ip = match app.rate_limit_login(ip).await? {
+        RateLimitResult::AllowedIp(x) => x,
+        RateLimitResult::RateLimitedIp(x, retry_at) => {
+            return Err(FrontendErrorResponse::RateLimitedIp(x, retry_at));
+        }
+        // TODO: don't panic. give the user an error
+        x => unimplemented!("rate_limit_login shouldn't ever see these: {:?}", x),
+    };
+
+    let db = None;
+
+    Ok(AuthorizedRequest::Ip(db, ip))
+}
+
 pub async fn ip_is_authorized(
     app: &Web3ProxyApp,
     ip: IpAddr,
@@ -169,6 +190,38 @@ pub async fn key_is_authorized(
 }
 
 impl Web3ProxyApp {
+    pub async fn rate_limit_login(&self, ip: IpAddr) -> anyhow::Result<RateLimitResult> {
+        // TODO: dry this up with rate_limit_by_key
+        // TODO: have a local cache because if we hit redis too hard we get errors
+        // TODO: query redis in the background so that users don't have to wait on this network request
+        if let Some(rate_limiter) = &self.login_rate_limiter {
+            match rate_limiter.throttle_label(&ip.to_string(), None, 1).await {
+                Ok(RedisRateLimitResult::Allowed(_)) => Ok(RateLimitResult::AllowedIp(ip)),
+                Ok(RedisRateLimitResult::RetryAt(retry_at, _)) => {
+                    // TODO: set headers so they know when they can retry
+                    // TODO: debug or trace?
+                    // this is too verbose, but a stat might be good
+                    trace!(?ip, "login rate limit exceeded until {:?}", retry_at);
+                    Ok(RateLimitResult::RateLimitedIp(ip, Some(retry_at)))
+                }
+                Ok(RedisRateLimitResult::RetryNever) => {
+                    // TODO: i don't think we'll get here. maybe if we ban an IP forever? seems unlikely
+                    trace!(?ip, "login rate limit is 0");
+                    Ok(RateLimitResult::RateLimitedIp(ip, None))
+                }
+                Err(err) => {
+                    // internal error, not rate limit being hit
+                    // TODO: i really want axum to do this for us in a single place.
+                    error!(?err, "login rate limiter is unhappy. allowing ip");
+                    Ok(RateLimitResult::AllowedIp(ip))
+                }
+            }
+        } else {
+            // TODO: if no redis, rate limit with a local cache? "warn!" probably isn't right
+            todo!("no rate limiter");
+        }
+    }
+
     pub async fn rate_limit_by_ip(&self, ip: IpAddr) -> anyhow::Result<RateLimitResult> {
         // TODO: dry this up with rate_limit_by_key
         // TODO: have a local cache because if we hit redis too hard we get errors
diff --git a/web3_proxy/src/frontend/users.rs b/web3_proxy/src/frontend/users.rs
index 287148b6..dad394b5 100644
--- a/web3_proxy/src/frontend/users.rs
+++ b/web3_proxy/src/frontend/users.rs
@@ -7,7 +7,7 @@
 // I wonder how we handle payment
 // probably have to do manual withdrawals
 
-use super::authorization::ip_is_authorized;
+use super::authorization::login_is_authorized;
 use super::errors::FrontendResult;
 use crate::{app::Web3ProxyApp, users::new_api_key};
 use anyhow::Context;
@@ -42,7 +42,7 @@ pub async fn get_login(
     // TODO: allow ENS names here?
     Path(mut params): Path>,
 ) -> FrontendResult {
-    let _ip = ip_is_authorized(&app, ip).await?;
+    let _ = login_is_authorized(&app, ip).await?;
 
     // at first i thought about checking that user_address is in our db
     // but theres no need to separate the registration and login flows
@@ -119,6 +119,7 @@ pub struct PostLoginQuery {
 }
 
 /// JSON body to our `post_login` handler.
+/// TODO: this should be an enum with the different login methods having different structs
 #[derive(Deserialize)]
 pub struct PostLogin {
     address: Address,
@@ -145,7 +146,7 @@ pub async fn post_login(
     Json(payload): Json,
     Query(query): Query,
 ) -> FrontendResult {
-    let _ip = ip_is_authorized(&app, ip).await?;
+    let _ = login_is_authorized(&app, ip).await?;
 
     if let Some(invite_code) = &app.config.invite_code {
         // we don't do per-user referral codes because we shouldn't collect what we don't need.
@@ -291,7 +292,7 @@ pub async fn post_user(
     Extension(app): Extension>,
     Json(payload): Json,
 ) -> FrontendResult {
-    let _ip = ip_is_authorized(&app, ip).await?;
+    let _ = login_is_authorized(&app, ip).await?;
 
     let user = ProtectedAction::PostUser
         .verify(app.as_ref(), bearer_token, &payload.primary_address)
diff --git a/web3_proxy/src/rpcs/connection.rs b/web3_proxy/src/rpcs/connection.rs
index 8912f98a..7cdb1fa5 100644
--- a/web3_proxy/src/rpcs/connection.rs
+++ b/web3_proxy/src/rpcs/connection.rs
@@ -790,7 +790,7 @@ impl Web3Connection {
         // check rate limits
         if let Some(ratelimiter) = self.hard_limit.as_ref() {
             // TODO: how should we know if we should set expire or not?
-            match ratelimiter.throttle(true).await? {
+            match ratelimiter.throttle().await? {
                 RedisRateLimitResult::Allowed(_) => {
                     trace!("rate limit succeeded")
                 }
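Why login skips the deferred layer: the rpc limiters tolerate slightly stale local counts to avoid a Redis round trip on every request, while the login budget is tiny (10/minute by default), so every check goes straight to Redis for an exact count. A rough sketch of the two call shapes inside `web3_proxy`, assuming the key limiter is keyed by the user key's `Uuid` (not confirmed by the diff) and eliding result handling:

```rust
use std::net::IpAddr;

use uuid::Uuid;

use crate::app::Web3ProxyApp;

/// Hypothetical helper contrasting the two throttle paths on `Web3ProxyApp`.
async fn check_limits(app: &Web3ProxyApp, ip: IpAddr, user_key: Uuid) -> anyhow::Result<()> {
    // rpc traffic: high volume, so the deferred limiter answers from its local
    // cache and reconciles with redis in the background
    if let Some(limiter) = &app.frontend_key_rate_limiter {
        let _ = limiter.throttle(user_key, None, 1).await?;
    }

    // login traffic: low volume and security sensitive, so every check is an
    // atomic INCR+EXPIRE in redis
    if let Some(limiter) = &app.login_rate_limiter {
        let _ = limiter.throttle_label(&ip.to_string(), None, 1).await?;
    }

    Ok(())
}
```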