From 9422a335a73a2c07c483406601597319448578ab Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Fri, 21 Oct 2022 20:59:05 +0000 Subject: [PATCH] allow origins on public entrypoints --- TODO.md | 1 + config/example.toml | 11 +++--- deferred-rate-limiter/Cargo.toml | 2 +- entities/Cargo.toml | 2 +- redis-rate-limiter/Cargo.toml | 2 +- web3_proxy/Cargo.toml | 4 +-- web3_proxy/src/app.rs | 3 ++ web3_proxy/src/config.rs | 2 ++ web3_proxy/src/frontend/authorization.rs | 41 +++++++++++++++++------ web3_proxy/src/frontend/rpc_proxy_http.rs | 6 ++-- web3_proxy/src/frontend/rpc_proxy_ws.rs | 26 ++++++++------ 11 files changed, 67 insertions(+), 33 deletions(-) diff --git a/TODO.md b/TODO.md index 34d425b8..ebdeaa5c 100644 --- a/TODO.md +++ b/TODO.md @@ -191,6 +191,7 @@ These are roughly in order of completition - we need this because we need to be sure all the queries are saved in the db. maybe put stuff in Drop - need an flume::watch on unflushed stats that we can subscribe to. wait for it to flip to true - [x] don't use unix timestamps for response_millis since leap seconds will confuse it +- [x] config to allow origins even on the anonymous endpoints - [-] ability to domain lock or ip lock said key - the code to check the database and use these entries already exists, but users don't have a way to set them - [-] new endpoints for users (not totally sure about the exact paths, but these features are all needed): diff --git a/config/example.toml b/config/example.toml index 58be222c..dc3675ab 100644 --- a/config/example.toml +++ b/config/example.toml @@ -5,7 +5,7 @@ chain_id = 1 db_max_connections = 99 db_url = "mysql://root:dev_web3_proxy@dev-db:3306/dev_web3_proxy" -min_sum_soft_limit = 2000 +min_sum_soft_limit = 2_000 min_synced_rpcs = 2 # TODO: how do we find the optimal redis_max_connections? too high actually ends up being slower @@ -19,7 +19,10 @@ redirect_user_url = "https://llamanodes.com/user-rpc-stats/{{user_id}}" public_requests_per_minute = 0 # 1GB of cache -response_cache_max_bytes = 10000000000 +response_cache_max_bytes = 10_000_000_000 + +[app.allowed_origin_requests_per_minute] +"https://chainlist.org" = 10_000 [balanced_rpcs] @@ -94,11 +97,11 @@ response_cache_max_bytes = 10000000000 [private_rpcs.flashbots] disabled = true url = "https://rpc.flashbots.net/fast" - soft_limit = 7074 + soft_limit = 7_074 weight = 0 [private_rpcs.securerpc] disabled = true url = "https://gibson.securerpc.com/v1" - soft_limit = 4560 + soft_limit = 4_560 weight = 0 diff --git a/deferred-rate-limiter/Cargo.toml b/deferred-rate-limiter/Cargo.toml index f8d50351..77f8d6a7 100644 --- a/deferred-rate-limiter/Cargo.toml +++ b/deferred-rate-limiter/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] redis-rate-limiter = { path = "../redis-rate-limiter" } -anyhow = "1.0.65" +anyhow = "1.0.66" hashbrown = "0.12.3" moka = { version = "0.9.4", default-features = false, features = ["future"] } tokio = "1.21.2" diff --git a/entities/Cargo.toml b/entities/Cargo.toml index 95a27cc3..7ad65872 100644 --- a/entities/Cargo.toml +++ b/entities/Cargo.toml @@ -11,5 +11,5 @@ path = "src/mod.rs" [dependencies] sea-orm = "0.9.3" -serde = "1.0.145" +serde = "1.0.147" uuid = "1.2.1" diff --git a/redis-rate-limiter/Cargo.toml b/redis-rate-limiter/Cargo.toml index c62151b6..01bf401a 100644 --- a/redis-rate-limiter/Cargo.toml +++ b/redis-rate-limiter/Cargo.toml @@ -5,7 +5,7 @@ authors = ["Bryan Stitt "] edition = "2021" [dependencies] -anyhow = "1.0.65" +anyhow = "1.0.66" deadpool-redis = { version = "0.10.2", features = ["rt_tokio_1", "serde"] } tracing = "0.1.37" tokio = "1.21.2" diff --git a/web3_proxy/Cargo.toml b/web3_proxy/Cargo.toml index bdeb6c03..31d94fcb 100644 --- a/web3_proxy/Cargo.toml +++ b/web3_proxy/Cargo.toml @@ -19,7 +19,7 @@ entities = { path = "../entities" } migration = { path = "../migration" } redis-rate-limiter = { path = "../redis-rate-limiter" } -anyhow = { version = "1.0.65", features = ["backtrace"] } +anyhow = { version = "1.0.66", features = ["backtrace"] } arc-swap = "1.5.1" argh = "0.1.9" axum = { version = "0.5.17", features = ["headers", "serde_json", "tokio-tungstenite", "ws"] } @@ -55,7 +55,7 @@ handlebars = "4.3.5" rustc-hash = "1.1.0" siwe = "0.5.0" sea-orm = { version = "0.9.3", features = ["macros"] } -serde = { version = "1.0.145", features = [] } +serde = { version = "1.0.147", features = [] } serde_json = { version = "1.0.87", default-features = false, features = ["alloc", "raw_value"] } serde_prometheus = "0.1.6" # TODO: make sure this time version matches siwe. PR to put this in their prelude diff --git a/web3_proxy/src/app.rs b/web3_proxy/src/app.rs index 27523963..37c86ae2 100644 --- a/web3_proxy/src/app.rs +++ b/web3_proxy/src/app.rs @@ -65,6 +65,9 @@ pub type AnyhowJoinHandle = JoinHandle>; #[derive(Clone, Debug, Default, From)] /// TODO: rename this? pub struct UserKeyData { + /// database id of the primary user + pub user_id: u64, + /// database id of the api key pub user_key_id: u64, /// if None, allow unlimited queries pub max_requests_per_period: Option, diff --git a/web3_proxy/src/config.rs b/web3_proxy/src/config.rs index f9caafd5..43d76bfd 100644 --- a/web3_proxy/src/config.rs +++ b/web3_proxy/src/config.rs @@ -80,6 +80,8 @@ pub struct AppConfig { /// None = allow all requests #[serde(default = "default_public_requests_per_minute")] pub public_requests_per_minute: Option, + /// Request limit for allowed origins for anonymous users. + pub allowed_origin_requests_per_minute: HashMap, /// Rate limit for the login entrypoint. /// This is separate from the rpc limits. #[serde(default = "default_login_rate_limit_per_minute")] diff --git a/web3_proxy/src/frontend/authorization.rs b/web3_proxy/src/frontend/authorization.rs index 707754ae..d175301c 100644 --- a/web3_proxy/src/frontend/authorization.rs +++ b/web3_proxy/src/frontend/authorization.rs @@ -5,6 +5,7 @@ use crate::app::{UserKeyData, Web3ProxyApp}; use crate::jsonrpc::JsonRpcRequest; use anyhow::Context; use axum::headers::{authorization::Bearer, Origin, Referer, UserAgent}; +use axum::TypedHeader; use chrono::Utc; use deferred_rate_limiter::DeferredRateLimitResult; use entities::{user, user_keys}; @@ -49,6 +50,7 @@ pub enum RateLimitResult { pub struct AuthorizedKey { pub ip: IpAddr, pub origin: Option, + pub user_id: u64, pub user_key_id: u64, // TODO: just use an f32? even an f16 is probably fine pub log_revert_chance: Decimal, @@ -69,14 +71,14 @@ pub struct RequestMetadata { pub response_millis: AtomicU64, } -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug)] pub enum AuthorizedRequest { /// Request from this app Internal, /// Request from an anonymous IP address - Ip(#[serde(skip)] IpAddr), + Ip(IpAddr, Option), /// Request from an authenticated and authorized user - User(#[serde(skip)] Option, AuthorizedKey), + User(Option, AuthorizedKey), } impl RequestMetadata { @@ -230,6 +232,7 @@ impl AuthorizedKey { Ok(Self { ip, origin, + user_id: user_key_data.user_id, user_key_id: user_key_data.user_key_id, log_revert_chance: user_key_data.log_revert_chance, }) @@ -240,9 +243,8 @@ impl AuthorizedRequest { /// Only User has a database connection in case it needs to save a revert to the database. pub fn db_conn(&self) -> Option<&DatabaseConnection> { match self { - Self::Internal => None, - Self::Ip(_) => None, Self::User(x, _) => x.as_ref(), + _ => None, } } } @@ -251,7 +253,7 @@ impl Display for &AuthorizedRequest { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { AuthorizedRequest::Internal => f.write_str("int"), - AuthorizedRequest::Ip(x) => f.write_str(&format!("ip-{}", x)), + AuthorizedRequest::Ip(x, _) => f.write_str(&format!("ip-{}", x)), AuthorizedRequest::User(_, x) => f.write_str(&format!("uk-{}", x.user_key_id)), } } @@ -272,7 +274,7 @@ pub async fn login_is_authorized( x => unimplemented!("rate_limit_login shouldn't ever see these: {:?}", x), }; - Ok((AuthorizedRequest::Ip(ip), semaphore)) + Ok((AuthorizedRequest::Ip(ip, None), semaphore)) } // TODO: where should we use this? @@ -306,10 +308,13 @@ pub async fn bearer_is_authorized( pub async fn ip_is_authorized( app: &Web3ProxyApp, ip: IpAddr, + origin: Option>, ) -> Result<(AuthorizedRequest, Option), FrontendErrorResponse> { + let origin = origin.map(|x| x.0); + // TODO: i think we could write an `impl From` for this // TODO: move this to an AuthorizedUser extrator - let (ip, semaphore) = match app.rate_limit_by_ip(ip).await? { + let (ip, semaphore) = match app.rate_limit_by_ip(ip, origin.as_ref()).await? { RateLimitResult::AllowedIp(ip, semaphore) => (ip, Some(semaphore)), RateLimitResult::RateLimitedIp(x, retry_at) => { return Err(FrontendErrorResponse::RateLimitedIp(x, retry_at)); @@ -319,7 +324,7 @@ pub async fn ip_is_authorized( }; // semaphore won't ever be None, but its easier if key auth and ip auth work the same way - Ok((AuthorizedRequest::Ip(ip), semaphore)) + Ok((AuthorizedRequest::Ip(ip, origin), semaphore)) } pub async fn key_is_authorized( @@ -432,12 +437,25 @@ impl Web3ProxyApp { } } - pub async fn rate_limit_by_ip(&self, ip: IpAddr) -> anyhow::Result { + pub async fn rate_limit_by_ip( + &self, + ip: IpAddr, + origin: Option<&Origin>, + ) -> anyhow::Result { // TODO: dry this up with rate_limit_by_key let semaphore = self.ip_semaphore(ip).await?; if let Some(rate_limiter) = &self.frontend_ip_rate_limiter { - match rate_limiter.throttle(ip, None, 1).await { + let max_requests_per_period = origin + .map(|origin| { + self.config + .allowed_origin_requests_per_minute + .get(&origin.to_string()) + .cloned() + }) + .unwrap_or_default(); + + match rate_limiter.throttle(ip, max_requests_per_period, 1).await { Ok(DeferredRateLimitResult::Allowed) => { Ok(RateLimitResult::AllowedIp(ip, semaphore)) } @@ -533,6 +551,7 @@ impl Web3ProxyApp { }); Ok(UserKeyData { + user_id: user_key_model.user_id, user_key_id: user_key_model.id, max_requests_per_period: user_key_model.requests_per_minute, max_concurrent_requests: user_key_model.max_concurrent_requests, diff --git a/web3_proxy/src/frontend/rpc_proxy_http.rs b/web3_proxy/src/frontend/rpc_proxy_http.rs index 427137bc..61604f94 100644 --- a/web3_proxy/src/frontend/rpc_proxy_http.rs +++ b/web3_proxy/src/frontend/rpc_proxy_http.rs @@ -19,12 +19,14 @@ use tracing::{error_span, Instrument}; pub async fn proxy_web3_rpc( Extension(app): Extension>, ClientIp(ip): ClientIp, + origin: Option>, Json(payload): Json, ) -> FrontendResult { let request_span = error_span!("request", %ip); - let (authorized_request, _semaphore) = - ip_is_authorized(&app, ip).instrument(request_span).await?; + let (authorized_request, _semaphore) = ip_is_authorized(&app, ip, origin) + .instrument(request_span) + .await?; let request_span = error_span!("request", ?authorized_request); diff --git a/web3_proxy/src/frontend/rpc_proxy_ws.rs b/web3_proxy/src/frontend/rpc_proxy_ws.rs index 2977f46c..1b879aa4 100644 --- a/web3_proxy/src/frontend/rpc_proxy_ws.rs +++ b/web3_proxy/src/frontend/rpc_proxy_ws.rs @@ -36,13 +36,15 @@ use crate::{ pub async fn websocket_handler( Extension(app): Extension>, ClientIp(ip): ClientIp, + origin: Option>, ws_upgrade: Option, ) -> FrontendResult { // TODO: i don't like logging ips. move this to trace level? - let request_span = error_span!("request", %ip); + let request_span = error_span!("request", %ip, ?origin); - let (authorized_request, _semaphore) = - ip_is_authorized(&app, ip).instrument(request_span).await?; + let (authorized_request, _semaphore) = ip_is_authorized(&app, ip, origin) + .instrument(request_span) + .await?; let request_span = error_span!("request", ?authorized_request); @@ -113,15 +115,17 @@ pub async fn websocket_handler_with_key( // TODO: show the user's address, not their id (remember to update the checks for {{user_id}}} in app.rs) // TODO: query to get the user's address. expose that instead of user_id - let user_url = reg - .render_template( - redirect, - &json!({ "authorized_request": authorized_request }), - ) - .expect("templating should always work"); + if let AuthorizedRequest::User(_, authorized_key) = authorized_request.as_ref() { + let user_url = reg + .render_template(redirect, &json!({ "user_id": authorized_key.user_id })) + .expect("templating should always work"); - // this is not a websocket. redirect to a page for this user - Ok(Redirect::to(&user_url).into_response()) + // this is not a websocket. redirect to a page for this user + Ok(Redirect::to(&user_url).into_response()) + } else { + // TODO: i think this is impossible + Err(anyhow::anyhow!("this page is for rpcs").into()) + } } else { // TODO: do not use an anyhow error. send the user a 400 Err(anyhow::anyhow!("redirect_user_url not set. only websockets work here").into())