diff --git a/TODO.md b/TODO.md index 788a8096..2ab8eba2 100644 --- a/TODO.md +++ b/TODO.md @@ -188,19 +188,22 @@ These are roughly in order of completition - [-] ability to domain lock or ip lock said key - the code to check the database and use these entries already exists, but users don't have a way to set them - [-] new endpoints for users (not totally sure about the exact paths, but these features are all needed): - - [ ] ability to generate a key from a web endpoint - [x] sign in - [x] sign out - [-] GET profile endpoint - [-] POST profile endpoint - - [-] GET stats endpoint - - [-] display requests per second per api key (only with authentication!) - - [-] display concurrent requests per api key (only with authentication!) - - [ ] display distribution of methods per api key (eth_call, eth_getLogs, etc.) (only with authentication!) + - [x] GET stats endpoint + - [x] display requests per second per api key (only with authentication!) + - [x] display concurrent requests per api key (only with authentication!) + - [x] display distribution of methods per api key (eth_call, eth_getLogs, etc.) (only with authentication!) - [x] get aggregate stats endpoint - [ ] POST key endpoint - - allow setting things such as private relay, revert logging, ip/origin/etc checks - - [ ] GET logged reverts on an endpoint that requires authentication. + - [ ] generate a new key from a web endpoint + - [ ] modifying key settings such as private relay, revert logging, ip/origin/etc checks + - [ ] GET logged reverts on an endpoint that **requires authentication**. +- [x] [paginate responses](https://www.sea-ql.org/SeaORM/docs/basic-crud/select/#paginate-result) +- [ ] per-user stats should probably be locked behind authentication. the code is written but disabled for easy development + - if we do this, we should also have an admin-only endpoint for seeing these for support requests - [ ] endpoint for creating/modifying api keys and their advanced security features - [ ] graceful shutdown. stop taking new requests and don't stop until all outstanding queries are handled - https://github.com/tokio-rs/mini-redis/blob/master/src/shutdown.rs @@ -208,7 +211,6 @@ These are roughly in order of completition - need an flume::watch on unflushed stats that we can subscribe to. wait for it to flip to true - [ ] include if archive query or not in the stats - this is already partially done, but we need to double check it works. preferrably with tests -- [ ] [paginate responses](https://www.sea-ql.org/SeaORM/docs/basic-crud/select/#paginate-result) - [ ] WARN http_request:request: web3_proxy::block_number: could not get block from params err=unexpected params length id=01GF4HTRKM4JV6NX52XSF9AYMW method=POST authorized_request=User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 }) - ERROR http_request:request:try_send_all_upstream_servers: web3_proxy::rpcs::request: bad response! err=JsonRpcClientError(JsonRpcError(JsonRpcError { code: -32000, message: "INTERNAL_ERROR: existing tx with same hash", data: None })) method=eth_sendRawTransaction rpc=local_erigon_alpha_archive id=01GF4HV03Y4ZNKQV8DW5NDQ5CG method=POST authorized_request=User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 }) self=Web3Connections { conns: {"local_erigon_alpha_archive_ws": Web3Connection { name: "local_erigon_alpha_archive_ws", blocks: "all", .. }, "local_geth_ws": Web3Connection { name: "local_geth_ws", blocks: 64, .. }, "local_erigon_alpha_archive": Web3Connection { name: "local_erigon_alpha_archive", blocks: "all", .. }}, .. } authorized_request=Some(User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 })) request=JsonRpcRequest { id: RawValue(39), method: "eth_sendRawTransaction", .. } request_metadata=Some(RequestMetadata { datetime: 2022-10-11T22:14:57.406829095Z, period_seconds: 60, request_bytes: 633, backend_requests: 0, no_servers: 0, error_response: false, response_bytes: 0, response_millis: 0 }) block_needed=None - why is it failing to get the block from params when its set to None? That should be the simple case diff --git a/web3_proxy/src/frontend/users.rs b/web3_proxy/src/frontend/users.rs index 0265df0d..7ca560a1 100644 --- a/web3_proxy/src/frontend/users.rs +++ b/web3_proxy/src/frontend/users.rs @@ -25,7 +25,7 @@ use siwe::{Message, VerificationOpts}; use std::ops::Add; use std::sync::Arc; use time::{Duration, OffsetDateTime}; -use tracing::{debug, info, warn}; +use tracing::{info, warn}; use ulid::Ulid; /// `GET /user/login/:user_address` or `GET /user/login/:user_address/:message_eip` -- Start the "Sign In with Ethereum" (siwe) login flow. @@ -418,11 +418,12 @@ pub async fn user_profile_get( /// `GET /user/stats/detailed` -- Use a bearer token to get the user's key stats such as bandwidth used and methods requested. /// -/// If no bearer is provided, detailed stats for all users will be shown +/// If no bearer is provided, detailed stats for all users will be shown. +/// View a single user with `?user_id=$x`. +/// View a single chain with `?chain_id=$x`. /// -/// - show number of requests used (so we can calculate average spending over a month, burn rate for a user etc, something like "Your balance will be depleted in xx days) +/// Set `$x` to zero to see all. /// -/// TODO: one key per request? maybe /user/stats/:api_key? /// TODO: this will change as we add better support for secondary users. #[debug_handler] pub async fn user_stats_detailed_get( @@ -466,6 +467,23 @@ pub async fn user_stats_detailed_get( } }; + // only allow user_key to be set if user_id is also set + // this will keep people from reading someone else's keys + let user_key = if user_id > 0 { + params + .get("user_key") + .map_or_else::, _, _>( + || Ok(0), + |c| { + let c = c.parse()?; + + Ok(c) + }, + )? + } else { + 0 + }; + // TODO: DRY let chain_id = params .get("chain_id") @@ -500,7 +518,35 @@ pub async fn user_stats_detailed_get( }, )?; - let x = get_detailed_stats(chain_id, &db, query_start, user_id).await?; + let page = params + .get("page") + .map_or_else::, _, _>( + || { + // no page in params. set default + Ok(0) + }, + |x: &String| { + // parse the given timestamp + // TODO: error code 401 + let x = x.parse::().context("parsing page query param")?; + + Ok(x) + }, + )?; + + // TODO: page size from config + let page_size = 200; + + let x = get_detailed_stats( + chain_id, + &db, + page, + page_size, + query_start, + user_key, + user_id, + ) + .await?; Ok(Json(x).into_response()) } @@ -513,7 +559,7 @@ pub async fn user_stats_aggregate_get( Query(params): Query>, ) -> FrontendResult { // TODO: how is db_conn supposed to be used? - let db = app.db_conn.clone().context("connecting to db")?; + let db_conn = app.db_conn.clone().context("connecting to db")?; // get the attached address from redis for the given auth_token. let mut redis_conn = app.redis_conn().await.context("connecting to redis")?; @@ -552,8 +598,6 @@ pub async fn user_stats_aggregate_get( |c| { let c = c.parse()?; - info!("user supplied chain_id"); - Ok(c) }, )?; @@ -579,8 +623,56 @@ pub async fn user_stats_aggregate_get( }, )?; + let query_window_seconds = params + .get("query_window_seconds") + .map_or_else::>, _, _>( + || { + // no page in params. set default + Ok(None) + }, + |x: &String| { + // parse the given timestamp + // TODO: error code 401 + let x = x.parse::().context("parsing page query param")?; + + if x == 0 { + Ok(None) + } else { + Ok(Some(x)) + } + }, + )?; + + let page = params + .get("page") + .map_or_else::, _, _>( + || { + // no page in params. set None + Ok(0) + }, + |x: &String| { + // parse the given timestamp + // TODO: error code 401 + let x = x.parse().context("parsing page query param")?; + + Ok(x) + }, + )?; + + // TODO: page size from config + let page_size = 200; + // TODO: optionally no chain id? - let x = get_aggregate_rpc_stats(chain_id, &db, query_start, user_id).await?; + let x = get_aggregate_rpc_stats( + chain_id, + &db_conn, + page, + page_size, + query_start, + query_window_seconds, + user_id, + ) + .await?; Ok(Json(x).into_response()) } diff --git a/web3_proxy/src/user_stats.rs b/web3_proxy/src/user_stats.rs index a64a2589..49585a70 100644 --- a/web3_proxy/src/user_stats.rs +++ b/web3_proxy/src/user_stats.rs @@ -1,20 +1,35 @@ -use entities::{rpc_accounting, user, user_keys}; +use anyhow::Context; +use entities::{rpc_accounting, user_keys}; use hashbrown::HashMap; +use migration::Expr; use num::Zero; use sea_orm::{ - ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, QueryFilter, QuerySelect, - RelationTrait, + ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, PaginatorTrait, QueryFilter, + QueryOrder, QuerySelect, RelationTrait, }; -use tracing::{debug, info, trace}; +use tracing::trace; +/// stats aggregated across a large time period pub async fn get_aggregate_rpc_stats( chain_id: u64, - db: &DatabaseConnection, + db_conn: &DatabaseConnection, + page: usize, + page_size: usize, query_start: chrono::NaiveDateTime, + query_window_seconds: Option, user_id: u64, -) -> anyhow::Result> { +) -> anyhow::Result> { trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats"); + // TODO: minimum query_start of 90 days? + + let mut response = HashMap::new(); + + response.insert("page", serde_json::to_value(page)?); + response.insert("page_size", serde_json::to_value(page_size)?); + response.insert("chain_id", serde_json::to_value(chain_id)?); + response.insert("query_start", serde_json::to_value(query_start)?); + // TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users? // TODO: how do we count uptime? let q = rpc_accounting::Entity::find() @@ -44,8 +59,31 @@ pub async fn get_aggregate_rpc_stats( .column_as( rpc_accounting::Column::SumResponseMillis.sum(), "total_response_millis", + ) + .order_by_asc(rpc_accounting::Column::PeriodDatetime.min()); + + let q = if let Some(query_window_seconds) = query_window_seconds { + debug_assert_ne!(query_window_seconds, 0); + + // TODO: is there a better way to do this? how can we get "period_datetime" into this with types? + let expr = Expr::cust_with_values( + "FLOOR(UNIX_TIMESTAMP(rpc_accounting.period_datetime) / ?) * ?", + [query_window_seconds, query_window_seconds], ); + response.insert( + "query_window_seconds", + serde_json::to_value(query_window_seconds)?, + ); + + q.column_as(expr, "query_window") + .group_by(Expr::cust("query_window")) + } else { + // TODO: order by more than this? + // query_window_seconds + q + }; + let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start)); let (condition, q) = if chain_id.is_zero() { @@ -90,9 +128,15 @@ pub async fn get_aggregate_rpc_stats( // log query here. i think sea orm has a useful log level for this - let r = q.into_json().all(db).await?; + let aggregate = q + .into_json() + .paginate(db_conn, page_size) + .fetch_page(page) + .await?; - Ok(r) + response.insert("aggregrate", serde_json::Value::Array(aggregate)); + + Ok(response) } pub async fn get_user_stats(chain_id: u64) -> u64 { @@ -100,17 +144,26 @@ pub async fn get_user_stats(chain_id: u64) -> u64 { } /// stats grouped by key_id and error_repsponse and method and key +/// +/// TODO: take a "timebucket" duration in minutes that will make a more advanced pub async fn get_detailed_stats( chain_id: u64, db_conn: &DatabaseConnection, + page: usize, + page_size: usize, query_start: chrono::NaiveDateTime, + user_key_id: u64, user_id: u64, ) -> anyhow::Result> { // aggregate stats, but grouped by method and error trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats"); + // TODO: minimum query_start of 90 days? + let mut response = HashMap::new(); + response.insert("page", serde_json::to_value(page)?); + response.insert("page_size", serde_json::to_value(page_size)?); response.insert("chain_id", serde_json::to_value(chain_id)?); response.insert("query_start", serde_json::to_value(query_start)?); @@ -149,7 +202,9 @@ pub async fn get_detailed_stats( .column_as( rpc_accounting::Column::SumResponseMillis.sum(), "total_response_millis", - ); + ) + // TODO: order on method next? + .order_by_asc(rpc_accounting::Column::PeriodDatetime.min()); let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start)); @@ -197,7 +252,135 @@ pub async fn get_detailed_stats( // log query here. i think sea orm has a useful log level for this // TODO: transform this into a nested hashmap instead of a giant table? - let r = q.into_json().all(db_conn).await?; + let r = q + .into_json() + .paginate(db_conn, page_size) + .fetch_page(page) + .await?; + + response.insert("detailed_aggregate", serde_json::Value::Array(r)); + + // number of keys + // number of secondary keys + // avg and max concurrent requests per second per api key + + Ok(response) +} + +/// revert logs for a single key +/// +/// TODO: take a "timebucket" duration in minutes that will make a more advanced +pub async fn get_revert_logs( + chain_id: u64, + db_conn: &DatabaseConnection, + page: usize, + page_size: usize, + query_start: chrono::NaiveDateTime, + user_id: u64, + key_id: u64, +) -> anyhow::Result> { + // aggregate stats, but grouped by method and error + trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats"); + + // TODO: minimum query_start of 90 days? + + let mut response = HashMap::new(); + + response.insert("page", serde_json::to_value(page)?); + response.insert("page_size", serde_json::to_value(page_size)?); + response.insert("chain_id", serde_json::to_value(chain_id)?); + response.insert("query_start", serde_json::to_value(query_start)?); + + // TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users? + // TODO: how do we count uptime? + let q = rpc_accounting::Entity::find() + .select_only() + // groups + .column(rpc_accounting::Column::ErrorResponse) + .group_by(rpc_accounting::Column::ErrorResponse) + .column(rpc_accounting::Column::Method) + .group_by(rpc_accounting::Column::Method) + // aggregate columns + .column_as( + rpc_accounting::Column::FrontendRequests.sum(), + "total_requests", + ) + .column_as( + rpc_accounting::Column::CacheMisses.sum(), + "total_cache_misses", + ) + .column_as(rpc_accounting::Column::CacheHits.sum(), "total_cache_hits") + .column_as( + rpc_accounting::Column::BackendRetries.sum(), + "total_backend_retries", + ) + .column_as( + rpc_accounting::Column::SumResponseBytes.sum(), + "total_response_bytes", + ) + .column_as( + // TODO: can we sum bools like this? + rpc_accounting::Column::ErrorResponse.sum(), + "total_error_responses", + ) + .column_as( + rpc_accounting::Column::SumResponseMillis.sum(), + "total_response_millis", + ) + // TODO: order on method next? + .order_by_asc(rpc_accounting::Column::PeriodDatetime.min()); + + let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start)); + + let (condition, q) = if chain_id.is_zero() { + // fetch all the chains. don't filter + // TODO: wait. do we want chain id on the logs? we can get that by joining key + let q = q + .column(rpc_accounting::Column::ChainId) + .group_by(rpc_accounting::Column::ChainId); + + (condition, q) + } else { + let condition = condition.add(rpc_accounting::Column::ChainId.eq(chain_id)); + + (condition, q) + }; + + let (condition, q) = if user_id.is_zero() { + // 0 means everyone. don't filter on user + (condition, q) + } else { + // TODO: move authentication here? + // TODO: what about keys where this user is a secondary user? + let q = q + .join( + JoinType::InnerJoin, + rpc_accounting::Relation::UserKeys.def(), + ) + .column(user_keys::Column::UserId) + // no need to group_by user_id when we are grouping by key_id + // .group_by(user_keys::Column::UserId) + .column(user_keys::Column::Id) + .group_by(user_keys::Column::Id); + + let condition = condition.add(user_keys::Column::UserId.eq(user_id)); + + (condition, q) + }; + + let q = q.filter(condition); + + // TODO: enum between searching on user_key_id on user_id + // TODO: handle secondary users, too + + // log query here. i think sea orm has a useful log level for this + + // TODO: transform this into a nested hashmap instead of a giant table? + let r = q + .into_json() + .paginate(db_conn, page_size) + .fetch_page(page) + .await?; response.insert("detailed_aggregate", serde_json::Value::Array(r));