query_window_seconds

2022-10-20 04:44:33 +00:00 · 2022-10-20 04:44:33 +00:00 · 33d15509cd
commit 33d15509cd
parent 7b5d50a577
3 changed files with 304 additions and 27 deletions
--- a/TODO.md
+++ b/TODO.md
@ -188,19 +188,22 @@ These are roughly in order of completition
 - [-] ability to domain lock or ip lock said key
  - the code to check the database and use these entries already exists, but users don't have a way to set them
 - [-] new endpoints for users (not totally sure about the exact paths, but these features are all needed):
-  - [ ] ability to generate a key from a web endpoint
  - [x] sign in
  - [x] sign out
  - [-] GET profile endpoint
  - [-] POST profile endpoint
-  - [-] GET stats endpoint
-    - [-] display requests per second per api key (only with authentication!)
-    - [-] display concurrent requests per api key (only with authentication!)
-    - [ ] display distribution of methods per api key (eth_call, eth_getLogs, etc.) (only with authentication!)
+  - [x] GET stats endpoint
+    - [x] display requests per second per api key (only with authentication!)
+    - [x] display concurrent requests per api key (only with authentication!)
+    - [x] display distribution of methods per api key (eth_call, eth_getLogs, etc.) (only with authentication!)
  - [x] get aggregate stats endpoint
  - [ ] POST key endpoint
-    - allow setting things such as private relay, revert logging, ip/origin/etc checks
-  - [ ] GET logged reverts on an endpoint that requires authentication.
+    - [ ] generate a new key from a web endpoint
+    - [ ] modifying key settings such as private relay, revert logging, ip/origin/etc checks
+  - [ ] GET logged reverts on an endpoint that **requires authentication**.
+- [x] [paginate responses](https://www.sea-ql.org/SeaORM/docs/basic-crud/select/#paginate-result)
+- [ ] per-user stats should probably be locked behind authentication. the code is written but disabled for easy development
+  - if we do this, we should also have an admin-only endpoint for seeing these for support requests
 - [ ] endpoint for creating/modifying api keys and their advanced security features
 - [ ] graceful shutdown. stop taking new requests and don't stop until all outstanding queries are handled
  - https://github.com/tokio-rs/mini-redis/blob/master/src/shutdown.rs
@ -208,7 +211,6 @@ These are roughly in order of completition
  - need an flume::watch on unflushed stats that we can subscribe to. wait for it to flip to true
 - [ ] include if archive query or not in the stats
  - this is already partially done, but we need to double check it works. preferrably with tests
- [ ] [paginate responses](https://www.sea-ql.org/SeaORM/docs/basic-crud/select/#paginate-result)
 - [ ] WARN http_request:request: web3_proxy::block_number: could not get block from params err=unexpected params length id=01GF4HTRKM4JV6NX52XSF9AYMW method=POST authorized_request=User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 })
 - ERROR http_request:request:try_send_all_upstream_servers: web3_proxy::rpcs::request: bad response! err=JsonRpcClientError(JsonRpcError(JsonRpcError { code: -32000, message: "INTERNAL_ERROR: existing tx with same hash", data: None })) method=eth_sendRawTransaction rpc=local_erigon_alpha_archive id=01GF4HV03Y4ZNKQV8DW5NDQ5CG method=POST authorized_request=User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 }) self=Web3Connections { conns: {"local_erigon_alpha_archive_ws": Web3Connection { name: "local_erigon_alpha_archive_ws", blocks: "all", .. }, "local_geth_ws": Web3Connection { name: "local_geth_ws", blocks: 64, .. }, "local_erigon_alpha_archive": Web3Connection { name: "local_erigon_alpha_archive", blocks: "all", .. }}, .. } authorized_request=Some(User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 })) request=JsonRpcRequest { id: RawValue(39), method: "eth_sendRawTransaction", .. } request_metadata=Some(RequestMetadata { datetime: 2022-10-11T22:14:57.406829095Z, period_seconds: 60, request_bytes: 633, backend_requests: 0, no_servers: 0, error_response: false, response_bytes: 0, response_millis: 0 }) block_needed=None
  - why is it failing to get the block from params when its set to None? That should be the simple case
--- a/web3_proxy/src/frontend/users.rs
+++ b/web3_proxy/src/frontend/users.rs
@ -25,7 +25,7 @@ use siwe::{Message, VerificationOpts};
 use std::ops::Add;
 use std::sync::Arc;
 use time::{Duration, OffsetDateTime};
-use tracing::{debug, info, warn};
+use tracing::{info, warn};
 use ulid::Ulid;

 /// `GET /user/login/:user_address` or `GET /user/login/:user_address/:message_eip` -- Start the "Sign In with Ethereum" (siwe) login flow.
@ -418,11 +418,12 @@ pub async fn user_profile_get(

 /// `GET /user/stats/detailed` -- Use a bearer token to get the user's key stats such as bandwidth used and methods requested.
 ///
-/// If no bearer is provided, detailed stats for all users will be shown
+/// If no bearer is provided, detailed stats for all users will be shown.
+/// View a single user with `?user_id=$x`.
+/// View a single chain with `?chain_id=$x`.
 ///
-/// - show number of requests used (so we can calculate average spending over a month, burn rate for a user etc, something like "Your balance will be depleted in xx days)
+/// Set `$x` to zero to see all.
 ///
-/// TODO: one key per request? maybe /user/stats/:api_key?
 /// TODO: this will change as we add better support for secondary users.
 #[debug_handler]
 pub async fn user_stats_detailed_get(
@ -466,6 +467,23 @@ pub async fn user_stats_detailed_get(
        }
    };

+    // only allow user_key to be set if user_id is also set
+    // this will keep people from reading someone else's keys
+    let user_key = if user_id > 0 {
+        params
+            .get("user_key")
+            .map_or_else::<anyhow::Result<u64>, _, _>(
+                || Ok(0),
+                |c| {
+                    let c = c.parse()?;
+
+                    Ok(c)
+                },
+            )?
+    } else {
+        0
+    };
+
    // TODO: DRY
    let chain_id = params
        .get("chain_id")
@ -500,7 +518,35 @@ pub async fn user_stats_detailed_get(
            },
        )?;

-    let x = get_detailed_stats(chain_id, &db, query_start, user_id).await?;
+    let page = params
+        .get("page")
+        .map_or_else::<anyhow::Result<usize>, _, _>(
+            || {
+                // no page in params. set default
+                Ok(0)
+            },
+            |x: &String| {
+                // parse the given timestamp
+                // TODO: error code 401
+                let x = x.parse::<usize>().context("parsing page query param")?;
+
+                Ok(x)
+            },
+        )?;
+
+    // TODO: page size from config
+    let page_size = 200;
+
+    let x = get_detailed_stats(
+        chain_id,
+        &db,
+        page,
+        page_size,
+        query_start,
+        user_key,
+        user_id,
+    )
+    .await?;

    Ok(Json(x).into_response())
 }
@ -513,7 +559,7 @@ pub async fn user_stats_aggregate_get(
    Query(params): Query<HashMap<String, String>>,
 ) -> FrontendResult {
    // TODO: how is db_conn supposed to be used?
-    let db = app.db_conn.clone().context("connecting to db")?;
+    let db_conn = app.db_conn.clone().context("connecting to db")?;

    // get the attached address from redis for the given auth_token.
    let mut redis_conn = app.redis_conn().await.context("connecting to redis")?;
@ -552,8 +598,6 @@ pub async fn user_stats_aggregate_get(
            |c| {
                let c = c.parse()?;

-                info!("user supplied chain_id");
-
                Ok(c)
            },
        )?;
@ -579,8 +623,56 @@ pub async fn user_stats_aggregate_get(
            },
        )?;

+    let query_window_seconds = params
+        .get("query_window_seconds")
+        .map_or_else::<anyhow::Result<Option<u64>>, _, _>(
+            || {
+                // no page in params. set default
+                Ok(None)
+            },
+            |x: &String| {
+                // parse the given timestamp
+                // TODO: error code 401
+                let x = x.parse::<u64>().context("parsing page query param")?;
+
+                if x == 0 {
+                    Ok(None)
+                } else {
+                    Ok(Some(x))
+                }
+            },
+        )?;
+
+    let page = params
+        .get("page")
+        .map_or_else::<anyhow::Result<usize>, _, _>(
+            || {
+                // no page in params. set None
+                Ok(0)
+            },
+            |x: &String| {
+                // parse the given timestamp
+                // TODO: error code 401
+                let x = x.parse().context("parsing page query param")?;
+
+                Ok(x)
+            },
+        )?;
+
+    // TODO: page size from config
+    let page_size = 200;
+
    // TODO: optionally no chain id?
-    let x = get_aggregate_rpc_stats(chain_id, &db, query_start, user_id).await?;
+    let x = get_aggregate_rpc_stats(
+        chain_id,
+        &db_conn,
+        page,
+        page_size,
+        query_start,
+        query_window_seconds,
+        user_id,
+    )
+    .await?;

    Ok(Json(x).into_response())
 }
--- a/web3_proxy/src/user_stats.rs
+++ b/web3_proxy/src/user_stats.rs
@ -1,20 +1,35 @@
-use entities::{rpc_accounting, user, user_keys};
+use anyhow::Context;
+use entities::{rpc_accounting, user_keys};
 use hashbrown::HashMap;
+use migration::Expr;
 use num::Zero;
 use sea_orm::{
-    ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, QueryFilter, QuerySelect,
-    RelationTrait,
+    ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, PaginatorTrait, QueryFilter,
+    QueryOrder, QuerySelect, RelationTrait,
 };
-use tracing::{debug, info, trace};
+use tracing::trace;

+/// stats aggregated across a large time period
 pub async fn get_aggregate_rpc_stats(
    chain_id: u64,
-    db: &DatabaseConnection,
+    db_conn: &DatabaseConnection,
+    page: usize,
+    page_size: usize,
    query_start: chrono::NaiveDateTime,
+    query_window_seconds: Option<u64>,
    user_id: u64,
-) -> anyhow::Result<Vec<serde_json::Value>> {
+) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
    trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");

+    // TODO: minimum query_start of 90 days?
+
+    let mut response = HashMap::new();
+
+    response.insert("page", serde_json::to_value(page)?);
+    response.insert("page_size", serde_json::to_value(page_size)?);
+    response.insert("chain_id", serde_json::to_value(chain_id)?);
+    response.insert("query_start", serde_json::to_value(query_start)?);
+
    // TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?
    // TODO: how do we count uptime?
    let q = rpc_accounting::Entity::find()
@ -44,8 +59,31 @@ pub async fn get_aggregate_rpc_stats(
        .column_as(
            rpc_accounting::Column::SumResponseMillis.sum(),
            "total_response_millis",
+        )
+        .order_by_asc(rpc_accounting::Column::PeriodDatetime.min());
+
+    let q = if let Some(query_window_seconds) = query_window_seconds {
+        debug_assert_ne!(query_window_seconds, 0);
+
+        // TODO: is there a better way to do this? how can we get "period_datetime" into this with types?
+        let expr = Expr::cust_with_values(
+            "FLOOR(UNIX_TIMESTAMP(rpc_accounting.period_datetime) / ?) * ?",
+            [query_window_seconds, query_window_seconds],
        );

+        response.insert(
+            "query_window_seconds",
+            serde_json::to_value(query_window_seconds)?,
+        );
+
+        q.column_as(expr, "query_window")
+            .group_by(Expr::cust("query_window"))
+    } else {
+        // TODO: order by more than this?
+        // query_window_seconds
+        q
+    };
+
    let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));

    let (condition, q) = if chain_id.is_zero() {
@ -90,9 +128,15 @@ pub async fn get_aggregate_rpc_stats(

    // log query here. i think sea orm has a useful log level for this

-    let r = q.into_json().all(db).await?;
+    let aggregate = q
+        .into_json()
+        .paginate(db_conn, page_size)
+        .fetch_page(page)
+        .await?;

-    Ok(r)
+    response.insert("aggregrate", serde_json::Value::Array(aggregate));
+
+    Ok(response)
 }

 pub async fn get_user_stats(chain_id: u64) -> u64 {
@ -100,17 +144,26 @@ pub async fn get_user_stats(chain_id: u64) -> u64 {
 }

 /// stats grouped by key_id and error_repsponse and method and key
+///
+/// TODO: take a "timebucket" duration in minutes that will make a more advanced
 pub async fn get_detailed_stats(
    chain_id: u64,
    db_conn: &DatabaseConnection,
+    page: usize,
+    page_size: usize,
    query_start: chrono::NaiveDateTime,
+    user_key_id: u64,
    user_id: u64,
 ) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
    // aggregate stats, but grouped by method and error
    trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");

+    // TODO: minimum query_start of 90 days?
+
    let mut response = HashMap::new();

+    response.insert("page", serde_json::to_value(page)?);
+    response.insert("page_size", serde_json::to_value(page_size)?);
    response.insert("chain_id", serde_json::to_value(chain_id)?);
    response.insert("query_start", serde_json::to_value(query_start)?);

@ -149,7 +202,9 @@ pub async fn get_detailed_stats(
        .column_as(
            rpc_accounting::Column::SumResponseMillis.sum(),
            "total_response_millis",
-        );
+        )
+        // TODO: order on method next?
+        .order_by_asc(rpc_accounting::Column::PeriodDatetime.min());

    let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));

@ -197,7 +252,135 @@ pub async fn get_detailed_stats(
    // log query here. i think sea orm has a useful log level for this

    // TODO: transform this into a nested hashmap instead of a giant table?
-    let r = q.into_json().all(db_conn).await?;
+    let r = q
+        .into_json()
+        .paginate(db_conn, page_size)
+        .fetch_page(page)
+        .await?;
+
+    response.insert("detailed_aggregate", serde_json::Value::Array(r));
+
+    // number of keys
+    // number of secondary keys
+    // avg and max concurrent requests per second per api key
+
+    Ok(response)
+}
+
+/// revert logs for a single key
+///
+/// TODO: take a "timebucket" duration in minutes that will make a more advanced
+pub async fn get_revert_logs(
+    chain_id: u64,
+    db_conn: &DatabaseConnection,
+    page: usize,
+    page_size: usize,
+    query_start: chrono::NaiveDateTime,
+    user_id: u64,
+    key_id: u64,
+) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
+    // aggregate stats, but grouped by method and error
+    trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");
+
+    // TODO: minimum query_start of 90 days?
+
+    let mut response = HashMap::new();
+
+    response.insert("page", serde_json::to_value(page)?);
+    response.insert("page_size", serde_json::to_value(page_size)?);
+    response.insert("chain_id", serde_json::to_value(chain_id)?);
+    response.insert("query_start", serde_json::to_value(query_start)?);
+
+    // TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?
+    // TODO: how do we count uptime?
+    let q = rpc_accounting::Entity::find()
+        .select_only()
+        // groups
+        .column(rpc_accounting::Column::ErrorResponse)
+        .group_by(rpc_accounting::Column::ErrorResponse)
+        .column(rpc_accounting::Column::Method)
+        .group_by(rpc_accounting::Column::Method)
+        // aggregate columns
+        .column_as(
+            rpc_accounting::Column::FrontendRequests.sum(),
+            "total_requests",
+        )
+        .column_as(
+            rpc_accounting::Column::CacheMisses.sum(),
+            "total_cache_misses",
+        )
+        .column_as(rpc_accounting::Column::CacheHits.sum(), "total_cache_hits")
+        .column_as(
+            rpc_accounting::Column::BackendRetries.sum(),
+            "total_backend_retries",
+        )
+        .column_as(
+            rpc_accounting::Column::SumResponseBytes.sum(),
+            "total_response_bytes",
+        )
+        .column_as(
+            // TODO: can we sum bools like this?
+            rpc_accounting::Column::ErrorResponse.sum(),
+            "total_error_responses",
+        )
+        .column_as(
+            rpc_accounting::Column::SumResponseMillis.sum(),
+            "total_response_millis",
+        )
+        // TODO: order on method next?
+        .order_by_asc(rpc_accounting::Column::PeriodDatetime.min());
+
+    let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));
+
+    let (condition, q) = if chain_id.is_zero() {
+        // fetch all the chains. don't filter
+        // TODO: wait. do we want chain id on the logs? we can get that by joining key
+        let q = q
+            .column(rpc_accounting::Column::ChainId)
+            .group_by(rpc_accounting::Column::ChainId);
+
+        (condition, q)
+    } else {
+        let condition = condition.add(rpc_accounting::Column::ChainId.eq(chain_id));
+
+        (condition, q)
+    };
+
+    let (condition, q) = if user_id.is_zero() {
+        // 0 means everyone. don't filter on user
+        (condition, q)
+    } else {
+        // TODO: move authentication here?
+        // TODO: what about keys where this user is a secondary user?
+        let q = q
+            .join(
+                JoinType::InnerJoin,
+                rpc_accounting::Relation::UserKeys.def(),
+            )
+            .column(user_keys::Column::UserId)
+            // no need to group_by user_id when we are grouping by key_id
+            // .group_by(user_keys::Column::UserId)
+            .column(user_keys::Column::Id)
+            .group_by(user_keys::Column::Id);
+
+        let condition = condition.add(user_keys::Column::UserId.eq(user_id));
+
+        (condition, q)
+    };
+
+    let q = q.filter(condition);
+
+    // TODO: enum between searching on user_key_id on user_id
+    // TODO: handle secondary users, too
+
+    // log query here. i think sea orm has a useful log level for this
+
+    // TODO: transform this into a nested hashmap instead of a giant table?
+    let r = q
+        .into_json()
+        .paginate(db_conn, page_size)
+        .fetch_page(page)
+        .await?;

    response.insert("detailed_aggregate", serde_json::Value::Array(r));