query_window_seconds

This commit is contained in:
Bryan Stitt 2022-10-20 04:44:33 +00:00
parent 7b5d50a577
commit 33d15509cd
3 changed files with 304 additions and 27 deletions

18
TODO.md
View File

@ -188,19 +188,22 @@ These are roughly in order of completition
- [-] ability to domain lock or ip lock said key
- the code to check the database and use these entries already exists, but users don't have a way to set them
- [-] new endpoints for users (not totally sure about the exact paths, but these features are all needed):
- [ ] ability to generate a key from a web endpoint
- [x] sign in
- [x] sign out
- [-] GET profile endpoint
- [-] POST profile endpoint
- [-] GET stats endpoint
- [-] display requests per second per api key (only with authentication!)
- [-] display concurrent requests per api key (only with authentication!)
- [ ] display distribution of methods per api key (eth_call, eth_getLogs, etc.) (only with authentication!)
- [x] GET stats endpoint
- [x] display requests per second per api key (only with authentication!)
- [x] display concurrent requests per api key (only with authentication!)
- [x] display distribution of methods per api key (eth_call, eth_getLogs, etc.) (only with authentication!)
- [x] get aggregate stats endpoint
- [ ] POST key endpoint
- allow setting things such as private relay, revert logging, ip/origin/etc checks
- [ ] GET logged reverts on an endpoint that requires authentication.
- [ ] generate a new key from a web endpoint
- [ ] modifying key settings such as private relay, revert logging, ip/origin/etc checks
- [ ] GET logged reverts on an endpoint that **requires authentication**.
- [x] [paginate responses](https://www.sea-ql.org/SeaORM/docs/basic-crud/select/#paginate-result)
- [ ] per-user stats should probably be locked behind authentication. the code is written but disabled for easy development
- if we do this, we should also have an admin-only endpoint for seeing these for support requests
- [ ] endpoint for creating/modifying api keys and their advanced security features
- [ ] graceful shutdown. stop taking new requests and don't stop until all outstanding queries are handled
- https://github.com/tokio-rs/mini-redis/blob/master/src/shutdown.rs
@ -208,7 +211,6 @@ These are roughly in order of completition
- need a flume::watch on unflushed stats that we can subscribe to. wait for it to flip to true
- [ ] include if archive query or not in the stats
- this is already partially done, but we need to double check it works. preferably with tests
- [ ] [paginate responses](https://www.sea-ql.org/SeaORM/docs/basic-crud/select/#paginate-result)
- [ ] WARN http_request:request: web3_proxy::block_number: could not get block from params err=unexpected params length id=01GF4HTRKM4JV6NX52XSF9AYMW method=POST authorized_request=User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 })
- ERROR http_request:request:try_send_all_upstream_servers: web3_proxy::rpcs::request: bad response! err=JsonRpcClientError(JsonRpcError(JsonRpcError { code: -32000, message: "INTERNAL_ERROR: existing tx with same hash", data: None })) method=eth_sendRawTransaction rpc=local_erigon_alpha_archive id=01GF4HV03Y4ZNKQV8DW5NDQ5CG method=POST authorized_request=User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 }) self=Web3Connections { conns: {"local_erigon_alpha_archive_ws": Web3Connection { name: "local_erigon_alpha_archive_ws", blocks: "all", .. }, "local_geth_ws": Web3Connection { name: "local_geth_ws", blocks: 64, .. }, "local_erigon_alpha_archive": Web3Connection { name: "local_erigon_alpha_archive", blocks: "all", .. }}, .. } authorized_request=Some(User(Some(SqlxMySqlPoolConnection), AuthorizedKey { ip: 10.11.12.15, origin: None, user_key_id: 4, log_revert_chance: 0.0000 })) request=JsonRpcRequest { id: RawValue(39), method: "eth_sendRawTransaction", .. } request_metadata=Some(RequestMetadata { datetime: 2022-10-11T22:14:57.406829095Z, period_seconds: 60, request_bytes: 633, backend_requests: 0, no_servers: 0, error_response: false, response_bytes: 0, response_millis: 0 }) block_needed=None
- why is it failing to get the block from params when it's set to None? That should be the simple case

View File

@ -25,7 +25,7 @@ use siwe::{Message, VerificationOpts};
use std::ops::Add;
use std::sync::Arc;
use time::{Duration, OffsetDateTime};
use tracing::{debug, info, warn};
use tracing::{info, warn};
use ulid::Ulid;
/// `GET /user/login/:user_address` or `GET /user/login/:user_address/:message_eip` -- Start the "Sign In with Ethereum" (siwe) login flow.
@ -418,11 +418,12 @@ pub async fn user_profile_get(
/// `GET /user/stats/detailed` -- Use a bearer token to get the user's key stats such as bandwidth used and methods requested.
///
/// If no bearer is provided, detailed stats for all users will be shown
/// If no bearer is provided, detailed stats for all users will be shown.
/// View a single user with `?user_id=$x`.
/// View a single chain with `?chain_id=$x`.
///
/// - show number of requests used (so we can calculate average spending over a month, burn rate for a user etc, something like "Your balance will be depleted in xx days)
/// Set `$x` to zero to see all.
///
/// TODO: one key per request? maybe /user/stats/:api_key?
/// TODO: this will change as we add better support for secondary users.
#[debug_handler]
pub async fn user_stats_detailed_get(
@ -466,6 +467,23 @@ pub async fn user_stats_detailed_get(
}
};
// only allow user_key to be set if user_id is also set
// this will keep people from reading someone else's keys
let user_key = if user_id > 0 {
params
.get("user_key")
.map_or_else::<anyhow::Result<u64>, _, _>(
|| Ok(0),
|c| {
let c = c.parse()?;
Ok(c)
},
)?
} else {
0
};
// TODO: DRY
let chain_id = params
.get("chain_id")
@ -500,7 +518,35 @@ pub async fn user_stats_detailed_get(
},
)?;
let x = get_detailed_stats(chain_id, &db, query_start, user_id).await?;
let page = params
.get("page")
.map_or_else::<anyhow::Result<usize>, _, _>(
|| {
// no page in params. set default
Ok(0)
},
|x: &String| {
// parse the given page number
// TODO: error code 401
let x = x.parse::<usize>().context("parsing page query param")?;
Ok(x)
},
)?;
// TODO: page size from config
let page_size = 200;
let x = get_detailed_stats(
chain_id,
&db,
page,
page_size,
query_start,
user_key,
user_id,
)
.await?;
Ok(Json(x).into_response())
}
@ -513,7 +559,7 @@ pub async fn user_stats_aggregate_get(
Query(params): Query<HashMap<String, String>>,
) -> FrontendResult {
// TODO: how is db_conn supposed to be used?
let db = app.db_conn.clone().context("connecting to db")?;
let db_conn = app.db_conn.clone().context("connecting to db")?;
// get the attached address from redis for the given auth_token.
let mut redis_conn = app.redis_conn().await.context("connecting to redis")?;
@ -552,8 +598,6 @@ pub async fn user_stats_aggregate_get(
|c| {
let c = c.parse()?;
info!("user supplied chain_id");
Ok(c)
},
)?;
@ -579,8 +623,56 @@ pub async fn user_stats_aggregate_get(
},
)?;
let query_window_seconds = params
.get("query_window_seconds")
.map_or_else::<anyhow::Result<Option<u64>>, _, _>(
|| {
// no query_window_seconds in params. default to None
Ok(None)
},
|x: &String| {
// parse the given window size (in seconds). 0 is treated the same as unset
// TODO: error code 401
let x = x.parse::<u64>().context("parsing page query param")?;
if x == 0 {
Ok(None)
} else {
Ok(Some(x))
}
},
)?;
let page = params
.get("page")
.map_or_else::<anyhow::Result<usize>, _, _>(
|| {
// no page in params. default to page 0
Ok(0)
},
|x: &String| {
// parse the given page number
// TODO: error code 401
let x = x.parse().context("parsing page query param")?;
Ok(x)
},
)?;
// TODO: page size from config
let page_size = 200;
// TODO: optionally no chain id?
let x = get_aggregate_rpc_stats(chain_id, &db, query_start, user_id).await?;
let x = get_aggregate_rpc_stats(
chain_id,
&db_conn,
page,
page_size,
query_start,
query_window_seconds,
user_id,
)
.await?;
Ok(Json(x).into_response())
}

View File

@ -1,20 +1,35 @@
use entities::{rpc_accounting, user, user_keys};
use anyhow::Context;
use entities::{rpc_accounting, user_keys};
use hashbrown::HashMap;
use migration::Expr;
use num::Zero;
use sea_orm::{
ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, QueryFilter, QuerySelect,
RelationTrait,
ColumnTrait, Condition, DatabaseConnection, EntityTrait, JoinType, PaginatorTrait, QueryFilter,
QueryOrder, QuerySelect, RelationTrait,
};
use tracing::{debug, info, trace};
use tracing::trace;
/// stats aggregated across a large time period
pub async fn get_aggregate_rpc_stats(
chain_id: u64,
db: &DatabaseConnection,
db_conn: &DatabaseConnection,
page: usize,
page_size: usize,
query_start: chrono::NaiveDateTime,
query_window_seconds: Option<u64>,
user_id: u64,
) -> anyhow::Result<Vec<serde_json::Value>> {
) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");
// TODO: minimum query_start of 90 days?
let mut response = HashMap::new();
response.insert("page", serde_json::to_value(page)?);
response.insert("page_size", serde_json::to_value(page_size)?);
response.insert("chain_id", serde_json::to_value(chain_id)?);
response.insert("query_start", serde_json::to_value(query_start)?);
// TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?
// TODO: how do we count uptime?
let q = rpc_accounting::Entity::find()
@ -44,8 +59,31 @@ pub async fn get_aggregate_rpc_stats(
.column_as(
rpc_accounting::Column::SumResponseMillis.sum(),
"total_response_millis",
)
.order_by_asc(rpc_accounting::Column::PeriodDatetime.min());
let q = if let Some(query_window_seconds) = query_window_seconds {
debug_assert_ne!(query_window_seconds, 0);
// TODO: is there a better way to do this? how can we get "period_datetime" into this with types?
let expr = Expr::cust_with_values(
"FLOOR(UNIX_TIMESTAMP(rpc_accounting.period_datetime) / ?) * ?",
[query_window_seconds, query_window_seconds],
);
response.insert(
"query_window_seconds",
serde_json::to_value(query_window_seconds)?,
);
q.column_as(expr, "query_window")
.group_by(Expr::cust("query_window"))
} else {
// TODO: order by more than this?
// query_window_seconds
q
};
let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));
let (condition, q) = if chain_id.is_zero() {
@ -90,9 +128,15 @@ pub async fn get_aggregate_rpc_stats(
// log query here. i think sea orm has a useful log level for this
let r = q.into_json().all(db).await?;
let aggregate = q
.into_json()
.paginate(db_conn, page_size)
.fetch_page(page)
.await?;
Ok(r)
response.insert("aggregrate", serde_json::Value::Array(aggregate));
Ok(response)
}
pub async fn get_user_stats(chain_id: u64) -> u64 {
@ -100,17 +144,26 @@ pub async fn get_user_stats(chain_id: u64) -> u64 {
}
/// stats grouped by key_id and error_response and method and key
///
/// TODO: take a "timebucket" duration in minutes that will make a more advanced
pub async fn get_detailed_stats(
chain_id: u64,
db_conn: &DatabaseConnection,
page: usize,
page_size: usize,
query_start: chrono::NaiveDateTime,
user_key_id: u64,
user_id: u64,
) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
// aggregate stats, but grouped by method and error
trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");
// TODO: minimum query_start of 90 days?
let mut response = HashMap::new();
response.insert("page", serde_json::to_value(page)?);
response.insert("page_size", serde_json::to_value(page_size)?);
response.insert("chain_id", serde_json::to_value(chain_id)?);
response.insert("query_start", serde_json::to_value(query_start)?);
@ -149,7 +202,9 @@ pub async fn get_detailed_stats(
.column_as(
rpc_accounting::Column::SumResponseMillis.sum(),
"total_response_millis",
);
)
// TODO: order on method next?
.order_by_asc(rpc_accounting::Column::PeriodDatetime.min());
let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));
@ -197,7 +252,135 @@ pub async fn get_detailed_stats(
// log query here. i think sea orm has a useful log level for this
// TODO: transform this into a nested hashmap instead of a giant table?
let r = q.into_json().all(db_conn).await?;
let r = q
.into_json()
.paginate(db_conn, page_size)
.fetch_page(page)
.await?;
response.insert("detailed_aggregate", serde_json::Value::Array(r));
// number of keys
// number of secondary keys
// avg and max concurrent requests per second per api key
Ok(response)
}
/// revert logs for a single key
///
/// TODO: take a "timebucket" duration in minutes that will make a more advanced
pub async fn get_revert_logs(
chain_id: u64,
db_conn: &DatabaseConnection,
page: usize,
page_size: usize,
query_start: chrono::NaiveDateTime,
user_id: u64,
key_id: u64,
) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
// aggregate stats, but grouped by method and error
trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");
// TODO: minimum query_start of 90 days?
let mut response = HashMap::new();
response.insert("page", serde_json::to_value(page)?);
response.insert("page_size", serde_json::to_value(page_size)?);
response.insert("chain_id", serde_json::to_value(chain_id)?);
response.insert("query_start", serde_json::to_value(query_start)?);
// TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?
// TODO: how do we count uptime?
let q = rpc_accounting::Entity::find()
.select_only()
// groups
.column(rpc_accounting::Column::ErrorResponse)
.group_by(rpc_accounting::Column::ErrorResponse)
.column(rpc_accounting::Column::Method)
.group_by(rpc_accounting::Column::Method)
// aggregate columns
.column_as(
rpc_accounting::Column::FrontendRequests.sum(),
"total_requests",
)
.column_as(
rpc_accounting::Column::CacheMisses.sum(),
"total_cache_misses",
)
.column_as(rpc_accounting::Column::CacheHits.sum(), "total_cache_hits")
.column_as(
rpc_accounting::Column::BackendRetries.sum(),
"total_backend_retries",
)
.column_as(
rpc_accounting::Column::SumResponseBytes.sum(),
"total_response_bytes",
)
.column_as(
// TODO: can we sum bools like this?
rpc_accounting::Column::ErrorResponse.sum(),
"total_error_responses",
)
.column_as(
rpc_accounting::Column::SumResponseMillis.sum(),
"total_response_millis",
)
// TODO: order on method next?
.order_by_asc(rpc_accounting::Column::PeriodDatetime.min());
let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));
let (condition, q) = if chain_id.is_zero() {
// fetch all the chains. don't filter
// TODO: wait. do we want chain id on the logs? we can get that by joining key
let q = q
.column(rpc_accounting::Column::ChainId)
.group_by(rpc_accounting::Column::ChainId);
(condition, q)
} else {
let condition = condition.add(rpc_accounting::Column::ChainId.eq(chain_id));
(condition, q)
};
let (condition, q) = if user_id.is_zero() {
// 0 means everyone. don't filter on user
(condition, q)
} else {
// TODO: move authentication here?
// TODO: what about keys where this user is a secondary user?
let q = q
.join(
JoinType::InnerJoin,
rpc_accounting::Relation::UserKeys.def(),
)
.column(user_keys::Column::UserId)
// no need to group_by user_id when we are grouping by key_id
// .group_by(user_keys::Column::UserId)
.column(user_keys::Column::Id)
.group_by(user_keys::Column::Id);
let condition = condition.add(user_keys::Column::UserId.eq(user_id));
(condition, q)
};
let q = q.filter(condition);
// TODO: enum between searching on user_key_id on user_id
// TODO: handle secondary users, too
// log query here. i think sea orm has a useful log level for this
// TODO: transform this into a nested hashmap instead of a giant table?
let r = q
.into_json()
.paginate(db_conn, page_size)
.fetch_page(page)
.await?;
response.insert("detailed_aggregate", serde_json::Value::Array(r));