web3-proxy/web3_proxy/src/user_queries.rs

use anyhow::Context;
use axum::{
    headers::{authorization::Bearer, Authorization},
    TypedHeader,
};
use chrono::NaiveDateTime;
use entities::{rpc_accounting, user_keys};
use hashbrown::HashMap;
use migration::Expr;
use num::Zero;
use redis_rate_limiter::{redis::AsyncCommands, RedisConnection};
use sea_orm::{
    ColumnTrait, Condition, EntityTrait, JoinType, PaginatorTrait, QueryFilter, QueryOrder,
    QuerySelect, RelationTrait,
};
use tracing::trace;

use crate::app::Web3ProxyApp;

/// Work out which user's stats the caller is asking for.
///
/// * No `user_id` param: returns `0`, which the query builders in this file treat as
///   "all users". A bearer token, if present, is ignored in this case.
/// * `user_id` param **and** a bearer token: the user id stored in redis under
///   `bearer:<token>` is fetched and must equal the requested `user_id`.
/// * `user_id` param without a bearer token: the requested id is parsed and returned.
///
/// # Errors
///
/// Returns an error if `user_id` is not a valid `u64`, if redis has no usable entry for the
/// bearer token, or if the bearer token belongs to a different user than the one requested.
async fn get_user_id_from_params(
    mut redis_conn: RedisConnection,
    // this is a long type. should we strip it down?
    bearer: Option<TypedHeader<Authorization<Bearer>>>,
    params: &HashMap<String, String>,
) -> anyhow::Result<u64> {
    match (bearer, params.get("user_id")) {
        (Some(bearer), Some(user_id)) => {
            // validate the requested id first so bad input never costs a redis round trip
            let requested_user_id: u64 = user_id.parse().context("Parsing user_id param")?;

            // check for the bearer cache key
            // TODO: move this to a helper function
            let bearer_cache_key = format!("bearer:{}", bearer.token());

            // get the user id that is attached to this bearer token
            let bearer_user_id = redis_conn
                .get::<_, u64>(bearer_cache_key)
                .await
                // TODO: this should be a 403
                .context("fetching user_key_id from redis with bearer_cache_key")?;

            // a token for one user must not be accepted for a request about another user.
            // without this check the requested user_id was silently ignored.
            if bearer_user_id != requested_user_id {
                // TODO: this should be a 403
                anyhow::bail!("bearer token is not authorized for the requested user_id");
            }

            Ok(bearer_user_id)
        }
        (_, None) => {
            // no specific user requested. any bearer token is irrelevant on public pages
            // 0 means all
            Ok(0)
        }
        (None, Some(x)) => {
            // NOTE(review): there is no bearer token here, yet the requested id is honored.
            // the TODOs below suggest this was meant to be denied (or made configurable).
            // TODO: proper error code
            // TODO: maybe instead of this sharp edged warn, we have a config value?
            // TODO: check config for if we should deny or allow this
            x.parse().context("Parsing user_id param")
        }
    }
}

/// Read the optional `user_key_id` query parameter.
///
/// The parameter is only honored when `user_id` is non-zero (a specific user was selected);
/// this keeps people from reading someone else's keys. Returns `0` ("none") when `user_id`
/// is `0` or when the parameter is absent.
///
/// # Errors
///
/// Returns an error if `user_key_id` is present but is not a valid `u64`.
pub fn get_user_key_id_from_params(
    user_id: u64,
    params: &HashMap<String, String>,
) -> anyhow::Result<u64> {
    if user_id == 0 {
        // no specific user, so a key filter is never allowed
        return Ok(0);
    }

    match params.get("user_key_id") {
        // name the offending parameter in the error, like the other param parsers do
        Some(raw) => raw
            .parse::<u64>()
            .context("parsing user_key_id from params"),
        None => Ok(0),
    }
}

/// Read the optional `chain_id` query parameter.
///
/// Falls back to `app.config.chain_id` when the parameter is absent.
///
/// # Errors
///
/// Returns an error if `chain_id` is present but is not a valid `u64`.
pub fn get_chain_id_from_params(
    app: &Web3ProxyApp,
    params: &HashMap<String, String>,
) -> anyhow::Result<u64> {
    match params.get("chain_id") {
        // name the offending parameter in the error, like the other param parsers do
        Some(raw) => raw.parse::<u64>().context("parsing chain_id from params"),
        None => Ok(app.config.chain_id),
    }
}

/// Read the optional `query_start` query parameter as a UTC datetime.
///
/// The parameter is interpreted as whole seconds since the unix epoch. When it is absent,
/// the start defaults to 30 days before now.
///
/// # Errors
///
/// Returns an error if the value is not a valid `i64` or is out of range for
/// [`NaiveDateTime`].
pub fn get_query_start_from_params(
    params: &HashMap<String, String>,
) -> anyhow::Result<chrono::NaiveDateTime> {
    match params.get("query_start") {
        None => {
            // nothing requested. look back 30 days from now
            let thirty_days_ago = chrono::Utc::now() - chrono::Duration::days(30);

            Ok(thirty_days_ago.naive_utc())
        }
        Some(raw) => {
            // the param is seconds since the epoch
            let seconds = raw
                .parse::<i64>()
                .context("parsing timestamp query param")?;

            // TODO: error code 401
            NaiveDateTime::from_timestamp_opt(seconds, 0).context("parsing timestamp query param")
        }
    }
}

/// Read the optional `page` query parameter.
///
/// Defaults to page `0` when the parameter is absent.
///
/// # Errors
///
/// Returns an error if `page` is present but is not a valid `u64`.
pub fn get_page_from_params(params: &HashMap<String, String>) -> anyhow::Result<u64> {
    match params.get("page") {
        // TODO: error code 401
        Some(raw) => raw.parse::<u64>().context("parsing page query from params"),
        // no page requested. start at the first one
        None => Ok(0),
    }
}

/// Read the optional `query_window_seconds` query parameter.
///
/// Defaults to `0` when the parameter is absent; the stats queries in this file treat `0`
/// as "do not split the results into time windows".
///
/// # Errors
///
/// Returns an error if `query_window_seconds` is present but is not a valid `u64`.
pub fn get_query_window_seconds_from_params(
    params: &HashMap<String, String>,
) -> anyhow::Result<u64> {
    match params.get("query_window_seconds") {
        // TODO: error code 401
        Some(raw) => raw
            .parse::<u64>()
            .context("parsing query window seconds from params"),
        // no window requested
        None => Ok(0),
    }
}

/// stats aggregated across a large time period
///
/// Recognized query params: `user_id`, `chain_id`, `query_start`, `query_window_seconds`
/// and `page` (see the `get_*_from_params` helpers for their defaults).
///
/// The returned map always contains `page`, `page_size`, `chain_id`, `query_start` and
/// `aggregate` (the rows of the requested page). `query_window_seconds` is only included
/// when a non-zero window was requested.
///
/// Rows are summed `rpc_accounting` columns, restricted to records at or after
/// `query_start`, and additionally:
/// * grouped per time window when `query_window_seconds` is non-zero,
/// * grouped per chain when `chain_id` is `0`, otherwise filtered to that chain,
/// * filtered to (and grouped by) the user when `user_id` is non-zero.
///
/// # Errors
///
/// Returns an error if the db or redis connection cannot be obtained, if any query param
/// fails to parse or authorize, or if the database query fails.
pub async fn get_aggregate_rpc_stats_from_params(
    app: &Web3ProxyApp,
    bearer: Option<TypedHeader<Authorization<Bearer>>>,
    params: HashMap<String, String>,
) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
    let db_conn = app.db_conn().context("connecting to db")?;
    let redis_conn = app.redis_conn().await.context("connecting to redis")?;

    let user_id = get_user_id_from_params(redis_conn, bearer, &params).await?;
    let chain_id = get_chain_id_from_params(app, &params)?;
    let query_start = get_query_start_from_params(&params)?;
    let query_window_seconds = get_query_window_seconds_from_params(&params)?;
    let page = get_page_from_params(&params)?;

    // TODO: warn if unknown fields in params

    // TODO: page size from config
    let page_size = 200;

    trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");

    // TODO: minimum query_start of 90 days?

    let mut response = HashMap::new();

    response.insert("page", serde_json::to_value(page)?);
    response.insert("page_size", serde_json::to_value(page_size)?);
    response.insert("chain_id", serde_json::to_value(chain_id)?);
    // serialize the signed timestamp as-is. an `as u64` cast would wrap a pre-1970
    // query_start into a huge positive number. non-negative values serialize identically
    response.insert("query_start", serde_json::to_value(query_start.timestamp())?);

    // TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?
    // TODO: how do we count uptime?
    let mut q = rpc_accounting::Entity::find()
        .select_only()
        .column_as(
            rpc_accounting::Column::FrontendRequests.sum(),
            "total_requests",
        )
        .column_as(
            rpc_accounting::Column::CacheMisses.sum(),
            "total_cache_misses",
        )
        .column_as(rpc_accounting::Column::CacheHits.sum(), "total_cache_hits")
        .column_as(
            rpc_accounting::Column::BackendRetries.sum(),
            "total_backend_retries",
        )
        .column_as(
            rpc_accounting::Column::SumResponseBytes.sum(),
            "total_response_bytes",
        )
        .column_as(
            // TODO: can we sum bools like this?
            rpc_accounting::Column::ErrorResponse.sum(),
            "total_error_responses",
        )
        .column_as(
            rpc_accounting::Column::SumResponseMillis.sum(),
            "total_response_millis",
        )
        .order_by_asc(rpc_accounting::Column::PeriodDatetime.min());

    // TODO: DRYer
    if query_window_seconds != 0 {
        // bucket each record by the start of the window it falls in.
        // the resulting column is named "query_window_seconds" in the rows
        // TODO: is there a better way to do this? how can we get "period_datetime" into this with types?
        // TODO: how can we get the first window to start at query_start_timestamp
        let window_start = Expr::cust_with_values(
            "FLOOR(UNIX_TIMESTAMP(rpc_accounting.period_datetime) / ?) * ?",
            [query_window_seconds, query_window_seconds],
        );

        response.insert(
            "query_window_seconds",
            serde_json::to_value(query_window_seconds)?,
        );

        q = q
            .column_as(window_start, "query_window_seconds")
            .group_by(Expr::cust("query_window_seconds"));
    }
    // TODO: order by more than this?
    // when query_window_seconds is not set we aggregate all records

    let mut condition =
        Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));

    if chain_id.is_zero() {
        // fetch all the chains. don't filter, but report each chain separately
        // TODO: wait. do we want chain id on the logs? we can get that by joining key
        q = q
            .column(rpc_accounting::Column::ChainId)
            .group_by(rpc_accounting::Column::ChainId);
    } else {
        condition = condition.add(rpc_accounting::Column::ChainId.eq(chain_id));
    }

    // 0 means everyone. don't filter on user
    if !user_id.is_zero() {
        // TODO: authentication here? or should that be higher in the stack? here sems safest
        // TODO: only join some columns
        // TODO: are these joins correct?
        // TODO: what about keys where they are the secondary users?
        q = q
            .join(
                JoinType::InnerJoin,
                rpc_accounting::Relation::UserKeys.def(),
            )
            .column(user_keys::Column::UserId)
            .group_by(user_keys::Column::UserId);

        condition = condition.add(user_keys::Column::UserId.eq(user_id));
    }

    let q = q.filter(condition);

    // TODO: enum between searching on user_key_id on user_id
    // TODO: handle secondary users, too

    // log query here. i think sea orm has a useful log level for this

    let aggregate = q
        .into_json()
        .paginate(&db_conn, page_size)
        .fetch_page(page)
        .await?;

    response.insert("aggregate", serde_json::Value::Array(aggregate));

    Ok(response)
}

/// stats grouped by key_id and error_repsponse and method and key
pub async fn get_detailed_stats(
    app: &Web3ProxyApp,
    bearer: Option<TypedHeader<Authorization<Bearer>>>,
    params: HashMap<String, String>,
) -> anyhow::Result<HashMap<&str, serde_json::Value>> {
    let db_conn = app.db_conn().context("connecting to db")?;
    let redis_conn = app.redis_conn().await.context("connecting to redis")?;

    let user_id = get_user_id_from_params(redis_conn, bearer, &params).await?;
    let user_key_id = get_user_key_id_from_params(user_id, &params)?;
    let chain_id = get_chain_id_from_params(app, &params)?;
    let query_start = get_query_start_from_params(&params)?;
    let query_window_seconds = get_query_window_seconds_from_params(&params)?;
    let page = get_page_from_params(&params)?;
    // TODO: handle secondary users, too

    // TODO: page size from config
    let page_size = 200;

    // TODO: minimum query_start of 90 days?

    let mut response = HashMap::new();

    response.insert("page", serde_json::to_value(page)?);
    response.insert("page_size", serde_json::to_value(page_size)?);
    response.insert("chain_id", serde_json::to_value(chain_id)?);
    response.insert(
        "query_start",
        serde_json::to_value(query_start.timestamp() as u64)?,
    );

    // TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?
    // TODO: how do we count uptime?
    let q = rpc_accounting::Entity::find()
        .select_only()
        // groups
        .column(rpc_accounting::Column::ErrorResponse)
        .group_by(rpc_accounting::Column::ErrorResponse)
        .column(rpc_accounting::Column::Method)
        .group_by(rpc_accounting::Column::Method)
        // aggregate columns
        .column_as(
            rpc_accounting::Column::FrontendRequests.sum(),
            "total_requests",
        )
        .column_as(
            rpc_accounting::Column::CacheMisses.sum(),
            "total_cache_misses",
        )
        .column_as(rpc_accounting::Column::CacheHits.sum(), "total_cache_hits")
        .column_as(
            rpc_accounting::Column::BackendRetries.sum(),
            "total_backend_retries",
        )
        .column_as(
            rpc_accounting::Column::SumResponseBytes.sum(),
            "total_response_bytes",
        )
        .column_as(
            // TODO: can we sum bools like this?
            rpc_accounting::Column::ErrorResponse.sum(),
            "total_error_responses",
        )
        .column_as(
            rpc_accounting::Column::SumResponseMillis.sum(),
            "total_response_millis",
        )
        // TODO: order on method next?
        .order_by_asc(rpc_accounting::Column::PeriodDatetime.min());

    let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));

    let (condition, q) = if chain_id.is_zero() {
        // fetch all the chains. don't filter
        // TODO: wait. do we want chain id on the logs? we can get that by joining key
        let q = q
            .column(rpc_accounting::Column::ChainId)
            .group_by(rpc_accounting::Column::ChainId);

        (condition, q)
    } else {
        let condition = condition.add(rpc_accounting::Column::ChainId.eq(chain_id));

        (condition, q)
    };

    let (condition, q) = if user_id == 0 {
        // 0 means everyone. don't filter on user
        (condition, q)
    } else {
        // TODO: move authentication here?
        // TODO: what about keys where this user is a secondary user?
        let q = q
            .join(
                JoinType::InnerJoin,
                rpc_accounting::Relation::UserKeys.def(),
            )
            .column(user_keys::Column::UserId)
            .group_by(user_keys::Column::UserId);

        let condition = condition.add(user_keys::Column::UserId.eq(user_id));

        let q = if user_key_id == 0 {
            q.column(user_keys::Column::UserId)
                .group_by(user_keys::Column::UserId)
        } else {
            response.insert("user_key_id", serde_json::to_value(user_key_id)?);

            // no need to group_by user_id when we are grouping by key_id
            q.column(user_keys::Column::Id)
                .group_by(user_keys::Column::Id)
        };

        (condition, q)
    };

    let q = if query_window_seconds != 0 {
        /*
        let query_start_timestamp: u64 = query_start
            .timestamp()
            .try_into()
            .context("query_start to timestamp")?;
        */
        // TODO: is there a better way to do this? how can we get "period_datetime" into this with types?
        // TODO: how can we get the first window to start at query_start_timestamp
        let expr = Expr::cust_with_values(
            "FLOOR(UNIX_TIMESTAMP(rpc_accounting.period_datetime) / ?) * ?",
            [query_window_seconds, query_window_seconds],
        );

        response.insert(
            "query_window_seconds",
            serde_json::to_value(query_window_seconds)?,
        );

        q.column_as(expr, "query_window_seconds")
            .group_by(Expr::cust("query_window_seconds"))
    } else {
        // TODO: order by more than this?
        // query_window_seconds is not set so we aggregate all records
        q
    };

    let q = q.filter(condition);

    // log query here. i think sea orm has a useful log level for this

    // TODO: transform this into a nested hashmap instead of a giant table?
    let r = q
        .into_json()
        .paginate(&db_conn, page_size)
        .fetch_page(page)
        .await?;

    response.insert("detailed_aggregate", serde_json::Value::Array(r));

    // number of keys
    // number of secondary keys
    // avg and max concurrent requests per second per api key

    Ok(response)
}
query_window_seconds 2022-10-20 07:44:33 +03:00			`use anyhow::Context;`
DRYer user queries 2022-10-20 09:17:20 +03:00			`use axum::{`
			`headers::{authorization::Bearer, Authorization},`
			`TypedHeader,`
			`};`
			`use chrono::NaiveDateTime;`
query_window_seconds 2022-10-20 07:44:33 +03:00			`use entities::{rpc_accounting, user_keys};`
more stats 2022-10-20 02:02:34 +03:00			`use hashbrown::HashMap;`
query_window_seconds 2022-10-20 07:44:33 +03:00			`use migration::Expr;`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`use num::Zero;`
DRYer user queries 2022-10-20 09:17:20 +03:00			`use redis_rate_limiter::{redis::AsyncCommands, RedisConnection};`
cleanup 2022-10-20 00:34:05 +03:00			`use sea_orm::{`
less unwrap 2022-10-20 09:54:45 +03:00			`ColumnTrait, Condition, EntityTrait, JoinType, PaginatorTrait, QueryFilter, QueryOrder,`
			`QuerySelect, RelationTrait,`
cleanup 2022-10-20 00:34:05 +03:00			`};`
query_window_seconds 2022-10-20 07:44:33 +03:00			`use tracing::trace;`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00
DRYer user queries 2022-10-20 09:17:20 +03:00			`use crate::app::Web3ProxyApp;`

			`/// get the attached address from redis for the given auth_token.`
			`/// 0 means all users`
less unwrap 2022-10-20 09:54:45 +03:00			`async fn get_user_id_from_params(`
DRYer user queries 2022-10-20 09:17:20 +03:00			`mut redis_conn: RedisConnection,`
			`// this is a long type. should we strip it down?`
			`bearer: Option<TypedHeader<Authorization<Bearer>>>,`
			`params: &HashMap<String, String>,`
			`) -> anyhow::Result<u64> {`
			`match (bearer, params.get("user_id")) {`
			`(Some(bearer), Some(user_id)) => {`
			`// check for the bearer cache key`
			`// TODO: move this to a helper function`
			`let bearer_cache_key = format!("bearer:{}", bearer.token());`

			`// get the user id that is attached to this bearer token`
			`redis_conn`
			`.get::<_, u64>(bearer_cache_key)`
			`.await`
			`// TODO: this should be a 403`
			`.context("fetching user_key_id from redis with bearer_cache_key")`
			`}`
			`(_, None) => {`
			`// they have a bearer token. we don't care about it on public pages`
			`// 0 means all`
			`Ok(0)`
			`}`
			`(None, Some(x)) => {`
			`// they do not have a bearer token, but requested a specific id. block`
			`// TODO: proper error code`
			`// TODO: maybe instead of this sharp edged warn, we have a config value?`
			`// TODO: check config for if we should deny or allow this`
			`x.parse().context("Parsing user_id param")`
			`}`
			`}`
			`}`

			`/// only allow user_key to be set if user_id is also set.`
			`/// this will keep people from reading someone else's keys.`
			`/// 0 means none.`
more user endpoints 2022-10-26 00:10:05 +03:00			`pub fn get_user_key_id_from_params(`
less unwrap 2022-10-20 09:54:45 +03:00			`user_id: u64,`
			`params: &HashMap<String, String>,`
			`) -> anyhow::Result<u64> {`
DRYer user queries 2022-10-20 09:17:20 +03:00			`if user_id > 0 {`
Update user_queries.rs 2022-10-20 20:43:31 +03:00			`params.get("user_key_id").map_or_else(`
DRYer user queries 2022-10-20 09:17:20 +03:00			`\|\| Ok(0),`
			`\|c\| {`
			`let c = c.parse()?;`

			`Ok(c)`
			`},`
			`)`
			`} else {`
			`Ok(0)`
			`}`
			`}`

more user endpoints 2022-10-26 00:10:05 +03:00			`pub fn get_chain_id_from_params(`
DRYer user queries 2022-10-20 09:17:20 +03:00			`app: &Web3ProxyApp,`
			`params: &HashMap<String, String>,`
			`) -> anyhow::Result<u64> {`
			`params.get("chain_id").map_or_else(`
			`\|\| Ok(app.config.chain_id),`
			`\|c\| {`
			`let c = c.parse()?;`

			`Ok(c)`
			`},`
			`)`
			`}`

more user endpoints 2022-10-26 00:10:05 +03:00			`pub fn get_query_start_from_params(`
DRYer user queries 2022-10-20 09:17:20 +03:00			`params: &HashMap<String, String>,`
			`) -> anyhow::Result<chrono::NaiveDateTime> {`
			`params.get("query_start").map_or_else(`
			`\|\| {`
			`// no timestamp in params. set default`
			`let x = chrono::Utc::now() - chrono::Duration::days(30);`

			`Ok(x.naive_utc())`
			`},`
			`\|x: &String\| {`
			`// parse the given timestamp`
			`let x = x.parse::<i64>().context("parsing timestamp query param")?;`

			`// TODO: error code 401`
			`let x =`
			`NaiveDateTime::from_timestamp_opt(x, 0).context("parsing timestamp query param")?;`

			`Ok(x)`
			`},`
			`)`
			`}`

more user endpoints 2022-10-26 00:10:05 +03:00			`pub fn get_page_from_params(params: &HashMap<String, String>) -> anyhow::Result<u64> {`
update sea-orm and entities 2022-10-25 06:41:59 +03:00			`params.get("page").map_or_else::<anyhow::Result<u64>, _, _>(`
			`\|\| {`
			`// no page in params. set default`
			`Ok(0)`
			`},`
			`\|x: &String\| {`
			`// parse the given timestamp`
			`// TODO: error code 401`
			`let x = x.parse().context("parsing page query from params")?;`
DRYer user queries 2022-10-20 09:17:20 +03:00
update sea-orm and entities 2022-10-25 06:41:59 +03:00			`Ok(x)`
			`},`
			`)`
DRYer user queries 2022-10-20 09:17:20 +03:00			`}`

more user endpoints 2022-10-26 00:10:05 +03:00			`pub fn get_query_window_seconds_from_params(`
			`params: &HashMap<String, String>,`
			`) -> anyhow::Result<u64> {`
DRYer user queries 2022-10-20 09:17:20 +03:00			`params.get("query_window_seconds").map_or_else(`
			`\|\| {`
			`// no page in params. set default`
			`Ok(0)`
			`},`
			`\|x: &String\| {`
			`// parse the given timestamp`
			`// TODO: error code 401`
			`let x = x`
			`.parse()`
			`.context("parsing query window seconds from params")?;`

			`Ok(x)`
			`},`
			`)`
			`}`

query_window_seconds 2022-10-20 07:44:33 +03:00			`/// stats aggregated across a large time period`
DRYer user queries 2022-10-20 09:17:20 +03:00			`pub async fn get_aggregate_rpc_stats_from_params(`
			`app: &Web3ProxyApp,`
			`bearer: Option<TypedHeader<Authorization<Bearer>>>,`
			`params: HashMap<String, String>,`
query_window_seconds 2022-10-20 07:44:33 +03:00			`) -> anyhow::Result<HashMap<&str, serde_json::Value>> {`
DRYer user queries 2022-10-20 09:17:20 +03:00			`let db_conn = app.db_conn().context("connecting to db")?;`
			`let redis_conn = app.redis_conn().await.context("connecting to redis")?;`

less unwrap 2022-10-20 09:54:45 +03:00			`let user_id = get_user_id_from_params(redis_conn, bearer, &params).await?;`
DRYer user queries 2022-10-20 09:17:20 +03:00			`let chain_id = get_chain_id_from_params(app, &params)?;`
			`let query_start = get_query_start_from_params(&params)?;`
			`let query_window_seconds = get_query_window_seconds_from_params(&params)?;`
less unwrap 2022-10-20 09:54:45 +03:00			`let page = get_page_from_params(&params)?;`
DRYer user queries 2022-10-20 09:17:20 +03:00
			`// TODO: warn if unknown fields in params`

			`// TODO: page size from config`
			`let page_size = 200;`

aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`trace!(?chain_id, %query_start, ?user_id, "get_aggregate_stats");`
cleanup 2022-10-20 00:34:05 +03:00
query_window_seconds 2022-10-20 07:44:33 +03:00			`// TODO: minimum query_start of 90 days?`

			`let mut response = HashMap::new();`

			`response.insert("page", serde_json::to_value(page)?);`
			`response.insert("page_size", serde_json::to_value(page_size)?);`
			`response.insert("chain_id", serde_json::to_value(chain_id)?);`
DRYer user queries 2022-10-20 09:17:20 +03:00			`response.insert(`
			`"query_start",`
less unwrap 2022-10-20 09:54:45 +03:00			`serde_json::to_value(query_start.timestamp() as u64)?,`
DRYer user queries 2022-10-20 09:17:20 +03:00			`);`
query_window_seconds 2022-10-20 07:44:33 +03:00
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`// TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?`
			`// TODO: how do we count uptime?`
			`let q = rpc_accounting::Entity::find()`
			`.select_only()`
			`.column_as(`
			`rpc_accounting::Column::FrontendRequests.sum(),`
			`"total_requests",`
			`)`
			`.column_as(`
			`rpc_accounting::Column::CacheMisses.sum(),`
			`"total_cache_misses",`
			`)`
			`.column_as(rpc_accounting::Column::CacheHits.sum(), "total_cache_hits")`
			`.column_as(`
			`rpc_accounting::Column::BackendRetries.sum(),`
			`"total_backend_retries",`
			`)`
			`.column_as(`
			`rpc_accounting::Column::SumResponseBytes.sum(),`
			`"total_response_bytes",`
			`)`
			`.column_as(`
			`// TODO: can we sum bools like this?`
			`rpc_accounting::Column::ErrorResponse.sum(),`
			`"total_error_responses",`
			`)`
			`.column_as(`
			`rpc_accounting::Column::SumResponseMillis.sum(),`
			`"total_response_millis",`
query_window_seconds 2022-10-20 07:44:33 +03:00			`)`
			`.order_by_asc(rpc_accounting::Column::PeriodDatetime.min());`

less unwrap 2022-10-20 09:54:45 +03:00			`// TODO: DRYer`
DRYer user queries 2022-10-20 09:17:20 +03:00			`let q = if query_window_seconds != 0 {`
			`/*`
			`let query_start_timestamp: u64 = query_start`
			`.timestamp()`
			`.try_into()`
			`.context("query_start to timestamp")?;`
			`*/`
query_window_seconds 2022-10-20 07:44:33 +03:00			`// TODO: is there a better way to do this? how can we get "period_datetime" into this with types?`
DRYer user queries 2022-10-20 09:17:20 +03:00			`// TODO: how can we get the first window to start at query_start_timestamp`
query_window_seconds 2022-10-20 07:44:33 +03:00			`let expr = Expr::cust_with_values(`
			`"FLOOR(UNIX_TIMESTAMP(rpc_accounting.period_datetime) / ?) * ?",`
			`[query_window_seconds, query_window_seconds],`
			`);`

			`response.insert(`
			`"query_window_seconds",`
			`serde_json::to_value(query_window_seconds)?,`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`);`

copypasta query window seconds 2022-10-20 22:01:07 +03:00			`q.column_as(expr, "query_window_seconds")`
			`.group_by(Expr::cust("query_window_seconds"))`
query_window_seconds 2022-10-20 07:44:33 +03:00			`} else {`
			`// TODO: order by more than this?`
less unwrap 2022-10-20 09:54:45 +03:00			`// query_window_seconds is not set so we aggregate all records`
query_window_seconds 2022-10-20 07:44:33 +03:00			`q`
			`};`

query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));`

aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`let (condition, q) = if chain_id.is_zero() {`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`// fetch all the chains. don't filter`
cleanup 2022-10-20 00:34:05 +03:00			`// TODO: wait. do we want chain id on the logs? we can get that by joining key`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`let q = q`
			`.column(rpc_accounting::Column::ChainId)`
			`.group_by(rpc_accounting::Column::ChainId);`

aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`(condition, q)`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`} else {`
			`let condition = condition.add(rpc_accounting::Column::ChainId.eq(chain_id));`

aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`(condition, q)`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`};`

aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`let (condition, q) = if user_id.is_zero() {`
			`// 0 means everyone. don't filter on user`
			`(condition, q)`
			`} else {`
			`// TODO: authentication here? or should that be higher in the stack? here sems safest`
			`// TODO: only join some columns`
			`// TODO: are these joins correct?`
			`// TODO: what about keys where they are the secondary users?`
			`let q = q`
			`.join(`
			`JoinType::InnerJoin,`
			`rpc_accounting::Relation::UserKeys.def(),`
			`)`
			`.column(user_keys::Column::UserId)`
			`.group_by(user_keys::Column::UserId);`

			`let condition = condition.add(user_keys::Column::UserId.eq(user_id));`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00
aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`(condition, q)`
			`};`

			`let q = q.filter(condition);`
cleanup 2022-10-20 00:34:05 +03:00
aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`// TODO: enum between searching on user_key_id on user_id`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`// TODO: handle secondary users, too`

aggregate users or everybody on the same endpoint 2022-10-20 01:20:34 +03:00			`// log query here. i think sea orm has a useful log level for this`

query_window_seconds 2022-10-20 07:44:33 +03:00			`let aggregate = q`
			`.into_json()`
DRYer user queries 2022-10-20 09:17:20 +03:00			`.paginate(&db_conn, page_size)`
query_window_seconds 2022-10-20 07:44:33 +03:00			`.fetch_page(page)`
			`.await?;`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00
DRYer user queries 2022-10-20 09:17:20 +03:00			`response.insert("aggregate", serde_json::Value::Array(aggregate));`
query_window_seconds 2022-10-20 07:44:33 +03:00
			`Ok(response)`
query params and grouping for user stats 2022-10-19 21:38:00 +03:00			`}`
more stats 2022-10-20 02:02:34 +03:00
			`/// stats grouped by key_id and error_repsponse and method and key`
			`pub async fn get_detailed_stats(`
DRYer user queries 2022-10-20 09:17:20 +03:00			`app: &Web3ProxyApp,`
			`bearer: Option<TypedHeader<Authorization<Bearer>>>,`
			`params: HashMap<String, String>,`
more stats 2022-10-20 02:02:34 +03:00			`) -> anyhow::Result<HashMap<&str, serde_json::Value>> {`
less unwrap 2022-10-20 09:54:45 +03:00			`let db_conn = app.db_conn().context("connecting to db")?;`
			`let redis_conn = app.redis_conn().await.context("connecting to redis")?;`

			`let user_id = get_user_id_from_params(redis_conn, bearer, &params).await?;`
			`let user_key_id = get_user_key_id_from_params(user_id, &params)?;`
DRYer user queries 2022-10-20 09:17:20 +03:00			`let chain_id = get_chain_id_from_params(app, &params)?;`
			`let query_start = get_query_start_from_params(&params)?;`
less unwrap 2022-10-20 09:54:45 +03:00			`let query_window_seconds = get_query_window_seconds_from_params(&params)?;`
DRYer user queries 2022-10-20 09:17:20 +03:00			`let page = get_page_from_params(&params)?;`
less unwrap 2022-10-20 09:54:45 +03:00			`// TODO: handle secondary users, too`
DRYer user queries 2022-10-20 09:17:20 +03:00
			`// TODO: page size from config`
			`let page_size = 200;`
more stats 2022-10-20 02:02:34 +03:00
query_window_seconds 2022-10-20 07:44:33 +03:00			`// TODO: minimum query_start of 90 days?`

more stats 2022-10-20 02:02:34 +03:00			`let mut response = HashMap::new();`

query_window_seconds 2022-10-20 07:44:33 +03:00			`response.insert("page", serde_json::to_value(page)?);`
			`response.insert("page_size", serde_json::to_value(page_size)?);`
more stats 2022-10-20 02:02:34 +03:00			`response.insert("chain_id", serde_json::to_value(chain_id)?);`
DRYer user queries 2022-10-20 09:17:20 +03:00			`response.insert(`
			`"query_start",`
less unwrap 2022-10-20 09:54:45 +03:00			`serde_json::to_value(query_start.timestamp() as u64)?,`
DRYer user queries 2022-10-20 09:17:20 +03:00			`);`
more stats 2022-10-20 02:02:34 +03:00
			`// TODO: how do we get count reverts compared to other errors? does it matter? what about http errors to our users?`
			`// TODO: how do we count uptime?`
			`let q = rpc_accounting::Entity::find()`
			`.select_only()`
			`// groups`
			`.column(rpc_accounting::Column::ErrorResponse)`
			`.group_by(rpc_accounting::Column::ErrorResponse)`
			`.column(rpc_accounting::Column::Method)`
			`.group_by(rpc_accounting::Column::Method)`
			`// aggregate columns`
			`.column_as(`
			`rpc_accounting::Column::FrontendRequests.sum(),`
			`"total_requests",`
			`)`
			`.column_as(`
			`rpc_accounting::Column::CacheMisses.sum(),`
			`"total_cache_misses",`
			`)`
			`.column_as(rpc_accounting::Column::CacheHits.sum(), "total_cache_hits")`
			`.column_as(`
			`rpc_accounting::Column::BackendRetries.sum(),`
			`"total_backend_retries",`
			`)`
			`.column_as(`
			`rpc_accounting::Column::SumResponseBytes.sum(),`
			`"total_response_bytes",`
			`)`
			`.column_as(`
			`// TODO: can we sum bools like this?`
			`rpc_accounting::Column::ErrorResponse.sum(),`
			`"total_error_responses",`
			`)`
			`.column_as(`
			`rpc_accounting::Column::SumResponseMillis.sum(),`
			`"total_response_millis",`
query_window_seconds 2022-10-20 07:44:33 +03:00			`)`
			`// TODO: order on method next?`
			`.order_by_asc(rpc_accounting::Column::PeriodDatetime.min());`

			`let condition = Condition::all().add(rpc_accounting::Column::PeriodDatetime.gte(query_start));`

			`let (condition, q) = if chain_id.is_zero() {`
			`// fetch all the chains. don't filter`
			`// TODO: wait. do we want chain id on the logs? we can get that by joining key`
			`let q = q`
			`.column(rpc_accounting::Column::ChainId)`
			`.group_by(rpc_accounting::Column::ChainId);`

			`(condition, q)`
			`} else {`
			`let condition = condition.add(rpc_accounting::Column::ChainId.eq(chain_id));`

			`(condition, q)`
			`};`

less unwrap 2022-10-20 09:54:45 +03:00			`let (condition, q) = if user_id == 0 {`
query_window_seconds 2022-10-20 07:44:33 +03:00			`// 0 means everyone. don't filter on user`
			`(condition, q)`
			`} else {`
			`// TODO: move authentication here?`
			`// TODO: what about keys where this user is a secondary user?`
			`let q = q`
			`.join(`
			`JoinType::InnerJoin,`
			`rpc_accounting::Relation::UserKeys.def(),`
			`)`
			`.column(user_keys::Column::UserId)`
less unwrap 2022-10-20 09:54:45 +03:00			`.group_by(user_keys::Column::UserId);`
query_window_seconds 2022-10-20 07:44:33 +03:00
			`let condition = condition.add(user_keys::Column::UserId.eq(user_id));`

less unwrap 2022-10-20 09:54:45 +03:00			`let q = if user_key_id == 0 {`
			`q.column(user_keys::Column::UserId)`
			`.group_by(user_keys::Column::UserId)`
			`} else {`
			`response.insert("user_key_id", serde_json::to_value(user_key_id)?);`

			`// no need to group_by user_id when we are grouping by key_id`
			`q.column(user_keys::Column::Id)`
			`.group_by(user_keys::Column::Id)`
			`};`

query_window_seconds 2022-10-20 07:44:33 +03:00			`(condition, q)`
			`};`

copypasta query window seconds 2022-10-20 22:01:07 +03:00			`let q = if query_window_seconds != 0 {`
			`/*`
			`let query_start_timestamp: u64 = query_start`
			`.timestamp()`
			`.try_into()`
			`.context("query_start to timestamp")?;`
			`*/`
			`// TODO: is there a better way to do this? how can we get "period_datetime" into this with types?`
			`// TODO: how can we get the first window to start at query_start_timestamp`
			`let expr = Expr::cust_with_values(`
			`"FLOOR(UNIX_TIMESTAMP(rpc_accounting.period_datetime) / ?) * ?",`
			`[query_window_seconds, query_window_seconds],`
			`);`

			`response.insert(`
			`"query_window_seconds",`
			`serde_json::to_value(query_window_seconds)?,`
			`);`

			`q.column_as(expr, "query_window_seconds")`
			`.group_by(Expr::cust("query_window_seconds"))`
			`} else {`
			`// TODO: order by more than this?`
			`// query_window_seconds is not set so we aggregate all records`
			`q`
			`};`

remove unused variables 2022-10-20 23:26:14 +03:00			`let q = q.filter(condition);`

query_window_seconds 2022-10-20 07:44:33 +03:00			`// log query here. i think sea orm has a useful log level for this`

			`// TODO: transform this into a nested hashmap instead of a giant table?`
			`let r = q`
			`.into_json()`
DRYer user queries 2022-10-20 09:17:20 +03:00			`.paginate(&db_conn, page_size)`
query_window_seconds 2022-10-20 07:44:33 +03:00			`.fetch_page(page)`
			`.await?;`

			`response.insert("detailed_aggregate", serde_json::Value::Array(r));`

			`// number of keys`
			`// number of secondary keys`
			`// avg and max concurrent requests per second per api key`

			`Ok(response)`
			`}`